summaryrefslogtreecommitdiff
path: root/storage
diff options
context:
space:
mode:
authorVasil Dimov <vasil.dimov@oracle.com>2011-02-23 19:33:41 +0200
committerVasil Dimov <vasil.dimov@oracle.com>2011-02-23 19:33:41 +0200
commit19372f10940cef96664577a69e835ff3cdee8383 (patch)
tree1ceac4a64b06c1db388aba0116499a8dceb2a8a9 /storage
parent0c9dde5c702d055682cdc4f0d9950476abfc4b2d (diff)
parent9f699b39f86a67df2d848918b7553eb1922be36c (diff)
downloadmariadb-git-19372f10940cef96664577a69e835ff3cdee8383.tar.gz
Merge mysql-5.5-innodb -> mysql-5.5
Diffstat (limited to 'storage')
-rw-r--r--storage/innobase/CMakeLists.txt4
-rw-r--r--storage/innobase/handler/ha_innodb.cc18
-rw-r--r--storage/innobase/include/buf0flu.h13
-rw-r--r--storage/innobase/include/srv0srv.h7
-rw-r--r--storage/innobase/include/sync0sync.h6
-rw-r--r--storage/innobase/include/trx0purge.h22
-rw-r--r--storage/innobase/include/trx0rseg.h12
-rw-r--r--storage/innobase/include/trx0sys.h10
-rw-r--r--storage/innobase/include/trx0sys.ic12
-rw-r--r--storage/innobase/include/ut0bh.h152
-rw-r--r--storage/innobase/include/ut0bh.ic125
-rw-r--r--storage/innobase/srv/srv0srv.c22
-rw-r--r--storage/innobase/sync/sync0sync.c14
-rw-r--r--storage/innobase/trx/trx0purge.c399
-rw-r--r--storage/innobase/trx/trx0rseg.c52
-rw-r--r--storage/innobase/trx/trx0sys.c43
-rw-r--r--storage/innobase/trx/trx0trx.c256
-rw-r--r--storage/innobase/ut/ut0bh.c164
-rw-r--r--storage/innobase/ut/ut0ut.c9
19 files changed, 969 insertions, 371 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt
index 4bbda0d2477..d4b98f2af0d 100644
--- a/storage/innobase/CMakeLists.txt
+++ b/storage/innobase/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@@ -246,7 +246,7 @@ SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c
trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c
usr/usr0sess.c
ut/ut0byte.c ut/ut0dbg.c ut/ut0list.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c
- ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c)
+ ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c ut/ut0bh.c)
IF(WITH_INNODB)
# Legacy option
diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
index daeaca90aec..3763e6f9cef 100644
--- a/storage/innobase/handler/ha_innodb.cc
+++ b/storage/innobase/handler/ha_innodb.cc
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2000, 2010, MySQL AB & Innobase Oy. All Rights Reserved.
+Copyright (c) 2000, 2011, MySQL AB & Innobase Oy. All Rights Reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.
@@ -262,7 +262,7 @@ static PSI_mutex_info all_innodb_mutexes[] = {
# endif /* UNIV_MEM_DEBUG */
{&mem_pool_mutex_key, "mem_pool_mutex", 0},
{&mutex_list_mutex_key, "mutex_list_mutex", 0},
- {&purge_sys_mutex_key, "purge_sys_mutex", 0},
+ {&purge_sys_bh_mutex_key, "purge_sys_bh_mutex", 0},
{&recv_sys_mutex_key, "recv_sys_mutex", 0},
{&rseg_mutex_key, "rseg_mutex", 0},
# ifdef UNIV_SYNC_DEBUG
@@ -10982,16 +10982,23 @@ static MYSQL_SYSVAR_ULONG(io_capacity, srv_io_capacity,
static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
PLUGIN_VAR_OPCMDARG,
- "Number of UNDO logs to purge in one batch from the history list. "
- "Default is 20",
+ "Number of UNDO log pages to purge in one batch from the history list.",
NULL, NULL,
20, /* Default setting */
1, /* Minimum value */
5000, 0); /* Maximum value */
+static MYSQL_SYSVAR_ULONG(rollback_segments, srv_rollback_segments,
+ PLUGIN_VAR_OPCMDARG,
+ "Number of UNDO logs to use.",
+ NULL, NULL,
+ 128, /* Default setting */
+ 1, /* Minimum value */
+ TRX_SYS_N_RSEGS, 0); /* Maximum value */
+
static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads,
PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
- "Purge threads can be either 0 or 1. Default is 0.",
+ "Purge threads can be either 0 or 1.",
NULL, NULL,
0, /* Default setting */
0, /* Minimum value */
@@ -11342,6 +11349,7 @@ static struct st_mysql_sys_var* innobase_system_variables[]= {
MYSQL_SYSVAR(io_capacity),
MYSQL_SYSVAR(purge_threads),
MYSQL_SYSVAR(purge_batch_size),
+ MYSQL_SYSVAR(rollback_segments),
NULL
};
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index 28d9b90e755..ae27f5dab0e 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -138,7 +138,18 @@ UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ enum buf_flush type); /*!< in: BUF_FLUSH_LRU
+ or BUF_FLUSH_LIST */
+/******************************************************************//**
+Waits until a flush batch of the given type ends. This is called by
+a thread that only wants to wait for a flush to end but doesn't do
+any flushing itself. */
+UNIV_INTERN
+void
+buf_flush_wait_batch_end_wait_only(
+/*===============================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
enum buf_flush type); /*!< in: BUF_FLUSH_LRU
or BUF_FLUSH_LIST */
/********************************************************************//**
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index d600ad4a034..252d1424c63 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, 2009, Google Inc.
Copyright (c) 2009, Percona Inc.
@@ -294,9 +294,12 @@ extern ulint srv_log_waits;
/* the number of purge threads to use from the worker pool (currently 0 or 1) */
extern ulong srv_n_purge_threads;
-/* the number of records to purge in one batch */
+/* the number of pages to purge in one batch */
extern ulong srv_purge_batch_size;
+/* the number of rollback segments to use */
+extern ulong srv_rollback_segments;
+
/* variable that counts amount of data read in total (in bytes) */
extern ulint srv_data_read;
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index fb3a24c5ee5..b4f9e21933f 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -93,7 +93,7 @@ extern mysql_pfs_key_t mem_hash_mutex_key;
# endif /* UNIV_MEM_DEBUG */
extern mysql_pfs_key_t mem_pool_mutex_key;
extern mysql_pfs_key_t mutex_list_mutex_key;
-extern mysql_pfs_key_t purge_sys_mutex_key;
+extern mysql_pfs_key_t purge_sys_bh_mutex_key;
extern mysql_pfs_key_t recv_sys_mutex_key;
extern mysql_pfs_key_t rseg_mutex_key;
# ifdef UNIV_SYNC_DEBUG
@@ -637,7 +637,6 @@ or row lock! */
#define SYNC_TREE_NODE_NEW 892
#define SYNC_TREE_NODE_FROM_HASH 891
#define SYNC_TREE_NODE 890
-#define SYNC_PURGE_SYS 810
#define SYNC_PURGE_LATCH 800
#define SYNC_TRX_UNDO 700
#define SYNC_RSEG 600
@@ -659,6 +658,7 @@ or row lock! */
#define SYNC_REC_LOCK 299
#define SYNC_TRX_LOCK_HEAP 298
#define SYNC_TRX_SYS_HEADER 290
+#define SYNC_PURGE_QUEUE 200
#define SYNC_LOG 170
#define SYNC_LOG_FLUSH_ORDER 147
#define SYNC_RECV 168
diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h
index d2730a68a78..0b83a76cab7 100644
--- a/storage/innobase/include/trx0purge.h
+++ b/storage/innobase/include/trx0purge.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -68,8 +68,9 @@ Creates the global purge system control structure and inits the history
mutex. */
UNIV_INTERN
void
-trx_purge_sys_create(void);
-/*======================*/
+trx_purge_sys_create(
+/*=================*/
+ ib_bh_t* ib_bh); /*!< in/own: UNDO log min binary heap*/
/********************************************************************//**
Frees the global purge system control structure. */
UNIV_INTERN
@@ -128,20 +129,20 @@ struct trx_purge_struct{
ulint state; /*!< Purge system state */
sess_t* sess; /*!< System session running the purge
query */
- trx_t* trx; /*!< System transaction running the purge
+ trx_t* trx; /*!< System transaction running the
+ purge
query: this trx is not in the trx list
of the trx system and it never ends */
que_t* query; /*!< The query graph which will do the
parallelized purge operation */
- rw_lock_t latch; /*!< The latch protecting the purge view.
- A purge operation must acquire an
- x-latch here for the instant at which
+ rw_lock_t latch; /*!< The latch protecting the purge
+ view. A purge operation must acquire
+ an x-latch here for the instant at which
it changes the purge view: an undo
log operation can prevent this by
obtaining an s-latch here. */
read_view_t* view; /*!< The purge will not remove undo logs
which are >= this view (purge view) */
- mutex_t mutex; /*!< Mutex protecting the fields below */
ulint n_pages_handled;/*!< Approximate number of undo log
pages processed in purge */
ulint handle_limit; /*!< Target of how many pages to get
@@ -179,6 +180,11 @@ struct trx_purge_struct{
mem_heap_t* heap; /*!< Temporary storage used during a
purge: can be emptied after purge
completes */
+ /*-----------------------------*/
+ ib_bh_t* ib_bh; /*!< Binary min-heap, ordered on
+ rseg_queue_t::trx_no. It is protected
+ by the bh_mutex */
+ mutex_t bh_mutex; /*!< Mutex protecting ib_bh */
};
#define TRX_PURGE_ON 1 /* purge operation is running */
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h
index a293d9b896e..5acde05de3d 100644
--- a/storage/innobase/include/trx0rseg.h
+++ b/storage/innobase/include/trx0rseg.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -113,7 +113,9 @@ void
trx_rseg_list_and_array_init(
/*=========================*/
trx_sysf_t* sys_header, /*!< in: trx system header */
+ ib_bh_t* ib_bh, /*!< in: rseg queue */
mtr_t* mtr); /*!< in: mtr */
+
/***************************************************************************
Free's an instance of the rollback segment in memory. */
UNIV_INTERN
@@ -180,6 +182,14 @@ struct trx_rseg_struct{
memory objects */
};
+/** For prioritising the rollback segments for purge. */
+struct rseg_queue_struct {
+ trx_id_t trx_no; /*!< trx_rseg_t::last_trx_no */
+ trx_rseg_t* rseg; /*!< Rollback segment */
+};
+
+typedef struct rseg_queue_struct rseg_queue_t;
+
/* Undo log segment slot in a rollback segment header */
/*-------------------------------------------------------------*/
#define TRX_RSEG_SLOT_PAGE_NO 0 /* Page number of the header page of
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index 63e3f6be934..dc0ca2285b9 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,6 +38,7 @@ Created 3/26/1996 Heikki Tuuri
#include "mem0mem.h"
#include "sync0sync.h"
#include "ut0lst.h"
+#include "ut0bh.h"
#include "read0types.h"
#include "page0types.h"
@@ -221,13 +222,6 @@ UNIV_INLINE
trx_id_t
trx_sys_get_new_trx_id(void);
/*========================*/
-/*****************************************************************//**
-Allocates a new transaction number.
-@return new, allocated trx number */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_no(void);
-/*========================*/
#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
Writes a trx id to an index page. In case that the id size changes in
diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic
index 385c7f4f0cc..355f118a1ec 100644
--- a/storage/innobase/include/trx0sys.ic
+++ b/storage/innobase/include/trx0sys.ic
@@ -369,16 +369,4 @@ trx_sys_get_new_trx_id(void)
return(id);
}
-/*****************************************************************//**
-Allocates a new transaction number.
-@return new, allocated trx number */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_no(void)
-/*========================*/
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- return(trx_sys_get_new_trx_id());
-}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/innobase/include/ut0bh.h b/storage/innobase/include/ut0bh.h
new file mode 100644
index 00000000000..1b211390283
--- /dev/null
+++ b/storage/innobase/include/ut0bh.h
@@ -0,0 +1,152 @@
+/***************************************************************************//**
+
+Copyright (c) 2011, Oracle Corpn. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0bh.h
+Binary min-heap interface.
+
+Created 2010-05-28 by Sunny Bains
+*******************************************************/
+
+#ifndef INNOBASE_UT0BH_H
+#define INNOBASE_UT0BH_H
+
+#include "univ.i"
+
+/** Comparison function for objects in the binary heap. */
+typedef int (*ib_bh_cmp_t)(const void* p1, const void* p2);
+
+typedef struct ib_bh_struct ib_bh_t;
+
+/**********************************************************************//**
+Get the number of elements in the binary heap.
+@return number of elements */
+UNIV_INLINE
+ulint
+ib_bh_size(
+/*=======*/
+ const ib_bh_t* ib_bh); /*!< in: instance */
+
+/**********************************************************************//**
+Test if binary heap is empty.
+@return TRUE if empty. */
+UNIV_INLINE
+ibool
+ib_bh_is_empty(
+/*===========*/
+ const ib_bh_t* ib_bh); /*!< in: instance */
+
+/**********************************************************************//**
+Test if binary heap is full.
+@return TRUE if full. */
+UNIV_INLINE
+ibool
+ib_bh_is_full(
+/*===========*/
+ const ib_bh_t* ib_bh); /*!< in: instance */
+
+/**********************************************************************//**
+Get a pointer to the element.
+@return pointer to element */
+UNIV_INLINE
+void*
+ib_bh_get(
+/*=======*/
+ ib_bh_t* ib_bh, /*!< in: instance */
+ ulint i); /*!< in: index */
+
+/**********************************************************************//**
+Copy an element to the binary heap.
+@return pointer to copied element */
+UNIV_INLINE
+void*
+ib_bh_set(
+/*======*/
+ ib_bh_t* ib_bh, /*!< in/out: instance */
+ ulint i, /*!< in: index */
+ const void* elem); /*!< in: element to add */
+
+/**********************************************************************//**
+Return the first element from the binary heap.
+@return pointer to first element or NULL if empty. */
+UNIV_INLINE
+void*
+ib_bh_first(
+/*========*/
+ ib_bh_t* ib_bh); /*!< in: instance */
+
+/**********************************************************************//**
+Return the last element from the binary heap.
+@return pointer to last element or NULL if empty. */
+UNIV_INLINE
+void*
+ib_bh_last(
+/*========*/
+ ib_bh_t* ib_bh); /*!< in/out: instance */
+
+/**********************************************************************//**
+Create a binary heap.
+@return a new binary heap */
+UNIV_INTERN
+ib_bh_t*
+ib_bh_create(
+/*=========*/
+ ib_bh_cmp_t compare, /*!< in: comparator */
+ ulint sizeof_elem, /*!< in: size of one element */
+ ulint max_elems); /*!< in: max elements allowed */
+
+/**********************************************************************//**
+Free a binary heap.
+@return a new binary heap */
+UNIV_INTERN
+void
+ib_bh_free(
+/*=======*/
+ ib_bh_t* ib_bh); /*!< in,own: instance */
+
+/**********************************************************************//**
+Add an element to the binary heap. Note: The element is copied.
+@return pointer to added element or NULL if full. */
+UNIV_INTERN
+void*
+ib_bh_push(
+/*=======*/
+ ib_bh_t* ib_bh, /*!< in/out: instance */
+ const void* elem); /*!< in: element to add */
+
+/**********************************************************************//**
+Remove the first element from the binary heap. */
+UNIV_INTERN
+void
+ib_bh_pop(
+/*======*/
+ ib_bh_t* ib_bh); /*!< in/out: instance */
+
+/** Binary heap data structure */
+struct ib_bh_struct {
+ ulint max_elems; /*!< max elements allowed */
+ ulint n_elems; /*!< current size */
+ ulint sizeof_elem; /*!< sizeof element */
+ ib_bh_cmp_t compare; /*!< comparator */
+};
+
+#ifndef UNIV_NONINL
+#include "ut0bh.ic"
+#endif
+
+#endif /* INNOBASE_UT0BH_H */
diff --git a/storage/innobase/include/ut0bh.ic b/storage/innobase/include/ut0bh.ic
new file mode 100644
index 00000000000..afbe58e7e3b
--- /dev/null
+++ b/storage/innobase/include/ut0bh.ic
@@ -0,0 +1,125 @@
+/***************************************************************************//**
+Copyright (c) 2011, Oracle Corpn. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0bh.ic
+Binary min-heap implementation.
+
+Created 2011-01-15 by Sunny Bains
+*******************************************************/
+
+#include "ut0bh.h"
+#include "ut0mem.h" /* For ut_memcpy() */
+
+/**********************************************************************//**
+Get the number of elements in the binary heap.
+@return number of elements */
+UNIV_INLINE
+ulint
+ib_bh_size(
+/*=======*/
+ const ib_bh_t* ib_bh) /*!< in: instance */
+{
+ return(ib_bh->n_elems);
+}
+
+/**********************************************************************//**
+Test if binary heap is empty.
+@return TRUE if empty. */
+UNIV_INLINE
+ibool
+ib_bh_is_empty(
+/*===========*/
+ const ib_bh_t* ib_bh) /*!< in: instance */
+{
+ return(ib_bh_size(ib_bh) == 0);
+}
+
+/**********************************************************************//**
+Test if binary heap is full.
+@return TRUE if full. */
+UNIV_INLINE
+ibool
+ib_bh_is_full(
+/*===========*/
+ const ib_bh_t* ib_bh) /*!< in: instance */
+{
+ return(ib_bh_size(ib_bh) >= ib_bh->max_elems);
+}
+
+/**********************************************************************//**
+Get a pointer to the element.
+@return pointer to element */
+UNIV_INLINE
+void*
+ib_bh_get(
+/*=======*/
+ ib_bh_t* ib_bh, /*!< in: instance */
+ ulint i) /*!< in: index */
+{
+ byte* ptr = (byte*) (ib_bh + 1);
+
+ ut_a(i < ib_bh_size(ib_bh));
+
+ return(ptr + (ib_bh->sizeof_elem * i));
+}
+
+/**********************************************************************//**
+Copy an element to the binary heap.
+@return pointer to copied element */
+UNIV_INLINE
+void*
+ib_bh_set(
+/*======*/
+ ib_bh_t* ib_bh, /*!< in/out: instance */
+ ulint i, /*!< in: index */
+ const void* elem) /*!< in: element to add */
+{
+ void* ptr = ib_bh_get(ib_bh, i);
+
+ ut_memcpy(ptr, elem, ib_bh->sizeof_elem);
+
+ return(ptr);
+}
+
+/**********************************************************************//**
+Return the first element from the binary heap.
+@return pointer to first element or NULL if empty. */
+UNIV_INLINE
+void*
+ib_bh_first(
+/*========*/
+ ib_bh_t* ib_bh) /*!< in: instance */
+{
+ return(ib_bh_is_empty(ib_bh) ? NULL : ib_bh_get(ib_bh, 0));
+}
+
+/**********************************************************************//**
+Return the last element from the binary heap.
+@return pointer to last element or NULL if empty. */
+UNIV_INLINE
+void*
+ib_bh_last(
+/*========*/
+ ib_bh_t* ib_bh) /*!< in/out: instance */
+{
+ return(ib_bh_is_empty(ib_bh)
+ ? NULL
+ : ib_bh_get(ib_bh, ib_bh_size(ib_bh) - 1));
+}
+
+
diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
index 826145005a4..3c5eed448bd 100644
--- a/storage/innobase/srv/srv0srv.c
+++ b/storage/innobase/srv/srv0srv.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, 2009 Google Inc.
Copyright (c) 2009, Percona Inc.
@@ -269,9 +269,12 @@ UNIV_INTERN ulong srv_max_buf_pool_modified_pct = 75;
/* the number of purge threads to use from the worker pool (currently 0 or 1).*/
UNIV_INTERN ulong srv_n_purge_threads = 0;
-/* the number of records to purge in one batch */
+/* the number of pages to purge in one batch */
UNIV_INTERN ulong srv_purge_batch_size = 20;
+/* the number of rollback segments to use */
+UNIV_INTERN ulong srv_rollback_segments = TRX_SYS_N_RSEGS;
+
/* variable counts amount of data read in total (in bytes) */
UNIV_INTERN ulint srv_data_read = 0;
@@ -3096,6 +3099,7 @@ srv_purge_thread(
required by os_thread_create */
{
srv_slot_t* slot;
+ ulint retries = 0;
ulint slot_no = ULINT_UNDEFINED;
ulint n_total_purged = ULINT_UNDEFINED;
@@ -3122,7 +3126,7 @@ srv_purge_thread(
while (srv_shutdown_state != SRV_SHUTDOWN_EXIT_THREADS) {
- ulint n_pages_purged;
+ ulint n_pages_purged = 0;
/* If there are very few records to purge or the last
purge didn't purge any records then wait for activity.
@@ -3130,7 +3134,8 @@ srv_purge_thread(
because in the worst case we will end up waiting for
the next purge event. */
if (trx_sys->rseg_history_len < srv_purge_batch_size
- || n_total_purged == 0) {
+ || (n_total_purged == 0
+ && retries >= TRX_SYS_N_RSEGS)) {
os_event_t event;
@@ -3141,6 +3146,8 @@ srv_purge_thread(
mutex_exit(&kernel_mutex);
os_event_wait(event);
+
+ retries = 0;
}
/* Check for shutdown and whether we should do purge at all. */
@@ -3151,7 +3158,12 @@ srv_purge_thread(
break;
}
- n_total_purged = 0;
+ if (n_total_purged == 0 && retries <= TRX_SYS_N_RSEGS) {
+ ++retries;
+ } else if (n_total_purged > 0) {
+ retries = 0;
+ n_total_purged = 0;
+ }
/* Purge until there are no more records to purge and there is
no change in configuration or server state. */
diff --git a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
index 453314f465d..0f6a60ca260 100644
--- a/storage/innobase/sync/sync0sync.c
+++ b/storage/innobase/sync/sync0sync.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -1172,7 +1172,7 @@ sync_thread_add_level(
case SYNC_RSEG:
case SYNC_TRX_UNDO:
case SYNC_PURGE_LATCH:
- case SYNC_PURGE_SYS:
+ case SYNC_PURGE_QUEUE:
case SYNC_DICT_AUTOINC_MUTEX:
case SYNC_DICT_OPERATION:
case SYNC_DICT_HEADER:
@@ -1239,10 +1239,16 @@ sync_thread_add_level(
|| sync_thread_levels_g(array, SYNC_FSP, TRUE));
break;
case SYNC_TRX_UNDO_PAGE:
+ /* Purge is allowed to read in as many UNDO pages as it likes,
+ there was a bogus rule here earlier that forced the caller to
+ acquire the purge_sys_t::mutex. The purge mutex did not really
+ protect anything because it was only ever acquired by the
+ single purge thread. The purge thread can read the UNDO pages
+ without any covering mutex. */
+
ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
|| sync_thread_levels_contain(array, SYNC_RSEG)
- || sync_thread_levels_contain(array, SYNC_PURGE_SYS)
- || sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE, TRUE));
+ || sync_thread_levels_g(array, level - 1, TRUE));
break;
case SYNC_RSEG_HEADER:
ut_a(sync_thread_levels_contain(array, SYNC_RSEG));
diff --git a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c
index 4c787579a03..02ec9f1c072 100644
--- a/storage/innobase/trx/trx0purge.c
+++ b/storage/innobase/trx/trx0purge.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -57,8 +57,8 @@ UNIV_INTERN mysql_pfs_key_t trx_purge_latch_key;
#endif /* UNIV_PFS_RWLOCK */
#ifdef UNIV_PFS_MUTEX
-/* Key to register purge_sys_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t purge_sys_mutex_key;
+/* Key to register purge_sys_bh_mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t purge_sys_bh_mutex_key;
#endif /* UNIV_PFS_MUTEX */
/*****************************************************************//**
@@ -219,13 +219,16 @@ Creates the global purge system control structure and inits the history
mutex. */
UNIV_INTERN
void
-trx_purge_sys_create(void)
-/*======================*/
+trx_purge_sys_create(
+/*=================*/
+ ib_bh_t* ib_bh) /*!< in, own: UNDO log min binary heap */
{
ut_ad(mutex_own(&kernel_mutex));
- purge_sys = mem_alloc(sizeof(trx_purge_t));
+ purge_sys = mem_zalloc(sizeof(trx_purge_t));
+ /* Take ownership of ib_bh, we are responsible for freeing it. */
+ purge_sys->ib_bh = ib_bh;
purge_sys->state = TRX_STOP_PURGE;
purge_sys->n_pages_handled = 0;
@@ -237,8 +240,9 @@ trx_purge_sys_create(void)
rw_lock_create(trx_purge_latch_key,
&purge_sys->latch, SYNC_PURGE_LATCH);
- mutex_create(purge_sys_mutex_key,
- &purge_sys->mutex, SYNC_PURGE_SYS);
+ mutex_create(
+ purge_sys_bh_mutex_key, &purge_sys->bh_mutex,
+ SYNC_PURGE_QUEUE);
purge_sys->heap = mem_heap_create(256);
@@ -288,9 +292,12 @@ trx_purge_sys_close(void)
trx_undo_arr_free(purge_sys->arr);
rw_lock_free(&purge_sys->latch);
- mutex_free(&purge_sys->mutex);
+ mutex_free(&purge_sys->bh_mutex);
mem_heap_free(purge_sys->heap);
+
+ ib_bh_free(purge_sys->ib_bh);
+
mem_free(purge_sys);
purge_sys = NULL;
@@ -311,34 +318,31 @@ trx_purge_add_update_undo_to_history(
mtr_t* mtr) /*!< in: mtr */
{
trx_undo_t* undo;
- trx_rseg_t* rseg;
trx_rsegf_t* rseg_header;
-#ifdef UNIV_DEBUG
- trx_usegf_t* seg_header;
-#endif /* UNIV_DEBUG */
trx_ulogf_t* undo_header;
- ulint hist_size;
undo = trx->update_undo;
ut_ad(undo);
- rseg = undo->rseg;
+ ut_ad(mutex_own(&undo->rseg->mutex));
- ut_ad(mutex_own(&(rseg->mutex)));
-
- rseg_header = trx_rsegf_get(rseg->space, rseg->zip_size,
- rseg->page_no, mtr);
+ rseg_header = trx_rsegf_get(
+ undo->rseg->space, undo->rseg->zip_size, undo->rseg->page_no,
+ mtr);
undo_header = undo_page + undo->hdr_offset;
+ /* Add the log as the first in the history list */
+
+ if (undo->state != TRX_UNDO_CACHED) {
+ ulint hist_size;
#ifdef UNIV_DEBUG
- seg_header = undo_page + TRX_UNDO_SEG_HDR;
+ trx_usegf_t* seg_header = undo_page + TRX_UNDO_SEG_HDR;
#endif /* UNIV_DEBUG */
- if (undo->state != TRX_UNDO_CACHED) {
/* The undo log segment will not be reused */
- if (undo->id >= TRX_RSEG_N_SLOTS) {
+ if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) {
fprintf(stderr,
"InnoDB: Error: undo->id is %lu\n",
(ulong) undo->id);
@@ -347,42 +351,50 @@ trx_purge_add_update_undo_to_history(
trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr);
- hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- MLOG_4BYTES, mtr);
+ hist_size = mtr_read_ulint(
+ rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr);
+
ut_ad(undo->size == flst_get_len(
seg_header + TRX_UNDO_PAGE_LIST, mtr));
- mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
- hist_size + undo->size, MLOG_4BYTES, mtr);
+ mlog_write_ulint(
+ rseg_header + TRX_RSEG_HISTORY_SIZE,
+ hist_size + undo->size, MLOG_4BYTES, mtr);
}
- /* Add the log as the first in the history list */
- flst_add_first(rseg_header + TRX_RSEG_HISTORY,
- undo_header + TRX_UNDO_HISTORY_NODE, mtr);
- mutex_enter(&kernel_mutex);
- trx_sys->rseg_history_len++;
- mutex_exit(&kernel_mutex);
-
- if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) {
- /* Inform the purge thread that there is work to do. */
- srv_wake_purge_thread_if_not_active();
- }
+ flst_add_first(
+ rseg_header + TRX_RSEG_HISTORY,
+ undo_header + TRX_UNDO_HISTORY_NODE, mtr);
/* Write the trx number to the undo log header */
+
mlog_write_ull(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr);
+
/* Write information about delete markings to the undo log header */
if (!undo->del_marks) {
- mlog_write_ulint(undo_header + TRX_UNDO_DEL_MARKS, FALSE,
- MLOG_2BYTES, mtr);
+ mlog_write_ulint(
+ undo_header + TRX_UNDO_DEL_MARKS, FALSE,
+ MLOG_2BYTES, mtr);
}
- if (rseg->last_page_no == FIL_NULL) {
+ if (undo->rseg->last_page_no == FIL_NULL) {
+ undo->rseg->last_trx_no = trx->no;
+ undo->rseg->last_offset = undo->hdr_offset;
+ undo->rseg->last_page_no = undo->hdr_page_no;
+ undo->rseg->last_del_marks = undo->del_marks;
- rseg->last_page_no = undo->hdr_page_no;
- rseg->last_offset = undo->hdr_offset;
- rseg->last_trx_no = trx->no;
- rseg->last_del_marks = undo->del_marks;
+ /* FIXME: Add a bin heap validate function to check that
+ the rseg exists. */
+ }
+
+ mutex_enter(&kernel_mutex);
+ trx_sys->rseg_history_len++;
+ mutex_exit(&kernel_mutex);
+
+ if (!(trx_sys->rseg_history_len % srv_purge_batch_size)) {
+ /* Inform the purge thread that there is work to do. */
+ srv_wake_purge_thread_if_not_active();
}
}
@@ -411,7 +423,6 @@ trx_purge_free_segment(
/* fputs("Freeing an update undo log segment\n", stderr); */
- ut_ad(mutex_own(&(purge_sys->mutex)));
loop:
mtr_start(&mtr);
mutex_enter(&(rseg->mutex));
@@ -515,8 +526,6 @@ trx_purge_truncate_rseg_history(
mtr_t mtr;
trx_id_t undo_trx_no;
- ut_ad(mutex_own(&(purge_sys->mutex)));
-
mtr_start(&mtr);
mutex_enter(&(rseg->mutex));
@@ -609,10 +618,8 @@ trx_purge_truncate_history(void)
trx_id_t limit_trx_no;
undo_no_t limit_undo_no;
- ut_ad(mutex_own(&(purge_sys->mutex)));
-
- trx_purge_arr_get_biggest(purge_sys->arr, &limit_trx_no,
- &limit_undo_no);
+ trx_purge_arr_get_biggest(
+ purge_sys->arr, &limit_trx_no, &limit_undo_no);
if (limit_trx_no == 0) {
@@ -630,34 +637,29 @@ trx_purge_truncate_history(void)
ut_ad(limit_trx_no <= purge_sys->view->low_limit_no);
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+ for (rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+ rseg != NULL;
+ rseg = UT_LIST_GET_NEXT(rseg_list, rseg)) {
- while (rseg) {
- trx_purge_truncate_rseg_history(rseg, limit_trx_no,
- limit_undo_no);
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
+ trx_purge_truncate_rseg_history(
+ rseg, limit_trx_no, limit_undo_no);
}
}
/********************************************************************//**
Does a truncate if the purge array is empty. NOTE that when this function is
-called, the caller must not have any latches on undo log pages!
-@return TRUE if array empty */
+called, the caller must not have any latches on undo log pages! */
UNIV_INLINE
-ibool
+void
trx_purge_truncate_if_arr_empty(void)
/*=================================*/
{
- ut_ad(mutex_own(&(purge_sys->mutex)));
+ static ulint count;
- if (purge_sys->arr->n_used == 0) {
+ if (!(++count % TRX_SYS_N_RSEGS) && purge_sys->arr->n_used == 0) {
trx_purge_truncate_history();
-
- return(TRUE);
}
-
- return(FALSE);
}
/***********************************************************************//**
@@ -675,8 +677,8 @@ trx_purge_rseg_get_next_history_log(
trx_id_t trx_no;
ibool del_marks;
mtr_t mtr;
-
- ut_ad(mutex_own(&(purge_sys->mutex)));
+ rseg_queue_t rseg_queue;
+ const void* ptr;
mutex_enter(&(rseg->mutex));
@@ -688,8 +690,9 @@ trx_purge_rseg_get_next_history_log(
mtr_start(&mtr);
- undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
- rseg->last_page_no, &mtr);
+ undo_page = trx_undo_page_get_s_latched(
+ rseg->space, rseg->zip_size, rseg->last_page_no, &mtr);
+
log_hdr = undo_page + rseg->last_offset;
/* Increase the purge page count by one for every handled log */
@@ -698,6 +701,7 @@ trx_purge_rseg_get_next_history_log(
prev_log_addr = trx_purge_get_log_from_hist(
flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr));
+
if (prev_log_addr.page == FIL_NULL) {
/* No logs left in the history list */
@@ -712,11 +716,11 @@ trx_purge_rseg_get_next_history_log(
on the MySQL mailing list on Nov 9, 2004. The fut0lst.c
file-based list was corrupt. The prev node pointer was
FIL_NULL, even though the list length was over 8 million nodes!
- We assume that purge truncates the history list in moderate
+ We assume that purge truncates the history list in large
size pieces, and if we here reach the head of the list, the
- list cannot be longer than 20 000 undo logs now. */
+ list cannot be longer than 2000 000 undo logs now. */
- if (trx_sys->rseg_history_len > 20000) {
+ if (trx_sys->rseg_history_len > 2000000) {
ut_print_timestamp(stderr);
fprintf(stderr,
" InnoDB: Warning: purge reached the"
@@ -756,105 +760,150 @@ trx_purge_rseg_get_next_history_log(
rseg->last_trx_no = trx_no;
rseg->last_del_marks = del_marks;
+ rseg_queue.rseg = rseg;
+ rseg_queue.trx_no = rseg->last_trx_no;
+
+ /* Purge can also produce events, however these are already ordered
+ in the rollback segment and any user generated event will be greater
+ than the events that Purge produces. ie. Purge can never produce
+ events from an empty rollback segment. */
+
+ mutex_enter(&purge_sys->bh_mutex);
+
+ ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
+ ut_a(ptr != NULL);
+
+ mutex_exit(&purge_sys->bh_mutex);
+
mutex_exit(&(rseg->mutex));
}
/***********************************************************************//**
-Chooses the next undo log to purge and updates the info in purge_sys. This
-function is used to initialize purge_sys when the next record to purge is
-not known, and also to update the purge system info on the next record when
-purge has handled the whole undo log for a transaction. */
+Chooses the rollback segment with the smallest trx_id.
+@return zip_size if log is for a compressed table, ULINT_UNDEFINED if
+ no rollback segments to purge, 0 for non compressed tables. */
static
-void
-trx_purge_choose_next_log(void)
-/*===========================*/
+ulint
+trx_purge_get_rseg_with_min_trx_id(
+/*===============================*/
+ trx_purge_t* purge_sys) /*!< in/out: purge instance */
+
{
- trx_undo_rec_t* rec;
- trx_rseg_t* rseg;
- trx_rseg_t* min_rseg;
- trx_id_t min_trx_no;
- ulint space = 0; /* remove warning (??? bug ???) */
ulint zip_size = 0;
- ulint page_no = 0; /* remove warning (??? bug ???) */
- ulint offset = 0; /* remove warning (??? bug ???) */
- mtr_t mtr;
- ut_ad(mutex_own(&(purge_sys->mutex)));
- ut_ad(purge_sys->next_stored == FALSE);
+ mutex_enter(&purge_sys->bh_mutex);
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+ /* Only purge consumes events from the binary heap, user
+ threads only produce the events. */
- min_trx_no = IB_ULONGLONG_MAX;
+ if (!ib_bh_is_empty(purge_sys->ib_bh)) {
+ trx_rseg_t* rseg;
- min_rseg = NULL;
+ rseg = ((rseg_queue_t*) ib_bh_first(purge_sys->ib_bh))->rseg;
+ ib_bh_pop(purge_sys->ib_bh);
- while (rseg) {
- mutex_enter(&(rseg->mutex));
+ mutex_exit(&purge_sys->bh_mutex);
- if (rseg->last_page_no != FIL_NULL) {
+ purge_sys->rseg = rseg;
+ } else {
+ mutex_exit(&purge_sys->bh_mutex);
- if (min_rseg == NULL
- || min_trx_no > rseg->last_trx_no) {
+ purge_sys->rseg = NULL;
- min_rseg = rseg;
- min_trx_no = rseg->last_trx_no;
- space = rseg->space;
- zip_size = rseg->zip_size;
- ut_a(space == 0); /* We assume in purge of
- externally stored fields
- that space id == 0 */
- page_no = rseg->last_page_no;
- offset = rseg->last_offset;
- }
- }
+ return(ULINT_UNDEFINED);
+ }
- mutex_exit(&(rseg->mutex));
+ ut_a(purge_sys->rseg != NULL);
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
- }
+ mutex_enter(&purge_sys->rseg->mutex);
- if (min_rseg == NULL) {
+ ut_a(purge_sys->rseg->last_page_no != FIL_NULL);
- return;
- }
+ /* We assume in purge of externally stored fields
+ that space id == 0 */
+ ut_a(purge_sys->rseg->space == 0);
- mtr_start(&mtr);
+ zip_size = purge_sys->rseg->zip_size;
- if (!min_rseg->last_del_marks) {
- /* No need to purge this log */
+ ut_a(purge_sys->purge_trx_no <= purge_sys->rseg->last_trx_no);
- rec = &trx_purge_dummy_rec;
- } else {
- rec = trx_undo_get_first_rec(space, zip_size, page_no, offset,
- RW_S_LATCH, &mtr);
- if (rec == NULL) {
- /* Undo log empty */
+ purge_sys->purge_trx_no = purge_sys->rseg->last_trx_no;
+
+ purge_sys->hdr_offset = purge_sys->rseg->last_offset;
+
+ purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
+
+ mutex_exit(&purge_sys->rseg->mutex);
+
+ return(zip_size);
+}
+
+/***********************************************************************//**
+Position the purge sys "iterator" on the undo record to use for purging. */
+static
+void
+trx_purge_read_undo_rec(
+/*====================*/
+ trx_purge_t* purge_sys, /*!< in/out: purge instance */
+ ulint zip_size) /*!< in: block size or 0 */
+{
+ ulint page_no;
+ ulint offset = 0;
+ ib_uint64_t undo_no = 0;
+
+ purge_sys->hdr_offset = purge_sys->rseg->last_offset;
+ page_no = purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
+
+ if (purge_sys->rseg->last_del_marks) {
+ mtr_t mtr;
+ trx_undo_rec_t* undo_rec;
- rec = &trx_purge_dummy_rec;
+ mtr_start(&mtr);
+
+ undo_rec = trx_undo_get_first_rec(
+ 0 /* System space id */, zip_size,
+ purge_sys->hdr_page_no,
+ purge_sys->hdr_offset, RW_S_LATCH, &mtr);
+
+ if (undo_rec != NULL) {
+ offset = page_offset(undo_rec);
+ undo_no = trx_undo_rec_get_undo_no(undo_rec);
+ page_no = page_get_page_no(page_align(undo_rec));
}
+
+ mtr_commit(&mtr);
}
+ purge_sys->offset = offset;
+ purge_sys->page_no = page_no;
+ purge_sys->purge_undo_no = undo_no;
+
purge_sys->next_stored = TRUE;
- purge_sys->rseg = min_rseg;
+}
- purge_sys->hdr_page_no = page_no;
- purge_sys->hdr_offset = offset;
+/***********************************************************************//**
+Chooses the next undo log to purge and updates the info in purge_sys. This
+function is used to initialize purge_sys when the next record to purge is
+not known, and also to update the purge system info on the next record when
+purge has handled the whole undo log for a transaction. */
+static
+void
+trx_purge_choose_next_log(void)
+/*===========================*/
+{
+ ulint zip_size;
- purge_sys->purge_trx_no = min_trx_no;
+ ut_ad(purge_sys->next_stored == FALSE);
- if (rec == &trx_purge_dummy_rec) {
+ zip_size = trx_purge_get_rseg_with_min_trx_id(purge_sys);
- purge_sys->purge_undo_no = 0;
- purge_sys->page_no = page_no;
- purge_sys->offset = 0;
- } else {
- purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec);
+ if (purge_sys->rseg != NULL) {
- purge_sys->page_no = page_get_page_no(page_align(rec));
- purge_sys->offset = page_offset(rec);
+ trx_purge_read_undo_rec(purge_sys, zip_size);
+ } else {
+ /* There is nothing to do yet. */
+ os_thread_yield();
}
-
- mtr_commit(&mtr);
}
/***********************************************************************//**
@@ -880,7 +929,6 @@ trx_purge_get_next_rec(
ulint cmpl_info;
mtr_t mtr;
- ut_ad(mutex_own(&(purge_sys->mutex)));
ut_ad(purge_sys->next_stored);
space = purge_sys->rseg->space;
@@ -903,8 +951,8 @@ trx_purge_get_next_rec(
mtr_start(&mtr);
- undo_page = trx_undo_page_get_s_latched(space, zip_size,
- page_no, &mtr);
+ undo_page = trx_undo_page_get_s_latched(space, zip_size, page_no, &mtr);
+
rec = undo_page + offset;
rec2 = rec;
@@ -913,9 +961,9 @@ trx_purge_get_next_rec(
/* Try first to find the next record which requires a purge
operation from the same page of the same undo log */
- next_rec = trx_undo_page_get_next_rec(rec2,
- purge_sys->hdr_page_no,
- purge_sys->hdr_offset);
+ next_rec = trx_undo_page_get_next_rec(
+ rec2, purge_sys->hdr_page_no, purge_sys->hdr_offset);
+
if (next_rec == NULL) {
rec2 = trx_undo_get_next_rec(
rec2, purge_sys->hdr_page_no,
@@ -995,17 +1043,12 @@ trx_purge_fetch_next_rec(
{
trx_undo_rec_t* undo_rec;
- mutex_enter(&(purge_sys->mutex));
if (purge_sys->state == TRX_STOP_PURGE) {
trx_purge_truncate_if_arr_empty();
- mutex_exit(&(purge_sys->mutex));
-
return(NULL);
- }
-
- if (!purge_sys->next_stored) {
+ } else if (!purge_sys->next_stored) {
trx_purge_choose_next_log();
if (!purge_sys->next_stored) {
@@ -1020,8 +1063,6 @@ trx_purge_fetch_next_rec(
(ulong) purge_sys->n_pages_handled);
}
- mutex_exit(&(purge_sys->mutex));
-
return(NULL);
}
}
@@ -1032,18 +1073,12 @@ trx_purge_fetch_next_rec(
trx_purge_truncate_if_arr_empty();
- mutex_exit(&(purge_sys->mutex));
-
return(NULL);
- }
-
- if (purge_sys->purge_trx_no >= purge_sys->view->low_limit_no) {
+ } else if (purge_sys->purge_trx_no >= purge_sys->view->low_limit_no) {
purge_sys->state = TRX_STOP_PURGE;
trx_purge_truncate_if_arr_empty();
- mutex_exit(&(purge_sys->mutex));
-
return(NULL);
}
@@ -1052,12 +1087,13 @@ trx_purge_fetch_next_rec(
(ullint) purge_sys->purge_trx_no,
(ullint) purge_sys->purge_undo_no); */
- *roll_ptr = trx_undo_build_roll_ptr(FALSE, (purge_sys->rseg)->id,
- purge_sys->page_no,
- purge_sys->offset);
- *cell = trx_purge_arr_store_info(purge_sys->purge_trx_no,
- purge_sys->purge_undo_no);
+ *roll_ptr = trx_undo_build_roll_ptr(
+ FALSE, (purge_sys->rseg)->id, purge_sys->page_no,
+ purge_sys->offset);
+
+ *cell = trx_purge_arr_store_info(
+ purge_sys->purge_trx_no, purge_sys->purge_undo_no);
ut_ad(purge_sys->purge_trx_no < purge_sys->view->low_limit_no);
@@ -1066,8 +1102,6 @@ trx_purge_fetch_next_rec(
undo_rec = trx_purge_get_next_rec(heap);
- mutex_exit(&(purge_sys->mutex));
-
return(undo_rec);
}
@@ -1079,11 +1113,7 @@ trx_purge_rec_release(
/*==================*/
trx_undo_inf_t* cell) /*!< in: storage cell */
{
- mutex_enter(&(purge_sys->mutex));
-
trx_purge_arr_remove_info(cell);
-
- mutex_exit(&(purge_sys->mutex));
}
/*******************************************************************//**
@@ -1097,23 +1127,11 @@ trx_purge(
purge in one batch */
{
que_thr_t* thr;
- /* que_thr_t* thr2; */
ulint old_pages_handled;
- mutex_enter(&(purge_sys->mutex));
-
- if (purge_sys->trx->n_active_thrs > 0) {
+ ut_a(purge_sys->trx->n_active_thrs == 0);
- mutex_exit(&(purge_sys->mutex));
-
- /* Should not happen */
-
- ut_error;
-
- return(0);
- }
-
- rw_lock_x_lock(&(purge_sys->latch));
+ rw_lock_x_lock(&purge_sys->latch);
mutex_enter(&kernel_mutex);
@@ -1147,8 +1165,9 @@ trx_purge(
}
}
- purge_sys->view = read_view_oldest_copy_or_open_new(0,
- purge_sys->heap);
+ purge_sys->view = read_view_oldest_copy_or_open_new(
+ 0, purge_sys->heap);
+
mutex_exit(&kernel_mutex);
rw_lock_x_unlock(&(purge_sys->latch));
@@ -1159,7 +1178,6 @@ trx_purge(
old_pages_handled = purge_sys->n_pages_handled;
- mutex_exit(&(purge_sys->mutex));
mutex_enter(&kernel_mutex);
@@ -1167,15 +1185,8 @@ trx_purge(
ut_ad(thr);
- /* thr2 = que_fork_start_command(purge_sys->query);
-
- ut_ad(thr2); */
-
-
mutex_exit(&kernel_mutex);
- /* srv_que_task_enqueue(thr2); */
-
if (srv_print_thread_releases) {
fputs("Starting purge\n", stderr);
diff --git a/storage/innobase/trx/trx0rseg.c b/storage/innobase/trx/trx0rseg.c
index 740320f68c1..85beac8afbc 100644
--- a/storage/innobase/trx/trx0rseg.c
+++ b/storage/innobase/trx/trx0rseg.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Oracle Corpn. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -50,11 +50,11 @@ trx_rseg_get_on_id(
{
trx_rseg_t* rseg;
- rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
+ ut_a(id < TRX_SYS_N_RSEGS);
- while (rseg && rseg->id != id) {
- rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
- }
+ rseg = trx_sys->rseg_array[id];
+
+ ut_a(rseg == NULL || id == rseg->id);
return(rseg);
}
@@ -181,12 +181,15 @@ static
trx_rseg_t*
trx_rseg_mem_create(
/*================*/
- ulint id, /*!< in: rollback segment id */
- ulint space, /*!< in: space where the segment placed */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number of the segment header */
- mtr_t* mtr) /*!< in: mtr */
+ ulint id, /*!< in: rollback segment id */
+ ulint space, /*!< in: space where the segment
+ placed */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number of the segment
+ header */
+ ib_bh_t* ib_bh, /*!< in/out: rseg queue */
+ mtr_t* mtr) /*!< in: mtr */
{
ulint len;
trx_rseg_t* rseg;
@@ -225,6 +228,9 @@ trx_rseg_mem_create(
len = flst_get_len(rseg_header + TRX_RSEG_HISTORY, mtr);
if (len > 0) {
+ const void* ptr;
+ rseg_queue_t rseg_queue;
+
trx_sys->rseg_history_len += len;
node_addr = trx_purge_get_log_from_hist(
@@ -240,6 +246,17 @@ trx_rseg_mem_create(
undo_log_hdr + TRX_UNDO_TRX_NO);
rseg->last_del_marks = mtr_read_ulint(
undo_log_hdr + TRX_UNDO_DEL_MARKS, MLOG_2BYTES, mtr);
+
+ rseg_queue.rseg = rseg;
+ rseg_queue.trx_no = rseg->last_trx_no;
+
+ if (rseg->last_page_no != FIL_NULL) {
+ /* There is no need to cover this operation by the purge
+ mutex because we are still bootstrapping. */
+
+ ptr = ib_bh_push(ib_bh, &rseg_queue);
+ ut_a(ptr != NULL);
+ }
} else {
rseg->last_page_no = FIL_NULL;
}
@@ -255,6 +272,7 @@ void
trx_rseg_create_instance(
/*=====================*/
trx_sysf_t* sys_header, /*!< in: trx system header */
+ ib_bh_t* ib_bh, /*!< in/out: rseg queue */
mtr_t* mtr) /*!< in: mtr */
{
ulint i;
@@ -278,7 +296,7 @@ trx_rseg_create_instance(
zip_size = space ? fil_space_get_zip_size(space) : 0;
rseg = trx_rseg_mem_create(
- i, space, zip_size, page_no, mtr);
+ i, space, zip_size, page_no, ib_bh, mtr);
ut_a(rseg->id == i);
}
@@ -327,7 +345,8 @@ trx_rseg_create(void)
zip_size = space ? fil_space_get_zip_size(space) : 0;
rseg = trx_rseg_mem_create(
- slot_no, space, zip_size, page_no, &mtr);
+ slot_no, space, zip_size, page_no,
+ purge_sys->ib_bh, &mtr);
}
mutex_exit(&kernel_mutex);
@@ -342,13 +361,14 @@ UNIV_INTERN
void
trx_rseg_list_and_array_init(
/*=========================*/
- trx_sysf_t* sys_header, /* in: trx system header */
- mtr_t* mtr) /* in: mtr */
+ trx_sysf_t* sys_header, /*!< in: trx system header */
+ ib_bh_t* ib_bh, /*!< in: rseg queue */
+ mtr_t* mtr) /*!< in: mtr */
{
UT_LIST_INIT(trx_sys->rseg_list);
trx_sys->rseg_history_len = 0;
- trx_rseg_create_instance(sys_header, mtr);
+ trx_rseg_create_instance(sys_header, ib_bh, mtr);
}
diff --git a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c
index 101f225a06f..7af9fbb1af8 100644
--- a/storage/innobase/trx/trx0sys.c
+++ b/storage/innobase/trx/trx0sys.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -946,6 +946,31 @@ trx_sysf_create(
}
/*****************************************************************//**
+Compare two trx_rseg_t instances on last_trx_no. */
+static
+int
+trx_rseg_compare_last_trx_no(
+/*=========================*/
+ const void* p1, /*!< in: elem to compare */
+ const void* p2) /*!< in: elem to compare */
+{
+ ib_int64_t cmp;
+
+ const rseg_queue_t* rseg_q1 = (const rseg_queue_t*) p1;
+ const rseg_queue_t* rseg_q2 = (const rseg_queue_t*) p2;
+
+ cmp = rseg_q1->trx_no - rseg_q2->trx_no;
+
+ if (cmp < 0) {
+ return(-1);
+ } else if (cmp > 0) {
+ return(1);
+ }
+
+ return(0);
+}
+
+/*****************************************************************//**
Creates and initializes the central memory structures for the transaction
system. This is called when the database is started. */
UNIV_INTERN
@@ -958,6 +983,7 @@ trx_sys_init_at_db_start(void)
const char* unit = "";
trx_t* trx;
mtr_t mtr;
+ ib_bh_t* ib_bh;
mtr_start(&mtr);
@@ -965,11 +991,19 @@ trx_sys_init_at_db_start(void)
mutex_enter(&kernel_mutex);
- trx_sys = mem_alloc(sizeof(trx_sys_t));
+ /* We create the min binary heap here and pass ownership to
+ purge when we init the purge sub-system. Purge is responsible
+ for freeing the binary heap. */
+
+ ib_bh = ib_bh_create(
+ trx_rseg_compare_last_trx_no,
+ sizeof(rseg_queue_t), TRX_SYS_N_RSEGS);
+
+ trx_sys = mem_zalloc(sizeof(*trx_sys));
sys_header = trx_sysf_get(&mtr);
- trx_rseg_list_and_array_init(sys_header, &mtr);
+ trx_rseg_list_and_array_init(sys_header, ib_bh, &mtr);
trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
@@ -1023,7 +1057,8 @@ trx_sys_init_at_db_start(void)
UT_LIST_INIT(trx_sys->view_list);
- trx_purge_sys_create();
+ /* Transfer ownership to purge. */
+ trx_purge_sys_create(ib_bh);
mutex_exit(&kernel_mutex);
diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
index 4bbcee210b0..820c40fe857 100644
--- a/storage/innobase/trx/trx0trx.c
+++ b/storage/innobase/trx/trx0trx.c
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,6 +42,7 @@ Created 3/26/1996 Heikki Tuuri
#include "btr0sea.h"
#include "os0proc.h"
#include "trx0xa.h"
+#include "trx0purge.h"
#include "ha_prototypes.h"
/** Dummy session used currently in MySQL interface */
@@ -604,36 +605,26 @@ trx_lists_init_at_db_start(void)
/******************************************************************//**
Assigns a rollback segment to a transaction in a round-robin fashion.
-Skips the SYSTEM rollback segment if another is available.
-@return assigned rollback segment id */
+@return assigned rollback segment instance */
UNIV_INLINE
-ulint
-trx_assign_rseg(void)
-/*=================*/
+trx_rseg_t*
+trx_assign_rseg(
+/*============*/
+ ulint max_undo_logs) /*!< in: maximum number of UNDO logs to use */
{
- trx_rseg_t* rseg = trx_sys->latest_rseg;
+ trx_rseg_t* rseg = trx_sys->latest_rseg;
ut_ad(mutex_own(&kernel_mutex));
-loop:
- /* Get next rseg in a round-robin fashion */
rseg = UT_LIST_GET_NEXT(rseg_list, rseg);
- if (rseg == NULL) {
+ if (rseg == NULL || rseg->id == max_undo_logs - 1) {
rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list);
}
- /* If it is the SYSTEM rollback segment, and there exist others, skip
- it */
-
- if ((rseg->id == TRX_SYS_SYSTEM_RSEG_ID)
- && (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)) {
- goto loop;
- }
-
trx_sys->latest_rseg = rseg;
- return(rseg->id);
+ return(rseg);
}
/****************************************************************//**
@@ -663,12 +654,9 @@ trx_start_low(
ut_ad(trx->conc_state != TRX_ACTIVE);
- if (rseg_id == ULINT_UNDEFINED) {
-
- rseg_id = trx_assign_rseg();
- }
+ ut_a(rseg_id == ULINT_UNDEFINED);
- rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id);
+ rseg = trx_assign_rseg(srv_rollback_segments);
trx->id = trx_sys_get_new_trx_id();
@@ -719,107 +707,179 @@ trx_start(
}
/****************************************************************//**
-Commits a transaction. */
-UNIV_INTERN
+Set the transaction serialisation number. */
+static
void
-trx_commit_off_kernel(
-/*==================*/
- trx_t* trx) /*!< in: transaction */
+trx_serialisation_number_get(
+/*=========================*/
+ trx_t* trx) /*!< in: transaction */
{
- page_t* update_hdr_page;
- ib_uint64_t lsn = 0;
trx_rseg_t* rseg;
- trx_undo_t* undo;
- mtr_t mtr;
- ut_ad(mutex_own(&kernel_mutex));
+ rseg = trx->rseg;
- trx->must_flush_log_later = FALSE;
+ ut_ad(mutex_own(&rseg->mutex));
- rseg = trx->rseg;
+ mutex_enter(&kernel_mutex);
- if (trx->insert_undo != NULL || trx->update_undo != NULL) {
+ trx->no = trx_sys_get_new_trx_id();
+
+ /* If the rollack segment is not empty then the
+ new trx_t::no can't be less than any trx_t::no
+ already in the rollback segment. User threads only
+ produce events when a rollback segment is empty. */
+
+ if (rseg->last_page_no == FIL_NULL) {
+ void* ptr;
+ rseg_queue_t rseg_queue;
+
+ rseg_queue.rseg = rseg;
+ rseg_queue.trx_no = trx->no;
+
+ mutex_enter(&purge_sys->bh_mutex);
+
+ /* This is to reduce the pressure on the kernel mutex,
+ though in reality it should make very little (read no)
+ difference because this code path is only taken when the
+ rbs is empty. */
mutex_exit(&kernel_mutex);
- mtr_start(&mtr);
+ ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
+ ut_a(ptr);
- /* Change the undo log segment states from TRX_UNDO_ACTIVE
- to some other state: these modifications to the file data
- structure define the transaction as committed in the file
- based world, at the serialization point of the log sequence
- number lsn obtained below. */
+ mutex_exit(&purge_sys->bh_mutex);
+ } else {
+ mutex_exit(&kernel_mutex);
+ }
+}
- mutex_enter(&(rseg->mutex));
+/****************************************************************//**
+Assign the transaction its history serialisation number and write the
+update UNDO log record to the assigned rollback segment.
+@return the LSN of the UNDO log write. */
+static
+ib_uint64_t
+trx_write_serialisation_history(
+/*============================*/
+ trx_t* trx) /*!< in: transaction */
+{
+ mtr_t mtr;
+ trx_rseg_t* rseg;
- if (trx->insert_undo != NULL) {
- trx_undo_set_state_at_finish(trx->insert_undo, &mtr);
- }
+ ut_ad(!mutex_own(&kernel_mutex));
- undo = trx->update_undo;
+ rseg = trx->rseg;
- if (undo) {
- mutex_enter(&kernel_mutex);
- trx->no = trx_sys_get_new_trx_no();
- mutex_exit(&kernel_mutex);
+ mtr_start(&mtr);
- /* It is not necessary to obtain trx->undo_mutex here
- because only a single OS thread is allowed to do the
- transaction commit for this transaction. */
+ /* Change the undo log segment states from TRX_UNDO_ACTIVE
+ to some other state: these modifications to the file data
+ structure define the transaction as committed in the file
+ based domain, at the serialization point of the log sequence
+ number lsn obtained below. */
- update_hdr_page = trx_undo_set_state_at_finish(
- undo, &mtr);
+ if (trx->update_undo != NULL) {
+ page_t* undo_hdr_page;
+ trx_undo_t* undo = trx->update_undo;
- /* We have to do the cleanup for the update log while
- holding the rseg mutex because update log headers
- have to be put to the history list in the order of
- the trx number. */
+ /* We have to hold the rseg mutex because update
+ log headers have to be put to the history list in the
+ (serialisation) order of the UNDO trx number. This is
+ required for the purge in-memory data structures too. */
- trx_undo_update_cleanup(trx, update_hdr_page, &mtr);
- }
+ mutex_enter(&rseg->mutex);
- mutex_exit(&(rseg->mutex));
+ /* Assign the transaction serialisation number and also
+ update the purge min binary heap if this is the first
+ UNDO log being written to the assigned rollback segment. */
- /* Update the latest MySQL binlog name and offset info
- in trx sys header if MySQL binlogging is on or the database
- server is a MySQL replication slave */
-
- if (trx->mysql_log_file_name
- && trx->mysql_log_file_name[0] != '\0') {
- trx_sys_update_mysql_binlog_offset(
- trx->mysql_log_file_name,
- trx->mysql_log_offset,
- TRX_SYS_MYSQL_LOG_INFO, &mtr);
- trx->mysql_log_file_name = NULL;
- }
+ trx_serialisation_number_get(trx);
- /* The following call commits the mini-transaction, making the
- whole transaction committed in the file-based world, at this
- log sequence number. The transaction becomes 'durable' when
- we write the log to disk, but in the logical sense the commit
- in the file-based data structures (undo logs etc.) happens
- here.
-
- NOTE that transaction numbers, which are assigned only to
- transactions with an update undo log, do not necessarily come
- in exactly the same order as commit lsn's, if the transactions
- have different rollback segments. To get exactly the same
- order we should hold the kernel mutex up to this point,
- adding to the contention of the kernel mutex. However, if
- a transaction T2 is able to see modifications made by
- a transaction T1, T2 will always get a bigger transaction
- number and a bigger commit lsn than T1. */
+ /* It is not necessary to obtain trx->undo_mutex here
+ because only a single OS thread is allowed to do the
+ transaction commit for this transaction. */
- /*--------------*/
- mtr_commit(&mtr);
- /*--------------*/
- lsn = mtr.end_lsn;
+ undo_hdr_page = trx_undo_set_state_at_finish(undo, &mtr);
+
+ trx_undo_update_cleanup(trx, undo_hdr_page, &mtr);
+ } else {
+ mutex_enter(&rseg->mutex);
+ }
+
+ if (trx->insert_undo != NULL) {
+ trx_undo_set_state_at_finish(trx->insert_undo, &mtr);
+ }
+
+ mutex_exit(&rseg->mutex);
+
+ /* Update the latest MySQL binlog name and offset info
+ in trx sys header if MySQL binlogging is on or the database
+ server is a MySQL replication slave */
+
+ if (trx->mysql_log_file_name
+ && trx->mysql_log_file_name[0] != '\0') {
+
+ trx_sys_update_mysql_binlog_offset(
+ trx->mysql_log_file_name,
+ trx->mysql_log_offset,
+ TRX_SYS_MYSQL_LOG_INFO, &mtr);
+
+ trx->mysql_log_file_name = NULL;
+ }
+
+ /* The following call commits the mini-transaction, making the
+ whole transaction committed in the file-based world, at this
+ log sequence number. The transaction becomes 'durable' when
+ we write the log to disk, but in the logical sense the commit
+ in the file-based data structures (undo logs etc.) happens
+ here.
+
+ NOTE that transaction numbers, which are assigned only to
+ transactions with an update undo log, do not necessarily come
+ in exactly the same order as commit lsn's, if the transactions
+ have different rollback segments. To get exactly the same
+ order we should hold the kernel mutex up to this point,
+ adding to the contention of the kernel mutex. However, if
+ a transaction T2 is able to see modifications made by
+ a transaction T1, T2 will always get a bigger transaction
+ number and a bigger commit lsn than T1. */
+
+ /*--------------*/
+ mtr_commit(&mtr);
+ /*--------------*/
+
+ return(mtr.end_lsn);
+}
+
+/****************************************************************//**
+Commits a transaction. */
+UNIV_INTERN
+void
+trx_commit_off_kernel(
+/*==================*/
+ trx_t* trx) /*!< in: transaction */
+{
+ ib_uint64_t lsn;
+
+ ut_ad(mutex_own(&kernel_mutex));
+
+ trx->must_flush_log_later = FALSE;
+
+ /* If the transaction made any updates then we need to write the
+ UNDO logs for the updates to the assigned rollback segment. */
+
+ if (trx->insert_undo != NULL || trx->update_undo != NULL) {
+ mutex_exit(&kernel_mutex);
+
+ lsn = trx_write_serialisation_history(trx);
mutex_enter(&kernel_mutex);
+ } else {
+ lsn = 0;
}
- ut_ad(trx->conc_state == TRX_ACTIVE
- || trx->conc_state == TRX_PREPARED);
+ ut_ad(trx->conc_state == TRX_ACTIVE || trx->conc_state == TRX_PREPARED);
ut_ad(mutex_own(&kernel_mutex));
/* The following assignment makes the transaction committed in memory
diff --git a/storage/innobase/ut/ut0bh.c b/storage/innobase/ut/ut0bh.c
new file mode 100644
index 00000000000..ae0b1aff207
--- /dev/null
+++ b/storage/innobase/ut/ut0bh.c
@@ -0,0 +1,164 @@
+/***************************************************************************//**
+Copyright (c) 2010, 2011, Oracle Corpn. All Rights Reserved.
+
+Portions of this file contain modifications contributed and copyrighted by
+Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
+are described briefly in the InnoDB documentation. The contributions by
+Sun Microsystems are incorporated with their permission, and subject to the
+conditions contained in the file COPYING.Sun_Microsystems.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file ut/ut0bh.c
+Binary min-heap implementation.
+
+Created 2010-05-28 by Sunny Bains
+*******************************************************/
+
+#include "ut0bh.h"
+#include "ut0mem.h"
+
+#ifdef UNIV_NONINL
+#include "ut0bh.ic"
+#endif
+
+#include <string.h>
+
+/**********************************************************************//**
+Create a binary heap.
+@return a new binary heap */
+UNIV_INTERN
+ib_bh_t*
+ib_bh_create(
+/*=========*/
+ ib_bh_cmp_t compare, /*!< in: comparator */
+ ulint sizeof_elem, /*!< in: size of one element */
+ ulint max_elems) /*!< in: max elements allowed */
+{
+ ulint sz;
+ ib_bh_t* ib_bh;
+
+ sz = sizeof(*ib_bh) + (sizeof_elem * max_elems);
+
+ ib_bh = (ib_bh_t*) ut_malloc(sz);
+ memset(ib_bh, 0x0, sz);
+
+ ib_bh->compare = compare;
+ ib_bh->max_elems = max_elems;
+ ib_bh->sizeof_elem = sizeof_elem;
+
+ return(ib_bh);
+}
+
+/**********************************************************************//**
+Free a binary heap.
+@return a new binary heap */
+UNIV_INTERN
+void
+ib_bh_free(
+/*=======*/
+ ib_bh_t* ib_bh) /*!< in/own: instance */
+{
+ ut_free(ib_bh);
+}
+
+/**********************************************************************//**
+Add an element to the binary heap. Note: The element is copied.
+@return pointer to added element or NULL if full. */
+UNIV_INTERN
+void*
+ib_bh_push(
+/*=======*/
+ ib_bh_t* ib_bh, /*!< in/out: instance */
+ const void* elem) /*!< in: element to add */
+{
+ void* ptr;
+
+ if (ib_bh_is_full(ib_bh)) {
+ return(NULL);
+ } else if (ib_bh_is_empty(ib_bh)) {
+ ++ib_bh->n_elems;
+ return(ib_bh_set(ib_bh, 0, elem));
+ } else {
+ ulint i;
+
+ i = ib_bh->n_elems;
+
+ ++ib_bh->n_elems;
+
+ for (ptr = ib_bh_get(ib_bh, i >> 1);
+ i > 0 && ib_bh->compare(ptr, elem) > 0;
+ i >>= 1, ptr = ib_bh_get(ib_bh, i >> 1)) {
+
+ ib_bh_set(ib_bh, i, ptr);
+ }
+
+ ptr = ib_bh_set(ib_bh, i, elem);
+ }
+
+ return(ptr);
+}
+
+/**********************************************************************//**
+Remove the first element from the binary heap. */
+UNIV_INTERN
+void
+ib_bh_pop(
+/*======*/
+ ib_bh_t* ib_bh) /*!< in/out: instance */
+{
+ byte* ptr;
+ byte* last;
+ ulint parent = 0;
+
+ if (ib_bh_is_empty(ib_bh)) {
+ return;
+ } else if (ib_bh_size(ib_bh) == 1) {
+ --ib_bh->n_elems;
+ return;
+ }
+
+ last = (byte*) ib_bh_last(ib_bh);
+
+ /* Start from the child node */
+ ptr = (byte*) ib_bh_get(ib_bh, 1);
+
+ while (ptr < last) {
+ /* If the "right" child node is < "left" child node */
+ if (ib_bh->compare(ptr + ib_bh->sizeof_elem, ptr) < 0) {
+ ptr += ib_bh->sizeof_elem;
+ }
+
+ if (ib_bh->compare(last, ptr) <= 0) {
+ break;
+ }
+
+ ib_bh_set(ib_bh, parent, ptr);
+
+ parent = (ptr - (byte*) ib_bh_first(ib_bh))
+ / ib_bh->sizeof_elem;
+
+ if ((parent << 1) >= ib_bh_size(ib_bh)) {
+ break;
+ }
+
+ ptr = (byte*) ib_bh_get(ib_bh, parent << 1);
+ }
+
+ --ib_bh->n_elems;
+
+ ib_bh_set(ib_bh, parent, last);
+}
diff --git a/storage/innobase/ut/ut0ut.c b/storage/innobase/ut/ut0ut.c
index 4f4dfc5eed8..cd0894b132a 100644
--- a/storage/innobase/ut/ut0ut.c
+++ b/storage/innobase/ut/ut0ut.c
@@ -1,13 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Sun Microsystems, Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
-are described briefly in the InnoDB documentation. The contributions by
-Sun Microsystems are incorporated with their permission, and subject to the
-conditions contained in the file COPYING.Sun_Microsystems.
+Copyright (c) 2011, Oracle Corpn. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software