summaryrefslogtreecommitdiff
path: root/storage/xtradb/include
diff options
context:
space:
mode:
authorSergei Golubchik <sergii@pisem.net>2011-07-18 23:04:24 +0200
committerSergei Golubchik <sergii@pisem.net>2011-07-18 23:04:24 +0200
commit4e46d8e5bff140f2549841167dc4b65a3c0a645d (patch)
treec6612dcc1d0fbd801c084e6c36307d9e5913a293 /storage/xtradb/include
parent9a02c69f5c6766eaf552284a2a4dd0f1d26ecd2c (diff)
parentd4d7a8fa62c406be73f6c0f6d75e795293db548b (diff)
downloadmariadb-git-4e46d8e5bff140f2549841167dc4b65a3c0a645d.tar.gz
merge with xtradb-5.5.15
fix test cases
Diffstat (limited to 'storage/xtradb/include')
-rw-r--r--storage/xtradb/include/btr0btr.h219
-rw-r--r--storage/xtradb/include/btr0btr.ic45
-rw-r--r--storage/xtradb/include/btr0cur.h168
-rw-r--r--storage/xtradb/include/btr0pcur.h40
-rw-r--r--storage/xtradb/include/btr0pcur.ic45
-rw-r--r--storage/xtradb/include/btr0sea.h51
-rw-r--r--storage/xtradb/include/btr0sea.ic73
-rw-r--r--storage/xtradb/include/btr0types.h125
-rw-r--r--storage/xtradb/include/buf0buddy.h29
-rw-r--r--storage/xtradb/include/buf0buddy.ic72
-rw-r--r--storage/xtradb/include/buf0buf.h538
-rw-r--r--storage/xtradb/include/buf0buf.ic383
-rw-r--r--storage/xtradb/include/buf0flu.h95
-rw-r--r--storage/xtradb/include/buf0flu.ic69
-rw-r--r--storage/xtradb/include/buf0lru.h59
-rw-r--r--storage/xtradb/include/buf0rea.h10
-rw-r--r--storage/xtradb/include/buf0types.h4
-rw-r--r--storage/xtradb/include/data0data.h13
-rw-r--r--storage/xtradb/include/data0data.ic26
-rw-r--r--storage/xtradb/include/data0type.h52
-rw-r--r--storage/xtradb/include/data0type.ic75
-rw-r--r--storage/xtradb/include/db0err.h15
-rw-r--r--storage/xtradb/include/dict0boot.h36
-rw-r--r--storage/xtradb/include/dict0boot.ic16
-rw-r--r--storage/xtradb/include/dict0dict.h119
-rw-r--r--storage/xtradb/include/dict0dict.ic112
-rw-r--r--storage/xtradb/include/dict0load.h238
-rw-r--r--storage/xtradb/include/dict0mem.h142
-rw-r--r--storage/xtradb/include/dict0mem.ic46
-rw-r--r--storage/xtradb/include/dict0types.h22
-rw-r--r--storage/xtradb/include/fil0fil.h31
-rw-r--r--storage/xtradb/include/fsp0types.h2
-rw-r--r--storage/xtradb/include/ha_prototypes.h33
-rw-r--r--storage/xtradb/include/handler0alter.h2
-rw-r--r--storage/xtradb/include/hash0hash.h49
-rw-r--r--storage/xtradb/include/ibuf0ibuf.h136
-rw-r--r--storage/xtradb/include/ibuf0ibuf.ic64
-rw-r--r--storage/xtradb/include/lock0lock.h18
-rw-r--r--storage/xtradb/include/lock0lock.ic6
-rw-r--r--storage/xtradb/include/log0log.h25
-rw-r--r--storage/xtradb/include/log0log.ic2
-rw-r--r--storage/xtradb/include/mach0data.h112
-rw-r--r--storage/xtradb/include/mach0data.ic229
-rw-r--r--storage/xtradb/include/mem0mem.h2
-rw-r--r--storage/xtradb/include/mem0mem.ic12
-rw-r--r--storage/xtradb/include/mtr0log.h22
-rw-r--r--storage/xtradb/include/mtr0log.ic12
-rw-r--r--storage/xtradb/include/mtr0mtr.h23
-rw-r--r--storage/xtradb/include/mtr0mtr.ic15
-rw-r--r--storage/xtradb/include/mysql_addons.h33
-rw-r--r--storage/xtradb/include/os0file.h532
-rw-r--r--storage/xtradb/include/os0file.ic422
-rw-r--r--storage/xtradb/include/os0proc.h28
-rw-r--r--storage/xtradb/include/os0sync.h88
-rw-r--r--storage/xtradb/include/os0sync.ic11
-rw-r--r--storage/xtradb/include/os0thread.h40
-rw-r--r--storage/xtradb/include/page0cur.h16
-rw-r--r--storage/xtradb/include/page0page.h29
-rw-r--r--storage/xtradb/include/page0page.ic28
-rw-r--r--storage/xtradb/include/page0zip.h2
-rw-r--r--storage/xtradb/include/pars0pars.h10
-rw-r--r--storage/xtradb/include/que0que.h3
-rw-r--r--storage/xtradb/include/read0read.h11
-rw-r--r--storage/xtradb/include/read0read.ic14
-rw-r--r--storage/xtradb/include/rem0cmp.h7
-rw-r--r--storage/xtradb/include/rem0cmp.ic2
-rw-r--r--storage/xtradb/include/rem0rec.h33
-rw-r--r--storage/xtradb/include/rem0rec.ic55
-rw-r--r--storage/xtradb/include/rem0types.h20
-rw-r--r--storage/xtradb/include/row0ext.h13
-rw-r--r--storage/xtradb/include/row0ext.ic9
-rw-r--r--storage/xtradb/include/row0ins.h5
-rw-r--r--storage/xtradb/include/row0merge.h2
-rw-r--r--storage/xtradb/include/row0mysql.h27
-rw-r--r--storage/xtradb/include/row0purge.h22
-rw-r--r--storage/xtradb/include/row0row.h50
-rw-r--r--storage/xtradb/include/row0row.ic36
-rw-r--r--storage/xtradb/include/row0types.h2
-rw-r--r--storage/xtradb/include/row0upd.h47
-rw-r--r--storage/xtradb/include/row0upd.ic2
-rw-r--r--storage/xtradb/include/srv0que.h42
-rw-r--r--storage/xtradb/include/srv0srv.h272
-rw-r--r--storage/xtradb/include/srv0start.h7
-rw-r--r--storage/xtradb/include/sync0arr.h7
-rw-r--r--storage/xtradb/include/sync0rw.h366
-rw-r--r--storage/xtradb/include/sync0rw.ic257
-rw-r--r--storage/xtradb/include/sync0sync.h314
-rw-r--r--storage/xtradb/include/sync0sync.ic147
-rw-r--r--storage/xtradb/include/thr0loc.h101
-rw-r--r--storage/xtradb/include/thr0loc.ic24
-rw-r--r--storage/xtradb/include/trx0i_s.h89
-rw-r--r--storage/xtradb/include/trx0purge.h52
-rw-r--r--storage/xtradb/include/trx0rec.h4
-rw-r--r--storage/xtradb/include/trx0rec.ic4
-rw-r--r--storage/xtradb/include/trx0rseg.h40
-rw-r--r--storage/xtradb/include/trx0sys.h49
-rw-r--r--storage/xtradb/include/trx0sys.ic27
-rw-r--r--storage/xtradb/include/trx0trx.h63
-rw-r--r--storage/xtradb/include/trx0trx.ic12
-rw-r--r--storage/xtradb/include/trx0types.h13
-rw-r--r--storage/xtradb/include/trx0undo.h13
-rw-r--r--storage/xtradb/include/trx0undo.ic43
-rw-r--r--storage/xtradb/include/univ.i123
-rw-r--r--storage/xtradb/include/ut0auxconf.h14
-rw-r--r--storage/xtradb/include/ut0bh.h152
-rw-r--r--storage/xtradb/include/ut0bh.ic125
-rw-r--r--storage/xtradb/include/ut0byte.h169
-rw-r--r--storage/xtradb/include/ut0byte.ic254
-rw-r--r--storage/xtradb/include/ut0dbg.h55
-rw-r--r--storage/xtradb/include/ut0lst.h47
-rw-r--r--storage/xtradb/include/ut0mem.h2
-rw-r--r--storage/xtradb/include/ut0rbt.h169
-rw-r--r--storage/xtradb/include/ut0rnd.h8
-rw-r--r--storage/xtradb/include/ut0rnd.ic15
-rw-r--r--storage/xtradb/include/ut0ut.h43
-rw-r--r--storage/xtradb/include/ut0vec.h19
-rw-r--r--storage/xtradb/include/ut0vec.ic29
117 files changed, 5963 insertions, 2538 deletions
diff --git a/storage/xtradb/include/btr0btr.h b/storage/xtradb/include/btr0btr.h
index 5e6a76c7d21..c632e034581 100644
--- a/storage/xtradb/include/btr0btr.h
+++ b/storage/xtradb/include/btr0btr.h
@@ -68,19 +68,115 @@ enum btr_latch_mode {
BTR_MODIFY_PREV = 36
};
+/* BTR_INSERT, BTR_DELETE and BTR_DELETE_MARK are mutually exclusive. */
+
/** If this is ORed to btr_latch_mode, it means that the search tuple
-will be inserted to the index, at the searched position */
+will be inserted to the index, at the searched position.
+When the record is not in the buffer pool, try to use the insert buffer. */
#define BTR_INSERT 512
/** This flag ORed to btr_latch_mode says that we do the search in query
optimization */
#define BTR_ESTIMATE 1024
-/** This flag ORed to btr_latch_mode says that we can ignore possible
+/** This flag ORed to BTR_INSERT says that we can ignore possible
UNIQUE definition on secondary indexes when we decide if we can use
the insert buffer to speed up inserts */
#define BTR_IGNORE_SEC_UNIQUE 2048
+/** Try to delete mark the record at the searched position using the
+insert/delete buffer when the record is not in the buffer pool. */
+#define BTR_DELETE_MARK 4096
+
+/** Try to purge the record at the searched position using the insert/delete
+buffer when the record is not in the buffer pool. */
+#define BTR_DELETE 8192
+
+#ifdef UNIV_BLOB_DEBUG
+# include "ut0rbt.h"
+/** An index->blobs entry for keeping track of off-page column references */
+struct btr_blob_dbg_struct
+{
+ unsigned blob_page_no:32; /*!< first BLOB page number */
+ unsigned ref_page_no:32; /*!< referring page number */
+ unsigned ref_heap_no:16; /*!< referring heap number */
+ unsigned ref_field_no:10; /*!< referring field number */
+ unsigned owner:1; /*!< TRUE if BLOB owner */
+ unsigned always_owner:1; /*!< TRUE if always
+ has been the BLOB owner;
+ reset to TRUE on B-tree
+ page splits and merges */
+ unsigned del:1; /*!< TRUE if currently
+ delete-marked */
+};
+
+/**************************************************************//**
+Add a reference to an off-page column to the index->blobs map. */
+UNIV_INTERN
+void
+btr_blob_dbg_add_blob(
+/*==================*/
+ const rec_t* rec, /*!< in: clustered index record */
+ ulint field_no, /*!< in: number of off-page column */
+ ulint page_no, /*!< in: start page of the column */
+ dict_index_t* index, /*!< in/out: index tree */
+ const char* ctx) /*!< in: context (for logging) */
+ __attribute__((nonnull));
+/**************************************************************//**
+Display the references to off-page columns.
+This function is to be called from a debugger,
+for example when a breakpoint on ut_dbg_assertion_failed is hit. */
+UNIV_INTERN
+void
+btr_blob_dbg_print(
+/*===============*/
+ const dict_index_t* index) /*!< in: index tree */
+ __attribute__((nonnull));
+/**************************************************************//**
+Check that there are no references to off-page columns from or to
+the given page. Invoked when freeing or clearing a page.
+@return TRUE when no orphan references exist */
+UNIV_INTERN
+ibool
+btr_blob_dbg_is_empty(
+/*==================*/
+ dict_index_t* index, /*!< in: index */
+ ulint page_no) /*!< in: page number */
+ __attribute__((nonnull, warn_unused_result));
+
+/**************************************************************//**
+Modify the 'deleted' flag of a record. */
+UNIV_INTERN
+void
+btr_blob_dbg_set_deleted_flag(
+/*==========================*/
+ const rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in/out: index */
+ const ulint* offsets,/*!< in: rec_get_offs(rec, index) */
+ ibool del) /*!< in: TRUE=deleted, FALSE=exists */
+ __attribute__((nonnull));
+/**************************************************************//**
+Change the ownership of an off-page column. */
+UNIV_INTERN
+void
+btr_blob_dbg_owner(
+/*===============*/
+ const rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in/out: index */
+ const ulint* offsets,/*!< in: rec_get_offs(rec, index) */
+ ulint i, /*!< in: ith field in rec */
+ ibool own) /*!< in: TRUE=owned, FALSE=disowned */
+ __attribute__((nonnull));
+/** Assert that there are no BLOB references to or from the given page. */
+# define btr_blob_dbg_assert_empty(index, page_no) \
+ ut_a(btr_blob_dbg_is_empty(index, page_no))
+#else /* UNIV_BLOB_DEBUG */
+# define btr_blob_dbg_add_blob(rec, field_no, page, index, ctx) ((void) 0)
+# define btr_blob_dbg_set_deleted_flag(rec, index, offsets, del)((void) 0)
+# define btr_blob_dbg_owner(rec, index, offsets, i, val) ((void) 0)
+# define btr_blob_dbg_assert_empty(index, page_no) ((void) 0)
+#endif /* UNIV_BLOB_DEBUG */
+
/**************************************************************//**
Gets the root node of a tree and x-latches it.
@return root page, x-latched */
@@ -94,32 +190,52 @@ btr_root_get(
Gets a buffer page and declares its latching order level. */
UNIV_INLINE
buf_block_t*
-btr_block_get(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- ulint mode, /*!< in: latch mode */
- mtr_t* mtr); /*!< in: mtr */
+btr_block_get_func(
+/*===============*/
+ ulint space, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number */
+ ulint mode, /*!< in: latch mode */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
+ mtr_t* mtr) /*!< in/out: mtr */
+ __attribute__((nonnull));
+/** Gets a buffer page and declares its latching order level.
+@param space tablespace identifier
+@param zip_size compressed page size in bytes or 0 for uncompressed pages
+@param page_no page number
+@param mode latch mode
+@param mtr mini-transaction handle
+@return the block descriptor */
+# define btr_block_get(space,zip_size,page_no,mode,mtr) \
+ btr_block_get_func(space,zip_size,page_no,mode,__FILE__,__LINE__,mtr)
+/** Gets a buffer page and declares its latching order level.
+@param space tablespace identifier
+@param zip_size compressed page size in bytes or 0 for uncompressed pages
+@param page_no page number
+@param mode latch mode
+@param mtr mini-transaction handle
+@return the uncompressed page frame */
+# define btr_page_get(space,zip_size,page_no,mode,mtr) \
+ buf_block_get_frame(btr_block_get(space,zip_size,page_no,mode,mtr))
/**************************************************************//**
-Gets a buffer page and declares its latching order level. */
+Sets the index id field of a page. */
UNIV_INLINE
-page_t*
-btr_page_get(
-/*=========*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- ulint mode, /*!< in: latch mode */
- mtr_t* mtr); /*!< in: mtr */
+void
+btr_page_set_index_id(
+/*==================*/
+ page_t* page, /*!< in: page to be created */
+ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
+ part will be updated, or NULL */
+ index_id_t id, /*!< in: index id */
+ mtr_t* mtr); /*!< in: mtr */
#endif /* !UNIV_HOTBACKUP */
/**************************************************************//**
Gets the index id field of a page.
@return index id */
UNIV_INLINE
-dulint
+index_id_t
btr_page_get_index_id(
/*==================*/
const page_t* page); /*!< in: index page */
@@ -151,6 +267,17 @@ btr_page_get_next(
const page_t* page, /*!< in: index page */
mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************//**
+Sets the next index page field. */
+UNIV_INLINE
+void
+btr_page_set_next(
+/*==============*/
+ page_t* page, /*!< in: index page */
+ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
+ part will be updated, or NULL */
+ ulint next, /*!< in: next page number */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************//**
Gets the previous index page number.
@return prev page number */
UNIV_INLINE
@@ -159,6 +286,17 @@ btr_page_get_prev(
/*==============*/
const page_t* page, /*!< in: index page */
mtr_t* mtr); /*!< in: mini-transaction handle */
+/********************************************************//**
+Sets the previous index page field. */
+UNIV_INLINE
+void
+btr_page_set_prev(
+/*==============*/
+ page_t* page, /*!< in: index page */
+ page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
+ part will be updated, or NULL */
+ ulint prev, /*!< in: previous page number */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
/*************************************************************//**
Gets pointer to the previous user record in the tree. It is assumed
that the caller has appropriate latches on the page and its neighbor.
@@ -204,6 +342,18 @@ btr_node_ptr_get_child_page_no(
/*===========================*/
const rec_t* rec, /*!< in: node pointer record */
const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+/**************************************************************//**
+Creates a new index page (not the root, and also not
+used in page reorganization). @see btr_page_empty(). */
+UNIV_INTERN
+void
+btr_page_create(
+/*============*/
+ buf_block_t* block, /*!< in/out: page to be created */
+ page_zip_des_t* page_zip,/*!< in/out: compressed page, or NULL */
+ dict_index_t* index, /*!< in: index */
+ ulint level, /*!< in: the B-tree level of the page */
+ mtr_t* mtr); /*!< in: mtr */
/************************************************************//**
Creates the root node for a new index tree.
@return page number of the created root, FIL_NULL if did not succeed */
@@ -215,7 +365,7 @@ btr_create(
ulint space, /*!< in: space where created */
ulint zip_size,/*!< in: compressed page size in bytes
or 0 for uncompressed pages */
- dulint index_id,/*!< in: index id */
+ index_id_t index_id,/*!< in: index id */
dict_index_t* index, /*!< in: index */
mtr_t* mtr); /*!< in: mini-transaction handle */
/************************************************************//**
@@ -274,6 +424,17 @@ btr_page_reorganize(
dict_index_t* index, /*!< in: record descriptor */
mtr_t* mtr); /*!< in: mtr */
/*************************************************************//**
+Empties an index page. @see btr_page_create(). */
+UNIV_INTERN
+void
+btr_page_empty(
+/*===========*/
+ buf_block_t* block, /*!< in: page to be emptied */
+ page_zip_des_t* page_zip,/*!< out: compressed page, or NULL */
+ dict_index_t* index, /*!< in: index of the page */
+ ulint level, /*!< in: the B-tree level of the page */
+ mtr_t* mtr); /*!< in: mtr */
+/*************************************************************//**
Decides if the page should be split at the convergence point of
inserts converging to left.
@return TRUE if split recommended */
@@ -332,6 +493,20 @@ btr_insert_on_non_leaf_level_func(
# define btr_insert_on_non_leaf_level(i,l,t,m) \
btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m)
#endif /* !UNIV_HOTBACKUP */
+/**************************************************************//**
+Attaches the halves of an index page on the appropriate level in an
+index tree. */
+UNIV_INTERN
+void
+btr_attach_half_pages(
+/*==================*/
+ dict_index_t* index, /*!< in: the index tree */
+ buf_block_t* block, /*!< in/out: page to be split */
+ rec_t* split_rec, /*!< in: first record on upper
+ half page */
+ buf_block_t* new_block, /*!< in/out: the new half page */
+ ulint direction, /*!< in: FSP_UP or FSP_DOWN */
+ mtr_t* mtr); /*!< in: mtr */
/****************************************************************//**
Sets a record as the predefined minimum record. */
UNIV_INTERN
diff --git a/storage/xtradb/include/btr0btr.ic b/storage/xtradb/include/btr0btr.ic
index c9c38f3c3b3..99c02ab30dc 100644
--- a/storage/xtradb/include/btr0btr.ic
+++ b/storage/xtradb/include/btr0btr.ic
@@ -39,18 +39,21 @@ Created 6/2/1994 Heikki Tuuri
Gets a buffer page and declares its latching order level. */
UNIV_INLINE
buf_block_t*
-btr_block_get(
-/*==========*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- ulint mode, /*!< in: latch mode */
- mtr_t* mtr) /*!< in: mtr */
+btr_block_get_func(
+/*===============*/
+ ulint space, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes
+ or 0 for uncompressed pages */
+ ulint page_no, /*!< in: page number */
+ ulint mode, /*!< in: latch mode */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
+ mtr_t* mtr) /*!< in/out: mtr */
{
buf_block_t* block;
- block = buf_page_get(space, zip_size, page_no, mode, mtr);
+ block = buf_page_get_gen(space, zip_size, page_no, mode,
+ NULL, BUF_GET, file, line, mtr);
ut_a(srv_pass_corrupt_table || block);
@@ -63,23 +66,6 @@ btr_block_get(
}
/**************************************************************//**
-Gets a buffer page and declares its latching order level. */
-UNIV_INLINE
-page_t*
-btr_page_get(
-/*=========*/
- ulint space, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes
- or 0 for uncompressed pages */
- ulint page_no, /*!< in: page number */
- ulint mode, /*!< in: latch mode */
- mtr_t* mtr) /*!< in: mtr */
-{
- return(buf_block_get_frame(btr_block_get(space, zip_size, page_no,
- mode, mtr)));
-}
-
-/**************************************************************//**
Sets the index id field of a page. */
UNIV_INLINE
void
@@ -88,7 +74,7 @@ btr_page_set_index_id(
page_t* page, /*!< in: page to be created */
page_zip_des_t* page_zip,/*!< in: compressed page whose uncompressed
part will be updated, or NULL */
- dulint id, /*!< in: index id */
+ index_id_t id, /*!< in: index id */
mtr_t* mtr) /*!< in: mtr */
{
if (UNIV_LIKELY_NULL(page_zip)) {
@@ -97,8 +83,7 @@ btr_page_set_index_id(
page + (PAGE_HEADER + PAGE_INDEX_ID),
8, mtr);
} else {
- mlog_write_dulint(page + (PAGE_HEADER + PAGE_INDEX_ID),
- id, mtr);
+ mlog_write_ull(page + (PAGE_HEADER + PAGE_INDEX_ID), id, mtr);
}
}
#endif /* !UNIV_HOTBACKUP */
@@ -107,7 +92,7 @@ btr_page_set_index_id(
Gets the index id field of a page.
@return index id */
UNIV_INLINE
-dulint
+index_id_t
btr_page_get_index_id(
/*==================*/
const page_t* page) /*!< in: index page */
diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h
index e151fdcb563..be918439f59 100644
--- a/storage/xtradb/include/btr0cur.h
+++ b/storage/xtradb/include/btr0cur.h
@@ -138,7 +138,8 @@ btr_cur_search_to_nth_level(
should always be made using PAGE_CUR_LE to
search the position! */
ulint latch_mode, /*!< in: BTR_SEARCH_LEAF, ..., ORed with
- BTR_INSERT and BTR_ESTIMATE;
+ at most one of BTR_INSERT, BTR_DELETE_MARK,
+ BTR_DELETE, or BTR_ESTIMATE;
cursor->left_block is used to store a pointer
to the left neighbor page, in the cases
BTR_SEARCH_PREV and BTR_MODIFY_PREV;
@@ -242,6 +243,22 @@ btr_cur_pessimistic_insert(
que_thr_t* thr, /*!< in: query thread or NULL */
mtr_t* mtr); /*!< in: mtr */
/*************************************************************//**
+See if there is enough place in the page modification log to log
+an update-in-place.
+@return TRUE if enough place */
+UNIV_INTERN
+ibool
+btr_cur_update_alloc_zip(
+/*=====================*/
+ page_zip_des_t* page_zip,/*!< in/out: compressed page */
+ buf_block_t* block, /*!< in/out: buffer page */
+ dict_index_t* index, /*!< in: the index corresponding to the block */
+ ulint length, /*!< in: size needed */
+ ibool create, /*!< in: TRUE=delete-and-insert,
+ FALSE=update-in-place */
+ mtr_t* mtr) /*!< in: mini-transaction */
+ __attribute__((nonnull, warn_unused_result));
+/*************************************************************//**
Updates a record when the update causes no size changes in its fields.
@return DB_SUCCESS or error number */
UNIV_INTERN
@@ -316,10 +333,14 @@ ulint
btr_cur_del_mark_set_clust_rec(
/*===========================*/
ulint flags, /*!< in: undo logging and locking flags */
- btr_cur_t* cursor, /*!< in: cursor */
+ buf_block_t* block, /*!< in/out: buffer block of the record */
+ rec_t* rec, /*!< in/out: record */
+ dict_index_t* index, /*!< in: clustered index of the record */
+ const ulint* offsets,/*!< in: rec_get_offsets(rec) */
ibool val, /*!< in: value to set */
que_thr_t* thr, /*!< in: query thread */
- mtr_t* mtr); /*!< in: mtr */
+ mtr_t* mtr) /*!< in: mtr */
+ __attribute__((nonnull));
/***********************************************************//**
Sets a secondary index record delete mark to TRUE or FALSE.
@return DB_SUCCESS, DB_LOCK_WAIT, or error number */
@@ -332,19 +353,6 @@ btr_cur_del_mark_set_sec_rec(
ibool val, /*!< in: value to set */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr); /*!< in: mtr */
-/***********************************************************//**
-Clear a secondary index record's delete mark. This function is only
-used by the insert buffer insert merge mechanism. */
-UNIV_INTERN
-void
-btr_cur_del_unmark_for_ibuf(
-/*========================*/
- rec_t* rec, /*!< in/out: record to delete unmark */
- page_zip_des_t* page_zip, /*!< in/out: compressed page
- corresponding to rec, or NULL
- when the tablespace is
- uncompressed */
- mtr_t* mtr); /*!< in: mtr */
/*************************************************************//**
Tries to compress a page of the tree if it seems useful. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
@@ -458,57 +466,42 @@ btr_estimate_n_rows_in_range(
/*******************************************************************//**
Estimates the number of different key values in a given index, for
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
-The estimates are stored in the array index->stat_n_diff_key_vals. */
+The estimates are stored in the array index->stat_n_diff_key_vals.
+If innodb_stats_method is nulls_ignored, we also record the number of
+non-null values for each prefix and stored the estimates in
+array index->stat_n_non_null_key_vals. */
UNIV_INTERN
void
btr_estimate_number_of_different_key_vals(
/*======================================*/
dict_index_t* index); /*!< in: index */
/*******************************************************************//**
-Marks not updated extern fields as not-owned by this record. The ownership
-is transferred to the updated record which is inserted elsewhere in the
+Marks non-updated off-page fields as disowned by this record. The ownership
+must be transferred to the updated record which is inserted elsewhere in the
index tree. In purge only the owner of externally stored field is allowed
-to free the field.
-@return TRUE if BLOB ownership was transferred */
+to free the field. */
UNIV_INTERN
-ibool
-btr_cur_mark_extern_inherited_fields(
-/*=================================*/
+void
+btr_cur_disown_inherited_fields(
+/*============================*/
page_zip_des_t* page_zip,/*!< in/out: compressed page whose uncompressed
part will be updated, or NULL */
rec_t* rec, /*!< in/out: record in a clustered index */
dict_index_t* index, /*!< in: index of the page */
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
const upd_t* update, /*!< in: update vector */
- mtr_t* mtr); /*!< in: mtr, or NULL if not logged */
-/*******************************************************************//**
-The complement of the previous function: in an update entry may inherit
-some externally stored fields from a record. We must mark them as inherited
-in entry, so that they are not freed in a rollback. */
-UNIV_INTERN
-void
-btr_cur_mark_dtuple_inherited_extern(
-/*=================================*/
- dtuple_t* entry, /*!< in/out: updated entry to be
- inserted to clustered index */
- const upd_t* update); /*!< in: update vector */
-/*******************************************************************//**
-Marks all extern fields in a dtuple as owned by the record. */
-UNIV_INTERN
-void
-btr_cur_unmark_dtuple_extern_fields(
-/*================================*/
- dtuple_t* entry); /*!< in/out: clustered index entry */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull(2,3,4,5,6)));
/*******************************************************************//**
Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec. The extern flags in rec will have to be set beforehand.
The fields are stored on pages allocated from leaf node
file segment of the index tree.
-@return DB_SUCCESS or error */
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
UNIV_INTERN
ulint
-btr_store_big_rec_extern_fields(
-/*============================*/
+btr_store_big_rec_extern_fields_func(
+/*=================================*/
dict_index_t* index, /*!< in: index of rec; the index tree
MUST be X-latched */
buf_block_t* rec_block, /*!< in/out: block containing rec */
@@ -517,10 +510,42 @@ btr_store_big_rec_extern_fields(
the "external storage" flags in offsets
will not correspond to rec when
this function returns */
- big_rec_t* big_rec_vec, /*!< in: vector containing fields
+#ifdef UNIV_DEBUG
+ mtr_t* local_mtr, /*!< in: mtr containing the
+ latch to rec and to the tree */
+#endif /* UNIV_DEBUG */
+#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
+ ibool update_in_place,/*! in: TRUE if the record is updated
+ in place (not delete+insert) */
+#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+ const big_rec_t*big_rec_vec) /*!< in: vector containing fields
to be stored externally */
- mtr_t* local_mtr); /*!< in: mtr containing the latch to
- rec and to the tree */
+ __attribute__((nonnull));
+
+/** Stores the fields in big_rec_vec to the tablespace and puts pointers to
+them in rec. The extern flags in rec will have to be set beforehand.
+The fields are stored on pages allocated from leaf node
+file segment of the index tree.
+@param index in: clustered index; MUST be X-latched by mtr
+@param b in/out: block containing rec; MUST be X-latched by mtr
+@param rec in/out: clustered index record
+@param offsets in: rec_get_offsets(rec, index);
+ the "external storage" flags in offsets will not be adjusted
+@param mtr in: mini-transaction that holds x-latch on index and b
+@param upd in: TRUE if the record is updated in place (not delete+insert)
+@param big in: vector containing fields to be stored externally
+@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
+#ifdef UNIV_DEBUG
+# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \
+ btr_store_big_rec_extern_fields_func(index,b,rec,offsets,mtr,upd,big)
+#elif defined UNIV_BLOB_LIGHT_DEBUG
+# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \
+ btr_store_big_rec_extern_fields_func(index,b,rec,offsets,upd,big)
+#else
+# define btr_store_big_rec_extern_fields(index,b,rec,offsets,mtr,upd,big) \
+ btr_store_big_rec_extern_fields_func(index,b,rec,offsets,big)
+#endif
+
/*******************************************************************//**
Frees the space in an externally stored field to the file space
management if the field in data is owned the externally stored field,
@@ -597,7 +622,20 @@ btr_push_update_extern_fields(
const upd_t* update, /*!< in: update vector */
mem_heap_t* heap) /*!< in: memory heap */
__attribute__((nonnull));
-
+/***********************************************************//**
+Sets a secondary index record's delete mark to the given value. This
+function is only used by the insert buffer merge mechanism. */
+UNIV_INTERN
+void
+btr_cur_set_deleted_flag_for_ibuf(
+/*==============================*/
+ rec_t* rec, /*!< in/out: record */
+ page_zip_des_t* page_zip, /*!< in/out: compressed page
+ corresponding to rec, or NULL
+ when the tablespace is
+ uncompressed */
+ ibool val, /*!< in: value to set */
+ mtr_t* mtr); /*!< in: mtr */
/*######################################################################*/
/** In the pessimistic delete, if the page data size drops below this
@@ -615,6 +653,11 @@ struct btr_path_struct{
order); value ULINT_UNDEFINED
denotes array end */
ulint n_recs; /*!< number of records on the page */
+ ulint page_no; /*!< no of the page containing the record */
+ ulint page_level; /*!< level of the page, if later we fetch
+ the page under page_no and it is no different
+ level then we know that the tree has been
+ reorganized */
};
#define BTR_PATH_ARRAY_N_SLOTS 250 /*!< size of path array (in slots) */
@@ -629,8 +672,13 @@ enum btr_cur_method {
hash_node, and might be necessary to
update */
BTR_CUR_BINARY, /*!< success using the binary search */
- BTR_CUR_INSERT_TO_IBUF /*!< performed the intended insert to
+ BTR_CUR_INSERT_TO_IBUF, /*!< performed the intended insert to
the insert buffer */
+ BTR_CUR_DEL_MARK_IBUF, /*!< performed the intended delete
+ mark in the insert/delete buffer */
+ BTR_CUR_DELETE_IBUF, /*!< performed the intended delete in
+ the insert/delete buffer */
+ BTR_CUR_DELETE_REF /*!< row_purge_poss_sec() failed */
};
/** The tree cursor: the definition appears here only for the compiler
@@ -638,6 +686,7 @@ to know struct size! */
struct btr_cur_struct {
dict_index_t* index; /*!< index where positioned */
page_cur_t page_cur; /*!< page cursor */
+ purge_node_t* purge_node; /*!< purge node, for BTR_DELETE */
buf_block_t* left_block; /*!< this field is used to store
a pointer to the left neighbor
page, in the cases
@@ -694,6 +743,23 @@ struct btr_cur_struct {
NULL */
ulint fold; /*!< fold value used in the search if
flag is BTR_CUR_HASH */
+ /*----- Delete buffering -------*/
+ ulint ibuf_cnt; /* in searches done on insert buffer
+ trees, this contains the "counter"
+ value (the first two bytes of the
+ fourth field) extracted from the
+ page above the leaf page, from the
+ father node pointer that pointed to
+ the leaf page. in other words, it
+ contains the minimum counter value
+ for records to be inserted on the
+ chosen leaf page. If for some reason
+ this can't be read, or if the search
+ ended on the leftmost leaf page in
+ the tree (in which case the father
+ node pointer had the 'minimum
+ record' flag set), this is
+ ULINT_UNDEFINED. */
/*------------------------------*/
/* @} */
btr_path_t* path_arr; /*!< in estimating the number of
diff --git a/storage/xtradb/include/btr0pcur.h b/storage/xtradb/include/btr0pcur.h
index 2334a266280..6c11c973fc9 100644
--- a/storage/xtradb/include/btr0pcur.h
+++ b/storage/xtradb/include/btr0pcur.h
@@ -150,7 +150,7 @@ UNIV_INLINE
ulint
btr_pcur_get_up_match(
/*==================*/
- btr_pcur_t* cursor); /*!< in: memory buffer for persistent cursor */
+ const btr_pcur_t* cursor); /*!< in: persistent cursor */
/**************************************************************//**
Gets the low_match value for a pcur after a search.
@return number of matched fields at the cursor or to the right if
@@ -159,7 +159,7 @@ UNIV_INLINE
ulint
btr_pcur_get_low_match(
/*===================*/
- btr_pcur_t* cursor); /*!< in: memory buffer for persistent cursor */
+ const btr_pcur_t* cursor); /*!< in: persistent cursor */
/**************************************************************//**
If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first
user record satisfying the search condition, in the case PAGE_CUR_L or
@@ -264,22 +264,6 @@ ulint
btr_pcur_get_rel_pos(
/*=================*/
const btr_pcur_t* cursor);/*!< in: persistent cursor */
-/*********************************************************//**
-Sets the mtr field for a pcur. */
-UNIV_INLINE
-void
-btr_pcur_set_mtr(
-/*=============*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in, own: mtr */
-/*********************************************************//**
-Gets the mtr field for a pcur.
-@return mtr */
-UNIV_INLINE
-mtr_t*
-btr_pcur_get_mtr(
-/*=============*/
- btr_pcur_t* cursor); /*!< in: persistent cursor */
/**************************************************************//**
Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES,
that is, the cursor becomes detached. If there have been modifications
@@ -387,10 +371,6 @@ page_cur_t*
btr_pcur_get_page_cur(
/*==================*/
const btr_pcur_t* cursor); /*!< in: persistent cursor */
-#else /* UNIV_DEBUG */
-# define btr_pcur_get_btr_cur(cursor) (&(cursor)->btr_cur)
-# define btr_pcur_get_page_cur(cursor) (&(cursor)->btr_cur.page_cur)
-#endif /* UNIV_DEBUG */
/*********************************************************//**
Returns the page of a persistent cursor.
@return pointer to the page */
@@ -398,7 +378,7 @@ UNIV_INLINE
page_t*
btr_pcur_get_page(
/*==============*/
- btr_pcur_t* cursor);/*!< in: persistent cursor */
+ const btr_pcur_t* cursor);/*!< in: persistent cursor */
/*********************************************************//**
Returns the buffer block of a persistent cursor.
@return pointer to the block */
@@ -406,7 +386,7 @@ UNIV_INLINE
buf_block_t*
btr_pcur_get_block(
/*===============*/
- btr_pcur_t* cursor);/*!< in: persistent cursor */
+ const btr_pcur_t* cursor);/*!< in: persistent cursor */
/*********************************************************//**
Returns the record of a persistent cursor.
@return pointer to the record */
@@ -414,7 +394,14 @@ UNIV_INLINE
rec_t*
btr_pcur_get_rec(
/*=============*/
- btr_pcur_t* cursor);/*!< in: persistent cursor */
+ const btr_pcur_t* cursor);/*!< in: persistent cursor */
+#else /* UNIV_DEBUG */
+# define btr_pcur_get_btr_cur(cursor) (&(cursor)->btr_cur)
+# define btr_pcur_get_page_cur(cursor) (&(cursor)->btr_cur.page_cur)
+# define btr_pcur_get_page(cursor) ((cursor)->btr_cur.page_cur.block->frame)
+# define btr_pcur_get_block(cursor) ((cursor)->btr_cur.page_cur.block)
+# define btr_pcur_get_rec(cursor) ((cursor)->btr_cur.page_cur.rec)
+#endif /* UNIV_DEBUG */
/*********************************************************//**
Checks if the persistent cursor is on a user record. */
UNIV_INLINE
@@ -517,9 +504,6 @@ struct btr_pcur_struct{
/* NOTE that the following fields may possess dynamically allocated
memory which should be freed if not needed anymore! */
- mtr_t* mtr; /*!< NULL, or this field may contain
- a mini-transaction which holds the
- latch on the cursor page */
byte* old_rec_buf; /*!< NULL, or a dynamically allocated
buffer for old_rec */
ulint buf_size; /*!< old_rec_buf size if old_rec_buf
diff --git a/storage/xtradb/include/btr0pcur.ic b/storage/xtradb/include/btr0pcur.ic
index 0c38797e6c5..59fdb21824b 100644
--- a/storage/xtradb/include/btr0pcur.ic
+++ b/storage/xtradb/include/btr0pcur.ic
@@ -42,34 +42,6 @@ btr_pcur_get_rel_pos(
return(cursor->rel_pos);
}
-/*********************************************************//**
-Sets the mtr field for a pcur. */
-UNIV_INLINE
-void
-btr_pcur_set_mtr(
-/*=============*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in, own: mtr */
-{
- ut_ad(cursor);
-
- cursor->mtr = mtr;
-}
-
-/*********************************************************//**
-Gets the mtr field for a pcur.
-@return mtr */
-UNIV_INLINE
-mtr_t*
-btr_pcur_get_mtr(
-/*=============*/
- btr_pcur_t* cursor) /*!< in: persistent cursor */
-{
- ut_ad(cursor);
-
- return(cursor->mtr);
-}
-
#ifdef UNIV_DEBUG
/*********************************************************//**
Returns the btr cursor component of a persistent cursor.
@@ -95,7 +67,7 @@ btr_pcur_get_page_cur(
{
return(btr_cur_get_page_cur(btr_pcur_get_btr_cur(cursor)));
}
-#endif /* UNIV_DEBUG */
+
/*********************************************************//**
Returns the page of a persistent cursor.
@return pointer to the page */
@@ -103,7 +75,7 @@ UNIV_INLINE
page_t*
btr_pcur_get_page(
/*==============*/
- btr_pcur_t* cursor) /*!< in: persistent cursor */
+ const btr_pcur_t* cursor) /*!< in: persistent cursor */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
@@ -117,7 +89,7 @@ UNIV_INLINE
buf_block_t*
btr_pcur_get_block(
/*===============*/
- btr_pcur_t* cursor) /*!< in: persistent cursor */
+ const btr_pcur_t* cursor) /*!< in: persistent cursor */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
@@ -131,13 +103,14 @@ UNIV_INLINE
rec_t*
btr_pcur_get_rec(
/*=============*/
- btr_pcur_t* cursor) /*!< in: persistent cursor */
+ const btr_pcur_t* cursor) /*!< in: persistent cursor */
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
return(btr_cur_get_rec(btr_pcur_get_btr_cur(cursor)));
}
+#endif /* UNIV_DEBUG */
/**************************************************************//**
Gets the up_match value for a pcur after a search.
@@ -147,9 +120,9 @@ UNIV_INLINE
ulint
btr_pcur_get_up_match(
/*==================*/
- btr_pcur_t* cursor) /*!< in: memory buffer for persistent cursor */
+ const btr_pcur_t* cursor) /*!< in: persistent cursor */
{
- btr_cur_t* btr_cursor;
+ const btr_cur_t* btr_cursor;
ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
|| (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
@@ -169,9 +142,9 @@ UNIV_INLINE
ulint
btr_pcur_get_low_match(
/*===================*/
- btr_pcur_t* cursor) /*!< in: memory buffer for persistent cursor */
+ const btr_pcur_t* cursor) /*!< in: persistent cursor */
{
- btr_cur_t* btr_cursor;
+ const btr_cur_t* btr_cursor;
ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED)
|| (cursor->pos_state == BTR_PCUR_IS_POSITIONED));
diff --git a/storage/xtradb/include/btr0sea.h b/storage/xtradb/include/btr0sea.h
index f6d194319ae..9e82602934e 100644
--- a/storage/xtradb/include/btr0sea.h
+++ b/storage/xtradb/include/btr0sea.h
@@ -85,7 +85,8 @@ UNIV_INTERN
ulint
btr_search_info_get_ref_count(
/*==========================*/
- btr_search_t* info); /*!< in: search info. */
+ btr_search_t* info, /*!< in: search info. */
+ index_id_t key);
/*********************************************************************//**
Updates the search info. */
UNIV_INLINE
@@ -136,10 +137,11 @@ UNIV_INTERN
void
btr_search_drop_page_hash_index(
/*============================*/
- buf_block_t* block); /*!< in: block containing index page,
+ buf_block_t* block, /*!< in: block containing index page,
s- or x-latched, or an index page
for which we know that
block->buf_fix_count == 0 */
+ dict_index_t* index_in);
/************************************************************************
Drops a page hash index based on index */
UNIV_INTERN
@@ -199,10 +201,47 @@ btr_search_validate(void);
# define btr_search_validate() TRUE
#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */
+/********************************************************************//**
+New functions to control split btr_search_index */
+UNIV_INLINE
+hash_table_t*
+btr_search_get_hash_index(
+/*======================*/
+ index_id_t key);
+
+UNIV_INLINE
+rw_lock_t*
+btr_search_get_latch(
+/*=================*/
+ index_id_t key);
+
+UNIV_INLINE
+void
+btr_search_x_lock_all(void);
+/*========================*/
+
+UNIV_INLINE
+void
+btr_search_x_unlock_all(void);
+/*==========================*/
+
+UNIV_INLINE
+void
+btr_search_s_lock_all(void);
+/*========================*/
+
+UNIV_INLINE
+void
+btr_search_s_unlock_all(void);
+/*==========================*/
+
+
/** Flag: has the search system been enabled?
Protected by btr_search_latch and btr_search_enabled_mutex. */
extern char btr_search_enabled;
+extern ulint btr_search_index_num;
+
/** Flag: whether the search system has completed its disabling process,
It is set to TRUE right after buf_pool_drop_hash_index() in
btr_search_disable(), indicating hash index entries are cleaned up.
@@ -269,7 +308,7 @@ typedef struct btr_search_sys_struct btr_search_sys_t;
/** The hash index system */
struct btr_search_sys_struct{
- hash_table_t* hash_index; /*!< the adaptive hash index,
+ hash_table_t** hash_index; /*!< the adaptive hash index,
mapping dtuple_fold values
to rec_t pointers on index pages */
};
@@ -290,10 +329,12 @@ but does NOT protect:
Bear in mind (3) and (4) when using the hash index.
*/
-extern rw_lock_t* btr_search_latch_temp;
+//extern rw_lock_t* btr_search_latch_temp;
+
+extern rw_lock_t** btr_search_latch_part;
/** The latch protecting the adaptive search system */
-#define btr_search_latch (*btr_search_latch_temp)
+//#define btr_search_latch (*btr_search_latch_temp)
#ifdef UNIV_SEARCH_PERF_STAT
/** Number of successful adaptive hash index lookups */
diff --git a/storage/xtradb/include/btr0sea.ic b/storage/xtradb/include/btr0sea.ic
index beadeeb8d02..e5a8e1190e3 100644
--- a/storage/xtradb/include/btr0sea.ic
+++ b/storage/xtradb/include/btr0sea.ic
@@ -62,8 +62,8 @@ btr_search_info_update(
btr_search_t* info;
#ifdef UNIV_SYNC_DEBUG
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
- ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX));
+ ut_ad(!rw_lock_own(btr_search_get_latch(index->id), RW_LOCK_SHARED));
+ ut_ad(!rw_lock_own(btr_search_get_latch(index->id), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
info = btr_search_get_info(index);
@@ -82,3 +82,72 @@ btr_search_info_update(
btr_search_info_update_slow(info, cursor);
}
+
+/*********************************************************************//**
+New functions to control split btr_search_index */
+UNIV_INLINE
+hash_table_t*
+btr_search_get_hash_index(
+/*======================*/
+ index_id_t key)
+{
+ return(btr_search_sys->hash_index[key % btr_search_index_num]);
+}
+
+UNIV_INLINE
+rw_lock_t*
+btr_search_get_latch(
+/*=================*/
+ index_id_t key)
+{
+ return(btr_search_latch_part[key % btr_search_index_num]);
+}
+
+UNIV_INLINE
+void
+btr_search_x_lock_all(void)
+/*=======================*/
+{
+ ulint i;
+
+ for (i = 0; i < btr_search_index_num; i++) {
+ rw_lock_x_lock(btr_search_latch_part[i]);
+ }
+}
+
+UNIV_INLINE
+void
+btr_search_x_unlock_all(void)
+/*==========================*/
+{
+ ulint i;
+
+ for (i = 0; i < btr_search_index_num; i++) {
+ rw_lock_x_unlock(btr_search_latch_part[i]);
+ }
+}
+
+UNIV_INLINE
+void
+btr_search_s_lock_all(void)
+/*=======================*/
+{
+ ulint i;
+
+ for (i = 0; i < btr_search_index_num; i++) {
+ rw_lock_s_lock(btr_search_latch_part[i]);
+ }
+}
+
+UNIV_INLINE
+void
+btr_search_s_unlock_all(void)
+/*=========================*/
+{
+ ulint i;
+
+ for (i = 0; i < btr_search_index_num; i++) {
+ rw_lock_s_unlock(btr_search_latch_part[i]);
+ }
+}
+
diff --git a/storage/xtradb/include/btr0types.h b/storage/xtradb/include/btr0types.h
index ef4a6b04b34..07c06fb18d7 100644
--- a/storage/xtradb/include/btr0types.h
+++ b/storage/xtradb/include/btr0types.h
@@ -38,6 +38,131 @@ typedef struct btr_cur_struct btr_cur_t;
/** B-tree search information for the adaptive hash index */
typedef struct btr_search_struct btr_search_t;
+#ifdef UNIV_BLOB_DEBUG
+# include "buf0types.h"
+/** An index->blobs entry for keeping track of off-page column references */
+typedef struct btr_blob_dbg_struct btr_blob_dbg_t;
+
+/** Insert to index->blobs a reference to an off-page column.
+@param index the index tree
+@param b the reference
+@param ctx context (for logging) */
+UNIV_INTERN
+void
+btr_blob_dbg_rbt_insert(
+/*====================*/
+ dict_index_t* index, /*!< in/out: index tree */
+ const btr_blob_dbg_t* b, /*!< in: the reference */
+ const char* ctx) /*!< in: context (for logging) */
+ __attribute__((nonnull));
+
+/** Remove from index->blobs a reference to an off-page column.
+@param index the index tree
+@param b the reference
+@param ctx context (for logging) */
+UNIV_INTERN
+void
+btr_blob_dbg_rbt_delete(
+/*====================*/
+ dict_index_t* index, /*!< in/out: index tree */
+ const btr_blob_dbg_t* b, /*!< in: the reference */
+ const char* ctx) /*!< in: context (for logging) */
+ __attribute__((nonnull));
+
+/**************************************************************//**
+Add to index->blobs any references to off-page columns from a record.
+@return number of references added */
+UNIV_INTERN
+ulint
+btr_blob_dbg_add_rec(
+/*=================*/
+ const rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in/out: index */
+ const ulint* offsets,/*!< in: offsets */
+ const char* ctx) /*!< in: context (for logging) */
+ __attribute__((nonnull));
+/**************************************************************//**
+Remove from index->blobs any references to off-page columns from a record.
+@return number of references removed */
+UNIV_INTERN
+ulint
+btr_blob_dbg_remove_rec(
+/*====================*/
+ const rec_t* rec, /*!< in: record */
+ dict_index_t* index, /*!< in/out: index */
+ const ulint* offsets,/*!< in: offsets */
+ const char* ctx) /*!< in: context (for logging) */
+ __attribute__((nonnull));
+/**************************************************************//**
+Count and add to index->blobs any references to off-page columns
+from records on a page.
+@return number of references added */
+UNIV_INTERN
+ulint
+btr_blob_dbg_add(
+/*=============*/
+ const page_t* page, /*!< in: rewritten page */
+ dict_index_t* index, /*!< in/out: index */
+ const char* ctx) /*!< in: context (for logging) */
+ __attribute__((nonnull));
+/**************************************************************//**
+Count and remove from index->blobs any references to off-page columns
+from records on a page.
+Used when reorganizing a page, before copying the records.
+@return number of references removed */
+UNIV_INTERN
+ulint
+btr_blob_dbg_remove(
+/*================*/
+ const page_t* page, /*!< in: b-tree page */
+ dict_index_t* index, /*!< in/out: index */
+ const char* ctx) /*!< in: context (for logging) */
+ __attribute__((nonnull));
+/**************************************************************//**
+Restore in index->blobs any references to off-page columns
+Used when page reorganize fails due to compressed page overflow. */
+UNIV_INTERN
+void
+btr_blob_dbg_restore(
+/*=================*/
+ const page_t* npage, /*!< in: page that failed to compress */
+ const page_t* page, /*!< in: copy of original page */
+ dict_index_t* index, /*!< in/out: index */
+ const char* ctx) /*!< in: context (for logging) */
+ __attribute__((nonnull));
+
+/** Operation that processes the BLOB references of an index record
+@param[in] rec record on index page
+@param[in/out] index the index tree of the record
+@param[in] offsets rec_get_offsets(rec,index)
+@param[in] ctx context (for logging)
+@return number of BLOB references processed */
+typedef ulint (*btr_blob_dbg_op_f)
+(const rec_t* rec,dict_index_t* index,const ulint* offsets,const char* ctx);
+
+/**************************************************************//**
+Count and process all references to off-page columns on a page.
+@return number of references processed */
+UNIV_INTERN
+ulint
+btr_blob_dbg_op(
+/*============*/
+ const page_t* page, /*!< in: B-tree leaf page */
+ const rec_t* rec, /*!< in: record to start from
+ (NULL to process the whole page) */
+ dict_index_t* index, /*!< in/out: index */
+ const char* ctx, /*!< in: context (for logging) */
+ const btr_blob_dbg_op_f op) /*!< in: operation on records */
+ __attribute__((nonnull(1,3,4,5)));
+#else /* UNIV_BLOB_DEBUG */
+# define btr_blob_dbg_add_rec(rec, index, offsets, ctx) ((void) 0)
+# define btr_blob_dbg_add(page, index, ctx) ((void) 0)
+# define btr_blob_dbg_remove_rec(rec, index, offsets, ctx) ((void) 0)
+# define btr_blob_dbg_remove(page, index, ctx) ((void) 0)
+# define btr_blob_dbg_restore(npage, page, index, ctx) ((void) 0)
+# define btr_blob_dbg_op(page, rec, index, ctx, op) ((void) 0)
+#endif /* UNIV_BLOB_DEBUG */
+
/** The size of a reference to data stored on a different page.
The reference is stored at the end of the prefix of the field
in the index record. */
diff --git a/storage/xtradb/include/buf0buddy.h b/storage/xtradb/include/buf0buddy.h
index 3a35f8e46e9..f51b8020918 100644
--- a/storage/xtradb/include/buf0buddy.h
+++ b/storage/xtradb/include/buf0buddy.h
@@ -36,22 +36,24 @@ Created December 2006 by Marko Makela
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
-buf_pool_mutex and must not hold buf_pool_zip_mutex or any
-block->mutex. The buf_pool_mutex may only be released and reacquired
+buf_pool->mutex and must not hold buf_pool->zip_mutex or any
+block->mutex. The buf_pool->mutex may only be released and reacquired
if lru != NULL. This function should only be used for allocating
compressed page frames or control blocks (buf_page_t). Allocated
control blocks must be properly initialized immediately after
buf_buddy_alloc() has returned the memory, before releasing
-buf_pool_mutex.
+buf_pool->mutex.
@return allocated block, possibly NULL if lru == NULL */
UNIV_INLINE
void*
buf_buddy_alloc(
/*============*/
+ buf_pool_t* buf_pool,
+ /*!< buffer pool in which the block resides */
ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
ibool* lru, /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
- and buf_pool_mutex was temporarily released,
+ and buf_pool->mutex was temporarily released,
or NULL if the LRU list should not be used */
ibool have_page_hash_mutex)
__attribute__((malloc));
@@ -62,29 +64,14 @@ UNIV_INLINE
void
buf_buddy_free(
/*===========*/
+ buf_pool_t* buf_pool,
+ /*!< buffer pool in which the block resides */
void* buf, /*!< in: block to be freed, must not be
pointed to by the buffer pool */
ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
ibool have_page_hash_mutex)
__attribute__((nonnull));
-/** Statistics of buddy blocks of a given size. */
-struct buf_buddy_stat_struct {
- /** Number of blocks allocated from the buddy system. */
- ulint used;
- /** Number of blocks relocated by the buddy system. */
- ib_uint64_t relocated;
- /** Total duration of block relocations, in microseconds. */
- ib_uint64_t relocated_usec;
-};
-
-/** Statistics of buddy blocks of a given size. */
-typedef struct buf_buddy_stat_struct buf_buddy_stat_t;
-
-/** Statistics of the buddy system, indexed by block size.
-Protected by buf_pool_mutex. */
-extern buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
-
#ifndef UNIV_NONINL
# include "buf0buddy.ic"
#endif
diff --git a/storage/xtradb/include/buf0buddy.ic b/storage/xtradb/include/buf0buddy.ic
index 69659fb69d6..4dbfed52abe 100644
--- a/storage/xtradb/include/buf0buddy.ic
+++ b/storage/xtradb/include/buf0buddy.ic
@@ -35,18 +35,20 @@ Created December 2006 by Marko Makela
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
-buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex.
-The buf_pool_mutex may only be released and reacquired if lru != NULL.
+buf_pool->mutex and must not hold buf_pool->zip_mutex or any block->mutex.
+The buf_pool->mutex may only be released and reacquired if lru != NULL.
@return allocated block, possibly NULL if lru==NULL */
UNIV_INTERN
void*
buf_buddy_alloc_low(
/*================*/
+ buf_pool_t* buf_pool,
+ /*!< in: buffer pool in which the page resides */
ulint i, /*!< in: index of buf_pool->zip_free[],
or BUF_BUDDY_SIZES */
ibool* lru, /*!< in: pointer to a variable that will be assigned
TRUE if storage was allocated from the LRU list
- and buf_pool_mutex was temporarily released,
+ and buf_pool->mutex was temporarily released,
or NULL if the LRU list should not be used */
ibool have_page_hash_mutex)
__attribute__((malloc));
@@ -57,11 +59,12 @@ UNIV_INTERN
void
buf_buddy_free_low(
/*===============*/
- void* buf, /*!< in: block to be freed, must not be
- pointed to by the buffer pool */
- ulint i, /*!< in: index of buf_pool->zip_free[],
- or BUF_BUDDY_SIZES */
- ibool have_page_hash_mutex)
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ void* buf, /*!< in: block to be freed, must not be
+ pointed to by the buffer pool */
+ ulint i, /*!< in: index of buf_pool->zip_free[],
+ or BUF_BUDDY_SIZES */
+ ibool have_page_hash_mutex)
__attribute__((nonnull));
/**********************************************************************//**
@@ -85,28 +88,33 @@ buf_buddy_get_slot(
/**********************************************************************//**
Allocate a block. The thread calling this function must hold
-buf_pool_mutex and must not hold buf_pool_zip_mutex or any
-block->mutex. The buf_pool_mutex may only be released and reacquired
+buf_pool->mutex and must not hold buf_pool->zip_mutex or any
+block->mutex. The buf_pool->mutex may only be released and reacquired
if lru != NULL. This function should only be used for allocating
compressed page frames or control blocks (buf_page_t). Allocated
control blocks must be properly initialized immediately after
buf_buddy_alloc() has returned the memory, before releasing
-buf_pool_mutex.
+buf_pool->mutex.
@return allocated block, possibly NULL if lru == NULL */
UNIV_INLINE
void*
buf_buddy_alloc(
/*============*/
- ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
- ibool* lru, /*!< in: pointer to a variable that will be assigned
- TRUE if storage was allocated from the LRU list
- and buf_pool_mutex was temporarily released,
- or NULL if the LRU list should not be used */
- ibool have_page_hash_mutex)
+ buf_pool_t* buf_pool, /*!< in: buffer pool in which
+ the page resides */
+ ulint size, /*!< in: block size, up to
+ UNIV_PAGE_SIZE */
+ ibool* lru, /*!< in: pointer to a variable
+ that will be assigned TRUE if
+ storage was allocated from the
+ LRU list and buf_pool->mutex was
+ temporarily released, or NULL if
+ the LRU list should not be used */
+ ibool have_page_hash_mutex)
{
- //ut_ad(buf_pool_mutex_own());
+ //ut_ad(buf_pool_mutex_own(buf_pool));
- return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru, have_page_hash_mutex));
+ return(buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size), lru, have_page_hash_mutex));
}
/**********************************************************************//**
@@ -115,25 +123,27 @@ UNIV_INLINE
void
buf_buddy_free(
/*===========*/
- void* buf, /*!< in: block to be freed, must not be
- pointed to by the buffer pool */
- ulint size, /*!< in: block size, up to UNIV_PAGE_SIZE */
- ibool have_page_hash_mutex)
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ void* buf, /*!< in: block to be freed, must not be
+ pointed to by the buffer pool */
+ ulint size, /*!< in: block size, up to
+ UNIV_PAGE_SIZE */
+ ibool have_page_hash_mutex)
{
- //ut_ad(buf_pool_mutex_own());
+ //ut_ad(buf_pool_mutex_own(buf_pool));
if (!have_page_hash_mutex) {
- mutex_enter(&LRU_list_mutex);
- rw_lock_x_lock(&page_hash_latch);
+ mutex_enter(&buf_pool->LRU_list_mutex);
+ rw_lock_x_lock(&buf_pool->page_hash_latch);
}
- mutex_enter(&zip_free_mutex);
- buf_buddy_free_low(buf, buf_buddy_get_slot(size), TRUE);
- mutex_exit(&zip_free_mutex);
+ mutex_enter(&buf_pool->zip_free_mutex);
+ buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size), TRUE);
+ mutex_exit(&buf_pool->zip_free_mutex);
if (!have_page_hash_mutex) {
- mutex_exit(&LRU_list_mutex);
- rw_lock_x_unlock(&page_hash_latch);
+ mutex_exit(&buf_pool->LRU_list_mutex);
+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
}
}
diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h
index e06927f42f0..ef6a26f9459 100644
--- a/storage/xtradb/include/buf0buf.h
+++ b/storage/xtradb/include/buf0buf.h
@@ -33,21 +33,29 @@ Created 11/5/1995 Heikki Tuuri
#include "hash0hash.h"
#include "ut0byte.h"
#include "page0types.h"
-#include "ut0rbt.h"
#ifndef UNIV_HOTBACKUP
+#include "ut0rbt.h"
#include "os0proc.h"
-#include "srv0srv.h"
/** @name Modes for buf_page_get_gen */
/* @{ */
#define BUF_GET 10 /*!< get always */
#define BUF_GET_IF_IN_POOL 11 /*!< get if in pool */
+#define BUF_PEEK_IF_IN_POOL 12 /*!< get if in pool, do not make
+ the block young in the LRU list */
#define BUF_GET_NO_LATCH 14 /*!< get and bufferfix, but
set no latch; we have
separated this case, because
it is error-prone programming
not to set a latch, and it
should be used with care */
+#define BUF_GET_IF_IN_POOL_OR_WATCH 15
+ /*!< Get the page only if it's in the
+ buffer pool, if not then set a watch
+ on the page. */
+#define BUF_GET_POSSIBLY_FREED 16
+ /*!< Like BUF_GET, but do not mind
+ if the file page has been freed. */
/* @} */
/** @name Modes for buf_page_get_known_nowait */
/* @{ */
@@ -60,7 +68,14 @@ Created 11/5/1995 Heikki Tuuri
position of the block. */
/* @} */
-extern buf_pool_t* buf_pool; /*!< The buffer pool of the database */
+#define MAX_BUFFER_POOLS 64 /*!< The maximum number of buffer
+ pools that can be defined */
+
+#define BUF_POOL_WATCH_SIZE 1 /*!< Maximum number of concurrent
+ buffer pool watches */
+
+extern buf_pool_t* buf_pool_ptr; /*!< The buffer pools
+ of the database */
#ifdef UNIV_DEBUG
extern ibool buf_debug_prints;/*!< If this is set TRUE, the program
prints info whenever read or flush
@@ -68,6 +83,8 @@ extern ibool buf_debug_prints;/*!< If this is set TRUE, the program
#endif /* UNIV_DEBUG */
extern ulint srv_buf_pool_write_requests; /*!< variable to count write request
issued */
+extern ulint srv_buf_pool_instances;
+extern ulint srv_buf_pool_curr_size;
#else /* !UNIV_HOTBACKUP */
extern buf_block_t* back_block1; /*!< first block, for --apply-log */
extern buf_block_t* back_block2; /*!< second block, for page reorganize */
@@ -83,6 +100,8 @@ The enumeration values must be 0..7. */
enum buf_page_state {
BUF_BLOCK_ZIP_FREE = 0, /*!< contains a free
compressed page */
+ BUF_BLOCK_POOL_WATCH = 0, /*!< a sentinel for the buffer pool
+ watch, element of buf_pool->watch[] */
BUF_BLOCK_ZIP_PAGE, /*!< contains a clean
compressed page */
BUF_BLOCK_ZIP_DIRTY, /*!< contains a compressed
@@ -102,21 +121,123 @@ enum buf_page_state {
before putting to the free list */
};
+
+/** This structure defines information we will fetch from each buffer pool. It
+will be used to print table IO stats */
+struct buf_pool_info_struct{
+ /* General buffer pool info */
+ ulint pool_unique_id; /*!< Buffer Pool ID */
+ ulint pool_size; /*!< Buffer Pool size in pages */
+ ulint pool_size_bytes;
+ ulint lru_len; /*!< Length of buf_pool->LRU */
+ ulint old_lru_len; /*!< buf_pool->LRU_old_len */
+ ulint free_list_len; /*!< Length of buf_pool->free list */
+ ulint flush_list_len; /*!< Length of buf_pool->flush_list */
+ ulint n_pend_unzip; /*!< buf_pool->n_pend_unzip, pages
+ pending decompress */
+ ulint n_pend_reads; /*!< buf_pool->n_pend_reads, pages
+ pending read */
+ ulint n_pending_flush_lru; /*!< Pages pending flush in LRU */
+ ulint n_pending_flush_list; /*!< Pages pending flush in FLUSH
+ LIST */
+ ulint n_pending_flush_single_page;/*!< Pages pending flush in
+ BUF_FLUSH_SINGLE_PAGE list */
+ ulint n_pages_made_young; /*!< number of pages made young */
+ ulint n_pages_not_made_young; /*!< number of pages not made young */
+ ulint n_pages_read; /*!< buf_pool->n_pages_read */
+ ulint n_pages_created; /*!< buf_pool->n_pages_created */
+ ulint n_pages_written; /*!< buf_pool->n_pages_written */
+ ulint n_page_gets; /*!< buf_pool->n_page_gets */
+ ulint n_ra_pages_read; /*!< buf_pool->n_ra_pages_read, number
+ of pages readahead */
+ ulint n_ra_pages_evicted; /*!< buf_pool->n_ra_pages_evicted,
+ number of readahead pages evicted
+ without access */
+ ulint n_page_get_delta; /*!< num of buffer pool page gets since
+ last printout */
+
+ /* Buffer pool access stats */
+ double page_made_young_rate; /*!< page made young rate in pages
+ per second */
+ double page_not_made_young_rate;/*!< page not made young rate
+ in pages per second */
+ double pages_read_rate; /*!< num of pages read per second */
+ double pages_created_rate; /*!< num of pages create per second */
+ double pages_written_rate; /*!< num of pages written per second */
+ ulint page_read_delta; /*!< num of pages read since last
+ printout */
+ ulint young_making_delta; /*!< num of pages made young since
+ last printout */
+ ulint not_young_making_delta; /*!< num of pages not make young since
+ last printout */
+
+ /* Statistics about read ahead algorithm. */
+ double pages_readahead_rate; /*!< readahead rate in pages per
+ second */
+ double pages_evicted_rate; /*!< rate of readahead page evicted
+ without access, in pages per second */
+
+ /* Stats about LRU eviction */
+ ulint unzip_lru_len; /*!< length of buf_pool->unzip_LRU
+ list */
+ /* Counters for LRU policy */
+ ulint io_sum; /*!< buf_LRU_stat_sum.io */
+ ulint io_cur; /*!< buf_LRU_stat_cur.io, num of IO
+ for current interval */
+ ulint unzip_sum; /*!< buf_LRU_stat_sum.unzip */
+ ulint unzip_cur; /*!< buf_LRU_stat_cur.unzip, num
+ pages decompressed in current
+ interval */
+};
+
+typedef struct buf_pool_info_struct buf_pool_info_t;
+
#ifndef UNIV_HOTBACKUP
/********************************************************************//**
+Acquire mutex on all buffer pool instances */
+UNIV_INLINE
+void
+buf_pool_mutex_enter_all(void);
+/*===========================*/
+
+/********************************************************************//**
+Release mutex on all buffer pool instances */
+UNIV_INLINE
+void
+buf_pool_mutex_exit_all(void);
+/*==========================*/
+
+/********************************************************************//**
+*/
+UNIV_INLINE
+void
+buf_pool_page_hash_x_lock_all(void);
+/*================================*/
+
+/********************************************************************//**
+*/
+UNIV_INLINE
+void
+buf_pool_page_hash_x_unlock_all(void);
+/*==================================*/
+
+/********************************************************************//**
Creates the buffer pool.
@return own: buf_pool object, NULL if not enough memory or error */
UNIV_INTERN
-buf_pool_t*
-buf_pool_init(void);
-/*===============*/
+ulint
+buf_pool_init(
+/*=========*/
+ ulint size, /*!< in: Size of the total pool in bytes */
+ ulint n_instances); /*!< in: Number of instances */
/********************************************************************//**
Frees the buffer pool at shutdown. This must not be invoked before
freeing all mutexes. */
UNIV_INTERN
void
-buf_pool_free(void);
-/*===============*/
+buf_pool_free(
+/*==========*/
+ ulint n_instances); /*!< in: numbere of instances to free */
/********************************************************************//**
Drops the adaptive hash index. To prevent a livelock, this function
@@ -153,23 +274,31 @@ UNIV_INLINE
ulint
buf_pool_get_curr_size(void);
/*========================*/
+/*********************************************************************//**
+Gets the current size of buffer buf_pool in frames.
+@return size in pages */
+UNIV_INLINE
+ulint
+buf_pool_get_n_pages(void);
+/*=======================*/
/********************************************************************//**
Gets the smallest oldest_modification lsn for any page in the pool. Returns
zero if all modified pages have been flushed to disk.
@return oldest modification in pool, zero if none */
-UNIV_INLINE
+UNIV_INTERN
ib_uint64_t
buf_pool_get_oldest_modification(void);
/*==================================*/
/********************************************************************//**
Allocates a buffer block.
@return own: the allocated block, in state BUF_BLOCK_MEMORY */
-UNIV_INLINE
+UNIV_INTERN
buf_block_t*
buf_block_alloc(
/*============*/
- ulint zip_size); /*!< in: compressed page size in bytes,
- or 0 if uncompressed tablespace */
+ buf_pool_t* buf_pool); /*!< in: buffer pool instance,
+ or NULL for round-robin selection
+ of the buffer pool */
/********************************************************************//**
Frees a buffer block which does not contain a file page. */
UNIV_INLINE
@@ -287,7 +416,8 @@ buf_page_get_gen(
ulint rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */
buf_block_t* guess, /*!< in: guessed block or NULL */
ulint mode, /*!< in: BUF_GET, BUF_GET_IF_IN_POOL,
- BUF_GET_NO_LATCH */
+ BUF_PEEK_IF_IN_POOL, BUF_GET_NO_LATCH or
+ BUF_GET_IF_IN_POOL_OR_WATCH */
const char* file, /*!< in: file name */
ulint line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mini-transaction */
@@ -337,9 +467,8 @@ void
buf_page_release(
/*=============*/
buf_block_t* block, /*!< in: buffer block */
- ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH,
+ ulint rw_latch); /*!< in: RW_S_LATCH, RW_X_LATCH,
RW_NO_LATCH */
- mtr_t* mtr); /*!< in: mtr */
/********************************************************************//**
Moves a page to the start of the buffer pool LRU list. This high-level
function can be used to prevent an important page from slipping out of
@@ -371,7 +500,7 @@ buf_reset_check_index_page_at_flush(
/*================================*/
ulint space, /*!< in: space id */
ulint offset);/*!< in: page number */
-#ifdef UNIV_DEBUG_FILE_ACCESSES
+#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
/********************************************************************//**
Sets file_page_was_freed TRUE if the page is found in the buffer pool.
This function should be called when we free a file page and want the
@@ -396,7 +525,7 @@ buf_page_reset_file_page_was_freed(
/*===============================*/
ulint space, /*!< in: space id */
ulint offset); /*!< in: page number */
-#endif /* UNIV_DEBUG_FILE_ACCESSES */
+#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
/********************************************************************//**
Reads the freed_page_clock of a buffer block.
@return freed_page_clock */
@@ -449,7 +578,7 @@ buf_page_get_newest_modification(
page frame */
/********************************************************************//**
Increments the modify clock of a frame by 1. The caller must (1) own the
-buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock
+buf_pool->mutex and block bufferfix count has to be zero, (2) or own an x-lock
on the block. */
UNIV_INLINE
void
@@ -536,7 +665,8 @@ UNIV_INTERN
buf_block_t*
buf_pool_contains_zip(
/*==================*/
- const void* data); /*!< in: pointer to compressed page */
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ const void* data); /*!< in: pointer to compressed page */
#endif /* UNIV_DEBUG */
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/*********************************************************************//**
@@ -610,8 +740,15 @@ buf_get_modified_ratio_pct(void);
Refreshes the statistics used to print per-second averages. */
UNIV_INTERN
void
-buf_refresh_io_stats(void);
-/*======================*/
+buf_refresh_io_stats(
+/*=================*/
+ buf_pool_t* buf_pool); /*!< buffer pool instance */
+/**********************************************************************//**
+Refreshes the statistics used to print per-second averages. */
+UNIV_INTERN
+void
+buf_refresh_io_stats_all(void);
+/*=================*/
/*********************************************************************//**
Asserts that all file pages in the buffer are in a replaceable state.
@return TRUE */
@@ -836,7 +973,8 @@ buf_page_set_accessed(
__attribute__((nonnull));
/*********************************************************************//**
Gets the buf_block_t handle of a buffered file block if an uncompressed
-page frame exists, or NULL.
+page frame exists, or NULL. Note: even though bpage is not declared a
+const we don't update its value. It is safe to make this pure.
@return control block, or NULL */
UNIV_INLINE
buf_block_t*
@@ -988,8 +1126,7 @@ UNIV_INTERN
void
buf_page_io_complete(
/*=================*/
- buf_page_t* bpage, /*!< in: pointer to the block in question */
- trx_t* trx);
+ buf_page_t* bpage); /*!< in: pointer to the block in question */
/********************************************************************//**
Calculates a folded value of a file page address to use in the page hash
table.
@@ -1001,15 +1138,81 @@ buf_page_address_fold(
ulint space, /*!< in: space id */
ulint offset) /*!< in: offset of the page within space */
__attribute__((const));
+/********************************************************************//**
+Calculates the index of a buffer pool to the buf_pool[] array.
+@return the position of the buffer pool in buf_pool[] */
+UNIV_INLINE
+ulint
+buf_pool_index(
+/*===========*/
+ const buf_pool_t* buf_pool) /*!< in: buffer pool */
+ __attribute__((nonnull, const));
+/********************************************************************//**
+*/
+UNIV_INTERN
+buf_block_t*
+buf_page_from_array(
+/*================*/
+ buf_pool_t* buf_pool,
+ ulint n_block);
+/******************************************************************//**
+Returns the buffer pool instance given a page instance
+@return buf_pool */
+UNIV_INLINE
+buf_pool_t*
+buf_pool_from_bpage(
+/*================*/
+ const buf_page_t* bpage); /*!< in: buffer pool page */
+/******************************************************************//**
+Returns the buffer pool instance given a block instance
+@return buf_pool */
+UNIV_INLINE
+buf_pool_t*
+buf_pool_from_block(
+/*================*/
+ const buf_block_t* block); /*!< in: block */
+/******************************************************************//**
+Returns the buffer pool instance given space and offset of page
+@return buffer pool */
+UNIV_INLINE
+buf_pool_t*
+buf_pool_get(
+/*==========*/
+ ulint space, /*!< in: space id */
+ ulint offset);/*!< in: offset of the page within space */
+/******************************************************************//**
+Returns the buffer pool instance given its array index
+@return buffer pool */
+UNIV_INLINE
+buf_pool_t*
+buf_pool_from_array(
+/*================*/
+ ulint index); /*!< in: array index to get
+ buffer pool instance from */
/******************************************************************//**
Returns the control block of a file page, NULL if not found.
@return block, NULL if not found */
UNIV_INLINE
buf_page_t*
+buf_page_hash_get_low(
+/*==================*/
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: offset of the page
+ within space */
+ ulint fold); /*!< in: buf_page_address_fold(
+ space, offset) */
+/******************************************************************//**
+Returns the control block of a file page, NULL if not found.
+@return block, NULL if not found or not a real control block */
+UNIV_INLINE
+buf_page_t*
buf_page_hash_get(
/*==============*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: offset of the page within space */
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ ulint space, /*!< in: space id */
+ ulint offset); /*!< in: offset of the page
+ within space */
/******************************************************************//**
Returns the control block of a file page, NULL if not found
or an uncompressed page frame does not exist.
@@ -1018,8 +1221,10 @@ UNIV_INLINE
buf_block_t*
buf_block_hash_get(
/*===============*/
- ulint space, /*!< in: space id */
- ulint offset);/*!< in: offset of the page within space */
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ ulint space, /*!< in: space id */
+ ulint offset); /*!< in: offset of the page
+ within space */
/*********************************************************************//**
Gets the current length of the free list of buffer blocks.
@return length of the free list */
@@ -1027,8 +1232,67 @@ UNIV_INTERN
ulint
buf_get_free_list_len(void);
/*=======================*/
-#endif /* !UNIV_HOTBACKUP */
+/********************************************************************
+Determine if a block is a sentinel for a buffer pool watch.
+@return TRUE if a sentinel for a buffer pool watch, FALSE if not */
+UNIV_INTERN
+ibool
+buf_pool_watch_is_sentinel(
+/*=======================*/
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ const buf_page_t* bpage) /*!< in: block */
+ __attribute__((nonnull, warn_unused_result));
+/****************************************************************//**
+Add watch for the given page to be read in. Caller must have the buffer pool
+@return NULL if watch set, block if the page is in the buffer pool */
+UNIV_INTERN
+buf_page_t*
+buf_pool_watch_set(
+/*===============*/
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: page number */
+ ulint fold) /*!< in: buf_page_address_fold(space, offset) */
+ __attribute__((warn_unused_result));
+/****************************************************************//**
+Stop watching if the page has been read in.
+buf_pool_watch_set(space,offset) must have returned NULL before. */
+UNIV_INTERN
+void
+buf_pool_watch_unset(
+/*=================*/
+ ulint space, /*!< in: space id */
+ ulint offset);/*!< in: page number */
+/****************************************************************//**
+Check if the page has been read in.
+This may only be called after buf_pool_watch_set(space,offset)
+has returned NULL and before invoking buf_pool_watch_unset(space,offset).
+@return FALSE if the given page was not read in, TRUE if it was */
+UNIV_INTERN
+ibool
+buf_pool_watch_occurred(
+/*====================*/
+ ulint space, /*!< in: space id */
+ ulint offset) /*!< in: page number */
+ __attribute__((warn_unused_result));
+/********************************************************************//**
+Get total buffer pool statistics. */
+UNIV_INTERN
+void
+buf_get_total_list_len(
+/*===================*/
+ ulint* LRU_len, /*!< out: length of all LRU lists */
+ ulint* free_len, /*!< out: length of all free lists */
+ ulint* flush_list_len);/*!< out: length of all flush lists */
+/********************************************************************//**
+Get total buffer pool statistics. */
+UNIV_INTERN
+void
+buf_get_total_stat(
+/*===============*/
+ buf_pool_stat_t*tot_stat); /*!< out: buffer pool stats */
+
+#endif /* !UNIV_HOTBACKUP */
/** The common buffer control block structure
for compressed and uncompressed frames */
@@ -1037,18 +1301,18 @@ struct buf_page_struct{
/** @name General fields
None of these bit-fields must be modified without holding
buf_page_get_mutex() [buf_block_struct::mutex or
- buf_pool_zip_mutex], since they can be stored in the same
+ buf_pool->zip_mutex], since they can be stored in the same
machine word. Some of these fields are additionally protected
- by buf_pool_mutex. */
+ by buf_pool->mutex. */
/* @{ */
unsigned space:32; /*!< tablespace id; also protected
- by buf_pool_mutex. */
+ by buf_pool->mutex. */
unsigned offset:32; /*!< page number; also protected
- by buf_pool_mutex. */
+ by buf_pool->mutex. */
unsigned state:3; /*!< state of the control block; also
- protected by buf_pool_mutex.
+ protected by buf_pool->mutex.
State transitions from
BUF_BLOCK_READY_FOR_USE to
BUF_BLOCK_MEMORY need not be
@@ -1060,15 +1324,23 @@ struct buf_page_struct{
flush_type.
@see enum buf_flush */
unsigned io_fix:2; /*!< type of pending I/O operation;
- also protected by buf_pool_mutex
+ also protected by buf_pool->mutex
@see enum buf_io_fix */
- unsigned buf_fix_count:25;/*!< count of how manyfold this block
+ unsigned buf_fix_count:19;/*!< count of how manyfold this block
is currently bufferfixed */
+ unsigned buf_pool_index:6;/*!< index number of the buffer pool
+ that this block belongs to */
+# if MAX_BUFFER_POOLS > 64
+# error "MAX_BUFFER_POOLS > 64; redefine buf_pool_index:6"
+# endif
/* @} */
#endif /* !UNIV_HOTBACKUP */
page_zip_des_t zip; /*!< compressed page; zip.data
(but not the data it points to) is
- also protected by buf_pool_mutex */
+ also protected by buf_pool->mutex;
+ state == BUF_BLOCK_ZIP_PAGE and
+ zip.data == NULL means an active
+ buf_pool->watch */
#ifndef UNIV_HOTBACKUP
buf_page_t* hash; /*!< node used in chaining to
buf_pool->page_hash or
@@ -1079,14 +1351,16 @@ struct buf_page_struct{
#endif /* UNIV_DEBUG */
/** @name Page flushing fields
- All these are protected by buf_pool_mutex. */
+ All these are protected by buf_pool->mutex. */
/* @{ */
/* UT_LIST_NODE_T(buf_page_t) list; */
/*!< based on state, this is a
- list node, protected only by
- buf_pool_mutex, in one of the
- following lists in buf_pool:
+ list node, protected either by
+ buf_pool->mutex or by
+ buf_pool->flush_list_mutex,
+ in one of the following lists in
+ buf_pool:
- BUF_BLOCK_NOT_USED: free
- BUF_BLOCK_FILE_PAGE: flush_list
@@ -1094,6 +1368,12 @@ struct buf_page_struct{
- BUF_BLOCK_ZIP_PAGE: zip_clean
- BUF_BLOCK_ZIP_FREE: zip_free[]
+ If bpage is part of flush_list
+ then the node pointers are
+ covered by buf_pool->flush_list_mutex.
+ Otherwise these pointers are
+ protected by buf_pool->mutex.
+
The contents of the list node
is undefined if !in_flush_list
&& state == BUF_BLOCK_FILE_PAGE,
@@ -1108,12 +1388,18 @@ struct buf_page_struct{
UT_LIST_NODE_T(buf_page_t) zip_list; /* zip_clean or zip_free[] */
#ifdef UNIV_DEBUG
ibool in_flush_list; /*!< TRUE if in buf_pool->flush_list;
- when buf_pool_mutex is free, the
- following should hold: in_flush_list
+ when buf_pool->flush_list_mutex is
+ free, the following should hold:
+ in_flush_list
== (state == BUF_BLOCK_FILE_PAGE
- || state == BUF_BLOCK_ZIP_DIRTY) */
+ || state == BUF_BLOCK_ZIP_DIRTY)
+ Writes to this field must be
+ covered by both block->mutex
+ and buf_pool->flush_list_mutex. Hence
+ reads can happen while holding
+ any one of the two mutexes */
ibool in_free_list; /*!< TRUE if in buf_pool->free; when
- buf_pool_mutex is free, the following
+ buf_pool->mutex is free, the following
should hold: in_free_list
== (state == BUF_BLOCK_NOT_USED) */
#endif /* UNIV_DEBUG */
@@ -1121,7 +1407,8 @@ struct buf_page_struct{
/*!< log sequence number of
the youngest modification to
this block, zero if not
- modified */
+ modified. Protected by block
+ mutex */
ib_uint64_t oldest_modification;
/*!< log sequence number of
the START of the log entry
@@ -1129,11 +1416,16 @@ struct buf_page_struct{
modification to this block
which has not yet been flushed
on disk; zero if all
- modifications are on disk */
+ modifications are on disk.
+ Writes to this field must be
+ covered by both block->mutex
+ and buf_pool->flush_list_mutex. Hence
+ reads can happen while holding
+ any one of the two mutexes */
/* @} */
/** @name LRU replacement algorithm fields
- These fields are protected by buf_pool_mutex only (not
- buf_pool_zip_mutex or buf_block_struct::mutex). */
+ These fields are protected by buf_pool->mutex only (not
+ buf_pool->zip_mutex or buf_block_struct::mutex). */
/* @{ */
UT_LIST_NODE_T(buf_page_t) LRU;
@@ -1157,12 +1449,13 @@ struct buf_page_struct{
0 if the block was never accessed
in the buffer pool */
/* @} */
+ ibool space_was_being_deleted;
ibool is_corrupt;
-# ifdef UNIV_DEBUG_FILE_ACCESSES
+# if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
ibool file_page_was_freed;
/*!< this is set to TRUE when fsp
frees a page in buffer pool */
-# endif /* UNIV_DEBUG_FILE_ACCESSES */
+# endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
#endif /* !UNIV_HOTBACKUP */
};
@@ -1203,7 +1496,7 @@ struct buf_block_struct{
unsigned lock_hash_val:32;/*!< hashed value of the page address
in the record lock hash table;
protected by buf_block_t::lock
- (or buf_block_t::mutex, buf_pool_mutex
+ (or buf_block_t::mutex, buf_pool->mutex
in buf_page_get_gen(),
buf_page_init_for_read()
and buf_page_create()) */
@@ -1264,7 +1557,7 @@ struct buf_block_struct{
pointers in the adaptive hash index
pointing to this frame */
#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
- unsigned is_hashed:1; /*!< TRUE if hash index has
+ volatile unsigned is_hashed:1; /*!< TRUE if hash index has
already been built on this
page; note that it does not
guarantee that the index is
@@ -1278,6 +1571,7 @@ struct buf_block_struct{
unsigned curr_left_side:1;/*!< TRUE or FALSE in hash indexing */
dict_index_t* index; /*!< Index for which the adaptive
hash index has been created. */
+ volatile rw_lock_t* btr_search_latch;
/* @} */
# ifdef UNIV_SYNC_DEBUG
/** @name Debug fields */
@@ -1302,10 +1596,7 @@ struct buf_block_struct{
/**********************************************************************//**
Compute the hash fold value for blocks in buf_pool->zip_hash. */
/* @{ */
-/* the fold should be relative when srv_buffer_pool_shm_key is enabled */
-#define BUF_POOL_ZIP_FOLD_PTR(ptr) (!srv_buffer_pool_shm_key\
- ?((ulint) (ptr) / UNIV_PAGE_SIZE)\
- :((ulint) ((byte*)ptr - (byte*)(buf_pool->chunks->blocks->frame)) / UNIV_PAGE_SIZE))
+#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
/* @} */
@@ -1344,6 +1635,16 @@ struct buf_pool_stat_struct{
buf_page_peek_if_too_old() */
};
+/** Statistics of buddy blocks of a given size. */
+struct buf_buddy_stat_struct {
+ /** Number of blocks allocated from the buddy system. */
+ ulint used;
+ /** Number of blocks relocated by the buddy system. */
+ ib_uint64_t relocated;
+ /** Total duration of block relocations, in microseconds. */
+ ib_uint64_t relocated_usec;
+};
+
/** @brief The buffer pool structure.
NOTE! The definition appears here only for other modules of this
@@ -1353,7 +1654,30 @@ struct buf_pool_struct{
/** @name General fields */
/* @{ */
-
+ mutex_t mutex; /*!< Buffer pool mutex of this
+ instance */
+ mutex_t zip_mutex; /*!< Zip mutex of this buffer
+ pool instance, protects compressed
+ only pages (of type buf_page_t, not
+ buf_block_t */
+ mutex_t LRU_list_mutex;
+ rw_lock_t page_hash_latch;
+ mutex_t free_list_mutex;
+ mutex_t zip_free_mutex;
+ mutex_t zip_hash_mutex;
+ ulint instance_no; /*!< Array index of this buffer
+ pool instance */
+ ulint old_pool_size; /*!< Old pool size in bytes */
+ ulint curr_pool_size; /*!< Current pool size in bytes */
+ ulint LRU_old_ratio; /*!< Reserve this much of the buffer
+ pool for "old" blocks */
+#ifdef UNIV_DEBUG
+ ulint buddy_n_frames; /*!< Number of frames allocated from
+ the buffer pool to the buddy system */
+#endif
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+ ulint mutex_exit_forbidden; /*!< Forbid release mutex */
+#endif
ulint n_chunks; /*!< number of buffer pool chunks */
buf_chunk_t* chunks; /*!< buffer pool chunks */
ulint curr_size; /*!< current pool size in pages */
@@ -1365,12 +1689,16 @@ struct buf_pool_struct{
whose frames are allocated to the
zip buddy system,
indexed by block->frame */
- ulint n_pend_reads; /*!< number of pending read operations */
+ ulint n_pend_reads; /*!< number of pending read
+ operations */
ulint n_pend_unzip; /*!< number of pending decompressions */
time_t last_printout_time;
/*!< when buf_print_io was last time
called */
+ buf_buddy_stat_t buddy_stat[BUF_BUDDY_SIZES_MAX + 1];
+ /*!< Statistics of buddy system,
+ indexed by block size */
buf_pool_stat_t stat; /*!< current statistics */
buf_pool_stat_t old_stat; /*!< old statistics */
@@ -1380,6 +1708,13 @@ struct buf_pool_struct{
/* @{ */
+ mutex_t flush_list_mutex;/*!< mutex protecting the
+ flush list access. This mutex
+ protects flush_list, flush_rbt
+ and bpage::list pointers when
+ the bpage is on flush_list. It
+ also protects writes to
+ bpage::oldest_modification */
UT_LIST_BASE_NODE_T(buf_page_t) flush_list;
/*!< base node of the modified block
list */
@@ -1393,7 +1728,7 @@ struct buf_pool_struct{
/*!< this is in the set state
when there is no flush batch
of the given type running */
- ib_rbt_t* flush_rbt; /* !< a red-black tree is used
+ ib_rbt_t* flush_rbt; /*!< a red-black tree is used
exclusively during recovery to
speed up insertions in the
flush_list. This tree contains
@@ -1405,7 +1740,8 @@ struct buf_pool_struct{
also be on the flush_list.
This tree is relevant only in
recovery and is set to NULL
- once the recovery is over. */
+ once the recovery is over.
+ Protected by flush_list_mutex */
ulint freed_page_clock;/*!< a sequence number used
to count the number of buffer
blocks removed from the end of
@@ -1419,8 +1755,8 @@ struct buf_pool_struct{
this is incremented by one; this is
set to zero when a buffer block is
allocated */
-
/* @} */
+
/** @name LRU replacement algorithm fields */
/* @{ */
@@ -1458,6 +1794,12 @@ struct buf_pool_struct{
/*!< unmodified compressed pages */
UT_LIST_BASE_NODE_T(buf_page_t) zip_free[BUF_BUDDY_SIZES_MAX];
/*!< buddy free lists */
+
+ buf_page_t watch[BUF_POOL_WATCH_SIZE];
+ /*!< Sentinel records for buffer
+ pool watches. Protected by
+ buf_pool->mutex. */
+
//#if BUF_BUDDY_HIGH != UNIV_PAGE_SIZE
//# error "BUF_BUDDY_HIGH != UNIV_PAGE_SIZE"
//#endif
@@ -1467,58 +1809,56 @@ struct buf_pool_struct{
/* @} */
};
-/** mutex protecting the buffer pool struct and control blocks, except the
-read-write lock in them */
-extern mutex_t buf_pool_mutex;
-extern mutex_t LRU_list_mutex;
-extern mutex_t flush_list_mutex;
-extern rw_lock_t page_hash_latch;
-extern mutex_t free_list_mutex;
-extern mutex_t zip_free_mutex;
-extern mutex_t zip_hash_mutex;
-/** mutex protecting the control blocks of compressed-only pages
-(of type buf_page_t, not buf_block_t) */
-extern mutex_t buf_pool_zip_mutex;
-
-/** @name Accessors for buf_pool_mutex.
-Use these instead of accessing buf_pool_mutex directly. */
+/** @name Accessors for buf_pool->mutex.
+Use these instead of accessing buf_pool->mutex directly. */
/* @{ */
-/** Test if buf_pool_mutex is owned. */
-#define buf_pool_mutex_own() mutex_own(&buf_pool_mutex)
-/** Acquire the buffer pool mutex. */
-#define buf_pool_mutex_enter() do { \
- ut_ad(!mutex_own(&buf_pool_zip_mutex)); \
- mutex_enter(&buf_pool_mutex); \
+/** Test if a buffer pool mutex is owned. */
+#define buf_pool_mutex_own(b) mutex_own(&b->mutex)
+/** Acquire a buffer pool mutex. */
+/* the buf_pool_mutex is changed the latch order */
+#define buf_pool_mutex_enter(b) do { \
+ mutex_enter(&b->mutex); \
+} while (0)
+
+/** Test if flush list mutex is owned. */
+#define buf_flush_list_mutex_own(b) mutex_own(&b->flush_list_mutex)
+
+/** Acquire the flush list mutex. */
+#define buf_flush_list_mutex_enter(b) do { \
+ mutex_enter(&b->flush_list_mutex); \
+} while (0)
+/** Release the flush list mutex. */
+# define buf_flush_list_mutex_exit(b) do { \
+ mutex_exit(&b->flush_list_mutex); \
} while (0)
+
+
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
-/** Flag to forbid the release of the buffer pool mutex.
-Protected by buf_pool_mutex. */
-extern ulint buf_pool_mutex_exit_forbidden;
/** Forbid the release of the buffer pool mutex. */
-# define buf_pool_mutex_exit_forbid() do { \
- ut_ad(buf_pool_mutex_own()); \
- buf_pool_mutex_exit_forbidden++; \
+# define buf_pool_mutex_exit_forbid(b) do { \
+ ut_ad(buf_pool_mutex_own(b)); \
+ b->mutex_exit_forbidden++; \
} while (0)
/** Allow the release of the buffer pool mutex. */
-# define buf_pool_mutex_exit_allow() do { \
- ut_ad(buf_pool_mutex_own()); \
- ut_a(buf_pool_mutex_exit_forbidden); \
- buf_pool_mutex_exit_forbidden--; \
+# define buf_pool_mutex_exit_allow(b) do { \
+ ut_ad(buf_pool_mutex_own(b)); \
+ ut_a(b->mutex_exit_forbidden); \
+ b->mutex_exit_forbidden--; \
} while (0)
/** Release the buffer pool mutex. */
-# define buf_pool_mutex_exit() do { \
- ut_a(!buf_pool_mutex_exit_forbidden); \
- mutex_exit(&buf_pool_mutex); \
+# define buf_pool_mutex_exit(b) do { \
+ ut_a(!b->mutex_exit_forbidden); \
+ mutex_exit(&b->mutex); \
} while (0)
#else
/** Forbid the release of the buffer pool mutex. */
-# define buf_pool_mutex_exit_forbid() ((void) 0)
+# define buf_pool_mutex_exit_forbid(b) ((void) 0)
/** Allow the release of the buffer pool mutex. */
-# define buf_pool_mutex_exit_allow() ((void) 0)
+# define buf_pool_mutex_exit_allow(b) ((void) 0)
/** Release the buffer pool mutex. */
-# define buf_pool_mutex_exit() mutex_exit(&buf_pool_mutex)
+# define buf_pool_mutex_exit(b) mutex_exit(&b->mutex)
#endif
#endif /* !UNIV_HOTBACKUP */
/* @} */
diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic
index 93cc68e7fc9..1a96b575d9b 100644
--- a/storage/xtradb/include/buf0buf.ic
+++ b/storage/xtradb/include/buf0buf.ic
@@ -31,11 +31,74 @@ Created 11/5/1995 Heikki Tuuri
*******************************************************/
#include "mtr0mtr.h"
-#ifndef UNIV_HOTBACKUP
#include "buf0flu.h"
#include "buf0lru.h"
#include "buf0rea.h"
#include "srv0srv.h"
+/*********************************************************************//**
+Gets the current size of buffer buf_pool in bytes.
+@return size in bytes */
+UNIV_INLINE
+ulint
+buf_pool_get_curr_size(void)
+/*========================*/
+{
+ return(srv_buf_pool_curr_size);
+}
+
+/********************************************************************//**
+Calculates the index of a buffer pool to the buf_pool[] array.
+@return the position of the buffer pool in buf_pool[] */
+UNIV_INLINE
+ulint
+buf_pool_index(
+/*===========*/
+ const buf_pool_t* buf_pool) /*!< in: buffer pool */
+{
+ ulint i = buf_pool - buf_pool_ptr;
+ ut_ad(i < MAX_BUFFER_POOLS);
+ ut_ad(i < srv_buf_pool_instances);
+ return(i);
+}
+
+/******************************************************************//**
+Returns the buffer pool instance given a page instance
+@return buf_pool */
+UNIV_INLINE
+buf_pool_t*
+buf_pool_from_bpage(
+/*================*/
+ const buf_page_t* bpage) /*!< in: buffer pool page */
+{
+ ulint i;
+ i = bpage->buf_pool_index;
+ ut_ad(i < srv_buf_pool_instances);
+ return(&buf_pool_ptr[i]);
+}
+
+/******************************************************************//**
+Returns the buffer pool instance given a block instance
+@return buf_pool */
+UNIV_INLINE
+buf_pool_t*
+buf_pool_from_block(
+/*================*/
+ const buf_block_t* block) /*!< in: block */
+{
+ return(buf_pool_from_bpage(&block->page));
+}
+
+/*********************************************************************//**
+Gets the current size of buffer buf_pool in pages.
+@return size in pages*/
+UNIV_INLINE
+ulint
+buf_pool_get_n_pages(void)
+/*======================*/
+{
+ return(buf_pool_get_curr_size() / UNIV_PAGE_SIZE);
+}
+
/********************************************************************//**
Reads the freed_page_clock of a buffer block.
@return freed_page_clock */
@@ -45,7 +108,7 @@ buf_page_get_freed_page_clock(
/*==========================*/
const buf_page_t* bpage) /*!< in: block */
{
- /* This is sometimes read without holding buf_pool_mutex. */
+ /* This is sometimes read without holding buf_pool->mutex. */
return(bpage->freed_page_clock);
}
@@ -72,6 +135,8 @@ buf_page_peek_if_too_old(
/*=====================*/
const buf_page_t* bpage) /*!< in: block to make younger */
{
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
if (UNIV_UNLIKELY(buf_pool->freed_page_clock == 0)) {
/* If eviction has not started yet, do not update the
statistics or move blocks in the LRU list. This is
@@ -93,62 +158,12 @@ buf_page_peek_if_too_old(
return((buf_pool->freed_page_clock & ((1UL << 31) - 1))
> ((ulint) bpage->freed_page_clock
+ (buf_pool->curr_size
- * (BUF_LRU_OLD_RATIO_DIV - buf_LRU_old_ratio)
+ * (BUF_LRU_OLD_RATIO_DIV - buf_pool->LRU_old_ratio)
/ (BUF_LRU_OLD_RATIO_DIV * 4))));
}
}
/*********************************************************************//**
-Gets the current size of buffer buf_pool in bytes.
-@return size in bytes */
-UNIV_INLINE
-ulint
-buf_pool_get_curr_size(void)
-/*========================*/
-{
- return(buf_pool->curr_size * UNIV_PAGE_SIZE);
-}
-
-/********************************************************************//**
-Gets the smallest oldest_modification lsn for any page in the pool. Returns
-zero if all modified pages have been flushed to disk.
-@return oldest modification in pool, zero if none */
-UNIV_INLINE
-ib_uint64_t
-buf_pool_get_oldest_modification(void)
-/*==================================*/
-{
- buf_page_t* bpage;
- ib_uint64_t lsn;
-
-try_again:
- //buf_pool_mutex_enter();
- mutex_enter(&flush_list_mutex);
-
- bpage = UT_LIST_GET_LAST(buf_pool->flush_list);
-
- if (bpage == NULL) {
- lsn = 0;
- } else {
- ut_ad(bpage->in_flush_list);
- lsn = bpage->oldest_modification;
- if (lsn == 0) {
- mutex_exit(&flush_list_mutex);
- goto try_again;
- }
- }
-
- //buf_pool_mutex_exit();
- mutex_exit(&flush_list_mutex);
-
- /* The returned answer may be out of date: the flush_list can
- change after the mutex has been released. */
-
- return(lsn);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*********************************************************************//**
Gets the state of a block.
@return state */
UNIV_INLINE
@@ -300,13 +315,22 @@ buf_page_get_mutex(
/*===============*/
const buf_page_t* bpage) /*!< in: pointer to control block */
{
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+
+ if (/*equivalent to buf_pool_watch_is_sentinel(buf_pool, bpage)*/
+ bpage >= &buf_pool->watch[0]
+ && bpage < &buf_pool->watch[BUF_POOL_WATCH_SIZE]) {
+ /* TODO: this code is the interim. should be confirmed later. */
+ return(&buf_pool->zip_mutex);
+ }
+
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_FREE:
/* ut_error; */ /* optimistic */
return(NULL);
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
- return(&buf_pool_zip_mutex);
+ return(&buf_pool->zip_mutex);
default:
return(&((buf_block_t*) bpage)->mutex);
}
@@ -384,6 +408,7 @@ buf_block_set_file_page(
buf_block_set_state(block, BUF_BLOCK_FILE_PAGE);
block->page.space = space;
block->page.offset = page_no;
+ block->page.space_was_being_deleted = FALSE;
}
/*********************************************************************//**
@@ -414,7 +439,7 @@ Gets the io_fix state of a block.
UNIV_INLINE
enum buf_io_fix
buf_block_get_io_fix(
-/*================*/
+/*=================*/
const buf_block_t* block) /*!< in: pointer to the control block */
{
return(buf_page_get_io_fix(&block->page));
@@ -429,7 +454,10 @@ buf_page_set_io_fix(
buf_page_t* bpage, /*!< in/out: control block */
enum buf_io_fix io_fix) /*!< in: io_fix state */
{
- //ut_ad(buf_pool_mutex_own());
+#ifdef UNIV_DEBUG
+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ //ut_ad(buf_pool_mutex_own(buf_pool));
+#endif
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
bpage->io_fix = io_fix;
@@ -457,10 +485,12 @@ buf_page_can_relocate(
/*==================*/
const buf_page_t* bpage) /*!< control block being relocated */
{
- //ut_ad(buf_pool_mutex_own());
+#ifdef UNIV_DEBUG
+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ //ut_ad(buf_pool_mutex_own(buf_pool));
+#endif
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
ut_ad(buf_page_in_file(bpage));
- /* optimistic */
//ut_ad(bpage->in_LRU_list);
return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE
@@ -476,8 +506,11 @@ buf_page_is_old(
/*============*/
const buf_page_t* bpage) /*!< in: control block */
{
+#ifdef UNIV_DEBUG
+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ //ut_ad(buf_pool_mutex_own(buf_pool));
+#endif
ut_ad(buf_page_in_file(bpage));
- //ut_ad(buf_pool_mutex_own()); /* This is used in optimistic */
return(bpage->old);
}
@@ -491,9 +524,12 @@ buf_page_set_old(
buf_page_t* bpage, /*!< in/out: control block */
ibool old) /*!< in: old */
{
+#ifdef UNIV_DEBUG
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+#endif /* UNIV_DEBUG */
ut_a(buf_page_in_file(bpage));
- //ut_ad(buf_pool_mutex_own());
- ut_ad(mutex_own(&LRU_list_mutex));
+ //ut_ad(buf_pool_mutex_own(buf_pool));
+ ut_ad(mutex_own(&buf_pool->LRU_list_mutex));
ut_ad(bpage->in_LRU_list);
#ifdef UNIV_LRU_DEBUG
@@ -539,9 +575,12 @@ buf_page_set_accessed(
buf_page_t* bpage, /*!< in/out: control block */
ulint time_ms) /*!< in: ut_time_ms() */
{
- ut_a(buf_page_in_file(bpage));
- //ut_ad(buf_pool_mutex_own());
+#ifdef UNIV_DEBUG
+ //buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
+ //ut_ad(buf_pool_mutex_own(buf_pool));
+#endif
ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+ ut_a(buf_page_in_file(bpage));
if (!bpage->access_time) {
/* Make this the time of the first access. */
@@ -753,25 +792,6 @@ buf_block_get_lock_hash_val(
}
/********************************************************************//**
-Allocates a buffer block.
-@return own: the allocated block, in state BUF_BLOCK_MEMORY */
-UNIV_INLINE
-buf_block_t*
-buf_block_alloc(
-/*============*/
- ulint zip_size) /*!< in: compressed page size in bytes,
- or 0 if uncompressed tablespace */
-{
- buf_block_t* block;
-
- block = buf_LRU_get_free_block(zip_size);
-
- buf_block_set_state(block, BUF_BLOCK_MEMORY);
-
- return(block);
-}
-
-/********************************************************************//**
Frees a buffer block which does not contain a file page. */
UNIV_INLINE
void
@@ -779,7 +799,9 @@ buf_block_free(
/*===========*/
buf_block_t* block) /*!< in, own: block to be freed */
{
- //buf_pool_mutex_enter();
+ //buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
+
+ //buf_pool_mutex_enter(buf_pool);
mutex_enter(&block->mutex);
@@ -789,7 +811,7 @@ buf_block_free(
mutex_exit(&block->mutex);
- //buf_pool_mutex_exit();
+ //buf_pool_mutex_exit(buf_pool);
}
#endif /* !UNIV_HOTBACKUP */
@@ -863,7 +885,9 @@ buf_block_modify_clock_inc(
buf_block_t* block) /*!< in: block */
{
#ifdef UNIV_SYNC_DEBUG
- ut_ad((mutex_own(&LRU_list_mutex)
+ buf_pool_t* buf_pool = buf_pool_from_bpage((buf_page_t*)block);
+
+ ut_ad((mutex_own(&buf_pool->LRU_list_mutex)
&& (block->page.buf_fix_count == 0))
|| rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE));
#endif /* UNIV_SYNC_DEBUG */
@@ -942,29 +966,66 @@ buf_block_buf_fix_dec(
}
/******************************************************************//**
+Returns the buffer pool instance given space and offset of page
+@return buffer pool */
+UNIV_INLINE
+buf_pool_t*
+buf_pool_get(
+/*==========*/
+ ulint space, /*!< in: space id */
+ ulint offset) /*!< in: offset of the page within space */
+{
+ ulint fold;
+ ulint index;
+ ulint ignored_offset;
+
+ ignored_offset = offset >> 6; /* 2log of BUF_READ_AHEAD_AREA (64)*/
+ fold = buf_page_address_fold(space, ignored_offset);
+ index = fold % srv_buf_pool_instances;
+ return(&buf_pool_ptr[index]);
+}
+
+/******************************************************************//**
+Returns the buffer pool instance given its array index
+@return buffer pool */
+UNIV_INLINE
+buf_pool_t*
+buf_pool_from_array(
+/*================*/
+ ulint index) /*!< in: array index to get
+ buffer pool instance from */
+{
+ ut_ad(index < MAX_BUFFER_POOLS);
+ ut_ad(index < srv_buf_pool_instances);
+ return(&buf_pool_ptr[index]);
+}
+
+/******************************************************************//**
Returns the control block of a file page, NULL if not found.
@return block, NULL if not found */
UNIV_INLINE
buf_page_t*
-buf_page_hash_get(
-/*==============*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: offset of the page within space */
+buf_page_hash_get_low(
+/*==================*/
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ ulint space, /*!< in: space id */
+ ulint offset, /*!< in: offset of the page
+ within space */
+ ulint fold) /*!< in: buf_page_address_fold(
+ space, offset) */
{
buf_page_t* bpage;
- ulint fold;
ut_ad(buf_pool);
- //ut_ad(buf_pool_mutex_own());
+ //ut_ad(buf_pool_mutex_own(buf_pool));
#ifdef UNIV_SYNC_DEBUG
- ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX)
- || rw_lock_own(&page_hash_latch, RW_LOCK_SHARED));
+ ut_ad(rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_EX)
+ || rw_lock_own(&buf_pool->page_hash_latch, RW_LOCK_SHARED));
#endif
+ ut_ad(fold == buf_page_address_fold(space, offset));
/* Look for the page in the hash table */
- fold = buf_page_address_fold(space, offset);
-
HASH_SEARCH(hash, buf_pool->page_hash, fold, buf_page_t*, bpage,
ut_ad(bpage->in_page_hash && !bpage->in_zip_hash
&& buf_page_in_file(bpage)),
@@ -985,6 +1046,30 @@ buf_page_hash_get(
}
/******************************************************************//**
+Returns the control block of a file page, NULL if not found.
+@return block, NULL if not found or not a real control block */
+UNIV_INLINE
+buf_page_t*
+buf_page_hash_get(
+/*==============*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ ulint space, /*!< in: space id */
+ ulint offset) /*!< in: offset of the page
+ within space */
+{
+ buf_page_t* bpage;
+ ulint fold = buf_page_address_fold(space, offset);
+
+ bpage = buf_page_hash_get_low(buf_pool, space, offset, fold);
+
+ if (bpage && buf_pool_watch_is_sentinel(buf_pool, bpage)) {
+ bpage = NULL;
+ }
+
+ return(bpage);
+}
+
+/******************************************************************//**
Returns the control block of a file page, NULL if not found
or an uncompressed page frame does not exist.
@return block, NULL if not found */
@@ -992,10 +1077,16 @@ UNIV_INLINE
buf_block_t*
buf_block_hash_get(
/*===============*/
- ulint space, /*!< in: space id */
- ulint offset) /*!< in: offset of the page within space */
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ ulint space, /*!< in: space id */
+ ulint offset) /*!< in: offset of the page
+ within space */
{
- return(buf_page_get_block(buf_page_hash_get(space, offset)));
+ buf_block_t* block;
+
+ block = buf_page_get_block(buf_page_hash_get(buf_pool, space, offset));
+
+ return(block);
}
/********************************************************************//**
@@ -1013,14 +1104,15 @@ buf_page_peek(
ulint offset) /*!< in: page number */
{
const buf_page_t* bpage;
+ buf_pool_t* buf_pool = buf_pool_get(space, offset);
- //buf_pool_mutex_enter();
- rw_lock_s_lock(&page_hash_latch);
+ //buf_pool_mutex_enter(buf_pool);
+ rw_lock_s_lock(&buf_pool->page_hash_latch);
- bpage = buf_page_hash_get(space, offset);
+ bpage = buf_page_hash_get(buf_pool, space, offset);
- //buf_pool_mutex_exit();
- rw_lock_s_unlock(&page_hash_latch);
+ //buf_pool_mutex_exit(buf_pool);
+ rw_lock_s_unlock(&buf_pool->page_hash_latch);
return(bpage != NULL);
}
@@ -1034,6 +1126,7 @@ buf_page_release_zip(
buf_page_t* bpage) /*!< in: buffer block */
{
buf_block_t* block;
+ buf_pool_t* buf_pool = buf_pool_from_bpage(bpage);
ut_ad(bpage);
ut_a(bpage->buf_fix_count > 0);
@@ -1041,9 +1134,9 @@ buf_page_release_zip(
switch (buf_page_get_state(bpage)) {
case BUF_BLOCK_ZIP_PAGE:
case BUF_BLOCK_ZIP_DIRTY:
- mutex_enter(&buf_pool_zip_mutex);
+ mutex_enter(&buf_pool->zip_mutex);
bpage->buf_fix_count--;
- mutex_exit(&buf_pool_zip_mutex);
+ mutex_exit(&buf_pool->zip_mutex);
return;
case BUF_BLOCK_FILE_PAGE:
block = (buf_block_t*) bpage;
@@ -1062,6 +1155,7 @@ buf_page_release_zip(
break;
}
+
ut_error;
}
@@ -1073,24 +1167,14 @@ void
buf_page_release(
/*=============*/
buf_block_t* block, /*!< in: buffer block */
- ulint rw_latch, /*!< in: RW_S_LATCH, RW_X_LATCH,
+ ulint rw_latch) /*!< in: RW_S_LATCH, RW_X_LATCH,
RW_NO_LATCH */
- mtr_t* mtr __attribute__((unused))) /*!< in: mtr */
{
ut_ad(block);
ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
ut_a(block->page.buf_fix_count > 0);
- /* buf_flush_note_modification() should be called before this function. */
-/*
- if (rw_latch == RW_X_LATCH && mtr->modifications) {
- buf_pool_mutex_enter();
- buf_flush_note_modification(block, mtr);
- buf_pool_mutex_exit();
- }
-*/
-
mutex_enter(&block->mutex);
#ifdef UNIV_SYNC_DEBUG
@@ -1123,4 +1207,71 @@ buf_block_dbg_add_level(
sync_thread_add_level(&block->lock, level);
}
#endif /* UNIV_SYNC_DEBUG */
+/********************************************************************//**
+Acquire mutex on all buffer pool instances. */
+UNIV_INLINE
+void
+buf_pool_mutex_enter_all(void)
+/*==========================*/
+{
+ ulint i;
+
+ for (i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_array(i);
+ buf_pool_mutex_enter(buf_pool);
+ }
+}
+
+/********************************************************************//**
+Release mutex on all buffer pool instances. */
+UNIV_INLINE
+void
+buf_pool_mutex_exit_all(void)
+/*=========================*/
+{
+ ulint i;
+
+ for (i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_array(i);
+ buf_pool_mutex_exit(buf_pool);
+ }
+}
+
+/********************************************************************//**
+*/
+UNIV_INLINE
+void
+buf_pool_page_hash_x_lock_all(void)
+/*===============================*/
+{
+ ulint i;
+
+ for (i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_array(i);
+ rw_lock_x_lock(&buf_pool->page_hash_latch);
+ }
+}
+
+/********************************************************************//**
+*/
+UNIV_INLINE
+void
+buf_pool_page_hash_x_unlock_all(void)
+/*=================================*/
+{
+ ulint i;
+
+ for (i = 0; i < srv_buf_pool_instances; i++) {
+ buf_pool_t* buf_pool;
+
+ buf_pool = buf_pool_from_array(i);
+ rw_lock_x_unlock(&buf_pool->page_hash_latch);
+ }
+}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/buf0flu.h b/storage/xtradb/include/buf0flu.h
index 2f7108fda1b..036f4389774 100644
--- a/storage/xtradb/include/buf0flu.h
+++ b/storage/xtradb/include/buf0flu.h
@@ -31,6 +31,7 @@ Created 11/5/1995 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
#include "mtr0types.h"
#include "buf0types.h"
+#include "log0log.h"
/********************************************************************//**
Remove a block from the flush list of modified blocks. */
@@ -39,7 +40,7 @@ void
buf_flush_remove(
/*=============*/
buf_page_t* bpage); /*!< in: pointer to the block in question */
-/********************************************************************//**
+/*******************************************************************//**
Relocates a buffer control block on the flush_list.
Note that it is assumed that the contents of bpage has already been
copied to dpage. */
@@ -58,12 +59,21 @@ buf_flush_write_complete(
buf_page_t* bpage); /*!< in: pointer to the block in question */
/*********************************************************************//**
Flushes pages from the end of the LRU list if there is too small
-a margin of replaceable pages there. */
+a margin of replaceable pages there. If buffer pool is NULL it
+means flush free margin on all buffer pool instances. */
UNIV_INTERN
void
buf_flush_free_margin(
-/*=======================*/
- ibool wait);
+/*==================*/
+ buf_pool_t* buf_pool,
+ ibool wait);
+/*********************************************************************//**
+Flushes pages from the end of all the LRU lists. */
+UNIV_INTERN
+void
+buf_flush_free_margins(
+/*=========================*/
+ ibool wait);
#endif /* !UNIV_HOTBACKUP */
/********************************************************************//**
Initializes a page for writing to the tablespace. */
@@ -76,22 +86,46 @@ buf_flush_init_for_writing(
ib_uint64_t newest_lsn); /*!< in: newest modification lsn
to the page */
#ifndef UNIV_HOTBACKUP
+# if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+/********************************************************************//**
+Writes a flushable page asynchronously from the buffer pool to a file.
+NOTE: buf_pool->mutex and block->mutex must be held upon entering this
+function, and they will be released by this function after flushing.
+This is loosely based on buf_flush_batch() and buf_flush_page().
+@return TRUE if the page was flushed and the mutexes released */
+UNIV_INTERN
+ibool
+buf_flush_page_try(
+/*===============*/
+ buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
+ buf_block_t* block) /*!< in/out: buffer control block */
+ __attribute__((nonnull, warn_unused_result));
+# endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
/*******************************************************************//**
-This utility flushes dirty blocks from the end of the LRU list or flush_list.
-NOTE 1: in the case of an LRU flush the calling thread may own latches to
-pages: to avoid deadlocks, this function must be written so that it cannot
-end up waiting for these latches! NOTE 2: in the case of a flush list flush,
-the calling thread is not allowed to own any latches on pages!
+This utility flushes dirty blocks from the end of the LRU list.
+NOTE: The calling thread may own latches to pages: to avoid deadlocks,
+this function must be written so that it cannot end up waiting for these
+latches!
@return number of blocks for which the write request was queued;
ULINT_UNDEFINED if there was a flush of the same type already running */
UNIV_INTERN
ulint
-buf_flush_batch(
+buf_flush_LRU(
+/*==========*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ ulint min_n); /*!< in: wished minimum mumber of blocks
+ flushed (it is not guaranteed that the
+ actual number is that big, though) */
+/*******************************************************************//**
+This utility flushes dirty blocks from the end of the flush_list of
+all buffer pool instances.
+NOTE: The calling thread is not allowed to own any latches on pages!
+@return number of blocks for which the write request was queued;
+ULINT_UNDEFINED if there was a flush of the same type already running */
+UNIV_INTERN
+ulint
+buf_flush_list(
/*============*/
- enum buf_flush flush_type, /*!< in: BUF_FLUSH_LRU or
- BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
- then the caller must not own any
- latches on pages */
ulint min_n, /*!< in: wished minimum mumber of blocks
flushed (it is not guaranteed that the
actual number is that big, though) */
@@ -106,7 +140,20 @@ UNIV_INTERN
void
buf_flush_wait_batch_end(
/*=====================*/
- enum buf_flush type); /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ enum buf_flush type); /*!< in: BUF_FLUSH_LRU
+ or BUF_FLUSH_LIST */
+/******************************************************************//**
+Waits until a flush batch of the given type ends. This is called by
+a thread that only wants to wait for a flush to end but doesn't do
+any flushing itself. */
+UNIV_INTERN
+void
+buf_flush_wait_batch_end_wait_only(
+/*===============================*/
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ enum buf_flush type); /*!< in: BUF_FLUSH_LRU
+ or BUF_FLUSH_LIST */
/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
modified in it. Puts the block to the list of modified blocks, if it not
@@ -150,8 +197,8 @@ how much redo the workload is generating and at what rate. */
struct buf_flush_stat_struct
{
- ib_uint64_t redo; /*!< amount of redo generated. */
- ulint n_flushed; /*!< number of pages flushed. */
+ ib_uint64_t redo; /**< amount of redo generated. */
+ ulint n_flushed; /**< number of pages flushed. */
};
/** Statistics for selecting flush rate of dirty pages. */
@@ -182,11 +229,12 @@ Validates the flush list.
@return TRUE if ok */
UNIV_INTERN
ibool
-buf_flush_validate(void);
-/*====================*/
+buf_flush_validate(
+/*===============*/
+ buf_pool_t* buf_pool);
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
-/******************************************************************//**
+/********************************************************************//**
Initialize the red-black tree to speed up insertions into the flush_list
during recovery process. Should be called at the start of recovery
process before any page has been read/written. */
@@ -195,7 +243,7 @@ void
buf_flush_init_flush_rbt(void);
/*==========================*/
-/******************************************************************//**
+/********************************************************************//**
Frees up the red-black tree. */
UNIV_INTERN
void
@@ -206,9 +254,10 @@ buf_flush_free_flush_rbt(void);
available to replacement in the free list and at the end of the LRU list (to
make sure that a read-ahead batch can be read efficiently in a single
sweep). */
-#define BUF_FLUSH_FREE_BLOCK_MARGIN (5 + BUF_READ_AHEAD_AREA)
+#define BUF_FLUSH_FREE_BLOCK_MARGIN(b) (5 + BUF_READ_AHEAD_AREA(b))
/** Extra margin to apply above BUF_FLUSH_FREE_BLOCK_MARGIN */
-#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100)
+#define BUF_FLUSH_EXTRA_MARGIN(b) ((BUF_FLUSH_FREE_BLOCK_MARGIN(b) / 4 \
+ + 100) / srv_buf_pool_instances)
#endif /* !UNIV_HOTBACKUP */
#ifndef UNIV_NONINL
diff --git a/storage/xtradb/include/buf0flu.ic b/storage/xtradb/include/buf0flu.ic
index 4ad0814f344..30e2cc8efe8 100644
--- a/storage/xtradb/include/buf0flu.ic
+++ b/storage/xtradb/include/buf0flu.ic
@@ -33,7 +33,9 @@ UNIV_INTERN
void
buf_flush_insert_into_flush_list(
/*=============================*/
- buf_block_t* block); /*!< in/out: block which is modified */
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ buf_block_t* block, /*!< in/out: block which is modified */
+ ib_uint64_t lsn); /*!< in: oldest modification */
/********************************************************************//**
Inserts a modified block into the flush list in the right sorted position.
This function is used by recovery, because there the modifications do not
@@ -42,7 +44,9 @@ UNIV_INTERN
void
buf_flush_insert_sorted_into_flush_list(
/*====================================*/
- buf_block_t* block); /*!< in/out: block which is modified */
+ buf_pool_t* buf_pool, /*!< buffer pool instance */
+ buf_block_t* block, /*!< in/out: block which is modified */
+ ib_uint64_t lsn); /*!< in: oldest modification */
/********************************************************************//**
This function should be called at a mini-transaction commit, if a page was
@@ -55,15 +59,7 @@ buf_flush_note_modification(
buf_block_t* block, /*!< in: block which is modified */
mtr_t* mtr) /*!< in: mtr */
{
- ibool use_LRU_mutex = FALSE;
-
- if (UT_LIST_GET_LEN(buf_pool->unzip_LRU))
- use_LRU_mutex = TRUE;
-
- if (use_LRU_mutex)
- mutex_enter(&LRU_list_mutex);
-
- mutex_enter(&block->mutex);
+ buf_pool_t* buf_pool = buf_pool_from_block(block);
ut_ad(block);
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
@@ -71,22 +67,22 @@ buf_flush_note_modification(
#ifdef UNIV_SYNC_DEBUG
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- //ut_ad(buf_pool_mutex_own());
+
+ ut_ad(!buf_pool_mutex_own(buf_pool));
+ ut_ad(!buf_flush_list_mutex_own(buf_pool));
+ ut_ad(log_flush_order_mutex_own());
ut_ad(mtr->start_lsn != 0);
ut_ad(mtr->modifications);
+
+ mutex_enter(&block->mutex);
ut_ad(block->page.newest_modification <= mtr->end_lsn);
block->page.newest_modification = mtr->end_lsn;
if (!block->page.oldest_modification) {
- mutex_enter(&flush_list_mutex);
-
- block->page.oldest_modification = mtr->start_lsn;
- ut_ad(block->page.oldest_modification != 0);
-
- buf_flush_insert_into_flush_list(block);
- mutex_exit(&flush_list_mutex);
+ buf_flush_insert_into_flush_list(
+ buf_pool, block, mtr->start_lsn);
} else {
ut_ad(block->page.oldest_modification <= mtr->start_lsn);
}
@@ -94,9 +90,6 @@ buf_flush_note_modification(
mutex_exit(&block->mutex);
++srv_buf_pool_write_requests;
-
- if (use_LRU_mutex)
- mutex_exit(&LRU_list_mutex);
}
/********************************************************************//**
@@ -111,15 +104,7 @@ buf_flush_recv_note_modification(
ib_uint64_t end_lsn) /*!< in: end lsn of the last mtr in the
set of mtr's */
{
- ibool use_LRU_mutex = FALSE;
-
- if(UT_LIST_GET_LEN(buf_pool->unzip_LRU))
- use_LRU_mutex = TRUE;
-
- if (use_LRU_mutex)
- mutex_enter(&LRU_list_mutex);
-
- mutex_enter(&(block->mutex));
+ buf_pool_t* buf_pool = buf_pool_from_block(block);
ut_ad(block);
ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
@@ -128,28 +113,24 @@ buf_flush_recv_note_modification(
ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- //buf_pool_mutex_enter();
+ ut_ad(!buf_pool_mutex_own(buf_pool));
+ ut_ad(!buf_flush_list_mutex_own(buf_pool));
+ ut_ad(log_flush_order_mutex_own());
+ ut_ad(start_lsn != 0);
ut_ad(block->page.newest_modification <= end_lsn);
+ mutex_enter(&block->mutex);
block->page.newest_modification = end_lsn;
if (!block->page.oldest_modification) {
- mutex_enter(&flush_list_mutex);
-
- block->page.oldest_modification = start_lsn;
-
- ut_ad(block->page.oldest_modification != 0);
-
- buf_flush_insert_sorted_into_flush_list(block);
- mutex_exit(&flush_list_mutex);
+ buf_flush_insert_sorted_into_flush_list(
+ buf_pool, block, start_lsn);
} else {
ut_ad(block->page.oldest_modification <= start_lsn);
}
- //buf_pool_mutex_exit();
- if (use_LRU_mutex)
- mutex_exit(&LRU_list_mutex);
- mutex_exit(&(block->mutex));
+ mutex_exit(&block->mutex);
+
}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/buf0lru.h b/storage/xtradb/include/buf0lru.h
index d3b59e8b579..5d004ec4bb7 100644
--- a/storage/xtradb/include/buf0lru.h
+++ b/storage/xtradb/include/buf0lru.h
@@ -52,8 +52,9 @@ operations need new buffer blocks, and the i/o work done in flushing would be
wasted. */
UNIV_INTERN
void
-buf_LRU_try_free_flushed_blocks(void);
-/*==================================*/
+buf_LRU_try_free_flushed_blocks(
+/*============================*/
+ buf_pool_t* buf_pool); /*!< in: buffer pool instance */
/******************************************************************//**
Returns TRUE if less than 25 % of the buffer pool is available. This can be
used in heuristics to prevent huge transactions eating up the whole buffer
@@ -72,7 +73,7 @@ These are low-level functions
#define BUF_LRU_OLD_MIN_LEN 512 /* 8 megabytes of 16k pages */
/** Maximum LRU list search length in buf_flush_LRU_recommendation() */
-#define BUF_LRU_FREE_SEARCH_LEN (5 + 2 * BUF_READ_AHEAD_AREA)
+#define BUF_LRU_FREE_SEARCH_LEN(b) (5 + 2 * BUF_READ_AHEAD_AREA(b))
/******************************************************************//**
Invalidates all pages belonging to a given tablespace when we are deleting
@@ -84,6 +85,13 @@ void
buf_LRU_invalidate_tablespace(
/*==========================*/
ulint id); /*!< in: space id */
+/******************************************************************//**
+*/
+UNIV_INTERN
+void
+buf_LRU_mark_space_was_deleted(
+/*===========================*/
+ ulint id); /*!< in: space id */
/********************************************************************//**
Insert a compressed block into buf_pool->zip_clean in the LRU order. */
UNIV_INTERN
@@ -97,10 +105,10 @@ Try to free a block. If bpage is a descriptor of a compressed-only
page, the descriptor object will be freed as well.
NOTE: If this function returns BUF_LRU_FREED, it will temporarily
-release buf_pool_mutex. Furthermore, the page frame will no longer be
+release buf_pool->mutex. Furthermore, the page frame will no longer be
accessible via bpage.
-The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and
+The caller must hold buf_pool->mutex and buf_page_get_mutex(bpage) and
release these two mutexes after the call. No other
buf_page_get_mutex() may be held when calling this function.
@return BUF_LRU_FREED if freed, BUF_LRU_CANNOT_RELOCATE or
@@ -112,11 +120,8 @@ buf_LRU_free_block(
buf_page_t* bpage, /*!< in: block to be freed */
ibool zip, /*!< in: TRUE if should remove also the
compressed page of an uncompressed page */
- ibool* buf_pool_mutex_released,
- /*!< in: pointer to a variable that will
- be assigned TRUE if buf_pool_mutex
- was temporarily released, or NULL */
- ibool have_LRU_mutex);
+ ibool have_LRU_mutex)
+ __attribute__((nonnull));
/******************************************************************//**
Try to free a replaceable block.
@return TRUE if found and freed */
@@ -124,22 +129,26 @@ UNIV_INTERN
ibool
buf_LRU_search_and_free_block(
/*==========================*/
- ulint n_iterations); /*!< in: how many times this has been called
- repeatedly without result: a high value means
- that we should search farther; if
- n_iterations < 10, then we search
- n_iterations / 10 * buf_pool->curr_size
- pages from the end of the LRU list; if
- n_iterations < 5, then we will also search
- n_iterations / 5 of the unzip_LRU list. */
+ buf_pool_t* buf_pool, /*!< in: buffer pool instance */
+ ulint n_iterations); /*!< in: how many times this has
+ been called repeatedly without
+ result: a high value means that
+ we should search farther; if
+ n_iterations < 10, then we search
+ n_iterations / 10 * buf_pool->curr_size
+ pages from the end of the LRU list; if
+ n_iterations < 5, then we will
+ also search n_iterations / 5
+ of the unzip_LRU list. */
/******************************************************************//**
Returns a free block from the buf_pool. The block is taken off the
free list. If it is empty, returns NULL.
@return a free control block, or NULL if the buf_block->free list is empty */
UNIV_INTERN
buf_block_t*
-buf_LRU_get_free_only(void);
-/*=======================*/
+buf_LRU_get_free_only(
+/*==================*/
+ buf_pool_t* buf_pool); /*!< buffer pool instance */
/******************************************************************//**
Returns a free block from the buf_pool. The block is taken off the
free list. If it is empty, blocks are moved from the end of the
@@ -149,8 +158,8 @@ UNIV_INTERN
buf_block_t*
buf_LRU_get_free_block(
/*===================*/
- ulint zip_size); /*!< in: compressed page size in bytes,
- or 0 if uncompressed tablespace */
+ buf_pool_t* buf_pool) /*!< in/out: buffer pool instance */
+ __attribute__((nonnull,warn_unused_result));
/******************************************************************//**
Puts a block back to the free list. */
@@ -198,7 +207,7 @@ buf_LRU_make_block_old(
Updates buf_LRU_old_ratio.
@return updated old_pct */
UNIV_INTERN
-uint
+ulint
buf_LRU_old_ratio_update(
/*=====================*/
uint old_pct,/*!< in: Reserve this percentage of
@@ -246,7 +255,7 @@ buf_LRU_print(void);
/** @name Heuristics for detecting index scan @{ */
/** Reserve this much/BUF_LRU_OLD_RATIO_DIV of the buffer pool for
-"old" blocks. Protected by buf_pool_mutex. */
+"old" blocks. Protected by buf_pool->mutex. */
extern uint buf_LRU_old_ratio;
/** The denominator of buf_LRU_old_ratio. */
#define BUF_LRU_OLD_RATIO_DIV 1024
@@ -292,7 +301,7 @@ Cleared by buf_LRU_stat_update(). */
extern buf_LRU_stat_t buf_LRU_stat_cur;
/** Running sum of past values of buf_LRU_stat_cur.
-Updated by buf_LRU_stat_update(). Protected by buf_pool_mutex. */
+Updated by buf_LRU_stat_update(). Protected by buf_pool->mutex. */
extern buf_LRU_stat_t buf_LRU_stat_sum;
/********************************************************************//**
diff --git a/storage/xtradb/include/buf0rea.h b/storage/xtradb/include/buf0rea.h
index 56d3d24a3b7..4910c1e2aca 100644
--- a/storage/xtradb/include/buf0rea.h
+++ b/storage/xtradb/include/buf0rea.h
@@ -103,10 +103,10 @@ UNIV_INTERN
ulint
buf_read_ahead_linear(
/*==================*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint offset, /*!< in: page number of a page; NOTE: the current thread
- must want access to this page (see NOTE 3 above) */
+ ulint space, /*!< in: space id */
+ ulint zip_size, /*!< in: compressed page size in bytes, or 0 */
+ ulint offset, /*!< in: page number; see NOTE 3 above */
+ ibool inside_ibuf, /*!< in: TRUE if we are inside ibuf routine */
trx_t* trx);
/********************************************************************//**
Issues read requests for pages which the ibuf module wants to read in, in
@@ -158,7 +158,7 @@ buf_read_recv_pages(
/** The size in pages of the area which the read-ahead algorithms read if
invoked */
-#define BUF_READ_AHEAD_AREA 64
+#define BUF_READ_AHEAD_AREA(b) 64
/** @name Modes used in read-ahead @{ */
/** read only pages belonging to the insert buffer tree */
diff --git a/storage/xtradb/include/buf0types.h b/storage/xtradb/include/buf0types.h
index 507f1543bbb..888be86e332 100644
--- a/storage/xtradb/include/buf0types.h
+++ b/storage/xtradb/include/buf0types.h
@@ -36,6 +36,8 @@ typedef struct buf_chunk_struct buf_chunk_t;
typedef struct buf_pool_struct buf_pool_t;
/** Buffer pool statistics struct */
typedef struct buf_pool_stat_struct buf_pool_stat_t;
+/** Buffer pool buddy statistics struct */
+typedef struct buf_buddy_stat_struct buf_buddy_stat_t;
/** A buffer frame. @see page_t */
typedef byte buf_frame_t;
@@ -76,7 +78,7 @@ enum buf_io_fix {
/** twice the maximum block size of the buddy system;
the underlying memory is aligned by this amount:
this must be equal to UNIV_PAGE_SIZE */
-#define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
+#define BUF_BUDDY_HIGH ((ulint)BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
/* @} */
#endif
diff --git a/storage/xtradb/include/data0data.h b/storage/xtradb/include/data0data.h
index f9fce3f3657..f7bdd29ed90 100644
--- a/storage/xtradb/include/data0data.h
+++ b/storage/xtradb/include/data0data.h
@@ -154,14 +154,19 @@ dfield_dup(
dfield_t* field, /*!< in/out: data field */
mem_heap_t* heap); /*!< in: memory heap where allocated */
/*********************************************************************//**
-Tests if data length and content is equal for two dfields.
-@return TRUE if equal */
+Tests if two data fields are equal.
+If len==0, tests the data length and content for equality.
+If len>0, tests the first len bytes of the content for equality.
+@return TRUE if both fields are NULL or if they are equal */
UNIV_INLINE
ibool
dfield_datas_are_binary_equal(
/*==========================*/
const dfield_t* field1, /*!< in: field */
- const dfield_t* field2);/*!< in: field */
+ const dfield_t* field2, /*!< in: field */
+ ulint len) /*!< in: maximum prefix to compare,
+ or 0 to compare the whole field length */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Tests if dfield data length and content is equal to the given.
@return TRUE if equal */
@@ -309,7 +314,7 @@ dtuple_fold(
ulint n_fields,/*!< in: number of complete fields to fold */
ulint n_bytes,/*!< in: number of bytes to fold in an
incomplete last field */
- dulint tree_id)/*!< in: index tree id */
+ index_id_t tree_id)/*!< in: index tree id */
__attribute__((pure));
/*******************************************************************//**
Sets types of fields binary in a tuple. */
diff --git a/storage/xtradb/include/data0data.ic b/storage/xtradb/include/data0data.ic
index da79aa33702..5c0f8039c80 100644
--- a/storage/xtradb/include/data0data.ic
+++ b/storage/xtradb/include/data0data.ic
@@ -229,20 +229,30 @@ dfield_dup(
}
/*********************************************************************//**
-Tests if data length and content is equal for two dfields.
-@return TRUE if equal */
+Tests if two data fields are equal.
+If len==0, tests the data length and content for equality.
+If len>0, tests the first len bytes of the content for equality.
+@return TRUE if both fields are NULL or if they are equal */
UNIV_INLINE
ibool
dfield_datas_are_binary_equal(
/*==========================*/
const dfield_t* field1, /*!< in: field */
- const dfield_t* field2) /*!< in: field */
+ const dfield_t* field2, /*!< in: field */
+ ulint len) /*!< in: maximum prefix to compare,
+ or 0 to compare the whole field length */
{
- ulint len;
+ ulint len2 = len;
- len = field1->len;
+ if (field1->len == UNIV_SQL_NULL || len == 0 || field1->len < len) {
+ len = field1->len;
+ }
+
+ if (field2->len == UNIV_SQL_NULL || len2 == 0 || field2->len < len2) {
+ len2 = field2->len;
+ }
- return(len == field2->len
+ return(len == len2
&& (len == UNIV_SQL_NULL
|| !memcmp(field1->data, field2->data, len)));
}
@@ -518,7 +528,7 @@ dtuple_fold(
ulint n_fields,/*!< in: number of complete fields to fold */
ulint n_bytes,/*!< in: number of bytes to fold in an
incomplete last field */
- dulint tree_id)/*!< in: index tree id */
+ index_id_t tree_id)/*!< in: index tree id */
{
const dfield_t* field;
ulint i;
@@ -530,7 +540,7 @@ dtuple_fold(
ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N);
ut_ad(dtuple_check_typed(tuple));
- fold = ut_fold_dulint(tree_id);
+ fold = ut_fold_ull(tree_id);
for (i = 0; i < n_fields; i++) {
field = dtuple_get_nth_field(tuple, i);
diff --git a/storage/xtradb/include/data0type.h b/storage/xtradb/include/data0type.h
index a73bed3a9f5..d7fa0b9cd44 100644
--- a/storage/xtradb/include/data0type.h
+++ b/storage/xtradb/include/data0type.h
@@ -128,7 +128,7 @@ columns, and for them the precise type is usually not used at all.
/* Precise data types for system columns and the length of those columns;
NOTE: the values must run from 0 up in the order given! All codes must
be less than 256 */
-#define DATA_ROW_ID 0 /* row id: a dulint */
+#define DATA_ROW_ID 0 /* row id: a 48-bit integer */
#define DATA_ROW_ID_LEN 6 /* stored length for row id */
#define DATA_TRX_ID 1 /* transaction id: 6 bytes */
@@ -168,6 +168,20 @@ SQL null*/
store the charset-collation number; one byte is left unused, though */
#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE 6
+/* Maximum multi-byte character length in bytes, plus 1 */
+#define DATA_MBMAX 5
+
+/* Pack mbminlen, mbmaxlen to mbminmaxlen. */
+#define DATA_MBMINMAXLEN(mbminlen, mbmaxlen) \
+ ((mbmaxlen) * DATA_MBMAX + (mbminlen))
+/* Get mbminlen from mbminmaxlen. Cast the result of UNIV_EXPECT to ulint
+because in GCC it returns a long. */
+#define DATA_MBMINLEN(mbminmaxlen) ((ulint) \
+ UNIV_EXPECT(((mbminmaxlen) % DATA_MBMAX), \
+ 1))
+/* Get mbmaxlen from mbminmaxlen. */
+#define DATA_MBMAXLEN(mbminmaxlen) ((ulint) ((mbminmaxlen) / DATA_MBMAX))
+
#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
Gets the MySQL type code from a dtype.
@@ -187,10 +201,8 @@ ulint
dtype_get_at_most_n_mbchars(
/*========================*/
ulint prtype, /*!< in: precise type */
- ulint mbminlen, /*!< in: minimum length of a
- multi-byte character */
- ulint mbmaxlen, /*!< in: maximum length of a
- multi-byte character */
+ ulint mbminmaxlen, /*!< in: minimum and maximum length of
+ a multi-byte character */
ulint prefix_len, /*!< in: length of the requested
prefix, in characters, multiplied by
dtype_get_mbmaxlen(dtype) */
@@ -335,6 +347,19 @@ dtype_get_mbmaxlen(
/*===============*/
const dtype_t* type); /*!< in: type */
/*********************************************************************//**
+Sets the minimum and maximum length of a character, in bytes. */
+UNIV_INLINE
+void
+dtype_set_mbminmaxlen(
+/*==================*/
+ dtype_t* type, /*!< in/out: type */
+ ulint mbminlen, /*!< in: minimum length of a char,
+ in bytes, or 0 if this is not
+ a character type */
+ ulint mbmaxlen); /*!< in: maximum length of a char,
+ in bytes, or 0 if this is not
+ a character type */
+/*********************************************************************//**
Gets the padding character code for the type.
@return padding character code, or ULINT_UNDEFINED if no padding specified */
UNIV_INLINE
@@ -354,8 +379,8 @@ dtype_get_fixed_size_low(
ulint mtype, /*!< in: main type */
ulint prtype, /*!< in: precise type */
ulint len, /*!< in: length */
- ulint mbminlen, /*!< in: minimum length of a multibyte char */
- ulint mbmaxlen, /*!< in: maximum length of a multibyte char */
+ ulint mbminmaxlen, /*!< in: minimum and maximum length of a
+ multibyte character, in bytes */
ulint comp); /*!< in: nonzero=ROW_FORMAT=COMPACT */
#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
@@ -368,8 +393,8 @@ dtype_get_min_size_low(
ulint mtype, /*!< in: main type */
ulint prtype, /*!< in: precise type */
ulint len, /*!< in: length */
- ulint mbminlen, /*!< in: minimum length of a multibyte char */
- ulint mbmaxlen); /*!< in: maximum length of a multibyte char */
+ ulint mbminmaxlen); /*!< in: minimum and maximum length of a
+ multibyte character */
/***********************************************************************//**
Returns the maximum size of a data type. Note: types in system tables may be
incomplete and return incorrect information.
@@ -472,10 +497,11 @@ struct dtype_struct{
the string, MySQL uses 1 or 2
bytes to store the string length) */
#ifndef UNIV_HOTBACKUP
- unsigned mbminlen:2; /*!< minimum length of a
- character, in bytes */
- unsigned mbmaxlen:3; /*!< maximum length of a
- character, in bytes */
+ unsigned mbminmaxlen:5; /*!< minimum and maximum length of a
+ character, in bytes;
+ DATA_MBMINMAXLEN(mbminlen,mbmaxlen);
+ mbminlen=DATA_MBMINLEN(mbminmaxlen);
+ mbmaxlen=DATA_MBMINLEN(mbminmaxlen) */
#endif /* !UNIV_HOTBACKUP */
};
diff --git a/storage/xtradb/include/data0type.ic b/storage/xtradb/include/data0type.ic
index 2bf67a941bd..757dd815c5e 100644
--- a/storage/xtradb/include/data0type.ic
+++ b/storage/xtradb/include/data0type.ic
@@ -93,14 +93,35 @@ dtype_get_mblen(
innobase_get_cset_width(dtype_get_charset_coll(prtype),
mbminlen, mbmaxlen);
ut_ad(*mbminlen <= *mbmaxlen);
- ut_ad(*mbminlen <= 2); /* mbminlen in dtype_t is 0..3 */
- ut_ad(*mbmaxlen < 1 << 3); /* mbmaxlen in dtype_t is 0..7 */
+ ut_ad(*mbminlen < DATA_MBMAX);
+ ut_ad(*mbmaxlen < DATA_MBMAX);
} else {
*mbminlen = *mbmaxlen = 0;
}
}
/*********************************************************************//**
+Sets the minimum and maximum length of a character, in bytes. */
+UNIV_INLINE
+void
+dtype_set_mbminmaxlen(
+/*==================*/
+ dtype_t* type, /*!< in/out: type */
+ ulint mbminlen, /*!< in: minimum length of a char,
+ in bytes, or 0 if this is not
+ a character type */
+ ulint mbmaxlen) /*!< in: maximum length of a char,
+ in bytes, or 0 if this is not
+ a character type */
+{
+ ut_ad(mbminlen < DATA_MBMAX);
+ ut_ad(mbmaxlen < DATA_MBMAX);
+ ut_ad(mbminlen <= mbmaxlen);
+
+ type->mbminmaxlen = DATA_MBMINMAXLEN(mbminlen, mbmaxlen);
+}
+
+/*********************************************************************//**
Compute the mbminlen and mbmaxlen members of a data type structure. */
UNIV_INLINE
void
@@ -112,8 +133,7 @@ dtype_set_mblen(
ulint mbmaxlen;
dtype_get_mblen(type->mtype, type->prtype, &mbminlen, &mbmaxlen);
- type->mbminlen = mbminlen;
- type->mbmaxlen = mbmaxlen;
+ dtype_set_mbminmaxlen(type, mbminlen, mbmaxlen);
ut_ad(dtype_validate(type));
}
@@ -210,7 +230,7 @@ dtype_get_mbminlen(
const dtype_t* type) /*!< in: type */
{
ut_ad(type);
- return(type->mbminlen);
+ return(DATA_MBMINLEN(type->mbminmaxlen));
}
/*********************************************************************//**
Gets the maximum length of a character, in bytes.
@@ -223,7 +243,7 @@ dtype_get_mbmaxlen(
const dtype_t* type) /*!< in: type */
{
ut_ad(type);
- return(type->mbmaxlen);
+ return(DATA_MBMAXLEN(type->mbminmaxlen));
}
/*********************************************************************//**
@@ -404,8 +424,8 @@ dtype_get_fixed_size_low(
ulint mtype, /*!< in: main type */
ulint prtype, /*!< in: precise type */
ulint len, /*!< in: length */
- ulint mbminlen, /*!< in: minimum length of a multibyte char */
- ulint mbmaxlen, /*!< in: maximum length of a multibyte char */
+ ulint mbminmaxlen, /*!< in: minimum and maximum length of
+ a multibyte character, in bytes */
ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
{
switch (mtype) {
@@ -453,8 +473,9 @@ dtype_get_fixed_size_low(
dtype_get_charset_coll(prtype),
&i_mbminlen, &i_mbmaxlen);
- if (UNIV_UNLIKELY(mbminlen != i_mbminlen)
- || UNIV_UNLIKELY(mbmaxlen != i_mbmaxlen)) {
+ if (UNIV_UNLIKELY
+ (DATA_MBMINMAXLEN(i_mbminlen, i_mbmaxlen)
+ != mbminmaxlen)) {
ut_print_timestamp(stderr);
fprintf(stderr, " InnoDB: "
@@ -464,10 +485,10 @@ dtype_get_fixed_size_low(
"type->mbmaxlen=%lu\n",
(ulong) i_mbminlen,
(ulong) i_mbmaxlen,
- (ulong) mbminlen,
- (ulong) mbmaxlen);
+ (ulong) DATA_MBMINLEN(mbminmaxlen),
+ (ulong) DATA_MBMAXLEN(mbminmaxlen));
}
- if (mbminlen == mbmaxlen) {
+ if (i_mbminlen == i_mbmaxlen) {
return(len);
}
}
@@ -499,8 +520,8 @@ dtype_get_min_size_low(
ulint mtype, /*!< in: main type */
ulint prtype, /*!< in: precise type */
ulint len, /*!< in: length */
- ulint mbminlen, /*!< in: minimum length of a multibyte char */
- ulint mbmaxlen) /*!< in: maximum length of a multibyte char */
+ ulint mbminmaxlen) /*!< in: minimum and maximum length of a
+ multi-byte character */
{
switch (mtype) {
case DATA_SYS:
@@ -527,14 +548,22 @@ dtype_get_min_size_low(
case DATA_DOUBLE:
return(len);
case DATA_MYSQL:
- if ((prtype & DATA_BINARY_TYPE) || mbminlen == mbmaxlen) {
+ if (prtype & DATA_BINARY_TYPE) {
return(len);
+ } else {
+ ulint mbminlen = DATA_MBMINLEN(mbminmaxlen);
+ ulint mbmaxlen = DATA_MBMAXLEN(mbminmaxlen);
+
+ if (mbminlen == mbmaxlen) {
+ return(len);
+ }
+
+ /* this is a variable-length character set */
+ ut_a(mbminlen > 0);
+ ut_a(mbmaxlen > mbminlen);
+ ut_a(len % mbmaxlen == 0);
+ return(len * mbminlen / mbmaxlen);
}
- /* this is a variable-length character set */
- ut_a(mbminlen > 0);
- ut_a(mbmaxlen > mbminlen);
- ut_a(len % mbmaxlen == 0);
- return(len * mbminlen / mbmaxlen);
case DATA_VARCHAR:
case DATA_BINARY:
case DATA_DECIMAL:
@@ -595,9 +624,9 @@ dtype_get_sql_null_size(
{
#ifndef UNIV_HOTBACKUP
return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
- type->mbminlen, type->mbmaxlen, comp));
+ type->mbminmaxlen, comp));
#else /* !UNIV_HOTBACKUP */
return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len,
- 0, 0, 0));
+ 0, 0));
#endif /* !UNIV_HOTBACKUP */
}
diff --git a/storage/xtradb/include/db0err.h b/storage/xtradb/include/db0err.h
index c7fa6d2a444..74a2354bce3 100644
--- a/storage/xtradb/include/db0err.h
+++ b/storage/xtradb/include/db0err.h
@@ -94,9 +94,24 @@ enum db_err {
DB_PRIMARY_KEY_IS_NULL, /* a column in the PRIMARY KEY
was found to be NULL */
+
+ DB_STATS_DO_NOT_EXIST, /* an operation that requires the
+ persistent storage, used for recording
+ table and index statistics, was
+ requested but this storage does not
+ exist itself or the stats for a given
+ table do not exist */
DB_FOREIGN_EXCEED_MAX_CASCADE, /* Foreign key constraint related
cascading delete/update exceeds
maximum allowed depth */
+ DB_CHILD_NO_INDEX, /* the child (foreign) table does not
+ have an index that contains the
+ foreign keys as its prefix columns */
+ DB_PARENT_NO_INDEX, /* the parent table does not
+ have an index that contains the
+ foreign keys as its prefix columns */
+ DB_TOO_BIG_INDEX_COL, /* index column size exceeds maximum
+ limit */
/* The following are partial failure codes */
DB_FAIL = 1000,
diff --git a/storage/xtradb/include/dict0boot.h b/storage/xtradb/include/dict0boot.h
index 9239e031a7f..c8a09ca7de4 100644
--- a/storage/xtradb/include/dict0boot.h
+++ b/storage/xtradb/include/dict0boot.h
@@ -51,32 +51,35 @@ UNIV_INTERN
void
dict_hdr_get_new_id(
/*================*/
- dulint* table_id, /*!< out: table id (not assigned if NULL) */
- dulint* index_id, /*!< out: index id (not assigned if NULL) */
- ulint* space_id); /*!< out: space id (not assigned if NULL) */
+ table_id_t* table_id, /*!< out: table id
+ (not assigned if NULL) */
+ index_id_t* index_id, /*!< out: index id
+ (not assigned if NULL) */
+ ulint* space_id); /*!< out: space id
+ (not assigned if NULL) */
/**********************************************************************//**
Returns a new row id.
@return the new id */
UNIV_INLINE
-dulint
+row_id_t
dict_sys_get_new_row_id(void);
/*=========================*/
/**********************************************************************//**
Reads a row id from a record or other 6-byte stored form.
@return row id */
UNIV_INLINE
-dulint
+row_id_t
dict_sys_read_row_id(
/*=================*/
- byte* field); /*!< in: record field */
+ const byte* field); /*!< in: record field */
/**********************************************************************//**
Writes a row id to a record or other 6-byte stored form. */
UNIV_INLINE
void
dict_sys_write_row_id(
/*==================*/
- byte* field, /*!< in: record field */
- dulint row_id);/*!< in: row id */
+ byte* field, /*!< in: record field */
+ row_id_t row_id);/*!< in: row id */
/*****************************************************************//**
Initializes the data dictionary memory structures when the database is
started. This function is also called when the data dictionary is created. */
@@ -97,13 +100,13 @@ dict_create(void);
#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
/* The ids for the basic system tables and their indexes */
-#define DICT_TABLES_ID ut_dulint_create(0, 1)
-#define DICT_COLUMNS_ID ut_dulint_create(0, 2)
-#define DICT_INDEXES_ID ut_dulint_create(0, 3)
-#define DICT_FIELDS_ID ut_dulint_create(0, 4)
-#define DICT_STATS_ID ut_dulint_create(0, 6)
+#define DICT_TABLES_ID 1
+#define DICT_COLUMNS_ID 2
+#define DICT_INDEXES_ID 3
+#define DICT_FIELDS_ID 4
+#define DICT_STATS_ID 6
/* The following is a secondary index on SYS_TABLES */
-#define DICT_TABLE_IDS_ID ut_dulint_create(0, 5)
+#define DICT_TABLE_IDS_ID 5
#define DICT_HDR_FIRST_ID 10 /* the ids for tables etc. start
from this number, except for basic
@@ -111,7 +114,7 @@ dict_create(void);
indexes; ibuf tables and indexes are
assigned as the id the number
DICT_IBUF_ID_MIN plus the space id */
-#define DICT_IBUF_ID_MIN ut_dulint_create(0xFFFFFFFFUL, 0)
+#define DICT_IBUF_ID_MIN 0xFFFFFFFF00000000ULL
/* The offset of the dictionary header on the page */
#define DICT_HDR FSEG_PAGE_DATA
@@ -146,13 +149,14 @@ clustered index */
#define DICT_SYS_INDEXES_NAME_FIELD 4
#define DICT_SYS_STATS_DIFF_VALS_FIELD 4
+#define DICT_SYS_STATS_NON_NULL_VALS_FIELD 5
/* When a row id which is zero modulo this number (which must be a power of
two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
updated */
#define DICT_HDR_ROW_ID_WRITE_MARGIN 256
-#define DICT_HDR_XTRADB_FLAG ut_dulint_create(0x58545241UL,0x44425F31UL) /* "XTRADB_1" */
+#define DICT_HDR_XTRADB_FLAG 0x5854524144425F31ULL /* "XTRADB_1" */
#ifndef UNIV_NONINL
#include "dict0boot.ic"
diff --git a/storage/xtradb/include/dict0boot.ic b/storage/xtradb/include/dict0boot.ic
index d5f372e38c4..d3ba9eee78f 100644
--- a/storage/xtradb/include/dict0boot.ic
+++ b/storage/xtradb/include/dict0boot.ic
@@ -36,22 +36,22 @@ dict_hdr_flush_row_id(void);
Returns a new row id.
@return the new id */
UNIV_INLINE
-dulint
+row_id_t
dict_sys_get_new_row_id(void)
/*=========================*/
{
- dulint id;
+ row_id_t id;
mutex_enter(&(dict_sys->mutex));
id = dict_sys->row_id;
- if (0 == (ut_dulint_get_low(id) % DICT_HDR_ROW_ID_WRITE_MARGIN)) {
+ if (0 == (id % DICT_HDR_ROW_ID_WRITE_MARGIN)) {
dict_hdr_flush_row_id();
}
- UT_DULINT_INC(dict_sys->row_id);
+ dict_sys->row_id++;
mutex_exit(&(dict_sys->mutex));
@@ -62,10 +62,10 @@ dict_sys_get_new_row_id(void)
Reads a row id from a record or other 6-byte stored form.
@return row id */
UNIV_INLINE
-dulint
+row_id_t
dict_sys_read_row_id(
/*=================*/
- byte* field) /*!< in: record field */
+ const byte* field) /*!< in: record field */
{
#if DATA_ROW_ID_LEN != 6
# error "DATA_ROW_ID_LEN != 6"
@@ -80,8 +80,8 @@ UNIV_INLINE
void
dict_sys_write_row_id(
/*==================*/
- byte* field, /*!< in: record field */
- dulint row_id) /*!< in: row id */
+ byte* field, /*!< in: record field */
+ row_id_t row_id) /*!< in: row id */
{
#if DATA_ROW_ID_LEN != 6
# error "DATA_ROW_ID_LEN != 6"
diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h
index d18b3ecb1b0..d99177e0330 100644
--- a/storage/xtradb/include/dict0dict.h
+++ b/storage/xtradb/include/dict0dict.h
@@ -75,8 +75,8 @@ UNIV_INTERN
dict_table_t*
dict_table_get_on_id(
/*=================*/
- dulint table_id, /*!< in: table id */
- trx_t* trx); /*!< in: transaction handle */
+ table_id_t table_id, /*!< in: table id */
+ trx_t* trx); /*!< in: transaction handle */
/********************************************************************//**
Decrements the count of open MySQL handles to a table. */
UNIV_INTERN
@@ -102,6 +102,33 @@ void
dict_load_space_id_list(void);
/*=========================*/
/*********************************************************************//**
+Gets the minimum number of bytes per character.
+@return minimum multi-byte char size, in bytes */
+UNIV_INLINE
+ulint
+dict_col_get_mbminlen(
+/*==================*/
+ const dict_col_t* col); /*!< in: column */
+/*********************************************************************//**
+Gets the maximum number of bytes per character.
+@return maximum multi-byte char size, in bytes */
+UNIV_INLINE
+ulint
+dict_col_get_mbmaxlen(
+/*==================*/
+ const dict_col_t* col); /*!< in: column */
+/*********************************************************************//**
+Sets the minimum and maximum number of bytes per character. */
+UNIV_INLINE
+void
+dict_col_set_mbminmaxlen(
+/*=====================*/
+ dict_col_t* col, /*!< in/out: column */
+ ulint mbminlen, /*!< in: minimum multi-byte
+ character size, in bytes */
+ ulint mbmaxlen); /*!< in: minimum multi-byte
+ character size, in bytes */
+/*********************************************************************//**
Gets the column data type. */
UNIV_INLINE
void
@@ -109,6 +136,19 @@ dict_col_copy_type(
/*===============*/
const dict_col_t* col, /*!< in: column */
dtype_t* type); /*!< out: data type */
+/**********************************************************************//**
+Determine bytes of column prefix to be stored in the undo log. Please
+note if the table format is UNIV_FORMAT_A (< DICT_TF_FORMAT_ZIP), no prefix
+needs to be stored in the undo log.
+@return bytes of column prefix to be stored in the undo log */
+UNIV_INLINE
+ulint
+dict_max_field_len_store_undo(
+/*==========================*/
+ dict_table_t* table, /*!< in: table */
+ const dict_col_t* col); /*!< in: column which index prefix
+ is based on */
+
#endif /* !UNIV_HOTBACKUP */
#ifdef UNIV_DEBUG
/*********************************************************************//**
@@ -277,7 +317,7 @@ void
dict_table_change_id_in_cache(
/*==========================*/
dict_table_t* table, /*!< in/out: table object already in cache */
- dulint new_id);/*!< in: new id to set */
+ table_id_t new_id);/*!< in: new id to set */
/**********************************************************************//**
Adds a foreign key constraint object to the dictionary cache. May free
the object if there already is an object with the same identifier in.
@@ -318,7 +358,8 @@ void
dict_table_replace_index_in_foreign_list(
/*=====================================*/
dict_table_t* table, /*!< in/out: table */
- dict_index_t* index); /*!< in: index to be replaced */
+ dict_index_t* index, /*!< in: index to be replaced */
+ const trx_t* trx); /*!< in: transaction handle */
/*********************************************************************//**
Checks if a index is defined for a foreign key constraint. Index is a part
of a foreign key constraint if the index is referenced by foreign key
@@ -397,7 +438,7 @@ dict_index_t*
dict_index_get_on_id_low(
/*=====================*/
dict_table_t* table, /*!< in: table */
- dulint index_id); /*!< in: index id */
+ index_id_t index_id); /*!< in: index id */
/**********************************************************************//**
Checks if a table is in the dictionary cache.
@return table, NULL if not found */
@@ -413,6 +454,18 @@ function.
@return table, NULL if not found */
UNIV_INLINE
dict_table_t*
+dict_table_get_low_ignore_err(
+/*===========================*/
+ const char* table_name, /*!< in: table name */
+ dict_err_ignore_t
+ ignore_err); /*!< in: error to be ignored when
+ loading a table definition */
+/**********************************************************************//**
+Gets a table; loads it to the dictionary cache if necessary. A low-level
+function.
+@return table, NULL if not found */
+UNIV_INLINE
+dict_table_t*
dict_table_get_low(
/*===============*/
const char* table_name); /*!< in: table name */
@@ -423,7 +476,7 @@ UNIV_INLINE
dict_table_t*
dict_table_get_on_id_low(
/*=====================*/
- dulint table_id); /*!< in: table id */
+ table_id_t table_id); /*!< in: table id */
/**********************************************************************//**
Find an index that is equivalent to the one passed in and is not marked
for deletion.
@@ -726,7 +779,7 @@ UNIV_INTERN
dict_index_t*
dict_index_find_on_id_low(
/*======================*/
- dulint id); /*!< in: index id */
+ index_id_t id); /*!< in: index id */
/**********************************************************************//**
Adds an index to the dictionary cache.
@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
@@ -917,7 +970,7 @@ UNIV_INTERN
dict_index_t*
dict_index_get_if_in_cache_low(
/*===========================*/
- dulint index_id); /*!< in: index id */
+ index_id_t index_id); /*!< in: index id */
#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
/**********************************************************************//**
Returns an index object if it is found in the dictionary cache.
@@ -926,7 +979,7 @@ UNIV_INTERN
dict_index_t*
dict_index_get_if_in_cache(
/*=======================*/
- dulint index_id); /*!< in: index id */
+ index_id_t index_id); /*!< in: index id */
#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
#ifdef UNIV_DEBUG
/**********************************************************************//**
@@ -1053,21 +1106,21 @@ Calculates new estimates for table and index statistics. The statistics
are used in query optimization. */
UNIV_INTERN
void
-dict_update_statistics_low(
-/*=======================*/
+dict_update_statistics(
+/*===================*/
dict_table_t* table, /*!< in/out: table */
- ibool has_dict_mutex, /*!< in: TRUE if the caller has the
- dictionary mutex */
+ ibool only_calc_if_missing_stats, /*!< in: only
+ update/recalc the stats if they have
+ not been initialized yet, otherwise
+ do nothing */
ibool sync);
/*********************************************************************//**
-Calculates new estimates for table and index statistics. The statistics
-are used in query optimization. */
+*/
UNIV_INTERN
-void
-dict_update_statistics(
-/*===================*/
- dict_table_t* table, /*!< in/out: table */
- ibool sync);
+ibool
+dict_is_older_statistics(
+/*=====================*/
+ dict_index_t* index);
/********************************************************************//**
Reserves the dictionary system mutex for MySQL. */
UNIV_INTERN
@@ -1081,21 +1134,25 @@ void
dict_mutex_exit_for_mysql(void);
/*===========================*/
/**********************************************************************//**
-Lock the appropriate mutex to protect index->stat_n_diff_key_vals[].
-index->id is used to pick the right mutex and it should not change
-before dict_index_stat_mutex_exit() is called on this index. */
+Lock the appropriate latch to protect a given table's statistics.
+table->id is used to pick the corresponding latch from a global array of
+latches. */
UNIV_INTERN
void
-dict_index_stat_mutex_enter(
-/*========================*/
- const dict_index_t* index); /*!< in: index */
+dict_table_stats_lock(
+/*==================*/
+ const dict_table_t* table, /*!< in: table */
+ ulint latch_mode); /*!< in: RW_S_LATCH or
+ RW_X_LATCH */
/**********************************************************************//**
-Unlock the appropriate mutex that protects index->stat_n_diff_key_vals[]. */
+Unlock the latch that has been locked by dict_table_stats_lock() */
UNIV_INTERN
void
-dict_index_stat_mutex_exit(
-/*=======================*/
- const dict_index_t* index); /*!< in: index */
+dict_table_stats_unlock(
+/*====================*/
+ const dict_table_t* table, /*!< in: table */
+ ulint latch_mode); /*!< in: RW_S_LATCH or
+ RW_X_LATCH */
/********************************************************************//**
Checks if the database name in two table names is the same.
@return TRUE if same db name */
@@ -1159,7 +1216,7 @@ struct dict_sys_struct{
and DROP TABLE, as well as reading
the dictionary data for a table from
system tables */
- dulint row_id; /*!< the next row id to assign;
+ row_id_t row_id; /*!< the next row id to assign;
NOTE that at a checkpoint this
must be written to the dict system
header and flushed to a file; in
diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic
index bd7534dc7e2..b03f5117295 100644
--- a/storage/xtradb/include/dict0dict.ic
+++ b/storage/xtradb/include/dict0dict.ic
@@ -29,6 +29,46 @@ Created 1/8/1996 Heikki Tuuri
#include "rem0types.h"
/*********************************************************************//**
+Gets the minimum number of bytes per character.
+@return minimum multi-byte char size, in bytes */
+UNIV_INLINE
+ulint
+dict_col_get_mbminlen(
+/*==================*/
+ const dict_col_t* col) /*!< in: column */
+{
+ return(DATA_MBMINLEN(col->mbminmaxlen));
+}
+/*********************************************************************//**
+Gets the maximum number of bytes per character.
+@return maximum multi-byte char size, in bytes */
+UNIV_INLINE
+ulint
+dict_col_get_mbmaxlen(
+/*==================*/
+ const dict_col_t* col) /*!< in: column */
+{
+ return(DATA_MBMAXLEN(col->mbminmaxlen));
+}
+/*********************************************************************//**
+Sets the minimum and maximum number of bytes per character. */
+UNIV_INLINE
+void
+dict_col_set_mbminmaxlen(
+/*=====================*/
+ dict_col_t* col, /*!< in/out: column */
+ ulint mbminlen, /*!< in: minimum multi-byte
+ character size, in bytes */
+ ulint mbmaxlen) /*!< in: minimum multi-byte
+ character size, in bytes */
+{
+ ut_ad(mbminlen < DATA_MBMAX);
+ ut_ad(mbmaxlen < DATA_MBMAX);
+ ut_ad(mbminlen <= mbmaxlen);
+
+ col->mbminmaxlen = DATA_MBMINMAXLEN(mbminlen, mbmaxlen);
+}
+/*********************************************************************//**
Gets the column data type. */
UNIV_INLINE
void
@@ -42,8 +82,7 @@ dict_col_copy_type(
type->mtype = col->mtype;
type->prtype = col->prtype;
type->len = col->len;
- type->mbminlen = col->mbminlen;
- type->mbmaxlen = col->mbmaxlen;
+ type->mbminmaxlen = col->mbminmaxlen;
}
#endif /* !UNIV_HOTBACKUP */
@@ -65,8 +104,7 @@ dict_col_type_assert_equal(
ut_ad(col->prtype == type->prtype);
ut_ad(col->len == type->len);
# ifndef UNIV_HOTBACKUP
- ut_ad(col->mbminlen == type->mbminlen);
- ut_ad(col->mbmaxlen == type->mbmaxlen);
+ ut_ad(col->mbminmaxlen == type->mbminmaxlen);
# endif /* !UNIV_HOTBACKUP */
return(TRUE);
@@ -84,7 +122,7 @@ dict_col_get_min_size(
const dict_col_t* col) /*!< in: column */
{
return(dtype_get_min_size_low(col->mtype, col->prtype, col->len,
- col->mbminlen, col->mbmaxlen));
+ col->mbminmaxlen));
}
/***********************************************************************//**
Returns the maximum size of the column.
@@ -109,7 +147,7 @@ dict_col_get_fixed_size(
ulint comp) /*!< in: nonzero=ROW_FORMAT=COMPACT */
{
return(dtype_get_fixed_size_low(col->mtype, col->prtype, col->len,
- col->mbminlen, col->mbmaxlen, comp));
+ col->mbminmaxlen, comp));
}
/***********************************************************************//**
Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column.
@@ -797,6 +835,34 @@ dict_table_check_if_in_cache_low(
}
/**********************************************************************//**
+load a table into dictionary cache, ignore any error specified during load;
+@return table, NULL if not found */
+UNIV_INLINE
+dict_table_t*
+dict_table_get_low_ignore_err(
+/*==========================*/
+ const char* table_name, /*!< in: table name */
+ dict_err_ignore_t
+ ignore_err) /*!< in: error to be ignored when
+ loading a table definition */
+{
+ dict_table_t* table;
+
+ ut_ad(table_name);
+ ut_ad(mutex_own(&(dict_sys->mutex)));
+
+ table = dict_table_check_if_in_cache_low(table_name);
+
+ if (table == NULL) {
+ table = dict_load_table(table_name, TRUE, ignore_err);
+ }
+
+ ut_ad(!table || table->cached);
+
+ return(table);
+}
+
+/**********************************************************************//**
Gets a table; loads it to the dictionary cache if necessary. A low-level
function.
@return table, NULL if not found */
@@ -814,7 +880,7 @@ dict_table_get_low(
table = dict_table_check_if_in_cache_low(table_name);
if (table == NULL) {
- table = dict_load_table(table_name);
+ table = dict_load_table(table_name, TRUE, DICT_ERR_IGNORE_NONE);
}
ut_ad(!table || table->cached);
@@ -829,7 +895,7 @@ UNIV_INLINE
dict_table_t*
dict_table_get_on_id_low(
/*=====================*/
- dulint table_id) /*!< in: table id */
+ table_id_t table_id) /*!< in: table id */
{
dict_table_t* table;
ulint fold;
@@ -837,11 +903,11 @@ dict_table_get_on_id_low(
ut_ad(mutex_own(&(dict_sys->mutex)));
/* Look for the table name in the hash table */
- fold = ut_fold_dulint(table_id);
+ fold = ut_fold_ull(table_id);
HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold,
dict_table_t*, table, ut_ad(table->cached),
- !ut_dulint_cmp(table->id, table_id));
+ table->id == table_id);
if (table == NULL) {
table = dict_load_table_on_id(table_id);
}
@@ -858,4 +924,30 @@ dict_table_get_on_id_low(
return(table);
}
+
+/**********************************************************************//**
+Determine bytes of column prefix to be stored in the undo log. Please
+note if the table format is UNIV_FORMAT_A (< DICT_TF_FORMAT_ZIP), no prefix
+needs to be stored in the undo log.
+@return bytes of column prefix to be stored in the undo log */
+UNIV_INLINE
+ulint
+dict_max_field_len_store_undo(
+/*==========================*/
+ dict_table_t* table, /*!< in: table */
+ const dict_col_t* col) /*!< in: column which index prefix
+ is based on */
+{
+ ulint prefix_len = 0;
+
+ if (dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP)
+ {
+ prefix_len = col->max_prefix
+ ? col->max_prefix
+ : DICT_MAX_FIELD_LEN_BY_FORMAT(table);
+ }
+
+ return(prefix_len);
+}
+
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/dict0load.h b/storage/xtradb/include/dict0load.h
index f41882019d5..215beee1ba6 100644
--- a/storage/xtradb/include/dict0load.h
+++ b/storage/xtradb/include/dict0load.h
@@ -31,6 +31,36 @@ Created 4/24/1996 Heikki Tuuri
#include "dict0types.h"
#include "ut0byte.h"
#include "mem0mem.h"
+#include "btr0types.h"
+
+/** enum that defines all 6 system table IDs */
+enum dict_system_table_id {
+ SYS_TABLES = 0,
+ SYS_INDEXES,
+ SYS_COLUMNS,
+ SYS_FIELDS,
+ SYS_FOREIGN,
+ SYS_FOREIGN_COLS,
+ SYS_STATS,
+
+ /* This must be last item. Defines the number of system tables. */
+ SYS_NUM_SYSTEM_TABLES
+};
+
+typedef enum dict_system_table_id dict_system_id_t;
+
+/** Status bit for dict_process_sys_tables_rec() */
+enum dict_table_info {
+ DICT_TABLE_LOAD_FROM_RECORD = 0,/*!< Directly populate a dict_table_t
+ structure with information from
+ a SYS_TABLES record */
+ DICT_TABLE_LOAD_FROM_CACHE = 1, /*!< Check first whether dict_table_t
+ is in the cache, if so, return it */
+ DICT_TABLE_UPDATE_STATS = 2 /*!< whether to update statistics
+ when loading SYS_TABLES information. */
+};
+
+typedef enum dict_table_info dict_table_info_t;
/********************************************************************//**
In a crash recovery we already have all the tablespace objects created.
@@ -54,6 +84,84 @@ char*
dict_get_first_table_name_in_db(
/*============================*/
const char* name); /*!< in: database name which ends to '/' */
+
+/********************************************************************//**
+Loads a table definition from a SYS_TABLES record to dict_table_t.
+Does not load any columns or indexes.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_load_table_low(
+/*================*/
+ const char* name, /*!< in: table name */
+ const rec_t* rec, /*!< in: SYS_TABLES record */
+ dict_table_t** table); /*!< out,own: table, or NULL */
+/********************************************************************//**
+Loads a table column definition from a SYS_COLUMNS record to
+dict_table_t.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_load_column_low(
+/*=================*/
+ dict_table_t* table, /*!< in/out: table, could be NULL
+ if we just populate a dict_column_t
+ struct with information from
+ a SYS_COLUMNS record */
+ mem_heap_t* heap, /*!< in/out: memory heap
+ for temporary storage */
+ dict_col_t* column, /*!< out: dict_column_t to fill,
+ or NULL if table != NULL */
+ table_id_t* table_id, /*!< out: table id */
+ const char** col_name, /*!< out: column name */
+ const rec_t* rec); /*!< in: SYS_COLUMNS record */
+/********************************************************************//**
+Loads an index definition from a SYS_INDEXES record to dict_index_t.
+If allocate=TRUE, we will create a dict_index_t structure and fill it
+accordingly. If allocated=FALSE, the dict_index_t will be supplied by
+the caller and filled with information read from the record. @return
+error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_load_index_low(
+/*================*/
+ byte* table_id, /*!< in/out: table id (8 bytes),
+ an "in" value if allocate=TRUE
+ and "out" when allocate=FALSE */
+ const char* table_name, /*!< in: table name */
+ mem_heap_t* heap, /*!< in/out: temporary memory heap */
+ const rec_t* rec, /*!< in: SYS_INDEXES record */
+ ibool allocate, /*!< in: TRUE=allocate *index,
+ FALSE=fill in a pre-allocated
+ *index */
+ dict_index_t** index); /*!< out,own: index, or NULL */
+/********************************************************************//**
+Loads an index field definition from a SYS_FIELDS record to
+dict_index_t.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_load_field_low(
+/*================*/
+ byte* index_id, /*!< in/out: index id (8 bytes)
+ an "in" value if index != NULL
+ and "out" if index == NULL */
+ dict_index_t* index, /*!< in/out: index, could be NULL
+ if we just populate a dict_field_t
+ struct with information from
+ a SYS_FIELDS record */
+ dict_field_t* sys_field, /*!< out: dict_field_t to be
+ filled */
+ ulint* pos, /*!< out: Field position */
+ byte* last_index_id, /*!< in: last index id */
+ mem_heap_t* heap, /*!< in/out: memory heap
+ for temporary storage */
+ const rec_t* rec, /*!< in: SYS_FIELDS record */
+ char* addition_err_str,/*!< out: additional error message
+ that requires information to be
+ filled, or NULL */
+ ulint err_str_len); /*!< in: length of addition_err_str
+ in bytes */
/********************************************************************//**
Loads a table definition and also all its index definitions, and also
the cluster definition if the table is a member in a cluster. Also loads
@@ -66,8 +174,12 @@ UNIV_INTERN
dict_table_t*
dict_load_table(
/*============*/
- const char* name); /*!< in: table name in the
+ const char* name, /*!< in: table name in the
databasename/tablename format */
+ ibool cached, /*!< in: TRUE=add to cache, FALSE=do not */
+ dict_err_ignore_t ignore_err);
+ /*!< in: error to be ignored when loading
+ table and its indexes' definition */
/***********************************************************************//**
Loads a table object based on the table id.
@return table; NULL if table does not exist */
@@ -75,7 +187,7 @@ UNIV_INTERN
dict_table_t*
dict_load_table_on_id(
/*==================*/
- dulint table_id); /*!< in: table id */
+ table_id_t table_id); /*!< in: table id */
/********************************************************************//**
This function is called when the database is booted.
Loads system table index definitions except for the clustered index which
@@ -109,7 +221,127 @@ void
dict_print(void);
/*============*/
-
+/********************************************************************//**
+This function opens a system table, and return the first record.
+@return first record of the system table */
+UNIV_INTERN
+const rec_t*
+dict_startscan_system(
+/*==================*/
+ btr_pcur_t* pcur, /*!< out: persistent cursor to
+ the record */
+ mtr_t* mtr, /*!< in: the mini-transaction */
+ dict_system_id_t system_id); /*!< in: which system table to open */
+/********************************************************************//**
+This function get the next system table record as we scan the table.
+@return the record if found, NULL if end of scan. */
+UNIV_INTERN
+const rec_t*
+dict_getnext_system(
+/*================*/
+ btr_pcur_t* pcur, /*!< in/out: persistent cursor
+ to the record */
+ mtr_t* mtr); /*!< in: the mini-transaction */
+/********************************************************************//**
+This function processes one SYS_TABLES record and populate the dict_table_t
+struct for the table. Extracted out of dict_print() to be used by
+both monitor table output and information schema innodb_sys_tables output.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_tables_rec(
+/*========================*/
+ mem_heap_t* heap, /*!< in: temporary memory heap */
+ const rec_t* rec, /*!< in: SYS_TABLES record */
+ dict_table_t** table, /*!< out: dict_table_t to fill */
+ dict_table_info_t status); /*!< in: status bit controls
+ options such as whether we shall
+ look for dict_table_t from cache
+ first */
+/********************************************************************//**
+This function parses a SYS_INDEXES record and populate a dict_index_t
+structure with the information from the record. For detail information
+about SYS_INDEXES fields, please refer to dict_boot() function.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_indexes_rec(
+/*=========================*/
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_INDEXES rec */
+ dict_index_t* index, /*!< out: dict_index_t to be
+ filled */
+ table_id_t* table_id); /*!< out: table id */
+/********************************************************************//**
+This function parses a SYS_COLUMNS record and populate a dict_column_t
+structure with the information from the record.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_columns_rec(
+/*=========================*/
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_COLUMNS rec */
+ dict_col_t* column, /*!< out: dict_col_t to be filled */
+ table_id_t* table_id, /*!< out: table id */
+ const char** col_name); /*!< out: column name */
+/********************************************************************//**
+This function parses a SYS_FIELDS record and populate a dict_field_t
+structure with the information from the record.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_fields_rec(
+/*========================*/
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_FIELDS rec */
+ dict_field_t* sys_field, /*!< out: dict_field_t to be
+ filled */
+ ulint* pos, /*!< out: Field position */
+ index_id_t* index_id, /*!< out: current index id */
+ index_id_t last_id); /*!< in: previous index id */
+/********************************************************************//**
+This function parses a SYS_FOREIGN record and populate a dict_foreign_t
+structure with the information from the record. For detail information
+about SYS_FOREIGN fields, please refer to dict_load_foreign() function
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_foreign_rec(
+/*=========================*/
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_FOREIGN rec */
+ dict_foreign_t* foreign); /*!< out: dict_foreign_t to be
+ filled */
+/********************************************************************//**
+This function parses a SYS_FOREIGN_COLS record and extract necessary
+information from the record and return to caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_foreign_col_rec(
+/*=============================*/
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_FOREIGN_COLS rec */
+ const char** name, /*!< out: foreign key constraint name */
+ const char** for_col_name, /*!< out: referencing column name */
+ const char** ref_col_name, /*!< out: referenced column name
+ in referenced table */
+ ulint* pos); /*!< out: column position */
+/********************************************************************//**
+This function parses a SYS_STATS record and extract necessary
+information from the record and return to caller.
+@return error message, or NULL on success */
+UNIV_INTERN
+const char*
+dict_process_sys_stats_rec(
+/*=============================*/
+ mem_heap_t* heap, /*!< in/out: heap memory */
+ const rec_t* rec, /*!< in: current SYS_STATS rec */
+ index_id_t* index_id, /*!< out: INDEX_ID */
+ ulint* key_cols, /*!< out: KEY_COLS */
+ ib_uint64_t* diff_vals, /*!< out: DIFF_VALS */
+ ib_uint64_t* non_null_vals); /*!< out: NON_NULL_VALS */
#ifndef UNIV_NONINL
#include "dict0load.ic"
#endif
diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h
index 6736c2a3a36..71af9ce2f0c 100644
--- a/storage/xtradb/include/dict0mem.h
+++ b/storage/xtradb/include/dict0mem.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -88,6 +88,10 @@ combination of types */
new BLOB treatment */
/** Maximum supported file format */
#define DICT_TF_FORMAT_MAX DICT_TF_FORMAT_ZIP
+
+/** Minimum supported file format */
+#define DICT_TF_FORMAT_MIN DICT_TF_FORMAT_51
+
/* @} */
#define DICT_TF_BITS 6 /*!< number of flag bits */
#if (1 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT)) <= DICT_TF_FORMAT_MAX
@@ -118,7 +122,7 @@ This could result in rescursive calls and out of stack error eventually.
DICT_FK_MAX_RECURSIVE_LOAD defines the maximum number of recursive loads,
when exceeded, the child table will not be loaded. It will be loaded when
the foreign constraint check needs to be run. */
-#define DICT_FK_MAX_RECURSIVE_LOAD 250
+#define DICT_FK_MAX_RECURSIVE_LOAD 255
/** Similarly, when tables are chained together with foreign key constraints
with on cascading delete/update clause, delete from parent table could
@@ -126,7 +130,7 @@ result in recursive cascading calls. This defines the maximum number of
such cascading deletes/updates allowed. When exceeded, the delete from
parent table will fail, and user has to drop excessive foreign constraint
before proceeds. */
-#define FK_MAX_CASCADE_DEL 300
+#define FK_MAX_CASCADE_DEL 255
/**********************************************************************//**
Creates a table memory object.
@@ -162,6 +166,36 @@ dict_mem_table_add_col(
ulint prtype, /*!< in: precise type */
ulint len); /*!< in: precision */
/**********************************************************************//**
+This function populates a dict_col_t memory structure with
+supplied information. */
+UNIV_INTERN
+void
+dict_mem_fill_column_struct(
+/*========================*/
+ dict_col_t* column, /*!< out: column struct to be
+ filled */
+ ulint col_pos, /*!< in: column position */
+ ulint mtype, /*!< in: main data type */
+ ulint prtype, /*!< in: precise type */
+ ulint col_len); /*!< in: column length */
+/**********************************************************************//**
+This function poplulates a dict_index_t index memory structure with
+supplied information. */
+UNIV_INLINE
+void
+dict_mem_fill_index_struct(
+/*=======================*/
+ dict_index_t* index, /*!< out: index to be filled */
+ mem_heap_t* heap, /*!< in: memory heap */
+ const char* table_name, /*!< in: table name */
+ const char* index_name, /*!< in: index name */
+ ulint space, /*!< in: space where the index tree is
+ placed, ignored if the index is of
+ the clustered type */
+ ulint type, /*!< in: DICT_UNIQUE,
+ DICT_CLUSTERED, ... ORed */
+ ulint n_fields); /*!< in: number of fields */
+/**********************************************************************//**
Creates an index memory object.
@return own: index object */
UNIV_INTERN
@@ -204,6 +238,30 @@ dict_foreign_t*
dict_mem_foreign_create(void);
/*=========================*/
+/**********************************************************************//**
+Sets the foreign_table_name_lookup pointer based on the value of
+lower_case_table_names. If that is 0 or 1, foreign_table_name_lookup
+will point to foreign_table_name. If 2, then another string is
+allocated from the heap and set to lower case. */
+UNIV_INTERN
+void
+dict_mem_foreign_table_name_lookup_set(
+/*===================================*/
+ dict_foreign_t* foreign, /*!< in/out: foreign struct */
+ ibool do_alloc); /*!< in: is an alloc needed */
+
+/**********************************************************************//**
+Sets the referenced_table_name_lookup pointer based on the value of
+lower_case_table_names. If that is 0 or 1, referenced_table_name_lookup
+will point to referenced_table_name. If 2, then another string is
+allocated from the heap and set to lower case. */
+UNIV_INTERN
+void
+dict_mem_referenced_table_name_lookup_set(
+/*======================================*/
+ dict_foreign_t* foreign, /*!< in/out: foreign struct */
+ ibool do_alloc); /*!< in: is an alloc needed */
+
/** Data structure for a column in a table */
struct dict_col_struct{
/*----------------------*/
@@ -230,10 +288,11 @@ struct dict_col_struct{
the string, MySQL uses 1 or 2
bytes to store the string length) */
- unsigned mbminlen:2; /*!< minimum length of a
- character, in bytes */
- unsigned mbmaxlen:3; /*!< maximum length of a
- character, in bytes */
+ unsigned mbminmaxlen:5; /*!< minimum and maximum length of a
+ character, in bytes;
+ DATA_MBMINMAXLEN(mbminlen,mbmaxlen);
+ mbminlen=DATA_MBMINLEN(mbminmaxlen);
+ mbmaxlen=DATA_MBMINLEN(mbminmaxlen) */
/*----------------------*/
/* End of definitions copied from dtype_t */
/* @} */
@@ -243,38 +302,64 @@ struct dict_col_struct{
unsigned ord_part:1; /*!< nonzero if this column
appears in the ordering fields
of an index */
+ unsigned max_prefix:12; /*!< maximum index prefix length on
+ this column. Our current max limit is
+ 3072 for Barracuda table */
};
-/** @brief DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
-indexed column length (or indexed prefix length).
+/** @brief DICT_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and
+is the maximum indexed column length (or indexed prefix length) in
+ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT. Also, in any format,
+any fixed-length field that is longer than this will be encoded as
+a variable-length field.
It is set to 3*256, so that one can create a column prefix index on
256 characters of a TEXT or VARCHAR column also in the UTF-8
charset. In that charset, a character may take at most 3 bytes. This
constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
files would be at risk! */
-#define DICT_MAX_INDEX_COL_LEN REC_MAX_INDEX_COL_LEN
+#define DICT_ANTELOPE_MAX_INDEX_COL_LEN REC_ANTELOPE_MAX_INDEX_COL_LEN
+
+/** Find out maximum indexed column length by its table format.
+For ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT, the maximum
+field length is REC_ANTELOPE_MAX_INDEX_COL_LEN - 1 (767). For new
+barracuda format, the length could be REC_VERSION_56_MAX_INDEX_COL_LEN
+(3072) bytes */
+#define DICT_MAX_FIELD_LEN_BY_FORMAT(table) \
+ ((dict_table_get_format(table) < DICT_TF_FORMAT_ZIP) \
+ ? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \
+ : REC_VERSION_56_MAX_INDEX_COL_LEN)
+
+#define DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags) \
+ ((((flags & DICT_TF_FORMAT_MASK) >> DICT_TF_FORMAT_SHIFT)\
+ < DICT_TF_FORMAT_ZIP) \
+ ? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \
+ : REC_VERSION_56_MAX_INDEX_COL_LEN)
+
+/** Defines the maximum fixed length column size */
+#define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN
/** Data structure for a field in an index */
struct dict_field_struct{
dict_col_t* col; /*!< pointer to the table column */
const char* name; /*!< name of the column */
- unsigned prefix_len:10; /*!< 0 or the length of the column
+ unsigned prefix_len:12; /*!< 0 or the length of the column
prefix in bytes in a MySQL index of
type, e.g., INDEX (textcol(25));
must be smaller than
- DICT_MAX_INDEX_COL_LEN; NOTE that
- in the UTF-8 charset, MySQL sets this
- to 3 * the prefix len in UTF-8 chars */
+ DICT_MAX_FIELD_LEN_BY_FORMAT;
+ NOTE that in the UTF-8 charset, MySQL
+ sets this to (mbmaxlen * the prefix len)
+ in UTF-8 chars */
unsigned fixed_len:10; /*!< 0 or the fixed length of the
column if smaller than
- DICT_MAX_INDEX_COL_LEN */
+ DICT_ANTELOPE_MAX_INDEX_COL_LEN */
};
/** Data structure for an index. Most fields will be
initialized to 0, NULL or FALSE in dict_mem_index_create(). */
struct dict_index_struct{
- dulint id; /*!< id of the index */
+ index_id_t id; /*!< id of the index */
mem_heap_t* heap; /*!< memory heap */
const char* name; /*!< index name */
const char* table_name;/*!< table name */
@@ -306,6 +391,8 @@ struct dict_index_struct{
/*!< TRUE if this index is marked to be
dropped in ha_innobase::prepare_drop_index(),
otherwise FALSE */
+ unsigned corrupted:1;
+ /*!< TRUE if the index object is corrupted */
dict_field_t* fields; /*!< array of field descriptions */
#ifndef UNIV_HOTBACKUP
UT_LIST_NODE_T(dict_index_t)
@@ -321,6 +408,12 @@ struct dict_index_struct{
dict_get_n_unique(index); we
periodically calculate new
estimates */
+ ib_int64_t* stat_n_non_null_key_vals;
+ /* approximate number of non-null key values
+ for this index, for each column where
+ n < dict_get_n_unique(index); This
+ is used when innodb_stats_method is
+ "nulls_ignored". */
ulint stat_index_size;
/*!< approximate index size in
database pages */
@@ -330,10 +423,17 @@ struct dict_index_struct{
/* @} */
rw_lock_t lock; /*!< read-write lock protecting the
upper levels of the index tree */
- ib_uint64_t trx_id; /*!< id of the transaction that created this
+ trx_id_t trx_id; /*!< id of the transaction that created this
index, or 0 if the index existed
when InnoDB was started up */
#endif /* !UNIV_HOTBACKUP */
+#ifdef UNIV_BLOB_DEBUG
+ mutex_t blobs_mutex;
+ /*!< mutex protecting blobs */
+ void* blobs; /*!< map of (page_no,heap_no,field_no)
+ to first_blob_page_no; protected by
+ blobs_mutex; @see btr_blob_dbg_t */
+#endif /* UNIV_BLOB_DEBUG */
#ifdef UNIV_DEBUG
ulint magic_n;/*!< magic number */
/** Value of dict_index_struct::magic_n */
@@ -358,10 +458,14 @@ struct dict_foreign_struct{
unsigned type:6; /*!< 0 or DICT_FOREIGN_ON_DELETE_CASCADE
or DICT_FOREIGN_ON_DELETE_SET_NULL */
char* foreign_table_name;/*!< foreign table name */
+ char* foreign_table_name_lookup;
+ /*!< foreign table name used for dict lookup */
dict_table_t* foreign_table; /*!< table where the foreign key is */
const char** foreign_col_names;/*!< names of the columns in the
foreign key */
char* referenced_table_name;/*!< referenced table name */
+ char* referenced_table_name_lookup;
+ /*!< referenced table name for dict lookup*/
dict_table_t* referenced_table;/*!< table where the referenced key
is */
const char** referenced_col_names;/*!< names of the referenced
@@ -395,7 +499,7 @@ a foreign key constraint is enforced, therefore RESTRICT just means no flag */
/** Data structure for a database table. Most fields will be
initialized to 0, NULL or FALSE in dict_mem_table_create(). */
struct dict_table_struct{
- dulint id; /*!< id of the table */
+ table_id_t id; /*!< id of the table */
mem_heap_t* heap; /*!< memory heap */
char* name; /*!< table name */
const char* dir_path_of_temp_table;/*!< NULL or the directory path
@@ -422,6 +526,8 @@ struct dict_table_struct{
to the dictionary cache */
unsigned n_def:10;/*!< number of columns defined so far */
unsigned n_cols:10;/*!< number of columns */
+ unsigned corrupted:1;
+ /*!< TRUE if table is corrupted */
dict_col_t* cols; /*!< array of column descriptions */
const char* col_names;
/*!< Column names packed in a character string
diff --git a/storage/xtradb/include/dict0mem.ic b/storage/xtradb/include/dict0mem.ic
index c36adb07a18..1d80ffc9b94 100644
--- a/storage/xtradb/include/dict0mem.ic
+++ b/storage/xtradb/include/dict0mem.ic
@@ -23,4 +23,50 @@ Data dictionary memory object creation
Created 1/8/1996 Heikki Tuuri
***********************************************************************/
+#include "data0type.h"
+#include "dict0mem.h"
+#include "fil0fil.h"
+/**********************************************************************//**
+This function poplulates a dict_index_t index memory structure with
+supplied information. */
+UNIV_INLINE
+void
+dict_mem_fill_index_struct(
+/*=======================*/
+ dict_index_t* index, /*!< out: index to be filled */
+ mem_heap_t* heap, /*!< in: memory heap */
+ const char* table_name, /*!< in: table name */
+ const char* index_name, /*!< in: index name */
+ ulint space, /*!< in: space where the index tree is
+ placed, ignored if the index is of
+ the clustered type */
+ ulint type, /*!< in: DICT_UNIQUE,
+ DICT_CLUSTERED, ... ORed */
+ ulint n_fields) /*!< in: number of fields */
+{
+
+ if (heap) {
+ index->heap = heap;
+ index->name = mem_heap_strdup(heap, index_name);
+ index->fields = (dict_field_t*) mem_heap_alloc(
+ heap, 1 + n_fields * sizeof(dict_field_t));
+ } else {
+ index->name = index_name;
+ index->heap = NULL;
+ index->fields = NULL;
+ }
+
+ index->type = type;
+#ifndef UNIV_HOTBACKUP
+ index->space = (unsigned int) space;
+ index->page = FIL_NULL;
+#endif /* !UNIV_HOTBACKUP */
+ index->table_name = table_name;
+ index->n_fields = (unsigned int) n_fields;
+ /* The '1 +' above prevents allocation
+ of an empty mem block */
+#ifdef UNIV_DEBUG
+ index->magic_n = DICT_INDEX_MAGIC_N;
+#endif /* UNIV_DEBUG */
+}
diff --git a/storage/xtradb/include/dict0types.h b/storage/xtradb/include/dict0types.h
index 7ad69193cc9..8cbd7cd5783 100644
--- a/storage/xtradb/include/dict0types.h
+++ b/storage/xtradb/include/dict0types.h
@@ -33,11 +33,6 @@ typedef struct dict_index_struct dict_index_t;
typedef struct dict_table_struct dict_table_t;
typedef struct dict_foreign_struct dict_foreign_t;
-/* A cluster object is a table object with the type field set to
-DICT_CLUSTERED */
-
-typedef dict_table_t dict_cluster_t;
-
typedef struct ind_node_struct ind_node_t;
typedef struct tab_node_struct tab_node_t;
@@ -45,4 +40,21 @@ typedef struct tab_node_struct tab_node_t;
#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */
#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO
+typedef ib_id_t table_id_t;
+typedef ib_id_t index_id_t;
+
+/** Error to ignore when we load table dictionary into memory. However,
+the table and index will be marked as "corrupted", and caller will
+be responsible to deal with corrupted table or index.
+Note: please define the IGNORE_ERR_* as bits, so their value can
+be or-ed together */
+enum dict_err_ignore {
+ DICT_ERR_IGNORE_NONE = 0, /*!< no error to ignore */
+ DICT_ERR_IGNORE_INDEX_ROOT = 1, /*!< ignore error if index root
+ page is FIL_NUL or incorrect value */
+ DICT_ERR_IGNORE_ALL = 0xFFFF /*!< ignore all errors */
+};
+
+typedef enum dict_err_ignore dict_err_ignore_t;
+
#endif
diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h
index 07c80ef8609..840a9fbb13a 100644
--- a/storage/xtradb/include/fil0fil.h
+++ b/storage/xtradb/include/fil0fil.h
@@ -27,12 +27,13 @@ Created 10/25/1995 Heikki Tuuri
#define fil0fil_h
#include "univ.i"
-#ifndef UNIV_HOTBACKUP
-#include "sync0rw.h"
-#endif /* !UNIV_HOTBACKUP */
#include "dict0types.h"
#include "ut0byte.h"
#include "os0file.h"
+#ifndef UNIV_HOTBACKUP
+#include "sync0rw.h"
+#include "ibuf0types.h"
+#endif /* !UNIV_HOTBACKUP */
/** When mysqld is run, the default directory "." is the mysqld datadir,
but in the MySQL Embedded Server Library and ibbackup it is not the default
@@ -648,18 +649,10 @@ _fil_io(
Confirm whether the parameters are valid or not */
UNIV_INTERN
ibool
-fil_area_is_exist(
+fil_is_exist(
/*==============*/
ulint space_id, /*!< in: space id */
- ulint zip_size, /*!< in: compressed page size in bytes;
- 0 for uncompressed pages */
- ulint block_offset, /*!< in: offset in number of blocks */
- ulint byte_offset, /*!< in: remainder of offset in bytes; in
- aio this must be divisible by the OS block
- size */
- ulint len); /*!< in: how many bytes to read or write; this
- must not cross a file boundary; in aio this
- must be a block size multiple */
+ ulint block_offset); /*!< in: offset in number of blocks */
/**********************************************************************//**
Waits for an aio operation to complete. This function is used to write the
handler for completed requests. The aio array of pending requests is divided
@@ -678,8 +671,9 @@ UNIV_INTERN
void
fil_flush(
/*======*/
- ulint space_id); /*!< in: file space id (this can be a group of
+ ulint space_id, /*!< in: file space id (this can be a group of
log files or a tablespace of the database) */
+ ibool metadata);
/**********************************************************************//**
Flushes to disk writes in file spaces of the given type possibly cached by
the OS. */
@@ -737,6 +731,15 @@ fil_page_get_type(
/*==============*/
const byte* page); /*!< in: file page */
+/*******************************************************************//**
+Returns TRUE if a single-table tablespace is being deleted.
+@return TRUE if being deleted */
+UNIV_INTERN
+ibool
+fil_tablespace_is_being_deleted(
+/*============================*/
+ ulint id); /*!< in: space id */
+
/*************************************************************************
Return local hash table informations. */
diff --git a/storage/xtradb/include/fsp0types.h b/storage/xtradb/include/fsp0types.h
index 2dd2deca671..43e385b7eb0 100644
--- a/storage/xtradb/include/fsp0types.h
+++ b/storage/xtradb/include/fsp0types.h
@@ -42,7 +42,7 @@ fseg_alloc_free_page) */
/* @} */
/** File space extent size (one megabyte) in pages */
-#define FSP_EXTENT_SIZE (1u << (20 - UNIV_PAGE_SIZE_SHIFT))
+#define FSP_EXTENT_SIZE ((ulint)1 << (20 - UNIV_PAGE_SIZE_SHIFT))
/** On a page of any file segment, data may be put starting from this
offset */
diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h
index 445d94eeabb..02e7712df58 100644
--- a/storage/xtradb/include/ha_prototypes.h
+++ b/storage/xtradb/include/ha_prototypes.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -174,6 +174,15 @@ innobase_strcasecmp(
const char* b); /*!< in: second string to compare */
/******************************************************************//**
+Strip dir name from a full path name and return only its file name.
+@return file name or "null" if no file name */
+UNIV_INTERN
+const char*
+innobase_basename(
+/*==============*/
+ const char* path_name); /*!< in: full path name */
+
+/******************************************************************//**
Returns true if the thread is executing a SELECT statement.
@return true if thd is executing SELECT */
@@ -267,13 +276,31 @@ thd_lock_wait_timeout(
/*==================*/
void* thd); /*!< in: thread handle (THD*), or NULL to query
the global innodb_lock_wait_timeout */
-
+/******************************************************************//**
+Add up the time waited for the lock for the current query. */
+UNIV_INTERN
+void
+thd_set_lock_wait_time(
+/*===================*/
+ void* thd, /*!< in: thread handle (THD*) */
+ ulint value); /*!< in: time waited for the lock */
/******************************************************************//**
*/
ulong
-thd_flush_log_at_trx_commit_session(
+thd_flush_log_at_trx_commit(
/*================================*/
void* thd);
+/**********************************************************************//**
+Get the current setting of the lower_case_table_names global parameter from
+mysqld.cc. We do a dirty read because for one there is no synchronization
+object and secondly there is little harm in doing so even if we get a torn
+read.
+@return value of lower_case_table_names */
+UNIV_INTERN
+ulint
+innobase_get_lower_case_table_names(void);
+/*=====================================*/
+
#endif
diff --git a/storage/xtradb/include/handler0alter.h b/storage/xtradb/include/handler0alter.h
index 7f5af6d2e76..017fe88d533 100644
--- a/storage/xtradb/include/handler0alter.h
+++ b/storage/xtradb/include/handler0alter.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
diff --git a/storage/xtradb/include/hash0hash.h b/storage/xtradb/include/hash0hash.h
index 492c767acc4..b17c21a45ef 100644
--- a/storage/xtradb/include/hash0hash.h
+++ b/storage/xtradb/include/hash0hash.h
@@ -49,28 +49,6 @@ hash_table_t*
hash_create(
/*========*/
ulint n); /*!< in: number of array cells */
-
-/*************************************************************//**
-*/
-UNIV_INTERN
-ulint
-hash_create_needed(
-/*===============*/
- ulint n);
-
-UNIV_INTERN
-void
-hash_create_init(
-/*=============*/
- hash_table_t* table,
- ulint n);
-
-UNIV_INTERN
-void
-hash_create_reuse(
-/*==============*/
- hash_table_t* table);
-
#ifndef UNIV_HOTBACKUP
/*************************************************************//**
Creates a mutex array to protect a hash table. */
@@ -350,33 +328,6 @@ do {\
}\
} while (0)
-/********************************************************************//**
-Align nodes with moving location.*/
-#define HASH_OFFSET(TABLE, NODE_TYPE, PTR_NAME, FADDR, FOFFSET, BOFFSET) \
-do {\
- ulint i2222;\
- ulint cell_count2222;\
-\
- cell_count2222 = hash_get_n_cells(TABLE);\
-\
- for (i2222 = 0; i2222 < cell_count2222; i2222++) {\
- NODE_TYPE* node2222;\
-\
- if ((TABLE)->array[i2222].node) \
- (TABLE)->array[i2222].node = (void*)((byte*)(TABLE)->array[i2222].node \
- + (((TABLE)->array[i2222].node > (void*)FADDR)?FOFFSET:BOFFSET));\
- node2222 = HASH_GET_FIRST((TABLE), i2222);\
-\
- while (node2222) {\
- if (node2222->PTR_NAME) \
- node2222->PTR_NAME = (void*)((byte*)(node2222->PTR_NAME) \
- + ((((void*)node2222->PTR_NAME) > (void*)FADDR)?FOFFSET:BOFFSET));\
-\
- node2222 = node2222->PTR_NAME;\
- }\
- }\
-} while (0)
-
/************************************************************//**
Gets the mutex index for a fold value in a hash table.
@return mutex number */
diff --git a/storage/xtradb/include/ibuf0ibuf.h b/storage/xtradb/include/ibuf0ibuf.h
index 8aa21fb9d95..0fb0aae9786 100644
--- a/storage/xtradb/include/ibuf0ibuf.h
+++ b/storage/xtradb/include/ibuf0ibuf.h
@@ -35,12 +35,27 @@ Created 7/19/1997 Heikki Tuuri
#ifndef UNIV_HOTBACKUP
# include "ibuf0types.h"
+/* Possible operations buffered in the insert/whatever buffer. See
+ibuf_insert(). DO NOT CHANGE THE VALUES OF THESE, THEY ARE STORED ON DISK. */
+typedef enum {
+ IBUF_OP_INSERT = 0,
+ IBUF_OP_DELETE_MARK = 1,
+ IBUF_OP_DELETE = 2,
+
+ /* Number of different operation types. */
+ IBUF_OP_COUNT = 3
+} ibuf_op_t;
+
/** Combinations of operations that can be buffered. Because the enum
values are used for indexing innobase_change_buffering_values[], they
should start at 0 and there should not be any gaps. */
typedef enum {
IBUF_USE_NONE = 0,
IBUF_USE_INSERT, /* insert */
+ IBUF_USE_DELETE_MARK, /* delete */
+ IBUF_USE_INSERT_DELETE_MARK, /* insert+delete */
+ IBUF_USE_DELETE, /* delete+purge */
+ IBUF_USE_ALL, /* insert+delete+purge */
IBUF_USE_COUNT /* number of entries in ibuf_use_t */
} ibuf_use_t;
@@ -48,6 +63,11 @@ typedef enum {
/** Operations that can currently be buffered. */
extern ibuf_use_t ibuf_use;
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+/** Flag to control insert buffer debugging. */
+extern uint ibuf_debug;
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
/** The insert buffer control structure */
extern ibuf_t* ibuf;
@@ -72,8 +92,7 @@ separately committed mini-transaction, because in crash recovery, the
free bits could momentarily be set too high. */
/******************************************************************//**
-Creates the insert buffer data structure at a database startup and
-initializes the data structures for the insert buffer of each tablespace. */
+Creates the insert buffer data structure at a database startup. */
UNIV_INTERN
void
ibuf_init_at_db_start(void);
@@ -85,6 +104,22 @@ UNIV_INTERN
void
ibuf_update_max_tablespace_id(void);
/*===============================*/
+/***************************************************************//**
+Starts an insert buffer mini-transaction. */
+UNIV_INLINE
+void
+ibuf_mtr_start(
+/*===========*/
+ mtr_t* mtr) /*!< out: mini-transaction */
+ __attribute__((nonnull));
+/***************************************************************//**
+Commits an insert buffer mini-transaction. */
+UNIV_INLINE
+void
+ibuf_mtr_commit(
+/*============*/
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
/*********************************************************************//**
Initializes an ibuf bitmap page. */
UNIV_INTERN
@@ -205,10 +240,12 @@ routine.
For instance, a read-ahead of non-ibuf pages is forbidden by threads
that are executing an insert buffer routine.
@return TRUE if inside an insert buffer routine */
-UNIV_INTERN
+UNIV_INLINE
ibool
-ibuf_inside(void);
-/*=============*/
+ibuf_inside(
+/*========*/
+ const mtr_t* mtr) /*!< in: mini-transaction */
+ __attribute__((nonnull, pure));
/***********************************************************************//**
Checks if a page address is an ibuf bitmap page (level 3 page) address.
@return TRUE if a bitmap page */
@@ -225,15 +262,44 @@ Must not be called when recv_no_ibuf_operations==TRUE.
@return TRUE if level 2 or level 3 page */
UNIV_INTERN
ibool
-ibuf_page(
-/*======*/
- ulint space, /*!< in: space id */
- ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
- ulint page_no,/*!< in: page number */
- mtr_t* mtr); /*!< in: mtr which will contain an x-latch to the
- bitmap page if the page is not one of the fixed
- address ibuf pages, or NULL, in which case a new
- transaction is created. */
+ibuf_page_low(
+/*==========*/
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+ ulint page_no,/*!< in: page number */
+#ifdef UNIV_DEBUG
+ ibool x_latch,/*!< in: FALSE if relaxed check
+ (avoid latching the bitmap page) */
+#endif /* UNIV_DEBUG */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
+ mtr_t* mtr) /*!< in: mtr which will contain an
+ x-latch to the bitmap page if the page
+ is not one of the fixed address ibuf
+ pages, or NULL, in which case a new
+ transaction is created. */
+ __attribute__((warn_unused_result));
+#ifdef UNIV_DEBUG
+/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of
+pages. Must not be called when recv_no_ibuf_operations==TRUE.
+@param space tablespace identifier
+@param zip_size compressed page size in bytes, or 0
+@param page_no page number
+@param mtr mini-transaction or NULL
+@return TRUE if level 2 or level 3 page */
+# define ibuf_page(space, zip_size, page_no, mtr) \
+ ibuf_page_low(space, zip_size, page_no, TRUE, __FILE__, __LINE__, mtr)
+#else /* UVIV_DEBUG */
+/** Checks if a page is a level 2 or 3 page in the ibuf hierarchy of
+pages. Must not be called when recv_no_ibuf_operations==TRUE.
+@param space tablespace identifier
+@param zip_size compressed page size in bytes, or 0
+@param page_no page number
+@param mtr mini-transaction or NULL
+@return TRUE if level 2 or level 3 page */
+# define ibuf_page(space, zip_size, page_no, mtr) \
+ ibuf_page_low(space, zip_size, page_no, __FILE__, __LINE__, mtr)
+#endif /* UVIV_DEBUG */
/***********************************************************************//**
Frees excess pages from the ibuf free list. This function is called when an OS
thread calls fsp services to allocate a new file segment, or a new page to a
@@ -243,14 +309,15 @@ void
ibuf_free_excess_pages(void);
/*========================*/
/*********************************************************************//**
-Makes an index insert to the insert buffer, instead of directly to the disk
-page, if this is possible. Does not do insert if the index is clustered
-or unique.
+Buffer an operation in the insert/delete buffer, instead of doing it
+directly to the disk page, if this is possible. Does not do it if the index
+is clustered or unique.
@return TRUE if success */
UNIV_INTERN
ibool
ibuf_insert(
/*========*/
+ ibuf_op_t op, /*!< in: operation type */
const dtuple_t* entry, /*!< in: index entry to insert */
dict_index_t* index, /*!< in: index where to insert */
ulint space, /*!< in: space id where to insert */
@@ -259,11 +326,11 @@ ibuf_insert(
que_thr_t* thr); /*!< in: query thread */
/*********************************************************************//**
When an index page is read from a disk to the buffer pool, this function
-inserts to the page the possible index entries buffered in the insert buffer.
-The entries are deleted from the insert buffer. If the page is not read, but
-created in the buffer pool, this function deletes its buffered entries from
-the insert buffer; there can exist entries for such a page if the page
-belonged to an index which subsequently was dropped. */
+applies any buffered operations to the page and deletes the entries from the
+insert buffer. If the page is not read, but created in the buffer pool, this
+function deletes its buffered entries from the insert buffer; there can
+exist entries for such a page if the page belonged to an index which
+subsequently was dropped. */
UNIV_INTERN
void
ibuf_merge_or_delete_for_page(
@@ -356,12 +423,37 @@ void
ibuf_print(
/*=======*/
FILE* file); /*!< in: file where to print */
+/********************************************************************
+Read the first two bytes from a record's fourth field (counter field in new
+records; something else in older records).
+@return "counter" field, or ULINT_UNDEFINED if for some reason it can't be read */
+UNIV_INTERN
+ulint
+ibuf_rec_get_counter(
+/*=================*/
+ const rec_t* rec); /*!< in: ibuf record */
/******************************************************************//**
Closes insert buffer and frees the data structures. */
UNIV_INTERN
void
ibuf_close(void);
/*============*/
+/******************************************************************//**
+Function to pass ibuf status variables */
+UNIV_INTERN
+void
+ibuf_export_ibuf_status(
+/*====================*/
+ ulint* size,
+ ulint* free_list,
+ ulint* segment_size,
+ ulint* merges,
+ ulint* merged_inserts,
+ ulint* merged_delete_marks,
+ ulint* merged_deletes,
+ ulint* discarded_inserts,
+ ulint* discarded_delete_marks,
+ ulint* discarded_deletes);
#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO
#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO
diff --git a/storage/xtradb/include/ibuf0ibuf.ic b/storage/xtradb/include/ibuf0ibuf.ic
index 15bbe61ab30..0a22667a260 100644
--- a/storage/xtradb/include/ibuf0ibuf.ic
+++ b/storage/xtradb/include/ibuf0ibuf.ic
@@ -37,6 +37,30 @@ buffer inserts to this page. If there is this much of free space, the
corresponding bits are set in the ibuf bitmap. */
#define IBUF_PAGE_SIZE_PER_FREE_SPACE 32
+/***************************************************************//**
+Starts an insert buffer mini-transaction. */
+UNIV_INLINE
+void
+ibuf_mtr_start(
+/*===========*/
+ mtr_t* mtr) /*!< out: mini-transaction */
+{
+ mtr_start(mtr);
+ mtr->inside_ibuf = TRUE;
+}
+/***************************************************************//**
+Commits an insert buffer mini-transaction. */
+UNIV_INLINE
+void
+ibuf_mtr_commit(
+/*============*/
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ ut_ad(mtr->inside_ibuf);
+ ut_d(mtr->inside_ibuf = FALSE);
+ mtr_commit(mtr);
+}
+
/** Insert buffer struct */
struct ibuf_struct{
ulint size; /*!< current size of the ibuf index
@@ -46,19 +70,25 @@ struct ibuf_struct{
ulint seg_size; /*!< allocated pages of the file
segment containing ibuf header and
tree */
- ibool empty; /*!< after an insert to the ibuf tree
- is performed, this is set to FALSE,
- and if a contract operation finds
- the tree empty, this is set to
- TRUE */
+ ibool empty; /*!< Protected by the page
+ latch of the root page of the
+ insert buffer tree
+ (FSP_IBUF_TREE_ROOT_PAGE_NO). TRUE
+ if and only if the insert
+ buffer tree is empty. */
ulint free_list_len; /*!< length of the free list */
ulint height; /*!< tree height */
dict_index_t* index; /*!< insert buffer index */
- ulint n_inserts; /*!< number of inserts made to
- the insert buffer */
ulint n_merges; /*!< number of pages merged */
- ulint n_merged_recs; /*!< number of records merged */
+ ulint n_merged_ops[IBUF_OP_COUNT];
+ /*!< number of operations of each type
+ merged to index pages */
+ ulint n_discarded_ops[IBUF_OP_COUNT];
+ /*!< number of operations of each type
+ discarded without merging due to the
+ tablespace being deleted or the
+ index being dropped */
};
/************************************************************************//**
@@ -105,7 +135,7 @@ ibuf_should_try(
if (ibuf_flush_count % 4 == 0) {
- buf_LRU_try_free_flushed_blocks();
+ buf_LRU_try_free_flushed_blocks(NULL);
}
return(TRUE);
@@ -114,6 +144,22 @@ ibuf_should_try(
return(FALSE);
}
+/******************************************************************//**
+Returns TRUE if the current OS thread is performing an insert buffer
+routine.
+
+For instance, a read-ahead of non-ibuf pages is forbidden by threads
+that are executing an insert buffer routine.
+@return TRUE if inside an insert buffer routine */
+UNIV_INLINE
+ibool
+ibuf_inside(
+/*========*/
+ const mtr_t* mtr) /*!< in: mini-transaction */
+{
+ return(mtr->inside_ibuf);
+}
+
/***********************************************************************//**
Checks if a page address is an ibuf bitmap page address.
@return TRUE if a bitmap page */
diff --git a/storage/xtradb/include/lock0lock.h b/storage/xtradb/include/lock0lock.h
index 73f885ecf04..ea636f985b4 100644
--- a/storage/xtradb/include/lock0lock.h
+++ b/storage/xtradb/include/lock0lock.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -43,7 +43,7 @@ extern ibool lock_print_waits;
#endif /* UNIV_DEBUG */
/* Buffer for storing information about the most recent deadlock error */
extern FILE* lock_latest_err_file;
-extern ulint srv_n_lock_deadlock_count;
+extern ulint srv_n_lock_deadlock_count;
/*********************************************************************//**
Gets the size of a lock struct.
@@ -73,9 +73,10 @@ UNIV_INLINE
trx_t*
lock_clust_rec_some_has_impl(
/*=========================*/
- const rec_t* rec, /*!< in: user record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
+ const rec_t* rec, /*!< in: user record */
+ const dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Gets the heap_no of the smallest user record on a page.
@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
@@ -640,7 +641,7 @@ UNIV_INTERN
ulint
lock_number_of_rows_locked(
/*=======================*/
- trx_t* trx); /*!< in: transaction */
+ const trx_t* trx); /*!< in: transaction */
/*******************************************************************//**
Check if a transaction holds any autoinc locks.
@return TRUE if the transaction holds any AUTOINC locks. */
@@ -671,7 +672,7 @@ lock_get_type(
Gets the id of the transaction owning a lock.
@return transaction id */
UNIV_INTERN
-ullint
+trx_id_t
lock_get_trx_id(
/*============*/
const lock_t* lock); /*!< in: lock */
@@ -700,7 +701,7 @@ lock_get_type_str(
Gets the id of the table on which the lock is.
@return id of the table */
UNIV_INTERN
-ullint
+table_id_t
lock_get_table_id(
/*==============*/
const lock_t* lock); /*!< in: lock */
@@ -816,6 +817,7 @@ struct lock_op_struct{
/** The lock system struct */
struct lock_sys_struct{
hash_table_t* rec_hash; /*!< hash table of the record locks */
+ ulint rec_num;
};
/** The lock system */
diff --git a/storage/xtradb/include/lock0lock.ic b/storage/xtradb/include/lock0lock.ic
index 014722f51c4..1d740a5fa43 100644
--- a/storage/xtradb/include/lock0lock.ic
+++ b/storage/xtradb/include/lock0lock.ic
@@ -75,9 +75,9 @@ UNIV_INLINE
trx_t*
lock_clust_rec_some_has_impl(
/*=========================*/
- const rec_t* rec, /*!< in: user record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const rec_t* rec, /*!< in: user record */
+ const dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
trx_id_t trx_id;
diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h
index 8fce4ef96bc..e0bffe6f725 100644
--- a/storage/xtradb/include/log0log.h
+++ b/storage/xtradb/include/log0log.h
@@ -672,6 +672,9 @@ extern log_t* log_sys;
when mysqld is first time started
on the restored database, it can
print helpful info for the user */
+#define LOG_FILE_OS_FILE_LOG_BLOCK_SIZE 64
+ /* extend to record log_block_size
+ of XtraDB. 0 means default 512 */
#define LOG_FILE_ARCH_COMPLETED OS_FILE_LOG_BLOCK_SIZE
/* this 4-byte field is TRUE when
the writing of an archived log file
@@ -763,6 +766,15 @@ struct log_struct{
#ifndef UNIV_HOTBACKUP
mutex_t mutex; /*!< mutex protecting the log */
#endif /* !UNIV_HOTBACKUP */
+
+ mutex_t log_flush_order_mutex;/*!< mutex to serialize access to
+ the flush list when we are putting
+ dirty blocks in the list. The idea
+ behind this mutex is to be able
+ to release log_sys->mutex during
+ mtr_commit and still ensure that
+ insertions in the flush_list happen
+ in the LSN order. */
byte* buf_ptr; /* unaligned log buffer */
byte* buf; /*!< log buffer */
ulint buf_size; /*!< log buffer size in bytes */
@@ -952,6 +964,19 @@ struct log_struct{
#endif /* UNIV_LOG_ARCHIVE */
};
+/** Test if flush order mutex is owned. */
+#define log_flush_order_mutex_own() \
+ mutex_own(&log_sys->log_flush_order_mutex)
+
+/** Acquire the flush order mutex. */
+#define log_flush_order_mutex_enter() do { \
+ mutex_enter(&log_sys->log_flush_order_mutex); \
+} while (0)
+/** Release the flush order mutex. */
+# define log_flush_order_mutex_exit() do { \
+ mutex_exit(&log_sys->log_flush_order_mutex); \
+} while (0)
+
#ifdef UNIV_LOG_ARCHIVE
/** Archiving state @{ */
#define LOG_ARCH_ON 71
diff --git a/storage/xtradb/include/log0log.ic b/storage/xtradb/include/log0log.ic
index 1ce00fd7313..67db6695cab 100644
--- a/storage/xtradb/include/log0log.ic
+++ b/storage/xtradb/include/log0log.ic
@@ -435,7 +435,7 @@ log_free_check(void)
{
#ifdef UNIV_SYNC_DEBUG
- ut_ad(sync_thread_levels_empty_gen(TRUE));
+ ut_ad(sync_thread_levels_empty_except_dict());
#endif /* UNIV_SYNC_DEBUG */
if (log_sys->check_flush_or_checkpoint) {
diff --git a/storage/xtradb/include/mach0data.h b/storage/xtradb/include/mach0data.h
index 44ee3df22ce..8434bc73586 100644
--- a/storage/xtradb/include/mach0data.h
+++ b/storage/xtradb/include/mach0data.h
@@ -166,14 +166,14 @@ UNIV_INLINE
void
mach_write_to_6(
/*============*/
- byte* b, /*!< in: pointer to 6 bytes where to store */
- dulint n); /*!< in: dulint integer to be stored */
+ byte* b, /*!< in: pointer to 6 bytes where to store */
+ ib_uint64_t id); /*!< in: 48-bit integer */
/********************************************************//**
The following function is used to fetch data from 6 consecutive
bytes. The most significant byte is at the lowest address.
-@return dulint integer */
+@return 48-bit integer */
UNIV_INLINE
-dulint
+ib_uint64_t
mach_read_from_6(
/*=============*/
const byte* b) /*!< in: pointer to 6 bytes */
@@ -185,14 +185,14 @@ UNIV_INLINE
void
mach_write_to_7(
/*============*/
- byte* b, /*!< in: pointer to 7 bytes where to store */
- dulint n); /*!< in: dulint integer to be stored */
+ byte* b, /*!< in: pointer to 7 bytes where to store */
+ ib_uint64_t n); /*!< in: 56-bit integer */
/********************************************************//**
The following function is used to fetch data from 7 consecutive
bytes. The most significant byte is at the lowest address.
-@return dulint integer */
+@return 56-bit integer */
UNIV_INLINE
-dulint
+ib_uint64_t
mach_read_from_7(
/*=============*/
const byte* b) /*!< in: pointer to 7 bytes */
@@ -204,88 +204,69 @@ UNIV_INLINE
void
mach_write_to_8(
/*============*/
- byte* b, /*!< in: pointer to 8 bytes where to store */
- dulint n); /*!< in: dulint integer to be stored */
-/*******************************************************//**
-The following function is used to store data in 8 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_ull(
-/*===========*/
byte* b, /*!< in: pointer to 8 bytes where to store */
ib_uint64_t n); /*!< in: 64-bit integer to be stored */
/********************************************************//**
The following function is used to fetch data from 8 consecutive
bytes. The most significant byte is at the lowest address.
-@return dulint integer */
-UNIV_INLINE
-dulint
-mach_read_from_8(
-/*=============*/
- const byte* b) /*!< in: pointer to 8 bytes */
- __attribute__((nonnull, pure));
-/********************************************************//**
-The following function is used to fetch data from 8 consecutive
-bytes. The most significant byte is at the lowest address.
@return 64-bit integer */
UNIV_INLINE
ib_uint64_t
-mach_read_ull(
-/*==========*/
+mach_read_from_8(
+/*=============*/
const byte* b) /*!< in: pointer to 8 bytes */
__attribute__((nonnull, pure));
/*********************************************************//**
-Writes a dulint in a compressed form (5..9 bytes).
+Writes a 64-bit integer in a compressed form (5..9 bytes).
@return size in bytes */
UNIV_INLINE
ulint
-mach_dulint_write_compressed(
-/*=========================*/
- byte* b, /*!< in: pointer to memory where to store */
- dulint n); /*!< in: dulint integer to be stored */
+mach_ull_write_compressed(
+/*======================*/
+ byte* b, /*!< in: pointer to memory where to store */
+ ib_uint64_t n); /*!< in: 64-bit integer to be stored */
/*********************************************************//**
-Returns the size of a dulint when written in the compressed form.
+Returns the size of a 64-bit integer when written in the compressed form.
@return compressed size in bytes */
UNIV_INLINE
ulint
-mach_dulint_get_compressed_size(
-/*============================*/
- dulint n); /*!< in: dulint integer to be stored */
+mach_ull_get_compressed_size(
+/*=========================*/
+ ib_uint64_t n); /*!< in: 64-bit integer to be stored */
/*********************************************************//**
-Reads a dulint in a compressed form.
-@return read dulint */
+Reads a 64-bit integer in a compressed form.
+@return the value read */
UNIV_INLINE
-dulint
-mach_dulint_read_compressed(
-/*========================*/
+ib_uint64_t
+mach_ull_read_compressed(
+/*=====================*/
const byte* b) /*!< in: pointer to memory from where to read */
__attribute__((nonnull, pure));
/*********************************************************//**
-Writes a dulint in a compressed form (1..11 bytes).
+Writes a 64-bit integer in a compressed form (1..11 bytes).
@return size in bytes */
UNIV_INLINE
ulint
-mach_dulint_write_much_compressed(
-/*==============================*/
- byte* b, /*!< in: pointer to memory where to store */
- dulint n); /*!< in: dulint integer to be stored */
+mach_ull_write_much_compressed(
+/*===========================*/
+ byte* b, /*!< in: pointer to memory where to store */
+ ib_uint64_t n); /*!< in: 64-bit integer to be stored */
/*********************************************************//**
-Returns the size of a dulint when written in the compressed form.
+Returns the size of a 64-bit integer when written in the compressed form.
@return compressed size in bytes */
UNIV_INLINE
ulint
-mach_dulint_get_much_compressed_size(
-/*=================================*/
- dulint n) /*!< in: dulint integer to be stored */
+mach_ull_get_much_compressed_size(
+/*==============================*/
+ ib_uint64_t n) /*!< in: 64-bit integer to be stored */
__attribute__((const));
/*********************************************************//**
-Reads a dulint in a compressed form.
-@return read dulint */
+Reads a 64-bit integer in a compressed form.
+@return the value read */
UNIV_INLINE
-dulint
-mach_dulint_read_much_compressed(
-/*=============================*/
+ib_uint64_t
+mach_ull_read_much_compressed(
+/*==========================*/
const byte* b) /*!< in: pointer to memory from where to read */
__attribute__((nonnull, pure));
/*********************************************************//**
@@ -299,15 +280,16 @@ mach_parse_compressed(
byte* end_ptr,/*!< in: pointer to end of the buffer */
ulint* val); /*!< out: read value */
/*********************************************************//**
-Reads a dulint in a compressed form if the log record fully contains it.
-@return pointer to end of the stored field, NULL if not complete */
-UNIV_INTERN
+Reads a 64-bit integer in a compressed form
+if the log record fully contains it.
+@return pointer to end of the stored field, NULL if not complete */
+UNIV_INLINE
byte*
-mach_dulint_parse_compressed(
-/*=========================*/
- byte* ptr, /*!< in: pointer to buffer from where to read */
- byte* end_ptr,/*!< in: pointer to end of the buffer */
- dulint* val); /*!< out: read value */
+mach_ull_parse_compressed(
+/*======================*/
+ byte* ptr, /*!< in: pointer to buffer from where to read */
+ byte* end_ptr,/*!< in: pointer to end of the buffer */
+ ib_uint64_t* val); /*!< out: read value */
#ifndef UNIV_HOTBACKUP
/*********************************************************//**
Reads a double. It is stored in a little-endian format.
diff --git a/storage/xtradb/include/mach0data.ic b/storage/xtradb/include/mach0data.ic
index 96d2417ac81..b1e5991d39e 100644
--- a/storage/xtradb/include/mach0data.ic
+++ b/storage/xtradb/include/mach0data.ic
@@ -280,22 +280,6 @@ UNIV_INLINE
void
mach_write_to_8(
/*============*/
- byte* b, /*!< in: pointer to 8 bytes where to store */
- dulint n) /*!< in: dulint integer to be stored */
-{
- ut_ad(b);
-
- mach_write_to_4(b, ut_dulint_get_high(n));
- mach_write_to_4(b + 4, ut_dulint_get_low(n));
-}
-
-/*******************************************************//**
-The following function is used to store data in 8 consecutive
-bytes. We store the most significant byte to the lowest address. */
-UNIV_INLINE
-void
-mach_write_ull(
-/*===========*/
byte* b, /*!< in: pointer to 8 bytes where to store */
ib_uint64_t n) /*!< in: 64-bit integer to be stored */
{
@@ -308,32 +292,11 @@ mach_write_ull(
/********************************************************//**
The following function is used to fetch data from 8 consecutive
bytes. The most significant byte is at the lowest address.
-@return dulint integer */
-UNIV_INLINE
-dulint
-mach_read_from_8(
-/*=============*/
- const byte* b) /*!< in: pointer to 8 bytes */
-{
- ulint high;
- ulint low;
-
- ut_ad(b);
-
- high = mach_read_from_4(b);
- low = mach_read_from_4(b + 4);
-
- return(ut_dulint_create(high, low));
-}
-
-/********************************************************//**
-The following function is used to fetch data from 8 consecutive
-bytes. The most significant byte is at the lowest address.
@return 64-bit integer */
UNIV_INLINE
ib_uint64_t
-mach_read_ull(
-/*==========*/
+mach_read_from_8(
+/*=============*/
const byte* b) /*!< in: pointer to 8 bytes */
{
ib_uint64_t ull;
@@ -351,34 +314,28 @@ UNIV_INLINE
void
mach_write_to_7(
/*============*/
- byte* b, /*!< in: pointer to 7 bytes where to store */
- dulint n) /*!< in: dulint integer to be stored */
+ byte* b, /*!< in: pointer to 7 bytes where to store */
+ ib_uint64_t n) /*!< in: 56-bit integer */
{
ut_ad(b);
- mach_write_to_3(b, ut_dulint_get_high(n));
- mach_write_to_4(b + 3, ut_dulint_get_low(n));
+ mach_write_to_3(b, (ulint) (n >> 32));
+ mach_write_to_4(b + 3, (ulint) n);
}
/********************************************************//**
The following function is used to fetch data from 7 consecutive
bytes. The most significant byte is at the lowest address.
-@return dulint integer */
+@return 56-bit integer */
UNIV_INLINE
-dulint
+ib_uint64_t
mach_read_from_7(
/*=============*/
const byte* b) /*!< in: pointer to 7 bytes */
{
- ulint high;
- ulint low;
-
ut_ad(b);
- high = mach_read_from_3(b);
- low = mach_read_from_4(b + 3);
-
- return(ut_dulint_create(high, low));
+ return(ut_ull_create(mach_read_from_3(b), mach_read_from_4(b + 3)));
}
/*******************************************************//**
@@ -388,162 +345,196 @@ UNIV_INLINE
void
mach_write_to_6(
/*============*/
- byte* b, /*!< in: pointer to 6 bytes where to store */
- dulint n) /*!< in: dulint integer to be stored */
+ byte* b, /*!< in: pointer to 6 bytes where to store */
+ ib_uint64_t n) /*!< in: 48-bit integer */
{
ut_ad(b);
- mach_write_to_2(b, ut_dulint_get_high(n));
- mach_write_to_4(b + 2, ut_dulint_get_low(n));
+ mach_write_to_2(b, (ulint) (n >> 32));
+ mach_write_to_4(b + 2, (ulint) n);
}
/********************************************************//**
The following function is used to fetch data from 6 consecutive
bytes. The most significant byte is at the lowest address.
-@return dulint integer */
+@return 48-bit integer */
UNIV_INLINE
-dulint
+ib_uint64_t
mach_read_from_6(
/*=============*/
const byte* b) /*!< in: pointer to 6 bytes */
{
- ulint high;
- ulint low;
-
ut_ad(b);
- high = mach_read_from_2(b);
- low = mach_read_from_4(b + 2);
-
- return(ut_dulint_create(high, low));
+ return(ut_ull_create(mach_read_from_2(b), mach_read_from_4(b + 2)));
}
/*********************************************************//**
-Writes a dulint in a compressed form (5..9 bytes).
+Writes a 64-bit integer in a compressed form (5..9 bytes).
@return size in bytes */
UNIV_INLINE
ulint
-mach_dulint_write_compressed(
-/*=========================*/
- byte* b, /*!< in: pointer to memory where to store */
- dulint n) /*!< in: dulint integer to be stored */
+mach_ull_write_compressed(
+/*======================*/
+ byte* b, /*!< in: pointer to memory where to store */
+ ib_uint64_t n) /*!< in: 64-bit integer to be stored */
{
ulint size;
ut_ad(b);
- size = mach_write_compressed(b, ut_dulint_get_high(n));
- mach_write_to_4(b + size, ut_dulint_get_low(n));
+ size = mach_write_compressed(b, (ulint) (n >> 32));
+ mach_write_to_4(b + size, (ulint) n);
return(size + 4);
}
/*********************************************************//**
-Returns the size of a dulint when written in the compressed form.
+Returns the size of a 64-bit integer when written in the compressed form.
@return compressed size in bytes */
UNIV_INLINE
ulint
-mach_dulint_get_compressed_size(
-/*============================*/
- dulint n) /*!< in: dulint integer to be stored */
+mach_ull_get_compressed_size(
+/*=========================*/
+ ib_uint64_t n) /*!< in: 64-bit integer to be stored */
{
- return(4 + mach_get_compressed_size(ut_dulint_get_high(n)));
+ return(4 + mach_get_compressed_size((ulint) (n >> 32)));
}
/*********************************************************//**
-Reads a dulint in a compressed form.
-@return read dulint */
+Reads a 64-bit integer in a compressed form.
+@return the value read */
UNIV_INLINE
-dulint
-mach_dulint_read_compressed(
-/*========================*/
+ib_uint64_t
+mach_ull_read_compressed(
+/*=====================*/
const byte* b) /*!< in: pointer to memory from where to read */
{
- ulint high;
- ulint low;
- ulint size;
+ ib_uint64_t n;
+ ulint size;
ut_ad(b);
- high = mach_read_compressed(b);
+ n = (ib_uint64_t) mach_read_compressed(b);
- size = mach_get_compressed_size(high);
+ size = mach_get_compressed_size((ulint) n);
- low = mach_read_from_4(b + size);
+ n <<= 32;
+ n |= (ib_uint64_t) mach_read_from_4(b + size);
- return(ut_dulint_create(high, low));
+ return(n);
}
/*********************************************************//**
-Writes a dulint in a compressed form (1..11 bytes).
+Writes a 64-bit integer in a compressed form (1..11 bytes).
@return size in bytes */
UNIV_INLINE
ulint
-mach_dulint_write_much_compressed(
-/*==============================*/
- byte* b, /*!< in: pointer to memory where to store */
- dulint n) /*!< in: dulint integer to be stored */
+mach_ull_write_much_compressed(
+/*===========================*/
+ byte* b, /*!< in: pointer to memory where to store */
+ ib_uint64_t n) /*!< in: 64-bit integer to be stored */
{
ulint size;
ut_ad(b);
- if (ut_dulint_get_high(n) == 0) {
- return(mach_write_compressed(b, ut_dulint_get_low(n)));
+ if (!(n >> 32)) {
+ return(mach_write_compressed(b, (ulint) n));
}
*b = (byte)0xFF;
- size = 1 + mach_write_compressed(b + 1, ut_dulint_get_high(n));
+ size = 1 + mach_write_compressed(b + 1, (ulint) (n >> 32));
- size += mach_write_compressed(b + size, ut_dulint_get_low(n));
+ size += mach_write_compressed(b + size, (ulint) n & 0xFFFFFFFF);
return(size);
}
/*********************************************************//**
-Returns the size of a dulint when written in the compressed form.
+Returns the size of a 64-bit integer when written in the compressed form.
@return compressed size in bytes */
UNIV_INLINE
ulint
-mach_dulint_get_much_compressed_size(
-/*=================================*/
- dulint n) /*!< in: dulint integer to be stored */
+mach_ull_get_much_compressed_size(
+/*==============================*/
+ ib_uint64_t n) /*!< in: 64-bit integer to be stored */
{
- if (0 == ut_dulint_get_high(n)) {
- return(mach_get_compressed_size(ut_dulint_get_low(n)));
+ if (!(n >> 32)) {
+ return(mach_get_compressed_size((ulint) n));
}
- return(1 + mach_get_compressed_size(ut_dulint_get_high(n))
- + mach_get_compressed_size(ut_dulint_get_low(n)));
+ return(1 + mach_get_compressed_size((ulint) (n >> 32))
+ + mach_get_compressed_size((ulint) n & ULINT32_MASK));
}
/*********************************************************//**
-Reads a dulint in a compressed form.
-@return read dulint */
+Reads a 64-bit integer in a compressed form.
+@return the value read */
UNIV_INLINE
-dulint
-mach_dulint_read_much_compressed(
-/*=============================*/
+ib_uint64_t
+mach_ull_read_much_compressed(
+/*==========================*/
const byte* b) /*!< in: pointer to memory from where to read */
{
- ulint high;
- ulint low;
- ulint size;
+ ib_uint64_t n;
+ ulint size;
ut_ad(b);
if (*b != (byte)0xFF) {
- high = 0;
+ n = 0;
size = 0;
} else {
- high = mach_read_compressed(b + 1);
+ n = (ib_uint64_t) mach_read_compressed(b + 1);
+
+ size = 1 + mach_get_compressed_size((ulint) n);
+ n <<= 32;
+ }
+
+ n |= mach_read_compressed(b + size);
+
+ return(n);
+}
+
+/*********************************************************//**
+Reads a 64-bit integer in a compressed form
+if the log record fully contains it.
+@return pointer to end of the stored field, NULL if not complete */
+UNIV_INLINE
+byte*
+mach_ull_parse_compressed(
+/*======================*/
+ byte* ptr, /* in: pointer to buffer from where to read */
+ byte* end_ptr,/* in: pointer to end of the buffer */
+ ib_uint64_t* val) /* out: read value */
+{
+ ulint size;
+
+ ut_ad(ptr);
+ ut_ad(end_ptr);
+ ut_ad(val);
+
+ if (end_ptr < ptr + 5) {
+
+ return(NULL);
+ }
+
+ *val = mach_read_compressed(ptr);
+
+ size = mach_get_compressed_size((ulint) *val);
+
+ ptr += size;
+
+ if (end_ptr < ptr + 4) {
- size = 1 + mach_get_compressed_size(high);
+ return(NULL);
}
- low = mach_read_compressed(b + size);
+ *val <<= 32;
+ *val |= mach_read_from_4(ptr);
- return(ut_dulint_create(high, low));
+ return(ptr + 4);
}
#ifndef UNIV_HOTBACKUP
/*********************************************************//**
diff --git a/storage/xtradb/include/mem0mem.h b/storage/xtradb/include/mem0mem.h
index ee28cf7b225..5181bb4c9f7 100644
--- a/storage/xtradb/include/mem0mem.h
+++ b/storage/xtradb/include/mem0mem.h
@@ -359,7 +359,7 @@ struct mem_block_info_struct {
to the heap is also the first block in this list,
though it also contains the base node of the list. */
ulint len; /*!< physical length of this block in bytes */
- ulint total_size; /* physical length in bytes of all blocks
+ ulint total_size; /*!< physical length in bytes of all blocks
in the heap. This is defined only in the base
node and is set to ULINT_UNDEFINED in others. */
ulint type; /*!< type of heap: MEM_HEAP_DYNAMIC, or
diff --git a/storage/xtradb/include/mem0mem.ic b/storage/xtradb/include/mem0mem.ic
index cbce2edc661..d214c3fe6c9 100644
--- a/storage/xtradb/include/mem0mem.ic
+++ b/storage/xtradb/include/mem0mem.ic
@@ -350,27 +350,27 @@ mem_heap_get_top(
ulint n) /*!< in: size of the topmost element */
{
mem_block_t* block;
- void* buf;
+ byte* buf;
ut_ad(mem_heap_check(heap));
block = UT_LIST_GET_LAST(heap->base);
- buf = (byte*)block + mem_block_get_free(block) - MEM_SPACE_NEEDED(n);
+ buf = (byte*) block + mem_block_get_free(block) - MEM_SPACE_NEEDED(n);
#ifdef UNIV_MEM_DEBUG
- ut_ad(mem_block_get_start(block) <=(ulint)((byte*)buf - (byte*)block));
+ ut_ad(mem_block_get_start(block) <= (ulint) (buf - (byte*) block));
/* In the debug version, advance buf to point at the storage which
was given to the caller in the allocation*/
- buf = (byte*)buf + MEM_FIELD_HEADER_SIZE;
+ buf += MEM_FIELD_HEADER_SIZE;
/* Check that the field lengths agree */
- ut_ad(n == (ulint)mem_field_header_get_len(buf));
+ ut_ad(n == mem_field_header_get_len(buf));
#endif
- return(buf);
+ return((void*) buf);
}
/*****************************************************************//**
diff --git a/storage/xtradb/include/mtr0log.h b/storage/xtradb/include/mtr0log.h
index 6322af2a569..d271002a5fe 100644
--- a/storage/xtradb/include/mtr0log.h
+++ b/storage/xtradb/include/mtr0log.h
@@ -47,11 +47,11 @@ Writes 8 bytes to a file page buffered in the buffer pool.
Writes the corresponding log record to the mini-transaction log. */
UNIV_INTERN
void
-mlog_write_dulint(
-/*==============*/
- byte* ptr, /*!< in: pointer where to write */
- dulint val, /*!< in: value to write */
- mtr_t* mtr); /*!< in: mini-transaction handle */
+mlog_write_ull(
+/*===========*/
+ byte* ptr, /*!< in: pointer where to write */
+ ib_uint64_t val, /*!< in: value to write */
+ mtr_t* mtr); /*!< in: mini-transaction handle */
/********************************************************//**
Writes a string to a file page buffered in the buffer pool. Writes the
corresponding log record to the mini-transaction log. */
@@ -125,13 +125,13 @@ mlog_catenate_ulint_compressed(
mtr_t* mtr, /*!< in: mtr */
ulint val); /*!< in: value to write */
/********************************************************//**
-Catenates a compressed dulint to mlog. */
+Catenates a compressed 64-bit integer to mlog. */
UNIV_INLINE
void
-mlog_catenate_dulint_compressed(
-/*============================*/
- mtr_t* mtr, /*!< in: mtr */
- dulint val); /*!< in: value to write */
+mlog_catenate_ull_compressed(
+/*=========================*/
+ mtr_t* mtr, /*!< in: mtr */
+ ib_uint64_t val); /*!< in: value to write */
/********************************************************//**
Opens a buffer to mlog. It must be closed with mlog_close.
@return buffer, NULL if log mode MTR_LOG_NONE */
@@ -183,7 +183,7 @@ mlog_parse_initial_log_record(
ulint* space, /*!< out: space id */
ulint* page_no);/*!< out: page number */
/********************************************************//**
-Parses a log record written by mlog_write_ulint or mlog_write_dulint.
+Parses a log record written by mlog_write_ulint or mlog_write_ull.
@return parsed record end, NULL if not a complete record */
UNIV_INTERN
byte*
diff --git a/storage/xtradb/include/mtr0log.ic b/storage/xtradb/include/mtr0log.ic
index 63af02ba409..fa9ac014f4e 100644
--- a/storage/xtradb/include/mtr0log.ic
+++ b/storage/xtradb/include/mtr0log.ic
@@ -142,13 +142,13 @@ mlog_catenate_ulint_compressed(
}
/********************************************************//**
-Catenates a compressed dulint to mlog. */
+Catenates a compressed 64-bit integer to mlog. */
UNIV_INLINE
void
-mlog_catenate_dulint_compressed(
-/*============================*/
- mtr_t* mtr, /*!< in: mtr */
- dulint val) /*!< in: value to write */
+mlog_catenate_ull_compressed(
+/*=========================*/
+ mtr_t* mtr, /*!< in: mtr */
+ ib_uint64_t val) /*!< in: value to write */
{
byte* log_ptr;
@@ -160,7 +160,7 @@ mlog_catenate_dulint_compressed(
return;
}
- log_ptr += mach_dulint_write_compressed(log_ptr, val);
+ log_ptr += mach_ull_write_compressed(log_ptr, val);
mlog_close(mtr, log_ptr);
}
diff --git a/storage/xtradb/include/mtr0mtr.h b/storage/xtradb/include/mtr0mtr.h
index bc3f1951be9..5582ad63039 100644
--- a/storage/xtradb/include/mtr0mtr.h
+++ b/storage/xtradb/include/mtr0mtr.h
@@ -190,21 +190,21 @@ functions). The page number parameter was originally written as 0. @{ */
/* @} */
/***************************************************************//**
-Starts a mini-transaction and creates a mini-transaction handle
-and buffer in the memory buffer given by the caller.
-@return mtr buffer which also acts as the mtr handle */
+Starts a mini-transaction. */
UNIV_INLINE
-mtr_t*
+void
mtr_start(
/*======*/
- mtr_t* mtr); /*!< in: memory buffer for the mtr buffer */
+ mtr_t* mtr) /*!< out: mini-transaction */
+ __attribute__((nonnull));
/***************************************************************//**
Commits a mini-transaction. */
UNIV_INTERN
void
mtr_commit(
/*=======*/
- mtr_t* mtr); /*!< in: mini-transaction */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
/**********************************************************//**
Sets and returns a savepoint in mtr.
@return savepoint */
@@ -264,15 +264,6 @@ mtr_read_ulint(
const byte* ptr, /*!< in: pointer from where to read */
ulint type, /*!< in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
mtr_t* mtr); /*!< in: mini-transaction handle */
-/********************************************************//**
-Reads 8 bytes from a file page buffered in the buffer pool.
-@return value read */
-UNIV_INTERN
-dulint
-mtr_read_dulint(
-/*============*/
- const byte* ptr, /*!< in: pointer from where to read */
- mtr_t* mtr); /*!< in: mini-transaction handle */
#ifndef UNIV_HOTBACKUP
/*********************************************************************//**
This macro locks an rw-lock in s-mode. */
@@ -387,6 +378,8 @@ struct mtr_struct{
#endif
dyn_array_t memo; /*!< memo stack for locks etc. */
dyn_array_t log; /*!< mini-transaction log */
+ ibool inside_ibuf;
+ /*!< TRUE if inside ibuf changes */
ibool modifications;
/* TRUE if the mtr made modifications to
buffer pool pages */
diff --git a/storage/xtradb/include/mtr0mtr.ic b/storage/xtradb/include/mtr0mtr.ic
index 18f8e87b3cf..1db4a4bd735 100644
--- a/storage/xtradb/include/mtr0mtr.ic
+++ b/storage/xtradb/include/mtr0mtr.ic
@@ -30,26 +30,25 @@ Created 11/26/1995 Heikki Tuuri
#include "mach0data.h"
/***************************************************************//**
-Starts a mini-transaction and creates a mini-transaction handle
-and a buffer in the memory buffer given by the caller.
-@return mtr buffer which also acts as the mtr handle */
+Starts a mini-transaction. */
UNIV_INLINE
-mtr_t*
+void
mtr_start(
/*======*/
- mtr_t* mtr) /*!< in: memory buffer for the mtr buffer */
+ mtr_t* mtr) /*!< out: mini-transaction */
{
+ UNIV_MEM_INVALID(mtr, sizeof *mtr);
+
dyn_array_create(&(mtr->memo));
dyn_array_create(&(mtr->log));
mtr->log_mode = MTR_LOG_ALL;
mtr->modifications = FALSE;
+ mtr->inside_ibuf = FALSE;
mtr->n_log_recs = 0;
ut_d(mtr->state = MTR_ACTIVE);
ut_d(mtr->magic_n = MTR_MAGIC_N);
-
- return(mtr);
}
/***************************************************//**
@@ -160,7 +159,7 @@ mtr_memo_contains(
while (offset > 0) {
offset -= sizeof(mtr_memo_slot_t);
- slot = dyn_array_get_element(memo, offset);
+ slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, offset);
if ((object == slot->object) && (type == slot->type)) {
diff --git a/storage/xtradb/include/mysql_addons.h b/storage/xtradb/include/mysql_addons.h
deleted file mode 100644
index 17660c18710..00000000000
--- a/storage/xtradb/include/mysql_addons.h
+++ /dev/null
@@ -1,33 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/mysql_addons.h
-This file contains functions that need to be added to
-MySQL code but have not been added yet.
-
-Whenever you add a function here submit a MySQL bug
-report (feature request) with the implementation. Then
-write the bug number in the comment before the
-function in this file.
-
-When MySQL commits the function it can be deleted from
-here. In a perfect world this file exists but is empty.
-
-Created November 07, 2007 Vasil Dimov
-*******************************************************/
diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h
index eeab8a2b5d9..b778adaa809 100644
--- a/storage/xtradb/include/os0file.h
+++ b/storage/xtradb/include/os0file.h
@@ -77,29 +77,23 @@ extern ulint os_n_pending_writes;
#ifdef __WIN__
/** File handle */
-#define os_file_t HANDLE
+# define os_file_t HANDLE
/** Convert a C file descriptor to a native file handle
@param fd file descriptor
@return native file handle */
-#define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd)
+# define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd)
#else
/** File handle */
typedef int os_file_t;
/** Convert a C file descriptor to a native file handle
@param fd file descriptor
@return native file handle */
-#define OS_FILE_FROM_FD(fd) fd
+# define OS_FILE_FROM_FD(fd) fd
#endif
/** Umask for creating files */
extern ulint os_innodb_umask;
-/** If this flag is TRUE, then we will use the native aio of the
-OS (provided we compiled Innobase with it in), otherwise we will
-use simulated aio we build below with threads */
-
-extern ibool os_aio_use_native_aio;
-
/** The next value should be smaller or equal to the smallest sector size used
on any disk. A log block is required to be a portion of disk which is written
so that if the start and the end of a block get written to disk, then the
@@ -107,7 +101,7 @@ whole block gets written. This should be true even in most cases of a crash:
if this fails for a log block, then it is equivalent to a media failure in the
log. */
-#define OS_FILE_LOG_BLOCK_SIZE 512
+#define OS_FILE_LOG_BLOCK_SIZE srv_log_block_size
/** Options for file_create @{ */
#define OS_FILE_OPEN 51
@@ -142,7 +136,8 @@ log. */
#define OS_FILE_SHARING_VIOLATION 76
#define OS_FILE_ERROR_NOT_SPECIFIED 77
#define OS_FILE_INSUFFICIENT_RESOURCE 78
-#define OS_FILE_OPERATION_ABORTED 79
+#define OS_FILE_AIO_INTERRUPTED 79
+#define OS_FILE_OPERATION_ABORTED 80
/* @} */
/** Types for aio operations @{ */
@@ -183,11 +178,177 @@ log. */
#define OS_WIN95 2 /*!< Microsoft Windows 95 */
#define OS_WINNT 3 /*!< Microsoft Windows NT 3.x */
#define OS_WIN2000 4 /*!< Microsoft Windows 2000 */
+#define OS_WINXP 5 /*!< Microsoft Windows XP
+ or Windows Server 2003 */
+#define OS_WINVISTA 6 /*!< Microsoft Windows Vista
+ or Windows Server 2008 */
+#define OS_WIN7 7 /*!< Microsoft Windows 7
+ or Windows Server 2008 R2 */
+
extern ulint os_n_file_reads;
extern ulint os_n_file_writes;
extern ulint os_n_fsyncs;
+extern ulint srv_log_block_size;
+
+#ifdef UNIV_PFS_IO
+/* Keys to register InnoDB I/O with performance schema */
+extern mysql_pfs_key_t innodb_file_data_key;
+extern mysql_pfs_key_t innodb_file_log_key;
+extern mysql_pfs_key_t innodb_file_temp_key;
+
+/* Following four macros are instumentations to register
+various file I/O operations with performance schema.
+1) register_pfs_file_open_begin() and register_pfs_file_open_end() are
+used to register file creation, opening, closing and renaming.
+2) register_pfs_file_io_begin() and register_pfs_file_io_end() are
+used to register actual file read, write and flush */
+# define register_pfs_file_open_begin(state, locker, key, op, name, \
+ src_file, src_line) \
+do { \
+ if (PSI_server) { \
+ locker = PSI_server->get_thread_file_name_locker( \
+ state, key, op, name, &locker); \
+ if (locker) { \
+ PSI_server->start_file_open_wait( \
+ locker, src_file, src_line); \
+ } \
+ } \
+} while (0)
+
+# define register_pfs_file_open_end(locker, file) \
+do { \
+ if (locker) { \
+ PSI_server->end_file_open_wait_and_bind_to_descriptor( \
+ locker, file); \
+ } \
+} while (0)
+
+# define register_pfs_file_io_begin(state, locker, file, count, op, \
+ src_file, src_line) \
+do { \
+ if (PSI_server) { \
+ locker = PSI_server->get_thread_file_descriptor_locker( \
+ state, file, op); \
+ if (locker) { \
+ PSI_server->start_file_wait( \
+ locker, count, src_file, src_line); \
+ } \
+ } \
+} while (0)
+
+# define register_pfs_file_io_end(locker, count) \
+do { \
+ if (locker) { \
+ PSI_server->end_file_wait(locker, count); \
+ } \
+} while (0)
+#endif /* UNIV_PFS_IO */
+
+/* Following macros/functions are file I/O APIs that would be performance
+schema instrumented if "UNIV_PFS_IO" is defined. They would point to
+wrapper functions with performance schema instrumentation in such case.
+
+os_file_create
+os_file_create_simple
+os_file_create_simple_no_error_handling
+os_file_close
+os_file_rename
+os_aio
+os_file_read
+os_file_read_no_error_handling
+os_file_write
+
+The wrapper functions have the prefix of "innodb_". */
+
+#ifdef UNIV_PFS_IO
+# define os_file_create(key, name, create, purpose, type, success) \
+ pfs_os_file_create_func(key, name, create, purpose, type, \
+ success, __FILE__, __LINE__)
+
+# define os_file_create_simple(key, name, create, access, success) \
+ pfs_os_file_create_simple_func(key, name, create, access, \
+ success, __FILE__, __LINE__)
+
+# define os_file_create_simple_no_error_handling( \
+ key, name, create_mode, access, success) \
+ pfs_os_file_create_simple_no_error_handling_func( \
+ key, name, create_mode, access, success, __FILE__, __LINE__)
+
+# define os_file_close(file) \
+ pfs_os_file_close_func(file, __FILE__, __LINE__)
+
+# define os_aio(type, mode, name, file, buf, offset, offset_high, \
+ n, message1, message2, space_id, trx) \
+ pfs_os_aio_func(type, mode, name, file, buf, offset, \
+ offset_high, n, message1, message2, space_id, trx,\
+ __FILE__, __LINE__)
+
+# define os_file_read(file, buf, offset, offset_high, n) \
+ pfs_os_file_read_func(file, buf, offset, offset_high, n, NULL, \
+ __FILE__, __LINE__)
+
+# define os_file_read_trx(file, buf, offset, offset_high, n, trx) \
+ os_file_read_func(file, buf, offset, offset_high, n, trx)
+
+# define os_file_read_no_error_handling(file, buf, offset, \
+ offset_high, n) \
+ pfs_os_file_read_no_error_handling_func(file, buf, offset, \
+ offset_high, n, \
+ __FILE__, __LINE__)
+
+# define os_file_write(name, file, buf, offset, offset_high, n) \
+ pfs_os_file_write_func(name, file, buf, offset, offset_high, \
+ n, __FILE__, __LINE__)
+
+# define os_file_flush(file, metadata) \
+ pfs_os_file_flush_func(file, metadata, __FILE__, __LINE__)
+
+# define os_file_rename(key, oldpath, newpath) \
+ pfs_os_file_rename_func(key, oldpath, newpath, __FILE__, __LINE__)
+#else /* UNIV_PFS_IO */
+
+/* If UNIV_PFS_IO is not defined, these I/O APIs point
+to original un-instrumented file I/O APIs */
+# define os_file_create(key, name, create, purpose, type, success) \
+ os_file_create_func(name, create, purpose, type, success)
+
+# define os_file_create_simple(key, name, create, access, success) \
+ os_file_create_simple_func(name, create_mode, access, success)
+
+# define os_file_create_simple_no_error_handling( \
+ key, name, create_mode, access, success) \
+ os_file_create_simple_no_error_handling_func( \
+ name, create_mode, access, success)
+
+# define os_file_close(file) os_file_close_func(file)
+
+# define os_aio(type, mode, name, file, buf, offset, offset_high, \
+ n, message1, message2, space_id, trx) \
+ os_aio_func(type, mode, name, file, buf, offset, offset_high, n,\
+ message1, message2, space_id, trx)
+
+# define os_file_read(file, buf, offset, offset_high, n) \
+ os_file_read_func(file, buf, offset, offset_high, n, NULL)
+
+# define os_file_read_trx(file, buf, offset, offset_high, n, trx) \
+ os_file_read_func(file, buf, offset, offset_high, n, trx)
+
+# define os_file_read_no_error_handling(file, buf, offset, \
+ offset_high, n) \
+ os_file_read_no_error_handling_func(file, buf, offset, offset_high, n)
+
+# define os_file_write(name, file, buf, offset, offset_high, n) \
+ os_file_write_func(name, file, buf, offset, offset_high, n)
+
+# define os_file_flush(file, metadata) os_file_flush_func(file, metadata)
+
+# define os_file_rename(key, oldpath, newpath) \
+ os_file_rename_func(oldpath, newpath)
+
+#endif /* UNIV_PFS_IO */
+
/* File types for directory entry data type */
enum os_file_type_enum{
@@ -221,13 +382,16 @@ typedef HANDLE os_file_dir_t; /*!< directory stream */
typedef DIR* os_file_dir_t; /*!< directory stream */
#endif
+#ifdef __WIN__
/***********************************************************************//**
Gets the operating system version. Currently works only on Windows.
-@return OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */
+@return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000, OS_WINXP, OS_WINVISTA,
+OS_WIN7. */
UNIV_INTERN
ulint
os_get_os_version(void);
/*===================*/
+#endif /* __WIN__ */
#ifndef UNIV_HOTBACKUP
/****************************************************************//**
Creates the seek mutexes used in positioned reads and writes. */
@@ -238,8 +402,6 @@ os_io_init_simple(void);
/***********************************************************************//**
Creates a temporary file. This function is like tmpfile(3), but
the temporary file is created in the MySQL temporary directory.
-On Netware, this function is like tmpfile(3), because the C run-time
-library of Netware does not expose the delete-on-close flag.
@return temporary file handle, or NULL on error */
FILE*
@@ -297,13 +459,15 @@ os_file_create_directory(
ibool fail_if_exists);/*!< in: if TRUE, pre-existing directory
is treated as an error. */
/****************************************************************//**
+NOTE! Use the corresponding macro os_file_create_simple(), not directly
+this function!
A simple function to open or create a file.
@return own: handle to the file, not defined if error, error number
can be retrieved with os_file_get_last_error */
UNIV_INTERN
os_file_t
-os_file_create_simple(
-/*==================*/
+os_file_create_simple_func(
+/*=======================*/
const char* name, /*!< in: name of the file or path as a
null-terminated string */
ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is
@@ -317,13 +481,15 @@ os_file_create_simple(
OS_FILE_READ_WRITE */
ibool* success);/*!< out: TRUE if succeed, FALSE if error */
/****************************************************************//**
+NOTE! Use the corresponding macro
+os_file_create_simple_no_error_handling(), not directly this function!
A simple function to open or create a file.
@return own: handle to the file, not defined if error, error number
can be retrieved with os_file_get_last_error */
UNIV_INTERN
os_file_t
-os_file_create_simple_no_error_handling(
-/*====================================*/
+os_file_create_simple_no_error_handling_func(
+/*=========================================*/
const char* name, /*!< in: name of the file or path as a
null-terminated string */
ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
@@ -347,13 +513,15 @@ os_file_set_nocache(
const char* operation_name);/*!< in: "open" or "create"; used in the
diagnostic message */
/****************************************************************//**
+NOTE! Use the corresponding macro os_file_create(), not directly
+this function!
Opens an existing file or creates a new.
@return own: handle to the file, not defined if error, error number
can be retrieved with os_file_get_last_error */
UNIV_INTERN
os_file_t
-os_file_create(
-/*===========*/
+os_file_create_func(
+/*================*/
const char* name, /*!< in: name of the file or path as a
null-terminated string */
ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
@@ -391,25 +559,262 @@ os_file_delete_if_exists(
/*=====================*/
const char* name); /*!< in: file path as a null-terminated string */
/***********************************************************************//**
+NOTE! Use the corresponding macro os_file_rename(), not directly
+this function!
Renames a file (can also move it to another directory). It is safest that the
file is closed before calling this function.
@return TRUE if success */
UNIV_INTERN
ibool
-os_file_rename(
-/*===========*/
+os_file_rename_func(
+/*================*/
const char* oldpath, /*!< in: old file path as a
null-terminated string */
const char* newpath); /*!< in: new file path */
/***********************************************************************//**
+NOTE! Use the corresponding macro os_file_close(), not directly this
+function!
Closes a file handle. In case of error, error number can be retrieved with
os_file_get_last_error.
@return TRUE if success */
UNIV_INTERN
ibool
-os_file_close(
-/*==========*/
+os_file_close_func(
+/*===============*/
os_file_t file); /*!< in, own: handle to a file */
+
+#ifdef UNIV_PFS_IO
+/****************************************************************//**
+NOTE! Please use the corresponding macro os_file_create_simple(),
+not directly this function!
+A performance schema instrumented wrapper function for
+os_file_create_simple() which opens or creates a file.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INLINE
+os_file_t
+pfs_os_file_create_simple_func(
+/*===========================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema Key */
+ const char* name, /*!< in: name of the file or path as a
+ null-terminated string */
+ ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is
+ opened (if does not exist, error), or
+ OS_FILE_CREATE if a new file is created
+ (if exists, error), or
+ OS_FILE_CREATE_PATH if new file
+ (if exists, error) and subdirectories along
+ its path are created (if needed)*/
+ ulint access_type,/*!< in: OS_FILE_READ_ONLY or
+ OS_FILE_READ_WRITE */
+ ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
+
+/****************************************************************//**
+NOTE! Please use the corresponding macro
+os_file_create_simple_no_error_handling(), not directly this function!
+A performance schema instrumented wrapper function for
+os_file_create_simple_no_error_handling(). Add instrumentation to
+monitor file creation/open.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INLINE
+os_file_t
+pfs_os_file_create_simple_no_error_handling_func(
+/*=============================================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema Key */
+ const char* name, /*!< in: name of the file or path as a
+ null-terminated string */
+ ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
+ is opened (if does not exist, error), or
+ OS_FILE_CREATE if a new file is created
+ (if exists, error) */
+ ulint access_type,/*!< in: OS_FILE_READ_ONLY,
+ OS_FILE_READ_WRITE, or
+ OS_FILE_READ_ALLOW_DELETE; the last option is
+ used by a backup program reading the file */
+ ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
+
+/****************************************************************//**
+NOTE! Please use the corresponding macro os_file_create(), not directly
+this function!
+A performance schema wrapper function for os_file_create().
+Add instrumentation to monitor file creation/open.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INLINE
+os_file_t
+pfs_os_file_create_func(
+/*====================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema Key */
+ const char* name, /*!< in: name of the file or path as a
+ null-terminated string */
+ ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
+ is opened (if does not exist, error), or
+ OS_FILE_CREATE if a new file is created
+ (if exists, error),
+ OS_FILE_OVERWRITE if a new file is created
+ or an old overwritten;
+ OS_FILE_OPEN_RAW, if a raw device or disk
+ partition should be opened */
+ ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
+ non-buffered i/o is desired,
+ OS_FILE_NORMAL, if any normal file;
+ NOTE that it also depends on type, os_aio_..
+ and srv_.. variables whether we really use
+ async i/o or unbuffered i/o: look in the
+ function source code for the exact rules */
+ ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
+ ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_close(), not directly
+this function!
+A performance schema instrumented wrapper function for os_file_close().
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_os_file_close_func(
+/*===================*/
+ os_file_t file, /*!< in, own: handle to a file */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
+/*******************************************************************//**
+NOTE! Please use the corresponding macro os_file_read(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_read() which requests a synchronous read operation.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INLINE
+ibool
+pfs_os_file_read_func(
+/*==================*/
+ os_file_t file, /*!< in: handle to a file */
+ void* buf, /*!< in: buffer where to read */
+ ulint offset, /*!< in: least significant 32 bits of file
+ offset where to read */
+ ulint offset_high,/*!< in: most significant 32 bits of
+ offset */
+ ulint n, /*!< in: number of bytes to read */
+ trx_t* trx,
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
+
+/*******************************************************************//**
+NOTE! Please use the corresponding macro os_file_read_no_error_handling(),
+not directly this function!
+This is the performance schema instrumented wrapper function for
+os_file_read_no_error_handling_func() which requests a synchronous
+read operation.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INLINE
+ibool
+pfs_os_file_read_no_error_handling_func(
+/*====================================*/
+ os_file_t file, /*!< in: handle to a file */
+ void* buf, /*!< in: buffer where to read */
+ ulint offset, /*!< in: least significant 32 bits of file
+ offset where to read */
+ ulint offset_high,/*!< in: most significant 32 bits of
+ offset */
+ ulint n, /*!< in: number of bytes to read */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
+
+/*******************************************************************//**
+NOTE! Please use the corresponding macro os_aio(), not directly this
+function!
+Performance schema wrapper function of os_aio() which requests
+an asynchronous i/o operation.
+@return TRUE if request was queued successfully, FALSE if fail */
+UNIV_INLINE
+ibool
+pfs_os_aio_func(
+/*============*/
+ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
+ ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
+ const char* name, /*!< in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /*!< in: handle to a file */
+ void* buf, /*!< in: buffer where to read or from which
+ to write */
+ ulint offset, /*!< in: least significant 32 bits of file
+ offset where to read or write */
+ ulint offset_high,/*!< in: most significant 32 bits of
+ offset */
+ ulint n, /*!< in: number of bytes to read or write */
+ fil_node_t* message1,/*!< in: message for the aio handler
+ (can be used to identify a completed
+ aio operation); ignored if mode is
+ OS_AIO_SYNC */
+ void* message2,/*!< in: message for the aio handler
+ (can be used to identify a completed
+ aio operation); ignored if mode is
+ OS_AIO_SYNC */
+ ulint space_id,
+ trx_t* trx,
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
+/*******************************************************************//**
+NOTE! Please use the corresponding macro os_file_write(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_write() which requests a synchronous write operation.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INLINE
+ibool
+pfs_os_file_write_func(
+/*===================*/
+ const char* name, /*!< in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /*!< in: handle to a file */
+ const void* buf, /*!< in: buffer from which to write */
+ ulint offset, /*!< in: least significant 32 bits of file
+ offset where to write */
+ ulint offset_high,/*!< in: most significant 32 bits of
+ offset */
+ ulint n, /*!< in: number of bytes to write */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_flush(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_flush() which flushes the write buffers of a given file to the disk.
+Flushes the write buffers of a given file to the disk.
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_os_file_flush_func(
+/*===================*/
+ os_file_t file, /*!< in, own: handle to a file */
+ ibool metadata,
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_rename(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_rename()
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_os_file_rename_func(
+/*====================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema Key */
+ const char* oldpath,/*!< in: old file path as a null-terminated
+ string */
+ const char* newpath,/*!< in: new file path */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line);/*!< in: line where the func invoked */
+#endif /* UNIV_PFS_IO */
+
#ifdef UNIV_HOTBACKUP
/***********************************************************************//**
Closes a file handle.
@@ -461,13 +866,15 @@ os_file_set_eof(
/*============*/
FILE* file); /*!< in: file to be truncated */
/***********************************************************************//**
+NOTE! Use the corresponding macro os_file_flush(), not directly this function!
Flushes the write buffers of a given file to the disk.
@return TRUE if success */
UNIV_INTERN
ibool
-os_file_flush(
-/*==========*/
- os_file_t file); /*!< in, own: handle to a file */
+os_file_flush_func(
+/*===============*/
+ os_file_t file, /*!< in, own: handle to a file */
+ ibool metadata);
/***********************************************************************//**
Retrieves the last error number if an error occurs in a file io function.
The number should be retrieved before any other OS calls (because they may
@@ -481,15 +888,13 @@ os_file_get_last_error(
ibool report_all_errors); /*!< in: TRUE if we want an error message
printed of all errors */
/*******************************************************************//**
+NOTE! Use the corresponding macro os_file_read(), not directly this function!
Requests a synchronous read operation.
@return TRUE if request was successful, FALSE if fail */
-#define os_file_read(file, buf, offset, offset_high, n) \
- _os_file_read(file, buf, offset, offset_high, n, NULL)
-
UNIV_INTERN
ibool
-_os_file_read(
-/*=========*/
+os_file_read_func(
+/*==============*/
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
ulint offset, /*!< in: least significant 32 bits of file
@@ -510,13 +915,15 @@ os_file_read_string(
char* str, /*!< in: buffer where to read */
ulint size); /*!< in: size of buffer */
/*******************************************************************//**
+NOTE! Use the corresponding macro os_file_read_no_error_handling(),
+not directly this function!
Requests a synchronous positioned read operation. This function does not do
any error handling. In case of error it returns FALSE.
@return TRUE if request was successful, FALSE if fail */
UNIV_INTERN
ibool
-os_file_read_no_error_handling(
-/*===========================*/
+os_file_read_no_error_handling_func(
+/*================================*/
os_file_t file, /*!< in: handle to a file */
void* buf, /*!< in: buffer where to read */
ulint offset, /*!< in: least significant 32 bits of file
@@ -526,12 +933,14 @@ os_file_read_no_error_handling(
ulint n); /*!< in: number of bytes to read */
/*******************************************************************//**
+NOTE! Use the corresponding macro os_file_write(), not directly this
+function!
Requests a synchronous write operation.
@return TRUE if request was successful, FALSE if fail */
UNIV_INTERN
ibool
-os_file_write(
-/*==========*/
+os_file_write_func(
+/*===============*/
const char* name, /*!< in: name of the file or path as a
null-terminated string */
os_file_t file, /*!< in: handle to a file */
@@ -600,7 +1009,7 @@ respectively. The caller must create an i/o handler thread for each
segment in these arrays. This function also creates the sync array.
No i/o handler thread needs to be created for that */
UNIV_INTERN
-void
+ibool
os_aio_init(
/*========*/
ulint n_per_seg, /*<! in: maximum number of pending aio
@@ -617,12 +1026,13 @@ os_aio_free(void);
/*=============*/
/*******************************************************************//**
+NOTE! Use the corresponding macro os_aio(), not directly this function!
Requests an asynchronous i/o operation.
@return TRUE if request was queued successfully, FALSE if fail */
UNIV_INTERN
ibool
-os_aio(
-/*===*/
+os_aio_func(
+/*========*/
ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
ulint mode, /*!< in: OS_AIO_NORMAL, ..., possibly ORed
to OS_AIO_SIMULATED_WAKE_LATER: the
@@ -655,6 +1065,7 @@ os_aio(
(can be used to identify a completed
aio operation); ignored if mode is
OS_AIO_SYNC */
+ ulint space_id,
trx_t* trx);
/************************************************************************//**
Wakes up all async i/o threads so that they know to exit themselves in
@@ -715,7 +1126,8 @@ os_aio_windows_handle(
parameters are valid and can be used to
restart the operation, for example */
void** message2,
- ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */
+ ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */
+ ulint* space_id);
#endif
/**********************************************************************//**
@@ -737,7 +1149,8 @@ os_aio_simulated_handle(
parameters are valid and can be used to
restart the operation, for example */
void** message2,
- ulint* type); /*!< out: OS_FILE_WRITE or ..._READ */
+ ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */
+ ulint* space_id);
/**********************************************************************//**
Validates the consistency of the aio system.
@return TRUE if ok */
@@ -780,7 +1193,7 @@ os_file_get_status(
os_file_stat_t* stat_info); /*!< information of a file in a
directory */
-#if !defined(UNIV_HOTBACKUP) && !defined(__NETWARE__)
+#if !defined(UNIV_HOTBACKUP)
/*********************************************************************//**
Creates a temporary file that will be deleted on close.
This function is defined in ha_innodb.cc.
@@ -789,6 +1202,39 @@ UNIV_INTERN
int
innobase_mysql_tmpfile(void);
/*========================*/
-#endif /* !UNIV_HOTBACKUP && !__NETWARE__ */
+#endif /* !UNIV_HOTBACKUP */
+
+
+#if defined(LINUX_NATIVE_AIO)
+/**************************************************************************
+This function is only used in Linux native asynchronous i/o.
+Waits for an aio operation to complete. This function is used to wait the
+for completed requests. The aio array of pending requests is divided
+into segments. The thread specifies which segment or slot it wants to wait
+for. NOTE: this function will also take care of freeing the aio slot,
+therefore no other thread is allowed to do the freeing!
+@return TRUE if the IO was successful */
+UNIV_INTERN
+ibool
+os_aio_linux_handle(
+/*================*/
+ ulint global_seg, /*!< in: segment number in the aio array
+ to wait for; segment 0 is the ibuf
+ i/o thread, segment 1 is log i/o thread,
+ then follow the non-ibuf read threads,
+ and the last are the non-ibuf write
+ threads. */
+ fil_node_t**message1, /*!< out: the messages passed with the */
+ void** message2, /*!< aio request; note that in case the
+ aio operation failed, these output
+ parameters are valid and can be used to
+ restart the operation. */
+ ulint* type, /*!< out: OS_FILE_WRITE or ..._READ */
+ ulint* space_id);
+#endif /* LINUX_NATIVE_AIO */
+
+#ifndef UNIV_NONINL
+#include "os0file.ic"
+#endif
#endif
diff --git a/storage/xtradb/include/os0file.ic b/storage/xtradb/include/os0file.ic
new file mode 100644
index 00000000000..2d2145d72f0
--- /dev/null
+++ b/storage/xtradb/include/os0file.ic
@@ -0,0 +1,422 @@
+/*****************************************************************************
+
+Copyright (c) 2010, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/os0file.ic
+The interface to the operating system file io
+
+Created 2/20/2010 Jimmy Yang
+*******************************************************/
+
+#include "univ.i"
+
+#ifdef UNIV_PFS_IO
+/****************************************************************//**
+NOTE! Please use the corresponding macro os_file_create_simple(),
+not directly this function!
+A performance schema instrumented wrapper function for
+os_file_create_simple() which opens or creates a file.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INLINE
+os_file_t
+pfs_os_file_create_simple_func(
+/*===========================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema Key */
+ const char* name, /*!< in: name of the file or path as a
+ null-terminated string */
+ ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file is
+ opened (if does not exist, error), or
+ OS_FILE_CREATE if a new file is created
+ (if exists, error), or
+ OS_FILE_CREATE_PATH if new file
+ (if exists, error) and subdirectories along
+ its path are created (if needed)*/
+ ulint access_type,/*!< in: OS_FILE_READ_ONLY or
+ OS_FILE_READ_WRITE */
+ ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+{
+ os_file_t file;
+ struct PSI_file_locker* locker = NULL;
+ PSI_file_locker_state state;
+
+ /* register a file open or creation depending on "create_mode" */
+ register_pfs_file_open_begin(&state, locker, key,
+ ((create_mode == OS_FILE_CREATE)
+ ? PSI_FILE_CREATE
+ : PSI_FILE_OPEN),
+ name, src_file, src_line);
+
+ file = os_file_create_simple_func(name, create_mode,
+ access_type, success);
+
+ /* Regsiter the returning "file" value with the system */
+ register_pfs_file_open_end(locker, file);
+
+ return(file);
+}
+
+/****************************************************************//**
+NOTE! Please use the corresponding macro
+os_file_create_simple_no_error_handling(), not directly this function!
+A performance schema instrumented wrapper function for
+os_file_create_simple_no_error_handling(). Add instrumentation to
+monitor file creation/open.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INLINE
+os_file_t
+pfs_os_file_create_simple_no_error_handling_func(
+/*=============================================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema Key */
+ const char* name, /*!< in: name of the file or path as a
+ null-terminated string */
+ ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
+ is opened (if does not exist, error), or
+ OS_FILE_CREATE if a new file is created
+ (if exists, error) */
+ ulint access_type,/*!< in: OS_FILE_READ_ONLY,
+ OS_FILE_READ_WRITE, or
+ OS_FILE_READ_ALLOW_DELETE; the last option is
+ used by a backup program reading the file */
+ ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+{
+ os_file_t file;
+ struct PSI_file_locker* locker = NULL;
+ PSI_file_locker_state state;
+
+ /* register a file open or creation depending on "create_mode" */
+ register_pfs_file_open_begin(&state, locker, key,
+ ((create_mode == OS_FILE_CREATE)
+ ? PSI_FILE_CREATE
+ : PSI_FILE_OPEN),
+ name, src_file, src_line);
+
+ file = os_file_create_simple_no_error_handling_func(
+ name, create_mode, access_type, success);
+
+ register_pfs_file_open_end(locker, file);
+
+ return(file);
+}
+
+/****************************************************************//**
+NOTE! Please use the corresponding macro os_file_create(), not directly
+this function!
+A performance schema wrapper function for os_file_create().
+Add instrumentation to monitor file creation/open.
+@return own: handle to the file, not defined if error, error number
+can be retrieved with os_file_get_last_error */
+UNIV_INLINE
+os_file_t
+pfs_os_file_create_func(
+/*====================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema Key */
+ const char* name, /*!< in: name of the file or path as a
+ null-terminated string */
+ ulint create_mode,/*!< in: OS_FILE_OPEN if an existing file
+ is opened (if does not exist, error), or
+ OS_FILE_CREATE if a new file is created
+ (if exists, error),
+ OS_FILE_OVERWRITE if a new file is created
+ or an old overwritten;
+ OS_FILE_OPEN_RAW, if a raw device or disk
+ partition should be opened */
+ ulint purpose,/*!< in: OS_FILE_AIO, if asynchronous,
+ non-buffered i/o is desired,
+ OS_FILE_NORMAL, if any normal file;
+ NOTE that it also depends on type, os_aio_..
+ and srv_.. variables whether we really use
+ async i/o or unbuffered i/o: look in the
+ function source code for the exact rules */
+ ulint type, /*!< in: OS_DATA_FILE or OS_LOG_FILE */
+ ibool* success,/*!< out: TRUE if succeed, FALSE if error */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+{
+ os_file_t file;
+ struct PSI_file_locker* locker = NULL;
+ PSI_file_locker_state state;
+
+ /* register a file open or creation depending on "create_mode" */
+ register_pfs_file_open_begin(&state, locker, key,
+ ((create_mode == OS_FILE_CREATE)
+ ? PSI_FILE_CREATE
+ : PSI_FILE_OPEN),
+ name, src_file, src_line);
+
+ file = os_file_create_func(name, create_mode, purpose, type, success);
+
+ register_pfs_file_open_end(locker, file);
+
+ return(file);
+}
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_close(), not directly
+this function!
+A performance schema instrumented wrapper function for os_file_close().
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_os_file_close_func(
+/*===================*/
+ os_file_t file, /*!< in, own: handle to a file */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+{
+ ibool result;
+ struct PSI_file_locker* locker = NULL;
+ PSI_file_locker_state state;
+
+ /* register the file close */
+ register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_CLOSE,
+ src_file, src_line);
+
+ result = os_file_close_func(file);
+
+ register_pfs_file_io_end(locker, 0);
+
+ return(result);
+}
+
+/*******************************************************************//**
+NOTE! Please use the corresponding macro os_aio(), not directly this
+function!
+Performance schema instrumented wrapper function of os_aio() which
+requests an asynchronous i/o operation.
+@return TRUE if request was queued successfully, FALSE if fail */
+UNIV_INLINE
+ibool
+pfs_os_aio_func(
+/*============*/
+ ulint type, /*!< in: OS_FILE_READ or OS_FILE_WRITE */
+ ulint mode, /*!< in: OS_AIO_NORMAL etc. I/O mode */
+ const char* name, /*!< in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /*!< in: handle to a file */
+ void* buf, /*!< in: buffer where to read or from which
+ to write */
+ ulint offset, /*!< in: least significant 32 bits of file
+ offset where to read or write */
+ ulint offset_high,/*!< in: most significant 32 bits of
+ offset */
+ ulint n, /*!< in: number of bytes to read or write */
+ fil_node_t* message1,/*!< in: message for the aio handler
+ (can be used to identify a completed
+ aio operation); ignored if mode is
+ OS_AIO_SYNC */
+ void* message2,/*!< in: message for the aio handler
+ (can be used to identify a completed
+ aio operation); ignored if mode is
+ OS_AIO_SYNC */
+ ulint space_id,
+ trx_t* trx,
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+{
+ ibool result;
+ struct PSI_file_locker* locker = NULL;
+ PSI_file_locker_state state;
+
+ /* Register the read or write I/O depending on "type" */
+ register_pfs_file_io_begin(&state, locker, file, n,
+ (type == OS_FILE_WRITE)
+ ? PSI_FILE_WRITE
+ : PSI_FILE_READ,
+ src_file, src_line);
+
+ result = os_aio_func(type, mode, name, file, buf, offset, offset_high,
+ n, message1, message2, space_id, trx);
+
+ register_pfs_file_io_end(locker, n);
+
+ return(result);
+}
+
+/*******************************************************************//**
+NOTE! Please use the corresponding macro os_file_read(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_read() which requests a synchronous read operation.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INLINE
+ibool
+pfs_os_file_read_func(
+/*==================*/
+ os_file_t file, /*!< in: handle to a file */
+ void* buf, /*!< in: buffer where to read */
+ ulint offset, /*!< in: least significant 32 bits of file
+ offset where to read */
+ ulint offset_high,/*!< in: most significant 32 bits of
+ offset */
+ ulint n, /*!< in: number of bytes to read */
+ trx_t* trx,
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+{
+ ibool result;
+ struct PSI_file_locker* locker = NULL;
+ PSI_file_locker_state state;
+
+ register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
+ src_file, src_line);
+
+ result = os_file_read_func(file, buf, offset, offset_high, n, trx);
+
+ register_pfs_file_io_end(locker, n);
+
+ return(result);
+}
+
+/*******************************************************************//**
+NOTE! Please use the corresponding macro
+os_file_read_no_error_handling(), not directly this function!
+This is the performance schema instrumented wrapper function for
+os_file_read_no_error_handling() which requests a synchronous
+positioned read operation. This function does not do any error
+handling. In case of error it returns FALSE.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INLINE
+ibool
+pfs_os_file_read_no_error_handling_func(
+/*====================================*/
+ os_file_t file, /*!< in: handle to a file */
+ void* buf, /*!< in: buffer where to read */
+ ulint offset, /*!< in: least significant 32 bits of file
+ offset where to read */
+ ulint offset_high,/*!< in: most significant 32 bits of
+ offset */
+ ulint n, /*!< in: number of bytes to read */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+{
+ ibool result;
+ struct PSI_file_locker* locker = NULL;
+ PSI_file_locker_state state;
+
+ register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_READ,
+ src_file, src_line);
+
+ result = os_file_read_no_error_handling_func(file, buf, offset,
+ offset_high, n);
+
+ register_pfs_file_io_end(locker, n);
+
+ return(result);
+}
+
+/*******************************************************************//**
+NOTE! Please use the corresponding macro os_file_write(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_write() which requests a synchronous write operation.
+@return TRUE if request was successful, FALSE if fail */
+UNIV_INLINE
+ibool
+pfs_os_file_write_func(
+/*===================*/
+ const char* name, /*!< in: name of the file or path as a
+ null-terminated string */
+ os_file_t file, /*!< in: handle to a file */
+ const void* buf, /*!< in: buffer from which to write */
+ ulint offset, /*!< in: least significant 32 bits of file
+ offset where to write */
+ ulint offset_high,/*!< in: most significant 32 bits of
+ offset */
+ ulint n, /*!< in: number of bytes to write */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+{
+ ibool result;
+ struct PSI_file_locker* locker = NULL;
+ PSI_file_locker_state state;
+
+ register_pfs_file_io_begin(&state, locker, file, n, PSI_FILE_WRITE,
+ src_file, src_line);
+
+ result = os_file_write_func(name, file, buf, offset, offset_high, n);
+
+ register_pfs_file_io_end(locker, n);
+
+ return(result);
+}
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_flush(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_flush() which flushes the write buffers of a given file to the disk.
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_os_file_flush_func(
+/*===================*/
+ os_file_t file, /*!< in, own: handle to a file */
+ ibool metadata,
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+{
+ ibool result;
+ struct PSI_file_locker* locker = NULL;
+ PSI_file_locker_state state;
+
+ register_pfs_file_io_begin(&state, locker, file, 0, PSI_FILE_SYNC,
+ src_file, src_line);
+ result = os_file_flush_func(file, metadata);
+
+ register_pfs_file_io_end(locker, 0);
+
+ return(result);
+}
+
+/***********************************************************************//**
+NOTE! Please use the corresponding macro os_file_rename(), not directly
+this function!
+This is the performance schema instrumented wrapper function for
+os_file_rename()
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_os_file_rename_func(
+/*====================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema Key */
+ const char* oldpath,/*!< in: old file path as a null-terminated
+ string */
+ const char* newpath,/*!< in: new file path */
+ const char* src_file,/*!< in: file name where func invoked */
+ ulint src_line)/*!< in: line where the func invoked */
+{
+ ibool result;
+ struct PSI_file_locker* locker = NULL;
+ PSI_file_locker_state state;
+
+ register_pfs_file_open_begin(&state, locker, key, PSI_FILE_RENAME, newpath,
+ src_file, src_line);
+
+ result = os_file_rename_func(oldpath, newpath);
+
+ register_pfs_file_open_end(locker, 0);
+
+ return(result);
+}
+#endif /* UNIV_PFS_IO */
diff --git a/storage/xtradb/include/os0proc.h b/storage/xtradb/include/os0proc.h
index 582cef6f803..fd46bd7db87 100644
--- a/storage/xtradb/include/os0proc.h
+++ b/storage/xtradb/include/os0proc.h
@@ -32,11 +32,6 @@ Created 9/30/1995 Heikki Tuuri
#ifdef UNIV_LINUX
#include <sys/ipc.h>
#include <sys/shm.h>
-#else
-# if defined HAVE_SYS_IPC_H && HAVE_SYS_SHM_H
-#include <sys/ipc.h>
-#include <sys/shm.h>
-# endif
#endif
typedef void* os_process_t;
@@ -75,29 +70,6 @@ os_mem_free_large(
ulint size); /*!< in: size returned by
os_mem_alloc_large() */
-
-/****************************************************************//**
-Allocates or attaches and reuses shared memory segment.
-The content is not cleared automatically.
-@return allocated memory */
-UNIV_INTERN
-void*
-os_shm_alloc(
-/*=========*/
- ulint* n, /*!< in/out: number of bytes */
- uint key,
- ibool* is_new);
-
-/****************************************************************//**
-Detach shared memory segment. */
-UNIV_INTERN
-void
-os_shm_free(
-/*========*/
- void *ptr, /*!< in: pointer returned by
- os_shm_alloc() */
- ulint size); /*!< in: size returned by
- os_shm_alloc() */
#ifndef UNIV_NONINL
#include "os0proc.ic"
#endif
diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h
index c230a03b6db..6a99c60226b 100644
--- a/storage/xtradb/include/os0sync.h
+++ b/storage/xtradb/include/os0sync.h
@@ -38,28 +38,18 @@ Created 9/6/1995 Heikki Tuuri
#include "ut0lst.h"
#ifdef __WIN__
-
+/** Native event (slow)*/
+typedef HANDLE os_native_event_t;
/** Native mutex */
-#define os_fast_mutex_t CRITICAL_SECTION
-
-/** Native event */
-typedef HANDLE os_native_event_t;
-
-/** Operating system event */
-typedef struct os_event_struct os_event_struct_t;
-/** Operating system event handle */
-typedef os_event_struct_t* os_event_t;
-
-/** An asynchronous signal sent between threads */
-struct os_event_struct {
- os_native_event_t handle;
- /*!< Windows event */
- UT_LIST_NODE_T(os_event_struct_t) os_event_list;
- /*!< list of all created events */
-};
+typedef CRITICAL_SECTION os_fast_mutex_t;
+/** Native condition variable. */
+typedef CONDITION_VARIABLE os_cond_t;
#else
/** Native mutex */
-typedef pthread_mutex_t os_fast_mutex_t;
+typedef pthread_mutex_t os_fast_mutex_t;
+/** Native condition variable */
+typedef pthread_cond_t os_cond_t;
+#endif
/** Operating system event */
typedef struct os_event_struct os_event_struct_t;
@@ -68,6 +58,10 @@ typedef os_event_struct_t* os_event_t;
/** An asynchronous signal sent between threads */
struct os_event_struct {
+#ifdef __WIN__
+ HANDLE handle; /*!< kernel event object, slow,
+ used on older Windows */
+#endif
os_fast_mutex_t os_mutex; /*!< this mutex protects the next
fields */
ibool is_set; /*!< this is TRUE when the event is
@@ -76,24 +70,23 @@ struct os_event_struct {
this event */
ib_int64_t signal_count; /*!< this is incremented each time
the event becomes signaled */
- pthread_cond_t cond_var; /*!< condition variable is used in
+ os_cond_t cond_var; /*!< condition variable is used in
waiting for the event */
UT_LIST_NODE_T(os_event_struct_t) os_event_list;
/*!< list of all created events */
};
-#endif
+
+/** Denotes an infinite delay for os_event_wait_time() */
+#define OS_SYNC_INFINITE_TIME ULINT_UNDEFINED
+
+/** Return value of os_event_wait_time() when the time is exceeded */
+#define OS_SYNC_TIME_EXCEEDED 1
/** Operating system mutex */
typedef struct os_mutex_struct os_mutex_str_t;
/** Operating system mutex handle */
typedef os_mutex_str_t* os_mutex_t;
-/** Denotes an infinite delay for os_event_wait_time() */
-#define OS_SYNC_INFINITE_TIME ((ulint)(-1))
-
-/** Return value of os_event_wait_time() when the time is exceeded */
-#define OS_SYNC_TIME_EXCEEDED 1
-
/** Mutex protecting counts and the event and OS 'slow' mutex lists */
extern os_mutex_t os_sync_mutex;
@@ -157,10 +150,7 @@ os_event_free(
os_event_t event); /*!< in: event to free */
/**********************************************************//**
-Waits for an event object until it is in the signaled state. If
-srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the
-waiting thread when the event becomes signaled (or immediately if the
-event is already in the signaled state).
+Waits for an event object until it is in the signaled state.
Typically, if the event has been signalled after the os_event_reset()
we'll return immediately because event->is_set == TRUE.
@@ -186,6 +176,7 @@ os_event_wait_low(
os_event_reset(). */
#define os_event_wait(event) os_event_wait_low(event, 0)
+#define os_event_wait_time(e, t) os_event_wait_time_low(e, t, 0)
/**********************************************************//**
Waits for an event object until it is in the signaled state or
@@ -193,36 +184,23 @@ a timeout is exceeded.
@return 0 if success, OS_SYNC_TIME_EXCEEDED if timeout was exceeded */
UNIV_INTERN
ulint
-os_event_wait_time(
-/*===============*/
- os_event_t event, /*!< in: event to wait */
- ulint time); /*!< in: timeout in microseconds, or
- OS_SYNC_INFINITE_TIME */
-#ifdef __WIN__
-/**********************************************************//**
-Waits for any event in an OS native event array. Returns if even a single
-one is signaled or becomes signaled.
-@return index of the event which was signaled */
-UNIV_INTERN
-ulint
-os_event_wait_multiple(
+os_event_wait_time_low(
/*===================*/
- ulint n, /*!< in: number of events in the
- array */
- os_native_event_t* native_event_array);
- /*!< in: pointer to an array of event
- handles */
-#endif
+ os_event_t event, /*!< in: event to wait */
+ ulint time_in_usec, /*!< in: timeout in
+ microseconds, or
+ OS_SYNC_INFINITE_TIME */
+ ib_int64_t reset_sig_count); /*!< in: zero or the value
+ returned by previous call of
+ os_event_reset(). */
/*********************************************************//**
Creates an operating system mutex semaphore. Because these are slow, the
mutex semaphore of InnoDB itself (mutex_t) should be used where possible.
@return the mutex handle */
UNIV_INTERN
os_mutex_t
-os_mutex_create(
-/*============*/
- const char* name); /*!< in: the name of the mutex, if NULL
- the mutex is created without a name */
+os_mutex_create(void);
+/*=================*/
/**********************************************************//**
Acquires ownership of a mutex semaphore. */
UNIV_INTERN
@@ -330,7 +308,7 @@ amount of increment. */
Returns the old value of *ptr, atomically sets *ptr to new_val */
# define os_atomic_test_and_set_byte(ptr, new_val) \
- __sync_lock_test_and_set(ptr, new_val)
+ __sync_lock_test_and_set(ptr, (byte) new_val)
#elif defined(HAVE_IB_SOLARIS_ATOMICS)
diff --git a/storage/xtradb/include/os0sync.ic b/storage/xtradb/include/os0sync.ic
index 1f3ce38fa65..c33f13aaad6 100644
--- a/storage/xtradb/include/os0sync.ic
+++ b/storage/xtradb/include/os0sync.ic
@@ -28,8 +28,7 @@ Created 9/6/1995 Heikki Tuuri
#endif
/**********************************************************//**
-Acquires ownership of a fast mutex. Currently in Windows this is the same
-as os_fast_mutex_lock!
+Acquires ownership of a fast mutex.
@return 0 if success, != 0 if was reserved by another thread */
UNIV_INLINE
ulint
@@ -38,9 +37,13 @@ os_fast_mutex_trylock(
os_fast_mutex_t* fast_mutex) /*!< in: mutex to acquire */
{
#ifdef __WIN__
- EnterCriticalSection(fast_mutex);
+ if (TryEnterCriticalSection(fast_mutex)) {
- return(0);
+ return(0);
+ } else {
+
+ return(1);
+ }
#else
/* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock
so that it returns 0 on success. In the operating system
diff --git a/storage/xtradb/include/os0thread.h b/storage/xtradb/include/os0thread.h
index 6583de0005f..df3cdb7728e 100644
--- a/storage/xtradb/include/os0thread.h
+++ b/storage/xtradb/include/os0thread.h
@@ -56,6 +56,11 @@ typedef os_thread_t os_thread_id_t; /*!< In Unix we use the thread
/* Define a function pointer type to use in a typecast */
typedef void* (*os_posix_f_t) (void*);
+#ifdef HAVE_PSI_INTERFACE
+/* Define for performance schema registration key */
+typedef unsigned int mysql_pfs_key_t;
+#endif
+
/***************************************************************//**
Compares two thread ids for equality.
@return TRUE if equal */
@@ -86,7 +91,7 @@ os_thread_t
os_thread_create(
/*=============*/
#ifndef __WIN__
- os_posix_f_t start_f,
+ os_posix_f_t start_f,
#else
ulint (*start_f)(void*), /*!< in: pointer to function
from which to start */
@@ -102,8 +107,9 @@ UNIV_INTERN
void
os_thread_exit(
/*===========*/
- void* exit_value); /*!< in: exit value; in Windows this void*
+ void* exit_value) /*!< in: exit value; in Windows this void*
is cast as a DWORD */
+ UNIV_COLD __attribute__((noreturn));
/*****************************************************************//**
Returns the thread identifier of current thread.
@return current thread identifier */
@@ -112,13 +118,6 @@ os_thread_id_t
os_thread_get_curr_id(void);
/*========================*/
/*****************************************************************//**
-Returns handle to the current thread.
-@return current thread handle */
-UNIV_INTERN
-os_thread_t
-os_thread_get_curr(void);
-/*====================*/
-/*****************************************************************//**
Advises the os to give up remainder of the thread's time slice. */
UNIV_INTERN
void
@@ -131,29 +130,6 @@ void
os_thread_sleep(
/*============*/
ulint tm); /*!< in: time in microseconds */
-/******************************************************************//**
-Gets a thread priority.
-@return priority */
-UNIV_INTERN
-ulint
-os_thread_get_priority(
-/*===================*/
- os_thread_t handle);/*!< in: OS handle to the thread */
-/******************************************************************//**
-Sets a thread priority. */
-UNIV_INTERN
-void
-os_thread_set_priority(
-/*===================*/
- os_thread_t handle, /*!< in: OS handle to the thread */
- ulint pri); /*!< in: priority: one of OS_PRIORITY_... */
-/******************************************************************//**
-Gets the last operating system error code for the calling thread.
-@return last error on Windows, 0 otherwise */
-UNIV_INTERN
-ulint
-os_thread_get_last_error(void);
-/*==========================*/
#ifndef UNIV_NONINL
#include "os0thread.ic"
diff --git a/storage/xtradb/include/page0cur.h b/storage/xtradb/include/page0cur.h
index 6b444b3dd96..1544b0abe1c 100644
--- a/storage/xtradb/include/page0cur.h
+++ b/storage/xtradb/include/page0cur.h
@@ -293,22 +293,6 @@ page_cur_open_on_rnd_user_rec(
/*==========================*/
buf_block_t* block, /*!< in: page */
page_cur_t* cursor);/*!< out: page cursor */
-
-UNIV_INTERN
-void
-page_cur_open_on_nth_user_rec(
-/*==========================*/
- buf_block_t* block, /*!< in: page */
- page_cur_t* cursor, /*!< out: page cursor */
- ulint nth);
-
-UNIV_INTERN
-ibool
-page_cur_open_on_rnd_user_rec_after_nth(
-/*==========================*/
- buf_block_t* block, /*!< in: page */
- page_cur_t* cursor, /*!< out: page cursor */
- ulint nth);
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Parses a log record of a record insert on a page.
diff --git a/storage/xtradb/include/page0page.h b/storage/xtradb/include/page0page.h
index 5b2bcf7c054..c06815cb6ca 100644
--- a/storage/xtradb/include/page0page.h
+++ b/storage/xtradb/include/page0page.h
@@ -66,7 +66,7 @@ typedef byte page_header_t;
direction */
#define PAGE_N_RECS 16 /* number of user records on the page */
#define PAGE_MAX_TRX_ID 18 /* highest id of a trx which may have modified
- a record on the page; a dulint; defined only
+ a record on the page; trx_id_t; defined only
in secondary indexes and in the insert buffer
tree; NOTE: this may be modified only
when the thread has an x-latch to the page,
@@ -618,18 +618,19 @@ rec_t*
page_rec_find_owner_rec(
/*====================*/
rec_t* rec); /*!< in: the physical record */
+#ifndef UNIV_HOTBACKUP
/***********************************************************************//**
-This is a low-level operation which is used in a database index creation
-to update the page number of a created B-tree to a data dictionary
-record. */
-UNIV_INTERN
+Write a 32-bit field in a data dictionary record. */
+UNIV_INLINE
void
-page_rec_write_index_page_no(
-/*=========================*/
- rec_t* rec, /*!< in: record to update */
+page_rec_write_field(
+/*=================*/
+ rec_t* rec, /*!< in/out: record to update */
ulint i, /*!< in: index of the field to update */
- ulint page_no,/*!< in: value to write */
- mtr_t* mtr); /*!< in: mtr */
+ ulint val, /*!< in: value to write */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+ __attribute__((nonnull));
+#endif /* !UNIV_HOTBACKUP */
/************************************************************//**
Returns the maximum combined size of records which can be inserted on top
of record heap.
@@ -952,7 +953,7 @@ UNIV_INTERN
ibool
page_rec_validate(
/*==============*/
- rec_t* rec, /*!< in: physical record */
+ const rec_t* rec, /*!< in: physical record */
const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
/***************************************************************//**
Checks that the first directory slot points to the infimum record and
@@ -972,7 +973,7 @@ UNIV_INTERN
ibool
page_simple_validate_old(
/*=====================*/
- page_t* page); /*!< in: old-style index page */
+ const page_t* page); /*!< in: index page in ROW_FORMAT=REDUNDANT */
/***************************************************************//**
This function checks the consistency of an index page when we do not
know the index. This is also resilient so that this should never crash
@@ -982,7 +983,7 @@ UNIV_INTERN
ibool
page_simple_validate_new(
/*=====================*/
- page_t* block); /*!< in: new-style index page */
+ const page_t* page); /*!< in: index page in ROW_FORMAT!=REDUNDANT */
/***************************************************************//**
This function checks the consistency of an index page.
@return TRUE if ok */
@@ -990,7 +991,7 @@ UNIV_INTERN
ibool
page_validate(
/*==========*/
- page_t* page, /*!< in: index page */
+ const page_t* page, /*!< in: index page */
dict_index_t* index); /*!< in: data dictionary index containing
the page record type definition */
/***************************************************************//**
diff --git a/storage/xtradb/include/page0page.ic b/storage/xtradb/include/page0page.ic
index dab9dc742e4..d8570108115 100644
--- a/storage/xtradb/include/page0page.ic
+++ b/storage/xtradb/include/page0page.ic
@@ -94,11 +94,10 @@ page_update_max_trx_id(
TRUE for the dummy indexes constructed during redo log
application). In that case, PAGE_MAX_TRX_ID is unused,
and trx_id is usually zero. */
- ut_ad(!ut_dulint_is_zero(trx_id) || recv_recovery_is_on());
+ ut_ad(trx_id || recv_recovery_is_on());
ut_ad(page_is_leaf(buf_block_get_frame(block)));
- if (ut_dulint_cmp(page_get_max_trx_id(buf_block_get_frame(block)),
- trx_id) < 0) {
+ if (page_get_max_trx_id(buf_block_get_frame(block)) < trx_id) {
page_set_max_trx_id(block, page_zip, trx_id, mtr);
}
@@ -963,6 +962,29 @@ page_get_free_space_of_empty(
- 2 * PAGE_DIR_SLOT_SIZE));
}
+#ifndef UNIV_HOTBACKUP
+/***********************************************************************//**
+Write a 32-bit field in a data dictionary record. */
+UNIV_INLINE
+void
+page_rec_write_field(
+/*=================*/
+ rec_t* rec, /*!< in/out: record to update */
+ ulint i, /*!< in: index of the field to update */
+ ulint val, /*!< in: value to write */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ byte* data;
+ ulint len;
+
+ data = rec_get_nth_field_old(rec, i, &len);
+
+ ut_ad(len == 4);
+
+ mlog_write_ulint(data, val, MLOG_4BYTES, mtr);
+}
+#endif /* !UNIV_HOTBACKUP */
+
/************************************************************//**
Each user record on a page, and also the deleted user records in the heap
takes its size plus the fraction of the dir cell size /
diff --git a/storage/xtradb/include/page0zip.h b/storage/xtradb/include/page0zip.h
index 4d37302ed20..fe3d2e52e0b 100644
--- a/storage/xtradb/include/page0zip.h
+++ b/storage/xtradb/include/page0zip.h
@@ -420,7 +420,7 @@ page_zip_copy_recs(
const page_t* src, /*!< in: page */
dict_index_t* index, /*!< in: index of the B-tree */
mtr_t* mtr) /*!< in: mini-transaction */
- __attribute__((nonnull(1,2,3,4)));
+ __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/**********************************************************************//**
diff --git a/storage/xtradb/include/pars0pars.h b/storage/xtradb/include/pars0pars.h
index fe5d76ebbb0..141b2706d7d 100644
--- a/storage/xtradb/include/pars0pars.h
+++ b/storage/xtradb/include/pars0pars.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -521,17 +521,17 @@ Equivalent to:
char buf[8];
mach_write_to_8(buf, val);
-pars_info_add_literal(info, name, buf, 8, DATA_BINARY, 0);
+pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0);
except that the buffer is dynamically allocated from the info struct's
heap. */
UNIV_INTERN
void
-pars_info_add_dulint_literal(
-/*=========================*/
+pars_info_add_ull_literal(
+/*======================*/
pars_info_t* info, /*!< in: info struct */
const char* name, /*!< in: name */
- dulint val); /*!< in: value */
+ ib_uint64_t val); /*!< in: value */
/****************************************************************//**
Add user function. */
UNIV_INTERN
diff --git a/storage/xtradb/include/que0que.h b/storage/xtradb/include/que0que.h
index ed48f980294..369e74d788c 100644
--- a/storage/xtradb/include/que0que.h
+++ b/storage/xtradb/include/que0que.h
@@ -424,9 +424,6 @@ struct que_fork_struct{
ibool cur_on_row; /*!< TRUE if cursor is on a row, i.e.,
it is not before the first row or
after the last row */
- dulint n_inserts; /*!< number of rows inserted */
- dulint n_updates; /*!< number of rows updated */
- dulint n_deletes; /*!< number of rows deleted */
sel_node_t* last_sel_node; /*!< last executed select node, or NULL
if none */
UT_LIST_NODE_T(que_fork_t)
diff --git a/storage/xtradb/include/read0read.h b/storage/xtradb/include/read0read.h
index 4d9a9fade36..0c9468d985e 100644
--- a/storage/xtradb/include/read0read.h
+++ b/storage/xtradb/include/read0read.h
@@ -43,8 +43,7 @@ read_view_t*
read_view_open_now(
/*===============*/
trx_id_t cr_trx_id, /*!< in: trx_id of creating
- transaction, or ut_dulint_zero
- used in purge */
+ transaction, or 0 used in purge */
mem_heap_t* heap); /*!< in: memory heap from which
allocated */
/*********************************************************************//**
@@ -56,8 +55,7 @@ read_view_t*
read_view_oldest_copy_or_open_new(
/*==============================*/
trx_id_t cr_trx_id, /*!< in: trx_id of creating
- transaction, or ut_dulint_zero
- used in purge */
+ transaction, or 0 used in purge */
mem_heap_t* heap); /*!< in: memory heap from which
allocated */
/*********************************************************************//**
@@ -90,6 +88,7 @@ UNIV_INTERN
void
read_view_print(
/*============*/
+ FILE* file,
const read_view_t* view); /*!< in: read view */
/*********************************************************************//**
Create a consistent cursor view for mysql to be used in cursors. In this
@@ -125,7 +124,7 @@ read should not see the modifications to the database. */
struct read_view_struct{
ulint type; /*!< VIEW_NORMAL, VIEW_HIGH_GRANULARITY */
- undo_no_t undo_no;/*!< ut_dulint_zero or if type is
+ undo_no_t undo_no;/*!< 0 or if type is
VIEW_HIGH_GRANULARITY
transaction undo_no when this high-granularity
consistent read view was created */
@@ -156,7 +155,7 @@ struct read_view_struct{
that is, up_limit_id and low_limit_id. */
trx_id_t creator_trx_id;
/*!< trx id of creating transaction, or
- ut_dulint_zero used in purge */
+ 0 used in purge */
UT_LIST_NODE_T(read_view_t) view_list;
/*!< List of read views in trx_sys */
};
diff --git a/storage/xtradb/include/read0read.ic b/storage/xtradb/include/read0read.ic
index 9924967cc2d..5bb5249b591 100644
--- a/storage/xtradb/include/read0read.ic
+++ b/storage/xtradb/include/read0read.ic
@@ -64,15 +64,14 @@ read_view_sees_trx_id(
trx_id_t trx_id) /*!< in: trx id */
{
ulint n_ids;
- int cmp;
ulint i;
- if (ut_dulint_cmp(trx_id, view->up_limit_id) < 0) {
+ if (trx_id < view->up_limit_id) {
return(TRUE);
}
- if (ut_dulint_cmp(trx_id, view->low_limit_id) >= 0) {
+ if (trx_id >= view->low_limit_id) {
return(FALSE);
}
@@ -85,12 +84,11 @@ read_view_sees_trx_id(
n_ids = view->n_trx_ids;
for (i = 0; i < n_ids; i++) {
+ trx_id_t view_trx_id
+ = read_view_get_nth_trx_id(view, n_ids - i - 1);
- cmp = ut_dulint_cmp(
- trx_id,
- read_view_get_nth_trx_id(view, n_ids - i - 1));
- if (cmp <= 0) {
- return(cmp < 0);
+ if (trx_id <= view_trx_id) {
+ return(trx_id != view_trx_id);
}
}
diff --git a/storage/xtradb/include/rem0cmp.h b/storage/xtradb/include/rem0cmp.h
index fcea62ad486..a908521c9f7 100644
--- a/storage/xtradb/include/rem0cmp.h
+++ b/storage/xtradb/include/rem0cmp.h
@@ -165,15 +165,18 @@ cmp_rec_rec_with_match(
const ulint* offsets1,/*!< in: rec_get_offsets(rec1, index) */
const ulint* offsets2,/*!< in: rec_get_offsets(rec2, index) */
dict_index_t* index, /*!< in: data dictionary index */
+ ibool nulls_unequal,
+ /* in: TRUE if this is for index statistics
+ cardinality estimation, and innodb_stats_method
+ is "nulls_unequal" or "nulls_ignored" */
ulint* matched_fields, /*!< in/out: number of already completely
matched fields; when the function returns,
contains the value the for current
comparison */
- ulint* matched_bytes, /*!< in/out: number of already matched
+ ulint* matched_bytes);/*!< in/out: number of already matched
bytes within the first field not completely
matched; when the function returns, contains
the value for the current comparison */
- ulint stats_method);
/*************************************************************//**
This function is used to compare two physical records. Only the common
first fields are compared.
diff --git a/storage/xtradb/include/rem0cmp.ic b/storage/xtradb/include/rem0cmp.ic
index d5185ec94af..63415fe7837 100644
--- a/storage/xtradb/include/rem0cmp.ic
+++ b/storage/xtradb/include/rem0cmp.ic
@@ -87,5 +87,5 @@ cmp_rec_rec(
ulint match_b = 0;
return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index,
- &match_f, &match_b, 0));
+ FALSE, &match_f, &match_b));
}
diff --git a/storage/xtradb/include/rem0rec.h b/storage/xtradb/include/rem0rec.h
index 17d08afabb9..10b74d18c13 100644
--- a/storage/xtradb/include/rem0rec.h
+++ b/storage/xtradb/include/rem0rec.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -480,6 +480,18 @@ ulint
rec_offs_any_extern(
/*================*/
const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+#ifdef UNIV_BLOB_NULL_DEBUG
+/******************************************************//**
+Determine if the offsets are for a record containing null BLOB pointers.
+@return first field containing a null BLOB pointer, or NULL if none found */
+UNIV_INLINE
+const byte*
+rec_offs_any_null_extern(
+/*=====================*/
+ const rec_t* rec, /*!< in: record */
+ const ulint* offsets) /*!< in: rec_get_offsets(rec) */
+ __attribute__((nonnull, warn_unused_result));
+#endif /* UNIV_BLOB_NULL_DEBUG */
/******************************************************//**
Returns nonzero if the extern bit is set in nth field of rec.
@return nonzero if externally stored */
@@ -600,6 +612,7 @@ ulint
rec_offs_size(
/*==========*/
const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+#ifdef UNIV_DEBUG
/**********************************************************//**
Returns a pointer to the start of the record.
@return pointer to start */
@@ -607,7 +620,7 @@ UNIV_INLINE
byte*
rec_get_start(
/*==========*/
- rec_t* rec, /*!< in: pointer to record */
+ const rec_t* rec, /*!< in: pointer to record */
const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
/**********************************************************//**
Returns a pointer to the end of the record.
@@ -616,8 +629,12 @@ UNIV_INLINE
byte*
rec_get_end(
/*========*/
- rec_t* rec, /*!< in: pointer to record */
+ const rec_t* rec, /*!< in: pointer to record */
const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
+#else /* UNIV_DEBUG */
+# define rec_get_start(rec, offsets) ((rec) - rec_offs_extra_size(offsets))
+# define rec_get_end(rec, offsets) ((rec) + rec_offs_data_size(offsets))
+#endif /* UNIV_DEBUG */
/***************************************************************//**
Copies a physical record to a buffer.
@return pointer to the origin of the copy */
@@ -659,7 +676,7 @@ rec_fold(
fields to fold */
ulint n_bytes, /*!< in: number of bytes to fold
in an incomplete last field */
- dulint tree_id) /*!< in: index tree id */
+ index_id_t tree_id) /*!< in: index tree id */
__attribute__((pure));
#endif /* !UNIV_HOTBACKUP */
/*********************************************************//**
@@ -801,13 +818,11 @@ UNIV_INTERN
void
rec_print(
/*======*/
- FILE* file, /*!< in: file where to print */
- const rec_t* rec, /*!< in: physical record */
- dict_index_t* index); /*!< in: record descriptor */
+ FILE* file, /*!< in: file where to print */
+ const rec_t* rec, /*!< in: physical record */
+ const dict_index_t* index); /*!< in: record descriptor */
#endif /* UNIV_HOTBACKUP */
-#define REC_INFO_BITS 6 /* This is single byte bit-field */
-
/* Maximum lengths for the data in a physical record if the offsets
are given in one byte (resp. two byte) format. */
#define REC_1BYTE_OFFS_LIMIT 0x7FUL
diff --git a/storage/xtradb/include/rem0rec.ic b/storage/xtradb/include/rem0rec.ic
index fa96c97f95e..7cd203f131a 100644
--- a/storage/xtradb/include/rem0rec.ic
+++ b/storage/xtradb/include/rem0rec.ic
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2011, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,6 +26,7 @@ Created 5/30/1994 Heikki Tuuri
#include "mach0data.h"
#include "ut0byte.h"
#include "dict0dict.h"
+#include "btr0types.h"
/* Compact flag ORed to the extra size returned by rec_get_offsets() */
#define REC_OFFS_COMPACT ((ulint) 1 << 31)
@@ -1087,6 +1088,44 @@ rec_offs_any_extern(
return(UNIV_UNLIKELY(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL));
}
+#ifdef UNIV_BLOB_NULL_DEBUG
+/******************************************************//**
+Determine if the offsets are for a record containing null BLOB pointers.
+@return first field containing a null BLOB pointer, or NULL if none found */
+UNIV_INLINE
+const byte*
+rec_offs_any_null_extern(
+/*=====================*/
+ const rec_t* rec, /*!< in: record */
+ const ulint* offsets) /*!< in: rec_get_offsets(rec) */
+{
+ ulint i;
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+
+ if (!rec_offs_any_extern(offsets)) {
+ return(NULL);
+ }
+
+ for (i = 0; i < rec_offs_n_fields(offsets); i++) {
+ if (rec_offs_nth_extern(offsets, i)) {
+ ulint len;
+ const byte* field
+ = rec_get_nth_field(rec, offsets, i, &len);
+
+ ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);
+ if (!memcmp(field + len
+ - BTR_EXTERN_FIELD_REF_SIZE,
+ field_ref_zero,
+ BTR_EXTERN_FIELD_REF_SIZE)) {
+ return(field);
+ }
+ }
+ }
+
+ return(NULL);
+}
+#endif /* UNIV_BLOB_NULL_DEBUG */
+
/******************************************************//**
Returns nonzero if the extern bit is set in nth field of rec.
@return nonzero if externally stored */
@@ -1462,6 +1501,7 @@ rec_offs_size(
return(rec_offs_data_size(offsets) + rec_offs_extra_size(offsets));
}
+#ifdef UNIV_DEBUG
/**********************************************************//**
Returns a pointer to the end of the record.
@return pointer to end */
@@ -1469,11 +1509,11 @@ UNIV_INLINE
byte*
rec_get_end(
/*========*/
- rec_t* rec, /*!< in: pointer to record */
+ const rec_t* rec, /*!< in: pointer to record */
const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(rec, NULL, offsets));
- return(rec + rec_offs_data_size(offsets));
+ return((rec_t*) rec + rec_offs_data_size(offsets));
}
/**********************************************************//**
@@ -1483,12 +1523,13 @@ UNIV_INLINE
byte*
rec_get_start(
/*==========*/
- rec_t* rec, /*!< in: pointer to record */
+ const rec_t* rec, /*!< in: pointer to record */
const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
ut_ad(rec_offs_validate(rec, NULL, offsets));
- return(rec - rec_offs_extra_size(offsets));
+ return((rec_t*) rec - rec_offs_extra_size(offsets));
}
+#endif /* UNIV_DEBUG */
/***************************************************************//**
Copies a physical record to a buffer.
@@ -1594,7 +1635,7 @@ rec_fold(
fields to fold */
ulint n_bytes, /*!< in: number of bytes to fold
in an incomplete last field */
- dulint tree_id) /*!< in: index tree id */
+ index_id_t tree_id) /*!< in: index tree id */
{
ulint i;
const byte* data;
@@ -1618,7 +1659,7 @@ rec_fold(
n_bytes = 0;
}
- fold = ut_fold_dulint(tree_id);
+ fold = ut_fold_ull(tree_id);
for (i = 0; i < n_fields; i++) {
data = rec_get_nth_field(rec, offsets, i, &len);
diff --git a/storage/xtradb/include/rem0types.h b/storage/xtradb/include/rem0types.h
index 8b84d4af233..7afd595be90 100644
--- a/storage/xtradb/include/rem0types.h
+++ b/storage/xtradb/include/rem0types.h
@@ -34,13 +34,21 @@ typedef byte rec_t;
#define REC_MAX_HEAP_NO (2 * 8192 - 1)
#define REC_MAX_N_OWNED (16 - 1)
-/* REC_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
-indexed column length (or indexed prefix length). It is set to 3*256,
-so that one can create a column prefix index on 256 characters of a
-TEXT or VARCHAR column also in the UTF-8 charset. In that charset,
-a character may take at most 3 bytes.
+/* REC_ANTELOPE_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
+indexed field length (or indexed prefix length) for indexes on tables of
+ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT format.
+Before we support UTF-8 encodings with mbmaxlen = 4, a UTF-8 character
+may take at most 3 bytes. So the limit was set to 3*256, so that one
+can create a column prefix index on 256 characters of a TEXT or VARCHAR
+column also in the UTF-8 charset.
This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
files would be at risk! */
-#define REC_MAX_INDEX_COL_LEN 768
+#define REC_ANTELOPE_MAX_INDEX_COL_LEN 768
+
+/** Maximum indexed field length for table format DICT_TF_FORMAT_ZIP and
+beyond.
+This (3072) is the maximum index row length allowed, so we cannot create index
+prefix column longer than that. */
+#define REC_VERSION_56_MAX_INDEX_COL_LEN 3072
#endif
diff --git a/storage/xtradb/include/row0ext.h b/storage/xtradb/include/row0ext.h
index 43d82d644e6..557da2c4a82 100644
--- a/storage/xtradb/include/row0ext.h
+++ b/storage/xtradb/include/row0ext.h
@@ -30,6 +30,7 @@ Created September 2006 Marko Makela
#include "row0types.h"
#include "data0types.h"
#include "mem0mem.h"
+#include "dict0types.h"
/********************************************************************//**
Creates a cache of column prefixes of externally stored columns.
@@ -43,13 +44,13 @@ row_ext_create(
in the InnoDB table object, as reported by
dict_col_get_no(); NOT relative to the records
in the clustered index */
+ ulint flags, /*!< in: table->flags */
const dtuple_t* tuple, /*!< in: data tuple containing the field
references of the externally stored
columns; must be indexed by col_no;
the clustered index record must be
covered by a lock or a page latch
to prevent deletion (rollback or purge). */
- ulint zip_size,/*!< compressed page size in bytes, or 0 */
mem_heap_t* heap); /*!< in: heap where created */
/********************************************************************//**
@@ -63,7 +64,8 @@ row_ext_lookup_ith(
const row_ext_t* ext, /*!< in/out: column prefix cache */
ulint i, /*!< in: index of ext->ext[] */
ulint* len); /*!< out: length of prefix, in bytes,
- at most REC_MAX_INDEX_COL_LEN */
+ at most the length determined by
+ DICT_MAX_FIELD_LEN_BY_FORMAT() */
/********************************************************************//**
Looks up a column prefix of an externally stored column.
@return column prefix, or NULL if the column is not stored externally,
@@ -78,13 +80,18 @@ row_ext_lookup(
dict_col_get_no(); NOT relative to the
records in the clustered index */
ulint* len); /*!< out: length of prefix, in bytes,
- at most REC_MAX_INDEX_COL_LEN */
+ at most the length determined by
+ DICT_MAX_FIELD_LEN_BY_FORMAT() */
/** Prefixes of externally stored columns */
struct row_ext_struct{
ulint n_ext; /*!< number of externally stored columns */
const ulint* ext; /*!< col_no's of externally stored columns */
byte* buf; /*!< backing store of the column prefix cache */
+ ulint max_len;/*!< maximum prefix length, it could be
+ REC_ANTELOPE_MAX_INDEX_COL_LEN or
+ REC_VERSION_56_MAX_INDEX_COL_LEN depending
+ on row format */
ulint len[1]; /*!< prefix lengths; 0 if not cached */
};
diff --git a/storage/xtradb/include/row0ext.ic b/storage/xtradb/include/row0ext.ic
index 82771a9312a..466046b2821 100644
--- a/storage/xtradb/include/row0ext.ic
+++ b/storage/xtradb/include/row0ext.ic
@@ -37,7 +37,7 @@ row_ext_lookup_ith(
const row_ext_t* ext, /*!< in/out: column prefix cache */
ulint i, /*!< in: index of ext->ext[] */
ulint* len) /*!< out: length of prefix, in bytes,
- at most REC_MAX_INDEX_COL_LEN */
+ at most ext->max_len */
{
ut_ad(ext);
ut_ad(len);
@@ -45,11 +45,14 @@ row_ext_lookup_ith(
*len = ext->len[i];
+ ut_ad(*len <= ext->max_len);
+ ut_ad(ext->max_len > 0);
+
if (UNIV_UNLIKELY(*len == 0)) {
/* The BLOB could not be fetched to the cache. */
return(field_ref_zero);
} else {
- return(ext->buf + i * REC_MAX_INDEX_COL_LEN);
+ return(ext->buf + i * ext->max_len);
}
}
@@ -67,7 +70,7 @@ row_ext_lookup(
dict_col_get_no(); NOT relative to the
records in the clustered index */
ulint* len) /*!< out: length of prefix, in bytes,
- at most REC_MAX_INDEX_COL_LEN */
+ at most ext->max_len */
{
ulint i;
diff --git a/storage/xtradb/include/row0ins.h b/storage/xtradb/include/row0ins.h
index 9f93565ddb7..810973e61a7 100644
--- a/storage/xtradb/include/row0ins.h
+++ b/storage/xtradb/include/row0ins.h
@@ -84,9 +84,10 @@ ulint
row_ins_index_entry(
/*================*/
dict_index_t* index, /*!< in: index */
- dtuple_t* entry, /*!< in: index entry to insert */
+ dtuple_t* entry, /*!< in/out: index entry to insert */
ulint n_ext, /*!< in: number of externally stored columns */
- ibool foreign,/*!< in: TRUE=check foreign key constraints */
+ ibool foreign,/*!< in: TRUE=check foreign key constraints
+ (foreign=FALSE only during CREATE INDEX) */
que_thr_t* thr); /*!< in: query thread */
/***********************************************************//**
Inserts a row to a table. This is a high-level function used in
diff --git a/storage/xtradb/include/row0merge.h b/storage/xtradb/include/row0merge.h
index fbeb125ce7b..be7c77e7724 100644
--- a/storage/xtradb/include/row0merge.h
+++ b/storage/xtradb/include/row0merge.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h
index 9090e476bfd..ff00f031e91 100644
--- a/storage/xtradb/include/row0mysql.h
+++ b/storage/xtradb/include/row0mysql.h
@@ -103,6 +103,17 @@ row_mysql_read_blob_ref(
ulint col_len); /*!< in: BLOB reference length
(not BLOB length) */
/**************************************************************//**
+Pad a column with spaces. */
+UNIV_INTERN
+void
+row_mysql_pad_col(
+/*==============*/
+ ulint mbminlen, /*!< in: minimum size of a character,
+ in bytes */
+ byte* pad, /*!< out: padded buffer */
+ ulint len); /*!< in: number of bytes to pad */
+
+/**************************************************************//**
Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
The counterpart of this function is row_sel_field_store_in_mysql_format() in
row0sel.c.
@@ -384,6 +395,14 @@ row_insert_stats_for_mysql(
dict_index_t* index,
trx_t* trx);
/*********************************************************************//**
+*/
+UNIV_INTERN
+int
+row_delete_stats_for_mysql(
+/*=======================*/
+ dict_index_t* index,
+ trx_t* trx);
+/*********************************************************************//**
Scans a table create SQL string and adds to the data dictionary
the foreign key constraints declared in the string. This function
should be called after the indexes for a table have been created.
@@ -535,6 +554,10 @@ struct mysql_row_templ_struct {
Innobase record in the current index;
not defined if template_type is
ROW_MYSQL_WHOLE_ROW */
+ ulint clust_rec_field_no; /*!< field number of the column in an
+ Innobase record in the clustered index;
+ not defined if template_type is
+ ROW_MYSQL_WHOLE_ROW */
ulint mysql_col_offset; /*!< offset of the column in the MySQL
row format */
ulint mysql_col_len; /*!< length of the column in the MySQL
@@ -573,7 +596,7 @@ struct mysql_row_templ_struct {
#define ROW_PREBUILT_ALLOCATED 78540783
#define ROW_PREBUILT_FREED 26423527
-typedef int (*index_cond_func_t)(void *param);
+typedef int (*idx_cond_func_t)(void *param);
/** A struct for (sometimes lazily) prebuilt structures in an Innobase table
handle used within MySQL; these are used to save CPU time. */
@@ -774,7 +797,7 @@ struct row_prebuilt_struct {
ulint magic_n2; /*!< this should be the same as
magic_n */
/*----------------------*/
- index_cond_func_t idx_cond_func;/* Index Condition Pushdown function,
+ idx_cond_func_t idx_cond_func; /* Index Condition Pushdown function,
or NULL if there is none set */
void* idx_cond_func_arg;/* ICP function argument */
ulint n_index_fields; /* Number of fields at the start of
diff --git a/storage/xtradb/include/row0purge.h b/storage/xtradb/include/row0purge.h
index 89ec54fb54a..485d51dbc83 100644
--- a/storage/xtradb/include/row0purge.h
+++ b/storage/xtradb/include/row0purge.h
@@ -45,6 +45,28 @@ row_purge_node_create(
que_thr_t* parent, /*!< in: parent node, i.e., a thr node */
mem_heap_t* heap); /*!< in: memory heap where created */
/***********************************************************//**
+Determines if it is possible to remove a secondary index entry.
+Removal is possible if the secondary index entry does not refer to any
+not delete marked version of a clustered index record where DB_TRX_ID
+is newer than the purge view.
+
+NOTE: This function should only be called by the purge thread, only
+while holding a latch on the leaf page of the secondary index entry
+(or keeping the buffer pool watch on the page). It is possible that
+this function first returns TRUE and then FALSE, if a user transaction
+inserts a record that the secondary index entry would refer to.
+However, in that case, the user transaction would also re-insert the
+secondary index entry after purge has removed it and released the leaf
+page latch.
+@return TRUE if the secondary index record can be purged */
+UNIV_INTERN
+ibool
+row_purge_poss_sec(
+/*===============*/
+ purge_node_t* node, /*!< in/out: row purge node */
+ dict_index_t* index, /*!< in: secondary index */
+ const dtuple_t* entry); /*!< in: secondary index entry */
+/***************************************************************
Does the purge operation for a single undo log record. This is a high-level
function used in an SQL execution graph.
@return query thread to run next or NULL */
diff --git a/storage/xtradb/include/row0row.h b/storage/xtradb/include/row0row.h
index 723b7b53395..75e15d67246 100644
--- a/storage/xtradb/include/row0row.h
+++ b/storage/xtradb/include/row0row.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -41,13 +41,24 @@ Created 4/20/1996 Heikki Tuuri
Gets the offset of the trx id field, in bytes relative to the origin of
a clustered index record.
@return offset of DATA_TRX_ID */
-UNIV_INTERN
+UNIV_INLINE
ulint
-row_get_trx_id_offset(
-/*==================*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
+row_get_trx_id_offset_func(
+/*=======================*/
+#ifdef UNIV_DEBUG
+ const rec_t* rec, /*!< in: record */
+#endif /* UNIV_DEBUG */
+ const dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ __attribute__((nonnull, warn_unused_result));
+#ifdef UNIV_DEBUG
+# define row_get_trx_id_offset(rec, index, offsets) \
+ row_get_trx_id_offset_func(rec, index, offsets)
+#else /* UNIV_DEBUG */
+# define row_get_trx_id_offset(rec, index, offsets) \
+ row_get_trx_id_offset_func(index, offsets)
+#endif /* UNIV_DEBUG */
+
/*********************************************************************//**
Reads the trx id field from a clustered index record.
@return value of the field */
@@ -55,9 +66,10 @@ UNIV_INLINE
trx_id_t
row_get_rec_trx_id(
/*===============*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets);/*!< in: rec_get_offsets(rec, index) */
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ __attribute__((nonnull, warn_unused_result));
/*********************************************************************//**
Reads the roll pointer field from a clustered index record.
@return value of the field */
@@ -257,11 +269,25 @@ row_get_clust_rec(
dict_index_t* index, /*!< in: secondary index */
dict_index_t** clust_index,/*!< out: clustered index */
mtr_t* mtr); /*!< in: mtr */
+
+/** Result of row_search_index_entry */
+enum row_search_result {
+ ROW_FOUND = 0, /*!< the record was found */
+ ROW_NOT_FOUND, /*!< record not found */
+ ROW_BUFFERED, /*!< one of BTR_INSERT, BTR_DELETE, or
+ BTR_DELETE_MARK was specified, the
+ secondary index leaf page was not in
+ the buffer pool, and the operation was
+ enqueued in the insert/delete buffer */
+ ROW_NOT_DELETED_REF /*!< BTR_DELETE was specified, and
+ row_purge_poss_sec() failed */
+};
+
/***************************************************************//**
Searches an index record.
-@return TRUE if found */
+@return whether the record was found or buffered */
UNIV_INTERN
-ibool
+enum row_search_result
row_search_index_entry(
/*===================*/
dict_index_t* index, /*!< in: index */
diff --git a/storage/xtradb/include/row0row.ic b/storage/xtradb/include/row0row.ic
index 05c007641af..9d19e430e16 100644
--- a/storage/xtradb/include/row0row.ic
+++ b/storage/xtradb/include/row0row.ic
@@ -28,15 +28,45 @@ Created 4/20/1996 Heikki Tuuri
#include "trx0undo.h"
/*********************************************************************//**
+Gets the offset of trx id field, in bytes relative to the origin of
+a clustered index record.
+@return offset of DATA_TRX_ID */
+UNIV_INLINE
+ulint
+row_get_trx_id_offset_func(
+/*=======================*/
+#ifdef UNIV_DEBUG
+ const rec_t* rec, /*!< in: record */
+#endif /* UNIV_DEBUG */
+ const dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+{
+ ulint pos;
+ ulint offset;
+ ulint len;
+
+ ut_ad(dict_index_is_clust(index));
+ ut_ad(rec_offs_validate(rec, index, offsets));
+
+ pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID);
+
+ offset = rec_get_nth_field_offs(offsets, pos, &len);
+
+ ut_ad(len == DATA_TRX_ID_LEN);
+
+ return(offset);
+}
+
+/*********************************************************************//**
Reads the trx id field from a clustered index record.
@return value of the field */
UNIV_INLINE
trx_id_t
row_get_rec_trx_id(
/*===============*/
- const rec_t* rec, /*!< in: record */
- dict_index_t* index, /*!< in: clustered index */
- const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
+ const rec_t* rec, /*!< in: record */
+ const dict_index_t* index, /*!< in: clustered index */
+ const ulint* offsets)/*!< in: rec_get_offsets(rec, index) */
{
ulint offset;
diff --git a/storage/xtradb/include/row0types.h b/storage/xtradb/include/row0types.h
index 1be729206ba..7d6a7c8e2b1 100644
--- a/storage/xtradb/include/row0types.h
+++ b/storage/xtradb/include/row0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
diff --git a/storage/xtradb/include/row0upd.h b/storage/xtradb/include/row0upd.h
index 635d746d5a1..c275c1da78e 100644
--- a/storage/xtradb/include/row0upd.h
+++ b/storage/xtradb/include/row0upd.h
@@ -126,13 +126,13 @@ UNIV_INTERN
void
row_upd_index_entry_sys_field(
/*==========================*/
- const dtuple_t* entry, /*!< in: index entry, where the memory buffers
- for sys fields are already allocated:
+ dtuple_t* entry, /*!< in/out: index entry, where the memory
+ buffers for sys fields are already allocated:
the function just copies the new values to
them */
dict_index_t* index, /*!< in: clustered index */
ulint type, /*!< in: DATA_TRX_ID or DATA_ROLL_PTR */
- dulint val); /*!< in: value to write */
+ ib_uint64_t val); /*!< in: value to write */
/*********************************************************************//**
Creates an update node for a query graph.
@return own: update node */
@@ -167,8 +167,11 @@ row_upd_changes_field_size_or_external(
const upd_t* update);/*!< in: update vector */
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
-Replaces the new column values stored in the update vector to the record
-given. No field size changes are allowed. */
+Replaces the new column values stored in the update vector to the
+record given. No field size changes are allowed. This function is
+usually invoked on a clustered index. The only use case for a
+secondary index is row_ins_sec_index_entry_by_modify() or its
+counterpart in ibuf_insert_to_index_page(). */
UNIV_INTERN
void
row_upd_rec_in_place(
@@ -277,16 +280,29 @@ NOTE: we compare the fields as binary strings!
@return TRUE if update vector changes an ordering field in the index record */
UNIV_INTERN
ibool
-row_upd_changes_ord_field_binary(
-/*=============================*/
+row_upd_changes_ord_field_binary_func(
+/*==================================*/
+ dict_index_t* index, /*!< in: index of the record */
+ const upd_t* update, /*!< in: update vector for the row; NOTE: the
+ field numbers in this MUST be clustered index
+ positions! */
+#ifdef UNIV_DEBUG
+ const que_thr_t*thr, /*!< in: query thread */
+#endif /* UNIV_DEBUG */
const dtuple_t* row, /*!< in: old value of row, or NULL if the
row and the data values in update are not
known when this function is called, e.g., at
compile time */
- dict_index_t* index, /*!< in: index of the record */
- const upd_t* update);/*!< in: update vector for the row; NOTE: the
- field numbers in this MUST be clustered index
- positions! */
+ const row_ext_t*ext) /*!< NULL, or prefixes of the externally
+ stored columns in the old row */
+ __attribute__((nonnull(1,2), warn_unused_result));
+#ifdef UNIV_DEBUG
+# define row_upd_changes_ord_field_binary(index,update,thr,row,ext) \
+ row_upd_changes_ord_field_binary_func(index,update,thr,row,ext)
+#else /* UNIV_DEBUG */
+# define row_upd_changes_ord_field_binary(index,update,thr,row,ext) \
+ row_upd_changes_ord_field_binary_func(index,update,row,ext)
+#endif /* UNIV_DEBUG */
/***********************************************************//**
Checks if an update vector changes an ordering field of an index record.
This function is fast if the update vector is short or the number of ordering
@@ -459,11 +475,16 @@ struct upd_node_struct{
#define UPD_NODE_INSERT_CLUSTERED 3 /* clustered index record should be
inserted, old record is already delete
marked */
-#define UPD_NODE_UPDATE_ALL_SEC 4 /* an ordering field of the clustered
+#define UPD_NODE_INSERT_BLOB 4 /* clustered index record should be
+ inserted, old record is already
+ delete-marked; non-updated BLOBs
+ should be inherited by the new record
+ and disowned by the old record */
+#define UPD_NODE_UPDATE_ALL_SEC 5 /* an ordering field of the clustered
index record was changed, or this is
a delete operation: should update
all the secondary index records */
-#define UPD_NODE_UPDATE_SOME_SEC 5 /* secondary index entries should be
+#define UPD_NODE_UPDATE_SOME_SEC 6 /* secondary index entries should be
looked at and updated if an ordering
field changed */
diff --git a/storage/xtradb/include/row0upd.ic b/storage/xtradb/include/row0upd.ic
index 18e22f1eca9..2968a548b34 100644
--- a/storage/xtradb/include/row0upd.ic
+++ b/storage/xtradb/include/row0upd.ic
@@ -158,7 +158,7 @@ row_upd_rec_sys_fields(
ut_ad(dict_index_is_clust(index));
ut_ad(rec_offs_validate(rec, index, offsets));
#ifdef UNIV_SYNC_DEBUG
- if (!rw_lock_own(&btr_search_latch, RW_LOCK_EX)) {
+ if (!rw_lock_own(btr_search_get_latch(index->id), RW_LOCK_EX)) {
ut_ad(!buf_block_align(rec)->is_hashed);
}
#endif /* UNIV_SYNC_DEBUG */
diff --git a/storage/xtradb/include/srv0que.h b/storage/xtradb/include/srv0que.h
deleted file mode 100644
index 82ee7739ef7..00000000000
--- a/storage/xtradb/include/srv0que.h
+++ /dev/null
@@ -1,42 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/srv0que.h
-Server query execution
-
-Created 6/5/1996 Heikki Tuuri
-*******************************************************/
-
-#ifndef srv0que_h
-#define srv0que_h
-
-#include "univ.i"
-#include "que0types.h"
-
-/**********************************************************************//**
-Enqueues a task to server task queue and releases a worker thread, if there
-is a suspended one. */
-UNIV_INTERN
-void
-srv_que_task_enqueue_low(
-/*=====================*/
- que_thr_t* thr); /*!< in: query thread */
-
-#endif
-
diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h
index 8c64d5cee71..ebd83043040 100644
--- a/storage/xtradb/include/srv0srv.h
+++ b/storage/xtradb/include/srv0srv.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, 2009, Google Inc.
Copyright (c) 2009, Percona Inc.
@@ -60,6 +60,15 @@ extern os_event_t srv_lock_timeout_thread_event;
/* This event is set to tell the purge thread to shut down */
extern os_event_t srv_purge_thread_event;
+/* The monitor thread waits on this event. */
+extern os_event_t srv_monitor_event;
+
+/* The lock timeout thread waits on this event. */
+extern os_event_t srv_timeout_event;
+
+/* The error monitor thread waits on this event. */
+extern os_event_t srv_error_event;
+
/* If the last data file is auto-extended, we add this many pages to it
at a time */
#define SRV_AUTO_EXTEND_INCREMENT \
@@ -68,9 +77,6 @@ at a time */
/* prototypes for new functions added to ha_innodb.cc */
ibool innobase_get_slow_log();
-/* This is set to TRUE if the MySQL user has set it in MySQL */
-extern ibool srv_lower_case_table_names;
-
/* Mutex for locking srv_monitor_file */
extern mutex_t srv_monitor_file_mutex;
/* Temporary file for innodb monitor output */
@@ -107,12 +113,20 @@ extern ulint srv_file_format;
/** Whether to check file format during startup. A value of
DICT_TF_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
set it to the highest format we support. */
-extern ulint srv_check_file_format_at_startup;
+extern ulint srv_max_file_format_at_startup;
/** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog;
#endif /* !UNIV_HOTBACKUP */
+/* If this flag is TRUE, then we will use the native aio of the
+OS (provided we compiled Innobase with it in), otherwise we will
+use simulated aio we build below with threads.
+Currently we support native aio on windows and linux */
+extern my_bool srv_use_native_aio;
+#ifdef __WIN__
+extern ibool srv_use_native_conditions;
+#endif
extern ulint srv_n_data_files;
extern char** srv_data_file_names;
extern ulint* srv_data_file_sizes;
@@ -120,12 +134,8 @@ extern ulint* srv_data_file_is_raw_partition;
extern char* srv_doublewrite_file;
-extern ibool srv_extra_undoslots;
-
extern ibool srv_recovery_stats;
-extern ulint srv_use_purge_thread;
-
extern ibool srv_auto_extend_last_data_file;
extern ulint srv_last_file_size_max;
extern char** srv_log_group_home_dirs;
@@ -138,7 +148,8 @@ extern ulint srv_n_log_groups;
extern ulint srv_n_log_files;
extern ulint srv_log_file_size;
extern ulint srv_log_buffer_size;
-extern ulong srv_flush_log_at_trx_commit;
+//extern ulong srv_flush_log_at_trx_commit;
+extern char srv_use_global_flush_log_at_trx_commit;
extern char srv_adaptive_flushing;
@@ -154,15 +165,12 @@ extern my_bool srv_use_sys_malloc;
extern ibool srv_use_sys_malloc;
#endif /* UNIV_HOTBACKUP */
extern ulint srv_buf_pool_size; /*!< requested size in bytes */
+extern ulint srv_buf_pool_instances; /*!< requested number of buffer pool instances */
extern ulint srv_buf_pool_old_size; /*!< previously requested size */
extern ulint srv_buf_pool_curr_size; /*!< current size in bytes */
extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size;
-extern uint srv_buffer_pool_shm_key;
-extern ibool srv_buffer_pool_shm_is_reused;
-extern ibool srv_buffer_pool_shm_checksum;
-
extern ibool srv_thread_concurrency_timer_based;
extern ulint srv_n_file_io_threads;
@@ -177,10 +185,15 @@ capacity. PCT_IO(5) -> returns the number of IO operations that
is 5% of the max where max is srv_io_capacity. */
#define PCT_IO(p) ((ulong) (srv_io_capacity * ((double) p / 100.0)))
+/* The "innodb_stats_method" setting, decides how InnoDB is going
+to treat NULL value when collecting statistics. It is not defined
+as enum type because the configure option takes unsigned integer type. */
+extern ulong srv_innodb_stats_method;
+
#ifdef UNIV_LOG_ARCHIVE
-extern ibool srv_log_archive_on;
-extern ibool srv_archive_recovery;
-extern dulint srv_archive_recovery_limit_lsn;
+extern ibool srv_log_archive_on;
+extern ibool srv_archive_recovery;
+extern ib_uint64_t srv_archive_recovery_limit_lsn;
#endif /* UNIV_LOG_ARCHIVE */
extern char* srv_file_flush_method_str;
@@ -208,11 +221,7 @@ extern ulint srv_fast_shutdown; /* If this is 1, do not do a
extern ibool srv_innodb_status;
extern unsigned long long srv_stats_sample_pages;
-extern ulong srv_stats_method;
-#define SRV_STATS_METHOD_NULLS_EQUAL 0
-#define SRV_STATS_METHOD_NULLS_NOT_EQUAL 1
-#define SRV_STATS_METHOD_IGNORE_NULLS 2
-extern ulong srv_stats_auto_update;
+extern ulint srv_stats_auto_update;
extern ulint srv_stats_update_need_lock;
extern ibool srv_use_sys_stats_table;
@@ -220,9 +229,6 @@ extern ibool srv_use_doublewrite_buf;
extern ibool srv_use_checksums;
extern ibool srv_fast_checksum;
-extern ibool srv_set_thread_priorities;
-extern int srv_query_thread_priority;
-
extern ulong srv_max_buf_pool_modified_pct;
extern ulong srv_max_purge_lag;
@@ -232,16 +238,17 @@ extern long long srv_ibuf_max_size;
extern ulong srv_ibuf_active_contract;
extern ulong srv_ibuf_accel_rate;
extern ulint srv_checkpoint_age_target;
-extern ulong srv_flush_neighbor_pages;
-extern ulong srv_enable_unsafe_group_commit;
-extern ulong srv_read_ahead;
-extern ulong srv_adaptive_checkpoint;
+extern ulint srv_flush_neighbor_pages;
+extern ulint srv_enable_unsafe_group_commit;
+extern ulint srv_read_ahead;
+extern ulint srv_adaptive_flushing_method;
extern ulong srv_expand_import;
extern ulint srv_pass_corrupt_table;
-extern ulong srv_extra_rsegments;
-extern ulong srv_dict_size_limit;
+extern ulint srv_dict_size_limit;
+
+extern ulint srv_lazy_drop_table;
/*-------------------------------------------*/
extern ulint srv_n_rows_inserted;
@@ -265,6 +272,8 @@ extern ulong srv_thread_sleep_delay;
extern ulong srv_spin_wait_delay;
extern ibool srv_priority_boost;
+extern ulint srv_truncated_status_writes;
+
extern ulint srv_mem_pool_size;
extern ulint srv_lock_table_size;
@@ -315,6 +324,15 @@ extern ulint srv_os_log_pending_writes;
log buffer and have to flush it */
extern ulint srv_log_waits;
+/* the number of purge threads to use from the worker pool (currently 0 or 1) */
+extern ulong srv_n_purge_threads;
+
+/* the number of pages to purge in one batch */
+extern ulong srv_purge_batch_size;
+
+/* the number of rollback segments to use */
+extern ulong srv_rollback_segments;
+
/* variable that counts amount of data read in total (in bytes) */
extern ulint srv_data_read;
@@ -341,6 +359,8 @@ extern ulint srv_buf_pool_wait_free;
buffer pool to disk */
extern ulint srv_buf_pool_flushed;
+extern ulint buf_lru_flush_page_count;
+
/** Number of buffer pool reads that led to the
reading of a disk page */
extern ulint srv_buf_pool_reads;
@@ -359,6 +379,38 @@ typedef struct srv_sys_struct srv_sys_t;
/** The server system */
extern srv_sys_t* srv_sys;
+
+# ifdef UNIV_PFS_THREAD
+/* Keys to register InnoDB threads with performance schema */
+extern mysql_pfs_key_t trx_rollback_clean_thread_key;
+extern mysql_pfs_key_t io_handler_thread_key;
+extern mysql_pfs_key_t srv_lock_timeout_thread_key;
+extern mysql_pfs_key_t srv_error_monitor_thread_key;
+extern mysql_pfs_key_t srv_monitor_thread_key;
+extern mysql_pfs_key_t srv_master_thread_key;
+extern mysql_pfs_key_t srv_purge_thread_key;
+
+/* This macro register the current thread and its key with performance
+schema */
+# define pfs_register_thread(key) \
+do { \
+ if (PSI_server) { \
+ struct PSI_thread* psi = PSI_server->new_thread(key, NULL, 0);\
+ if (psi) { \
+ PSI_server->set_thread(psi); \
+ } \
+ } \
+} while (0)
+
+/* This macro delist the current thread from performance schema */
+# define pfs_delete_thread() \
+do { \
+ if (PSI_server) { \
+ PSI_server->delete_current_thread(); \
+ } \
+} while (0)
+# endif /* UNIV_PFS_THREAD */
+
#endif /* !UNIV_HOTBACKUP */
/** Types of raw partitions in innodb_data_file_path */
@@ -380,7 +432,7 @@ enum {
SRV_UNIX_NOSYNC, /*!< do not flush after writing */
SRV_UNIX_O_DIRECT, /*!< invoke os_file_set_nocache() on
data files */
- SRV_UNIX_ALL_O_DIRECT /* new method for examination: logfile also open O_DIRECT */
+ SRV_UNIX_ALL_O_DIRECT /* new method for examination: logfile also open O_DIRECT */
};
/** Alternatives for file i/o in Windows */
@@ -413,21 +465,24 @@ enum {
in connection with recovery */
};
+/* Alternatives for srv_innodb_stats_method, which could be changed by
+setting innodb_stats_method */
+enum srv_stats_method_name_enum {
+ SRV_STATS_NULLS_EQUAL, /* All NULL values are treated as
+ equal. This is the default setting
+ for innodb_stats_method */
+ SRV_STATS_NULLS_UNEQUAL, /* All NULL values are treated as
+ NOT equal. */
+ SRV_STATS_NULLS_IGNORED /* NULL values are ignored */
+};
+
+typedef enum srv_stats_method_name_enum srv_stats_method_name_t;
+
#ifndef UNIV_HOTBACKUP
/** Types of threads existing in the system. */
enum srv_thread_type {
- SRV_COM = 1, /**< threads serving communication and queries */
- SRV_CONSOLE, /**< thread serving console */
- SRV_WORKER, /**< threads serving parallelized queries and
+ SRV_WORKER = 0, /**< threads serving parallelized queries and
queries released from lock wait */
-#if 0
- /* Utility threads */
- SRV_BUFFER, /**< thread flushing dirty buffer blocks */
- SRV_RECOVERY, /**< threads finishing a recovery */
- SRV_INSERT, /**< thread flushing the insert buffer to disk */
-#endif
- SRV_PURGE, /* thread purging undo records */
- SRV_PURGE_WORKER, /* thread purging undo records */
SRV_MASTER /**< the master thread, (whose type number must
be biggest) */
};
@@ -466,12 +521,13 @@ ulint
srv_get_n_threads(void);
/*===================*/
/*********************************************************************//**
-Returns the calling thread type.
-@return SRV_COM, ... */
-
-enum srv_thread_type
-srv_get_thread_type(void);
-/*=====================*/
+Check whether thread type has reserved a slot.
+@return slot number or UNDEFINED if not found*/
+UNIV_INTERN
+ulint
+srv_thread_has_reserved_slot(
+/*=========================*/
+ enum srv_thread_type type); /*!< in: thread type to check */
/*********************************************************************//**
Sets the info describing an i/o thread current state. */
UNIV_INTERN
@@ -501,21 +557,12 @@ srv_master_thread(
/*==============*/
void* arg); /*!< in: a dummy parameter required by
os_thread_create */
-/*************************************************************************
-The undo purge thread. */
-UNIV_INTERN
-os_thread_ret_t
-srv_purge_thread(
-/*=============*/
- void* arg); /* in: a dummy parameter required by
- os_thread_create */
-/*************************************************************************
-The undo purge thread. */
+/*******************************************************************//**
+Wakes up the purge thread if it's not already awake. */
UNIV_INTERN
-os_thread_ret_t
-srv_purge_worker_thread(
-/*====================*/
- void* arg);
+void
+srv_wake_purge_thread(void);
+/*=======================*/
/*******************************************************************//**
Tells the Innobase server that there has been activity in the database
and wakes up the master thread if it is suspended (not sleeping). Used
@@ -532,6 +579,16 @@ UNIV_INTERN
void
srv_wake_master_thread(void);
/*========================*/
+/*******************************************************************//**
+Tells the purge thread that there has been activity in the database
+and wakes up the purge thread if it is suspended (not sleeping). Note
+that there is a small chance that the purge thread stays suspended
+(we do not protect our operation with the kernel mutex, for
+performace reasons). */
+UNIV_INTERN
+void
+srv_wake_purge_thread_if_not_active(void);
+/*=====================================*/
/*********************************************************************//**
Puts an OS thread to wait if there are too many concurrent threads
(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
@@ -648,14 +705,40 @@ void
srv_export_innodb_status(void);
/*==========================*/
-/** Thread slot in the thread table */
-typedef struct srv_slot_struct srv_slot_t;
+/*********************************************************************//**
+Asynchronous purge thread.
+@return a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+srv_purge_thread(
+/*=============*/
+ void* arg __attribute__((unused))); /*!< in: a dummy parameter
+ required by os_thread_create */
-/** Thread table is an array of slots */
-typedef srv_slot_t srv_table_t;
+/**********************************************************************//**
+Enqueues a task to server task queue and releases a worker thread, if there
+is a suspended one. */
+UNIV_INTERN
+void
+srv_que_task_enqueue_low(
+/*=====================*/
+ que_thr_t* thr); /*!< in: query thread */
+
+/**********************************************************************//**
+Check whether any background thread is active.
+@return FALSE if all are are suspended or have exited. */
+UNIV_INTERN
+ibool
+srv_is_any_background_thread_active(void);
+/*======================================*/
/** Status variables to be passed to MySQL */
struct export_var_struct{
+ ulint innodb_adaptive_hash_cells;
+ ulint innodb_adaptive_hash_heap_buffers;
+ ulint innodb_adaptive_hash_hash_searches;
+ ulint innodb_adaptive_hash_non_hash_searches;
+ ulint innodb_background_log_sync;
ulint innodb_data_pending_reads; /*!< Pending reads */
ulint innodb_data_pending_writes; /*!< Pending writes */
ulint innodb_data_pending_fsyncs; /*!< Pending fsyncs */
@@ -673,20 +756,54 @@ struct export_var_struct{
#ifdef UNIV_DEBUG
ulint innodb_buffer_pool_pages_latched; /*!< Latched pages */
#endif /* UNIV_DEBUG */
+ ulint innodb_buffer_pool_pages_made_not_young;
+ ulint innodb_buffer_pool_pages_made_young;
+ ulint innodb_buffer_pool_pages_old;
ulint innodb_buffer_pool_read_requests; /*!< buf_pool->stat.n_page_gets */
ulint innodb_buffer_pool_reads; /*!< srv_buf_pool_reads */
ulint innodb_buffer_pool_wait_free; /*!< srv_buf_pool_wait_free */
ulint innodb_buffer_pool_pages_flushed; /*!< srv_buf_pool_flushed */
+ ulint innodb_buffer_pool_pages_LRU_flushed; /*!< buf_lru_flush_page_count */
ulint innodb_buffer_pool_write_requests;/*!< srv_buf_pool_write_requests */
ulint innodb_buffer_pool_read_ahead; /*!< srv_read_ahead */
ulint innodb_buffer_pool_read_ahead_evicted;/*!< srv_read_ahead evicted*/
- ulint innodb_deadlocks; /* ??? */
+ ulint innodb_checkpoint_age;
+ ulint innodb_checkpoint_max_age;
+ ulint innodb_checkpoint_target_age;
ulint innodb_dblwr_pages_written; /*!< srv_dblwr_pages_written */
ulint innodb_dblwr_writes; /*!< srv_dblwr_writes */
+ ulint innodb_deadlocks;
ibool innodb_have_atomic_builtins; /*!< HAVE_ATOMIC_BUILTINS */
+ ulint innodb_history_list_length;
+ ulint innodb_ibuf_size;
+ ulint innodb_ibuf_free_list;
+ ulint innodb_ibuf_segment_size;
+ ulint innodb_ibuf_merges;
+ ulint innodb_ibuf_merged_inserts;
+ ulint innodb_ibuf_merged_delete_marks;
+ ulint innodb_ibuf_merged_deletes;
+ ulint innodb_ibuf_discarded_inserts;
+ ulint innodb_ibuf_discarded_delete_marks;
+ ulint innodb_ibuf_discarded_deletes;
ulint innodb_log_waits; /*!< srv_log_waits */
ulint innodb_log_write_requests; /*!< srv_log_write_requests */
ulint innodb_log_writes; /*!< srv_log_writes */
+ ib_int64_t innodb_lsn_current;
+ ib_int64_t innodb_lsn_flushed;
+ ib_int64_t innodb_lsn_last_checkpoint;
+ ulint innodb_master_thread_1_second_loops;
+ ulint innodb_master_thread_10_second_loops;
+ ulint innodb_master_thread_background_loops;
+ ulint innodb_master_thread_main_flush_loops;
+ ulint innodb_master_thread_sleeps;
+ ib_int64_t innodb_max_trx_id;
+ ulint innodb_mem_adaptive_hash;
+ ulint innodb_mem_dictionary;
+ ulint innodb_mem_total;
+ ib_int64_t innodb_mutex_os_waits;
+ ib_int64_t innodb_mutex_spin_rounds;
+ ib_int64_t innodb_mutex_spin_waits;
+ ib_int64_t innodb_oldest_view_low_limit_trx_id;
ulint innodb_os_log_written; /*!< srv_os_log_written */
ulint innodb_os_log_fsyncs; /*!< fil_n_log_flushes */
ulint innodb_os_log_pending_writes; /*!< srv_os_log_pending_writes */
@@ -695,6 +812,8 @@ struct export_var_struct{
ulint innodb_pages_created; /*!< buf_pool->stat.n_pages_created */
ulint innodb_pages_read; /*!< buf_pool->stat.n_pages_read */
ulint innodb_pages_written; /*!< buf_pool->stat.n_pages_written */
+ ib_int64_t innodb_purge_trx_id;
+ ib_int64_t innodb_purge_undo_no;
ulint innodb_row_lock_waits; /*!< srv_n_lock_wait_count */
ulint innodb_row_lock_current_waits; /*!< srv_n_lock_wait_current_count */
ib_int64_t innodb_row_lock_time; /*!< srv_n_lock_wait_time
@@ -704,12 +823,26 @@ struct export_var_struct{
/ srv_n_lock_wait_count */
ulint innodb_row_lock_time_max; /*!< srv_n_lock_max_wait_time
/ 1000 */
+ ulint innodb_current_row_locks;
ulint innodb_rows_read; /*!< srv_n_rows_read */
ulint innodb_rows_inserted; /*!< srv_n_rows_inserted */
ulint innodb_rows_updated; /*!< srv_n_rows_updated */
ulint innodb_rows_deleted; /*!< srv_n_rows_deleted */
+ ulint innodb_truncated_status_writes; /*!< srv_truncated_status_writes */
+ ib_int64_t innodb_s_lock_os_waits;
+ ib_int64_t innodb_s_lock_spin_rounds;
+ ib_int64_t innodb_s_lock_spin_waits;
+ ib_int64_t innodb_x_lock_os_waits;
+ ib_int64_t innodb_x_lock_spin_rounds;
+ ib_int64_t innodb_x_lock_spin_waits;
};
+/** Thread slot in the thread table */
+typedef struct srv_slot_struct srv_slot_t;
+
+/** Thread table is an array of slots */
+typedef srv_slot_t srv_table_t;
+
/** The server system struct */
struct srv_sys_struct{
srv_table_t* threads; /*!< server thread table */
@@ -719,8 +852,9 @@ struct srv_sys_struct{
extern ulint srv_n_threads_active[];
#else /* !UNIV_HOTBACKUP */
-# define srv_use_checksums TRUE
# define srv_use_adaptive_hash_indexes FALSE
+# define srv_use_checksums TRUE
+# define srv_use_native_aio FALSE
# define srv_force_recovery 0UL
# define srv_set_io_thread_op_info(t,info) ((void) 0)
# define srv_is_being_started 0
diff --git a/storage/xtradb/include/srv0start.h b/storage/xtradb/include/srv0start.h
index 8abf15da9c1..a7214743887 100644
--- a/storage/xtradb/include/srv0start.h
+++ b/storage/xtradb/include/srv0start.h
@@ -91,10 +91,6 @@ extern ib_uint64_t srv_shutdown_lsn;
/** Log sequence number immediately after startup */
extern ib_uint64_t srv_start_lsn;
-#ifdef __NETWARE__
-void set_panic_flag_for_netware(void);
-#endif
-
#ifdef HAVE_DARWIN_THREADS
/** TRUE if the F_FULLFSYNC option is available */
extern ibool srv_have_fullfsync;
@@ -131,4 +127,7 @@ extern enum srv_shutdown_state srv_shutdown_state;
/** Log 'spaces' have id's >= this */
#define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0UL
+/** reserved for extra system tables */
+#define SRV_EXTRA_SYS_SPACE_FIRST_ID 0xFFFFFFE0UL
+
#endif
diff --git a/storage/xtradb/include/sync0arr.h b/storage/xtradb/include/sync0arr.h
index 5f1280f5e28..6e931346238 100644
--- a/storage/xtradb/include/sync0arr.h
+++ b/storage/xtradb/include/sync0arr.h
@@ -115,8 +115,11 @@ Prints warnings of long semaphore waits to stderr.
@return TRUE if fatal semaphore wait threshold was exceeded */
UNIV_INTERN
ibool
-sync_array_print_long_waits(void);
-/*=============================*/
+sync_array_print_long_waits(
+/*========================*/
+ os_thread_id_t* waiter, /*!< out: longest waiting thread */
+ const void** sema) /*!< out: longest-waited-for semaphore */
+ __attribute__((nonnull));
/********************************************************************//**
Validates the integrity of the wait array. Checks
that the number of reserved cells equals the count variable. */
diff --git a/storage/xtradb/include/sync0rw.h b/storage/xtradb/include/sync0rw.h
index 4edf93f4042..6420bc5d3bf 100644
--- a/storage/xtradb/include/sync0rw.h
+++ b/storage/xtradb/include/sync0rw.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -105,23 +105,142 @@ extern ib_int64_t rw_x_os_wait_count;
set only when UNIV_SYNC_PERF_STAT is defined */
extern ib_int64_t rw_x_exit_count;
+#ifdef UNIV_PFS_RWLOCK
+/* Following are rwlock keys used to register with MySQL
+performance schema */
+# ifdef UNIV_LOG_ARCHIVE
+extern mysql_pfs_key_t archive_lock_key;
+# endif /* UNIV_LOG_ARCHIVE */
+extern mysql_pfs_key_t btr_search_latch_key;
+extern mysql_pfs_key_t buf_pool_page_hash_key;
+extern mysql_pfs_key_t buf_block_lock_key;
+# ifdef UNIV_SYNC_DEBUG
+extern mysql_pfs_key_t buf_block_debug_latch_key;
+# endif /* UNIV_SYNC_DEBUG */
+extern mysql_pfs_key_t dict_operation_lock_key;
+extern mysql_pfs_key_t fil_space_latch_key;
+extern mysql_pfs_key_t checkpoint_lock_key;
+extern mysql_pfs_key_t trx_i_s_cache_lock_key;
+extern mysql_pfs_key_t trx_purge_latch_key;
+extern mysql_pfs_key_t index_tree_rw_lock_key;
+extern mysql_pfs_key_t dict_table_stats_latch_key;
+#endif /* UNIV_PFS_RWLOCK */
+
+
+#ifndef UNIV_PFS_RWLOCK
/******************************************************************//**
Creates, or rather, initializes an rw-lock object in a specified memory
location (which must be appropriately aligned). The rw-lock is initialized
to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
-is necessary only if the memory block containing it is freed. */
-#ifdef UNIV_DEBUG
+is necessary only if the memory block containing it is freed.
+if MySQL performance schema is enabled and "UNIV_PFS_RWLOCK" is
+defined, the rwlock are instrumented with performance schema probes. */
+# ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+# define rw_lock_create(K, L, level) \
+ rw_lock_create_func((L), (level), __FILE__, __LINE__, #L)
+# else /* UNIV_SYNC_DEBUG */
+# define rw_lock_create(K, L, level) \
+ rw_lock_create_func((L), __FILE__, __LINE__, #L)
+# endif/* UNIV_SYNC_DEBUG */
+# else /* UNIV_DEBUG */
+# define rw_lock_create(K, L, level) \
+ rw_lock_create_func((L), #L)
+# endif /* UNIV_DEBUG */
+
+/**************************************************************//**
+NOTE! The following macros should be used in rw locking and
+unlocking, not the corresponding function. */
+
+# define rw_lock_s_lock(M) \
+ rw_lock_s_lock_func((M), 0, __FILE__, __LINE__)
+
+# define rw_lock_s_lock_gen(M, P) \
+ rw_lock_s_lock_func((M), (P), __FILE__, __LINE__)
+
+# define rw_lock_s_lock_nowait(M, F, L) \
+ rw_lock_s_lock_low((M), 0, (F), (L))
+
# ifdef UNIV_SYNC_DEBUG
-# define rw_lock_create(L, level) \
- rw_lock_create_func((L), (level), #L, __FILE__, __LINE__)
-# else /* UNIV_SYNC_DEBUG */
-# define rw_lock_create(L, level) \
- rw_lock_create_func((L), #L, __FILE__, __LINE__)
-# endif /* UNIV_SYNC_DEBUG */
-#else /* UNIV_DEBUG */
-# define rw_lock_create(L, level) \
- rw_lock_create_func((L), #L, NULL, 0)
-#endif /* UNIV_DEBUG */
+# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(P, L)
+# else
+# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L)
+# endif
+
+
+# define rw_lock_x_lock(M) \
+ rw_lock_x_lock_func((M), 0, __FILE__, __LINE__)
+
+# define rw_lock_x_lock_gen(M, P) \
+ rw_lock_x_lock_func((M), (P), __FILE__, __LINE__)
+
+# define rw_lock_x_lock_nowait(M) \
+ rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__)
+
+# ifdef UNIV_SYNC_DEBUG
+# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(P, L)
+# else
+# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L)
+# endif
+
+# define rw_lock_free(M) rw_lock_free_func(M)
+
+#else /* !UNIV_PFS_RWLOCK */
+
+/* Following macros point to Performance Schema instrumented functions. */
+# ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+# define rw_lock_create(K, L, level) \
+ pfs_rw_lock_create_func((K), (L), (level), __FILE__, __LINE__, #L)
+# else /* UNIV_SYNC_DEBUG */
+# define rw_lock_create(K, L, level) \
+ pfs_rw_lock_create_func((K), (L), __FILE__, __LINE__, #L)
+# endif/* UNIV_SYNC_DEBUG */
+# else /* UNIV_DEBUG */
+# define rw_lock_create(K, L, level) \
+ pfs_rw_lock_create_func((K), (L), #L)
+# endif /* UNIV_DEBUG */
+
+/******************************************************************
+NOTE! The following macros should be used in rw locking and
+unlocking, not the corresponding function. */
+
+# define rw_lock_s_lock(M) \
+ pfs_rw_lock_s_lock_func((M), 0, __FILE__, __LINE__)
+
+# define rw_lock_s_lock_gen(M, P) \
+ pfs_rw_lock_s_lock_func((M), (P), __FILE__, __LINE__)
+
+# define rw_lock_s_lock_nowait(M, F, L) \
+ pfs_rw_lock_s_lock_low((M), 0, (F), (L))
+
+# ifdef UNIV_SYNC_DEBUG
+# define rw_lock_s_unlock_gen(L, P) pfs_rw_lock_s_unlock_func(P, L)
+# else
+# define rw_lock_s_unlock_gen(L, P) pfs_rw_lock_s_unlock_func(L)
+# endif
+
+# define rw_lock_x_lock(M) \
+ pfs_rw_lock_x_lock_func((M), 0, __FILE__, __LINE__)
+
+# define rw_lock_x_lock_gen(M, P) \
+ pfs_rw_lock_x_lock_func((M), (P), __FILE__, __LINE__)
+
+# define rw_lock_x_lock_nowait(M) \
+ pfs_rw_lock_x_lock_func_nowait((M), __FILE__, __LINE__)
+
+# ifdef UNIV_SYNC_DEBUG
+# define rw_lock_x_unlock_gen(L, P) pfs_rw_lock_x_unlock_func(P, L)
+# else
+# define rw_lock_x_unlock_gen(L, P) pfs_rw_lock_x_unlock_func(L)
+# endif
+
+# define rw_lock_free(M) pfs_rw_lock_free_func(M)
+
+#endif /* UNIV_PFS_RWLOCK */
+
+#define rw_lock_s_unlock(L) rw_lock_s_unlock_gen(L, 0)
+#define rw_lock_x_unlock(L) rw_lock_x_unlock_gen(L, 0)
/******************************************************************//**
Creates, or rather, initializes an rw-lock object in a specified memory
@@ -137,18 +256,18 @@ rw_lock_create_func(
# ifdef UNIV_SYNC_DEBUG
ulint level, /*!< in: level */
# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
- const char* cmutex_name, /*!< in: mutex name */
const char* cfile_name, /*!< in: file name where created */
- ulint cline); /*!< in: file line where created */
+ ulint cline, /*!< in: file line where created */
+#endif /* UNIV_DEBUG */
+ const char* cmutex_name); /*!< in: mutex name */
/******************************************************************//**
Calling this function is obligatory only if the memory buffer containing
the rw-lock is freed. Removes an rw-lock object from the global list. The
rw-lock is checked to be in the non-locked state. */
UNIV_INTERN
void
-rw_lock_free(
-/*=========*/
+rw_lock_free_func(
+/*==============*/
rw_lock_t* lock); /*!< in: rw-lock */
#ifdef UNIV_DEBUG
/******************************************************************//**
@@ -161,24 +280,6 @@ rw_lock_validate(
/*=============*/
rw_lock_t* lock); /*!< in: rw-lock */
#endif /* UNIV_DEBUG */
-/**************************************************************//**
-NOTE! The following macros should be used in rw s-locking, not the
-corresponding function. */
-
-#define rw_lock_s_lock(M) rw_lock_s_lock_func(\
- (M), 0, __FILE__, __LINE__)
-/**************************************************************//**
-NOTE! The following macros should be used in rw s-locking, not the
-corresponding function. */
-
-#define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(\
- (M), (P), __FILE__, __LINE__)
-/**************************************************************//**
-NOTE! The following macros should be used in rw s-locking, not the
-corresponding function. */
-
-#define rw_lock_s_lock_nowait(M, F, L) rw_lock_s_lock_low(\
- (M), 0, (F), (L))
/******************************************************************//**
Low-level function which tries to lock an rw-lock in s-mode. Performs no
spinning.
@@ -233,33 +334,6 @@ rw_lock_s_unlock_func(
#endif
rw_lock_t* lock); /*!< in/out: rw-lock */
-#ifdef UNIV_SYNC_DEBUG
-# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(P, L)
-#else
-# define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L)
-#endif
-/*******************************************************************//**
-Releases a shared mode lock. */
-#define rw_lock_s_unlock(L) rw_lock_s_unlock_gen(L, 0)
-
-/**************************************************************//**
-NOTE! The following macro should be used in rw x-locking, not the
-corresponding function. */
-
-#define rw_lock_x_lock(M) rw_lock_x_lock_func(\
- (M), 0, __FILE__, __LINE__)
-/**************************************************************//**
-NOTE! The following macro should be used in rw x-locking, not the
-corresponding function. */
-
-#define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(\
- (M), (P), __FILE__, __LINE__)
-/**************************************************************//**
-NOTE! The following macros should be used in rw x-locking, not the
-corresponding function. */
-
-#define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(\
- (M), __FILE__, __LINE__)
/******************************************************************//**
NOTE! Use the corresponding macro, not directly this function! Lock an
rw-lock in exclusive mode for the current thread. If the rw-lock is locked
@@ -290,14 +364,6 @@ rw_lock_x_unlock_func(
#endif
rw_lock_t* lock); /*!< in/out: rw-lock */
-#ifdef UNIV_SYNC_DEBUG
-# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(P, L)
-#else
-# define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L)
-#endif
-/*******************************************************************//**
-Releases an exclusive mode lock. */
-#define rw_lock_x_unlock(L) rw_lock_x_unlock_gen(L, 0)
/******************************************************************//**
Low-level function which locks an rw-lock in s-mode when we know that it
@@ -490,6 +556,7 @@ UNIV_INTERN
void
rw_lock_debug_print(
/*================*/
+ FILE* f, /*!< in: output stream */
rw_lock_debug_t* info); /*!< in: debug struct */
#endif /* UNIV_SYNC_DEBUG */
@@ -540,6 +607,9 @@ struct rw_lock_struct {
info list of the lock */
ulint level; /*!< Level in the global latching order. */
#endif /* UNIV_SYNC_DEBUG */
+#ifdef UNIV_PFS_RWLOCK
+ struct PSI_rwlock *pfs_psi;/*!< The instrumentation hook */
+#endif
ulint count_os_wait; /*!< Count of os_waits. May not be accurate */
//const char* cfile_name;/*!< File name where lock created */
const char* lock_name;/*!< lock name */
@@ -580,6 +650,160 @@ struct rw_lock_debug_struct {
};
#endif /* UNIV_SYNC_DEBUG */
+/* For performance schema instrumentation, a new set of rwlock
+wrap functions are created if "UNIV_PFS_RWLOCK" is defined.
+The instrumentations are not planted directly into original
+functions, so that we keep the underlying function as they
+are. And in case, user wants to "take out" some rwlock from
+instrumentation even if performance schema (UNIV_PFS_RWLOCK)
+is defined, they can do so by reinstating APIs directly link to
+original underlying functions.
+The instrumented function names have prefix of "pfs_rw_lock_" vs.
+original name prefix of "rw_lock_". Following are list of functions
+that have been instrumented:
+
+rw_lock_create()
+rw_lock_x_lock()
+rw_lock_x_lock_gen()
+rw_lock_x_lock_nowait()
+rw_lock_x_unlock_gen()
+rw_lock_s_lock()
+rw_lock_s_lock_gen()
+rw_lock_s_lock_nowait()
+rw_lock_s_unlock_gen()
+rw_lock_free()
+
+Two function APIs rw_lock_x_unlock_direct() and rw_lock_s_unlock_direct()
+do not have any caller/user, they are not instrumented.
+*/
+
+#ifdef UNIV_PFS_RWLOCK
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_create_func()
+NOTE! Please use the corresponding macro rw_lock_create(), not
+directly this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_create_func(
+/*====================*/
+ PSI_rwlock_key key, /*!< in: key registered with
+ performance schema */
+ rw_lock_t* lock, /*!< in: rw lock */
+#ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+# endif /* UNIV_SYNC_DEBUG */
+ const char* cfile_name, /*!< in: file name where created */
+ ulint cline, /*!< in: file line where created */
+#endif /* UNIV_DEBUG */
+ const char* cmutex_name); /*!< in: mutex name */
+
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_x_lock_func()
+NOTE! Please use the corresponding macro rw_lock_x_lock(), not
+directly this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_x_lock_func(
+/*====================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line); /*!< in: line where requested */
+/******************************************************************//**
+Performance schema instrumented wrap function for
+rw_lock_x_lock_func_nowait()
+NOTE! Please use the corresponding macro, not directly this function!
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_rw_lock_x_lock_func_nowait(
+/*===========================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line); /*!< in: line where requested */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_s_lock_func()
+NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_s_lock_func(
+/*====================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line); /*!< in: line where requested */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_s_lock_func()
+NOTE! Please use the corresponding macro rw_lock_s_lock(), not directly
+this function!
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_rw_lock_s_lock_low(
+/*===================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock will be passed to another
+ thread to unlock */
+ const char* file_name, /*!< in: file name where lock requested */
+ ulint line); /*!< in: line where requested */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_x_lock_func()
+NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_x_lock_func(
+/*====================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line); /*!< in: line where requested */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_s_unlock_func()
+NOTE! Please use the corresponding macro rw_lock_s_unlock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_s_unlock_func(
+/*======================*/
+#ifdef UNIV_SYNC_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock may have been passed to another
+ thread to unlock */
+#endif
+ rw_lock_t* lock); /*!< in/out: rw-lock */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_s_unlock_func()
+NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_x_unlock_func(
+/*======================*/
+#ifdef UNIV_SYNC_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock may have been passed to another
+ thread to unlock */
+#endif
+ rw_lock_t* lock); /*!< in/out: rw-lock */
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_free_func()
+NOTE! Please use the corresponding macro rw_lock_free(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_free_func(
+/*==================*/
+ rw_lock_t* lock); /*!< in: rw-lock */
+#endif /* UNIV_PFS_RWLOCK */
+
+
#ifndef UNIV_NONINL
#include "sync0rw.ic"
#endif
diff --git a/storage/xtradb/include/sync0rw.ic b/storage/xtradb/include/sync0rw.ic
index 7116f1b7c9b..a6dfa603c59 100644
--- a/storage/xtradb/include/sync0rw.ic
+++ b/storage/xtradb/include/sync0rw.ic
@@ -622,3 +622,260 @@ rw_lock_x_unlock_direct(
rw_x_exit_count++;
#endif
}
+
+#ifdef UNIV_PFS_RWLOCK
+
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_create_func().
+NOTE! Please use the corresponding macro rw_lock_create(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_create_func(
+/*====================*/
+ mysql_pfs_key_t key, /*!< in: key registered with
+ performance schema */
+ rw_lock_t* lock, /*!< in: pointer to memory */
+# ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+# endif /* UNIV_SYNC_DEBUG */
+ const char* cfile_name, /*!< in: file name where created */
+ ulint cline, /*!< in: file line where created */
+# endif /* UNIV_DEBUG */
+ const char* cmutex_name) /*!< in: mutex name */
+{
+ /* Initialize the rwlock for performance schema */
+ lock->pfs_psi = (PSI_server && PFS_IS_INSTRUMENTED(key))
+ ? PSI_server->init_rwlock(key, lock)
+ : NULL;
+
+ /* The actual function to initialize an rwlock */
+ rw_lock_create_func(lock,
+# ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+ level,
+# endif /* UNIV_SYNC_DEBUG */
+ cfile_name,
+ cline,
+# endif /* UNIV_DEBUG */
+ cmutex_name);
+}
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_x_lock_func()
+NOTE! Please use the corresponding macro rw_lock_x_lock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_x_lock_func(
+/*====================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the lock will
+ be passed to another thread to unlock */
+ const char* file_name,/*!< in: file name where lock requested */
+ ulint line) /*!< in: line where requested */
+{
+ struct PSI_rwlock_locker* locker = NULL;
+ PSI_rwlock_locker_state state;
+
+ /* Record the entry of rw x lock request in performance schema */
+ if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
+ locker = PSI_server->get_thread_rwlock_locker(
+ &state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK);
+
+ if (locker) {
+ PSI_server->start_rwlock_wrwait(locker,
+ file_name, line);
+ }
+ }
+
+ rw_lock_x_lock_func(lock, pass, file_name, line);
+
+ if (locker) {
+ PSI_server->end_rwlock_wrwait(locker, 0);
+ }
+}
+/******************************************************************//**
+Performance schema instrumented wrap function for
+rw_lock_x_lock_func_nowait()
+NOTE! Please use the corresponding macro rw_lock_x_lock_func(),
+not directly this function!
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_rw_lock_x_lock_func_nowait(
+/*===========================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ const char* file_name,/*!< in: file name where lock
+ requested */
+ ulint line) /*!< in: line where requested */
+{
+ struct PSI_rwlock_locker* locker = NULL;
+ PSI_rwlock_locker_state state;
+ ibool ret;
+
+ /* Record the entry of rw x lock request in performance schema */
+ if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
+ locker = PSI_server->get_thread_rwlock_locker(
+ &state, lock->pfs_psi, PSI_RWLOCK_WRITELOCK);
+
+ if (locker) {
+ PSI_server->start_rwlock_wrwait(locker,
+ file_name, line);
+ }
+ }
+
+ ret = rw_lock_x_lock_func_nowait(lock, file_name, line);
+
+ if (locker) {
+ PSI_server->end_rwlock_wrwait(locker, 0);
+ }
+
+ return(ret);
+}
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_free_func()
+NOTE! Please use the corresponding macro rw_lock_free(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_free_func(
+/*==================*/
+ rw_lock_t* lock) /*!< in: pointer to rw-lock */
+{
+ if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
+ PSI_server->destroy_rwlock(lock->pfs_psi);
+ lock->pfs_psi = NULL;
+ }
+
+ rw_lock_free_func(lock);
+}
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_s_lock_func()
+NOTE! Please use the corresponding macro rw_lock_s_lock(), not
+directly this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_s_lock_func(
+/*====================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock will be passed to another
+ thread to unlock */
+ const char* file_name,/*!< in: file name where lock
+ requested */
+ ulint line) /*!< in: line where requested */
+{
+ struct PSI_rwlock_locker* locker = NULL;
+ PSI_rwlock_locker_state state;
+
+ /* Instrumented to inform we are aquiring a shared rwlock */
+ if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
+ locker = PSI_server->get_thread_rwlock_locker(
+ &state, lock->pfs_psi, PSI_RWLOCK_READLOCK);
+ if (locker) {
+ PSI_server->start_rwlock_rdwait(locker,
+ file_name, line);
+ }
+ }
+
+ rw_lock_s_lock_func(lock, pass, file_name, line);
+
+ if (locker) {
+ PSI_server->end_rwlock_rdwait(locker, 0);
+ }
+}
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_s_lock_func()
+NOTE! Please use the corresponding macro rw_lock_s_lock(), not
+directly this function!
+@return TRUE if success */
+UNIV_INLINE
+ibool
+pfs_rw_lock_s_lock_low(
+/*===================*/
+ rw_lock_t* lock, /*!< in: pointer to rw-lock */
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock will be passed to another
+ thread to unlock */
+ const char* file_name, /*!< in: file name where lock requested */
+ ulint line) /*!< in: line where requested */
+{
+ struct PSI_rwlock_locker* locker = NULL;
+ PSI_rwlock_locker_state state;
+ ibool ret;
+
+ /* Instrumented to inform we are aquiring a shared rwlock */
+ if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
+ locker = PSI_server->get_thread_rwlock_locker(
+ &state, lock->pfs_psi, PSI_RWLOCK_READLOCK);
+ if (locker) {
+ PSI_server->start_rwlock_rdwait(locker,
+ file_name, line);
+ }
+ }
+
+ ret = rw_lock_s_lock_low(lock, pass, file_name, line);
+
+ if (locker) {
+ PSI_server->end_rwlock_rdwait(locker, 0);
+ }
+
+ return(ret);
+}
+
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_x_unlock_func()
+NOTE! Please use the corresponding macro rw_lock_x_unlock(), not directly
+this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_x_unlock_func(
+/*======================*/
+#ifdef UNIV_SYNC_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock may have been passed to another
+ thread to unlock */
+#endif
+ rw_lock_t* lock) /*!< in/out: rw-lock */
+{
+ /* Inform performance schema we are unlocking the lock */
+ if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
+ PSI_server->unlock_rwlock(lock->pfs_psi);
+ }
+
+ rw_lock_x_unlock_func(
+#ifdef UNIV_SYNC_DEBUG
+ pass,
+#endif
+ lock);
+}
+
+/******************************************************************//**
+Performance schema instrumented wrap function for rw_lock_s_unlock_func()
+NOTE! Please use the corresponding macro pfs_rw_lock_s_unlock(), not
+directly this function! */
+UNIV_INLINE
+void
+pfs_rw_lock_s_unlock_func(
+/*======================*/
+#ifdef UNIV_SYNC_DEBUG
+ ulint pass, /*!< in: pass value; != 0, if the
+ lock may have been passed to another
+ thread to unlock */
+#endif
+ rw_lock_t* lock) /*!< in/out: rw-lock */
+{
+ /* Inform performance schema we are unlocking the lock */
+ if (UNIV_LIKELY(PSI_server && lock->pfs_psi)) {
+ PSI_server->unlock_rwlock(lock->pfs_psi);
+ }
+
+ rw_lock_s_unlock_func(
+#ifdef UNIV_SYNC_DEBUG
+ pass,
+#endif
+ lock);
+
+}
+#endif /* UNIV_PFS_RWLOCK */
diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h
index a500cf1da45..32a20807ba0 100644
--- a/storage/xtradb/include/sync0sync.h
+++ b/storage/xtradb/include/sync0sync.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2011, Innobase Oy. All Rights Reserved.
Copyright (c) 2008, Google Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -52,6 +52,71 @@ typedef LONG lock_word_t; /*!< On Windows, InterlockedExchange operates
typedef byte lock_word_t;
#endif
+#if defined UNIV_PFS_MUTEX || defined UNIV_PFS_RWLOCK
+/* There are mutexes/rwlocks that we want to exclude from
+instrumentation even if their corresponding performance schema
+define is set. And this PFS_NOT_INSTRUMENTED is used
+as the key value to dentify those objects that would
+be excluded from instrumentation. */
+# define PFS_NOT_INSTRUMENTED ULINT32_UNDEFINED
+
+# define PFS_IS_INSTRUMENTED(key) ((key) != PFS_NOT_INSTRUMENTED)
+
+/* By default, buffer mutexes and rwlocks will be excluded from
+instrumentation due to their large number of instances. */
+# define PFS_SKIP_BUFFER_MUTEX_RWLOCK
+
+#endif /* UNIV_PFS_MUTEX || UNIV_PFS_RWLOCK */
+
+#ifdef UNIV_PFS_MUTEX
+/* Key defines to register InnoDB mutexes with performance schema */
+extern mysql_pfs_key_t autoinc_mutex_key;
+extern mysql_pfs_key_t btr_search_enabled_mutex_key;
+extern mysql_pfs_key_t buffer_block_mutex_key;
+extern mysql_pfs_key_t buf_pool_mutex_key;
+extern mysql_pfs_key_t buf_pool_zip_mutex_key;
+extern mysql_pfs_key_t buf_pool_LRU_list_mutex_key;
+extern mysql_pfs_key_t buf_pool_free_list_mutex_key;
+extern mysql_pfs_key_t buf_pool_zip_free_mutex_key;
+extern mysql_pfs_key_t buf_pool_zip_hash_mutex_key;
+extern mysql_pfs_key_t cache_last_read_mutex_key;
+extern mysql_pfs_key_t dict_foreign_err_mutex_key;
+extern mysql_pfs_key_t dict_sys_mutex_key;
+extern mysql_pfs_key_t file_format_max_mutex_key;
+extern mysql_pfs_key_t fil_system_mutex_key;
+extern mysql_pfs_key_t flush_list_mutex_key;
+extern mysql_pfs_key_t hash_table_mutex_key;
+extern mysql_pfs_key_t ibuf_bitmap_mutex_key;
+extern mysql_pfs_key_t ibuf_mutex_key;
+extern mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
+extern mysql_pfs_key_t log_sys_mutex_key;
+extern mysql_pfs_key_t log_flush_order_mutex_key;
+extern mysql_pfs_key_t kernel_mutex_key;
+# ifdef UNIV_MEM_DEBUG
+extern mysql_pfs_key_t mem_hash_mutex_key;
+# endif /* UNIV_MEM_DEBUG */
+extern mysql_pfs_key_t mem_pool_mutex_key;
+extern mysql_pfs_key_t mutex_list_mutex_key;
+extern mysql_pfs_key_t purge_sys_bh_mutex_key;
+extern mysql_pfs_key_t recv_sys_mutex_key;
+extern mysql_pfs_key_t rseg_mutex_key;
+# ifdef UNIV_SYNC_DEBUG
+extern mysql_pfs_key_t rw_lock_debug_mutex_key;
+# endif /* UNIV_SYNC_DEBUG */
+extern mysql_pfs_key_t rw_lock_list_mutex_key;
+extern mysql_pfs_key_t rw_lock_mutex_key;
+extern mysql_pfs_key_t srv_dict_tmpfile_mutex_key;
+extern mysql_pfs_key_t srv_innodb_monitor_mutex_key;
+extern mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
+extern mysql_pfs_key_t srv_monitor_file_mutex_key;
+extern mysql_pfs_key_t syn_arr_mutex_key;
+# ifdef UNIV_SYNC_DEBUG
+extern mysql_pfs_key_t sync_thread_mutex_key;
+# endif /* UNIV_SYNC_DEBUG */
+extern mysql_pfs_key_t trx_doublewrite_mutex_key;
+extern mysql_pfs_key_t trx_undo_mutex_key;
+#endif /* UNIV_PFS_MUTEX */
+
/******************************************************************//**
Initializes the synchronization data structures. */
UNIV_INTERN
@@ -64,24 +129,82 @@ UNIV_INTERN
void
sync_close(void);
/*===========*/
+
+#undef mutex_free /* Fix for MacOS X */
+
+#ifdef UNIV_PFS_MUTEX
+/**********************************************************************
+Following mutex APIs would be performance schema instrumented
+if "UNIV_PFS_MUTEX" is defined:
+
+mutex_create
+mutex_enter
+mutex_exit
+mutex_enter_nowait
+mutex_free
+
+These mutex APIs will point to corresponding wrapper functions that contain
+the performance schema instrumentation if "UNIV_PFS_MUTEX" is defined.
+The instrumented wrapper functions have the prefix of "innodb_".
+
+NOTE! The following macro should be used in mutex operation, not the
+corresponding function. */
+
/******************************************************************//**
Creates, or rather, initializes a mutex object to a specified memory
location (which must be appropriately aligned). The mutex is initialized
in the reset state. Explicit freeing of the mutex with mutex_free is
necessary only if the memory block containing it is freed. */
-
-#ifdef UNIV_DEBUG
-# ifdef UNIV_SYNC_DEBUG
-# define mutex_create(M, level) \
- mutex_create_func((M), #M, (level), __FILE__, __LINE__)
+# ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+# define mutex_create(K, M, level) \
+ pfs_mutex_create_func((K), (M), (level), __FILE__, __LINE__, #M)
+# else
+# define mutex_create(K, M, level) \
+ pfs_mutex_create_func((K), (M), __FILE__, __LINE__, #M)
+# endif/* UNIV_SYNC_DEBUG */
# else
-# define mutex_create(M, level) \
- mutex_create_func((M), #M, __FILE__, __LINE__)
-# endif
-#else
-# define mutex_create(M, level) \
- mutex_create_func((M), #M, NULL, 0)
-#endif
+# define mutex_create(K, M, level) \
+ pfs_mutex_create_func((K), (M), #M)
+# endif /* UNIV_DEBUG */
+
+# define mutex_enter(M) \
+ pfs_mutex_enter_func((M), __FILE__, __LINE__)
+
+# define mutex_enter_nowait(M) \
+ pfs_mutex_enter_nowait_func((M), __FILE__, __LINE__)
+
+# define mutex_exit(M) pfs_mutex_exit_func(M)
+
+# define mutex_free(M) pfs_mutex_free_func(M)
+
+#else /* UNIV_PFS_MUTEX */
+
+/* If "UNIV_PFS_MUTEX" is not defined, the mutex APIs point to
+original non-instrumented functions */
+# ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+# define mutex_create(K, M, level) \
+ mutex_create_func((M), (level), __FILE__, __LINE__, #M)
+# else /* UNIV_SYNC_DEBUG */
+# define mutex_create(K, M, level) \
+ mutex_create_func((M), __FILE__, __LINE__, #M)
+# endif /* UNIV_SYNC_DEBUG */
+# else /* UNIV_DEBUG */
+# define mutex_create(K, M, level) \
+ mutex_create_func((M), #M)
+# endif /* UNIV_DEBUG */
+
+# define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__)
+
+# define mutex_enter_nowait(M) \
+ mutex_enter_nowait_func((M), __FILE__, __LINE__)
+
+# define mutex_exit(M) mutex_exit_func(M)
+
+# define mutex_free(M) mutex_free_func(M)
+
+#endif /* UNIV_PFS_MUTEX */
/******************************************************************//**
Creates, or rather, initializes a mutex object in a specified memory
@@ -93,35 +216,29 @@ void
mutex_create_func(
/*==============*/
mutex_t* mutex, /*!< in: pointer to memory */
- const char* cmutex_name, /*!< in: mutex name */
#ifdef UNIV_DEBUG
# ifdef UNIV_SYNC_DEBUG
ulint level, /*!< in: level */
# endif /* UNIV_SYNC_DEBUG */
-#endif /* UNIV_DEBUG */
const char* cfile_name, /*!< in: file name where created */
- ulint cline); /*!< in: file line where created */
-
-#undef mutex_free /* Fix for MacOS X */
+ ulint cline, /*!< in: file line where created */
+#endif /* UNIV_DEBUG */
+ const char* cmutex_name); /*!< in: mutex name */
/******************************************************************//**
+NOTE! Use the corresponding macro mutex_free(), not directly this function!
Calling this function is obligatory only if the memory buffer containing
the mutex is freed. Removes a mutex object from the mutex list. The mutex
is checked to be in the reset state. */
UNIV_INTERN
void
-mutex_free(
-/*=======*/
+mutex_free_func(
+/*============*/
mutex_t* mutex); /*!< in: mutex */
/**************************************************************//**
NOTE! The following macro should be used in mutex locking, not the
corresponding function. */
-#define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__)
-/**************************************************************//**
-NOTE! The following macro should be used in mutex locking, not the
-corresponding function. */
-
/* NOTE! currently same as mutex_enter! */
#define mutex_enter_fast(M) mutex_enter_func((M), __FILE__, __LINE__)
@@ -137,12 +254,6 @@ mutex_enter_func(
mutex_t* mutex, /*!< in: pointer to mutex */
const char* file_name, /*!< in: file name where locked */
ulint line); /*!< in: line where locked */
-/**************************************************************//**
-NOTE! The following macro should be used in mutex locking, not the
-corresponding function. */
-
-#define mutex_enter_nowait(M) \
- mutex_enter_nowait_func((M), __FILE__, __LINE__)
/********************************************************************//**
NOTE! Use the corresponding macro in the header file, not this function
directly. Tries to lock the mutex for the current thread. If the lock is not
@@ -157,12 +268,86 @@ mutex_enter_nowait_func(
requested */
ulint line); /*!< in: line where requested */
/******************************************************************//**
+NOTE! Use the corresponding macro mutex_exit(), not directly this function!
Unlocks a mutex owned by the current thread. */
UNIV_INLINE
void
-mutex_exit(
-/*=======*/
+mutex_exit_func(
+/*============*/
+ mutex_t* mutex); /*!< in: pointer to mutex */
+
+
+#ifdef UNIV_PFS_MUTEX
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_create(), not directly
+this function!
+A wrapper function for mutex_create_func(), registers the mutex
+with peformance schema if "UNIV_PFS_MUTEX" is defined when
+creating the mutex */
+UNIV_INLINE
+void
+pfs_mutex_create_func(
+/*==================*/
+ PSI_mutex_key key, /*!< in: Performance Schema key */
+ mutex_t* mutex, /*!< in: pointer to memory */
+# ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+# endif /* UNIV_SYNC_DEBUG */
+ const char* cfile_name, /*!< in: file name where created */
+ ulint cline, /*!< in: file line where created */
+# endif /* UNIV_DEBUG */
+ const char* cmutex_name);
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_enter(), not directly
+this function!
+This is a performance schema instrumented wrapper function for
+mutex_enter_func(). */
+UNIV_INLINE
+void
+pfs_mutex_enter_func(
+/*=================*/
+ mutex_t* mutex, /*!< in: pointer to mutex */
+ const char* file_name, /*!< in: file name where locked */
+ ulint line); /*!< in: line where locked */
+/********************************************************************//**
+NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
+this function!
+This is a performance schema instrumented wrapper function for
+mutex_enter_nowait_func.
+@return 0 if succeed, 1 if not */
+UNIV_INLINE
+ulint
+pfs_mutex_enter_nowait_func(
+/*========================*/
+ mutex_t* mutex, /*!< in: pointer to mutex */
+ const char* file_name, /*!< in: file name where mutex
+ requested */
+ ulint line); /*!< in: line where requested */
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_exit(), not directly
+this function!
+A wrap function of mutex_exit_func() with peformance schema instrumentation.
+Unlocks a mutex owned by the current thread. */
+UNIV_INLINE
+void
+pfs_mutex_exit_func(
+/*================*/
mutex_t* mutex); /*!< in: pointer to mutex */
+
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_free(), not directly
+this function!
+Wrapper function for mutex_free_func(). Also destroys the performance
+schema probes when freeing the mutex */
+UNIV_INLINE
+void
+pfs_mutex_free_func(
+/*================*/
+ mutex_t* mutex); /*!< in: mutex */
+
+#endif /* UNIV_PFS_MUTEX */
+
#ifdef UNIV_SYNC_DEBUG
/******************************************************************//**
Returns TRUE if no mutex or rw-lock is currently locked.
@@ -232,13 +417,6 @@ sync_thread_reset_level(
/*====================*/
void* latch); /*!< in: pointer to a mutex or an rw-lock */
/******************************************************************//**
-Checks that the level array for the current thread is empty.
-@return TRUE if empty */
-UNIV_INTERN
-ibool
-sync_thread_levels_empty(void);
-/*==========================*/
-/******************************************************************//**
Checks if the level array for the current thread contains a
mutex or rw-latch at the specified level.
@return a matching latch, or NULL if not found */
@@ -249,17 +427,33 @@ sync_thread_levels_contains(
ulint level); /*!< in: latching order level
(SYNC_DICT, ...)*/
/******************************************************************//**
-Checks if the level array for the current thread is empty.
+Checks that the level array for the current thread is empty.
@return a latch, or NULL if empty except the exceptions specified below */
UNIV_INTERN
void*
sync_thread_levels_nonempty_gen(
/*============================*/
- ibool dict_mutex_allowed); /*!< in: TRUE if dictionary mutex is
- allowed to be owned by the thread,
- also purge_is_running mutex is
- allowed */
-#define sync_thread_levels_empty_gen(d) (!sync_thread_levels_nonempty_gen(d))
+ ibool dict_mutex_allowed) /*!< in: TRUE if dictionary mutex is
+ allowed to be owned by the thread */
+ __attribute__((warn_unused_result));
+/******************************************************************//**
+Checks if the level array for the current thread is empty,
+except for data dictionary latches. */
+#define sync_thread_levels_empty_except_dict() \
+ (!sync_thread_levels_nonempty_gen(TRUE))
+/******************************************************************//**
+Checks if the level array for the current thread is empty,
+except for the btr_search_latch.
+@return a latch, or NULL if empty except the exceptions specified below */
+UNIV_INTERN
+void*
+sync_thread_levels_nonempty_trx(
+/*============================*/
+ ibool has_search_latch)
+ /*!< in: TRUE if and only if the thread
+ is supposed to hold btr_search_latch */
+ __attribute__((warn_unused_result));
+
/******************************************************************//**
Gets the debug information for a reserved mutex. */
UNIV_INTERN
@@ -455,7 +649,6 @@ or row lock! */
#define SYNC_TREE_NODE_NEW 892
#define SYNC_TREE_NODE_FROM_HASH 891
#define SYNC_TREE_NODE 890
-#define SYNC_PURGE_SYS 810
#define SYNC_PURGE_LATCH 800
#define SYNC_TRX_UNDO 700
#define SYNC_RSEG 600
@@ -477,7 +670,9 @@ or row lock! */
#define SYNC_REC_LOCK 299
#define SYNC_TRX_LOCK_HEAP 298
#define SYNC_TRX_SYS_HEADER 290
+#define SYNC_PURGE_QUEUE 200
#define SYNC_LOG 170
+#define SYNC_LOG_FLUSH_ORDER 156
#define SYNC_RECV 168
#define SYNC_WORK_QUEUE 162
#define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */
@@ -487,15 +682,16 @@ or row lock! */
SYNC_SEARCH_SYS, as memory allocation
can call routines there! Otherwise
the level is SYNC_MEM_HASH. */
-#define SYNC_BUF_LRU_LIST 157
-#define SYNC_BUF_PAGE_HASH 156
-#define SYNC_BUF_BLOCK 155
-#define SYNC_BUF_FREE_LIST 153
-#define SYNC_BUF_ZIP_FREE 152
-#define SYNC_BUF_ZIP_HASH 151
-#define SYNC_BUF_POOL 150
-#define SYNC_BUF_FLUSH_LIST 149
+#define SYNC_BUF_LRU_LIST 158
+#define SYNC_BUF_PAGE_HASH 157
+#define SYNC_BUF_BLOCK 155 /* Block mutex */
+#define SYNC_BUF_FREE_LIST 153
+#define SYNC_BUF_ZIP_FREE 152
+#define SYNC_BUF_ZIP_HASH 151
+#define SYNC_BUF_POOL 150 /* Buffer pool mutex */
+#define SYNC_BUF_FLUSH_LIST 145 /* Buffer flush list mutex */
#define SYNC_DOUBLEWRITE 140
+#define SYNC_OUTER_ANY_LATCH 136
#define SYNC_ANY_LATCH 135
#define SYNC_THR_LOCAL 133
#define SYNC_MEM_HASH 131
@@ -556,6 +752,10 @@ struct mutex_struct {
ulint mutex_type; /*!< 0=usual mutex, 1=rw_lock mutex */
#endif /* UNIV_DEBUG */
const char* cmutex_name; /*!< mutex name */
+#ifdef UNIV_PFS_MUTEX
+ struct PSI_mutex* pfs_psi; /*!< The performance schema
+ instrumentation hook */
+#endif
};
/** The global array of wait cells for implementation of the databases own
@@ -569,6 +769,10 @@ to 20 microseconds. */
#define SYNC_SPIN_ROUNDS srv_n_spin_wait_rounds
+extern ib_int64_t mutex_spin_round_count;
+extern ib_int64_t mutex_spin_wait_count;
+extern ib_int64_t mutex_os_wait_count;
+
/** The number of mutex_exit calls. Intended for performance monitoring. */
extern ib_int64_t mutex_exit_count;
diff --git a/storage/xtradb/include/sync0sync.ic b/storage/xtradb/include/sync0sync.ic
index b05020b5660..1d9c8d59b12 100644
--- a/storage/xtradb/include/sync0sync.ic
+++ b/storage/xtradb/include/sync0sync.ic
@@ -152,11 +152,12 @@ mutex_get_waiters(
}
/******************************************************************//**
+NOTE! Use the corresponding macro mutex_exit(), not directly this function!
Unlocks a mutex owned by the current thread. */
UNIV_INLINE
void
-mutex_exit(
-/*=======*/
+mutex_exit_func(
+/*============*/
mutex_t* mutex) /*!< in: pointer to mutex */
{
ut_ad(mutex_own(mutex));
@@ -220,3 +221,145 @@ mutex_enter_func(
mutex_spin_wait(mutex, file_name, line);
}
+
+#ifdef UNIV_PFS_MUTEX
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_enter(), not directly
+this function!
+This is a performance schema instrumented wrapper function for
+mutex_enter_func(). */
+UNIV_INLINE
+void
+pfs_mutex_enter_func(
+/*=================*/
+ mutex_t* mutex, /*!< in: pointer to mutex */
+ const char* file_name, /*!< in: file name where locked */
+ ulint line) /*!< in: line where locked */
+{
+ struct PSI_mutex_locker* locker = NULL;
+ PSI_mutex_locker_state state;
+ int result = 0;
+
+ if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) {
+ locker = PSI_server->get_thread_mutex_locker(
+ &state, mutex->pfs_psi, PSI_MUTEX_LOCK);
+ if (locker) {
+ PSI_server->start_mutex_wait(locker, file_name, line);
+ }
+ }
+
+ mutex_enter_func(mutex, file_name, line);
+
+ if (locker) {
+ PSI_server->end_mutex_wait(locker, result);
+ }
+}
+/********************************************************************//**
+NOTE! Please use the corresponding macro mutex_enter_nowait(), not directly
+this function!
+This is a performance schema instrumented wrapper function for
+mutex_enter_nowait_func.
+@return 0 if succeed, 1 if not */
+UNIV_INLINE
+ulint
+pfs_mutex_enter_nowait_func(
+/*========================*/
+ mutex_t* mutex, /*!< in: pointer to mutex */
+ const char* file_name, /*!< in: file name where mutex
+ requested */
+ ulint line) /*!< in: line where requested */
+{
+ ulint ret;
+ struct PSI_mutex_locker* locker = NULL;
+ PSI_mutex_locker_state state;
+ int result = 0;
+
+ if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) {
+ locker = PSI_server->get_thread_mutex_locker(
+ &state, mutex->pfs_psi, PSI_MUTEX_LOCK);
+ if (locker) {
+ PSI_server->start_mutex_wait(locker, file_name, line);
+ }
+ }
+
+ ret = mutex_enter_nowait_func(mutex, file_name, line);
+
+ if (locker) {
+ PSI_server->end_mutex_wait(locker, result);
+ }
+
+ return(ret);
+}
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_exit(), not directly
+this function!
+A wrap function of mutex_exit_func() with performance schema instrumentation.
+Unlocks a mutex owned by the current thread. */
+UNIV_INLINE
+void
+pfs_mutex_exit_func(
+/*================*/
+ mutex_t* mutex) /*!< in: pointer to mutex */
+{
+ if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) {
+ PSI_server->unlock_mutex(mutex->pfs_psi);
+ }
+
+ mutex_exit_func(mutex);
+}
+
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_create(), not directly
+this function!
+A wrapper function for mutex_create_func(), registers the mutex
+with performance schema if "UNIV_PFS_MUTEX" is defined when
+creating the mutex */
+UNIV_INLINE
+void
+pfs_mutex_create_func(
+/*==================*/
+ mysql_pfs_key_t key, /*!< in: Performance Schema key */
+ mutex_t* mutex, /*!< in: pointer to memory */
+# ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+ ulint level, /*!< in: level */
+# endif /* UNIV_SYNC_DEBUG */
+ const char* cfile_name, /*!< in: file name where created */
+ ulint cline, /*!< in: file line where created */
+# endif /* UNIV_DEBUG */
+ const char* cmutex_name) /*!< in: mutex name */
+{
+ mutex->pfs_psi = (PSI_server && PFS_IS_INSTRUMENTED(key))
+ ? PSI_server->init_mutex(key, mutex)
+ : NULL;
+
+ mutex_create_func(mutex,
+# ifdef UNIV_DEBUG
+# ifdef UNIV_SYNC_DEBUG
+ level,
+# endif /* UNIV_SYNC_DEBUG */
+ cfile_name,
+ cline,
+# endif /* UNIV_DEBUG */
+ cmutex_name);
+}
+/******************************************************************//**
+NOTE! Please use the corresponding macro mutex_free(), not directly
+this function!
+Wrapper function for mutex_free_func(). Also destroys the performance
+schema probes when freeing the mutex */
+UNIV_INLINE
+void
+pfs_mutex_free_func(
+/*================*/
+ mutex_t* mutex) /*!< in: mutex */
+{
+ if (UNIV_LIKELY(PSI_server && mutex->pfs_psi)) {
+ PSI_server->destroy_mutex(mutex->pfs_psi);
+ mutex->pfs_psi = NULL;
+ }
+
+ mutex_free_func(mutex);
+}
+
+#endif /* UNIV_PFS_MUTEX */
diff --git a/storage/xtradb/include/thr0loc.h b/storage/xtradb/include/thr0loc.h
deleted file mode 100644
index 293d1ebd57f..00000000000
--- a/storage/xtradb/include/thr0loc.h
+++ /dev/null
@@ -1,101 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/thr0loc.h
-The thread local storage
-
-Created 10/5/1995 Heikki Tuuri
-*******************************************************/
-
-/* This module implements storage private to each thread,
-a capability useful in some situations like storing the
-OS handle to the current thread, or its priority. */
-
-#ifndef thr0loc_h
-#define thr0loc_h
-
-#include "univ.i"
-#include "os0thread.h"
-
-/****************************************************************//**
-Initializes the thread local storage module. */
-UNIV_INTERN
-void
-thr_local_init(void);
-/*================*/
- /****************************************************************//**
-Close the thread local storage module. */
-UNIV_INTERN
-void
-thr_local_close(void);
-/*=================*/
-/*******************************************************************//**
-Creates a local storage struct for the calling new thread. */
-UNIV_INTERN
-void
-thr_local_create(void);
-/*==================*/
-/*******************************************************************//**
-Frees the local storage struct for the specified thread. */
-UNIV_INTERN
-void
-thr_local_free(
-/*===========*/
- os_thread_id_t id); /*!< in: thread id */
-/*******************************************************************//**
-Gets the slot number in the thread table of a thread.
-@return slot number */
-UNIV_INTERN
-ulint
-thr_local_get_slot_no(
-/*==================*/
- os_thread_id_t id); /*!< in: thread id of the thread */
-/*******************************************************************//**
-Sets in the local storage the slot number in the thread table of a thread. */
-UNIV_INTERN
-void
-thr_local_set_slot_no(
-/*==================*/
- os_thread_id_t id, /*!< in: thread id of the thread */
- ulint slot_no);/*!< in: slot number */
-/*******************************************************************//**
-Returns pointer to the 'in_ibuf' field within the current thread local
-storage.
-@return pointer to the in_ibuf field */
-UNIV_INTERN
-ibool*
-thr_local_get_in_ibuf_field(void);
-/*=============================*/
-
-/*************************************************************************
-Return local hash table informations. */
-
-ulint
-thr_local_hash_cells(void);
-/*=======================*/
-
-ulint
-thr_local_hash_nodes(void);
-/*=======================*/
-
-#ifndef UNIV_NONINL
-#include "thr0loc.ic"
-#endif
-
-#endif
diff --git a/storage/xtradb/include/thr0loc.ic b/storage/xtradb/include/thr0loc.ic
deleted file mode 100644
index ce44e512320..00000000000
--- a/storage/xtradb/include/thr0loc.ic
+++ /dev/null
@@ -1,24 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/thr0loc.ic
-Thread local storage
-
-Created 10/4/1995 Heikki Tuuri
-*******************************************************/
diff --git a/storage/xtradb/include/trx0i_s.h b/storage/xtradb/include/trx0i_s.h
index 7bd4e1b88c8..73896a3cb76 100644
--- a/storage/xtradb/include/trx0i_s.h
+++ b/storage/xtradb/include/trx0i_s.h
@@ -30,6 +30,7 @@ Created July 17, 2007 Vasil Dimov
#include "univ.i"
#include "trx0types.h"
+#include "dict0types.h"
#include "ut0ut.h"
/** The maximum amount of memory that can be consumed by innodb_trx,
@@ -44,6 +45,37 @@ i_s_locks_row_t::lock_data */
i_s_trx_row_t::trx_query */
#define TRX_I_S_TRX_QUERY_MAX_LEN 1024
+/** The maximum length of a string that can be stored in
+i_s_trx_row_t::trx_operation_state */
+#define TRX_I_S_TRX_OP_STATE_MAX_LEN 64
+
+/** The maximum length of a string that can be stored in
+i_s_trx_row_t::trx_foreign_key_error */
+#define TRX_I_S_TRX_FK_ERROR_MAX_LEN 256
+
+/** The maximum length of a string that can be stored in
+i_s_trx_row_t::trx_isolation_level */
+#define TRX_I_S_TRX_ISOLATION_LEVEL_MAX_LEN 16
+
+/** Safely copy strings in to the INNODB_TRX table's
+string based columns */
+#define TRX_I_S_STRING_COPY(data, field, constraint, tcache) \
+do { \
+ if (strlen(data) > constraint) { \
+ char buff[constraint + 1]; \
+ strncpy(buff, data, constraint); \
+ buff[constraint] = '\0'; \
+ \
+ field = ha_storage_put_memlim( \
+ (tcache)->storage, buff, constraint + 1,\
+ MAX_ALLOWED_FOR_STORAGE(tcache)); \
+ } else { \
+ field = ha_storage_put_str_memlim( \
+ (tcache)->storage, data, \
+ MAX_ALLOWED_FOR_STORAGE(tcache)); \
+ } \
+} while (0)
+
/** A row of INFORMATION_SCHEMA.innodb_locks */
typedef struct i_s_locks_row_struct i_s_locks_row_t;
/** A row of INFORMATION_SCHEMA.innodb_trx */
@@ -64,7 +96,7 @@ struct i_s_hash_chain_struct {
/** This structure represents INFORMATION_SCHEMA.innodb_locks row */
struct i_s_locks_row_struct {
- ullint lock_trx_id; /*!< transaction identifier */
+ trx_id_t lock_trx_id; /*!< transaction identifier */
const char* lock_mode; /*!< lock mode from
lock_get_mode_str() */
const char* lock_type; /*!< lock type from
@@ -85,7 +117,7 @@ struct i_s_locks_row_struct {
/** The following are auxiliary and not included in the table */
/* @{ */
- ullint lock_table_id;
+ table_id_t lock_table_id;
/*!< table identifier from
lock_get_table_id */
i_s_hash_chain_t hash_chain; /*!< hash table chain node for
@@ -95,21 +127,52 @@ struct i_s_locks_row_struct {
/** This structure represents INFORMATION_SCHEMA.innodb_trx row */
struct i_s_trx_row_struct {
- ullint trx_id; /*!< transaction identifier */
+ trx_id_t trx_id; /*!< transaction identifier */
const char* trx_state; /*!< transaction state from
trx_get_que_state_str() */
ib_time_t trx_started; /*!< trx_struct::start_time */
const i_s_locks_row_t* requested_lock_row;
- /*!< pointer to a row
- in innodb_locks if trx
- is waiting, or NULL */
- ib_time_t trx_wait_started;
- /*!< trx_struct::wait_started */
- ullint trx_weight; /*!< TRX_WEIGHT() */
- ulint trx_mysql_thread_id;
- /*!< thd_get_thread_id() */
- const char* trx_query; /*!< MySQL statement being
- executed in the transaction */
+ /*!< pointer to a row
+ in innodb_locks if trx
+ is waiting, or NULL */
+ ib_time_t trx_wait_started; /*!< trx_struct::wait_started */
+ ullint trx_weight; /*!< TRX_WEIGHT() */
+ ulint trx_mysql_thread_id; /*!< thd_get_thread_id() */
+ const char* trx_query; /*!< MySQL statement being
+ executed in the transaction */
+ struct charset_info_st* trx_query_cs;
+ /*!< charset encode the MySQL
+ statement */
+ const char* trx_operation_state; /*!< trx_struct::op_info */
+ ulint trx_tables_in_use;/*!< n_mysql_tables_in_use in
+ trx_struct */
+ ulint trx_tables_locked;
+ /*!< mysql_n_tables_locked in
+ trx_struct */
+ ulint trx_lock_structs;/*!< list len of trx_locks in
+ trx_struct */
+ ulint trx_lock_memory_bytes;
+ /*!< mem_heap_get_size(
+ trx->lock_heap) */
+ ulint trx_rows_locked;/*!< lock_number_of_rows_locked() */
+ ullint trx_rows_modified;/*!< trx_struct::undo_no */
+ ulint trx_concurrency_tickets;
+ /*!< n_tickets_to_enter_innodb in
+ trx_struct */
+ const char* trx_isolation_level;
+ /*!< isolation_level in trx_struct*/
+ ibool trx_unique_checks;
+ /*!< check_unique_secondary in
+ trx_struct*/
+ ibool trx_foreign_key_checks;
+ /*!< check_foreigns in trx_struct */
+ const char* trx_foreign_key_error;
+ /*!< detailed_error in trx_struct */
+ ibool trx_has_search_latch;
+ /*!< has_search_latch in trx_struct */
+ ulint trx_search_latch_timeout;
+ /*!< search_latch_timeout in
+ trx_struct */
};
/** This structure represents INFORMATION_SCHEMA.innodb_lock_waits row */
diff --git a/storage/xtradb/include/trx0purge.h b/storage/xtradb/include/trx0purge.h
index ae5bc6f90be..0b83a76cab7 100644
--- a/storage/xtradb/include/trx0purge.h
+++ b/storage/xtradb/include/trx0purge.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -68,8 +68,9 @@ Creates the global purge system control structure and inits the history
mutex. */
UNIV_INTERN
void
-trx_purge_sys_create(void);
-/*======================*/
+trx_purge_sys_create(
+/*=================*/
+ ib_bh_t* ib_bh); /*!< in/own: UNDO log min binary heap*/
/********************************************************************//**
Frees the global purge system control structure. */
UNIV_INTERN
@@ -112,27 +113,10 @@ This function runs a purge batch.
@return number of undo log pages handled in the batch */
UNIV_INTERN
ulint
-trx_purge(void);
-/*===========*/
-/**********************************************************************
-This function runs a purge worker batch */
-UNIV_INTERN
-void
-trx_purge_worker(
-/*=============*/
- ulint worker_id);
-/**********************************************************************
-This function waits the event for worker batch */
-UNIV_INTERN
-void
-trx_purge_worker_wait(void);
-/*========================*/
-/**********************************************************************
-This function wakes the waiting worker batch */
-UNIV_INTERN
-void
-trx_purge_worker_wake(void);
-/*========================*/
+trx_purge(
+/*======*/
+ ulint limit); /*!< in: the maximum number of records to
+ purge in one batch */
/******************************************************************//**
Prints information of the purge system to stderr. */
UNIV_INTERN
@@ -145,25 +129,20 @@ struct trx_purge_struct{
ulint state; /*!< Purge system state */
sess_t* sess; /*!< System session running the purge
query */
- trx_t* trx; /*!< System transaction running the purge
+ trx_t* trx; /*!< System transaction running the
+ purge
query: this trx is not in the trx list
of the trx system and it never ends */
que_t* query; /*!< The query graph which will do the
parallelized purge operation */
- ulint n_worker;
- os_event_t worker_event;
- sess_t** sess_arr;
- trx_t** trx_arr;
- que_t** query_arr;
- rw_lock_t latch; /*!< The latch protecting the purge view.
- A purge operation must acquire an
- x-latch here for the instant at which
+ rw_lock_t latch; /*!< The latch protecting the purge
+ view. A purge operation must acquire
+ an x-latch here for the instant at which
it changes the purge view: an undo
log operation can prevent this by
obtaining an s-latch here. */
read_view_t* view; /*!< The purge will not remove undo logs
which are >= this view (purge view) */
- mutex_t mutex; /*!< Mutex protecting the fields below */
ulint n_pages_handled;/*!< Approximate number of undo log
pages processed in purge */
ulint handle_limit; /*!< Target of how many pages to get
@@ -201,6 +180,11 @@ struct trx_purge_struct{
mem_heap_t* heap; /*!< Temporary storage used during a
purge: can be emptied after purge
completes */
+ /*-----------------------------*/
+ ib_bh_t* ib_bh; /*!< Binary min-heap, ordered on
+ rseg_queue_t::trx_no. It is protected
+ by the bh_mutex */
+ mutex_t bh_mutex; /*!< Mutex protecting ib_bh */
};
#define TRX_PURGE_ON 1 /* purge operation is running */
diff --git a/storage/xtradb/include/trx0rec.h b/storage/xtradb/include/trx0rec.h
index a6e56e963c6..477748f6f89 100644
--- a/storage/xtradb/include/trx0rec.h
+++ b/storage/xtradb/include/trx0rec.h
@@ -108,7 +108,7 @@ trx_undo_rec_get_pars(
ibool* updated_extern, /*!< out: TRUE if we updated an
externally stored fild */
undo_no_t* undo_no, /*!< out: undo log record number */
- dulint* table_id); /*!< out: table id */
+ table_id_t* table_id); /*!< out: table id */
/*******************************************************************//**
Builds a row reference from an undo log record.
@return pointer to remaining part of undo record */
@@ -227,7 +227,7 @@ trx_undo_report_row_operation(
index, otherwise NULL */
roll_ptr_t* roll_ptr); /*!< out: rollback pointer to the
inserted undo log record,
- ut_dulint_zero if BTR_NO_UNDO_LOG
+ 0 if BTR_NO_UNDO_LOG
flag was specified */
/******************************************************************//**
Copies an undo record to heap. This function can be called if we know that
diff --git a/storage/xtradb/include/trx0rec.ic b/storage/xtradb/include/trx0rec.ic
index e7e41d6d9f6..f0b3276ed44 100644
--- a/storage/xtradb/include/trx0rec.ic
+++ b/storage/xtradb/include/trx0rec.ic
@@ -78,7 +78,7 @@ trx_undo_rec_get_undo_no(
ptr = undo_rec + 3;
- return(mach_dulint_read_much_compressed(ptr));
+ return(mach_ull_read_much_compressed(ptr));
}
/**********************************************************************//**
@@ -90,7 +90,7 @@ trx_undo_rec_get_offset(
/*====================*/
undo_no_t undo_no) /*!< in: undo no read from node */
{
- return (3 + mach_dulint_get_much_compressed_size(undo_no));
+ return (3 + mach_ull_get_much_compressed_size(undo_no));
}
/***********************************************************************//**
diff --git a/storage/xtradb/include/trx0rseg.h b/storage/xtradb/include/trx0rseg.h
index 303188f09f2..5acde05de3d 100644
--- a/storage/xtradb/include/trx0rseg.h
+++ b/storage/xtradb/include/trx0rseg.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -103,7 +103,7 @@ trx_rseg_header_create(
ulint zip_size, /*!< in: compressed page size in bytes
or 0 for uncompressed pages */
ulint max_size, /*!< in: max size in pages */
- ulint* slot_no, /*!< out: rseg id == slot number in trx sys */
+ ulint rseg_slot_no, /*!< in: rseg id == slot number in trx sys */
mtr_t* mtr); /*!< in: mtr */
/*********************************************************************//**
Creates the memory copies for rollback segments and initializes the
@@ -113,18 +113,9 @@ void
trx_rseg_list_and_array_init(
/*=========================*/
trx_sysf_t* sys_header, /*!< in: trx system header */
+ ib_bh_t* ib_bh, /*!< in: rseg queue */
mtr_t* mtr); /*!< in: mtr */
-/****************************************************************//**
-Creates a new rollback segment to the database.
-@return the created segment object, NULL if fail */
-UNIV_INTERN
-trx_rseg_t*
-trx_rseg_create(
-/*============*/
- ulint space, /*!< in: space id */
- ulint max_size, /*!< in: max size in pages */
- ulint* id, /*!< out: rseg id */
- mtr_t* mtr); /*!< in: mtr */
+
/***************************************************************************
Free's an instance of the rollback segment in memory. */
UNIV_INTERN
@@ -133,12 +124,15 @@ trx_rseg_mem_free(
/*==============*/
trx_rseg_t* rseg); /* in, own: instance to free */
-
-/* Real max value may be 4076 in usual. But reserve 4 slot for safety or etc... */
-#define TRX_RSEG_N_EXTRA_SLOTS (((UNIV_PAGE_SIZE - (FIL_PAGE_DATA + FIL_PAGE_DATA_END + TRX_RSEG_UNDO_SLOTS)) / TRX_RSEG_SLOT_SIZE) - 4)
+/*********************************************************************
+Creates a rollback segment. */
+UNIV_INTERN
+trx_rseg_t*
+trx_rseg_create(void);
+/*==================*/
/* Number of undo log slots in a rollback segment file copy */
-#define TRX_RSEG_N_SLOTS (srv_extra_undoslots ? TRX_RSEG_N_EXTRA_SLOTS : (UNIV_PAGE_SIZE / 16))
+#define TRX_RSEG_N_SLOTS (UNIV_PAGE_SIZE / 16)
/* Maximum number of transactions supported by a single rollback segment */
#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2)
@@ -149,9 +143,7 @@ struct trx_rseg_struct{
ulint id; /*!< rollback segment id == the index of
its slot in the trx system file copy */
mutex_t mutex; /*!< mutex protecting the fields in this
- struct except id; NOTE that the latching
- order must always be kernel mutex ->
- rseg mutex */
+ struct except id, which is constant */
ulint space; /*!< space where the rollback segment is
header is placed */
ulint zip_size;/* compressed page size of space
@@ -190,6 +182,14 @@ struct trx_rseg_struct{
memory objects */
};
+/** For prioritising the rollback segments for purge. */
+struct rseg_queue_struct {
+ trx_id_t trx_no; /*!< trx_rseg_t::last_trx_no */
+ trx_rseg_t* rseg; /*!< Rollback segment */
+};
+
+typedef struct rseg_queue_struct rseg_queue_t;
+
/* Undo log segment slot in a rollback segment header */
/*-------------------------------------------------------------*/
#define TRX_RSEG_SLOT_PAGE_NO 0 /* Page number of the header page of
diff --git a/storage/xtradb/include/trx0sys.h b/storage/xtradb/include/trx0sys.h
index 9ef9485b611..db7bf68deae 100644
--- a/storage/xtradb/include/trx0sys.h
+++ b/storage/xtradb/include/trx0sys.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,6 +38,7 @@ Created 3/26/1996 Heikki Tuuri
#include "mem0mem.h"
#include "sync0sync.h"
#include "ut0lst.h"
+#include "ut0bh.h"
#include "read0types.h"
#include "page0types.h"
@@ -160,13 +161,6 @@ void
trx_sys_dummy_create(
/*=================*/
ulint space);
-/*********************************************************************
-Create extra rollback segments when create_new_db */
-UNIV_INTERN
-void
-trx_sys_create_extra_rseg(
-/*======================*/
- ulint num); /* in: number of extra user rollback segments */
/****************************************************************//**
Looks for a free slot for a rollback segment in the trx system file copy.
@return slot index or ULINT_UNDEFINED if not found */
@@ -254,13 +248,6 @@ UNIV_INLINE
trx_id_t
trx_sys_get_new_trx_id(void);
/*========================*/
-/*****************************************************************//**
-Allocates a new transaction number.
-@return new, allocated trx number */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_no(void);
-/*========================*/
#endif /* !UNIV_HOTBACKUP */
/*****************************************************************//**
Writes a trx id to an index page. In case that the id size changes in
@@ -465,12 +452,20 @@ trx_sys_file_format_id_to_name(
const ulint id); /*!< in: id of the file format */
#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************
+Creates the rollback segments */
+UNIV_INTERN
+void
+trx_sys_create_rsegs(
+/*=================*/
+ ulint n_rsegs); /*!< number of rollback segments to create */
+
/* The automatically created system rollback segment has this id */
#define TRX_SYS_SYSTEM_RSEG_ID 0
/* Space id and page no where the trx system file copy resides */
#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */
-#define TRX_DOUBLEWRITE_SPACE 1 /* the doublewrite buffer tablespace if used */
+#define TRX_DOUBLEWRITE_SPACE 0xFFFFFFE0UL /* the doublewrite buffer tablespace if used */
#define TRX_SYS_SPACE_MAX 9 /* reserved max space id for system tablespaces */
#include "fsp0fsp.h"
#define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO
@@ -501,11 +496,16 @@ trx_sys_file_format_id_to_name(
slots */
/*------------------------------------------------------------- @} */
-/** Maximum number of rollback segments: the number of segment
-specification slots in the transaction system array; rollback segment
-id must fit in one byte, therefore 256; each slot is currently 8 bytes
-in size */
-#define TRX_SYS_N_RSEGS 256
+/* Max number of rollback segments: the number of segment specification slots
+in the transaction system array; rollback segment id must fit in one (signed)
+byte, therefore 128; each slot is currently 8 bytes in size. If you want
+to raise the level to 256 then you will need to fix some assertions that
+impose the 7 bit restriction. e.g., mach_write_to_3() */
+#define TRX_SYS_N_RSEGS 128
+/* Originally, InnoDB defined TRX_SYS_N_RSEGS as 256 but created only one
+rollback segment. It initialized some arrays with this number of entries.
+We must remember this limit in order to keep file compatibility. */
+#define TRX_SYS_OLD_N_RSEGS 256
/** Maximum length of MySQL binlog file name, in bytes.
@see trx_sys_mysql_master_log_name
@@ -593,11 +593,16 @@ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO. */
(TRX_SYS_PAGE_NO of TRX_SYS_SPACE) */
#define TRX_SYS_FILE_FORMAT_TAG (UNIV_PAGE_SIZE - 16)
-/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid. The file format
+/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid. The file format
identifier is added to this constant. */
#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW 3645922177UL
/** Contents of TRX_SYS_FILE_FORMAT_TAG+4 when valid */
#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH 2745987765UL
+/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid. The file format
+identifier is added to this 64-bit constant. */
+#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N \
+ ((ib_uint64_t) TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH << 32 \
+ | TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW)
/* @} */
/** Doublewrite control struct */
diff --git a/storage/xtradb/include/trx0sys.ic b/storage/xtradb/include/trx0sys.ic
index c7b09d4aec2..aa2d2ea26b0 100644
--- a/storage/xtradb/include/trx0sys.ic
+++ b/storage/xtradb/include/trx0sys.ic
@@ -81,7 +81,7 @@ trx_sys_sys_space(
{
if (srv_doublewrite_file) {
/* several spaces are reserved */
- return((ibool)(space <= TRX_SYS_SPACE_MAX));
+ return((ibool)(space == TRX_SYS_SPACE || space == TRX_DOUBLEWRITE_SPACE));
} else {
return((ibool)(space == TRX_SYS_SPACE));
}
@@ -300,7 +300,7 @@ trx_get_on_id(
trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
while (trx != NULL) {
- if (0 == ut_dulint_cmp(trx_id, trx->id)) {
+ if (trx_id == trx->id) {
return(trx);
}
@@ -349,12 +349,12 @@ trx_is_active(
ut_ad(mutex_own(&(kernel_mutex)));
- if (ut_dulint_cmp(trx_id, trx_list_get_min_trx_id()) < 0) {
+ if (trx_id < trx_list_get_min_trx_id()) {
return(FALSE);
}
- if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) {
+ if (UNIV_UNLIKELY(trx_id >= trx_sys->max_trx_id)) {
/* There must be corruption: we return TRUE because this
function is only called by lock_clust_rec_some_has_impl()
@@ -393,29 +393,14 @@ trx_sys_get_new_trx_id(void)
Thus trx id values will not overlap when the database is
repeatedly started! */
- if (ut_dulint_get_low(trx_sys->max_trx_id)
- % TRX_SYS_TRX_ID_WRITE_MARGIN == 0) {
+ if ((ulint) trx_sys->max_trx_id % TRX_SYS_TRX_ID_WRITE_MARGIN == 0) {
trx_sys_flush_max_trx_id();
}
- id = trx_sys->max_trx_id;
-
- UT_DULINT_INC(trx_sys->max_trx_id);
+ id = trx_sys->max_trx_id++;
return(id);
}
-/*****************************************************************//**
-Allocates a new transaction number.
-@return new, allocated trx number */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_no(void)
-/*========================*/
-{
- ut_ad(mutex_own(&kernel_mutex));
-
- return(trx_sys_get_new_trx_id());
-}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h
index 4c0ce392bcd..ab7404c5eff 100644
--- a/storage/xtradb/include/trx0trx.h
+++ b/storage/xtradb/include/trx0trx.h
@@ -44,6 +44,9 @@ extern sess_t* trx_dummy_sess;
/** Number of transactions currently allocated for MySQL: protected by
the kernel mutex */
extern ulint trx_n_mysql_transactions;
+/** Number of transactions currently in the XA PREPARED state: protected by
+the kernel mutex */
+extern ulint trx_n_prepared;
/********************************************************************//**
Releases the search latch if trx has reserved it. */
@@ -108,6 +111,14 @@ trx_free(
/*=====*/
trx_t* trx); /*!< in, own: trx object */
/********************************************************************//**
+At shutdown, frees a transaction object that is in the PREPARED state. */
+UNIV_INTERN
+void
+trx_free_prepared(
+/*==============*/
+ trx_t* trx) /*!< in, own: trx object */
+ UNIV_COLD __attribute__((nonnull));
+/********************************************************************//**
Frees a transaction object for MySQL. */
UNIV_INTERN
void
@@ -214,12 +225,12 @@ trx_recover_for_mysql(
/*******************************************************************//**
This function is used to find one X/Open XA distributed transaction
which is in the prepared state
-@return trx or NULL */
+@return trx or NULL; on match, the trx->xid will be invalidated */
UNIV_INTERN
trx_t *
trx_get_trx_by_xid(
/*===============*/
- XID* xid); /*!< in: X/Open XA transaction identification */
+ const XID* xid); /*!< in: X/Open XA transaction identifier */
/**********************************************************************//**
If required, flushes the log to disk if we called trx_commit_for_mysql()
with trx->flush_log_later == TRUE.
@@ -408,30 +419,20 @@ Calculates the "weight" of a transaction. The weight of one transaction
is estimated as the number of altered rows + the number of locked rows.
@param t transaction
@return transaction weight */
-#define TRX_WEIGHT(t) \
- ut_dulint_add((t)->undo_no, UT_LIST_GET_LEN((t)->trx_locks))
+#define TRX_WEIGHT(t) ((t)->undo_no + UT_LIST_GET_LEN((t)->trx_locks))
/*******************************************************************//**
Compares the "weight" (or size) of two transactions. Transactions that
have edited non-transactional tables are considered heavier than ones
that have not.
-@return <0, 0 or >0; similar to strcmp(3) */
+@return TRUE if weight(a) >= weight(b) */
UNIV_INTERN
-int
-trx_weight_cmp(
-/*===========*/
+ibool
+trx_weight_ge(
+/*==========*/
const trx_t* a, /*!< in: the first transaction to be compared */
const trx_t* b); /*!< in: the second transaction to be compared */
-/*******************************************************************//**
-Retrieves transacion's id, represented as unsigned long long.
-@return transaction's id */
-UNIV_INLINE
-ullint
-trx_get_id(
-/*=======*/
- const trx_t* trx); /*!< in: transaction */
-
/* Maximum length of a string that can be returned by
trx_get_que_state_str(). */
#define TRX_QUE_STATE_STR_MAX_LEN 12 /* "ROLLING BACK" */
@@ -480,6 +481,20 @@ struct trx_struct{
of view of concurrency control:
TRX_ACTIVE, TRX_COMMITTED_IN_MEMORY,
... */
+ /*------------------------------*/
+ /* MySQL has a transaction coordinator to coordinate two phase
+ commit between multiple storage engines and the binary log. When
+ an engine participates in a transaction, it's responsible for
+ registering itself using the trans_register_ha() API. */
+ unsigned is_registered:1;/* This flag is set to 1 after the
+ transaction has been registered with
+ the coordinator using the XA API, and
+ is set to 0 after commit or rollback. */
+ unsigned owns_prepare_mutex:1;/* 1 if owns prepare mutex, if
+ this is set to 1 then registered should
+ also be set to 1. This is used in the
+ XA code */
+ /*------------------------------*/
ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */
ulint check_foreigns; /* normally TRUE, but if the user
wants to suppress foreign key checks,
@@ -497,7 +512,6 @@ struct trx_struct{
FALSE, one can save CPU time and about
150 bytes in the undo log size as then
we skip XA steps */
- ulint flush_log_at_trx_commit_session;
ulint flush_log_later;/* In 2PC, we hold the
prepare_commit mutex across
both phases. In that case, we
@@ -511,9 +525,6 @@ struct trx_struct{
in that case we must flush the log
in trx_commit_complete_for_mysql() */
ulint duplicates; /*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
- ulint active_trans; /*!< 1 - if a transaction in MySQL
- is active. 2 - if prepare_commit_mutex
- was taken */
ulint has_search_latch;
/* TRUE if this trx has latched the
search system latch in S-mode */
@@ -556,8 +567,8 @@ struct trx_struct{
max trx id when the transaction is
moved to COMMITTED_IN_MEMORY state */
ib_uint64_t commit_lsn; /*!< lsn at the time of the commit */
- trx_id_t table_id; /*!< Table to drop iff dict_operation
- is TRUE, or ut_dulint_zero. */
+ table_id_t table_id; /*!< Table to drop iff dict_operation
+ is TRUE, or 0. */
/*------------------------------*/
void* mysql_thd; /*!< MySQL thread handle corresponding
to this trx, or NULL */
@@ -583,12 +594,6 @@ struct trx_struct{
replication has processed */
const char* mysql_relay_log_file_name;
ib_int64_t mysql_relay_log_pos;
-
- os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated
- with this transaction object */
- ulint mysql_process_no;/* since in Linux, 'top' reports
- process id's and not thread id's, we
- store the process number too */
/*------------------------------*/
ulint n_mysql_tables_in_use; /* number of Innobase tables
used in the processing of the current
diff --git a/storage/xtradb/include/trx0trx.ic b/storage/xtradb/include/trx0trx.ic
index 7332eeece85..4a1d3bcde0b 100644
--- a/storage/xtradb/include/trx0trx.ic
+++ b/storage/xtradb/include/trx0trx.ic
@@ -69,18 +69,6 @@ trx_get_error_info(
}
/*******************************************************************//**
-Retrieves transacion's id, represented as unsigned long long.
-@return transaction's id */
-UNIV_INLINE
-ullint
-trx_get_id(
-/*=======*/
- const trx_t* trx) /*!< in: transaction */
-{
- return((ullint)ut_conv_dulint_to_longlong(trx->id));
-}
-
-/*******************************************************************//**
Retrieves transaction's que state in a human readable string. The string
should not be free()'d or modified.
@return string in the data segment */
diff --git a/storage/xtradb/include/trx0types.h b/storage/xtradb/include/trx0types.h
index 40a7256cbfd..a4115b5aca7 100644
--- a/storage/xtradb/include/trx0types.h
+++ b/storage/xtradb/include/trx0types.h
@@ -28,10 +28,7 @@ Created 3/26/1996 Heikki Tuuri
#include "ut0byte.h"
-/** prepare trx_t::id for being printed via printf(3) */
-#define TRX_ID_PREP_PRINTF(id) (ullint) ut_conv_dulint_to_longlong(id)
-
-/** printf(3) format used for printing TRX_ID_PRINTF_PREP() */
+/** printf(3) format used for printing DB_TRX_ID and other system fields */
#define TRX_ID_FMT "%llX"
/** maximum length that a formatted trx_t::id could take, not including
@@ -81,12 +78,14 @@ enum trx_rb_ctx {
in crash recovery */
};
+/** Row identifier (DB_ROW_ID, DATA_ROW_ID) */
+typedef ib_id_t row_id_t;
/** Transaction identifier (DB_TRX_ID, DATA_TRX_ID) */
-typedef dulint trx_id_t;
+typedef ib_id_t trx_id_t;
/** Rollback pointer (DB_ROLL_PTR, DATA_ROLL_PTR) */
-typedef dulint roll_ptr_t;
+typedef ib_id_t roll_ptr_t;
/** Undo number */
-typedef dulint undo_no_t;
+typedef ib_id_t undo_no_t;
/** Transaction savepoint */
typedef struct trx_savept_struct trx_savept_t;
diff --git a/storage/xtradb/include/trx0undo.h b/storage/xtradb/include/trx0undo.h
index a084f2394b5..df16c939070 100644
--- a/storage/xtradb/include/trx0undo.h
+++ b/storage/xtradb/include/trx0undo.h
@@ -262,8 +262,6 @@ UNIV_INTERN
page_t*
trx_undo_set_state_at_finish(
/*=========================*/
- trx_rseg_t* rseg, /*!< in: rollback segment memory object */
- trx_t* trx, /*!< in: transaction */
trx_undo_t* undo, /*!< in: undo log memory copy */
mtr_t* mtr); /*!< in: mtr */
/******************************************************************//**
@@ -298,6 +296,15 @@ void
trx_undo_insert_cleanup(
/*====================*/
trx_t* trx); /*!< in: transaction handle */
+
+/********************************************************************//**
+At shutdown, frees the undo logs of a PREPARED transaction. */
+UNIV_INTERN
+void
+trx_undo_free_prepared(
+/*===================*/
+ trx_t* trx) /*!< in/out: PREPARED transaction */
+ UNIV_COLD __attribute__((nonnull));
#endif /* !UNIV_HOTBACKUP */
/***********************************************************//**
Parses the redo log entry of an undo log page initialization.
@@ -383,7 +390,7 @@ struct trx_undo_struct{
XID xid; /*!< X/Open XA transaction
identification */
ibool dict_operation; /*!< TRUE if a dict operation trx */
- dulint table_id; /*!< if a dict operation, then the table
+ table_id_t table_id; /*!< if a dict operation, then the table
id */
trx_rseg_t* rseg; /*!< rseg where the undo log belongs */
/*-----------------------------*/
diff --git a/storage/xtradb/include/trx0undo.ic b/storage/xtradb/include/trx0undo.ic
index 2d289b34ef1..b81330f7f8b 100644
--- a/storage/xtradb/include/trx0undo.ic
+++ b/storage/xtradb/include/trx0undo.ic
@@ -39,16 +39,19 @@ trx_undo_build_roll_ptr(
ulint page_no, /*!< in: page number */
ulint offset) /*!< in: offset of the undo entry within page */
{
+ roll_ptr_t roll_ptr;
#if DATA_ROLL_PTR_LEN != 7
# error "DATA_ROLL_PTR_LEN != 7"
#endif
- ut_ad(rseg_id < 128);
-
- return(ut_dulint_create(is_insert * 128 * 256 * 256
- + rseg_id * 256 * 256
- + (page_no / 256) / 256,
- (page_no % (256 * 256)) * 256 * 256
- + offset));
+ ut_ad(is_insert == 0 || is_insert == 1);
+ ut_ad(rseg_id < TRX_SYS_N_RSEGS);
+ ut_ad(offset < 65536);
+
+ roll_ptr = (roll_ptr_t) is_insert << 55
+ | (roll_ptr_t) rseg_id << 48
+ | (roll_ptr_t) page_no << 16
+ | offset;
+ return(roll_ptr);
}
/***********************************************************************//**
@@ -64,24 +67,20 @@ trx_undo_decode_roll_ptr(
ulint* offset) /*!< out: offset of the undo
entry within page */
{
- ulint low;
- ulint high;
#if DATA_ROLL_PTR_LEN != 7
# error "DATA_ROLL_PTR_LEN != 7"
#endif
#if TRUE != 1
# error "TRUE != 1"
#endif
- high = ut_dulint_get_high(roll_ptr);
- low = ut_dulint_get_low(roll_ptr);
-
- *offset = low % (256 * 256);
-
- *is_insert = high / (256 * 256 * 128); /* TRUE == 1 */
- *rseg_id = (high / (256 * 256)) % 128;
-
- *page_no = (high % (256 * 256)) * 256 * 256
- + (low / 256) / 256;
+ ut_ad(roll_ptr < (1ULL << 56));
+ *offset = (ulint) roll_ptr & 0xFFFF;
+ roll_ptr >>= 16;
+ *page_no = (ulint) roll_ptr & 0xFFFFFFFF;
+ roll_ptr >>= 32;
+ *rseg_id = (ulint) roll_ptr & 0x7F;
+ roll_ptr >>= 7;
+ *is_insert = (ibool) roll_ptr; /* TRUE==1 */
}
/***********************************************************************//**
@@ -93,16 +92,14 @@ trx_undo_roll_ptr_is_insert(
/*========================*/
roll_ptr_t roll_ptr) /*!< in: roll pointer */
{
- ulint high;
#if DATA_ROLL_PTR_LEN != 7
# error "DATA_ROLL_PTR_LEN != 7"
#endif
#if TRUE != 1
# error "TRUE != 1"
#endif
- high = ut_dulint_get_high(roll_ptr);
-
- return(high / (256 * 256 * 128));
+ ut_ad(roll_ptr < (1ULL << 56));
+ return((ibool) (roll_ptr >> 55));
}
#endif /* !UNIV_HOTBACKUP */
diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i
index 8691e3cf337..17cc21946a1 100644
--- a/storage/xtradb/include/univ.i
+++ b/storage/xtradb/include/univ.i
@@ -44,10 +44,19 @@ Created 1/20/1994 Heikki Tuuri
#include "hb_univ.i"
#endif /* UNIV_HOTBACKUP */
+/* aux macros to convert M into "123" (string) if M is defined like
+#define M 123 */
+#define _IB_TO_STR(s) #s
+#define IB_TO_STR(s) _IB_TO_STR(s)
+
#define INNODB_VERSION_MAJOR 1
-#define INNODB_VERSION_MINOR 0
-#define INNODB_VERSION_BUGFIX 12
-#define PERCONA_INNODB_VERSION 12.1
+#define INNODB_VERSION_MINOR 1
+#define INNODB_VERSION_BUGFIX 8
+
+#ifndef PERCONA_INNODB_VERSION
+#define PERCONA_INNODB_VERSION 20.1
+#endif
+
/* The following is the InnoDB version as shown in
SELECT plugin_version FROM information_schema.plugins;
@@ -58,18 +67,15 @@ component, i.e. we show M.N.P as M.N */
#define INNODB_VERSION_SHORT \
(INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
-/* auxiliary macros to help creating the version as string */
-#define __INNODB_VERSION(a, b, c, d) (#a "." #b "." #c "-" #d)
-#define _INNODB_VERSION(a, b, c, d) __INNODB_VERSION(a, b, c, d)
-
-
#define INNODB_VERSION_STR \
- _INNODB_VERSION(INNODB_VERSION_MAJOR, \
- INNODB_VERSION_MINOR, \
- INNODB_VERSION_BUGFIX, \
- PERCONA_INNODB_VERSION)
+ IB_TO_STR(INNODB_VERSION_MAJOR) "." \
+ IB_TO_STR(INNODB_VERSION_MINOR) "." \
+ IB_TO_STR(INNODB_VERSION_BUGFIX) "-" \
+ IB_TO_STR(PERCONA_INNODB_VERSION)
-#define REFMAN "http://dev.mysql.com/doc/refman/5.1/en/"
+#define REFMAN "http://dev.mysql.com/doc/refman/" \
+ IB_TO_STR(MYSQL_MAJOR_VERSION) "." \
+ IB_TO_STR(MYSQL_MINOR_VERSION) "/en/"
#ifdef MYSQL_DYNAMIC_PLUGIN
/* In the dynamic plugin, redefine some externally visible symbols
@@ -81,19 +87,6 @@ the virtual method table (vtable) in GCC 3. */
# define ha_innobase ha_innodb
#endif /* MYSQL_DYNAMIC_PLUGIN */
-/* if any of the following macros is defined at this point this means
-that the code from the "right" plug.in was executed and we do not
-need to include ut0auxconf.h which would either define the same macros
-or will be empty */
-#if !defined(HAVE_IB_GCC_ATOMIC_BUILTINS) \
- && !defined(HAVE_IB_ATOMIC_PTHREAD_T_GCC) \
- && !defined(HAVE_IB_SOLARIS_ATOMICS) \
- && !defined(HAVE_IB_ATOMIC_PTHREAD_T_SOLARIS) \
- && !defined(SIZEOF_PTHREAD_T) \
- && !defined(HAVE_IB_PAUSE_INSTRUCTION)
-# include "ut0auxconf.h"
-#endif
-
#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__)
# undef __WIN__
# define __WIN__
@@ -118,7 +111,7 @@ if we are compiling on Windows. */
/* Include <sys/stat.h> to get S_I... macros defined for os0file.c */
# include <sys/stat.h>
-# if !defined(__NETWARE__) && !defined(__WIN__)
+# if !defined(__WIN__)
# include <sys/mman.h> /* mmap() for os0proc.c */
# endif
@@ -147,6 +140,23 @@ Sun Studio */
#endif /* #if (defined(WIN32) || ... */
+/* Following defines are to enable performance schema
+instrumentation in each of four InnoDB modules if
+HAVE_PSI_INTERFACE is defined. */
+#ifdef HAVE_PSI_INTERFACE
+# define UNIV_PFS_MUTEX
+# define UNIV_PFS_RWLOCK
+/* For I/O instrumentation, performance schema rely
+on a native descriptor to identify the file, this
+descriptor could conflict with our OS level descriptor.
+Disable IO instrumentation on Windows until this is
+resolved */
+# ifndef __WIN__
+# define UNIV_PFS_IO
+# endif
+# define UNIV_PFS_THREAD
+#endif /* HAVE_PSI_INTERFACE */
+
/* DEBUG VERSION CONTROL
===================== */
@@ -180,14 +190,17 @@ command. Not tested on Windows. */
debugging without UNIV_DEBUG */
#define UNIV_BUF_DEBUG /* Enable buffer pool
debugging without UNIV_DEBUG */
+#define UNIV_BLOB_LIGHT_DEBUG /* Enable off-page column
+ debugging without UNIV_DEBUG */
+#define UNIV_BLOB_NULL_DEBUG /* Enable deep off-page
+ column debugging */
#define UNIV_DEBUG /* Enable ut_ad() assertions
and disable UNIV_INLINE */
#define UNIV_DEBUG_LOCK_VALIDATE /* Enable
ut_ad(lock_rec_validate_page())
assertions. */
-#define UNIV_DEBUG_FILE_ACCESSES /* Debug .ibd file access
- (field file_page_was_freed
- in buf_page_t) */
+#define UNIV_DEBUG_FILE_ACCESSES /* Enable freed block access
+ debugging without UNIV_DEBUG */
#define UNIV_LRU_DEBUG /* debug the buffer pool LRU */
#define UNIV_HASH_DEBUG /* debug HASH_ macros */
#define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */
@@ -196,6 +209,8 @@ this will break redo log file compatibility, but it may be useful when
debugging redo log application problems. */
#define UNIV_MEM_DEBUG /* detect memory leaks etc */
#define UNIV_IBUF_DEBUG /* debug the insert buffer */
+#define UNIV_BLOB_DEBUG /* track BLOB ownership;
+assumes that no BLOBs survive server restart */
#define UNIV_IBUF_COUNT_DEBUG /* debug the insert buffer;
this limits the database to IBUF_COUNT_N_SPACES and IBUF_COUNT_N_PAGES,
and the insert buffer must be empty when the database is started */
@@ -214,6 +229,9 @@ operations (very slow); also UNIV_DEBUG must be defined */
for compressed pages */
#define UNIV_ZIP_COPY /* call page_zip_copy_recs()
more often */
+#define UNIV_AIO_DEBUG /* prints info about
+ submitted and reaped AIO
+ requests to the log. */
#endif
#define UNIV_BTR_DEBUG /* check B-tree links */
@@ -237,14 +255,29 @@ by one. */
option off; also some ibuf tests are suppressed */
/* Linkage specifier for non-static InnoDB symbols (variables and functions)
-that are only referenced from within InnoDB, not from MySQL */
-#if defined(__GNUC__) && (__GNUC__ >= 4) || defined(__INTEL_COMPILER)
+that are only referenced from within InnoDB, not from MySQL. We disable the
+GCC visibility directive on all Sun operating systems because there is no
+easy way to get it to work. See http://bugs.mysql.com/bug.php?id=52263. */
+#if defined(__GNUC__) && (__GNUC__ >= 4) && !defined(sun) || defined(__INTEL_COMPILER)
# define UNIV_INTERN __attribute__((visibility ("hidden")))
#else
# define UNIV_INTERN
#endif
+#if defined __GNUC__ && (__GNUC__ > 4 || __GNUC__ == 4 && __GNUC_MINOR__ >= 3)
+/** Starting with GCC 4.3, the "cold" attribute is used to inform the
+compiler that a function is unlikely executed. The function is
+optimized for size rather than speed and on many targets it is placed
+into special subsection of the text section so all cold functions
+appears close together improving code locality of non-cold parts of
+program. The paths leading to call of cold functions within code are
+marked as unlikely by the branch prediction mechanism. optimize a
+rarely invoked function for size instead for speed. */
+# define UNIV_COLD __attribute__((cold))
+#else
+# define UNIV_COLD /* empty */
+#endif
-#if (!defined(UNIV_DEBUG) && !defined(UNIV_MUST_NOT_INLINE))
+#ifndef UNIV_MUST_NOT_INLINE
/* Definition for inline version */
#ifdef __WIN__
@@ -303,6 +336,18 @@ number does not include a terminating '\0'. InnoDB probably can handle
longer names internally */
#define MAX_TABLE_NAME_LEN 192
+/* The maximum length of a database name. Like MAX_TABLE_NAME_LEN this is
+the MySQL's NAME_LEN, see check_and_convert_db_name(). */
+#define MAX_DATABASE_NAME_LEN MAX_TABLE_NAME_LEN
+
+/* MAX_FULL_NAME_LEN defines the full name path including the
+database name and table name. In addition, 14 bytes is added for:
+ 2 for surrounding quotes around table name
+ 1 for the separating dot (.)
+ 9 for the #mysql50# prefix */
+#define MAX_FULL_NAME_LEN \
+ (MAX_TABLE_NAME_LEN + MAX_DATABASE_NAME_LEN + 14)
+
/*
UNIVERSAL TYPE DEFINITIONS
==========================
@@ -360,8 +405,10 @@ typedef unsigned long long int ullint;
/* The 'undefined' value for a ulint */
#define ULINT_UNDEFINED ((ulint)(-1))
+/** The bitmask of 32-bit unsigned integer */
+#define ULINT32_MASK 0xFFFFFFFF
/* The undefined 32-bit unsigned integer */
-#define ULINT32_UNDEFINED 0xFFFFFFFF
+#define ULINT32_UNDEFINED ULINT32_MASK
/* Maximum value for a ulint */
#define ULINT_MAX ((ulint)(-2))
@@ -369,6 +416,12 @@ typedef unsigned long long int ullint;
/* Maximum value for ib_uint64_t */
#define IB_ULONGLONG_MAX ((ib_uint64_t) (~0ULL))
+/** The generic InnoDB system object identifier data type */
+typedef ib_uint64_t ib_id_t;
+
+/* The 'undefined' value for a ullint */
+#define ULLINT_UNDEFINED ((ullint)(-1))
+
/* This 'ibool' type is used within Innobase. Remember that different included
headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */
#define ibool ulint
@@ -418,7 +471,7 @@ it is read or written. */
/* Use sun_prefetch when compile with Sun Studio */
# define UNIV_EXPECT(expr,value) (expr)
# define UNIV_LIKELY_NULL(expr) (expr)
-# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many(addr)
+# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many((void*) addr)
# define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr)
#else
/* Dummy versions of the macros */
diff --git a/storage/xtradb/include/ut0auxconf.h b/storage/xtradb/include/ut0auxconf.h
deleted file mode 100644
index 16bcc308392..00000000000
--- a/storage/xtradb/include/ut0auxconf.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* Do not remove this file even though it is empty.
-This file is included in univ.i and will cause compilation failure
-if not present.
-A custom checks have been added in the generated
-storage/innobase/Makefile.in that is shipped with the InnoDB Plugin
-source archive. These checks eventually define some macros and put
-them in this file.
-This is a hack that has been developed in order to deploy new compile
-time checks without the need to regenerate the ./configure script that is
-distributed in the MySQL 5.1 official source archives.
-If by any chance Makefile.in and ./configure are regenerated and thus
-the hack from Makefile.in wiped away then the "real" checks from plug.in
-will take over.
-*/
diff --git a/storage/xtradb/include/ut0bh.h b/storage/xtradb/include/ut0bh.h
new file mode 100644
index 00000000000..1b211390283
--- /dev/null
+++ b/storage/xtradb/include/ut0bh.h
@@ -0,0 +1,152 @@
+/***************************************************************************//**
+
+Copyright (c) 2011, Oracle Corpn. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0bh.h
+Binary min-heap interface.
+
+Created 2010-05-28 by Sunny Bains
+*******************************************************/
+
+#ifndef INNOBASE_UT0BH_H
+#define INNOBASE_UT0BH_H
+
+#include "univ.i"
+
+/** Comparison function for objects in the binary heap. */
+typedef int (*ib_bh_cmp_t)(const void* p1, const void* p2);
+
+typedef struct ib_bh_struct ib_bh_t;
+
+/**********************************************************************//**
+Get the number of elements in the binary heap.
+@return number of elements */
+UNIV_INLINE
+ulint
+ib_bh_size(
+/*=======*/
+ const ib_bh_t* ib_bh); /*!< in: instance */
+
+/**********************************************************************//**
+Test if binary heap is empty.
+@return TRUE if empty. */
+UNIV_INLINE
+ibool
+ib_bh_is_empty(
+/*===========*/
+ const ib_bh_t* ib_bh); /*!< in: instance */
+
+/**********************************************************************//**
+Test if binary heap is full.
+@return TRUE if full. */
+UNIV_INLINE
+ibool
+ib_bh_is_full(
+/*===========*/
+ const ib_bh_t* ib_bh); /*!< in: instance */
+
+/**********************************************************************//**
+Get a pointer to the element.
+@return pointer to element */
+UNIV_INLINE
+void*
+ib_bh_get(
+/*=======*/
+ ib_bh_t* ib_bh, /*!< in: instance */
+ ulint i); /*!< in: index */
+
+/**********************************************************************//**
+Copy an element to the binary heap.
+@return pointer to copied element */
+UNIV_INLINE
+void*
+ib_bh_set(
+/*======*/
+ ib_bh_t* ib_bh, /*!< in/out: instance */
+ ulint i, /*!< in: index */
+ const void* elem); /*!< in: element to add */
+
+/**********************************************************************//**
+Return the first element from the binary heap.
+@return pointer to first element or NULL if empty. */
+UNIV_INLINE
+void*
+ib_bh_first(
+/*========*/
+ ib_bh_t* ib_bh); /*!< in: instance */
+
+/**********************************************************************//**
+Return the last element from the binary heap.
+@return pointer to last element or NULL if empty. */
+UNIV_INLINE
+void*
+ib_bh_last(
+/*========*/
+ ib_bh_t* ib_bh); /*!< in/out: instance */
+
+/**********************************************************************//**
+Create a binary heap.
+@return a new binary heap */
+UNIV_INTERN
+ib_bh_t*
+ib_bh_create(
+/*=========*/
+ ib_bh_cmp_t compare, /*!< in: comparator */
+ ulint sizeof_elem, /*!< in: size of one element */
+ ulint max_elems); /*!< in: max elements allowed */
+
+/**********************************************************************//**
+Free a binary heap.
+@return a new binary heap */
+UNIV_INTERN
+void
+ib_bh_free(
+/*=======*/
+ ib_bh_t* ib_bh); /*!< in,own: instance */
+
+/**********************************************************************//**
+Add an element to the binary heap. Note: The element is copied.
+@return pointer to added element or NULL if full. */
+UNIV_INTERN
+void*
+ib_bh_push(
+/*=======*/
+ ib_bh_t* ib_bh, /*!< in/out: instance */
+ const void* elem); /*!< in: element to add */
+
+/**********************************************************************//**
+Remove the first element from the binary heap. */
+UNIV_INTERN
+void
+ib_bh_pop(
+/*======*/
+ ib_bh_t* ib_bh); /*!< in/out: instance */
+
+/** Binary heap data structure */
+struct ib_bh_struct {
+ ulint max_elems; /*!< max elements allowed */
+ ulint n_elems; /*!< current size */
+ ulint sizeof_elem; /*!< sizeof element */
+ ib_bh_cmp_t compare; /*!< comparator */
+};
+
+#ifndef UNIV_NONINL
+#include "ut0bh.ic"
+#endif
+
+#endif /* INNOBASE_UT0BH_H */
diff --git a/storage/xtradb/include/ut0bh.ic b/storage/xtradb/include/ut0bh.ic
new file mode 100644
index 00000000000..afbe58e7e3b
--- /dev/null
+++ b/storage/xtradb/include/ut0bh.ic
@@ -0,0 +1,125 @@
+/***************************************************************************//**
+Copyright (c) 2011, Oracle Corpn. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0bh.ic
+Binary min-heap implementation.
+
+Created 2011-01-15 by Sunny Bains
+*******************************************************/
+
+#include "ut0bh.h"
+#include "ut0mem.h" /* For ut_memcpy() */
+
+/**********************************************************************//**
+Get the number of elements in the binary heap.
+@return number of elements */
+UNIV_INLINE
+ulint
+ib_bh_size(
+/*=======*/
+ const ib_bh_t* ib_bh) /*!< in: instance */
+{
+ return(ib_bh->n_elems);
+}
+
+/**********************************************************************//**
+Test if binary heap is empty.
+@return TRUE if empty. */
+UNIV_INLINE
+ibool
+ib_bh_is_empty(
+/*===========*/
+ const ib_bh_t* ib_bh) /*!< in: instance */
+{
+ return(ib_bh_size(ib_bh) == 0);
+}
+
+/**********************************************************************//**
+Test if binary heap is full.
+@return TRUE if full. */
+UNIV_INLINE
+ibool
+ib_bh_is_full(
+/*===========*/
+ const ib_bh_t* ib_bh) /*!< in: instance */
+{
+ return(ib_bh_size(ib_bh) >= ib_bh->max_elems);
+}
+
+/**********************************************************************//**
+Get a pointer to the element.
+@return pointer to element */
+UNIV_INLINE
+void*
+ib_bh_get(
+/*=======*/
+ ib_bh_t* ib_bh, /*!< in: instance */
+ ulint i) /*!< in: index */
+{
+ byte* ptr = (byte*) (ib_bh + 1);
+
+ ut_a(i < ib_bh_size(ib_bh));
+
+ return(ptr + (ib_bh->sizeof_elem * i));
+}
+
+/**********************************************************************//**
+Copy an element to the binary heap.
+@return pointer to copied element */
+UNIV_INLINE
+void*
+ib_bh_set(
+/*======*/
+ ib_bh_t* ib_bh, /*!< in/out: instance */
+ ulint i, /*!< in: index */
+ const void* elem) /*!< in: element to add */
+{
+ void* ptr = ib_bh_get(ib_bh, i);
+
+ ut_memcpy(ptr, elem, ib_bh->sizeof_elem);
+
+ return(ptr);
+}
+
+/**********************************************************************//**
+Return the first element from the binary heap.
+@return pointer to first element or NULL if empty. */
+UNIV_INLINE
+void*
+ib_bh_first(
+/*========*/
+ ib_bh_t* ib_bh) /*!< in: instance */
+{
+ return(ib_bh_is_empty(ib_bh) ? NULL : ib_bh_get(ib_bh, 0));
+}
+
+/**********************************************************************//**
+Return the last element from the binary heap.
+@return pointer to last element or NULL if empty. */
+UNIV_INLINE
+void*
+ib_bh_last(
+/*========*/
+ ib_bh_t* ib_bh) /*!< in/out: instance */
+{
+ return(ib_bh_is_empty(ib_bh)
+ ? NULL
+ : ib_bh_get(ib_bh, ib_bh_size(ib_bh) - 1));
+}
+
+
diff --git a/storage/xtradb/include/ut0byte.h b/storage/xtradb/include/ut0byte.h
index f55e2888c60..b99d7175b94 100644
--- a/storage/xtradb/include/ut0byte.h
+++ b/storage/xtradb/include/ut0byte.h
@@ -27,145 +27,22 @@ Created 1/20/1994 Heikki Tuuri
#define ut0byte_h
-#include "univ.i"
-
-/** Pair of ulint integers. */
-typedef struct dulint_struct dulint;
-/** Type definition for a 64-bit unsigned integer, which works also
-in 32-bit machines. NOTE! Access the fields only with the accessor
-functions. This definition appears here only for the compiler to
-know the size of a dulint. */
-struct dulint_struct{
- ulint high; /*!< most significant 32 bits */
- ulint low; /*!< least significant 32 bits */
-};
-
-/** Zero value for a dulint */
-extern const dulint ut_dulint_zero;
-/** Maximum value for a dulint */
-extern const dulint ut_dulint_max;
+#include "univ.i"
/*******************************************************//**
-Creates a 64-bit dulint out of two ulints.
+Creates a 64-bit integer out of two 32-bit integers.
@return created dulint */
UNIV_INLINE
-dulint
-ut_dulint_create(
-/*=============*/
- ulint high, /*!< in: high-order 32 bits */
- ulint low); /*!< in: low-order 32 bits */
-/*******************************************************//**
-Gets the high-order 32 bits of a dulint.
-@return 32 bits in ulint */
-UNIV_INLINE
-ulint
-ut_dulint_get_high(
-/*===============*/
- dulint d); /*!< in: dulint */
-/*******************************************************//**
-Gets the low-order 32 bits of a dulint.
-@return 32 bits in ulint */
-UNIV_INLINE
-ulint
-ut_dulint_get_low(
-/*==============*/
- dulint d); /*!< in: dulint */
-/*******************************************************//**
-Converts a dulint (a struct of 2 ulints) to ib_int64_t, which is a 64-bit
-integer type.
-@return value in ib_int64_t type */
-UNIV_INLINE
-ib_int64_t
-ut_conv_dulint_to_longlong(
-/*=======================*/
- dulint d); /*!< in: dulint */
-/*******************************************************//**
-Tests if a dulint is zero.
-@return TRUE if zero */
-UNIV_INLINE
-ibool
-ut_dulint_is_zero(
-/*==============*/
- dulint a); /*!< in: dulint */
-/*******************************************************//**
-Compares two dulints.
-@return -1 if a < b, 0 if a == b, 1 if a > b */
-UNIV_INLINE
-int
-ut_dulint_cmp(
-/*==========*/
- dulint a, /*!< in: dulint */
- dulint b); /*!< in: dulint */
-/*******************************************************//**
-Calculates the max of two dulints.
-@return max(a, b) */
-UNIV_INLINE
-dulint
-ut_dulint_get_max(
-/*==============*/
- dulint a, /*!< in: dulint */
- dulint b); /*!< in: dulint */
-/*******************************************************//**
-Calculates the min of two dulints.
-@return min(a, b) */
-UNIV_INLINE
-dulint
-ut_dulint_get_min(
-/*==============*/
- dulint a, /*!< in: dulint */
- dulint b); /*!< in: dulint */
-/*******************************************************//**
-Adds a ulint to a dulint.
-@return sum a + b */
-UNIV_INLINE
-dulint
-ut_dulint_add(
+ib_uint64_t
+ut_ull_create(
/*==========*/
- dulint a, /*!< in: dulint */
- ulint b); /*!< in: ulint */
-/*******************************************************//**
-Subtracts a ulint from a dulint.
-@return a - b */
-UNIV_INLINE
-dulint
-ut_dulint_subtract(
-/*===============*/
- dulint a, /*!< in: dulint */
- ulint b); /*!< in: ulint, b <= a */
-/*******************************************************//**
-Subtracts a dulint from another. NOTE that the difference must be positive
-and smaller that 4G.
-@return a - b */
-UNIV_INLINE
-ulint
-ut_dulint_minus(
-/*============*/
- dulint a, /*!< in: dulint; NOTE a must be >= b and at most
- 2 to power 32 - 1 greater */
- dulint b); /*!< in: dulint */
-/********************************************************//**
-Rounds a dulint downward to a multiple of a power of 2.
-@return rounded value */
-UNIV_INLINE
-dulint
-ut_dulint_align_down(
-/*=================*/
- dulint n, /*!< in: number to be rounded */
- ulint align_no); /*!< in: align by this number which must be a
- power of 2 */
-/********************************************************//**
-Rounds a dulint upward to a multiple of a power of 2.
-@return rounded value */
-UNIV_INLINE
-dulint
-ut_dulint_align_up(
-/*===============*/
- dulint n, /*!< in: number to be rounded */
- ulint align_no); /*!< in: align by this number which must be a
- power of 2 */
+ ulint high, /*!< in: high-order 32 bits */
+ ulint low) /*!< in: low-order 32 bits */
+ __attribute__((const));
+
/********************************************************//**
-Rounds a dulint downward to a multiple of a power of 2.
+Rounds a 64-bit integer downward to a multiple of a power of 2.
@return rounded value */
UNIV_INLINE
ib_uint64_t
@@ -184,34 +61,6 @@ ut_uint64_align_up(
ib_uint64_t n, /*!< in: number to be rounded */
ulint align_no); /*!< in: align by this number
which must be a power of 2 */
-/*******************************************************//**
-Increments a dulint variable by 1. */
-#define UT_DULINT_INC(D)\
-{\
- if ((D).low == 0xFFFFFFFFUL) {\
- (D).high = (D).high + 1;\
- (D).low = 0;\
- } else {\
- (D).low = (D).low + 1;\
- }\
-}
-/*******************************************************//**
-Tests if two dulints are equal. */
-#define UT_DULINT_EQ(D1, D2) (((D1).low == (D2).low)\
- && ((D1).high == (D2).high))
-#ifdef notdefined
-/************************************************************//**
-Sort function for dulint arrays. */
-UNIV_INTERN
-void
-ut_dulint_sort(
-/*===========*/
- dulint* arr, /*!< in/out: array to be sorted */
- dulint* aux_arr,/*!< in/out: auxiliary array (same size as arr) */
- ulint low, /*!< in: low bound of sort interval, inclusive */
- ulint high); /*!< in: high bound of sort interval, noninclusive */
-#endif /* notdefined */
-
/*********************************************************//**
The following function rounds up a pointer to the nearest aligned address.
@return aligned pointer */
diff --git a/storage/xtradb/include/ut0byte.ic b/storage/xtradb/include/ut0byte.ic
index 3dd51890cb4..e7908efa41a 100644
--- a/storage/xtradb/include/ut0byte.ic
+++ b/storage/xtradb/include/ut0byte.ic
@@ -24,260 +24,22 @@ Created 5/30/1994 Heikki Tuuri
*******************************************************************/
/*******************************************************//**
-Creates a 64-bit dulint out of two ulints.
+Creates a 64-bit integer out of two 32-bit integers.
@return created dulint */
UNIV_INLINE
-dulint
-ut_dulint_create(
-/*=============*/
+ib_uint64_t
+ut_ull_create(
+/*==========*/
ulint high, /*!< in: high-order 32 bits */
ulint low) /*!< in: low-order 32 bits */
{
- dulint res;
-
- ut_ad(high <= 0xFFFFFFFF);
- ut_ad(low <= 0xFFFFFFFF);
-
- res.high = high;
- res.low = low;
-
- return(res);
-}
-
-/*******************************************************//**
-Gets the high-order 32 bits of a dulint.
-@return 32 bits in ulint */
-UNIV_INLINE
-ulint
-ut_dulint_get_high(
-/*===============*/
- dulint d) /*!< in: dulint */
-{
- return(d.high);
-}
-
-/*******************************************************//**
-Gets the low-order 32 bits of a dulint.
-@return 32 bits in ulint */
-UNIV_INLINE
-ulint
-ut_dulint_get_low(
-/*==============*/
- dulint d) /*!< in: dulint */
-{
- return(d.low);
-}
-
-/*******************************************************//**
-Converts a dulint (a struct of 2 ulints) to ib_int64_t, which is a 64-bit
-integer type.
-@return value in ib_int64_t type */
-UNIV_INLINE
-ib_int64_t
-ut_conv_dulint_to_longlong(
-/*=======================*/
- dulint d) /*!< in: dulint */
-{
- return((ib_int64_t)d.low
- + (((ib_int64_t)d.high) << 32));
-}
-
-/*******************************************************//**
-Tests if a dulint is zero.
-@return TRUE if zero */
-UNIV_INLINE
-ibool
-ut_dulint_is_zero(
-/*==============*/
- dulint a) /*!< in: dulint */
-{
- if ((a.low == 0) && (a.high == 0)) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/*******************************************************//**
-Compares two dulints.
-@return -1 if a < b, 0 if a == b, 1 if a > b */
-UNIV_INLINE
-int
-ut_dulint_cmp(
-/*==========*/
- dulint a, /*!< in: dulint */
- dulint b) /*!< in: dulint */
-{
- if (a.high > b.high) {
- return(1);
- } else if (a.high < b.high) {
- return(-1);
- } else if (a.low > b.low) {
- return(1);
- } else if (a.low < b.low) {
- return(-1);
- } else {
- return(0);
- }
-}
-
-/*******************************************************//**
-Calculates the max of two dulints.
-@return max(a, b) */
-UNIV_INLINE
-dulint
-ut_dulint_get_max(
-/*==============*/
- dulint a, /*!< in: dulint */
- dulint b) /*!< in: dulint */
-{
- if (ut_dulint_cmp(a, b) > 0) {
-
- return(a);
- }
-
- return(b);
-}
-
-/*******************************************************//**
-Calculates the min of two dulints.
-@return min(a, b) */
-UNIV_INLINE
-dulint
-ut_dulint_get_min(
-/*==============*/
- dulint a, /*!< in: dulint */
- dulint b) /*!< in: dulint */
-{
- if (ut_dulint_cmp(a, b) > 0) {
-
- return(b);
- }
-
- return(a);
-}
-
-/*******************************************************//**
-Adds a ulint to a dulint.
-@return sum a + b */
-UNIV_INLINE
-dulint
-ut_dulint_add(
-/*==========*/
- dulint a, /*!< in: dulint */
- ulint b) /*!< in: ulint */
-{
- if (0xFFFFFFFFUL - b >= a.low) {
- a.low += b;
-
- return(a);
- }
-
- a.low = a.low - (0xFFFFFFFFUL - b) - 1;
-
- a.high++;
-
- return(a);
-}
-
-/*******************************************************//**
-Subtracts a ulint from a dulint.
-@return a - b */
-UNIV_INLINE
-dulint
-ut_dulint_subtract(
-/*===============*/
- dulint a, /*!< in: dulint */
- ulint b) /*!< in: ulint, b <= a */
-{
- if (a.low >= b) {
- a.low -= b;
-
- return(a);
- }
-
- b -= a.low + 1;
-
- a.low = 0xFFFFFFFFUL - b;
-
- ut_ad(a.high > 0);
-
- a.high--;
-
- return(a);
-}
-
-/*******************************************************//**
-Subtracts a dulint from another. NOTE that the difference must be positive
-and smaller that 4G.
-@return a - b */
-UNIV_INLINE
-ulint
-ut_dulint_minus(
-/*============*/
- dulint a, /*!< in: dulint; NOTE a must be >= b and at most
- 2 to power 32 - 1 greater */
- dulint b) /*!< in: dulint */
-{
- ulint diff;
-
- if (a.high == b.high) {
- ut_ad(a.low >= b.low);
-
- return(a.low - b.low);
- }
-
- ut_ad(a.high == b.high + 1);
-
- diff = (ulint)(0xFFFFFFFFUL - b.low);
- diff += 1 + a.low;
-
- ut_ad(diff > a.low);
-
- return(diff);
-}
-
-/********************************************************//**
-Rounds a dulint downward to a multiple of a power of 2.
-@return rounded value */
-UNIV_INLINE
-dulint
-ut_dulint_align_down(
-/*=================*/
- dulint n, /*!< in: number to be rounded */
- ulint align_no) /*!< in: align by this number which must be a
- power of 2 */
-{
- ulint low, high;
-
- ut_ad(align_no > 0);
- ut_ad(((align_no - 1) & align_no) == 0);
-
- low = ut_dulint_get_low(n);
- high = ut_dulint_get_high(n);
-
- low = low & ~(align_no - 1);
-
- return(ut_dulint_create(high, low));
-}
-
-/********************************************************//**
-Rounds a dulint upward to a multiple of a power of 2.
-@return rounded value */
-UNIV_INLINE
-dulint
-ut_dulint_align_up(
-/*===============*/
- dulint n, /*!< in: number to be rounded */
- ulint align_no) /*!< in: align by this number which must be a
- power of 2 */
-{
- return(ut_dulint_align_down(ut_dulint_add(n, align_no - 1), align_no));
+ ut_ad(high <= ULINT32_MASK);
+ ut_ad(low <= ULINT32_MASK);
+ return(((ib_uint64_t) high) << 32 | low);
}
/********************************************************//**
-Rounds ib_uint64_t downward to a multiple of a power of 2.
+Rounds a 64-bit integer downward to a multiple of a power of 2.
@return rounded value */
UNIV_INLINE
ib_uint64_t
diff --git a/storage/xtradb/include/ut0dbg.h b/storage/xtradb/include/ut0dbg.h
index 78b525c38ab..07730176d81 100644
--- a/storage/xtradb/include/ut0dbg.h
+++ b/storage/xtradb/include/ut0dbg.h
@@ -50,33 +50,23 @@ UNIV_INTERN
void
ut_dbg_assertion_failed(
/*====================*/
- const char* expr, /*!< in: the failed assertion */
- const char* file, /*!< in: source file containing the assertion */
- ulint line); /*!< in: line number of the assertion */
-
-#ifdef __NETWARE__
-/** Flag for ignoring further assertion failures. This is set to TRUE
-when on NetWare there happens an InnoDB assertion failure or other
-fatal error condition that requires an immediate shutdown. */
-extern ibool panic_shutdown;
-/* Abort the execution. */
-void ut_dbg_panic(void);
-# define UT_DBG_PANIC ut_dbg_panic()
-/* Stop threads in ut_a(). */
-# define UT_DBG_STOP do {} while (0) /* We do not do this on NetWare */
-#else /* __NETWARE__ */
-# if defined(__WIN__) || defined(__INTEL_COMPILER)
-# undef UT_DBG_USE_ABORT
-# elif defined(__GNUC__) && (__GNUC__ > 2)
-# define UT_DBG_USE_ABORT
-# endif
-
-# ifndef UT_DBG_USE_ABORT
+ const char* expr, /*!< in: the failed assertion */
+ const char* file, /*!< in: source file containing the assertion */
+ ulint line) /*!< in: line number of the assertion */
+ UNIV_COLD __attribute__((nonnull(2)));
+
+#if defined(__WIN__) || defined(__INTEL_COMPILER)
+# undef UT_DBG_USE_ABORT
+#elif defined(__GNUC__) && (__GNUC__ > 2)
+# define UT_DBG_USE_ABORT
+#endif
+
+#ifndef UT_DBG_USE_ABORT
/** A null pointer that will be dereferenced to trigger a memory trap */
extern ulint* ut_dbg_null_ptr;
-# endif
+#endif
-# if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
+#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT)
/** If this is set to TRUE by ut_dbg_assertion_failed(), all threads
will stop at the next ut_a() or ut_ad(). */
extern ibool ut_dbg_stop_threads;
@@ -89,24 +79,23 @@ ut_dbg_stop_thread(
/*===============*/
const char* file,
ulint line);
-# endif
+#endif
-# ifdef UT_DBG_USE_ABORT
+#ifdef UT_DBG_USE_ABORT
/** Abort the execution. */
-# define UT_DBG_PANIC abort()
+# define UT_DBG_PANIC abort()
/** Stop threads (null operation) */
-# define UT_DBG_STOP do {} while (0)
-# else /* UT_DBG_USE_ABORT */
+# define UT_DBG_STOP do {} while (0)
+#else /* UT_DBG_USE_ABORT */
/** Abort the execution. */
-# define UT_DBG_PANIC \
+# define UT_DBG_PANIC \
if (*(ut_dbg_null_ptr)) ut_dbg_null_ptr = NULL
/** Stop threads in ut_a(). */
-# define UT_DBG_STOP do \
+# define UT_DBG_STOP do \
if (UNIV_UNLIKELY(ut_dbg_stop_threads)) { \
ut_dbg_stop_thread(__FILE__, (ulint) __LINE__); \
} while (0)
-# endif /* UT_DBG_USE_ABORT */
-#endif /* __NETWARE__ */
+#endif /* UT_DBG_USE_ABORT */
/** Abort execution if EXPR does not evaluate to nonzero.
@param EXPR assertion expression that should hold */
diff --git a/storage/xtradb/include/ut0lst.h b/storage/xtradb/include/ut0lst.h
index 245dfc226c3..49756bc9f13 100644
--- a/storage/xtradb/include/ut0lst.h
+++ b/storage/xtradb/include/ut0lst.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -110,7 +110,7 @@ Adds the node as the last element in a two-way linked list.
*/
#define UT_LIST_ADD_LAST(NAME, BASE, N)\
{\
- ut_ad(N);\
+ ut_ad(N != NULL);\
((BASE).count)++;\
((N)->NAME).prev = (BASE).end;\
((N)->NAME).next = NULL;\
@@ -257,48 +257,5 @@ do { \
ut_a(ut_list_node_313 == NULL); \
} while (0)
-/********************************************************************//**
-Align nodes with moving location.
-@param NAME the name of the list
-@param TYPE node type
-@param BASE base node (not a pointer to it)
-@param OFFSET offset moved */
-#define UT_LIST_OFFSET(NAME, TYPE, BASE, FADDR, FOFFSET, BOFFSET) \
-do { \
- ulint ut_list_i_313; \
- TYPE* ut_list_node_313; \
- \
- if ((BASE).start) \
- (BASE).start = (void*)((byte*)((BASE).start) \
- + (((void*)((BASE).start) > (void*)FADDR)?FOFFSET:BOFFSET));\
- if ((BASE).end) \
- (BASE).end = (void*)((byte*)((BASE).end) \
- + (((void*)((BASE).end) > (void*)FADDR)?FOFFSET:BOFFSET));\
- \
- ut_list_node_313 = (BASE).start; \
- \
- for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
- ut_a(ut_list_node_313); \
- if ((ut_list_node_313->NAME).prev) \
- (ut_list_node_313->NAME).prev = (void*)((byte*)((ut_list_node_313->NAME).prev)\
- + (((void*)((ut_list_node_313->NAME).prev) > (void*)FADDR)?FOFFSET:BOFFSET));\
- if ((ut_list_node_313->NAME).next) \
- (ut_list_node_313->NAME).next = (void*)((byte*)((ut_list_node_313->NAME).next)\
- + (((void*)((ut_list_node_313->NAME).next)> (void*)FADDR)?FOFFSET:BOFFSET));\
- ut_list_node_313 = (ut_list_node_313->NAME).next; \
- } \
- \
- ut_a(ut_list_node_313 == NULL); \
- \
- ut_list_node_313 = (BASE).end; \
- \
- for (ut_list_i_313 = (BASE).count; ut_list_i_313--; ) { \
- ut_a(ut_list_node_313); \
- ut_list_node_313 = (ut_list_node_313->NAME).prev; \
- } \
- \
- ut_a(ut_list_node_313 == NULL); \
-} while (0)
-
#endif
diff --git a/storage/xtradb/include/ut0mem.h b/storage/xtradb/include/ut0mem.h
index f14606be966..57dfb08f41c 100644
--- a/storage/xtradb/include/ut0mem.h
+++ b/storage/xtradb/include/ut0mem.h
@@ -119,7 +119,7 @@ UNIV_INTERN
void
ut_free(
/*====*/
- void* ptr); /*!< in, own: memory block */
+ void* ptr); /*!< in, own: memory block, can be NULL */
#ifndef UNIV_HOTBACKUP
/**********************************************************************//**
Implements realloc. This is needed by /pars/lexyy.c. Otherwise, you should not
diff --git a/storage/xtradb/include/ut0rbt.h b/storage/xtradb/include/ut0rbt.h
index 6fd050acfe7..e26b637ae13 100644
--- a/storage/xtradb/include/ut0rbt.h
+++ b/storage/xtradb/include/ut0rbt.h
@@ -1,5 +1,12 @@
-/*****************************************************************************
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+/***************************************************************************//**
+
+Copyright (c) 2007, 2010, Innobase Oy. All Rights Reserved.
+
+Portions of this file contain modifications contributed and copyrighted by
+Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
+are described briefly in the InnoDB documentation. The contributions by
+Sun Microsystems are incorporated with their permission, and subject to the
+conditions contained in the file COPYING.Sun_Microsystems.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -14,13 +21,12 @@ this program; if not, write to the Free Software Foundation, Inc., 59 Temple
Place, Suite 330, Boston, MA 02111-1307 USA
*****************************************************************************/
-
-/*******************************************************************//**
+/******************************************************************//**
@file include/ut0rbt.h
-Red-Black tree implementation.
+Various utilities
Created 2007-03-20 Sunny Bains
-************************************************************************/
+*******************************************************/
#ifndef INNOBASE_UT0RBT_H
#define INNOBASE_UT0RBT_H
@@ -52,7 +58,7 @@ typedef struct ib_rbt_bound_struct ib_rbt_bound_t;
typedef void (*ib_rbt_print_node)(const ib_rbt_node_t* node);
typedef int (*ib_rbt_compare)(const void* p1, const void* p2);
-/* Red black tree color types */
+/** Red black tree color types */
enum ib_rbt_color_enum {
IB_RBT_RED,
IB_RBT_BLACK
@@ -60,7 +66,7 @@ enum ib_rbt_color_enum {
typedef enum ib_rbt_color_enum ib_rbt_color_t;
-/* Red black tree node */
+/** Red black tree node */
struct ib_rbt_node_struct {
ib_rbt_color_t color; /* color of this node */
@@ -71,7 +77,7 @@ struct ib_rbt_node_struct {
char value[1]; /* Data value */
};
-/* Red black tree instance.*/
+/** Red black tree instance.*/
struct ib_rbt_struct {
ib_rbt_node_t* nil; /* Black colored node that is
used as a sentinel. This is
@@ -87,7 +93,7 @@ struct ib_rbt_struct {
ulint sizeof_value; /* Sizeof the item in bytes */
};
-/* The result of searching for a key in the tree, this is useful for
+/** The result of searching for a key in the tree, this is useful for
a speedy lookup and insert if key doesn't exist.*/
struct ib_rbt_bound_struct {
const ib_rbt_node_t*
@@ -110,131 +116,132 @@ struct ib_rbt_bound_struct {
/* Compare a key with the node value (t is tree, k is key, n is node)*/
#define rbt_compare(t, k, n) (t->compare(k, n->value))
-/****************************************************************//**
+/**********************************************************************//**
Free an instance of a red black tree */
UNIV_INTERN
void
rbt_free(
/*=====*/
- ib_rbt_t* tree); /*!< in: rb tree to free */
-/****************************************************************//**
+ ib_rbt_t* tree); /*!< in: rb tree to free */
+/**********************************************************************//**
Create an instance of a red black tree
@return rb tree instance */
UNIV_INTERN
ib_rbt_t*
rbt_create(
/*=======*/
- size_t sizeof_value, /*!< in: size in bytes */
- ib_rbt_compare compare); /*!< in: comparator */
-/****************************************************************//**
-Delete a node from the red black tree, identified by key.
-@return TRUE if success FALSE if not found */
+ size_t sizeof_value, /*!< in: size in bytes */
+ ib_rbt_compare compare); /*!< in: comparator */
+/**********************************************************************//**
+Delete a node from the red black tree, identified by key */
UNIV_INTERN
ibool
rbt_delete(
/*=======*/
- ib_rbt_t* tree, /*!< in: rb tree */
- const void* key); /*!< in: key to delete */
-/****************************************************************//**
-Remove a node from the rb tree, the node is not free'd, that is the
-callers responsibility.
+ /* in: TRUE on success */
+ ib_rbt_t* tree, /* in: rb tree */
+ const void* key); /* in: key to delete */
+/**********************************************************************//**
+Remove a node from the red black tree, NOTE: This function will not delete
+the node instance, THAT IS THE CALLERS RESPONSIBILITY.
@return the deleted node with the const. */
UNIV_INTERN
ib_rbt_node_t*
rbt_remove_node(
/*============*/
- ib_rbt_t* tree, /*!< in: rb tree */
+ ib_rbt_t* tree, /*!< in: rb tree */
const ib_rbt_node_t*
- node); /*!< in: node to delete, this
- is a fudge and declared const
- because the caller has access
- only to const nodes.*/
-/****************************************************************//**
-Find a matching node in the rb tree.
+ node); /*!< in: node to delete, this
+ is a fudge and declared const
+ because the caller has access
+ only to const nodes.*/
+/**********************************************************************//**
+Return a node from the red black tree, identified by
+key, NULL if not found
@return node if found else return NULL */
UNIV_INTERN
const ib_rbt_node_t*
rbt_lookup(
/*=======*/
- const ib_rbt_t* tree, /*!< in: rb tree to search */
- const void* key); /*!< in: key to lookup */
-/****************************************************************//**
-Generic insert of a value in the rb tree.
+ const ib_rbt_t* tree, /*!< in: rb tree to search */
+ const void* key); /*!< in: key to lookup */
+/**********************************************************************//**
+Add data to the red black tree, identified by key (no dups yet!)
@return inserted node */
UNIV_INTERN
const ib_rbt_node_t*
rbt_insert(
/*=======*/
- ib_rbt_t* tree, /*!< in: rb tree */
- const void* key, /*!< in: key for ordering */
- const void* value); /*!< in: data that will be
- copied to the node.*/
-/****************************************************************//**
+ ib_rbt_t* tree, /*!< in: rb tree */
+ const void* key, /*!< in: key for ordering */
+ const void* value); /*!< in: data that will be
+ copied to the node.*/
+/**********************************************************************//**
Add a new node to the tree, useful for data that is pre-sorted.
@return appended node */
UNIV_INTERN
const ib_rbt_node_t*
rbt_add_node(
/*=========*/
- ib_rbt_t* tree, /*!< in: rb tree */
- ib_rbt_bound_t* parent, /*!< in: parent */
- const void* value); /*!< in: this value is copied
- to the node */
-/****************************************************************//**
+ ib_rbt_t* tree, /*!< in: rb tree */
+ ib_rbt_bound_t* parent, /*!< in: parent */
+ const void* value); /*!< in: this value is copied
+ to the node */
+/**********************************************************************//**
Return the left most data node in the tree
@return left most node */
UNIV_INTERN
const ib_rbt_node_t*
rbt_first(
/*======*/
- const ib_rbt_t* tree); /*!< in: rb tree */
-/****************************************************************//**
+ const ib_rbt_t* tree); /*!< in: rb tree */
+/**********************************************************************//**
Return the right most data node in the tree
@return right most node */
UNIV_INTERN
const ib_rbt_node_t*
rbt_last(
/*=====*/
- const ib_rbt_t* tree); /*!< in: rb tree */
-/****************************************************************//**
+ const ib_rbt_t* tree); /*!< in: rb tree */
+/**********************************************************************//**
Return the next node from current.
@return successor node to current that is passed in. */
UNIV_INTERN
const ib_rbt_node_t*
rbt_next(
/*=====*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const ib_rbt_node_t* /*!< in: current node */
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ const ib_rbt_node_t* /* in: current node */
current);
-/****************************************************************//**
+/**********************************************************************//**
Return the prev node from current.
@return precedessor node to current that is passed in */
UNIV_INTERN
const ib_rbt_node_t*
rbt_prev(
/*=====*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const ib_rbt_node_t* /*!< in: current node */
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ const ib_rbt_node_t* /* in: current node */
current);
-/****************************************************************//**
+/**********************************************************************//**
Find the node that has the lowest key that is >= key.
@return node that satisfies the lower bound constraint or NULL */
UNIV_INTERN
const ib_rbt_node_t*
rbt_lower_bound(
/*============*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const void* key); /*!< in: key to search */
-/****************************************************************//**
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ const void* key); /*!< in: key to search */
+/**********************************************************************//**
Find the node that has the greatest key that is <= key.
@return node that satisifies the upper bound constraint or NULL */
UNIV_INTERN
const ib_rbt_node_t*
rbt_upper_bound(
/*============*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- const void* key); /*!< in: key to search */
-/****************************************************************//**
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ const void* key); /*!< in: key to search */
+/**********************************************************************//**
Search for the key, a node will be retuned in parent.last, whether it
was found or not. If not found then parent.last will contain the
parent node for the possibly new key otherwise the matching node.
@@ -243,10 +250,10 @@ UNIV_INTERN
int
rbt_search(
/*=======*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- ib_rbt_bound_t* parent, /*!< in: search bounds */
- const void* key); /*!< in: key to search */
-/****************************************************************//**
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ ib_rbt_bound_t* parent, /*!< in: search bounds */
+ const void* key); /*!< in: key to search */
+/**********************************************************************//**
Search for the key, a node will be retuned in parent.last, whether it
was found or not. If not found then parent.last will contain the
parent node for the possibly new key otherwise the matching node.
@@ -255,27 +262,27 @@ UNIV_INTERN
int
rbt_search_cmp(
/*===========*/
- const ib_rbt_t* tree, /*!< in: rb tree */
- ib_rbt_bound_t* parent, /*!< in: search bounds */
- const void* key, /*!< in: key to search */
- ib_rbt_compare compare); /*!< in: comparator */
-/****************************************************************//**
+ const ib_rbt_t* tree, /*!< in: rb tree */
+ ib_rbt_bound_t* parent, /*!< in: search bounds */
+ const void* key, /*!< in: key to search */
+ ib_rbt_compare compare); /*!< in: comparator */
+/**********************************************************************//**
Clear the tree, deletes (and free's) all the nodes. */
UNIV_INTERN
void
rbt_clear(
/*======*/
- ib_rbt_t* tree); /*!< in: rb tree */
-/****************************************************************//**
+ ib_rbt_t* tree); /*!< in: rb tree */
+/**********************************************************************//**
Merge the node from dst into src. Return the number of nodes merged.
@return no. of recs merged */
UNIV_INTERN
ulint
rbt_merge_uniq(
/*===========*/
- ib_rbt_t* dst, /*!< in: dst rb tree */
- const ib_rbt_t* src); /*!< in: src rb tree */
-/****************************************************************//**
+ ib_rbt_t* dst, /*!< in: dst rb tree */
+ const ib_rbt_t* src); /*!< in: src rb tree */
+/**********************************************************************//**
Merge the node from dst into src. Return the number of nodes merged.
Delete the nodes from src after copying node to dst. As a side effect
the duplicates will be left untouched in the src, since we don't support
@@ -286,9 +293,9 @@ UNIV_INTERN
ulint
rbt_merge_uniq_destructive(
/*=======================*/
- ib_rbt_t* dst, /*!< in: dst rb tree */
- ib_rbt_t* src); /*!< in: src rb tree */
-/****************************************************************//**
+ ib_rbt_t* dst, /*!< in: dst rb tree */
+ ib_rbt_t* src); /*!< in: src rb tree */
+/**********************************************************************//**
Verify the integrity of the RB tree. For debugging. 0 failure else height
of tree (in count of black nodes).
@return TRUE if OK FALSE if tree invalid. */
@@ -296,14 +303,14 @@ UNIV_INTERN
ibool
rbt_validate(
/*=========*/
- const ib_rbt_t* tree); /*!< in: tree to validate */
-/****************************************************************//**
+ const ib_rbt_t* tree); /*!< in: tree to validate */
+/**********************************************************************//**
Iterate over the tree in depth first order. */
UNIV_INTERN
void
rbt_print(
/*======*/
- const ib_rbt_t* tree, /*!< in: tree to traverse */
- ib_rbt_print_node print); /*!< in: print function */
+ const ib_rbt_t* tree, /*!< in: tree to traverse */
+ ib_rbt_print_node print); /*!< in: print function */
#endif /* INNOBASE_UT0RBT_H */
diff --git a/storage/xtradb/include/ut0rnd.h b/storage/xtradb/include/ut0rnd.h
index ad55df40abc..9c53101198d 100644
--- a/storage/xtradb/include/ut0rnd.h
+++ b/storage/xtradb/include/ut0rnd.h
@@ -97,13 +97,13 @@ ut_fold_ulint_pair(
ulint n2) /*!< in: ulint */
__attribute__((const));
/*************************************************************//**
-Folds a dulint.
+Folds a 64-bit integer.
@return folded value */
UNIV_INLINE
ulint
-ut_fold_dulint(
-/*===========*/
- dulint d) /*!< in: dulint */
+ut_fold_ull(
+/*========*/
+ ib_uint64_t d) /*!< in: 64-bit integer */
__attribute__((const));
/*************************************************************//**
Folds a character string ending in the null character.
diff --git a/storage/xtradb/include/ut0rnd.ic b/storage/xtradb/include/ut0rnd.ic
index c2043660efd..2c8c959d804 100644
--- a/storage/xtradb/include/ut0rnd.ic
+++ b/storage/xtradb/include/ut0rnd.ic
@@ -85,9 +85,6 @@ ut_rnd_gen_ulint(void)
/*==================*/
{
ulint rnd;
- ulint n_bits;
-
- n_bits = 8 * sizeof(ulint);
ut_rnd_ulint_counter = UT_RND1 * ut_rnd_ulint_counter + UT_RND2;
@@ -173,16 +170,16 @@ ut_fold_ulint_pair(
}
/*************************************************************//**
-Folds a dulint.
+Folds a 64-bit integer.
@return folded value */
UNIV_INLINE
ulint
-ut_fold_dulint(
-/*===========*/
- dulint d) /*!< in: dulint */
+ut_fold_ull(
+/*========*/
+ ib_uint64_t d) /*!< in: 64-bit integer */
{
- return(ut_fold_ulint_pair(ut_dulint_get_low(d),
- ut_dulint_get_high(d)));
+ return(ut_fold_ulint_pair((ulint) d & ULINT32_MASK,
+ (ulint) (d >> 32)));
}
/*************************************************************//**
diff --git a/storage/xtradb/include/ut0ut.h b/storage/xtradb/include/ut0ut.h
index 197b8401428..cad39e9a34f 100644
--- a/storage/xtradb/include/ut0ut.h
+++ b/storage/xtradb/include/ut0ut.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
Copyright (c) 2009, Sun Microsystems, Inc.
Portions of this file contain modifications contributed and copyrighted by
@@ -35,6 +35,8 @@ Created 1/20/1994 Heikki Tuuri
#include "univ.i"
+#include "db0err.h"
+
#ifndef UNIV_HOTBACKUP
# include "os0sync.h" /* for HAVE_ATOMIC_BUILTINS */
#endif /* UNIV_HOTBACKUP */
@@ -53,24 +55,24 @@ Created 1/20/1994 Heikki Tuuri
typedef time_t ib_time_t;
#ifndef UNIV_HOTBACKUP
-#if defined(HAVE_IB_PAUSE_INSTRUCTION)
-# ifdef WIN32
- /* In the Win32 API, the x86 PAUSE instruction is executed by calling
- the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
- independent way by using YieldProcessor.*/
-# define UT_RELAX_CPU() YieldProcessor()
-# else
- /* According to the gcc info page, asm volatile means that the
- instruction has important side-effects and must not be removed.
- Also asm volatile may trigger a memory barrier (spilling all registers
- to memory). */
-# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
-# endif
+#if defined(HAVE_PAUSE_INSTRUCTION)
+ /* According to the gcc info page, asm volatile means that the
+ instruction has important side-effects and must not be removed.
+ Also asm volatile may trigger a memory barrier (spilling all registers
+ to memory). */
+# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
+#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
+# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
#elif defined(HAVE_ATOMIC_BUILTINS)
# define UT_RELAX_CPU() do { \
volatile lint volatile_var; \
os_compare_and_swap_lint(&volatile_var, 0, 1); \
} while (0)
+#elif defined(HAVE_WINDOWS_ATOMICS)
+ /* In the Win32 API, the x86 PAUSE instruction is executed by calling
+ the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
+ independent way by using YieldProcessor. */
+# define UT_RELAX_CPU() YieldProcessor()
#else
# define UT_RELAX_CPU() ((void)0) /* avoid warning for an empty statement */
#endif
@@ -273,7 +275,8 @@ UNIV_INTERN
void
ut_print_timestamp(
/*===============*/
- FILE* file); /*!< in: file where to print */
+ FILE* file) /*!< in: file where to print */
+ UNIV_COLD __attribute__((nonnull));
/**********************************************************//**
Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */
UNIV_INTERN
@@ -395,6 +398,16 @@ a limited buffer. */
# define ut_snprintf snprintf
#endif /* __WIN__ */
+/*************************************************************//**
+Convert an error number to a human readable text message. The
+returned string is static and should not be freed or modified.
+@return string, describing the error */
+UNIV_INTERN
+const char*
+ut_strerr(
+/*======*/
+ enum db_err num); /*!< in: error number */
+
#ifndef UNIV_NONINL
#include "ut0ut.ic"
#endif
diff --git a/storage/xtradb/include/ut0vec.h b/storage/xtradb/include/ut0vec.h
index a770f671cfc..0f8b955b098 100644
--- a/storage/xtradb/include/ut0vec.h
+++ b/storage/xtradb/include/ut0vec.h
@@ -94,6 +94,25 @@ ib_vector_get(
ulint n); /*!< in: element index to get */
/****************************************************************//**
+Get last element. The vector must not be empty.
+@return last element */
+UNIV_INLINE
+void*
+ib_vector_get_last(
+/*===============*/
+ ib_vector_t* vec); /*!< in: vector */
+
+/****************************************************************//**
+Set the n'th element. */
+UNIV_INLINE
+void
+ib_vector_set(
+/*==========*/
+ ib_vector_t* vec, /*!< in/out: vector */
+ ulint n, /*!< in: element index to set */
+ void* elem); /*!< in: data element */
+
+/****************************************************************//**
Remove the last element from the vector. */
UNIV_INLINE
void*
diff --git a/storage/xtradb/include/ut0vec.ic b/storage/xtradb/include/ut0vec.ic
index 02e881f9bca..34c858868ce 100644
--- a/storage/xtradb/include/ut0vec.ic
+++ b/storage/xtradb/include/ut0vec.ic
@@ -51,6 +51,35 @@ ib_vector_get(
}
/****************************************************************//**
+Get last element. The vector must not be empty.
+@return last element */
+UNIV_INLINE
+void*
+ib_vector_get_last(
+/*===============*/
+ ib_vector_t* vec) /*!< in: vector */
+{
+ ut_a(vec->used > 0);
+
+ return(vec->data[vec->used - 1]);
+}
+
+/****************************************************************//**
+Set the n'th element. */
+UNIV_INLINE
+void
+ib_vector_set(
+/*==========*/
+ ib_vector_t* vec, /*!< in/out: vector */
+ ulint n, /*!< in: element index to set */
+ void* elem) /*!< in: data element */
+{
+ ut_a(n < vec->used);
+
+ vec->data[n] = elem;
+}
+
+/****************************************************************//**
Remove the last element from the vector.
@return last vector element */
UNIV_INLINE