summaryrefslogtreecommitdiff
path: root/storage/innobase/include
diff options
context:
space:
mode:
Diffstat (limited to 'storage/innobase/include')
-rw-r--r--storage/innobase/include/btr0btr.h82
-rw-r--r--storage/innobase/include/btr0btr.ic26
-rw-r--r--storage/innobase/include/btr0bulk.h10
-rw-r--r--storage/innobase/include/btr0cur.h65
-rw-r--r--storage/innobase/include/btr0cur.ic25
-rw-r--r--storage/innobase/include/btr0pcur.h41
-rw-r--r--storage/innobase/include/btr0pcur.ic74
-rw-r--r--storage/innobase/include/btr0sea.h145
-rw-r--r--storage/innobase/include/btr0sea.ic112
-rw-r--r--storage/innobase/include/buf0buddy.h10
-rw-r--r--storage/innobase/include/buf0buddy.ic16
-rw-r--r--storage/innobase/include/buf0buf.h32
-rw-r--r--storage/innobase/include/buf0buf.ic25
-rw-r--r--storage/innobase/include/buf0checksum.h2
-rw-r--r--storage/innobase/include/buf0dblwr.h4
-rw-r--r--storage/innobase/include/buf0flu.h77
-rw-r--r--storage/innobase/include/buf0lru.h1
-rw-r--r--storage/innobase/include/buf0mtflu.h95
-rw-r--r--storage/innobase/include/buf0types.h4
-rw-r--r--storage/innobase/include/data0data.h25
-rw-r--r--storage/innobase/include/data0data.ic3
-rw-r--r--storage/innobase/include/data0type.h34
-rw-r--r--storage/innobase/include/data0type.ic26
-rw-r--r--storage/innobase/include/dict0boot.h2
-rw-r--r--storage/innobase/include/dict0boot.ic10
-rw-r--r--storage/innobase/include/dict0crea.h24
-rw-r--r--storage/innobase/include/dict0dict.h196
-rw-r--r--storage/innobase/include/dict0dict.ic167
-rw-r--r--storage/innobase/include/dict0load.h17
-rw-r--r--storage/innobase/include/dict0mem.h371
-rw-r--r--storage/innobase/include/dict0mem.ic7
-rw-r--r--storage/innobase/include/dict0stats_bg.h13
-rw-r--r--storage/innobase/include/dict0types.h38
-rw-r--r--storage/innobase/include/dyn0buf.h61
-rw-r--r--storage/innobase/include/fil0crypt.h4
-rw-r--r--storage/innobase/include/fil0fil.h539
-rw-r--r--storage/innobase/include/fil0fil.ic2
-rw-r--r--storage/innobase/include/fsp0file.h11
-rw-r--r--storage/innobase/include/fsp0fsp.h91
-rw-r--r--storage/innobase/include/fsp0fsp.ic24
-rw-r--r--storage/innobase/include/fsp0sysspace.h14
-rw-r--r--storage/innobase/include/fsp0types.h42
-rw-r--r--storage/innobase/include/fts0fts.h75
-rw-r--r--storage/innobase/include/fts0priv.h1
-rw-r--r--storage/innobase/include/fts0tokenize.h2
-rw-r--r--storage/innobase/include/fts0types.ic7
-rw-r--r--storage/innobase/include/fut0fut.ic2
-rw-r--r--storage/innobase/include/fut0lst.ic4
-rw-r--r--storage/innobase/include/gis0rtree.h22
-rw-r--r--storage/innobase/include/gis0rtree.ic2
-rw-r--r--storage/innobase/include/ha_prototypes.h27
-rw-r--r--storage/innobase/include/handler0alter.h8
-rw-r--r--storage/innobase/include/ib0mutex.h83
-rw-r--r--storage/innobase/include/ibuf0ibuf.h28
-rw-r--r--storage/innobase/include/ibuf0ibuf.ic8
-rw-r--r--storage/innobase/include/lock0lock.h146
-rw-r--r--storage/innobase/include/lock0lock.ic8
-rw-r--r--storage/innobase/include/lock0prdt.h5
-rw-r--r--storage/innobase/include/lock0types.h3
-rw-r--r--storage/innobase/include/log0log.h335
-rw-r--r--storage/innobase/include/log0log.ic56
-rw-r--r--storage/innobase/include/log0recv.h23
-rw-r--r--storage/innobase/include/mem0mem.h65
-rw-r--r--storage/innobase/include/mem0mem.ic22
-rw-r--r--storage/innobase/include/mtr0log.ic2
-rw-r--r--storage/innobase/include/mtr0mtr.h78
-rw-r--r--storage/innobase/include/mtr0types.h14
-rw-r--r--storage/innobase/include/os0event.h6
-rw-r--r--storage/innobase/include/os0file.h38
-rw-r--r--storage/innobase/include/os0file.ic88
-rw-r--r--storage/innobase/include/os0once.h3
-rw-r--r--storage/innobase/include/os0thread.h12
-rw-r--r--storage/innobase/include/page0cur.h5
-rw-r--r--storage/innobase/include/page0cur.ic7
-rw-r--r--storage/innobase/include/page0page.h167
-rw-r--r--storage/innobase/include/page0page.ic148
-rw-r--r--storage/innobase/include/page0size.h2
-rw-r--r--storage/innobase/include/page0zip.h41
-rw-r--r--storage/innobase/include/page0zip.ic8
-rw-r--r--storage/innobase/include/pars0pars.h2
-rw-r--r--storage/innobase/include/que0que.h18
-rw-r--r--storage/innobase/include/read0read.h125
-rw-r--r--storage/innobase/include/read0types.h338
-rw-r--r--storage/innobase/include/rem0rec.h470
-rw-r--r--storage/innobase/include/rem0rec.ic488
-rw-r--r--storage/innobase/include/rem0types.h5
-rw-r--r--storage/innobase/include/row0ftsort.h18
-rw-r--r--storage/innobase/include/row0import.h24
-rw-r--r--storage/innobase/include/row0ins.h3
-rw-r--r--storage/innobase/include/row0log.h14
-rw-r--r--storage/innobase/include/row0merge.h30
-rw-r--r--storage/innobase/include/row0mysql.h35
-rw-r--r--storage/innobase/include/row0purge.h2
-rw-r--r--storage/innobase/include/row0row.h19
-rw-r--r--storage/innobase/include/row0row.ic5
-rw-r--r--storage/innobase/include/row0sel.h3
-rw-r--r--storage/innobase/include/row0trunc.h33
-rw-r--r--storage/innobase/include/row0undo.h2
-rw-r--r--storage/innobase/include/row0upd.h85
-rw-r--r--storage/innobase/include/row0upd.ic5
-rw-r--r--storage/innobase/include/row0vers.h23
-rw-r--r--storage/innobase/include/srv0conc.h5
-rw-r--r--storage/innobase/include/srv0mon.h30
-rw-r--r--storage/innobase/include/srv0srv.h82
-rw-r--r--storage/innobase/include/srv0start.h14
-rw-r--r--storage/innobase/include/sync0arr.h15
-rw-r--r--storage/innobase/include/sync0policy.h36
-rw-r--r--storage/innobase/include/sync0policy.ic4
-rw-r--r--storage/innobase/include/sync0rw.h16
-rw-r--r--storage/innobase/include/sync0rw.ic161
-rw-r--r--storage/innobase/include/sync0sync.h3
-rw-r--r--storage/innobase/include/sync0types.h144
-rw-r--r--storage/innobase/include/trx0i_s.h4
-rw-r--r--storage/innobase/include/trx0purge.h348
-rw-r--r--storage/innobase/include/trx0purge.ic21
-rw-r--r--storage/innobase/include/trx0rec.h54
-rw-r--r--storage/innobase/include/trx0rec.ic33
-rw-r--r--storage/innobase/include/trx0roll.h25
-rw-r--r--storage/innobase/include/trx0roll.ic62
-rw-r--r--storage/innobase/include/trx0rseg.h195
-rw-r--r--storage/innobase/include/trx0rseg.ic48
-rw-r--r--storage/innobase/include/trx0sys.h1370
-rw-r--r--storage/innobase/include/trx0sys.ic464
-rw-r--r--storage/innobase/include/trx0trx.h446
-rw-r--r--storage/innobase/include/trx0trx.ic47
-rw-r--r--storage/innobase/include/trx0types.h58
-rw-r--r--storage/innobase/include/trx0undo.h207
-rw-r--r--storage/innobase/include/trx0undo.ic181
-rw-r--r--storage/innobase/include/univ.i69
-rw-r--r--storage/innobase/include/ut0byte.ic6
-rw-r--r--storage/innobase/include/ut0crc32.h9
-rw-r--r--storage/innobase/include/ut0dbg.h4
-rw-r--r--storage/innobase/include/ut0lst.h2
-rw-r--r--storage/innobase/include/ut0new.h143
-rw-r--r--storage/innobase/include/ut0pool.h2
-rw-r--r--storage/innobase/include/ut0rnd.h10
-rw-r--r--storage/innobase/include/ut0rnd.ic24
-rw-r--r--storage/innobase/include/ut0stage.h61
-rw-r--r--storage/innobase/include/ut0ut.h39
139 files changed, 4449 insertions, 5858 deletions
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h
index 2fccdfc431c..54f13a17c4c 100644
--- a/storage/innobase/include/btr0btr.h
+++ b/storage/innobase/include/btr0btr.h
@@ -2,7 +2,7 @@
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2014, 2017, MariaDB Corporation.
+Copyright (c) 2014, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -37,9 +37,15 @@ Created 6/2/1994 Heikki Tuuri
#include "btr0types.h"
#include "gis0type.h"
+#define BTR_MAX_NODE_LEVEL 50 /*!< Maximum B-tree page level
+ (not really a hard limit).
+ Used in debug assertions
+ in btr_page_set_level and
+ btr_page_get_level */
+
/** Maximum record size which can be stored on a page, without using the
special big record storage structure */
-#define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200)
+#define BTR_PAGE_MAX_REC_SIZE (srv_page_size / 2 - 200)
/** @brief Maximum depth of a B-tree in InnoDB.
@@ -151,23 +157,23 @@ free the pages of externally stored fields. */
record is in spatial index */
#define BTR_RTREE_DELETE_MARK 524288U
-#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \
- ((latch_mode) & btr_latch_mode(~(BTR_INSERT \
- | BTR_DELETE_MARK \
- | BTR_RTREE_UNDO_INS \
- | BTR_RTREE_DELETE_MARK \
- | BTR_DELETE \
- | BTR_ESTIMATE \
- | BTR_IGNORE_SEC_UNIQUE \
- | BTR_ALREADY_S_LATCHED \
- | BTR_LATCH_FOR_INSERT \
- | BTR_LATCH_FOR_DELETE \
- | BTR_MODIFY_EXTERNAL)))
-
-#define BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode) \
- ((latch_mode) & btr_latch_mode(~(BTR_LATCH_FOR_INSERT \
- | BTR_LATCH_FOR_DELETE \
- | BTR_MODIFY_EXTERNAL)))
+#define BTR_LATCH_MODE_WITHOUT_FLAGS(latch_mode) \
+ ((latch_mode) & ulint(~(BTR_INSERT \
+ | BTR_DELETE_MARK \
+ | BTR_RTREE_UNDO_INS \
+ | BTR_RTREE_DELETE_MARK \
+ | BTR_DELETE \
+ | BTR_ESTIMATE \
+ | BTR_IGNORE_SEC_UNIQUE \
+ | BTR_ALREADY_S_LATCHED \
+ | BTR_LATCH_FOR_INSERT \
+ | BTR_LATCH_FOR_DELETE \
+ | BTR_MODIFY_EXTERNAL)))
+
+#define BTR_LATCH_MODE_WITHOUT_INTENTION(latch_mode) \
+ ((latch_mode) & ulint(~(BTR_LATCH_FOR_INSERT \
+ | BTR_LATCH_FOR_DELETE \
+ | BTR_MODIFY_EXTERNAL)))
/**************************************************************//**
Report that an index page is corrupted. */
@@ -289,14 +295,22 @@ btr_page_get_index_id(
MY_ATTRIBUTE((warn_unused_result));
/********************************************************//**
Gets the node level field in an index page.
+@param[in] page index page
@return level, leaf level == 0 */
UNIV_INLINE
ulint
-btr_page_get_level_low(
-/*===================*/
- const page_t* page) /*!< in: index page */
- MY_ATTRIBUTE((warn_unused_result));
-#define btr_page_get_level(page, mtr) btr_page_get_level_low(page)
+btr_page_get_level(const page_t* page)
+{
+ ulint level;
+
+ ut_ad(page);
+
+ level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL);
+
+ ut_ad(level <= BTR_MAX_NODE_LEVEL);
+
+ return(level);
+} MY_ATTRIBUTE((warn_unused_result))
/********************************************************//**
Gets the next index page number.
@return next page number */
@@ -345,8 +359,7 @@ btr_node_ptr_get_child_page_no(
/** Create the root node for a new index tree.
@param[in] type type of the index
-@param[in] space space where created
-@param[in] page_size page size
+@param[in,out] space tablespace where created
@param[in] index_id index id
@param[in] index index, or NULL when applying TRUNCATE
log record during recovery
@@ -357,8 +370,7 @@ record during recovery
ulint
btr_create(
ulint type,
- ulint space,
- const page_size_t& page_size,
+ fil_space_t* space,
index_id_t index_id,
dict_index_t* index,
const btr_create_t* btr_redo_create_info,
@@ -684,6 +696,20 @@ btr_page_free(
buf_block_t* block, /*!< in: block to be freed, x-latched */
mtr_t* mtr) /*!< in: mtr */
MY_ATTRIBUTE((nonnull));
+/** Empty an index page (possibly the root page). @see btr_page_create().
+@param[in,out] block page to be emptied
+@param[in,out] page_zip compressed page frame, or NULL
+@param[in] index index of the page
+@param[in] level B-tree level of the page (0=leaf)
+@param[in,out] mtr mini-transaction */
+void
+btr_page_empty(
+ buf_block_t* block,
+ page_zip_des_t* page_zip,
+ dict_index_t* index,
+ ulint level,
+ mtr_t* mtr)
+ MY_ATTRIBUTE((nonnull(1, 3, 5)));
/**************************************************************//**
Creates a new index page (not the root, and also not
used in page reorganization). @see btr_page_empty(). */
diff --git a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic
index bd4f2a40267..d24458beace 100644
--- a/storage/innobase/include/btr0btr.ic
+++ b/storage/innobase/include/btr0btr.ic
@@ -29,12 +29,6 @@ Created 6/2/1994 Heikki Tuuri
#include "mtr0log.h"
#include "page0zip.h"
-#define BTR_MAX_NODE_LEVEL 50 /*!< Maximum B-tree page level
- (not really a hard limit).
- Used in debug assertions
- in btr_page_set_level and
- btr_page_get_level_low */
-
/** Gets a buffer page and declares its latching order level.
@param[in] page_id page id
@param[in] mode latch mode
@@ -144,26 +138,6 @@ btr_page_get_index_id(
}
/********************************************************//**
-Gets the node level field in an index page.
-@return level, leaf level == 0 */
-UNIV_INLINE
-ulint
-btr_page_get_level_low(
-/*===================*/
- const page_t* page) /*!< in: index page */
-{
- ulint level;
-
- ut_ad(page);
-
- level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL);
-
- ut_ad(level <= BTR_MAX_NODE_LEVEL);
-
- return(level);
-}
-
-/********************************************************//**
Sets the node level field in an index page. */
UNIV_INLINE
void
diff --git a/storage/innobase/include/btr0bulk.h b/storage/innobase/include/btr0bulk.h
index edf34bd5ae4..5047dce38b4 100644
--- a/storage/innobase/include/btr0bulk.h
+++ b/storage/innobase/include/btr0bulk.h
@@ -33,7 +33,7 @@ Created 03/11/2014 Shaohua Wang
#include <vector>
/** Innodb B-tree index fill factor for bulk load. */
-extern long innobase_fill_factor;
+extern uint innobase_fill_factor;
/** whether to reduce redo logging during ALTER TABLE */
extern my_bool innodb_log_optimize_ddl;
@@ -86,7 +86,7 @@ public:
m_err(DB_SUCCESS)
{
ut_ad(!dict_index_is_spatial(m_index));
- ut_ad(!dict_table_is_temporary(m_index->table));
+ ut_ad(!m_index->table->is_temporary());
}
/** Deconstructor */
@@ -287,7 +287,8 @@ public:
{
#ifdef UNIV_DEBUG
if (m_flush_observer)
- fil_space_inc_redo_skipped_count(m_index->space);
+ my_atomic_addlint(&m_index->table->space->redo_skipped_count,
+ 1);
#endif /* UNIV_DEBUG */
}
@@ -296,7 +297,8 @@ public:
{
#ifdef UNIV_DEBUG
if (m_flush_observer)
- fil_space_dec_redo_skipped_count(m_index->space);
+ my_atomic_addlint(&m_index->table->space->redo_skipped_count,
+ ulint(-1));
#endif /* UNIV_DEBUG */
}
diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h
index 1df382bb995..f87370be70c 100644
--- a/storage/innobase/include/btr0cur.h
+++ b/storage/innobase/include/btr0cur.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,6 +28,7 @@ Created 10/16/1994 Heikki Tuuri
#define btr0cur_h
#include "univ.i"
+#include "my_base.h"
#include "dict0dict.h"
#include "page0cur.h"
#include "btr0types.h"
@@ -42,6 +43,11 @@ enum {
/** sys fields will be found in the update vector or inserted
entry */
BTR_KEEP_SYS_FLAG = 4,
+
+ /** no rollback */
+ BTR_NO_ROLLBACK = BTR_NO_UNDO_LOG_FLAG
+ | BTR_NO_LOCKING_FLAG | BTR_KEEP_SYS_FLAG,
+
/** btr_cur_pessimistic_update() must keep cursor position
when moving columns to big_rec */
BTR_KEEP_POS_FLAG = 8,
@@ -127,6 +133,24 @@ btr_cur_position(
buf_block_t* block, /*!< in: buffer block of rec */
btr_cur_t* cursor);/*!< in: cursor */
+/** Load the instant ALTER TABLE metadata from the clustered index
+when loading a table definition.
+@param[in,out] table table definition from the data dictionary
+@return error code
+@retval DB_SUCCESS if no error occurred */
+dberr_t
+btr_cur_instant_init(dict_table_t* table)
+ ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result));
+
+/** Initialize the n_core_null_bytes on first access to a clustered
+index root page.
+@param[in] index clustered index that is on its first access
+@param[in] page clustered index root page
+@return whether the page is corrupted */
+bool
+btr_cur_instant_root_init(dict_index_t* index, const page_t* page)
+ ATTRIBUTE_COLD __attribute__((nonnull, warn_unused_result));
+
/** Optimistically latches the leaf page or pages requested.
@param[in] block guessed buffer block
@param[in] modify_clock modify clock value
@@ -154,8 +178,7 @@ Note that if mode is PAGE_CUR_LE, which is used in inserts, then
cursor->up_match and cursor->low_match both will have sensible values.
If mode is PAGE_CUR_GE, then up_match will a have a sensible value. */
dberr_t
-btr_cur_search_to_nth_level(
-/*========================*/
+btr_cur_search_to_nth_level_func(
dict_index_t* index, /*!< in: index */
ulint level, /*!< in: the tree level of search */
const dtuple_t* tuple, /*!< in: data tuple; NOTE: n_fields_cmp in
@@ -174,23 +197,29 @@ btr_cur_search_to_nth_level(
cursor->left_block is used to store a pointer
to the left neighbor page, in the cases
BTR_SEARCH_PREV and BTR_MODIFY_PREV;
- NOTE that if has_search_latch
- is != 0, we maybe do not have a latch set
- on the cursor page, we assume
- the caller uses his search latch
- to protect the record! */
+ NOTE that if ahi_latch, we might not have a
+ cursor page latch, we assume that ahi_latch
+ protects the record! */
btr_cur_t* cursor, /*!< in/out: tree cursor; the cursor page is
s- or x-latched, but see also above! */
- ulint has_search_latch,
- /*!< in: latch mode the caller
- currently has on search system:
- RW_S_LATCH, or 0 */
+#ifdef BTR_CUR_HASH_ADAPT
+ rw_lock_t* ahi_latch,
+ /*!< in: currently held btr_search_latch
+ (in RW_S_LATCH mode), or NULL */
+#endif /* BTR_CUR_HASH_ADAPT */
const char* file, /*!< in: file name */
unsigned line, /*!< in: line where called */
mtr_t* mtr, /*!< in/out: mini-transaction */
ib_uint64_t autoinc = 0);
/*!< in: PAGE_ROOT_AUTO_INC to be written
(0 if none) */
+#ifdef BTR_CUR_HASH_ADAPT
+# define btr_cur_search_to_nth_level(i,l,t,m,lm,c,a,fi,li,mtr) \
+ btr_cur_search_to_nth_level_func(i,l,t,m,lm,c,a,fi,li,mtr)
+#else /* BTR_CUR_HASH_ADAPT */
+# define btr_cur_search_to_nth_level(i,l,t,m,lm,c,a,fi,li,mtr) \
+ btr_cur_search_to_nth_level_func(i,l,t,m,lm,c,fi,li,mtr)
+#endif /* BTR_CUR_HASH_ADAPT */
/*****************************************************************//**
Opens a cursor at either end of an index.
@@ -572,7 +601,7 @@ btr_cur_parse_del_mark_set_sec_rec(
@param[in] tuple2 range end, may also be empty tuple
@param[in] mode2 search mode for range end
@return estimated number of rows */
-int64_t
+ha_rows
btr_estimate_n_rows_in_range(
dict_index_t* index,
const dtuple_t* tuple1,
@@ -806,7 +835,7 @@ btr_cur_latch_leaves(
/** In the pessimistic delete, if the page data size drops below this
limit, merging it to a neighbor is tried */
#define BTR_CUR_PAGE_COMPRESS_LIMIT(index) \
- ((UNIV_PAGE_SIZE * (ulint)((index)->merge_threshold)) / 100)
+ ((srv_page_size * (ulint)((index)->merge_threshold)) / 100)
/** A slot in the path array. We store here info on a search path down the
tree. Each slot contains data on a single level of the tree. */
@@ -974,11 +1003,11 @@ We store locally a long enough prefix of each column so that we can determine
the ordering parts of each index record without looking into the externally
stored part. */
/*-------------------------------------- @{ */
-#define BTR_EXTERN_SPACE_ID 0 /*!< space id where stored */
-#define BTR_EXTERN_PAGE_NO 4 /*!< page no where stored */
-#define BTR_EXTERN_OFFSET 8 /*!< offset of BLOB header
+#define BTR_EXTERN_SPACE_ID 0U /*!< space id where stored */
+#define BTR_EXTERN_PAGE_NO 4U /*!< page no where stored */
+#define BTR_EXTERN_OFFSET 8U /*!< offset of BLOB header
on that page */
-#define BTR_EXTERN_LEN 12 /*!< 8 bytes containing the
+#define BTR_EXTERN_LEN 12U /*!< 8 bytes containing the
length of the externally
stored part of the BLOB.
The 2 highest bits are
diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic
index b1e59651a1d..adcd92e2fc8 100644
--- a/storage/innobase/include/btr0cur.ic
+++ b/storage/innobase/include/btr0cur.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,7 +29,7 @@ Created 10/16/1994 Heikki Tuuri
#ifdef UNIV_DEBUG
# define LIMIT_OPTIMISTIC_INSERT_DEBUG(NREC, CODE)\
if (btr_cur_limit_optimistic_insert_debug > 1\
- && (NREC) >= (ulint)btr_cur_limit_optimistic_insert_debug) {\
+ && (NREC) >= btr_cur_limit_optimistic_insert_debug) {\
CODE;\
}
#else
@@ -128,19 +129,17 @@ btr_cur_compress_recommendation(
{
const page_t* page;
- ut_ad(mtr_is_block_fix(
- mtr, btr_cur_get_block(cursor),
- MTR_MEMO_PAGE_X_FIX, cursor->index->table));
+ ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor),
+ MTR_MEMO_PAGE_X_FIX));
page = btr_cur_get_page(cursor);
- LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2,
+ LIMIT_OPTIMISTIC_INSERT_DEBUG(page_get_n_recs(page) * 2U,
return(FALSE));
- if ((page_get_data_size(page)
- < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index))
- || ((btr_page_get_next(page, mtr) == FIL_NULL)
- && (btr_page_get_prev(page, mtr) == FIL_NULL))) {
+ if (page_get_data_size(page)
+ < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index)
+ || !page_has_siblings(page)) {
/* The page fillfactor has dropped below a predefined
minimum value OR the level in the B-tree contains just
@@ -173,11 +172,9 @@ btr_cur_can_delete_without_compress(
page = btr_cur_get_page(cursor);
- if ((page_get_data_size(page) - rec_size
- < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index))
- || ((btr_page_get_next(page, mtr) == FIL_NULL)
- && (btr_page_get_prev(page, mtr) == FIL_NULL))
- || (page_get_n_recs(page) < 2)) {
+ if (page_get_data_size(page) - rec_size
+ < BTR_CUR_PAGE_COMPRESS_LIMIT(cursor->index)
+ || !page_has_siblings(page) || page_get_n_recs(page) < 2) {
/* The page fillfactor will drop below a predefined
minimum value, OR the level in the B-tree contains just
diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h
index b84d9840a28..747ad676e33 100644
--- a/storage/innobase/include/btr0pcur.h
+++ b/storage/innobase/include/btr0pcur.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -136,20 +136,25 @@ btr_pcur_open_with_no_init_func(
may end up on the previous page of the
record! */
ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
- NOTE that if has_search_latch != 0 then
- we maybe do not acquire a latch on the cursor
- page, but assume that the caller uses his
- btr search latch to protect the record! */
+ NOTE that if ahi_latch then we might not
+ acquire a cursor page latch, but assume
+ that the ahi_latch protects the record! */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- ulint has_search_latch,
- /*!< in: latch mode the caller
- currently has on search system:
- RW_S_LATCH, or 0 */
+#ifdef BTR_CUR_HASH_ADAPT
+ rw_lock_t* ahi_latch,
+ /*!< in: adaptive hash index latch held
+ by the caller, or NULL if none */
+#endif /* BTR_CUR_HASH_ADAPT */
const char* file, /*!< in: file name */
unsigned line, /*!< in: line where called */
mtr_t* mtr); /*!< in: mtr */
-#define btr_pcur_open_with_no_init(ix,t,md,l,cur,has,m) \
- btr_pcur_open_with_no_init_func(ix,t,md,l,cur,has,__FILE__,__LINE__,m)
+#ifdef BTR_CUR_HASH_ADAPT
+# define btr_pcur_open_with_no_init(ix,t,md,l,cur,ahi,m) \
+ btr_pcur_open_with_no_init_func(ix,t,md,l,cur,ahi,__FILE__,__LINE__,m)
+#else /* BTR_CUR_HASH_ADAPT */
+# define btr_pcur_open_with_no_init(ix,t,md,l,cur,ahi,m) \
+ btr_pcur_open_with_no_init_func(ix,t,md,l,cur,__FILE__,__LINE__,m)
+#endif /* BTR_CUR_HASH_ADAPT */
/*****************************************************************//**
Opens a persistent cursor at either end of an index. */
@@ -436,21 +441,11 @@ btr_pcur_is_before_first_on_page(
/*********************************************************//**
Checks if the persistent cursor is before the first user record in
the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_in_tree(
-/*=============================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in: mtr */
+static inline bool btr_pcur_is_before_first_in_tree(btr_pcur_t* cursor);
/*********************************************************//**
Checks if the persistent cursor is after the last user record in
the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_in_tree(
-/*===========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr); /*!< in: mtr */
+static inline bool btr_pcur_is_after_last_in_tree(btr_pcur_t* cursor);
/*********************************************************//**
Moves the persistent cursor to the next record on the same page. */
UNIV_INLINE
diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic
index b2a85def63d..6e38bf61701 100644
--- a/storage/innobase/include/btr0pcur.ic
+++ b/storage/innobase/include/btr0pcur.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation.
+Copyright (c) 2015, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -209,43 +209,25 @@ btr_pcur_is_on_user_rec(
/*********************************************************//**
Checks if the persistent cursor is before the first user record in
the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_before_first_in_tree(
-/*=============================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in: mtr */
+static inline bool btr_pcur_is_before_first_in_tree(btr_pcur_t* cursor)
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
-
- return(FALSE);
- }
-
- return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor)));
+ return !page_has_prev(btr_pcur_get_page(cursor))
+ && page_cur_is_before_first(btr_pcur_get_page_cur(cursor));
}
/*********************************************************//**
Checks if the persistent cursor is after the last user record in
the index tree. */
-UNIV_INLINE
-ibool
-btr_pcur_is_after_last_in_tree(
-/*===========================*/
- btr_pcur_t* cursor, /*!< in: persistent cursor */
- mtr_t* mtr) /*!< in: mtr */
+static inline bool btr_pcur_is_after_last_in_tree(btr_pcur_t* cursor)
{
ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED);
ut_ad(cursor->latch_mode != BTR_NO_LATCHES);
- if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) != FIL_NULL) {
-
- return(FALSE);
- }
-
- return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor)));
+ return !page_has_next(btr_pcur_get_page(cursor))
+ && page_cur_is_after_last(btr_pcur_get_page_cur(cursor));
}
/*********************************************************//**
@@ -315,9 +297,7 @@ btr_pcur_move_to_next_user_rec(
cursor->old_stored = false;
loop:
if (btr_pcur_is_after_last_on_page(cursor)) {
-
- if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
-
+ if (btr_pcur_is_after_last_in_tree(cursor)) {
return(FALSE);
}
@@ -352,19 +332,15 @@ btr_pcur_move_to_next(
cursor->old_stored = false;
if (btr_pcur_is_after_last_on_page(cursor)) {
-
- if (btr_pcur_is_after_last_in_tree(cursor, mtr)) {
-
+ if (btr_pcur_is_after_last_in_tree(cursor)) {
return(FALSE);
}
btr_pcur_move_to_next_page(cursor, mtr);
-
return(TRUE);
}
btr_pcur_move_to_next_on_page(cursor);
-
return(TRUE);
}
@@ -480,9 +456,12 @@ btr_pcur_open_low(
ut_ad(!dict_index_is_spatial(index));
- err = btr_cur_search_to_nth_level(
- index, level, tuple, mode, latch_mode,
- btr_cursor, 0, file, line, mtr, autoinc);
+ err = btr_cur_search_to_nth_level_func(
+ index, level, tuple, mode, latch_mode, btr_cursor,
+#ifdef BTR_CUR_HASH_ADAPT
+ NULL,
+#endif /* BTR_CUR_HASH_ADAPT */
+ file, line, mtr, autoinc);
if (err != DB_SUCCESS) {
ib::warn() << " Error code: " << err
@@ -517,15 +496,15 @@ btr_pcur_open_with_no_init_func(
may end up on the previous page of the
record! */
ulint latch_mode,/*!< in: BTR_SEARCH_LEAF, ...;
- NOTE that if has_search_latch != 0 then
- we maybe do not acquire a latch on the cursor
- page, but assume that the caller uses his
- btr search latch to protect the record! */
+ NOTE that if ahi_latch then we might not
+ acquire a cursor page latch, but assume
+ that the ahi_latch protects the record! */
btr_pcur_t* cursor, /*!< in: memory buffer for persistent cursor */
- ulint has_search_latch,
- /*!< in: latch mode the caller
- currently has on search system:
- RW_S_LATCH, or 0 */
+#ifdef BTR_CUR_HASH_ADAPT
+ rw_lock_t* ahi_latch,
+ /*!< in: adaptive hash index latch held
+ by the caller, or NULL if none */
+#endif /* BTR_CUR_HASH_ADAPT */
const char* file, /*!< in: file name */
unsigned line, /*!< in: line where called */
mtr_t* mtr) /*!< in: mtr */
@@ -540,9 +519,12 @@ btr_pcur_open_with_no_init_func(
btr_cursor = btr_pcur_get_btr_cur(cursor);
- err = btr_cur_search_to_nth_level(
+ err = btr_cur_search_to_nth_level_func(
index, 0, tuple, mode, latch_mode, btr_cursor,
- has_search_latch, file, line, mtr);
+#ifdef BTR_CUR_HASH_ADAPT
+ ahi_latch,
+#endif /* BTR_CUR_HASH_ADAPT */
+ file, line, mtr);
cursor->pos_state = BTR_PCUR_IS_POSITIONED;
diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h
index e6983cacffb..4aaf3fb835e 100644
--- a/storage/innobase/include/btr0sea.h
+++ b/storage/innobase/include/btr0sea.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -38,26 +38,20 @@ Created 2/17/1996 Heikki Tuuri
/** Creates and initializes the adaptive search system at a database start.
@param[in] hash_size hash table size. */
-void
-btr_search_sys_create(ulint hash_size);
+void btr_search_sys_create(ulint hash_size);
/** Resize hash index hash table.
@param[in] hash_size hash index hash table size */
-void
-btr_search_sys_resize(ulint hash_size);
+void btr_search_sys_resize(ulint hash_size);
/** Frees the adaptive search system at a database shutdown. */
-void
-btr_search_sys_free();
+void btr_search_sys_free();
/** Disable the adaptive hash search system and empty the index.
@param need_mutex need to acquire dict_sys->mutex */
-void
-btr_search_disable(
- bool need_mutex);
+void btr_search_disable(bool need_mutex);
/** Enable the adaptive hash search system. */
-void
-btr_search_enable();
+void btr_search_enable();
/** Returns the value of ref_count. The value is protected by latch.
@param[in] info search info
@@ -91,12 +85,11 @@ both have sensible values.
we assume the caller uses his search latch
to protect the record!
@param[out] cursor tree cursor
-@param[in] has_search_latch
- latch mode the caller currently has on
- search system: RW_S/X_LATCH or 0
+@param[in] ahi_latch the adaptive hash index latch being held,
+ or NULL
@param[in] mtr mini transaction
-@return TRUE if succeeded */
-ibool
+@return whether the search succeeded */
+bool
btr_search_guess_on_hash(
dict_index_t* index,
btr_search_t* info,
@@ -104,22 +97,19 @@ btr_search_guess_on_hash(
ulint mode,
ulint latch_mode,
btr_cur_t* cursor,
- ulint has_search_latch,
+ rw_lock_t* ahi_latch,
mtr_t* mtr);
-/** Moves or deletes hash entries for moved records. If new_page is already
-hashed, then the hash index for page, if any, is dropped. If new_page is not
-hashed, and page is hashed, then a new hash index is built to new_page with the
-same parameters as page (this often happens when a page is split).
-@param[in,out] new_block records are copied to this page.
-@param[in,out] block index page from which record are copied, and the
- copied records will be deleted from this page.
-@param[in,out] index record descriptor */
+/** Move or delete hash entries for moved records, usually in a page split.
+If new_block is already hashed, then any hash index for block is dropped.
+If new_block is not hashed, and block is hashed, then a new hash index is
+built to new_block with the same parameters as block.
+@param[in,out] new_block destination page
+@param[in,out] block source page (subject to deletion later) */
void
btr_search_move_or_delete_hash_entries(
buf_block_t* new_block,
- buf_block_t* block,
- dict_index_t* index);
+ buf_block_t* block);
/** Drop any adaptive hash index entries that point to an index page.
@param[in,out] block block containing index page, s- or x-latched, or an
@@ -127,8 +117,7 @@ btr_search_move_or_delete_hash_entries(
block->buf_fix_count == 0 or it is an index page which
has already been removed from the buf_pool->page_hash
i.e.: it is in state BUF_BLOCK_REMOVE_HASH */
-void
-btr_search_drop_page_hash_index(buf_block_t* block);
+void btr_search_drop_page_hash_index(buf_block_t* block);
/** Drop possible adaptive hash index entries when a page is evicted
from the buffer pool or freed in a file, or the index is being dropped.
@@ -138,118 +127,78 @@ void btr_search_drop_page_hash_when_freed(const page_id_t& page_id);
/** Updates the page hash index when a single record is inserted on a page.
@param[in] cursor cursor which was positioned to the place to insert
using btr_cur_search_, and the new record has been
- inserted next to the cursor. */
+ inserted next to the cursor.
+@param[in] ahi_latch the adaptive hash index latch */
void
-btr_search_update_hash_node_on_insert(btr_cur_t* cursor);
+btr_search_update_hash_node_on_insert(btr_cur_t* cursor, rw_lock_t* ahi_latch);
/** Updates the page hash index when a single record is inserted on a page.
-@param[in] cursor cursor which was positioned to the
+@param[in,out] cursor cursor which was positioned to the
place to insert using btr_cur_search_...,
and the new record has been inserted next
- to the cursor */
+ to the cursor
+@param[in] ahi_latch the adaptive hash index latch */
void
-btr_search_update_hash_on_insert(btr_cur_t* cursor);
+btr_search_update_hash_on_insert(btr_cur_t* cursor, rw_lock_t* ahi_latch);
/** Updates the page hash index when a single record is deleted from a page.
@param[in] cursor cursor which was positioned on the record to delete
using btr_cur_search_, the record is not yet deleted.*/
-void
-btr_search_update_hash_on_delete(btr_cur_t* cursor);
+void btr_search_update_hash_on_delete(btr_cur_t* cursor);
/** Validates the search system.
@return true if ok */
-bool
-btr_search_validate();
-
-/** X-Lock the search latch (corresponding to given index)
-@param[in] index index handler */
-UNIV_INLINE
-void
-btr_search_x_lock(const dict_index_t* index);
-
-/** X-Unlock the search latch (corresponding to given index)
-@param[in] index index handler */
-UNIV_INLINE
-void
-btr_search_x_unlock(const dict_index_t* index);
+bool btr_search_validate();
/** Lock all search latches in exclusive mode. */
-UNIV_INLINE
-void
-btr_search_x_lock_all();
+static inline void btr_search_x_lock_all();
/** Unlock all search latches from exclusive mode. */
-UNIV_INLINE
-void
-btr_search_x_unlock_all();
-
-/** S-Lock the search latch (corresponding to given index)
-@param[in] index index handler */
-UNIV_INLINE
-void
-btr_search_s_lock(const dict_index_t* index);
-
-/** S-Unlock the search latch (corresponding to given index)
-@param[in] index index handler */
-UNIV_INLINE
-void
-btr_search_s_unlock(const dict_index_t* index);
+static inline void btr_search_x_unlock_all();
/** Lock all search latches in shared mode. */
-UNIV_INLINE
-void
-btr_search_s_lock_all();
+static inline void btr_search_s_lock_all();
#ifdef UNIV_DEBUG
/** Check if thread owns all the search latches.
@param[in] mode lock mode check
@retval true if owns all of them
@retval false if does not own some of them */
-UNIV_INLINE
-bool
-btr_search_own_all(ulint mode);
+static inline bool btr_search_own_all(ulint mode);
/** Check if thread owns any of the search latches.
@param[in] mode lock mode check
@retval true if owns any of them
@retval false if owns no search latch */
-UNIV_INLINE
-bool
-btr_search_own_any(ulint mode);
+static inline bool btr_search_own_any(ulint mode);
+
+/** @return whether this thread holds any of the search latches */
+static inline bool btr_search_own_any();
#endif /* UNIV_DEBUG */
/** Unlock all search latches from shared mode. */
-UNIV_INLINE
-void
-btr_search_s_unlock_all();
+static inline void btr_search_s_unlock_all();
/** Get the latch based on index attributes.
A latch is selected from an array of latches using pair of index-id, space-id.
@param[in] index index handler
@return latch */
-UNIV_INLINE
-rw_lock_t*
-btr_get_search_latch(const dict_index_t* index);
+static inline rw_lock_t* btr_get_search_latch(const dict_index_t* index);
/** Get the hash-table based on index attributes.
A table is selected from an array of tables using pair of index-id, space-id.
@param[in] index index handler
@return hash table */
-UNIV_INLINE
-hash_table_t*
-btr_get_search_table(const dict_index_t* index);
+static inline hash_table_t* btr_get_search_table(const dict_index_t* index);
#else /* BTR_CUR_HASH_ADAPT */
# define btr_search_sys_create(size)
+# define btr_search_sys_free()
# define btr_search_drop_page_hash_index(block)
-# define btr_search_s_lock(index)
-# define btr_search_s_unlock(index)
# define btr_search_s_lock_all(index)
# define btr_search_s_unlock_all(index)
-# define btr_search_x_lock(index)
-# define btr_search_x_unlock(index)
# define btr_search_info_update(index, cursor)
-# define btr_search_move_or_delete_hash_entries(new_block, block, index)
-# define btr_search_update_hash_on_insert(cursor)
+# define btr_search_move_or_delete_hash_entries(new_block, block)
+# define btr_search_update_hash_on_insert(cursor, ahi_latch)
# define btr_search_update_hash_on_delete(cursor)
# define btr_search_sys_resize(hash_size)
#endif /* BTR_CUR_HASH_ADAPT */
@@ -258,15 +207,11 @@ btr_get_search_table(const dict_index_t* index);
/** Create and initialize search info.
@param[in,out] heap heap where created
@return own: search info struct */
-UNIV_INLINE
-btr_search_t*
-btr_search_info_create(mem_heap_t* heap)
+static inline btr_search_t* btr_search_info_create(mem_heap_t* heap)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** @return the search info of an index */
-UNIV_INLINE
-btr_search_t*
-btr_search_get_info(dict_index_t* index)
+static inline btr_search_t* btr_search_get_info(dict_index_t* index)
{
return(index->search_info);
}
@@ -310,7 +255,7 @@ struct btr_search_t{
ulint n_bytes; /*!< recommended prefix: number of bytes in
an incomplete field
@see BTR_PAGE_MAX_REC_SIZE */
- ibool left_side; /*!< TRUE or FALSE, depending on whether
+ bool left_side; /*!< true or false, depending on whether
the leftmost record of several records with
the same prefix should be indexed in the
hash index */
diff --git a/storage/innobase/include/btr0sea.ic b/storage/innobase/include/btr0sea.ic
index b5a7536a2b4..716410e3557 100644
--- a/storage/innobase/include/btr0sea.ic
+++ b/storage/innobase/include/btr0sea.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -30,9 +31,7 @@ Created 2/17/1996 Heikki Tuuri
/** Create and initialize search info.
@param[in,out] heap heap where created
@return own: search info struct */
-UNIV_INLINE
-btr_search_t*
-btr_search_info_create(mem_heap_t* heap)
+static inline btr_search_t* btr_search_info_create(mem_heap_t* heap)
{
btr_search_t* info = static_cast<btr_search_t*>(
mem_heap_zalloc(heap, sizeof(btr_search_t)));
@@ -45,25 +44,23 @@ btr_search_info_create(mem_heap_t* heap)
}
#ifdef BTR_CUR_HASH_ADAPT
-/*********************************************************************//**
-Updates the search info. */
+/** Updates the search info.
+@param[in,out] info search info
+@param[in,out] cursor cursor which was just positioned */
void
-btr_search_info_update_slow(
-/*========================*/
- btr_search_t* info, /*!< in/out: search info */
- btr_cur_t* cursor);/*!< in: cursor which was just positioned */
+btr_search_info_update_slow(btr_search_t* info, btr_cur_t* cursor);
/*********************************************************************//**
Updates the search info. */
-UNIV_INLINE
+static inline
void
btr_search_info_update(
/*===================*/
dict_index_t* index, /*!< in: index of the cursor */
btr_cur_t* cursor) /*!< in: cursor which was just positioned */
{
- ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_S));
- ut_ad(!rw_lock_own(btr_get_search_latch(index), RW_LOCK_X));
+ ut_ad(!btr_search_own_any(RW_LOCK_S));
+ ut_ad(!btr_search_own_any(RW_LOCK_X));
if (dict_index_is_spatial(index) || !btr_search_enabled) {
return;
@@ -87,28 +84,8 @@ btr_search_info_update(
btr_search_info_update_slow(info, cursor);
}
-/** X-Lock the search latch (corresponding to given index)
-@param[in] index index handler */
-UNIV_INLINE
-void
-btr_search_x_lock(const dict_index_t* index)
-{
- rw_lock_x_lock(btr_get_search_latch(index));
-}
-
-/** X-Unlock the search latch (corresponding to given index)
-@param[in] index index handler */
-UNIV_INLINE
-void
-btr_search_x_unlock(const dict_index_t* index)
-{
- rw_lock_x_unlock(btr_get_search_latch(index));
-}
-
/** Lock all search latches in exclusive mode. */
-UNIV_INLINE
-void
-btr_search_x_lock_all()
+static inline void btr_search_x_lock_all()
{
for (ulint i = 0; i < btr_ahi_parts; ++i) {
rw_lock_x_lock(btr_search_latches[i]);
@@ -116,37 +93,15 @@ btr_search_x_lock_all()
}
/** Unlock all search latches from exclusive mode. */
-UNIV_INLINE
-void
-btr_search_x_unlock_all()
+static inline void btr_search_x_unlock_all()
{
for (ulint i = 0; i < btr_ahi_parts; ++i) {
rw_lock_x_unlock(btr_search_latches[i]);
}
}
-/** S-Lock the search latch (corresponding to given index)
-@param[in] index index handler */
-UNIV_INLINE
-void
-btr_search_s_lock(const dict_index_t* index)
-{
- rw_lock_s_lock(btr_get_search_latch(index));
-}
-
-/** S-Unlock the search latch (corresponding to given index)
-@param[in] index index handler */
-UNIV_INLINE
-void
-btr_search_s_unlock(const dict_index_t* index)
-{
- rw_lock_s_unlock(btr_get_search_latch(index));
-}
-
/** Lock all search latches in shared mode. */
-UNIV_INLINE
-void
-btr_search_s_lock_all()
+static inline void btr_search_s_lock_all()
{
for (ulint i = 0; i < btr_ahi_parts; ++i) {
rw_lock_s_lock(btr_search_latches[i]);
@@ -154,9 +109,7 @@ btr_search_s_lock_all()
}
/** Unlock all search latches from shared mode. */
-UNIV_INLINE
-void
-btr_search_s_unlock_all()
+static inline void btr_search_s_unlock_all()
{
for (ulint i = 0; i < btr_ahi_parts; ++i) {
rw_lock_s_unlock(btr_search_latches[i]);
@@ -168,9 +121,7 @@ btr_search_s_unlock_all()
@param[in] mode lock mode check
@retval true if owns all of them
@retval false if does not own some of them */
-UNIV_INLINE
-bool
-btr_search_own_all(ulint mode)
+static inline bool btr_search_own_all(ulint mode)
{
for (ulint i = 0; i < btr_ahi_parts; ++i) {
if (!rw_lock_own(btr_search_latches[i], mode)) {
@@ -184,9 +135,7 @@ btr_search_own_all(ulint mode)
@param[in] mode lock mode check
@retval true if owns any of them
@retval false if owns no search latch */
-UNIV_INLINE
-bool
-btr_search_own_any(ulint mode)
+static inline bool btr_search_own_any(ulint mode)
{
for (ulint i = 0; i < btr_ahi_parts; ++i) {
if (rw_lock_own(btr_search_latches[i], mode)) {
@@ -195,19 +144,31 @@ btr_search_own_any(ulint mode)
}
return(false);
}
+
+/** @return whether this thread holds any of the search latches */
+static inline bool btr_search_own_any()
+{
+ for (ulint i = btr_ahi_parts; i--; ) {
+ if (rw_lock_own_flagged(btr_search_latches[i],
+ RW_LOCK_FLAG_X | RW_LOCK_FLAG_S)) {
+ return true;
+ }
+ }
+ return false;
+}
#endif /* UNIV_DEBUG */
/** Get the adaptive hash search index latch for a b-tree.
@param[in] index b-tree index
@return latch */
-UNIV_INLINE
-rw_lock_t*
-btr_get_search_latch(const dict_index_t* index)
+static inline rw_lock_t* btr_get_search_latch(const dict_index_t* index)
{
ut_ad(index != NULL);
+ ut_ad(!index->table->space
+ || index->table->space->id == index->table->space_id);
- ulint ifold = ut_fold_ulint_pair(static_cast<ulint>(index->id),
- static_cast<ulint>(index->space));
+ ulint ifold = ut_fold_ulint_pair(ulint(index->id),
+ index->table->space_id);
return(btr_search_latches[ifold % btr_ahi_parts]);
}
@@ -216,14 +177,13 @@ btr_get_search_latch(const dict_index_t* index)
A table is selected from an array of tables using pair of index-id, space-id.
@param[in] index index handler
@return hash table */
-UNIV_INLINE
-hash_table_t*
-btr_get_search_table(const dict_index_t* index)
+static inline hash_table_t* btr_get_search_table(const dict_index_t* index)
{
ut_ad(index != NULL);
+ ut_ad(index->table->space->id == index->table->space_id);
- ulint ifold = ut_fold_ulint_pair(static_cast<ulint>(index->id),
- static_cast<ulint>(index->space));
+ ulint ifold = ut_fold_ulint_pair(ulint(index->id),
+ index->table->space_id);
return(btr_search_sys->hash_tables[ifold % btr_ahi_parts]);
}
diff --git a/storage/innobase/include/buf0buddy.h b/storage/innobase/include/buf0buddy.h
index f56ac2e5e70..8befc038f23 100644
--- a/storage/innobase/include/buf0buddy.h
+++ b/storage/innobase/include/buf0buddy.h
@@ -48,9 +48,9 @@ buf_buddy_alloc(
the page resides */
ulint size, /*!< in: compressed page size
(between UNIV_ZIP_SIZE_MIN and
- UNIV_PAGE_SIZE) */
- ibool* lru) /*!< in: pointer to a variable
- that will be assigned TRUE if
+ srv_page_size) */
+ bool* lru) /*!< in: pointer to a variable
+ that will be assigned true if
storage was allocated from the
LRU list and buf_pool->mutex was
temporarily released */
@@ -67,14 +67,14 @@ buf_buddy_free(
void* buf, /*!< in: block to be freed, must not
be pointed to by the buffer pool */
ulint size) /*!< in: block size,
- up to UNIV_PAGE_SIZE */
+ up to srv_page_size */
MY_ATTRIBUTE((nonnull));
/** Reallocate a block.
@param[in] buf_pool buffer pool instance
@param[in] buf block to be reallocated, must be pointed
to by the buffer pool
-@param[in] size block size, up to UNIV_PAGE_SIZE
+@param[in] size block size, up to srv_page_size
@retval false if failed because of no free blocks. */
bool
buf_buddy_realloc(
diff --git a/storage/innobase/include/buf0buddy.ic b/storage/innobase/include/buf0buddy.ic
index 2b6d76df009..d166ab8441c 100644
--- a/storage/innobase/include/buf0buddy.ic
+++ b/storage/innobase/include/buf0buddy.ic
@@ -42,8 +42,8 @@ buf_buddy_alloc_low(
buf_pool_t* buf_pool, /*!< in/out: buffer pool instance */
ulint i, /*!< in: index of buf_pool->zip_free[],
or BUF_BUDDY_SIZES */
- ibool* lru) /*!< in: pointer to a variable that
- will be assigned TRUE if storage was
+ bool* lru) /*!< in: pointer to a variable that
+ will be assigned true if storage was
allocated from the LRU list and
buf_pool->mutex was temporarily
released */
@@ -96,9 +96,9 @@ buf_buddy_alloc(
the page resides */
ulint size, /*!< in: compressed page size
(between UNIV_ZIP_SIZE_MIN and
- UNIV_PAGE_SIZE) */
- ibool* lru) /*!< in: pointer to a variable
- that will be assigned TRUE if
+ srv_page_size) */
+ bool* lru) /*!< in: pointer to a variable
+ that will be assigned true if
storage was allocated from the
LRU list and buf_pool->mutex was
temporarily released */
@@ -106,7 +106,7 @@ buf_buddy_alloc(
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(ut_is_2pow(size));
ut_ad(size >= UNIV_ZIP_SIZE_MIN);
- ut_ad(size <= UNIV_PAGE_SIZE);
+ ut_ad(size <= srv_page_size);
return((byte*) buf_buddy_alloc_low(buf_pool, buf_buddy_get_slot(size),
lru));
@@ -123,12 +123,12 @@ buf_buddy_free(
void* buf, /*!< in: block to be freed, must not
be pointed to by the buffer pool */
ulint size) /*!< in: block size,
- up to UNIV_PAGE_SIZE */
+ up to srv_page_size */
{
ut_ad(buf_pool_mutex_own(buf_pool));
ut_ad(ut_is_2pow(size));
ut_ad(size >= UNIV_ZIP_SIZE_MIN);
- ut_ad(size <= UNIV_PAGE_SIZE);
+ ut_ad(size <= srv_page_size);
buf_buddy_free_low(buf_pool, buf, buf_buddy_get_slot(size));
}
diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h
index a79b39235f3..33612f85ed6 100644
--- a/storage/innobase/include/buf0buf.h
+++ b/storage/innobase/include/buf0buf.h
@@ -268,18 +268,8 @@ public:
m_fold = src.fold();
}
- /** Reset the values from a (space, page_no).
- @param[in] space tablespace id
- @param[in] page_no page number */
- inline void reset(ulint space, ulint page_no)
- {
- m_space = static_cast<ib_uint32_t>(space);
- m_page_no = static_cast<ib_uint32_t>(page_no);
- m_fold = ULINT_UNDEFINED;
-
- ut_ad(space <= 0xFFFFFFFFU);
- ut_ad(page_no <= 0xFFFFFFFFU);
- }
+ /** Reset the object. */
+ void reset() { m_space= ~0U; m_page_no= ~0U; m_fold= ULINT_UNDEFINED; }
/** Reset the page number only.
@param[in] page_no page number */
@@ -1605,7 +1595,7 @@ public:
bool encrypted; /*!< page is still encrypted */
ulint real_size; /*!< Real size of the page
- Normal pages == UNIV_PAGE_SIZE
+ Normal pages == srv_page_size
page compressed pages, payload
size alligned to sector boundary.
*/
@@ -1740,9 +1730,9 @@ struct buf_block_t{
buf_pool->page_hash can point
to buf_page_t or buf_block_t */
byte* frame; /*!< pointer to buffer frame which
- is of size UNIV_PAGE_SIZE, and
+ is of size srv_page_size, and
aligned to an address divisible by
- UNIV_PAGE_SIZE */
+ srv_page_size */
BPageLock lock; /*!< read-write lock of the buffer
frame */
UT_LIST_NODE_T(buf_block_t) unzip_LRU;
@@ -1756,7 +1746,7 @@ struct buf_block_t{
used in debugging */
ibool in_withdraw_list;
#endif /* UNIV_DEBUG */
- unsigned lock_hash_val:32;/*!< hashed value of the page address
+ uint32_t lock_hash_val; /*!< hashed value of the page address
in the record lock hash table;
protected by buf_block_t::lock
(or buf_block_t::mutex, buf_pool->mutex
@@ -1902,7 +1892,7 @@ struct buf_block_t{
/**********************************************************************//**
Compute the hash fold value for blocks in buf_pool->zip_hash. */
/* @{ */
-#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE)
+#define BUF_POOL_ZIP_FOLD_PTR(ptr) (ulint(ptr) >> srv_page_size_shift)
#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame)
#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b))
/* @} */
@@ -2369,8 +2359,12 @@ Use these instead of accessing buf_pool->mutex directly. */
/** Get appropriate page_hash_lock. */
-# define buf_page_hash_lock_get(buf_pool, page_id) \
- hash_get_lock((buf_pool)->page_hash, (page_id).fold())
+UNIV_INLINE
+rw_lock_t*
+buf_page_hash_lock_get(const buf_pool_t* buf_pool, const page_id_t& page_id)
+{
+ return hash_get_lock(buf_pool->page_hash, page_id.fold());
+}
/** If not appropriate page_hash_lock, relock until appropriate. */
# define buf_page_hash_lock_s_confirm(hash_lock, buf_pool, page_id)\
diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
index 38c52d5e608..8314797e78d 100644
--- a/storage/innobase/include/buf0buf.ic
+++ b/storage/innobase/include/buf0buf.ic
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2014, 2017, MariaDB Corporation.
+Copyright (c) 2014, 2018, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -115,7 +115,7 @@ ulint
buf_pool_get_n_pages(void)
/*======================*/
{
- return(buf_pool_get_curr_size() / UNIV_PAGE_SIZE);
+ return buf_pool_get_curr_size() >> srv_page_size_shift;
}
/********************************************************************//**
@@ -761,7 +761,7 @@ buf_frame_align(
ut_ad(ptr);
- frame = (buf_frame_t*) ut_align_down(ptr, UNIV_PAGE_SIZE);
+ frame = (buf_frame_t*) ut_align_down(ptr, srv_page_size);
return(frame);
}
@@ -778,11 +778,11 @@ buf_ptr_get_fsp_addr(
fil_addr_t* addr) /*!< out: page offset and byte offset */
{
const page_t* page = (const page_t*) ut_align_down(ptr,
- UNIV_PAGE_SIZE);
+ srv_page_size);
*space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
addr->page = mach_read_from_4(page + FIL_PAGE_OFFSET);
- addr->boffset = ut_align_offset(ptr, UNIV_PAGE_SIZE);
+ addr->boffset = ut_align_offset(ptr, srv_page_size);
}
/**********************************************************************//**
@@ -867,7 +867,7 @@ buf_frame_copy(
{
ut_ad(buf && frame);
- ut_memcpy(buf, frame, UNIV_PAGE_SIZE);
+ ut_memcpy(buf, frame, srv_page_size);
return(buf);
}
@@ -955,7 +955,7 @@ ulint
buf_block_fix(
buf_page_t* bpage)
{
- return(my_atomic_add32((int32*) &bpage->buf_fix_count, 1) + 1);
+ return uint32(my_atomic_add32((int32*) &bpage->buf_fix_count, 1) + 1);
}
/** Increments the bufferfix count.
@@ -1003,9 +1003,10 @@ ulint
buf_block_unfix(
buf_page_t* bpage)
{
- ulint count = my_atomic_add32((int32*) &bpage->buf_fix_count, -1) - 1;
- ut_ad(count + 1 != 0);
- return(count);
+ uint32 count = uint32(my_atomic_add32((int32*) &bpage->buf_fix_count,
+ -1));
+ ut_ad(count != 0);
+ return count - 1;
}
/** Decrements the bufferfix count.
@@ -1424,8 +1425,8 @@ bool
buf_pool_is_obsolete(
ulint withdraw_clock)
{
- return(buf_pool_withdrawing
- || buf_withdraw_clock != withdraw_clock);
+ return(UNIV_UNLIKELY(buf_pool_withdrawing
+ || buf_withdraw_clock != withdraw_clock));
}
/** Calculate aligned buffer pool size based on srv_buf_pool_chunk_unit,
diff --git a/storage/innobase/include/buf0checksum.h b/storage/innobase/include/buf0checksum.h
index 20955a5b2e6..dc0dbafa4c4 100644
--- a/storage/innobase/include/buf0checksum.h
+++ b/storage/innobase/include/buf0checksum.h
@@ -36,7 +36,7 @@ when it is written to a file and also checked for a match when reading from
the file. When reading we allow both normal CRC32 and CRC-legacy-big-endian
variants. Note that we must be careful to calculate the same value on 32-bit
and 64-bit architectures.
-@param[in] page buffer page (UNIV_PAGE_SIZE bytes)
+@param[in] page buffer page (srv_page_size bytes)
@param[in] use_legacy_big_endian if true then use big endian
byteorder when converting byte strings to integers
@return checksum */
diff --git a/storage/innobase/include/buf0dblwr.h b/storage/innobase/include/buf0dblwr.h
index 598609e2be4..5d2e5e9fdf7 100644
--- a/storage/innobase/include/buf0dblwr.h
+++ b/storage/innobase/include/buf0dblwr.h
@@ -131,7 +131,7 @@ struct buf_dblwr_t{
doublewrite block (64 pages) */
ulint block2; /*!< page number of the second block */
ulint first_free;/*!< first free position in write_buf
- measured in units of UNIV_PAGE_SIZE */
+ measured in units of srv_page_size */
ulint b_reserved;/*!< number of slots currently reserved
for batch flush. */
os_event_t b_event;/*!< event where threads wait for a
@@ -150,7 +150,7 @@ struct buf_dblwr_t{
buffer. */
byte* write_buf;/*!< write buffer used in writing to the
doublewrite buffer, aligned to an
- address divisible by UNIV_PAGE_SIZE
+ address divisible by srv_page_size
(which is required by Windows aio) */
byte* write_buf_unaligned;/*!< pointer to write_buf,
but unaligned */
diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h
index 5c1dddd9a3b..741cb1dbca3 100644
--- a/storage/innobase/include/buf0flu.h
+++ b/storage/innobase/include/buf0flu.h
@@ -217,16 +217,10 @@ buf_flush_ready_for_replace(
#ifdef UNIV_DEBUG
/** Disables page cleaner threads (coordinator and workers).
It's used by: SET GLOBAL innodb_page_cleaner_disabled_debug = 1 (0).
-@param[in] thd thread handle
-@param[in] var pointer to system variable
-@param[out] var_ptr where the formal string goes
@param[in] save immediate result from check function */
-void
-buf_flush_page_cleaner_disabled_debug_update(
- THD* thd,
- struct st_mysql_sys_var* var,
- void* var_ptr,
- const void* save);
+void buf_flush_page_cleaner_disabled_debug_update(THD*,
+ st_mysql_sys_var*, void*,
+ const void* save);
#endif /* UNIV_DEBUG */
/******************************************************************//**
@@ -239,6 +233,12 @@ DECLARE_THREAD(buf_flush_page_cleaner_coordinator)(
/*===============================================*/
void* arg); /*!< in: a dummy parameter required by
os_thread_create */
+
+/** Adjust thread count for page cleaner workers.
+@param[in] new_cnt Number of threads to be used */
+void
+buf_flush_set_page_cleaner_thread_cnt(ulong new_cnt);
+
/******************************************************************//**
Worker thread of page_cleaner.
@return a dummy parameter */
@@ -339,12 +339,12 @@ flushed to disk before any redo logged operations go to the index. */
class FlushObserver {
public:
/** Constructor
- @param[in] space_id table space id
+ @param[in,out] space tablespace
@param[in] trx trx instance
@param[in] stage performance schema accounting object,
used by ALTER TABLE. It is passed to log_preflush_pool_modified_pages()
for accounting. */
- FlushObserver(ulint space_id, trx_t* trx, ut_stage_alter_t* stage);
+ FlushObserver(fil_space_t* space, trx_t* trx, ut_stage_alter_t* stage);
/** Deconstructor */
~FlushObserver();
@@ -390,8 +390,8 @@ public:
buf_pool_t* buf_pool,
buf_page_t* bpage);
private:
- /** Table space id */
- const ulint m_space_id;
+ /** Tablespace */
+ fil_space_t* m_space;
/** Trx instance */
const trx_t* const m_trx;
@@ -413,57 +413,6 @@ private:
bool m_interrupted;
};
-/******************************************************************//**
-Start a buffer flush batch for LRU or flush list */
-ibool
-buf_flush_start(
-/*============*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU
- or BUF_FLUSH_LIST */
-/******************************************************************//**
-End a buffer flush batch for LRU or flush list */
-void
-buf_flush_end(
-/*==========*/
- buf_pool_t* buf_pool, /*!< buffer pool instance */
- buf_flush_t flush_type); /*!< in: BUF_FLUSH_LRU
- or BUF_FLUSH_LIST */
-/******************************************************************//**
-Gather the aggregated stats for both flush list and LRU list flushing */
-void
-buf_flush_common(
-/*=============*/
- buf_flush_t flush_type, /*!< in: type of flush */
- ulint page_count); /*!< in: number of pages flushed */
-
-/*******************************************************************//**
-This utility flushes dirty blocks from the end of the LRU list or flush_list.
-NOTE 1: in the case of an LRU flush the calling thread may own latches to
-pages: to avoid deadlocks, this function must be written so that it cannot
-end up waiting for these latches! NOTE 2: in the case of a flush list flush,
-the calling thread is not allowed to own any latches on pages! */
-__attribute__((nonnull))
-void
-buf_flush_batch(
-/*============*/
- buf_pool_t* buf_pool, /*!< in: buffer pool instance */
- buf_flush_t flush_type, /*!< in: BUF_FLUSH_LRU or
- BUF_FLUSH_LIST; if BUF_FLUSH_LIST,
- then the caller must not own any
- latches on pages */
- ulint min_n, /*!< in: wished minimum mumber of blocks
- flushed (it is not guaranteed that the
- actual number is that big, though) */
- lsn_t lsn_limit, /*!< in: in the case of BUF_FLUSH_LIST
- all blocks whose oldest_modification is
- smaller than this should be flushed
- (if their number does not exceed
- min_n), otherwise ignored */
- flush_counters_t* n); /*!< out: flushed/evicted page
- counts */
-
-
#include "buf0flu.ic"
#endif
diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h
index 10dcdb27eb0..d3e953ad9c7 100644
--- a/storage/innobase/include/buf0lru.h
+++ b/storage/innobase/include/buf0lru.h
@@ -33,6 +33,7 @@ Created 11/5/1995 Heikki Tuuri
// Forward declaration
struct trx_t;
+struct fil_space_t;
/******************************************************************//**
Returns TRUE if less than 25 % of the buffer pool is available. This can be
diff --git a/storage/innobase/include/buf0mtflu.h b/storage/innobase/include/buf0mtflu.h
deleted file mode 100644
index 0475335bbf5..00000000000
--- a/storage/innobase/include/buf0mtflu.h
+++ /dev/null
@@ -1,95 +0,0 @@
-/*****************************************************************************
-
-Copyright (C) 2014 SkySQL Ab. All Rights Reserved.
-Copyright (C) 2014 Fusion-io. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/buf0mtflu.h
-Multi-threadef flush method interface function prototypes
-
-Created 06/02/2014 Jan Lindström jan.lindstrom@skysql.com
- Dhananjoy Das DDas@fusionio.com
-***********************************************************************/
-
-#ifndef buf0mtflu_h
-#define buf0mtflu_h
-
-/******************************************************************//**
-Add exit work item to work queue to signal multi-threded flush
-threads that they should exit.
-*/
-void
-buf_mtflu_io_thread_exit(void);
-/*===========================*/
-
-/******************************************************************//**
-Initialize multi-threaded flush thread syncronization data.
-@return Initialized multi-threaded flush thread syncroniztion data. */
-void*
-buf_mtflu_handler_init(
-/*===================*/
- ulint n_threads, /*!< in: Number of threads to create */
- ulint wrk_cnt); /*!< in: Number of work items */
-
-/******************************************************************//**
-Return true if multi-threaded flush is initialized
-@return true if initialized, false if not */
-bool
-buf_mtflu_init_done(void);
-/*======================*/
-
-/*********************************************************************//**
-Clears up tail of the LRU lists:
-* Put replaceable pages at the tail of LRU to the free list
-* Flush dirty pages at the tail of LRU to the disk
-The depth to which we scan each buffer pool is controlled by dynamic
-config parameter innodb_LRU_scan_depth.
-@return total pages flushed */
-UNIV_INTERN
-ulint
-buf_mtflu_flush_LRU_tail(void);
-/*===========================*/
-
-/*******************************************************************//**
-Multi-threaded version of buf_flush_list
-*/
-bool
-buf_mtflu_flush_list(
-/*=================*/
- ulint min_n, /*!< in: wished minimum mumber of blocks
- flushed (it is not guaranteed that the
- actual number is that big, though) */
- lsn_t lsn_limit, /*!< in the case BUF_FLUSH_LIST all
- blocks whose oldest_modification is
- smaller than this should be flushed
- (if their number does not exceed
- min_n), otherwise ignored */
- ulint* n_processed); /*!< out: the number of pages
- which were processed is passed
- back to caller. Ignored if NULL */
-
-/*********************************************************************//**
-Set correct thread identifiers to io thread array based on
-information we have. */
-void
-buf_mtflu_set_thread_ids(
-/*=====================*/
- ulint n_threads, /*!<in: Number of threads to fill */
- void* ctx, /*!<in: thread context */
- os_thread_id_t* thread_ids); /*!<in: thread id array */
-
-#endif
diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h
index 719699f5ee2..2847e328515 100644
--- a/storage/innobase/include/buf0types.h
+++ b/storage/innobase/include/buf0types.h
@@ -113,7 +113,7 @@ is_checksum_strict(ulint algo)
#define BUF_BUDDY_LOW (1U << BUF_BUDDY_LOW_SHIFT)
/** Actual number of buddy sizes based on current page size */
-#define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - BUF_BUDDY_LOW_SHIFT)
+#define BUF_BUDDY_SIZES (srv_page_size_shift - BUF_BUDDY_LOW_SHIFT)
/** Maximum number of buddy sizes based on the max page size */
#define BUF_BUDDY_SIZES_MAX (UNIV_PAGE_SIZE_SHIFT_MAX \
@@ -121,7 +121,7 @@ is_checksum_strict(ulint algo)
/** twice the maximum block size of the buddy system;
the underlying memory is aligned by this amount:
-this must be equal to UNIV_PAGE_SIZE */
+this must be equal to srv_page_size */
#define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES)
/* @} */
diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h
index b6187d46025..d3361ad8b3b 100644
--- a/storage/innobase/include/data0data.h
+++ b/storage/innobase/include/data0data.h
@@ -591,6 +591,22 @@ struct dfield_t{
@param[in,out] heap memory heap in which the clone will be created
@return the cloned object */
dfield_t* clone(mem_heap_t* heap) const;
+
+ /** Check the row-end system versioning field of a record.
+ The field value is compared against the all-maximum constant of its
+ storage type: timestamp_max_bytes for DATA_FIXBINARY, and
+ trx_id_max_bytes for DATA_INT.
+ @return whether the system field indicates a history row, that is,
+ whether its value differs from the maximum constant */
+ bool vers_history_row() const
+ {
+ ut_ad(type.vers_sys_end());
+ if (type.mtype == DATA_FIXBINARY) {
+ ut_ad(len == sizeof timestamp_max_bytes);
+ return 0 != memcmp(data, timestamp_max_bytes, len);
+ }
+ /* The only other supported representation is DATA_INT. */
+ ut_ad(type.mtype == DATA_INT);
+ ut_ad(len == sizeof trx_id_max_bytes);
+ return 0 != memcmp(data, trx_id_max_bytes, len);
+ }
};
/** Structure for an SQL data tuple of fields (logical record) */
@@ -619,6 +635,15 @@ struct dtuple_t {
/** Value of dtuple_t::magic_n */
# define DATA_TUPLE_MAGIC_N 65478679
#endif /* UNIV_DEBUG */
+
+ /** Trim the tail of an index tuple before insert or update.
+ After instant ADD COLUMN, if the last fields of a clustered index tuple
+ match the 'default row', there will be no need to store them.
+ NOTE: A page latch in the index must be held, so that the index
+ may not lose 'instantness' before the trimmed tuple has been
+ inserted or updated.
+ @param[in] index index possibly with instantly added columns */
+ void trim(const dict_index_t& index);
};
/** A slot for a field in a big rec vector */
diff --git a/storage/innobase/include/data0data.ic b/storage/innobase/include/data0data.ic
index 81788885aa5..310902f5166 100644
--- a/storage/innobase/include/data0data.ic
+++ b/storage/innobase/include/data0data.ic
@@ -94,6 +94,7 @@ dfield_get_len(
ut_ad(field);
ut_ad((field->len == UNIV_SQL_NULL)
|| (field->data != &data_error));
+ ut_ad(field->len != UNIV_SQL_DEFAULT);
return(field->len);
}
@@ -108,6 +109,7 @@ dfield_set_len(
ulint len) /*!< in: length or UNIV_SQL_NULL */
{
ut_ad(field);
+ ut_ad(len != UNIV_SQL_DEFAULT);
#ifdef UNIV_VALGRIND_DEBUG
if (len != UNIV_SQL_NULL) UNIV_MEM_ASSERT_RW(field->data, len);
#endif /* UNIV_VALGRIND_DEBUG */
@@ -326,6 +328,7 @@ dfield_data_is_binary_equal(
ulint len, /*!< in: data length or UNIV_SQL_NULL */
const byte* data) /*!< in: data */
{
+ ut_ad(len != UNIV_SQL_DEFAULT);
return(len == dfield_get_len(field)
&& (len == UNIV_SQL_NULL
|| !memcmp(dfield_get_data(field), data, len)));
diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h
index c4521d0723b..b999106fee0 100644
--- a/storage/innobase/include/data0type.h
+++ b/storage/innobase/include/data0type.h
@@ -29,6 +29,12 @@ Created 1/16/1996 Heikki Tuuri
#include "univ.i"
+/** Special length indicating a missing instantly added column */
+#define UNIV_SQL_DEFAULT (UNIV_SQL_NULL - 1)
+
+/** @return whether a length is actually stored in a field,
+that is, it is neither UNIV_SQL_NULL nor UNIV_SQL_DEFAULT.
+NOTE: the argument is evaluated twice; do not pass an expression
+that has side effects. */
+#define len_is_stored(len) ((len) != UNIV_SQL_NULL && (len) != UNIV_SQL_DEFAULT)
+
extern ulint data_mysql_default_charset_coll;
#define DATA_MYSQL_BINARY_CHARSET_COLL 63
@@ -183,8 +189,12 @@ be less than 256 */
for shorter VARCHARs MySQL uses only 1 byte */
#define DATA_VIRTUAL 8192U /* Virtual column */
-/** Get the number of system columns in a table. */
-#define dict_table_get_n_sys_cols(table) DATA_N_SYS_COLS
+/** System Versioning */
+#define DATA_VERS_START 16384U /* start system field */
+#define DATA_VERS_END 32768U /* end system field */
+/** system-versioned user data column */
+#define DATA_VERSIONED (DATA_VERS_START|DATA_VERS_END)
+
/** Check whether locking is disabled (never). */
#define dict_table_is_locking_disabled(table) false
@@ -355,9 +365,9 @@ dtype_form_prtype(ulint old_prtype, ulint charset_coll)
Determines if a MySQL string type is a subset of UTF-8. This function
may return false negatives, in case further character-set collation
codes are introduced in MySQL later.
-@return TRUE if a subset of UTF-8 */
+@return whether a subset of UTF-8 */
UNIV_INLINE
-ibool
+bool
dtype_is_utf8(
/*==========*/
ulint prtype);/*!< in: precise data type */
@@ -531,8 +541,24 @@ struct dtype_t{
in bytes */
unsigned mbmaxlen:3; /*!< maximum length of a character,
in bytes */
+
+ /** @return whether this is system versioned user field
+ (both DATA_VERS_START and DATA_VERS_END are set in prtype) */
+ bool is_versioned() const
+ {
+ return (prtype & DATA_VERSIONED) == DATA_VERSIONED;
+ }
+ /** @return whether this is the system field start
+ (of the DATA_VERSIONED bits, only DATA_VERS_START is set) */
+ bool vers_sys_start() const
+ {
+ return DATA_VERS_START == (prtype & DATA_VERSIONED);
+ }
+ /** @return whether this is the system field end
+ (of the DATA_VERSIONED bits, only DATA_VERS_END is set) */
+ bool vers_sys_end() const
+ {
+ return DATA_VERS_END == (prtype & DATA_VERSIONED);
+ }
};
+/** The DB_TRX_ID,DB_ROLL_PTR values for "no history is available" */
+extern const byte reset_trx_id[DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
+
#include "data0type.ic"
#endif
diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic
index 59f8c75fd65..56a588562ee 100644
--- a/storage/innobase/include/data0type.ic
+++ b/storage/innobase/include/data0type.ic
@@ -43,9 +43,9 @@ dtype_get_charset_coll(
Determines if a MySQL string type is a subset of UTF-8. This function
may return false negatives, in case further character-set collation
codes are introduced in MySQL later.
-@return TRUE if a subset of UTF-8 */
+@return whether a subset of UTF-8 */
UNIV_INLINE
-ibool
+bool
dtype_is_utf8(
/*==========*/
ulint prtype) /*!< in: precise data type */
@@ -58,10 +58,10 @@ dtype_is_utf8(
case 33: /* utf8_general_ci */
case 83: /* utf8_bin */
case 254: /* utf8_general_cs */
- return(TRUE);
+ return true;
}
- return(FALSE);
+ return false;
}
/*********************************************************************//**
@@ -235,9 +235,8 @@ dtype_new_store_for_order_and_null_size(
ulint prefix_len)/*!< in: prefix length to
replace type->len, or 0 */
{
-#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
-#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
+ compile_time_assert(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+
ulint len;
ut_ad(type);
@@ -280,10 +279,7 @@ dtype_read_for_order_and_null_size(
dtype_t* type, /*!< in: type struct */
const byte* buf) /*!< in: buffer for stored type order info */
{
-#if 4 != DATA_ORDER_NULL_TYPE_BUF_SIZE
-# error "4 != DATA_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
-
+ compile_time_assert(4 == DATA_ORDER_NULL_TYPE_BUF_SIZE);
type->mtype = buf[0] & 63;
type->prtype = buf[1];
@@ -309,11 +305,7 @@ dtype_new_read_for_order_and_null_size(
dtype_t* type, /*!< in: type struct */
const byte* buf) /*!< in: buffer for stored type order info */
{
- ulint charset_coll;
-
-#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
-#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
-#endif
+ compile_time_assert(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
type->mtype = buf[0] & 63;
type->prtype = buf[1];
@@ -328,7 +320,7 @@ dtype_new_read_for_order_and_null_size(
type->len = mach_read_from_2(buf + 2);
- charset_coll = mach_read_from_2(buf + 4) & CHAR_COLL_MASK;
+ ulint charset_coll = mach_read_from_2(buf + 4) & CHAR_COLL_MASK;
if (dtype_is_string_type(type->mtype)) {
ut_a(charset_coll <= MAX_CHAR_COLL_NUM);
diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h
index d6de7dcf71b..25aced44b2e 100644
--- a/storage/innobase/include/dict0boot.h
+++ b/storage/innobase/include/dict0boot.h
@@ -122,7 +122,7 @@ dict_is_sys_table(
/* The ids for the basic system tables and their indexes */
#define DICT_TABLES_ID 1
#define DICT_COLUMNS_ID 2
-#define DICT_INDEXES_ID 3
+#define DICT_INDEXES_ID dict_index_t::DICT_INDEXES_ID /* 3 */
#define DICT_FIELDS_ID 4
/* The following is a secondary index on SYS_TABLES */
#define DICT_TABLE_IDS_ID 5
diff --git a/storage/innobase/include/dict0boot.ic b/storage/innobase/include/dict0boot.ic
index e40c3f844e3..845a0a3888d 100644
--- a/storage/innobase/include/dict0boot.ic
+++ b/storage/innobase/include/dict0boot.ic
@@ -58,10 +58,7 @@ dict_sys_read_row_id(
/*=================*/
const byte* field) /*!< in: record field */
{
-#if DATA_ROW_ID_LEN != 6
-# error "DATA_ROW_ID_LEN != 6"
-#endif
-
+ compile_time_assert(DATA_ROW_ID_LEN == 6);
return(mach_read_from_6(field));
}
@@ -74,10 +71,7 @@ dict_sys_write_row_id(
byte* field, /*!< in: record field */
row_id_t row_id) /*!< in: row id */
{
-#if DATA_ROW_ID_LEN != 6
-# error "DATA_ROW_ID_LEN != 6"
-#endif
-
+ compile_time_assert(DATA_ROW_ID_LEN == 6);
mach_write_to_6(field, row_id);
}
diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h
index 12c78862261..dc48aa59809 100644
--- a/storage/innobase/include/dict0crea.h
+++ b/storage/innobase/include/dict0crea.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -50,6 +50,7 @@ tab_create_graph_create(
/** Creates an index create graph.
@param[in] index index to create, built as a memory data structure
+@param[in] table table name
@param[in,out] heap heap where created
@param[in] add_v new virtual columns added in the same clause with
add index
@@ -57,8 +58,9 @@ tab_create_graph_create(
ind_node_t*
ind_create_graph_create(
dict_index_t* index,
+ const char* table,
mem_heap_t* heap,
- const dict_add_v_col_t* add_v);
+ const dict_add_v_col_t* add_v = NULL);
/***********************************************************//**
Creates a table. This is a high-level function used in SQL execution graphs.
@@ -68,15 +70,6 @@ dict_create_table_step(
/*===================*/
que_thr_t* thr); /*!< in: query thread */
-/** Builds a tablespace to contain a table, using file-per-table=1.
-@param[in,out] table Table to build in its own tablespace.
-@param[in] node Table create node
-@return DB_SUCCESS or error code */
-dberr_t
-dict_build_tablespace_for_table(
- dict_table_t* table,
- tab_node_t* node);
-
/** Assign a new table ID and put it into the table cache and the transaction.
@param[in,out] table Table that needs an ID
@param[in,out] trx Transaction */
@@ -151,14 +144,6 @@ dict_create_index_tree_in_mem(
dict_index_t* index, /*!< in/out: index */
const trx_t* trx); /*!< in: InnoDB transaction handle */
-/*******************************************************************//**
-Drops the index tree but don't update SYS_INDEXES table. */
-void
-dict_drop_index_tree_in_mem(
-/*========================*/
- const dict_index_t* index, /*!< in: index */
- ulint page_no);/*!< in: index page-no */
-
/****************************************************************//**
Creates the foreign key constraints system tables inside InnoDB
at server bootstrap or server start if they are not found or are
@@ -317,6 +302,7 @@ struct ind_node_t{
dict_index_t* index; /*!< index to create, built as a
memory data structure with
dict_mem_... functions */
+ const char* table_name; /*!< table name */
ins_node_t* ind_def; /*!< child node which does the insert of
the index definition; the row to be
inserted is built by the parent node */
diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
index b5f2b108959..3dcf290a276 100644
--- a/storage/innobase/include/dict0dict.h
+++ b/storage/innobase/include/dict0dict.h
@@ -192,7 +192,7 @@ dict_col_copy_type(
/**********************************************************************//**
Determine bytes of column prefix to be stored in the undo log. Please
-note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix
+note that if !dict_table_has_atomic_blobs(table), no prefix
needs to be stored in the undo log.
@return bytes of column prefix to be stored in the undo log */
UNIV_INLINE
@@ -375,15 +375,6 @@ dict_table_add_system_columns(
mem_heap_t* heap) /*!< in: temporary heap */
MY_ATTRIBUTE((nonnull));
/**********************************************************************//**
-Adds a table object to the dictionary cache. */
-void
-dict_table_add_to_cache(
-/*====================*/
- dict_table_t* table, /*!< in: table */
- bool can_be_evicted, /*!< in: whether can be evicted*/
- mem_heap_t* heap) /*!< in: temporary heap */
- MY_ATTRIBUTE((nonnull));
-/**********************************************************************//**
Removes a table object from the dictionary cache. */
void
dict_table_remove_from_cache(
@@ -577,16 +568,6 @@ dict_foreign_find_index(
happened */
MY_ATTRIBUTE((nonnull(1,3), warn_unused_result));
-/**********************************************************************//**
-Returns a column's name.
-@return column name. NOTE: not guaranteed to stay valid if table is
-modified in any way (columns added, etc.). */
-const char*
-dict_table_get_col_name(
-/*====================*/
- const dict_table_t* table, /*!< in: table */
- ulint col_nr) /*!< in: column number */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Returns a virtual column's name.
@param[in] table table object
@@ -889,14 +870,25 @@ dict_table_get_sys_col(
ulint sys) /*!< in: DATA_ROW_ID, ... */
MY_ATTRIBUTE((nonnull, warn_unused_result));
#else /* UNIV_DEBUG */
-#define dict_table_get_nth_col(table, pos) \
-((table)->cols + (pos))
-#define dict_table_get_sys_col(table, sys) \
-((table)->cols + (table)->n_cols + (sys) \
- - (dict_table_get_n_sys_cols(table)))
+#define dict_table_get_nth_col(table, pos) \
+ (&(table)->cols[pos])
+#define dict_table_get_sys_col(table, sys) \
+ (&(table)->cols[(table)->n_cols + (sys) - DATA_N_SYS_COLS])
/* Get nth virtual columns */
-#define dict_table_get_nth_v_col(table, pos) ((table)->v_cols + (pos))
+#define dict_table_get_nth_v_col(table, pos) (&(table)->v_cols[pos])
#endif /* UNIV_DEBUG */
+/** Get the name of a column identified by its position in a table.
+This is a thin wrapper that forwards to dict_col_t::name().
+@see dict_col_t::name()
+@param[in] table table
+@param[in] col_nr column number in table
+@return column name */
+inline
+const char*
+dict_table_get_col_name(const dict_table_t* table, ulint col_nr)
+{
+ const dict_col_t* col = dict_table_get_nth_col(table, col_nr);
+ return col->name(*table);
+}
+
/********************************************************************//**
Gets the given system column number of a table.
@return column number */
@@ -921,30 +913,21 @@ dict_index_get_min_size(
Check whether the table uses the compact page format.
@return TRUE if table uses the compact page format */
UNIV_INLINE
-ibool
+bool
dict_table_is_comp(
/*===============*/
const dict_table_t* table) /*!< in: table */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Determine the file format of a table.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_table_get_format(
-/*==================*/
- const dict_table_t* table) /*!< in: table */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/********************************************************************//**
-Determine the file format from a dict_table_t::flags.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_tf_get_format(
-/*===============*/
- ulint flags) /*!< in: dict_table_t::flags */
- MY_ATTRIBUTE((warn_unused_result));
+/** Determine if a table uses atomic BLOBs (no locally stored prefix).
+The answer is derived from the ATOMIC_BLOBS bit of dict_table_t::flags.
+@param[in] table InnoDB table
+@return whether BLOBs are atomic */
+inline
+bool
+dict_table_has_atomic_blobs(const dict_table_t* table)
+{
+ return DICT_TF_HAS_ATOMIC_BLOBS(table->flags) != 0;
+}
/** Set the various values in a dict_table_t::flags pointer.
@param[in,out] flags, Pointer to a 4 byte Table Flags
@@ -952,8 +935,7 @@ dict_tf_get_format(
@param[in] zip_ssize Zip Shift Size
@param[in] use_data_dir Table uses DATA DIRECTORY
@param[in] page_compressed Table uses page compression
-@param[in] page_compression_level Page compression level
-@param[in] not_used For future */
+@param[in] page_compression_level Page compression level */
UNIV_INLINE
void
dict_tf_set(
@@ -962,8 +944,7 @@ dict_tf_set(
ulint zip_ssize,
bool use_data_dir,
bool page_compressed,
- ulint page_compression_level,
- ulint not_used);
+ ulint page_compression_level);
/** Convert a 32 bit integer table flags to the 32 bit FSP Flags.
Fsp Flags are written into the tablespace header at the offset
@@ -999,14 +980,8 @@ ulint
dict_table_extent_size(
const dict_table_t* table);
-/** Get the table page size.
-@param[in] table table
-@return compressed page size, or 0 if not compressed */
-UNIV_INLINE
-const page_size_t
-dict_table_page_size(
- const dict_table_t* table)
- MY_ATTRIBUTE((warn_unused_result));
+/** Get the table page size.
+Constructs a page_size_t from the flags of the tablespace that the
+table points to; the argument is parenthesized so that any pointer
+expression can be passed. */
+#define dict_table_page_size(table) page_size_t((table)->space->flags)
/*********************************************************************//**
Obtain exclusive locks on all index trees of the table. This is to prevent
@@ -1098,51 +1073,32 @@ dict_make_room_in_cache(
ulint max_tables, /*!< in: max tables allowed in cache */
ulint pct_check); /*!< in: max percent to check */
-#define BIG_ROW_SIZE 1024
-
-/** Adds an index to the dictionary cache.
-@param[in] table table on which the index is
-@param[in] index index; NOTE! The index memory
- object is freed in this function!
-@param[in] page_no root page number of the index
-@param[in] strict TRUE=refuse to create the index
- if records could be too big to fit in
- an B-tree page
-@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
-dberr_t
-dict_index_add_to_cache(
- dict_table_t* table,
- dict_index_t* index,
- ulint page_no,
- ibool strict)
- MY_ATTRIBUTE((warn_unused_result));
-
/** Clears the virtual column's index list before index is being freed.
@param[in] index Index being freed */
-void
-dict_index_remove_from_v_col_list(
- dict_index_t* index);
+void dict_index_remove_from_v_col_list(dict_index_t* index);
/** Adds an index to the dictionary cache, with possible indexing newly
added column.
-@param[in] table table on which the index is
@param[in] index index; NOTE! The index memory
object is freed in this function!
-@param[in] add_v new virtual column that being added along with
- an add index call
@param[in] page_no root page number of the index
-@param[in] strict TRUE=refuse to create the index
+@param[in] strict true=refuse to create the index
if records could be too big to fit in
an B-tree page
-@return DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION */
-dberr_t
-dict_index_add_to_cache_w_vcol(
- dict_table_t* table,
+@param[out] err DB_SUCCESS, DB_TOO_BIG_RECORD, or DB_CORRUPTION
+@param[in] add_v new virtual column that is being added along with
+ an add index call
+@return the added index
+@retval NULL on error */
+dict_index_t*
+dict_index_add_to_cache(
dict_index_t* index,
- const dict_add_v_col_t* add_v,
ulint page_no,
- ibool strict)
- MY_ATTRIBUTE((warn_unused_result));
+ bool strict = false,
+ dberr_t* err = NULL,
+ const dict_add_v_col_t* add_v = NULL)
+ MY_ATTRIBUTE((nonnull(1)));
+
/********************************************************************//**
Gets the number of fields in the internal representation of an index,
including fields added by the dictionary system.
@@ -1155,6 +1111,7 @@ dict_index_get_n_fields(
representation of index (in
the dictionary cache) */
MY_ATTRIBUTE((nonnull, warn_unused_result));
+
/********************************************************************//**
Gets the number of fields in the internal representation of an index
that uniquely determine the position of an index entry in the index, if
@@ -1281,7 +1238,7 @@ Returns TRUE if the index contains a column or a prefix of that column.
@param[in] n column number
@param[in] is_virtual whether it is a virtual col
@return TRUE if contains the column or its prefix */
-ibool
+bool
dict_index_contains_col_or_prefix(
/*==============================*/
const dict_index_t* index, /*!< in: index */
@@ -1443,42 +1400,15 @@ dict_index_copy_rec_order_prefix(
@param[in,out] heap memory heap for allocation
@return own: data tuple */
dtuple_t*
-dict_index_build_data_tuple_func(
+dict_index_build_data_tuple(
const rec_t* rec,
const dict_index_t* index,
-#ifdef UNIV_DEBUG
bool leaf,
-#endif /* UNIV_DEBUG */
ulint n_fields,
mem_heap_t* heap)
MY_ATTRIBUTE((nonnull, warn_unused_result));
-#ifdef UNIV_DEBUG
-# define dict_index_build_data_tuple(rec, index, leaf, n_fields, heap) \
- dict_index_build_data_tuple_func(rec, index, leaf, n_fields, heap)
-#else /* UNIV_DEBUG */
-# define dict_index_build_data_tuple(rec, index, leaf, n_fields, heap) \
- dict_index_build_data_tuple_func(rec, index, n_fields, heap)
-#endif /* UNIV_DEBUG */
/*********************************************************************//**
-Gets the space id of the root of the index tree.
-@return space id */
-UNIV_INLINE
-ulint
-dict_index_get_space(
-/*=================*/
- const dict_index_t* index) /*!< in: index */
- MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*********************************************************************//**
-Sets the space id of the root of the index tree. */
-UNIV_INLINE
-void
-dict_index_set_space(
-/*=================*/
- dict_index_t* index, /*!< in/out: index */
- ulint space) /*!< in: space id */
- MY_ATTRIBUTE((nonnull));
-/*********************************************************************//**
Gets the page number of the root of the index tree.
@return page number */
UNIV_INLINE
@@ -1860,18 +1790,10 @@ dict_set_corrupted_index_cache_only(
Flags a table with specified space_id corrupted in the table dictionary
cache.
@return TRUE if successful */
-ibool
-dict_set_corrupted_by_space(
-/*========================*/
- ulint space_id); /*!< in: space ID */
+bool dict_set_corrupted_by_space(const fil_space_t* space);
-/** Flag a table with specified space_id encrypted in the data dictionary
-cache
-@param[in] space_id Tablespace id */
-UNIV_INTERN
-void
-dict_set_encrypted_by_space(
- ulint space_id);
+/** Flag a table encrypted in the data dictionary cache. */
+void dict_set_encrypted_by_space(const fil_space_t* space);
/** Sets merge_threshold in the SYS_INDEXES
@param[in,out] index index
@@ -1908,18 +1830,6 @@ dict_tf2_is_valid(
ulint flags,
ulint flags2);
-/********************************************************************//**
-Check if the tablespace for the table has been discarded.
-@return true if the tablespace has been discarded. */
-UNIV_INLINE
-bool
-dict_table_is_discarded(
-/*====================*/
- const dict_table_t* table) /*!< in: table to check */
- MY_ATTRIBUTE((warn_unused_result));
-
-#define dict_table_is_temporary(table) (table)->is_temporary()
-
/*********************************************************************//**
This function should be called whenever a page is successfully
compressed. Updates the compression padding information. */
@@ -1953,8 +1863,6 @@ dict_tf_to_row_format_string(
/*=========================*/
ulint table_flag); /*!< in: row format setting */
-#define dict_col_is_virtual(col) (col)->is_virtual()
-
/** encode number of columns and number of virtual columns in one
4 bytes value. We could do this because the number of columns in
InnoDB is limited to 1017
diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
index fe2f8e32b1a..3bcd1abfbbf 100644
--- a/storage/innobase/include/dict0dict.ic
+++ b/storage/innobase/include/dict0dict.ic
@@ -270,7 +270,6 @@ dict_index_is_clust(
const dict_index_t* index) /*!< in: index */
{
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
return(index->type & DICT_CLUSTERED);
}
@@ -312,7 +311,7 @@ dict_index_is_spatial(
ut_ad(index);
ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
- return(index->type & DICT_SPATIAL);
+ return ulint(UNIV_EXPECT(index->type & DICT_SPATIAL, 0));
}
/********************************************************************//**
@@ -356,8 +355,10 @@ dict_table_get_n_user_cols(
const dict_table_t* table) /*!< in: table */
{
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(table->n_cols - dict_table_get_n_sys_cols(table));
+ /* n_cols counts stored columns only. A table may contain
+ virtual columns and no user-specified stored columns at all. */
+ ut_ad(table->n_cols >= DATA_N_SYS_COLS);
+ return unsigned(table->n_cols) - DATA_N_SYS_COLS;
}
/********************************************************************//**
@@ -489,8 +490,8 @@ dict_table_get_nth_v_col(
ut_ad(table);
ut_ad(pos < table->n_v_def);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- return(static_cast<dict_v_col_t*>(table->v_cols) + pos);
+ ut_ad(!table->v_cols[pos].m_col.is_instant());
+ return &table->v_cols[pos];
}
/********************************************************************//**
@@ -504,14 +505,8 @@ dict_table_get_sys_col(
ulint sys) /*!< in: DATA_ROW_ID, ... */
{
dict_col_t* col;
-
- ut_ad(table);
- ut_ad(sys < dict_table_get_n_sys_cols(table));
- ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
- col = dict_table_get_nth_col(table, table->n_cols
- - dict_table_get_n_sys_cols(table)
- + sys);
+ col = dict_table_get_nth_col(table,
+ dict_table_get_sys_col_no(table, sys));
ut_ad(col->mtype == DATA_SYS);
ut_ad(col->prtype == (sys | DATA_NOT_NULL));
@@ -530,28 +525,23 @@ dict_table_get_sys_col_no(
ulint sys) /*!< in: DATA_ROW_ID, ... */
{
ut_ad(table);
- ut_ad(sys < dict_table_get_n_sys_cols(table));
+ ut_ad(sys < DATA_N_SYS_COLS);
ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
- return(table->n_cols - dict_table_get_n_sys_cols(table) + sys);
+ return unsigned(table->n_cols) + (sys - DATA_N_SYS_COLS);
}
/********************************************************************//**
Check whether the table uses the compact page format.
@return TRUE if table uses the compact page format */
UNIV_INLINE
-ibool
+bool
dict_table_is_comp(
/*===============*/
const dict_table_t* table) /*!< in: table */
{
ut_ad(table);
-
-#if DICT_TF_COMPACT != 1
-#error "DICT_TF_COMPACT must be 1"
-#endif
-
- return(table->flags & DICT_TF_COMPACT);
+ return (table->flags & DICT_TF_COMPACT) != 0;
}
/************************************************************************
@@ -586,8 +576,8 @@ dict_tf_is_valid_not_redundant(ulint flags)
for the uncompressed page format */
return(false);
} else if (zip_ssize > PAGE_ZIP_SSIZE_MAX
- || zip_ssize > UNIV_PAGE_SIZE_SHIFT
- || UNIV_PAGE_SIZE_SHIFT > UNIV_ZIP_SIZE_SHIFT_MAX) {
+ || zip_ssize > srv_page_size_shift
+ || srv_page_size_shift > UNIV_ZIP_SIZE_SHIFT_MAX) {
/* KEY_BLOCK_SIZE is out of bounds, or
ROW_FORMAT=COMPRESSED is not supported with this
innodb_page_size (only up to 16KiB) */
@@ -627,7 +617,7 @@ dict_tf_is_valid(
bit. For ROW_FORMAT=REDUNDANT, only the DATA_DIR flag
(which we cleared above) can be set. If any other flags
are set, the flags are invalid. */
- return(flags == 0);
+ return(flags == 0 || flags == DICT_TF_MASK_NO_ROLLBACK);
}
return(dict_tf_is_valid_not_redundant(flags));
@@ -683,44 +673,13 @@ dict_tf_get_rec_format(
return(REC_FORMAT_DYNAMIC);
}
-/********************************************************************//**
-Determine the file format from a dict_table_t::flags.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_tf_get_format(
-/*===============*/
- ulint flags) /*!< in: dict_table_t::flags */
-{
- if (DICT_TF_HAS_ATOMIC_BLOBS(flags)) {
- return(UNIV_FORMAT_B);
- }
-
- return(UNIV_FORMAT_A);
-}
-
-/********************************************************************//**
-Determine the file format of a table.
-@return file format version */
-UNIV_INLINE
-ulint
-dict_table_get_format(
-/*==================*/
- const dict_table_t* table) /*!< in: table */
-{
- ut_ad(table);
-
- return(dict_tf_get_format(table->flags));
-}
-
/** Set the various values in a dict_table_t::flags pointer.
@param[in,out] flags, Pointer to a 4 byte Table Flags
@param[in] format File Format
@param[in] zip_ssize Zip Shift Size
@param[in] use_data_dir Table uses DATA DIRECTORY
@param[in] page_compressed Table uses page compression
-@param[in] page_compression_level Page compression level
-@param[in] not_used For future */
+@param[in] page_compression_level Page compression level */
UNIV_INLINE
void
dict_tf_set(
@@ -730,8 +689,7 @@ dict_tf_set(
ulint zip_ssize,
bool use_data_dir,
bool page_compressed,
- ulint page_compression_level,
- ulint not_used)
+ ulint page_compression_level)
{
switch (format) {
case REC_FORMAT_REDUNDANT:
@@ -848,7 +806,8 @@ dict_tf_to_sys_tables_type(
| DICT_TF_MASK_ATOMIC_BLOBS
| DICT_TF_MASK_DATA_DIR
| DICT_TF_MASK_PAGE_COMPRESSION
- | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL);
+ | DICT_TF_MASK_PAGE_COMPRESSION_LEVEL
+ | DICT_TF_MASK_NO_ROLLBACK);
return(type);
}
@@ -872,21 +831,7 @@ dict_tf_get_page_size(
ut_ad(zip_size <= UNIV_ZIP_SIZE_MAX);
- return(page_size_t(zip_size, univ_page_size.logical(), true));
-}
-
-/** Get the table page size.
-@param[in] table table
-@return a structure containing the compressed and uncompressed
-page sizes and a boolean indicating if the page is compressed */
-UNIV_INLINE
-const page_size_t
-dict_table_page_size(
- const dict_table_t* table)
-{
- ut_ad(table != NULL);
-
- return(dict_tf_get_page_size(table->flags));
+ return(page_size_t(zip_size, srv_page_size, true));
}
/*********************************************************************//**
@@ -1177,36 +1122,6 @@ dict_index_get_min_size(
}
/*********************************************************************//**
-Gets the space id of the root of the index tree.
-@return space id */
-UNIV_INLINE
-ulint
-dict_index_get_space(
-/*=================*/
- const dict_index_t* index) /*!< in: index */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- return(index->space);
-}
-
-/*********************************************************************//**
-Sets the space id of the root of the index tree. */
-UNIV_INLINE
-void
-dict_index_set_space(
-/*=================*/
- dict_index_t* index, /*!< in/out: index */
- ulint space) /*!< in: space id */
-{
- ut_ad(index);
- ut_ad(index->magic_n == DICT_INDEX_MAGIC_N);
-
- index->space = unsigned(space);
-}
-
-/*********************************************************************//**
Gets the page number of the root of the index tree.
@return page number */
UNIV_INLINE
@@ -1246,7 +1161,7 @@ ulint
dict_index_get_space_reserve(void)
/*==============================*/
{
- return(UNIV_PAGE_SIZE / 16);
+ return(srv_page_size / 16);
}
/********************************************************************//**
@@ -1376,7 +1291,7 @@ dict_table_is_fts_column(
/**********************************************************************//**
Determine bytes of column prefix to be stored in the undo log. Please
-note if the table format is UNIV_FORMAT_A (< UNIV_FORMAT_B), no prefix
+note that if !dict_table_has_atomic_blobs(table), no prefix
needs to be stored in the undo log.
@return bytes of column prefix to be stored in the undo log */
UNIV_INLINE
@@ -1387,16 +1302,15 @@ dict_max_field_len_store_undo(
const dict_col_t* col) /*!< in: column which index prefix
is based on */
{
- ulint prefix_len = 0;
+ if (!dict_table_has_atomic_blobs(table)) {
+ return(0);
+ }
- if (dict_table_get_format(table) >= UNIV_FORMAT_B)
- {
- prefix_len = col->max_prefix
- ? col->max_prefix
- : DICT_MAX_FIELD_LEN_BY_FORMAT(table);
+ if (col->max_prefix != 0) {
+ return(col->max_prefix);
}
- return(prefix_len);
+ return(REC_VERSION_56_MAX_INDEX_COL_LEN);
}
/** Determine maximum bytes of a virtual column need to be stored
@@ -1416,10 +1330,10 @@ dict_max_v_field_len_store_undo(
/* This calculation conforms to the non-virtual column
maximum log length calculation:
- 1) for UNIV_FORMAT_A, upto REC_ANTELOPE_MAX_INDEX_COL_LEN
- for UNIV_FORMAT_B, upto col->max_prefix or
- 2) REC_VERSION_56_MAX_INDEX_COL_LEN, whichever is less */
- if (dict_table_get_format(table) >= UNIV_FORMAT_B) {
+ 1) if no atomic BLOB, up to REC_ANTELOPE_MAX_INDEX_COL_LEN
+ 2) if atomic BLOB, up to col->max_prefix or
+ REC_VERSION_56_MAX_INDEX_COL_LEN, whichever is less */
+ if (dict_table_has_atomic_blobs(table)) {
if (DATA_BIG_COL(col) && col->max_prefix > 0) {
max_log_len = col->max_prefix;
} else {
@@ -1462,18 +1376,6 @@ dict_table_is_corrupted(
return(table->corrupted);
}
-/********************************************************************//**
-Check if the tablespace for the table has been discarded.
-@return true if the tablespace has been discarded. */
-UNIV_INLINE
-bool
-dict_table_is_discarded(
-/*====================*/
- const dict_table_t* table) /*!< in: table to check */
-{
- return(DICT_TF2_FLAG_IS_SET(table, DICT_TF2_DISCARDED));
-}
-
/** Check if the table is found is a file_per_table tablespace.
This test does not use table flags2 since some REDUNDANT tables in the
system tablespace may have garbage in the MIX_LEN field where flags2 is
@@ -1495,7 +1397,8 @@ bool
dict_table_is_file_per_table(
const dict_table_t* table) /*!< in: table to check */
{
- return !is_system_tablespace(table->space);
+ return table->space != fil_system.sys_space
+ && table->space != fil_system.temp_space;
}
/** Acquire the table handle. */
diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h
index 9ba42007568..9b798353afd 100644
--- a/storage/innobase/include/dict0load.h
+++ b/storage/innobase/include/dict0load.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -57,15 +57,6 @@ enum dict_system_id_t {
SYS_NUM_SYSTEM_TABLES
};
-/** Status bit for dict_process_sys_tables_rec_and_mtr_commit() */
-enum dict_table_info_t {
- DICT_TABLE_LOAD_FROM_RECORD = 0,/*!< Directly populate a dict_table_t
- structure with information from
- a SYS_TABLES record */
- DICT_TABLE_LOAD_FROM_CACHE = 1 /*!< Check first whether dict_table_t
- is in the cache, if so, return it */
-};
-
/** Check each tablespace found in the data dictionary.
Look at each table defined in SYS_TABLES that has a space_id > 0.
If the tablespace is not yet in the fil_system cache, look up the
@@ -201,10 +192,7 @@ dict_process_sys_tables_rec_and_mtr_commit(
mem_heap_t* heap, /*!< in: temporary memory heap */
const rec_t* rec, /*!< in: SYS_TABLES record */
dict_table_t** table, /*!< out: dict_table_t to fill */
- dict_table_info_t status, /*!< in: status bit controls
- options such as whether we shall
- look for dict_table_t from cache
- first */
+ bool cached, /*!< in: whether to load from cache */
mtr_t* mtr); /*!< in/out: mini-transaction,
will be committed */
/********************************************************************//**
@@ -245,7 +233,6 @@ information
@return error message, or NULL on success */
const char*
dict_process_sys_virtual_rec(
- mem_heap_t* heap,
const rec_t* rec,
table_id_t* table_id,
ulint* pos,
diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
index fc120149c5f..3e06def55b0 100644
--- a/storage/innobase/include/dict0mem.h
+++ b/storage/innobase/include/dict0mem.h
@@ -49,7 +49,6 @@ Created 1/8/1996 Heikki Tuuri
#include "os0once.h"
#include "ut0new.h"
#include "fil0fil.h"
-#include <my_crypt.h>
#include "fil0crypt.h"
#include <set>
#include <algorithm>
@@ -110,7 +109,7 @@ are described in fsp0fsp.h. */
/** dict_table_t::flags bit 0 is equal to 0 if the row format = Redundant */
#define DICT_TF_REDUNDANT 0 /*!< Redundant row format. */
/** dict_table_t::flags bit 0 is equal to 1 if the row format = Compact */
-#define DICT_TF_COMPACT 1 /*!< Compact row format. */
+#define DICT_TF_COMPACT 1U /*!< Compact row format. */
/** This bitmask is used in SYS_TABLES.N_COLS to set and test whether
the Compact page format is used, i.e ROW_FORMAT != REDUNDANT */
@@ -122,9 +121,10 @@ the Compact page format is used, i.e ROW_FORMAT != REDUNDANT */
/** Width of the ZIP_SSIZE flag */
#define DICT_TF_WIDTH_ZIP_SSIZE 4
-/** Width of the ATOMIC_BLOBS flag. The Antelope file formats broke up
-BLOB and TEXT fields, storing the first 768 bytes in the clustered index.
-Barracuda row formats store the whole blob or text field off-page atomically.
+/** Width of the ATOMIC_BLOBS flag. ROW_FORMAT=REDUNDANT and
+ROW_FORMAT=COMPACT break up BLOB and TEXT fields, storing the first 768 bytes
+in the clustered index. ROW_FORMAT=DYNAMIC and ROW_FORMAT=COMPRESSED
+store the whole blob or text field off-page atomically.
Secondary indexes are created from this external data using row_ext_t
to cache the BLOB prefixes. */
#define DICT_TF_WIDTH_ATOMIC_BLOBS 1
@@ -142,10 +142,10 @@ Width of the page compression flag
#define DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL 4
/**
-Width of atomic writes flag
-DEFAULT=0, ON = 1, OFF = 2
+Width of the NO_ROLLBACK flag (3=yes; the values 1,2 used to stand for
+ATOMIC_WRITES=ON and ATOMIC_WRITES=OFF between MariaDB 10.1.0 and 10.2.3)
*/
-#define DICT_TF_WIDTH_ATOMIC_WRITES 2
+#define DICT_TF_WIDTH_NO_ROLLBACK 2
/** Width of all the currently known table flags */
#define DICT_TF_BITS (DICT_TF_WIDTH_COMPACT \
@@ -153,7 +153,8 @@ DEFAULT=0, ON = 1, OFF = 2
+ DICT_TF_WIDTH_ATOMIC_BLOBS \
+ DICT_TF_WIDTH_DATA_DIR \
+ DICT_TF_WIDTH_PAGE_COMPRESSION \
- + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)
+ + DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL \
+ + DICT_TF_WIDTH_NO_ROLLBACK)
/** Zero relative shift position of the COMPACT field */
#define DICT_TF_POS_COMPACT 0
@@ -172,11 +173,11 @@ DEFAULT=0, ON = 1, OFF = 2
/** Zero relative shift position of the PAGE_COMPRESSION_LEVEL field */
#define DICT_TF_POS_PAGE_COMPRESSION_LEVEL (DICT_TF_POS_PAGE_COMPRESSION \
+ DICT_TF_WIDTH_PAGE_COMPRESSION)
-/** Zero relative shift position of the ATOMIC_WRITES field */
-#define DICT_TF_POS_ATOMIC_WRITES (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \
+/** Zero relative shift position of the NO_ROLLBACK field */
+#define DICT_TF_POS_NO_ROLLBACK (DICT_TF_POS_PAGE_COMPRESSION_LEVEL \
+ DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)
-#define DICT_TF_POS_UNUSED (DICT_TF_POS_ATOMIC_WRITES \
- + DICT_TF_WIDTH_ATOMIC_WRITES)
+#define DICT_TF_POS_UNUSED (DICT_TF_POS_NO_ROLLBACK \
+ + DICT_TF_WIDTH_NO_ROLLBACK)
/** Bit mask of the COMPACT field */
#define DICT_TF_MASK_COMPACT \
@@ -202,10 +203,10 @@ DEFAULT=0, ON = 1, OFF = 2
#define DICT_TF_MASK_PAGE_COMPRESSION_LEVEL \
((~(~0U << DICT_TF_WIDTH_PAGE_COMPRESSION_LEVEL)) \
<< DICT_TF_POS_PAGE_COMPRESSION_LEVEL)
-/** Bit mask of the ATOMIC_WRITES field */
-#define DICT_TF_MASK_ATOMIC_WRITES \
- ((~(~0U << DICT_TF_WIDTH_ATOMIC_WRITES)) \
- << DICT_TF_POS_ATOMIC_WRITES)
+/** Bit mask of the NO_ROLLBACK field */
+#define DICT_TF_MASK_NO_ROLLBACK \
+ ((~(~0U << DICT_TF_WIDTH_NO_ROLLBACK)) \
+ << DICT_TF_POS_NO_ROLLBACK)
/** Return the value of the COMPACT field */
#define DICT_TF_GET_COMPACT(flags) \
@@ -231,10 +232,6 @@ DEFAULT=0, ON = 1, OFF = 2
#define DICT_TF_GET_PAGE_COMPRESSION_LEVEL(flags) \
((flags & DICT_TF_MASK_PAGE_COMPRESSION_LEVEL) \
>> DICT_TF_POS_PAGE_COMPRESSION_LEVEL)
-/** Return the value of the ATOMIC_WRITES field */
-#define DICT_TF_GET_ATOMIC_WRITES(flags) \
- ((flags & DICT_TF_MASK_ATOMIC_WRITES) \
- >> DICT_TF_POS_ATOMIC_WRITES)
/* @} */
@@ -309,22 +306,13 @@ dict_table_t*
dict_mem_table_create(
/*==================*/
const char* name, /*!< in: table name */
- ulint space, /*!< in: space where the clustered index
- of the table is placed */
+ fil_space_t* space, /*!< in: tablespace */
ulint n_cols, /*!< in: total number of columns
including virtual and non-virtual
columns */
ulint n_v_cols, /*!< in: number of virtual columns */
ulint flags, /*!< in: table flags */
ulint flags2); /*!< in: table flags2 */
-/**********************************************************************//**
-Determines if a table belongs to a system database
-@return */
-UNIV_INTERN
-bool
-dict_mem_table_is_system(
-/*==================*/
- char *name); /*!< in: table name */
/****************************************************************//**
Free a table memory object. */
void
@@ -408,11 +396,7 @@ dict_mem_fill_index_struct(
/*=======================*/
dict_index_t* index, /*!< out: index to be filled */
mem_heap_t* heap, /*!< in: memory heap */
- const char* table_name, /*!< in: table name */
const char* index_name, /*!< in: index name */
- ulint space, /*!< in: space where the index tree is
- placed, ignored if the index is of
- the clustered type */
ulint type, /*!< in: DICT_UNIQUE,
DICT_CLUSTERED, ... ORed */
ulint n_fields); /*!< in: number of fields */
@@ -422,11 +406,8 @@ Creates an index memory object.
dict_index_t*
dict_mem_index_create(
/*==================*/
- const char* table_name, /*!< in: table name */
+ dict_table_t* table, /*!< in: table */
const char* index_name, /*!< in: index name */
- ulint space, /*!< in: space where the index tree is
- placed, ignored if the index is of
- the clustered type */
ulint type, /*!< in: DICT_UNIQUE,
DICT_CLUSTERED, ... ORed */
ulint n_fields); /*!< in: number of fields */
@@ -563,36 +544,6 @@ private:
const char* m_name;
};
-/** Table name wrapper for pretty-printing */
-struct table_name_t
-{
- /** The name in internal representation */
- char* m_name;
-
- /** @return the end of the schema name */
- const char* dbend() const
- {
- const char* sep = strchr(m_name, '/');
- ut_ad(sep);
- return sep;
- }
-
- /** @return the length of the schema name, in bytes */
- size_t dblen() const { return dbend() - m_name; }
-
- /** Determine the filename-safe encoded table name.
- @return the filename-safe encoded table name */
- const char* basename() const { return dbend() + 1; }
-
- /** The start of the table basename suffix for partitioned tables */
- static const char part_suffix[4];
-
- /** Determine the partition or subpartition name suffix.
- @return the partition name
- @retval NULL if the table is not partitioned */
- const char* part() const { return strstr(basename(), part_suffix); }
-};
-
/** Data structure for a column in a table */
struct dict_col_t{
/*----------------------*/
@@ -634,14 +585,74 @@ struct dict_col_t{
of an index */
unsigned max_prefix:12; /*!< maximum index prefix length on
this column. Our current max limit is
- 3072 for Barracuda table */
-
- /** @return whether this is a virtual column */
- bool is_virtual() const { return prtype & DATA_VIRTUAL; }
+ 3072 (REC_VERSION_56_MAX_INDEX_COL_LEN)
+ bytes. */
/** Detach the column from an index.
@param[in] index index to be detached from */
inline void detach(const dict_index_t& index);
+
+ /** Data for instantly added columns */
+ struct def_t {
+ /** original default value of instantly added column */
+ const void* data;
+ /** len of data, or UNIV_SQL_DEFAULT if unavailable */
+ ulint len;
+ } def_val;
+
+ /** Retrieve the column name.
+ @param[in] table the table (presumably the one containing this column) */
+ const char* name(const dict_table_t& table) const;
+
+ /** @return whether this is a virtual column */
+ bool is_virtual() const { return prtype & DATA_VIRTUAL; }
+ /** @return whether NULL is an allowed value for this column */
+ bool is_nullable() const { return !(prtype & DATA_NOT_NULL); }
+
+ /** @return whether table of this system field is TRX_ID-based */
+ bool vers_native() const
+ {
+ ut_ad(vers_sys_start() || vers_sys_end());
+ ut_ad(mtype == DATA_INT || mtype == DATA_FIXBINARY);
+ return mtype == DATA_INT;
+ }
+ /** @return whether this is system versioned */
+ bool is_versioned() const { return !(~prtype & DATA_VERSIONED); }
+ /** @return whether this is the system version start */
+ bool vers_sys_start() const
+ {
+ return (prtype & DATA_VERSIONED) == DATA_VERS_START;
+ }
+ /** @return whether this is the system version end */
+ bool vers_sys_end() const
+ {
+ return (prtype & DATA_VERSIONED) == DATA_VERS_END;
+ }
+
+ /** @return whether this is an instantly-added column */
+ bool is_instant() const
+ {
+ DBUG_ASSERT(def_val.len != UNIV_SQL_DEFAULT || !def_val.data);
+ return def_val.len != UNIV_SQL_DEFAULT;
+ }
+ /** Get the default value of an instantly-added column.
+ @param[out] len value length (in bytes), or UNIV_SQL_NULL
+ @return default value
+ @retval NULL if the default value is SQL NULL (len=UNIV_SQL_NULL) */
+ const byte* instant_value(ulint* len) const
+ {
+ DBUG_ASSERT(is_instant());
+ *len = def_val.len;
+ return static_cast<const byte*>(def_val.data);
+ }
+
+ /** Remove the 'instant ADD' status of the column */
+ void remove_instant()
+ {
+ DBUG_ASSERT(is_instant());
+ def_val.len = UNIV_SQL_DEFAULT;
+ def_val.data = NULL;
+ }
};
/** Index information put in a list of virtual column structure. Index
@@ -653,6 +664,9 @@ struct dict_v_idx_t {
/** position in this index */
ulint nth_field;
+
+ dict_v_idx_t(dict_index_t* index, ulint nth_field)
+ : index(index), nth_field(nth_field) {}
};
/** Index list to put in dict_v_col_t */
@@ -722,17 +736,17 @@ files would be at risk! */
/** Find out maximum indexed column length by its table format.
For ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT, the maximum
field length is REC_ANTELOPE_MAX_INDEX_COL_LEN - 1 (767). For
-Barracuda row formats COMPRESSED and DYNAMIC, the length could
+ROW_FORMAT=COMPRESSED and ROW_FORMAT=DYNAMIC, the length could
be REC_VERSION_56_MAX_INDEX_COL_LEN (3072) bytes */
-#define DICT_MAX_FIELD_LEN_BY_FORMAT(table) \
- ((dict_table_get_format(table) < UNIV_FORMAT_B) \
- ? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \
- : REC_VERSION_56_MAX_INDEX_COL_LEN)
+#define DICT_MAX_FIELD_LEN_BY_FORMAT(table) \
+ (dict_table_has_atomic_blobs(table) \
+ ? REC_VERSION_56_MAX_INDEX_COL_LEN \
+ : REC_ANTELOPE_MAX_INDEX_COL_LEN - 1)
-#define DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags) \
- ((DICT_TF_HAS_ATOMIC_BLOBS(flags) < UNIV_FORMAT_B) \
- ? (REC_ANTELOPE_MAX_INDEX_COL_LEN - 1) \
- : REC_VERSION_56_MAX_INDEX_COL_LEN)
+#define DICT_MAX_FIELD_LEN_BY_FORMAT_FLAG(flags) \
+ (DICT_TF_HAS_ATOMIC_BLOBS(flags) \
+ ? REC_VERSION_56_MAX_INDEX_COL_LEN \
+ : REC_ANTELOPE_MAX_INDEX_COL_LEN - 1)
/** Defines the maximum fixed length column size */
#define DICT_MAX_FIXED_COL_LEN DICT_ANTELOPE_MAX_INDEX_COL_LEN
@@ -759,6 +773,15 @@ struct dict_field_t{
/** Zero-initialize all fields */
dict_field_t() : col(NULL), name(NULL), prefix_len(0), fixed_len(0) {}
+
+ /** Check whether two index fields are equivalent.
+ @param[in] other the other index field
+ @return whether the index fields are equivalent */
+ bool same(const dict_field_t& other) const
+ {
+ return(prefix_len == other.prefix_len
+ && fixed_len == other.fixed_len);
+ }
};
/**********************************************************************//**
@@ -834,10 +857,7 @@ struct dict_index_t{
index_id_t id; /*!< id of the index */
mem_heap_t* heap; /*!< memory heap */
id_name_t name; /*!< index name */
- const char* table_name;/*!< table name */
dict_table_t* table; /*!< back pointer to table */
- unsigned space:32;
- /*!< space where the index tree is placed */
unsigned page:32;/*!< index tree root page number */
unsigned merge_threshold:6;
/*!< In the pessimistic delete, if the page
@@ -853,8 +873,8 @@ struct dict_index_t{
in a clustered index record, if the fields
before it are known to be of a fixed size,
0 otherwise */
-#if (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH
-# error (1<<MAX_KEY_LENGTH_BITS) < MAX_KEY_LENGTH
+#if (1<<MAX_KEY_LENGTH_BITS) < HA_MAX_KEY_LENGTH
+# error (1<<MAX_KEY_LENGTH_BITS) < HA_MAX_KEY_LENGTH
#endif
unsigned n_user_defined_cols:10;
/*!< number of columns the user defined to
@@ -877,6 +897,17 @@ struct dict_index_t{
unsigned n_def:10;/*!< number of fields defined so far */
unsigned n_fields:10;/*!< number of fields in the index */
unsigned n_nullable:10;/*!< number of nullable fields */
+ unsigned n_core_fields:10;/*!< number of fields in the index
+ (before the first instant ADD COLUMN) */
+ /** number of bytes of null bits in ROW_FORMAT!=REDUNDANT node pointer
+ records; usually equal to UT_BITS_IN_BYTES(n_nullable), but
+ can be less in clustered indexes with instant ADD COLUMN */
+ unsigned n_core_null_bytes:8;
+ /** magic value signalling that n_core_null_bytes was not
+ initialized yet */
+ static const unsigned NO_CORE_NULL_BYTES = 0xff;
+ /** The clustered index ID of the hard-coded SYS_INDEXES table. */
+ static const unsigned DICT_INDEXES_ID = 3;
unsigned cached:1;/*!< TRUE if the index object is in the
dictionary cache */
unsigned to_be_dropped:1;
@@ -1000,6 +1031,10 @@ struct dict_index_t{
uncommitted = !committed;
}
+ /** Notify that the index pages are going to be modified.
+ @param[in,out] mtr mini-transaction */
+ inline void set_modified(mtr_t& mtr) const;
+
/** @return whether this index is readable
@retval true normally
@retval false if this is a single-table tablespace
@@ -1007,6 +1042,9 @@ struct dict_index_t{
page cannot be read or decrypted */
inline bool is_readable() const;
+ /** @return whether instant ADD COLUMN is in effect */
+ inline bool is_instant() const;
+
/** @return whether the index is the primary key index
(not the clustered index of the change buffer) */
bool is_primary() const
@@ -1031,6 +1069,69 @@ struct dict_index_t{
n_fields = 0;
}
}
+
+ /** Determine how many fields of a given prefix can be set NULL.
+ @param[in] n_prefix number of fields in the prefix
+ @return number of fields 0..n_prefix-1 that can be set NULL */
+ unsigned get_n_nullable(ulint n_prefix) const
+ {
+ DBUG_ASSERT(n_prefix > 0);
+ DBUG_ASSERT(n_prefix <= n_fields);
+ unsigned n = n_nullable;
+ for (; n_prefix < n_fields; n_prefix++) {
+ const dict_col_t* col = fields[n_prefix].col;
+ DBUG_ASSERT(!col->is_virtual());
+ n -= col->is_nullable();
+ }
+ DBUG_ASSERT(n < n_def);
+ return n;
+ }
+
+ /** Get the default value of an instantly-added clustered index field.
+ @param[in] n instantly added field position
+ @param[out] len value length (in bytes), or UNIV_SQL_NULL
+ @return default value
+ @retval NULL if the default value is SQL NULL (len=UNIV_SQL_NULL) */
+ const byte* instant_field_value(ulint n, ulint* len) const
+ {
+ DBUG_ASSERT(is_instant() || id == DICT_INDEXES_ID);
+ DBUG_ASSERT(n + (id == DICT_INDEXES_ID) >= n_core_fields);
+ DBUG_ASSERT(n < n_fields);
+ return fields[n].col->instant_value(len);
+ }
+
+ /** Adjust clustered index metadata for instant ADD COLUMN.
+ @param[in] instant clustered index definition after instant ADD COLUMN */
+ void instant_add_field(const dict_index_t& instant);
+
+ /** Remove the 'instant ADD' status of a clustered index.
+ Protected by index root page x-latch or table X-lock. */
+ void remove_instant()
+ {
+ DBUG_ASSERT(is_primary());
+ if (!is_instant()) {
+ return;
+ }
+ for (unsigned i = n_core_fields; i < n_fields; i++) {
+ fields[i].col->remove_instant();
+ }
+ n_core_fields = n_fields;
+ n_core_null_bytes = UT_BITS_IN_BYTES(unsigned(n_nullable));
+ }
+
+ /** Check if record in clustered index is historical row.
+ @param[in] rec clustered row
+ @param[in] offsets offsets
+ @return true if row is historical */
+ bool
+ vers_history_row(const rec_t* rec, const ulint* offsets);
+
+ /** Check if record in secondary index is historical row.
+ @param[in] rec record in a secondary index
+ @param[out] history_row true if row is historical
+ @return true on error */
+ bool
+ vers_history_row(const rec_t* rec, bool &history_row);
};
/** Detach a column from an index.
@@ -1396,6 +1497,11 @@ struct dict_table_t {
@return whether the last handle was released */
inline bool release();
+ /** @return whether the NO_ROLLBACK flag is set (all bits of DICT_TF_MASK_NO_ROLLBACK are set in flags) */
+ bool no_rollback() const
+ {
+ return !(~unsigned(flags) & DICT_TF_MASK_NO_ROLLBACK);
+ }
/** @return whether this is a temporary table */
bool is_temporary() const
{
@@ -1409,9 +1515,66 @@ struct dict_table_t {
page cannot be read or decrypted */
bool is_readable() const
{
+ ut_ad(file_unreadable || space);
return(UNIV_LIKELY(!file_unreadable));
}
+ /** @return whether instant ADD COLUMN is in effect */
+ bool is_instant() const
+ {
+ return(UT_LIST_GET_FIRST(indexes)->is_instant());
+ }
+
+ /** @return whether the table supports instant ADD COLUMN */
+ bool supports_instant() const
+ {
+ return(!(flags & DICT_TF_MASK_ZIP_SSIZE));
+ }
+
+ /** Adjust metadata for instant ADD COLUMN.
+ @param[in] table table definition after instant ADD COLUMN */
+ void instant_add_column(const dict_table_t& table);
+
+ /** Roll back instant_add_column().
+ @param[in] old_n_cols original n_cols
+ @param[in] old_cols original cols
+ @param[in] old_col_names original col_names */
+ void rollback_instant(
+ unsigned old_n_cols,
+ dict_col_t* old_cols,
+ const char* old_col_names);
+
+ /** Trim the instantly added columns when an insert into SYS_COLUMNS
+ is rolled back during ALTER TABLE or recovery.
+ @param[in] n number of surviving non-system columns */
+ void rollback_instant(unsigned n);
+
+ /** Add the table definition to the data dictionary cache */
+ void add_to_cache();
+
+ bool versioned() const { return vers_start || vers_end; }
+ bool versioned_by_id() const
+ {
+ return vers_start && cols[vers_start].mtype == DATA_INT;
+ }
+
+ void inc_fk_checks()
+ {
+#ifdef UNIV_DEBUG
+ lint fk_checks= (lint)
+#endif
+ my_atomic_addlint(&n_foreign_key_checks_running, 1);
+ ut_ad(fk_checks >= 0);
+ }
+ void dec_fk_checks()
+ {
+#ifdef UNIV_DEBUG
+ lint fk_checks= (lint)
+#endif
+ my_atomic_addlint(&n_foreign_key_checks_running, ulint(-1));
+ ut_ad(fk_checks > 0);
+ }
+
/** Id of the table. */
table_id_t id;
@@ -1432,8 +1595,10 @@ struct dict_table_t {
/** NULL or the directory path specified by DATA DIRECTORY. */
char* data_dir_path;
- /** Space where the clustered index of the table is placed. */
- uint32_t space;
+ /** The tablespace of the table */
+ fil_space_t* space;
+ /** Tablespace ID */
+ ulint space_id;
/** Stores information about:
1 row format (redundant or compact),
@@ -1532,7 +1697,10 @@ struct dict_table_t {
/** Virtual column names */
const char* v_col_names;
-
+ unsigned vers_start:10;
+ /*!< System Versioning: row start col index */
+ unsigned vers_end:10;
+ /*!< System Versioning: row end col index */
bool is_system_db;
/*!< True if the table belongs to a system
database (mysql, information_schema or
@@ -1749,7 +1917,7 @@ struct dict_table_t {
ulong n_waiting_or_granted_auto_inc_locks;
/** The transaction that currently holds the the AUTOINC lock on this
- table. Protected by lock_sys->mutex. */
+ table. Protected by lock_sys.mutex. */
const trx_t* autoinc_trx;
/* @} */
@@ -1764,7 +1932,7 @@ struct dict_table_t {
/** Count of the number of record locks on this table. We use this to
determine whether we can evict the table from the dictionary cache.
- It is protected by lock_sys->mutex. */
+ It is protected by lock_sys.mutex. */
ulint n_rec_locks;
private:
@@ -1774,7 +1942,7 @@ private:
int32 n_ref_count;
public:
- /** List of locks on the table. Protected by lock_sys->mutex. */
+ /** List of locks on the table. Protected by lock_sys.mutex. */
table_lock_list_t locks;
/** Timestamp of the last modification of this table. */
@@ -1792,9 +1960,22 @@ public:
dict_vcol_templ_t* vc_templ;
};
-inline bool dict_index_t::is_readable() const
+inline void dict_index_t::set_modified(mtr_t& mtr) const
+{
+ mtr.set_named_space(table->space);
+}
+
+inline bool dict_index_t::is_readable() const { return table->is_readable(); }
+
+inline bool dict_index_t::is_instant() const
{
- return(UNIV_LIKELY(!table->file_unreadable));
+ ut_ad(n_core_fields > 0);
+ ut_ad(n_core_fields <= n_fields);
+ ut_ad(n_core_fields == n_fields
+ || (type & ~(DICT_UNIQUE | DICT_CORRUPT)) == DICT_CLUSTERED);
+ ut_ad(n_core_fields == n_fields || table->supports_instant());
+ ut_ad(n_core_fields == n_fields || !table->is_temporary());
+ return(n_core_fields != n_fields);
}
inline bool dict_index_t::is_corrupted() const
diff --git a/storage/innobase/include/dict0mem.ic b/storage/innobase/include/dict0mem.ic
index da2ac629850..70424af7347 100644
--- a/storage/innobase/include/dict0mem.ic
+++ b/storage/innobase/include/dict0mem.ic
@@ -37,11 +37,7 @@ dict_mem_fill_index_struct(
/*=======================*/
dict_index_t* index, /*!< out: index to be filled */
mem_heap_t* heap, /*!< in: memory heap */
- const char* table_name, /*!< in: table name */
const char* index_name, /*!< in: index name */
- ulint space, /*!< in: space where the index tree is
- placed, ignored if the index is of
- the clustered type */
ulint type, /*!< in: DICT_UNIQUE,
DICT_CLUSTERED, ... ORed */
ulint n_fields) /*!< in: number of fields */
@@ -61,11 +57,10 @@ dict_mem_fill_index_struct(
/* Assign a ulint to a 4-bit-mapped field.
Only the low-order 4 bits are assigned. */
index->type = unsigned(type);
- index->space = (unsigned int) space;
index->page = FIL_NULL;
index->merge_threshold = DICT_INDEX_MERGE_THRESHOLD_DEFAULT;
- index->table_name = table_name;
index->n_fields = (unsigned int) n_fields;
+ index->n_core_fields = (unsigned int) n_fields;
/* The '1 +' above prevents allocation
of an empty mem block */
index->nulls_equal = false;
diff --git a/storage/innobase/include/dict0stats_bg.h b/storage/innobase/include/dict0stats_bg.h
index e66666b66a3..d66afdd4b25 100644
--- a/storage/innobase/include/dict0stats_bg.h
+++ b/storage/innobase/include/dict0stats_bg.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2012, 2017, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -116,16 +116,9 @@ dict_stats_thread_deinit();
#ifdef UNIV_DEBUG
/** Disables dict stats thread. It's used by:
SET GLOBAL innodb_dict_stats_disabled_debug = 1 (0).
-@param[in] thd thread handle
-@param[in] var pointer to system variable
-@param[out] var_ptr where the formal string goes
@param[in] save immediate result from check function */
-void
-dict_stats_disabled_debug_update(
- THD* thd,
- struct st_mysql_sys_var* var,
- void* var_ptr,
- const void* save);
+void dict_stats_disabled_debug_update(THD*, st_mysql_sys_var*, void*,
+ const void* save);
#endif /* UNIV_DEBUG */
/*****************************************************************//**
diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h
index 27b4cc0e694..f2fcae69bd5 100644
--- a/storage/innobase/include/dict0types.h
+++ b/storage/innobase/include/dict0types.h
@@ -28,6 +28,7 @@ Created 1/8/1996 Heikki Tuuri
#define dict0types_h
#include <ut0mutex.h>
+#include <rem0types.h>
struct dict_sys_t;
struct dict_col_t;
@@ -52,6 +53,13 @@ DICT_IBUF_ID_MIN plus the space id */
typedef ib_id_t table_id_t;
typedef ib_id_t index_id_t;
+/** Maximum transaction identifier */
+#define TRX_ID_MAX IB_ID_MAX
+
+/** The bit pattern corresponding to TRX_ID_MAX */
+extern const byte trx_id_max_bytes[8];
+extern const byte timestamp_max_bytes[7];
+
/** Error to ignore when we load table dictionary into memory. However,
the table and index will be marked as "corrupted", and caller will
be responsible to deal with corrupted table or index.
@@ -92,6 +100,36 @@ typedef ib_mutex_t DictSysMutex;
#define TEMP_TABLE_PREFIX "#sql"
#define TEMP_TABLE_PATH_PREFIX "/" TEMP_TABLE_PREFIX
+/** Table name wrapper for pretty-printing */
+struct table_name_t
+{
+ /** The name in internal representation */
+ char* m_name;
+
+ /** @return the end of the schema name */
+ const char* dbend() const
+ {
+ const char* sep = strchr(m_name, '/');
+ ut_ad(sep);
+ return sep;
+ }
+
+ /** @return the length of the schema name, in bytes */
+ size_t dblen() const { return size_t(dbend() - m_name); }
+
+ /** Determine the filename-safe encoded table name.
+ @return the filename-safe encoded table name */
+ const char* basename() const { return dbend() + 1; }
+
+ /** The start of the table basename suffix for partitioned tables */
+ static const char part_suffix[4];
+
+ /** Determine the partition or subpartition name suffix.
+ @return the partition name
+ @retval NULL if the table is not partitioned */
+ const char* part() const { return strstr(basename(), part_suffix); }
+};
+
#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
/** Flag to control insert buffer debugging. */
extern uint ibuf_debug;
diff --git a/storage/innobase/include/dyn0buf.h b/storage/innobase/include/dyn0buf.h
index 3126c8e4683..4b6c808b47c 100644
--- a/storage/innobase/include/dyn0buf.h
+++ b/storage/innobase/include/dyn0buf.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2013, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -32,14 +33,13 @@ Created 2013-03-16 Sunny Bains
#include "dyn0types.h"
/** Class that manages dynamic buffers. It uses a UT_LIST of
-dyn_buf_t::block_t instances. We don't use STL containers in
+mtr_buf_t::block_t instances. We don't use STL containers in
order to avoid the overhead of heap calls. Using a custom memory
allocator doesn't solve the problem either because we have to get
the memory from somewhere. We can't use the block_t::m_data as the
backend for the custom allocator because we would like the data in
the blocks to be contiguous. */
-template <size_t SIZE = DYN_ARRAY_DATA_SIZE>
-class dyn_buf_t {
+class mtr_buf_t {
public:
class block_t;
@@ -47,17 +47,19 @@ public:
typedef UT_LIST_NODE_T(block_t) block_node_t;
typedef UT_LIST_BASE_NODE_T(block_t) block_list_t;
+ /** DYN_ARRAY_DATA_SIZE - sizeof(m_node) + sizeof(m_used) */
+ enum { MAX_DATA_SIZE = DYN_ARRAY_DATA_SIZE
+ - sizeof(block_node_t) + sizeof(ib_uint32_t) };
+
class block_t {
public:
block_t()
{
- ut_ad(MAX_DATA_SIZE <= (2 << 15));
+ compile_time_assert(MAX_DATA_SIZE <= (2 << 15));
init();
}
- ~block_t() { }
-
/**
Gets the number of used bytes in a block.
@return number of bytes used */
@@ -112,12 +114,12 @@ public:
/**
@return pointer to start of reserved space */
template <typename Type>
- Type push(ib_uint32_t size)
+ Type push(uint32_t size)
{
Type ptr = reinterpret_cast<Type>(end());
m_used += size;
- ut_ad(m_used <= static_cast<ib_uint32_t>(MAX_DATA_SIZE));
+ ut_ad(m_used <= uint32_t(MAX_DATA_SIZE));
return(ptr);
}
@@ -131,7 +133,7 @@ public:
ut_ad(ptr <= begin() + m_buf_end);
/* We have done the boundary check above */
- m_used = static_cast<ib_uint32_t>(ptr - begin());
+ m_used = uint32_t(ptr - begin());
ut_ad(m_used <= MAX_DATA_SIZE);
ut_d(m_buf_end = 0);
@@ -154,13 +156,6 @@ public:
ulint m_magic_n;
#endif /* UNIV_DEBUG */
- /** SIZE - sizeof(m_node) + sizeof(m_used) */
- enum {
- MAX_DATA_SIZE = SIZE
- - sizeof(block_node_t)
- + sizeof(ib_uint32_t)
- };
-
/** Storage */
byte m_data[MAX_DATA_SIZE];
@@ -169,15 +164,13 @@ public:
/** number of data bytes used in this block;
DYN_BLOCK_FULL_FLAG is set when the block becomes full */
- ib_uint32_t m_used;
+ uint32_t m_used;
- friend class dyn_buf_t;
+ friend class mtr_buf_t;
};
- enum { MAX_DATA_SIZE = block_t::MAX_DATA_SIZE};
-
/** Default constructor */
- dyn_buf_t()
+ mtr_buf_t()
:
m_heap(),
m_size()
@@ -187,7 +180,7 @@ public:
}
/** Destructor */
- ~dyn_buf_t()
+ ~mtr_buf_t()
{
erase();
}
@@ -252,7 +245,7 @@ public:
@param size in bytes of the element
@return pointer to the element */
template <typename Type>
- Type push(ib_uint32_t size)
+ Type push(uint32_t size)
{
ut_ad(size > 0);
ut_ad(size <= MAX_DATA_SIZE);
@@ -272,17 +265,11 @@ public:
Pushes n bytes.
@param str string to write
@param len string length */
- void push(const byte* ptr, ib_uint32_t len)
+ void push(const byte* ptr, uint32_t len)
{
while (len > 0) {
- ib_uint32_t n_copied;
-
- if (len >= MAX_DATA_SIZE) {
- n_copied = MAX_DATA_SIZE;
- } else {
- n_copied = len;
- }
-
+ uint32_t n_copied = std::min(len,
+ uint32_t(MAX_DATA_SIZE));
::memmove(push<byte*>(n_copied), ptr, n_copied);
ptr += n_copied;
@@ -298,7 +285,7 @@ public:
const Type at(ulint pos) const
{
block_t* block = const_cast<block_t*>(
- const_cast<dyn_buf_t*>(this)->find(pos));
+ const_cast<mtr_buf_t*>(this)->find(pos));
return(reinterpret_cast<Type>(block->begin() + pos));
}
@@ -391,8 +378,8 @@ public:
private:
// Disable copying
- dyn_buf_t(const dyn_buf_t&);
- dyn_buf_t& operator=(const dyn_buf_t&);
+ mtr_buf_t(const mtr_buf_t&);
+ mtr_buf_t& operator=(const mtr_buf_t&);
/**
Add the block to the end of the list*/
@@ -404,7 +391,7 @@ private:
}
/** @return the last block in the list */
- block_t* back()
+ block_t* back() const
{
return(UT_LIST_GET_LAST(m_list));
}
@@ -484,8 +471,6 @@ private:
block_t m_first_block;
};
-typedef dyn_buf_t<DYN_ARRAY_DATA_SIZE> mtr_buf_t;
-
/** mtr_buf_t copier */
struct mtr_buf_copy_t {
/** The copied buffer */
diff --git a/storage/innobase/include/fil0crypt.h b/storage/innobase/include/fil0crypt.h
index 13b3ec4e37e..5238213135f 100644
--- a/storage/innobase/include/fil0crypt.h
+++ b/storage/innobase/include/fil0crypt.h
@@ -27,9 +27,9 @@ Created 04/01/2015 Jan Lindström
#define fil0crypt_h
#ifndef UNIV_INNOCHECKSUM
-
#include "os0event.h"
#include "my_crypt.h"
+#include "fil0fil.h"
#endif /*! UNIV_INNOCHECKSUM */
/**
@@ -296,7 +296,6 @@ fil_space_destroy_crypt_data(
Parse a MLOG_FILE_WRITE_CRYPT_DATA log entry
@param[in] ptr Log entry start
@param[in] end_ptr Log entry end
-@param[in] block buffer block
@param[out] err DB_SUCCESS or DB_DECRYPTION_FAILED
@return position on log buffer */
UNIV_INTERN
@@ -304,7 +303,6 @@ byte*
fil_parse_write_crypt_data(
byte* ptr,
const byte* end_ptr,
- const buf_block_t* block,
dberr_t* err)
MY_ATTRIBUTE((warn_unused_result));
diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h
index 1307598971b..890684af67e 100644
--- a/storage/innobase/include/fil0fil.h
+++ b/storage/innobase/include/fil0fil.h
@@ -35,17 +35,13 @@ Created 10/25/1995 Heikki Tuuri
#include "page0size.h"
#include "ibuf0types.h"
-#include <list>
-
// Forward declaration
-extern ibool srv_use_doublewrite_buf;
+extern my_bool srv_use_doublewrite_buf;
extern struct buf_dblwr_t* buf_dblwr;
struct trx_t;
class page_id_t;
class truncate_t;
-typedef std::list<char*, ut_allocator<char*> > space_name_list_t;
-
/** Structure containing encryption specification */
struct fil_space_crypt_t;
@@ -86,7 +82,7 @@ struct fil_space_t {
/*!< LSN of the most recent
fil_names_write_if_was_clean().
Reset to 0 by fil_names_clear().
- Protected by log_sys->mutex.
+ Protected by log_sys.mutex.
If and only if this is nonzero, the
tablespace will be in named_spaces. */
bool stop_new_ops;
@@ -107,7 +103,8 @@ struct fil_space_t {
ulint redo_skipped_count;
/*!< reference count for operations who want
to skip redo log in the file space in order
- to make fsp_space_modify_check pass. */
+ to make fsp_space_modify_check pass.
+ Uses my_atomic_loadlint() and friends. */
#endif
fil_type_t purpose;/*!< purpose */
UT_LIST_BASE_NODE_T(fil_node_t) chain;
@@ -141,14 +138,14 @@ struct fil_space_t {
dropped. An example is change buffer merge.
The tablespace cannot be dropped while this is nonzero,
or while fil_node_t::n_pending is nonzero.
- Protected by fil_system->mutex. */
+ Protected by fil_system.mutex and my_atomic_loadlint() and friends. */
ulint n_pending_ops;
/** Number of pending block read or write operations
(when a write is imminent or a read has recently completed).
The tablespace object cannot be freed while this is nonzero,
but it can be detached from fil_system.
Note that fil_node_t::n_pending tracks actual pending I/O requests.
- Protected by fil_system->mutex. */
+ Protected by fil_system.mutex and my_atomic_loadlint() and friends. */
ulint n_pending_ios;
hash_node_t hash; /*!< hash chain node */
hash_node_t name_hash;/*!< hash chain the name_hash table */
@@ -176,10 +173,6 @@ struct fil_space_t {
/** True if the device this filespace is on supports atomic writes */
bool atomic_write_supported;
- /** Release the reserved free extents.
- @param[in] n_reserved number of reserved extents */
- void release_free_extents(ulint n_reserved);
-
/** True if file system storing this tablespace supports
punch hole */
bool punch_hole;
@@ -195,6 +188,82 @@ struct fil_space_t {
return !atomic_write_supported
&& srv_use_doublewrite_buf && buf_dblwr;
}
+
+	/** Attempt to reserve free extents for the caller.
+	Asserts with ut_ad() that the current thread owns latch in
+	RW_LOCK_X mode.
+	@param[in]	n_free_now	current number of free extents
+	@param[in]	n_to_reserve	number of extents to reserve
+	@return whether the reservation succeeded
+	@retval true if n_reserved_extents was increased by n_to_reserve
+	@retval false if n_reserved_extents + n_to_reserve would exceed
+	n_free_now; n_reserved_extents is left unchanged */
+	bool reserve_free_extents(ulint n_free_now, ulint n_to_reserve)
+	{
+		ut_ad(rw_lock_own(&latch, RW_LOCK_X));
+
+		if (n_reserved_extents + n_to_reserve <= n_free_now) {
+			n_reserved_extents += n_to_reserve;
+			return true;
+		}
+
+		return false;
+	}
+
+	/** Give back extents that were reserved earlier.
+	A count of zero returns immediately, before the latch ownership
+	assertion is evaluated.
+	@param[in]	n_reserved	number of reserved extents to release;
+	ut_a() enforces that it does not exceed n_reserved_extents */
+	void release_free_extents(ulint n_reserved)
+	{
+		if (n_reserved) {
+			ut_ad(rw_lock_own(&latch, RW_LOCK_X));
+			ut_a(n_reserved_extents >= n_reserved);
+			n_reserved_extents -= n_reserved;
+		}
+	}
+
+ /** Rename a file.
+ @param[in] name table name after renaming
+ @param[in] path tablespace file name after renaming
+ @param[in] log whether to write redo log
+ @return error code
+ @retval DB_SUCCESS on success */
+ dberr_t rename(const char* name, const char* path, bool log);
+
+ /** Note that the tablespace has been imported.
+ Initially, purpose=FIL_TYPE_IMPORT so that no redo log is
+ written while the space ID is being updated in each page. */
+ void set_imported();
+
+ /** Open each file. Only invoked on fil_system.temp_space.
+ @return whether all files were opened */
+ bool open();
+ /** Close each file. Only invoked on fil_system.temp_space. */
+ void close();
+
+	/** Acquire a tablespace reference:
+	adds 1 to n_pending_ops with my_atomic_addlint(). */
+	void acquire()
+	{
+		my_atomic_addlint(&n_pending_ops, 1);
+	}
+	/** Release a tablespace reference.
+	Asserts with ut_ad() that referenced() holds before the
+	counter is changed. */
+	void release()
+	{
+		ut_ad(referenced());
+
+		/* Adding ulint(-1) takes one away from n_pending_ops. */
+		my_atomic_addlint(&n_pending_ops, ulint(-1));
+	}
+	/** Check the reference count read with my_atomic_loadlint().
+	@return whether references are being held, that is,
+	whether n_pending_ops is nonzero */
+	bool referenced()
+	{
+		return my_atomic_loadlint(&n_pending_ops) != 0;
+	}
+	/** Const overload; it casts away constness only in order to
+	forward to the non-const referenced().
+	@return whether references are being held */
+	bool referenced() const
+	{
+		fil_space_t*	self = const_cast<fil_space_t*>(this);
+
+		return self->referenced();
+	}
+
+	/** Acquire a tablespace reference for I/O:
+	adds 1 to n_pending_ios with my_atomic_addlint(). */
+	void acquire_for_io()
+	{
+		my_atomic_addlint(&n_pending_ios, 1);
+	}
+	/** Release a tablespace reference for I/O.
+	Asserts with ut_ad() that pending_io() holds before the
+	counter is changed. */
+	void release_for_io()
+	{
+		ut_ad(pending_io());
+
+		/* Adding ulint(-1) takes one away from n_pending_ios. */
+		my_atomic_addlint(&n_pending_ios, ulint(-1));
+	}
+	/** Check the I/O reference count read with my_atomic_loadlint().
+	@return whether I/O is pending, that is,
+	whether n_pending_ios is nonzero */
+	bool pending_io()
+	{
+		return my_atomic_loadlint(&n_pending_ios) != 0;
+	}
+	/** Const overload; it casts away constness only in order to
+	forward to the non-const pending_io().
+	@return whether I/O is pending */
+	bool pending_io() const
+	{
+		fil_space_t*	self = const_cast<fil_space_t*>(this);
+
+		return self->pending_io();
+	}
};
/** Value of fil_space_t::magic_n */
@@ -204,13 +273,13 @@ struct fil_space_t {
struct fil_node_t {
/** tablespace containing this file */
fil_space_t* space;
- /** file name; protected by fil_system->mutex and log_sys->mutex. */
+ /** file name; protected by fil_system.mutex and log_sys.mutex. */
char* name;
/** file handle (valid if is_open) */
pfs_os_file_t handle;
/** event that groups and serializes calls to fsync;
os_event_set() and os_event_reset() are protected by
- fil_system_t::mutex */
+ fil_system.mutex */
os_event_t sync_event;
/** whether the file actually is a raw device or disk partition */
bool is_raw_disk;
@@ -235,7 +304,7 @@ struct fil_node_t {
int64_t flush_counter;
/** link to other files in this tablespace */
UT_LIST_NODE_T(fil_node_t) chain;
- /** link to the fil_system->LRU list (keeping track of open files) */
+ /** link to the fil_system.LRU list (keeping track of open files) */
UT_LIST_NODE_T(fil_node_t) LRU;
/** whether this file could use atomic write (data file) */
@@ -252,6 +321,9 @@ struct fil_node_t {
{
return(handle != OS_FILE_CLOSED);
}
+
+ /** Close the file handle. */
+ void close();
};
/** Value of fil_node_t::magic_n */
@@ -283,15 +355,15 @@ typedef byte fil_faddr_t; /*!< 'type' definition in C: an address
#endif /* !UNIV_INNOCHECKSUM */
/** Initial size of a single-table tablespace in pages */
-#define FIL_IBD_FILE_INITIAL_SIZE 4
+#define FIL_IBD_FILE_INITIAL_SIZE 4U
/** 'null' (undefined) page offset in the context of file spaces */
#define FIL_NULL ULINT32_UNDEFINED
-#define FIL_ADDR_PAGE 0 /* first in address is the page offset */
-#define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/
-#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */
+#define FIL_ADDR_PAGE 0U /* first in address is the page offset */
+#define FIL_ADDR_BYTE 4U /* then comes 2-byte byte offset within page*/
+#define FIL_ADDR_SIZE 6U /* address size is 6 bytes */
#ifndef UNIV_INNOCHECKSUM
@@ -302,7 +374,7 @@ struct fil_addr_t {
};
/** The null file address */
-extern fil_addr_t fil_addr_null;
+extern const fil_addr_t fil_addr_null;
#endif /* !UNIV_INNOCHECKSUM */
@@ -311,15 +383,15 @@ extern fil_addr_t fil_addr_null;
page belongs to (== 0) but in later
versions the 'new' checksum of the
page */
-#define FIL_PAGE_OFFSET 4 /*!< page offset inside space */
-#define FIL_PAGE_PREV 8 /*!< if there is a 'natural'
+#define FIL_PAGE_OFFSET 4U /*!< page offset inside space */
+#define FIL_PAGE_PREV 8U /*!< if there is a 'natural'
predecessor of the page, its
offset. Otherwise FIL_NULL.
This field is not set on BLOB
pages, which are stored as a
singly-linked list. See also
FIL_PAGE_NEXT. */
-#define FIL_PAGE_NEXT 12 /*!< if there is a 'natural' successor
+#define FIL_PAGE_NEXT 12U /*!< if there is a 'natural' successor
of the page, its offset.
Otherwise FIL_NULL.
B-tree index pages
@@ -329,9 +401,9 @@ extern fil_addr_t fil_addr_null;
FIL_PAGE_PREV and FIL_PAGE_NEXT
in the collation order of the
smallest user record on each page. */
-#define FIL_PAGE_LSN 16 /*!< lsn of the end of the newest
+#define FIL_PAGE_LSN 16U /*!< lsn of the end of the newest
modification log record to the page */
-#define FIL_PAGE_TYPE 24 /*!< file page type: FIL_PAGE_INDEX,...,
+#define FIL_PAGE_TYPE 24U /*!< file page type: FIL_PAGE_INDEX,...,
2 bytes.
The contents of this field can only
@@ -346,7 +418,7 @@ extern fil_addr_t fil_addr_null;
MySQL/InnoDB 5.1.7 or later, the
contents of this field is valid
for all uncompressed pages. */
-#define FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION 26 /*!< for the first page
+#define FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION 26U /*!< for the first page
in a system tablespace data file
(ibdata*, not *.ibd): the file has
been flushed to disk at least up
@@ -360,7 +432,7 @@ extern fil_addr_t fil_addr_null;
#define FIL_RTREE_SPLIT_SEQ_NUM FIL_PAGE_FILE_FLUSH_LSN_OR_KEY_VERSION
/** starting from 4.1.x this contains the space id of the page */
-#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34
+#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34U
#define FIL_PAGE_SPACE_ID FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID
@@ -387,7 +459,7 @@ extern fil_addr_t fil_addr_null;
then encrypted */
#define FIL_PAGE_PAGE_COMPRESSED 34354 /*!< page compressed page */
#define FIL_PAGE_INDEX 17855 /*!< B-tree node */
-#define FIL_PAGE_RTREE 17854 /*!< B-tree node */
+#define FIL_PAGE_RTREE 17854 /*!< R-tree node (SPATIAL INDEX) */
#define FIL_PAGE_UNDO_LOG 2 /*!< Undo log page */
#define FIL_PAGE_INODE 3 /*!< Index node */
#define FIL_PAGE_IBUF_FREE_LIST 4 /*!< Insert buffer free list */
@@ -410,15 +482,26 @@ extern fil_addr_t fil_addr_null;
//#define FIL_PAGE_ENCRYPTED 15
//#define FIL_PAGE_COMPRESSED_AND_ENCRYPTED 16
//#define FIL_PAGE_ENCRYPTED_RTREE 17
+/** Clustered index root page after instant ADD COLUMN */
+#define FIL_PAGE_TYPE_INSTANT 18
-/** Used by i_s.cc to index into the text description. */
+/** Used by i_s.cc to index into the text description.
+Note: FIL_PAGE_TYPE_INSTANT maps to the same as FIL_PAGE_INDEX. */
#define FIL_PAGE_TYPE_LAST FIL_PAGE_TYPE_UNKNOWN
/*!< Last page type */
/* @} */
-/** macro to check whether the page type is index (Btree or Rtree) type */
-#define fil_page_type_is_index(page_type) \
- (page_type == FIL_PAGE_INDEX || page_type == FIL_PAGE_RTREE)
+/** Check whether a page type code denotes an index page.
+@param[in]	page_type	page type code to classify
+@return whether page_type is FIL_PAGE_INDEX, FIL_PAGE_TYPE_INSTANT
+(clustered index root page after instant ADD COLUMN) or FIL_PAGE_RTREE */
+inline bool fil_page_type_is_index(ulint page_type)
+{
+	return page_type == FIL_PAGE_INDEX
+		|| page_type == FIL_PAGE_TYPE_INSTANT
+		|| page_type == FIL_PAGE_RTREE;
+}
/** Check whether the page is index page (either regular Btree index or Rtree
index */
@@ -453,7 +536,7 @@ The caller should hold an InnoDB table lock or a MDL that prevents
the tablespace from being dropped during the operation,
or the caller should be in single-threaded crash recovery mode
(no user connections that could drop tablespaces).
-If this is not the case, fil_space_acquire() and fil_space_release()
+If this is not the case, fil_space_acquire() and fil_space_t::release()
should be used instead.
@param[in] id tablespace ID
@return tablespace, or NULL if not found */
@@ -466,12 +549,42 @@ fil_space_get(
data space) is stored here; below we talk about tablespaces, but also
the ib_logfiles form a 'space' and it is handled here */
struct fil_system_t {
+ /**
+ Constructor.
+
+ Some members may require late initialisation, thus we just mark object as
+ uninitialised. Real initialisation happens in create().
+ Only m_initialised and the five list base nodes are touched here.
+ */
+ fil_system_t(): m_initialised(false)
+ {
+ /* Each call names a list base node of this object together with the
+ fil_node_t or fil_space_t member of the same name; presumably that
+ member is the per-element link of the list (fil_node_t::LRU is
+ documented as the link to the fil_system.LRU list). */
+ UT_LIST_INIT(LRU, &fil_node_t::LRU);
+ UT_LIST_INIT(space_list, &fil_space_t::space_list);
+ UT_LIST_INIT(rotation_list, &fil_space_t::rotation_list);
+ UT_LIST_INIT(unflushed_spaces, &fil_space_t::unflushed_spaces);
+ UT_LIST_INIT(named_spaces, &fil_space_t::named_spaces);
+ }
+
+	/** @return the value of m_initialised, which the constructor
+	sets to false (presumably create() sets it; not visible here) */
+	bool is_initialised() const
+	{
+		return m_initialised;
+	}
+
+ /**
+ Create the file system interface at database start.
+
+ @param[in] hash_size hash table size
+ */
+ void create(ulint hash_size);
+
+ /** Close the file system interface at shutdown */
+ void close();
+
+private:
+ bool m_initialised;
+public:
ib_mutex_t mutex; /*!< The mutex protecting the cache */
+ fil_space_t* sys_space; /*!< The innodb_system tablespace */
+ fil_space_t* temp_space; /*!< The innodb_temporary tablespace */
hash_table_t* spaces; /*!< The hash table of spaces in the
system; they are hashed on the space
id */
- hash_table_t* name_hash; /*!< hash table based on the space
- name */
UT_LIST_BASE_NODE_T(fil_node_t) LRU;
/*!< base node for the LRU list of the
most recently used open files with no
@@ -490,8 +603,6 @@ struct fil_system_t {
at least one file node where
modification_counter > flush_counter */
ulint n_open; /*!< number of files currently open */
- ulint max_n_open; /*!< n_open is not allowed to exceed
- this */
int64_t modification_counter;/*!< when we write to a file we
increment this by one */
ulint max_assigned_id;/*!< maximum space id in the existing
@@ -507,20 +618,19 @@ struct fil_system_t {
for which a MLOG_FILE_NAME
record has been written since
the latest redo log checkpoint.
- Protected only by log_sys->mutex. */
+ Protected only by log_sys.mutex. */
UT_LIST_BASE_NODE_T(fil_space_t) rotation_list;
/*!< list of all file spaces needing
key rotation.*/
- ibool space_id_reuse_warned;
- /* !< TRUE if fil_space_create()
+ bool space_id_reuse_warned;
+ /*!< whether fil_space_create()
has issued a warning about
potential space_id reuse */
};
-/** The tablespace memory cache. This variable is NULL before the module is
-initialized. */
-extern fil_system_t* fil_system;
+/** The tablespace memory cache. */
+extern fil_system_t fil_system;
#include "fil0crypt.h"
@@ -533,23 +643,6 @@ fil_space_get_latch(
ulint id,
ulint* flags);
-/** Gets the type of a file space.
-@param[in] id tablespace identifier
-@return file type */
-fil_type_t
-fil_space_get_type(
- ulint id);
-
-/** Note that a tablespace has been imported.
-It is initially marked as FIL_TYPE_IMPORT so that no logging is
-done during the import process when the space ID is stamped to each page.
-Now we change it to FIL_SPACE_TABLESPACE to start redo and undo logging.
-NOTE: temporary tablespaces are never imported.
-@param[in] id tablespace identifier */
-void
-fil_space_set_imported(
- ulint id);
-
/** Append a file to the chain of files of a space.
@param[in] name file name of a file that is not open
@param[in] size file size in entire database blocks
@@ -611,16 +704,6 @@ fil_space_free(
ulint id,
bool x_latched);
-/** Returns the path from the first fil_node_t found with this space ID.
-The caller is responsible for freeing the memory allocated here for the
-value returned.
-@param[in] id Tablespace ID
-@return own: A copy of fil_node_t::path, NULL if space ID is zero
-or not found. */
-char*
-fil_space_get_first_path(
- ulint id);
-
/** Set the recovered size of a tablespace in pages.
@param id tablespace ID
@param size recovered size in pages */
@@ -644,19 +727,6 @@ fil_space_get_flags(
/*================*/
ulint id); /*!< in: space id */
-/** Open each fil_node_t of a named fil_space_t if not already open.
-@param[in] name Tablespace name
-@return true if all file nodes are opened. */
-bool
-fil_space_open(
- const char* name);
-
-/** Close each fil_node_t of a named fil_space_t if open.
-@param[in] name Tablespace name */
-void
-fil_space_close(
- const char* name);
-
/** Returns the page size of the space and whether it is compressed or not.
The tablespace must be cached in the memory cache.
@param[in] id space id
@@ -667,18 +737,6 @@ fil_space_get_page_size(
ulint id,
bool* found);
-/****************************************************************//**
-Initializes the tablespace memory cache. */
-void
-fil_init(
-/*=====*/
- ulint hash_size, /*!< in: hash table size */
- ulint max_n_open); /*!< in: max number of open files */
-/*******************************************************************//**
-Initializes the tablespace memory cache. */
-void
-fil_close(void);
-/*===========*/
/*******************************************************************//**
Opens all log files and system tablespace data files. They stay open until the
database server shutdown. This should be called at a server startup after the
@@ -756,11 +814,6 @@ fil_space_acquire_silent(ulint id)
return (fil_space_acquire_low(id, true));
}
-/** Release a tablespace acquired with fil_space_acquire().
-@param[in,out] space tablespace to release */
-void
-fil_space_release(fil_space_t* space);
-
/** Acquire a tablespace for reading or writing a block,
when it could be dropped concurrently.
@param[in] id tablespace ID
@@ -769,17 +822,12 @@ when it could be dropped concurrently.
fil_space_t*
fil_space_acquire_for_io(ulint id);
-/** Release a tablespace acquired with fil_space_acquire_for_io().
-@param[in,out] space tablespace to release */
-void
-fil_space_release_for_io(fil_space_t* space);
-
/** Return the next fil_space_t.
Once started, the caller must keep calling this until it returns NULL.
-fil_space_acquire() and fil_space_release() are invoked here which
+fil_space_acquire() and fil_space_t::release() are invoked here which
blocks a concurrent operation from dropping the tablespace.
@param[in,out] prev_space Pointer to the previous fil_space_t.
-If NULL, use the first fil_space_t on fil_system->space_list.
+If NULL, use the first fil_space_t on fil_system.space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last */
fil_space_t*
@@ -789,10 +837,10 @@ fil_space_next(
/** Return the next fil_space_t from key rotation list.
Once started, the caller must keep calling this until it returns NULL.
-fil_space_acquire() and fil_space_release() are invoked here which
+fil_space_acquire() and fil_space_t::release() are invoked here which
blocks a concurrent operation from dropping the tablespace.
@param[in,out] prev_space Pointer to the previous fil_space_t.
-If NULL, use the first fil_space_t on fil_system->space_list.
+If NULL, use the first fil_space_t on fil_system.space_list.
@return pointer to the next fil_space_t.
@retval NULL if this was the last*/
fil_space_t*
@@ -800,68 +848,6 @@ fil_space_keyrotate_next(
fil_space_t* prev_space)
MY_ATTRIBUTE((warn_unused_result));
-/** Wrapper with reference-counting for a fil_space_t. */
-class FilSpace
-{
-public:
- /** Default constructor: Use this when reference counting
- is done outside this wrapper. */
- FilSpace() : m_space(NULL) {}
-
- /** Constructor: Look up the tablespace and increment the
- reference count if found.
- @param[in] space_id tablespace ID
- @param[in] silent whether not to display errors */
- explicit FilSpace(ulint space_id, bool silent = false)
- : m_space(fil_space_acquire_low(space_id, silent)) {}
-
- /** Assignment operator: This assumes that fil_space_acquire()
- has already been done for the fil_space_t. The caller must
- assign NULL if it calls fil_space_release().
- @param[in] space tablespace to assign */
- class FilSpace& operator=(fil_space_t* space)
- {
- /* fil_space_acquire() must have been invoked. */
- ut_ad(space == NULL || space->n_pending_ops > 0);
- m_space = space;
- return(*this);
- }
-
- /** Destructor - Decrement the reference count if a fil_space_t
- is still assigned. */
- ~FilSpace()
- {
- if (m_space != NULL) {
- fil_space_release(m_space);
- }
- }
-
- /** Implicit type conversion
- @return the wrapped object */
- operator const fil_space_t*() const
- {
- return(m_space);
- }
-
- /** Member accessor
- @return the wrapped object */
- const fil_space_t* operator->() const
- {
- return(m_space);
- }
-
- /** Explicit type conversion
- @return the wrapped object */
- const fil_space_t* operator()() const
- {
- return(m_space);
- }
-
-private:
- /** The wrapped pointer */
- fil_space_t* m_space;
-};
-
/********************************************************//**
Creates the database directory for a table if it does not exist yet. */
void
@@ -869,43 +855,6 @@ fil_create_directory_for_tablename(
/*===============================*/
const char* name); /*!< in: name in the standard
'databasename/tablename' format */
-/** Write redo log for renaming a file.
-@param[in] space_id tablespace id
-@param[in] old_name tablespace file name
-@param[in] new_name tablespace file name after renaming */
-void
-fil_name_write_rename(
- ulint space_id,
- const char* old_name,
- const char* new_name);
-/********************************************************//**
-Recreates table indexes by applying
-TRUNCATE log record during recovery.
-@return DB_SUCCESS or error code */
-dberr_t
-fil_recreate_table(
-/*===============*/
- ulint space_id, /*!< in: space id */
- ulint format_flags, /*!< in: page format */
- ulint flags, /*!< in: tablespace flags */
- const char* name, /*!< in: table name */
- truncate_t& truncate); /*!< in/out: The information of
- TRUNCATE log record */
-/********************************************************//**
-Recreates the tablespace and table indexes by applying
-TRUNCATE log record during recovery.
-@return DB_SUCCESS or error code */
-dberr_t
-fil_recreate_tablespace(
-/*====================*/
- ulint space_id, /*!< in: space id */
- ulint format_flags, /*!< in: page format */
- ulint flags, /*!< in: tablespace flags */
- const char* name, /*!< in: table name */
- truncate_t& truncate, /*!< in/out: The information of
- TRUNCATE log record */
- lsn_t recv_lsn); /*!< in: the end LSN of
- the log record */
/** Replay a file rename operation if possible.
@param[in] space_id tablespace identifier
@param[in] first_page_no first page number in the file
@@ -966,56 +915,6 @@ fil_close_tablespace(
ulint id); /*!< in: space id */
/*******************************************************************//**
-Discards a single-table tablespace. The tablespace must be cached in the
-memory cache. Discarding is like deleting a tablespace, but
-
- 1. We do not drop the table from the data dictionary;
-
- 2. We remove all insert buffer entries for the tablespace immediately;
- in DROP TABLE they are only removed gradually in the background;
-
- 3. When the user does IMPORT TABLESPACE, the tablespace will have the
- same id as it originally had.
-
- 4. Free all the pages in use by the tablespace if rename=true.
-@return DB_SUCCESS or error */
-dberr_t
-fil_discard_tablespace(
-/*===================*/
- ulint id) /*!< in: space id */
- MY_ATTRIBUTE((warn_unused_result));
-
-/** Test if a tablespace file can be renamed to a new filepath by checking
-if that the old filepath exists and the new filepath does not exist.
-@param[in] space_id tablespace id
-@param[in] old_path old filepath
-@param[in] new_path new filepath
-@param[in] is_discarded whether the tablespace is discarded
-@return innodb error code */
-dberr_t
-fil_rename_tablespace_check(
- ulint space_id,
- const char* old_path,
- const char* new_path,
- bool is_discarded);
-
-/** Rename a single-table tablespace.
-The tablespace must exist in the memory cache.
-@param[in] id tablespace identifier
-@param[in] old_path old file name
-@param[in] new_name new table name in the
-databasename/tablename format
-@param[in] new_path_in new file name,
-or NULL if it is located in the normal data directory
-@return true if success */
-bool
-fil_rename_tablespace(
- ulint id,
- const char* old_path,
- const char* new_name,
- const char* new_path_in);
-
-/*******************************************************************//**
Allocates and builds a file name from a path, a table or tablespace name
and a suffix. The string must be freed by caller with ut_free().
@param[in] path NULL or the direcory path or the full path and filename.
@@ -1038,8 +937,10 @@ fil_make_filepath(
must be >= FIL_IBD_FILE_INITIAL_SIZE
@param[in] mode MariaDB encryption mode
@param[in] key_id MariaDB encryption key_id
-@return DB_SUCCESS or error code */
-dberr_t
+@param[out] err DB_SUCCESS or error code
+@return the created tablespace
+@retval NULL on error */
+fil_space_t*
fil_ibd_create(
ulint space_id,
const char* name,
@@ -1047,16 +948,15 @@ fil_ibd_create(
ulint flags,
ulint size,
fil_encryption_t mode,
- uint32_t key_id)
- MY_ATTRIBUTE((nonnull(2), warn_unused_result));
+ uint32_t key_id,
+ dberr_t* err)
+ MY_ATTRIBUTE((nonnull(2,8), warn_unused_result));
/** Try to adjust FSP_SPACE_FLAGS if they differ from the expectations.
(Typically when upgrading from MariaDB 10.1.0..10.1.20.)
-@param[in] space_id tablespace ID
+@param[in,out] space tablespace
@param[in] flags desired tablespace flags */
-UNIV_INTERN
-void
-fsp_flags_try_adjust(ulint space_id, ulint flags);
+void fsp_flags_try_adjust(fil_space_t* space, ulint flags);
/********************************************************************//**
Tries to open a single-table tablespace and optionally checks the space id is
@@ -1083,19 +983,22 @@ statement to update the dictionary tables if they are incorrect.
@param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY
@param[in] id tablespace ID
@param[in] flags expected FSP_SPACE_FLAGS
-@param[in] space_name tablespace name of the datafile
+@param[in] tablename table name
If file-per-table, it is the table name in the databasename/tablename format
@param[in] path_in expected filepath, usually read from dictionary
-@return DB_SUCCESS or error code */
-dberr_t
+@param[out] err DB_SUCCESS or error code
+@return tablespace
+@retval NULL if the tablespace could not be opened */
+fil_space_t*
fil_ibd_open(
- bool validate,
- bool fix_dict,
- fil_type_t purpose,
- ulint id,
- ulint flags,
- const char* tablename,
- const char* path_in)
+ bool validate,
+ bool fix_dict,
+ fil_type_t purpose,
+ ulint id,
+ ulint flags,
+ const table_name_t& tablename,
+ const char* path_in,
+ dberr_t* err = NULL)
MY_ATTRIBUTE((warn_unused_result));
enum fil_load_status {
@@ -1145,15 +1048,14 @@ startup, there may be many tablespaces which are not yet in the memory cache.
@param[in] print_error_if_does_not_exist
Print detailed error information to the
error log if a matching tablespace is not found from memory.
-@param[in] heap Heap memory
@param[in] table_flags table flags
-@return true if a matching tablespace exists in the memory cache */
-bool
+@return the tablespace
+@retval NULL if no matching tablespace exists in the memory cache */
+fil_space_t*
fil_space_for_table_exists_in_mem(
ulint id,
const char* name,
bool print_error_if_does_not_exist,
- mem_heap_t* heap,
ulint table_flags);
/** Try to extend a tablespace if it is smaller than the specified size.
@@ -1164,29 +1066,6 @@ bool
fil_space_extend(
fil_space_t* space,
ulint size);
-/*******************************************************************//**
-Tries to reserve free extents in a file space.
-@return true if succeed */
-bool
-fil_space_reserve_free_extents(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint n_free_now, /*!< in: number of free extents now */
- ulint n_to_reserve); /*!< in: how many one wants to reserve */
-/*******************************************************************//**
-Releases free extents in a file space. */
-void
-fil_space_release_free_extents(
-/*===========================*/
- ulint id, /*!< in: space id */
- ulint n_reserved); /*!< in: how many one reserved */
-/*******************************************************************//**
-Gets the number of reserved extents. If the database is silent, this number
-should be zero. */
-ulint
-fil_space_get_n_reserved_extents(
-/*=============================*/
- ulint id); /*!< in: space id */
/** Reads or writes data. This operation could be asynchronous (aio).
@@ -1342,20 +1221,6 @@ Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE.
#define fil_block_check_type(block, type, mtr) \
fil_page_check_type(block->page.id, block->frame, type, mtr)
-#ifdef UNIV_DEBUG
-/** Increase redo skipped of a tablespace.
-@param[in] id space id */
-void
-fil_space_inc_redo_skipped_count(
- ulint id);
-
-/** Decrease redo skipped of a tablespace.
-@param[in] id space id */
-void
-fil_space_dec_redo_skipped_count(
- ulint id);
-#endif
-
/********************************************************************//**
Delete the tablespace file and any related files like .cfg.
This should not be called for temporary tables. */
@@ -1384,27 +1249,6 @@ char*
fil_path_to_space_name(
const char* filename);
-/** Returns the space ID based on the tablespace name.
-The tablespace must be found in the tablespace memory cache.
-This call is made from external to this module, so the mutex is not owned.
-@param[in] tablespace Tablespace name
-@return space ID if tablespace found, ULINT_UNDEFINED if space not. */
-ulint
-fil_space_get_id_by_name(
- const char* tablespace);
-
-/**
-Iterate over all the spaces in the space list and fetch the
-tablespace names. It will return a copy of the name that must be
-freed by the caller using: delete[].
-@return DB_SUCCESS if all OK. */
-dberr_t
-fil_get_space_names(
-/*================*/
- space_name_list_t& space_name_list)
- /*!< in/out: Vector for collecting the names. */
- MY_ATTRIBUTE((warn_unused_result));
-
/** Generate redo log for swapping two .ibd files
@param[in] old_table old table
@param[in] new_table new table
@@ -1420,9 +1264,9 @@ fil_mtr_rename_log(
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Acquire the fil_system mutex. */
-#define fil_system_enter() mutex_enter(&fil_system->mutex)
+#define fil_system_enter() mutex_enter(&fil_system.mutex)
/** Release the fil_system mutex. */
-#define fil_system_exit() mutex_exit(&fil_system->mutex)
+#define fil_system_exit() mutex_exit(&fil_system.mutex)
/*******************************************************************//**
Returns the table space by a given id, NULL if not found. */
@@ -1431,14 +1275,7 @@ fil_space_get_by_id(
/*================*/
ulint id); /*!< in: space id */
-/** Look up a tablespace.
-@param[in] name tablespace name
-@return tablespace
-@retval NULL if not found */
-fil_space_t*
-fil_space_get_by_name(const char* name);
-
-/*******************************************************************//**
+/** Note that a non-predefined persistent tablespace has been modified
by redo log.
@param[in,out] space tablespace */
void
@@ -1473,8 +1310,8 @@ fil_names_write_if_was_clean(
}
const bool was_clean = space->max_lsn == 0;
- ut_ad(space->max_lsn <= log_sys->lsn);
- space->max_lsn = log_sys->lsn;
+ ut_ad(space->max_lsn <= log_sys.lsn);
+ space->max_lsn = log_sys.lsn;
if (was_clean) {
fil_names_dirty_and_write(space, mtr);
diff --git a/storage/innobase/include/fil0fil.ic b/storage/innobase/include/fil0fil.ic
index 023a48a5066..2a7d06e243f 100644
--- a/storage/innobase/include/fil0fil.ic
+++ b/storage/innobase/include/fil0fil.ic
@@ -39,6 +39,7 @@ fil_get_page_type_name(
return "PAGE_COMPRESSED_ENRYPTED";
case FIL_PAGE_PAGE_COMPRESSED:
return "PAGE_COMPRESSED";
+ case FIL_PAGE_TYPE_INSTANT:
case FIL_PAGE_INDEX:
return "INDEX";
case FIL_PAGE_RTREE:
@@ -89,6 +90,7 @@ fil_page_type_validate(
if (!((page_type == FIL_PAGE_PAGE_COMPRESSED ||
page_type == FIL_PAGE_PAGE_COMPRESSED_ENCRYPTED ||
page_type == FIL_PAGE_INDEX ||
+ page_type == FIL_PAGE_TYPE_INSTANT ||
page_type == FIL_PAGE_RTREE ||
page_type == FIL_PAGE_UNDO_LOG ||
page_type == FIL_PAGE_INODE ||
diff --git a/storage/innobase/include/fsp0file.h b/storage/innobase/include/fsp0file.h
index 68e9f687fcd..794d44373e8 100644
--- a/storage/innobase/include/fsp0file.h
+++ b/storage/innobase/include/fsp0file.h
@@ -417,7 +417,8 @@ private:
/** Flags to use for opening the data file */
os_file_create_t m_open_flags;
- /** size in database pages */
+ /** size in megabytes or pages; converted from megabytes to
+ pages in SysTablespace::normalize_size() */
ulint m_size;
/** ordinal position of this datafile in the tablespace */
@@ -480,7 +481,7 @@ public:
/* No op - base constructor is called. */
}
- RemoteDatafile(const char* name, ulint size, ulint order)
+ RemoteDatafile(const char*, ulint, ulint)
:
m_link_filepath()
{
@@ -502,12 +503,6 @@ public:
return(m_link_filepath);
}
- /** Set the link filepath. Use default datadir, the base name of
- the path provided without its suffix, plus DOT_ISL.
- @param[in] path filepath which contains a basename to use.
- If NULL, use m_name as the basename. */
- void set_link_filepath(const char* path);
-
/** Create a link filename based on the contents of m_name,
open that file, and read the contents into m_filepath.
@retval DB_SUCCESS if remote linked tablespace file is opened and read.
diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
index 368f0daa201..3222f1c761a 100644
--- a/storage/innobase/include/fsp0fsp.h
+++ b/storage/innobase/include/fsp0fsp.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -45,8 +45,8 @@ Created 12/18/1995 Heikki Tuuri
/** @return the PAGE_SSIZE flags for the current innodb_page_size */
#define FSP_FLAGS_PAGE_SSIZE() \
- ((UNIV_PAGE_SIZE == UNIV_PAGE_SIZE_ORIG) ? \
- 0 : (UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1) \
+ ((srv_page_size == UNIV_PAGE_SIZE_ORIG) ? \
+ 0U : (srv_page_size_shift - UNIV_ZIP_SIZE_SHIFT_MIN + 1) \
<< FSP_FLAGS_POS_PAGE_SSIZE)
/* @defgroup Compatibility macros for MariaDB 10.1.0 through 10.1.20;
@@ -294,22 +294,6 @@ the extent are free and which contain old tuple version to clean. */
#ifndef UNIV_INNOCHECKSUM
/* @} */
-/**********************************************************************//**
-Initializes the file space system. */
-void
-fsp_init(void);
-/*==========*/
-
-/**********************************************************************//**
-Gets the size of the system tablespace from the tablespace header. If
-we do not have an auto-extending data file, this should be equal to
-the size of the data files. If there is an auto-extending data file,
-this can be smaller.
-@return size in pages */
-ulint
-fsp_header_get_tablespace_size(void);
-/*================================*/
-
/** Calculate the number of pages to extend a datafile.
We extend single-table tablespaces first one extent at a time,
but 4 at a time for bigger tablespaces. It is not enough to extend always
@@ -334,7 +318,7 @@ UNIV_INLINE
ulint
fsp_get_extent_size_in_pages(const page_size_t& page_size)
{
- return(FSP_EXTENT_SIZE * UNIV_PAGE_SIZE / page_size.physical());
+ return (FSP_EXTENT_SIZE << srv_page_size_shift) / page_size.physical();
}
/**********************************************************************//**
@@ -397,56 +381,33 @@ fsp_header_init_fields(
ulint flags); /*!< in: tablespace flags (FSP_SPACE_FLAGS):
0, or table->flags if newer than COMPACT */
/** Initialize a tablespace header.
-@param[in] space_id space id
-@param[in] size current size in blocks
-@param[in,out] mtr mini-transaction */
-void
-fsp_header_init(ulint space_id, ulint size, mtr_t* mtr);
+@param[in,out] space tablespace
+@param[in] size current size in blocks
+@param[in,out] mtr mini-transaction */
+void fsp_header_init(fil_space_t* space, ulint size, mtr_t* mtr)
+ MY_ATTRIBUTE((nonnull));
/**********************************************************************//**
-Increases the space size field of a space. */
-void
-fsp_header_inc_size(
-/*================*/
- ulint space_id, /*!< in: space id */
- ulint size_inc, /*!< in: size increment in pages */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
Creates a new segment.
@return the block where the segment header is placed, x-latched, NULL
if could not create segment because of lack of space */
buf_block_t*
fseg_create(
-/*========*/
- ulint space_id,/*!< in: space id */
- ulint page, /*!< in: page where the segment header is placed: if
- this is != 0, the page must belong to another segment,
- if this is 0, a new page will be allocated and it
- will belong to the created segment */
- ulint byte_offset, /*!< in: byte offset of the created segment header
- on the page */
- mtr_t* mtr); /*!< in/out: mini-transaction */
-/**********************************************************************//**
-Creates a new segment.
-@return the block where the segment header is placed, x-latched, NULL
-if could not create segment because of lack of space */
-buf_block_t*
-fseg_create_general(
-/*================*/
- ulint space_id,/*!< in: space id */
+ fil_space_t* space, /*!< in,out: tablespace */
ulint page, /*!< in: page where the segment header is placed: if
this is != 0, the page must belong to another segment,
if this is 0, a new page will be allocated and it
will belong to the created segment */
ulint byte_offset, /*!< in: byte offset of the created segment header
on the page */
- ibool has_done_reservation, /*!< in: TRUE if the caller has already
- done the reservation for the pages with
+ mtr_t* mtr,
+ bool has_done_reservation = false); /*!< in: whether the caller
+ has already done the reservation for the pages with
fsp_reserve_free_extents (at least 2 extents: one for
the inode and the other for the segment) then there is
no need to do the check for this individual
operation */
- mtr_t* mtr); /*!< in/out: mini-transaction */
+
/**********************************************************************//**
Calculates the number of pages reserved by a segment, and how many pages are
currently used.
@@ -508,7 +469,7 @@ fseg_alloc_free_page_general(
use several pages from the tablespace should call this function beforehand
and reserve enough free extents so that they certainly will be able
to do their operation, like a B-tree page split, fully. Reservations
-must be released with function fil_space_release_free_extents!
+must be released with function fil_space_t::release_free_extents()!
The alloc_type below has the following meaning: FSP_NORMAL means an
operation which will probably result in more space usage, like an
@@ -534,7 +495,7 @@ free pages available.
return true and the tablespace size is <
FSP_EXTENT_SIZE pages, then this can be 0,
otherwise it is n_ext
-@param[in] space_id tablespace identifier
+@param[in,out] space tablespace
@param[in] n_ext number of extents to reserve
@param[in] alloc_type page reservation type (FSP_BLOB, etc)
@param[in,out] mtr the mini transaction
@@ -545,30 +506,12 @@ free pages available.
bool
fsp_reserve_free_extents(
ulint* n_reserved,
- ulint space_id,
+ fil_space_t* space,
ulint n_ext,
fsp_reserve_t alloc_type,
mtr_t* mtr,
ulint n_pages = 2);
-/** Calculate how many KiB of new data we will be able to insert to the
-tablespace without running out of space.
-@param[in] space_id tablespace ID
-@return available space in KiB
-@retval UINTMAX_MAX if unknown */
-uintmax_t
-fsp_get_available_space_in_free_extents(
- ulint space_id);
-
-/** Calculate how many KiB of new data we will be able to insert to the
-tablespace without running out of space. Start with a space object that has
-been acquired by the caller who holds it for the calculation,
-@param[in] space tablespace object from fil_space_acquire()
-@return available space in KiB */
-uintmax_t
-fsp_get_available_space_in_free_extents(
- const fil_space_t* space);
-
/**********************************************************************//**
Frees a single page of a segment. */
void
diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic
index 2da3320eef7..38d890fd2f3 100644
--- a/storage/innobase/include/fsp0fsp.ic
+++ b/storage/innobase/include/fsp0fsp.ic
@@ -92,21 +92,15 @@ xdes_calc_descriptor_page(
const page_size_t& page_size,
ulint offset)
{
-#ifndef DOXYGEN /* Doxygen gets confused by these */
-# if UNIV_PAGE_SIZE_MAX <= XDES_ARR_OFFSET \
- + (UNIV_PAGE_SIZE_MAX / FSP_EXTENT_SIZE_MAX) \
- * XDES_SIZE_MAX
-# error
-# endif
-# if UNIV_ZIP_SIZE_MIN <= XDES_ARR_OFFSET \
- + (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE_MIN) \
- * XDES_SIZE_MIN
-# error
-# endif
-#endif /* !DOXYGEN */
-
- ut_ad(UNIV_PAGE_SIZE > XDES_ARR_OFFSET
- + (UNIV_PAGE_SIZE / FSP_EXTENT_SIZE)
+ compile_time_assert(UNIV_PAGE_SIZE_MAX > XDES_ARR_OFFSET
+ + (UNIV_PAGE_SIZE_MAX / FSP_EXTENT_SIZE_MAX)
+ * XDES_SIZE_MAX);
+ compile_time_assert(UNIV_PAGE_SIZE_MIN > XDES_ARR_OFFSET
+ + (UNIV_PAGE_SIZE_MIN / FSP_EXTENT_SIZE_MIN)
+ * XDES_SIZE_MIN);
+
+ ut_ad(srv_page_size > XDES_ARR_OFFSET
+ + (srv_page_size / FSP_EXTENT_SIZE)
* XDES_SIZE);
ut_ad(UNIV_ZIP_SIZE_MIN > XDES_ARR_OFFSET
+ (UNIV_ZIP_SIZE_MIN / FSP_EXTENT_SIZE)
diff --git a/storage/innobase/include/fsp0sysspace.h b/storage/innobase/include/fsp0sysspace.h
index efbd4fc3f24..80b006f2dd7 100644
--- a/storage/innobase/include/fsp0sysspace.h
+++ b/storage/innobase/include/fsp0sysspace.h
@@ -33,14 +33,6 @@ Created 2013-7-26 by Kevin Lewis
at a time. We have to make this public because it is a config variable. */
extern ulong sys_tablespace_auto_extend_increment;
-#ifdef UNIV_DEBUG
-/** Control if extra debug checks need to be done for temporary tablespace.
-Default = true that is disable such checks.
-This variable is not exposed to end-user but still kept as variable for
-developer to enable it during debug. */
-extern bool srv_skip_temp_table_checks_debug;
-#endif /* UNIV_DEBUG */
-
/** Data structure that contains the information about shared tablespaces.
Currently this can be the system tablespace or a temporary table tablespace */
class SysTablespace : public Tablespace
@@ -111,7 +103,7 @@ public:
void shutdown();
/** Normalize the file size, convert from megabytes to pages. */
- void normalize();
+ void normalize_size();
/**
@return true if a new raw device was created. */
@@ -147,8 +139,8 @@ public:
@return the autoextend increment in pages. */
ulint get_autoextend_increment() const
{
- return(sys_tablespace_auto_extend_increment
- * ((1024 * 1024) / UNIV_PAGE_SIZE));
+ return sys_tablespace_auto_extend_increment
+ << (20 - srv_page_size_shift);
}
/**
diff --git a/storage/innobase/include/fsp0types.h b/storage/innobase/include/fsp0types.h
index c6dbe52def4..f7a5befa6ae 100644
--- a/storage/innobase/include/fsp0types.h
+++ b/storage/innobase/include/fsp0types.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2014, 2017, MariaDB Corporation.
+Copyright (c) 2014, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -59,11 +59,8 @@ page size | file space extent size
32 KiB | 64 pages = 2 MiB
64 KiB | 64 pages = 4 MiB
*/
-#define FSP_EXTENT_SIZE ((UNIV_PAGE_SIZE <= (16384) ? \
- (1048576 / UNIV_PAGE_SIZE) : \
- ((UNIV_PAGE_SIZE <= (32768)) ? \
- (2097152 / UNIV_PAGE_SIZE) : \
- (4194304 / UNIV_PAGE_SIZE))))
+#define FSP_EXTENT_SIZE (srv_page_size_shift < 14 ? \
+ (1048576U >> srv_page_size_shift) : 64U)
/** File space extent size (four megabyte) in pages for MAX page size */
#define FSP_EXTENT_SIZE_MAX (4194304 / UNIV_PAGE_SIZE_MAX)
@@ -151,38 +148,38 @@ enum fsp_reserve_t {
/* Number of pages described in a single descriptor page: currently each page
description takes less than 1 byte; a descriptor page is repeated every
this many file pages */
-/* #define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE */
-/* This has been replaced with either UNIV_PAGE_SIZE or page_zip->size. */
+/* #define XDES_DESCRIBED_PER_PAGE srv_page_size */
+/* This has been replaced with either srv_page_size or page_zip->size. */
/** @name The space low address page map
The pages at FSP_XDES_OFFSET and FSP_IBUF_BITMAP_OFFSET are repeated
every XDES_DESCRIBED_PER_PAGE pages in every tablespace. */
/* @{ */
/*--------------------------------------*/
-#define FSP_XDES_OFFSET 0 /* !< extent descriptor */
-#define FSP_IBUF_BITMAP_OFFSET 1 /* !< insert buffer bitmap */
+#define FSP_XDES_OFFSET 0U /* !< extent descriptor */
+#define FSP_IBUF_BITMAP_OFFSET 1U /* !< insert buffer bitmap */
/* The ibuf bitmap pages are the ones whose
page number is the number above plus a
multiple of XDES_DESCRIBED_PER_PAGE */
-#define FSP_FIRST_INODE_PAGE_NO 2 /*!< in every tablespace */
+#define FSP_FIRST_INODE_PAGE_NO 2U /*!< in every tablespace */
/* The following pages exist
in the system tablespace (space 0). */
-#define FSP_IBUF_HEADER_PAGE_NO 3 /*!< insert buffer
+#define FSP_IBUF_HEADER_PAGE_NO 3U /*!< insert buffer
header page, in
tablespace 0 */
-#define FSP_IBUF_TREE_ROOT_PAGE_NO 4 /*!< insert buffer
+#define FSP_IBUF_TREE_ROOT_PAGE_NO 4U /*!< insert buffer
B-tree root page in
tablespace 0 */
/* The ibuf tree root page number in
tablespace 0; its fseg inode is on the page
number FSP_FIRST_INODE_PAGE_NO */
-#define FSP_TRX_SYS_PAGE_NO 5 /*!< transaction
+#define FSP_TRX_SYS_PAGE_NO 5U /*!< transaction
system header, in
tablespace 0 */
-#define FSP_FIRST_RSEG_PAGE_NO 6 /*!< first rollback segment
+#define FSP_FIRST_RSEG_PAGE_NO 6U /*!< first rollback segment
page, in tablespace 0 */
-#define FSP_DICT_HDR_PAGE_NO 7 /*!< data dictionary header
+#define FSP_DICT_HDR_PAGE_NO 7U /*!< data dictionary header
page, in tablespace 0 */
/*--------------------------------------*/
/* @} */
@@ -196,17 +193,6 @@ fsp_is_system_temporary(ulint space_id)
{
return(space_id == SRV_TMP_SPACE_ID);
}
-
-#ifdef UNIV_DEBUG
-/** Skip some of the sanity checks that are time consuming even in debug mode
-and can affect frequent verification runs that are done to ensure stability of
-the product.
-@return true if check should be skipped for given space. */
-bool
-fsp_skip_sanity_check(
- ulint space_id);
-#endif /* UNIV_DEBUG */
-
#endif /* !UNIV_INNOCHECKSUM */
/* @defgroup fsp_flags InnoDB Tablespace Flag Constants @{ */
@@ -217,7 +203,7 @@ fsp_skip_sanity_check(
#define FSP_FLAGS_WIDTH_ZIP_SSIZE 4
/** Width of the ATOMIC_BLOBS flag. The ability to break up a long
column into an in-record prefix and an externally stored part is available
-to the two Barracuda row formats COMPRESSED and DYNAMIC. */
+to ROW_FORMAT=REDUNDANT and ROW_FORMAT=COMPACT. */
#define FSP_FLAGS_WIDTH_ATOMIC_BLOBS 1
/** Number of flag bits used to indicate the tablespace page size */
#define FSP_FLAGS_WIDTH_PAGE_SSIZE 4
diff --git a/storage/innobase/include/fts0fts.h b/storage/innobase/include/fts0fts.h
index cad9ef37560..068720c1947 100644
--- a/storage/innobase/include/fts0fts.h
+++ b/storage/innobase/include/fts0fts.h
@@ -488,47 +488,49 @@ fts_trx_free(
/*=========*/
fts_trx_t* fts_trx); /*!< in, own: FTS trx */
-/******************************************************************//**
-Creates the common ancillary tables needed for supporting an FTS index
-on the given table. row_mysql_lock_data_dictionary must have been
-called before this.
-@return DB_SUCCESS or error code */
+/** Creates the common auxiliary tables needed for supporting an FTS index
+on the given table. row_mysql_lock_data_dictionary must have been called
+before this.
+The following tables are created.
+CREATE TABLE $FTS_PREFIX_DELETED
+ (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_DELETED_CACHE
+ (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_BEING_DELETED
+ (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_BEING_DELETED_CACHE
+ (doc_id BIGINT UNSIGNED, UNIQUE CLUSTERED INDEX on doc_id)
+CREATE TABLE $FTS_PREFIX_CONFIG
+ (key CHAR(50), value CHAR(200), UNIQUE CLUSTERED INDEX on key)
+@param[in,out] trx transaction
+@param[in] table table with FTS index
+@param[in] skip_doc_id_index Skip index on doc id
+@return DB_SUCCESS on success, or an error code */
dberr_t
fts_create_common_tables(
-/*=====================*/
- trx_t* trx, /*!< in: transaction handle */
- const dict_table_t*
- table, /*!< in: table with one FTS
- index */
- const char* name, /*!< in: table name */
- bool skip_doc_id_index) /*!< in: Skip index on doc id */
- MY_ATTRIBUTE((warn_unused_result));
-/******************************************************************//**
-Wrapper function of fts_create_index_tables_low(), create auxiliary
-tables for an FTS index
-@return DB_SUCCESS or error code */
-dberr_t
-fts_create_index_tables(
-/*====================*/
- trx_t* trx, /*!< in: transaction handle */
- const dict_index_t* index) /*!< in: the FTS index
- instance */
- MY_ATTRIBUTE((warn_unused_result));
-/******************************************************************//**
-Creates the column specific ancillary tables needed for supporting an
+ trx_t* trx,
+ dict_table_t* table,
+ bool skip_doc_id_index)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
+/** Creates the column specific ancillary tables needed for supporting an
FTS index on the given table. row_mysql_lock_data_dictionary must have
been called before this.
+
+All FTS AUX Index tables have the following schema.
+CREATE TABLE $FTS_PREFIX_INDEX_[1-6](
+ word VARCHAR(FTS_MAX_WORD_LEN),
+ first_doc_id INT NOT NULL,
+ last_doc_id UNSIGNED NOT NULL,
+ doc_count UNSIGNED INT NOT NULL,
+ ilist VARBINARY NOT NULL,
+ UNIQUE CLUSTERED INDEX ON (word, first_doc_id))
+@param[in,out] trx dictionary transaction
+@param[in] index fulltext index
+@param[in] id table id
@return DB_SUCCESS or error code */
dberr_t
-fts_create_index_tables_low(
-/*========================*/
- trx_t* trx, /*!< in: transaction handle */
- const dict_index_t*
- index, /*!< in: the FTS index
- instance */
- const char* table_name, /*!< in: the table name */
- table_id_t table_id) /*!< in: the table id */
- MY_ATTRIBUTE((warn_unused_result));
+fts_create_index_tables(trx_t* trx, const dict_index_t* index, table_id_t id)
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
/******************************************************************//**
Add the FTS document id hidden column. */
void
@@ -559,7 +561,7 @@ fts_commit(
MY_ATTRIBUTE((warn_unused_result));
/** FTS Query entry point.
-@param[in] trx transaction
+@param[in,out] trx transaction
@param[in] index fts index to search
@param[in] flags FTS search mode
@param[in] query_str FTS query
@@ -740,7 +742,6 @@ Take a FTS savepoint. */
void
fts_savepoint_take(
/*===============*/
- trx_t* trx, /*!< in: transaction */
fts_trx_t* fts_trx, /*!< in: fts transaction */
const char* name); /*!< in: savepoint name */
diff --git a/storage/innobase/include/fts0priv.h b/storage/innobase/include/fts0priv.h
index fca22bdc7d6..d045c9d3c72 100644
--- a/storage/innobase/include/fts0priv.h
+++ b/storage/innobase/include/fts0priv.h
@@ -319,7 +319,6 @@ the dict mutex
que_t*
fts_parse_sql_no_dict_lock(
/*=======================*/
- fts_table_t* fts_table, /*!< in: table with FTS index */
pars_info_t* info, /*!< in: parser info */
const char* sql) /*!< in: SQL string to evaluate */
MY_ATTRIBUTE((warn_unused_result));
diff --git a/storage/innobase/include/fts0tokenize.h b/storage/innobase/include/fts0tokenize.h
index 15726aea1de..909d2ce07ba 100644
--- a/storage/innobase/include/fts0tokenize.h
+++ b/storage/innobase/include/fts0tokenize.h
@@ -144,7 +144,7 @@ fts_get_word(
}
}
- info->prev = *doc;
+ info->prev = char(*doc);
info->yesno = (FTB_YES == ' ') ? 1 : (info->quot != 0);
info->weight_adjust = info->wasign = 0;
}
diff --git a/storage/innobase/include/fts0types.ic b/storage/innobase/include/fts0types.ic
index a8712751412..487e7c33b63 100644
--- a/storage/innobase/include/fts0types.ic
+++ b/storage/innobase/include/fts0types.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2007, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -172,7 +172,6 @@ fts_select_index_by_hash(
const byte* str,
ulint len)
{
- int char_len;
ulong nr1 = 1;
ulong nr2 = 4;
@@ -187,9 +186,9 @@ fts_select_index_by_hash(
char_len = my_mbcharlen_ptr(cs, reinterpret_cast<const char*>(str),
reinterpret_cast<const char*>(str + len));
*/
- char_len = cs->cset->charlen(cs, str, str+len);
+ size_t char_len = size_t(cs->cset->charlen(cs, str, str + len));
- ut_ad(static_cast<ulint>(char_len) <= len);
+ ut_ad(char_len <= len);
/* Get collation hash code */
cs->coll->hash_sort(cs, str, char_len, &nr1, &nr2);
diff --git a/storage/innobase/include/fut0fut.ic b/storage/innobase/include/fut0fut.ic
index 6fe031876e6..56be971f233 100644
--- a/storage/innobase/include/fut0fut.ic
+++ b/storage/innobase/include/fut0fut.ic
@@ -48,7 +48,7 @@ fut_get_ptr(
buf_block_t* block;
byte* ptr = NULL;
- ut_ad(addr.boffset < UNIV_PAGE_SIZE);
+ ut_ad(addr.boffset < srv_page_size);
ut_ad((rw_latch == RW_S_LATCH)
|| (rw_latch == RW_X_LATCH)
|| (rw_latch == RW_SX_LATCH));
diff --git a/storage/innobase/include/fut0lst.ic b/storage/innobase/include/fut0lst.ic
index 128dc77ed92..5c9a9ca94c1 100644
--- a/storage/innobase/include/fut0lst.ic
+++ b/storage/innobase/include/fut0lst.ic
@@ -58,7 +58,7 @@ flst_write_addr(
MTR_MEMO_PAGE_X_FIX
| MTR_MEMO_PAGE_SX_FIX));
ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
- ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
+ ut_a(ut_align_offset(faddr, srv_page_size) >= FIL_PAGE_DATA);
mlog_write_ulint(faddr + FIL_ADDR_PAGE, addr.page, MLOG_4BYTES, mtr);
mlog_write_ulint(faddr + FIL_ADDR_BYTE, addr.boffset,
@@ -83,7 +83,7 @@ flst_read_addr(
addr.boffset = mtr_read_ulint(faddr + FIL_ADDR_BYTE, MLOG_2BYTES,
mtr);
ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA);
- ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA);
+ ut_a(ut_align_offset(faddr, srv_page_size) >= FIL_PAGE_DATA);
return(addr);
}
diff --git a/storage/innobase/include/gis0rtree.h b/storage/innobase/include/gis0rtree.h
index 65a53ec1d39..461d2816653 100644
--- a/storage/innobase/include/gis0rtree.h
+++ b/storage/innobase/include/gis0rtree.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2014, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -28,6 +28,7 @@ Created 2013/03/27 Jimmy Yang and Allen Lai
#define gis0rtree_h
#include "univ.i"
+#include "my_base.h"
#include "data0type.h"
#include "data0types.h"
@@ -87,10 +88,8 @@ rtr_index_build_node_ptr(
pointer */
ulint page_no,/*!< in: page number to put in node
pointer */
- mem_heap_t* heap, /*!< in: memory heap where pointer
+ mem_heap_t* heap); /*!< in: memory heap where pointer
created */
- ulint level); /*!< in: level of rec in tree:
- 0 means leaf level */
/*************************************************************//**
Splits an R-tree index page to halves and inserts the tuple. It is assumed
@@ -179,7 +178,6 @@ dberr_t
rtr_ins_enlarge_mbr(
/*=================*/
btr_cur_t* cursor, /*!< in: btr cursor */
- que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr); /*!< in: mtr */
/********************************************************************//**
@@ -438,9 +436,6 @@ rtr_merge_and_update_mbr(
ulint* offsets, /*!< in: rec offsets */
ulint* offsets2, /*!< in: rec offsets */
page_t* child_page, /*!< in: the child page. */
- buf_block_t* merge_block, /*!< in: page to merge */
- buf_block_t* block, /*!< in: page be merged */
- dict_index_t* index, /*!< in: index */
mtr_t* mtr); /*!< in: mtr */
/*************************************************************//**
@@ -448,10 +443,8 @@ Deletes on the upper level the node pointer to a page. */
void
rtr_node_ptr_delete(
/*================*/
- dict_index_t* index, /*!< in: index tree */
- btr_cur_t* sea_cur,/*!< in: search cursor, contains information
+ btr_cur_t* cursor, /*!< in: search cursor, contains information
about parent nodes in search */
- buf_block_t* block, /*!< in: page whose node pointer is deleted */
mtr_t* mtr); /*!< in: mtr */
/****************************************************************//**
@@ -463,10 +456,7 @@ rtr_merge_mbr_changed(
btr_cur_t* cursor2, /*!< in: the other cursor */
ulint* offsets, /*!< in: rec offsets */
ulint* offsets2, /*!< in: rec offsets */
- rtr_mbr_t* new_mbr, /*!< out: MBR to update */
- buf_block_t* merge_block, /*!< in: page to merge */
- buf_block_t* block, /*!< in: page be merged */
- dict_index_t* index); /*!< in: index */
+ rtr_mbr_t* new_mbr); /*!< out: MBR to update */
/**************************************************************//**
@@ -543,7 +533,7 @@ rtr_info_reinit_in_cursor(
@param[in] tuple range tuple containing mbr, may also be empty tuple
@param[in] mode search mode
@return estimated number of rows */
-int64_t
+ha_rows
rtr_estimate_n_rows_in_range(
dict_index_t* index,
const dtuple_t* tuple,
diff --git a/storage/innobase/include/gis0rtree.ic b/storage/innobase/include/gis0rtree.ic
index e852ebd8028..4dd05d3b251 100644
--- a/storage/innobase/include/gis0rtree.ic
+++ b/storage/innobase/include/gis0rtree.ic
@@ -38,7 +38,7 @@ rtr_page_cal_mbr(
{
page_t* page;
rec_t* rec;
- byte* field;
+ const byte* field;
ulint len;
ulint* offsets = NULL;
double bmin, bmax;
diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
index 86defe9b166..1313705f119 100644
--- a/storage/innobase/include/ha_prototypes.h
+++ b/storage/innobase/include/ha_prototypes.h
@@ -70,13 +70,11 @@ innobase_invalidate_query_cache(
/*============================*/
trx_t* trx, /*!< in: transaction which
modifies the table */
- const char* full_name, /*!< in: concatenation of
+ const char* full_name); /*!< in: concatenation of
database name, path separator,
table name, null char NUL;
NOTE that in Windows this is
always in LOWER CASE! */
- ulint full_name_len); /*!< in: full name length where
- also the null chars count */
/** Quote a standard SQL identifier like tablespace, index or column name.
@param[in] file output stream
@@ -158,7 +156,6 @@ UNIV_INTERN
void
innobase_mysql_log_notify(
/*======================*/
- ib_uint64_t write_lsn, /*!< in: LSN written to log file */
ib_uint64_t flush_lsn); /*!< in: LSN flushed to disk */
/** Converts a MySQL type to an InnoDB type. Note that this function returns
@@ -240,7 +237,7 @@ wsrep_innobase_kill_one_trx(void * const thd_ptr,
const trx_t * const bf_trx,
trx_t *victim_trx,
ibool signal);
-int wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
+ulint wsrep_innobase_mysql_sort(int mysql_type, uint charset_number,
unsigned char* str, unsigned int str_length,
unsigned int buf_length);
#endif /* WITH_WSREP */
@@ -309,14 +306,6 @@ thd_lock_wait_timeout(
/*==================*/
THD* thd); /*!< in: thread handle, or NULL to query
the global innodb_lock_wait_timeout */
-/******************************************************************//**
-Add up the time waited for the lock for the current query. */
-void
-thd_set_lock_wait_time(
-/*===================*/
- THD* thd, /*!< in/out: thread handle */
- ulint value); /*!< in: time waited for the lock */
-
/** Get status of innodb_tmpdir.
@param[in] thd thread handle, or NULL to query
the global innodb_tmpdir.
@@ -453,14 +442,6 @@ const char*
server_get_hostname();
/*=================*/
-/******************************************************************//**
-Get the error message format string.
-@return the format string or 0 if not found. */
-const char*
-innobase_get_err_msg(
-/*=================*/
- int error_code); /*!< in: MySQL error code */
-
/*********************************************************************//**
Compute the next autoinc value.
@@ -533,7 +514,7 @@ UNIV_INTERN
void
ib_push_warning(
trx_t* trx, /*!< in: trx */
- ulint error, /*!< in: error code to push as warning */
+ dberr_t error, /*!< in: error code to push as warning */
const char *format,/*!< in: warning message */
...);
@@ -543,7 +524,7 @@ UNIV_INTERN
void
ib_push_warning(
void* ithd, /*!< in: thd */
- ulint error, /*!< in: error code to push as warning */
+ dberr_t error, /*!< in: error code to push as warning */
const char *format,/*!< in: warning message */
...);
diff --git a/storage/innobase/include/handler0alter.h b/storage/innobase/include/handler0alter.h
index 1c690839449..81c0fd18a29 100644
--- a/storage/innobase/include/handler0alter.h
+++ b/storage/innobase/include/handler0alter.h
@@ -53,14 +53,6 @@ innobase_row_to_mysql(
const dtuple_t* row) /*!< in: InnoDB row */
MY_ATTRIBUTE((nonnull));
-/*************************************************************//**
-Resets table->record[0]. */
-void
-innobase_rec_reset(
-/*===============*/
- struct TABLE* table) /*!< in/out: MySQL table */
- MY_ATTRIBUTE((nonnull));
-
/** Generate the next autoinc based on a snapshot of the session
auto_increment_increment and auto_increment_offset variables. */
struct ib_sequence_t {
diff --git a/storage/innobase/include/ib0mutex.h b/storage/innobase/include/ib0mutex.h
index 7b289c7a98c..eaf391be09b 100644
--- a/storage/innobase/include/ib0mutex.h
+++ b/storage/innobase/include/ib0mutex.h
@@ -53,15 +53,8 @@ struct OSTrackMutex {
ut_ad(!m_destroy_at_exit || !m_locked);
}
- /** Initialise the mutex.
- @param[in] id Mutex ID
- @param[in] filename File where mutex was created
- @param[in] line Line in filename */
- void init(
- latch_id_t id,
- const char* filename,
- uint32_t line)
- UNIV_NOTHROW
+ /** Initialise the mutex. */
+ void init(latch_id_t, const char*, uint32_t) UNIV_NOTHROW
{
ut_ad(m_freed);
ut_ad(!m_locked);
@@ -92,16 +85,8 @@ struct OSTrackMutex {
m_mutex.exit();
}
- /** Acquire the mutex.
- @param[in] max_spins max number of spins
- @param[in] max_delay max delay per spin
- @param[in] filename from where called
- @param[in] line within filename */
- void enter(
- uint32_t max_spins,
- uint32_t max_delay,
- const char* filename,
- uint32_t line)
+ /** Acquire the mutex. */
+ void enter(uint32_t, uint32_t, const char*, uint32_t)
UNIV_NOTHROW
{
ut_ad(!m_freed);
@@ -186,15 +171,8 @@ struct TTASFutexMutex {
}
/** Called when the mutex is "created". Note: Not from the constructor
- but when the mutex is initialised.
- @param[in] id Mutex ID
- @param[in] filename File where mutex was created
- @param[in] line Line in filename */
- void init(
- latch_id_t id,
- const char* filename,
- uint32_t line)
- UNIV_NOTHROW
+ but when the mutex is initialised. */
+ void init(latch_id_t, const char*, uint32_t) UNIV_NOTHROW
{
ut_a(m_lock_word == MUTEX_STATE_UNLOCKED);
}
@@ -208,14 +186,9 @@ struct TTASFutexMutex {
/** Acquire the mutex.
@param[in] max_spins max number of spins
- @param[in] max_delay max delay per spin
- @param[in] filename from where called
- @param[in] line within filename */
- void enter(
- uint32_t max_spins,
- uint32_t max_delay,
- const char* filename,
- uint32_t line) UNIV_NOTHROW
+ @param[in] max_delay max delay per spin */
+ void enter(uint32_t max_spins, uint32_t max_delay,
+ const char*, uint32_t) UNIV_NOTHROW
{
uint32_t n_spins, n_waits;
@@ -225,7 +198,7 @@ struct TTASFutexMutex {
return;
}
- ut_delay(ut_rnd_interval(0, max_delay));
+ ut_delay(max_delay);
}
for (n_waits= 0;; n_waits++) {
@@ -308,15 +281,8 @@ struct TTASMutex {
}
/** Called when the mutex is "created". Note: Not from the constructor
- but when the mutex is initialised.
- @param[in] id Mutex ID
- @param[in] filename File where mutex was created
- @param[in] line Line in filename */
- void init(
- latch_id_t id,
- const char* filename,
- uint32_t line)
- UNIV_NOTHROW
+ but when the mutex is initialised. */
+ void init(latch_id_t) UNIV_NOTHROW
{
ut_ad(m_lock_word == MUTEX_STATE_UNLOCKED);
}
@@ -349,20 +315,15 @@ struct TTASMutex {
/** Acquire the mutex.
@param max_spins max number of spins
- @param max_delay max delay per spin
- @param filename from where called
- @param line within filename */
- void enter(
- uint32_t max_spins,
- uint32_t max_delay,
- const char* filename,
- uint32_t line) UNIV_NOTHROW
+ @param max_delay max delay per spin */
+ void enter(uint32_t max_spins, uint32_t max_delay,
+ const char*, uint32_t) UNIV_NOTHROW
{
const uint32_t step = max_spins;
uint32_t n_spins = 0;
while (!try_lock()) {
- ut_delay(ut_rnd_interval(0, max_delay));
+ ut_delay(max_delay);
if (++n_spins == max_spins) {
os_thread_yield();
max_spins+= step;
@@ -420,14 +381,8 @@ struct TTASEventMutex {
/** Called when the mutex is "created". Note: Not from the constructor
but when the mutex is initialised.
- @param[in] id Mutex ID
- @param[in] filename File where mutex was created
- @param[in] line Line in filename */
- void init(
- latch_id_t id,
- const char* filename,
- uint32_t line)
- UNIV_NOTHROW
+ @param[in] id Mutex ID */
+ void init(latch_id_t id, const char*, uint32_t) UNIV_NOTHROW
{
ut_a(m_event == 0);
ut_a(m_lock_word == MUTEX_STATE_UNLOCKED);
@@ -516,7 +471,7 @@ struct TTASEventMutex {
sync_array_wait_event(sync_arr, cell);
}
} else {
- ut_delay(ut_rnd_interval(0, max_delay));
+ ut_delay(max_delay);
}
}
diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h
index 6cff26635bd..8233a536abc 100644
--- a/storage/innobase/include/ibuf0ibuf.h
+++ b/storage/innobase/include/ibuf0ibuf.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2017, MariaDB Corporation.
+Copyright (c) 2016, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -49,22 +49,19 @@ typedef enum {
IBUF_OP_COUNT = 3
} ibuf_op_t;
-/** Combinations of operations that can be buffered. Because the enum
-values are used for indexing innobase_change_buffering_values[], they
-should start at 0 and there should not be any gaps. */
-typedef enum {
+/** Combinations of operations that can be buffered.
+@see innodb_change_buffering_names */
+enum ibuf_use_t {
IBUF_USE_NONE = 0,
IBUF_USE_INSERT, /* insert */
IBUF_USE_DELETE_MARK, /* delete */
IBUF_USE_INSERT_DELETE_MARK, /* insert+delete */
IBUF_USE_DELETE, /* delete+purge */
- IBUF_USE_ALL, /* insert+delete+purge */
-
- IBUF_USE_COUNT /* number of entries in ibuf_use_t */
-} ibuf_use_t;
+ IBUF_USE_ALL /* insert+delete+purge */
+};
/** Operations that can currently be buffered. */
-extern ibuf_use_t ibuf_use;
+extern ulong innodb_change_buffering;
/** The insert buffer control structure */
extern ibuf_t* ibuf;
@@ -421,14 +418,11 @@ void
ibuf_close(void);
/*============*/
-/******************************************************************//**
-Checks the insert buffer bitmaps on IMPORT TABLESPACE.
+/** Check the insert buffer bitmaps on IMPORT TABLESPACE.
+@param[in] trx transaction
+@param[in,out] space tablespace being imported
@return DB_SUCCESS or error code */
-dberr_t
-ibuf_check_bitmap_on_import(
-/*========================*/
- const trx_t* trx, /*!< in: transaction */
- ulint space_id) /*!< in: tablespace identifier */
+dberr_t ibuf_check_bitmap_on_import(const trx_t* trx, fil_space_t* space)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Updates free bits and buffered bits for bulk loaded page.
diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic
index 09070c14059..355fad62f24 100644
--- a/storage/innobase/include/ibuf0ibuf.ic
+++ b/storage/innobase/include/ibuf0ibuf.ic
@@ -28,7 +28,7 @@ Created 7/19/1997 Heikki Tuuri
#include "fsp0types.h"
#include "buf0lru.h"
-/** An index page must contain at least UNIV_PAGE_SIZE /
+/** An index page must contain at least srv_page_size /
IBUF_PAGE_SIZE_PER_FREE_SPACE bytes of free space for ibuf to try to
buffer inserts to this page. If there is this much of free space, the
corresponding bits are set in the ibuf bitmap. */
@@ -124,7 +124,7 @@ ibuf_should_try(
a secondary index when we
decide */
{
- return(ibuf_use != IBUF_USE_NONE
+ return(innodb_change_buffering
&& ibuf->max_size != 0
&& !dict_index_is_clust(index)
&& !dict_index_is_spatial(index)
@@ -314,9 +314,7 @@ ibuf_update_free_bits_if_full(
block->page.size.physical(), max_ins_size);
if (max_ins_size >= increase) {
-#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX
-# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE_MAX"
-#endif
+ compile_time_assert(ULINT32_UNDEFINED > UNIV_PAGE_SIZE_MAX);
after = ibuf_index_page_calc_free_bits(
block->page.size.physical(), max_ins_size - increase);
#ifdef UNIV_IBUF_DEBUG
diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
index 45f69cad9a5..0f6fe158264 100644
--- a/storage/innobase/include/lock0lock.h
+++ b/storage/innobase/include/lock0lock.h
@@ -65,23 +65,6 @@ ulint
lock_get_size(void);
/*===============*/
/*********************************************************************//**
-Creates the lock system at database start. */
-void
-lock_sys_create(
-/*============*/
- ulint n_cells); /*!< in: number of slots in lock hash table */
-/** Resize the lock hash table.
-@param[in] n_cells number of slots in lock hash table */
-void
-lock_sys_resize(
- ulint n_cells);
-
-/*********************************************************************//**
-Closes the lock system at database shutdown. */
-void
-lock_sys_close(void);
-/*================*/
-/*********************************************************************//**
Gets the heap_no of the smallest user record on a page.
@return heap_no of smallest user record, or PAGE_HEAP_NO_SUPREMUM */
UNIV_INLINE
@@ -296,7 +279,7 @@ lock_rec_insert_check_and_lock(
dict_index_t* index, /*!< in: index */
que_thr_t* thr, /*!< in: query thread */
mtr_t* mtr, /*!< in/out: mini-transaction */
- ibool* inherit)/*!< out: set to TRUE if the new
+ bool* inherit)/*!< out: set to true if the new
inserted record maybe should inherit
LOCK_GAP type locks from the successor
record */
@@ -509,18 +492,6 @@ void
lock_trx_release_locks(
/*===================*/
trx_t* trx); /*!< in/out: transaction */
-/*********************************************************************//**
-Removes locks on a table to be dropped or discarded.
-If remove_also_table_sx_locks is TRUE then table-level S and X locks are
-also removed in addition to other table-level and record-level locks.
-No lock, that is going to be removed, is allowed to be a wait lock. */
-void
-lock_remove_all_on_table(
-/*=====================*/
- dict_table_t* table, /*!< in: table to be dropped
- or discarded */
- ibool remove_also_table_sx_locks);/*!< in: also removes
- table S and X locks */
/*********************************************************************//**
Calculates the fold value of a page file address: used in inserting or
@@ -565,8 +536,8 @@ lock_rec_find_set_bit(
/*********************************************************************//**
Checks if a lock request lock1 has to wait for request lock2.
-@return TRUE if lock1 has to wait for lock2 to be removed */
-ibool
+@return whether lock1 has to wait for lock2 to be removed */
+bool
lock_has_to_wait(
/*=============*/
const lock_t* lock1, /*!< in: waiting lock */
@@ -583,7 +554,7 @@ lock_report_trx_id_insanity(
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: index */
const ulint* offsets, /*!< in: rec_get_offsets(rec, index) */
- trx_id_t max_trx_id); /*!< in: trx_sys_get_max_trx_id() */
+ trx_id_t max_trx_id); /*!< in: trx_sys.get_max_trx_id() */
/*********************************************************************//**
Prints info of locks for all transactions.
@return FALSE if not able to obtain lock mutex and exits without
@@ -615,7 +586,7 @@ lock_print_info_all_transactions(
Return approximate number of record locks (bits set in the bitmap) for
this transaction. Since delete-marked records may be removed, the
record count will not be precise.
-The caller must be holding lock_sys->mutex. */
+The caller must be holding lock_sys.mutex. */
ulint
lock_number_of_rows_locked(
/*=======================*/
@@ -624,7 +595,7 @@ lock_number_of_rows_locked(
/*********************************************************************//**
Return the number of table locks for a transaction.
-The caller must be holding lock_sys->mutex. */
+The caller must be holding lock_sys.mutex. */
ulint
lock_number_of_tables_locked(
/*=========================*/
@@ -799,7 +770,6 @@ Set the lock system timeout event. */
void
lock_set_timeout_event();
/*====================*/
-#ifdef UNIV_DEBUG
/*********************************************************************//**
Checks that a transaction id is sensible, i.e., not in the future.
@return true if ok */
@@ -809,8 +779,8 @@ lock_check_trx_id_sanity(
trx_id_t trx_id, /*!< in: trx id */
const rec_t* rec, /*!< in: user record */
dict_index_t* index, /*!< in: index */
- const ulint* offsets) /*!< in: rec_get_offsets(rec, index) */
- MY_ATTRIBUTE((warn_unused_result));
+ const ulint* offsets); /*!< in: rec_get_offsets(rec, index) */
+#ifdef UNIV_DEBUG
/*******************************************************************//**
Check if the transaction holds any locks on the sys tables
or its records.
@@ -819,19 +789,21 @@ const lock_t*
lock_trx_has_sys_table_locks(
/*=========================*/
const trx_t* trx) /*!< in: transaction to check */
- MY_ATTRIBUTE((warn_unused_result));
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*******************************************************************//**
-Check if the transaction holds an exclusive lock on a record.
-@return whether the locks are held */
+/** Check if the transaction holds an explicit exclusive lock on a record.
+@param[in] trx transaction
+@param[in] table table
+@param[in] block leaf page
+@param[in] heap_no heap number identifying the record
+@return whether an explicit X-lock is held */
bool
-lock_trx_has_rec_x_lock(
-/*====================*/
+lock_trx_has_expl_x_lock(
const trx_t* trx, /*!< in: transaction to check */
const dict_table_t* table, /*!< in: table to check */
const buf_block_t* block, /*!< in: buffer block of the record */
ulint heap_no)/*!< in: record heap number */
- MY_ATTRIBUTE((warn_unused_result));
+ MY_ATTRIBUTE((nonnull, warn_unused_result));
#endif /* UNIV_DEBUG */
/** Lock operation struct */
@@ -843,11 +815,12 @@ struct lock_op_t{
typedef ib_mutex_t LockMutex;
/** The lock system struct */
-struct lock_sys_t{
- char pad1[CACHE_LINE_SIZE]; /*!< padding to prevent other
- memory update hotspots from
- residing on the same memory
- cache line */
+class lock_sys_t
+{
+ bool m_initialised;
+
+public:
+ MY_ALIGNED(CACHE_LINE_SIZE)
LockMutex mutex; /*!< Mutex protecting the
locks */
hash_table_t* rec_hash; /*!< hash table of the record
@@ -857,13 +830,13 @@ struct lock_sys_t{
hash_table_t* prdt_page_hash; /*!< hash table of the page
lock */
- char pad2[CACHE_LINE_SIZE]; /*!< Padding */
+ MY_ALIGNED(CACHE_LINE_SIZE)
LockMutex wait_mutex; /*!< Mutex protecting the
next two fields */
srv_slot_t* waiting_threads; /*!< Array of user threads
suspended while waiting for
locks within InnoDB, protected
- by the lock_sys->wait_mutex;
+ by the lock_sys.wait_mutex;
os_event_set() and
os_event_reset() on
waiting_threads[]->event
@@ -872,12 +845,7 @@ struct lock_sys_t{
srv_slot_t* last_slot; /*!< highest slot ever used
in the waiting_threads array,
protected by
- lock_sys->wait_mutex */
- ibool rollback_complete;
- /*!< TRUE if rollback of all
- recovered transactions is
- complete. Protected by
- lock_sys->mutex */
+ lock_sys.wait_mutex */
ulint n_lock_max_wait_time; /*!< Max wait time */
@@ -889,6 +857,38 @@ struct lock_sys_t{
bool timeout_thread_active; /*!< True if the timeout thread
is running */
+
+
+ /**
+ Constructor.
+
+ Some members may require late initialisation, thus we just mark object as
+ uninitialised. Real initialisation happens in create().
+ */
+ lock_sys_t(): m_initialised(false) {}
+
+
+ bool is_initialised() { return m_initialised; }
+
+
+ /**
+ Creates the lock system at database start.
+
+ @param[in] n_cells number of slots in lock hash table
+ */
+ void create(ulint n_cells);
+
+
+ /**
+ Resize the lock hash table.
+
+ @param[in] n_cells number of slots in lock hash table
+ */
+ void resize(ulint n_cells);
+
+
+ /** Closes the lock system at database shutdown. */
+ void close();
};
/*********************************************************************//**
@@ -1002,36 +1002,36 @@ lock_rec_free_all_from_discard_page(
const buf_block_t* block); /*!< in: page to be discarded */
/** The lock system */
-extern lock_sys_t* lock_sys;
+extern lock_sys_t lock_sys;
-/** Test if lock_sys->mutex can be acquired without waiting. */
+/** Test if lock_sys.mutex can be acquired without waiting. */
#define lock_mutex_enter_nowait() \
- (lock_sys->mutex.trylock(__FILE__, __LINE__))
+ (lock_sys.mutex.trylock(__FILE__, __LINE__))
-/** Test if lock_sys->mutex is owned. */
-#define lock_mutex_own() (lock_sys->mutex.is_owned())
+/** Test if lock_sys.mutex is owned. */
+#define lock_mutex_own() (lock_sys.mutex.is_owned())
-/** Acquire the lock_sys->mutex. */
+/** Acquire the lock_sys.mutex. */
#define lock_mutex_enter() do { \
- mutex_enter(&lock_sys->mutex); \
+ mutex_enter(&lock_sys.mutex); \
} while (0)
-/** Release the lock_sys->mutex. */
+/** Release the lock_sys.mutex. */
#define lock_mutex_exit() do { \
- lock_sys->mutex.exit(); \
+ lock_sys.mutex.exit(); \
} while (0)
-/** Test if lock_sys->wait_mutex is owned. */
-#define lock_wait_mutex_own() (lock_sys->wait_mutex.is_owned())
+/** Test if lock_sys.wait_mutex is owned. */
+#define lock_wait_mutex_own() (lock_sys.wait_mutex.is_owned())
-/** Acquire the lock_sys->wait_mutex. */
+/** Acquire the lock_sys.wait_mutex. */
#define lock_wait_mutex_enter() do { \
- mutex_enter(&lock_sys->wait_mutex); \
+ mutex_enter(&lock_sys.wait_mutex); \
} while (0)
-/** Release the lock_sys->wait_mutex. */
+/** Release the lock_sys.wait_mutex. */
#define lock_wait_mutex_exit() do { \
- lock_sys->wait_mutex.exit(); \
+ lock_sys.wait_mutex.exit(); \
} while (0)
#ifdef WITH_WSREP
diff --git a/storage/innobase/include/lock0lock.ic b/storage/innobase/include/lock0lock.ic
index 475f2ccedf1..c1c886f6832 100644
--- a/storage/innobase/include/lock0lock.ic
+++ b/storage/innobase/include/lock0lock.ic
@@ -54,7 +54,7 @@ lock_rec_hash(
ulint page_no)/*!< in: page number */
{
return(unsigned(hash_calc_hash(lock_rec_fold(space, page_no),
- lock_sys->rec_hash)));
+ lock_sys.rec_hash)));
}
/*********************************************************************//**
@@ -90,11 +90,11 @@ lock_hash_get(
ulint mode) /*!< in: lock mode */
{
if (mode & LOCK_PREDICATE) {
- return(lock_sys->prdt_hash);
+ return(lock_sys.prdt_hash);
} else if (mode & LOCK_PRDT_PAGE) {
- return(lock_sys->prdt_page_hash);
+ return(lock_sys.prdt_page_hash);
} else {
- return(lock_sys->rec_hash);
+ return(lock_sys.rec_hash);
}
}
diff --git a/storage/innobase/include/lock0prdt.h b/storage/innobase/include/lock0prdt.h
index e4e37776d22..9eb38ff8975 100644
--- a/storage/innobase/include/lock0prdt.h
+++ b/storage/innobase/include/lock0prdt.h
@@ -51,9 +51,8 @@ lock_prdt_lock(
SELECT FOR UPDATE */
ulint type_mode,
/*!< in: LOCK_PREDICATE or LOCK_PRDT_PAGE */
- que_thr_t* thr, /*!< in: query thread
+ que_thr_t* thr); /*!< in: query thread
(can be NULL if BTR_NO_LOCKING_FLAG) */
- mtr_t* mtr); /*!< in/out: mini-transaction */
/*********************************************************************//**
Acquire a "Page" lock on a block
@@ -107,7 +106,6 @@ Update predicate lock when page splits */
void
lock_prdt_update_split(
/*===================*/
- buf_block_t* block, /*!< in/out: page to be split */
buf_block_t* new_block, /*!< in/out: the new half page */
lock_prdt_t* prdt, /*!< in: MBR on the old page */
lock_prdt_t* new_prdt, /*!< in: MBR on the new page */
@@ -123,7 +121,6 @@ lock_prdt_update_parent(
buf_block_t* right_block, /*!< in/out: the new half page */
lock_prdt_t* left_prdt, /*!< in: MBR on the old page */
lock_prdt_t* right_prdt, /*!< in: MBR on the new page */
- lock_prdt_t* parent_prdt, /*!< in: original parent MBR */
ulint space, /*!< in: space id */
ulint page_no); /*!< in: page number */
diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h
index 1a03d1d0297..1aac5d20a59 100644
--- a/storage/innobase/include/lock0types.h
+++ b/storage/innobase/include/lock0types.h
@@ -32,7 +32,6 @@ Created 5/7/1996 Heikki Tuuri
#define lock_t ib_lock_t
struct lock_t;
-struct lock_sys_t;
struct lock_table_t;
/* Basic lock modes */
@@ -175,7 +174,7 @@ operator<<(std::ostream& out, const lock_rec_t& lock)
#endif
/* @} */
-/** Lock struct; protected by lock_sys->mutex */
+/** Lock struct; protected by lock_sys.mutex */
struct ib_lock_t
{
trx_t* trx; /*!< transaction owning the
diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h
index 4759e5a85f4..b215ba34a77 100644
--- a/storage/innobase/include/log0log.h
+++ b/storage/innobase/include/log0log.h
@@ -41,8 +41,8 @@ Created 12/9/1995 Heikki Tuuri
#include "os0event.h"
#include "os0file.h"
-/** Redo log group */
-struct log_group_t;
+/** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
+#define SRV_N_LOG_FILES_MAX 100
/** Magic value to use instead of log checksums when they are disabled */
#define LOG_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
@@ -50,13 +50,13 @@ struct log_group_t;
/* Margin for the free space in the smallest log group, before a new query
step which modifies the database, is started */
-#define LOG_CHECKPOINT_FREE_PER_THREAD (4 * UNIV_PAGE_SIZE)
-#define LOG_CHECKPOINT_EXTRA_FREE (8 * UNIV_PAGE_SIZE)
+#define LOG_CHECKPOINT_FREE_PER_THREAD (4U << srv_page_size_shift)
+#define LOG_CHECKPOINT_EXTRA_FREE (8U << srv_page_size_shift)
typedef ulint (*log_checksum_func_t)(const byte* log_block);
/** Pointer to the log checksum calculation function. Protected with
-log_sys->mutex. */
+log_sys.mutex. */
extern log_checksum_func_t log_checksum_algorithm_ptr;
/** Append a string to the log.
@@ -82,9 +82,7 @@ log_free_check(void);
/** Extends the log buffer.
@param[in] len requested minimum size in bytes */
-void
-log_buffer_extend(
- ulint len);
+void log_buffer_extend(ulong len);
/** Check margin not to overwrite transaction log from the last checkpoint.
If would estimate the log write to exceed the log_group_capacity,
@@ -138,7 +136,7 @@ log_get_flush_lsn(void);
/*=============*/
/****************************************************************
Gets the log group capacity. It is OK to read the value without
-holding log_sys->mutex because it is constant.
+holding log_sys.mutex because it is constant.
@return log group capacity */
UNIV_INLINE
lsn_t
@@ -152,14 +150,7 @@ UNIV_INLINE
lsn_t
log_get_max_modified_age_async(void);
/*================================*/
-/** Initializes the redo logging subsystem. */
-void
-log_sys_init();
-/** Initialize the redo log.
-@param[in] n_files number of files */
-void
-log_init(ulint n_files);
/** Calculate the recommended highest values for lsn - last_checkpoint_lsn
and lsn - buf_get_oldest_modification().
@param[in] file_size requested innodb_log_file_size
@@ -171,12 +162,6 @@ log_set_capacity(ulonglong file_size)
MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
-Completes an i/o to a log file. */
-void
-log_io_complete(
-/*============*/
- log_group_t* group); /*!< in: log group */
-/******************************************************//**
This function is called, e.g., when a transaction wants to commit. It checks
that the log has been written to the log file up to the last log entry written
by the transaction. If there is a flush running, it waits and checks if the
@@ -235,13 +220,9 @@ shutdown. This function also writes all log in log files to the log archive. */
void
logs_empty_and_mark_files_at_shutdown(void);
/*=======================================*/
-/** Read a log group header page to log_sys->checkpoint_buf.
-@param[in] group log group
-@param[in] header 0 or LOG_CHEKCPOINT_1 or LOG_CHECKPOINT2 */
-void
-log_group_header_read(
- const log_group_t* group,
- ulint header);
+/** Read a log group header page to log_sys.checkpoint_buf.
+@param[in] header 0 or LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */
+void log_header_read(ulint header);
/** Write checkpoint info to the log header and invoke log_mutex_exit().
@param[in] sync whether to wait for the write to complete
@param[in] end_lsn start LSN of the MLOG_CHECKPOINT mini-transaction */
@@ -262,16 +243,6 @@ objects! */
void
log_check_margins(void);
-/********************************************************//**
-Sets the field values in group to correspond to a given lsn. For this function
-to work, the values must already be correctly initialized to correspond to
-some lsn, for instance, a checkpoint lsn. */
-void
-log_group_set_fields(
-/*=================*/
- log_group_t* group, /*!< in/out: group */
- lsn_t lsn); /*!< in: lsn for which the values should be
- set */
/************************************************************//**
Gets a log block flush bit.
@return TRUE if this block was the first to be written in a log flush */
@@ -322,11 +293,10 @@ log_block_calc_checksum_crc32(
const byte* block);
/** Calculates the checksum for a log block using the "no-op" algorithm.
-@param[in] block the redo log block
@return the calculated checksum value */
UNIV_INLINE
ulint
-log_block_calc_checksum_none(const byte* block);
+log_block_calc_checksum_none(const byte*);
/************************************************************//**
Gets a log block checksum field value.
@@ -403,14 +373,6 @@ Refreshes the statistics used to print per-second averages. */
void
log_refresh_stats(void);
/*===================*/
-/********************************************************//**
-Closes all log groups. */
-void
-log_group_close_all(void);
-/*=====================*/
-/** Shut down the redo log subsystem. */
-void
-log_shutdown();
/** Whether to generate and require checksums on the redo log pages */
extern my_bool innodb_log_checksums;
@@ -422,8 +384,6 @@ extern my_bool innodb_log_checksums;
/* The counting of lsn's starts from this value: this must be non-zero */
#define LOG_START_LSN ((lsn_t) (16 * OS_FILE_LOG_BLOCK_SIZE))
-#define LOG_BUFFER_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE)
-
/* Offsets of a log block header */
#define LOG_BLOCK_HDR_NO 0 /* block number which must be > 0 and
is allowed to wrap around at 2G; the
@@ -447,7 +407,7 @@ extern my_bool innodb_log_checksums;
from this offset in this log block,
if value not 0 */
#define LOG_BLOCK_CHECKPOINT_NO 8 /* 4 lower bytes of the value of
- log_sys->next_checkpoint_no when the
+ log_sys.next_checkpoint_no when the
log block was last written to: if the
block has not yet been written full,
this value is only updated before a
@@ -470,7 +430,7 @@ extern my_bool innodb_log_checksums;
#define LOG_CHECKPOINT_LSN 8
/** Byte offset of the log record corresponding to LOG_CHECKPOINT_LSN */
#define LOG_CHECKPOINT_OFFSET 16
-/** log_sys_t::buf_size at the time of the checkpoint (not used) */
+/** srv_log_buffer_size at the time of the checkpoint (not used) */
#define LOG_CHECKPOINT_LOG_BUF_SIZE 24
/** MariaDB 10.2.5 encrypted redo log encryption key version (32 bits)*/
#define LOG_CHECKPOINT_CRYPT_KEY 32
@@ -512,16 +472,20 @@ or the MySQL version that created the redo log file. */
IB_TO_STR(MYSQL_VERSION_MINOR) "." \
IB_TO_STR(MYSQL_VERSION_PATCH)
-/** The redo log format identifier corresponding to the current format version.
-Stored in LOG_HEADER_FORMAT.
+/** The original (not version-tagged) InnoDB redo log format */
+#define LOG_HEADER_FORMAT_3_23 0
+/** The MySQL 5.7.9/MariaDB 10.2.2 log format */
+#define LOG_HEADER_FORMAT_10_2 1
+/** The MariaDB 10.3.2 log format.
To prevent crash-downgrade to earlier 10.2 due to the inability to
roll back a retroactively introduced TRX_UNDO_RENAME_TABLE undo log record,
MariaDB 10.2.18 and later will use the 10.3 format, but LOG_HEADER_SUBFORMAT
1 instead of 0. MariaDB 10.3 will use subformat 0 (5.7-style TRUNCATE) or 2
(MDEV-13564 backup-friendly TRUNCATE). */
-#define LOG_HEADER_FORMAT_CURRENT 103
-/** The old MariaDB 10.2.2..10.2.17 log format */
-#define LOG_HEADER_FORMAT_10_2 1
+#define LOG_HEADER_FORMAT_10_3 103
+/** The redo log format identifier corresponding to the current format version.
+Stored in LOG_HEADER_FORMAT. */
+#define LOG_HEADER_FORMAT_CURRENT LOG_HEADER_FORMAT_10_3
/** Future MariaDB 10.4 log format */
#define LOG_HEADER_FORMAT_10_4 104
/** Encrypted MariaDB redo log */
@@ -540,102 +504,43 @@ MariaDB 10.2.18 and later will use the 10.3 format, but LOG_HEADER_SUBFORMAT
header */
#define LOG_FILE_HDR_SIZE (4 * OS_FILE_LOG_BLOCK_SIZE)
-/** The state of a log group */
-enum log_group_state_t {
- /** No corruption detected */
- LOG_GROUP_OK,
- /** Corrupted */
- LOG_GROUP_CORRUPTED
-};
-
typedef ib_mutex_t LogSysMutex;
typedef ib_mutex_t FlushOrderMutex;
-/** Log group consists of a number of log files, each of the same size; a log
-group is implemented as a space in the sense of the module fil0fil.
-Currently, this is only protected by log_sys->mutex. However, in the case
-of log_write_up_to(), we will access some members only with the protection
-of log_sys->write_mutex, which should affect nothing for now. */
-struct log_group_t{
- /** number of files in the group */
- ulint n_files;
- /** format of the redo log: e.g., LOG_HEADER_FORMAT_CURRENT */
- uint32_t format;
- /** redo log subformat: 0 with separately logged TRUNCATE,
- 1 with fully redo-logged TRUNCATE */
- uint32_t subformat;
- /** individual log file size in bytes, including the header */
- lsn_t file_size;
- /** corruption status */
- log_group_state_t state;
- /** lsn used to fix coordinates within the log group */
- lsn_t lsn;
- /** the byte offset of the above lsn */
- lsn_t lsn_offset;
- /** unaligned buffers */
- byte** file_header_bufs_ptr;
- /** buffers for each file header in the group */
- byte** file_header_bufs;
-
- /** used only in recovery: recovery scan succeeded up to this
- lsn in this log group */
- lsn_t scanned_lsn;
- /** unaligned checkpoint header */
- byte* checkpoint_buf_ptr;
- /** buffer for writing a checkpoint header */
- byte* checkpoint_buf;
-
- /** @return whether the redo log is encrypted */
- bool is_encrypted() const
- {
- return((format & LOG_HEADER_FORMAT_ENCRYPTED) != 0);
- }
-
- /** @return capacity in bytes */
- inline lsn_t capacity() const
- {
- return((file_size - LOG_FILE_HDR_SIZE) * n_files);
- }
-};
-
/** Redo log buffer */
struct log_t{
- char pad1[CACHE_LINE_SIZE];
- /*!< Padding to prevent other memory
- update hotspots from residing on the
- same memory cache line */
+ MY_ALIGNED(CACHE_LINE_SIZE)
lsn_t lsn; /*!< log sequence number */
- ulint buf_free; /*!< first free offset within the log
+ ulong buf_free; /*!< first free offset within the log
buffer in use */
- char pad2[CACHE_LINE_SIZE];/*!< Padding */
+ MY_ALIGNED(CACHE_LINE_SIZE)
LogSysMutex mutex; /*!< mutex protecting the log */
- char pad3[CACHE_LINE_SIZE]; /*!< Padding */
- LogSysMutex write_mutex; /*!< mutex protecting writing to log
- file and accessing to log_group_t */
- char pad4[CACHE_LINE_SIZE];/*!< Padding */
+ MY_ALIGNED(CACHE_LINE_SIZE)
+ LogSysMutex write_mutex; /*!< mutex protecting writing to log */
+ MY_ALIGNED(CACHE_LINE_SIZE)
FlushOrderMutex log_flush_order_mutex;/*!< mutex to serialize access to
the flush list when we are putting
dirty blocks in the list. The idea
behind this mutex is to be able
- to release log_sys->mutex during
+ to release log_sys.mutex during
mtr_commit and still ensure that
insertions in the flush_list happen
in the LSN order. */
- byte* buf_ptr; /*!< unaligned log buffer, which should
- be of double of buf_size */
- byte* buf; /*!< log buffer currently in use;
- this could point to either the first
- half of the aligned(buf_ptr) or the
+ byte* buf; /*!< Memory of twice the
+ srv_log_buffer_size is
+ allocated here. However, this pointer
+ alternates between the first half and the
+ second half, so that log
write/flush to disk don't block
concurrent mtrs which will write
- log to this buffer */
+ log to this buffer. The pointer must be
+ switched back to the first half before
+ freeing/resizing. */
bool first_in_use; /*!< true if buf points to the first
half of the aligned(buf_ptr), false
if the second half */
- ulint buf_size; /*!< log buffer size of each in bytes */
- ulint max_buf_free; /*!< recommended maximum value of
+ ulong max_buf_free; /*!< recommended maximum value of
buf_free for the buffer in use, after
which the buffer is flushed */
bool check_flush_or_checkpoint;
@@ -647,12 +552,72 @@ struct log_t{
max_checkpoint_age; this flag is
peeked at by log_free_check(), which
does not reserve the log mutex */
- /** the redo log */
- log_group_t log;
+
+ /** Log files. Protected by mutex or write_mutex. */
+ struct files {
+ /** number of files */
+ ulint n_files;
+ /** format of the redo log: e.g., LOG_HEADER_FORMAT_CURRENT */
+ uint32_t format;
+ /** redo log subformat: 0 with separately logged TRUNCATE,
+ 2 with fully redo-logged TRUNCATE (1 in MariaDB 10.2) */
+ uint32_t subformat;
+ /** individual log file size in bytes, including the header */
+ lsn_t file_size;
+ /** lsn used to fix coordinates within the log group */
+ lsn_t lsn;
+ /** the byte offset of the above lsn */
+ lsn_t lsn_offset;
+
+ /** unaligned buffers */
+ byte* file_header_bufs_ptr;
+ /** buffers for each file header in the group */
+ byte* file_header_bufs[SRV_N_LOG_FILES_MAX];
+
+ /** used only in recovery: recovery scan succeeded up to this
+ lsn in this log group */
+ lsn_t scanned_lsn;
+
+ /** @return whether the redo log is encrypted */
+ bool is_encrypted() const { return format & LOG_HEADER_FORMAT_ENCRYPTED; }
+ /** @return capacity in bytes */
+ lsn_t capacity() const{ return (file_size - LOG_FILE_HDR_SIZE) * n_files; }
+ /** Calculate the offset of a log sequence number.
+ @param[in] lsn log sequence number
+ @return offset within the log */
+ inline lsn_t calc_lsn_offset(lsn_t lsn) const;
+
+ /** Set the field values to correspond to a given lsn. */
+ void set_fields(lsn_t lsn)
+ {
+ lsn_offset = calc_lsn_offset(lsn);
+ this->lsn = lsn;
+ }
+
+ /** Read a log segment to log_sys.buf.
+ @param[in,out] start_lsn in: read area start,
+ out: the last read valid lsn
+ @param[in] end_lsn read area end
+ @return whether no invalid blocks (e.g., checksum mismatch) were found */
+ bool read_log_seg(lsn_t* start_lsn, lsn_t end_lsn);
+
+ /** Initialize the redo log buffer.
+ @param[in] n_files number of files */
+ void create(ulint n_files);
+
+ /** Close the redo log buffer. */
+ void close()
+ {
+ ut_free(file_header_bufs_ptr);
+ n_files = 0;
+ file_header_bufs_ptr = NULL;
+ memset(file_header_bufs, 0, sizeof file_header_bufs);
+ }
+ } log;
/** The fields involved in the log buffer flush @{ */
- ulint buf_next_to_write;/*!< first offset in the log buffer
+ ulong buf_next_to_write;/*!< first offset in the log buffer
where the byte content may not yet be
written to file, e.g., the start
offset of a log record catenated
@@ -669,11 +634,11 @@ struct log_t{
AND flushed to disk */
ulint n_pending_flushes;/*!< number of currently
pending flushes; protected by
- log_sys_t::mutex */
+ log_sys.mutex */
os_event_t flush_event; /*!< this event is in the reset state
when a flush is running;
os_event_set() and os_event_reset()
- are protected by log_sys_t::mutex */
+ are protected by log_sys.mutex */
ulint n_log_ios; /*!< number of log i/os initiated thus
far */
ulint n_log_ios_old; /*!< number of log i/o's at the
@@ -719,7 +684,7 @@ struct log_t{
/*!< extra redo log records to write
during a checkpoint, or NULL if none.
The pointer is protected by
- log_sys->mutex, and the data must
+ log_sys.mutex, and the data must
remain constant as long as this
pointer is not NULL. */
ulint n_pending_checkpoint_writes;
@@ -729,73 +694,105 @@ struct log_t{
checkpoint write is running; a thread
should wait for this without owning
the log mutex */
- byte* checkpoint_buf_ptr;/* unaligned checkpoint header */
- byte* checkpoint_buf; /*!< checkpoint header is read to this
- buffer */
+
+ /** buffer for checkpoint header */
+ MY_ALIGNED(OS_FILE_LOG_BLOCK_SIZE)
+ byte checkpoint_buf[OS_FILE_LOG_BLOCK_SIZE];
/* @} */
- /** @return whether the redo log is encrypted */
- bool is_encrypted() const
- {
- return(log.is_encrypted());
- }
+private:
+ bool m_initialised;
+public:
+ /**
+ Constructor.
+
+ Some members may require late initialisation, thus we just mark object as
+ uninitialised. Real initialisation happens in create().
+ */
+ log_t(): m_initialised(false) {}
+
+ /** @return whether the redo log is encrypted */
+ bool is_encrypted() const { return(log.is_encrypted()); }
+
+ bool is_initialised() { return m_initialised; }
+
+ /** Complete an asynchronous checkpoint write. */
+ void complete_checkpoint();
+
+ /** Initialise the redo log subsystem. */
+ void create();
+
+ /** Shut down the redo log subsystem. */
+ void close();
};
/** Redo log system */
-extern log_t* log_sys;
+extern log_t log_sys;
+
+/** Calculate the offset of a log sequence number.
+@param[in] lsn log sequence number
+@return offset within the log */
+inline lsn_t log_t::files::calc_lsn_offset(lsn_t lsn) const
+{
+ ut_ad(this == &log_sys.log);
+ /* The lsn parameters are updated while holding both the mutexes
+ and it is ok to have either of them while reading */
+ ut_ad(log_sys.mutex.is_owned() || log_sys.write_mutex.is_owned());
+ const lsn_t group_size= capacity();
+ lsn_t l= lsn - this->lsn;
+ if (longlong(l) < 0) {
+ l= lsn_t(-longlong(l)) % group_size;
+ l= group_size - l;
+ }
+
+ l+= lsn_offset - LOG_FILE_HDR_SIZE * (1 + lsn_offset / file_size);
+ l%= group_size;
+ return l + LOG_FILE_HDR_SIZE * (1 + l / (file_size - LOG_FILE_HDR_SIZE));
+}
/** Test if flush order mutex is owned. */
#define log_flush_order_mutex_own() \
- mutex_own(&log_sys->log_flush_order_mutex)
+ mutex_own(&log_sys.log_flush_order_mutex)
/** Acquire the flush order mutex. */
#define log_flush_order_mutex_enter() do { \
- mutex_enter(&log_sys->log_flush_order_mutex); \
+ mutex_enter(&log_sys.log_flush_order_mutex); \
} while (0)
/** Release the flush order mutex. */
# define log_flush_order_mutex_exit() do { \
- mutex_exit(&log_sys->log_flush_order_mutex); \
+ mutex_exit(&log_sys.log_flush_order_mutex); \
} while (0)
/** Test if log sys mutex is owned. */
-#define log_mutex_own() mutex_own(&log_sys->mutex)
+#define log_mutex_own() mutex_own(&log_sys.mutex)
/** Test if log sys write mutex is owned. */
-#define log_write_mutex_own() mutex_own(&log_sys->write_mutex)
+#define log_write_mutex_own() mutex_own(&log_sys.write_mutex)
/** Acquire the log sys mutex. */
-#define log_mutex_enter() mutex_enter(&log_sys->mutex)
+#define log_mutex_enter() mutex_enter(&log_sys.mutex)
/** Acquire the log sys write mutex. */
-#define log_write_mutex_enter() mutex_enter(&log_sys->write_mutex)
+#define log_write_mutex_enter() mutex_enter(&log_sys.write_mutex)
/** Acquire all the log sys mutexes. */
#define log_mutex_enter_all() do { \
- mutex_enter(&log_sys->write_mutex); \
- mutex_enter(&log_sys->mutex); \
+ mutex_enter(&log_sys.write_mutex); \
+ mutex_enter(&log_sys.mutex); \
} while (0)
/** Release the log sys mutex. */
-#define log_mutex_exit() mutex_exit(&log_sys->mutex)
+#define log_mutex_exit() mutex_exit(&log_sys.mutex)
/** Release the log sys write mutex.*/
-#define log_write_mutex_exit() mutex_exit(&log_sys->write_mutex)
+#define log_write_mutex_exit() mutex_exit(&log_sys.write_mutex)
/** Release all the log sys mutexes. */
#define log_mutex_exit_all() do { \
- mutex_exit(&log_sys->mutex); \
- mutex_exit(&log_sys->write_mutex); \
+ mutex_exit(&log_sys.mutex); \
+ mutex_exit(&log_sys.write_mutex); \
} while (0)
-/** Calculate the offset of an lsn within a log group.
-@param[in] lsn log sequence number
-@param[in] group log group
-@return offset within the log group */
-lsn_t
-log_group_calc_lsn_offset(
- lsn_t lsn,
- const log_group_t* group);
-
/* log scrubbing speed, in bytes/sec */
extern ulonglong innodb_scrub_log_speed;
diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic
index 58da7bacc6f..87d55f9e01d 100644
--- a/storage/innobase/include/log0log.ic
+++ b/storage/innobase/include/log0log.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,12 +26,12 @@ Created 12/9/1995 Heikki Tuuri
#include "mach0data.h"
#include "srv0mon.h"
-#include "srv0srv.h"
#include "ut0crc32.h"
#ifdef UNIV_LOG_LSN_DEBUG
#include "mtr0types.h"
#endif /* UNIV_LOG_LSN_DEBUG */
+extern ulong srv_log_buffer_size;
/************************************************************//**
Gets a log block flush bit.
@@ -241,12 +241,10 @@ log_block_calc_checksum_crc32(
}
/** Calculates the checksum for a log block using the "no-op" algorithm.
-@param[in] block log block
@return checksum */
UNIV_INLINE
ulint
-log_block_calc_checksum_none(
- const byte* block)
+log_block_calc_checksum_none(const byte*)
{
return(LOG_NO_CHECKSUM_MAGIC);
}
@@ -330,15 +328,15 @@ log_reserve_and_write_fast(
len - SIZE_OF_MLOG_CHECKPOINT]
? 0
: 1
- + mach_get_compressed_size(log_sys->lsn >> 32)
- + mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL);
+ + mach_get_compressed_size(log_sys.lsn >> 32)
+ + mach_get_compressed_size(log_sys.lsn & 0xFFFFFFFFUL);
#endif /* UNIV_LOG_LSN_DEBUG */
const ulint data_len = len
#ifdef UNIV_LOG_LSN_DEBUG
+ lsn_len
#endif /* UNIV_LOG_LSN_DEBUG */
- + log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE;
+ + log_sys.buf_free % OS_FILE_LOG_BLOCK_SIZE;
if (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) {
@@ -348,44 +346,44 @@ log_reserve_and_write_fast(
return(0);
}
- *start_lsn = log_sys->lsn;
+ *start_lsn = log_sys.lsn;
#ifdef UNIV_LOG_LSN_DEBUG
if (lsn_len) {
/* Write the LSN pseudo-record. */
- byte* b = &log_sys->buf[log_sys->buf_free];
+ byte* b = &log_sys.buf[log_sys.buf_free];
*b++ = MLOG_LSN | (MLOG_SINGLE_REC_FLAG & *(const byte*) str);
/* Write the LSN in two parts,
as a pseudo page number and space id. */
- b += mach_write_compressed(b, log_sys->lsn >> 32);
- b += mach_write_compressed(b, log_sys->lsn & 0xFFFFFFFFUL);
- ut_a(b - lsn_len == &log_sys->buf[log_sys->buf_free]);
+ b += mach_write_compressed(b, log_sys.lsn >> 32);
+ b += mach_write_compressed(b, log_sys.lsn & 0xFFFFFFFFUL);
+ ut_a(b - lsn_len == &log_sys.buf[log_sys.buf_free]);
::memcpy(b, str, len);
len += lsn_len;
} else
#endif /* UNIV_LOG_LSN_DEBUG */
- memcpy(log_sys->buf + log_sys->buf_free, str, len);
+ memcpy(log_sys.buf + log_sys.buf_free, str, len);
log_block_set_data_len(
reinterpret_cast<byte*>(ut_align_down(
- log_sys->buf + log_sys->buf_free,
+ log_sys.buf + log_sys.buf_free,
OS_FILE_LOG_BLOCK_SIZE)),
data_len);
- log_sys->buf_free += len;
+ log_sys.buf_free += ulong(len);
- ut_ad(log_sys->buf_free <= log_sys->buf_size);
+ ut_ad(log_sys.buf_free <= srv_log_buffer_size);
- log_sys->lsn += len;
+ log_sys.lsn += len;
MONITOR_SET(MONITOR_LSN_CHECKPOINT_AGE,
- log_sys->lsn - log_sys->last_checkpoint_lsn);
+ log_sys.lsn - log_sys.last_checkpoint_lsn);
- return(log_sys->lsn);
+ return(log_sys.lsn);
}
/************************************************************//**
@@ -400,7 +398,7 @@ log_get_lsn(void)
log_mutex_enter();
- lsn = log_sys->lsn;
+ lsn = log_sys.lsn;
log_mutex_exit();
@@ -418,7 +416,7 @@ log_get_flush_lsn(void)
log_mutex_enter();
- lsn = log_sys->flushed_to_disk_lsn;
+ lsn = log_sys.flushed_to_disk_lsn;
log_mutex_exit();
@@ -435,11 +433,11 @@ log_get_lsn_nowait(void)
{
lsn_t lsn=0;
- if (!mutex_enter_nowait(&(log_sys->mutex))) {
+ if (!mutex_enter_nowait(&(log_sys.mutex))) {
- lsn = log_sys->lsn;
+ lsn = log_sys.lsn;
- mutex_exit(&(log_sys->mutex));
+ mutex_exit(&(log_sys.mutex));
}
return(lsn);
@@ -447,14 +445,14 @@ log_get_lsn_nowait(void)
/****************************************************************
Gets the log group capacity. It is OK to read the value without
-holding log_sys->mutex because it is constant.
+holding log_sys.mutex because it is constant.
@return log group capacity */
UNIV_INLINE
lsn_t
log_get_capacity(void)
/*==================*/
{
- return(log_sys->log_group_capacity);
+ return(log_sys.log_group_capacity);
}
/****************************************************************
@@ -466,7 +464,7 @@ lsn_t
log_get_max_modified_age_async(void)
/*================================*/
{
- return(log_sys->max_modified_age_async);
+ return(log_sys.max_modified_age_async);
}
/***********************************************************************//**
@@ -498,7 +496,7 @@ log_free_check(void)
sync_allowed_latches(latches,
latches + UT_ARR_SIZE(latches))));
- if (log_sys->check_flush_or_checkpoint) {
+ if (log_sys.check_flush_or_checkpoint) {
log_check_margins();
}
diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h
index d3c891c9cba..d15ec19d86b 100644
--- a/storage/innobase/include/log0recv.h
+++ b/storage/innobase/include/log0recv.h
@@ -96,20 +96,6 @@ void
recv_sys_debug_free(void);
/*=====================*/
-/** Read a log segment to a buffer.
-@param[out] buf buffer
-@param[in] group redo log files
-@param[in, out] start_lsn in : read area start, out: the last read valid lsn
-@param[in] end_lsn read area end
-@param[out] invalid_block - invalid, (maybe incompletely written) block encountered
-@return false, if invalid block encountered (e.g checksum mismatch), true otherwise */
-bool
-log_group_read_log_seg(
- byte* buf,
- const log_group_t* group,
- lsn_t* start_lsn,
- lsn_t end_lsn);
-
/********************************************************//**
Reset the state of the recovery system variables. */
void
@@ -227,7 +213,7 @@ struct recv_sys_t{
ib_mutex_t writer_mutex;/*!< mutex coordinating
flushing between recv_writer_thread and
the recovery thread. */
- os_event_t flush_start;/*!< event to acticate
+ os_event_t flush_start;/*!< event to activate
page cleaner threads */
os_event_t flush_end;/*!< event to signal that the page
cleaner has finished the request */
@@ -243,6 +229,7 @@ struct recv_sys_t{
/*!< this is TRUE when a log rec application
batch is running */
byte* buf; /*!< buffer for parsing log records */
+ size_t buf_size; /*!< size of buf */
ulint len; /*!< amount of data in buf */
lsn_t parse_start_lsn;
/*!< this is the lsn from which we were able to
@@ -330,7 +317,7 @@ extern bool recv_no_ibuf_operations;
extern bool recv_needed_recovery;
#ifdef UNIV_DEBUG
/** TRUE if writing to the redo log (mtr_commit) is forbidden.
-Protected by log_sys->mutex. */
+Protected by log_sys.mutex. */
extern bool recv_no_log_write;
#endif /* UNIV_DEBUG */
@@ -341,11 +328,11 @@ extern bool recv_lsn_checks_on;
/** Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many
times! */
-#define RECV_PARSING_BUF_SIZE (2 * 1024 * 1024)
+#define RECV_PARSING_BUF_SIZE (2U << 20)
/** Size of block reads when the log groups are scanned forward to do a
roll-forward */
-#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE)
+#define RECV_SCAN_SIZE (4U << srv_page_size_shift)
/** This many frames must be left free in the buffer pool when we scan
the log and store the scanned log records in the buffer pool: we will
diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h
index e44f3f730af..2cdb307ea96 100644
--- a/storage/innobase/include/mem0mem.h
+++ b/storage/innobase/include/mem0mem.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -71,11 +71,11 @@ allocations of small buffers. */
#define MEM_BLOCK_START_SIZE 64
#define MEM_BLOCK_STANDARD_SIZE \
- (UNIV_PAGE_SIZE >= 16384 ? 8000 : MEM_MAX_ALLOC_IN_BUF)
+ (srv_page_size >= 16384 ? 8000 : MEM_MAX_ALLOC_IN_BUF)
/** If a memory heap is allowed to grow into the buffer pool, the following
is the maximum size for a single allocated buffer: */
-#define MEM_MAX_ALLOC_IN_BUF (UNIV_PAGE_SIZE - 200)
+#define MEM_MAX_ALLOC_IN_BUF (srv_page_size - 200)
/** Space needed when allocating for a user a field of length N.
The space is allocated only in multiples of UNIV_MEM_ALIGNMENT. */
@@ -294,26 +294,42 @@ mem_strdupl(
const char* str, /*!< in: string to be copied */
ulint len); /*!< in: length of str, in bytes */
-/** Duplicates a NUL-terminated string, allocated from a memory heap.
+/** Duplicate a block of data, allocated from a memory heap.
+@param[in] heap memory heap where string is allocated
+@param[in] data block of data to be copied
+@param[in] len length of data, in bytes
+@return own: a copy of data */
+inline
+void*
+mem_heap_dup(mem_heap_t* heap, const void* data, size_t len)
+{
+ return(memcpy(mem_heap_alloc(heap, len), data, len));
+}
+
+/** Duplicate a NUL-terminated string, allocated from a memory heap.
@param[in] heap memory heap where string is allocated
@param[in] str string to be copied
@return own: a copy of the string */
+inline
char*
-mem_heap_strdup(
- mem_heap_t* heap,
- const char* str);
+mem_heap_strdup(mem_heap_t* heap, const char* str)
+{
+ return(static_cast<char*>(mem_heap_dup(heap, str, strlen(str) + 1)));
+}
-/**********************************************************************//**
-Makes a NUL-terminated copy of a nonterminated string,
-allocated from a memory heap.
-@return own: a copy of the string */
-UNIV_INLINE
+/** Duplicate a string, allocated from a memory heap.
+@param[in] heap memory heap where string is allocated
+@param[in] str string to be copied
+@param[in] len length of str, in bytes
+@return own: a NUL-terminated copy of str */
+inline
char*
-mem_heap_strdupl(
-/*=============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* str, /*!< in: string to be copied */
- ulint len); /*!< in: length of str, in bytes */
+mem_heap_strdupl(mem_heap_t* heap, const char* str, size_t len)
+{
+ char* s = static_cast<char*>(mem_heap_alloc(heap, len + 1));
+ s[len] = 0;
+ return(static_cast<char*>(memcpy(s, str, len)));
+}
/**********************************************************************//**
Concatenate two strings and return the result, using a memory heap.
@@ -325,16 +341,6 @@ mem_heap_strcat(
const char* s1, /*!< in: string 1 */
const char* s2); /*!< in: string 2 */
-/**********************************************************************//**
-Duplicate a block of data, allocated from a memory heap.
-@return own: a copy of the data */
-void*
-mem_heap_dup(
-/*=========*/
- mem_heap_t* heap, /*!< in: memory heap where copy is allocated */
- const void* data, /*!< in: data to be copied */
- ulint len); /*!< in: length of data, in bytes */
-
/****************************************************************//**
A simple sprintf replacement that dynamically allocates the space for the
formatted string from the given heap. This supports a very limited set of
@@ -458,13 +464,14 @@ public:
allocated by mem_heap_allocator) can be used as a hint to the
implementation about where the new memory should be allocated in
order to improve locality. */
- pointer allocate(size_type n, const_pointer hint = 0)
+ pointer allocate(size_type n)
{
return(reinterpret_cast<pointer>(
mem_heap_alloc(m_heap, n * sizeof(T))));
}
+ pointer allocate(size_type n, const_pointer) { return allocate(n); }
- void deallocate(pointer p, size_type n) { }
+ void deallocate(pointer, size_type) {}
pointer address (reference r) const { return(&r); }
diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic
index 4d76f07694d..405b7338b51 100644
--- a/storage/innobase/include/mem0mem.ic
+++ b/storage/innobase/include/mem0mem.ic
@@ -277,7 +277,8 @@ mem_heap_free_heap_top(
ut_ad(block);
/* Set the free field of block */
- mem_block_set_free(block, old_top - (byte*) block);
+ mem_block_set_free(block,
+ ulint(old_top - reinterpret_cast<byte*>(block)));
ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
UNIV_MEM_FREE(old_top, (byte*) block + block->len - old_top);
@@ -547,7 +548,7 @@ mem_heap_get_size(
size = heap->total_size;
if (heap->free_block) {
- size += UNIV_PAGE_SIZE;
+ size += srv_page_size;
}
return(size);
@@ -580,20 +581,3 @@ mem_strdupl(
s[len] = 0;
return(static_cast<char*>(memcpy(s, str, len)));
}
-
-/**********************************************************************//**
-Makes a NUL-terminated copy of a nonterminated string,
-allocated from a memory heap.
-@return own: a copy of the string */
-UNIV_INLINE
-char*
-mem_heap_strdupl(
-/*=============*/
- mem_heap_t* heap, /*!< in: memory heap where string is allocated */
- const char* str, /*!< in: string to be copied */
- ulint len) /*!< in: length of str, in bytes */
-{
- char* s = (char*) mem_heap_alloc(heap, len + 1);
- s[len] = 0;
- return((char*) memcpy(s, str, len));
-}
diff --git a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic
index dd68ea25613..5c72c7cb5da 100644
--- a/storage/innobase/include/mtr0log.ic
+++ b/storage/innobase/include/mtr0log.ic
@@ -225,7 +225,7 @@ mlog_write_initial_log_record_fast(
ut_ad(log_ptr);
ut_d(mtr->memo_modify_page(ptr));
- page = (const byte*) ut_align_down(ptr, UNIV_PAGE_SIZE);
+ page = (const byte*) ut_align_down(ptr, srv_page_size);
space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
offset = mach_read_from_4(page + FIL_PAGE_OFFSET);
diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h
index bdd3a6a67b9..0c157cb87cf 100644
--- a/storage/innobase/include/mtr0mtr.h
+++ b/storage/innobase/include/mtr0mtr.h
@@ -29,9 +29,7 @@ Created 11/26/1995 Heikki Tuuri
#define mtr0mtr_h
#include "univ.i"
-#include "log0types.h"
-#include "mtr0types.h"
-#include "buf0types.h"
+#include "fil0fil.h"
#include "dyn0buf.h"
/** Start a mini-transaction. */
@@ -73,13 +71,6 @@ savepoint. */
(m)->memo_release((o), (t))
#ifdef UNIV_DEBUG
-
-/** Check if memo contains the given item. */
-#define mtr_is_block_fix(m, o, t, table) mtr_memo_contains(m, o, t)
-
-/** Check if memo contains the given page. */
-#define mtr_is_page_fix(m, p, t, table) mtr_memo_contains_page(m, p, t)
-
/** Check if memo contains the given item.
@return TRUE if contains */
#define mtr_memo_contains(m, o, t) \
@@ -133,9 +124,6 @@ savepoint. */
@return true if the mtr is dirtying a clean page. */
#define mtr_block_dirtied(b) mtr_t::is_block_dirtied((b))
-/** Forward declaration of a tablespace object */
-struct fil_space_t;
-
/** Append records to the system-wide redo log buffer.
@param[in] log redo log records */
void
@@ -187,12 +175,6 @@ struct mtr_t {
/** User tablespace that is being modified by the
mini-transaction */
fil_space_t* m_user_space;
- /** Undo tablespace that is being modified by the
- mini-transaction */
- fil_space_t* m_undo_space;
- /** System tablespace if it is being modified by the
- mini-transaction */
- fil_space_t* m_sys_space;
/** State of the transaction */
mtr_state_t m_state;
@@ -216,17 +198,9 @@ struct mtr_t {
~mtr_t() { }
- /** Release the free extents that was reserved using
- fsp_reserve_free_extents(). This is equivalent to calling
- fil_space_release_free_extents(). This is intended for use
- with index pages.
- @param[in] n_reserved number of reserved extents */
- void release_free_extents(ulint n_reserved);
-
/** Start a mini-transaction.
- @param sync true if it is a synchronous mini-transaction
- @param read_only true if read only mini-transaction */
- void start(bool sync = true, bool read_only = false);
+ @param sync true if it is a synchronous mini-transaction */
+ void start(bool sync = true);
/** @return whether this is an asynchronous mini-transaction. */
bool is_async() const
@@ -295,17 +269,6 @@ struct mtr_t {
@return old mode */
inline mtr_log_t set_log_mode(mtr_log_t mode);
- /** Note that the mini-transaction is modifying the system tablespace
- (for example, for the change buffer or for undo logs)
- @return the system tablespace */
- fil_space_t* set_sys_modified()
- {
- if (!m_impl.m_sys_space) {
- lookup_sys_space();
- }
- return(m_impl.m_sys_space);
- }
-
/** Copy the tablespaces associated with the mini-transaction
(needed for generating MLOG_FILE_NAME records)
@param[in] mtr mini-transaction that may modify
@@ -314,35 +277,41 @@ struct mtr_t {
{
ut_ad(!m_impl.m_user_space_id);
ut_ad(!m_impl.m_user_space);
- ut_ad(!m_impl.m_undo_space);
- ut_ad(!m_impl.m_sys_space);
ut_d(m_impl.m_user_space_id = mtr.m_impl.m_user_space_id);
m_impl.m_user_space = mtr.m_impl.m_user_space;
- m_impl.m_undo_space = mtr.m_impl.m_undo_space;
- m_impl.m_sys_space = mtr.m_impl.m_sys_space;
}
/** Set the tablespace associated with the mini-transaction
(needed for generating a MLOG_FILE_NAME record)
@param[in] space_id user or system tablespace ID
@return the tablespace */
- fil_space_t* set_named_space(ulint space_id)
+ fil_space_t* set_named_space_id(ulint space_id)
{
ut_ad(!m_impl.m_user_space_id);
ut_d(m_impl.m_user_space_id = space_id);
if (!space_id) {
- return(set_sys_modified());
+ return fil_system.sys_space;
} else {
- lookup_user_space(space_id);
- return(m_impl.m_user_space);
+ ut_ad(m_impl.m_user_space_id == space_id);
+ ut_ad(!m_impl.m_user_space);
+ m_impl.m_user_space = fil_space_get(space_id);
+ ut_ad(m_impl.m_user_space);
+ return m_impl.m_user_space;
}
}
/** Set the tablespace associated with the mini-transaction
(needed for generating a MLOG_FILE_NAME record)
@param[in] space user or system tablespace */
- void set_named_space(fil_space_t* space);
+ void set_named_space(fil_space_t* space)
+ {
+ ut_ad(!m_impl.m_user_space_id);
+ ut_d(m_impl.m_user_space_id = space->id);
+ if (space->id) {
+ m_impl.m_user_space = space;
+ }
+ }
#ifdef UNIV_DEBUG
/** Check the tablespace associated with the mini-transaction
@@ -350,6 +319,11 @@ struct mtr_t {
@param[in] space tablespace
@return whether the mini-transaction is associated with the space */
bool is_named_space(ulint space) const;
+ /** Check the tablespace associated with the mini-transaction
+ (needed for generating a MLOG_FILE_NAME record)
+ @param[in] space tablespace
+ @return whether the mini-transaction is associated with the space */
+ bool is_named_space(const fil_space_t* space) const;
#endif /* UNIV_DEBUG */
/** Read 1 - 4 bytes from a file page buffered in the buffer pool.
@@ -575,12 +549,6 @@ struct mtr_t {
MY_ATTRIBUTE((warn_unused_result));
private:
- /** Look up the system tablespace. */
- void lookup_sys_space();
- /** Look up the user tablespace.
- @param[in] space_id tablespace ID */
- void lookup_user_space(ulint space_id);
-
class Command;
friend class Command;
diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h
index af8f1d2c7db..eaf838aaa76 100644
--- a/storage/innobase/include/mtr0types.h
+++ b/storage/innobase/include/mtr0types.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -100,16 +100,16 @@ enum mlog_id_t {
/** Create an index page */
MLOG_PAGE_CREATE = 19,
- /** Insert entry in an undo log */
+ /** insert an undo log record */
MLOG_UNDO_INSERT = 20,
- /** erase an undo log page end */
+ /** erase an undo log page end (used in MariaDB 10.2) */
MLOG_UNDO_ERASE_END = 21,
/** initialize a page in an undo log */
MLOG_UNDO_INIT = 22,
- /** reuse an insert undo log header */
+ /** reuse an insert undo log header (used in MariaDB 10.2) */
MLOG_UNDO_HDR_REUSE = 24,
/** create an undo log header */
@@ -223,8 +223,12 @@ enum mlog_id_t {
redo log about individual pages */
MLOG_INDEX_LOAD = 61,
+ /** write DB_TRX_ID,DB_ROLL_PTR to a clustered index leaf page
+ of a ROW_FORMAT=COMPRESSED table */
+ MLOG_ZIP_WRITE_TRX_ID = 62,
+
/** biggest value (used in assertions) */
- MLOG_BIGGEST_TYPE = MLOG_INDEX_LOAD,
+ MLOG_BIGGEST_TYPE = MLOG_ZIP_WRITE_TRX_ID,
/** log record for writing/updating crypt data of
a tablespace */
diff --git a/storage/innobase/include/os0event.h b/storage/innobase/include/os0event.h
index d5fdc6ba080..f8227235211 100644
--- a/storage/innobase/include/os0event.h
+++ b/storage/innobase/include/os0event.h
@@ -42,11 +42,7 @@ Creates an event semaphore, i.e., a semaphore which may just have two states:
signaled and nonsignaled. The created event is manual reset: it must be reset
explicitly by calling os_event_reset().
@return the event handle */
-os_event_t
-os_event_create(
-/*============*/
- const char* name); /*!< in: the name of the event, if NULL
- the event is created without a name */
+os_event_t os_event_create(const char*);
/**
Sets an event semaphore to the signaled state: lets waiting threads
diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
index c19079e1f9e..71da751ad25 100644
--- a/storage/innobase/include/os0file.h
+++ b/storage/innobase/include/os0file.h
@@ -50,7 +50,6 @@ struct fil_node_t;
struct fil_space_t;
extern bool os_has_said_disk_full;
-extern my_bool srv_use_trim;
/** File offset in bytes */
typedef ib_uint64_t os_offset_t;
@@ -69,10 +68,6 @@ the OS actually supports it: Win 95 does not, NT does. */
/** File handle */
typedef HANDLE os_file_t;
-/** Convert a C file descriptor to a native file handle
-@param fd file descriptor
-@return native file handle */
-# define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd)
#else /* _WIN32 */
@@ -81,14 +76,9 @@ typedef DIR* os_file_dir_t; /*!< directory stream */
/** File handle */
typedef int os_file_t;
-/** Convert a C file descriptor to a native file handle
-@param fd file descriptor
-@return native file handle */
-# define OS_FILE_FROM_FD(fd) fd
-
#endif /* _WIN32 */
-static const os_file_t OS_FILE_CLOSED = os_file_t(~0);
+static const os_file_t OS_FILE_CLOSED = IF_WIN(os_file_t(INVALID_HANDLE_VALUE),-1);
/** File descriptor with optional PERFORMANCE_SCHEMA instrumentation */
struct pfs_os_file_t
@@ -251,7 +241,7 @@ public:
m_fil_node(NULL),
m_type(static_cast<uint16_t>(type))
{
- if (!is_punch_hole_supported() || !srv_use_trim) {
+ if (!is_punch_hole_supported()) {
clear_punch_hole();
}
}
@@ -270,7 +260,7 @@ public:
set_punch_hole();
}
- if (!is_punch_hole_supported() || !srv_use_trim) {
+ if (!is_punch_hole_supported()) {
clear_punch_hole();
}
}
@@ -357,7 +347,7 @@ public:
/** Set the punch hole flag */
void set_punch_hole()
{
- if (is_punch_hole_supported() && srv_use_trim) {
+ if (is_punch_hole_supported()) {
m_type |= PUNCH_HOLE;
}
}
@@ -372,8 +362,7 @@ public:
@param[in] node File node */
void set_fil_node(fil_node_t* node)
{
- if (!srv_use_trim ||
- (node && !fil_node_should_punch_hole(node))) {
+ if (node && !fil_node_should_punch_hole(node)) {
clear_punch_hole();
}
@@ -537,14 +526,11 @@ struct os_file_stat_t {
};
/** Create a temporary file. This function is like tmpfile(3), but
-the temporary file is created in the given parameter path. If the path
-is null then it will create the file in the mysql server configuration
+the temporary file is created in the directory specified by the mysql server configuration
parameter (--tmpdir).
-@param[in] path location for creating temporary file
@return temporary file handle, or NULL on error */
FILE*
-os_file_create_tmpfile(
- const char* path);
+os_file_create_tmpfile();
/** The os_file_opendir() function opens a directory stream corresponding to the
directory named by the dirname argument. The directory stream is positioned
@@ -848,18 +834,10 @@ The wrapper functions have the prefix of "innodb_". */
pfs_os_file_read_no_error_handling_func( \
type, file, buf, offset, n, o, __FILE__, __LINE__)
-# define os_file_read_no_error_handling_int_fd(type, file, buf, offset, n) \
- pfs_os_file_read_no_error_handling_int_fd_func( \
- type, file, buf, offset, n, __FILE__, __LINE__)
-
# define os_file_write(type, name, file, buf, offset, n) \
pfs_os_file_write_func(type, name, file, buf, offset, \
n, __FILE__, __LINE__)
-# define os_file_write_int_fd(type, name, file, buf, offset, n) \
- pfs_os_file_write_int_fd_func(type, name, file, buf, offset, \
- n, __FILE__, __LINE__)
-
# define os_file_flush(file) \
pfs_os_file_flush_func(file, __FILE__, __LINE__)
@@ -1570,7 +1548,7 @@ path. If the path is NULL then it will be created on --tmpdir location.
This function is defined in ha_innodb.cc.
@param[in] path location for creating temporary file
@return temporary file descriptor, or < 0 on error */
-int
+os_file_t
innobase_mysql_tmpfile(
const char* path);
diff --git a/storage/innobase/include/os0file.ic b/storage/innobase/include/os0file.ic
index a7e4f2695da..895f82cf2d8 100644
--- a/storage/innobase/include/os0file.ic
+++ b/storage/innobase/include/os0file.ic
@@ -340,49 +340,6 @@ pfs_os_file_read_no_error_handling_func(
return(result);
}
-/** NOTE! Please use the corresponding macro
-os_file_read_no_error_handling_int_fd() to request
-a synchronous read operation.
-@param[in] type read request
-@param[in] file file handle
-@param[out] buf buffer where to read
-@param[in] offset file offset where to read
-@param[in] n number of bytes to read
-@param[in] src_file caller file name
-@param[in] src_line caller line number
-@return whether the request was successful */
-UNIV_INLINE
-bool
-pfs_os_file_read_no_error_handling_int_fd_func(
- const IORequest& type,
- int file,
- void* buf,
- os_offset_t offset,
- ulint n,
- const char* src_file,
- uint src_line)
-{
- PSI_file_locker_state state;
-
- PSI_file_locker* locker = PSI_FILE_CALL(
- get_thread_file_descriptor_locker)(
- &state, file, PSI_FILE_READ);
- if (locker != NULL) {
- PSI_FILE_CALL(start_file_wait)(
- locker, n,
- __FILE__, __LINE__);
- }
-
- bool success = DB_SUCCESS == os_file_read_no_error_handling_func(
- type, OS_FILE_FROM_FD(file), buf, offset, n, NULL);
-
- if (locker != NULL) {
- PSI_FILE_CALL(end_file_wait)(locker, n);
- }
-
- return(success);
-}
-
/** NOTE! Please use the corresponding macro os_file_write(), not directly
this function!
This is the performance schema instrumented wrapper function for
@@ -425,51 +382,6 @@ pfs_os_file_write_func(
return(result);
}
-/** NOTE! Please use the corresponding macro os_file_write_int_fd(),
-not directly this function!
-This is the performance schema instrumented wrapper function for
-os_file_write_int_fd() which requests a synchronous write operation.
-@param[in] type write request
-@param[in] name file name
-@param[in] file file handle
-@param[in] buf buffer to write
-@param[in] offset file offset
-@param[in] n number of bytes
-@param[in] src_file file name where func invoked
-@param[in] src_line line where the func invoked
-@return whether the request was successful */
-UNIV_INLINE
-bool
-pfs_os_file_write_int_fd_func(
- const IORequest& type,
- const char* name,
- int file,
- const void* buf,
- os_offset_t offset,
- ulint n,
- const char* src_file,
- uint src_line)
-{
- PSI_file_locker_state state;
- struct PSI_file_locker* locker;
-
- locker = PSI_FILE_CALL(get_thread_file_descriptor_locker)(
- &state, file, PSI_FILE_WRITE);
- if (locker != NULL) {
- PSI_FILE_CALL(start_file_wait)(
- locker, n,
- __FILE__, __LINE__);
- }
-
- bool success = DB_SUCCESS == os_file_write_func(
- type, name, OS_FILE_FROM_FD(file), buf, offset, n);
-
- if (locker != NULL) {
- PSI_FILE_CALL(end_file_wait)(locker, n);
- }
-
- return(success);
-}
/** NOTE! Please use the corresponding macro os_file_flush(), not directly
this function!
diff --git a/storage/innobase/include/os0once.h b/storage/innobase/include/os0once.h
index 05a45a69f33..551e78d24ba 100644
--- a/storage/innobase/include/os0once.h
+++ b/storage/innobase/include/os0once.h
@@ -30,6 +30,7 @@ Created Feb 20, 2014 Vasil Dimov
#include "univ.i"
#include "ut0ut.h"
+#include "my_cpu.h"
/** Execute a given function exactly once in a multi-threaded environment
or wait for the function to be executed by another thread.
@@ -110,7 +111,7 @@ public:
ut_error;
}
- UT_RELAX_CPU();
+ MY_RELAX_CPU();
}
}
}
diff --git a/storage/innobase/include/os0thread.h b/storage/innobase/include/os0thread.h
index c240f5dacdd..b6838c919a0 100644
--- a/storage/innobase/include/os0thread.h
+++ b/storage/innobase/include/os0thread.h
@@ -30,12 +30,6 @@ Created 9/8/1995 Heikki Tuuri
#include "univ.i"
-/* Maximum number of threads which can be created in the program;
-this is also the size of the wait slot array for MySQL threads which
-can wait inside InnoDB */
-
-#define OS_THREAD_MAX_N srv_max_n_threads
-
/* Possible fixed priorities for threads */
#define OS_THREAD_PRIORITY_NONE 100
#define OS_THREAD_PRIORITY_BACKGROUND 1
@@ -53,12 +47,8 @@ typedef LPTHREAD_START_ROUTINE os_thread_func_t;
/** Macro for specifying a Windows thread start function. */
#define DECLARE_THREAD(func) WINAPI func
-/** Required to get around a build error on Windows. Even though our functions
-are defined/declared as WINAPI f(LPVOID a); the compiler complains that they
-are defined as: os_thread_ret_t (__cdecl*)(void*). Because our functions
-don't access the arguments and don't return any value, we should be safe. */
#define os_thread_create(f,a,i) \
- os_thread_create_func(reinterpret_cast<os_thread_func_t>(f), a, i)
+ os_thread_create_func(f, a, i)
#else
diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h
index a038f68731c..d98dfa5ec07 100644
--- a/storage/innobase/include/page0cur.h
+++ b/storage/innobase/include/page0cur.h
@@ -157,10 +157,7 @@ page_cur_tuple_insert(
ulint** offsets,/*!< out: offsets on *rec */
mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr, /*!< in: mini-transaction handle, or NULL */
- bool use_cache = false)
- /*!< in: if true, then use record cache to
- hold the tuple converted record. */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
MY_ATTRIBUTE((nonnull(1,2,3,4,5), warn_unused_result));
/***********************************************************//**
Inserts a record next to page cursor. Returns pointer to inserted record if
diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic
index 3e6d40cba4a..86e560395f3 100644
--- a/storage/innobase/include/page0cur.ic
+++ b/storage/innobase/include/page0cur.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2014, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, MariaDB Corporation.
+Copyright (c) 2015, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -262,10 +262,7 @@ page_cur_tuple_insert(
ulint** offsets,/*!< out: offsets on *rec */
mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */
ulint n_ext, /*!< in: number of externally stored columns */
- mtr_t* mtr, /*!< in: mini-transaction handle, or NULL */
- bool use_cache)
- /*!< in: if true, then use record cache to
- hold the tuple converted record. */
+ mtr_t* mtr) /*!< in: mini-transaction handle, or NULL */
{
rec_t* rec;
ulint size = rec_get_converted_size(index, tuple, n_ext);
diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
index 53a58de229d..d3f6bd304a6 100644
--- a/storage/innobase/include/page0page.h
+++ b/storage/innobase/include/page0page.h
@@ -1,6 +1,6 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -63,9 +63,42 @@ typedef byte page_header_t;
#define PAGE_FREE 6 /* pointer to start of page free record list */
#define PAGE_GARBAGE 8 /* number of bytes in deleted records */
#define PAGE_LAST_INSERT 10 /* pointer to the last inserted record, or
- NULL if this info has been reset by a delete,
+ 0 if this info has been reset by a delete,
for example */
-#define PAGE_DIRECTION 12 /* last insert direction: PAGE_LEFT, ... */
+
+/** This 13-bit field is usually 0. In B-tree index pages of
+ROW_FORMAT=REDUNDANT tables, this byte can contain garbage if the .ibd
+file was created in MySQL 4.1.0 or if the table resides in the system
+tablespace and was created before MySQL 4.1.1 or MySQL 4.0.14.
+In this case, the FIL_PAGE_TYPE would be FIL_PAGE_INDEX.
+
+In ROW_FORMAT=COMPRESSED tables, this field is always 0, because
+instant ADD COLUMN is not supported.
+
+In ROW_FORMAT=COMPACT and ROW_FORMAT=DYNAMIC tables, this field is
+always 0, except in the root page of the clustered index after instant
+ADD COLUMN.
+
+Instant ADD COLUMN will change FIL_PAGE_TYPE to FIL_PAGE_TYPE_INSTANT
+and initialize the PAGE_INSTANT field to the original number of
+fields in the clustered index (dict_index_t::n_core_fields). The most
+significant bits are in the first byte, and the least significant 5
+bits are stored in the most significant 5 bits of PAGE_DIRECTION_B.
+
+These FIL_PAGE_TYPE_INSTANT and PAGE_INSTANT may be assigned even if
+instant ADD COLUMN was not committed. Changes to these page header fields
+are not undo-logged, but changes to the 'default value record' are.
+If the server is killed and restarted, the page header fields could
+remain set even though no 'default value record' is present.
+
+When the table becomes empty, the PAGE_INSTANT field and the
+FIL_PAGE_TYPE can be reset and any 'default value record' be removed. */
+#define PAGE_INSTANT 12
+
+/** last insert direction: PAGE_LEFT, ....
+In ROW_FORMAT=REDUNDANT tables created before MySQL 4.1.1 or MySQL 4.0.14,
+this byte can be garbage. */
+#define PAGE_DIRECTION_B 13
#define PAGE_N_DIRECTION 14 /* number of consecutive inserts to the same
direction */
#define PAGE_N_RECS 16 /* number of user records on the page */
@@ -125,9 +158,9 @@ Otherwise written as 0. @see PAGE_ROOT_AUTO_INC */
/*-----------------------------*/
/* Heap numbers */
-#define PAGE_HEAP_NO_INFIMUM 0 /* page infimum */
-#define PAGE_HEAP_NO_SUPREMUM 1 /* page supremum */
-#define PAGE_HEAP_NO_USER_LOW 2 /* first user record in
+#define PAGE_HEAP_NO_INFIMUM 0U /* page infimum */
+#define PAGE_HEAP_NO_SUPREMUM 1U /* page supremum */
+#define PAGE_HEAP_NO_USER_LOW 2U /* first user record in
creation (insertion) order,
not necessarily collation order;
this record may have been deleted */
@@ -177,7 +210,7 @@ inline
page_t*
page_align(const void* ptr)
{
- return(static_cast<page_t*>(ut_align_down(ptr, UNIV_PAGE_SIZE)));
+ return(static_cast<page_t*>(ut_align_down(ptr, srv_page_size)));
}
/** Gets the byte offset within a page frame.
@@ -188,7 +221,7 @@ inline
ulint
page_offset(const void* ptr)
{
- return(ut_align_offset(ptr, UNIV_PAGE_SIZE));
+ return(ut_align_offset(ptr, srv_page_size));
}
/** Determine whether an index page is not in ROW_FORMAT=REDUNDANT.
@@ -251,6 +284,20 @@ page_rec_is_comp(const byte* rec)
return(page_is_comp(page_align(rec)));
}
+# ifdef UNIV_DEBUG
+/** Determine if the record is the 'default row' pseudo-record
+in the clustered index.
+@param[in] rec leaf page record on an index page
+@return whether the record is the 'default row' pseudo-record */
+inline
+bool
+page_rec_is_default_row(const rec_t* rec)
+{
+ return rec_get_info_bits(rec, page_rec_is_comp(rec))
+ & REC_INFO_MIN_REC_FLAG;
+}
+# endif /* UNIV_DEBUG */
+
/** Determine the offset of the infimum record on the page.
@param[in] page index page
@return offset of the infimum record in record list, relative from page */
@@ -288,7 +335,7 @@ page_rec_is_user_rec_low(ulint offset)
compile_time_assert(PAGE_NEW_SUPREMUM < PAGE_OLD_SUPREMUM_END);
compile_time_assert(PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM_END);
ut_ad(offset >= PAGE_NEW_INFIMUM);
- ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
+ ut_ad(offset <= srv_page_size - PAGE_EMPTY_DIR_START);
return(offset != PAGE_NEW_SUPREMUM
&& offset != PAGE_NEW_INFIMUM
@@ -304,7 +351,7 @@ bool
page_rec_is_supremum_low(ulint offset)
{
ut_ad(offset >= PAGE_NEW_INFIMUM);
- ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
+ ut_ad(offset <= srv_page_size - PAGE_EMPTY_DIR_START);
return(offset == PAGE_NEW_SUPREMUM || offset == PAGE_OLD_SUPREMUM);
}
@@ -316,7 +363,7 @@ bool
page_rec_is_infimum_low(ulint offset)
{
ut_ad(offset >= PAGE_NEW_INFIMUM);
- ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START);
+ ut_ad(offset <= srv_page_size - PAGE_EMPTY_DIR_START);
return(offset == PAGE_NEW_INFIMUM || offset == PAGE_OLD_INFIMUM);
}
@@ -457,7 +504,7 @@ page_header_set_field(
Returns the offset stored in the given header field.
@return offset from the start of the page, or 0 */
UNIV_INLINE
-ulint
+uint16_t
page_header_get_offs(
/*=================*/
const page_t* page, /*!< in: page */
@@ -551,7 +598,7 @@ Gets the number of user records on page (the infimum and supremum records
are not user records).
@return number of user records */
UNIV_INLINE
-ulint
+uint16_t
page_get_n_recs(
/*============*/
const page_t* page); /*!< in: index page */
@@ -569,7 +616,7 @@ page_rec_get_n_recs_before(
Gets the number of records in the heap.
@return number of records in the heap */
UNIV_INLINE
-ulint
+uint16_t
page_dir_get_n_heap(
/*================*/
const page_t* page); /*!< in: index page */
@@ -590,7 +637,7 @@ page_dir_set_n_heap(
Gets the number of dir slots in directory.
@return number of slots */
UNIV_INLINE
-ulint
+uint16_t
page_dir_get_n_slots(
/*=================*/
const page_t* page); /*!< in: index page */
@@ -616,7 +663,7 @@ page_dir_get_nth_slot(
ulint n); /*!< in: position */
#else /* UNIV_DEBUG */
# define page_dir_get_nth_slot(page, n) \
- ((page) + (UNIV_PAGE_SIZE - PAGE_DIR \
+ ((page) + (srv_page_size - PAGE_DIR \
- (n + 1) * PAGE_DIR_SLOT_SIZE))
#endif /* UNIV_DEBUG */
/**************************************************************//**
@@ -686,14 +733,52 @@ ulint
page_rec_get_heap_no(
/*=================*/
const rec_t* rec); /*!< in: the physical record */
+/** Determine whether a page has any siblings.
+@param[in] page page frame
+@return true if the page has any siblings */
+inline
+bool
+page_has_siblings(const page_t* page)
+{
+ compile_time_assert(!(FIL_PAGE_PREV % 8));
+ compile_time_assert(FIL_PAGE_NEXT == FIL_PAGE_PREV + 4);
+ compile_time_assert(FIL_NULL == 0xffffffff);
+ return *reinterpret_cast<const uint64_t*>(page + FIL_PAGE_PREV)
+ != ~uint64_t(0);
+}
+
/** Determine whether a page is an index root page.
@param[in] page page frame
@return true if the page is a root page of an index */
-UNIV_INLINE
+inline
bool
-page_is_root(
- const page_t* page)
- MY_ATTRIBUTE((warn_unused_result));
+page_is_root(const page_t* page)
+{
+ return fil_page_index_page_check(page) && !page_has_siblings(page);
+}
+
+/** Determine whether a page has a predecessor.
+@param[in] page page frame
+@return true if the page has a predecessor */
+inline
+bool
+page_has_prev(const page_t* page)
+{
+ return *reinterpret_cast<const uint32_t*>(page + FIL_PAGE_PREV)
+ != FIL_NULL;
+}
+
+/** Determine whether a page has a successor.
+@param[in] page page frame
+@return true if the page has a successor */
+inline
+bool
+page_has_next(const page_t* page)
+{
+ return *reinterpret_cast<const uint32_t*>(page + FIL_PAGE_NEXT)
+ != FIL_NULL;
+}
+
/************************************************************//**
Gets the pointer to the next record on the page.
@return pointer to next record */
@@ -865,7 +950,7 @@ Returns the sum of the sizes of the records in the record list
excluding the infimum and supremum records.
@return data in bytes */
UNIV_INLINE
-ulint
+uint16_t
page_get_data_size(
/*===============*/
const page_t* page); /*!< in: index page */
@@ -911,6 +996,45 @@ page_mem_free(
const dict_index_t* index, /*!< in: index of rec */
const ulint* offsets);/*!< in: array returned by
rec_get_offsets() */
+
+/** Read the PAGE_DIRECTION field from a byte.
+@param[in] ptr pointer to PAGE_DIRECTION_B
+@return the value of the PAGE_DIRECTION field */
+inline
+byte
+page_ptr_get_direction(const byte* ptr);
+
+/** Set the PAGE_DIRECTION field.
+@param[in,out] ptr pointer to PAGE_DIRECTION_B
+@param[in] dir the value of the PAGE_DIRECTION field */
+inline
+void
+page_ptr_set_direction(byte* ptr, byte dir);
+
+/** Read the PAGE_DIRECTION field.
+@param[in] page index page
+@return the value of the PAGE_DIRECTION field */
+inline
+byte
+page_get_direction(const page_t* page)
+{
+ return page_ptr_get_direction(PAGE_HEADER + PAGE_DIRECTION_B + page);
+}
+
+/** Read the PAGE_INSTANT field.
+@param[in] page index page
+@return the value of the PAGE_INSTANT field */
+inline
+uint16_t
+page_get_instant(const page_t* page);
+/** Assign the PAGE_INSTANT field.
+@param[in,out] page clustered index root page
+@param[in] n original number of clustered index fields
+@param[in,out] mtr mini-transaction */
+inline
+void
+page_set_instant(page_t* page, unsigned n, mtr_t* mtr);
+
/**********************************************************//**
Create an uncompressed B-tree index page.
@return pointer to the page */
@@ -1251,5 +1375,4 @@ page_warn_strict_checksum(
#include "page0page.ic"
-
#endif
diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic
index 0062db56bfa..307803367c0 100644
--- a/storage/innobase/include/page0page.ic
+++ b/storage/innobase/include/page0page.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2016, 2017, MariaDB Corporation.
+Copyright (c) 2016, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -172,8 +172,8 @@ page_header_set_field(
{
ut_ad(page);
ut_ad(field <= PAGE_N_RECS);
- ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE);
- ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE);
+ ut_ad(field == PAGE_N_HEAP || val < srv_page_size);
+ ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < srv_page_size);
mach_write_to_2(page + PAGE_HEADER + field, val);
if (page_zip) {
@@ -186,19 +186,17 @@ page_header_set_field(
Returns the offset stored in the given header field.
@return offset from the start of the page, or 0 */
UNIV_INLINE
-ulint
+uint16_t
page_header_get_offs(
/*=================*/
const page_t* page, /*!< in: page */
ulint field) /*!< in: PAGE_FREE, ... */
{
- ulint offs;
-
ut_ad((field == PAGE_FREE)
|| (field == PAGE_LAST_INSERT)
|| (field == PAGE_HEAP_TOP));
- offs = page_header_get_field(page, field);
+ uint16_t offs = page_header_get_field(page, field);
ut_ad((field != PAGE_HEAP_TOP) || offs);
@@ -277,31 +275,6 @@ page_rec_get_heap_no(
}
}
-/** Determine whether a page is an index root page.
-@param[in] page page frame
-@return true if the page is a root page of an index */
-UNIV_INLINE
-bool
-page_is_root(
- const page_t* page)
-{
-#if FIL_PAGE_PREV % 8
-# error FIL_PAGE_PREV must be 64-bit aligned
-#endif
-#if FIL_PAGE_NEXT != FIL_PAGE_PREV + 4
-# error FIL_PAGE_NEXT must be adjacent to FIL_PAGE_PREV
-#endif
-#if FIL_NULL != 0xffffffff
-# error FIL_NULL != 0xffffffff
-#endif
- /* Check that this is an index page and both the PREV and NEXT
- pointers are FIL_NULL, because the root page does not have any
- siblings. */
- return(fil_page_index_page_check(page)
- && *reinterpret_cast<const ib_uint64_t*>(page + FIL_PAGE_PREV)
- == IB_UINT64_MAX);
-}
-
/** Determine whether an index page record is a user record.
@param[in] rec record in an index page
@return true if a user record */
@@ -423,7 +396,8 @@ page_get_middle_rec(
/*================*/
page_t* page) /*!< in: page */
{
- ulint middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2;
+ ulint middle = (ulint(page_get_n_recs(page))
+ + PAGE_HEAP_NO_USER_LOW) / 2;
return(page_rec_get_nth(page, middle));
}
@@ -464,7 +438,7 @@ Gets the number of user records on page (infimum and supremum records
are not user records).
@return number of user records */
UNIV_INLINE
-ulint
+uint16_t
page_get_n_recs(
/*============*/
const page_t* page) /*!< in: index page */
@@ -477,7 +451,7 @@ page_get_n_recs(
Gets the number of dir slots in directory.
@return number of slots */
UNIV_INLINE
-ulint
+uint16_t
page_dir_get_n_slots(
/*=================*/
const page_t* page) /*!< in: index page */
@@ -502,7 +476,7 @@ page_dir_set_n_slots(
Gets the number of records in the heap.
@return number of records in the heap */
UNIV_INLINE
-ulint
+uint16_t
page_dir_get_n_heap(
/*================*/
const page_t* page) /*!< in: index page */
@@ -547,7 +521,7 @@ page_dir_get_nth_slot(
ut_ad(page_dir_get_n_slots(page) > n);
return((page_dir_slot_t*)
- page + UNIV_PAGE_SIZE - PAGE_DIR
+ page + srv_page_size - PAGE_DIR
- (n + 1) * PAGE_DIR_SLOT_SIZE);
}
#endif /* UNIV_DEBUG */
@@ -666,7 +640,7 @@ page_rec_get_next_low(
offs = rec_get_next_offs(rec, comp);
- if (offs >= UNIV_PAGE_SIZE) {
+ if (offs >= srv_page_size) {
fprintf(stderr,
"InnoDB: Next record offset is nonsensical %lu"
" in record at offset %lu\n"
@@ -855,9 +829,8 @@ page_rec_get_base_extra_size(
/*=========================*/
const rec_t* rec) /*!< in: physical record */
{
-#if REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES
-# error "REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES"
-#endif
+ compile_time_assert(REC_N_NEW_EXTRA_BYTES + 1
+ == REC_N_OLD_EXTRA_BYTES);
return(REC_N_NEW_EXTRA_BYTES + (ulint) !page_rec_is_comp(rec));
}
@@ -868,21 +841,17 @@ Returns the sum of the sizes of the records in the record list, excluding
the infimum and supremum records.
@return data in bytes */
UNIV_INLINE
-ulint
+uint16_t
page_get_data_size(
/*===============*/
const page_t* page) /*!< in: index page */
{
- ulint ret;
-
- ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP)
- - (page_is_comp(page)
- ? PAGE_NEW_SUPREMUM_END
- : PAGE_OLD_SUPREMUM_END)
- - page_header_get_field(page, PAGE_GARBAGE));
-
- ut_ad(ret < UNIV_PAGE_SIZE);
-
+ uint16_t ret = page_header_get_field(page, PAGE_HEAP_TOP)
+ - (page_is_comp(page)
+ ? PAGE_NEW_SUPREMUM_END
+ : PAGE_OLD_SUPREMUM_END)
+ - page_header_get_field(page, PAGE_GARBAGE);
+ ut_ad(ret < srv_page_size);
return(ret);
}
@@ -930,13 +899,13 @@ page_get_free_space_of_empty(
ulint comp) /*!< in: nonzero=compact page layout */
{
if (comp) {
- return((ulint)(UNIV_PAGE_SIZE
+ return((ulint)(srv_page_size
- PAGE_NEW_SUPREMUM_END
- PAGE_DIR
- 2 * PAGE_DIR_SLOT_SIZE));
}
- return((ulint)(UNIV_PAGE_SIZE
+ return((ulint)(srv_page_size
- PAGE_OLD_SUPREMUM_END
- PAGE_DIR
- 2 * PAGE_DIR_SLOT_SIZE));
@@ -1074,10 +1043,79 @@ page_mem_free(
page_zip_dir_delete(page_zip, rec, index, offsets, free);
} else {
page_header_set_field(page, page_zip, PAGE_N_RECS,
- page_get_n_recs(page) - 1);
+ ulint(page_get_n_recs(page)) - 1);
+ }
+}
+
+/** Read the PAGE_DIRECTION field from a byte.
+@param[in] ptr pointer to PAGE_DIRECTION_B
+@return the value of the PAGE_DIRECTION field */
+inline
+byte
+page_ptr_get_direction(const byte* ptr)
+{
+ ut_ad(page_offset(ptr) == PAGE_HEADER + PAGE_DIRECTION_B);
+ return *ptr & ((1U << 3) - 1);
+}
+
+/** Set the PAGE_DIRECTION field.
+@param[in,out] ptr pointer to PAGE_DIRECTION_B
+@param[in] dir the value of the PAGE_DIRECTION field */
+inline
+void
+page_ptr_set_direction(byte* ptr, byte dir)
+{
+ ut_ad(page_offset(ptr) == PAGE_HEADER + PAGE_DIRECTION_B);
+ ut_ad(dir >= PAGE_LEFT);
+ ut_ad(dir <= PAGE_NO_DIRECTION);
+ *ptr = (*ptr & ~((1U << 3) - 1)) | dir;
+}
+
+/** Read the PAGE_INSTANT field.
+@param[in] page index page
+@return the value of the PAGE_INSTANT field */
+inline
+uint16_t
+page_get_instant(const page_t* page)
+{
+ uint16_t i = page_header_get_field(page, PAGE_INSTANT);
+#ifdef UNIV_DEBUG
+ switch (fil_page_get_type(page)) {
+ case FIL_PAGE_TYPE_INSTANT:
+ ut_ad(page_get_direction(page) <= PAGE_NO_DIRECTION);
+ ut_ad(i >> 3);
+ break;
+ case FIL_PAGE_INDEX:
+ ut_ad(i <= PAGE_NO_DIRECTION || !page_is_comp(page));
+ break;
+ case FIL_PAGE_RTREE:
+ ut_ad(i <= PAGE_NO_DIRECTION);
+ break;
+ default:
+ ut_ad(!"invalid page type");
+ break;
}
+#endif /* UNIV_DEBUG */
+ return(i >> 3);
}
+/** Assign the PAGE_INSTANT field.
+@param[in,out] page clustered index root page
+@param[in] n original number of clustered index fields
+@param[in,out] mtr mini-transaction */
+inline
+void
+page_set_instant(page_t* page, unsigned n, mtr_t* mtr)
+{
+ ut_ad(fil_page_get_type(page) == FIL_PAGE_TYPE_INSTANT);
+ ut_ad(n > 0);
+ ut_ad(n < REC_MAX_N_FIELDS);
+ uint16_t i = page_header_get_field(page, PAGE_INSTANT);
+ ut_ad(i <= PAGE_NO_DIRECTION);
+ i |= n << 3;
+ mlog_write_ulint(PAGE_HEADER + PAGE_INSTANT + page, i,
+ MLOG_2BYTES, mtr);
+}
#endif /* !UNIV_INNOCHECKSUM */
#ifdef UNIV_MATERIALIZE
diff --git a/storage/innobase/include/page0size.h b/storage/innobase/include/page0size.h
index 30a996df0a6..7b8b7efe617 100644
--- a/storage/innobase/include/page0size.h
+++ b/storage/innobase/include/page0size.h
@@ -30,7 +30,7 @@ Created Nov 14, 2013 Vasil Dimov
#include "univ.i"
#include "fsp0types.h"
-#define FIELD_REF_SIZE 20
+#define FIELD_REF_SIZE 20U
/** A BLOB field reference full of zero, for use in assertions and
tests. Initially, BLOB field references are set to zero, in
diff --git a/storage/innobase/include/page0zip.h b/storage/innobase/include/page0zip.h
index c1d5443d9e5..6e0c097bbaf 100644
--- a/storage/innobase/include/page0zip.h
+++ b/storage/innobase/include/page0zip.h
@@ -340,18 +340,39 @@ page_zip_write_node_ptr(
ulint ptr, /*!< in: node pointer */
mtr_t* mtr); /*!< in: mini-transaction, or NULL */
-/**********************************************************************//**
-Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
+/** Write the DB_TRX_ID,DB_ROLL_PTR into a clustered index leaf page record.
+@param[in,out] page_zip compressed page
+@param[in,out] rec record
+@param[in] offsets rec_get_offsets(rec, index)
+@param[in] trx_id_col field number of DB_TRX_ID (number of PK fields)
+@param[in] trx_id DB_TRX_ID value (transaction identifier)
+@param[in] roll_ptr DB_ROLL_PTR value (undo log pointer)
+@param[in,out] mtr mini-transaction, or NULL to skip logging */
void
page_zip_write_trx_id_and_roll_ptr(
-/*===============================*/
- page_zip_des_t* page_zip,/*!< in/out: compressed page */
- byte* rec, /*!< in/out: record */
- const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */
- ulint trx_id_col,/*!< in: column number of TRX_ID in rec */
- trx_id_t trx_id, /*!< in: transaction identifier */
- roll_ptr_t roll_ptr)/*!< in: roll_ptr */
- MY_ATTRIBUTE((nonnull));
+ page_zip_des_t* page_zip,
+ byte* rec,
+ const ulint* offsets,
+ ulint trx_id_col,
+ trx_id_t trx_id,
+ roll_ptr_t roll_ptr,
+ mtr_t* mtr = NULL)
+ MY_ATTRIBUTE((nonnull(1,2,3)));
+
+/** Parse a MLOG_ZIP_WRITE_TRX_ID record.
+@param[in] ptr redo log buffer
+@param[in] end_ptr end of redo log buffer
+@param[in,out] page uncompressed page
+@param[in,out] page_zip compressed page
+@return end of log record
+@retval NULL if the log record is incomplete */
+byte*
+page_zip_parse_write_trx_id(
+ byte* ptr,
+ byte* end_ptr,
+ page_t* page,
+ page_zip_des_t* page_zip)
+ MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
/**********************************************************************//**
Write the "deleted" flag of a record on a compressed page. The flag must
diff --git a/storage/innobase/include/page0zip.ic b/storage/innobase/include/page0zip.ic
index b471e2cf64e..b3ebc5dcf51 100644
--- a/storage/innobase/include/page0zip.ic
+++ b/storage/innobase/include/page0zip.ic
@@ -120,7 +120,7 @@ page_zip_get_size(
size = (UNIV_ZIP_SIZE_MIN >> 1) << page_zip->ssize;
ut_ad(size >= UNIV_ZIP_SIZE_MIN);
- ut_ad(size <= UNIV_PAGE_SIZE);
+ ut_ad(size <= srv_page_size);
return(size);
}
@@ -242,9 +242,9 @@ page_zip_get_trailer_len(
ut_ad(!page_zip->n_blobs);
}
- return((page_dir_get_n_heap(page_zip->data) - 2)
- * uncompressed_size
- + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE);
+ return (ulint(page_dir_get_n_heap(page_zip->data)) - 2)
+ * uncompressed_size
+ + ulint(page_zip->n_blobs) * BTR_EXTERN_FIELD_REF_SIZE;
}
/**********************************************************************//**
diff --git a/storage/innobase/include/pars0pars.h b/storage/innobase/include/pars0pars.h
index 37498c1c638..487ba8c147f 100644
--- a/storage/innobase/include/pars0pars.h
+++ b/storage/innobase/include/pars0pars.h
@@ -539,7 +539,7 @@ pars_info_add_int4_literal(
/*=======================*/
pars_info_t* info, /*!< in: info struct */
const char* name, /*!< in: name */
- lint val); /*!< in: value */
+ ulint val); /*!< in: value */
/****************************************************************//**
Equivalent to:
diff --git a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h
index f01b596a52e..ca06f5b09ba 100644
--- a/storage/innobase/include/que0que.h
+++ b/storage/innobase/include/que0que.h
@@ -335,13 +335,6 @@ enum que_thr_lock_t {
QUE_THR_LOCK_TABLE
};
-/** From where the cursor position is counted */
-enum que_cur_t {
- QUE_CUR_NOT_DEFINED,
- QUE_CUR_START,
- QUE_CUR_END
-};
-
/* Query graph query thread node: the fields are protected by the
trx_t::mutex with the exceptions named below */
@@ -415,18 +408,7 @@ struct que_fork_t{
generated by the parser, or NULL
if the graph was created 'by hand' */
pars_info_t* info; /*!< info struct, or NULL */
- /* The following cur_... fields are relevant only in a select graph */
- ulint cur_end; /*!< QUE_CUR_NOT_DEFINED, QUE_CUR_START,
- QUE_CUR_END */
- ulint cur_pos; /*!< if there are n rows in the result
- set, values 0 and n + 1 mean before
- first row, or after last row, depending
- on cur_end; values 1...n mean a row
- index */
- ibool cur_on_row; /*!< TRUE if cursor is on a row, i.e.,
- it is not before the first row or
- after the last row */
sel_node_t* last_sel_node; /*!< last executed select node, or NULL
if none */
UT_LIST_NODE_T(que_fork_t)
diff --git a/storage/innobase/include/read0read.h b/storage/innobase/include/read0read.h
deleted file mode 100644
index 129341be77c..00000000000
--- a/storage/innobase/include/read0read.h
+++ /dev/null
@@ -1,125 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1997, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/read0read.h
-Cursor read
-
-Created 2/16/1997 Heikki Tuuri
-*******************************************************/
-
-#ifndef read0read_h
-#define read0read_h
-
-#include "univ.i"
-
-#include "read0types.h"
-
-#include <algorithm>
-
-/** The MVCC read view manager */
-class MVCC {
-public:
- /** Constructor
- @param size Number of views to pre-allocate */
- explicit MVCC(ulint size);
-
- /** Destructor.
- Free all the views in the m_free list */
- ~MVCC();
-
- /**
- Allocate and create a view.
- @param view view owned by this class created for the
- caller. Must be freed by calling close()
- @param trx transaction creating the view */
- void view_open(ReadView*& view, trx_t* trx);
-
- /**
- Close a view created by the above function.
- @para view view allocated by trx_open.
- @param own_mutex true if caller owns trx_sys_t::mutex */
- void view_close(ReadView*& view, bool own_mutex);
-
- /**
- Release a view that is inactive but not closed. Caller must own
- the trx_sys_t::mutex.
- @param view View to release */
- void view_release(ReadView*& view);
-
- /** Clones the oldest view and stores it in view. No need to
- call view_close(). The caller owns the view that is passed in.
- It will also move the closed views from the m_views list to the
- m_free list. This function is called by Purge to create it view.
- @param view Preallocated view, owned by the caller */
- void clone_oldest_view(ReadView* view);
-
- /**
- @return the number of active views */
- ulint size() const;
-
- /**
- @return true if the view is active and valid */
- static bool is_view_active(ReadView* view)
- {
- ut_a(view != reinterpret_cast<ReadView*>(0x1));
-
- return(view != NULL && !(intptr_t(view) & 0x1));
- }
-
- /**
- Set the view creator transaction id. Note: This shouldbe set only
- for views created by RW transactions. */
- static void set_view_creator_trx_id(ReadView* view, trx_id_t id);
-
-private:
-
- /**
- Validates a read view list. */
- bool validate() const;
-
- /**
- Find a free view from the active list, if none found then allocate
- a new view. This function will also attempt to move delete marked
- views from the active list to the freed list.
- @return a view to use */
- inline ReadView* get_view();
-
- /**
- Get the oldest view in the system. It will also move the delete
- marked read views from the views list to the freed list.
- @return oldest view if found or NULL */
- inline ReadView* get_oldest_view() const;
-
-private:
- // Prevent copying
- MVCC(const MVCC&);
- MVCC& operator=(const MVCC&);
-
-private:
- typedef UT_LIST_BASE_NODE_T(ReadView) view_list_t;
-
- /** Free views ready for reuse. */
- view_list_t m_free;
-
- /** Active and closed views, the closed views will have the
- creator trx id set to TRX_ID_MAX */
- view_list_t m_views;
-};
-
-#endif /* read0read_h */
diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h
index c83c7e04f11..eade82714c5 100644
--- a/storage/innobase/include/read0types.h
+++ b/storage/innobase/include/read0types.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -31,122 +32,163 @@ Created 2/16/1997 Heikki Tuuri
#include "trx0types.h"
-// Friend declaration
-class MVCC;
-/** Read view lists the trx ids of those transactions for which a consistent
-read should not see the modifications to the database. */
+/** View is not visible to purge thread. */
+#define READ_VIEW_STATE_CLOSED 0
-class ReadView {
- /** This is similar to a std::vector but it is not a drop
- in replacement. It is specific to ReadView. */
- class ids_t {
- typedef trx_ids_t::value_type value_type;
+/** View is being opened, purge thread must wait for state change. */
+#define READ_VIEW_STATE_SNAPSHOT 1
- /**
- Constructor */
- ids_t() : m_ptr(), m_size(), m_reserved() { }
+/** View is visible to purge thread. */
+#define READ_VIEW_STATE_OPEN 2
- /**
- Destructor */
- ~ids_t() { UT_DELETE_ARRAY(m_ptr); }
- /**
- Try and increase the size of the array. Old elements are
- copied across. It is a no-op if n is < current size.
+/**
+ Read view lists the trx ids of those transactions for which a consistent read
+ should not see the modifications to the database.
+*/
+class ReadView
+{
+ /**
+ View state.
- @param n Make space for n elements */
- void reserve(ulint n);
+ It is not defined as enum as it has to be updated using atomic operations.
+ Possible values are READ_VIEW_STATE_CLOSED, READ_VIEW_STATE_SNAPSHOT and
+ READ_VIEW_STATE_OPEN.
- /**
- Resize the array, sets the current element count.
- @param n new size of the array, in elements */
- void resize(ulint n)
- {
- ut_ad(n <= capacity());
+ Possible state transfers...
- m_size = n;
- }
-
- /**
- Reset the size to 0 */
- void clear() { resize(0); }
-
- /**
- @return the capacity of the array in elements */
- ulint capacity() const { return(m_reserved); }
-
- /**
- Copy and overwrite the current array contents
-
- @param start Source array
- @param end Pointer to end of array */
- void assign(const value_type* start, const value_type* end);
-
- /**
- Insert the value in the correct slot, preserving the order.
- Doesn't check for duplicates. */
- void insert(value_type value);
-
- /**
- @return the value of the first element in the array */
- value_type front() const
- {
- ut_ad(!empty());
-
- return(m_ptr[0]);
- }
-
- /**
- @return the value of the last element in the array */
- value_type back() const
- {
- ut_ad(!empty());
-
- return(m_ptr[m_size - 1]);
- }
-
- /**
- Append a value to the array.
- @param value the value to append */
- void push_back(value_type value);
-
- /**
- @return a pointer to the start of the array */
- trx_id_t* data() { return(m_ptr); };
-
- /**
- @return a const pointer to the start of the array */
- const trx_id_t* data() const { return(m_ptr); };
+ Start view open:
+ READ_VIEW_STATE_CLOSED -> READ_VIEW_STATE_SNAPSHOT
- /**
- @return the number of elements in the array */
- ulint size() const { return(m_size); }
+ Complete view open:
+ READ_VIEW_STATE_SNAPSHOT -> READ_VIEW_STATE_OPEN
- /**
- @return true if size() == 0 */
- bool empty() const { return(size() == 0); }
+ Close view:
+ READ_VIEW_STATE_OPEN -> READ_VIEW_STATE_CLOSED
+ */
+ int32_t m_state;
- private:
- // Prevent copying
- ids_t(const ids_t&);
- ids_t& operator=(const ids_t&);
- private:
- /** Memory for the array */
- value_type* m_ptr;
-
- /** Number of active elements in the array */
- ulint m_size;
+public:
+ ReadView(): m_state(READ_VIEW_STATE_CLOSED), m_low_limit_id(0) {}
+
+
+ /**
+ Copy state from another view.
+
+ This method is used to find min(m_low_limit_no), min(m_low_limit_id) and
+ all transaction ids below min(m_low_limit_id). These values effectively
+ form oldest view.
+
+ @param other view to copy from
+ */
+ void copy(const ReadView &other)
+ {
+ ut_ad(&other != this);
+ if (m_low_limit_no > other.m_low_limit_no)
+ m_low_limit_no= other.m_low_limit_no;
+ if (m_low_limit_id > other.m_low_limit_id)
+ m_low_limit_id= other.m_low_limit_id;
+
+ trx_ids_t::iterator dst= m_ids.begin();
+ for (trx_ids_t::const_iterator src= other.m_ids.begin();
+ src != other.m_ids.end(); src++)
+ {
+ if (*src >= m_low_limit_id)
+ break;
+loop:
+ if (dst == m_ids.end())
+ {
+ m_ids.push_back(*src);
+ dst= m_ids.end();
+ continue;
+ }
+ if (*dst < *src)
+ {
+ dst++;
+ goto loop;
+ }
+ else if (*dst > *src)
+ dst= m_ids.insert(dst, *src) + 1;
+ }
+ m_ids.erase(std::lower_bound(dst, m_ids.end(), m_low_limit_id),
+ m_ids.end());
+
+ m_up_limit_id= m_ids.empty() ? m_low_limit_id : m_ids.front();
+ ut_ad(m_up_limit_id <= m_low_limit_id);
+ }
+
+
+ /**
+ Opens a read view where exactly the transactions serialized before this
+ point in time are seen in the view.
+
+ View becomes visible to purge thread.
+
+ @param[in,out] trx transaction
+ */
+ void open(trx_t *trx);
+
+
+ /**
+ Closes the view.
+
+ View becomes not visible to purge thread.
+ */
+ void close()
+ {
+ ut_ad(m_state == READ_VIEW_STATE_CLOSED ||
+ m_state == READ_VIEW_STATE_OPEN);
+ if (m_state == READ_VIEW_STATE_OPEN)
+ my_atomic_store32_explicit(&m_state, READ_VIEW_STATE_CLOSED,
+ MY_MEMORY_ORDER_RELAXED);
+ }
+
+
+ /** m_state getter for trx_sys::clone_oldest_view() and trx_sys::size(). */
+ int32_t get_state() const
+ {
+ return my_atomic_load32_explicit(const_cast<int32*>(&m_state),
+ MY_MEMORY_ORDER_ACQUIRE);
+ }
+
+
+ /**
+ Returns true if view is open.
+
+ Only used by view owner thread, thus we can omit atomic operations.
+ */
+ bool is_open() const
+ {
+ ut_ad(m_state == READ_VIEW_STATE_OPEN ||
+ m_state == READ_VIEW_STATE_CLOSED);
+ return m_state == READ_VIEW_STATE_OPEN;
+ }
+
+
+ /**
+ Creates a snapshot where exactly the transactions serialized before this
+ point in time are seen in the view.
+
+ @param[in,out] trx transaction
+ */
+ inline void snapshot(trx_t *trx);
+
+
+ /**
+ Sets the creator transaction id.
+
+ This should be set only for views created by RW transactions.
+ */
+ void set_creator_trx_id(trx_id_t id)
+ {
+ ut_ad(id > 0);
+ ut_ad(m_creator_trx_id == 0);
+ m_creator_trx_id= id;
+ }
- /** Size of m_ptr in elements */
- ulint m_reserved;
- friend class ReadView;
- };
-public:
- ReadView();
- ~ReadView();
/** Check whether transaction id is valid.
@param[in] id transaction id to check
@param[in] name table name */
@@ -163,8 +205,6 @@ public:
const table_name_t& name) const
MY_ATTRIBUTE((warn_unused_result))
{
- ut_ad(id > 0);
-
if (id < m_up_limit_id || id == m_creator_trx_id) {
return(true);
@@ -181,9 +221,7 @@ public:
return(true);
}
- const ids_t::value_type* p = m_ids.data();
-
- return(!std::binary_search(p, p + m_ids.size(), id));
+ return(!std::binary_search(m_ids.begin(), m_ids.end(), id));
}
/**
@@ -195,21 +233,6 @@ public:
}
/**
- Mark the view as closed */
- void close()
- {
- ut_ad(m_creator_trx_id != TRX_ID_MAX);
- m_creator_trx_id = TRX_ID_MAX;
- }
-
- /**
- @return true if the view is closed */
- bool is_closed() const
- {
- return(m_closed);
- }
-
- /**
Write the limits to the file.
@param file file to write to */
void print_limits(FILE* file) const
@@ -234,66 +257,6 @@ public:
return(m_low_limit_id);
}
- /**
- @return true if there are no transaction ids in the snapshot */
- bool empty() const
- {
- return(m_ids.empty());
- }
-
-#ifdef UNIV_DEBUG
- /**
- @param rhs view to compare with
- @return truen if this view is less than or equal rhs */
- bool le(const ReadView* rhs) const
- {
- return(m_low_limit_no <= rhs->m_low_limit_no);
- }
-
- trx_id_t up_limit_id() const
- {
- return(m_up_limit_id);
- }
-#endif /* UNIV_DEBUG */
-private:
- /**
- Copy the transaction ids from the source vector */
- inline void copy_trx_ids(const trx_ids_t& trx_ids);
-
- /**
- Opens a read view where exactly the transactions serialized before this
- point in time are seen in the view.
- @param id Creator transaction id */
- inline void prepare(trx_id_t id);
-
- /**
- Complete the read view creation */
- inline void complete();
-
- /**
- Copy state from another view. Must call copy_complete() to finish.
- @param other view to copy from */
- inline void copy_prepare(const ReadView& other);
-
- /**
- Complete the copy, insert the creator transaction id into the
- m_trx_ids too and adjust the m_up_limit_id *, if required */
- inline void copy_complete();
-
- /**
- Set the creator transaction id, existing id must be 0 */
- void creator_trx_id(trx_id_t id)
- {
- ut_ad(m_creator_trx_id == 0);
- m_creator_trx_id = id;
- }
-
- friend class MVCC;
-
-private:
- // Disable copying
- ReadView(const ReadView&);
- ReadView& operator=(const ReadView&);
private:
/** The read should not see any transaction with trx id >= this
@@ -311,21 +274,12 @@ private:
/** Set of RW transactions that was active when this snapshot
was taken */
- ids_t m_ids;
+ trx_ids_t m_ids;
/** The view does not need to see the undo logs for transactions
whose transaction number is strictly smaller (<) than this value:
they can be removed in purge if not needed by other views */
trx_id_t m_low_limit_no;
-
- /** AC-NL-RO transaction view that has been "closed". */
- bool m_closed;
-
- typedef UT_LIST_NODE_T(ReadView) node_t;
-
- /** List of read views in trx_sys */
- byte pad1[64 - sizeof(node_t)];
- node_t m_view_list;
};
#endif
diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h
index 3b1f1c7f742..3ee993944e9 100644
--- a/storage/innobase/include/rem0rec.h
+++ b/storage/innobase/include/rem0rec.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -33,6 +33,7 @@ Created 5/30/1994 Heikki Tuuri
#include "rem0types.h"
#include "mtr0types.h"
#include "page0types.h"
+#include "dict0dict.h"
#include "trx0types.h"
#endif /*! UNIV_INNOCHECKSUM */
#include <ostream>
@@ -54,11 +55,29 @@ in addition to the data and the offsets */
in addition to the data and the offsets */
#define REC_N_NEW_EXTRA_BYTES 5
-/* Record status values */
-#define REC_STATUS_ORDINARY 0
-#define REC_STATUS_NODE_PTR 1
-#define REC_STATUS_INFIMUM 2
-#define REC_STATUS_SUPREMUM 3
+/** Record status values for ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED */
+enum rec_comp_status_t {
+ /** User record (PAGE_LEVEL=0, heap>=PAGE_HEAP_NO_USER_LOW) */
+ REC_STATUS_ORDINARY = 0,
+ /** Node pointer record (PAGE_LEVEL>=0, heap>=PAGE_HEAP_NO_USER_LOW) */
+ REC_STATUS_NODE_PTR = 1,
+ /** The page infimum pseudo-record (heap=PAGE_HEAP_NO_INFIMUM) */
+ REC_STATUS_INFIMUM = 2,
+ /** The page supremum pseudo-record (heap=PAGE_HEAP_NO_SUPREMUM) */
+ REC_STATUS_SUPREMUM = 3,
+ /** Clustered index record that has been inserted or updated
+ after instant ADD COLUMN (more than dict_index_t::n_core_fields) */
+ REC_STATUS_COLUMNS_ADDED = 4
+};
+
+/** The dtuple_t::info_bits of the 'default row' record.
+@see rec_is_default_row() */
+static const byte REC_INFO_DEFAULT_ROW
+ = REC_INFO_MIN_REC_FLAG | REC_STATUS_COLUMNS_ADDED;
+
+#define REC_NEW_STATUS 3 /* This is single byte bit-field */
+#define REC_NEW_STATUS_MASK 0x7UL
+#define REC_NEW_STATUS_SHIFT 0
/* The following four constants are needed in page0zip.cc in order to
efficiently compress and decompress pages. */
@@ -94,6 +113,22 @@ offsets[] array, first passed to rec_get_offsets() */
#define REC_OFFS_NORMAL_SIZE OFFS_IN_REC_NORMAL_SIZE
#define REC_OFFS_SMALL_SIZE 10
+/** Get the base address of offsets. The extra_size is stored at
+this position, and following positions hold the end offsets of
+the fields. */
+#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE)
+
+/** Compact flag ORed to the extra size returned by rec_get_offsets() */
+const ulint REC_OFFS_COMPACT = ~(ulint(~0) >> 1);
+/** SQL NULL flag in offsets returned by rec_get_offsets() */
+const ulint REC_OFFS_SQL_NULL = REC_OFFS_COMPACT;
+/** External flag in offsets returned by rec_get_offsets() */
+const ulint REC_OFFS_EXTERNAL = REC_OFFS_COMPACT >> 1;
+/** Default value flag in offsets returned by rec_get_offsets() */
+const ulint REC_OFFS_DEFAULT = REC_OFFS_COMPACT >> 2;
+/** Mask for offsets returned by rec_get_offsets() */
+const ulint REC_OFFS_MASK = REC_OFFS_DEFAULT - 1;
+
#ifndef UNIV_INNOCHECKSUM
/******************************************************//**
The following function is used to get the pointer of the next chained record
@@ -252,25 +287,55 @@ rec_set_info_bits_new(
rec_t* rec, /*!< in/out: new-style physical record */
ulint bits) /*!< in: info bits */
MY_ATTRIBUTE((nonnull));
-/******************************************************//**
-The following function retrieves the status bits of a new-style record.
+
+/** Determine the status bits of a non-REDUNDANT record.
+@param[in] rec ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED record
@return status bits */
-UNIV_INLINE
-ulint
-rec_get_status(
-/*===========*/
- const rec_t* rec) /*!< in: physical record */
- MY_ATTRIBUTE((warn_unused_result));
+inline
+rec_comp_status_t
+rec_get_status(const rec_t* rec)
+{
+ byte bits = rec[-REC_NEW_STATUS] & REC_NEW_STATUS_MASK;
+ ut_ad(bits <= REC_STATUS_COLUMNS_ADDED);
+ return static_cast<rec_comp_status_t>(bits);
+}
-/******************************************************//**
-The following function is used to set the status bits of a new-style record. */
-UNIV_INLINE
+/** Set the status bits of a non-REDUNDANT record.
+@param[in,out] rec ROW_FORMAT=COMPACT,DYNAMIC,COMPRESSED record
+@param[in] bits status bits */
+inline
void
-rec_set_status(
-/*===========*/
- rec_t* rec, /*!< in/out: physical record */
- ulint bits) /*!< in: info bits */
- MY_ATTRIBUTE((nonnull));
+rec_set_status(rec_t* rec, byte bits)
+{
+ ut_ad(bits <= REC_STATUS_COLUMNS_ADDED);
+ rec[-REC_NEW_STATUS] = (rec[-REC_NEW_STATUS] & ~REC_NEW_STATUS_MASK)
+ | bits;
+}
+
+/** Get the length of added field count in a REC_STATUS_COLUMNS_ADDED record.
+@param[in] n_add_field number of added fields, minus one
+@return storage size of the field count, in bytes */
+inline unsigned rec_get_n_add_field_len(ulint n_add_field)
+{
+ ut_ad(n_add_field < REC_MAX_N_FIELDS);
+ return n_add_field < 0x80 ? 1 : 2;
+}
+
+/** Set the added field count in a REC_STATUS_COLUMNS_ADDED record.
+@param[in,out] header variable header of a REC_STATUS_COLUMNS_ADDED record;
+ on return, points to the byte before the stored number of added fields
+@param[in] n_add number of added fields, minus 1 */
+inline void rec_set_n_add_field(byte*& header, ulint n_add)
+{
+ ut_ad(n_add < REC_MAX_N_FIELDS);
+
+ if (n_add < 0x80) {
+ *header-- = byte(n_add);
+ } else {
+ *header-- = byte(n_add) | 0x80;
+ *header-- = byte(n_add >> 7);
+ }
+}
/******************************************************//**
The following function is used to retrieve the info and status
@@ -327,7 +392,7 @@ rec_set_deleted_flag_new(
The following function tells if a new-style record is a node pointer.
@return TRUE if node pointer */
UNIV_INLINE
-ibool
+bool
rec_get_node_ptr_flag(
/*==================*/
const rec_t* rec) /*!< in: physical record */
@@ -459,9 +524,7 @@ rec_get_offsets_func(
const rec_t* rec,
const dict_index_t* index,
ulint* offsets,
-#ifdef UNIV_DEBUG
bool leaf,
-#endif /* UNIV_DEBUG */
ulint n_fields,
#ifdef UNIV_DEBUG
const char* file, /*!< in: file name where called */
@@ -471,7 +534,7 @@ rec_get_offsets_func(
#ifdef UNIV_DEBUG
MY_ATTRIBUTE((nonnull(1,2,6,8),warn_unused_result));
#else /* UNIV_DEBUG */
- MY_ATTRIBUTE((nonnull(1,2,5),warn_unused_result));
+ MY_ATTRIBUTE((nonnull(1,2,6),warn_unused_result));
#endif /* UNIV_DEBUG */
#ifdef UNIV_DEBUG
@@ -479,7 +542,7 @@ rec_get_offsets_func(
rec_get_offsets_func(rec,index,offsets,leaf,n,__FILE__,__LINE__,heap)
#else /* UNIV_DEBUG */
# define rec_get_offsets(rec, index, offsets, leaf, n, heap) \
- rec_get_offsets_func(rec, index, offsets, n, heap)
+ rec_get_offsets_func(rec, index, offsets, leaf, n, heap)
#endif /* UNIV_DEBUG */
/******************************************************//**
@@ -499,32 +562,31 @@ rec_get_offsets_reverse(
offsets[0] allocated elements */
MY_ATTRIBUTE((nonnull));
#ifdef UNIV_DEBUG
-/************************************************************//**
-Validates offsets returned by rec_get_offsets().
-@return TRUE if valid */
-UNIV_INLINE
-ibool
+/** Validate offsets returned by rec_get_offsets().
+@param[in] rec record, or NULL
+@param[in] index the index that the record belongs in, or NULL
+@param[in,out] offsets the offsets of the record
+@return true */
+bool
rec_offs_validate(
-/*==============*/
- const rec_t* rec, /*!< in: record or NULL */
- const dict_index_t* index, /*!< in: record descriptor or NULL */
- const ulint* offsets)/*!< in: array returned by
- rec_get_offsets() */
+ const rec_t* rec,
+ const dict_index_t* index,
+ const ulint* offsets)
MY_ATTRIBUTE((nonnull(3), warn_unused_result));
-/************************************************************//**
-Updates debug data in offsets, in order to avoid bogus
-rec_offs_validate() failures. */
-UNIV_INLINE
+/** Update debug data in offsets, in order to tame rec_offs_validate().
+@param[in] rec record
+@param[in] index the index that the record belongs in
+@param[in] leaf whether the record resides in a leaf page
+@param[in,out] offsets offsets from rec_get_offsets() to adjust */
void
rec_offs_make_valid(
-/*================*/
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in: array returned by
- rec_get_offsets() */
+ const rec_t* rec,
+ const dict_index_t* index,
+ bool leaf,
+ ulint* offsets)
MY_ATTRIBUTE((nonnull));
#else
-# define rec_offs_make_valid(rec, index, offsets) ((void) 0)
+# define rec_offs_make_valid(rec, index, leaf, offsets)
#endif /* UNIV_DEBUG */
/************************************************************//**
@@ -568,26 +630,7 @@ rec_get_nth_field_offs(
MY_ATTRIBUTE((nonnull));
#define rec_get_nth_field(rec, offsets, n, len) \
((rec) + rec_get_nth_field_offs(offsets, n, len))
-/******************************************************//**
-Determine if the offsets are for a record in the new
-compact format.
-@return nonzero if compact format */
-UNIV_INLINE
-ulint
-rec_offs_comp(
-/*==========*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((warn_unused_result));
-/******************************************************//**
-Determine if the offsets are for a record containing
-externally stored columns.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-rec_offs_any_extern(
-/*================*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((warn_unused_result));
+
/******************************************************//**
Determine if the offsets are for a record containing null BLOB pointers.
@return first field containing a null BLOB pointer, or NULL if none found */
@@ -598,15 +641,16 @@ rec_offs_any_null_extern(
const rec_t* rec, /*!< in: record */
const ulint* offsets) /*!< in: rec_get_offsets(rec) */
MY_ATTRIBUTE((warn_unused_result));
+
/******************************************************//**
Returns nonzero if the extern bit is set in nth field of rec.
@return nonzero if externally stored */
UNIV_INLINE
ulint
-rec_offs_nth_extern(
+rec_offs_nth_extern_old(
/*================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
+ const rec_t* rec, /*!< in: record */
+ ulint n /*!< in: index of the field */)
MY_ATTRIBUTE((warn_unused_result));
/** Mark the nth field as externally stored.
@@ -616,16 +660,177 @@ void
rec_offs_make_nth_extern(
ulint* offsets,
const ulint n);
-/******************************************************//**
-Returns nonzero if the SQL NULL bit is set in nth field of rec.
-@return nonzero if SQL NULL */
-UNIV_INLINE
+
+/** Determine the number of allocated elements for an array of offsets.
+@param[in] offsets offsets after rec_offs_set_n_alloc()
+@return number of elements */
+inline
ulint
-rec_offs_nth_sql_null(
-/*==================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
- MY_ATTRIBUTE((warn_unused_result));
+rec_offs_get_n_alloc(const ulint* offsets)
+{
+ ulint n_alloc;
+ ut_ad(offsets);
+ n_alloc = offsets[0];
+ ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
+ UNIV_MEM_ASSERT_W(offsets, n_alloc * sizeof *offsets);
+ return(n_alloc);
+}
+
+/** Determine the number of fields for which offsets have been initialized.
+@param[in] offsets rec_get_offsets()
+@return number of fields */
+inline
+ulint
+rec_offs_n_fields(const ulint* offsets)
+{
+ ulint n_fields;
+ ut_ad(offsets);
+ n_fields = offsets[1];
+ ut_ad(n_fields > 0);
+ ut_ad(n_fields <= REC_MAX_N_FIELDS);
+ ut_ad(n_fields + REC_OFFS_HEADER_SIZE
+ <= rec_offs_get_n_alloc(offsets));
+ return(n_fields);
+}
+
+/** Get a flag of a record field.
+@param[in] offsets rec_get_offsets()
+@param[in] n nth field
+@param[in] flag flag to extract
+@return the flag of the record field */
+inline
+ulint
+rec_offs_nth_flag(const ulint* offsets, ulint n, ulint flag)
+{
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ ut_ad(n < rec_offs_n_fields(offsets));
+ /* The DEFAULT, NULL, EXTERNAL flags are mutually exclusive. */
+ ut_ad(ut_is_2pow(rec_offs_base(offsets)[1 + n]
+ & (REC_OFFS_DEFAULT
+ | REC_OFFS_SQL_NULL
+ | REC_OFFS_EXTERNAL)));
+ return rec_offs_base(offsets)[1 + n] & flag;
+}
+
+/** Determine if a record field is missing
+(should be replaced by dict_index_t::instant_field_value()).
+@param[in] offsets rec_get_offsets()
+@param[in] n nth field
+@return nonzero if default bit is set */
+inline
+ulint
+rec_offs_nth_default(const ulint* offsets, ulint n)
+{
+ return rec_offs_nth_flag(offsets, n, REC_OFFS_DEFAULT);
+}
+
+/** Determine if a record field is SQL NULL
+(the REC_OFFS_SQL_NULL flag is set for the field in offsets).
+@param[in] offsets rec_get_offsets()
+@param[in] n nth field
+@return nonzero if SQL NULL set */
+inline
+ulint
+rec_offs_nth_sql_null(const ulint* offsets, ulint n)
+{
+ return rec_offs_nth_flag(offsets, n, REC_OFFS_SQL_NULL);
+}
+
+/** Determine if a record field is stored off-page.
+@param[in] offsets rec_get_offsets()
+@param[in] n nth field
+Returns nonzero if the extern bit is set in nth field of rec.
+@return nonzero if externally stored */
+inline
+ulint
+rec_offs_nth_extern(const ulint* offsets, ulint n)
+{
+ return rec_offs_nth_flag(offsets, n, REC_OFFS_EXTERNAL);
+}
+
+/** Get a global flag of a record.
+@param[in] offsets rec_get_offsets()
+@param[in] flag flag to extract
+@return the flag of the record */
+inline
+ulint
+rec_offs_any_flag(const ulint* offsets, ulint flag)
+{
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ return *rec_offs_base(offsets) & flag;
+}
+
+/** Determine if the offsets are for a record containing off-page columns.
+@param[in] offsets rec_get_offsets()
+@return whether any off-page columns exist */
+inline bool rec_offs_any_extern(const ulint* offsets)
+{
+ return rec_offs_any_flag(offsets, REC_OFFS_EXTERNAL);
+}
+
+/** Determine if the offsets are for a record that is missing fields.
+@param[in] offsets rec_get_offsets()
+@return nonzero if any fields need to be replaced with
+ dict_index_t::instant_field_value() */
+inline
+ulint
+rec_offs_any_default(const ulint* offsets)
+{
+ return rec_offs_any_flag(offsets, REC_OFFS_DEFAULT);
+}
+
+/** Determine if the offsets are for other than ROW_FORMAT=REDUNDANT.
+@param[in] offsets rec_get_offsets()
+@return nonzero if ROW_FORMAT is COMPACT,DYNAMIC or COMPRESSED
+@retval 0 if ROW_FORMAT=REDUNDANT */
+inline
+ulint
+rec_offs_comp(const ulint* offsets)
+{
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ return(*rec_offs_base(offsets) & REC_OFFS_COMPACT);
+}
+
+/** Determine if the record is the 'default row' pseudo-record
+in the clustered index.
+@param[in] rec leaf page record
+@param[in] index index of the record
+@return whether the record is the 'default row' pseudo-record */
+inline
+bool
+rec_is_default_row(const rec_t* rec, const dict_index_t* index)
+{
+ bool is = rec_get_info_bits(rec, dict_table_is_comp(index->table))
+ & REC_INFO_MIN_REC_FLAG;
+ ut_ad(!is || index->is_instant());
+ ut_ad(!is || !dict_table_is_comp(index->table)
+ || rec_get_status(rec) == REC_STATUS_COLUMNS_ADDED);
+ return is;
+}
+
+/** Get the nth field from an index.
+@param[in] rec index record
+@param[in] index index
+@param[in] offsets rec_get_offsets(rec, index)
+@param[in] n field number
+@param[out] len length of the field in bytes, or UNIV_SQL_NULL
+@return a read-only copy of the index field */
+inline
+const byte*
+rec_get_nth_cfield(
+ const rec_t* rec,
+ const dict_index_t* index,
+ const ulint* offsets,
+ ulint n,
+ ulint* len)
+{
+ ut_ad(rec_offs_validate(rec, index, offsets));
+ if (!rec_offs_nth_default(offsets, n)) {
+ return rec_get_nth_field(rec, offsets, n, len);
+ }
+ return index->instant_field_value(n, len);
+}
+
/******************************************************//**
Gets the physical size of a field.
@return length of field */
@@ -679,16 +884,6 @@ rec_get_data_size_old(
const rec_t* rec) /*!< in: physical record */
MY_ATTRIBUTE((warn_unused_result));
/**********************************************************//**
-The following function returns the number of allocated elements
-for an array of offsets.
-@return number of elements */
-UNIV_INLINE
-ulint
-rec_offs_get_n_alloc(
-/*=================*/
- const ulint* offsets)/*!< in: array for rec_get_offsets() */
- MY_ATTRIBUTE((warn_unused_result));
-/**********************************************************//**
The following function sets the number of allocated elements
for an array of offsets. */
UNIV_INLINE
@@ -702,15 +897,6 @@ rec_offs_set_n_alloc(
#define rec_offs_init(offsets) \
rec_offs_set_n_alloc(offsets, (sizeof offsets) / sizeof *offsets)
/**********************************************************//**
-The following function returns the number of fields in a record.
-@return number of fields */
-UNIV_INLINE
-ulint
-rec_offs_n_fields(
-/*==============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
- MY_ATTRIBUTE((warn_unused_result));
-/**********************************************************//**
The following function returns the data size of a physical
record, that is the sum of field lengths. SQL null fields
are counted as length 0 fields. The value returned by the function
@@ -785,37 +971,60 @@ rec_copy(
@param[in] fields data fields
@param[in] n_fields number of data fields
@param[out] extra record header size
+@param[in] status REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED
@return total size, in bytes */
ulint
rec_get_converted_size_temp(
const dict_index_t* index,
const dfield_t* fields,
ulint n_fields,
- ulint* extra)
- MY_ATTRIBUTE((warn_unused_result, nonnull(1,2)));
+ ulint* extra,
+ rec_comp_status_t status = REC_STATUS_ORDINARY)
+ MY_ATTRIBUTE((warn_unused_result, nonnull));
-/******************************************************//**
-Determine the offset to each field in temporary file.
-@see rec_convert_dtuple_to_temp() */
+/** Determine the offset to each field in temporary file.
+@param[in] rec temporary file record
+@param[in] index index that the record belongs to
+@param[in,out] offsets offsets to the fields; in: rec_offs_n_fields(offsets)
+@param[in] n_core number of core fields (index->n_core_fields)
+@param[in] def_val default values for non-core fields
+@param[in] status REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED */
void
rec_init_offsets_temp(
-/*==================*/
- const rec_t* rec, /*!< in: temporary file record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in/out: array of offsets;
- in: n=rec_offs_n_fields(offsets) */
+ const rec_t* rec,
+ const dict_index_t* index,
+ ulint* offsets,
+ ulint n_core,
+ const dict_col_t::def_t*def_val,
+ rec_comp_status_t status = REC_STATUS_ORDINARY)
+ MY_ATTRIBUTE((nonnull));
+/** Determine the offset to each field in temporary file.
+@param[in] rec temporary file record
+@param[in] index index that the record belongs to
+@param[in,out] offsets offsets to the fields; in: rec_offs_n_fields(offsets)
+*/
+void
+rec_init_offsets_temp(
+ const rec_t* rec,
+ const dict_index_t* index,
+ ulint* offsets)
MY_ATTRIBUTE((nonnull));
-/*********************************************************//**
-Builds a temporary file record out of a data tuple.
-@see rec_init_offsets_temp() */
+/** Convert a data tuple prefix to the temporary file format.
+@param[out] rec record in temporary file format
+@param[in] index clustered or secondary index
+@param[in] fields data fields
+@param[in] n_fields number of data fields
+@param[in] status REC_STATUS_ORDINARY or REC_STATUS_COLUMNS_ADDED
+*/
void
rec_convert_dtuple_to_temp(
-/*=======================*/
- rec_t* rec, /*!< out: record */
- const dict_index_t* index, /*!< in: record descriptor */
- const dfield_t* fields, /*!< in: array of data fields */
- ulint n_fields); /*!< in: number of fields */
+ rec_t* rec,
+ const dict_index_t* index,
+ const dfield_t* fields,
+ ulint n_fields,
+ rec_comp_status_t status = REC_STATUS_ORDINARY)
+ MY_ATTRIBUTE((nonnull));
/**************************************************************//**
Copies the first n fields of a physical record to a new physical record in
@@ -833,22 +1042,6 @@ rec_copy_prefix_to_buf(
or NULL */
ulint* buf_size) /*!< in/out: buffer size */
MY_ATTRIBUTE((nonnull));
-/** Fold a prefix of a physical record.
-@param[in] rec index record
-@param[in] offsets return value of rec_get_offsets()
-@param[in] n_fields number of complete fields to fold
-@param[in] n_bytes number of bytes to fold in the last field
-@param[in] index_id index tree ID
-@return the folded value */
-UNIV_INLINE
-ulint
-rec_fold(
- const rec_t* rec,
- const ulint* offsets,
- ulint n_fields,
- ulint n_bytes,
- index_id_t tree_id)
- MY_ATTRIBUTE((warn_unused_result));
/*********************************************************//**
Builds a physical record out of a data tuple and
stores it into the given buffer.
@@ -896,7 +1089,7 @@ rec_get_converted_size_comp(
dict_table_is_comp() is
assumed to hold, even if
it does not */
- ulint status, /*!< in: status bits of the record */
+ rec_comp_status_t status, /*!< in: status bits of the record */
const dfield_t* fields, /*!< in: array of data fields */
ulint n_fields,/*!< in: number of data fields */
ulint* extra) /*!< out: extra size */
@@ -921,23 +1114,14 @@ The fields are copied into the memory heap.
@param[in] n_fields number of fields to copy
@param[in,out] heap memory heap */
void
-rec_copy_prefix_to_dtuple_func(
+rec_copy_prefix_to_dtuple(
dtuple_t* tuple,
const rec_t* rec,
const dict_index_t* index,
-#ifdef UNIV_DEBUG
bool is_leaf,
-#endif /* UNIV_DEBUG */
ulint n_fields,
mem_heap_t* heap)
MY_ATTRIBUTE((nonnull));
-#ifdef UNIV_DEBUG
-# define rec_copy_prefix_to_dtuple(tuple,rec,index,leaf,n_fields,heap) \
- rec_copy_prefix_to_dtuple_func(tuple,rec,index,leaf,n_fields,heap)
-#else /* UNIV_DEBUG */
-# define rec_copy_prefix_to_dtuple(tuple,rec,index,leaf,n_fields,heap) \
- rec_copy_prefix_to_dtuple_func(tuple,rec,index,n_fields,heap)
-#endif /* UNIV_DEBUG */
/***************************************************************//**
Validates the consistency of a physical record.
@return TRUE if ok */
diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic
index cb1f0d9836f..5e9dbcdcfb6 100644
--- a/storage/innobase/include/rem0rec.ic
+++ b/storage/innobase/include/rem0rec.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -26,19 +26,9 @@ Created 5/30/1994 Heikki Tuuri
#include "mach0data.h"
#include "ut0byte.h"
-#include "dict0dict.h"
#include "dict0boot.h"
#include "btr0types.h"
-/* Compact flag ORed to the extra size returned by rec_get_offsets() */
-#define REC_OFFS_COMPACT ((ulint) 1 << 31)
-/* SQL NULL flag in offsets returned by rec_get_offsets() */
-#define REC_OFFS_SQL_NULL ((ulint) 1 << 31)
-/* External flag in offsets returned by rec_get_offsets() */
-#define REC_OFFS_EXTERNAL ((ulint) 1 << 30)
-/* Mask for offsets returned by rec_get_offsets() */
-#define REC_OFFS_MASK (REC_OFFS_EXTERNAL - 1)
-
/* Offsets of the bit-fields in an old-style record. NOTE! In the table the
most significant bytes and bits are written below less significant.
@@ -71,12 +61,13 @@ most significant bytes and bits are written below less significant.
we can calculate the offset of the next
record with the formula:
relative_offset + offset_of_this_record
- mod UNIV_PAGE_SIZE
+ mod srv_page_size
3 3 bits status:
- 000=conventional record
- 001=node pointer record (inside B-tree)
- 010=infimum record
- 011=supremum record
+ 000=REC_STATUS_ORDINARY
+ 001=REC_STATUS_NODE_PTR
+ 010=REC_STATUS_INFIMUM
+ 011=REC_STATUS_SUPREMUM
+ 100=REC_STATUS_COLUMNS_ADDED
1xx=reserved
5 bits heap number
4 8 bits heap number
@@ -99,10 +90,6 @@ and the shift needed to obtain each bit-field of the record. */
#define REC_OLD_N_FIELDS_MASK 0x7FEUL
#define REC_OLD_N_FIELDS_SHIFT 1
-#define REC_NEW_STATUS 3 /* This is single byte bit-field */
-#define REC_NEW_STATUS_MASK 0x7UL
-#define REC_NEW_STATUS_SHIFT 0
-
#define REC_OLD_HEAP_NO 5
#define REC_HEAP_NO_MASK 0xFFF8UL
#if 0 /* defined in rem0rec.h for use of page0zip.cc */
@@ -248,8 +235,8 @@ rec_get_next_ptr_const(
{
ulint field_value;
- ut_ad(REC_NEXT_MASK == 0xFFFFUL);
- ut_ad(REC_NEXT_SHIFT == 0);
+ compile_time_assert(REC_NEXT_MASK == 0xFFFFUL);
+ compile_time_assert(REC_NEXT_SHIFT == 0);
field_value = mach_read_from_2(rec - REC_NEXT);
@@ -267,13 +254,13 @@ rec_get_next_ptr_const(
as signed 16-bit integer in 2's complement arithmetics.
If all platforms defined int16_t in the standard headers,
the expression could be written simpler as
- (int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
+ (int16_t) field_value + ut_align_offset(...) < srv_page_size
*/
ut_ad((field_value >= 32768
? field_value - 65536
: field_value)
- + ut_align_offset(rec, UNIV_PAGE_SIZE)
- < UNIV_PAGE_SIZE);
+ + ut_align_offset(rec, srv_page_size)
+ < srv_page_size);
#endif
/* There must be at least REC_N_NEW_EXTRA_BYTES + 1
between each record. */
@@ -281,12 +268,12 @@ rec_get_next_ptr_const(
&& field_value < 32768)
|| field_value < (uint16) -REC_N_NEW_EXTRA_BYTES);
- return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE)
- + ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
+ return((byte*) ut_align_down(rec, srv_page_size)
+ + ut_align_offset(rec + field_value, srv_page_size));
} else {
- ut_ad(field_value < UNIV_PAGE_SIZE);
+ ut_ad(field_value < srv_page_size);
- return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE)
+ return((byte*) ut_align_down(rec, srv_page_size)
+ field_value);
}
}
@@ -317,12 +304,8 @@ rec_get_next_offs(
ulint comp) /*!< in: nonzero=compact page format */
{
ulint field_value;
-#if REC_NEXT_MASK != 0xFFFFUL
-# error "REC_NEXT_MASK != 0xFFFFUL"
-#endif
-#if REC_NEXT_SHIFT
-# error "REC_NEXT_SHIFT != 0"
-#endif
+ compile_time_assert(REC_NEXT_MASK == 0xFFFFUL);
+ compile_time_assert(REC_NEXT_SHIFT == 0);
field_value = mach_read_from_2(rec - REC_NEXT);
@@ -335,13 +318,13 @@ rec_get_next_offs(
as signed 16-bit integer in 2's complement arithmetics.
If all platforms defined int16_t in the standard headers,
the expression could be written simpler as
- (int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
+ (int16_t) field_value + ut_align_offset(...) < srv_page_size
*/
ut_ad((field_value >= 32768
? field_value - 65536
: field_value)
- + ut_align_offset(rec, UNIV_PAGE_SIZE)
- < UNIV_PAGE_SIZE);
+ + ut_align_offset(rec, srv_page_size)
+ < srv_page_size);
#endif
if (field_value == 0) {
@@ -354,9 +337,9 @@ rec_get_next_offs(
&& field_value < 32768)
|| field_value < (uint16) -REC_N_NEW_EXTRA_BYTES);
- return(ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
+ return(ut_align_offset(rec + field_value, srv_page_size));
} else {
- ut_ad(field_value < UNIV_PAGE_SIZE);
+ ut_ad(field_value < srv_page_size);
return(field_value);
}
@@ -373,14 +356,9 @@ rec_set_next_offs_old(
ulint next) /*!< in: offset of the next record */
{
ut_ad(rec);
- ut_ad(UNIV_PAGE_SIZE > next);
-#if REC_NEXT_MASK != 0xFFFFUL
-# error "REC_NEXT_MASK != 0xFFFFUL"
-#endif
-#if REC_NEXT_SHIFT
-# error "REC_NEXT_SHIFT != 0"
-#endif
-
+ ut_ad(srv_page_size > next);
+ compile_time_assert(REC_NEXT_MASK == 0xFFFFUL);
+ compile_time_assert(REC_NEXT_SHIFT == 0);
mach_write_to_2(rec - REC_NEXT, next);
}
@@ -397,7 +375,7 @@ rec_set_next_offs_new(
ulint field_value;
ut_ad(rec);
- ut_ad(UNIV_PAGE_SIZE > next);
+ ut_ad(srv_page_size > next);
if (!next) {
field_value = 0;
@@ -408,7 +386,7 @@ rec_set_next_offs_new(
field_value = (ulint)
((lint) next
- - (lint) ut_align_offset(rec, UNIV_PAGE_SIZE));
+ - (lint) ut_align_offset(rec, srv_page_size));
field_value &= REC_NEXT_MASK;
}
@@ -457,26 +435,6 @@ rec_set_n_fields_old(
}
/******************************************************//**
-The following function retrieves the status bits of a new-style record.
-@return status bits */
-UNIV_INLINE
-ulint
-rec_get_status(
-/*===========*/
- const rec_t* rec) /*!< in: physical record */
-{
- ulint ret;
-
- ut_ad(rec);
-
- ret = rec_get_bit_field_1(rec, REC_NEW_STATUS,
- REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
- ut_ad((ret & ~REC_NEW_STATUS_MASK) == 0);
-
- return(ret);
-}
-
-/******************************************************//**
The following function is used to get the number of fields
in a record.
@return number of data fields */
@@ -495,6 +453,7 @@ rec_get_n_fields(
}
switch (rec_get_status(rec)) {
+ case REC_STATUS_COLUMNS_ADDED:
case REC_STATUS_ORDINARY:
return(dict_index_get_n_fields(index));
case REC_STATUS_NODE_PTR:
@@ -502,10 +461,10 @@ rec_get_n_fields(
case REC_STATUS_INFIMUM:
case REC_STATUS_SUPREMUM:
return(1);
- default:
- ut_error;
- return(ULINT_UNDEFINED);
}
+
+ ut_error;
+ return(ULINT_UNDEFINED);
}
/** Confirms the n_fields of the entry is sane with comparing the other
@@ -521,13 +480,15 @@ rec_n_fields_is_sane(
const rec_t* rec,
const dtuple_t* entry)
{
- return(rec_get_n_fields(rec, index)
- == dtuple_get_n_fields(entry)
+ const ulint n_fields = rec_get_n_fields(rec, index);
+
+ return(n_fields == dtuple_get_n_fields(entry)
+ || (index->is_instant()
+ && n_fields >= index->n_core_fields)
/* a record for older SYS_INDEXES table
(missing merge_threshold column) is acceptable. */
|| (index->table->id == DICT_INDEXES_ID
- && rec_get_n_fields(rec, index)
- == dtuple_get_n_fields(entry) - 1));
+ && n_fields == dtuple_get_n_fields(entry) - 1));
}
/******************************************************//**
@@ -646,19 +607,6 @@ rec_set_info_bits_new(
}
/******************************************************//**
-The following function is used to set the status bits of a new-style record. */
-UNIV_INLINE
-void
-rec_set_status(
-/*===========*/
- rec_t* rec, /*!< in/out: physical record */
- ulint bits) /*!< in: info bits */
-{
- rec_set_bit_field_1(rec, bits, REC_NEW_STATUS,
- REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
-}
-
-/******************************************************//**
The following function is used to retrieve the info and status
bits of a record. (Only compact records have status bits.)
@return info bits */
@@ -670,12 +618,11 @@ rec_get_info_and_status_bits(
ulint comp) /*!< in: nonzero=compact page format */
{
ulint bits;
-#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
-& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
-# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
-#endif
+ compile_time_assert(!((REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT)
+ & (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
if (comp) {
- bits = rec_get_info_bits(rec, TRUE) | rec_get_status(rec);
+ bits = rec_get_info_bits(rec, TRUE)
+ | ulint(rec_get_status(rec));
} else {
bits = rec_get_info_bits(rec, FALSE);
ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
@@ -692,10 +639,8 @@ rec_set_info_and_status_bits(
rec_t* rec, /*!< in/out: physical record */
ulint bits) /*!< in: info bits */
{
-#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
-& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
-# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
-#endif
+ compile_time_assert(!((REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT)
+ & (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
rec_set_status(rec, bits & REC_NEW_STATUS_MASK);
rec_set_info_bits_new(rec, bits & ~REC_NEW_STATUS_MASK);
}
@@ -774,7 +719,7 @@ rec_set_deleted_flag_new(
The following function tells if a new-style record is a node pointer.
@return TRUE if node pointer */
UNIV_INLINE
-ibool
+bool
rec_get_node_ptr_flag(
/*==================*/
const rec_t* rec) /*!< in: physical record */
@@ -848,10 +793,6 @@ rec_get_1byte_offs_flag(
/*====================*/
const rec_t* rec) /*!< in: physical record */
{
-#if TRUE != 1
-#error "TRUE != 1"
-#endif
-
return(rec_get_bit_field_1(rec, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
REC_OLD_SHORT_SHIFT));
}
@@ -865,10 +806,7 @@ rec_set_1byte_offs_flag(
rec_t* rec, /*!< in: physical record */
ibool flag) /*!< in: TRUE if 1byte form */
{
-#if TRUE != 1
-#error "TRUE != 1"
-#endif
- ut_ad(flag <= TRUE);
+ ut_ad(flag <= 1);
rec_set_bit_field_1(rec, flag, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
REC_OLD_SHORT_SHIFT);
@@ -925,29 +863,6 @@ rec_2_is_field_extern(
return(rec_2_get_field_end_info(rec, n) & REC_2BYTE_EXTERN_MASK);
}
-/* Get the base address of offsets. The extra_size is stored at
-this position, and following positions hold the end offsets of
-the fields. */
-#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE)
-
-/**********************************************************//**
-The following function returns the number of allocated elements
-for an array of offsets.
-@return number of elements */
-UNIV_INLINE
-ulint
-rec_offs_get_n_alloc(
-/*=================*/
- const ulint* offsets)/*!< in: array for rec_get_offsets() */
-{
- ulint n_alloc;
- ut_ad(offsets);
- n_alloc = offsets[0];
- ut_ad(n_alloc > REC_OFFS_HEADER_SIZE);
- UNIV_MEM_ASSERT_W(offsets, n_alloc * sizeof *offsets);
- return(n_alloc);
-}
-
/**********************************************************//**
The following function sets the number of allocated elements
for an array of offsets. */
@@ -965,102 +880,6 @@ rec_offs_set_n_alloc(
offsets[0] = n_alloc;
}
-/**********************************************************//**
-The following function returns the number of fields in a record.
-@return number of fields */
-UNIV_INLINE
-ulint
-rec_offs_n_fields(
-/*==============*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ulint n_fields;
- ut_ad(offsets);
- n_fields = offsets[1];
- ut_ad(n_fields > 0);
- ut_ad(n_fields <= REC_MAX_N_FIELDS);
- ut_ad(n_fields + REC_OFFS_HEADER_SIZE
- <= rec_offs_get_n_alloc(offsets));
- return(n_fields);
-}
-
-/************************************************************//**
-Validates offsets returned by rec_get_offsets().
-@return TRUE if valid */
-UNIV_INLINE
-ibool
-rec_offs_validate(
-/*==============*/
- const rec_t* rec, /*!< in: record or NULL */
- const dict_index_t* index, /*!< in: record descriptor or NULL */
- const ulint* offsets)/*!< in: array returned by
- rec_get_offsets() */
-{
- ulint i = rec_offs_n_fields(offsets);
- ulint last = ULINT_MAX;
- ulint comp = *rec_offs_base(offsets) & REC_OFFS_COMPACT;
-
- if (rec) {
- ut_ad((ulint) rec == offsets[2]);
- if (!comp) {
- ut_a(rec_get_n_fields_old(rec) >= i);
- }
- }
- if (index) {
- ulint max_n_fields;
- ut_ad((ulint) index == offsets[3]);
- max_n_fields = ut_max(
- dict_index_get_n_fields(index),
- dict_index_get_n_unique_in_tree(index) + 1);
- if (comp && rec) {
- switch (rec_get_status(rec)) {
- case REC_STATUS_ORDINARY:
- break;
- case REC_STATUS_NODE_PTR:
- max_n_fields = dict_index_get_n_unique_in_tree(
- index) + 1;
- break;
- case REC_STATUS_INFIMUM:
- case REC_STATUS_SUPREMUM:
- max_n_fields = 1;
- break;
- default:
- ut_error;
- }
- }
- /* index->n_def == 0 for dummy indexes if !comp */
- ut_a(!comp || index->n_def);
- ut_a(!index->n_def || i <= max_n_fields);
- }
- while (i--) {
- ulint curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK;
- ut_a(curr <= last);
- last = curr;
- }
- return(TRUE);
-}
-#ifdef UNIV_DEBUG
-/************************************************************//**
-Updates debug data in offsets, in order to avoid bogus
-rec_offs_validate() failures. */
-UNIV_INLINE
-void
-rec_offs_make_valid(
-/*================*/
- const rec_t* rec, /*!< in: record */
- const dict_index_t* index, /*!< in: record descriptor */
- ulint* offsets)/*!< in: array returned by
- rec_get_offsets() */
-{
- ut_ad(rec);
- ut_ad(index);
- ut_ad(offsets);
- ut_ad(rec_get_n_fields(rec, index) >= rec_offs_n_fields(offsets));
- offsets[2] = (ulint) rec;
- offsets[3] = (ulint) index;
-}
-#endif /* UNIV_DEBUG */
-
/************************************************************//**
The following function is used to get an offset to the nth
data field in a record.
@@ -1072,7 +891,7 @@ rec_get_nth_field_offs(
const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
ulint n, /*!< in: index of the field */
ulint* len) /*!< out: length of the field; UNIV_SQL_NULL
- if SQL null */
+ if SQL null; UNIV_SQL_DEFAULT if default value */
{
ulint offs;
ulint length;
@@ -1089,6 +908,8 @@ rec_get_nth_field_offs(
if (length & REC_OFFS_SQL_NULL) {
length = UNIV_SQL_NULL;
+ } else if (length & REC_OFFS_DEFAULT) {
+ length = UNIV_SQL_DEFAULT;
} else {
length &= REC_OFFS_MASK;
length -= offs;
@@ -1099,34 +920,6 @@ rec_get_nth_field_offs(
}
/******************************************************//**
-Determine if the offsets are for a record in the new
-compact format.
-@return nonzero if compact format */
-UNIV_INLINE
-ulint
-rec_offs_comp(
-/*==========*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- return(*rec_offs_base(offsets) & REC_OFFS_COMPACT);
-}
-
-/******************************************************//**
-Determine if the offsets are for a record containing
-externally stored columns.
-@return nonzero if externally stored */
-UNIV_INLINE
-ulint
-rec_offs_any_extern(
-/*================*/
- const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- return(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL);
-}
-
-/******************************************************//**
Determine if the offsets are for a record containing null BLOB pointers.
@return first field containing a null BLOB pointer, or NULL if none found */
UNIV_INLINE
@@ -1167,29 +960,14 @@ Returns nonzero if the extern bit is set in nth field of rec.
@return nonzero if externally stored */
UNIV_INLINE
ulint
-rec_offs_nth_extern(
+rec_offs_nth_extern_old(
/*================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
-{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- ut_ad(n < rec_offs_n_fields(offsets));
- return(rec_offs_base(offsets)[1 + n] & REC_OFFS_EXTERNAL);
-}
-
-/******************************************************//**
-Returns nonzero if the SQL NULL bit is set in nth field of rec.
-@return nonzero if SQL NULL */
-UNIV_INLINE
-ulint
-rec_offs_nth_sql_null(
-/*==================*/
- const ulint* offsets,/*!< in: array returned by rec_get_offsets() */
- ulint n) /*!< in: nth field */
+ const rec_t* rec, /*!< in: record */
+ ulint n /*!< in: index of the field */)
{
- ut_ad(rec_offs_validate(NULL, NULL, offsets));
- ut_ad(n < rec_offs_n_fields(offsets));
- return(rec_offs_base(offsets)[1 + n] & REC_OFFS_SQL_NULL);
+ if(rec_get_1byte_offs_flag(rec))
+ return 0;
+ return (rec_2_get_field_end_info(rec,n) & REC_2BYTE_EXTERN_MASK);
}
/******************************************************//**
@@ -1400,7 +1178,7 @@ rec_get_nth_field_size(
os = rec_get_field_start_offs(rec, n);
next_os = rec_get_field_start_offs(rec, n + 1);
- ut_ad(next_os - os < UNIV_PAGE_SIZE);
+ ut_ad(next_os - os < srv_page_size);
return(next_os - os);
}
@@ -1427,6 +1205,7 @@ rec_set_nth_field(
ut_ad(rec);
ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(!rec_offs_nth_default(offsets, n));
if (len == UNIV_SQL_NULL) {
if (!rec_offs_nth_sql_null(offsets, n)) {
@@ -1437,7 +1216,7 @@ rec_set_nth_field(
return;
}
- data2 = rec_get_nth_field(rec, offsets, n, &len2);
+ data2 = (byte*)rec_get_nth_field(rec, offsets, n, &len2);
if (len2 == UNIV_SQL_NULL) {
ut_ad(!rec_offs_comp(offsets));
rec_set_nth_field_null_bit(rec, n, FALSE);
@@ -1501,7 +1280,7 @@ rec_offs_data_size(
ut_ad(rec_offs_validate(NULL, NULL, offsets));
size = rec_offs_base(offsets)[rec_offs_n_fields(offsets)]
& REC_OFFS_MASK;
- ut_ad(size < UNIV_PAGE_SIZE);
+ ut_ad(size < srv_page_size);
return(size);
}
@@ -1518,8 +1297,8 @@ rec_offs_extra_size(
{
ulint size;
ut_ad(rec_offs_validate(NULL, NULL, offsets));
- size = *rec_offs_base(offsets) & ~(REC_OFFS_COMPACT | REC_OFFS_EXTERNAL);
- ut_ad(size < UNIV_PAGE_SIZE);
+ size = *rec_offs_base(offsets) & REC_OFFS_MASK;
+ ut_ad(size < srv_page_size);
return(size);
}
@@ -1631,27 +1410,34 @@ rec_get_converted_size(
ut_ad(index);
ut_ad(dtuple);
ut_ad(dtuple_check_typed(dtuple));
-
- ut_ad(dict_index_is_ibuf(index)
-
- || dtuple_get_n_fields(dtuple)
- == (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
- == REC_STATUS_NODE_PTR)
- ? dict_index_get_n_unique_in_tree_nonleaf(index) + 1
- : dict_index_get_n_fields(index))
-
- /* a record for older SYS_INDEXES table
- (missing merge_threshold column) is acceptable. */
- || (index->table->id == DICT_INDEXES_ID
- && dtuple_get_n_fields(dtuple)
- == dict_index_get_n_fields(index) - 1));
+#ifdef UNIV_DEBUG
+ if (dict_index_is_ibuf(index)) {
+ ut_ad(dtuple->n_fields > 1);
+ } else if ((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
+ == REC_STATUS_NODE_PTR) {
+ ut_ad(dtuple->n_fields
+ == dict_index_get_n_unique_in_tree_nonleaf(index) + 1);
+ } else if (index->table->id == DICT_INDEXES_ID) {
+ /* The column SYS_INDEXES.MERGE_THRESHOLD was
+ instantly added in MariaDB 10.2.2 (MySQL 5.7). */
+ ut_ad(index->n_fields == DICT_NUM_FIELDS__SYS_INDEXES);
+ ut_ad(dtuple->n_fields == DICT_NUM_FIELDS__SYS_INDEXES
+ || dtuple->n_fields
+ == DICT_FLD__SYS_INDEXES__MERGE_THRESHOLD);
+ } else {
+ ut_ad(dtuple->n_fields >= index->n_core_fields);
+ ut_ad(dtuple->n_fields <= index->n_fields);
+ }
+#endif
if (dict_table_is_comp(index->table)) {
- return(rec_get_converted_size_comp(index,
- dtuple_get_info_bits(dtuple)
- & REC_NEW_STATUS_MASK,
- dtuple->fields,
- dtuple->n_fields, NULL));
+ return(rec_get_converted_size_comp(
+ index,
+ static_cast<rec_comp_status_t>(
+ dtuple->info_bits
+ & REC_NEW_STATUS_MASK),
+ dtuple->fields,
+ dtuple->n_fields, NULL));
}
data_size = dtuple_get_data_size(dtuple, 0);
@@ -1659,105 +1445,5 @@ rec_get_converted_size(
extra_size = rec_get_converted_extra_size(
data_size, dtuple_get_n_fields(dtuple), n_ext);
-#if 0
- /* This code is inactive since it may be the wrong place to add
- in the size of node pointers used in parent pages AND it is not
- currently needed since ha_innobase::max_supported_key_length()
- ensures that the key size limit for each page size is well below
- the actual limit ((free space on page / 4) - record overhead).
- But those limits will need to be raised when InnoDB can
- support multiple page sizes. At that time, we will need
- to consider the node pointer on these universal btrees. */
-
- if (dict_index_is_ibuf(index)) {
- /* This is for the insert buffer B-tree.
- All fields in the leaf tuple ascend to the
- parent node plus the child page pointer. */
-
- /* ibuf cannot contain externally stored fields */
- ut_ad(n_ext == 0);
-
- /* Add the data pointer and recompute extra_size
- based on one more field. */
- data_size += REC_NODE_PTR_SIZE;
- extra_size = rec_get_converted_extra_size(
- data_size,
- dtuple_get_n_fields(dtuple) + 1,
- 0);
-
- /* Be sure dtuple->n_fields has this node ptr
- accounted for. This function should correspond to
- what rec_convert_dtuple_to_rec() needs in storage.
- In optimistic insert or update-not-in-place, we will
- have to ensure that if the record is converted to a
- node pointer, it will not become too large.*/
- }
-#endif
-
return(data_size + extra_size);
}
-
-/** Fold a prefix of a physical record.
-@param[in] rec index record
-@param[in] offsets return value of rec_get_offsets()
-@param[in] n_fields number of complete fields to fold
-@param[in] n_bytes number of bytes to fold in the last field
-@param[in] index_id index tree ID
-@return the folded value */
-UNIV_INLINE
-ulint
-rec_fold(
- const rec_t* rec,
- const ulint* offsets,
- ulint n_fields,
- ulint n_bytes,
- index_id_t tree_id)
-{
- ulint i;
- const byte* data;
- ulint len;
- ulint fold;
- ulint n_fields_rec;
-
- ut_ad(rec_offs_validate(rec, NULL, offsets));
- ut_ad(rec_validate(rec, offsets));
- ut_ad(n_fields > 0 || n_bytes > 0);
-
- n_fields_rec = rec_offs_n_fields(offsets);
- ut_ad(n_fields <= n_fields_rec);
- ut_ad(n_fields < n_fields_rec || n_bytes == 0);
-
- if (n_fields > n_fields_rec) {
- n_fields = n_fields_rec;
- }
-
- if (n_fields == n_fields_rec) {
- n_bytes = 0;
- }
-
- fold = ut_fold_ull(tree_id);
-
- for (i = 0; i < n_fields; i++) {
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- if (len != UNIV_SQL_NULL) {
- fold = ut_fold_ulint_pair(fold,
- ut_fold_binary(data, len));
- }
- }
-
- if (n_bytes > 0) {
- data = rec_get_nth_field(rec, offsets, i, &len);
-
- if (len != UNIV_SQL_NULL) {
- if (len > n_bytes) {
- len = n_bytes;
- }
-
- fold = ut_fold_ulint_pair(fold,
- ut_fold_binary(data, len));
- }
- }
-
- return(fold);
-}
diff --git a/storage/innobase/include/rem0types.h b/storage/innobase/include/rem0types.h
index f8133f77466..ac78a3c6748 100644
--- a/storage/innobase/include/rem0types.h
+++ b/storage/innobase/include/rem0types.h
@@ -1,6 +1,6 @@
/*****************************************************************************
-Copyright (c) 1994, 2012, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -54,8 +54,7 @@ This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
files would be at risk! */
#define REC_ANTELOPE_MAX_INDEX_COL_LEN 768
-/** Maximum indexed field length for table format UNIV_FORMAT_B and
-beyond.
+/** Maximum indexed field length for tables that have atomic BLOBs.
This (3072) is the maximum index row length allowed, so we cannot create index
prefix column longer than that. */
#define REC_VERSION_56_MAX_INDEX_COL_LEN 3072
diff --git a/storage/innobase/include/row0ftsort.h b/storage/innobase/include/row0ftsort.h
index 8f7632ed9ac..3ae5d5bc175 100644
--- a/storage/innobase/include/row0ftsort.h
+++ b/storage/innobase/include/row0ftsort.h
@@ -185,15 +185,15 @@ tokenized doc string. The index has three "fields":
dict_index_t*
row_merge_create_fts_sort_index(
/*============================*/
- dict_index_t* index, /*!< in: Original FTS index
- based on which this sort index
- is created */
- const dict_table_t* table, /*!< in: table that FTS index
- is being created on */
- ibool* opt_doc_id_size);
- /*!< out: whether to use 4 bytes
- instead of 8 bytes integer to
- store Doc ID during sort */
+ dict_index_t* index, /*!< in: Original FTS index
+ based on which this sort index
+ is created */
+ dict_table_t* table, /*!< in,out: table that FTS index
+ is being created on */
+ ibool* opt_doc_id_size);
+ /*!< out: whether to use 4 bytes
+ instead of 8 bytes integer to
+ store Doc ID during sort */
/********************************************************************//**
Initialize FTS parallel sort structures.
diff --git a/storage/innobase/include/row0import.h b/storage/innobase/include/row0import.h
index c6dfca9d7e8..5eb5425b983 100644
--- a/storage/innobase/include/row0import.h
+++ b/storage/innobase/include/row0import.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 2012, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -47,21 +47,13 @@ row_import_for_mysql(
in MySQL */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/*****************************************************************//**
-Update the DICT_TF2_DISCARDED flag in SYS_TABLES.
-@return DB_SUCCESS or error code. */
-dberr_t
-row_import_update_discarded_flag(
-/*=============================*/
- trx_t* trx, /*!< in/out: transaction that
- covers the update */
- table_id_t table_id, /*!< in: Table for which we want
- to set the root table->flags2 */
- bool discarded, /*!< in: set MIX_LEN column bit
- to discarded, if true */
- bool dict_locked) /*!< in: Set to true if the
- caller already owns the
- dict_sys_t:: mutex. */
+/** Update the DICT_TF2_DISCARDED flag in SYS_TABLES.MIX_LEN.
+@param[in,out] trx dictionary transaction
+@param[in] table_id table identifier
+@param[in] discarded whether to set or clear the flag
+@return DB_SUCCESS or error code */
+dberr_t row_import_update_discarded_flag(trx_t* trx, table_id_t table_id,
+ bool discarded)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/*****************************************************************//**
diff --git a/storage/innobase/include/row0ins.h b/storage/innobase/include/row0ins.h
index b0b9ccd271b..ed425390ed2 100644
--- a/storage/innobase/include/row0ins.h
+++ b/storage/innobase/include/row0ins.h
@@ -203,6 +203,8 @@ struct ins_node_t{
+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN];
trx_id_t trx_id; /*!< trx id or the last trx which executed the
node */
+ byte vers_start_buf[8]; /* Buffers for System Versioning */
+ byte vers_end_buf[8]; /* system fields. */
mem_heap_t* entry_sys_heap;
/* memory heap used as auxiliary storage;
entry_list and sys fields are stored here;
@@ -228,5 +230,4 @@ struct ins_node_t{
#define INS_NODE_ALLOC_ROW_ID 2 /* row id should be allocated */
#define INS_NODE_INSERT_ENTRIES 3 /* index entries should be built and
inserted */
-
#endif
diff --git a/storage/innobase/include/row0log.h b/storage/innobase/include/row0log.h
index c52beb495da..723cf310f95 100644
--- a/storage/innobase/include/row0log.h
+++ b/storage/innobase/include/row0log.h
@@ -49,17 +49,21 @@ for online creation.
bool
row_log_allocate(
/*=============*/
+ const trx_t* trx, /*!< in: the ALTER TABLE transaction */
dict_index_t* index, /*!< in/out: index */
dict_table_t* table, /*!< in/out: new table being rebuilt,
or NULL when creating a secondary index */
bool same_pk,/*!< in: whether the definition of the
PRIMARY KEY has remained the same */
- const dtuple_t* add_cols,
+ const dtuple_t* defaults,
/*!< in: default values of
- added columns, or NULL */
+ added, changed columns, or NULL */
const ulint* col_map,/*!< in: mapping of old column
numbers to new ones, or NULL if !table */
- const char* path) /*!< in: where to create temporary file */
+ const char* path, /*!< in: where to create temporary file */
+ const TABLE* old_table, /*!< in: table definition before alter */
+ bool allow_not_null) /*!< in: allow null to non-null
+ conversion */
MY_ATTRIBUTE((nonnull(1), warn_unused_result));
/******************************************************//**
@@ -207,13 +211,15 @@ row_log_table_blob_alloc(
@param[in,out] stage performance schema accounting object, used by
ALTER TABLE. stage->begin_phase_log_table() will be called initially and then
stage->inc() will be called for each block of log that is applied.
+@param[in] new_table Altered table
@return DB_SUCCESS, or error code on failure */
dberr_t
row_log_table_apply(
que_thr_t* thr,
dict_table_t* old_table,
struct TABLE* table,
- ut_stage_alter_t* stage)
+ ut_stage_alter_t* stage,
+ dict_table_t* new_table)
MY_ATTRIBUTE((warn_unused_result));
/******************************************************//**
diff --git a/storage/innobase/include/row0merge.h b/storage/innobase/include/row0merge.h
index 51ad5cc5cd7..ad4005239c3 100644
--- a/storage/innobase/include/row0merge.h
+++ b/storage/innobase/include/row0merge.h
@@ -61,11 +61,11 @@ struct ib_sequence_t;
/** @brief Block size for I/O operations in merge sort.
-The minimum is UNIV_PAGE_SIZE, or page_get_free_space_of_empty()
+The minimum is srv_page_size, or page_get_free_space_of_empty()
rounded to a power of 2.
When not creating a PRIMARY KEY that contains column prefixes, this
-can be set as small as UNIV_PAGE_SIZE / 2. */
+can be set as small as srv_page_size / 2. */
typedef byte row_merge_block_t;
/** @brief Secondary buffer for I/O operations of merge records.
@@ -101,7 +101,7 @@ struct row_merge_buf_t {
/** Information about temporary files used in merge sort */
struct merge_file_t {
- int fd; /*!< file descriptor */
+ pfs_os_file_t fd; /*!< file descriptor */
ulint offset; /*!< file offset (end of file) */
ib_uint64_t n_rec; /*!< number of records in the file */
};
@@ -193,7 +193,7 @@ row_merge_drop_temp_indexes(void);
UNIV_PFS_IO defined, register the file descriptor with Performance Schema.
@param[in] path location for creating temporary merge files, or NULL
@return File descriptor */
-int
+pfs_os_file_t
row_merge_file_create_low(
const char* path)
MY_ATTRIBUTE((warn_unused_result));
@@ -203,7 +203,7 @@ if UNIV_PFS_IO is defined. */
void
row_merge_file_destroy_low(
/*=======================*/
- int fd); /*!< in: merge file descriptor */
+ const pfs_os_file_t& fd); /*!< in: merge file descriptor */
/*********************************************************************//**
Provide a new pathname for a table that is being renamed if it belongs to
@@ -260,7 +260,6 @@ row_merge_rename_index_to_drop(
MY_ATTRIBUTE((nonnull(1), warn_unused_result));
/** Create the index and load in to the dictionary.
-@param[in,out] trx trx (sets error_state)
@param[in,out] table the index is on this table
@param[in] index_def the index definition
@param[in] add_v new virtual columns added along with add
@@ -268,7 +267,6 @@ row_merge_rename_index_to_drop(
@return index, or NULL on error */
dict_index_t*
row_merge_create_index(
- trx_t* trx,
dict_table_t* table,
const index_def_t* index_def,
const dict_add_v_col_t* add_v)
@@ -310,7 +308,7 @@ old_table unless creating a PRIMARY KEY
@param[in] n_indexes size of indexes[]
@param[in,out] table MySQL table, for reporting erroneous key value
if applicable
-@param[in] add_cols default values of added columns, or NULL
+@param[in] defaults default values of added, changed columns, or NULL
@param[in] col_map mapping of old column numbers to new ones, or
NULL if old_table == new_table
@param[in] add_autoinc number of added AUTO_INCREMENT columns, or
@@ -324,6 +322,7 @@ this function and it will be passed to other functions for further accounting.
@param[in] add_v new virtual columns added along with indexes
@param[in] eval_table mysql table used to evaluate virtual column
value, see innobase_get_computed_value().
+@param[in] allow_non_null allow the conversion from null to not-null
@return DB_SUCCESS or error code */
dberr_t
row_merge_build_indexes(
@@ -335,14 +334,15 @@ row_merge_build_indexes(
const ulint* key_numbers,
ulint n_indexes,
struct TABLE* table,
- const dtuple_t* add_cols,
+ const dtuple_t* defaults,
const ulint* col_map,
ulint add_autoinc,
ib_sequence_t& sequence,
bool skip_pk_sort,
ut_stage_alter_t* stage,
const dict_add_v_col_t* add_v,
- struct TABLE* eval_table)
+ struct TABLE* eval_table,
+ bool allow_non_null)
MY_ATTRIBUTE((warn_unused_result));
/********************************************************************//**
@@ -372,7 +372,7 @@ UNIV_INTERN
bool
row_merge_write(
/*============*/
- int fd, /*!< in: file descriptor */
+ const pfs_os_file_t& fd, /*!< in: file descriptor */
ulint offset, /*!< in: offset where to write,
in number of row_merge_block_t elements */
const void* buf, /*!< in: data */
@@ -393,7 +393,7 @@ row_merge_buf_empty(
@param[out] merge_file merge file structure
@param[in] path location for creating temporary file, or NULL
@return file descriptor, or -1 on failure */
-int
+pfs_os_file_t
row_merge_file_create(
merge_file_t* merge_file,
const char* path)
@@ -421,7 +421,7 @@ row_merge_sort(
const row_merge_dup_t* dup,
merge_file_t* file,
row_merge_block_t* block,
- int* tmpfd,
+ pfs_os_file_t* tmpfd,
const bool update_progress,
const double pct_progress,
const double pct_cost,
@@ -460,7 +460,7 @@ row_merge_file_destroy(
bool
row_merge_read(
/*===========*/
- int fd, /*!< in: file descriptor */
+ const pfs_os_file_t& fd, /*!< in: file descriptor */
ulint offset, /*!< in: offset where to read
in number of row_merge_block_t
elements */
@@ -479,7 +479,7 @@ row_merge_read_rec(
mrec_buf_t* buf, /*!< in/out: secondary buffer */
const byte* b, /*!< in: pointer to record */
const dict_index_t* index, /*!< in: index of the record */
- int fd, /*!< in: file descriptor */
+ const pfs_os_file_t& fd, /*!< in: file descriptor */
ulint* foffs, /*!< in/out: file offset */
const mrec_t** mrec, /*!< out: pointer to merge record,
or NULL on end of list
diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
index 3a53c35ba8b..c59248d88c4 100644
--- a/storage/innobase/include/row0mysql.h
+++ b/storage/innobase/include/row0mysql.h
@@ -227,14 +227,26 @@ row_lock_table_autoinc_for_mysql(
dberr_t
row_lock_table(row_prebuilt_t* prebuilt);
+/** System Versioning: row_insert_for_mysql() modes */
+enum ins_mode_t {
+ /* plain row (without versioning) */
+ ROW_INS_NORMAL = 0,
+ /* row_start = TRX_ID, row_end = MAX */
+ ROW_INS_VERSIONED,
+ /* row_end = TRX_ID */
+ ROW_INS_HISTORICAL
+};
+
/** Does an insert for MySQL.
@param[in] mysql_rec row in the MySQL format
@param[in,out] prebuilt prebuilt struct in MySQL handle
+@param[in] ins_mode what row type we're inserting
@return error code or DB_SUCCESS*/
dberr_t
row_insert_for_mysql(
const byte* mysql_rec,
- row_prebuilt_t* prebuilt)
+ row_prebuilt_t* prebuilt,
+ ins_mode_t ins_mode)
MY_ATTRIBUTE((warn_unused_result));
/*********************************************************************//**
@@ -258,7 +270,8 @@ row_get_prebuilt_update_vector(
@param[in,out] prebuilt prebuilt struct in MySQL handle
@return error code or DB_SUCCESS */
dberr_t
-row_update_for_mysql(row_prebuilt_t* prebuilt)
+row_update_for_mysql(
+ row_prebuilt_t* prebuilt)
MY_ATTRIBUTE((warn_unused_result));
/** This can only be used when srv_locks_unsafe_for_binlog is TRUE or this
@@ -660,6 +673,8 @@ struct row_prebuilt_t {
not to be confused with InnoDB
externally stored columns
(VARCHAR can be off-page too) */
+ unsigned versioned_write:1;/*!< whether this is
+ a versioned write */
mysql_row_templ_t* mysql_template;/*!< template used to transform
rows fast between MySQL and Innobase
formats; memory for this template
@@ -775,7 +790,7 @@ struct row_prebuilt_t {
allocated mem buf start, because
there is a 4 byte magic number at the
start and at the end */
- ibool keep_other_fields_on_keyread; /*!< when using fetch
+ bool keep_other_fields_on_keyread; /*!< when using fetch
cache with HA_EXTRA_KEYREAD, don't
overwrite other fields in mysql row
row buffer.*/
@@ -836,6 +851,20 @@ struct row_prebuilt_t {
/** The MySQL table object */
TABLE* m_mysql_table;
+
+ /** Get template by dict_table_t::cols[] number */
+ const mysql_row_templ_t* get_template_by_col(ulint col) const
+ {
+ ut_ad(col < n_template);
+ ut_ad(mysql_template);
+ for (ulint i = col; i < n_template; ++i) {
+ const mysql_row_templ_t* templ = &mysql_template[i];
+ if (!templ->is_virtual && templ->col_no == col) {
+ return templ;
+ }
+ }
+ return NULL;
+ }
};
/** Callback for row_mysql_sys_index_iterate() */
diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h
index 655685c02a8..a7ddef4fe8a 100644
--- a/storage/innobase/include/row0purge.h
+++ b/storage/innobase/include/row0purge.h
@@ -111,7 +111,7 @@ struct purge_node_t{
upd_t* update; /*!< update vector for a clustered index
record */
- dtuple_t* ref; /*!< NULL, or row reference to the next row to
+ const dtuple_t* ref; /*!< NULL, or row reference to the next row to
handle */
dtuple_t* row; /*!< NULL, or a copy (also fields copied to
heap) of the indexed fields of the row to
diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h
index d24ae37b13d..1f37a6b02d7 100644
--- a/storage/innobase/include/row0row.h
+++ b/storage/innobase/include/row0row.h
@@ -153,9 +153,9 @@ row_build(
consulted instead; the user
columns in this table should be
the same columns as in index->table */
- const dtuple_t* add_cols,
+ const dtuple_t* defaults,
/*!< in: default values of
- added columns, or NULL */
+ added, changed columns, or NULL */
const ulint* col_map,/*!< in: mapping of old column
numbers to new ones, or NULL */
row_ext_t** ext, /*!< out, own: cache of
@@ -177,7 +177,7 @@ addition of new virtual columns.
of an index, or NULL if
index->table should be
consulted instead
-@param[in] add_cols default values of added columns, or NULL
+@param[in] defaults default values of added, changed columns, or NULL
@param[in] add_v new virtual columns added
along with new indexes
@param[in] col_map mapping of old column
@@ -194,7 +194,7 @@ row_build_w_add_vcol(
const rec_t* rec,
const ulint* offsets,
const dict_table_t* col_table,
- const dtuple_t* add_cols,
+ const dtuple_t* defaults,
const dict_add_v_col_t* add_v,
const ulint* col_map,
row_ext_t** ext,
@@ -269,9 +269,8 @@ row_build_row_ref_in_tuple(
held as long as the row
reference is used! */
const dict_index_t* index, /*!< in: secondary index */
- ulint* offsets,/*!< in: rec_get_offsets(rec, index)
+ ulint* offsets)/*!< in: rec_get_offsets(rec, index)
or NULL */
- trx_t* trx) /*!< in: transaction or NULL */
MY_ATTRIBUTE((nonnull(1,2,3)));
/*******************************************************************//**
Builds from a secondary index record a row reference with which we can
@@ -285,8 +284,8 @@ row_build_row_ref_fast(
const ulint* map, /*!< in: array of field numbers in rec
telling how ref should be built from
the fields of rec */
- const rec_t* rec, /*!< in: record in the index; must be
- preserved while ref is used, as we do
+ const rec_t* rec, /*!< in: secondary index record;
+ must be preserved while ref is used, as we do
not copy field values to heap */
const ulint* offsets);/*!< in: array returned by rec_get_offsets() */
/***************************************************************//**
@@ -398,7 +397,7 @@ row_mtr_start(mtr_t* mtr, dict_index_t* index, bool pessimistic)
{
mtr->start();
- switch (index->space) {
+ switch (index->table->space->id) {
case IBUF_SPACE_ID:
if (pessimistic
&& !(index->type & (DICT_UNIQUE | DICT_SPATIAL))) {
@@ -409,7 +408,7 @@ row_mtr_start(mtr_t* mtr, dict_index_t* index, bool pessimistic)
mtr->set_log_mode(MTR_LOG_NO_REDO);
break;
default:
- mtr->set_named_space(index->space);
+ index->set_modified(*mtr);
break;
}
diff --git a/storage/innobase/include/row0row.ic b/storage/innobase/include/row0row.ic
index 8a32bb3ffd2..a7c0f2551b5 100644
--- a/storage/innobase/include/row0row.ic
+++ b/storage/innobase/include/row0row.ic
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2017, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -144,8 +145,8 @@ row_build_row_ref_fast(
const ulint* map, /*!< in: array of field numbers in rec
telling how ref should be built from
the fields of rec */
- const rec_t* rec, /*!< in: record in the index; must be
- preserved while ref is used, as we do
+ const rec_t* rec, /*!< in: secondary index record;
+ must be preserved while ref is used, as we do
not copy field values to heap */
const ulint* offsets)/*!< in: array returned by rec_get_offsets() */
{
diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
index 552680b16d1..ef0ccbbda9f 100644
--- a/storage/innobase/include/row0sel.h
+++ b/storage/innobase/include/row0sel.h
@@ -135,8 +135,7 @@ row_sel_convert_mysql_key_to_innobase(
ulint buf_len, /*!< in: buffer length */
dict_index_t* index, /*!< in: index of the key value */
const byte* key_ptr, /*!< in: MySQL key value */
- ulint key_len, /*!< in: MySQL key value length */
- trx_t* trx); /*!< in: transaction */
+ ulint key_len); /*!< in: MySQL key value length */
/** Searches for rows in the database. This is used in the interface to
diff --git a/storage/innobase/include/row0trunc.h b/storage/innobase/include/row0trunc.h
index f9a20665a3b..993dac295da 100644
--- a/storage/innobase/include/row0trunc.h
+++ b/storage/innobase/include/row0trunc.h
@@ -182,19 +182,16 @@ public:
/** Create an index for a table.
@param[in] table_name table name, for which to create
the index
- @param[in] space_id space id where we have to
- create the index
- @param[in] page_size page size of the .ibd file
+ @param[in,out] space tablespace
@param[in] index_type type of index to truncate
@param[in] index_id id of index to truncate
@param[in] btr_redo_create_info control info for ::btr_create()
@param[in,out] mtr mini-transaction covering the
create index
@return root page no or FIL_NULL on failure */
- ulint create_index(
+ inline ulint create_index(
const char* table_name,
- ulint space_id,
- const page_size_t& page_size,
+ fil_space_t* space,
ulint index_type,
index_id_t index_id,
const btr_create_t& btr_redo_create_info,
@@ -203,31 +200,27 @@ public:
/** Create the indexes for a table
@param[in] table_name table name, for which to create the
indexes
- @param[in] space_id space id where we have to create the
- indexes
- @param[in] page_size page size of the .ibd file
- @param[in] flags tablespace flags
+ @param[in,out] space tablespace
@param[in] format_flags page format flags
@return DB_SUCCESS or error code. */
- dberr_t create_indexes(
+ inline dberr_t create_indexes(
const char* table_name,
- ulint space_id,
- const page_size_t& page_size,
- ulint flags,
+ fil_space_t* space,
ulint format_flags);
/** Check if index has been modified since TRUNCATE log snapshot
was recorded.
- @param space_id space_id where table/indexes resides.
+ @param[in] space tablespace
+ @param[in] root_page_no index root page number
@return true if modified else false */
- bool is_index_modified_since_logged(
- ulint space_id,
- ulint root_page_no) const;
+ inline bool is_index_modified_since_logged(
+ const fil_space_t* space,
+ ulint root_page_no) const;
/** Drop indexes for a table.
- @param space_id space_id where table/indexes resides.
+ @param[in,out] space tablespace
@return DB_SUCCESS or error code. */
- void drop_indexes(ulint space_id) const;
+ void drop_indexes(fil_space_t* space) const;
/**
Parses log record during recovery
diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h
index 730e7f559c6..f7cec643b33 100644
--- a/storage/innobase/include/row0undo.h
+++ b/storage/innobase/include/row0undo.h
@@ -111,7 +111,7 @@ struct undo_node_t{
ulint cmpl_info;/*!< compiler analysis of an update */
upd_t* update; /*!< update vector for a clustered index
record */
- dtuple_t* ref; /*!< row reference to the next row to handle */
+ const dtuple_t* ref; /*!< row reference to the next row to handle */
dtuple_t* row; /*!< a copy (also fields copied to heap) of the
row to handle */
row_ext_t* ext; /*!< NULL, or prefixes of the externally
diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h
index 77708d7d568..5e01e513a50 100644
--- a/storage/innobase/include/row0upd.h
+++ b/storage/innobase/include/row0upd.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,7 +27,6 @@ Created 12/27/1996 Heikki Tuuri
#ifndef row0upd_h
#define row0upd_h
-#include "univ.i"
#include "data0data.h"
#include "row0types.h"
#include "btr0types.h"
@@ -234,27 +233,19 @@ row_upd_build_difference_binary(
mem_heap_t* heap,
TABLE* mysql_table)
MY_ATTRIBUTE((nonnull(1,2,3,7), warn_unused_result));
-/***********************************************************//**
-Replaces the new column values stored in the update vector to the index entry
-given. */
+/** Apply an update vector to an index entry.
+@param[in,out] entry index entry to be updated; the clustered index record
+ must be covered by a lock or a page latch to prevent
+ deletion (rollback or purge)
+@param[in] index index of the entry
+@param[in] update update vector built for the entry
+@param[in,out] heap memory heap for copying off-page columns */
void
row_upd_index_replace_new_col_vals_index_pos(
-/*=========================================*/
- dtuple_t* entry, /*!< in/out: index entry where replaced;
- the clustered index record must be
- covered by a lock or a page latch to
- prevent deletion (rollback or purge) */
- dict_index_t* index, /*!< in: index; NOTE that this may also be a
- non-clustered index */
- const upd_t* update, /*!< in: an update vector built for the index so
- that the field number in an upd_field is the
- index position */
- ibool order_only,
- /*!< in: if TRUE, limit the replacement to
- ordering fields of index; note that this
- does not work for non-clustered indexes. */
- mem_heap_t* heap) /*!< in: memory heap for allocating and
- copying the new values */
+ dtuple_t* entry,
+ const dict_index_t* index,
+ const upd_t* update,
+ mem_heap_t* heap)
MY_ATTRIBUTE((nonnull));
/***********************************************************//**
Replaces the new column values stored in the update vector to the index entry
@@ -462,6 +453,7 @@ struct upd_t{
virtual column update now */
ulint n_fields; /*!< number of update fields */
upd_field_t* fields; /*!< array of update fields */
+ byte vers_sys_value[8]; /*!< buffer for updating system fields */
/** Append an update field to the end of array
@param[in] field an update field */
@@ -482,6 +474,22 @@ struct upd_t{
return(false);
}
+ /** Determine if the update affects a system versioned column or row_end. */
+ bool affects_versioned() const
+ {
+ for (ulint i = 0; i < n_fields; i++) {
+ dtype_t type = fields[i].new_val.type;
+ if (type.is_versioned()) {
+ return true;
+ }
+ // versioned DELETE is UPDATE SET row_end=NOW
+ if (type.vers_sys_end()) {
+ return true;
+ }
+ }
+ return false;
+ }
+
#ifdef UNIV_DEBUG
bool validate() const
{
@@ -498,17 +506,24 @@ struct upd_t{
};
+/** Kinds of update operation */
+enum delete_mode_t {
+ NO_DELETE = 0, /*!< this operation does not delete */
+ PLAIN_DELETE, /*!< ordinary delete */
+ VERSIONED_DELETE /*!< update old and insert a new row */
+};
+
/* Update node structure which also implements the delete operation
of a row */
struct upd_node_t{
que_common_t common; /*!< node type: QUE_NODE_UPDATE */
- ibool is_delete;/* TRUE if delete, FALSE if update */
+ delete_mode_t is_delete; /*!< kind of DELETE */
ibool searched_update;
/* TRUE if searched update, FALSE if
positioned */
- ibool in_mysql_interface;
- /* TRUE if the update node was created
+ bool in_mysql_interface;
+ /* whether the update node was created
for the MySQL interface */
dict_foreign_t* foreign;/* NULL or pointer to a foreign key
constraint if this update node is used in
@@ -553,6 +568,12 @@ struct upd_node_t{
dtuple_t* row; /*!< NULL, or a copy (also fields copied to
heap) of the row to update; this must be reset
to NULL after a successful update */
+ dtuple_t* historical_row; /*!< historical row used in
+ CASCADE UPDATE/SET NULL;
+ allocated from historical_heap */
+ mem_heap_t* historical_heap; /*!< heap for historical row insertion;
+ created when row to update is located;
+ freed right before row update */
row_ext_t* ext; /*!< NULL, or prefixes of the externally
stored columns in the old row */
dtuple_t* upd_row;/* NULL, or a copy of the updated row */
@@ -567,6 +588,22 @@ struct upd_node_t{
/* column assignment list */
ulint magic_n;
+ /** Also set row_start = CURRENT_TIMESTAMP/trx->id
+ @param[in] trx transaction */
+ void make_versioned_update(const trx_t* trx);
+ /** Only set row_end = CURRENT_TIMESTAMP/trx->id.
+ Do not touch other fields at all.
+ @param[in] trx transaction */
+ void make_versioned_delete(const trx_t* trx);
+
+private:
+ /** Appends row_start or row_end field to update vector and sets a
+ CURRENT_TIMESTAMP/trx->id value to it.
+ Supposed to be called only by make_versioned_update() and
+ make_versioned_delete().
+ @param[in] trx transaction
+ @param[in] idx table->row_start or table->row_end */
+ void make_versioned_helper(const trx_t* trx, ulint idx);
};
#define UPD_NODE_MAGIC_N 1579975
diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic
index 364c876ecc7..5e43a272388 100644
--- a/storage/innobase/include/row0upd.ic
+++ b/storage/innobase/include/row0upd.ic
@@ -181,9 +181,8 @@ row_upd_rec_sys_fields(
offset = row_get_trx_id_offset(index, offsets);
}
-#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
-# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR"
-#endif
+ compile_time_assert(DATA_TRX_ID + 1 == DATA_ROLL_PTR);
+
/* During IMPORT the trx id in the record can be in the
future, if the .ibd file is being imported from another
instance. During IMPORT roll_ptr will be 0. */
diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h
index 23c2e8546bc..9869a3acf95 100644
--- a/storage/innobase/include/row0vers.h
+++ b/storage/innobase/include/row0vers.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1997, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -42,33 +42,19 @@ class ReadView;
/** Determine if an active transaction has inserted or modified a secondary
index record.
+@param[in,out] caller_trx trx of current thread
@param[in] rec secondary index record
@param[in] index secondary index
@param[in] offsets rec_get_offsets(rec, index)
-@return the active transaction; trx_release_reference() must be invoked
+@return the active transaction; trx->release_reference() must be invoked
@retval NULL if the record was committed */
trx_t*
row_vers_impl_x_locked(
+ trx_t* caller_trx,
const rec_t* rec,
dict_index_t* index,
const ulint* offsets);
-/*****************************************************************//**
-Finds out if we must preserve a delete marked earlier version of a clustered
-index record, because it is >= the purge view.
-@param[in] trx_id transaction id in the version
-@param[in] name table name
-@param[in,out] mtr mini transaction holding the latch on the
- clustered index record; it will also hold
- the latch on purge_view
-@return TRUE if earlier version should be preserved */
-ibool
-row_vers_must_preserve_del_marked(
-/*==============================*/
- trx_id_t trx_id,
- const table_name_t& name,
- mtr_t* mtr);
-
/** Finds out if a version of the record, where the version >= the current
purge view, should have ientry as its secondary index entry. We check
if there is any not delete marked version of the record where the trx
@@ -134,6 +120,7 @@ which should be seen by a semi-consistent read. */
void
row_vers_build_for_semi_consistent_read(
/*====================================*/
+ trx_t* caller_trx,/*!< in/out: trx of current thread */
const rec_t* rec, /*!< in: record in a clustered index; the
caller must have a latch on the page; this
latch locks the top of the stack of versions
diff --git a/storage/innobase/include/srv0conc.h b/storage/innobase/include/srv0conc.h
index 9573c5add84..35937fe1204 100644
--- a/storage/innobase/include/srv0conc.h
+++ b/storage/innobase/include/srv0conc.h
@@ -1,6 +1,7 @@
/*****************************************************************************
Copyright (c) 2011, 2014, Oracle and/or its affiliates. All Rights Reserved.
+Copyright (c) 2018, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -41,9 +42,7 @@ Created 2011/04/18 Sunny Bains
#define srv_conc_h
/** We are prepared for a situation that we have this many threads waiting for
-a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the
-value. */
-
+a semaphore inside InnoDB. srv_start() sets the value. */
extern ulint srv_max_n_threads;
/** The following controls how many threads we let inside InnoDB concurrently:
diff --git a/storage/innobase/include/srv0mon.h b/storage/innobase/include/srv0mon.h
index e4034f3a6ff..069ab5cf93a 100644
--- a/storage/innobase/include/srv0mon.h
+++ b/storage/innobase/include/srv0mon.h
@@ -2,7 +2,7 @@
Copyright (c) 2010, 2015, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2012, Facebook Inc.
-Copyright (c) 2013, 2017, MariaDB Corporation.
+Copyright (c) 2013, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
@@ -503,18 +503,18 @@ extern ulint monitor_set_tbl[(NUM_MONITOR + NUM_BITS_ULINT - 1) /
/** Macros to turn on/off the control bit in monitor_set_tbl for a monitor
counter option. */
-#define MONITOR_ON(monitor) \
- (monitor_set_tbl[monitor / NUM_BITS_ULINT] |= \
- ((ulint)1 << (monitor % NUM_BITS_ULINT)))
+#define MONITOR_ON(monitor) \
+ (monitor_set_tbl[unsigned(monitor) / NUM_BITS_ULINT] |= \
+ (ulint(1) << (unsigned(monitor) % NUM_BITS_ULINT)))
-#define MONITOR_OFF(monitor) \
- (monitor_set_tbl[monitor / NUM_BITS_ULINT] &= \
- ~((ulint)1 << (monitor % NUM_BITS_ULINT)))
+#define MONITOR_OFF(monitor) \
+ (monitor_set_tbl[unsigned(monitor) / NUM_BITS_ULINT] &= \
+ ~(ulint(1) << (unsigned(monitor) % NUM_BITS_ULINT)))
/** Check whether the requested monitor is turned on/off */
-#define MONITOR_IS_ON(monitor) \
- (monitor_set_tbl[monitor / NUM_BITS_ULINT] & \
- ((ulint)1 << (monitor % NUM_BITS_ULINT)))
+#define MONITOR_IS_ON(monitor) \
+ (monitor_set_tbl[unsigned(monitor) / NUM_BITS_ULINT] & \
+ (ulint(1) << (unsigned(monitor) % NUM_BITS_ULINT)))
/** The actual monitor counter array that records each monitor counter
value */
@@ -608,8 +608,9 @@ Use MONITOR_INC if appropriate mutex protection exists.
#define MONITOR_ATOMIC_INC_LOW(monitor, enabled) \
if (enabled) { \
ib_uint64_t value; \
- value = my_atomic_add64( \
- (int64*) &MONITOR_VALUE(monitor), 1) + 1; \
+ value = my_atomic_add64_explicit( \
+ (int64*) &MONITOR_VALUE(monitor), 1, \
+ MY_MEMORY_ORDER_RELAXED) + 1; \
/* Note: This is not 100% accurate because of the \
inherent race, we ignore it due to performance. */ \
if (value > (ib_uint64_t) MONITOR_MAX_VALUE(monitor)) { \
@@ -624,8 +625,9 @@ Use MONITOR_DEC if appropriate mutex protection exists.
#define MONITOR_ATOMIC_DEC_LOW(monitor, enabled) \
if (enabled) { \
ib_uint64_t value; \
- value = my_atomic_add64( \
- (int64*) &MONITOR_VALUE(monitor), -1) - 1; \
+ value = my_atomic_add64_explicit( \
+ (int64*) &MONITOR_VALUE(monitor), -1, \
+ MY_MEMORY_ORDER_RELAXED) - 1; \
/* Note: This is not 100% accurate because of the \
inherent race, we ignore it due to performance. */ \
if (value < (ib_uint64_t) MONITOR_MIN_VALUE(monitor)) { \
diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h
index ecd2914515d..422b8ef39e4 100644
--- a/storage/innobase/include/srv0srv.h
+++ b/storage/innobase/include/srv0srv.h
@@ -48,7 +48,6 @@ Created 10/10/1995 Heikki Tuuri
#include "mysql/psi/psi.h"
#include "univ.i"
-#include "log0log.h"
#include "os0event.h"
#include "que0types.h"
#include "trx0types.h"
@@ -81,7 +80,7 @@ struct srv_stats_t
lsn_ctr_1_t os_log_written;
/** Number of writes being done to the log files.
- Protected by log_sys->write_mutex. */
+ Protected by log_sys.write_mutex. */
ulint_ctr_1_t os_log_pending_writes;
/** We increase this counter, when we don't have enough
@@ -148,7 +147,7 @@ struct srv_stats_t
ulint_ctr_1_t n_lock_wait_count;
/** Number of threads currently waiting on database locks */
- simple_counter<ulint, true> n_lock_wait_current_count;
+ simple_atomic_counter<> n_lock_wait_current_count;
/** Number of rows read. */
ulint_ctr_64_t n_rows_read;
@@ -261,12 +260,6 @@ extern ulong srv_thread_sleep_delay;
/** Maximum sleep delay (in micro-seconds), value of 0 disables it.*/
extern ulong srv_adaptive_max_sleep_delay;
-/** The file format to use on new *.ibd files. */
-extern ulint srv_file_format;
-/** Whether to check file format during startup. A value of
-UNIV_FORMAT_MAX + 1 means no checking ie. FALSE. The default is to
-set it to the highest format we support. */
-extern ulint srv_max_file_format_at_startup;
/** Place locks to records only i.e. do not use next-key locking except
on duplicate key checking and foreign key checking */
extern ibool srv_locks_unsafe_for_binlog;
@@ -283,25 +276,12 @@ Currently we support native aio on windows and linux */
extern my_bool srv_use_native_aio;
extern my_bool srv_numa_interleave;
-/* Use trim operation */
-extern my_bool srv_use_trim;
-
/* Use atomic writes i.e disable doublewrite buffer */
extern my_bool srv_use_atomic_writes;
/* Compression algorithm*/
extern ulong innodb_compression_algorithm;
-/* Number of flush threads */
-#define MTFLUSH_MAX_WORKER 64
-#define MTFLUSH_DEFAULT_WORKER 8
-
-/* Number of threads used for multi-threaded flush */
-extern long srv_mtflush_threads;
-
-/* If this flag is TRUE, then we will use multi threaded flush. */
-extern my_bool srv_use_mtflush;
-
/** TRUE if the server was successfully started */
extern bool srv_was_started;
@@ -359,17 +339,15 @@ extern const ulint SRV_UNDO_TABLESPACE_SIZE_IN_PAGES;
extern char* srv_log_group_home_dir;
-/** Maximum number of srv_n_log_files, or innodb_log_files_in_group */
-#define SRV_N_LOG_FILES_MAX 100
extern ulong srv_n_log_files;
/** The InnoDB redo log file size, or 0 when changing the redo log format
at startup (while disallowing writes to the redo log). */
extern ulonglong srv_log_file_size;
-extern ulint srv_log_buffer_size;
+extern ulong srv_log_buffer_size;
extern ulong srv_flush_log_at_trx_commit;
extern uint srv_flush_log_at_timeout;
extern ulong srv_log_write_ahead_size;
-extern char srv_adaptive_flushing;
+extern my_bool srv_adaptive_flushing;
extern my_bool srv_flush_sync;
#ifdef WITH_INNODB_DISALLOW_WRITES
@@ -400,8 +378,6 @@ extern ulong srv_n_page_hash_locks;
/** Scan depth for LRU flush batch i.e.: number of blocks scanned*/
extern ulong srv_LRU_scan_depth;
/** Whether or not to flush neighbors of a block */
-extern ulong srv_buf_pool_dump_pct; /*!< dump that may % of each buffer
- pool during BP dump */
extern ulong srv_flush_neighbors;
/** Previously requested size */
extern ulint srv_buf_pool_old_size;
@@ -411,14 +387,18 @@ extern ulint srv_buf_pool_base_size;
extern ulint srv_buf_pool_curr_size;
/** Dump this % of each buffer pool during BP dump */
extern ulong srv_buf_pool_dump_pct;
+#ifdef UNIV_DEBUG
+/** Abort load after this amount of pages */
+extern ulong srv_buf_pool_load_pages_abort;
+#endif
/** Lock table size in bytes */
extern ulint srv_lock_table_size;
extern ulint srv_n_file_io_threads;
extern my_bool srv_random_read_ahead;
extern ulong srv_read_ahead_threshold;
-extern ulint srv_n_read_io_threads;
-extern ulint srv_n_write_io_threads;
+extern ulong srv_n_read_io_threads;
+extern ulong srv_n_write_io_threads;
/* Defragmentation, Originally facebook default value is 100, but it's too high */
#define SRV_DEFRAGMENT_FREQUENCY_DEFAULT 40
@@ -452,8 +432,6 @@ to treat NULL value when collecting statistics. It is not defined
as enum type because the configure option takes unsigned integer type. */
extern ulong srv_innodb_stats_method;
-extern char* srv_file_flush_method_str;
-
extern ulint srv_max_n_open_files;
extern ulong srv_n_page_cleaners;
@@ -488,7 +466,7 @@ extern my_bool srv_stats_include_delete_marked;
extern unsigned long long srv_stats_modified_counter;
extern my_bool srv_stats_sample_traditional;
-extern ibool srv_use_doublewrite_buf;
+extern my_bool srv_use_doublewrite_buf;
extern ulong srv_doublewrite_batch_size;
extern ulong srv_checksum_algorithm;
@@ -630,16 +608,16 @@ extern mysql_pfs_key_t trx_rollback_clean_thread_key;
schema */
# define pfs_register_thread(key) \
do { \
- struct PSI_thread* psi = PSI_THREAD_CALL(new_thread)(key, NULL, 0);\
+ struct PSI_thread* psi = PSI_CALL_new_thread(key, NULL, 0);\
/* JAN: TODO: MYSQL 5.7 PSI \
- PSI_THREAD_CALL(set_thread_os_id)(psi); */ \
- PSI_THREAD_CALL(set_thread)(psi); \
+ PSI_CALL_set_thread_os_id(psi); */ \
+ PSI_CALL_set_thread(psi); \
} while (0)
/* This macro delists the current thread from the performance schema */
# define pfs_delete_thread() \
do { \
- PSI_THREAD_CALL(delete_current_thread)(); \
+ PSI_CALL_delete_current_thread(); \
} while (0)
# else
# define pfs_register_thread(key)
@@ -680,10 +658,9 @@ extern PSI_stage_info srv_stage_buffer_pool_load;
#endif /* HAVE_PSI_STAGE_INTERFACE */
-/** Alternatives for the file flush option in Unix; see the InnoDB manual
-about what these mean */
+/** Alternatives for innodb_flush_method */
enum srv_flush_t {
- SRV_FSYNC = 1, /*!< fsync, the default */
+ SRV_FSYNC = 0, /*!< fsync, the default */
SRV_O_DSYNC, /*!< open log files in O_SYNC mode */
SRV_LITTLESYNC, /*!< do not call os_file_flush()
when writing data files, but do flush
@@ -695,18 +672,21 @@ enum srv_flush_t {
the reason for which is that some FS
do not flush meta-data when
unbuffered IO happens */
- SRV_O_DIRECT_NO_FSYNC,
+ SRV_O_DIRECT_NO_FSYNC
/*!< do not use fsync() when using
direct IO i.e.: it can be set to avoid
the fsync() call that we make when
using SRV_UNIX_O_DIRECT. However, in
this case user/DBA should be sure about
the integrity of the meta-data */
- SRV_ALL_O_DIRECT_FSYNC
+#ifdef _WIN32
+ ,SRV_ALL_O_DIRECT_FSYNC
/*!< Traditional Windows approach to open
all files without caching, and do FileFlushBuffers()*/
+#endif
};
-extern enum srv_flush_t srv_file_flush_method;
+/** innodb_flush_method */
+extern ulong srv_file_flush_method;
/** Alternatives for srv_force_recovery. Non-zero values are intended
to help the user get a damaged database up so that he can dump intact
@@ -943,16 +923,10 @@ srv_was_tablespace_truncated(const fil_space_t* space);
#ifdef UNIV_DEBUG
/** Disables master thread. It's used by:
SET GLOBAL innodb_master_thread_disabled_debug = 1 (0).
-@param[in] thd thread handle
-@param[in] var pointer to system variable
-@param[out] var_ptr where the formal string goes
@param[in] save immediate result from check function */
void
-srv_master_thread_disabled_debug_update(
- THD* thd,
- struct st_mysql_sys_var* var,
- void* var_ptr,
- const void* save);
+srv_master_thread_disabled_debug_update(THD*, st_mysql_sys_var*, void*,
+ const void* save);
#endif /* UNIV_DEBUG */
/** Status variables to be passed to MySQL */
@@ -968,6 +942,7 @@ struct export_var_t{
char innodb_buffer_pool_dump_status[OS_FILE_MAX_PATH + 128];/*!< Buf pool dump status */
char innodb_buffer_pool_load_status[OS_FILE_MAX_PATH + 128];/*!< Buf pool load status */
char innodb_buffer_pool_resize_status[512];/*!< Buf pool resize status */
+ my_bool innodb_buffer_pool_load_incomplete;/*!< Buf pool load incomplete */
ulint innodb_buffer_pool_pages_total; /*!< Buffer pool size */
ulint innodb_buffer_pool_pages_data; /*!< Data pages */
ulint innodb_buffer_pool_bytes_data; /*!< File bytes used */
@@ -996,7 +971,7 @@ struct export_var_t{
ulint innodb_os_log_fsyncs; /*!< fil_n_log_flushes */
ulint innodb_os_log_pending_writes; /*!< srv_os_log_pending_writes */
ulint innodb_os_log_pending_fsyncs; /*!< fil_n_pending_log_flushes */
- ulint innodb_page_size; /*!< UNIV_PAGE_SIZE */
+ ulint innodb_page_size; /*!< srv_page_size */
ulint innodb_pages_created; /*!< buf_pool->stat.n_pages_created */
ulint innodb_pages_read; /*!< buf_pool->stat.n_pages_read*/
ulint innodb_page0_read; /*!< srv_stats.page0_read */
@@ -1033,6 +1008,9 @@ struct export_var_t{
ulint innodb_defragment_count; /*!< Number of defragment
operations*/
+ /** Number of instant ALTER TABLE operations that affect columns */
+ ulong innodb_instant_alter_column;
+
ulint innodb_onlineddl_rowlog_rows; /*!< Online alter rows */
ulint innodb_onlineddl_rowlog_pct_used; /*!< Online alter percentage
of used row log buffer */
diff --git a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h
index 3575f2e40b9..ee263f6c1f6 100644
--- a/storage/innobase/include/srv0start.h
+++ b/storage/innobase/include/srv0start.h
@@ -44,20 +44,16 @@ only one buffer pool instance is used. */
dberr_t
srv_undo_tablespaces_init(bool create_new_db);
-/****************************************************************//**
-Starts Innobase and creates a new database if database files
-are not found and the user wants.
+/** Start InnoDB.
+@param[in] create_new_db whether to create a new database
@return DB_SUCCESS or error code */
-dberr_t
-innobase_start_or_create_for_mysql();
+dberr_t srv_start(bool create_new_db);
/** Shut down InnoDB. */
-void
-innodb_shutdown();
+void innodb_shutdown();
/** Shut down background threads that can generate undo log. */
-void
-srv_shutdown_bg_undo_sources();
+void srv_shutdown_bg_undo_sources();
/*************************************************************//**
Copy the file path component of the physical file to parameter. It will
diff --git a/storage/innobase/include/sync0arr.h b/storage/innobase/include/sync0arr.h
index 4104e594cf9..b3180c1779d 100644
--- a/storage/innobase/include/sync0arr.h
+++ b/storage/innobase/include/sync0arr.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2015, 2017, MariaDB Corporation.
+Copyright (c) 2015, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -99,16 +99,11 @@ void
sync_array_print(
FILE* file); /*!< in: file where to print */
-/**********************************************************************//**
-Create the primary system wait array(s), they are protected by an OS mutex */
-void
-sync_array_init(
- ulint n_threads); /*!< in: Number of slots to create */
+/** Create the primary system wait arrays */
+void sync_array_init();
-/**********************************************************************//**
-Close sync array wait sub-system. */
-void
-sync_array_close();
+/** Destroy the sync array wait sub-system. */
+void sync_array_close();
/**********************************************************************//**
Get an instance of the sync wait array. */
diff --git a/storage/innobase/include/sync0policy.h b/storage/innobase/include/sync0policy.h
index de27c87816c..daa08a91e1e 100644
--- a/storage/innobase/include/sync0policy.h
+++ b/storage/innobase/include/sync0policy.h
@@ -50,7 +50,7 @@ public:
m_mutex(),
m_filename(),
m_line(),
- m_thread_id(os_thread_id_t(ULINT_UNDEFINED))
+ m_thread_id(ULINT_UNDEFINED)
{
/* No op */
}
@@ -76,7 +76,8 @@ public:
{
m_mutex = mutex;
- m_thread_id = os_thread_get_curr_id();
+ my_atomic_storelint(&m_thread_id,
+ ulint(os_thread_get_curr_id()));
m_filename = filename;
@@ -89,7 +90,7 @@ public:
{
m_mutex = NULL;
- m_thread_id = os_thread_id_t(ULINT_UNDEFINED);
+ my_atomic_storelint(&m_thread_id, ULINT_UNDEFINED);
m_filename = NULL;
@@ -105,7 +106,7 @@ public:
msg << m_mutex->policy().to_string();
- if (os_thread_pf(m_thread_id) != ULINT_UNDEFINED) {
+ if (m_thread_id != ULINT_UNDEFINED) {
msg << " addr: " << m_mutex
<< " acquired: " << locked_from().c_str();
@@ -138,7 +139,7 @@ public:
unsigned m_line;
/** Thread ID of the thread that own(ed) the mutex */
- os_thread_id_t m_thread_id;
+ ulint m_thread_id;
};
/** Constructor. */
@@ -157,7 +158,7 @@ public:
/** Mutex is being destroyed. */
void destroy() UNIV_NOTHROW
{
- ut_ad(m_context.m_thread_id == os_thread_id_t(ULINT_UNDEFINED));
+ ut_ad((ulint)my_atomic_loadlint(&m_context.m_thread_id) == ULINT_UNDEFINED);
m_magic_n = 0;
@@ -167,8 +168,7 @@ public:
/** Called when the mutex is "created". Note: Not from the constructor
but when the mutex is initialised.
@param[in] id Mutex ID */
- void init(latch_id_t id)
- UNIV_NOTHROW;
+ void init(latch_id_t id) UNIV_NOTHROW;
/** Called when an attempt is made to lock the mutex
@param[in] mutex Mutex instance to be locked
@@ -199,7 +199,7 @@ public:
bool is_owned() const UNIV_NOTHROW
{
return(os_thread_eq(
- m_context.m_thread_id,
+ (os_thread_id_t)my_atomic_loadlint(&m_context.m_thread_id),
os_thread_get_curr_id()));
}
@@ -221,7 +221,7 @@ public:
os_thread_id_t get_thread_id() const
UNIV_NOTHROW
{
- return(m_context.m_thread_id);
+ return((os_thread_id_t)my_atomic_loadlint(&m_context.m_thread_id));
}
/** Magic number to check for memory corruption. */
@@ -241,7 +241,7 @@ struct NoPolicy {
void init(const Mutex&, latch_id_t, const char*, uint32_t)
UNIV_NOTHROW { }
void destroy() UNIV_NOTHROW { }
- void enter(const Mutex&, const char*, unsigned line) UNIV_NOTHROW { }
+ void enter(const Mutex&, const char*, unsigned) UNIV_NOTHROW { }
void add(uint32_t, uint32_t) UNIV_NOTHROW { }
void locked(const Mutex&, const char*, ulint) UNIV_NOTHROW { }
void release(const Mutex&) UNIV_NOTHROW { }
@@ -275,12 +275,11 @@ public:
/** Called when the mutex is "created". Note: Not from the constructor
but when the mutex is initialised.
- @param[in] mutex Mutex instance to track
@param[in] id Mutex ID
@param[in] filename File where mutex was created
@param[in] line Line in filename */
void init(
- const MutexType& mutex,
+ const Mutex&,
latch_id_t id,
const char* filename,
uint32_t line)
@@ -423,15 +422,8 @@ public:
/** Called when the mutex is "created". Note: Not from the constructor
but when the mutex is initialised.
- @param[in] mutex Mutex instance to track
- @param[in] id Mutex ID
- @param[in] filename File where mutex was created
- @param[in] line Line in filename */
- void init(
- const MutexType& mutex,
- latch_id_t id,
- const char* filename,
- uint32_t line)
+ @param[in] id Mutex ID */
+ void init(const Mutex&, latch_id_t id, const char*, uint32)
UNIV_NOTHROW
{
/* It can be LATCH_ID_BUF_BLOCK_MUTEX or
diff --git a/storage/innobase/include/sync0policy.ic b/storage/innobase/include/sync0policy.ic
index f3526bbfef5..a28e3c382b4 100644
--- a/storage/innobase/include/sync0policy.ic
+++ b/storage/innobase/include/sync0policy.ic
@@ -80,7 +80,7 @@ void MutexDebug<Mutex>::locked(
UNIV_NOTHROW
{
ut_ad(!is_owned());
- ut_ad(m_context.m_thread_id == os_thread_id_t(ULINT_UNDEFINED));
+ ut_ad(m_context.m_thread_id == ULINT_UNDEFINED);
m_context.locked(mutex, name, line);
@@ -88,7 +88,7 @@ void MutexDebug<Mutex>::locked(
}
template <typename Mutex>
-void MutexDebug<Mutex>::release(const Mutex* mutex)
+void MutexDebug<Mutex>::release(const Mutex*)
UNIV_NOTHROW
{
ut_ad(is_owned());
diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h
index 888a32007ce..b61553fc380 100644
--- a/storage/innobase/include/sync0rw.h
+++ b/storage/innobase/include/sync0rw.h
@@ -501,13 +501,13 @@ bool
rw_lock_lock_word_decr(
/*===================*/
rw_lock_t* lock, /*!< in/out: rw-lock */
- ulint amount, /*!< in: amount to decrement */
- lint threshold); /*!< in: threshold of judgement */
+ int32_t amount, /*!< in: amount to decrement */
+ int32_t threshold); /*!< in: threshold of judgement */
#ifdef UNIV_DEBUG
/******************************************************************//**
Checks if the thread has locked the rw-lock in the specified mode, with
the pass value == 0. */
-ibool
+bool
rw_lock_own(
/*========*/
rw_lock_t* lock, /*!< in: rw-lock */
@@ -571,10 +571,10 @@ struct rw_lock_t
#endif /* UNIV_DEBUG */
{
/** Holds the state of the lock. */
- volatile lint lock_word;
+ int32_t lock_word;
/** 1: there are waiters */
- volatile uint32_t waiters;
+ int32_t waiters;
/** number of granted SX locks. */
volatile ulint sx_recursive;
@@ -603,9 +603,6 @@ struct rw_lock_t
/** File name where lock created */
const char* cfile_name;
- /** last s-lock file/line is not guaranteed to be correct */
- const char* last_s_file_name;
-
/** File name where last x-locked */
const char* last_x_file_name;
@@ -615,9 +612,6 @@ struct rw_lock_t
/** If 1 then the rw-lock is a block lock */
unsigned is_block_lock:1;
- /** Line number where last time s-locked */
- unsigned last_s_line:14;
-
/** Line number where last time x-locked */
unsigned last_x_line:14;
diff --git a/storage/innobase/include/sync0rw.ic b/storage/innobase/include/sync0rw.ic
index 404c7cb9b86..f0c33ecbeda 100644
--- a/storage/innobase/include/sync0rw.ic
+++ b/storage/innobase/include/sync0rw.ic
@@ -2,7 +2,7 @@
Copyright (c) 1995, 2016, Oracle and/or its affiliates. All Rights Reserved.
Copyright (c) 2008, Google Inc.
-Copyright (c) 2017, MariaDB Corporation. All Rights Reserved.
+Copyright (c) 2017, 2018, MariaDB Corporation.
Portions of this file contain modifications contributed and copyrighted by
Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -77,7 +77,8 @@ rw_lock_get_writer(
/*===============*/
const rw_lock_t* lock) /*!< in: rw-lock */
{
- lint lock_word = lock->lock_word;
+ int32_t lock_word = my_atomic_load32_explicit(const_cast<int32_t*>(&lock->lock_word),
+ MY_MEMORY_ORDER_RELAXED);
ut_ad(lock_word <= X_LOCK_DECR);
if (lock_word > X_LOCK_HALF_DECR) {
@@ -109,15 +110,16 @@ rw_lock_get_reader_count(
/*=====================*/
const rw_lock_t* lock) /*!< in: rw-lock */
{
- lint lock_word = lock->lock_word;
+ int32_t lock_word = my_atomic_load32_explicit(const_cast<int32_t*>(&lock->lock_word),
+ MY_MEMORY_ORDER_RELAXED);
ut_ad(lock_word <= X_LOCK_DECR);
if (lock_word > X_LOCK_HALF_DECR) {
/* s-locked, no x-waiter */
- return(X_LOCK_DECR - lock_word);
+ return ulint(X_LOCK_DECR - lock_word);
} else if (lock_word > 0) {
/* s-locked, with sx-locks only */
- return(X_LOCK_HALF_DECR - lock_word);
+ return ulint(X_LOCK_HALF_DECR - lock_word);
} else if (lock_word == 0) {
/* x-locked */
return(0);
@@ -145,7 +147,8 @@ rw_lock_get_x_lock_count(
/*=====================*/
const rw_lock_t* lock) /*!< in: rw-lock */
{
- lint lock_copy = lock->lock_word;
+ int32_t lock_copy = my_atomic_load32_explicit(const_cast<int32_t*>(&lock->lock_word),
+ MY_MEMORY_ORDER_RELAXED);
ut_ad(lock_copy <= X_LOCK_DECR);
if (lock_copy == 0 || lock_copy == -X_LOCK_HALF_DECR) {
@@ -158,12 +161,12 @@ rw_lock_get_x_lock_count(
/* no s-lock, no sx-lock, 2 or more x-locks.
First 2 x-locks are set with -X_LOCK_DECR,
all other recursive x-locks are set with -1 */
- return(2 - (lock_copy + X_LOCK_DECR));
+ return ulint(2 - X_LOCK_DECR - lock_copy);
} else {
/* no s-lock, 1 or more sx-lock, 2 or more x-locks.
First 2 x-locks are set with -(X_LOCK_DECR + X_LOCK_HALF_DECR),
all other recursive x-locks are set with -1 */
- return(2 - (lock_copy + X_LOCK_DECR + X_LOCK_HALF_DECR));
+ return ulint(2 - X_LOCK_DECR - X_LOCK_HALF_DECR - lock_copy);
}
}
@@ -178,7 +181,8 @@ rw_lock_get_sx_lock_count(
const rw_lock_t* lock) /*!< in: rw-lock */
{
#ifdef UNIV_DEBUG
- lint lock_copy = lock->lock_word;
+ int32_t lock_copy = my_atomic_load32_explicit(const_cast<int32_t*>(&lock->lock_word),
+ MY_MEMORY_ORDER_RELAXED);
ut_ad(lock_copy <= X_LOCK_DECR);
@@ -197,9 +201,7 @@ rw_lock_get_sx_lock_count(
}
/******************************************************************//**
-Two different implementations for decrementing the lock_word of a rw_lock:
-one for systems supporting atomic operations, one for others. This does
-does not support recusive x-locks: they should be handled by the caller and
+Recursive x-locks are not supported: they should be handled by the caller and
need not be atomic since they are performed by the current lock holder.
Returns true if the decrement was made, false if not.
@return true if decr occurs */
@@ -208,16 +210,17 @@ bool
rw_lock_lock_word_decr(
/*===================*/
rw_lock_t* lock, /*!< in/out: rw-lock */
- ulint amount, /*!< in: amount to decrement */
- lint threshold) /*!< in: threshold of judgement */
+ int32_t amount, /*!< in: amount to decrement */
+ int32_t threshold) /*!< in: threshold of judgement */
{
- lint local_lock_word;
-
- local_lock_word = lock->lock_word;
- while (local_lock_word > threshold) {
- if (my_atomic_caslint(&lock->lock_word,
- &local_lock_word,
- local_lock_word - amount)) {
+ int32_t lock_copy = my_atomic_load32_explicit(&lock->lock_word,
+ MY_MEMORY_ORDER_RELAXED);
+ while (lock_copy > threshold) {
+ if (my_atomic_cas32_strong_explicit(&lock->lock_word,
+ &lock_copy,
+ lock_copy - amount,
+ MY_MEMORY_ORDER_ACQUIRE,
+ MY_MEMORY_ORDER_RELAXED)) {
return(true);
}
}
@@ -246,11 +249,6 @@ rw_lock_s_lock_low(
ut_d(rw_lock_add_debug_info(lock, pass, RW_LOCK_S, file_name, line));
- /* These debugging values are not set safely: they may be incorrect
- or even refer to a line that is invalid for the file name. */
- lock->last_s_file_name = file_name;
- lock->last_s_line = line;
-
return(TRUE); /* locking succeeded */
}
@@ -304,29 +302,32 @@ rw_lock_x_lock_func_nowait(
const char* file_name,/*!< in: file name where lock requested */
unsigned line) /*!< in: line where requested */
{
- lint oldval = X_LOCK_DECR;
+ int32_t oldval = X_LOCK_DECR;
- if (my_atomic_caslint(&lock->lock_word, &oldval, 0)) {
+ if (my_atomic_cas32_strong_explicit(&lock->lock_word, &oldval, 0,
+ MY_MEMORY_ORDER_ACQUIRE,
+ MY_MEMORY_ORDER_RELAXED)) {
lock->writer_thread = os_thread_get_curr_id();
} else if (os_thread_eq(lock->writer_thread, os_thread_get_curr_id())) {
- /* Relock: this lock_word modification is safe since no other
- threads can modify (lock, unlock, or reserve) lock_word while
- there is an exclusive writer and this is the writer thread. */
- if (lock->lock_word == 0 || lock->lock_word == -X_LOCK_HALF_DECR) {
+ /* Relock: even though no other thread can modify (lock, unlock
+ or reserve) lock_word while there is an exclusive writer and
+ this is the writer thread, we still want concurrent threads to
+ observe consistent values. */
+ if (oldval == 0 || oldval == -X_LOCK_HALF_DECR) {
/* There is 1 x-lock */
- lock->lock_word -= X_LOCK_DECR;
- } else if (lock->lock_word <= -X_LOCK_DECR) {
+ my_atomic_add32_explicit(&lock->lock_word, -X_LOCK_DECR,
+ MY_MEMORY_ORDER_RELAXED);
+ } else if (oldval <= -X_LOCK_DECR) {
/* There are 2 or more x-locks */
- lock->lock_word--;
+ my_atomic_add32_explicit(&lock->lock_word, -1,
+ MY_MEMORY_ORDER_RELAXED);
+ /* Watch for too many recursive locks */
+ ut_ad(oldval < 1);
} else {
/* Failure */
return(FALSE);
}
-
- /* Watch for too many recursive locks */
- ut_ad(lock->lock_word < 0);
-
} else {
/* Failure */
return(FALSE);
@@ -354,14 +355,19 @@ rw_lock_s_unlock_func(
#endif /* UNIV_DEBUG */
rw_lock_t* lock) /*!< in/out: rw-lock */
{
- ut_ad(lock->lock_word > -X_LOCK_DECR);
- ut_ad(lock->lock_word != 0);
- ut_ad(lock->lock_word < X_LOCK_DECR);
+#ifdef UNIV_DEBUG
+ int32_t dbg_lock_word = my_atomic_load32_explicit(&lock->lock_word,
+ MY_MEMORY_ORDER_RELAXED);
+ ut_ad(dbg_lock_word > -X_LOCK_DECR);
+ ut_ad(dbg_lock_word != 0);
+ ut_ad(dbg_lock_word < X_LOCK_DECR);
+#endif
ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_S));
/* Increment lock_word to indicate 1 less reader */
- lint lock_word = my_atomic_addlint(&lock->lock_word, 1) + 1;
+ int32_t lock_word = my_atomic_add32_explicit(&lock->lock_word, 1,
+ MY_MEMORY_ORDER_RELEASE) + 1;
if (lock_word == 0 || lock_word == -X_LOCK_HALF_DECR) {
/* wait_ex waiter exists. It may not be asleep, but we signal
@@ -387,41 +393,49 @@ rw_lock_x_unlock_func(
#endif /* UNIV_DEBUG */
rw_lock_t* lock) /*!< in/out: rw-lock */
{
- ut_ad(lock->lock_word == 0 || lock->lock_word == -X_LOCK_HALF_DECR
- || lock->lock_word <= -X_LOCK_DECR);
+ int32_t lock_word = my_atomic_load32_explicit(&lock->lock_word,
+ MY_MEMORY_ORDER_RELAXED);
- if (lock->lock_word == 0) {
+ ut_ad(lock_word == 0 || lock_word == -X_LOCK_HALF_DECR
+ || lock_word <= -X_LOCK_DECR);
+
+ if (lock_word == 0) {
/* Last caller in a possible recursive chain. */
lock->writer_thread = 0;
}
ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_X));
- if (lock->lock_word == 0 || lock->lock_word == -X_LOCK_HALF_DECR) {
- /* There is 1 x-lock */
- /* atomic increment is needed, because it is last */
- if (my_atomic_addlint(&lock->lock_word, X_LOCK_DECR) <= -X_LOCK_DECR) {
- ut_error;
- }
+ if (lock_word == 0 || lock_word == -X_LOCK_HALF_DECR) {
+ /* Last X-lock owned by this thread, it may still hold SX-locks.
+ ACQ_REL due to...
+ RELEASE: we release rw-lock
+ ACQUIRE: we want waiters to be loaded after lock_word is stored */
+ my_atomic_add32_explicit(&lock->lock_word, X_LOCK_DECR,
+ MY_MEMORY_ORDER_ACQ_REL);
/* This no longer has an X-lock but it may still have
an SX-lock. So it is now free for S-locks by other threads.
We need to signal read/write waiters.
We do not need to signal wait_ex waiters, since they cannot
exist when there is a writer. */
- if (lock->waiters) {
- my_atomic_store32((int32*) &lock->waiters, 0);
+ if (my_atomic_load32_explicit(&lock->waiters,
+ MY_MEMORY_ORDER_RELAXED)) {
+ my_atomic_store32_explicit(&lock->waiters, 0,
+ MY_MEMORY_ORDER_RELAXED);
os_event_set(lock->event);
sync_array_object_signalled();
}
- } else if (lock->lock_word == -X_LOCK_DECR
- || lock->lock_word == -(X_LOCK_DECR + X_LOCK_HALF_DECR)) {
+ } else if (lock_word == -X_LOCK_DECR
+ || lock_word == -(X_LOCK_DECR + X_LOCK_HALF_DECR)) {
/* There are 2 x-locks */
- lock->lock_word += X_LOCK_DECR;
+ my_atomic_add32_explicit(&lock->lock_word, X_LOCK_DECR,
+ MY_MEMORY_ORDER_RELAXED);
} else {
/* There are more than 2 x-locks. */
- ut_ad(lock->lock_word < -X_LOCK_DECR);
- lock->lock_word += 1;
+ ut_ad(lock_word < -X_LOCK_DECR);
+ my_atomic_add32_explicit(&lock->lock_word, 1,
+ MY_MEMORY_ORDER_RELAXED);
}
ut_ad(rw_lock_validate(lock));
@@ -447,28 +461,37 @@ rw_lock_sx_unlock_func(
ut_d(rw_lock_remove_debug_info(lock, pass, RW_LOCK_SX));
if (lock->sx_recursive == 0) {
+ int32_t lock_word = my_atomic_load32_explicit(&lock->lock_word,
+ MY_MEMORY_ORDER_RELAXED);
/* Last caller in a possible recursive chain. */
- if (lock->lock_word > 0) {
+ if (lock_word > 0) {
lock->writer_thread = 0;
+ ut_ad(lock_word <= INT_MAX32 - X_LOCK_HALF_DECR);
+
+ /* Last SX-lock owned by this thread, doesn't own X-lock.
+ ACQ_REL due to...
+ RELEASE: we release rw-lock
+ ACQUIRE: we want waiters to be loaded after lock_word is stored */
+ my_atomic_add32_explicit(&lock->lock_word, X_LOCK_HALF_DECR,
+ MY_MEMORY_ORDER_ACQ_REL);
- if (my_atomic_addlint(&lock->lock_word, X_LOCK_HALF_DECR) <= 0) {
- ut_error;
- }
/* Lock is now free. May have to signal read/write
waiters. We do not need to signal wait_ex waiters,
since they cannot exist when there is an sx-lock
holder. */
- if (lock->waiters) {
- my_atomic_store32((int32*) &lock->waiters, 0);
+ if (my_atomic_load32_explicit(&lock->waiters,
+ MY_MEMORY_ORDER_RELAXED)) {
+ my_atomic_store32_explicit(&lock->waiters, 0,
+ MY_MEMORY_ORDER_RELAXED);
os_event_set(lock->event);
sync_array_object_signalled();
}
} else {
/* still has x-lock */
- ut_ad(lock->lock_word == -X_LOCK_HALF_DECR
- || lock->lock_word <= -(X_LOCK_DECR
- + X_LOCK_HALF_DECR));
- lock->lock_word += X_LOCK_HALF_DECR;
+ ut_ad(lock_word == -X_LOCK_HALF_DECR ||
+ lock_word <= -(X_LOCK_DECR + X_LOCK_HALF_DECR));
+ my_atomic_add32_explicit(&lock->lock_word, X_LOCK_HALF_DECR,
+ MY_MEMORY_ORDER_RELAXED);
}
}
diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h
index 3c40591e873..ffa682b46db 100644
--- a/storage/innobase/include/sync0sync.h
+++ b/storage/innobase/include/sync0sync.h
@@ -57,7 +57,6 @@ extern mysql_pfs_key_t buf_pool_zip_mutex_key;
extern mysql_pfs_key_t cache_last_read_mutex_key;
extern mysql_pfs_key_t dict_foreign_err_mutex_key;
extern mysql_pfs_key_t dict_sys_mutex_key;
-extern mysql_pfs_key_t file_format_max_mutex_key;
extern mysql_pfs_key_t fil_system_mutex_key;
extern mysql_pfs_key_t flush_list_mutex_key;
extern mysql_pfs_key_t fts_bg_threads_mutex_key;
@@ -95,7 +94,6 @@ extern mysql_pfs_key_t srv_innodb_monitor_mutex_key;
extern mysql_pfs_key_t srv_misc_tmpfile_mutex_key;
extern mysql_pfs_key_t srv_monitor_file_mutex_key;
extern mysql_pfs_key_t buf_dblwr_mutex_key;
-extern mysql_pfs_key_t trx_undo_mutex_key;
extern mysql_pfs_key_t trx_mutex_key;
extern mysql_pfs_key_t trx_pool_mutex_key;
extern mysql_pfs_key_t trx_pool_manager_mutex_key;
@@ -110,6 +108,7 @@ extern mysql_pfs_key_t sync_array_mutex_key;
extern mysql_pfs_key_t thread_mutex_key;
extern mysql_pfs_key_t zip_pad_mutex_key;
extern mysql_pfs_key_t row_drop_list_mutex_key;
+extern mysql_pfs_key_t rw_trx_hash_element_mutex_key;
#endif /* UNIV_PFS_MUTEX */
#ifdef UNIV_PFS_RWLOCK
diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h
index 19f992f5f50..773c610d30e 100644
--- a/storage/innobase/include/sync0types.h
+++ b/storage/innobase/include/sync0types.h
@@ -108,16 +108,6 @@ V
Transaction system header
|
V
-Transaction undo mutex The undo log entry must be written
-| before any index page is modified.
-| Transaction undo mutex is for the undo
-| logs the analogue of the tree latch
-| for a B-tree. If a thread has the
-| trx undo mutex reserved, it is allowed
-| to latch the undo log pages in any
-| order, and also after it has acquired
-| the fsp latch.
-V
Rollback segment mutex The rollback segment mutex must be
| reserved, if, e.g., a new page must
| be added to an undo log. The rollback
@@ -160,7 +150,7 @@ V
lock_sys_mutex Mutex protecting lock_sys_t
|
V
-trx_sys->mutex Mutex protecting trx_sys_t
+trx_sys.mutex Mutex protecting trx_sys_t
|
V
Threads mutex Background thread scheduling mutex
@@ -233,6 +223,7 @@ enum latch_level_t {
SYNC_REC_LOCK,
SYNC_THREADS,
SYNC_TRX,
+ SYNC_RW_TRX_HASH_ELEMENT,
SYNC_TRX_SYS,
SYNC_LOCK_SYS,
SYNC_LOCK_WAIT_SYS,
@@ -255,7 +246,6 @@ enum latch_level_t {
SYNC_RSEG_HEADER_NEW,
SYNC_NOREDO_RSEG,
SYNC_REDO_RSEG,
- SYNC_TRX_UNDO,
SYNC_PURGE_LATCH,
SYNC_TREE_NODE,
SYNC_TREE_NODE_FROM_HASH,
@@ -270,8 +260,6 @@ enum latch_level_t {
SYNC_DICT,
SYNC_FTS_CACHE,
- SYNC_FILE_FORMAT_TAG,
-
SYNC_DICT_OPERATION,
SYNC_TRX_I_S_LAST_READ,
@@ -339,7 +327,6 @@ enum latch_id_t {
LATCH_ID_SRV_MISC_TMPFILE,
LATCH_ID_SRV_MONITOR_FILE,
LATCH_ID_BUF_DBLWR,
- LATCH_ID_TRX_UNDO,
LATCH_ID_TRX_POOL,
LATCH_ID_TRX_POOL_MANAGER,
LATCH_ID_TRX,
@@ -380,12 +367,11 @@ enum latch_id_t {
LATCH_ID_SCRUB_STAT_MUTEX,
LATCH_ID_DEFRAGMENT_MUTEX,
LATCH_ID_BTR_DEFRAGMENT_MUTEX,
- LATCH_ID_MTFLUSH_THREAD_MUTEX,
- LATCH_ID_MTFLUSH_MUTEX,
LATCH_ID_FIL_CRYPT_MUTEX,
LATCH_ID_FIL_CRYPT_STAT_MUTEX,
LATCH_ID_FIL_CRYPT_DATA_MUTEX,
LATCH_ID_FIL_CRYPT_THREADS_MUTEX,
+ LATCH_ID_RW_TRX_HASH_ELEMENT,
LATCH_ID_TEST_MUTEX,
LATCH_ID_MAX = LATCH_ID_TEST_MUTEX
};
@@ -493,10 +479,10 @@ struct OSMutex {
}
private:
-#ifdef UNIV_DEBUG
+#ifdef DBUG_ASSERT_EXISTS
/** true if the mutex has been freed/destroyed. */
bool m_freed;
-#endif /* UNIV_DEBUG */
+#endif /* DBUG_ASSERT_EXISTS */
sys_mutex_t m_mutex;
};
@@ -993,8 +979,7 @@ struct latch_t {
UNIV_NOTHROW
:
m_id(id),
- m_rw_lock(),
- m_temp_fsp() { }
+ m_rw_lock() {}
/** Destructor */
virtual ~latch_t() UNIV_NOTHROW { }
@@ -1028,24 +1013,6 @@ struct latch_t {
return(sync_latch_get_level(m_id));
}
- /** @return true if the latch is for a temporary file space*/
- bool is_temp_fsp() const
- UNIV_NOTHROW
- {
- return(m_temp_fsp);
- }
-
- /** Set the temporary tablespace flag. (For internal temporary
- tables, MySQL 5.7 does not always acquire the index->lock. We
- need to figure out the context and add some special rules
- during the checks.) */
- void set_temp_fsp()
- UNIV_NOTHROW
- {
- ut_ad(get_id() == LATCH_ID_FIL_SPACE);
- m_temp_fsp = true;
- }
-
/** @return the latch name, m_id must be set */
const char* get_name() const
UNIV_NOTHROW
@@ -1061,9 +1028,6 @@ struct latch_t {
/** true if it is a rw-lock. In debug mode, rw_lock_t derives from
this class and sets this variable. */
bool m_rw_lock;
-
- /** true if it is an temporary space latch */
- bool m_temp_fsp;
};
/** Subclass this to iterate over a thread's acquired latch levels. */
@@ -1155,92 +1119,88 @@ enum rw_lock_flag_t {
#endif /* UNIV_INNOCHECKSUM */
-#ifdef _WIN64
static inline ulint my_atomic_addlint(ulint *A, ulint B)
{
+#ifdef _WIN64
return ulint(my_atomic_add64((volatile int64*)A, B));
+#else
+ return ulint(my_atomic_addlong(A, B));
+#endif
}
static inline ulint my_atomic_loadlint(const ulint *A)
{
+#ifdef _WIN64
return ulint(my_atomic_load64((volatile int64*)A));
+#else
+ return ulint(my_atomic_loadlong(A));
+#endif
}
static inline lint my_atomic_addlint(volatile lint *A, lint B)
{
+#ifdef _WIN64
return my_atomic_add64((volatile int64*)A, B);
+#else
+ return my_atomic_addlong(A, B);
+#endif
}
static inline lint my_atomic_loadlint(const lint *A)
{
+#ifdef _WIN64
return lint(my_atomic_load64((volatile int64*)A));
+#else
+ return my_atomic_loadlong(A);
+#endif
}
static inline void my_atomic_storelint(ulint *A, ulint B)
{
+#ifdef _WIN64
my_atomic_store64((volatile int64*)A, B);
+#else
+ my_atomic_storelong(A, B);
+#endif
}
-static inline lint my_atomic_caslint(volatile lint *A, lint *B, lint C)
+/** Simple non-atomic counter aligned to CACHE_LINE_SIZE
+@tparam Type the integer type of the counter */
+template <typename Type>
+struct MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) simple_counter
{
- return my_atomic_cas64((volatile int64*)A, (int64 *)B, C);
-}
+ /** Increment the counter */
+ Type inc() { return add(1); }
+ /** Decrement the counter */
+ Type dec() { return add(Type(~0)); }
-static inline ulint my_atomic_caslint(ulint *A, ulint *B, ulint C)
-{
- return my_atomic_cas64((volatile int64*)A, (int64 *)B, (int64)C);
-}
+ /** Add to the counter
+ @param[in] i amount to be added
+ @return the value of the counter after adding */
+ Type add(Type i) { return m_counter += i; }
-#else
-#define my_atomic_addlint my_atomic_addlong
-#define my_atomic_loadlint my_atomic_loadlong
-#define my_atomic_caslint my_atomic_caslong
-#endif
+ /** @return the value of the counter */
+ operator Type() const { return m_counter; }
-/** Simple counter aligned to CACHE_LINE_SIZE
-@tparam Type the integer type of the counter
-@tparam atomic whether to use atomic memory access */
-template <typename Type = ulint, bool atomic = false>
-struct MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) simple_counter
+private:
+ /** The counter */
+ Type m_counter;
+};
+
+/** Simple atomic counter aligned to CACHE_LINE_SIZE
+@tparam Type lint or ulint */
+template <typename Type = ulint>
+struct MY_ALIGNED(CPU_LEVEL1_DCACHE_LINESIZE) simple_atomic_counter
{
/** Increment the counter */
Type inc() { return add(1); }
/** Decrement the counter */
- Type dec() { return sub(1); }
+ Type dec() { return add(Type(~0)); }
/** Add to the counter
@param[in] i amount to be added
- @return the value of the counter after adding */
- Type add(Type i)
- {
- compile_time_assert(!atomic || sizeof(Type) == sizeof(lint));
- if (atomic) {
-#ifdef _MSC_VER
-// Suppress type conversion/ possible loss of data warning
-#pragma warning (push)
-#pragma warning (disable : 4244)
-#endif
- return Type(my_atomic_addlint(reinterpret_cast<ulint*>
- (&m_counter), i));
-#ifdef _MSC_VER
-#pragma warning (pop)
-#endif
- } else {
- return m_counter += i;
- }
- }
- /** Subtract from the counter
- @param[in] i amount to be subtracted
- @return the value of the counter after adding */
- Type sub(Type i)
- {
- compile_time_assert(!atomic || sizeof(Type) == sizeof(lint));
- if (atomic) {
- return Type(my_atomic_addlint(&m_counter, -lint(i)));
- } else {
- return m_counter -= i;
- }
- }
+ @return the value of the counter before adding */
+ Type add(Type i) { return my_atomic_addlint(&m_counter, i); }
/** @return the value of the counter (non-atomic access)! */
operator Type() const { return m_counter; }
diff --git a/storage/innobase/include/trx0i_s.h b/storage/innobase/include/trx0i_s.h
index e02c5d88a29..ee7da7b74dc 100644
--- a/storage/innobase/include/trx0i_s.h
+++ b/storage/innobase/include/trx0i_s.h
@@ -264,10 +264,10 @@ trx_i_s_possibly_fetch_data_into_cache(
trx_i_s_cache_t* cache); /*!< in/out: cache */
/*******************************************************************//**
-Returns TRUE if the data in the cache is truncated due to the memory
+Returns true, if the data in the cache is truncated due to the memory
limit posed by TRX_I_S_MEM_LIMIT.
@return true if truncated */
-ibool
+bool
trx_i_s_cache_is_truncated(
/*=======================*/
trx_i_s_cache_t* cache); /*!< in: cache */
diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h
index 8d31a50f028..27807321212 100644
--- a/storage/innobase/include/trx0purge.h
+++ b/storage/innobase/include/trx0purge.h
@@ -27,14 +27,8 @@ Created 3/26/1996 Heikki Tuuri
#ifndef trx0purge_h
#define trx0purge_h
-#include "univ.i"
-#include "trx0types.h"
-#include "mtr0mtr.h"
-#include "trx0sys.h"
+#include "trx0rseg.h"
#include "que0types.h"
-#include "page0page.h"
-#include "fil0fil.h"
-#include "read0types.h"
/** A dummy undo record used as a return value when we have a whole undo log
which needs no purge */
@@ -50,16 +44,13 @@ trx_purge_get_log_from_hist(
/*========================*/
fil_addr_t node_addr); /*!< in: file address of the history
list node of the log */
-/************************************************************************
-Adds the update undo log as the first log in the history list. Removes the
-update undo log segment from the rseg slot if it is too big for reuse. */
+/** Prepend the history list with an undo log.
+Remove the undo log segment from the rseg slot if it is too big for reuse.
+@param[in] trx transaction
+@param[in,out] undo undo log
+@param[in,out] mtr mini-transaction */
void
-trx_purge_add_update_undo_to_history(
-/*=================================*/
- trx_t* trx, /*!< in: transaction */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr); /*!< in: mtr */
+trx_purge_add_undo_to_history(const trx_t* trx, trx_undo_t*& undo, mtr_t* mtr);
/*******************************************************************//**
This function runs a purge batch.
@return number of undo log pages handled in the batch */
@@ -68,35 +59,7 @@ trx_purge(
/*======*/
ulint n_purge_threads, /*!< in: number of purge tasks to
submit to task queue. */
- ulint limit, /*!< in: the maximum number of
- records to purge in one batch */
bool truncate); /*!< in: truncate history if true */
-/*******************************************************************//**
-Stop purge and wait for it to stop, move to PURGE_STATE_STOP. */
-void
-trx_purge_stop(void);
-/*================*/
-/*******************************************************************//**
-Resume purge, move to PURGE_STATE_RUN. */
-void
-trx_purge_run(void);
-/*================*/
-
-/** Purge states */
-enum purge_state_t {
- PURGE_STATE_INIT, /*!< Purge instance created */
- PURGE_STATE_RUN, /*!< Purge should be running */
- PURGE_STATE_STOP, /*!< Purge should be stopped */
- PURGE_STATE_EXIT, /*!< Purge has been shutdown */
- PURGE_STATE_DISABLED /*!< Purge was never started */
-};
-
-/*******************************************************************//**
-Get the purge state.
-@return purge state. */
-purge_state_t
-trx_purge_state(void);
-/*=================*/
/** Rollback segments from a given transaction with trx-no
scheduled for purge. */
@@ -106,69 +69,28 @@ private:
trx_rsegs_t;
public:
typedef trx_rsegs_t::iterator iterator;
+ typedef trx_rsegs_t::const_iterator const_iterator;
/** Default constructor */
- TrxUndoRsegs() : m_trx_no() { }
-
- explicit TrxUndoRsegs(trx_id_t trx_no)
- :
- m_trx_no(trx_no)
- {
- // Do nothing
- }
-
- /** Get transaction number
- @return trx_id_t - get transaction number. */
- trx_id_t get_trx_no() const
- {
- return(m_trx_no);
- }
-
- /** Add rollback segment.
- @param rseg rollback segment to add. */
- void push_back(trx_rseg_t* rseg)
- {
- m_rsegs.push_back(rseg);
- }
-
- /** Erase the element pointed by given iterator.
- @param[in] iterator iterator */
- void erase(iterator& it)
- {
- m_rsegs.erase(it);
- }
-
- /** Number of registered rsegs.
- @return size of rseg list. */
- ulint size() const
- {
- return(m_rsegs.size());
- }
-
- /**
- @return an iterator to the first element */
- iterator begin()
- {
- return(m_rsegs.begin());
- }
-
- /**
- @return an iterator to the end */
- iterator end()
- {
- return(m_rsegs.end());
- }
+ TrxUndoRsegs() {}
+ /** Constructor */
+ TrxUndoRsegs(trx_rseg_t& rseg)
+ : m_commit(rseg.last_commit), m_rsegs(1, &rseg) {}
+ /** Constructor */
+ TrxUndoRsegs(trx_id_t trx_no, trx_rseg_t& rseg)
+ : m_commit(trx_no << 1), m_rsegs(1, &rseg) {}
- /** Append rollback segments from referred instance to current
- instance. */
- void append(const TrxUndoRsegs& append_from)
- {
- ut_ad(get_trx_no() == append_from.get_trx_no());
+ /** @return the transaction commit identifier */
+ trx_id_t trx_no() const { return m_commit >> 1; }
- m_rsegs.insert(m_rsegs.end(),
- append_from.m_rsegs.begin(),
- append_from.m_rsegs.end());
- }
+ bool operator!=(const TrxUndoRsegs& other) const
+ { return m_commit != other.m_commit; }
+ bool empty() const { return m_rsegs.empty(); }
+ void erase(iterator& it) { m_rsegs.erase(it); }
+ iterator begin() { return(m_rsegs.begin()); }
+ iterator end() { return(m_rsegs.end()); }
+ const_iterator begin() const { return m_rsegs.begin(); }
+ const_iterator end() const { return m_rsegs.end(); }
/** Compare two TrxUndoRsegs based on trx_no.
@param elem1 first element to compare
@@ -176,17 +98,12 @@ public:
@return true if elem1 > elem2 else false.*/
bool operator()(const TrxUndoRsegs& lhs, const TrxUndoRsegs& rhs)
{
- return(lhs.m_trx_no > rhs.m_trx_no);
+ return(lhs.m_commit > rhs.m_commit);
}
- /** Compiler defined copy-constructor/assignment operator
- should be fine given that there is no reference to a memory
- object outside scope of class object.*/
-
private:
- /** The rollback segments transaction number. */
- trx_id_t m_trx_no;
-
+ /** Copy trx_rseg_t::last_commit */
+ trx_id_t m_commit;
/** Rollback segments of a transaction, scheduled for purge. */
trx_rsegs_t m_rsegs;
};
@@ -196,16 +113,14 @@ typedef std::priority_queue<
std::vector<TrxUndoRsegs, ut_allocator<TrxUndoRsegs> >,
TrxUndoRsegs> purge_pq_t;
-/**
-Chooses the rollback segment with the smallest trx_no. */
+/** Chooses the rollback segment with the oldest committed transaction */
struct TrxUndoRsegsIterator {
-
/** Constructor */
TrxUndoRsegsIterator();
-
/** Sets the next rseg to purge in purge_sys.
+ Executed in the purge coordinator thread.
@return whether anything is to be purged */
- bool set_next();
+ inline bool set_next();
private:
// Disable copying
@@ -213,38 +128,11 @@ private:
TrxUndoRsegsIterator& operator=(const TrxUndoRsegsIterator&);
/** The current element to process */
- TrxUndoRsegs m_trx_undo_rsegs;
-
- /** Track the current element in m_trx_undo_rseg */
- TrxUndoRsegs::iterator m_iter;
-
- /** Sentinel value */
- static const TrxUndoRsegs NullElement;
+ TrxUndoRsegs m_rsegs;
+ /** Track the current element in m_rsegs */
+ TrxUndoRsegs::const_iterator m_iter;
};
-/** This is the purge pointer/iterator. We need both the undo no and the
-transaction no up to which purge has parsed and applied the records. */
-struct purge_iter_t {
- purge_iter_t()
- :
- trx_no(),
- undo_no(),
- undo_rseg_space(ULINT_UNDEFINED)
- {
- // Do nothing
- }
-
- trx_id_t trx_no; /*!< Purge has advanced past all
- transactions whose number is less
- than this */
- undo_no_t undo_no; /*!< Purge has advanced past all records
- whose undo number is less than this */
- ulint undo_rseg_space;
- /*!< Last undo record resided in this
- space id. */
-};
-
-
/* Namespace to hold all the related functions and variables needed for truncate
of undo tablespace. */
namespace undo {
@@ -269,17 +157,12 @@ namespace undo {
/** Track UNDO tablespace mark for truncate. */
class Truncate {
public:
-
- Truncate()
- :
- m_undo_for_trunc(ULINT_UNDEFINED),
- m_rseg_for_trunc(),
- m_scan_start(1),
- m_purge_rseg_truncate_frequency(
- static_cast<ulint>(
- srv_purge_rseg_truncate_frequency))
+ void create()
{
- /* Do Nothing. */
+ m_undo_for_trunc = ULINT_UNDEFINED;
+ m_scan_start = 1;
+ m_purge_rseg_truncate_frequency =
+ ulint(srv_purge_rseg_truncate_frequency);
}
/** Clear the cached rollback segment. Normally done
@@ -450,53 +333,58 @@ namespace undo {
class purge_sys_t
{
public:
- /** Construct the purge system. */
- purge_sys_t();
- /** Destruct the purge system. */
- ~purge_sys_t();
-
- rw_lock_t latch; /*!< The latch protecting the purge
- view. A purge operation must acquire an
- x-latch here for the instant at which
- it changes the purge view: an undo
- log operation can prevent this by
- obtaining an s-latch here. It also
- protects state and running */
- os_event_t event; /*!< State signal event;
- os_event_set() and os_event_reset()
- are protected by purge_sys_t::latch
- X-lock */
- ulint n_stop; /*!< Counter to track number stops */
- volatile bool running; /*!< true, if purge is active,
- we check this without the latch too */
- volatile purge_state_t state; /*!< Purge coordinator thread states,
- we check this in several places
- without holding the latch. */
+ /** signal state changes; os_event_reset() and os_event_set()
+ are protected by rw_lock_x_lock(latch) */
+ MY_ALIGNED(CACHE_LINE_SIZE)
+ os_event_t event;
+ /** latch protecting view, m_enabled */
+ MY_ALIGNED(CACHE_LINE_SIZE)
+ rw_lock_t latch;
+private:
+ /** whether purge is enabled; protected by latch and my_atomic */
+ int32_t m_enabled;
+ /** number of pending stop() calls without resume() */
+ int32_t m_paused;
+public:
que_t* query; /*!< The query graph which will do the
parallelized purge operation */
+ MY_ALIGNED(CACHE_LINE_SIZE)
ReadView view; /*!< The purge will not remove undo logs
which are >= this view (purge view) */
- ulint n_submitted; /*!< Count of total tasks submitted
- to the task queue */
- ulint n_completed; /*!< Count of total tasks completed */
-
- /*------------------------------*/
- /* The following two fields form the 'purge pointer' which advances
- during a purge, and which is used in history list truncation */
-
- purge_iter_t iter; /* Limit up to which we have read and
- parsed the UNDO log records. Not
- necessarily purged from the indexes.
- Note that this can never be less than
- the limit below, we check for this
- invariant in trx0purge.cc */
- purge_iter_t limit; /* The 'purge pointer' which advances
- during a purge, and which is used in
- history list truncation */
-#ifdef UNIV_DEBUG
- purge_iter_t done; /* Indicate 'purge pointer' which have
- purged already accurately. */
-#endif /* UNIV_DEBUG */
+ /** Total number of tasks submitted by srv_purge_coordinator_thread.
+ Not accessed by other threads. */
+ ulint n_submitted;
+ /** Number of completed tasks. Accessed by srv_purge_coordinator
+ and srv_worker_thread by my_atomic. */
+ ulint n_completed;
+
+ /** Iterator to the undo log records of committed transactions */
+ struct iterator
+ {
+ bool operator<=(const iterator& other) const
+ {
+ if (commit < other.commit) return true;
+ if (commit > other.commit) return false;
+ return undo_no <= other.undo_no;
+ }
+
+ /** @return the commit number of the transaction */
+ trx_id_t trx_no() const { return commit >> 1; }
+ void reset_trx_no(trx_id_t trx_no) { commit = trx_no << 1; }
+
+ /** 2 * trx_t::no + old_insert of the committed transaction */
+ trx_id_t commit;
+ /** The record number within the committed transaction's undo
+ log, increasing, purged from 0 onwards */
+ undo_no_t undo_no;
+ };
+
+ /** The tail of the purge queue; the last parsed undo log of a
+ committed transaction. */
+ iterator tail;
+ /** The head of the purge queue; any older undo logs of committed
+ transactions may be discarded (history list truncation). */
+ iterator head;
/*-----------------------------*/
bool next_stored; /*!< whether rseg holds the next record
to purge */
@@ -524,10 +412,70 @@ public:
undo::Truncate undo_trunc; /*!< Track UNDO tablespace marked
for truncate. */
+
+
+ /**
+ Constructor.
+
+ Some members may require late initialisation, thus we just mark object as
+ uninitialised. Real initialisation happens in create().
+ */
+
+ purge_sys_t() : event(NULL), m_enabled(false) {}
+
+
+ /** Create the instance */
+ void create();
+
+ /** Close the purge system on shutdown */
+ void close();
+
+ /** @return whether purge is enabled */
+ bool enabled()
+ {
+ return my_atomic_load32_explicit(&m_enabled, MY_MEMORY_ORDER_RELAXED);
+ }
+ /** @return whether purge is enabled; the caller must hold latch (S or X) */
+ bool enabled_latched()
+ {
+ ut_ad(rw_lock_own_flagged(&latch, RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
+ return bool(m_enabled);
+ }
+ /** @return whether the purge coordinator is paused */
+ bool paused()
+ { return my_atomic_load32_explicit(&m_paused, MY_MEMORY_ORDER_RELAXED); }
+ /** @return whether the purge coordinator is paused; the caller must hold latch (S or X) */
+ bool paused_latched()
+ {
+ ut_ad(rw_lock_own_flagged(&latch, RW_LOCK_FLAG_X | RW_LOCK_FLAG_S));
+ return m_paused != 0;
+ }
+
+ /** Enable purge at startup. Not protected by latch; the main thread
+ will wait for purge_sys.enabled() in srv_start() */
+ void coordinator_startup()
+ {
+ ut_ad(!enabled());
+ my_atomic_store32_explicit(&m_enabled, true, MY_MEMORY_ORDER_RELAXED);
+ }
+
+ /** Disable purge at shutdown */
+ void coordinator_shutdown()
+ {
+ ut_ad(enabled());
+ my_atomic_store32_explicit(&m_enabled, false, MY_MEMORY_ORDER_RELAXED);
+ }
+
+ /** @return whether the purge coordinator thread is active */
+ bool running();
+ /** Stop purge during FLUSH TABLES FOR EXPORT */
+ void stop();
+ /** Resume purge at UNLOCK TABLES after FLUSH TABLES FOR EXPORT */
+ void resume();
};
/** The global data structure coordinating a purge */
-extern purge_sys_t* purge_sys;
+extern purge_sys_t purge_sys;
/** Info required to purge a record */
struct trx_purge_rec_t {
diff --git a/storage/innobase/include/trx0purge.ic b/storage/innobase/include/trx0purge.ic
index c32651b7a00..cd519a8e64d 100644
--- a/storage/innobase/include/trx0purge.ic
+++ b/storage/innobase/include/trx0purge.ic
@@ -40,24 +40,3 @@ trx_purge_get_log_from_hist(
return(node_addr);
}
-
-/********************************************************************//**
-address of its history list node.
-@return true if purge_sys_t::limit <= purge_sys_t::iter */
-UNIV_INLINE
-bool
-trx_purge_check_limit(void)
-/*=======================*/
-{
- /* limit is used to track till what point purge element has been
- processed and so limit <= iter.
- undo_no ordering is enforced only within the same rollback segment.
- If a transaction uses multiple rollback segments then we need to
- consider the rollback segment space id too. */
- return(purge_sys->iter.trx_no > purge_sys->limit.trx_no
- || (purge_sys->iter.trx_no == purge_sys->limit.trx_no
- && ((purge_sys->iter.undo_no >= purge_sys->limit.undo_no)
- || (purge_sys->iter.undo_rseg_space
- != purge_sys->limit.undo_rseg_space))));
-}
-
diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h
index 2551d5759ae..88c98625462 100644
--- a/storage/innobase/include/trx0rec.h
+++ b/storage/innobase/include/trx0rec.h
@@ -56,22 +56,6 @@ trx_undo_rec_get_type(
/*==================*/
const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
/**********************************************************************//**
-Reads from an undo log record the record compiler info.
-@return compiler info */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_cmpl_info(
-/*=======================*/
- const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
-/**********************************************************************//**
-Returns TRUE if an undo log record contains an extern storage field.
-@return TRUE if extern */
-UNIV_INLINE
-ibool
-trx_undo_rec_get_extern_storage(
-/*============================*/
- const trx_undo_rec_t* undo_rec); /*!< in: undo log record */
-/**********************************************************************//**
Reads the undo log record number.
@return undo no */
UNIV_INLINE
@@ -114,7 +98,7 @@ trx_undo_rec_get_row_ref(
used, as we do NOT copy the data in the
record! */
dict_index_t* index, /*!< in: clustered index */
- dtuple_t** ref, /*!< out, own: row reference */
+ const dtuple_t**ref, /*!< out, own: row reference */
mem_heap_t* heap); /*!< in: memory heap from which the memory
needed is allocated */
/**********************************************************************//**
@@ -260,25 +244,22 @@ trx_undo_prev_version_build(
into this function by purge thread or not.
And if we read "after image" of undo log */
-/***********************************************************//**
-Parses a redo log record of adding an undo log record.
-@return end of log record or NULL */
+/** Parse MLOG_UNDO_INSERT.
+@param[in] ptr log record
+@param[in] end_ptr end of log record buffer
+@param[in,out] page page or NULL
+@return end of log record
+@retval NULL if the log record is incomplete */
byte*
trx_undo_parse_add_undo_rec(
-/*========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page); /*!< in: page or NULL */
-/***********************************************************//**
-Parses a redo log record of erasing of an undo page end.
-@return end of log record or NULL */
-byte*
-trx_undo_parse_erase_page_end(
-/*==========================*/
- byte* ptr, /*!< in: buffer */
- byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
+ const byte* ptr,
+ const byte* end_ptr,
+ page_t* page);
+/** Erase the unused undo log page end.
+@param[in,out] undo_page undo log page
+@return whether the page contained something */
+bool
+trx_undo_erase_page_end(page_t* undo_page);
/** Read from an undo log record a non-virtual column value.
@param[in,out] ptr pointer to remaining part of the undo record
@@ -330,6 +311,8 @@ compilation info multiplied by 16 is ORed to this value in an undo log
record */
#define TRX_UNDO_RENAME_TABLE 9 /*!< RENAME TABLE */
+#define TRX_UNDO_INSERT_DEFAULT 10 /*!< insert a "default value"
+ pseudo-record for instant ALTER */
#define TRX_UNDO_INSERT_REC 11 /* fresh insert into clustered index */
#define TRX_UNDO_UPD_EXIST_REC 12 /* update of a non-delete-marked
record */
@@ -345,6 +328,9 @@ record */
storage fields: used by purge to
free the external storage */
+/** The search tuple corresponding to TRX_UNDO_INSERT_DEFAULT */
+extern const dtuple_t trx_undo_default_rec;
+
#include "trx0rec.ic"
#endif /* trx0rec_h */
diff --git a/storage/innobase/include/trx0rec.ic b/storage/innobase/include/trx0rec.ic
index d0771a94b05..a9794eb213d 100644
--- a/storage/innobase/include/trx0rec.ic
+++ b/storage/innobase/include/trx0rec.ic
@@ -36,35 +36,6 @@ trx_undo_rec_get_type(
}
/**********************************************************************//**
-Reads from an undo log record the record compiler info.
-@return compiler info */
-UNIV_INLINE
-ulint
-trx_undo_rec_get_cmpl_info(
-/*=======================*/
- const trx_undo_rec_t* undo_rec) /*!< in: undo log record */
-{
- return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT);
-}
-
-/**********************************************************************//**
-Returns TRUE if an undo log record contains an extern storage field.
-@return TRUE if extern */
-UNIV_INLINE
-ibool
-trx_undo_rec_get_extern_storage(
-/*============================*/
- const trx_undo_rec_t* undo_rec) /*!< in: undo log record */
-{
- if (mach_read_from_1(undo_rec + 2) & TRX_UNDO_UPD_EXTERN) {
-
- return(TRUE);
- }
-
- return(FALSE);
-}
-
-/**********************************************************************//**
Reads the undo log record number.
@return undo no */
UNIV_INLINE
@@ -93,8 +64,8 @@ trx_undo_rec_copy(
ulint len;
len = mach_read_from_2(undo_rec)
- - ut_align_offset(undo_rec, UNIV_PAGE_SIZE);
- ut_ad(len < UNIV_PAGE_SIZE);
+ - ut_align_offset(undo_rec, srv_page_size);
+ ut_ad(len < srv_page_size);
trx_undo_rec_t* rec = static_cast<trx_undo_rec_t*>(
mem_heap_dup(heap, undo_rec, len));
mach_write_to_2(rec, len);
diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h
index 66e6f137b5a..af5ed73f04b 100644
--- a/storage/innobase/include/trx0roll.h
+++ b/storage/innobase/include/trx0roll.h
@@ -33,7 +33,7 @@ Created 3/26/1996 Heikki Tuuri
#include "mtr0mtr.h"
#include "trx0sys.h"
-extern bool trx_rollback_or_clean_is_active;
+extern bool trx_rollback_is_active;
extern const trx_t* trx_roll_crash_recv_trx;
/*******************************************************************//**
@@ -63,20 +63,17 @@ trx_undo_rec_t*
trx_roll_pop_top_rec_of_trx(trx_t* trx, roll_ptr_t* roll_ptr, mem_heap_t* heap)
MY_ATTRIBUTE((nonnull, warn_unused_result));
-/** Report progress when rolling back a row of a recovered transaction.
-@return whether the rollback should be aborted due to pending shutdown */
-bool
-trx_roll_must_shutdown();
+/** Report progress when rolling back a row of a recovered transaction. */
+void trx_roll_report_progress();
/*******************************************************************//**
Rollback or clean up any incomplete transactions which were
encountered in crash recovery. If the transaction already was
committed, then we clean up a possible insert undo log. If the
-transaction was not yet committed, then we roll it back. */
+transaction was not yet committed, then we roll it back.
+@param all true=roll back all recovered active transactions;
+false=roll back any incomplete dictionary transaction */
void
-trx_rollback_or_clean_recovered(
-/*============================*/
- ibool all); /*!< in: FALSE=roll back dictionary transactions;
- TRUE=roll back all non-PREPARED transactions */
+trx_rollback_recovered(bool all);
/*******************************************************************//**
Rollback or clean up any incomplete transactions which were
encountered in crash recovery. If the transaction already was
@@ -86,11 +83,7 @@ Note: this is done in a background thread.
@return a dummy parameter */
extern "C"
os_thread_ret_t
-DECLARE_THREAD(trx_rollback_or_clean_all_recovered)(
-/*================================================*/
- void* arg MY_ATTRIBUTE((unused)));
- /*!< in: a dummy parameter required by
- os_thread_create */
+DECLARE_THREAD(trx_rollback_all_recovered)(void*);
/*********************************************************************//**
Creates a rollback command node struct.
@return own: rollback node struct */
@@ -225,6 +218,4 @@ struct trx_named_savept_t{
transaction */
};
-#include "trx0roll.ic"
-
#endif
diff --git a/storage/innobase/include/trx0roll.ic b/storage/innobase/include/trx0roll.ic
deleted file mode 100644
index b09a1471150..00000000000
--- a/storage/innobase/include/trx0roll.ic
+++ /dev/null
@@ -1,62 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0roll.ic
-Transaction rollback
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#ifdef UNIV_DEBUG
-/*******************************************************************//**
-Check if undo numbering is maintained while processing undo records
-for rollback.
-@return true if undo numbering is maintained. */
-UNIV_INLINE
-bool
-trx_roll_check_undo_rec_ordering(
-/*=============================*/
- undo_no_t curr_undo_rec_no, /*!< in: record number of
- undo record to process. */
- ulint curr_undo_space_id, /*!< in: space-id of rollback
- segment that contains the
- undo record to process. */
- const trx_t* trx) /*!< in: transaction */
-{
- /* Each transaction now can have multiple rollback segments.
- If a transaction involves temp and non-temp tables, both the rollback
- segments will be active. In this case undo records will be distrubuted
- across the two rollback segments.
- CASE-1: UNDO action will apply all undo records from one rollback
- segment before moving to next. This means undo record numbers can't be
- sequential but ordering is still enforced as next undo record number
- should be < processed undo record number.
- CASE-2: For normal rollback (not initiated by crash) all rollback
- segments will be active (including non-redo).
- Based on transaction operation pattern undo record number of first
- undo record from this new rollback segment can be > last undo number
- from previous rollback segment and so we ignore this check if
- rollback segments are switching. Once switched new rollback segment
- should re-follow undo record number pattern (as mentioned in CASE-1). */
-
- return(curr_undo_space_id != trx->undo_rseg_space
- || curr_undo_rec_no + 1 <= trx->undo_no);
-}
-#endif /* UNIV_DEBUG */
-
diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h
index 48c5133644c..dbd80486b71 100644
--- a/storage/innobase/include/trx0rseg.h
+++ b/storage/innobase/include/trx0rseg.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -27,10 +27,8 @@ Created 3/26/1996 Heikki Tuuri
#ifndef trx0rseg_h
#define trx0rseg_h
-#include "trx0types.h"
#include "trx0sys.h"
#include "fut0lst.h"
-#include <vector>
/** Gets a rollback segment header.
@param[in] space space where placed
@@ -39,10 +37,7 @@ Created 3/26/1996 Heikki Tuuri
@return rollback segment header, page x-latched */
UNIV_INLINE
trx_rsegf_t*
-trx_rsegf_get(
- ulint space,
- ulint page_no,
- mtr_t* mtr);
+trx_rsegf_get(fil_space_t* space, ulint page_no, mtr_t* mtr);
/** Gets a newly created rollback segment header.
@param[in] space space where placed
@@ -57,16 +52,6 @@ trx_rsegf_get_new(
mtr_t* mtr);
/***************************************************************//**
-Gets the file page number of the nth undo log slot.
-@return page number of the undo log segment */
-UNIV_INLINE
-ulint
-trx_rsegf_get_nth_undo(
-/*===================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- ulint n, /*!< in: index of slot */
- mtr_t* mtr); /*!< in: mtr */
-/***************************************************************//**
Sets the file page number of the nth undo log slot. */
UNIV_INLINE
void
@@ -81,25 +66,20 @@ Looks for a free slot for an undo log segment.
@return slot index or ULINT_UNDEFINED if not found */
UNIV_INLINE
ulint
-trx_rsegf_undo_find_free(
-/*=====================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- mtr_t* mtr); /*!< in: mtr */
+trx_rsegf_undo_find_free(const trx_rsegf_t* rsegf);
-/** Creates a rollback segment header.
-This function is called only when a new rollback segment is created in
-the database.
-@param[in] space space id
-@param[in] max_size max size in pages
-@param[in] rseg_slot_no rseg id == slot number in trx sys
+/** Create a rollback segment header.
+@param[in,out] space system, undo, or temporary tablespace
+@param[in] rseg_id rollback segment identifier
+@param[in,out] sys_header the TRX_SYS page (NULL for temporary rseg)
@param[in,out] mtr mini-transaction
@return page number of the created segment, FIL_NULL if fail */
ulint
trx_rseg_header_create(
- ulint space,
- ulint max_size,
- ulint rseg_slot_no,
- mtr_t* mtr);
+ fil_space_t* space,
+ ulint rseg_id,
+ buf_block_t* sys_header,
+ mtr_t* mtr);
/** Initialize the rollback segments in memory at database startup. */
void
@@ -133,7 +113,7 @@ trx_rseg_get_n_undo_tablespaces(
ulint* space_ids); /*!< out: array of space ids of
UNDO tablespaces */
/* Number of undo log slots in a rollback segment file copy */
-#define TRX_RSEG_N_SLOTS (UNIV_PAGE_SIZE / 16)
+#define TRX_RSEG_N_SLOTS (srv_page_size / 16)
/* Maximum number of transactions supported by a single rollback segment */
#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2)
@@ -150,32 +130,25 @@ struct trx_rseg_t {
RsegMutex mutex;
/** space where the rollback segment header is placed */
- ulint space;
+ fil_space_t* space;
/** page number of the rollback segment header */
ulint page_no;
- /** maximum allowed size in pages */
- ulint max_size;
-
/** current size in pages */
ulint curr_size;
/*--------------------------------------------------------*/
- /* Fields for update undo logs */
- /** List of update undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_list;
-
- /** List of update undo log segments cached for fast reuse */
- UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_cached;
+ /* Fields for undo logs */
+ /** List of undo logs */
+ UT_LIST_BASE_NODE_T(trx_undo_t) undo_list;
- /*--------------------------------------------------------*/
- /* Fields for insert undo logs */
- /** List of insert undo logs */
- UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_list;
+ /** List of undo log segments cached for fast reuse */
+ UT_LIST_BASE_NODE_T(trx_undo_t) undo_cached;
- /** List of insert undo log segments cached for fast reuse */
- UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_cached;
+ /** List of recovered old insert_undo logs of incomplete
+ transactions (to roll back or XA COMMIT & purge) */
+ UT_LIST_BASE_NODE_T(trx_undo_t) old_insert_list;
/*--------------------------------------------------------*/
@@ -186,11 +159,11 @@ struct trx_rseg_t {
/** Byte offset of the last not yet purged log header */
ulint last_offset;
- /** Transaction number of the last not yet purged log */
- trx_id_t last_trx_no;
+ /** trx_t::no * 2 + old_insert of the last not yet purged log */
+ trx_id_t last_commit;
- /** TRUE if the last not yet purged log needs purging */
- ibool last_del_marks;
+ /** Whether the log segment needs purge */
+ bool needs_purge;
/** Reference counter to track rseg allocated transactions. */
ulint trx_ref_count;
@@ -199,23 +172,31 @@ struct trx_rseg_t {
UNDO-tablespace marked for truncate. */
bool skip_allocation;
+ /** @return the transaction number part of last_commit, that is,
+ last_commit shifted right by one bit so that the flag kept in bit 0
+ by set_last_trx_no() is dropped */
+ trx_id_t last_trx_no() const { return last_commit >> 1; }
+
+ void set_last_trx_no(trx_id_t trx_no, bool is_update)
+ { /* Pack both arguments into last_commit: trx_no is shifted left
+ by one bit and is_update occupies bit 0. last_trx_no() undoes
+ the shift. */
+ last_commit = trx_no << 1 | trx_id_t(is_update);
+ }
+
/** @return whether the rollback segment is persistent */
bool is_persistent() const
{
- ut_ad(space == SRV_TMP_SPACE_ID
- || space == TRX_SYS_SPACE
+ ut_ad(space == fil_system.temp_space
+ || space == fil_system.sys_space
|| (srv_undo_space_id_start > 0
- && space >= srv_undo_space_id_start
- && space <= srv_undo_space_id_start
+ && space->id >= srv_undo_space_id_start
+ && space->id <= srv_undo_space_id_start
+ TRX_SYS_MAX_UNDO_SPACES));
- ut_ad(space == SRV_TMP_SPACE_ID
- || space == TRX_SYS_SPACE
+ ut_ad(space == fil_system.temp_space
+ || space == fil_system.sys_space
|| (srv_undo_space_id_start > 0
- && space >= srv_undo_space_id_start
- && space <= srv_undo_space_id_start
+ && space->id >= srv_undo_space_id_start
+ && space->id <= srv_undo_space_id_start
+ srv_undo_tablespaces_active)
|| !srv_was_started);
- return(space != SRV_TMP_SPACE_ID);
+ return(space->id != SRV_TMP_SPACE_ID);
}
};
@@ -232,19 +213,99 @@ struct trx_rseg_t {
/* Transaction rollback segment header */
/*-------------------------------------------------------------*/
-#define TRX_RSEG_MAX_SIZE 0 /* Maximum allowed size for rollback
- segment in pages */
-#define TRX_RSEG_HISTORY_SIZE 4 /* Number of file pages occupied
- by the logs in the history list */
-#define TRX_RSEG_HISTORY 8 /* The update undo logs for committed
- transactions */
+/** 0xfffffffe = pre-MariaDB 10.3.5 format; 0=MariaDB 10.3.5 or later */
+#define TRX_RSEG_FORMAT 0
+/** Number of pages in the TRX_RSEG_HISTORY list */
+#define TRX_RSEG_HISTORY_SIZE 4
+/** Committed transaction logs that have not been purged yet */
+#define TRX_RSEG_HISTORY 8
#define TRX_RSEG_FSEG_HEADER (8 + FLST_BASE_NODE_SIZE)
/* Header for the file segment where
this page is placed */
#define TRX_RSEG_UNDO_SLOTS (8 + FLST_BASE_NODE_SIZE + FSEG_HEADER_SIZE)
/* Undo log segment slots */
+/** Maximum transaction ID (valid only if TRX_RSEG_FORMAT is 0) */
+#define TRX_RSEG_MAX_TRX_ID (TRX_RSEG_UNDO_SLOTS + TRX_RSEG_N_SLOTS \
+ * TRX_RSEG_SLOT_SIZE)
+
+/** 8 bytes offset within the binlog file.
+This and the following offsets are parenthesized so that each macro
+expands to a single operand regardless of the surrounding expression. */
+#define TRX_RSEG_BINLOG_OFFSET (TRX_RSEG_MAX_TRX_ID + 8)
+/** MySQL log file name, 512 bytes, including terminating NUL
+(valid only if TRX_RSEG_FORMAT is 0).
+If no binlog information is present, the first byte is NUL. */
+#define TRX_RSEG_BINLOG_NAME (TRX_RSEG_MAX_TRX_ID + 16)
+/** Maximum length of binlog file name, including terminating NUL, in bytes */
+#define TRX_RSEG_BINLOG_NAME_LEN 512
+
+#ifdef WITH_WSREP
+/** The offset to WSREP XID headers; they start right after the
+TRX_RSEG_BINLOG_NAME_LEN bytes reserved for TRX_RSEG_BINLOG_NAME */
+#define TRX_RSEG_WSREP_XID_INFO (TRX_RSEG_BINLOG_NAME + TRX_RSEG_BINLOG_NAME_LEN)
+
+/** WSREP XID format (1 if present and valid, 0 if not present) */
+#define TRX_RSEG_WSREP_XID_FORMAT TRX_RSEG_WSREP_XID_INFO
+/** WSREP XID GTRID length */
+#define TRX_RSEG_WSREP_XID_GTRID_LEN (TRX_RSEG_WSREP_XID_INFO + 4)
+/** WSREP XID bqual length */
+#define TRX_RSEG_WSREP_XID_BQUAL_LEN (TRX_RSEG_WSREP_XID_INFO + 8)
+/** WSREP XID data (XIDDATASIZE bytes) */
+#define TRX_RSEG_WSREP_XID_DATA (TRX_RSEG_WSREP_XID_INFO + 12)
+#endif /* WITH_WSREP */
+
/*-------------------------------------------------------------*/
+/** Read the page number of an undo log slot.
+The value is read directly from the header with mach_read_from_4();
+the function takes no mini-transaction.
+@param[in] rsegf rollback segment header
+@param[in] n slot number; must be less than TRX_RSEG_N_SLOTS
+(checked with ut_ad())
+@return the 4-byte value stored in slot n */
+inline
+uint32_t
+trx_rsegf_get_nth_undo(const trx_rsegf_t* rsegf, ulint n)
+{
+ ut_ad(n < TRX_RSEG_N_SLOTS);
+ return mach_read_from_4(rsegf + TRX_RSEG_UNDO_SLOTS
+ + n * TRX_RSEG_SLOT_SIZE);
+}
+
+#ifdef WITH_WSREP
+/** Update the WSREP XID information in rollback segment header.
+@param[in,out] rseg_header rollback segment header
+@param[in] xid WSREP XID
+@param[in,out] mtr mini-transaction */
+void
+trx_rseg_update_wsrep_checkpoint(
+ trx_rsegf_t* rseg_header,
+ const XID* xid,
+ mtr_t* mtr);
+
+/** Update WSREP checkpoint XID in first rollback segment header
+as part of wsrep_set_SE_checkpoint() when it is guaranteed that there
+are no wsrep transactions committing.
+If the UUID part of the WSREP XID does not match the UUIDs of XIDs already
+stored into rollback segments, the WSREP XID in all the remaining rollback
+segments will be reset.
+@param[in] xid WSREP XID */
+void trx_rseg_update_wsrep_checkpoint(const XID* xid);
+
+/** Recover the latest WSREP checkpoint XID.
+@param[out] xid WSREP XID
+@return whether the WSREP XID was found */
+bool trx_rseg_read_wsrep_checkpoint(XID& xid);
+#endif /* WITH_WSREP */
+
+/** Upgrade a rollback segment header page to MariaDB 10.3 format.
+@param[in,out] rseg_header rollback segment header page
+@param[in,out] mtr mini-transaction */
+void trx_rseg_format_upgrade(trx_rsegf_t* rseg_header, mtr_t* mtr);
+
+/** Update the offset information about the end of the binlog entry
+which corresponds to the transaction just being committed.
+In a replication slave, this updates the master binlog position
+up to which replication has proceeded.
+@param[in,out] rseg_header rollback segment header
+@param[in] trx committing transaction
+@param[in,out] mtr mini-transaction */
+void
+trx_rseg_update_binlog_offset(byte* rseg_header, const trx_t* trx, mtr_t* mtr);
+
#include "trx0rseg.ic"
#endif
diff --git a/storage/innobase/include/trx0rseg.ic b/storage/innobase/include/trx0rseg.ic
index dac7dadfb68..9edfe897155 100644
--- a/storage/innobase/include/trx0rseg.ic
+++ b/storage/innobase/include/trx0rseg.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -34,28 +34,18 @@ Created 3/26/1996 Heikki Tuuri
@return rollback segment header, page x-latched */
UNIV_INLINE
trx_rsegf_t*
-trx_rsegf_get(
- ulint space,
- ulint page_no,
- mtr_t* mtr)
+trx_rsegf_get(fil_space_t* space, ulint page_no, mtr_t* mtr)
{
- buf_block_t* block;
- trx_rsegf_t* header;
-
- ut_ad(space <= srv_undo_space_id_start + srv_undo_tablespaces_active
- || space == SRV_TMP_SPACE_ID
+ ut_ad(space == fil_system.sys_space || space == fil_system.temp_space
+ || srv_is_undo_tablespace(space->id)
|| !srv_was_started);
- ut_ad(space <= srv_undo_space_id_start + TRX_SYS_MAX_UNDO_SPACES
- || space == SRV_TMP_SPACE_ID);
- block = buf_page_get(
- page_id_t(space, page_no), univ_page_size, RW_X_LATCH, mtr);
+ buf_block_t* block = buf_page_get(page_id_t(space->id, page_no),
+ univ_page_size, RW_X_LATCH, mtr);
buf_block_dbg_add_level(block, SYNC_RSEG_HEADER);
- header = TRX_RSEG + buf_block_get_frame(block);
-
- return(header);
+ return TRX_RSEG + block->frame;
}
/** Gets a newly created rollback segment header.
@@ -88,23 +78,6 @@ trx_rsegf_get_new(
}
/***************************************************************//**
-Gets the file page number of the nth undo log slot.
-@return page number of the undo log segment */
-UNIV_INLINE
-ulint
-trx_rsegf_get_nth_undo(
-/*===================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- ulint n, /*!< in: index of slot */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_a(n < TRX_RSEG_N_SLOTS);
-
- return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS
- + n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr));
-}
-
-/***************************************************************//**
Sets the file page number of the nth undo log slot. */
UNIV_INLINE
void
@@ -126,10 +99,7 @@ Looks for a free slot for an undo log segment.
@return slot index or ULINT_UNDEFINED if not found */
UNIV_INLINE
ulint
-trx_rsegf_undo_find_free(
-/*=====================*/
- trx_rsegf_t* rsegf, /*!< in: rollback segment header */
- mtr_t* mtr) /*!< in: mtr */
+trx_rsegf_undo_find_free(const trx_rsegf_t* rsegf)
{
ulint i;
ulint page_no;
@@ -143,7 +113,7 @@ trx_rsegf_undo_find_free(
#endif
for (i = 0; i < max_slots; i++) {
- page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr);
+ page_no = trx_rsegf_get_nth_undo(rsegf, i);
if (page_no == FIL_NULL) {
return(i);
diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h
index ebe70a1c70e..6af212d35ff 100644
--- a/storage/innobase/include/trx0sys.h
+++ b/storage/innobase/include/trx0sys.h
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2016, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -35,7 +35,6 @@ Created 3/26/1996 Heikki Tuuri
#include "mem0mem.h"
#include "mtr0mtr.h"
#include "ut0byte.h"
-#include "mem0mem.h"
#include "ut0lst.h"
#include "read0types.h"
#include "page0types.h"
@@ -47,185 +46,84 @@ Created 3/26/1996 Heikki Tuuri
typedef UT_LIST_BASE_NODE_T(trx_t) trx_ut_list_t;
-// Forward declaration
-class MVCC;
-class ReadView;
-
-/** The transaction system */
-extern trx_sys_t* trx_sys;
-
/** Checks if a page address is the trx sys header page.
@param[in] page_id page id
@return true if trx sys header page */
-UNIV_INLINE
+inline
bool
-trx_sys_hdr_page(
- const page_id_t& page_id);
-
-/** Initialize the transaction system main-memory data structures. */
-void trx_sys_init_at_db_start();
+trx_sys_hdr_page(const page_id_t& page_id)
+{
+ return(page_id.space() == TRX_SYS_SPACE
+ && page_id.page_no() == TRX_SYS_PAGE_NO);
+}
/*****************************************************************//**
-Creates the trx_sys instance and initializes purge_queue and mutex. */
-void
-trx_sys_create(void);
-/*================*/
-/*****************************************************************//**
Creates and initializes the transaction system at the database creation. */
void
trx_sys_create_sys_pages(void);
/*==========================*/
-/** @return an unallocated rollback segment slot in the TRX_SYS header
+/** Find an available rollback segment.
+@param[in] sys_header the TRX_SYS page
+@return an unallocated rollback segment slot in the TRX_SYS header
@retval ULINT_UNDEFINED if not found */
ulint
-trx_sysf_rseg_find_free(mtr_t* mtr);
-/**********************************************************************//**
-Gets a pointer to the transaction system file copy and x-locks its page.
-@return pointer to system file copy, page x-locked */
-UNIV_INLINE
-trx_sysf_t*
-trx_sysf_get(
-/*=========*/
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Gets the space of the nth rollback segment slot in the trx system
-file copy.
-@return space id */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_space(
-/*====================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Gets the page number of the nth rollback segment slot in the trx system
-file copy.
-@return page number, FIL_NULL if slot unused */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Sets the space id of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_space(
-/*====================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- ulint space, /*!< in: space id */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Sets the page number of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- ulint page_no, /*!< in: page number, FIL_NULL if
- the slot is reset to unused */
- mtr_t* mtr); /*!< in: mtr */
-/*****************************************************************//**
-Allocates a new transaction id.
-@return new, allocated trx id */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_id();
-/*===================*/
-/*****************************************************************//**
-Determines the maximum transaction id.
-@return maximum currently allocated trx id; will be stale after the
-next call to trx_sys_get_new_trx_id() */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_max_trx_id(void);
-/*========================*/
+trx_sys_rseg_find_free(const buf_block_t* sys_header);
+/** Request the TRX_SYS page.
+The page is requested with buf_page_get() using
+page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO).
+@param[in,out] mtr mini-transaction passed to buf_page_get()
+@param[in] rw whether to lock the page for writing
+(RW_X_LATCH if true, RW_S_LATCH if false)
+@return the TRX_SYS page
+@retval NULL if the page cannot be read */
+inline
+buf_block_t*
+trx_sysf_get(mtr_t* mtr, bool rw = true)
+{
+ buf_block_t* block = buf_page_get(
+ page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO),
+ univ_page_size, rw ? RW_X_LATCH : RW_S_LATCH, mtr);
+ if (block) { /* buf_block_dbg_add_level() is only called for a non-NULL block */
+ buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
+ }
+ return block;
+}
#ifdef UNIV_DEBUG
/* Flag to control TRX_RSEG_N_SLOTS behavior debugging. */
extern uint trx_rseg_n_slots_debug;
#endif
-/*****************************************************************//**
-Writes a trx id to an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_write_... */
+/** Write DB_TRX_ID.
+@param[out] db_trx_id the DB_TRX_ID field to be written to
+@param[in] id transaction ID */
UNIV_INLINE
void
-trx_write_trx_id(
-/*=============*/
- byte* ptr, /*!< in: pointer to memory where written */
- trx_id_t id); /*!< in: id */
-/*****************************************************************//**
-Reads a trx id from an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_read_...
+trx_write_trx_id(byte* db_trx_id, trx_id_t id)
+{
+ compile_time_assert(DATA_TRX_ID_LEN == 6);
+ ut_ad(id);
+ mach_write_to_6(db_trx_id, id);
+}
+
+/** Read a transaction identifier.
@return id */
-UNIV_INLINE
+inline
trx_id_t
-trx_read_trx_id(
-/*============*/
- const byte* ptr); /*!< in: pointer to memory from where to read */
-/****************************************************************//**
-Looks for the trx instance with the given id in the rw trx_list.
-@return the trx handle or NULL if not found */
-UNIV_INLINE
-trx_t*
-trx_get_rw_trx_by_id(
-/*=================*/
- trx_id_t trx_id);/*!< in: trx id to search for */
-/****************************************************************//**
-Returns the minimum trx id in rw trx list. This is the smallest id for which
-the trx can possibly be active. (But, you must look at the trx->state to
-find out if the minimum trx id transaction itself is active, or already
-committed.)
-@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
-UNIV_INLINE
-trx_id_t
-trx_rw_min_trx_id(void);
-/*===================*/
-/****************************************************************//**
-Checks if a rw transaction with the given id is active.
-@return transaction instance if active, or NULL */
-UNIV_INLINE
-trx_t*
-trx_rw_is_active_low(
-/*=================*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt); /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
-/****************************************************************//**
-Checks if a rw transaction with the given id is active. If the caller is
-not holding trx_sys->mutex, the transaction may already have been
-committed.
-@return transaction instance if active, or NULL; */
-UNIV_INLINE
-trx_t*
-trx_rw_is_active(
-/*=============*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt, /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
- bool do_ref_count); /*!< in: if true then increment the
- trx_t::n_ref_count */
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
-/***********************************************************//**
-Assert that a transaction has been recovered.
-@return TRUE */
-UNIV_INLINE
-ibool
-trx_assert_recovered(
-/*=================*/
- trx_id_t trx_id) /*!< in: transaction identifier */
- MY_ATTRIBUTE((warn_unused_result));
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
+trx_read_trx_id(const byte* ptr)
+{
+ compile_time_assert(DATA_TRX_ID_LEN == 6);
+ return(mach_read_from_6(ptr));
+}
+
+#ifdef UNIV_DEBUG
+/** Check that the DB_TRX_ID in a record is valid.
+The check itself is a debug assertion (ut_ad()) that the stored
+identifier is either 0 or greater than trx_id.
+@param[in] db_trx_id the DB_TRX_ID column to validate
+@param[in] trx_id the id of the ALTER TABLE transaction
+@return true (unconditionally; a violation is reported by the assertion,
+not by the return value) */
+inline bool trx_id_check(const void* db_trx_id, trx_id_t trx_id)
+{
+ trx_id_t id = trx_read_trx_id(static_cast<const byte*>(db_trx_id));
+ ut_ad(id == 0 || id > trx_id);
+ return true;
+}
+#endif
+
/*****************************************************************//**
Updates the offset information about the end of the MySQL binlog entry
which corresponds to the transaction just being committed. In a MySQL
@@ -236,138 +134,17 @@ trx_sys_update_mysql_binlog_offset(
/*===============================*/
const char* file_name,/*!< in: MySQL log file name */
int64_t offset, /*!< in: position in that log file */
- trx_sysf_t* sys_header, /*!< in: trx sys header */
- mtr_t* mtr); /*!< in: mtr */
+ buf_block_t* sys_header, /*!< in,out: trx sys header */
+ mtr_t* mtr); /*!< in,out: mini-transaction */
/** Display the MySQL binlog offset info if it is present in the trx
system header. */
void
trx_sys_print_mysql_binlog_offset();
-#ifdef WITH_WSREP
-/** Update WSREP XID info in sys_header of TRX_SYS_PAGE_NO = 5.
-@param[in] xid Transaction XID
-@param[in,out] sys_header sys_header
-@param[in] mtr minitransaction */
-UNIV_INTERN
-void
-trx_sys_update_wsrep_checkpoint(
- const XID* xid,
- trx_sysf_t* sys_header,
- mtr_t* mtr);
-
-/** Read WSREP checkpoint XID from sys header.
-@param[out] xid WSREP XID
-@return whether the checkpoint was present */
-UNIV_INTERN
-bool
-trx_sys_read_wsrep_checkpoint(XID* xid);
-#endif /* WITH_WSREP */
-
-/** Initializes the tablespace tag system. */
-void
-trx_sys_file_format_init(void);
-/*==========================*/
-
-/*****************************************************************//**
-Closes the tablespace tag system. */
-void
-trx_sys_file_format_close(void);
-/*===========================*/
-
-/********************************************************************//**
-Tags the system table space with minimum format id if it has not been
-tagged yet.
-WARNING: This function is only called during the startup and AFTER the
-redo log application during recovery has finished. */
-void
-trx_sys_file_format_tag_init(void);
-/*==============================*/
-
-/*****************************************************************//**
-Shutdown/Close the transaction system. */
-void
-trx_sys_close(void);
-/*===============*/
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
- const ulint id); /*!< in: id of the file format */
-/*****************************************************************//**
-Set the file format id unconditionally except if it's already the
-same value.
-@return TRUE if value updated */
-ibool
-trx_sys_file_format_max_set(
-/*========================*/
- ulint format_id, /*!< in: file format id */
- const char** name); /*!< out: max file format name or
- NULL if not needed. */
/** Create the rollback segments.
@return whether the creation succeeded */
bool
trx_sys_create_rsegs();
-/*****************************************************************//**
-Get the number of transaction in the system, independent of their state.
-@return count of transactions in trx_sys_t::trx_list */
-UNIV_INLINE
-ulint
-trx_sys_get_n_rw_trx(void);
-/*======================*/
-
-/*********************************************************************
-Check if there are any active (non-prepared) transactions.
-@return total number of active transactions or 0 if none */
-ulint
-trx_sys_any_active_transactions(void);
-/*=================================*/
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the max format name */
-const char*
-trx_sys_file_format_max_get(void);
-/*=============================*/
-/*****************************************************************//**
-Check for the max file format tag stored on disk.
-@return DB_SUCCESS or error code */
-dberr_t
-trx_sys_file_format_max_check(
-/*==========================*/
- ulint max_format_id); /*!< in: the max format id to check */
-/********************************************************************//**
-Update the file format tag in the system tablespace only if the given
-format id is greater than the known max id.
-@return TRUE if format_id was bigger than the known max id */
-ibool
-trx_sys_file_format_max_upgrade(
-/*============================*/
- const char** name, /*!< out: max file format name */
- ulint format_id); /*!< in: file format identifier */
-/*****************************************************************//**
-Get the name representation of the file format from its id.
-@return pointer to the name */
-const char*
-trx_sys_file_format_id_to_name(
-/*===========================*/
- const ulint id); /*!< in: id of the file format */
-
-/**
-Add the transaction to the RW transaction set
-@param trx transaction instance to add */
-UNIV_INLINE
-void
-trx_sys_rw_trx_add(trx_t* trx);
-
-#ifdef UNIV_DEBUG
-/*************************************************************//**
-Validate the trx_sys_t::rw_trx_list.
-@return true if the list is valid */
-bool
-trx_sys_validate_trx_list();
-/*========================*/
-#endif /* UNIV_DEBUG */
/** The automatically created system rollback segment has this id */
#define TRX_SYS_SYSTEM_RSEG_ID 0
@@ -377,18 +154,13 @@ trx_sys_validate_trx_list();
/** Transaction system header */
/*------------------------------------------------------------- @{ */
-#define TRX_SYS_TRX_ID_STORE 0 /*!< the maximum trx id or trx
- number modulo
- TRX_SYS_TRX_ID_UPDATE_MARGIN
- written to a file page by any
- transaction; the assignment of
- transaction ids continues from
- this number rounded up by
- TRX_SYS_TRX_ID_UPDATE_MARGIN
- plus
- TRX_SYS_TRX_ID_UPDATE_MARGIN
- when the database is
- started */
+/** In old versions of InnoDB, this persisted the value of
+trx_sys.get_max_trx_id(). Starting with MariaDB 10.3.5,
+the field TRX_RSEG_MAX_TRX_ID in rollback segment header pages
+and the fields TRX_UNDO_TRX_ID, TRX_UNDO_TRX_NO in undo log pages
+are used instead. The field only exists for the purpose of upgrading
+from older MySQL or MariaDB versions. */
+#define TRX_SYS_TRX_ID_STORE 0
#define TRX_SYS_FSEG_HEADER 8 /*!< segment header for the
tablespace segment the trx
system is created into */
@@ -398,16 +170,52 @@ trx_sys_validate_trx_list();
slots */
/*------------------------------------------------------------- @} */
-/* Max number of rollback segments: the number of segment specification slots
-in the transaction system array; rollback segment id must fit in one (signed)
-byte, therefore 128; each slot is currently 8 bytes in size. If you want
-to raise the level to 256 then you will need to fix some assertions that
-impose the 7 bit restriction. e.g., mach_write_to_3() */
+/** The number of rollback segments; rollback segment id must fit in
+the 7 bits reserved for it in DB_ROLL_PTR. */
#define TRX_SYS_N_RSEGS 128
/** Maximum number of undo tablespaces (not counting the system tablespace) */
#define TRX_SYS_MAX_UNDO_SPACES (TRX_SYS_N_RSEGS - 1)
-/** Maximum length of MySQL binlog file name, in bytes. */
+/* Rollback segment specification slot offsets */
+
+/** the tablespace ID of an undo log header; starting with
+MySQL/InnoDB 5.1.7, this is FIL_NULL if the slot is unused */
+#define TRX_SYS_RSEG_SPACE 0
+/** the page number of an undo log header, or FIL_NULL if unused */
+#define TRX_SYS_RSEG_PAGE_NO 4
+/** Size of a rollback segment specification slot */
+#define TRX_SYS_RSEG_SLOT_SIZE 8
+
+/** Read the tablespace ID of a rollback segment slot.
+The 4-byte TRX_SYS_RSEG_SPACE field of slot rseg_id is read from
+sys_header->frame with mach_read_from_4().
+@param[in] sys_header TRX_SYS page
+@param[in] rseg_id rollback segment identifier; must be less than
+TRX_SYS_N_RSEGS (checked with ut_ad())
+@return undo tablespace id */
+inline
+uint32_t
+trx_sysf_rseg_get_space(const buf_block_t* sys_header, ulint rseg_id)
+{
+ ut_ad(rseg_id < TRX_SYS_N_RSEGS);
+ return mach_read_from_4(TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_SPACE
+ + rseg_id * TRX_SYS_RSEG_SLOT_SIZE
+ + sys_header->frame);
+}
+
+/** Read the page number of a rollback segment slot.
+The 4-byte TRX_SYS_RSEG_PAGE_NO field of slot rseg_id is read from
+sys_header->frame with mach_read_from_4().
+@param[in] sys_header TRX_SYS page
+@param[in] rseg_id rollback segment identifier; must be less than
+TRX_SYS_N_RSEGS (checked with ut_ad())
+@return undo page number */
+inline
+uint32_t
+trx_sysf_rseg_get_page_no(const buf_block_t* sys_header, ulint rseg_id)
+{
+ ut_ad(rseg_id < TRX_SYS_N_RSEGS);
+ return mach_read_from_4(TRX_SYS + TRX_SYS_RSEGS + TRX_SYS_RSEG_PAGE_NO
+ + rseg_id * TRX_SYS_RSEG_SLOT_SIZE
+ + sys_header->frame);
+}
+
+/** Maximum length of MySQL binlog file name, in bytes.
+(Used before MariaDB 10.3.5.) */
#define TRX_SYS_MYSQL_LOG_NAME_LEN 512
/** Contents of TRX_SYS_MYSQL_LOG_MAGIC_N_FLD */
#define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344
@@ -416,7 +224,7 @@ impose the 7 bit restriction. e.g., mach_write_to_3() */
# error "UNIV_PAGE_SIZE_MIN < 4096"
#endif
/** The offset of the MySQL binlog offset info in the trx system header */
-#define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 1000)
+#define TRX_SYS_MYSQL_LOG_INFO (srv_page_size - 1000)
#define TRX_SYS_MYSQL_LOG_MAGIC_N_FLD 0 /*!< magic number which is
TRX_SYS_MYSQL_LOG_MAGIC_N
if we have valid data in the
@@ -425,7 +233,7 @@ impose the 7 bit restriction. e.g., mach_write_to_3() */
within that file */
#define TRX_SYS_MYSQL_LOG_NAME 12 /*!< MySQL log file name */
-/** Memory map TRX_SYS_PAGE_NO = 5 when UNIV_PAGE_SIZE = 4096
+/** Memory map TRX_SYS_PAGE_NO = 5 when srv_page_size = 4096
0...37 FIL_HEADER
38...45 TRX_SYS_TRX_ID_STORE
@@ -441,7 +249,7 @@ impose the 7 bit restriction. e.g., mach_write_to_3() */
...
...1063 TRX_SYS_RSEG_PAGE_NO for slot 126
-(UNIV_PAGE_SIZE-3500 WSREP ::: FAIL would overwrite undo tablespace
+(srv_page_size-3500 WSREP ::: FAIL would overwrite undo tablespace
space_id, page_no pairs :::)
596 TRX_SYS_WSREP_XID_INFO TRX_SYS_WSREP_XID_MAGIC_N_FLD
600 TRX_SYS_WSREP_XID_FORMAT
@@ -451,7 +259,7 @@ space_id, page_no pairs :::)
739 TRX_SYS_WSREP_XID_DATA_END
FIXED WSREP XID info offsets for 4k page size 10.0.32-galera
-(UNIV_PAGE_SIZE-2500)
+(srv_page_size-2500)
1596 TRX_SYS_WSREP_XID_INFO TRX_SYS_WSREP_XID_MAGIC_N_FLD
1600 TRX_SYS_WSREP_XID_FORMAT
1604 TRX_SYS_WSREP_XID_GTRID_LEN
@@ -459,19 +267,19 @@ FIXED WSREP XID info offsets for 4k page size 10.0.32-galera
1612 TRX_SYS_WSREP_XID_DATA (len = 128)
1739 TRX_SYS_WSREP_XID_DATA_END
-(UNIV_PAGE_SIZE - 2000 MYSQL MASTER LOG)
+(srv_page_size - 2000 MYSQL MASTER LOG)
2096 TRX_SYS_MYSQL_MASTER_LOG_INFO TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
2100 TRX_SYS_MYSQL_LOG_OFFSET_HIGH
2104 TRX_SYS_MYSQL_LOG_OFFSET_LOW
2108 TRX_SYS_MYSQL_LOG_NAME
-(UNIV_PAGE_SIZE - 1000 MYSQL LOG)
+(srv_page_size - 1000 MYSQL LOG)
3096 TRX_SYS_MYSQL_LOG_INFO TRX_SYS_MYSQL_LOG_MAGIC_N_FLD
3100 TRX_SYS_MYSQL_LOG_OFFSET_HIGH
3104 TRX_SYS_MYSQL_LOG_OFFSET_LOW
3108 TRX_SYS_MYSQL_LOG_NAME
-(UNIV_PAGE_SIZE - 200 DOUBLEWRITE)
+(srv_page_size - 200 DOUBLEWRITE)
3896 TRX_SYS_DOUBLEWRITE TRX_SYS_DOUBLEWRITE_FSEG
3906 TRX_SYS_DOUBLEWRITE_MAGIC
3910 TRX_SYS_DOUBLEWRITE_BLOCK1
@@ -479,12 +287,12 @@ FIXED WSREP XID info offsets for 4k page size 10.0.32-galera
3918 TRX_SYS_DOUBLEWRITE_REPEAT
3930 TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N
-(UNIV_PAGE_SIZE - 8, TAILER)
+(srv_page_size - 8, TAILER)
4088..4096 FIL_TAILER
*/
#ifdef WITH_WSREP
-/** The offset to WSREP XID headers */
+/** The offset to WSREP XID headers (used before MariaDB 10.3.5) */
#define TRX_SYS_WSREP_XID_INFO std::max(srv_page_size - 3500, 1596UL)
#define TRX_SYS_WSREP_XID_MAGIC_N_FLD 0
#define TRX_SYS_WSREP_XID_MAGIC_N 0x77737265
@@ -500,7 +308,7 @@ FIXED WSREP XID info offsets for 4k page size 10.0.32-galera
/** Doublewrite buffer */
/* @{ */
/** The offset of the doublewrite buffer header on the trx system header page */
-#define TRX_SYS_DOUBLEWRITE (UNIV_PAGE_SIZE - 200)
+#define TRX_SYS_DOUBLEWRITE (srv_page_size - 200)
/*-------------------------------------------------------------*/
#define TRX_SYS_DOUBLEWRITE_FSEG 0 /*!< fseg header of the fseg
containing the doublewrite
@@ -548,83 +356,477 @@ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE
/* @} */
-/** File format tag */
-/* @{ */
-/** The offset of the file format tag on the trx system header page
-(TRX_SYS_PAGE_NO of TRX_SYS_SPACE) */
-#define TRX_SYS_FILE_FORMAT_TAG (UNIV_PAGE_SIZE - 16)
-
-/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid. The file format
-identifier is added to this constant. */
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW 3645922177UL
-/** Contents of TRX_SYS_FILE_FORMAT_TAG+4 when valid */
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH 2745987765UL
-/** Contents of TRX_SYS_FILE_FORMAT_TAG when valid. The file format
-identifier is added to this 64-bit constant. */
-#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N \
- ((ib_uint64_t) TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH << 32 \
- | TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW)
-/* @} */
+trx_t* current_trx();
+
+struct rw_trx_hash_element_t
+{
+ rw_trx_hash_element_t(): trx(0)
+ {
+ mutex_create(LATCH_ID_RW_TRX_HASH_ELEMENT, &mutex);
+ }
+
+
+ ~rw_trx_hash_element_t()
+ {
+ mutex_free(&mutex);
+ }
+
+
+ trx_id_t id; /* lf_hash_init() relies on this to be first in the struct */
+ trx_id_t no;
+ trx_t *trx;
+ ib_mutex_t mutex;
+};
+
+
+/**
+ Wrapper around LF_HASH to store set of in memory read-write transactions.
+*/
+
+class rw_trx_hash_t
+{
+ LF_HASH hash;
+
+
+ /**
+ Constructor callback for lock-free allocator.
+
+ Object is just allocated and is not yet accessible via rw_trx_hash by
+ concurrent threads. Object can be reused multiple times before it is freed.
+ Every time object is being reused initializer() callback is called.
+ */
+
+ static void rw_trx_hash_constructor(uchar *arg)
+ {
+ new(arg + LF_HASH_OVERHEAD) rw_trx_hash_element_t();
+ }
+
+
+ /**
+ Destructor callback for lock-free allocator.
+
+ Object is about to be freed and is not accessible via rw_trx_hash by
+ concurrent threads.
+ */
+
+ static void rw_trx_hash_destructor(uchar *arg)
+ {
+ reinterpret_cast<rw_trx_hash_element_t*>
+ (arg + LF_HASH_OVERHEAD)->~rw_trx_hash_element_t();
+ }
+
+
+ /**
+ Destructor callback for lock-free allocator.
+
+ This destructor is used at shutdown. It frees remaining transaction
+ objects.
+
+ XA PREPARED transactions may remain if they haven't been committed or
+ rolled back. ACTIVE transactions may remain if startup was interrupted or
+ server is running in read-only mode or for certain srv_force_recovery
+ levels.
+ */
+
+ static void rw_trx_hash_shutdown_destructor(uchar *arg)
+ {
+ rw_trx_hash_element_t *element=
+ reinterpret_cast<rw_trx_hash_element_t*>(arg + LF_HASH_OVERHEAD);
+ if (trx_t *trx= element->trx)
+ {
+ ut_ad(trx_state_eq(trx, TRX_STATE_PREPARED) ||
+ (trx_state_eq(trx, TRX_STATE_ACTIVE) &&
+ (!srv_was_started ||
+ srv_read_only_mode ||
+ srv_force_recovery >= SRV_FORCE_NO_TRX_UNDO)));
+ trx_free_at_shutdown(trx);
+ }
+ element->~rw_trx_hash_element_t();
+ }
+
+
+ /**
+ Initializer callback for lock-free hash.
+
+ Object is not yet accessible via rw_trx_hash by concurrent threads, but is
+ about to become such. Object id can be changed only by this callback and
+ remains the same until all pins to this object are released.
+
+ Object trx can be changed to 0 by erase() under object mutex protection,
+ which indicates it is about to be removed from lock-free hash and become
+ not accessible by concurrent threads.
+ */
+
+ static void rw_trx_hash_initializer(LF_HASH *,
+ rw_trx_hash_element_t *element,
+ trx_t *trx)
+ {
+ ut_ad(element->trx == 0);
+ element->trx= trx;
+ element->id= trx->id;
+ element->no= TRX_ID_MAX;
+ trx->rw_trx_hash_element= element;
+ }
+
+
+ /**
+ Gets LF_HASH pins.
+
+ Pins are used to protect object from being destroyed or reused. They are
+ normally stored in trx object for quick access. If caller doesn't have trx
+ available, we try to get it using current_trx(). If caller doesn't have trx
+ at all, temporary pins are allocated.
+ */
+
+ LF_PINS *get_pins(trx_t *trx)
+ {
+ if (!trx->rw_trx_hash_pins)
+ {
+ trx->rw_trx_hash_pins= lf_hash_get_pins(&hash);
+ ut_a(trx->rw_trx_hash_pins);
+ }
+ return trx->rw_trx_hash_pins;
+ }
+
+
+ struct eliminate_duplicates_arg
+ {
+ trx_ids_t ids;
+ my_hash_walk_action action;
+ void *argument;
+ eliminate_duplicates_arg(size_t size, my_hash_walk_action act, void* arg):
+ action(act), argument(arg) { ids.reserve(size); }
+ };
+
+
+ static my_bool eliminate_duplicates(rw_trx_hash_element_t *element,
+ eliminate_duplicates_arg *arg)
+ {
+ for (trx_ids_t::iterator it= arg->ids.begin(); it != arg->ids.end(); it++)
+ {
+ if (*it == element->id)
+ return 0;
+ }
+ arg->ids.push_back(element->id);
+ return arg->action(element, arg->argument);
+ }
+
-/** The transaction system central memory data structure. */
-struct trx_sys_t {
-
- TrxSysMutex mutex; /*!< mutex protecting most fields in
- this structure except when noted
- otherwise */
-
- MVCC* mvcc; /*!< Multi version concurrency control
- manager */
- volatile trx_id_t
- max_trx_id; /*!< The smallest number not yet
- assigned as a transaction id or
- transaction number. This is declared
- volatile because it can be accessed
- without holding any mutex during
- AC-NL-RO view creation. */
- trx_ut_list_t serialisation_list;
- /*!< Ordered on trx_t::no of all the
- currenrtly active RW transactions */
#ifdef UNIV_DEBUG
- trx_id_t rw_max_trx_id; /*!< Max trx id of read-write
- transactions which exist or existed */
-#endif /* UNIV_DEBUG */
-
- /** Avoid false sharing */
- const char pad1[CACHE_LINE_SIZE];
- trx_ut_list_t rw_trx_list; /*!< List of active and committed in
- memory read-write transactions, sorted
- on trx id, biggest first. Recovered
- transactions are always on this list. */
-
- /** Avoid false sharing */
- const char pad2[CACHE_LINE_SIZE];
- trx_ut_list_t mysql_trx_list; /*!< List of transactions created
- for MySQL. All user transactions are
- on mysql_trx_list. The rw_trx_list
- can contain system transactions and
- recovered transactions that will not
- be in the mysql_trx_list.
- mysql_trx_list may additionally contain
- transactions that have not yet been
- started in InnoDB. */
-
- trx_ids_t rw_trx_ids; /*!< Array of Read write transaction IDs
- for MVCC snapshot. A ReadView would take
- a snapshot of these transactions whose
- changes are not visible to it. We should
- remove transactions from the list before
- committing in memory and releasing locks
- to ensure right order of removal and
- consistent snapshot. */
-
- /** Avoid false sharing */
- const char pad3[CACHE_LINE_SIZE];
+ static void validate_element(trx_t *trx)
+ {
+ ut_ad(!trx->read_only || !trx->rsegs.m_redo.rseg);
+ ut_ad(!trx_is_autocommit_non_locking(trx));
+ mutex_enter(&trx->mutex);
+ ut_ad(trx_state_eq(trx, TRX_STATE_ACTIVE) ||
+ trx_state_eq(trx, TRX_STATE_PREPARED));
+ mutex_exit(&trx->mutex);
+ }
+
+
+ struct debug_iterator_arg
+ {
+ my_hash_walk_action action;
+ void *argument;
+ };
+
+
+ static my_bool debug_iterator(rw_trx_hash_element_t *element,
+ debug_iterator_arg *arg)
+ {
+ mutex_enter(&element->mutex);
+ if (element->trx)
+ validate_element(element->trx);
+ mutex_exit(&element->mutex);
+ return arg->action(element, arg->argument);
+ }
+#endif
+
+
+public:
+ void init()
+ {
+ lf_hash_init(&hash, sizeof(rw_trx_hash_element_t), LF_HASH_UNIQUE, 0,
+ sizeof(trx_id_t), 0, &my_charset_bin);
+ hash.alloc.constructor= rw_trx_hash_constructor;
+ hash.alloc.destructor= rw_trx_hash_destructor;
+ hash.initializer=
+ reinterpret_cast<lf_hash_initializer>(rw_trx_hash_initializer);
+ }
+
+
+ void destroy()
+ {
+ hash.alloc.destructor= rw_trx_hash_shutdown_destructor;
+ lf_hash_destroy(&hash);
+ }
+
+
+ /**
+ Releases LF_HASH pins.
+
+ Must be called by thread that owns trx_t object when the latter is being
+ "detached" from thread (e.g. released to the pool by trx_free()). Can be
+ called earlier if thread is expected not to use rw_trx_hash.
+
+ Since pins are not allowed to be transferred to another thread,
+ initialisation thread calls this for recovered transactions.
+ */
+
+ void put_pins(trx_t *trx)
+ {
+ if (trx->rw_trx_hash_pins)
+ {
+ lf_hash_put_pins(trx->rw_trx_hash_pins);
+ trx->rw_trx_hash_pins= 0;
+ }
+ }
+
+
+ /**
+ Finds trx object in lock-free hash with given id.
+
+ Only ACTIVE or PREPARED trx objects may participate in hash. Nevertheless
+ the transaction may get committed before this method returns.
+
+ With do_ref_count == false the caller may dereference returned trx pointer
+ only if lock_sys.mutex was acquired before calling find().
+
+ With do_ref_count == true caller may dereference trx even if it is not
+ holding lock_sys.mutex. Caller is responsible for calling
+ trx->release_reference() when it is done playing with trx.
+
+ Ideally this method should get caller rw_trx_hash_pins along with trx
+ object as a parameter, similar to insert() and erase(). However most
+ callers lose trx early in their call chains and it is not that easy to pass
+ them through.
+
+ So we take more expensive approach: get trx through current_thd()->ha_data.
+ Some threads don't have trx attached to THD, and at least server
+ initialisation thread, fts_optimize_thread, srv_master_thread,
+ dict_stats_thread, srv_monitor_thread, btr_defragment_thread don't even
+ have THD at all. For such cases we allocate pins only for duration of
+ search and free them immediately.
+
+ This has negative performance impact and should be fixed eventually (by
+ passing caller_trx as a parameter). Still stream of DML is more or less Ok.
+
+ @return
+ @retval 0 not found
+ @retval pointer to trx
+ */
+
+ trx_t *find(trx_t *caller_trx, trx_id_t trx_id, bool do_ref_count)
+ {
+ /*
+ In MariaDB 10.3, purge will reset DB_TRX_ID to 0
+ when the history is lost. Read/write transactions will
+ always have a nonzero trx_t::id; there the value 0 is
+ reserved for transactions that did not write or lock
+ anything yet.
+
+ The caller should already have handled trx_id==0 specially.
+ */
+ ut_ad(trx_id);
+ if (caller_trx && caller_trx->id == trx_id)
+ {
+ if (do_ref_count)
+ caller_trx->reference();
+ return caller_trx;
+ }
+
+ trx_t *trx= 0;
+ LF_PINS *pins= caller_trx ? get_pins(caller_trx) : lf_hash_get_pins(&hash);
+ ut_a(pins);
+
+ rw_trx_hash_element_t *element= reinterpret_cast<rw_trx_hash_element_t*>
+ (lf_hash_search(&hash, pins, reinterpret_cast<const void*>(&trx_id),
+ sizeof(trx_id_t)));
+ if (element)
+ {
+ mutex_enter(&element->mutex);
+ lf_hash_search_unpin(pins);
+ trx= element->trx;
+ if (!trx);
+ else if (UNIV_UNLIKELY(trx_id != trx->id))
+ trx= NULL;
+ else {
+ if (do_ref_count)
+ trx->reference();
+ ut_d(validate_element(trx));
+ }
+ mutex_exit(&element->mutex);
+ }
+ if (!caller_trx)
+ lf_hash_put_pins(pins);
+ return trx;
+ }
+
+
+ /**
+ Inserts trx to lock-free hash.
+
+ Object becomes accessible via rw_trx_hash.
+ */
+
+ void insert(trx_t *trx)
+ {
+ ut_d(validate_element(trx));
+ int res= lf_hash_insert(&hash, get_pins(trx),
+ reinterpret_cast<void*>(trx));
+ ut_a(res == 0);
+ }
+
+
+ /**
+ Removes trx from lock-free hash.
+
+ Object becomes not accessible via rw_trx_hash. But it still can be pinned
+ by concurrent find(), which is supposed to release it immediately after
+ it sees object trx is 0.
+ */
+
+ void erase(trx_t *trx)
+ {
+ ut_d(validate_element(trx));
+ mutex_enter(&trx->rw_trx_hash_element->mutex);
+ trx->rw_trx_hash_element->trx= 0;
+ mutex_exit(&trx->rw_trx_hash_element->mutex);
+ int res= lf_hash_delete(&hash, get_pins(trx),
+ reinterpret_cast<const void*>(&trx->id),
+ sizeof(trx_id_t));
+ ut_a(res == 0);
+ }
+
+
+ /**
+ Returns the number of elements in the hash.
+
+ The number is exact only if hash is protected against concurrent
+ modifications (e.g. single threaded startup or hash is protected
+ by some mutex). Otherwise the number may be used as a hint only,
+ because it may change even before this method returns.
+ */
+
+ uint32_t size()
+ {
+ return uint32_t(my_atomic_load32_explicit(&hash.count,
+ MY_MEMORY_ORDER_RELAXED));
+ }
+
+
+ /**
+ Iterates the hash.
+
+ @param caller_trx used to get/set pins
+ @param action called for every element in hash
+ @param argument opaque argument passed to action
+
+ May return the same element multiple times if hash is under contention.
+ If caller doesn't like to see the same transaction multiple times, it has
+ to call iterate_no_dups() instead.
+
+ May return element with committed transaction. If caller doesn't like to
+ see committed transactions, it has to skip those under element mutex:
+
+ mutex_enter(&element->mutex);
+ if (trx_t *trx= element->trx)
+ {
+ // trx is protected against commit in this branch
+ }
+ mutex_exit(&element->mutex);
+
+ May miss concurrently inserted transactions.
+
+ @return
+ @retval 0 iteration completed successfully
+ @retval 1 iteration was interrupted (action returned 1)
+ */
+
+ int iterate(trx_t *caller_trx, my_hash_walk_action action, void *argument)
+ {
+ LF_PINS *pins= caller_trx ? get_pins(caller_trx) : lf_hash_get_pins(&hash);
+ ut_a(pins);
+#ifdef UNIV_DEBUG
+ debug_iterator_arg debug_arg= { action, argument };
+ action= reinterpret_cast<my_hash_walk_action>(debug_iterator);
+ argument= &debug_arg;
+#endif
+ int res= lf_hash_iterate(&hash, pins, action, argument);
+ if (!caller_trx)
+ lf_hash_put_pins(pins);
+ return res;
+ }
+
+
+ int iterate(my_hash_walk_action action, void *argument)
+ {
+ return iterate(current_trx(), action, argument);
+ }
+
+
+ /**
+ Iterates the hash and eliminates duplicate elements.
+
+ @sa iterate()
+ */
+
+ int iterate_no_dups(trx_t *caller_trx, my_hash_walk_action action,
+ void *argument)
+ {
+ eliminate_duplicates_arg arg(size() + 32, action, argument);
+ return iterate(caller_trx, reinterpret_cast<my_hash_walk_action>
+ (eliminate_duplicates), &arg);
+ }
+
+
+ int iterate_no_dups(my_hash_walk_action action, void *argument)
+ {
+ return iterate_no_dups(current_trx(), action, argument);
+ }
+};
+
+
+/** The transaction system central memory data structure. */
+class trx_sys_t
+{
+ /**
+ The smallest number not yet assigned as a transaction id or transaction
+ number. Accessed and updated with atomic operations.
+ */
+ MY_ALIGNED(CACHE_LINE_SIZE) trx_id_t m_max_trx_id;
+
+
+ /**
+ Solves race conditions between register_rw() and snapshot_ids() as well as
+ race condition between assign_new_trx_no() and snapshot_ids().
+
+ @sa register_rw()
+ @sa assign_new_trx_no()
+ @sa snapshot_ids()
+ */
+ MY_ALIGNED(CACHE_LINE_SIZE) trx_id_t m_rw_trx_hash_version;
+
+
+ /**
+ TRX_RSEG_HISTORY list length (number of committed transactions to purge)
+ */
+ MY_ALIGNED(CACHE_LINE_SIZE) int32 rseg_history_len;
+
+ bool m_initialised;
+
+public:
+ /** Mutex protecting trx_list. */
+ MY_ALIGNED(CACHE_LINE_SIZE) mutable TrxSysMutex mutex;
+
+ /** List of all transactions. */
+ MY_ALIGNED(CACHE_LINE_SIZE) trx_ut_list_t trx_list;
+
+ MY_ALIGNED(CACHE_LINE_SIZE)
/** Temporary rollback segments */
trx_rseg_t* temp_rsegs[TRX_SYS_N_RSEGS];
- /** Avoid false sharing */
- const char pad4[CACHE_LINE_SIZE];
+ MY_ALIGNED(CACHE_LINE_SIZE)
trx_rseg_t* rseg_array[TRX_SYS_N_RSEGS];
/*!< Pointer array to rollback
segments; NULL if slot not in use;
@@ -632,46 +834,378 @@ struct trx_sys_t {
single-threaded mode; not protected
by any mutex, because it is read-only
during multi-threaded operation */
- ulint rseg_history_len;
- /*!< Length of the TRX_RSEG_HISTORY
- list (update undo logs for committed
- transactions), protected by
- rseg->mutex */
-
- TrxIdSet rw_trx_set; /*!< Mapping from transaction id
- to transaction instance */
-
- ulint n_prepared_trx; /*!< Number of transactions currently
- in the XA PREPARED state */
-
- ulint n_prepared_recovered_trx; /*!< Number of transactions
- currently in XA PREPARED state that are
- also recovered. Such transactions cannot
- be added during runtime. They can only
- occur after recovery if mysqld crashed
- while there were XA PREPARED
- transactions. We disable query cache
- if such transactions exist. */
-};
-/** When a trx id which is zero modulo this number (which must be a power of
-two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system
-page is updated */
-#define TRX_SYS_TRX_ID_WRITE_MARGIN ((trx_id_t) 256)
+ /**
+ Lock-free hash of in memory read-write transactions.
+ Works faster when it is on its own cache line (tested).
+ */
+
+ MY_ALIGNED(CACHE_LINE_SIZE) rw_trx_hash_t rw_trx_hash;
+
+
+#ifdef WITH_WSREP
+ /** Latest recovered XID during startup */
+ XID recovered_wsrep_xid;
+#endif
+ /** Latest recovered binlog offset */
+ uint64_t recovered_binlog_offset;
+ /** Latest recovered binlog file name */
+ char recovered_binlog_filename[TRX_SYS_MYSQL_LOG_NAME_LEN];
+
+
+ /**
+ Constructor.
+
+ Some members may require late initialisation, thus we just mark object as
+ uninitialised. Real initialisation happens in create().
+ */
+
+ trx_sys_t(): m_initialised(false) {}
+
+
+ /**
+ Returns the minimum trx id in rw trx list.
-/** Test if trx_sys->mutex is owned. */
-#define trx_sys_mutex_own() (trx_sys->mutex.is_owned())
+ This is the smallest id for which the trx can possibly be active. (But, you
+ must look at the trx->state to find out if the minimum trx id transaction
+ itself is active, or already committed.)
-/** Acquire the trx_sys->mutex. */
-#define trx_sys_mutex_enter() do { \
- mutex_enter(&trx_sys->mutex); \
-} while (0)
+ @return the minimum trx id, or m_max_trx_id if the trx list is empty
+ */
-/** Release the trx_sys->mutex. */
-#define trx_sys_mutex_exit() do { \
- trx_sys->mutex.exit(); \
-} while (0)
+ trx_id_t get_min_trx_id()
+ {
+ trx_id_t id= get_max_trx_id();
+ rw_trx_hash.iterate(reinterpret_cast<my_hash_walk_action>
+ (get_min_trx_id_callback), &id);
+ return id;
+ }
-#include "trx0sys.ic"
+
+ /**
+ Determines the maximum transaction id.
+
+ @return maximum currently allocated trx id; will be stale after the
+ next call to trx_sys.get_new_trx_id()
+ */
+
+ trx_id_t get_max_trx_id()
+ {
+ return static_cast<trx_id_t>
+ (my_atomic_load64_explicit(reinterpret_cast<int64*>(&m_max_trx_id),
+ MY_MEMORY_ORDER_RELAXED));
+ }
+
+
+ /**
+ Allocates a new transaction id.
+ @return new, allocated trx id
+ */
+
+ trx_id_t get_new_trx_id()
+ {
+ trx_id_t id= get_new_trx_id_no_refresh();
+ refresh_rw_trx_hash_version();
+ return id;
+ }
+
+
+ /**
+ Allocates and assigns new transaction serialisation number.
+
+ There's a gap between m_max_trx_id increment and transaction serialisation
+ number becoming visible through rw_trx_hash. While we're in this gap
+ concurrent thread may come and do MVCC snapshot without seeing allocated
+ but not yet assigned serialisation number. Then at some point purge thread
+ may clone this view. As a result it won't see newly allocated serialisation
+ number and may remove "unnecessary" history data of this transaction from
+ rollback segments.
+
+ m_rw_trx_hash_version is intended to solve this problem. MVCC snapshot has
+ to wait until m_max_trx_id == m_rw_trx_hash_version, which effectively
+ means that all transaction serialisation numbers up to m_max_trx_id are
+ available through rw_trx_hash.
+
+ We rely on refresh_rw_trx_hash_version() to issue RELEASE memory barrier so
+ that m_rw_trx_hash_version increment happens after
+ trx->rw_trx_hash_element->no becomes visible through rw_trx_hash.
+
+ @param trx transaction
+ */
+ void assign_new_trx_no(trx_t *trx)
+ {
+ trx->no= get_new_trx_id_no_refresh();
+ my_atomic_store64_explicit(reinterpret_cast<int64*>
+ (&trx->rw_trx_hash_element->no),
+ trx->no, MY_MEMORY_ORDER_RELAXED);
+ refresh_rw_trx_hash_version();
+ }
+
+
+ /**
+ Takes MVCC snapshot.
+
+ To reduce malloc probability we reserve rw_trx_hash.size() + 32 elements
+ in ids.
+
+ For details about get_rw_trx_hash_version() != get_max_trx_id() spin
+ @sa register_rw() and @sa assign_new_trx_no().
+
+ We rely on get_rw_trx_hash_version() to issue ACQUIRE memory barrier so
+ that loading of m_rw_trx_hash_version happens before accessing rw_trx_hash.
+
+ To optimise snapshot creation rw_trx_hash.iterate() is being used instead
+ of rw_trx_hash.iterate_no_dups(). It means that some transaction
+ identifiers may appear multiple times in ids.
+
+ @param[in,out] caller_trx used to get access to rw_trx_hash_pins
+ @param[out] ids array to store registered transaction identifiers
+ @param[out] max_trx_id variable to store m_max_trx_id value
+ @param[out] min_trx_no variable to store min(trx->no) value
+ */
+
+ void snapshot_ids(trx_t *caller_trx, trx_ids_t *ids, trx_id_t *max_trx_id,
+ trx_id_t *min_trx_no)
+ {
+ ut_ad(!mutex_own(&mutex));
+ snapshot_ids_arg arg(ids);
+
+ while ((arg.m_id= get_rw_trx_hash_version()) != get_max_trx_id())
+ ut_delay(1);
+ arg.m_no= arg.m_id;
+
+ ids->clear();
+ ids->reserve(rw_trx_hash.size() + 32);
+ rw_trx_hash.iterate(caller_trx,
+ reinterpret_cast<my_hash_walk_action>(copy_one_id),
+ &arg);
+
+ *max_trx_id= arg.m_id;
+ *min_trx_no= arg.m_no;
+ }
+
+
+ /** Initialiser for m_max_trx_id and m_rw_trx_hash_version. */
+ void init_max_trx_id(trx_id_t value)
+ {
+ m_max_trx_id= m_rw_trx_hash_version= value;
+ }
+
+
+ bool is_initialised() { return m_initialised; }
+
+
+ /** Initialise the transaction subsystem. */
+ void create();
+
+ /** Close the transaction subsystem on shutdown. */
+ void close();
+
+ /** @return total number of active (non-prepared) transactions */
+ ulint any_active_transactions();
+
+
+ /**
+ Registers read-write transaction.
+
+ Transaction becomes visible to MVCC.
+
+ There's a gap between m_max_trx_id increment and transaction becoming
+ visible through rw_trx_hash. While we're in this gap concurrent thread may
+ come and do MVCC snapshot. As a result concurrent read view will be able to
+ observe records owned by this transaction even before it was committed.
+
+ m_rw_trx_hash_version is intended to solve this problem. MVCC snapshot has
+ to wait until m_max_trx_id == m_rw_trx_hash_version, which effectively
+ means that all transactions up to m_max_trx_id are available through
+ rw_trx_hash.
+
+ We rely on refresh_rw_trx_hash_version() to issue RELEASE memory barrier so
+ that m_rw_trx_hash_version increment happens after transaction becomes
+ visible through rw_trx_hash.
+ */
+
+ void register_rw(trx_t *trx)
+ {
+ trx->id= get_new_trx_id_no_refresh();
+ rw_trx_hash.insert(trx);
+ refresh_rw_trx_hash_version();
+ }
+
+
+ /**
+ Deregisters read-write transaction.
+
+ Transaction is removed from rw_trx_hash, which releases all implicit locks.
+ MVCC snapshot won't see this transaction anymore.
+ */
+
+ void deregister_rw(trx_t *trx)
+ {
+ rw_trx_hash.erase(trx);
+ }
+
+
+ bool is_registered(trx_t *caller_trx, trx_id_t id)
+ {
+ return id && find(caller_trx, id, false);
+ }
+
+
+ trx_t *find(trx_t *caller_trx, trx_id_t id, bool do_ref_count= true)
+ {
+ return rw_trx_hash.find(caller_trx, id, do_ref_count);
+ }
+
+
+ /**
+ Registers transaction in trx_sys.
+
+ @param trx transaction
+ */
+ void register_trx(trx_t *trx)
+ {
+ mutex_enter(&mutex);
+ UT_LIST_ADD_FIRST(trx_list, trx);
+ mutex_exit(&mutex);
+ }
+
+
+ /**
+ Deregisters transaction in trx_sys.
+
+ @param trx transaction
+ */
+ void deregister_trx(trx_t *trx)
+ {
+ mutex_enter(&mutex);
+ UT_LIST_REMOVE(trx_list, trx);
+ mutex_exit(&mutex);
+ }
+
+
+ /**
+ Clones the oldest view and stores it in view.
+
+ No need to call ReadView::close(). The caller owns the view that is passed
+ in. This function is called by purge thread to determine whether it should
+ purge the delete marked record or not.
+ */
+ void clone_oldest_view();
+
+
+ /** @return the number of active views */
+ size_t view_count() const
+ {
+ size_t count= 0;
+
+ mutex_enter(&mutex);
+ for (const trx_t *trx= UT_LIST_GET_FIRST(trx_list); trx;
+ trx= UT_LIST_GET_NEXT(trx_list, trx))
+ {
+ if (trx->read_view.get_state() == READ_VIEW_STATE_OPEN)
+ ++count;
+ }
+ mutex_exit(&mutex);
+ return count;
+ }
+
+ /** @return number of committed transactions waiting for purge */
+ ulint history_size() const
+ {
+ return uint32(my_atomic_load32(&const_cast<trx_sys_t*>(this)
+ ->rseg_history_len));
+ }
+ /** Add to the TRX_RSEG_HISTORY length (on database startup). */
+ void history_add(int32 len)
+ {
+ my_atomic_add32(&rseg_history_len, len);
+ }
+ /** Register a committed transaction. */
+ void history_insert() { history_add(1); }
+ /** Note that a committed transaction was purged. */
+ void history_remove() { history_add(-1); }
+
+private:
+ static my_bool get_min_trx_id_callback(rw_trx_hash_element_t *element,
+ trx_id_t *id)
+ {
+ if (element->id < *id)
+ {
+ mutex_enter(&element->mutex);
+ /* We don't care about read-only transactions here. */
+ if (element->trx && element->trx->rsegs.m_redo.rseg)
+ *id= element->id;
+ mutex_exit(&element->mutex);
+ }
+ return 0;
+ }
+
+
+ struct snapshot_ids_arg
+ {
+ snapshot_ids_arg(trx_ids_t *ids): m_ids(ids) {}
+ trx_ids_t *m_ids;
+ trx_id_t m_id;
+ trx_id_t m_no;
+ };
+
+
+ static my_bool copy_one_id(rw_trx_hash_element_t *element,
+ snapshot_ids_arg *arg)
+ {
+ if (element->id < arg->m_id)
+ {
+ trx_id_t no= static_cast<trx_id_t>(my_atomic_load64_explicit(
+ reinterpret_cast<int64*>(&element->no), MY_MEMORY_ORDER_RELAXED));
+ arg->m_ids->push_back(element->id);
+ if (no < arg->m_no)
+ arg->m_no= no;
+ }
+ return 0;
+ }
+
+
+ /** Getter for m_rw_trx_hash_version, must issue ACQUIRE memory barrier. */
+ trx_id_t get_rw_trx_hash_version()
+ {
+ return static_cast<trx_id_t>
+ (my_atomic_load64_explicit(reinterpret_cast<int64*>
+ (&m_rw_trx_hash_version),
+ MY_MEMORY_ORDER_ACQUIRE));
+ }
+
+
+ /** Increments m_rw_trx_hash_version, must issue RELEASE memory barrier. */
+ void refresh_rw_trx_hash_version()
+ {
+ my_atomic_add64_explicit(reinterpret_cast<int64*>(&m_rw_trx_hash_version),
+ 1, MY_MEMORY_ORDER_RELEASE);
+ }
+
+
+ /**
+ Allocates new transaction id without refreshing rw_trx_hash version.
+
+ This method is extracted for exclusive use by register_rw() and
+ assign_new_trx_no() where new id must be allocated atomically with
+ payload of these methods from MVCC snapshot point of view.
+
+ @sa get_new_trx_id()
+ @sa assign_new_trx_no()
+
+ @return new transaction id
+ */
+
+ trx_id_t get_new_trx_id_no_refresh()
+ {
+ return static_cast<trx_id_t>(my_atomic_add64_explicit(
+ reinterpret_cast<int64*>(&m_max_trx_id), 1, MY_MEMORY_ORDER_RELAXED));
+ }
+};
+
+
+/** The transaction system */
+extern trx_sys_t trx_sys;
#endif
diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic
deleted file mode 100644
index 861800ef40e..00000000000
--- a/storage/innobase/include/trx0sys.ic
+++ /dev/null
@@ -1,464 +0,0 @@
-/*****************************************************************************
-
-Copyright (c) 1996, 2015, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2018, MariaDB Corporation.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc.,
-51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
-
-*****************************************************************************/
-
-/**************************************************//**
-@file include/trx0sys.ic
-Transaction system
-
-Created 3/26/1996 Heikki Tuuri
-*******************************************************/
-
-#include "trx0trx.h"
-#include "data0type.h"
-#include "srv0srv.h"
-#include "mtr0log.h"
-
-/* The typedef for rseg slot in the file copy */
-typedef byte trx_sysf_rseg_t;
-
-/* Rollback segment specification slot offsets */
-/*-------------------------------------------------------------*/
-#define TRX_SYS_RSEG_SPACE 0 /* space where the segment
- header is placed; starting with
- MySQL/InnoDB 5.1.7, this is
- UNIV_UNDEFINED if the slot is unused */
-#define TRX_SYS_RSEG_PAGE_NO 4 /* page number where the segment
- header is placed; this is FIL_NULL
- if the slot is unused */
-/*-------------------------------------------------------------*/
-/* Size of a rollback segment specification slot */
-#define TRX_SYS_RSEG_SLOT_SIZE 8
-
-/*****************************************************************//**
-Writes the value of max_trx_id to the file based trx system header. */
-void
-trx_sys_flush_max_trx_id(void);
-/*==========================*/
-
-/** Checks if a page address is the trx sys header page.
-@param[in] page_id page id
-@return true if trx sys header page */
-UNIV_INLINE
-bool
-trx_sys_hdr_page(
- const page_id_t& page_id)
-{
- return(page_id.space() == TRX_SYS_SPACE
- && page_id.page_no() == TRX_SYS_PAGE_NO);
-}
-
-/**********************************************************************//**
-Gets a pointer to the transaction system header and x-latches its page.
-@return pointer to system header, page x-latched. */
-UNIV_INLINE
-trx_sysf_t*
-trx_sysf_get(
-/*=========*/
- mtr_t* mtr) /*!< in: mtr */
-{
- buf_block_t* block = NULL;
- trx_sysf_t* header = NULL;
-
- ut_ad(mtr);
-
- block = buf_page_get(page_id_t(TRX_SYS_SPACE, TRX_SYS_PAGE_NO),
- univ_page_size, RW_X_LATCH, mtr);
-
- if (block) {
- buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER);
-
- header = TRX_SYS + buf_block_get_frame(block);
- }
-
- return(header);
-}
-
-/*****************************************************************//**
-Gets the space of the nth rollback segment slot in the trx system
-file copy.
-@return space id */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_space(
-/*====================*/
- trx_sysf_t* sys_header, /*!< in: trx sys header */
- ulint i, /*!< in: slot index == rseg id */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_SPACE, MLOG_4BYTES, mtr));
-}
-
-/*****************************************************************//**
-Gets the page number of the nth rollback segment slot in the trx system
-header.
-@return page number, FIL_NULL if slot unused */
-UNIV_INLINE
-ulint
-trx_sysf_rseg_get_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /*!< in: trx system header */
- ulint i, /*!< in: slot index == rseg id */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_PAGE_NO, MLOG_4BYTES, mtr));
-}
-
-/*****************************************************************//**
-Sets the space id of the nth rollback segment slot in the trx system
-file copy. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_space(
-/*====================*/
- trx_sysf_t* sys_header, /*!< in: trx sys file copy */
- ulint i, /*!< in: slot index == rseg id */
- ulint space, /*!< in: space id */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- mlog_write_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_SPACE,
- space,
- MLOG_4BYTES, mtr);
-}
-
-/*****************************************************************//**
-Sets the page number of the nth rollback segment slot in the trx system
-header. */
-UNIV_INLINE
-void
-trx_sysf_rseg_set_page_no(
-/*======================*/
- trx_sysf_t* sys_header, /*!< in: trx sys header */
- ulint i, /*!< in: slot index == rseg id */
- ulint page_no, /*!< in: page number, FIL_NULL if the
- slot is reset to unused */
- mtr_t* mtr) /*!< in: mtr */
-{
- ut_ad(sys_header);
- ut_ad(i < TRX_SYS_N_RSEGS);
-
- mlog_write_ulint(sys_header + TRX_SYS_RSEGS
- + i * TRX_SYS_RSEG_SLOT_SIZE
- + TRX_SYS_RSEG_PAGE_NO,
- page_no,
- MLOG_4BYTES, mtr);
-}
-
-/*****************************************************************//**
-Writes a trx id to an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_write_... */
-UNIV_INLINE
-void
-trx_write_trx_id(
-/*=============*/
- byte* ptr, /*!< in: pointer to memory where written */
- trx_id_t id) /*!< in: id */
-{
-#if DATA_TRX_ID_LEN != 6
-# error "DATA_TRX_ID_LEN != 6"
-#endif
- ut_ad(id > 0);
- mach_write_to_6(ptr, id);
-}
-
-/*****************************************************************//**
-Reads a trx id from an index page. In case that the id size changes in
-some future version, this function should be used instead of
-mach_read_...
-@return id */
-UNIV_INLINE
-trx_id_t
-trx_read_trx_id(
-/*============*/
- const byte* ptr) /*!< in: pointer to memory from where to read */
-{
-#if DATA_TRX_ID_LEN != 6
-# error "DATA_TRX_ID_LEN != 6"
-#endif
- return(mach_read_from_6(ptr));
-}
-
-/****************************************************************//**
-Looks for the trx handle with the given id in rw_trx_list.
-The caller must be holding trx_sys->mutex.
-@return the trx handle or NULL if not found;
-the pointer must not be dereferenced unless lock_sys->mutex was
-acquired before calling this function and is still being held */
-UNIV_INLINE
-trx_t*
-trx_get_rw_trx_by_id(
-/*=================*/
- trx_id_t trx_id) /*!< in: trx id to search for */
-{
- ut_ad(trx_id > 0);
- ut_ad(trx_sys_mutex_own());
-
- if (trx_sys->rw_trx_set.empty()) {
- return(NULL);
- }
-
- TrxIdSet::iterator it;
-
- it = trx_sys->rw_trx_set.find(TrxTrack(trx_id));
-
- return(it == trx_sys->rw_trx_set.end() ? NULL : it->m_trx);
-}
-
-/****************************************************************//**
-Returns the minimum trx id in trx list. This is the smallest id for which
-the trx can possibly be active. (But, you must look at the trx->state
-to find out if the minimum trx id transaction itself is active, or already
-committed.). The caller must be holding the trx_sys_t::mutex in shared mode.
-@return the minimum trx id, or trx_sys->max_trx_id if the trx list is empty */
-UNIV_INLINE
-trx_id_t
-trx_rw_min_trx_id_low(void)
-/*=======================*/
-{
- trx_id_t id;
-
- ut_ad(trx_sys_mutex_own());
-
- const trx_t* trx = UT_LIST_GET_LAST(trx_sys->rw_trx_list);
-
- if (trx == NULL) {
- id = trx_sys->max_trx_id;
- } else {
- assert_trx_in_rw_list(trx);
- id = trx->id;
- }
-
- return(id);
-}
-
-#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG
-/***********************************************************//**
-Assert that a transaction has been recovered.
-@return TRUE */
-UNIV_INLINE
-ibool
-trx_assert_recovered(
-/*=================*/
- trx_id_t trx_id) /*!< in: transaction identifier */
-{
- const trx_t* trx;
-
- trx_sys_mutex_enter();
-
- trx = trx_get_rw_trx_by_id(trx_id);
- ut_a(trx->is_recovered);
-
- trx_sys_mutex_exit();
-
- return(TRUE);
-}
-#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */
-
-/****************************************************************//**
-Returns the minimum trx id in rw trx list. This is the smallest id for which
-the rw trx can possibly be active. (But, you must look at the trx->state
-to find out if the minimum trx id transaction itself is active, or already
-committed.)
-@return the minimum trx id, or trx_sys->max_trx_id if rw trx list is empty */
-UNIV_INLINE
-trx_id_t
-trx_rw_min_trx_id(void)
-/*===================*/
-{
- trx_sys_mutex_enter();
-
- trx_id_t id = trx_rw_min_trx_id_low();
-
- trx_sys_mutex_exit();
-
- return(id);
-}
-
-/****************************************************************//**
-Checks if a rw transaction with the given id is active. If the caller is
-not holding lock_sys->mutex, the transaction may already have been committed.
-@return transaction instance if active, or NULL */
-UNIV_INLINE
-trx_t*
-trx_rw_is_active_low(
-/*=================*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt) /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
-{
- trx_t* trx;
-
- ut_ad(trx_sys_mutex_own());
-
- if (trx_id < trx_rw_min_trx_id_low()) {
-
- trx = NULL;
- } else if (trx_id >= trx_sys->max_trx_id) {
-
- /* There must be corruption: we let the caller handle the
- diagnostic prints in this case. */
-
- trx = NULL;
- if (corrupt != NULL) {
- *corrupt = TRUE;
- }
- } else {
- trx = trx_get_rw_trx_by_id(trx_id);
-
- if (trx != NULL
- && trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
-
- trx = NULL;
- }
- }
-
- return(trx);
-}
-
-/****************************************************************//**
-Checks if a rw transaction with the given id is active. If the caller is
-not holding lock_sys->mutex, the transaction may already have been
-committed.
-@return transaction instance if active, or NULL; */
-UNIV_INLINE
-trx_t*
-trx_rw_is_active(
-/*=============*/
- trx_id_t trx_id, /*!< in: trx id of the transaction */
- ibool* corrupt, /*!< in: NULL or pointer to a flag
- that will be set if corrupt */
- bool do_ref_count) /*!< in: if true then increment the
- trx_t::n_ref_count */
-{
- ut_ad(trx_id);
-
- trx_sys_mutex_enter();
-
- trx_t* trx = trx_rw_is_active_low(trx_id, corrupt);
-
- if (trx) {
- trx = trx_reference(do_ref_count ? trx_id : 0, trx);
- }
-
- trx_sys_mutex_exit();
-
- return(trx);
-}
-
-/*****************************************************************//**
-Allocates a new transaction id.
-@return new, allocated trx id */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_new_trx_id()
-/*====================*/
-{
- /* wsrep_fake_trx_id violates this assert */
- ut_ad(trx_sys_mutex_own());
-
- /* VERY important: after the database is started, max_trx_id value is
- divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the following if
- will evaluate to TRUE when this function is first time called,
- and the value for trx id will be written to disk-based header!
- Thus trx id values will not overlap when the database is
- repeatedly started! */
-
- if (!(trx_sys->max_trx_id % TRX_SYS_TRX_ID_WRITE_MARGIN)) {
-
- trx_sys_flush_max_trx_id();
- }
-
- return(trx_sys->max_trx_id++);
-}
-
-/*****************************************************************//**
-Determines the maximum transaction id.
-@return maximum currently allocated trx id; will be stale after the
-next call to trx_sys_get_new_trx_id() */
-UNIV_INLINE
-trx_id_t
-trx_sys_get_max_trx_id(void)
-/*========================*/
-{
- ut_ad(!trx_sys_mutex_own());
-
-#if UNIV_WORD_SIZE < DATA_TRX_ID_LEN
- /* Avoid torn reads. */
-
- trx_sys_mutex_enter();
-
- trx_id_t max_trx_id = trx_sys->max_trx_id;
-
- trx_sys_mutex_exit();
-
- return(max_trx_id);
-#else
- /* Perform a dirty read. Callers should be prepared for stale
- values, and we know that the value fits in a machine word, so
- that it will be read and written atomically. */
- return(trx_sys->max_trx_id);
-#endif /* UNIV_WORD_SIZE < DATA_TRX_ID_LEN */
-}
-
-/*****************************************************************//**
-Get the number of transaction in the system, independent of their state.
-@return count of transactions in trx_sys_t::rw_trx_list */
-UNIV_INLINE
-ulint
-trx_sys_get_n_rw_trx(void)
-/*======================*/
-{
- ulint n_trx;
-
- trx_sys_mutex_enter();
-
- n_trx = UT_LIST_GET_LEN(trx_sys->rw_trx_list);
-
- trx_sys_mutex_exit();
-
- return(n_trx);
-}
-
-/**
-Add the transaction to the RW transaction set
-@param trx transaction instance to add */
-UNIV_INLINE
-void
-trx_sys_rw_trx_add(trx_t* trx)
-{
- ut_ad(trx->id != 0);
-
- trx_sys->rw_trx_set.insert(TrxTrack(trx->id, trx));
- ut_d(trx->in_rw_trx_list = true);
-}
diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
index 917222477b1..d6a8b8c771b 100644
--- a/storage/innobase/include/trx0trx.h
+++ b/storage/innobase/include/trx0trx.h
@@ -41,16 +41,16 @@ Created 3/26/1996 Heikki Tuuri
#include "trx0xa.h"
#include "ut0vec.h"
#include "fts0fts.h"
+#include "read0types.h"
// Forward declaration
struct mtr_t;
// Forward declaration
-class ReadView;
-
-// Forward declaration
class FlushObserver;
+struct rw_trx_hash_element_t;
+
/** Set flush observer for the transaction
@param[in/out] trx transaction struct
@param[in] observer flush observer */
@@ -82,45 +82,19 @@ const dict_index_t*
trx_get_error_info(
/*===============*/
const trx_t* trx); /*!< in: trx object */
-/********************************************************************//**
-Creates a transaction object for MySQL.
-@return own: transaction object */
-trx_t*
-trx_allocate_for_mysql(void);
-/*========================*/
-/********************************************************************//**
-Creates a transaction object for background operations by the master thread.
-@return own: transaction object */
-trx_t*
-trx_allocate_for_background(void);
-/*=============================*/
-
-/** Frees and initialize a transaction object instantinated during recovery.
-@param trx trx object to free and initialize during recovery */
-void
-trx_free_resurrected(trx_t* trx);
-
-/** Free a transaction that was allocated by background or user threads.
-@param trx trx object to free */
-void
-trx_free_for_background(trx_t* trx);
-/********************************************************************//**
-At shutdown, frees a transaction object that is in the PREPARED state. */
-void
-trx_free_prepared(
-/*==============*/
- trx_t* trx); /*!< in, own: trx object */
+/** @return a trx_t instance from trx_pools. */
+trx_t *trx_create();
-/** Free a transaction object for MySQL.
-@param[in,out] trx transaction */
-void
-trx_free_for_mysql(trx_t* trx);
+/**
+ Release a trx_t instance back to the pool.
+ @param trx the instance to release.
+*/
+void trx_free(trx_t*& trx);
-/** Disconnect a transaction from MySQL.
-@param[in,out] trx transaction */
+/** At shutdown, frees a transaction object. */
void
-trx_disconnect_plain(trx_t* trx);
+trx_free_at_shutdown(trx_t *trx);
/** Disconnect a prepared transaction from MySQL.
@param[in,out] trx transaction */
@@ -229,22 +203,10 @@ trx_commit(
/*=======*/
trx_t* trx); /*!< in/out: transaction */
-/****************************************************************//**
-Commits a transaction and a mini-transaction. */
-void
-trx_commit_low(
-/*===========*/
- trx_t* trx, /*!< in/out: transaction */
- mtr_t* mtr); /*!< in/out: mini-transaction (will be committed),
- or NULL if trx made no modifications */
-/****************************************************************//**
-Cleans up a transaction at database startup. The cleanup is needed if
-the transaction already got to the middle of a commit when the database
-crashed, and we cannot roll it back. */
-void
-trx_cleanup_at_db_startup(
-/*======================*/
- trx_t* trx); /*!< in: transaction */
+/** Commit a transaction and a mini-transaction.
+@param[in,out] trx transaction
+@param[in,out] mtr mini-transaction (NULL if no modifications) */
+void trx_commit_low(trx_t* trx, mtr_t* mtr);
/**********************************************************************//**
Does the transaction commit for MySQL.
@return DB_SUCCESS or error number */
@@ -263,13 +225,13 @@ int
trx_recover_for_mysql(
/*==================*/
XID* xid_list, /*!< in/out: prepared transactions */
- ulint len); /*!< in: number of slots in xid_list */
+ uint len); /*!< in: number of slots in xid_list */
/*******************************************************************//**
This function is used to find one X/Open XA distributed transaction
which is in the prepared state
@return trx or NULL; on match, the trx->xid will be invalidated;
note that the trx may have been committed, unless the caller is
-holding lock_sys->mutex */
+holding lock_sys.mutex */
trx_t *
trx_get_trx_by_xid(
/*===============*/
@@ -287,31 +249,6 @@ void
trx_mark_sql_stat_end(
/*==================*/
trx_t* trx); /*!< in: trx handle */
-/********************************************************************//**
-Assigns a read view for a consistent read query. All the consistent reads
-within the same transaction will get the same read view, which is created
-when this function is first called for a new started transaction. */
-ReadView*
-trx_assign_read_view(
-/*=================*/
- trx_t* trx); /*!< in: active transaction */
-
-/****************************************************************//**
-@return the transaction's read view or NULL if one not assigned. */
-UNIV_INLINE
-ReadView*
-trx_get_read_view(
-/*==============*/
- trx_t* trx);
-
-/****************************************************************//**
-@return the transaction's read view or NULL if one not assigned. */
-UNIV_INLINE
-const ReadView*
-trx_get_read_view(
-/*==============*/
- const trx_t* trx);
-
/****************************************************************//**
Prepares a transaction for commit/rollback. */
void
@@ -335,7 +272,7 @@ trx_commit_step(
/**********************************************************************//**
Prints info about a transaction.
-Caller must hold trx_sys->mutex. */
+Caller must hold trx_sys.mutex. */
void
trx_print_low(
/*==========*/
@@ -355,7 +292,7 @@ trx_print_low(
/**********************************************************************//**
Prints info about a transaction.
-The caller must hold lock_sys->mutex and trx_sys->mutex.
+The caller must hold lock_sys.mutex and trx_sys.mutex.
When possible, use trx_print() instead. */
void
trx_print_latched(
@@ -365,25 +302,9 @@ trx_print_latched(
ulint max_query_len); /*!< in: max query length to print,
or 0 to use the default max length */
-#ifdef WITH_WSREP
-/**********************************************************************//**
-Prints info about a transaction.
-Transaction information may be retrieved without having trx_sys->mutex acquired
-so it may not be completely accurate. The caller must own lock_sys->mutex
-and the trx must have some locks to make sure that it does not escape
-without locking lock_sys->mutex. */
-UNIV_INTERN
-void
-wsrep_trx_print_locking(
- FILE* f, /*!< in: output stream */
- const trx_t* trx, /*!< in: transaction */
- ulint max_query_len) /*!< in: max query length to print,
- or 0 to use the default max length */
- MY_ATTRIBUTE((nonnull));
-#endif /* WITH_WSREP */
/**********************************************************************//**
Prints info about a transaction.
-Acquires and releases lock_sys->mutex and trx_sys->mutex. */
+Acquires and releases lock_sys.mutex. */
void
trx_print(
/*======*/
@@ -413,9 +334,9 @@ trx_set_dict_operation(
/**********************************************************************//**
Determines if a transaction is in the given state.
-The caller must hold trx_sys->mutex, or it must be the thread
+The caller must hold trx_sys.mutex, or it must be the thread
that is serving a running transaction.
-A running RW transaction must be in trx_sys->rw_trx_list.
+A running RW transaction must be in trx_sys.rw_trx_hash.
@return TRUE if trx->state == state */
UNIV_INLINE
bool
@@ -431,22 +352,11 @@ trx_state_eq(
trx->state == TRX_STATE_NOT_STARTED
after an error has been reported */
MY_ATTRIBUTE((nonnull, warn_unused_result));
-# ifdef UNIV_DEBUG
-/**********************************************************************//**
-Asserts that a transaction has been started.
-The caller must hold trx_sys->mutex.
-@return TRUE if started */
-ibool
-trx_assert_started(
-/*===============*/
- const trx_t* trx) /*!< in: transaction */
- MY_ATTRIBUTE((warn_unused_result));
-# endif /* UNIV_DEBUG */
/**********************************************************************//**
Determines if the currently running transaction has been interrupted.
-@return TRUE if interrupted */
-ibool
+@return true if interrupted */
+bool
trx_is_interrupted(
/*===============*/
const trx_t* trx); /*!< in: transaction */
@@ -519,18 +429,6 @@ trx_set_rw_mode(
trx_t* trx);
/**
-Release the transaction. Decrease the reference count.
-@param trx Transaction that is being released */
-UNIV_INLINE
-void
-trx_release_reference(
- trx_t* trx);
-
-/**
-Check if the transaction is being referenced. */
-#define trx_is_referenced(t) ((t)->n_ref > 0)
-
-/**
Transactions that aren't started by the MySQL server don't set
the trx_t::mysql_thd field. For such transactions we set the lock
wait timeout to 0 instead of the user configured value that comes
@@ -559,15 +457,6 @@ with an explicit check for the read-only status.
((t)->read_only && trx_is_autocommit_non_locking((t)))
/**
-Assert that the transaction is in the trx_sys_t::rw_trx_list */
-#define assert_trx_in_rw_list(t) do { \
- ut_ad(!(t)->read_only); \
- ut_ad((t)->in_rw_trx_list \
- == !((t)->read_only || !(t)->rsegs.m_redo.rseg)); \
- check_trx_state(t); \
-} while (0)
-
-/**
Check transaction state */
#define check_trx_state(t) do { \
ut_ad(!trx_is_autocommit_non_locking((t))); \
@@ -589,8 +478,8 @@ Check transaction state */
ut_ad(trx_state_eq((t), TRX_STATE_NOT_STARTED)); \
ut_ad(!(t)->id); \
ut_ad(!(t)->has_logged()); \
- ut_ad(!(t)->n_ref); \
- ut_ad(!MVCC::is_view_active((t)->read_view)); \
+ ut_ad(!(t)->is_referenced()); \
+ ut_ad(!(t)->read_view.is_open()); \
ut_ad((t)->lock.wait_thr == NULL); \
ut_ad(UT_LIST_GET_LEN((t)->lock.trx_locks) == 0); \
ut_ad((t)->dict_operation == TRX_DICT_OP_NONE); \
@@ -607,16 +496,15 @@ transaction pool.
#ifdef UNIV_DEBUG
/*******************************************************************//**
Assert that an autocommit non-locking select cannot be in the
-rw_trx_list and that it is a read-only transaction.
-The tranasction must be in the mysql_trx_list. */
+rw_trx_hash and that it is a read-only transaction.
+The transaction must have mysql_thd assigned. */
# define assert_trx_nonlocking_or_in_list(t) \
do { \
if (trx_is_autocommit_non_locking(t)) { \
trx_state_t t_state = (t)->state; \
ut_ad((t)->read_only); \
ut_ad(!(t)->is_recovered); \
- ut_ad(!(t)->in_rw_trx_list); \
- ut_ad((t)->in_mysql_trx_list); \
+ ut_ad((t)->mysql_thd); \
ut_ad(t_state == TRX_STATE_NOT_STARTED \
|| t_state == TRX_STATE_ACTIVE); \
} else { \
@@ -626,8 +514,8 @@ The tranasction must be in the mysql_trx_list. */
#else /* UNIV_DEBUG */
/*******************************************************************//**
Assert that an autocommit non-locking slect cannot be in the
-rw_trx_list and that it is a read-only transaction.
-The tranasction must be in the mysql_trx_list. */
+rw_trx_hash and that it is a read-only transaction.
+The transaction must have mysql_thd assigned. */
# define assert_trx_nonlocking_or_in_list(trx) ((void)0)
#endif /* UNIV_DEBUG */
@@ -654,7 +542,7 @@ To query the state either of the mutexes is sufficient within the locking
code and no mutex is required when the query thread is no longer waiting. */
/** The locks and state of an active transaction. Protected by
-lock_sys->mutex, trx->mutex or both. */
+lock_sys.mutex, trx->mutex or both. */
struct trx_lock_t {
ulint n_active_thrs; /*!< number of active query threads */
@@ -666,10 +554,10 @@ struct trx_lock_t {
TRX_QUE_LOCK_WAIT, this points to
the lock request, otherwise this is
NULL; set to non-NULL when holding
- both trx->mutex and lock_sys->mutex;
+ both trx->mutex and lock_sys.mutex;
set to NULL when holding
- lock_sys->mutex; readers should
- hold lock_sys->mutex, except when
+ lock_sys.mutex; readers should
+ hold lock_sys.mutex, except when
they are holding trx->mutex and
wait_lock==NULL */
ib_uint64_t deadlock_mark; /*!< A mark field that is initialized
@@ -683,13 +571,13 @@ struct trx_lock_t {
resolution, it sets this to true.
Protected by trx->mutex. */
time_t wait_started; /*!< lock wait started at this time,
- protected only by lock_sys->mutex */
+ protected only by lock_sys.mutex */
que_thr_t* wait_thr; /*!< query thread belonging to this
trx that is in QUE_THR_LOCK_WAIT
state. For threads suspended in a
lock wait, this is protected by
- lock_sys->mutex. Otherwise, this may
+ lock_sys.mutex. Otherwise, this may
only be modified by the thread that is
serving the running transaction. */
@@ -708,12 +596,12 @@ struct trx_lock_t {
unsigned table_cached;
mem_heap_t* lock_heap; /*!< memory heap for trx_locks;
- protected by lock_sys->mutex */
+ protected by lock_sys.mutex */
trx_lock_list_t trx_locks; /*!< locks requested by the transaction;
insertions are protected by trx->mutex
- and lock_sys->mutex; removals are
- protected by lock_sys->mutex */
+ and lock_sys.mutex; removals are
+ protected by lock_sys.mutex */
lock_list table_locks; /*!< All table locks requested by this
transaction, including AUTOINC locks */
@@ -732,14 +620,73 @@ struct trx_lock_t {
ulint n_rec_locks; /*!< number of rec locks in this trx */
};
-/** Type used to store the list of tables that are modified by a given
-transaction. We store pointers to the table objects in memory because
+/** Logical first modification time of a table in a transaction, expressed as a count of modified rows (undo_no_t) rather than wall-clock time */
+class trx_mod_table_time_t
+{
+ /** First modification of the table (the "rows so far" value passed to the constructor) */
+ undo_no_t first;
+ /** First modification of a system versioned column, or UNVERSIONED if none was recorded */
+ undo_no_t first_versioned;
+
+ /** Magic value signifying that a system versioned column of a
+ table was never modified in a transaction. */
+ static const undo_no_t UNVERSIONED = IB_ID_MAX;
+
+public:
+ /** Constructor; the object starts with no versioned-column modification recorded
+ @param[in] rows number of modified rows so far */
+ trx_mod_table_time_t(undo_no_t rows)
+ : first(rows), first_versioned(UNVERSIONED) {}
+
+#ifdef UNIV_DEBUG
+ /** Validation (debug builds only)
+ @param[in] rows number of modified rows so far; defaults to UNVERSIONED
+ @return whether first <= first_versioned and first <= rows */
+ bool valid(undo_no_t rows = UNVERSIONED) const
+ {
+ return first <= first_versioned && first <= rows;
+ }
+#endif /* UNIV_DEBUG */
+ /** @return whether a versioned-column modification has been recorded */
+ bool is_versioned() const { return first_versioned != UNVERSIONED; }
+
+ /** After writing an undo log record, set is_versioned() if needed
+ @param[in] rows number of modified rows so far */
+ void set_versioned(undo_no_t rows)
+ {
+ ut_ad(!is_versioned()); /* debug check: must not already be marked versioned */
+ first_versioned = rows;
+ ut_ad(valid()); /* debug check: first <= first_versioned must still hold */
+ }
+
+ /** Invoked after partial rollback
+ @param[in] limit number of surviving modified rows
+ @return whether this should be erased from trx_t::mod_tables */
+ bool rollback(undo_no_t limit)
+ {
+ ut_ad(valid());
+ if (first >= limit) { /* the first modification is at or beyond the surviving row count */
+ return true;
+ }
+
+ if (first_versioned < limit && is_versioned()) { /* NOTE(review): resets when first_versioned is below the surviving row count -- confirm this is the intended direction */
+ first_versioned = UNVERSIONED;
+ }
+
+ return false; /* entry is kept */
+ }
+};
+
+/** Collection of persistent tables and their first modification
+in a transaction.
+We store pointers to the table objects in memory because
we know that a table object will not be destroyed while a transaction
that modified it is running. */
-typedef std::set<
- dict_table_t*,
+typedef std::map<
+ dict_table_t*, trx_mod_table_time_t,
std::less<dict_table_t*>,
- ut_allocator<dict_table_t*> > trx_mod_tables_t;
+ ut_allocator<std::pair<dict_table_t* const, trx_mod_table_time_t> > >
+ trx_mod_tables_t;
/** The transaction handle
@@ -769,30 +716,31 @@ so without holding any mutex. The following are exceptions to this:
* trx_rollback_resurrected() may access resurrected (connectionless)
transactions while the system is already processing new user
-transactions. The trx_sys->mutex prevents a race condition between it
+transactions. The trx_sys.mutex prevents a race condition between it
and lock_trx_release_locks() [invoked by trx_commit()].
* trx_print_low() may access transactions not associated with the current
-thread. The caller must be holding trx_sys->mutex and lock_sys->mutex.
+thread. The caller must be holding lock_sys.mutex.
-* When a transaction handle is in the trx_sys->mysql_trx_list or
-trx_sys->trx_list, some of its fields must not be modified without
-holding trx_sys->mutex exclusively.
+* When a transaction handle is in the trx_sys.trx_list, some of its fields
+must not be modified without holding trx->mutex.
* The locking code (in particular, lock_deadlock_recursive() and
lock_rec_convert_impl_to_expl()) will access transactions associated
to other connections. The locks of transactions are protected by
-lock_sys->mutex and sometimes by trx->mutex. */
+lock_sys.mutex and sometimes by trx->mutex. */
/** Represents an instance of rollback segment along with its state variables.*/
struct trx_undo_ptr_t {
trx_rseg_t* rseg; /*!< rollback segment assigned to the
transaction, or NULL if not assigned
yet */
- trx_undo_t* insert_undo; /*!< pointer to the insert undo log, or
- NULL if no inserts performed yet */
- trx_undo_t* update_undo; /*!< pointer to the update undo log, or
- NULL if no update performed yet */
+ trx_undo_t* undo; /*!< pointer to the undo log, or
+ NULL if nothing logged yet */
+ trx_undo_t* old_insert; /*!< pointer to recovered
+ insert undo log, or NULL if no
+ INSERT transactions were
+ recovered from old-format undo logs */
};
/** An instance of temporary rollback segment. */
@@ -816,10 +764,23 @@ struct trx_rsegs_t {
};
struct trx_t {
+private:
+ /**
+ Count of references.
+
+ We can't release the locks nor commit the transaction until this reference
+ is 0. We can change the state to TRX_STATE_COMMITTED_IN_MEMORY to signify
+ that it is no longer "active".
+ */
+
+ int32_t n_ref;
+
+
+public:
TrxMutex mutex; /*!< Mutex protecting the fields
state and lock (except some fields
of lock, which are protected by
- lock_sys->mutex) */
+ lock_sys.mutex) */
trx_id_t id; /*!< transaction id */
@@ -828,7 +789,7 @@ struct trx_t {
transaction is moved to
COMMITTED_IN_MEMORY state.
Protected by trx_sys_t::mutex
- when trx->in_rw_trx_list. Initially
+ when trx is in rw_trx_hash. Initially
set to TRX_ID_MAX. */
/** State of the trx from the point of view of concurrency control
@@ -855,6 +816,9 @@ struct trx_t {
Recovered XA:
* NOT_STARTED -> PREPARED -> COMMITTED -> (freed)
+ Recovered XA followed by XA ROLLBACK:
+ * NOT_STARTED -> PREPARED -> ACTIVE -> COMMITTED -> (freed)
+
XA (2PC) (shutdown or disconnect before ROLLBACK or COMMIT):
* NOT_STARTED -> PREPARED -> (freed)
@@ -865,11 +829,11 @@ struct trx_t {
XA (2PC) transactions are always treated as non-autocommit.
- Transitions to ACTIVE or NOT_STARTED occur when
- !in_rw_trx_list (no trx_sys->mutex needed).
+ Transitions to ACTIVE or NOT_STARTED occur when transaction
+ is not in rw_trx_hash (no trx_sys.mutex needed).
Autocommit non-locking read-only transactions move between states
- without holding any mutex. They are !in_rw_trx_list.
+ without holding any mutex. They are not in rw_trx_hash.
All transactions, unless they are determined to be ac-nl-ro,
explicitly tagged as read-only or read-write, will first be put
@@ -878,16 +842,16 @@ struct trx_t {
do we remove it from the read-only list and put it on the read-write
list. During this switch we assign it a rollback segment.
- When a transaction is NOT_STARTED, it can be in_mysql_trx_list if
- it is a user transaction. It cannot be in rw_trx_list.
+ When a transaction is NOT_STARTED, it can be in trx_list. It cannot be
+ in rw_trx_hash.
- ACTIVE->PREPARED->COMMITTED is only possible when trx->in_rw_trx_list.
- The transition ACTIVE->PREPARED is protected by trx_sys->mutex.
+ ACTIVE->PREPARED->COMMITTED is only possible when trx is in rw_trx_hash.
+ The transition ACTIVE->PREPARED is protected by trx_sys.mutex.
ACTIVE->COMMITTED is possible when the transaction is in
- rw_trx_list.
+ rw_trx_hash.
- Transitions to COMMITTED are protected by both lock_sys->mutex
+ Transitions to COMMITTED are protected by both lock_sys.mutex
and trx->mutex.
NOTE: Some of these state change constraints are an overkill,
@@ -896,25 +860,16 @@ struct trx_t {
trx_state_t state;
- ReadView* read_view; /*!< consistent read view used in the
+ ReadView read_view; /*!< consistent read view used in the
transaction, or NULL if not yet set */
-
- UT_LIST_NODE_T(trx_t)
- trx_list; /*!< list of transactions;
- protected by trx_sys->mutex. */
- UT_LIST_NODE_T(trx_t)
- no_list; /*!< Required during view creation
- to check for the view limit for
- transactions that are committing */
-
trx_lock_t lock; /*!< Information about the transaction
locks and state. Protected by
- trx->mutex or lock_sys->mutex
+ trx->mutex or lock_sys.mutex
or both */
bool is_recovered; /*!< 0=normal transaction,
1=recovered, must be rolled back,
- protected by trx_sys->mutex when
- trx->in_rw_trx_list holds */
+ protected by trx_sys.mutex when
+ trx is in rw_trx_hash */
/* These fields are not protected by any mutex. */
@@ -993,7 +948,7 @@ struct trx_t {
contains a pointer to the latest file
name; this is NULL if binlog is not
used */
- int64_t mysql_log_offset;
+ ulonglong mysql_log_offset;
/*!< if MySQL binlog is used, this
field contains the end offset of the
binlog entry */
@@ -1006,21 +961,8 @@ struct trx_t {
statement uses, except those
in consistent read */
/*------------------------------*/
-#ifdef UNIV_DEBUG
- /** The following two fields are mutually exclusive. */
- /* @{ */
-
- bool in_rw_trx_list; /*!< true if in trx_sys->rw_trx_list */
- /* @} */
-#endif /* UNIV_DEBUG */
- UT_LIST_NODE_T(trx_t)
- mysql_trx_list; /*!< list of transactions created for
- MySQL; protected by trx_sys->mutex */
-#ifdef UNIV_DEBUG
- bool in_mysql_trx_list;
- /*!< true if in
- trx_sys->mysql_trx_list */
-#endif /* UNIV_DEBUG */
+ UT_LIST_NODE_T(trx_t) trx_list; /*!< list of all transactions;
+ protected by trx_sys.mutex */
/*------------------------------*/
dberr_t error_state; /*!< 0 if no error, otherwise error
number; NOTE That ONLY the thread
@@ -1044,12 +986,6 @@ struct trx_t {
trx_savepoints; /*!< savepoints set with SAVEPOINT ...,
oldest first */
/*------------------------------*/
- UndoMutex undo_mutex; /*!< mutex protecting the fields in this
- section (down to undo_no_arr), EXCEPT
- last_sql_stat_start, which can be
- accessed only when we know that there
- cannot be any activity in the undo
- logs! */
undo_no_t undo_no; /*!< next undo log record number to
assign; since the undo log is
private for a transaction, this
@@ -1057,21 +993,15 @@ struct trx_t {
with no gaps; thus it represents
the number of modified/inserted
rows in a transaction */
- ulint undo_rseg_space;
- /*!< space id where last undo record
- was written */
trx_savept_t last_sql_stat_start;
/*!< undo_no when the last sql statement
was started: in case of an error, trx
- is rolled back down to this undo
- number; see note at undo_mutex! */
+ is rolled back down to this number */
trx_rsegs_t rsegs; /* rollback segments for undo logging */
undo_no_t roll_limit; /*!< least undo number to undo during
a partial rollback; 0 otherwise */
-#ifdef UNIV_DEBUG
bool in_rollback; /*!< true when the transaction is
executing a partial or full rollback */
-#endif /* UNIV_DEBUG */
ulint pages_undone; /*!< number of undo log pages undone
since the last undo log truncation */
/*------------------------------*/
@@ -1083,7 +1013,7 @@ struct trx_t {
also in the lock list trx_locks. This
vector needs to be freed explicitly
when the trx instance is destroyed.
- Protected by lock_sys->mutex. */
+ Protected by lock_sys.mutex. */
/*------------------------------*/
bool read_only; /*!< true if transaction is flagged
as a READ-ONLY transaction.
@@ -1120,14 +1050,6 @@ struct trx_t {
const char* start_file; /*!< Filename where it was started */
#endif /* UNIV_DEBUG */
- lint n_ref; /*!< Count of references, protected
- by trx_t::mutex. We can't release the
- locks nor commit the transaction until
- this reference is 0. We can change
- the state to COMMITTED_IN_MEMORY to
- signify that it is no longer
- "active". */
-
XID* xid; /*!< X/Open XA transaction
identification to identify a
transaction branch */
@@ -1156,12 +1078,14 @@ struct trx_t {
os_event_t wsrep_event; /* event waited for in srv_conc_slot */
#endif /* WITH_WSREP */
+ rw_trx_hash_element_t *rw_trx_hash_element;
+ LF_PINS *rw_trx_hash_pins;
ulint magic_n;
/** @return whether any persistent undo log has been generated */
bool has_logged_persistent() const
{
- return(rsegs.m_redo.insert_undo || rsegs.m_redo.update_undo);
+ return(rsegs.m_redo.undo);
}
/** @return whether any undo log has been generated */
@@ -1170,6 +1094,13 @@ struct trx_t {
return(has_logged_persistent() || rsegs.m_noredo.undo);
}
+ /** @return whether any undo log has been generated or
+ recovered */
+ bool has_logged_or_recovered() const
+ {
+ return(has_logged() || rsegs.m_redo.old_insert);
+ }
+
/** @return rollback segment for modifying temporary tables */
trx_rseg_t* get_temp_rseg()
{
@@ -1181,6 +1112,33 @@ struct trx_t {
return(assign_temp_rseg());
}
+
+ bool is_referenced()
+ {
+ return my_atomic_load32_explicit(&n_ref, MY_MEMORY_ORDER_RELAXED) > 0;
+ }
+
+
+ void reference()
+ {
+#ifdef UNIV_DEBUG
+ int32_t old_n_ref=
+#endif
+ my_atomic_add32_explicit(&n_ref, 1, MY_MEMORY_ORDER_RELAXED);
+ ut_ad(old_n_ref >= 0);
+ }
+
+
+ void release_reference()
+ {
+#ifdef UNIV_DEBUG
+ int32_t old_n_ref=
+#endif
+ my_atomic_add32_explicit(&n_ref, -1, MY_MEMORY_ORDER_RELAXED);
+ ut_ad(old_n_ref > 0);
+ }
+
+
private:
/** Assign a rollback segment for modifying temporary tables.
@return the assigned rollback segment */
@@ -1266,32 +1224,6 @@ struct commit_node_t{
mutex_exit(&t->mutex); \
} while (0)
-/**
-Increase the reference count. If the transaction is in state
-TRX_STATE_COMMITTED_IN_MEMORY then the transaction is considered
-committed and the reference count is not incremented.
-@param id the transaction ID; 0 if not to increment the reference count
-@param trx Transaction that is being referenced
-@return trx
-@retval NULL if the transaction is no longer active */
-inline trx_t* trx_reference(trx_id_t id, trx_t* trx)
-{
- trx_mutex_enter(trx);
-
- if (trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)) {
- trx = NULL;
- } else if (!id) {
- } else if (trx->id != id) {
- trx = NULL;
- } else {
- ut_ad(trx->n_ref >= 0);
- ++trx->n_ref;
- }
-
- trx_mutex_exit(trx);
- return(trx);
-}
-
#include "trx0trx.ic"
#endif
diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic
index dd42c8b8368..6589aca4e77 100644
--- a/storage/innobase/include/trx0trx.ic
+++ b/storage/innobase/include/trx0trx.ic
@@ -24,13 +24,11 @@ The transaction
Created 3/26/1996 Heikki Tuuri
*******************************************************/
-#include "read0read.h"
-
/**********************************************************************//**
Determines if a transaction is in the given state.
-The caller must hold trx_sys->mutex, or it must be the thread
+The caller must hold trx_sys.mutex, or it must be the thread
that is serving a running transaction.
-A running RW transaction must be in trx_sys->rw_trx_list.
+A running RW transaction must be in trx_sys.rw_trx_hash.
@return TRUE if trx->state == state */
UNIV_INLINE
bool
@@ -69,8 +67,6 @@ trx_state_eq(
|| (relaxed
&& thd_get_error_number(trx->mysql_thd)));
- ut_ad(!trx->in_rw_trx_list);
-
return(true);
}
ut_error;
@@ -209,42 +205,3 @@ ok:
trx->ddl = true;
trx->dict_operation = op;
}
-
-/**
-Release the transaction. Decrease the reference count.
-@param trx Transaction that is being released */
-UNIV_INLINE
-void
-trx_release_reference(
- trx_t* trx)
-{
- trx_mutex_enter(trx);
-
- ut_ad(trx->n_ref > 0);
- --trx->n_ref;
-
- trx_mutex_exit(trx);
-}
-
-
-/**
-@param trx Get the active view for this transaction, if one exists
-@return the transaction's read view or NULL if one not assigned. */
-UNIV_INLINE
-ReadView*
-trx_get_read_view(
- trx_t* trx)
-{
- return(!MVCC::is_view_active(trx->read_view) ? NULL : trx->read_view);
-}
-
-/**
-@param trx Get the active view for this transaction, if one exists
-@return the transaction's read view or NULL if one not assigned. */
-UNIV_INLINE
-const ReadView*
-trx_get_read_view(
- const trx_t* trx)
-{
- return(!MVCC::is_view_active(trx->read_view) ? NULL : trx->read_view);
-}
diff --git a/storage/innobase/include/trx0types.h b/storage/innobase/include/trx0types.h
index b42871bef31..abc92a6edec 100644
--- a/storage/innobase/include/trx0types.h
+++ b/storage/innobase/include/trx0types.h
@@ -31,12 +31,9 @@ Created 3/26/1996 Heikki Tuuri
#include "ut0mutex.h"
#include "ut0new.h"
-#include <set>
#include <queue>
#include <vector>
-//#include <unordered_set>
-
/** printf(3) format used for printing DB_TRX_ID and other system fields */
#define TRX_ID_FMT IB_ID_FMT
@@ -95,8 +92,6 @@ enum trx_dict_op_t {
struct trx_t;
/** The locks and state of an active transaction */
struct trx_lock_t;
-/** Transaction system */
-struct trx_sys_t;
/** Signal */
struct trx_sig_t;
/** Rollback segment */
@@ -120,9 +115,6 @@ typedef ib_id_t roll_ptr_t;
/** Undo number */
typedef ib_id_t undo_no_t;
-/** Maximum transaction identifier */
-#define TRX_ID_MAX IB_ID_MAX
-
/** Transaction savepoint */
struct trx_savept_t{
undo_no_t least_undo_no; /*!< least undo number to undo */
@@ -130,8 +122,6 @@ struct trx_savept_t{
/** File objects */
/* @{ */
-/** Transaction system header */
-typedef byte trx_sysf_t;
/** Rollback segment header */
typedef byte trx_rsegf_t;
/** Undo segment header */
@@ -148,56 +138,8 @@ typedef byte trx_undo_rec_t;
typedef ib_mutex_t RsegMutex;
typedef ib_mutex_t TrxMutex;
-typedef ib_mutex_t UndoMutex;
typedef ib_mutex_t PQMutex;
typedef ib_mutex_t TrxSysMutex;
typedef std::vector<trx_id_t, ut_allocator<trx_id_t> > trx_ids_t;
-
-/** Mapping read-write transactions from id to transaction instance, for
-creating read views and during trx id lookup for MVCC and locking. */
-struct TrxTrack {
- explicit TrxTrack(trx_id_t id, trx_t* trx = NULL)
- :
- m_id(id),
- m_trx(trx)
- {
- // Do nothing
- }
-
- trx_id_t m_id;
- trx_t* m_trx;
-};
-
-struct TrxTrackHash {
- size_t operator()(const TrxTrack& key) const
- {
- return(size_t(key.m_id));
- }
-};
-
-/**
-Comparator for TrxMap */
-struct TrxTrackHashCmp {
-
- bool operator() (const TrxTrack& lhs, const TrxTrack& rhs) const
- {
- return(lhs.m_id == rhs.m_id);
- }
-};
-
-/**
-Comparator for TrxMap */
-struct TrxTrackCmp {
-
- bool operator() (const TrxTrack& lhs, const TrxTrack& rhs) const
- {
- return(lhs.m_id < rhs.m_id);
- }
-};
-
-//typedef std::unordered_set<TrxTrack, TrxTrackHash, TrxTrackHashCmp> TrxIdSet;
-typedef std::set<TrxTrack, TrxTrackCmp, ut_allocator<TrxTrack> >
- TrxIdSet;
-
#endif /* trx0types_h */
diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h
index f738af4b454..16e2a384424 100644
--- a/storage/innobase/include/trx0undo.h
+++ b/storage/innobase/include/trx0undo.h
@@ -118,17 +118,6 @@ page_t*
trx_undo_page_get_s_latched(const page_id_t& page_id, mtr_t* mtr);
/******************************************************************//**
-Returns the previous undo record on the page in the specified log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_prev_rec(
-/*=======================*/
- trx_undo_rec_t* rec, /*!< in: undo log record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset);/*!< in: undo log header offset on page */
-/******************************************************************//**
Returns the next undo log record on the page in the specified log, or
NULL if none exists.
@return pointer to record, NULL if none */
@@ -139,28 +128,6 @@ trx_undo_page_get_next_rec(
trx_undo_rec_t* rec, /*!< in: undo log record */
ulint page_no,/*!< in: undo log header page number */
ulint offset);/*!< in: undo log header offset on page */
-/******************************************************************//**
-Returns the last undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_last_rec(
-/*=======================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset); /*!< in: undo log header offset on page */
-/******************************************************************//**
-Returns the first undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_first_rec(
-/*========================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset);/*!< in: undo log header offset on page */
/***********************************************************************//**
Gets the previous record in an undo log.
@return undo log record, the page s-latched, NULL if none */
@@ -192,20 +159,18 @@ trx_undo_get_next_rec(
@return undo log record, the page latched, NULL if none */
trx_undo_rec_t*
trx_undo_get_first_rec(
- ulint space,
+ fil_space_t* space,
ulint page_no,
ulint offset,
ulint mode,
mtr_t* mtr);
/** Allocate an undo log page.
-@param[in,out] trx transaction
@param[in,out] undo undo log
@param[in,out] mtr mini-transaction that does not hold any page latch
@return X-latched block if success
@retval NULL on failure */
-buf_block_t*
-trx_undo_add_page(trx_t* trx, trx_undo_t* undo, mtr_t* mtr)
+buf_block_t* trx_undo_add_page(trx_undo_t* undo, mtr_t* mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/** Free the last undo log page. The caller must hold the rseg mutex.
@@ -238,32 +203,28 @@ trx_undo_truncate_start(
ulint hdr_page_no,
ulint hdr_offset,
undo_no_t limit);
-/********************************************************************//**
-Initializes the undo log lists for a rollback segment memory copy.
-This function is only called when the database is started or a new
-rollback segment created.
-@return the combined size of undo log segments in pages */
-ulint
-trx_undo_lists_init(
-/*================*/
- trx_rseg_t* rseg); /*!< in: rollback segment memory object */
+/** Assign an undo log for a persistent transaction.
+A new undo log is created or a cached undo log reused.
+@param[in,out] trx transaction
+@param[out] err error code
+@param[in,out] mtr mini-transaction
+@return the undo log block
+@retval NULL on error */
+buf_block_t*
+trx_undo_assign(trx_t* trx, dberr_t* err, mtr_t* mtr)
+ MY_ATTRIBUTE((nonnull));
/** Assign an undo log for a transaction.
A new undo log is created or a cached undo log reused.
@param[in,out] trx transaction
@param[in] rseg rollback segment
@param[out] undo the undo log
-@param[in] type TRX_UNDO_INSERT or TRX_UNDO_UPDATE
-@retval DB_SUCCESS on success
-@retval DB_TOO_MANY_CONCURRENT_TRXS
-@retval DB_OUT_OF_FILE_SPACE
-@retval DB_READ_ONLY
-@retval DB_OUT_OF_MEMORY */
-dberr_t
-trx_undo_assign_undo(
- trx_t* trx,
- trx_rseg_t* rseg,
- trx_undo_t** undo,
- ulint type)
+@param[out] err error code
+@param[in,out] mtr mini-transaction
+@return the undo log block
+@retval NULL on error */
+buf_block_t*
+trx_undo_assign_low(trx_t* trx, trx_rseg_t* rseg, trx_undo_t** undo,
+ dberr_t* err, mtr_t* mtr)
MY_ATTRIBUTE((nonnull, warn_unused_result));
/******************************************************************//**
Sets the state of the undo log segment at a transaction finish.
@@ -276,7 +237,7 @@ trx_undo_set_state_at_finish(
/** Set the state of the undo log segment at a XA PREPARE or XA ROLLBACK.
@param[in,out] trx transaction
-@param[in,out] undo insert_undo or update_undo log
+@param[in,out] undo undo log
@param[in] rollback false=XA PREPARE, true=XA ROLLBACK
@param[in,out] mtr mini-transaction
@return undo log segment header page, x-latched */
@@ -287,20 +248,7 @@ trx_undo_set_state_at_prepare(
bool rollback,
mtr_t* mtr);
-/**********************************************************************//**
-Adds the update undo log header as the first in the history list, and
-frees the memory object, or puts it to the list of cached update undo log
-segments. */
-void
-trx_undo_update_cleanup(
-/*====================*/
- trx_t* trx, /*!< in: trx owning the update
- undo log */
- page_t* undo_page, /*!< in: update undo log header page,
- x-latched */
- mtr_t* mtr); /*!< in: mtr */
-
-/** Free an insert or temporary undo log after commit or rollback.
+/** Free an old insert or temporary undo log after commit or rollback.
The information is not needed after a commit or rollback, therefore
the data can be discarded.
@param[in,out] undo undo log
@@ -308,26 +256,31 @@ the data can be discarded.
void
trx_undo_commit_cleanup(trx_undo_t* undo, bool is_temp);
-/********************************************************************//**
-At shutdown, frees the undo logs of a PREPARED transaction. */
+/** At shutdown, frees the undo logs of a transaction. */
void
-trx_undo_free_prepared(
-/*===================*/
- trx_t* trx) /*!< in/out: PREPARED transaction */
- ATTRIBUTE_COLD __attribute__((nonnull));
-
-/***********************************************************//**
-Parses the redo log entry of an undo log page initialization.
+trx_undo_free_at_shutdown(trx_t *trx);
+
+/** Parse MLOG_UNDO_INIT.
+@param[in] ptr log record
+@param[in] end_ptr end of log record buffer
+@param[in,out] page page or NULL
+@return end of log record
+@retval NULL if the log record is incomplete */
+byte*
+trx_undo_parse_page_init(const byte* ptr, const byte* end_ptr, page_t* page);
+/** Parse MLOG_UNDO_HDR_REUSE for crash-upgrade from MariaDB 10.2.
+@param[in] ptr redo log record
+@param[in] end_ptr end of log buffer
+@param[in,out] page undo page or NULL
@return end of log record or NULL */
byte*
-trx_undo_parse_page_init(
-/*=====================*/
- const byte* ptr, /*!< in: buffer */
- const byte* end_ptr,/*!< in: buffer end */
- page_t* page, /*!< in: page or NULL */
- mtr_t* mtr); /*!< in: mtr or NULL */
-/** Parse the redo log entry of an undo log page header create or reuse.
-@param[in] type MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE
+trx_undo_parse_page_header_reuse(
+ const byte* ptr,
+ const byte* end_ptr,
+ page_t* page);
+
+/** Parse the redo log entry of an undo log page header creation.
@param[in] ptr redo log record
@param[in] end_ptr end of log buffer
@param[in,out] page page frame or NULL
@@ -335,17 +288,19 @@ trx_undo_parse_page_init(
@return end of log record or NULL */
byte*
trx_undo_parse_page_header(
- mlog_id_t type,
const byte* ptr,
const byte* end_ptr,
page_t* page,
mtr_t* mtr);
-/************************************************************************
-Frees an undo log memory copy. */
-void
-trx_undo_mem_free(
-/*==============*/
- trx_undo_t* undo); /* in: the undo object to be freed */
+/** Read an undo log when starting up the database.
+@param[in,out] rseg rollback segment
+@param[in] id rollback segment slot
+@param[in] page_no undo log segment page number
+@param[in,out] max_trx_id the largest observed transaction ID
+@return size of the undo log in pages */
+ulint
+trx_undo_mem_create_at_db_start(trx_rseg_t* rseg, ulint id, ulint page_no,
+ trx_id_t& max_trx_id);
#endif /* !UNIV_INNOCHECKSUM */
@@ -368,25 +323,15 @@ trx_undo_mem_free(
#ifndef UNIV_INNOCHECKSUM
-/** Transaction undo log memory object; this is protected by the undo_mutex
-in the corresponding transaction object */
+/** Transaction undo log memory object; modified by the thread associated
+with the transaction. */
struct trx_undo_t {
/*-----------------------------*/
ulint id; /*!< undo log slot number within the
rollback segment */
- ulint type; /*!< TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
ulint state; /*!< state of the corresponding undo log
segment */
- ibool del_marks; /*!< relevant only in an update undo
- log: this is TRUE if the transaction may
- have delete marked records, because of
- a delete of a row or an update of an
- indexed field; purge is then
- necessary; also TRUE if the transaction
- has updated an externally stored
- field */
trx_id_t trx_id; /*!< id of the trx assigned to the undo
log */
XID xid; /*!< X/Open XA transaction
@@ -396,8 +341,6 @@ struct trx_undo_t {
id */
trx_rseg_t* rseg; /*!< rseg where the undo log belongs */
/*-----------------------------*/
- ulint space; /*!< space id where the undo log
- placed */
ulint hdr_page_no; /*!< page number of the header page in
the undo log */
ulint hdr_offset; /*!< header offset of the undo log on
@@ -407,8 +350,6 @@ struct trx_undo_t {
top_page_no during a rollback */
ulint size; /*!< current size in pages */
/*-----------------------------*/
- ulint empty; /*!< TRUE if the stack of undo log
- records is currently empty */
ulint top_page_no; /*!< page number where the latest undo
log record was catenated; during
rollback the page from which the latest
@@ -416,11 +357,16 @@ struct trx_undo_t {
ulint top_offset; /*!< offset of the latest undo record,
i.e., the topmost element in the undo
log if we think of it as a stack */
- undo_no_t top_undo_no; /*!< undo number of the latest record */
+ undo_no_t top_undo_no; /*!< undo number of the latest record
+ (IB_ID_MAX if the undo log is empty) */
buf_block_t* guess_block; /*!< guess for the buffer block where
the top page might reside */
ulint withdraw_clock; /*!< the withdraw clock value of the
buffer pool when guess_block was stored */
+
+ /** @return whether the undo log is empty */
+ bool empty() const { return top_undo_no == IB_ID_MAX; }
+
/*-----------------------------*/
UT_LIST_NODE_T(trx_undo_t) undo_list;
/*!< undo log objects in the rollback
@@ -433,8 +379,8 @@ struct trx_undo_t {
/*-------------------------------------------------------------*/
/** Transaction undo log page header offsets */
/* @{ */
-#define TRX_UNDO_PAGE_TYPE 0 /*!< TRX_UNDO_INSERT or
- TRX_UNDO_UPDATE */
+#define TRX_UNDO_PAGE_TYPE 0 /*!< unused; 0 (before MariaDB 10.3.1:
+ TRX_UNDO_INSERT or TRX_UNDO_UPDATE) */
#define TRX_UNDO_PAGE_START 2 /*!< Byte offset where the undo log
records for the LATEST transaction
start on this page (remember that
@@ -455,7 +401,7 @@ struct trx_undo_t {
at most this many bytes used; we must leave space at least for one new undo
log header on the page */
-#define TRX_UNDO_PAGE_REUSE_LIMIT (3 * UNIV_PAGE_SIZE / 4)
+#define TRX_UNDO_PAGE_REUSE_LIMIT (3 << (srv_page_size_shift - 2))
/* An update undo log segment may contain several undo logs on its first page
if the undo logs took so little space that the segment could be cached and
@@ -495,14 +441,23 @@ log segment */
page of an update undo log segment. */
/* @{ */
/*-------------------------------------------------------------*/
-#define TRX_UNDO_TRX_ID 0 /*!< Transaction id */
-#define TRX_UNDO_TRX_NO 8 /*!< Transaction number of the
- transaction; defined only if the log
- is in a history list */
-#define TRX_UNDO_DEL_MARKS 16 /*!< Defined only in an update undo
- log: TRUE if the transaction may have
- done delete markings of records, and
- thus purge is necessary */
+/** Transaction start identifier, or 0 if the undo log segment has been
+completely purged and trx_purge_free_segment() has started freeing it */
+#define TRX_UNDO_TRX_ID 0
+/** Transaction end identifier (if the log is in a history list),
+or 0 if the transaction has not been committed */
+#define TRX_UNDO_TRX_NO 8
+/** Before MariaDB 10.3.1, when purge did not reset DB_TRX_ID of
+surviving user records, this used to be called TRX_UNDO_DEL_MARKS.
+
+The value 1 indicates that purge needs to process the undo log segment.
+The value 0 indicates that all of it has been processed, and
+trx_purge_free_segment() has been invoked, so the log is not safe to access.
+
+Before MariaDB 10.3.1, a log segment may carry the value 0 even before
+trx_purge_free_segment() was called, for those undo log records for
+which purge would not result in removing delete-marked records. */
+#define TRX_UNDO_NEEDS_PURGE 16
#define TRX_UNDO_LOG_START 18 /*!< Offset of the first undo log record
of this log on the header page; purge
may remove undo log record from the
@@ -532,7 +487,7 @@ page of an update undo log segment. */
#define TRX_UNDO_LOG_OLD_HDR_SIZE (34 + FLST_NODE_SIZE)
/* Note: the writing of the undo log old header is coded by a log record
-MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE. The appending of an XID to the
+MLOG_UNDO_HDR_CREATE. The appending of an XID to the
header is logged separately. In this sense, the XID is not really a member
of the undo log header. TODO: do not append the XID to the log header if XA
is not needed by the user. The XID wastes about 150 bytes of space in every
diff --git a/storage/innobase/include/trx0undo.ic b/storage/innobase/include/trx0undo.ic
index 0285c212bdd..630638f6b7b 100644
--- a/storage/innobase/include/trx0undo.ic
+++ b/storage/innobase/include/trx0undo.ic
@@ -1,7 +1,7 @@
/*****************************************************************************
Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved.
-Copyright (c) 2017, MariaDB Corporation.
+Copyright (c) 2017, 2018, MariaDB Corporation.
This program is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free Software
@@ -40,9 +40,7 @@ trx_undo_build_roll_ptr(
ulint offset) /*!< in: offset of the undo entry within page */
{
roll_ptr_t roll_ptr;
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
+ compile_time_assert(DATA_ROLL_PTR_LEN == 7);
ut_ad(is_insert == 0 || is_insert == 1);
ut_ad(rseg_id < TRX_SYS_N_RSEGS);
ut_ad(offset < 65536);
@@ -67,12 +65,7 @@ trx_undo_decode_roll_ptr(
ulint* offset) /*!< out: offset of the undo
entry within page */
{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
+ compile_time_assert(DATA_ROLL_PTR_LEN == 7);
ut_ad(roll_ptr < (1ULL << 56));
*offset = (ulint) roll_ptr & 0xFFFF;
roll_ptr >>= 16;
@@ -92,14 +85,9 @@ trx_undo_roll_ptr_is_insert(
/*========================*/
roll_ptr_t roll_ptr) /*!< in: roll pointer */
{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
- ut_ad(roll_ptr < (1ULL << 56));
- return((ibool) (roll_ptr >> 55));
+ compile_time_assert(DATA_ROLL_PTR_LEN == 7);
+ ut_ad(roll_ptr < (1ULL << (ROLL_PTR_INSERT_FLAG_POS + 1)));
+ return((ibool) (roll_ptr >> ROLL_PTR_INSERT_FLAG_POS));
}
/***********************************************************************//**
@@ -111,10 +99,8 @@ trx_undo_trx_id_is_insert(
/*======================*/
const byte* trx_id) /*!< in: DB_TRX_ID, followed by DB_ROLL_PTR */
{
-#if DATA_TRX_ID + 1 != DATA_ROLL_PTR
-# error
-#endif
- return(static_cast<bool>(trx_id[DATA_TRX_ID_LEN] >> 7));
+ compile_time_assert(DATA_TRX_ID + 1 == DATA_ROLL_PTR);
+ return bool(trx_id[DATA_TRX_ID_LEN] >> 7);
}
/*****************************************************************//**
@@ -129,9 +115,7 @@ trx_write_roll_ptr(
written */
roll_ptr_t roll_ptr) /*!< in: roll ptr */
{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
+ compile_time_assert(DATA_ROLL_PTR_LEN == 7);
mach_write_to_7(ptr, roll_ptr);
}
@@ -146,9 +130,7 @@ trx_read_roll_ptr(
/*==============*/
const byte* ptr) /*!< in: pointer to memory from where to read */
{
-#if DATA_ROLL_PTR_LEN != 7
-# error "DATA_ROLL_PTR_LEN != 7"
-#endif
+ compile_time_assert(DATA_ROLL_PTR_LEN == 7);
return(mach_read_from_7(ptr));
}
@@ -184,89 +166,24 @@ trx_undo_page_get_s_latched(const page_id_t& page_id, mtr_t* mtr)
return(buf_block_get_frame(block));
}
-/******************************************************************//**
-Returns the start offset of the undo log records of the specified undo
-log on the page.
-@return start offset */
-UNIV_INLINE
-ulint
-trx_undo_page_get_start(
-/*====================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- ulint start;
-
- if (page_no == page_get_page_no(undo_page)) {
-
- start = mach_read_from_2(offset + undo_page
- + TRX_UNDO_LOG_START);
- } else {
- start = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE;
- }
-
- return(start);
-}
-
-/******************************************************************//**
-Returns the end offset of the undo log records of the specified undo
-log on the page.
+/** Determine the end offset of undo log records of an undo log page.
+@param[in] undo_page undo log page
+@param[in] page_no undo log header page number
+@param[in] offset undo log header offset
@return end offset */
-UNIV_INLINE
-ulint
-trx_undo_page_get_end(
-/*==================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
+inline
+uint16_t
+trx_undo_page_get_end(const page_t* undo_page, ulint page_no, ulint offset)
{
- trx_ulogf_t* log_hdr;
- ulint end;
-
if (page_no == page_get_page_no(undo_page)) {
-
- log_hdr = undo_page + offset;
-
- end = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG);
-
- if (end == 0) {
- end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
+ if (uint16_t end = mach_read_from_2(TRX_UNDO_NEXT_LOG
+ + offset + undo_page)) {
+ return end;
}
- } else {
- end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
- + TRX_UNDO_PAGE_FREE);
- }
-
- return(end);
-}
-
-/******************************************************************//**
-Returns the previous undo record on the page in the specified log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_prev_rec(
-/*=======================*/
- trx_undo_rec_t* rec, /*!< in: undo log record */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- page_t* undo_page;
- ulint start;
-
- undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE);
-
- start = trx_undo_page_get_start(undo_page, page_no, offset);
-
- if (start + undo_page == rec) {
-
- return(NULL);
}
- return(undo_page + mach_read_from_2(rec - 2));
+ return mach_read_from_2(TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE
+ + undo_page);
}
/******************************************************************//**
@@ -285,7 +202,7 @@ trx_undo_page_get_next_rec(
ulint end;
ulint next;
- undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE);
+ undo_page = (page_t*) ut_align_down(rec, srv_page_size);
end = trx_undo_page_get_end(undo_page, page_no, offset);
@@ -298,55 +215,3 @@ trx_undo_page_get_next_rec(
return(undo_page + next);
}
-
-/******************************************************************//**
-Returns the last undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_last_rec(
-/*=======================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- ulint start;
- ulint end;
-
- start = trx_undo_page_get_start(undo_page, page_no, offset);
- end = trx_undo_page_get_end(undo_page, page_no, offset);
-
- if (start == end) {
-
- return(NULL);
- }
-
- return(undo_page + mach_read_from_2(undo_page + end - 2));
-}
-
-/******************************************************************//**
-Returns the first undo record on the page in the specified undo log, or
-NULL if none exists.
-@return pointer to record, NULL if none */
-UNIV_INLINE
-trx_undo_rec_t*
-trx_undo_page_get_first_rec(
-/*========================*/
- page_t* undo_page,/*!< in: undo log page */
- ulint page_no,/*!< in: undo log header page number */
- ulint offset) /*!< in: undo log header offset on page */
-{
- ulint start;
- ulint end;
-
- start = trx_undo_page_get_start(undo_page, page_no, offset);
- end = trx_undo_page_get_end(undo_page, page_no, offset);
-
- if (start == end) {
-
- return(NULL);
- }
-
- return(undo_page + start);
-}
diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
index 10eb83289da..bf1d245a65e 100644
--- a/storage/innobase/include/univ.i
+++ b/storage/innobase/include/univ.i
@@ -50,16 +50,14 @@ calculated in make_version_string() in sql/sql_show.cc like this:
because the version is shown with only one dot, we skip the last
component, i.e. we show M.N.P as M.N */
#define INNODB_VERSION_SHORT \
- (INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR)
+ (MYSQL_VERSION_MAJOR << 8 | MYSQL_VERSION_MINOR)
#define INNODB_VERSION_STR \
- IB_TO_STR(INNODB_VERSION_MAJOR) "." \
- IB_TO_STR(INNODB_VERSION_MINOR) "." \
- IB_TO_STR(INNODB_VERSION_BUGFIX)
+ IB_TO_STR(MYSQL_VERSION_MAJOR) "." \
+ IB_TO_STR(MYSQL_VERSION_MINOR) "." \
+ IB_TO_STR(MYSQL_VERSION_PATCH)
-#define REFMAN "http://dev.mysql.com/doc/refman/" \
- IB_TO_STR(INNODB_VERSION_MAJOR) "." \
- IB_TO_STR(INNODB_VERSION_MINOR) "/en/"
+#define REFMAN "http://dev.mysql.com/doc/refman/5.7/en/"
/** How far ahead should we tell the service manager the timeout
(time in seconds) */
@@ -172,9 +170,8 @@ for all cases. This is used by ut0lst.h related code. */
/* When this macro is defined then additional test functions will be
compiled. These functions live at the end of each relevant source file
and have "test_" prefix. These functions can be called from the end of
-innobase_init() or they can be called from gdb after
-innobase_start_or_create_for_mysql() has executed using the call
-command. */
+innodb_init() or they can be called from gdb after srv_start() has executed
+using the call command. */
/*
#define UNIV_COMPILE_TEST_FUNCS
#define UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR
@@ -275,33 +272,6 @@ management to ensure correct alignment for doubles etc. */
========================
*/
-/** There are currently two InnoDB file formats which are used to group
-features with similar restrictions and dependencies. Using an enum allows
-switch statements to give a compiler warning when a new one is introduced. */
-enum innodb_file_formats_enum {
- /** Antelope File Format: InnoDB/MySQL up to 5.1.
- This format includes REDUNDANT and COMPACT row formats */
- UNIV_FORMAT_A = 0,
-
- /** Barracuda File Format: Introduced in InnoDB plugin for 5.1:
- This format includes COMPRESSED and DYNAMIC row formats. It
- includes the ability to create secondary indexes from data that
- is not on the clustered index page and the ability to store more
- data off the clustered index page. */
- UNIV_FORMAT_B = 1
-};
-
-typedef enum innodb_file_formats_enum innodb_file_formats_t;
-
-/** Minimum supported file format */
-#define UNIV_FORMAT_MIN UNIV_FORMAT_A
-
-/** Maximum supported file format */
-#define UNIV_FORMAT_MAX UNIV_FORMAT_B
-
-/** The 2-logarithm of UNIV_PAGE_SIZE: */
-#define UNIV_PAGE_SIZE_SHIFT srv_page_size_shift
-
#ifdef HAVE_LZO
#define IF_LZO(A,B) A
#else
@@ -338,32 +308,29 @@ typedef enum innodb_file_formats_enum innodb_file_formats_t;
#define IF_PUNCH_HOLE(A,B) B
#endif
-/** The universal page size of the database */
-#define UNIV_PAGE_SIZE ((ulint) srv_page_size)
-
/** log2 of smallest compressed page size (1<<10 == 1024 bytes)
Note: This must never change! */
-#define UNIV_ZIP_SIZE_SHIFT_MIN 10
+#define UNIV_ZIP_SIZE_SHIFT_MIN 10U
/** log2 of largest compressed page size (1<<14 == 16384 bytes).
A compressed page directory entry reserves 14 bits for the start offset
and 2 bits for flags. This limits the uncompressed page size to 16k.
*/
-#define UNIV_ZIP_SIZE_SHIFT_MAX 14
+#define UNIV_ZIP_SIZE_SHIFT_MAX 14U
/* Define the Min, Max, Default page sizes. */
/** Minimum Page Size Shift (power of 2) */
-#define UNIV_PAGE_SIZE_SHIFT_MIN 12
+#define UNIV_PAGE_SIZE_SHIFT_MIN 12U
/** log2 of largest page size (1<<16 == 64436 bytes). */
/** Maximum Page Size Shift (power of 2) */
-#define UNIV_PAGE_SIZE_SHIFT_MAX 16
+#define UNIV_PAGE_SIZE_SHIFT_MAX 16U
/** log2 of default page size (1<<14 == 16384 bytes). */
/** Default Page Size Shift (power of 2) */
-#define UNIV_PAGE_SIZE_SHIFT_DEF 14
+#define UNIV_PAGE_SIZE_SHIFT_DEF 14U
/** Original 16k InnoDB Page Size Shift, in case the default changes */
-#define UNIV_PAGE_SIZE_SHIFT_ORIG 14
+#define UNIV_PAGE_SIZE_SHIFT_ORIG 14U
/** Original 16k InnoDB Page Size as an ssize (log2 - 9) */
-#define UNIV_PAGE_SSIZE_ORIG (UNIV_PAGE_SIZE_SHIFT_ORIG - 9)
+#define UNIV_PAGE_SSIZE_ORIG (UNIV_PAGE_SIZE_SHIFT_ORIG - 9U)
/** Minimum page size InnoDB currently supports. */
#define UNIV_PAGE_SIZE_MIN (1U << UNIV_PAGE_SIZE_SHIFT_MIN)
@@ -383,13 +350,13 @@ and 2 bits for flags. This limits the uncompressed page size to 16k.
/** Largest possible ssize for an uncompressed page.
(The convention 'ssize' is used for 'log2 minus 9' or the number of
shifts starting with 512.)
-This max number varies depending on UNIV_PAGE_SIZE. */
+This max number varies depending on srv_page_size. */
#define UNIV_PAGE_SSIZE_MAX \
- static_cast<ulint>(UNIV_PAGE_SIZE_SHIFT - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
+ ulint(srv_page_size_shift - UNIV_ZIP_SIZE_SHIFT_MIN + 1U)
/** Smallest possible ssize for an uncompressed page. */
#define UNIV_PAGE_SSIZE_MIN \
- static_cast<ulint>(UNIV_PAGE_SIZE_SHIFT_MIN - UNIV_ZIP_SIZE_SHIFT_MIN + 1)
+ ulint(UNIV_PAGE_SIZE_SHIFT_MIN - UNIV_ZIP_SIZE_SHIFT_MIN + 1U)
/** Maximum number of parallel threads in a parallelized operation */
#define UNIV_MAX_PARALLELISM 32
@@ -494,7 +461,7 @@ typedef ib_uint64_t lsn_t;
#define UINT64_UNDEFINED ((ib_uint64_t)(-1))
/** The bitmask of 32-bit unsigned integer */
-#define ULINT32_MASK 0xFFFFFFFF
+#define ULINT32_MASK 0xFFFFFFFFU
/** The undefined 32-bit unsigned integer */
#define ULINT32_UNDEFINED ULINT32_MASK
diff --git a/storage/innobase/include/ut0byte.ic b/storage/innobase/include/ut0byte.ic
index 9c0cd6ee3c3..1ef90eca416 100644
--- a/storage/innobase/include/ut0byte.ic
+++ b/storage/innobase/include/ut0byte.ic
@@ -144,9 +144,6 @@ ut_bit_get_nth(
ulint n) /*!< in: nth bit requested */
{
ut_ad(n < 8 * sizeof(ulint));
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
return(1 & (a >> n));
}
@@ -162,9 +159,6 @@ ut_bit_set_nth(
ibool val) /*!< in: value for the bit to set */
{
ut_ad(n < 8 * sizeof(ulint));
-#if TRUE != 1
-# error "TRUE != 1"
-#endif
if (val) {
return(((ulint) 1 << n) | a);
} else {
diff --git a/storage/innobase/include/ut0crc32.h b/storage/innobase/include/ut0crc32.h
index 36b389b5bd2..32ad066f85a 100644
--- a/storage/innobase/include/ut0crc32.h
+++ b/storage/innobase/include/ut0crc32.h
@@ -47,14 +47,11 @@ typedef uint32_t (*ut_crc32_func_t)(const byte* ptr, ulint len);
/** Pointer to CRC32 calculation function. */
extern ut_crc32_func_t ut_crc32;
-/** Pointer to CRC32 calculation function, which uses big-endian byte order
+/** CRC32 calculation function, which uses big-endian byte order
when converting byte strings to integers internally. */
-extern ut_crc32_func_t ut_crc32_legacy_big_endian;
-
-/** Pointer to CRC32-byte-by-byte calculation function (byte order agnostic,
-but very slow). */
-extern ut_crc32_func_t ut_crc32_byte_by_byte;
+extern uint32_t ut_crc32_legacy_big_endian(const byte* buf, ulint len);
+/** Text description of CRC32 implementation */
extern const char* ut_crc32_implementation;
#endif /* ut0crc32_h */
diff --git a/storage/innobase/include/ut0dbg.h b/storage/innobase/include/ut0dbg.h
index fd9a064ba35..6672be62617 100644
--- a/storage/innobase/include/ut0dbg.h
+++ b/storage/innobase/include/ut0dbg.h
@@ -61,8 +61,8 @@ ut_dbg_assertion_failed(
ut_dbg_assertion_failed(0, __FILE__, __LINE__)
/** Debug assertion */
-#define ut_ad DBUG_ASSERT
-#ifdef UNIV_DEBUG
+#define ut_ad DBUG_SLOW_ASSERT
+#if defined(UNIV_DEBUG) || !defined(DBUG_OFF)
/** Debug statement. Does nothing unless UNIV_DEBUG is defined. */
#define ut_d(EXPR) EXPR
#else
diff --git a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h
index 09733da20a0..f62d3744b96 100644
--- a/storage/innobase/include/ut0lst.h
+++ b/storage/innobase/include/ut0lst.h
@@ -426,7 +426,7 @@ Gets the last node in a two-way list.
@return last node, or NULL if the list is empty */
#define UT_LIST_GET_LAST(BASE) (BASE).end
-struct NullValidate { void operator()(const void* elem) { } };
+struct NullValidate { void operator()(const void*) { } };
/********************************************************************//**
Iterate over all the elements and call the functor for each element.
diff --git a/storage/innobase/include/ut0new.h b/storage/innobase/include/ut0new.h
index 955e7b026c7..5dcb25271c5 100644
--- a/storage/innobase/include/ut0new.h
+++ b/storage/innobase/include/ut0new.h
@@ -129,6 +129,10 @@ InnoDB:
#include <string.h> /* strlen(), strrchr(), strncmp() */
#include "my_global.h" /* needed for headers from mysql/psi/ */
+/* madvise() and the MADV_DONTDUMP/MADV_DODUMP constants are only used
+below when DBUG_OFF is defined, so <sys/mman.h> must be visible in
+non-debug builds as well for those #if defined(MADV_*) checks to hold. */
+#if defined(HAVE_MADVISE)
+#include <sys/mman.h>
+#endif
+
/* JAN: TODO: missing 5.7 header */
#ifdef HAVE_MYSQL_MEMORY_H
#include "mysql/psi/mysql_memory.h" /* PSI_MEMORY_CALL() */
@@ -172,7 +176,6 @@ extern PSI_memory_key mem_key_other;
extern PSI_memory_key mem_key_row_log_buf;
extern PSI_memory_key mem_key_row_merge_sort;
extern PSI_memory_key mem_key_std;
-extern PSI_memory_key mem_key_trx_sys_t_rw_trx_ids;
extern PSI_memory_key mem_key_partitioning;
/** Setup the internal objects needed for UT_NEW() to operate.
@@ -235,6 +238,51 @@ struct ut_new_pfx_t {
#endif
};
+/** Apply the optional core-dump exclusion to a freshly allocated memory
+block and record its size in the caller's descriptor.
+Asserts that ptr is not NULL. When compiled with DBUG_OFF, HAVE_MADVISE and
+MADV_DONTDUMP all defined, and dontdump is set, calls
+madvise(MADV_DONTDUMP) on the block and logs a warning if that fails.
+If pfx is not NULL, its m_size is set to bytes (after allocate_trace() in
+UNIV_PFS_MEMORY builds).
+NOTE(review): allocate_trace() is called unqualified from this free
+function; confirm that it resolves in UNIV_PFS_MEMORY builds.
+@param[in] ptr start of the allocated block; must not be NULL
+@param[in] bytes size of the block in bytes
+@param[in] dontdump whether to request MADV_DONTDUMP (unnamed, and thus
+ignored, when the madvise() code is compiled out)
+@param[in,out] pfx allocation descriptor to fill in, or NULL
+@param[in] file caller's file name for allocate_trace() (unnamed unless
+UNIV_PFS_MEMORY is defined) */
+static inline void ut_allocate_trace_dontdump(void *ptr, size_t bytes,
+ bool
+#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DONTDUMP)
+ dontdump
+#endif
+ , ut_new_pfx_t* pfx,
+ const char*
+#ifdef UNIV_PFS_MEMORY
+ file
+#endif
+
+ )
+{
+ ut_a(ptr != NULL);
+
+#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DONTDUMP)
+ if (dontdump && madvise(ptr, bytes, MADV_DONTDUMP)) {
+ ib::warn() << "Failed to set memory to DONTDUMP: "
+ << strerror(errno)
+ << " ptr " << ptr
+ << " size " << bytes;
+ }
+#endif
+ if (pfx != NULL) {
+#ifdef UNIV_PFS_MEMORY
+ allocate_trace(bytes, file, pfx);
+#endif /* UNIV_PFS_MEMORY */
+ pfx->m_size = bytes;
+ }
+}
+
+/** Advise the OS to include a memory block in core dumps again.
+With DBUG_OFF, HAVE_MADVISE and MADV_DODUMP all defined this calls
+madvise(MADV_DODUMP) for a non-NULL ptr and logs a warning on failure;
+in every other build configuration it is a no-op.
+@param[in] ptr start of the memory block; NULL is ignored
+@param[in] m_size size of the block in bytes */
+#if defined(DBUG_OFF) && defined(HAVE_MADVISE) && defined(MADV_DODUMP)
+static inline void ut_dodump(void* ptr, size_t m_size)
+{
+ if (ptr && madvise(ptr, m_size, MADV_DODUMP)) {
+ ib::warn() << "Failed to set memory to DODUMP: "
+ << strerror(errno)
+ << " ptr " << ptr
+ << " size " << m_size;
+ }
+}
+#else
+static inline void ut_dodump(void*, size_t) {}
+#endif
+
/** Allocator class for allocating memory from inside std::* containers.
@tparam T type of allocated object
@tparam oom_fatal whether to commit suicide when running out of memory */
@@ -249,19 +297,25 @@ public:
typedef size_t size_type;
typedef ptrdiff_t difference_type;
+#ifdef UNIV_PFS_MEMORY
/** Default constructor. */
explicit
ut_allocator(PSI_memory_key key = PSI_NOT_INSTRUMENTED)
-#ifdef UNIV_PFS_MEMORY
: m_key(key)
-#endif /* UNIV_PFS_MEMORY */
{
}
+#else
+ ut_allocator() {}
+ ut_allocator(PSI_memory_key) {}
+#endif /* UNIV_PFS_MEMORY */
/** Constructor from allocator of another type. */
template <class U>
- ut_allocator(
- const ut_allocator<U>& other)
+ ut_allocator(const ut_allocator<U>&
+#ifdef UNIV_PFS_MEMORY
+ other
+#endif
+ )
#ifdef UNIV_PFS_MEMORY
: m_key(other.m_key)
#endif /* UNIV_PFS_MEMORY */
@@ -282,6 +336,8 @@ public:
#endif /* UNIV_PFS_MEMORY */
}
+ /** Single-argument allocate(): forwards to the full overload with a
+ NULL hint and NULL file name, leaving set_to_zero and throw_on_error
+ at their defaults.
+ @param[in] n number of elements
+ @return pointer to the allocated memory */
+ pointer allocate(size_type n) { return allocate(n, NULL, NULL); }
+
/** Allocate a chunk of memory that can hold 'n_elements' objects of
type 'T' and trace the allocation.
If the allocation fails this method may throw an exception. This
@@ -290,17 +346,19 @@ public:
After successfull allocation the returned pointer must be passed
to ut_allocator::deallocate() when no longer needed.
@param[in] n_elements number of elements
- @param[in] hint pointer to a nearby memory location,
- unused by this implementation
- @param[in] file file name of the caller
@param[in] set_to_zero if true, then the returned memory is
initialized with 0x0 bytes.
+ @param[in] throw_on_error if true, raise exception if too big
@return pointer to the allocated memory */
pointer
allocate(
size_type n_elements,
- const_pointer hint = NULL,
- const char* file = NULL,
+ const_pointer,
+ const char*
+#ifdef UNIV_PFS_MEMORY
+ file /*!< file name of the caller */
+#endif
+ ,
bool set_to_zero = false,
bool throw_on_error = true)
{
@@ -567,6 +625,8 @@ public:
/** Allocate a large chunk of memory that can hold 'n_elements'
objects of type 'T' and trace the allocation.
@param[in] n_elements number of elements
+ @param[in] dontdump if true, advise the OS not to include this
+ memory in core dumps.
@param[out] pfx storage for the description of the
allocated memory. The caller must provide space for this one and keep
it until the memory is no longer needed and then pass it to
@@ -575,7 +635,8 @@ public:
pointer
allocate_large(
size_type n_elements,
- ut_new_pfx_t* pfx)
+ ut_new_pfx_t* pfx,
+ bool dontdump = false)
{
if (n_elements == 0 || n_elements > max_size()) {
return(NULL);
@@ -586,13 +647,11 @@ public:
pointer ptr = reinterpret_cast<pointer>(
os_mem_alloc_large(&n_bytes));
-#ifdef UNIV_PFS_MEMORY
- if (ptr != NULL) {
- allocate_trace(n_bytes, NULL, pfx);
+ if (ptr == NULL) {
+ return NULL;
}
-#else
- pfx->m_size = n_bytes;
-#endif /* UNIV_PFS_MEMORY */
+
+ ut_allocate_trace_dontdump(ptr, n_bytes, dontdump, pfx, NULL);
return(ptr);
}
@@ -601,17 +660,30 @@ public:
deallocation.
@param[in,out] ptr pointer to memory to free
@param[in] pfx descriptor of the memory, as returned by
- allocate_large(). */
+ allocate_large().
+ @param[in] size size of the memory block, as passed on to
+ ut_dodump() and os_mem_free_large()
+ @param[in] dodump if true, advise the OS to include this
+ memory again if a core dump occurs. */
void
deallocate_large(
pointer ptr,
- const ut_new_pfx_t* pfx)
+ const ut_new_pfx_t*
+#ifdef UNIV_PFS_MEMORY
+ pfx
+#endif
+ ,
+ size_t size,
+ bool dodump = false)
{
+ if (dodump) {
+ ut_dodump(ptr, size);
+ }
#ifdef UNIV_PFS_MEMORY
- deallocate_trace(pfx);
+ if (pfx) {
+ deallocate_trace(pfx);
+ }
#endif /* UNIV_PFS_MEMORY */
- os_mem_free_large(ptr, pfx->m_size);
+ os_mem_free_large(ptr, size);
}
#ifdef UNIV_PFS_MEMORY
@@ -725,12 +797,7 @@ could be freed by A2 even if the pfs mem key is different. */
template <typename T>
inline
bool
-operator==(
- const ut_allocator<T>& lhs,
- const ut_allocator<T>& rhs)
-{
- return(true);
-}
+operator==(const ut_allocator<T>&, const ut_allocator<T>&) { return(true); }
/** Compare two allocators of the same type. */
template <typename T>
@@ -843,6 +910,10 @@ ut_delete_array(
ut_allocator<byte>(key).allocate( \
n_bytes, NULL, __FILE__, false, false))
+/* allocate_large() takes (n_elements, pfx, dontdump): pass an explicit NULL
+descriptor so that 'true' binds to dontdump and not to the pfx pointer. */
+#define ut_malloc_dontdump(n_bytes) static_cast<void*>( \
+ ut_allocator<byte>(PSI_NOT_INSTRUMENTED).allocate_large( \
+ n_bytes, NULL, true))
+
#define ut_zalloc(n_bytes, key) static_cast<void*>( \
ut_allocator<byte>(key).allocate( \
n_bytes, NULL, __FILE__, true, false))
@@ -866,6 +937,10 @@ ut_delete_array(
#define ut_free(ptr) ut_allocator<byte>(PSI_NOT_INSTRUMENTED).deallocate( \
reinterpret_cast<byte*>(ptr))
+/* deallocate_large() returns void, so the call must not be wrapped in a
+cast; the pointer is converted to byte* in the same way as in ut_free(). */
+#define ut_free_dodump(ptr, size) \
+ ut_allocator<byte>(PSI_NOT_INSTRUMENTED).deallocate_large( \
+ reinterpret_cast<byte*>(ptr), NULL, size, true)
+
#else /* UNIV_PFS_MEMORY */
/* Fallbacks when memory tracing is disabled at compile time. */
@@ -888,6 +963,14 @@ ut_delete_array(
#define ut_malloc_nokey(n_bytes) ::malloc(n_bytes)
+/** Allocate a large memory block and ask the OS to leave it out of core
+dumps (fallback used when UNIV_PFS_MEMORY is not defined).
+NOTE(review): if os_mem_alloc_large() returns NULL, the ut_a(ptr != NULL)
+in ut_allocate_trace_dontdump() fires, whereas
+ut_allocator::allocate_large() returns NULL in that case; confirm which
+behaviour callers expect.
+@param[in] n_bytes requested size in bytes; passed by address to
+os_mem_alloc_large()
+@return pointer returned by os_mem_alloc_large() */
+static inline void *ut_malloc_dontdump(size_t n_bytes)
+{
+ void *ptr = os_mem_alloc_large(&n_bytes);
+
+ ut_allocate_trace_dontdump(ptr, n_bytes, true, NULL, NULL);
+ return ptr;
+}
+
#define ut_zalloc_nokey(n_bytes) ::calloc(1, n_bytes)
#define ut_zalloc_nokey_nofatal(n_bytes) ::calloc(1, n_bytes)
@@ -896,6 +979,12 @@ ut_delete_array(
#define ut_free(ptr) ::free(ptr)
+/** Advise the OS to dump the block again (ut_dodump()) and then release it
+with os_mem_free_large() (fallback used when UNIV_PFS_MEMORY is not
+defined).
+@param[in] ptr start of the memory block
+@param[in] size size of the block, passed to ut_dodump() and
+os_mem_free_large() */
+static inline void ut_free_dodump(void *ptr, size_t size)
+{
+ ut_dodump(ptr, size);
+ os_mem_free_large(ptr, size);
+}
+
#endif /* UNIV_PFS_MEMORY */
#endif /* ut0new_h */
diff --git a/storage/innobase/include/ut0pool.h b/storage/innobase/include/ut0pool.h
index c0237158ce5..d3ea733a440 100644
--- a/storage/innobase/include/ut0pool.h
+++ b/storage/innobase/include/ut0pool.h
@@ -115,7 +115,7 @@ struct Pool {
} else if (m_last < m_end) {
/* Initialise the remaining elements. */
- init(m_end - m_last);
+ init(size_t(m_end - m_last));
ut_ad(!m_pqueue.empty());
diff --git a/storage/innobase/include/ut0rnd.h b/storage/innobase/include/ut0rnd.h
index 49ae3c81356..5baf8684d23 100644
--- a/storage/innobase/include/ut0rnd.h
+++ b/storage/innobase/include/ut0rnd.h
@@ -61,16 +61,6 @@ UNIV_INLINE
ulint
ut_rnd_gen_ulint(void);
/*==================*/
-/********************************************************//**
-Generates a random integer from a given interval.
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_interval(
-/*============*/
- ulint low, /*!< in: low limit; can generate also this value */
- ulint high); /*!< in: high limit; can generate also this value */
-
/*******************************************************//**
The following function generates a hash value for a ulint integer
to a hash table of size table_size, which should be a prime or some
diff --git a/storage/innobase/include/ut0rnd.ic b/storage/innobase/include/ut0rnd.ic
index 16dccb545d8..1e4915dd0f9 100644
--- a/storage/innobase/include/ut0rnd.ic
+++ b/storage/innobase/include/ut0rnd.ic
@@ -97,30 +97,6 @@ ut_rnd_gen_ulint(void)
return(rnd);
}
-/********************************************************//**
-Generates a random integer from a given interval.
-@return the 'random' number */
-UNIV_INLINE
-ulint
-ut_rnd_interval(
-/*============*/
- ulint low, /*!< in: low limit; can generate also this value */
- ulint high) /*!< in: high limit; can generate also this value */
-{
- ulint rnd;
-
- ut_ad(high >= low);
-
- if (low == high) {
-
- return(low);
- }
-
- rnd = ut_rnd_gen_ulint();
-
- return(low + (rnd % (high - low)));
-}
-
/*******************************************************//**
The following function generates a hash value for a ulint integer
to a hash table of size table_size, which should be a prime
diff --git a/storage/innobase/include/ut0stage.h b/storage/innobase/include/ut0stage.h
index 1d5457a3ab0..4b96fad3c21 100644
--- a/storage/innobase/include/ut0stage.h
+++ b/storage/innobase/include/ut0stage.h
@@ -529,65 +529,28 @@ ut_stage_alter_t::change_phase(
class ut_stage_alter_t {
public:
- explicit
- ut_stage_alter_t(
- const dict_index_t* pk)
- {
- }
+ explicit ut_stage_alter_t(const dict_index_t*) {}
- void
- begin_phase_read_pk(
- ulint n_sort_indexes)
- {
- }
+ void begin_phase_read_pk(ulint) {}
- void
- n_pk_recs_inc()
- {
- }
+ void n_pk_recs_inc() {}
- void
- inc(
- ulint inc_val = 1)
- {
- }
+ void inc() {}
+ void inc(ulint) {}
- void
- end_phase_read_pk()
- {
- }
+ void end_phase_read_pk() {}
- void
- begin_phase_sort(
- double sort_multi_factor)
- {
- }
+ void begin_phase_sort(double) {}
- void
- begin_phase_insert()
- {
- }
+ void begin_phase_insert() {}
- void
- begin_phase_flush(
- ulint n_flush_pages)
- {
- }
+ void begin_phase_flush(ulint) {}
- void
- begin_phase_log_index()
- {
- }
+ void begin_phase_log_index() {}
- void
- begin_phase_log_table()
- {
- }
+ void begin_phase_log_table() {}
- void
- begin_phase_end()
- {
- }
+ void begin_phase_end() {}
};
#endif /* HAVE_PSI_STAGE_INTERFACE */
diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
index 4e9c2599933..1614d3ead6d 100644
--- a/storage/innobase/include/ut0ut.h
+++ b/storage/innobase/include/ut0ut.h
@@ -45,6 +45,7 @@ Created 1/20/1994 Heikki Tuuri
#include <stdarg.h>
#include <string>
+#include <my_atomic.h>
/** Index name prefix in fast index creation, as a string constant */
#define TEMP_INDEX_PREFIX_STR "\377"
@@ -52,35 +53,6 @@ Created 1/20/1994 Heikki Tuuri
/** Time stamp */
typedef time_t ib_time_t;
-#ifdef HAVE_PAUSE_INSTRUCTION
- /* According to the gcc info page, asm volatile means that the
- instruction has important side-effects and must not be removed.
- Also asm volatile may trigger a memory barrier (spilling all registers
- to memory). */
-# ifdef __SUNPRO_CC
-# define UT_RELAX_CPU() asm ("pause" )
-# else
-# define UT_RELAX_CPU() __asm__ __volatile__ ("pause")
-# endif /* __SUNPRO_CC */
-
-#elif defined(HAVE_FAKE_PAUSE_INSTRUCTION)
-# define UT_RELAX_CPU() __asm__ __volatile__ ("rep; nop")
-#elif defined _WIN32
- /* In the Win32 API, the x86 PAUSE instruction is executed by calling
- the YieldProcessor macro defined in WinNT.h. It is a CPU architecture-
- independent way by using YieldProcessor. */
-# define UT_RELAX_CPU() YieldProcessor()
-#elif defined(__powerpc__) && defined __GLIBC__
-# include <sys/platform/ppc.h>
-# define UT_RELAX_CPU() __ppc_get_timebase()
-#else
-# define UT_RELAX_CPU() do { \
- volatile int32 volatile_var; \
- int32 oldval= 0; \
- my_atomic_cas32(&volatile_var, &oldval, 1); \
- } while (0)
-#endif
-
#if defined (__GNUC__)
# define UT_COMPILER_BARRIER() __asm__ __volatile__ ("":::"memory")
#elif defined (_MSC_VER)
@@ -89,15 +61,6 @@ typedef time_t ib_time_t;
# define UT_COMPILER_BARRIER()
#endif
-#if defined(HAVE_HMT_PRIORITY_INSTRUCTION)
-# include <sys/platform/ppc.h>
-# define UT_LOW_PRIORITY_CPU() __ppc_set_ppr_low()
-# define UT_RESUME_PRIORITY_CPU() __ppc_set_ppr_med()
-#else
-# define UT_LOW_PRIORITY_CPU() ((void)0)
-# define UT_RESUME_PRIORITY_CPU() ((void)0)
-#endif
-
/*********************************************************************//**
Delays execution for at most max_wait_us microseconds or returns earlier
if cond becomes true.