path: root/storage/innobase/ibuf
author		Sergei Golubchik <vuvova@gmail.com>	2015-05-04 19:17:21 +0200
committer	Sergei Golubchik <vuvova@gmail.com>	2015-05-04 19:17:21 +0200
commit		6d06fbbd1dc25b3c12568f9038060dfdb69f9683 (patch)
tree		21e27f3fddc89f9dda6b337091464ba10c490123 /storage/innobase/ibuf
parent		1645930d0bd02f79df3ebff412b90acdc15bd9a0 (diff)
download	mariadb-git-6d06fbbd1dc25b3c12568f9038060dfdb69f9683.tar.gz
move to storage/innobase
Diffstat (limited to 'storage/innobase/ibuf')
-rw-r--r--	storage/innobase/ibuf/ibuf0ibuf.cc	5223
1 file changed, 5223 insertions, 0 deletions
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
new file mode 100644
index 00000000000..caf1f1a864b
--- /dev/null
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -0,0 +1,5223 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file ibuf/ibuf0ibuf.cc
+Insert buffer
+
+Created 7/19/1997 Heikki Tuuri
+*******************************************************/
+
+#include "ibuf0ibuf.h"
+
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+UNIV_INTERN my_bool srv_ibuf_disable_background_merge;
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
+/** Number of bits describing a single page */
+#define IBUF_BITS_PER_PAGE 4
+#if IBUF_BITS_PER_PAGE % 2
+# error "IBUF_BITS_PER_PAGE must be an even number!"
+#endif
+/** The start address of the bitmap within an insert buffer bitmap page */
+#define IBUF_BITMAP PAGE_DATA
+
+#ifdef UNIV_NONINL
+#include "ibuf0ibuf.ic"
+#endif
+
+#ifndef UNIV_HOTBACKUP
+
+#include "buf0buf.h"
+#include "buf0rea.h"
+#include "fsp0fsp.h"
+#include "trx0sys.h"
+#include "fil0fil.h"
+#include "rem0rec.h"
+#include "btr0cur.h"
+#include "btr0pcur.h"
+#include "btr0btr.h"
+#include "row0upd.h"
+#include "sync0sync.h"
+#include "dict0boot.h"
+#include "fut0lst.h"
+#include "lock0lock.h"
+#include "log0recv.h"
+#include "que0que.h"
+#include "srv0start.h" /* srv_shutdown_state */
+#include "ha_prototypes.h"
+#include "rem0cmp.h"
+
+/* STRUCTURE OF AN INSERT BUFFER RECORD
+
+In versions < 4.1.x:
+
+1. The first field is the page number.
+2. The second field is an array which stores type info for each subsequent
+ field. We store the information which affects the ordering of records, and
+ also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it
+ is 10 bytes.
+3. Next we have the fields of the actual index record.
+
+In versions >= 4.1.x:
+
+Note that contrary to what we planned in the 1990s, there will only be one
+insert buffer tree, and that is in the system tablespace of InnoDB.
+
+1. The first field is the space id.
+2. The second field is a one-byte marker (0) which differentiates records from
+ the < 4.1.x storage format.
+3. The third field is the page number.
+4. The fourth field contains the type info, where we have also added 2 bytes to
+ store the charset. In the compressed table format of 5.0.x we must add more
+ information here so that we can build a dummy 'index' struct which 5.0.x
+ can use in the binary search on the index page in the ibuf merge phase.
+5. The rest of the fields contain the fields of the actual index record.
+
+In versions >= 5.0.3:
+
+The first byte of the fourth field is an additional marker (0) if the record
+is in the compact format. The presence of this marker can be detected by
+looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE.
+
+The high-order bit of the character set field in the type info is the
+"nullable" flag for the field.
+
+In versions >= 5.5:
+
+The optional marker byte at the start of the fourth field is replaced by
+3 mandatory fields, totaling 4 bytes:
+
+ 1. 2 bytes: Counter field, used to sort records within a (space id, page
+ no) combination in the order they were added. This is needed so that e.g. the
+ sequence of operations "INSERT x, DEL MARK x, INSERT x" is handled
+ correctly.
+
+ 2. 1 byte: Operation type (see ibuf_op_t).
+
+ 3. 1 byte: Flags. Currently only one flag exists, IBUF_REC_COMPACT.
+
+To ensure older records, which do not have counters to enforce correct
+sorting, are merged before any new records, ibuf_insert checks if we're
+trying to insert to a position that contains old-style records, and if so,
+refuses the insert. Thus, ibuf pages are gradually converted to the new
+format as their corresponding buffer pool pages are read into memory.
+*/
+
+
+/* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
+
+If an OS thread performs any operation that brings in disk pages from
+non-system tablespaces into the buffer pool, or creates such a page there,
+then the operation may have as a side effect an insert buffer index tree
+compression. Thus, the tree latch of the insert buffer tree may be acquired
+in the x-mode, and also the file space latch of the system tablespace may
+be acquired in the x-mode.
+
+Also, an insert to an index in a non-system tablespace can have the same
+effect. How do we know this cannot lead to a deadlock of OS threads? There
+is a problem with the i/o-handler threads: they break the latching order
+because they own x-latches to pages which are on a lower level than the
+insert buffer tree latch, its page latches, and the tablespace latch an
+insert buffer operation can reserve.
+
+The solution is the following: Let all the tree and page latches connected
+with the insert buffer be later in the latching order than the fsp latch and
+fsp page latches.
+
+Insert buffer pages must be such that the insert buffer is never invoked
+when these pages are accessed as this would result in a recursion violating
+the latching order. We let a special i/o-handler thread take care of i/o to
+the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap
+pages and the first inode page, which contains the inode of the ibuf tree: let
+us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead
+access both non-ibuf and ibuf pages.
+
+Then an i/o-handler for the insert buffer never needs to access recursively the
+insert buffer tree and thus obeys the latching order. On the other hand, other
+i/o-handlers for other tablespaces may require access to the insert buffer,
+but because all kinds of latches they need to access there are later in the
+latching order, no violation of the latching order occurs in this case,
+either.
+
+A problem is how to grow and contract an insert buffer tree. As it is later
+in the latching order than the fsp management, we have to reserve the fsp
+latch first, before adding or removing pages from the insert buffer tree.
+We let the insert buffer tree have its own file space management: a free
+list of pages linked to the tree root. To prevent recursive use of the
+insert buffer when adding pages to the tree, we must first load these pages
+to memory, obtaining a latch on them, and only after that add them to the
+free list of the insert buffer tree. Removing pages from the free list is
+more difficult. If there is an excess of pages in the free list of the
+ibuf tree, they might be needed if some thread reserves the fsp latch,
+intending to allocate more file space. So we do the following: if a thread
+reserves the fsp latch, we check the writer count field of the latch. If
+this field has value 1, it means that the thread did not own the latch
+before entering the fsp system, and the mtr of the thread contains no
+modifications to the fsp pages. Now we are free to reserve the ibuf latch,
+and check if there is an excess of pages in the free list. We can then, in a
+separate mini-transaction, take them out of the free list and free them to
+the fsp system.
+
+To avoid deadlocks in the ibuf system, we divide file pages into three levels:
+
+(1) non-ibuf pages,
+(2) ibuf tree pages and the pages in the ibuf tree free list, and
+(3) ibuf bitmap pages.
+
+No OS thread is allowed to access higher level pages if it has latches to
+lower level pages; even if the thread owns a B-tree latch it must not access
+the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead
+is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle
+exclusively level 1 i/o. A dedicated i/o-handler thread handles exclusively
+level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
+it uses synchronous aio, it can access any pages, as long as it obeys the
+access order rules. */
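+
+/* Illustrative sketch only (not part of this file's actual code): the three
+levels described above could be written down as an enum. A thread holding a
+latch on a page of some level must not access pages of a higher level,
+except through the dedicated i/o-handler threads or synchronous aio as
+explained above. */
+#if 0 /* sketch, not compiled */
+enum ibuf_page_level_sketch {
+	IBUF_LEVEL_NON_IBUF	= 1,	/* ordinary index and data pages */
+	IBUF_LEVEL_IBUF_TREE	= 2,	/* ibuf tree pages and pages in the
+					ibuf free list */
+	IBUF_LEVEL_IBUF_BITMAP	= 3	/* ibuf bitmap pages */
+};
+#endif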
+
+/** Table name for the insert buffer. */
+#define IBUF_TABLE_NAME "SYS_IBUF_TABLE"
+
+/** Operations that can currently be buffered. */
+UNIV_INTERN ibuf_use_t ibuf_use = IBUF_USE_ALL;
+
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+/** Flag to control insert buffer debugging. */
+UNIV_INTERN uint ibuf_debug;
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
+/** The insert buffer control structure */
+UNIV_INTERN ibuf_t* ibuf = NULL;
+
+#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t ibuf_pessimistic_insert_mutex_key;
+UNIV_INTERN mysql_pfs_key_t ibuf_mutex_key;
+UNIV_INTERN mysql_pfs_key_t ibuf_bitmap_mutex_key;
+#endif /* UNIV_PFS_MUTEX */
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+/** Number of tablespaces in the ibuf_counts array */
+#define IBUF_COUNT_N_SPACES 4
+/** Number of pages within each tablespace in the ibuf_counts array */
+#define IBUF_COUNT_N_PAGES 130000
+
+/** Buffered entry counts for file pages, used in debugging */
+static ulint ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES];
+
+/******************************************************************//**
+Checks that the indexes to ibuf_counts[][] are within limits. */
+UNIV_INLINE
+void
+ibuf_count_check(
+/*=============*/
+ ulint space_id, /*!< in: space identifier */
+ ulint page_no) /*!< in: page number */
+{
+ if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) {
+ return;
+ }
+
+ fprintf(stderr,
+ "InnoDB: UNIV_IBUF_COUNT_DEBUG limits space_id and page_no\n"
+ "InnoDB: and breaks crash recovery.\n"
+ "InnoDB: space_id=%lu, should be 0<=space_id<%lu\n"
+ "InnoDB: page_no=%lu, should be 0<=page_no<%lu\n",
+ (ulint) space_id, (ulint) IBUF_COUNT_N_SPACES,
+ (ulint) page_no, (ulint) IBUF_COUNT_N_PAGES);
+ ut_error;
+}
+#endif
+
+/** @name Offsets to the per-page bits in the insert buffer bitmap */
+/* @{ */
+#define IBUF_BITMAP_FREE 0 /*!< Bits indicating the
+ amount of free space */
+#define IBUF_BITMAP_BUFFERED 2 /*!< TRUE if there are buffered
+ changes for the page */
+#define IBUF_BITMAP_IBUF 3 /*!< TRUE if page is a part of
+ the ibuf tree, excluding the
+ root page, or is in the free
+ list of the ibuf */
+/* @} */
+
+#define IBUF_REC_FIELD_SPACE 0 /*!< in the pre-4.1 format,
+					the page number; later, the space_id */
+#define IBUF_REC_FIELD_MARKER 1 /*!< starting with 4.1, a marker
+ consisting of 1 byte that is 0 */
+#define IBUF_REC_FIELD_PAGE 2 /*!< starting with 4.1, the
+ page number */
+#define IBUF_REC_FIELD_METADATA	3	/*!< the metadata field */
+#define IBUF_REC_FIELD_USER	4	/*!< first user field */
+
+/* Various constants for checking the type of an ibuf record and extracting
+data from it. For details, see the description of the record format at the
+top of this file. */
+
+/** @name Format of the IBUF_REC_FIELD_METADATA of an insert buffer record
+The fourth column in the MySQL 5.5 format contains an operation
+type, counter, and some flags. */
+/* @{ */
+#define IBUF_REC_INFO_SIZE 4 /*!< Combined size of info fields at
+ the beginning of the fourth field */
+#if IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
+# error "IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
+#endif
+
+/* Offsets for the fields at the beginning of the fourth field */
+#define IBUF_REC_OFFSET_COUNTER 0 /*!< Operation counter */
+#define IBUF_REC_OFFSET_TYPE 2 /*!< Type of operation */
+#define IBUF_REC_OFFSET_FLAGS 3 /*!< Additional flags */
+
+/* Record flag masks */
+#define IBUF_REC_COMPACT 0x1 /*!< Set in
+ IBUF_REC_OFFSET_FLAGS if the
+ user index is in COMPACT
+ format or later */
+
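+/* A minimal sketch (illustrative only, assuming a record in the >= 5.5
+format described at the top of this file) of how the info prefix of the
+IBUF_REC_FIELD_METADATA field is laid out. The name ibuf_rec_info_sketch is
+made up for this example; the real decoding, including the handling of older
+record formats, is done by ibuf_rec_get_info_func() further below. */
+#if 0 /* sketch, not compiled */
+static void
+ibuf_rec_info_sketch(
+	const byte*	types)	/* in: start of the metadata field */
+{
+	/* 2-byte counter ordering the buffered changes for one
+	(space id, page no) combination */
+	ulint	counter	= mach_read_from_2(types + IBUF_REC_OFFSET_COUNTER);
+	/* 1-byte operation type: insert, delete-mark or delete */
+	ibuf_op_t op	= (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE];
+	/* 1-byte flags; IBUF_REC_COMPACT tells whether the user index
+	uses ROW_FORMAT=COMPACT or a newer format */
+	ibool	comp	= types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT;
+}
+#endif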
+
+/** The mutex used to block pessimistic inserts to ibuf trees */
+static ib_mutex_t ibuf_pessimistic_insert_mutex;
+
+/** The mutex protecting the insert buffer structs */
+static ib_mutex_t ibuf_mutex;
+
+/** The mutex protecting the insert buffer bitmaps */
+static ib_mutex_t ibuf_bitmap_mutex;
+
+/** The area in pages from which ibuf_contract() looks for page numbers
+to merge */
+#define IBUF_MERGE_AREA 8UL
+
+/** Inside the merge area, pages whose buffered entries fall short of the
+maximum volume that can be buffered for a single page by at most
+1/IBUF_MERGE_THRESHOLD of that maximum are merged along with the page whose
+buffer became full */
+#define IBUF_MERGE_THRESHOLD 4
+
+/** In ibuf_contract at most this number of pages is read to memory in one
+batch, in order to merge the entries for them in the insert buffer */
+#define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA
+
+/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
+many pages, we start to contract it in connection with inserts into it, using
+non-synchronous contract */
+#define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0
+
+/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
+many pages, we start to contract it in connection with inserts into it, using
+synchronous contract */
+#define IBUF_CONTRACT_ON_INSERT_SYNC 5
+
+/** If the combined size of the ibuf trees exceeds ibuf->max_size by
+this many pages, we contract it using a synchronous contract, but do
+not buffer the insert */
+#define IBUF_CONTRACT_DO_NOT_INSERT 10
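+
+/* A simplified, illustrative sketch of how the three thresholds above relate
+to each other, based solely on the macro comments; the name
+ibuf_contract_policy_sketch is made up for this example, and the actual
+decision is taken in the insert path elsewhere in this file. */
+#if 0 /* sketch, not compiled */
+static void
+ibuf_contract_policy_sketch(
+	ulint	size,		/* in: current combined size of ibuf trees */
+	ulint	max_size)	/* in: configured maximum size */
+{
+	if (size >= max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
+		/* contract synchronously and do not buffer the insert */
+	} else if (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC) {
+		/* buffer the insert, then contract synchronously */
+	} else if (size >= max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
+		/* buffer the insert, then contract non-synchronously */
+	}
+}
+#endif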
+
+/* TODO: how do we cope with DROP TABLE if there are records in the insert
+buffer for the indexes of the table? Is there actually any problem? Ibuf
+merge is done to a page when it is read in, and the page is still
+physically like an index page even if the index has been dropped. So,
+there seems to be no problem. */
+
+/******************************************************************//**
+Sets the flag in the current mini-transaction record indicating we're
+inside an insert buffer routine. */
+UNIV_INLINE
+void
+ibuf_enter(
+/*=======*/
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ ut_ad(!mtr->inside_ibuf);
+ mtr->inside_ibuf = TRUE;
+}
+
+/******************************************************************//**
+Sets the flag in the current mini-transaction record indicating we're
+exiting an insert buffer routine. */
+UNIV_INLINE
+void
+ibuf_exit(
+/*======*/
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ ut_ad(mtr->inside_ibuf);
+ mtr->inside_ibuf = FALSE;
+}
+
+/**************************************************************//**
+Commits an insert buffer mini-transaction and sets the persistent
+cursor latch mode to BTR_NO_LATCHES, that is, detaches the cursor. */
+UNIV_INLINE
+void
+ibuf_btr_pcur_commit_specify_mtr(
+/*=============================*/
+ btr_pcur_t* pcur, /*!< in/out: persistent cursor */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ ut_d(ibuf_exit(mtr));
+ btr_pcur_commit_specify_mtr(pcur, mtr);
+}
+
+/******************************************************************//**
+Gets the ibuf header page and x-latches it.
+@return insert buffer header page */
+static
+page_t*
+ibuf_header_page_get(
+/*=================*/
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ buf_block_t* block;
+
+ ut_ad(!ibuf_inside(mtr));
+
+ block = buf_page_get(
+ IBUF_SPACE_ID, 0, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
+ buf_block_dbg_add_level(block, SYNC_IBUF_HEADER);
+
+ return(buf_block_get_frame(block));
+}
+
+/******************************************************************//**
+Gets the root page and x-latches it.
+@return insert buffer tree root page */
+static
+page_t*
+ibuf_tree_root_get(
+/*===============*/
+ mtr_t* mtr) /*!< in: mtr */
+{
+ buf_block_t* block;
+ page_t* root;
+
+ ut_ad(ibuf_inside(mtr));
+ ut_ad(mutex_own(&ibuf_mutex));
+
+ mtr_x_lock(dict_index_get_lock(ibuf->index), mtr);
+
+ block = buf_page_get(
+ IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, mtr);
+
+ buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW);
+
+ root = buf_block_get_frame(block);
+
+ ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
+ ut_ad(page_get_page_no(root) == FSP_IBUF_TREE_ROOT_PAGE_NO);
+ ut_ad(ibuf->empty == page_is_empty(root));
+
+ return(root);
+}
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+/******************************************************************//**
+Gets the ibuf count for a given page.
+@return number of entries in the insert buffer currently buffered for
+this page */
+UNIV_INTERN
+ulint
+ibuf_count_get(
+/*===========*/
+ ulint space, /*!< in: space id */
+ ulint page_no)/*!< in: page number */
+{
+ ibuf_count_check(space, page_no);
+
+ return(ibuf_counts[space][page_no]);
+}
+
+/******************************************************************//**
+Sets the ibuf count for a given page. */
+static
+void
+ibuf_count_set(
+/*===========*/
+ ulint space, /*!< in: space id */
+ ulint page_no,/*!< in: page number */
+ ulint val) /*!< in: value to set */
+{
+ ibuf_count_check(space, page_no);
+ ut_a(val < UNIV_PAGE_SIZE);
+
+ ibuf_counts[space][page_no] = val;
+}
+#endif
+
+/******************************************************************//**
+Closes insert buffer and frees the data structures. */
+UNIV_INTERN
+void
+ibuf_close(void)
+/*============*/
+{
+ mutex_free(&ibuf_pessimistic_insert_mutex);
+ memset(&ibuf_pessimistic_insert_mutex,
+ 0x0, sizeof(ibuf_pessimistic_insert_mutex));
+
+ mutex_free(&ibuf_mutex);
+ memset(&ibuf_mutex, 0x0, sizeof(ibuf_mutex));
+
+ mutex_free(&ibuf_bitmap_mutex);
+	memset(&ibuf_bitmap_mutex, 0x0, sizeof(ibuf_bitmap_mutex));
+
+ mem_free(ibuf);
+ ibuf = NULL;
+}
+
+/******************************************************************//**
+Updates the size information of the ibuf, assuming the segment size has not
+changed. */
+static
+void
+ibuf_size_update(
+/*=============*/
+ const page_t* root, /*!< in: ibuf tree root */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ ut_ad(mutex_own(&ibuf_mutex));
+
+ ibuf->free_list_len = flst_get_len(root + PAGE_HEADER
+ + PAGE_BTR_IBUF_FREE_LIST, mtr);
+
+ ibuf->height = 1 + btr_page_get_level(root, mtr);
+
+ /* the '1 +' is the ibuf header page */
+ ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len);
+}
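+
+/* For example (illustrative figures only): with ibuf->seg_size = 10 pages
+and a free list of 3 pages, ibuf->size = 10 - (1 + 3) = 6 pages, the '1'
+again being the ibuf header page. */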
+
+/******************************************************************//**
+Creates the insert buffer data structure at a database startup and initializes
+the data structures for the insert buffer. */
+UNIV_INTERN
+void
+ibuf_init_at_db_start(void)
+/*=======================*/
+{
+ page_t* root;
+ mtr_t mtr;
+ dict_table_t* table;
+ mem_heap_t* heap;
+ dict_index_t* index;
+ ulint n_used;
+ page_t* header_page;
+ dberr_t error;
+
+ ibuf = static_cast<ibuf_t*>(mem_zalloc(sizeof(ibuf_t)));
+
+	/* At startup we initialize ibuf to have a maximum of
+ CHANGE_BUFFER_DEFAULT_SIZE in terms of percentage of the
+ buffer pool size. Once ibuf struct is initialized this
+ value is updated with the user supplied size by calling
+ ibuf_max_size_update(). */
+ ibuf->max_size = ((buf_pool_get_curr_size() / UNIV_PAGE_SIZE)
+ * CHANGE_BUFFER_DEFAULT_SIZE) / 100;
+
+ mutex_create(ibuf_pessimistic_insert_mutex_key,
+ &ibuf_pessimistic_insert_mutex,
+ SYNC_IBUF_PESS_INSERT_MUTEX);
+
+ mutex_create(ibuf_mutex_key,
+ &ibuf_mutex, SYNC_IBUF_MUTEX);
+
+ mutex_create(ibuf_bitmap_mutex_key,
+ &ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);
+
+ mtr_start(&mtr);
+
+ mutex_enter(&ibuf_mutex);
+
+ mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, NULL), &mtr);
+
+ header_page = ibuf_header_page_get(&mtr);
+
+ fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
+ &n_used, &mtr);
+ ibuf_enter(&mtr);
+
+ ut_ad(n_used >= 2);
+
+ ibuf->seg_size = n_used;
+
+ {
+ buf_block_t* block;
+
+ block = buf_page_get(
+ IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO,
+ RW_X_LATCH, &mtr);
+ buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
+
+ root = buf_block_get_frame(block);
+ }
+
+ ibuf_size_update(root, &mtr);
+ mutex_exit(&ibuf_mutex);
+
+ ibuf->empty = page_is_empty(root);
+ ibuf_mtr_commit(&mtr);
+
+ heap = mem_heap_create(450);
+
+ /* Use old-style record format for the insert buffer. */
+ table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0, 0);
+
+ dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0);
+
+ table->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;
+
+ dict_table_add_to_cache(table, FALSE, heap);
+ mem_heap_free(heap);
+
+ index = dict_mem_index_create(
+ IBUF_TABLE_NAME, "CLUST_IND",
+ IBUF_SPACE_ID, DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 1);
+
+ dict_mem_index_add_field(index, "DUMMY_COLUMN", 0);
+
+ index->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;
+
+ error = dict_index_add_to_cache(table, index,
+ FSP_IBUF_TREE_ROOT_PAGE_NO, FALSE);
+ ut_a(error == DB_SUCCESS);
+
+ ibuf->index = dict_table_get_first_index(table);
+}
+
+/*********************************************************************//**
+Updates the max_size value for ibuf. */
+UNIV_INTERN
+void
+ibuf_max_size_update(
+/*=================*/
+ ulint new_val) /*!< in: new value in terms of
+ percentage of the buffer pool size */
+{
+ ulint new_size = ((buf_pool_get_curr_size() / UNIV_PAGE_SIZE)
+ * new_val) / 100;
+ mutex_enter(&ibuf_mutex);
+ ibuf->max_size = new_size;
+ mutex_exit(&ibuf_mutex);
+}
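+
+/* Worked example for the computation above (illustrative figures only):
+with a 128 MiB buffer pool and UNIV_PAGE_SIZE = 16 KiB,
+buf_pool_get_curr_size() / UNIV_PAGE_SIZE = 8192 pages, so new_val = 25
+(per cent) yields ibuf->max_size = 8192 * 25 / 100 = 2048 pages. */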
+
+
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Initializes an ibuf bitmap page. */
+UNIV_INTERN
+void
+ibuf_bitmap_page_init(
+/*==================*/
+ buf_block_t* block, /*!< in: bitmap page */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_t* page;
+ ulint byte_offset;
+ ulint zip_size = buf_block_get_zip_size(block);
+
+ ut_a(ut_is_2pow(zip_size));
+
+ page = buf_block_get_frame(block);
+ fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
+
+ /* Write all zeros to the bitmap */
+
+ if (!zip_size) {
+ byte_offset = UT_BITS_IN_BYTES(UNIV_PAGE_SIZE
+ * IBUF_BITS_PER_PAGE);
+ } else {
+ byte_offset = UT_BITS_IN_BYTES(zip_size * IBUF_BITS_PER_PAGE);
+ }
+
+ memset(page + IBUF_BITMAP, 0, byte_offset);
+
+ /* The remaining area (up to the page trailer) is uninitialized. */
+
+#ifndef UNIV_HOTBACKUP
+ mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr);
+#endif /* !UNIV_HOTBACKUP */
+}
+
+/*********************************************************************//**
+Parses a redo log record of an ibuf bitmap page init.
+@return end of log record or NULL */
+UNIV_INTERN
+byte*
+ibuf_parse_bitmap_init(
+/*===================*/
+ byte* ptr, /*!< in: buffer */
+ byte* end_ptr __attribute__((unused)), /*!< in: buffer end */
+ buf_block_t* block, /*!< in: block or NULL */
+ mtr_t* mtr) /*!< in: mtr or NULL */
+{
+ ut_ad(ptr && end_ptr);
+
+ if (block) {
+ ibuf_bitmap_page_init(block, mtr);
+ }
+
+ return(ptr);
+}
+#ifndef UNIV_HOTBACKUP
+# ifdef UNIV_DEBUG
+/** Gets the desired bits for a given page from a bitmap page.
+@param page in: bitmap page
+@param offset in: page whose bits to get
+@param zs in: compressed page size in bytes; 0 for uncompressed pages
+@param bit in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
+@param mtr in: mini-transaction holding an x-latch on the bitmap page
+@return value of bits */
+# define ibuf_bitmap_page_get_bits(page, offset, zs, bit, mtr) \
+ ibuf_bitmap_page_get_bits_low(page, offset, zs, \
+ MTR_MEMO_PAGE_X_FIX, mtr, bit)
+# else /* UNIV_DEBUG */
+/** Gets the desired bits for a given page from a bitmap page.
+@param page in: bitmap page
+@param offset in: page whose bits to get
+@param zs in: compressed page size in bytes; 0 for uncompressed pages
+@param bit in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
+@param mtr in: mini-transaction holding an x-latch on the bitmap page
+@return value of bits */
+# define ibuf_bitmap_page_get_bits(page, offset, zs, bit, mtr) \
+ ibuf_bitmap_page_get_bits_low(page, offset, zs, bit)
+# endif /* UNIV_DEBUG */
+
+/********************************************************************//**
+Gets the desired bits for a given page from a bitmap page.
+@return value of bits */
+UNIV_INLINE
+ulint
+ibuf_bitmap_page_get_bits_low(
+/*==========================*/
+ const page_t* page, /*!< in: bitmap page */
+ ulint page_no,/*!< in: page whose bits to get */
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+#ifdef UNIV_DEBUG
+ ulint latch_type,
+ /*!< in: MTR_MEMO_PAGE_X_FIX,
+ MTR_MEMO_BUF_FIX, ... */
+ mtr_t* mtr, /*!< in: mini-transaction holding latch_type
+ on the bitmap page */
+#endif /* UNIV_DEBUG */
+ ulint bit) /*!< in: IBUF_BITMAP_FREE,
+ IBUF_BITMAP_BUFFERED, ... */
+{
+ ulint byte_offset;
+ ulint bit_offset;
+ ulint map_byte;
+ ulint value;
+
+ ut_ad(bit < IBUF_BITS_PER_PAGE);
+#if IBUF_BITS_PER_PAGE % 2
+# error "IBUF_BITS_PER_PAGE % 2 != 0"
+#endif
+ ut_ad(ut_is_2pow(zip_size));
+ ut_ad(mtr_memo_contains_page(mtr, page, latch_type));
+
+ if (!zip_size) {
+ bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
+ + bit;
+ } else {
+ bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
+ + bit;
+ }
+
+ byte_offset = bit_offset / 8;
+ bit_offset = bit_offset % 8;
+
+ ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
+
+ map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
+
+ value = ut_bit_get_nth(map_byte, bit_offset);
+
+ if (bit == IBUF_BITMAP_FREE) {
+ ut_ad(bit_offset + 1 < 8);
+
+ value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1);
+ }
+
+ return(value);
+}
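+
+/* Worked example for the offset arithmetic above (illustrative only,
+assuming an uncompressed 16384-byte UNIV_PAGE_SIZE): for page_no = 100 and
+bit = IBUF_BITMAP_FREE, bit_offset = (100 % 16384) * IBUF_BITS_PER_PAGE + 0
+= 400, so byte_offset = 400 / 8 = 50 and the bit position within that byte
+is 400 % 8 = 0; the two-bit "free" value is then read from bit positions 0
+and 1 of the byte at page + IBUF_BITMAP + 50. */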
+
+/********************************************************************//**
+Sets the desired bit for a given page in a bitmap page. */
+static
+void
+ibuf_bitmap_page_set_bits(
+/*======================*/
+ page_t* page, /*!< in: bitmap page */
+ ulint page_no,/*!< in: page whose bits to set */
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint bit, /*!< in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */
+ ulint val, /*!< in: value to set */
+ mtr_t* mtr) /*!< in: mtr containing an x-latch to the bitmap page */
+{
+ ulint byte_offset;
+ ulint bit_offset;
+ ulint map_byte;
+
+ ut_ad(bit < IBUF_BITS_PER_PAGE);
+#if IBUF_BITS_PER_PAGE % 2
+# error "IBUF_BITS_PER_PAGE % 2 != 0"
+#endif
+ ut_ad(ut_is_2pow(zip_size));
+ ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
+ || (0 == ibuf_count_get(page_get_space_id(page),
+ page_no)));
+#endif
+ if (!zip_size) {
+ bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
+ + bit;
+ } else {
+ bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
+ + bit;
+ }
+
+ byte_offset = bit_offset / 8;
+ bit_offset = bit_offset % 8;
+
+ ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
+
+ map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
+
+ if (bit == IBUF_BITMAP_FREE) {
+ ut_ad(bit_offset + 1 < 8);
+ ut_ad(val <= 3);
+
+ map_byte = ut_bit_set_nth(map_byte, bit_offset, val / 2);
+ map_byte = ut_bit_set_nth(map_byte, bit_offset + 1, val % 2);
+ } else {
+ ut_ad(val <= 1);
+ map_byte = ut_bit_set_nth(map_byte, bit_offset, val);
+ }
+
+ mlog_write_ulint(page + IBUF_BITMAP + byte_offset, map_byte,
+ MLOG_1BYTE, mtr);
+}
+
+/********************************************************************//**
+Calculates the bitmap page number for a given page number.
+@return the bitmap page number where the file page is mapped */
+UNIV_INLINE
+ulint
+ibuf_bitmap_page_no_calc(
+/*=====================*/
+ ulint zip_size, /*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint page_no) /*!< in: tablespace page number */
+{
+ ut_ad(ut_is_2pow(zip_size));
+
+ if (!zip_size) {
+ return(FSP_IBUF_BITMAP_OFFSET
+ + (page_no & ~(UNIV_PAGE_SIZE - 1)));
+ } else {
+ return(FSP_IBUF_BITMAP_OFFSET
+ + (page_no & ~(zip_size - 1)));
+ }
+}
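+
+/* Worked example (illustrative only, assuming an uncompressed tablespace
+with UNIV_PAGE_SIZE = 16384): for page_no = 70000,
+page_no & ~(UNIV_PAGE_SIZE - 1) = 70000 & ~16383 = 65536, so the bits for
+page 70000 are stored on bitmap page FSP_IBUF_BITMAP_OFFSET + 65536. */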
+
+/********************************************************************//**
+Gets the ibuf bitmap page where the bits describing a given file page are
+stored.
+@return bitmap page where the file page is mapped, that is, the bitmap
+page containing the descriptor bits for the file page; the bitmap page
+is x-latched */
+static
+page_t*
+ibuf_bitmap_get_map_page_func(
+/*==========================*/
+ ulint space, /*!< in: space id of the file page */
+ ulint page_no,/*!< in: page number of the file page */
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ buf_block_t* block;
+
+ block = buf_page_get_gen(space, zip_size,
+ ibuf_bitmap_page_no_calc(zip_size, page_no),
+ RW_X_LATCH, NULL, BUF_GET,
+ file, line, mtr);
+ buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP);
+
+ return(buf_block_get_frame(block));
+}
+
+/********************************************************************//**
+Gets the ibuf bitmap page where the bits describing a given file page are
+stored.
+@return bitmap page where the file page is mapped, that is, the bitmap
+page containing the descriptor bits for the file page; the bitmap page
+is x-latched
+@param space in: space id of the file page
+@param page_no in: page number of the file page
+@param zip_size in: compressed page size in bytes; 0 for uncompressed pages
+@param mtr in: mini-transaction */
+#define ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr) \
+ ibuf_bitmap_get_map_page_func(space, page_no, zip_size, \
+ __FILE__, __LINE__, mtr)
+
+/************************************************************************//**
+Sets the free bits of the page in the ibuf bitmap. This is done in a separate
+mini-transaction, hence this operation does not restrict further work to only
+ibuf bitmap operations, which would result if the latch to the bitmap page
+were kept. */
+UNIV_INLINE
+void
+ibuf_set_free_bits_low(
+/*===================*/
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ const buf_block_t* block, /*!< in: index page; free bits are set if
+ the index is non-clustered and page
+ level is 0 */
+ ulint val, /*!< in: value to set: < 4 */
+ mtr_t* mtr) /*!< in/out: mtr */
+{
+ page_t* bitmap_page;
+ ulint space;
+ ulint page_no;
+
+ if (!page_is_leaf(buf_block_get_frame(block))) {
+
+ return;
+ }
+
+ space = buf_block_get_space(block);
+ page_no = buf_block_get_page_no(block);
+ bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
+#ifdef UNIV_IBUF_DEBUG
+# if 0
+ fprintf(stderr,
+ "Setting space %lu page %lu free bits to %lu should be %lu\n",
+ space, page_no, val,
+ ibuf_index_page_calc_free(zip_size, block));
+# endif
+
+ ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
+#endif /* UNIV_IBUF_DEBUG */
+ ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_FREE, val, mtr);
+}
+
+/************************************************************************//**
+Sets the free bit of the page in the ibuf bitmap. This is done in a separate
+mini-transaction, hence this operation does not restrict further work to only
+ibuf bitmap operations, which would result if the latch to the bitmap page
+were kept. */
+UNIV_INTERN
+void
+ibuf_set_free_bits_func(
+/*====================*/
+ buf_block_t* block, /*!< in: index page of a non-clustered index;
+ free bit is reset if page level is 0 */
+#ifdef UNIV_IBUF_DEBUG
+ ulint max_val,/*!< in: ULINT_UNDEFINED or a maximum
+ value which the bits must have before
+ setting; this is for debugging */
+#endif /* UNIV_IBUF_DEBUG */
+ ulint val) /*!< in: value to set: < 4 */
+{
+ mtr_t mtr;
+ page_t* page;
+ page_t* bitmap_page;
+ ulint space;
+ ulint page_no;
+ ulint zip_size;
+
+ page = buf_block_get_frame(block);
+
+ if (!page_is_leaf(page)) {
+
+ return;
+ }
+
+ mtr_start(&mtr);
+
+ space = buf_block_get_space(block);
+ page_no = buf_block_get_page_no(block);
+ zip_size = buf_block_get_zip_size(block);
+ bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, &mtr);
+
+#ifdef UNIV_IBUF_DEBUG
+ if (max_val != ULINT_UNDEFINED) {
+ ulint old_val;
+
+ old_val = ibuf_bitmap_page_get_bits(
+ bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_FREE, &mtr);
+# if 0
+ if (old_val != max_val) {
+ fprintf(stderr,
+ "Ibuf: page %lu old val %lu max val %lu\n",
+ page_get_page_no(page),
+ old_val, max_val);
+ }
+# endif
+
+ ut_a(old_val <= max_val);
+ }
+# if 0
+ fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n",
+ page_get_page_no(page), val,
+ ibuf_index_page_calc_free(zip_size, block));
+# endif
+
+ ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
+#endif /* UNIV_IBUF_DEBUG */
+ ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_FREE, val, &mtr);
+ mtr_commit(&mtr);
+}
+
+/************************************************************************//**
+Resets the free bits of the page in the ibuf bitmap. This is done in a
+separate mini-transaction, hence this operation does not restrict
+further work to only ibuf bitmap operations, which would result if the
+latch to the bitmap page were kept. NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page. It is safe
+to decrement or reset the bits in the bitmap in a mini-transaction
+that is committed before the mini-transaction that affects the free
+space. */
+UNIV_INTERN
+void
+ibuf_reset_free_bits(
+/*=================*/
+ buf_block_t* block) /*!< in: index page; free bits are set to 0
+ if the index is a non-clustered
+ non-unique, and page level is 0 */
+{
+ ibuf_set_free_bits(block, 0, ULINT_UNDEFINED);
+}
+
+/**********************************************************************//**
+Updates the free bits for an uncompressed page to reflect the present
+state. Does this in the mtr given, which means that the latching
+order rules virtually prevent any further operations for this OS
+thread until mtr is committed. NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page. It is safe
+to set the free bits in the same mini-transaction that updated the
+page. */
+UNIV_INTERN
+void
+ibuf_update_free_bits_low(
+/*======================*/
+ const buf_block_t* block, /*!< in: index page */
+ ulint max_ins_size, /*!< in: value of
+ maximum insert size
+ with reorganize before
+ the latest operation
+ performed to the page */
+ mtr_t* mtr) /*!< in/out: mtr */
+{
+ ulint before;
+ ulint after;
+
+ ut_a(!buf_block_get_page_zip(block));
+
+ before = ibuf_index_page_calc_free_bits(0, max_ins_size);
+
+ after = ibuf_index_page_calc_free(0, block);
+
+ /* This approach cannot be used on compressed pages, since the
+ computed value of "before" often does not match the current
+ state of the bitmap. This is because the free space may
+ increase or decrease when a compressed page is reorganized. */
+ if (before != after) {
+ ibuf_set_free_bits_low(0, block, after, mtr);
+ }
+}
+
+/**********************************************************************//**
+Updates the free bits for a compressed page to reflect the present
+state. Does this in the mtr given, which means that the latching
+order rules virtually prevent any further operations for this OS
+thread until mtr is committed. NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page. It is safe
+to set the free bits in the same mini-transaction that updated the
+page. */
+UNIV_INTERN
+void
+ibuf_update_free_bits_zip(
+/*======================*/
+ buf_block_t* block, /*!< in/out: index page */
+ mtr_t* mtr) /*!< in/out: mtr */
+{
+ page_t* bitmap_page;
+ ulint space;
+ ulint page_no;
+ ulint zip_size;
+ ulint after;
+
+ space = buf_block_get_space(block);
+ page_no = buf_block_get_page_no(block);
+ zip_size = buf_block_get_zip_size(block);
+
+ ut_a(page_is_leaf(buf_block_get_frame(block)));
+ ut_a(zip_size);
+
+ bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
+
+ after = ibuf_index_page_calc_free_zip(zip_size, block);
+
+ if (after == 0) {
+		/* We move the page to the front of the buffer pool LRU list:
+		the purpose of this is to prevent pages to which we cannot
+		buffer inserts from slipping out of the buffer pool */
+
+ buf_page_make_young(&block->page);
+ }
+
+ ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_FREE, after, mtr);
+}
+
+/**********************************************************************//**
+Updates the free bits for the two pages to reflect the present state.
+Does this in the mtr given, which means that the latching order rules
+virtually prevent any further operations until mtr is committed.
+NOTE: The free bits in the insert buffer bitmap must never exceed the
+free space on a page. It is safe to set the free bits in the same
+mini-transaction that updated the pages. */
+UNIV_INTERN
+void
+ibuf_update_free_bits_for_two_pages_low(
+/*====================================*/
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ buf_block_t* block1, /*!< in: index page */
+ buf_block_t* block2, /*!< in: index page */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ ulint state;
+
+ /* As we have to x-latch two random bitmap pages, we have to acquire
+ the bitmap mutex to prevent a deadlock with a similar operation
+ performed by another OS thread. */
+
+ mutex_enter(&ibuf_bitmap_mutex);
+
+ state = ibuf_index_page_calc_free(zip_size, block1);
+
+ ibuf_set_free_bits_low(zip_size, block1, state, mtr);
+
+ state = ibuf_index_page_calc_free(zip_size, block2);
+
+ ibuf_set_free_bits_low(zip_size, block2, state, mtr);
+
+ mutex_exit(&ibuf_bitmap_mutex);
+}
+
+/**********************************************************************//**
+Returns TRUE if the page is one of the fixed address ibuf pages.
+@return TRUE if a fixed address ibuf i/o page */
+UNIV_INLINE
+ibool
+ibuf_fixed_addr_page(
+/*=================*/
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes;
+ 0 for uncompressed pages */
+ ulint page_no)/*!< in: page number */
+{
+ return((space == IBUF_SPACE_ID && page_no == IBUF_TREE_ROOT_PAGE_NO)
+ || ibuf_bitmap_page(zip_size, page_no));
+}
+
+/***********************************************************************//**
+Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
+Must not be called when recv_no_ibuf_operations==TRUE.
+@return TRUE if level 2 or level 3 page */
+UNIV_INTERN
+ibool
+ibuf_page_low(
+/*==========*/
+ ulint space, /*!< in: space id */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+ ulint page_no,/*!< in: page number */
+#ifdef UNIV_DEBUG
+ ibool x_latch,/*!< in: FALSE if relaxed check
+ (avoid latching the bitmap page) */
+#endif /* UNIV_DEBUG */
+ const char* file, /*!< in: file name */
+ ulint line, /*!< in: line where called */
+ mtr_t* mtr) /*!< in: mtr which will contain an
+ x-latch to the bitmap page if the page
+ is not one of the fixed address ibuf
+					pages, or NULL, in which case a new
+					mini-transaction is created. */
+{
+ ibool ret;
+ mtr_t local_mtr;
+ page_t* bitmap_page;
+
+ ut_ad(!recv_no_ibuf_operations);
+ ut_ad(x_latch || mtr == NULL);
+
+ if (ibuf_fixed_addr_page(space, zip_size, page_no)) {
+
+ return(TRUE);
+ } else if (space != IBUF_SPACE_ID) {
+
+ return(FALSE);
+ }
+
+ ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TABLESPACE);
+
+#ifdef UNIV_DEBUG
+ if (!x_latch) {
+ mtr_start(&local_mtr);
+
+ /* Get the bitmap page without a page latch, so that
+ we will not be violating the latching order when
+ another bitmap page has already been latched by this
+ thread. The page will be buffer-fixed, and thus it
+ cannot be removed or relocated while we are looking at
+ it. The contents of the page could change, but the
+ IBUF_BITMAP_IBUF bit that we are interested in should
+ not be modified by any other thread. Nobody should be
+ calling ibuf_add_free_page() or ibuf_remove_free_page()
+ while the page is linked to the insert buffer b-tree. */
+
+ bitmap_page = buf_block_get_frame(
+ buf_page_get_gen(
+ space, zip_size,
+ ibuf_bitmap_page_no_calc(zip_size, page_no),
+ RW_NO_LATCH, NULL, BUF_GET_NO_LATCH,
+ file, line, &local_mtr));
+
+ ret = ibuf_bitmap_page_get_bits_low(
+ bitmap_page, page_no, zip_size,
+ MTR_MEMO_BUF_FIX, &local_mtr, IBUF_BITMAP_IBUF);
+
+ mtr_commit(&local_mtr);
+ return(ret);
+ }
+#endif /* UNIV_DEBUG */
+
+ if (mtr == NULL) {
+ mtr = &local_mtr;
+ mtr_start(mtr);
+ }
+
+ bitmap_page = ibuf_bitmap_get_map_page_func(space, page_no, zip_size,
+ file, line, mtr);
+
+ ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_IBUF, mtr);
+
+ if (mtr == &local_mtr) {
+ mtr_commit(mtr);
+ }
+
+ return(ret);
+}
+
+#ifdef UNIV_DEBUG
+# define ibuf_rec_get_page_no(mtr,rec) ibuf_rec_get_page_no_func(mtr,rec)
+#else /* UNIV_DEBUG */
+# define ibuf_rec_get_page_no(mtr,rec) ibuf_rec_get_page_no_func(rec)
+#endif /* UNIV_DEBUG */
+
+/********************************************************************//**
+Returns the page number field of an ibuf record.
+@return page number */
+static
+ulint
+ibuf_rec_get_page_no_func(
+/*======================*/
+#ifdef UNIV_DEBUG
+ mtr_t* mtr, /*!< in: mini-transaction owning rec */
+#endif /* UNIV_DEBUG */
+ const rec_t* rec) /*!< in: ibuf record */
+{
+ const byte* field;
+ ulint len;
+
+ ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+ || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(ibuf_inside(mtr));
+ ut_ad(rec_get_n_fields_old(rec) > 2);
+
+ field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
+
+ ut_a(len == 1);
+
+ field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_PAGE, &len);
+
+ ut_a(len == 4);
+
+ return(mach_read_from_4(field));
+}
+
+#ifdef UNIV_DEBUG
+# define ibuf_rec_get_space(mtr,rec) ibuf_rec_get_space_func(mtr,rec)
+#else /* UNIV_DEBUG */
+# define ibuf_rec_get_space(mtr,rec) ibuf_rec_get_space_func(rec)
+#endif /* UNIV_DEBUG */
+
+/********************************************************************//**
+Returns the space id field of an ibuf record. For < 4.1.x format records
+returns 0.
+@return space id */
+static
+ulint
+ibuf_rec_get_space_func(
+/*====================*/
+#ifdef UNIV_DEBUG
+ mtr_t* mtr, /*!< in: mini-transaction owning rec */
+#endif /* UNIV_DEBUG */
+ const rec_t* rec) /*!< in: ibuf record */
+{
+ const byte* field;
+ ulint len;
+
+ ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+ || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(ibuf_inside(mtr));
+ ut_ad(rec_get_n_fields_old(rec) > 2);
+
+ field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
+
+ ut_a(len == 1);
+
+ field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len);
+
+ ut_a(len == 4);
+
+ return(mach_read_from_4(field));
+}
+
+#ifdef UNIV_DEBUG
+# define ibuf_rec_get_info(mtr,rec,op,comp,info_len,counter) \
+ ibuf_rec_get_info_func(mtr,rec,op,comp,info_len,counter)
+#else /* UNIV_DEBUG */
+# define ibuf_rec_get_info(mtr,rec,op,comp,info_len,counter) \
+ ibuf_rec_get_info_func(rec,op,comp,info_len,counter)
+#endif
+/****************************************************************//**
+Get various information about an ibuf record in >= 4.1.x format. */
+static
+void
+ibuf_rec_get_info_func(
+/*===================*/
+#ifdef UNIV_DEBUG
+ mtr_t* mtr, /*!< in: mini-transaction owning rec */
+#endif /* UNIV_DEBUG */
+ const rec_t* rec, /*!< in: ibuf record */
+ ibuf_op_t* op, /*!< out: operation type, or NULL */
+ ibool* comp, /*!< out: compact flag, or NULL */
+ ulint* info_len, /*!< out: length of info fields at the
+ start of the fourth field, or
+ NULL */
+	ulint*		counter)	/*!< out: counter value, or NULL */
+{
+ const byte* types;
+ ulint fields;
+ ulint len;
+
+ /* Local variables to shadow arguments. */
+ ibuf_op_t op_local;
+ ibool comp_local;
+ ulint info_len_local;
+ ulint counter_local;
+
+ ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+ || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(ibuf_inside(mtr));
+ fields = rec_get_n_fields_old(rec);
+ ut_a(fields > IBUF_REC_FIELD_USER);
+
+ types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
+
+ info_len_local = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
+
+ switch (info_len_local) {
+ case 0:
+ case 1:
+ op_local = IBUF_OP_INSERT;
+ comp_local = info_len_local;
+ ut_ad(!counter);
+ counter_local = ULINT_UNDEFINED;
+ break;
+
+ case IBUF_REC_INFO_SIZE:
+ op_local = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE];
+ comp_local = types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT;
+ counter_local = mach_read_from_2(
+ types + IBUF_REC_OFFSET_COUNTER);
+ break;
+
+ default:
+ ut_error;
+ }
+
+ ut_a(op_local < IBUF_OP_COUNT);
+ ut_a((len - info_len_local) ==
+ (fields - IBUF_REC_FIELD_USER)
+ * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+
+ if (op) {
+ *op = op_local;
+ }
+
+ if (comp) {
+ *comp = comp_local;
+ }
+
+ if (info_len) {
+ *info_len = info_len_local;
+ }
+
+ if (counter) {
+ *counter = counter_local;
+ }
+}
+
+#ifdef UNIV_DEBUG
+# define ibuf_rec_get_op_type(mtr,rec) ibuf_rec_get_op_type_func(mtr,rec)
+#else /* UNIV_DEBUG */
+# define ibuf_rec_get_op_type(mtr,rec) ibuf_rec_get_op_type_func(rec)
+#endif
+
+/****************************************************************//**
+Returns the operation type field of an ibuf record.
+@return operation type */
+static
+ibuf_op_t
+ibuf_rec_get_op_type_func(
+/*======================*/
+#ifdef UNIV_DEBUG
+ mtr_t* mtr, /*!< in: mini-transaction owning rec */
+#endif /* UNIV_DEBUG */
+ const rec_t* rec) /*!< in: ibuf record */
+{
+ ulint len;
+
+ ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+ || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(ibuf_inside(mtr));
+ ut_ad(rec_get_n_fields_old(rec) > 2);
+
+ (void) rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
+
+ if (len > 1) {
+ /* This is a < 4.1.x format record */
+
+ return(IBUF_OP_INSERT);
+ } else {
+ ibuf_op_t op;
+
+ ibuf_rec_get_info(mtr, rec, &op, NULL, NULL, NULL);
+
+ return(op);
+ }
+}
+
+/****************************************************************//**
+Read the first two bytes from a record's fourth field (counter field in new
+records; something else in older records).
+@return "counter" field, or ULINT_UNDEFINED if for some reason it
+can't be read */
+UNIV_INTERN
+ulint
+ibuf_rec_get_counter(
+/*=================*/
+ const rec_t* rec) /*!< in: ibuf record */
+{
+ const byte* ptr;
+ ulint len;
+
+ if (rec_get_n_fields_old(rec) <= IBUF_REC_FIELD_METADATA) {
+
+ return(ULINT_UNDEFINED);
+ }
+
+ ptr = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
+
+ if (len >= 2) {
+
+ return(mach_read_from_2(ptr));
+ } else {
+
+ return(ULINT_UNDEFINED);
+ }
+}
+
+/****************************************************************//**
+Add accumulated operation counts to a permanent array. Both arrays must be
+of size IBUF_OP_COUNT. */
+static
+void
+ibuf_add_ops(
+/*=========*/
+ ulint* arr, /*!< in/out: array to modify */
+ const ulint* ops) /*!< in: operation counts */
+
+{
+ ulint i;
+
+#ifndef HAVE_ATOMIC_BUILTINS
+ ut_ad(mutex_own(&ibuf_mutex));
+#endif /* !HAVE_ATOMIC_BUILTINS */
+
+ for (i = 0; i < IBUF_OP_COUNT; i++) {
+#ifdef HAVE_ATOMIC_BUILTINS
+ os_atomic_increment_ulint(&arr[i], ops[i]);
+#else /* HAVE_ATOMIC_BUILTINS */
+ arr[i] += ops[i];
+#endif /* HAVE_ATOMIC_BUILTINS */
+ }
+}
+
+/****************************************************************//**
+Print operation counts. The array must be of size IBUF_OP_COUNT. */
+static
+void
+ibuf_print_ops(
+/*===========*/
+ const ulint* ops, /*!< in: operation counts */
+ FILE* file) /*!< in: file where to print */
+{
+ static const char* op_names[] = {
+ "insert",
+ "delete mark",
+ "delete"
+ };
+ ulint i;
+
+ ut_a(UT_ARR_SIZE(op_names) == IBUF_OP_COUNT);
+
+ for (i = 0; i < IBUF_OP_COUNT; i++) {
+ fprintf(file, "%s %lu%s", op_names[i],
+ (ulong) ops[i], (i < (IBUF_OP_COUNT - 1)) ? ", " : "");
+ }
+
+ putc('\n', file);
+}
+
+/********************************************************************//**
+Creates a dummy index for inserting a record to a non-clustered index.
+@return dummy index */
+static
+dict_index_t*
+ibuf_dummy_index_create(
+/*====================*/
+ ulint n, /*!< in: number of fields */
+ ibool comp) /*!< in: TRUE=use compact record format */
+{
+ dict_table_t* table;
+ dict_index_t* index;
+
+ table = dict_mem_table_create("IBUF_DUMMY",
+ DICT_HDR_SPACE, n,
+ comp ? DICT_TF_COMPACT : 0, 0);
+
+ index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY",
+ DICT_HDR_SPACE, 0, n);
+
+ index->table = table;
+
+ /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
+ index->cached = TRUE;
+
+ return(index);
+}
+/********************************************************************//**
+Add a column to the dummy index */
+static
+void
+ibuf_dummy_index_add_col(
+/*=====================*/
+ dict_index_t* index, /*!< in: dummy index */
+ const dtype_t* type, /*!< in: the data type of the column */
+ ulint len) /*!< in: length of the column */
+{
+ ulint i = index->table->n_def;
+ dict_mem_table_add_col(index->table, NULL, NULL,
+ dtype_get_mtype(type),
+ dtype_get_prtype(type),
+ dtype_get_len(type));
+ dict_index_add_col(index, index->table,
+ dict_table_get_nth_col(index->table, i), len);
+}
+/********************************************************************//**
+Deallocates a dummy index for inserting a record to a non-clustered index. */
+static
+void
+ibuf_dummy_index_free(
+/*==================*/
+ dict_index_t* index) /*!< in, own: dummy index */
+{
+ dict_table_t* table = index->table;
+
+ dict_mem_index_free(index);
+ dict_mem_table_free(table);
+}
+
+#ifdef UNIV_DEBUG
+# define ibuf_build_entry_from_ibuf_rec(mtr,ibuf_rec,heap,pindex) \
+ ibuf_build_entry_from_ibuf_rec_func(mtr,ibuf_rec,heap,pindex)
+#else /* UNIV_DEBUG */
+# define ibuf_build_entry_from_ibuf_rec(mtr,ibuf_rec,heap,pindex) \
+ ibuf_build_entry_from_ibuf_rec_func(ibuf_rec,heap,pindex)
+#endif
+
+/*********************************************************************//**
+Builds the entry used to
+
+1) IBUF_OP_INSERT: insert into a non-clustered index
+
+2) IBUF_OP_DELETE_MARK: find the record whose delete-mark flag we need to
+ activate
+
+3) IBUF_OP_DELETE: find the record we need to delete
+
+when we have the corresponding record in an ibuf index.
+
+NOTE that as we copy pointers to fields in ibuf_rec, the caller must
+hold a latch to the ibuf_rec page as long as the entry is used!
+
+@return own: entry to insert to a non-clustered index */
+static
+dtuple_t*
+ibuf_build_entry_from_ibuf_rec_func(
+/*================================*/
+#ifdef UNIV_DEBUG
+ mtr_t* mtr, /*!< in: mini-transaction owning rec */
+#endif /* UNIV_DEBUG */
+ const rec_t* ibuf_rec, /*!< in: record in an insert buffer */
+ mem_heap_t* heap, /*!< in: heap where built */
+ dict_index_t** pindex) /*!< out, own: dummy index that
+ describes the entry */
+{
+ dtuple_t* tuple;
+ dfield_t* field;
+ ulint n_fields;
+ const byte* types;
+ const byte* data;
+ ulint len;
+ ulint info_len;
+ ulint i;
+ ulint comp;
+ dict_index_t* index;
+
+ ut_ad(mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_X_FIX)
+ || mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(ibuf_inside(mtr));
+
+ data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len);
+
+ ut_a(len == 1);
+ ut_a(*data == 0);
+ ut_a(rec_get_n_fields_old(ibuf_rec) > IBUF_REC_FIELD_USER);
+
+ n_fields = rec_get_n_fields_old(ibuf_rec) - IBUF_REC_FIELD_USER;
+
+ tuple = dtuple_create(heap, n_fields);
+
+ types = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_METADATA, &len);
+
+ ibuf_rec_get_info(mtr, ibuf_rec, NULL, &comp, &info_len, NULL);
+
+ index = ibuf_dummy_index_create(n_fields, comp);
+
+ len -= info_len;
+ types += info_len;
+
+ ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+
+ for (i = 0; i < n_fields; i++) {
+ field = dtuple_get_nth_field(tuple, i);
+
+ data = rec_get_nth_field_old(
+ ibuf_rec, i + IBUF_REC_FIELD_USER, &len);
+
+ dfield_set_data(field, data, len);
+
+ dtype_new_read_for_order_and_null_size(
+ dfield_get_type(field),
+ types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+
+ ibuf_dummy_index_add_col(index, dfield_get_type(field), len);
+ }
+
+ /* Prevent an ut_ad() failure in page_zip_write_rec() by
+ adding system columns to the dummy table pointed to by the
+ dummy secondary index. The insert buffer is only used for
+ secondary indexes, whose records never contain any system
+ columns, such as DB_TRX_ID. */
+ ut_d(dict_table_add_system_columns(index->table, index->table->heap));
+
+ *pindex = index;
+
+ return(tuple);
+}
+
+/******************************************************************//**
+Get the data size.
+@return size of fields */
+UNIV_INLINE
+ulint
+ibuf_rec_get_size(
+/*==============*/
+ const rec_t* rec, /*!< in: ibuf record */
+ const byte* types, /*!< in: fields */
+ ulint n_fields, /*!< in: number of fields */
+ ulint comp) /*!< in: 0=ROW_FORMAT=REDUNDANT,
+ nonzero=ROW_FORMAT=COMPACT */
+{
+ ulint i;
+ ulint field_offset;
+ ulint types_offset;
+ ulint size = 0;
+
+ field_offset = IBUF_REC_FIELD_USER;
+ types_offset = DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
+
+ for (i = 0; i < n_fields; i++) {
+ ulint len;
+ dtype_t dtype;
+
+ rec_get_nth_field_offs_old(rec, i + field_offset, &len);
+
+ if (len != UNIV_SQL_NULL) {
+ size += len;
+ } else {
+ dtype_new_read_for_order_and_null_size(&dtype, types);
+
+ size += dtype_get_sql_null_size(&dtype, comp);
+ }
+
+ types += types_offset;
+ }
+
+ return(size);
+}
+
+#ifdef UNIV_DEBUG
+# define ibuf_rec_get_volume(mtr,rec) ibuf_rec_get_volume_func(mtr,rec)
+#else /* UNIV_DEBUG */
+# define ibuf_rec_get_volume(mtr,rec) ibuf_rec_get_volume_func(rec)
+#endif
+
+/********************************************************************//**
+Returns the space taken by a stored non-clustered index entry if converted to
+an index record.
+@return size of index record in bytes + an upper limit of the space
+taken in the page directory */
+static
+ulint
+ibuf_rec_get_volume_func(
+/*=====================*/
+#ifdef UNIV_DEBUG
+ mtr_t* mtr, /*!< in: mini-transaction owning rec */
+#endif /* UNIV_DEBUG */
+ const rec_t* ibuf_rec)/*!< in: ibuf record */
+{
+ ulint len;
+ const byte* data;
+ const byte* types;
+ ulint n_fields;
+ ulint data_size;
+ ulint comp;
+ ibuf_op_t op;
+ ulint info_len;
+
+ ut_ad(mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_X_FIX)
+ || mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(ibuf_inside(mtr));
+ ut_ad(rec_get_n_fields_old(ibuf_rec) > 2);
+
+ data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len);
+ ut_a(len == 1);
+ ut_a(*data == 0);
+
+ types = rec_get_nth_field_old(
+ ibuf_rec, IBUF_REC_FIELD_METADATA, &len);
+
+ ibuf_rec_get_info(mtr, ibuf_rec, &op, &comp, &info_len, NULL);
+
+ if (op == IBUF_OP_DELETE_MARK || op == IBUF_OP_DELETE) {
+ /* Delete-marking a record doesn't take any
+ additional space, and while deleting a record
+ actually frees up space, we have to play it safe and
+ pretend it takes no additional space (the record
+ might not exist, etc.). */
+
+ return(0);
+ } else if (comp) {
+ dtuple_t* entry;
+ ulint volume;
+ dict_index_t* dummy_index;
+ mem_heap_t* heap = mem_heap_create(500);
+
+ entry = ibuf_build_entry_from_ibuf_rec(mtr, ibuf_rec,
+ heap, &dummy_index);
+
+ volume = rec_get_converted_size(dummy_index, entry, 0);
+
+ ibuf_dummy_index_free(dummy_index);
+ mem_heap_free(heap);
+
+ return(volume + page_dir_calc_reserved_space(1));
+ }
+
+ types += info_len;
+ n_fields = rec_get_n_fields_old(ibuf_rec)
+ - IBUF_REC_FIELD_USER;
+
+ data_size = ibuf_rec_get_size(ibuf_rec, types, n_fields, comp);
+
+ return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0)
+ + page_dir_calc_reserved_space(1));
+}
+
+/*********************************************************************//**
+Builds the tuple to insert into an ibuf tree when we have an entry for a
+non-clustered index.
+
+NOTE that the original entry must be kept because we copy pointers to
+its fields.
+
+@return own: entry to insert into an ibuf index tree */
+static
+dtuple_t*
+ibuf_entry_build(
+/*=============*/
+ ibuf_op_t op, /*!< in: operation type */
+ dict_index_t* index, /*!< in: non-clustered index */
+ const dtuple_t* entry, /*!< in: entry for a non-clustered index */
+ ulint space, /*!< in: space id */
+ ulint page_no,/*!< in: index page number where entry should
+ be inserted */
+ ulint counter,/*!< in: counter value;
+ ULINT_UNDEFINED=not used */
+ mem_heap_t* heap) /*!< in: heap into which to build */
+{
+ dtuple_t* tuple;
+ dfield_t* field;
+ const dfield_t* entry_field;
+ ulint n_fields;
+ byte* buf;
+ byte* ti;
+ byte* type_info;
+ ulint i;
+
+ ut_ad(counter != ULINT_UNDEFINED || op == IBUF_OP_INSERT);
+ ut_ad(counter == ULINT_UNDEFINED || counter <= 0xFFFF);
+ ut_ad(op < IBUF_OP_COUNT);
+
+ /* We have to build a tuple with the following fields:
+
+ 1-4) These are described at the top of this file.
+
+ 5) The rest of the fields are copied from the entry.
+
+ All fields in the tuple are ordered like the type binary in our
+ insert buffer tree. */
+
+ n_fields = dtuple_get_n_fields(entry);
+
+ tuple = dtuple_create(heap, n_fields + IBUF_REC_FIELD_USER);
+
+ /* 1) Space Id */
+
+ field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_SPACE);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+
+ mach_write_to_4(buf, space);
+
+ dfield_set_data(field, buf, 4);
+
+ /* 2) Marker byte */
+
+ field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_MARKER);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 1));
+
+ /* We set the marker byte zero */
+
+ mach_write_to_1(buf, 0);
+
+ dfield_set_data(field, buf, 1);
+
+ /* 3) Page number */
+
+ field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_PAGE);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+
+ mach_write_to_4(buf, page_no);
+
+ dfield_set_data(field, buf, 4);
+
+ /* 4) Type info, part #1 */
+
+ if (counter == ULINT_UNDEFINED) {
+ i = dict_table_is_comp(index->table) ? 1 : 0;
+ } else {
+ ut_ad(counter <= 0xFFFF);
+ i = IBUF_REC_INFO_SIZE;
+ }
+
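+	/* Here i is the size of the info header that precedes the
+	per-field type information: 0 for the old ROW_FORMAT=REDUNDANT
+	format, 1 for a single flag byte marking ROW_FORMAT=COMPACT,
+	and IBUF_REC_INFO_SIZE when a counter and operation type are
+	stored. */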
+ ti = type_info = static_cast<byte*>(
+ mem_heap_alloc(
+ heap,
+ i + n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE));
+
+ switch (i) {
+ default:
+ ut_error;
+ break;
+ case 1:
+ /* set the flag for ROW_FORMAT=COMPACT */
+ *ti++ = 0;
+ /* fall through */
+ case 0:
+ /* the old format does not allow delete buffering */
+ ut_ad(op == IBUF_OP_INSERT);
+ break;
+ case IBUF_REC_INFO_SIZE:
+ mach_write_to_2(ti + IBUF_REC_OFFSET_COUNTER, counter);
+
+ ti[IBUF_REC_OFFSET_TYPE] = (byte) op;
+ ti[IBUF_REC_OFFSET_FLAGS] = dict_table_is_comp(index->table)
+ ? IBUF_REC_COMPACT : 0;
+ ti += IBUF_REC_INFO_SIZE;
+ break;
+ }
+
+ /* 5+) Fields from the entry */
+
+ for (i = 0; i < n_fields; i++) {
+ ulint fixed_len;
+ const dict_field_t* ifield;
+
+ field = dtuple_get_nth_field(tuple, i + IBUF_REC_FIELD_USER);
+ entry_field = dtuple_get_nth_field(entry, i);
+ dfield_copy(field, entry_field);
+
+ ifield = dict_index_get_nth_field(index, i);
+ /* Prefix index columns of fixed-length columns are of
+ fixed length. However, in the function call below,
+ dfield_get_type(entry_field) contains the fixed length
+ of the column in the clustered index. Replace it with
+ the fixed length of the secondary index column. */
+ fixed_len = ifield->fixed_len;
+
+#ifdef UNIV_DEBUG
+ if (fixed_len) {
+ /* dict_index_add_col() should guarantee these */
+ ut_ad(fixed_len <= (ulint)
+ dfield_get_type(entry_field)->len);
+ if (ifield->prefix_len) {
+ ut_ad(ifield->prefix_len == fixed_len);
+ } else {
+ ut_ad(fixed_len == (ulint)
+ dfield_get_type(entry_field)->len);
+ }
+ }
+#endif /* UNIV_DEBUG */
+
+ dtype_new_store_for_order_and_null_size(
+ ti, dfield_get_type(entry_field), fixed_len);
+ ti += DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
+ }
+
+ /* 4) Type info, part #2 */
+
+ field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_METADATA);
+
+ dfield_set_data(field, type_info, ti - type_info);
+
+ /* Set all the types in the new tuple binary */
+
+ dtuple_set_types_binary(tuple, n_fields + IBUF_REC_FIELD_USER);
+
+ return(tuple);
+}
+
+/*********************************************************************//**
+Builds a search tuple used to search buffered inserts for an index page.
+This is for >= 4.1.x format records.
+@return own: search tuple */
+static
+dtuple_t*
+ibuf_search_tuple_build(
+/*====================*/
+ ulint space, /*!< in: space id */
+ ulint page_no,/*!< in: index page number */
+ mem_heap_t* heap) /*!< in: heap into which to build */
+{
+ dtuple_t* tuple;
+ dfield_t* field;
+ byte* buf;
+
+ tuple = dtuple_create(heap, IBUF_REC_FIELD_METADATA);
+
+ /* Store the space id in tuple */
+
+ field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_SPACE);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+
+ mach_write_to_4(buf, space);
+
+ dfield_set_data(field, buf, 4);
+
+ /* Store the new format record marker byte */
+
+ field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_MARKER);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 1));
+
+ mach_write_to_1(buf, 0);
+
+ dfield_set_data(field, buf, 1);
+
+ /* Store the page number in tuple */
+
+ field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_PAGE);
+
+ buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+
+ mach_write_to_4(buf, page_no);
+
+ dfield_set_data(field, buf, 4);
+
+ dtuple_set_types_binary(tuple, IBUF_REC_FIELD_METADATA);
+
+ return(tuple);
+}
+
+/*********************************************************************//**
+Checks if there are enough pages in the free list of the ibuf tree that we
+dare to start a pessimistic insert into the insert buffer.
+@return TRUE if enough free pages in list */
+UNIV_INLINE
+ibool
+ibuf_data_enough_free_for_insert(void)
+/*==================================*/
+{
+ ut_ad(mutex_own(&ibuf_mutex));
+
+	/* We want a big margin of free pages, because a B-tree can sometimes
+	grow in size even when records are deleted from it, as the node
+	pointers can change. We must make sure that we can delete the inserts
+	buffered for pages that we read into the buffer pool, without any
+	risk of running out of free space in the insert buffer. */
+
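+	/* Heuristic: require that the free list holds at least half the
+	current tree size plus three pages per level of the tree. */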
+ return(ibuf->free_list_len >= (ibuf->size / 2) + 3 * ibuf->height);
+}
+
+/*********************************************************************//**
+Checks if there are so many pages in the free list of the ibuf tree that we
+should remove some of them and return them to the file space management.
+@return TRUE if enough free pages in list */
+UNIV_INLINE
+ibool
+ibuf_data_too_much_free(void)
+/*=========================*/
+{
+ ut_ad(mutex_own(&ibuf_mutex));
+
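+	/* Allow three pages of slack above the
+	ibuf_data_enough_free_for_insert() threshold before pages are
+	returned to the file space management. */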
+ return(ibuf->free_list_len >= 3 + (ibuf->size / 2) + 3 * ibuf->height);
+}
+
+/*********************************************************************//**
+Allocates a new page from the ibuf file segment and adds it to the free
+list.
+@return TRUE on success, FALSE if no space left */
+static
+ibool
+ibuf_add_free_page(void)
+/*====================*/
+{
+ mtr_t mtr;
+ page_t* header_page;
+ ulint flags;
+ ulint zip_size;
+ buf_block_t* block;
+ page_t* page;
+ page_t* root;
+ page_t* bitmap_page;
+
+ mtr_start(&mtr);
+
+ /* Acquire the fsp latch before the ibuf header, obeying the latching
+ order */
+ mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
+ zip_size = fsp_flags_get_zip_size(flags);
+
+ header_page = ibuf_header_page_get(&mtr);
+
+	/* Allocate a new page: NOTE that if the page has been a part of a
+	non-clustered index which has subsequently been dropped, then the
+	page may have buffered inserts in the insert buffer, and these
+	should be deleted from there. These get deleted when the page
+	allocation creates the page in the buffer pool. Thus the call below
+	may end up calling the insert buffer routines and, as we do not yet
+	hold any latches on insert buffer tree pages, these routines can run
+	without a risk of a deadlock. This is the reason why we created a
+	special ibuf header page apart from the ibuf tree. */
+
+ block = fseg_alloc_free_page(
+ header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
+ &mtr);
+
+ if (block == NULL) {
+ mtr_commit(&mtr);
+
+ return(FALSE);
+ }
+
+ ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1);
+ ibuf_enter(&mtr);
+ mutex_enter(&ibuf_mutex);
+ root = ibuf_tree_root_get(&mtr);
+
+ buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW);
+ page = buf_block_get_frame(block);
+
+ /* Add the page to the free list and update the ibuf size data */
+
+ flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+ page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
+
+ mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_IBUF_FREE_LIST,
+ MLOG_2BYTES, &mtr);
+
+ ibuf->seg_size++;
+ ibuf->free_list_len++;
+
+ /* Set the bit indicating that this page is now an ibuf tree page
+ (level 2 page) */
+
+ bitmap_page = ibuf_bitmap_get_map_page(
+ IBUF_SPACE_ID, buf_block_get_page_no(block), zip_size, &mtr);
+
+ mutex_exit(&ibuf_mutex);
+
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, buf_block_get_page_no(block), zip_size,
+ IBUF_BITMAP_IBUF, TRUE, &mtr);
+
+ ibuf_mtr_commit(&mtr);
+
+ return(TRUE);
+}
+
+/*********************************************************************//**
+Removes a page from the free list and frees it to the fsp system. */
+static
+void
+ibuf_remove_free_page(void)
+/*=======================*/
+{
+ mtr_t mtr;
+ mtr_t mtr2;
+ page_t* header_page;
+ ulint flags;
+ ulint zip_size;
+ ulint page_no;
+ page_t* page;
+ page_t* root;
+ page_t* bitmap_page;
+
+ mtr_start(&mtr);
+
+ /* Acquire the fsp latch before the ibuf header, obeying the latching
+ order */
+ mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
+ zip_size = fsp_flags_get_zip_size(flags);
+
+ header_page = ibuf_header_page_get(&mtr);
+
+ /* Prevent pessimistic inserts to insert buffer trees for a while */
+ ibuf_enter(&mtr);
+ mutex_enter(&ibuf_pessimistic_insert_mutex);
+ mutex_enter(&ibuf_mutex);
+
+ if (!ibuf_data_too_much_free()) {
+
+ mutex_exit(&ibuf_mutex);
+ mutex_exit(&ibuf_pessimistic_insert_mutex);
+
+ ibuf_mtr_commit(&mtr);
+
+ return;
+ }
+
+ ibuf_mtr_start(&mtr2);
+
+ root = ibuf_tree_root_get(&mtr2);
+
+ mutex_exit(&ibuf_mutex);
+
+ page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+ &mtr2).page;
+
+ /* NOTE that we must release the latch on the ibuf tree root
+ because in fseg_free_page we access level 1 pages, and the root
+ is a level 2 page. */
+
+ ibuf_mtr_commit(&mtr2);
+ ibuf_exit(&mtr);
+
+	/* Since pessimistic inserts were prevented, we know that the
+	page is still in the free list. NOTE that deletes may also take
+	pages from the free list, but they take them from the start, and
+	the free list was so long that they cannot have taken the last
+	page from it. */
+
+ fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
+ IBUF_SPACE_ID, page_no, &mtr);
+
+#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
+ buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no);
+#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+
+ ibuf_enter(&mtr);
+
+ mutex_enter(&ibuf_mutex);
+
+ root = ibuf_tree_root_get(&mtr);
+
+ ut_ad(page_no == flst_get_last(root + PAGE_HEADER
+ + PAGE_BTR_IBUF_FREE_LIST, &mtr).page);
+
+ {
+ buf_block_t* block;
+
+ block = buf_page_get(
+ IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);
+
+ buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
+
+ page = buf_block_get_frame(block);
+ }
+
+ /* Remove the page from the free list and update the ibuf size data */
+
+ flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+ page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
+
+ mutex_exit(&ibuf_pessimistic_insert_mutex);
+
+ ibuf->seg_size--;
+ ibuf->free_list_len--;
+
+	/* Set the bit indicating that this page is no longer an ibuf tree
+	page (level 2 page) */
+
+ bitmap_page = ibuf_bitmap_get_map_page(
+ IBUF_SPACE_ID, page_no, zip_size, &mtr);
+
+ mutex_exit(&ibuf_mutex);
+
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr);
+
+#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
+ buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no);
+#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+ ibuf_mtr_commit(&mtr);
+}
+
+/***********************************************************************//**
+Frees excess pages from the ibuf free list. This function is called when an OS
+thread calls fsp services to allocate a new file segment, or a new page for a
+file segment, and the thread did not own the fsp latch before this call. */
+UNIV_INTERN
+void
+ibuf_free_excess_pages(void)
+/*========================*/
+{
+ ulint i;
+
+#ifdef UNIV_SYNC_DEBUG
+ ut_ad(rw_lock_own(fil_space_get_latch(IBUF_SPACE_ID, NULL),
+ RW_LOCK_EX));
+#endif /* UNIV_SYNC_DEBUG */
+
+ ut_ad(rw_lock_get_x_lock_count(
+ fil_space_get_latch(IBUF_SPACE_ID, NULL)) == 1);
+
+ /* NOTE: We require that the thread did not own the latch before,
+ because then we know that we can obey the correct latching order
+ for ibuf latches */
+
+ if (!ibuf) {
+ /* Not yet initialized; not sure if this is possible, but
+		it does no harm to check for it. */
+
+ return;
+ }
+
+ /* Free at most a few pages at a time, so that we do not delay the
+ requested service too much */
+
+ for (i = 0; i < 4; i++) {
+
+ ibool too_much_free;
+
+ mutex_enter(&ibuf_mutex);
+ too_much_free = ibuf_data_too_much_free();
+ mutex_exit(&ibuf_mutex);
+
+ if (!too_much_free) {
+ return;
+ }
+
+ ibuf_remove_free_page();
+ }
+}
+
+#ifdef UNIV_DEBUG
+# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,vers,pages,n_stored) \
+ ibuf_get_merge_page_nos_func(contract,rec,mtr,ids,vers,pages,n_stored)
+#else /* UNIV_DEBUG */
+# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,vers,pages,n_stored) \
+ ibuf_get_merge_page_nos_func(contract,rec,ids,vers,pages,n_stored)
+#endif /* UNIV_DEBUG */
+
+/*********************************************************************//**
+Reads page numbers from a leaf in an ibuf tree.
+@return a lower limit for the combined volume of records which will be
+merged */
+static
+ulint
+ibuf_get_merge_page_nos_func(
+/*=========================*/
+	ibool		contract,/*!< in: TRUE if this function is called to
+				contract the tree, FALSE if this is called
+				when a single page becomes full and we check
+				whether it pays to also read nearby pages */
+ const rec_t* rec, /*!< in: insert buffer record */
+#ifdef UNIV_DEBUG
+ mtr_t* mtr, /*!< in: mini-transaction holding rec */
+#endif /* UNIV_DEBUG */
+ ulint* space_ids,/*!< in/out: space id's of the pages */
+ ib_int64_t* space_versions,/*!< in/out: tablespace version
+ timestamps; used to prevent reading in old
+ pages after DISCARD + IMPORT tablespace */
+ ulint* page_nos,/*!< in/out: buffer for at least
+ IBUF_MAX_N_PAGES_MERGED many page numbers;
+ the page numbers are in an ascending order */
+ ulint* n_stored)/*!< out: number of page numbers stored to
+ page_nos in this function */
+{
+ ulint prev_page_no;
+ ulint prev_space_id;
+ ulint first_page_no;
+ ulint first_space_id;
+ ulint rec_page_no;
+ ulint rec_space_id;
+ ulint sum_volumes;
+ ulint volume_for_page;
+ ulint rec_volume;
+ ulint limit;
+ ulint n_pages;
+
+ ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+ || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(ibuf_inside(mtr));
+
+ *n_stored = 0;
+
+ limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool_get_curr_size() / 4);
+
+ if (page_rec_is_supremum(rec)) {
+
+ rec = page_rec_get_prev_const(rec);
+ }
+
+ if (page_rec_is_infimum(rec)) {
+
+ rec = page_rec_get_next_const(rec);
+ }
+
+ if (page_rec_is_supremum(rec)) {
+
+ return(0);
+ }
+
+ first_page_no = ibuf_rec_get_page_no(mtr, rec);
+ first_space_id = ibuf_rec_get_space(mtr, rec);
+ n_pages = 0;
+ prev_page_no = 0;
+ prev_space_id = 0;
+
+	/* Go backwards from the first rec until we reach the border of the
+	'merge area', the start of the page, or the limit of storable pages */
+
+ while (!page_rec_is_infimum(rec) && UNIV_LIKELY(n_pages < limit)) {
+
+ rec_page_no = ibuf_rec_get_page_no(mtr, rec);
+ rec_space_id = ibuf_rec_get_space(mtr, rec);
+
+ if (rec_space_id != first_space_id
+ || (rec_page_no / IBUF_MERGE_AREA)
+ != (first_page_no / IBUF_MERGE_AREA)) {
+
+ break;
+ }
+
+ if (rec_page_no != prev_page_no
+ || rec_space_id != prev_space_id) {
+ n_pages++;
+ }
+
+ prev_page_no = rec_page_no;
+ prev_space_id = rec_space_id;
+
+ rec = page_rec_get_prev_const(rec);
+ }
+
+ rec = page_rec_get_next_const(rec);
+
+ /* At the loop start there is no prev page; we mark this with a pair
+ of space id, page no (0, 0) for which there can never be entries in
+ the insert buffer */
+
+ prev_page_no = 0;
+ prev_space_id = 0;
+ sum_volumes = 0;
+ volume_for_page = 0;
+
+ while (*n_stored < limit) {
+ if (page_rec_is_supremum(rec)) {
+ /* When no more records available, mark this with
+ another 'impossible' pair of space id, page no */
+ rec_page_no = 1;
+ rec_space_id = 0;
+ } else {
+ rec_page_no = ibuf_rec_get_page_no(mtr, rec);
+ rec_space_id = ibuf_rec_get_space(mtr, rec);
+ /* In the system tablespace, the smallest
+ possible secondary index leaf page number is
+ bigger than IBUF_TREE_ROOT_PAGE_NO (4). In
+ other tablespaces, the clustered index tree is
+ created at page 3, which makes page 4 the
+ smallest possible secondary index leaf page
+ (and that only after DROP INDEX). */
+ ut_ad(rec_page_no
+ > (ulint) IBUF_TREE_ROOT_PAGE_NO
+ - (rec_space_id != 0));
+ }
+
+#ifdef UNIV_IBUF_DEBUG
+ ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED);
+#endif
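+		/* When we move to a new (space id, page number) pair,
+		decide whether the previously scanned page should be
+		scheduled for a merge: always when contracting the tree,
+		when it is the page that triggered this call, or when
+		enough volume has been buffered for it. */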
+ if ((rec_space_id != prev_space_id
+ || rec_page_no != prev_page_no)
+ && (prev_space_id != 0 || prev_page_no != 0)) {
+
+ if (contract
+ || (prev_page_no == first_page_no
+ && prev_space_id == first_space_id)
+ || (volume_for_page
+ > ((IBUF_MERGE_THRESHOLD - 1)
+ * 4 * UNIV_PAGE_SIZE
+ / IBUF_PAGE_SIZE_PER_FREE_SPACE)
+ / IBUF_MERGE_THRESHOLD)) {
+
+ space_ids[*n_stored] = prev_space_id;
+ space_versions[*n_stored]
+ = fil_space_get_version(prev_space_id);
+ page_nos[*n_stored] = prev_page_no;
+
+ (*n_stored)++;
+
+ sum_volumes += volume_for_page;
+ }
+
+ if (rec_space_id != first_space_id
+ || rec_page_no / IBUF_MERGE_AREA
+ != first_page_no / IBUF_MERGE_AREA) {
+
+ break;
+ }
+
+ volume_for_page = 0;
+ }
+
+ if (rec_page_no == 1 && rec_space_id == 0) {
+ /* Supremum record */
+
+ break;
+ }
+
+ rec_volume = ibuf_rec_get_volume(mtr, rec);
+
+ volume_for_page += rec_volume;
+
+ prev_page_no = rec_page_no;
+ prev_space_id = rec_space_id;
+
+ rec = page_rec_get_next_const(rec);
+ }
+
+#ifdef UNIV_IBUF_DEBUG
+ ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED);
+#endif
+#if 0
+ fprintf(stderr, "Ibuf merge batch %lu pages %lu volume\n",
+ *n_stored, sum_volumes);
+#endif
+ return(sum_volumes);
+}
+
+/*******************************************************************//**
+Get the first user record at or after the cursor position, moving the
+cursor forward past non-user records.
+@return the user record, or NULL if there are no more user records */
+static __attribute__((nonnull, warn_unused_result))
+const rec_t*
+ibuf_get_user_rec(
+/*===============*/
+ btr_pcur_t* pcur, /*!< in: the current cursor */
+ mtr_t* mtr) /*!< in: mini transaction */
+{
+ do {
+ const rec_t* rec = btr_pcur_get_rec(pcur);
+
+ if (page_rec_is_user_rec(rec)) {
+ return(rec);
+ }
+ } while (btr_pcur_move_to_next(pcur, mtr));
+
+ return(NULL);
+}
+
+/*********************************************************************//**
+Reads page numbers for a space id from an ibuf tree.
+@return a lower limit for the combined volume of records which will be
+merged */
+static __attribute__((nonnull, warn_unused_result))
+ulint
+ibuf_get_merge_pages(
+/*=================*/
+ btr_pcur_t* pcur, /*!< in/out: cursor */
+ ulint space, /*!< in: space for which to merge */
+ ulint limit, /*!< in: max page numbers to read */
+ ulint* pages, /*!< out: pages read */
+ ulint* spaces, /*!< out: spaces read */
+ ib_int64_t* versions,/*!< out: space versions read */
+ ulint* n_pages,/*!< out: number of pages read */
+ mtr_t* mtr) /*!< in: mini transaction */
+{
+ const rec_t* rec;
+ ulint volume = 0;
+ ib_int64_t version = fil_space_get_version(space);
+
+ ut_a(space != ULINT_UNDEFINED);
+
+ *n_pages = 0;
+
+ while ((rec = ibuf_get_user_rec(pcur, mtr)) != 0
+ && ibuf_rec_get_space(mtr, rec) == space
+ && *n_pages < limit) {
+
+ ulint page_no = ibuf_rec_get_page_no(mtr, rec);
+
+ if (*n_pages == 0 || pages[*n_pages - 1] != page_no) {
+ spaces[*n_pages] = space;
+ pages[*n_pages] = page_no;
+ versions[*n_pages] = version;
+ ++*n_pages;
+ }
+
+ volume += ibuf_rec_get_volume(mtr, rec);
+
+ btr_pcur_move_to_next(pcur, mtr);
+ }
+
+ return(volume);
+}
+
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+static
+ulint
+ibuf_merge_pages(
+/*=============*/
+ ulint* n_pages, /*!< out: number of pages to which merged */
+ bool sync) /*!< in: true if the caller wants to wait for
+ the issued read with the highest tablespace
+ address to complete */
+{
+ mtr_t mtr;
+ btr_pcur_t pcur;
+ ulint sum_sizes;
+ ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
+ ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
+ ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
+
+ *n_pages = 0;
+
+ ibuf_mtr_start(&mtr);
+
+ /* Open a cursor to a randomly chosen leaf of the tree, at a random
+ position within the leaf */
+
+ btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr);
+
+ ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
+
+ if (page_is_empty(btr_pcur_get_page(&pcur))) {
+ /* If a B-tree page is empty, it must be the root page
+ and the whole B-tree must be empty. InnoDB does not
+ allow empty B-tree pages other than the root. */
+ ut_ad(ibuf->empty);
+ ut_ad(page_get_space_id(btr_pcur_get_page(&pcur))
+ == IBUF_SPACE_ID);
+ ut_ad(page_get_page_no(btr_pcur_get_page(&pcur))
+ == FSP_IBUF_TREE_ROOT_PAGE_NO);
+
+ ibuf_mtr_commit(&mtr);
+ btr_pcur_close(&pcur);
+
+ return(0);
+ }
+
+ sum_sizes = ibuf_get_merge_page_nos(TRUE,
+ btr_pcur_get_rec(&pcur), &mtr,
+ space_ids, space_versions,
+ page_nos, n_pages);
+#if 0 /* defined UNIV_IBUF_DEBUG */
+ fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n",
+ sync, *n_pages, sum_sizes);
+#endif
+ ibuf_mtr_commit(&mtr);
+ btr_pcur_close(&pcur);
+
+ buf_read_ibuf_merge_pages(
+ sync, space_ids, space_versions, page_nos, *n_pages);
+
+ return(sum_sizes + 1);
+}
+
+/*********************************************************************//**
+Get the table instance from the table id.
+@return table instance */
+static __attribute__((warn_unused_result))
+dict_table_t*
+ibuf_get_table(
+/*===========*/
+ table_id_t table_id) /*!< in: valid table id */
+{
+ rw_lock_s_lock_func(&dict_operation_lock, 0, __FILE__, __LINE__);
+
+ dict_table_t* table = dict_table_open_on_id(
+ table_id, FALSE, DICT_TABLE_OP_NORMAL);
+
+ rw_lock_s_unlock_gen(&dict_operation_lock, 0);
+
+ return(table);
+}
+
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+static
+ulint
+ibuf_merge_space(
+/*=============*/
+ ulint space, /*!< in: tablespace id to merge */
+ ulint* n_pages)/*!< out: number of pages to which merged */
+{
+ mtr_t mtr;
+ btr_pcur_t pcur;
+ mem_heap_t* heap = mem_heap_create(512);
+ dtuple_t* tuple = ibuf_search_tuple_build(space, 0, heap);
+
+ ibuf_mtr_start(&mtr);
+
+ /* Position the cursor on the first matching record. */
+
+ btr_pcur_open(
+ ibuf->index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur,
+ &mtr);
+
+ mem_heap_free(heap);
+
+ ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
+
+ ulint sum_sizes = 0;
+ ulint pages[IBUF_MAX_N_PAGES_MERGED];
+ ulint spaces[IBUF_MAX_N_PAGES_MERGED];
+ ib_int64_t versions[IBUF_MAX_N_PAGES_MERGED];
+
+ if (page_is_empty(btr_pcur_get_page(&pcur))) {
+ /* If a B-tree page is empty, it must be the root page
+ and the whole B-tree must be empty. InnoDB does not
+ allow empty B-tree pages other than the root. */
+ ut_ad(ibuf->empty);
+ ut_ad(page_get_space_id(btr_pcur_get_page(&pcur))
+ == IBUF_SPACE_ID);
+ ut_ad(page_get_page_no(btr_pcur_get_page(&pcur))
+ == FSP_IBUF_TREE_ROOT_PAGE_NO);
+
+ } else {
+
+ sum_sizes = ibuf_get_merge_pages(
+ &pcur, space, IBUF_MAX_N_PAGES_MERGED,
+ &pages[0], &spaces[0], &versions[0], n_pages,
+ &mtr);
+
+ ++sum_sizes;
+ }
+
+ ibuf_mtr_commit(&mtr);
+
+ btr_pcur_close(&pcur);
+
+ if (sum_sizes > 0) {
+
+ ut_a(*n_pages > 0 || sum_sizes == 1);
+
+#ifdef UNIV_DEBUG
+ ut_ad(*n_pages <= UT_ARR_SIZE(pages));
+
+ for (ulint i = 0; i < *n_pages; ++i) {
+ ut_ad(spaces[i] == space);
+ ut_ad(i == 0 || versions[i] == versions[i - 1]);
+ }
+#endif /* UNIV_DEBUG */
+
+ buf_read_ibuf_merge_pages(
+ true, spaces, versions, pages, *n_pages);
+ }
+
+ return(sum_sizes);
+}
+
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+static __attribute__((nonnull, warn_unused_result))
+ulint
+ibuf_merge(
+/*=======*/
+ table_id_t table_id, /*!< in: if merge should be
+ done only for a specific
+					table; for all tables this
+ should be 0 */
+ ulint* n_pages, /*!< out: number of pages to
+ which merged */
+ bool sync) /*!< in: TRUE if the caller
+ wants to wait for the issued
+ read with the highest
+ tablespace address to complete */
+{
+ dict_table_t* table;
+
+ *n_pages = 0;
+
+ /* We perform a dirty read of ibuf->empty, without latching
+ the insert buffer root page. We trust this dirty read except
+ when a slow shutdown is being executed. During a slow
+ shutdown, the insert buffer merge must be completed. */
+
+ if (ibuf->empty && !srv_shutdown_state) {
+ return(0);
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+ } else if (ibuf_debug) {
+ return(0);
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+ } else if (table_id == 0) {
+ return(ibuf_merge_pages(n_pages, sync));
+ } else if ((table = ibuf_get_table(table_id)) == 0) {
+ /* Table has been dropped. */
+ return(0);
+ }
+
+ ulint volume = ibuf_merge_space(table->space, n_pages);
+
+ dict_table_close(table, FALSE, FALSE);
+
+ return(volume);
+}
+
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+static
+ulint
+ibuf_contract(
+/*==========*/
+ ibool sync) /*!< in: TRUE if the caller wants to wait for the
+ issued read with the highest tablespace address
+ to complete */
+{
+ ulint n_pages;
+
+ return(ibuf_merge(0, &n_pages, sync));
+}
+
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+UNIV_INTERN
+ulint
+ibuf_contract_in_background(
+/*========================*/
+ table_id_t table_id, /*!< in: if merge should be done only
+					for a specific table; for all tables
+ this should be 0 */
+ ibool full) /*!< in: TRUE if the caller wants to
+ do a full contract based on PCT_IO(100).
+ If FALSE then the size of contract
+ batch is determined based on the
+ current size of the ibuf tree. */
+{
+ ulint sum_bytes = 0;
+ ulint sum_pages = 0;
+ ulint n_pag2;
+ ulint n_pages;
+
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+ if (srv_ibuf_disable_background_merge && table_id == 0) {
+ return(0);
+ }
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
+ if (full) {
+ /* Caller has requested a full batch */
+ n_pages = PCT_IO(100);
+ } else {
+ /* By default we do a batch of 5% of the io_capacity */
+ n_pages = PCT_IO(5);
+
+ mutex_enter(&ibuf_mutex);
+
+		/* If ibuf->size is more than half of max_size,
+		contract more aggressively.
+		The +1 avoids division by zero. */
+ if (ibuf->size > ibuf->max_size / 2) {
+ ulint diff = ibuf->size - ibuf->max_size / 2;
+ n_pages += PCT_IO((diff * 100)
+ / (ibuf->max_size + 1));
+ }
+
+ mutex_exit(&ibuf_mutex);
+ }
+
+ while (sum_pages < n_pages) {
+ ulint n_bytes;
+
+ n_bytes = ibuf_merge(table_id, &n_pag2, FALSE);
+
+ if (n_bytes == 0) {
+ return(sum_bytes);
+ }
+
+ sum_bytes += n_bytes;
+ sum_pages += n_pag2;
+ }
+
+ return(sum_bytes);
+}
+
+/*********************************************************************//**
+Contract insert buffer trees after insert if they are too big. */
+UNIV_INLINE
+void
+ibuf_contract_after_insert(
+/*=======================*/
+ ulint entry_size) /*!< in: size of a record which was inserted
+ into an ibuf tree */
+{
+ ibool sync;
+ ulint sum_sizes;
+ ulint size;
+ ulint max_size;
+
+ /* Perform dirty reads of ibuf->size and ibuf->max_size, to
+ reduce ibuf_mutex contention. ibuf->max_size remains constant
+ after ibuf_init_at_db_start(), but ibuf->size should be
+ protected by ibuf_mutex. Given that ibuf->size fits in a
+ machine word, this should be OK; at worst we are doing some
+	excessive ibuf_contract() or occasionally skipping an
+	ibuf_contract(). */
+ size = ibuf->size;
+ max_size = ibuf->max_size;
+
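+	/* If the tree has not grown past max_size by at least
+	IBUF_CONTRACT_ON_INSERT_NON_SYNC, do nothing; between the two
+	thresholds contract asynchronously; past the SYNC threshold wait
+	for the issued reads to complete. */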
+ if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) {
+ return;
+ }
+
+ sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC);
+
+	/* Contract at least entry_size bytes */
+ sum_sizes = 0;
+ size = 1;
+
+ do {
+
+ size = ibuf_contract(sync);
+ sum_sizes += size;
+ } while (size > 0 && sum_sizes < entry_size);
+}
+
+/*********************************************************************//**
+Determine if an insert buffer record has been encountered already.
+@return TRUE if a new record, FALSE if possible duplicate */
+static
+ibool
+ibuf_get_volume_buffered_hash(
+/*==========================*/
+ const rec_t* rec, /*!< in: ibuf record in post-4.1 format */
+ const byte* types, /*!< in: fields */
+ const byte* data, /*!< in: start of user record data */
+ ulint comp, /*!< in: 0=ROW_FORMAT=REDUNDANT,
+ nonzero=ROW_FORMAT=COMPACT */
+ ulint* hash, /*!< in/out: hash array */
+ ulint size) /*!< in: number of elements in hash array */
+{
+ ulint len;
+ ulint fold;
+ ulint bitmask;
+
+ len = ibuf_rec_get_size(
+ rec, types,
+ rec_get_n_fields_old(rec) - IBUF_REC_FIELD_USER, comp);
+ fold = ut_fold_binary(data, len);
+
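+	/* Treat the hash array as a bitmap: the fold value selects a word
+	(modulo the array size) and a bit within that word. */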
+ hash += (fold / (CHAR_BIT * sizeof *hash)) % size;
+ bitmask = static_cast<ulint>(
+ 1 << (fold % (CHAR_BIT * sizeof(*hash))));
+
+ if (*hash & bitmask) {
+
+ return(FALSE);
+ }
+
+ /* We have not seen this record yet. Insert it. */
+ *hash |= bitmask;
+
+ return(TRUE);
+}
+
+#ifdef UNIV_DEBUG
+# define ibuf_get_volume_buffered_count(mtr,rec,hash,size,n_recs) \
+ ibuf_get_volume_buffered_count_func(mtr,rec,hash,size,n_recs)
+#else /* UNIV_DEBUG */
+# define ibuf_get_volume_buffered_count(mtr,rec,hash,size,n_recs) \
+ ibuf_get_volume_buffered_count_func(rec,hash,size,n_recs)
+#endif
+/*********************************************************************//**
+Update the estimate of the number of records on a page, and
+get the space taken by merging the buffered record to the index page.
+@return size of index record in bytes + an upper limit of the space
+taken in the page directory */
+static
+ulint
+ibuf_get_volume_buffered_count_func(
+/*================================*/
+#ifdef UNIV_DEBUG
+ mtr_t* mtr, /*!< in: mini-transaction owning rec */
+#endif /* UNIV_DEBUG */
+ const rec_t* rec, /*!< in: insert buffer record */
+ ulint* hash, /*!< in/out: hash array */
+ ulint size, /*!< in: number of elements in hash array */
+ lint* n_recs) /*!< in/out: estimated number of records
+ on the page that rec points to */
+{
+ ulint len;
+ ibuf_op_t ibuf_op;
+ const byte* types;
+ ulint n_fields;
+
+ ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+ || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(ibuf_inside(mtr));
+
+ n_fields = rec_get_n_fields_old(rec);
+ ut_ad(n_fields > IBUF_REC_FIELD_USER);
+ n_fields -= IBUF_REC_FIELD_USER;
+
+ rec_get_nth_field_offs_old(rec, 1, &len);
+ /* This function is only invoked when buffering new
+ operations. All pre-4.1 records should have been merged
+ when the database was started up. */
+ ut_a(len == 1);
+
+ if (rec_get_deleted_flag(rec, 0)) {
+ /* This record has been merged already,
+ but apparently the system crashed before
+ the change was discarded from the buffer.
+ Pretend that the record does not exist. */
+ return(0);
+ }
+
+ types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
+
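+	/* The metadata field is an optional info header followed by one
+	type buffer per user field, so the length modulo the type buffer
+	size identifies the record format. */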
+ switch (UNIV_EXPECT(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
+ IBUF_REC_INFO_SIZE)) {
+ default:
+ ut_error;
+ case 0:
+		/* This ROW_FORMAT=REDUNDANT record does not include an
+ operation counter. Exclude it from the *n_recs,
+ because deletes cannot be buffered if there are
+ old-style inserts buffered for the page. */
+
+ len = ibuf_rec_get_size(rec, types, n_fields, 0);
+
+ return(len
+ + rec_get_converted_extra_size(len, n_fields, 0)
+ + page_dir_calc_reserved_space(1));
+ case 1:
+		/* This ROW_FORMAT=COMPACT record does not include an
+ operation counter. Exclude it from the *n_recs,
+ because deletes cannot be buffered if there are
+ old-style inserts buffered for the page. */
+ goto get_volume_comp;
+
+ case IBUF_REC_INFO_SIZE:
+ ibuf_op = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE];
+ break;
+ }
+
+ switch (ibuf_op) {
+ case IBUF_OP_INSERT:
+ /* Inserts can be done by updating a delete-marked record.
+ Because delete-mark and insert operations can be pointing to
+ the same records, we must not count duplicates. */
+ case IBUF_OP_DELETE_MARK:
+ /* There must be a record to delete-mark.
+ See if this record has been already buffered. */
+ if (n_recs && ibuf_get_volume_buffered_hash(
+ rec, types + IBUF_REC_INFO_SIZE,
+ types + len,
+ types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT,
+ hash, size)) {
+ (*n_recs)++;
+ }
+
+ if (ibuf_op == IBUF_OP_DELETE_MARK) {
+ /* Setting the delete-mark flag does not
+ affect the available space on the page. */
+ return(0);
+ }
+ break;
+ case IBUF_OP_DELETE:
+ /* A record will be removed from the page. */
+ if (n_recs) {
+ (*n_recs)--;
+ }
+ /* While deleting a record actually frees up space,
+ we have to play it safe and pretend that it takes no
+ additional space (the record might not exist, etc.). */
+ return(0);
+ default:
+ ut_error;
+ }
+
+ ut_ad(ibuf_op == IBUF_OP_INSERT);
+
+get_volume_comp:
+ {
+ dtuple_t* entry;
+ ulint volume;
+ dict_index_t* dummy_index;
+ mem_heap_t* heap = mem_heap_create(500);
+
+ entry = ibuf_build_entry_from_ibuf_rec(
+ mtr, rec, heap, &dummy_index);
+
+ volume = rec_get_converted_size(dummy_index, entry, 0);
+
+ ibuf_dummy_index_free(dummy_index);
+ mem_heap_free(heap);
+
+ return(volume + page_dir_calc_reserved_space(1));
+ }
+}
+
+/*********************************************************************//**
+Gets an upper limit for the combined size of entries buffered in the insert
+buffer for a given page.
+@return upper limit for the volume of buffered inserts for the index
+page, in bytes; UNIV_PAGE_SIZE, if the entries for the index page span
+several pages in the insert buffer */
+static
+ulint
+ibuf_get_volume_buffered(
+/*=====================*/
+ const btr_pcur_t*pcur, /*!< in: pcur positioned at a place in an
+ insert buffer tree where we would insert an
+ entry for the index page whose number is
+ page_no, latch mode has to be BTR_MODIFY_PREV
+ or BTR_MODIFY_TREE */
+ ulint space, /*!< in: space id */
+ ulint page_no,/*!< in: page number of an index page */
+ lint* n_recs, /*!< in/out: minimum number of records on the
+ page after the buffered changes have been
+ applied, or NULL to disable the counting */
+ mtr_t* mtr) /*!< in: mini-transaction of pcur */
+{
+ ulint volume;
+ const rec_t* rec;
+ const page_t* page;
+ ulint prev_page_no;
+ const page_t* prev_page;
+ ulint next_page_no;
+ const page_t* next_page;
+ /* bitmap of buffered recs */
+ ulint hash_bitmap[128 / sizeof(ulint)];
+
+ ut_ad((pcur->latch_mode == BTR_MODIFY_PREV)
+ || (pcur->latch_mode == BTR_MODIFY_TREE));
+
+ /* Count the volume of inserts earlier in the alphabetical order than
+ pcur */
+
+ volume = 0;
+
+ if (n_recs) {
+ memset(hash_bitmap, 0, sizeof hash_bitmap);
+ }
+
+ rec = btr_pcur_get_rec(pcur);
+ page = page_align(rec);
+ ut_ad(page_validate(page, ibuf->index));
+
+ if (page_rec_is_supremum(rec)) {
+ rec = page_rec_get_prev_const(rec);
+ }
+
+ for (; !page_rec_is_infimum(rec);
+ rec = page_rec_get_prev_const(rec)) {
+ ut_ad(page_align(rec) == page);
+
+ if (page_no != ibuf_rec_get_page_no(mtr, rec)
+ || space != ibuf_rec_get_space(mtr, rec)) {
+
+ goto count_later;
+ }
+
+ volume += ibuf_get_volume_buffered_count(
+ mtr, rec,
+ hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
+ }
+
+ /* Look at the previous page */
+
+ prev_page_no = btr_page_get_prev(page, mtr);
+
+ if (prev_page_no == FIL_NULL) {
+
+ goto count_later;
+ }
+
+ {
+ buf_block_t* block;
+
+ block = buf_page_get(
+ IBUF_SPACE_ID, 0, prev_page_no, RW_X_LATCH,
+ mtr);
+
+ buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
+
+
+ prev_page = buf_block_get_frame(block);
+ ut_ad(page_validate(prev_page, ibuf->index));
+ }
+
+#ifdef UNIV_BTR_DEBUG
+ ut_a(btr_page_get_next(prev_page, mtr) == page_get_page_no(page));
+#endif /* UNIV_BTR_DEBUG */
+
+ rec = page_get_supremum_rec(prev_page);
+ rec = page_rec_get_prev_const(rec);
+
+ for (;; rec = page_rec_get_prev_const(rec)) {
+ ut_ad(page_align(rec) == prev_page);
+
+ if (page_rec_is_infimum(rec)) {
+
+			/* We cannot go back yet another page, because we
+			do not have the x-latch on it, and cannot acquire one
+			because of the latching order: we have to give up */
+
+ return(UNIV_PAGE_SIZE);
+ }
+
+ if (page_no != ibuf_rec_get_page_no(mtr, rec)
+ || space != ibuf_rec_get_space(mtr, rec)) {
+
+ goto count_later;
+ }
+
+ volume += ibuf_get_volume_buffered_count(
+ mtr, rec,
+ hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
+ }
+
+count_later:
+ rec = btr_pcur_get_rec(pcur);
+
+ if (!page_rec_is_supremum(rec)) {
+ rec = page_rec_get_next_const(rec);
+ }
+
+ for (; !page_rec_is_supremum(rec);
+ rec = page_rec_get_next_const(rec)) {
+ if (page_no != ibuf_rec_get_page_no(mtr, rec)
+ || space != ibuf_rec_get_space(mtr, rec)) {
+
+ return(volume);
+ }
+
+ volume += ibuf_get_volume_buffered_count(
+ mtr, rec,
+ hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
+ }
+
+ /* Look at the next page */
+
+ next_page_no = btr_page_get_next(page, mtr);
+
+ if (next_page_no == FIL_NULL) {
+
+ return(volume);
+ }
+
+ {
+ buf_block_t* block;
+
+ block = buf_page_get(
+ IBUF_SPACE_ID, 0, next_page_no, RW_X_LATCH,
+ mtr);
+
+ buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
+
+
+ next_page = buf_block_get_frame(block);
+ ut_ad(page_validate(next_page, ibuf->index));
+ }
+
+#ifdef UNIV_BTR_DEBUG
+ ut_a(btr_page_get_prev(next_page, mtr) == page_get_page_no(page));
+#endif /* UNIV_BTR_DEBUG */
+
+ rec = page_get_infimum_rec(next_page);
+ rec = page_rec_get_next_const(rec);
+
+ for (;; rec = page_rec_get_next_const(rec)) {
+ ut_ad(page_align(rec) == next_page);
+
+ if (page_rec_is_supremum(rec)) {
+
+ /* We give up */
+
+ return(UNIV_PAGE_SIZE);
+ }
+
+ if (page_no != ibuf_rec_get_page_no(mtr, rec)
+ || space != ibuf_rec_get_space(mtr, rec)) {
+
+ return(volume);
+ }
+
+ volume += ibuf_get_volume_buffered_count(
+ mtr, rec,
+ hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs);
+ }
+}
+
+/*********************************************************************//**
+Reads the biggest tablespace id from the high end of the insert buffer
+tree and updates the counter in fil_system. */
+UNIV_INTERN
+void
+ibuf_update_max_tablespace_id(void)
+/*===============================*/
+{
+ ulint max_space_id;
+ const rec_t* rec;
+ const byte* field;
+ ulint len;
+ btr_pcur_t pcur;
+ mtr_t mtr;
+
+ ut_a(!dict_table_is_comp(ibuf->index->table));
+
+ ibuf_mtr_start(&mtr);
+
+ btr_pcur_open_at_index_side(
+ false, ibuf->index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr);
+
+ ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
+
+ btr_pcur_move_to_prev(&pcur, &mtr);
+
+ if (btr_pcur_is_before_first_on_page(&pcur)) {
+ /* The tree is empty */
+
+ max_space_id = 0;
+ } else {
+ rec = btr_pcur_get_rec(&pcur);
+
+ field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len);
+
+ ut_a(len == 4);
+
+ max_space_id = mach_read_from_4(field);
+ }
+
+ ibuf_mtr_commit(&mtr);
+
+ /* printf("Maximum space id in insert buffer %lu\n", max_space_id); */
+
+ fil_set_max_space_id_if_bigger(max_space_id);
+}
+
+#ifdef UNIV_DEBUG
+# define ibuf_get_entry_counter_low(mtr,rec,space,page_no) \
+ ibuf_get_entry_counter_low_func(mtr,rec,space,page_no)
+#else /* UNIV_DEBUG */
+# define ibuf_get_entry_counter_low(mtr,rec,space,page_no) \
+ ibuf_get_entry_counter_low_func(rec,space,page_no)
+#endif
+/****************************************************************//**
+Helper function for ibuf_get_entry_counter_func. Checks if rec is for
+(space, page_no), and if so, reads counter value from it and returns
+that + 1.
+@retval ULINT_UNDEFINED if the record does not contain any counter
+@retval 0 if the record is not for (space, page_no)
+@retval 1 + previous counter value, otherwise */
+static
+ulint
+ibuf_get_entry_counter_low_func(
+/*============================*/
+#ifdef UNIV_DEBUG
+ mtr_t* mtr, /*!< in: mini-transaction of rec */
+#endif /* UNIV_DEBUG */
+ const rec_t* rec, /*!< in: insert buffer record */
+ ulint space, /*!< in: space id */
+ ulint page_no) /*!< in: page number */
+{
+ ulint counter;
+ const byte* field;
+ ulint len;
+
+ ut_ad(ibuf_inside(mtr));
+ ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+ || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+ ut_ad(rec_get_n_fields_old(rec) > 2);
+
+ field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
+
+ ut_a(len == 1);
+
+ /* Check the tablespace identifier. */
+ field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len);
+
+ ut_a(len == 4);
+
+ if (mach_read_from_4(field) != space) {
+
+ return(0);
+ }
+
+ /* Check the page offset. */
+ field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_PAGE, &len);
+ ut_a(len == 4);
+
+ if (mach_read_from_4(field) != page_no) {
+
+ return(0);
+ }
+
+ /* Check if the record contains a counter field. */
+ field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
+
+ switch (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) {
+ default:
+ ut_error;
+ case 0: /* ROW_FORMAT=REDUNDANT */
+ case 1: /* ROW_FORMAT=COMPACT */
+ return(ULINT_UNDEFINED);
+
+ case IBUF_REC_INFO_SIZE:
+ counter = mach_read_from_2(field + IBUF_REC_OFFSET_COUNTER);
+ ut_a(counter < 0xFFFF);
+ return(counter + 1);
+ }
+}
+
+#ifdef UNIV_DEBUG
+# define ibuf_get_entry_counter(space,page_no,rec,mtr,exact_leaf) \
+ ibuf_get_entry_counter_func(space,page_no,rec,mtr,exact_leaf)
+#else /* UNIV_DEBUG */
+# define ibuf_get_entry_counter(space,page_no,rec,mtr,exact_leaf) \
+ ibuf_get_entry_counter_func(space,page_no,rec,exact_leaf)
+#endif
+
+/****************************************************************//**
+Calculate the counter field for an entry based on the current
+last record in ibuf for (space, page_no).
+@return the counter field, or ULINT_UNDEFINED
+if we should abort this insertion to ibuf */
+static
+ulint
+ibuf_get_entry_counter_func(
+/*========================*/
+ ulint space, /*!< in: space id of entry */
+ ulint page_no, /*!< in: page number of entry */
+ const rec_t* rec, /*!< in: the record preceding the
+ insertion point */
+#ifdef UNIV_DEBUG
+ mtr_t* mtr, /*!< in: mini-transaction */
+#endif /* UNIV_DEBUG */
+ ibool only_leaf) /*!< in: TRUE if this is the only
+ leaf page that can contain entries
+ for (space,page_no), that is, there
+ was no exact match for (space,page_no)
+ in the node pointer */
+{
+ ut_ad(ibuf_inside(mtr));
+ ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX));
+ ut_ad(page_validate(page_align(rec), ibuf->index));
+
+ if (page_rec_is_supremum(rec)) {
+ /* This is just for safety. The record should be a
+ page infimum or a user record. */
+ ut_ad(0);
+ return(ULINT_UNDEFINED);
+ } else if (!page_rec_is_infimum(rec)) {
+ return(ibuf_get_entry_counter_low(mtr, rec, space, page_no));
+ } else if (only_leaf
+ || fil_page_get_prev(page_align(rec)) == FIL_NULL) {
+ /* The parent node pointer did not contain the
+ searched for (space, page_no), which means that the
+ search ended on the correct page regardless of the
+ counter value, and since we're at the infimum record,
+ there are no existing records. */
+ return(0);
+ } else {
+ /* We used to read the previous page here. It would
+ break the latching order, because the caller has
+ buffer-fixed an insert buffer bitmap page. */
+ return(ULINT_UNDEFINED);
+ }
+}
+
+/*********************************************************************//**
+Buffer an operation in the insert/delete buffer, instead of doing it
+directly to the disk page, if this is possible.
+@return DB_SUCCESS, DB_STRONG_FAIL or other error */
+static __attribute__((nonnull, warn_unused_result))
+dberr_t
+ibuf_insert_low(
+/*============*/
+ ulint mode, /*!< in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */
+ ibuf_op_t op, /*!< in: operation type */
+ ibool no_counter,
+ /*!< in: TRUE=use 5.0.3 format;
+ FALSE=allow delete buffering */
+ const dtuple_t* entry, /*!< in: index entry to insert */
+ ulint entry_size,
+ /*!< in: rec_get_converted_size(index, entry) */
+ dict_index_t* index, /*!< in: index where to insert; must not be
+ unique or clustered */
+ ulint space, /*!< in: space id where to insert */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+ ulint page_no,/*!< in: page number where to insert */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ big_rec_t* dummy_big_rec;
+ btr_pcur_t pcur;
+ btr_cur_t* cursor;
+ dtuple_t* ibuf_entry;
+ mem_heap_t* offsets_heap = NULL;
+ mem_heap_t* heap;
+ ulint* offsets = NULL;
+ ulint buffered;
+ lint min_n_recs;
+ rec_t* ins_rec;
+ ibool old_bit_value;
+ page_t* bitmap_page;
+ buf_block_t* block;
+ page_t* root;
+ dberr_t err;
+ ibool do_merge;
+ ulint space_ids[IBUF_MAX_N_PAGES_MERGED];
+ ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED];
+ ulint page_nos[IBUF_MAX_N_PAGES_MERGED];
+ ulint n_stored;
+ mtr_t mtr;
+ mtr_t bitmap_mtr;
+
+ ut_a(!dict_index_is_clust(index));
+ ut_ad(dtuple_check_typed(entry));
+ ut_ad(ut_is_2pow(zip_size));
+ ut_ad(!no_counter || op == IBUF_OP_INSERT);
+ ut_a(op < IBUF_OP_COUNT);
+
+ do_merge = FALSE;
+
+ /* Perform dirty reads of ibuf->size and ibuf->max_size, to
+ reduce ibuf_mutex contention. Given that ibuf->max_size and
+ ibuf->size fit in a machine word, this should be OK; at worst
+ we are doing some excessive ibuf_contract() or occasionally
+ skipping an ibuf_contract(). */
+ if (ibuf->max_size == 0) {
+ return(DB_STRONG_FAIL);
+ }
+
+ if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) {
+ /* Insert buffer is now too big, contract it but do not try
+ to insert */
+
+
+#ifdef UNIV_IBUF_DEBUG
+ fputs("Ibuf too big\n", stderr);
+#endif
+ /* Use synchronous contract (== TRUE) */
+ ibuf_contract(TRUE);
+
+ return(DB_STRONG_FAIL);
+ }
+
+ heap = mem_heap_create(1024);
+
+ /* Build the entry which contains the space id and the page number
+ as the first fields and the type information for other fields, and
+ which will be inserted to the insert buffer. Using a counter value
+ of 0xFFFF we find the last record for (space, page_no), from which
+ we can then read the counter value N and use N + 1 in the record we
+ insert. (We patch the ibuf_entry's counter field to the correct
+ value just before actually inserting the entry.) */
+
+ ibuf_entry = ibuf_entry_build(
+ op, index, entry, space, page_no,
+ no_counter ? ULINT_UNDEFINED : 0xFFFF, heap);
+
+ /* Open a cursor to the insert buffer tree to calculate if we can add
+ the new entry to it without exceeding the free space limit for the
+ page. */
+
+ if (mode == BTR_MODIFY_TREE) {
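+		/* A pessimistic insert may need to allocate pages for the
+		ibuf tree. Keep the mutexes only once the tree has enough
+		free pages; otherwise release them, try to add a free page,
+		and retry. */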
+ for (;;) {
+ mutex_enter(&ibuf_pessimistic_insert_mutex);
+ mutex_enter(&ibuf_mutex);
+
+ if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) {
+
+ break;
+ }
+
+ mutex_exit(&ibuf_mutex);
+ mutex_exit(&ibuf_pessimistic_insert_mutex);
+
+ if (UNIV_UNLIKELY(!ibuf_add_free_page())) {
+
+ mem_heap_free(heap);
+ return(DB_STRONG_FAIL);
+ }
+ }
+ }
+
+ ibuf_mtr_start(&mtr);
+
+ btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr);
+ ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index));
+
+ /* Find out the volume of already buffered inserts for the same index
+ page */
+ min_n_recs = 0;
+ buffered = ibuf_get_volume_buffered(&pcur, space, page_no,
+ op == IBUF_OP_DELETE
+ ? &min_n_recs
+ : NULL, &mtr);
+
+ if (op == IBUF_OP_DELETE
+ && (min_n_recs < 2
+ || buf_pool_watch_occurred(space, page_no))) {
+ /* The page could become empty after the record is
+ deleted, or the page has been read in to the buffer
+ pool. Refuse to buffer the operation. */
+
+ /* The buffer pool watch is needed for IBUF_OP_DELETE
+ because of latching order considerations. We can
+ check buf_pool_watch_occurred() only after latching
+ the insert buffer B-tree pages that contain buffered
+ changes for the page. We never buffer IBUF_OP_DELETE,
+ unless some IBUF_OP_INSERT or IBUF_OP_DELETE_MARK have
+ been previously buffered for the page. Because there
+ are buffered operations for the page, the insert
+ buffer B-tree page latches held by mtr will guarantee
+ that no changes for the user page will be merged
+ before mtr_commit(&mtr). We must not mtr_commit(&mtr)
+ until after the IBUF_OP_DELETE has been buffered. */
+
+fail_exit:
+ if (mode == BTR_MODIFY_TREE) {
+ mutex_exit(&ibuf_mutex);
+ mutex_exit(&ibuf_pessimistic_insert_mutex);
+ }
+
+ err = DB_STRONG_FAIL;
+ goto func_exit;
+ }
+
+ /* After this point, the page could still be loaded to the
+ buffer pool, but we do not have to care about it, since we are
+ holding a latch on the insert buffer leaf page that contains
+ buffered changes for (space, page_no). If the page enters the
+ buffer pool, buf_page_io_complete() for (space, page_no) will
+ have to acquire a latch on the same insert buffer leaf page,
+ which it cannot do until we have buffered the IBUF_OP_DELETE
+ and done mtr_commit(&mtr) to release the latch. */
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_a((buffered == 0) || ibuf_count_get(space, page_no));
+#endif
+ ibuf_mtr_start(&bitmap_mtr);
+
+ bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
+ zip_size, &bitmap_mtr);
+
+ /* We check if the index page is suitable for buffered entries */
+
+ if (buf_page_peek(space, page_no)
+ || lock_rec_expl_exist_on_page(space, page_no)) {
+
+ ibuf_mtr_commit(&bitmap_mtr);
+ goto fail_exit;
+ }
+
+ if (op == IBUF_OP_INSERT) {
+ ulint bits = ibuf_bitmap_page_get_bits(
+ bitmap_page, page_no, zip_size, IBUF_BITMAP_FREE,
+ &bitmap_mtr);
+
+ if (buffered + entry_size + page_dir_calc_reserved_space(1)
+ > ibuf_index_page_calc_free_from_bits(zip_size, bits)) {
+ /* Release the bitmap page latch early. */
+ ibuf_mtr_commit(&bitmap_mtr);
+
+ /* It may not fit */
+ do_merge = TRUE;
+
+ ibuf_get_merge_page_nos(FALSE,
+ btr_pcur_get_rec(&pcur), &mtr,
+ space_ids, space_versions,
+ page_nos, &n_stored);
+
+ goto fail_exit;
+ }
+ }
+
+ if (!no_counter) {
+ /* Patch correct counter value to the entry to
+ insert. This can change the insert position, which can
+ result in the need to abort in some cases. */
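+		/* The last argument tells whether this is the only leaf
+		page that can contain entries for (space, page_no): when
+		fewer than IBUF_REC_FIELD_METADATA fields matched, there
+		was no exact match for (space, page_no) in the node
+		pointer. */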
+ ulint counter = ibuf_get_entry_counter(
+ space, page_no, btr_pcur_get_rec(&pcur), &mtr,
+ btr_pcur_get_btr_cur(&pcur)->low_match
+ < IBUF_REC_FIELD_METADATA);
+ dfield_t* field;
+
+ if (counter == ULINT_UNDEFINED) {
+ ibuf_mtr_commit(&bitmap_mtr);
+ goto fail_exit;
+ }
+
+ field = dtuple_get_nth_field(
+ ibuf_entry, IBUF_REC_FIELD_METADATA);
+ mach_write_to_2(
+ (byte*) dfield_get_data(field)
+ + IBUF_REC_OFFSET_COUNTER, counter);
+ }
+
+ /* Set the bitmap bit denoting that the insert buffer contains
+ buffered entries for this index page, if the bit is not set yet */
+
+ old_bit_value = ibuf_bitmap_page_get_bits(
+ bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_BUFFERED, &bitmap_mtr);
+
+ if (!old_bit_value) {
+ ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_BUFFERED, TRUE,
+ &bitmap_mtr);
+ }
+
+ ibuf_mtr_commit(&bitmap_mtr);
+
+ cursor = btr_pcur_get_btr_cur(&pcur);
+
+ if (mode == BTR_MODIFY_PREV) {
+ err = btr_cur_optimistic_insert(
+ BTR_NO_LOCKING_FLAG,
+ cursor, &offsets, &offsets_heap,
+ ibuf_entry, &ins_rec,
+ &dummy_big_rec, 0, thr, &mtr);
+ block = btr_cur_get_block(cursor);
+ ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
+
+ /* If this is the root page, update ibuf->empty. */
+ if (UNIV_UNLIKELY(buf_block_get_page_no(block)
+ == FSP_IBUF_TREE_ROOT_PAGE_NO)) {
+ const page_t* root = buf_block_get_frame(block);
+
+ ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
+ ut_ad(page_get_page_no(root)
+ == FSP_IBUF_TREE_ROOT_PAGE_NO);
+
+ ibuf->empty = page_is_empty(root);
+ }
+ } else {
+ ut_ad(mode == BTR_MODIFY_TREE);
+
+ /* We acquire an x-latch to the root page before the insert,
+ because a pessimistic insert releases the tree x-latch,
+ which would cause the x-latching of the root after that to
+ break the latching order. */
+
+ root = ibuf_tree_root_get(&mtr);
+
+ err = btr_cur_optimistic_insert(
+ BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG,
+ cursor, &offsets, &offsets_heap,
+ ibuf_entry, &ins_rec,
+ &dummy_big_rec, 0, thr, &mtr);
+
+ if (err == DB_FAIL) {
+ err = btr_cur_pessimistic_insert(
+ BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG,
+ cursor, &offsets, &offsets_heap,
+ ibuf_entry, &ins_rec,
+ &dummy_big_rec, 0, thr, &mtr);
+ }
+
+ mutex_exit(&ibuf_pessimistic_insert_mutex);
+ ibuf_size_update(root, &mtr);
+ mutex_exit(&ibuf_mutex);
+ ibuf->empty = page_is_empty(root);
+
+ block = btr_cur_get_block(cursor);
+ ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID);
+ }
+
+ if (offsets_heap) {
+ mem_heap_free(offsets_heap);
+ }
+
+ if (err == DB_SUCCESS && op != IBUF_OP_DELETE) {
+ /* Update the page max trx id field */
+ page_update_max_trx_id(block, NULL,
+ thr_get_trx(thr)->id, &mtr);
+ }
+
+func_exit:
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ if (err == DB_SUCCESS) {
+ fprintf(stderr,
+ "Incrementing ibuf count of space %lu page %lu\n"
+ "from %lu by 1\n", space, page_no,
+ ibuf_count_get(space, page_no));
+
+ ibuf_count_set(space, page_no,
+ ibuf_count_get(space, page_no) + 1);
+ }
+#endif
+
+ ibuf_mtr_commit(&mtr);
+ btr_pcur_close(&pcur);
+
+ mem_heap_free(heap);
+
+ if (err == DB_SUCCESS && mode == BTR_MODIFY_TREE) {
+ ibuf_contract_after_insert(entry_size);
+ }
+
+ if (do_merge) {
+#ifdef UNIV_IBUF_DEBUG
+ ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED);
+#endif
+ buf_read_ibuf_merge_pages(false, space_ids, space_versions,
+ page_nos, n_stored);
+ }
+
+ return(err);
+}
+
+/*********************************************************************//**
+Buffer an operation in the insert/delete buffer, instead of doing it
+directly to the disk page, if this is possible. Does not do it if the index
+is clustered or unique.
+@return TRUE if success */
+UNIV_INTERN
+ibool
+ibuf_insert(
+/*========*/
+ ibuf_op_t op, /*!< in: operation type */
+ const dtuple_t* entry, /*!< in: index entry to insert */
+ dict_index_t* index, /*!< in: index where to insert */
+ ulint space, /*!< in: space id where to insert */
+ ulint zip_size,/*!< in: compressed page size in bytes, or 0 */
+ ulint page_no,/*!< in: page number where to insert */
+ que_thr_t* thr) /*!< in: query thread */
+{
+ dberr_t err;
+ ulint entry_size;
+ ibool no_counter;
+ /* Read the settable global variable ibuf_use only once in
+ this function, so that we will have a consistent view of it. */
+ ibuf_use_t use = ibuf_use;
+ DBUG_ENTER("ibuf_insert");
+
+ DBUG_PRINT("ibuf", ("op: %d, space: %ld, page_no: %ld",
+ op, space, page_no));
+
+ ut_ad(dtuple_check_typed(entry));
+ ut_ad(ut_is_2pow(zip_size));
+
+ ut_a(!dict_index_is_clust(index));
+
+ no_counter = use <= IBUF_USE_INSERT;
+
+ switch (op) {
+ case IBUF_OP_INSERT:
+ switch (use) {
+ case IBUF_USE_NONE:
+ case IBUF_USE_DELETE:
+ case IBUF_USE_DELETE_MARK:
+ DBUG_RETURN(FALSE);
+ case IBUF_USE_INSERT:
+ case IBUF_USE_INSERT_DELETE_MARK:
+ case IBUF_USE_ALL:
+ goto check_watch;
+ case IBUF_USE_COUNT:
+ break;
+ }
+ break;
+ case IBUF_OP_DELETE_MARK:
+ switch (use) {
+ case IBUF_USE_NONE:
+ case IBUF_USE_INSERT:
+ DBUG_RETURN(FALSE);
+ case IBUF_USE_DELETE_MARK:
+ case IBUF_USE_DELETE:
+ case IBUF_USE_INSERT_DELETE_MARK:
+ case IBUF_USE_ALL:
+ ut_ad(!no_counter);
+ goto check_watch;
+ case IBUF_USE_COUNT:
+ break;
+ }
+ break;
+ case IBUF_OP_DELETE:
+ switch (use) {
+ case IBUF_USE_NONE:
+ case IBUF_USE_INSERT:
+ case IBUF_USE_INSERT_DELETE_MARK:
+ DBUG_RETURN(FALSE);
+ case IBUF_USE_DELETE_MARK:
+ case IBUF_USE_DELETE:
+ case IBUF_USE_ALL:
+ ut_ad(!no_counter);
+ goto skip_watch;
+ case IBUF_USE_COUNT:
+ break;
+ }
+ break;
+ case IBUF_OP_COUNT:
+ break;
+ }
+
+ /* unknown op or use */
+ ut_error;
+
+check_watch:
+ /* If a thread attempts to buffer an insert on a page while a
+ purge is in progress on the same page, the purge must not be
+ buffered, because it could remove a record that was
+ re-inserted later. For simplicity, we block the buffering of
+ all operations on a page that has a purge pending.
+
+ We do not check this in the IBUF_OP_DELETE case, because that
+ would always trigger the buffer pool watch during purge and
+ thus prevent the buffering of delete operations. We assume
+ that the issuer of IBUF_OP_DELETE has called
+ buf_pool_watch_set(space, page_no). */
+
+ {
+ buf_page_t* bpage;
+ buf_pool_t* buf_pool = buf_pool_get(space, page_no);
+ bpage = buf_page_get_also_watch(buf_pool, space, page_no);
+
+ if (UNIV_LIKELY_NULL(bpage)) {
+ /* A buffer pool watch has been set or the
+ page has been read into the buffer pool.
+ Do not buffer the request. If a purge operation
+ is being buffered, have this request executed
+ directly on the page in the buffer pool after the
+ buffered entries for this page have been merged. */
+ DBUG_RETURN(FALSE);
+ }
+ }
+
+skip_watch:
+ entry_size = rec_get_converted_size(index, entry, 0);
+
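+	/* Refuse to buffer a record that would occupy half or more of
+	the free space on an empty page; such records are always applied
+	directly, presumably so that an insert buffer merge can never be
+	forced to split the index page. */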
+ if (entry_size
+ >= page_get_free_space_of_empty(dict_table_is_comp(index->table))
+ / 2) {
+
+ DBUG_RETURN(FALSE);
+ }
+
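+	/* First try to buffer the operation with an optimistic,
+	leaf-level latch (BTR_MODIFY_PREV). Only if the entry does not
+	fit do we retry with BTR_MODIFY_TREE, which may split or extend
+	the insert buffer tree. */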
+ err = ibuf_insert_low(BTR_MODIFY_PREV, op, no_counter,
+ entry, entry_size,
+ index, space, zip_size, page_no, thr);
+ if (err == DB_FAIL) {
+ err = ibuf_insert_low(BTR_MODIFY_TREE, op, no_counter,
+ entry, entry_size,
+ index, space, zip_size, page_no, thr);
+ }
+
+ if (err == DB_SUCCESS) {
+#ifdef UNIV_IBUF_DEBUG
+ /* fprintf(stderr, "Ibuf insert for page no %lu of index %s\n",
+ page_no, index->name); */
+#endif
+ DBUG_RETURN(TRUE);
+
+ } else {
+ ut_a(err == DB_STRONG_FAIL || err == DB_TOO_BIG_RECORD);
+
+ DBUG_RETURN(FALSE);
+ }
+}
+
+/********************************************************************//**
+During merge, inserts to an index page a secondary index entry extracted
+from the insert buffer.
+@return newly inserted record */
+static __attribute__((nonnull))
+rec_t*
+ibuf_insert_to_index_page_low(
+/*==========================*/
+ const dtuple_t* entry, /*!< in: buffered entry to insert */
+ buf_block_t* block, /*!< in/out: index page where the buffered
+ entry should be placed */
+ dict_index_t* index, /*!< in: record descriptor */
+ ulint** offsets,/*!< out: offsets on *rec */
+ mem_heap_t* heap, /*!< in/out: memory heap */
+ mtr_t* mtr, /*!< in/out: mtr */
+ page_cur_t* page_cur)/*!< in/out: cursor positioned on the record
+ after which to insert the buffered entry */
+{
+ const page_t* page;
+ ulint space;
+ ulint page_no;
+ ulint zip_size;
+ const page_t* bitmap_page;
+ ulint old_bits;
+ rec_t* rec;
+ DBUG_ENTER("ibuf_insert_to_index_page_low");
+
+ rec = page_cur_tuple_insert(page_cur, entry, index,
+ offsets, &heap, 0, mtr);
+ if (rec != NULL) {
+ DBUG_RETURN(rec);
+ }
+
+ /* Page reorganization or recompression should already have
+ been attempted by page_cur_tuple_insert(). Besides, per
+ ibuf_index_page_calc_free_zip() the page should not have been
+ recompressed or reorganized. */
+ ut_ad(!buf_block_get_page_zip(block));
+
+ /* If the record did not fit, reorganize */
+
+ btr_page_reorganize(page_cur, index, mtr);
+
+ /* This time the record must fit */
+
+ rec = page_cur_tuple_insert(page_cur, entry, index,
+ offsets, &heap, 0, mtr);
+ if (rec != NULL) {
+ DBUG_RETURN(rec);
+ }
+
+ page = buf_block_get_frame(block);
+
+ ut_print_timestamp(stderr);
+
+ fprintf(stderr,
+ " InnoDB: Error: Insert buffer insert fails;"
+ " page free %lu, dtuple size %lu\n",
+ (ulong) page_get_max_insert_size(page, 1),
+ (ulong) rec_get_converted_size(index, entry, 0));
+ fputs("InnoDB: Cannot insert index record ", stderr);
+ dtuple_print(stderr, entry);
+ fputs("\nInnoDB: The table where this index record belongs\n"
+ "InnoDB: is now probably corrupt. Please run CHECK TABLE on\n"
+ "InnoDB: that table.\n", stderr);
+
+ space = page_get_space_id(page);
+ zip_size = buf_block_get_zip_size(block);
+ page_no = page_get_page_no(page);
+
+ bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
+ old_bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_FREE, mtr);
+
+ fprintf(stderr,
+ "InnoDB: space %lu, page %lu, zip_size %lu, bitmap bits %lu\n",
+ (ulong) space, (ulong) page_no,
+ (ulong) zip_size, (ulong) old_bits);
+
+ fputs("InnoDB: Submit a detailed bug report"
+ " to http://bugs.mysql.com\n", stderr);
+ ut_ad(0);
+ DBUG_RETURN(NULL);
+}
+
+/********************************************************************//**
+During merge, inserts to an index page a secondary index entry extracted
+from the insert buffer. */
+static
+void
+ibuf_insert_to_index_page(
+/*======================*/
+ const dtuple_t* entry, /*!< in: buffered entry to insert */
+ buf_block_t* block, /*!< in/out: index page where the buffered entry
+ should be placed */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_cur_t page_cur;
+ ulint low_match;
+ page_t* page = buf_block_get_frame(block);
+ rec_t* rec;
+ ulint* offsets;
+ mem_heap_t* heap;
+
+ DBUG_ENTER("ibuf_insert_to_index_page");
+
+ DBUG_PRINT("ibuf", ("page_no: %ld", buf_block_get_page_no(block)));
+ DBUG_PRINT("ibuf", ("index name: %s", index->name));
+ DBUG_PRINT("ibuf", ("online status: %d",
+ dict_index_get_online_status(index)));
+
+ ut_ad(ibuf_inside(mtr));
+ ut_ad(dtuple_check_typed(entry));
+ ut_ad(!buf_block_align(page)->index);
+
+ if (UNIV_UNLIKELY(dict_table_is_comp(index->table)
+ != (ibool)!!page_is_comp(page))) {
+ fputs("InnoDB: Trying to insert a record from"
+ " the insert buffer to an index page\n"
+ "InnoDB: but the 'compact' flag does not match!\n",
+ stderr);
+ goto dump;
+ }
+
+ rec = page_rec_get_next(page_get_infimum_rec(page));
+
+ if (page_rec_is_supremum(rec)) {
+ fputs("InnoDB: Trying to insert a record from"
+ " the insert buffer to an index page\n"
+ "InnoDB: but the index page is empty!\n",
+ stderr);
+ goto dump;
+ }
+
+ if (UNIV_UNLIKELY(rec_get_n_fields(rec, index)
+ != dtuple_get_n_fields(entry))) {
+ fputs("InnoDB: Trying to insert a record from"
+ " the insert buffer to an index page\n"
+ "InnoDB: but the number of fields does not match!\n",
+ stderr);
+dump:
+ buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
+
+ dtuple_print(stderr, entry);
+ ut_ad(0);
+
+		fputs("InnoDB: The table where"
+		      " this index record belongs\n"
+ "InnoDB: is now probably corrupt."
+ " Please run CHECK TABLE on\n"
+ "InnoDB: your tables.\n"
+ "InnoDB: Submit a detailed bug report to"
+ " http://bugs.mysql.com!\n", stderr);
+
+ DBUG_VOID_RETURN;
+ }
+
+ low_match = page_cur_search(block, index, entry,
+ PAGE_CUR_LE, &page_cur);
+
+ heap = mem_heap_create(
+ sizeof(upd_t)
+ + REC_OFFS_HEADER_SIZE * sizeof(*offsets)
+ + dtuple_get_n_fields(entry)
+ * (sizeof(upd_field_t) + sizeof *offsets));
+
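+	/* A full match on all fields means that a (normally
+	delete-marked) version of the entry already exists on the page:
+	update that record in place instead of inserting a duplicate. */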
+ if (UNIV_UNLIKELY(low_match == dtuple_get_n_fields(entry))) {
+ upd_t* update;
+ page_zip_des_t* page_zip;
+
+ rec = page_cur_get_rec(&page_cur);
+
+ /* This is based on
+ row_ins_sec_index_entry_by_modify(BTR_MODIFY_LEAF). */
+ ut_ad(rec_get_deleted_flag(rec, page_is_comp(page)));
+
+ offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED,
+ &heap);
+ update = row_upd_build_sec_rec_difference_binary(
+ rec, index, offsets, entry, heap);
+
+ page_zip = buf_block_get_page_zip(block);
+
+ if (update->n_fields == 0) {
+ /* The records only differ in the delete-mark.
+ Clear the delete-mark, like we did before
+ Bug #56680 was fixed. */
+ btr_cur_set_deleted_flag_for_ibuf(
+ rec, page_zip, FALSE, mtr);
+ goto updated_in_place;
+ }
+
+ /* Copy the info bits. Clear the delete-mark. */
+ update->info_bits = rec_get_info_bits(rec, page_is_comp(page));
+ update->info_bits &= ~REC_INFO_DELETED_FLAG;
+
+ /* We cannot invoke btr_cur_optimistic_update() here,
+ because we do not have a btr_cur_t or que_thr_t,
+ as the insert buffer merge occurs at a very low level. */
+ if (!row_upd_changes_field_size_or_external(index, offsets,
+ update)
+ && (!page_zip || btr_cur_update_alloc_zip(
+ page_zip, &page_cur, index, offsets,
+ rec_offs_size(offsets), false, mtr))) {
+ /* This is the easy case. Do something similar
+ to btr_cur_update_in_place(). */
+ rec = page_cur_get_rec(&page_cur);
+ row_upd_rec_in_place(rec, index, offsets,
+ update, page_zip);
+
+ /* Log the update in place operation. During recovery
+ MLOG_COMP_REC_UPDATE_IN_PLACE/MLOG_REC_UPDATE_IN_PLACE
+ expects trx_id, roll_ptr for secondary indexes. So we
+ just write dummy trx_id(0), roll_ptr(0) */
+ btr_cur_update_in_place_log(BTR_KEEP_SYS_FLAG, rec,
+ index, update, 0, 0, mtr);
+ DBUG_EXECUTE_IF(
+ "crash_after_log_ibuf_upd_inplace",
+ log_buffer_flush_to_disk();
+ ib_logf(IB_LOG_LEVEL_INFO,
+ "Wrote log record for ibuf update in "
+ "place operation");
+ DBUG_SUICIDE();
+ );
+
+ goto updated_in_place;
+ }
+
+ /* btr_cur_update_alloc_zip() may have changed this */
+ rec = page_cur_get_rec(&page_cur);
+
+ /* A collation may identify values that differ in
+ storage length.
+ Some examples (1 or 2 bytes):
+ utf8_turkish_ci: I = U+0131 LATIN SMALL LETTER DOTLESS I
+ utf8_general_ci: S = U+00DF LATIN SMALL LETTER SHARP S
+ utf8_general_ci: A = U+00E4 LATIN SMALL LETTER A WITH DIAERESIS
+
+ latin1_german2_ci: SS = U+00DF LATIN SMALL LETTER SHARP S
+
+ Examples of a character (3-byte UTF-8 sequence)
+ identified with 2 or 4 characters (1-byte UTF-8 sequences):
+
+ utf8_unicode_ci: 'II' = U+2171 SMALL ROMAN NUMERAL TWO
+ utf8_unicode_ci: '(10)' = U+247D PARENTHESIZED NUMBER TEN
+ */
+
+ /* Delete the different-length record, and insert the
+ buffered one. */
+
+ lock_rec_store_on_page_infimum(block, rec);
+ page_cur_delete_rec(&page_cur, index, offsets, mtr);
+ page_cur_move_to_prev(&page_cur);
+ rec = ibuf_insert_to_index_page_low(entry, block, index,
+ &offsets, heap, mtr,
+ &page_cur);
+
+ ut_ad(!cmp_dtuple_rec(entry, rec, offsets));
+ lock_rec_restore_from_page_infimum(block, rec, block);
+ } else {
+ offsets = NULL;
+ ibuf_insert_to_index_page_low(entry, block, index,
+ &offsets, heap, mtr,
+ &page_cur);
+ }
+updated_in_place:
+ mem_heap_free(heap);
+
+ DBUG_VOID_RETURN;
+}
+
+/****************************************************************//**
+During merge, sets the delete mark on a record for a secondary index
+entry. */
+static
+void
+ibuf_set_del_mark(
+/*==============*/
+ const dtuple_t* entry, /*!< in: entry */
+ buf_block_t* block, /*!< in/out: block */
+ const dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_cur_t page_cur;
+ ulint low_match;
+
+ ut_ad(ibuf_inside(mtr));
+ ut_ad(dtuple_check_typed(entry));
+
+ low_match = page_cur_search(
+ block, index, entry, PAGE_CUR_LE, &page_cur);
+
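+	/* A full match on all fields means that the record to be
+	delete-marked was found on the page. */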
+ if (low_match == dtuple_get_n_fields(entry)) {
+ rec_t* rec;
+ page_zip_des_t* page_zip;
+
+ rec = page_cur_get_rec(&page_cur);
+ page_zip = page_cur_get_page_zip(&page_cur);
+
+ /* Delete mark the old index record. According to a
+ comment in row_upd_sec_index_entry(), it can already
+ have been delete marked if a lock wait occurred in
+ row_ins_sec_index_entry() in a previous invocation of
+ row_upd_sec_index_entry(). */
+
+ if (UNIV_LIKELY
+ (!rec_get_deleted_flag(
+ rec, dict_table_is_comp(index->table)))) {
+ btr_cur_set_deleted_flag_for_ibuf(rec, page_zip,
+ TRUE, mtr);
+ }
+ } else {
+ const page_t* page
+ = page_cur_get_page(&page_cur);
+ const buf_block_t* block
+ = page_cur_get_block(&page_cur);
+
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: unable to find a record to delete-mark\n",
+ stderr);
+ fputs("InnoDB: tuple ", stderr);
+ dtuple_print(stderr, entry);
+ fputs("\n"
+ "InnoDB: record ", stderr);
+ rec_print(stderr, page_cur_get_rec(&page_cur), index);
+ fprintf(stderr, "\nspace %u offset %u"
+ " (%u records, index id %llu)\n"
+ "InnoDB: Submit a detailed bug report"
+ " to http://bugs.mysql.com\n",
+ (unsigned) buf_block_get_space(block),
+ (unsigned) buf_block_get_page_no(block),
+ (unsigned) page_get_n_recs(page),
+ (ulonglong) btr_page_get_index_id(page));
+ ut_ad(0);
+ }
+}
+
+/****************************************************************//**
+During merge, deletes the record for a secondary index entry. */
+static
+void
+ibuf_delete(
+/*========*/
+ const dtuple_t* entry, /*!< in: entry */
+ buf_block_t* block, /*!< in/out: block */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in/out: mtr; must be committed
+ before latching any further pages */
+{
+ page_cur_t page_cur;
+ ulint low_match;
+
+ ut_ad(ibuf_inside(mtr));
+ ut_ad(dtuple_check_typed(entry));
+
+ low_match = page_cur_search(
+ block, index, entry, PAGE_CUR_LE, &page_cur);
+
+ if (low_match == dtuple_get_n_fields(entry)) {
+ page_zip_des_t* page_zip= buf_block_get_page_zip(block);
+ page_t* page = buf_block_get_frame(block);
+ rec_t* rec = page_cur_get_rec(&page_cur);
+
+ /* TODO: the below should probably be a separate function,
+ it's a bastardized version of btr_cur_optimistic_delete. */
+
+ ulint offsets_[REC_OFFS_NORMAL_SIZE];
+ ulint* offsets = offsets_;
+ mem_heap_t* heap = NULL;
+ ulint max_ins_size = 0;
+
+ rec_offs_init(offsets_);
+
+ offsets = rec_get_offsets(
+ rec, index, offsets, ULINT_UNDEFINED, &heap);
+
+ if (page_get_n_recs(page) <= 1
+ || !(REC_INFO_DELETED_FLAG
+ & rec_get_info_bits(rec, page_is_comp(page)))) {
+ /* Refuse to purge the last record or a
+ record that has not been marked for deletion. */
+ ut_print_timestamp(stderr);
+ fputs(" InnoDB: unable to purge a record\n",
+ stderr);
+ fputs("InnoDB: tuple ", stderr);
+ dtuple_print(stderr, entry);
+ fputs("\n"
+ "InnoDB: record ", stderr);
+ rec_print_new(stderr, rec, offsets);
+ fprintf(stderr, "\nspace %u offset %u"
+ " (%u records, index id %llu)\n"
+ "InnoDB: Submit a detailed bug report"
+ " to http://bugs.mysql.com\n",
+ (unsigned) buf_block_get_space(block),
+ (unsigned) buf_block_get_page_no(block),
+ (unsigned) page_get_n_recs(page),
+ (ulonglong) btr_page_get_index_id(page));
+
+ ut_ad(0);
+ return;
+ }
+
+ lock_update_delete(block, rec);
+
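+		/* For uncompressed pages, remember the maximum insert
+		size after a reorganization; ibuf_update_free_bits_low()
+		uses it as the "before" state when deciding whether the
+		free bits in the bitmap page must be changed. */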
+ if (!page_zip) {
+ max_ins_size
+ = page_get_max_insert_size_after_reorganize(
+ page, 1);
+ }
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+ page_cur_delete_rec(&page_cur, index, offsets, mtr);
+#ifdef UNIV_ZIP_DEBUG
+ ut_a(!page_zip || page_zip_validate(page_zip, page, index));
+#endif /* UNIV_ZIP_DEBUG */
+
+ if (page_zip) {
+ ibuf_update_free_bits_zip(block, mtr);
+ } else {
+ ibuf_update_free_bits_low(block, max_ins_size, mtr);
+ }
+
+ if (UNIV_LIKELY_NULL(heap)) {
+ mem_heap_free(heap);
+ }
+ } else {
+ /* The record must have been purged already. */
+ }
+}
+
+/*********************************************************************//**
+Restores insert buffer tree cursor position
+@return TRUE if the position was restored; FALSE if not */
+static __attribute__((nonnull))
+ibool
+ibuf_restore_pos(
+/*=============*/
+ ulint space, /*!< in: space id */
+ ulint page_no,/*!< in: index page number where the record
+ should belong */
+ const dtuple_t* search_tuple,
+ /*!< in: search tuple for entries of page_no */
+ ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
+ btr_pcur_t* pcur, /*!< in/out: persistent cursor whose
+ position is to be restored */
+ mtr_t* mtr) /*!< in/out: mini-transaction */
+{
+ ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE);
+
+ if (btr_pcur_restore_position(mode, pcur, mtr)) {
+
+ return(TRUE);
+ }
+
+ if (fil_space_get_flags(space) == ULINT_UNDEFINED) {
+ /* The tablespace has been dropped. It is possible
+ that another thread has deleted the insert buffer
+ entry. Do not complain. */
+ ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
+ } else {
+ fprintf(stderr,
+ "InnoDB: ERROR: Submit the output to"
+ " http://bugs.mysql.com\n"
+ "InnoDB: ibuf cursor restoration fails!\n"
+ "InnoDB: ibuf record inserted to page %lu:%lu\n",
+ (ulong) space, (ulong) page_no);
+ fflush(stderr);
+
+ rec_print_old(stderr, btr_pcur_get_rec(pcur));
+ rec_print_old(stderr, pcur->old_rec);
+ dtuple_print(stderr, search_tuple);
+
+ rec_print_old(stderr,
+ page_rec_get_next(btr_pcur_get_rec(pcur)));
+ fflush(stderr);
+
+ ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
+ ut_ad(0);
+ }
+
+ return(FALSE);
+}
+
+/*********************************************************************//**
+Deletes from ibuf the record on which pcur is positioned. If we have to
+resort to a pessimistic delete, this function commits mtr and closes
+the cursor.
+@return TRUE if mtr was committed and pcur closed in this operation */
+static __attribute__((warn_unused_result))
+ibool
+ibuf_delete_rec(
+/*============*/
+ ulint space, /*!< in: space id */
+ ulint page_no,/*!< in: index page number that the record
+ should belong to */
+ btr_pcur_t* pcur, /*!< in: pcur positioned on the record to
+ delete, having latch mode BTR_MODIFY_LEAF */
+ const dtuple_t* search_tuple,
+ /*!< in: search tuple for entries of page_no */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ ibool success;
+ page_t* root;
+ dberr_t err;
+
+ ut_ad(ibuf_inside(mtr));
+ ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
+ ut_ad(ibuf_rec_get_page_no(mtr, btr_pcur_get_rec(pcur)) == page_no);
+ ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur)) == space);
+
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+ if (ibuf_debug == 2) {
+ /* Inject a fault (crash). We do this before trying
+ optimistic delete, because a pessimistic delete in the
+ change buffer would require a larger test case. */
+
+ /* Flag the buffered record as processed, to avoid
+ an assertion failure after crash recovery. */
+ btr_cur_set_deleted_flag_for_ibuf(
+ btr_pcur_get_rec(pcur), NULL, TRUE, mtr);
+ ibuf_mtr_commit(mtr);
+ log_write_up_to(LSN_MAX, LOG_WAIT_ALL_GROUPS, TRUE);
+ DBUG_SUICIDE();
+ }
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
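+	/* First try an optimistic delete, which succeeds if the record
+	can be removed by modifying only the leaf page. */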
+ success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur),
+ 0, mtr);
+
+ if (success) {
+ if (page_is_empty(btr_pcur_get_page(pcur))) {
+ /* If a B-tree page is empty, it must be the root page
+ and the whole B-tree must be empty. InnoDB does not
+ allow empty B-tree pages other than the root. */
+ root = btr_pcur_get_page(pcur);
+
+ ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
+ ut_ad(page_get_page_no(root)
+ == FSP_IBUF_TREE_ROOT_PAGE_NO);
+
+ /* ibuf->empty is protected by the root page latch.
+ Before the deletion, it had to be FALSE. */
+ ut_ad(!ibuf->empty);
+ ibuf->empty = true;
+ }
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ fprintf(stderr,
+ "Decrementing ibuf count of space %lu page %lu\n"
+ "from %lu by 1\n", space, page_no,
+ ibuf_count_get(space, page_no));
+ ibuf_count_set(space, page_no,
+ ibuf_count_get(space, page_no) - 1);
+#endif /* UNIV_IBUF_COUNT_DEBUG */
+ return(FALSE);
+ }
+
+ ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur)));
+ ut_ad(ibuf_rec_get_page_no(mtr, btr_pcur_get_rec(pcur)) == page_no);
+ ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur)) == space);
+
+ /* We have to resort to a pessimistic delete from ibuf.
+ Delete-mark the record so that it will not be applied again,
+ in case the server crashes before the pessimistic delete is
+ made persistent. */
+ btr_cur_set_deleted_flag_for_ibuf(
+ btr_pcur_get_rec(pcur), NULL, TRUE, mtr);
+
+ btr_pcur_store_position(pcur, mtr);
+ ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
+
+ ibuf_mtr_start(mtr);
+ mutex_enter(&ibuf_mutex);
+
+ if (!ibuf_restore_pos(space, page_no, search_tuple,
+ BTR_MODIFY_TREE, pcur, mtr)) {
+
+ mutex_exit(&ibuf_mutex);
+ ut_ad(mtr->state == MTR_COMMITTED);
+ goto func_exit;
+ }
+
+ root = ibuf_tree_root_get(mtr);
+
+ btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), 0,
+ RB_NONE, mtr);
+ ut_a(err == DB_SUCCESS);
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1);
+#endif
+ ibuf_size_update(root, mtr);
+ mutex_exit(&ibuf_mutex);
+
+ ibuf->empty = page_is_empty(root);
+ ibuf_btr_pcur_commit_specify_mtr(pcur, mtr);
+
+func_exit:
+ ut_ad(mtr->state == MTR_COMMITTED);
+ btr_pcur_close(pcur);
+
+ return(TRUE);
+}
+
+/*********************************************************************//**
+When an index page is read from disk into the buffer pool, this function
+applies any buffered operations to the page and deletes the entries from the
+insert buffer. If the page is not read, but created in the buffer pool, this
+function deletes its buffered entries from the insert buffer; there can
+exist entries for such a page if the page belonged to an index which
+subsequently was dropped. */
+UNIV_INTERN
+void
+ibuf_merge_or_delete_for_page(
+/*==========================*/
+ buf_block_t* block, /*!< in: if page has been read from
+ disk, pointer to the page x-latched,
+ else NULL */
+ ulint space, /*!< in: space id of the index page */
+ ulint page_no,/*!< in: page number of the index page */
+ ulint zip_size,/*!< in: compressed page size in bytes,
+ or 0 */
+ ibool update_ibuf_bitmap)/*!< in: normally this is set
+ to TRUE, but if we have deleted or are
+ deleting the tablespace, then we
+ naturally do not want to update a
+ non-existent bitmap page */
+{
+ mem_heap_t* heap;
+ btr_pcur_t pcur;
+ dtuple_t* search_tuple;
+#ifdef UNIV_IBUF_DEBUG
+ ulint volume = 0;
+#endif
+ page_zip_des_t* page_zip = NULL;
+ ibool tablespace_being_deleted = FALSE;
+ ibool corruption_noticed = FALSE;
+ mtr_t mtr;
+
+ /* Counts for merged & discarded operations. */
+ ulint mops[IBUF_OP_COUNT];
+ ulint dops[IBUF_OP_COUNT];
+
+ ut_ad(!block || buf_block_get_space(block) == space);
+ ut_ad(!block || buf_block_get_page_no(block) == page_no);
+ ut_ad(!block || buf_block_get_zip_size(block) == zip_size);
+ ut_ad(!block || buf_block_get_io_fix(block) == BUF_IO_READ);
+
+ if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE
+ || trx_sys_hdr_page(space, page_no)) {
+ return;
+ }
+
+ /* We cannot refer to zip_size in the following, because
+ zip_size is passed as ULINT_UNDEFINED (it is unknown) when
+ buf_read_ibuf_merge_pages() is merging (discarding) changes
+ for a dropped tablespace. When block != NULL or
+ update_ibuf_bitmap is specified, the zip_size must be known.
+ That is why we will repeat the check below, with zip_size in
+ place of 0. Passing zip_size as 0 assumes that the
+ uncompressed page size always is a power-of-2 multiple of the
+ compressed page size. */
+
+ if (ibuf_fixed_addr_page(space, 0, page_no)
+ || fsp_descr_page(0, page_no)) {
+ return;
+ }
+
+ if (UNIV_LIKELY(update_ibuf_bitmap)) {
+ ut_a(ut_is_2pow(zip_size));
+
+ if (ibuf_fixed_addr_page(space, zip_size, page_no)
+ || fsp_descr_page(zip_size, page_no)) {
+ return;
+ }
+
+		/* If fil_inc_pending_ops() returns FALSE, it has
+		incremented the pending-operations counter of the
+		tablespace, and we must decrement it when we leave
+		this function. While the counter is > 0, the
+		tablespace cannot be dropped. */
+
+ tablespace_being_deleted = fil_inc_pending_ops(space, true);
+
+ if (UNIV_UNLIKELY(tablespace_being_deleted)) {
+ /* Do not try to read the bitmap page from space;
+ just delete the ibuf records for the page */
+
+ block = NULL;
+ update_ibuf_bitmap = FALSE;
+ } else {
+ page_t* bitmap_page;
+ ulint bitmap_bits;
+
+ ibuf_mtr_start(&mtr);
+
+ bitmap_page = ibuf_bitmap_get_map_page(
+ space, page_no, zip_size, &mtr);
+ bitmap_bits = ibuf_bitmap_page_get_bits(
+ bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_BUFFERED, &mtr);
+
+ ibuf_mtr_commit(&mtr);
+
+ if (!bitmap_bits) {
+ /* No inserts buffered for this page */
+
+ if (!tablespace_being_deleted) {
+ fil_decr_pending_ops(space);
+ }
+
+ return;
+ }
+ }
+ } else if (block
+ && (ibuf_fixed_addr_page(space, zip_size, page_no)
+ || fsp_descr_page(zip_size, page_no))) {
+
+ return;
+ }
+
+ heap = mem_heap_create(512);
+
+ search_tuple = ibuf_search_tuple_build(space, page_no, heap);
+
+ if (block) {
+ /* Move the ownership of the x-latch on the page to this OS
+ thread, so that we can acquire a second x-latch on it. This
+ is needed for the insert operations to the index page to pass
+ the debug checks. */
+
+ rw_lock_x_lock_move_ownership(&(block->lock));
+ page_zip = buf_block_get_page_zip(block);
+
+ if (UNIV_UNLIKELY(fil_page_get_type(block->frame)
+ != FIL_PAGE_INDEX)
+ || UNIV_UNLIKELY(!page_is_leaf(block->frame))) {
+
+ page_t* bitmap_page;
+
+ corruption_noticed = TRUE;
+
+ ut_print_timestamp(stderr);
+
+ ibuf_mtr_start(&mtr);
+
+ fputs(" InnoDB: Dump of the ibuf bitmap page:\n",
+ stderr);
+
+ bitmap_page = ibuf_bitmap_get_map_page(space, page_no,
+ zip_size, &mtr);
+ buf_page_print(bitmap_page, 0,
+ BUF_PAGE_PRINT_NO_CRASH);
+ ibuf_mtr_commit(&mtr);
+
+ fputs("\nInnoDB: Dump of the page:\n", stderr);
+
+ buf_page_print(block->frame, 0,
+ BUF_PAGE_PRINT_NO_CRASH);
+
+ fprintf(stderr,
+ "InnoDB: Error: corruption in the tablespace."
+ " Bitmap shows insert\n"
+ "InnoDB: buffer records to page n:o %lu"
+ " though the page\n"
+ "InnoDB: type is %lu, which is"
+ " not an index leaf page!\n"
+ "InnoDB: We try to resolve the problem"
+ " by skipping the insert buffer\n"
+ "InnoDB: merge for this page."
+ " Please run CHECK TABLE on your tables\n"
+ "InnoDB: to determine if they are corrupt"
+ " after this.\n\n"
+ "InnoDB: Please submit a detailed bug report"
+ " to http://bugs.mysql.com\n\n",
+ (ulong) page_no,
+ (ulong)
+ fil_page_get_type(block->frame));
+ ut_ad(0);
+ }
+ }
+
+ memset(mops, 0, sizeof(mops));
+ memset(dops, 0, sizeof(dops));
+
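+	/* Apply or discard every insert buffer entry recorded for this
+	page. The scan restarts from the top whenever a step has to
+	commit the mini-transaction in the middle. */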
+loop:
+ ibuf_mtr_start(&mtr);
+
+ /* Position pcur in the insert buffer at the first entry for this
+ index page */
+ btr_pcur_open_on_user_rec(
+ ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
+ &pcur, &mtr);
+
+ if (block) {
+ ibool success;
+
+ success = buf_page_get_known_nowait(
+ RW_X_LATCH, block,
+ BUF_KEEP_OLD, __FILE__, __LINE__, &mtr);
+
+ ut_a(success);
+
+ /* This is a user page (secondary index leaf page),
+ but we pretend that it is a change buffer page in
+ order to obey the latching order. This should be OK,
+ because buffered changes are applied immediately while
+ the block is io-fixed. Other threads must not try to
+ latch an io-fixed block. */
+ buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
+ }
+
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
+ ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+ goto reset_bit;
+ }
+
+ for (;;) {
+ rec_t* rec;
+
+ ut_ad(btr_pcur_is_on_user_rec(&pcur));
+
+ rec = btr_pcur_get_rec(&pcur);
+
+ /* Check if the entry is for this index page */
+ if (ibuf_rec_get_page_no(&mtr, rec) != page_no
+ || ibuf_rec_get_space(&mtr, rec) != space) {
+
+ if (block) {
+ page_header_reset_last_insert(
+ block->frame, page_zip, &mtr);
+ }
+
+ goto reset_bit;
+ }
+
+ if (UNIV_UNLIKELY(corruption_noticed)) {
+ fputs("InnoDB: Discarding record\n ", stderr);
+ rec_print_old(stderr, rec);
+ fputs("\nInnoDB: from the insert buffer!\n\n", stderr);
+ } else if (block && !rec_get_deleted_flag(rec, 0)) {
+ /* Now we have at pcur a record which should be
+ applied on the index page; NOTE that the call below
+ copies pointers to fields in rec, and we must
+ keep the latch to the rec page until the
+ insertion is finished! */
+ dtuple_t* entry;
+ trx_id_t max_trx_id;
+ dict_index_t* dummy_index;
+ ibuf_op_t op = ibuf_rec_get_op_type(&mtr, rec);
+
+ max_trx_id = page_get_max_trx_id(page_align(rec));
+ page_update_max_trx_id(block, page_zip, max_trx_id,
+ &mtr);
+
+ ut_ad(page_validate(page_align(rec), ibuf->index));
+
+ entry = ibuf_build_entry_from_ibuf_rec(
+ &mtr, rec, heap, &dummy_index);
+
+ ut_ad(page_validate(block->frame, dummy_index));
+
+ switch (op) {
+ ibool success;
+ case IBUF_OP_INSERT:
+#ifdef UNIV_IBUF_DEBUG
+ volume += rec_get_converted_size(
+ dummy_index, entry, 0);
+
+ volume += page_dir_calc_reserved_space(1);
+
+ ut_a(volume <= 4 * UNIV_PAGE_SIZE
+ / IBUF_PAGE_SIZE_PER_FREE_SPACE);
+#endif /* UNIV_IBUF_DEBUG */
+ ibuf_insert_to_index_page(
+ entry, block, dummy_index, &mtr);
+ break;
+
+ case IBUF_OP_DELETE_MARK:
+ ibuf_set_del_mark(
+ entry, block, dummy_index, &mtr);
+ break;
+
+ case IBUF_OP_DELETE:
+ ibuf_delete(entry, block, dummy_index, &mtr);
+ /* Because ibuf_delete() will latch an
+ insert buffer bitmap page, commit mtr
+ before latching any further pages.
+ Store and restore the cursor position. */
+ ut_ad(rec == btr_pcur_get_rec(&pcur));
+ ut_ad(page_rec_is_user_rec(rec));
+ ut_ad(ibuf_rec_get_page_no(&mtr, rec)
+ == page_no);
+ ut_ad(ibuf_rec_get_space(&mtr, rec) == space);
+
+ /* Mark the change buffer record processed,
+ so that it will not be merged again in case
+ the server crashes between the following
+ mtr_commit() and the subsequent mtr_commit()
+ of deleting the change buffer record. */
+
+ btr_cur_set_deleted_flag_for_ibuf(
+ btr_pcur_get_rec(&pcur), NULL,
+ TRUE, &mtr);
+
+ btr_pcur_store_position(&pcur, &mtr);
+ ibuf_btr_pcur_commit_specify_mtr(&pcur, &mtr);
+
+ ibuf_mtr_start(&mtr);
+
+ success = buf_page_get_known_nowait(
+ RW_X_LATCH, block,
+ BUF_KEEP_OLD,
+ __FILE__, __LINE__, &mtr);
+ ut_a(success);
+
+ /* This is a user page (secondary
+ index leaf page), but it should be OK
+ to use too low latching order for it,
+ as the block is io-fixed. */
+ buf_block_dbg_add_level(
+ block, SYNC_IBUF_TREE_NODE);
+
+ if (!ibuf_restore_pos(space, page_no,
+ search_tuple,
+ BTR_MODIFY_LEAF,
+ &pcur, &mtr)) {
+
+ ut_ad(mtr.state == MTR_COMMITTED);
+ mops[op]++;
+ ibuf_dummy_index_free(dummy_index);
+ goto loop;
+ }
+
+ break;
+ default:
+ ut_error;
+ }
+
+ mops[op]++;
+
+ ibuf_dummy_index_free(dummy_index);
+ } else {
+ dops[ibuf_rec_get_op_type(&mtr, rec)]++;
+ }
+
+ /* Delete the record from ibuf */
+ if (ibuf_delete_rec(space, page_no, &pcur, search_tuple,
+ &mtr)) {
+ /* Deletion was pessimistic and mtr was committed:
+ we start from the beginning again */
+
+ ut_ad(mtr.state == MTR_COMMITTED);
+ goto loop;
+ } else if (btr_pcur_is_after_last_on_page(&pcur)) {
+ ibuf_mtr_commit(&mtr);
+ btr_pcur_close(&pcur);
+
+ goto loop;
+ }
+ }
+
+reset_bit:
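+	/* All buffered entries for the page have been applied or
+	discarded. Clear the "changes buffered" bit in the insert buffer
+	bitmap and, if the page itself was read into the buffer pool
+	(block != NULL), refresh its free-space bits. */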
+ if (UNIV_LIKELY(update_ibuf_bitmap)) {
+ page_t* bitmap_page;
+
+ bitmap_page = ibuf_bitmap_get_map_page(
+ space, page_no, zip_size, &mtr);
+
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_BUFFERED, FALSE, &mtr);
+
+ if (block) {
+ ulint old_bits = ibuf_bitmap_page_get_bits(
+ bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_FREE, &mtr);
+
+ ulint new_bits = ibuf_index_page_calc_free(
+ zip_size, block);
+
+ if (old_bits != new_bits) {
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, page_no, zip_size,
+ IBUF_BITMAP_FREE, new_bits, &mtr);
+ }
+ }
+ }
+
+ ibuf_mtr_commit(&mtr);
+ btr_pcur_close(&pcur);
+ mem_heap_free(heap);
+
+#ifdef HAVE_ATOMIC_BUILTINS
+ os_atomic_increment_ulint(&ibuf->n_merges, 1);
+ ibuf_add_ops(ibuf->n_merged_ops, mops);
+ ibuf_add_ops(ibuf->n_discarded_ops, dops);
+#else /* HAVE_ATOMIC_BUILTINS */
+ /* Protect our statistics keeping from race conditions */
+ mutex_enter(&ibuf_mutex);
+
+ ibuf->n_merges++;
+ ibuf_add_ops(ibuf->n_merged_ops, mops);
+ ibuf_add_ops(ibuf->n_discarded_ops, dops);
+
+ mutex_exit(&ibuf_mutex);
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+ if (update_ibuf_bitmap && !tablespace_being_deleted) {
+
+ fil_decr_pending_ops(space);
+ }
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ut_a(ibuf_count_get(space, page_no) == 0);
+#endif
+}
+
+/*********************************************************************//**
+Deletes all entries in the insert buffer for a given space id. This is used
+in DISCARD TABLESPACE and IMPORT TABLESPACE.
+NOTE: this does not update the page free bitmaps in the space. The space will
+become CORRUPT when you call this function! */
+UNIV_INTERN
+void
+ibuf_delete_for_discarded_space(
+/*============================*/
+ ulint space) /*!< in: space id */
+{
+ mem_heap_t* heap;
+ btr_pcur_t pcur;
+ dtuple_t* search_tuple;
+ const rec_t* ibuf_rec;
+ ulint page_no;
+ mtr_t mtr;
+
+ /* Counts for discarded operations. */
+ ulint dops[IBUF_OP_COUNT];
+
+ heap = mem_heap_create(512);
+
+ /* Use page number 0 to build the search tuple so that we get the
+ cursor positioned at the first entry for this space id */
+
+ search_tuple = ibuf_search_tuple_build(space, 0, heap);
+
+ memset(dops, 0, sizeof(dops));
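+	/* Scan the insert buffer from the first entry for this space
+	and delete every buffered entry found, restarting whenever the
+	mini-transaction has to be committed in the middle of the scan. */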
+loop:
+ ibuf_mtr_start(&mtr);
+
+ /* Position pcur in the insert buffer at the first entry for the
+ space */
+ btr_pcur_open_on_user_rec(
+ ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
+ &pcur, &mtr);
+
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
+ ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+ goto leave_loop;
+ }
+
+ for (;;) {
+ ut_ad(btr_pcur_is_on_user_rec(&pcur));
+
+ ibuf_rec = btr_pcur_get_rec(&pcur);
+
+ /* Check if the entry is for this space */
+ if (ibuf_rec_get_space(&mtr, ibuf_rec) != space) {
+
+ goto leave_loop;
+ }
+
+ page_no = ibuf_rec_get_page_no(&mtr, ibuf_rec);
+
+ dops[ibuf_rec_get_op_type(&mtr, ibuf_rec)]++;
+
+ /* Delete the record from ibuf */
+ if (ibuf_delete_rec(space, page_no, &pcur, search_tuple,
+ &mtr)) {
+ /* Deletion was pessimistic and mtr was committed:
+ we start from the beginning again */
+
+ ut_ad(mtr.state == MTR_COMMITTED);
+ goto loop;
+ }
+
+ if (btr_pcur_is_after_last_on_page(&pcur)) {
+ ibuf_mtr_commit(&mtr);
+ btr_pcur_close(&pcur);
+
+ goto loop;
+ }
+ }
+
+leave_loop:
+ ibuf_mtr_commit(&mtr);
+ btr_pcur_close(&pcur);
+
+#ifdef HAVE_ATOMIC_BUILTINS
+ ibuf_add_ops(ibuf->n_discarded_ops, dops);
+#else /* HAVE_ATOMIC_BUILTINS */
+ /* Protect our statistics keeping from race conditions */
+ mutex_enter(&ibuf_mutex);
+ ibuf_add_ops(ibuf->n_discarded_ops, dops);
+ mutex_exit(&ibuf_mutex);
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+ mem_heap_free(heap);
+}
+
+/******************************************************************//**
+Checks whether the insert buffer is empty.
+@return true if empty */
+UNIV_INTERN
+bool
+ibuf_is_empty(void)
+/*===============*/
+{
+ bool is_empty;
+ const page_t* root;
+ mtr_t mtr;
+
+ ibuf_mtr_start(&mtr);
+
+ mutex_enter(&ibuf_mutex);
+ root = ibuf_tree_root_get(&mtr);
+ mutex_exit(&ibuf_mutex);
+
+ is_empty = page_is_empty(root);
+ ut_a(is_empty == ibuf->empty);
+ ibuf_mtr_commit(&mtr);
+
+ return(is_empty);
+}
+
+/******************************************************************//**
+Prints info of ibuf. */
+UNIV_INTERN
+void
+ibuf_print(
+/*=======*/
+ FILE* file) /*!< in: file where to print */
+{
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ulint i;
+ ulint j;
+#endif
+
+ mutex_enter(&ibuf_mutex);
+
+ fprintf(file,
+ "Ibuf: size %lu, free list len %lu,"
+ " seg size %lu, %lu merges\n",
+ (ulong) ibuf->size,
+ (ulong) ibuf->free_list_len,
+ (ulong) ibuf->seg_size,
+ (ulong) ibuf->n_merges);
+
+ fputs("merged operations:\n ", file);
+ ibuf_print_ops(ibuf->n_merged_ops, file);
+
+ fputs("discarded operations:\n ", file);
+ ibuf_print_ops(ibuf->n_discarded_ops, file);
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ for (i = 0; i < IBUF_COUNT_N_SPACES; i++) {
+ for (j = 0; j < IBUF_COUNT_N_PAGES; j++) {
+ ulint count = ibuf_count_get(i, j);
+
+ if (count > 0) {
+ fprintf(stderr,
+ "Ibuf count for space/page %lu/%lu"
+ " is %lu\n",
+ (ulong) i, (ulong) j, (ulong) count);
+ }
+ }
+ }
+#endif /* UNIV_IBUF_COUNT_DEBUG */
+
+ mutex_exit(&ibuf_mutex);
+}
+
+/******************************************************************//**
+Checks the insert buffer bitmaps on IMPORT TABLESPACE.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+ibuf_check_bitmap_on_import(
+/*========================*/
+ const trx_t* trx, /*!< in: transaction */
+ ulint space_id) /*!< in: tablespace identifier */
+{
+ ulint zip_size;
+ ulint page_size;
+ ulint size;
+ ulint page_no;
+
+ ut_ad(space_id);
+ ut_ad(trx->mysql_thd);
+
+ zip_size = fil_space_get_zip_size(space_id);
+
+ if (zip_size == ULINT_UNDEFINED) {
+ return(DB_TABLE_NOT_FOUND);
+ }
+
+ size = fil_space_get_size(space_id);
+
+ if (size == 0) {
+ return(DB_TABLE_NOT_FOUND);
+ }
+
+ mutex_enter(&ibuf_mutex);
+
+ page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;
+
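+	/* Each group of page_size pages begins with an FSP_HDR/XDES
+	page followed by the insert buffer bitmap page covering the
+	group; skip those two and check the bitmap bits kept for the
+	remaining pages of the group. */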
+ for (page_no = 0; page_no < size; page_no += page_size) {
+ mtr_t mtr;
+ page_t* bitmap_page;
+ ulint i;
+
+ if (trx_is_interrupted(trx)) {
+ mutex_exit(&ibuf_mutex);
+ return(DB_INTERRUPTED);
+ }
+
+ mtr_start(&mtr);
+
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+
+ ibuf_enter(&mtr);
+
+ bitmap_page = ibuf_bitmap_get_map_page(
+ space_id, page_no, zip_size, &mtr);
+
+ for (i = FSP_IBUF_BITMAP_OFFSET + 1; i < page_size; i++) {
+ const ulint offset = page_no + i;
+
+ if (ibuf_bitmap_page_get_bits(
+ bitmap_page, offset, zip_size,
+ IBUF_BITMAP_IBUF, &mtr)) {
+
+ mutex_exit(&ibuf_mutex);
+ ibuf_exit(&mtr);
+ mtr_commit(&mtr);
+
+ ib_errf(trx->mysql_thd,
+ IB_LOG_LEVEL_ERROR,
+ ER_INNODB_INDEX_CORRUPT,
+ "Space %u page %u"
+ " is wrongly flagged to belong to the"
+ " insert buffer",
+ (unsigned) space_id,
+ (unsigned) offset);
+
+ return(DB_CORRUPTION);
+ }
+
+ if (ibuf_bitmap_page_get_bits(
+ bitmap_page, offset, zip_size,
+ IBUF_BITMAP_BUFFERED, &mtr)) {
+
+ ib_errf(trx->mysql_thd,
+ IB_LOG_LEVEL_WARN,
+ ER_INNODB_INDEX_CORRUPT,
+ "Buffered changes"
+ " for space %u page %u are lost",
+ (unsigned) space_id,
+ (unsigned) offset);
+
+ /* Tolerate this error, so that
+ slightly corrupted tables can be
+ imported and dumped. Clear the bit. */
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, offset, zip_size,
+ IBUF_BITMAP_BUFFERED, FALSE, &mtr);
+ }
+ }
+
+ ibuf_exit(&mtr);
+ mtr_commit(&mtr);
+ }
+
+ mutex_exit(&ibuf_mutex);
+ return(DB_SUCCESS);
+}
+#endif /* !UNIV_HOTBACKUP */