author     Sergei Golubchik <vuvova@gmail.com>   2015-05-04 19:17:21 +0200
committer  Sergei Golubchik <vuvova@gmail.com>   2015-05-04 19:17:21 +0200
commit     6d06fbbd1dc25b3c12568f9038060dfdb69f9683
tree       21e27f3fddc89f9dda6b337091464ba10c490123 /storage/innobase/ibuf
parent     1645930d0bd02f79df3ebff412b90acdc15bd9a0
download   mariadb-git-6d06fbbd1dc25b3c12568f9038060dfdb69f9683.tar.gz
move to storage/innobase
Diffstat (limited to 'storage/innobase/ibuf')
-rw-r--r--  storage/innobase/ibuf/ibuf0ibuf.cc  5223
1 file changed, 5223 insertions, 0 deletions
diff --git a/storage/innobase/ibuf/ibuf0ibuf.cc b/storage/innobase/ibuf/ibuf0ibuf.cc
new file mode 100644
index 00000000000..caf1f1a864b
--- /dev/null
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc
@@ -0,0 +1,5223 @@
+/*****************************************************************************
+
+Copyright (c) 1997, 2014, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file ibuf/ibuf0ibuf.cc
+Insert buffer
+
+Created 7/19/1997 Heikki Tuuri
+*******************************************************/
+
+#include "ibuf0ibuf.h"
+
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+UNIV_INTERN my_bool	srv_ibuf_disable_background_merge;
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
+/** Number of bits describing a single page */
+#define IBUF_BITS_PER_PAGE	4
+#if IBUF_BITS_PER_PAGE % 2
+# error "IBUF_BITS_PER_PAGE must be an even number!"
+#endif
+/** The start address for an insert buffer bitmap page bitmap */
+#define IBUF_BITMAP		PAGE_DATA
+
+#ifdef UNIV_NONINL
+#include "ibuf0ibuf.ic"
+#endif
+
+#ifndef UNIV_HOTBACKUP
+
+#include "buf0buf.h"
+#include "buf0rea.h"
+#include "fsp0fsp.h"
+#include "trx0sys.h"
+#include "fil0fil.h"
+#include "rem0rec.h"
+#include "btr0cur.h"
+#include "btr0pcur.h"
+#include "btr0btr.h"
+#include "row0upd.h"
+#include "sync0sync.h"
+#include "dict0boot.h"
+#include "fut0lst.h"
+#include "lock0lock.h"
+#include "log0recv.h"
+#include "que0que.h"
+#include "srv0start.h" /* srv_shutdown_state */
+#include "ha_prototypes.h"
+#include "rem0cmp.h"
+
+/*	STRUCTURE OF AN INSERT BUFFER RECORD
+
+In versions < 4.1.x:
+
+1. The first field is the page number.
+2. The second field is an array which stores type info for each subsequent
+   field. We store the information which affects the ordering of records, and
+   also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it
+   is 10 bytes.
+3. Next we have the fields of the actual index record.
+
+In versions >= 4.1.x:
+
+Note that contrary to what we planned in the 1990's, there will only be one
+insert buffer tree, and that is in the system tablespace of InnoDB.
+
+1. The first field is the space id.
+2. The second field is a one-byte marker (0) which differentiates records from
+   the < 4.1.x storage format.
+3. The third field is the page number.
+4. The fourth field contains the type info, where we have also added 2 bytes to
+   store the charset. In the compressed table format of 5.0.x we must add more
+   information here so that we can build a dummy 'index' struct which 5.0.x
+   can use in the binary search on the index page in the ibuf merge phase.
+5. The rest of the fields contain the fields of the actual index record.
+
+In versions >= 5.0.3:
+
+The first byte of the fourth field is an additional marker (0) if the record
+is in the compact format. The presence of this marker can be detected by
+looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE.
+
+The high-order bit of the character set field in the type info is the
+"nullable" flag for the field.
+
+In versions >= 5.5:
+
+The optional marker byte at the start of the fourth field is replaced by
+3 mandatory fields, totaling 4 bytes:
+
+ 1. 2 bytes: Counter field, used to sort records within a (space id, page
+    no) in the order they were added. This is needed so that for example the
+    sequence of operations "INSERT x, DEL MARK x, INSERT x" is handled
+    correctly.
+
+ 2. 1 byte: Operation type (see ibuf_op_t).
+
+ 3. 1 byte: Flags. Currently only one flag exists, IBUF_REC_COMPACT.
+
+To ensure older records, which do not have counters to enforce correct
+sorting, are merged before any new records, ibuf_insert checks if we're
+trying to insert to a position that contains old-style records, and if so,
+refuses the insert. Thus, ibuf pages are gradually converted to the new
+format as their corresponding buffer pool pages are read into memory.
+*/
+
+
+/*	PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM
+
+If an OS thread performs any operation that brings in disk pages from
+non-system tablespaces into the buffer pool, or creates such a page there,
+then the operation may have as a side effect an insert buffer index tree
+compression. Thus, the tree latch of the insert buffer tree may be acquired
+in the x-mode, and also the file space latch of the system tablespace may
+be acquired in the x-mode.
+
+Also, an insert to an index in a non-system tablespace can have the same
+effect. How do we know this cannot lead to a deadlock of OS threads? There
+is a problem with the i/o-handler threads: they break the latching order
+because they own x-latches to pages which are on a lower level than the
+insert buffer tree latch, its page latches, and the tablespace latch an
+insert buffer operation can reserve.
+
+The solution is the following: Let all the tree and page latches connected
+with the insert buffer be later in the latching order than the fsp latch and
+fsp page latches.
+
+Insert buffer pages must be such that the insert buffer is never invoked
+when these pages are accessed as this would result in a recursion violating
+the latching order. We let a special i/o-handler thread take care of i/o to
+the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap
+pages and the first inode page, which contains the inode of the ibuf tree: let
+us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead
+access both non-ibuf and ibuf pages.
+
+Then an i/o-handler for the insert buffer never needs to access recursively the
+insert buffer tree and thus obeys the latching order. On the other hand, other
+i/o-handlers for other tablespaces may require access to the insert buffer,
+but because all kinds of latches they need to access there are later in the
+latching order, no violation of the latching order occurs in this case,
+either.
+
+A problem is how to grow and contract an insert buffer tree. As it is later
+in the latching order than the fsp management, we have to reserve the fsp
+latch first, before adding or removing pages from the insert buffer tree.
+We let the insert buffer tree have its own file space management: a free
+list of pages linked to the tree root.
+To prevent recursive use of the
+insert buffer when adding pages to the tree, we must first load these pages
+to memory, obtaining a latch on them, and only after that add them to the
+free list of the insert buffer tree. More difficult is removing pages
+from the free list. If there is an excess of pages in the free list of the
+ibuf tree, they might be needed if some thread reserves the fsp latch,
+intending to allocate more file space. So we do the following: if a thread
+reserves the fsp latch, we check the writer count field of the latch. If
+this field has value 1, it means that the thread did not own the latch
+before entering the fsp system, and the mtr of the thread contains no
+modifications to the fsp pages. Now we are free to reserve the ibuf latch,
+and check if there is an excess of pages in the free list. We can then, in a
+separate mini-transaction, take them out of the free list and free them to
+the fsp system.
+
+To avoid deadlocks in the ibuf system, we divide file pages into three levels:
+
+(1) non-ibuf pages,
+(2) ibuf tree pages and the pages in the ibuf tree free list, and
+(3) ibuf bitmap pages.
+
+No OS thread is allowed to access higher level pages if it has latches to
+lower level pages; even if the thread owns a B-tree latch it must not access
+the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead
+is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle
+exclusively level 1 i/o. A dedicated i/o-handler thread handles exclusively
+level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e.,
+it uses synchronous aio, it can access any pages, as long as it obeys the
+access order rules. */
+
+/** Table name for the insert buffer. */
+#define IBUF_TABLE_NAME		"SYS_IBUF_TABLE"
+
+/** Operations that can currently be buffered. */
+UNIV_INTERN ibuf_use_t	ibuf_use		= IBUF_USE_ALL;
+
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+/** Flag to control insert buffer debugging. */
+UNIV_INTERN uint	ibuf_debug;
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
+/** The insert buffer control structure */
+UNIV_INTERN ibuf_t*	ibuf			= NULL;
+
+#ifdef UNIV_PFS_MUTEX
+UNIV_INTERN mysql_pfs_key_t	ibuf_pessimistic_insert_mutex_key;
+UNIV_INTERN mysql_pfs_key_t	ibuf_mutex_key;
+UNIV_INTERN mysql_pfs_key_t	ibuf_bitmap_mutex_key;
+#endif /* UNIV_PFS_MUTEX */
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+/** Number of tablespaces in the ibuf_counts array */
+#define IBUF_COUNT_N_SPACES	4
+/** Number of pages within each tablespace in the ibuf_counts array */
+#define IBUF_COUNT_N_PAGES	130000
+
+/** Buffered entry counts for file pages, used in debugging */
+static ulint	ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES];
+
+/******************************************************************//**
+Checks that the indexes to ibuf_counts[][] are within limits. */
+UNIV_INLINE
+void
+ibuf_count_check(
+/*=============*/
+	ulint	space_id,	/*!< in: space identifier */
+	ulint	page_no)	/*!< in: page number */
+{
+	if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) {
+		return;
+	}
+
+	fprintf(stderr,
+		"InnoDB: UNIV_IBUF_COUNT_DEBUG limits space_id and page_no\n"
+		"InnoDB: and breaks crash recovery.\n"
+		"InnoDB: space_id=%lu, should be 0<=space_id<%lu\n"
+		"InnoDB: page_no=%lu, should be 0<=page_no<%lu\n",
+		(ulint) space_id, (ulint) IBUF_COUNT_N_SPACES,
+		(ulint) page_no, (ulint) IBUF_COUNT_N_PAGES);
+	ut_error;
+}
+#endif
+
+/** @name Offsets to the per-page bits in the insert buffer bitmap */
+/* @{ */
+#define	IBUF_BITMAP_FREE	0	/*!< Bits indicating the
+					amount of free space */
+#define IBUF_BITMAP_BUFFERED	2	/*!< TRUE if there are buffered
+					changes for the page */
+#define IBUF_BITMAP_IBUF	3	/*!< TRUE if page is a part of
+					the ibuf tree, excluding the
+					root page, or is in the free
+					list of the ibuf */
+/* @} */
+
+#define IBUF_REC_FIELD_SPACE	0	/*!< in the pre-4.1 format,
+					the page number. later, the space_id */
+#define IBUF_REC_FIELD_MARKER	1	/*!< starting with 4.1, a marker
+					consisting of 1 byte that is 0 */
+#define IBUF_REC_FIELD_PAGE	2	/*!< starting with 4.1, the
+					page number */
+#define IBUF_REC_FIELD_METADATA	3	/* the metadata field */
+#define IBUF_REC_FIELD_USER	4	/* first user field */
+
+/* Various constants for checking the type of an ibuf record and extracting
+data from it. For details, see the description of the record format at the
+top of this file. */
+
+/** @name Format of the IBUF_REC_FIELD_METADATA of an insert buffer record
+The fourth column in the MySQL 5.5 format contains an operation
+type, counter, and some flags. */
+/* @{ */
+#define IBUF_REC_INFO_SIZE	4	/*!< Combined size of info fields at
+					the beginning of the fourth field */
+#if IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
+# error "IBUF_REC_INFO_SIZE >= DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
+#endif
+
+/* Offsets for the fields at the beginning of the fourth field */
+#define IBUF_REC_OFFSET_COUNTER	0	/*!< Operation counter */
+#define IBUF_REC_OFFSET_TYPE	2	/*!< Type of operation */
+#define IBUF_REC_OFFSET_FLAGS	3	/*!< Additional flags */
+
+/* Record flag masks */
+#define IBUF_REC_COMPACT	0x1	/*!< Set in
+					IBUF_REC_OFFSET_FLAGS if the
+					user index is in COMPACT
+					format or later */
+
+
+/** The mutex used to block pessimistic inserts to ibuf trees */
+static ib_mutex_t	ibuf_pessimistic_insert_mutex;
+
+/** The mutex protecting the insert buffer structs */
+static ib_mutex_t	ibuf_mutex;
+
+/** The mutex protecting the insert buffer bitmaps */
+static ib_mutex_t	ibuf_bitmap_mutex;
+
+/** The area in pages from which contract looks for page numbers for merge */
+#define	IBUF_MERGE_AREA			8UL
+
+/** Inside the merge area, pages which have at most 1 per this number less
+buffered entries compared to maximum volume that can be buffered for a single
+page are merged along with the page whose buffer became full */
+#define IBUF_MERGE_THRESHOLD		4
+
+/** In ibuf_contract at most this number of pages is read to memory in one
+batch, in order to merge the entries for them in the insert buffer */
+#define	IBUF_MAX_N_PAGES_MERGED		IBUF_MERGE_AREA
+
+/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
+many pages, we start to contract it in connection to inserts there, using
+non-synchronous contract */
+#define IBUF_CONTRACT_ON_INSERT_NON_SYNC	0
+
+/** If the combined size of the ibuf trees exceeds ibuf->max_size by this
+many pages, we start to contract it in connection to inserts there, using
+synchronous contract */
+#define IBUF_CONTRACT_ON_INSERT_SYNC		5
+
+/** If the combined size of the ibuf trees exceeds ibuf->max_size by
+this many pages, we start to contract it using synchronous contract, but do
+not insert */
+#define IBUF_CONTRACT_DO_NOT_INSERT		10
+
+/* TODO: how to cope with drop table if there are records in the insert
+buffer for the indexes of the table? Is there actually any problem,
+because ibuf merge is done to a page when it is read in, and it is
+still physically like the index page even if the index would have been
+dropped! So, there seems to be no problem. */
+
+/******************************************************************//**
+Sets the flag in the current mini-transaction record indicating we're
+inside an insert buffer routine. */
+UNIV_INLINE
+void
+ibuf_enter(
+/*=======*/
+	mtr_t*	mtr)	/*!< in/out: mini-transaction */
+{
+	ut_ad(!mtr->inside_ibuf);
+	mtr->inside_ibuf = TRUE;
+}
+
+/******************************************************************//**
+Sets the flag in the current mini-transaction record indicating we're
+exiting an insert buffer routine. */
+UNIV_INLINE
+void
+ibuf_exit(
+/*======*/
+	mtr_t*	mtr)	/*!< in/out: mini-transaction */
+{
+	ut_ad(mtr->inside_ibuf);
+	mtr->inside_ibuf = FALSE;
+}
+
+/**************************************************************//**
+Commits an insert buffer mini-transaction and sets the persistent
+cursor latch mode to BTR_NO_LATCHES, that is, detaches the cursor. */
+UNIV_INLINE
+void
+ibuf_btr_pcur_commit_specify_mtr(
+/*=============================*/
+	btr_pcur_t*	pcur,	/*!< in/out: persistent cursor */
+	mtr_t*		mtr)	/*!< in/out: mini-transaction */
+{
+	ut_d(ibuf_exit(mtr));
+	btr_pcur_commit_specify_mtr(pcur, mtr);
+}
+
+/******************************************************************//**
+Gets the ibuf header page and x-latches it.
+@return insert buffer header page */
+static
+page_t*
+ibuf_header_page_get(
+/*=================*/
+	mtr_t*	mtr)	/*!< in/out: mini-transaction */
+{
+	buf_block_t*	block;
+
+	ut_ad(!ibuf_inside(mtr));
+
+	block = buf_page_get(
+		IBUF_SPACE_ID, 0, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_IBUF_HEADER);
+
+	return(buf_block_get_frame(block));
+}
+
+/******************************************************************//**
+Gets the root page and x-latches it.
+@return insert buffer tree root page */
+static
+page_t*
+ibuf_tree_root_get(
+/*===============*/
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	buf_block_t*	block;
+	page_t*		root;
+
+	ut_ad(ibuf_inside(mtr));
+	ut_ad(mutex_own(&ibuf_mutex));
+
+	mtr_x_lock(dict_index_get_lock(ibuf->index), mtr);
+
+	block = buf_page_get(
+		IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, mtr);
+
+	buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW);
+
+	root = buf_block_get_frame(block);
+
+	ut_ad(page_get_space_id(root) == IBUF_SPACE_ID);
+	ut_ad(page_get_page_no(root) == FSP_IBUF_TREE_ROOT_PAGE_NO);
+	ut_ad(ibuf->empty == page_is_empty(root));
+
+	return(root);
+}
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+/******************************************************************//**
+Gets the ibuf count for a given page.
+@return number of entries in the insert buffer currently buffered for
+this page */
+UNIV_INTERN
+ulint
+ibuf_count_get(
+/*===========*/
+	ulint	space,	/*!< in: space id */
+	ulint	page_no)/*!< in: page number */
+{
+	ibuf_count_check(space, page_no);
+
+	return(ibuf_counts[space][page_no]);
+}
+
+/******************************************************************//**
+Sets the ibuf count for a given page. */
+static
+void
+ibuf_count_set(
+/*===========*/
+	ulint	space,	/*!< in: space id */
+	ulint	page_no,/*!< in: page number */
+	ulint	val)	/*!< in: value to set */
+{
+	ibuf_count_check(space, page_no);
+	ut_a(val < UNIV_PAGE_SIZE);
+
+	ibuf_counts[space][page_no] = val;
+}
+#endif
+
+/******************************************************************//**
+Closes insert buffer and frees the data structures. */
+UNIV_INTERN
+void
+ibuf_close(void)
+/*============*/
+{
+	mutex_free(&ibuf_pessimistic_insert_mutex);
+	memset(&ibuf_pessimistic_insert_mutex,
+	       0x0, sizeof(ibuf_pessimistic_insert_mutex));
+
+	mutex_free(&ibuf_mutex);
+	memset(&ibuf_mutex, 0x0, sizeof(ibuf_mutex));
+
+	mutex_free(&ibuf_bitmap_mutex);
+	memset(&ibuf_bitmap_mutex, 0x0, sizeof(ibuf_mutex));
+
+	mem_free(ibuf);
+	ibuf = NULL;
+}
+
+/******************************************************************//**
+Updates the size information of the ibuf, assuming the segment size has not
+changed. */
+static
+void
+ibuf_size_update(
+/*=============*/
+	const page_t*	root,	/*!< in: ibuf tree root */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ut_ad(mutex_own(&ibuf_mutex));
+
+	ibuf->free_list_len = flst_get_len(root + PAGE_HEADER
+					   + PAGE_BTR_IBUF_FREE_LIST, mtr);
+
+	ibuf->height = 1 + btr_page_get_level(root, mtr);
+
+	/* the '1 +' is the ibuf header page */
+	ibuf->size = ibuf->seg_size - (1 + ibuf->free_list_len);
+}
+
+/******************************************************************//**
+Creates the insert buffer data structure at a database startup and initializes
+the data structures for the insert buffer. */
+UNIV_INTERN
+void
+ibuf_init_at_db_start(void)
+/*=======================*/
+{
+	page_t*		root;
+	mtr_t		mtr;
+	dict_table_t*	table;
+	mem_heap_t*	heap;
+	dict_index_t*	index;
+	ulint		n_used;
+	page_t*		header_page;
+	dberr_t		error;
+
+	ibuf = static_cast<ibuf_t*>(mem_zalloc(sizeof(ibuf_t)));
+
+	/* At startup we initialize ibuf to have a maximum of
+	CHANGE_BUFFER_DEFAULT_SIZE in terms of percentage of the
+	buffer pool size. Once ibuf struct is initialized this
+	value is updated with the user supplied size by calling
+	ibuf_max_size_update(). */
+	ibuf->max_size = ((buf_pool_get_curr_size() / UNIV_PAGE_SIZE)
+			  * CHANGE_BUFFER_DEFAULT_SIZE) / 100;
+
+	mutex_create(ibuf_pessimistic_insert_mutex_key,
+		     &ibuf_pessimistic_insert_mutex,
+		     SYNC_IBUF_PESS_INSERT_MUTEX);
+
+	mutex_create(ibuf_mutex_key,
+		     &ibuf_mutex, SYNC_IBUF_MUTEX);
+
+	mutex_create(ibuf_bitmap_mutex_key,
+		     &ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX);
+
+	mtr_start(&mtr);
+
+	mutex_enter(&ibuf_mutex);
+
+	mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, NULL), &mtr);
+
+	header_page = ibuf_header_page_get(&mtr);
+
+	fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
+			      &n_used, &mtr);
+	ibuf_enter(&mtr);
+
+	ut_ad(n_used >= 2);
+
+	ibuf->seg_size = n_used;
+
+	{
+		buf_block_t*	block;
+
+		block = buf_page_get(
+			IBUF_SPACE_ID, 0, FSP_IBUF_TREE_ROOT_PAGE_NO,
+			RW_X_LATCH, &mtr);
+		buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
+
+		root = buf_block_get_frame(block);
+	}
+
+	ibuf_size_update(root, &mtr);
+	mutex_exit(&ibuf_mutex);
+
+	ibuf->empty = page_is_empty(root);
+	ibuf_mtr_commit(&mtr);
+
+	heap = mem_heap_create(450);
+
+	/* Use old-style record format for the insert buffer. */
+	table = dict_mem_table_create(IBUF_TABLE_NAME, IBUF_SPACE_ID, 1, 0, 0);
+
+	dict_mem_table_add_col(table, heap, "DUMMY_COLUMN", DATA_BINARY, 0, 0);
+
+	table->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;
+
+	dict_table_add_to_cache(table, FALSE, heap);
+	mem_heap_free(heap);
+
+	index = dict_mem_index_create(
+		IBUF_TABLE_NAME, "CLUST_IND",
+		IBUF_SPACE_ID, DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 1);
+
+	dict_mem_index_add_field(index, "DUMMY_COLUMN", 0);
+
+	index->id = DICT_IBUF_ID_MIN + IBUF_SPACE_ID;
+
+	error = dict_index_add_to_cache(table, index,
+					FSP_IBUF_TREE_ROOT_PAGE_NO, FALSE);
+	ut_a(error == DB_SUCCESS);
+
+	ibuf->index = dict_table_get_first_index(table);
+}
+
+/*********************************************************************//**
+Updates the max_size value for ibuf. */
+UNIV_INTERN
+void
+ibuf_max_size_update(
+/*=================*/
+	ulint	new_val)	/*!< in: new value in terms of
+				percentage of the buffer pool size */
+{
+	ulint	new_size = ((buf_pool_get_curr_size() / UNIV_PAGE_SIZE)
+			    * new_val) / 100;
+	mutex_enter(&ibuf_mutex);
+	ibuf->max_size = new_size;
+	mutex_exit(&ibuf_mutex);
+}
+
+
+#endif /* !UNIV_HOTBACKUP */
+/*********************************************************************//**
+Initializes an ibuf bitmap page. */
+UNIV_INTERN
+void
+ibuf_bitmap_page_init(
+/*==================*/
+	buf_block_t*	block,	/*!< in: bitmap page */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	page_t*	page;
+	ulint	byte_offset;
+	ulint	zip_size = buf_block_get_zip_size(block);
+
+	ut_a(ut_is_2pow(zip_size));
+
+	page = buf_block_get_frame(block);
+	fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
+
+	/* Write all zeros to the bitmap */
+
+	if (!zip_size) {
+		byte_offset = UT_BITS_IN_BYTES(UNIV_PAGE_SIZE
+					       * IBUF_BITS_PER_PAGE);
+	} else {
+		byte_offset = UT_BITS_IN_BYTES(zip_size * IBUF_BITS_PER_PAGE);
+	}
+
+	memset(page + IBUF_BITMAP, 0, byte_offset);
+
+	/* The remaining area (up to the page trailer) is uninitialized. */
+
+#ifndef UNIV_HOTBACKUP
+	mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr);
+#endif /* !UNIV_HOTBACKUP */
+}
+
+/*********************************************************************//**
+Parses a redo log record of an ibuf bitmap page init.
+@return end of log record or NULL */
+UNIV_INTERN
+byte*
+ibuf_parse_bitmap_init(
+/*===================*/
+	byte*		ptr,	/*!< in: buffer */
+	byte*		end_ptr __attribute__((unused)), /*!< in: buffer end */
+	buf_block_t*	block,	/*!< in: block or NULL */
+	mtr_t*		mtr)	/*!< in: mtr or NULL */
+{
+	ut_ad(ptr && end_ptr);
+
+	if (block) {
+		ibuf_bitmap_page_init(block, mtr);
+	}
+
+	return(ptr);
+}
+#ifndef UNIV_HOTBACKUP
+# ifdef UNIV_DEBUG
+/** Gets the desired bits for a given page from a bitmap page.
+@param page	in: bitmap page
+@param offset	in: page whose bits to get
+@param zs	in: compressed page size in bytes; 0 for uncompressed pages
+@param bit	in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
+@param mtr	in: mini-transaction holding an x-latch on the bitmap page
+@return value of bits */
+# define ibuf_bitmap_page_get_bits(page, offset, zs, bit, mtr)	\
+	ibuf_bitmap_page_get_bits_low(page, offset, zs,		\
+				      MTR_MEMO_PAGE_X_FIX, mtr, bit)
+# else /* UNIV_DEBUG */
+/** Gets the desired bits for a given page from a bitmap page.
+@param page	in: bitmap page
+@param offset	in: page whose bits to get
+@param zs	in: compressed page size in bytes; 0 for uncompressed pages
+@param bit	in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ...
+@param mtr	in: mini-transaction holding an x-latch on the bitmap page
+@return value of bits */
+# define ibuf_bitmap_page_get_bits(page, offset, zs, bit, mtr)	\
+	ibuf_bitmap_page_get_bits_low(page, offset, zs, bit)
+# endif /* UNIV_DEBUG */
+
+/********************************************************************//**
+Gets the desired bits for a given page from a bitmap page.
+@return value of bits */
+UNIV_INLINE
+ulint
+ibuf_bitmap_page_get_bits_low(
+/*==========================*/
+	const page_t*	page,	/*!< in: bitmap page */
+	ulint		page_no,/*!< in: page whose bits to get */
+	ulint		zip_size,/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+#ifdef UNIV_DEBUG
+	ulint		latch_type,
+				/*!< in: MTR_MEMO_PAGE_X_FIX,
+				MTR_MEMO_BUF_FIX, ... */
+	mtr_t*		mtr,	/*!< in: mini-transaction holding latch_type
+				on the bitmap page */
+#endif /* UNIV_DEBUG */
+	ulint		bit)	/*!< in: IBUF_BITMAP_FREE,
+				IBUF_BITMAP_BUFFERED, ... */
+{
+	ulint	byte_offset;
+	ulint	bit_offset;
+	ulint	map_byte;
+	ulint	value;
+
+	ut_ad(bit < IBUF_BITS_PER_PAGE);
+#if IBUF_BITS_PER_PAGE % 2
+# error "IBUF_BITS_PER_PAGE % 2 != 0"
+#endif
+	ut_ad(ut_is_2pow(zip_size));
+	ut_ad(mtr_memo_contains_page(mtr, page, latch_type));
+
+	if (!zip_size) {
+		bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
+			+ bit;
+	} else {
+		bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
+			+ bit;
+	}
+
+	byte_offset = bit_offset / 8;
+	bit_offset = bit_offset % 8;
+
+	ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
+
+	map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
+
+	value = ut_bit_get_nth(map_byte, bit_offset);
+
+	if (bit == IBUF_BITMAP_FREE) {
+		ut_ad(bit_offset + 1 < 8);
+
+		value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1);
+	}
+
+	return(value);
+}
+
+/********************************************************************//**
+Sets the desired bit for a given page in a bitmap page. */
+static
+void
+ibuf_bitmap_page_set_bits(
+/*======================*/
+	page_t*	page,	/*!< in: bitmap page */
+	ulint	page_no,/*!< in: page whose bits to set */
+	ulint	zip_size,/*!< in: compressed page size in bytes;
+			0 for uncompressed pages */
+	ulint	bit,	/*!< in: IBUF_BITMAP_FREE,
+			IBUF_BITMAP_BUFFERED, ... */
+	ulint	val,	/*!< in: value to set */
+	mtr_t*	mtr)	/*!< in: mtr containing an x-latch to the bitmap
+			page */
+{
+	ulint	byte_offset;
+	ulint	bit_offset;
+	ulint	map_byte;
+
+	ut_ad(bit < IBUF_BITS_PER_PAGE);
+#if IBUF_BITS_PER_PAGE % 2
+# error "IBUF_BITS_PER_PAGE % 2 != 0"
+#endif
+	ut_ad(ut_is_2pow(zip_size));
+	ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX));
+#ifdef UNIV_IBUF_COUNT_DEBUG
+	ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE)
+	     || (0 == ibuf_count_get(page_get_space_id(page),
+				     page_no)));
+#endif
+	if (!zip_size) {
+		bit_offset = (page_no % UNIV_PAGE_SIZE) * IBUF_BITS_PER_PAGE
+			+ bit;
+	} else {
+		bit_offset = (page_no & (zip_size - 1)) * IBUF_BITS_PER_PAGE
+			+ bit;
+	}
+
+	byte_offset = bit_offset / 8;
+	bit_offset = bit_offset % 8;
+
+	ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE);
+
+	map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset);
+
+	if (bit == IBUF_BITMAP_FREE) {
+		ut_ad(bit_offset + 1 < 8);
+		ut_ad(val <= 3);
+
+		map_byte = ut_bit_set_nth(map_byte, bit_offset, val / 2);
+		map_byte = ut_bit_set_nth(map_byte, bit_offset + 1, val % 2);
+	} else {
+		ut_ad(val <= 1);
+		map_byte = ut_bit_set_nth(map_byte, bit_offset, val);
+	}
+
+	mlog_write_ulint(page + IBUF_BITMAP + byte_offset, map_byte,
+			 MLOG_1BYTE, mtr);
+}
+
+/********************************************************************//**
+Calculates the bitmap page number for a given page number.
+@return the bitmap page number where the file page is mapped */
+UNIV_INLINE
+ulint
+ibuf_bitmap_page_no_calc(
+/*=====================*/
+	ulint	zip_size,	/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	ulint	page_no)	/*!< in: tablespace page number */
+{
+	ut_ad(ut_is_2pow(zip_size));
+
+	if (!zip_size) {
+		return(FSP_IBUF_BITMAP_OFFSET
+		       + (page_no & ~(UNIV_PAGE_SIZE - 1)));
+	} else {
+		return(FSP_IBUF_BITMAP_OFFSET
+		       + (page_no & ~(zip_size - 1)));
+	}
+}
+
+/********************************************************************//**
+Gets the ibuf bitmap page where the bits describing a given file page are
+stored.
+@return bitmap page where the file page is mapped, that is, the bitmap
+page containing the descriptor bits for the file page; the bitmap page
+is x-latched */
+static
+page_t*
+ibuf_bitmap_get_map_page_func(
+/*==========================*/
+	ulint		space,	/*!< in: space id of the file page */
+	ulint		page_no,/*!< in: page number of the file page */
+	ulint		zip_size,/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	buf_block_t*	block;
+
+	block = buf_page_get_gen(space, zip_size,
+				 ibuf_bitmap_page_no_calc(zip_size, page_no),
+				 RW_X_LATCH, NULL, BUF_GET,
+				 file, line, mtr);
+	buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP);
+
+	return(buf_block_get_frame(block));
+}
+
+/********************************************************************//**
+Gets the ibuf bitmap page where the bits describing a given file page are
+stored.
+@return bitmap page where the file page is mapped, that is, the bitmap
+page containing the descriptor bits for the file page; the bitmap page
+is x-latched
+@param space	in: space id of the file page
+@param page_no	in: page number of the file page
+@param zip_size	in: compressed page size in bytes; 0 for uncompressed pages
+@param mtr	in: mini-transaction */
+#define ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr)	\
+	ibuf_bitmap_get_map_page_func(space, page_no, zip_size,		\
+				      __FILE__, __LINE__, mtr)
+
+/************************************************************************//**
+Sets the free bits of the page in the ibuf bitmap. This is done in a separate
+mini-transaction, hence this operation does not restrict further work to only
+ibuf bitmap operations, which would result if the latch to the bitmap page
+were kept. */
+UNIV_INLINE
+void
+ibuf_set_free_bits_low(
+/*===================*/
+	ulint			zip_size,/*!< in: compressed page size in
+					bytes; 0 for uncompressed pages */
+	const buf_block_t*	block,	/*!< in: index page; free bits are set
+					if the index is non-clustered and page
+					level is 0 */
+	ulint			val,	/*!< in: value to set: < 4 */
+	mtr_t*			mtr)	/*!< in/out: mtr */
+{
+	page_t*	bitmap_page;
+	ulint	space;
+	ulint	page_no;
+
+	if (!page_is_leaf(buf_block_get_frame(block))) {
+
+		return;
+	}
+
+	space = buf_block_get_space(block);
+	page_no = buf_block_get_page_no(block);
+	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
+#ifdef UNIV_IBUF_DEBUG
+# if 0
+	fprintf(stderr,
+		"Setting space %lu page %lu free bits to %lu should be %lu\n",
+		space, page_no, val,
+		ibuf_index_page_calc_free(zip_size, block));
+# endif
+
+	ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
+#endif /* UNIV_IBUF_DEBUG */
+	ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
+				  IBUF_BITMAP_FREE, val, mtr);
+}
+
+/************************************************************************//**
+Sets the free bit of the page in the ibuf bitmap. This is done in a separate
+mini-transaction, hence this operation does not restrict further work to only
+ibuf bitmap operations, which would result if the latch to the bitmap page
+were kept. */
+UNIV_INTERN
+void
+ibuf_set_free_bits_func(
+/*====================*/
+	buf_block_t*	block,	/*!< in: index page of a non-clustered index;
+				free bit is reset if page level is 0 */
+#ifdef UNIV_IBUF_DEBUG
+	ulint		max_val,/*!< in: ULINT_UNDEFINED or a maximum
+				value which the bits must have before
+				setting; this is for debugging */
+#endif /* UNIV_IBUF_DEBUG */
+	ulint		val)	/*!< in: value to set: < 4 */
+{
+	mtr_t	mtr;
+	page_t*	page;
+	page_t*	bitmap_page;
+	ulint	space;
+	ulint	page_no;
+	ulint	zip_size;
+
+	page = buf_block_get_frame(block);
+
+	if (!page_is_leaf(page)) {
+
+		return;
+	}
+
+	mtr_start(&mtr);
+
+	space = buf_block_get_space(block);
+	page_no = buf_block_get_page_no(block);
+	zip_size = buf_block_get_zip_size(block);
+	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, &mtr);
+
+#ifdef UNIV_IBUF_DEBUG
+	if (max_val != ULINT_UNDEFINED) {
+		ulint	old_val;
+
+		old_val = ibuf_bitmap_page_get_bits(
+			bitmap_page, page_no, zip_size,
+			IBUF_BITMAP_FREE, &mtr);
+# if 0
+		if (old_val != max_val) {
+			fprintf(stderr,
+				"Ibuf: page %lu old val %lu max val %lu\n",
+				page_get_page_no(page),
+				old_val, max_val);
+		}
+# endif
+
+		ut_a(old_val <= max_val);
+	}
+# if 0
+	fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n",
+		page_get_page_no(page), val,
+		ibuf_index_page_calc_free(zip_size, block));
+# endif
+
+	ut_a(val <= ibuf_index_page_calc_free(zip_size, block));
+#endif /* UNIV_IBUF_DEBUG */
+	ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
+				  IBUF_BITMAP_FREE, val, &mtr);
+	mtr_commit(&mtr);
+}
+
+/************************************************************************//**
+Resets the free bits of the page in the ibuf bitmap. This is done in a
+separate mini-transaction, hence this operation does not restrict
+further work to only ibuf bitmap operations, which would result if the
+latch to the bitmap page were kept. NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page. It is safe
+to decrement or reset the bits in the bitmap in a mini-transaction
+that is committed before the mini-transaction that affects the free
+space. */
+UNIV_INTERN
+void
+ibuf_reset_free_bits(
+/*=================*/
+	buf_block_t*	block)	/*!< in: index page; free bits are set to 0
+				if the index is a non-clustered
+				non-unique, and page level is 0 */
+{
+	ibuf_set_free_bits(block, 0, ULINT_UNDEFINED);
+}
+
+/**********************************************************************//**
+Updates the free bits for an uncompressed page to reflect the present
+state. Does this in the mtr given, which means that the latching
+order rules virtually prevent any further operations for this OS
+thread until mtr is committed. NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page. It is safe
+to set the free bits in the same mini-transaction that updated the
+page. */
+UNIV_INTERN
+void
+ibuf_update_free_bits_low(
+/*======================*/
+	const buf_block_t*	block,		/*!< in: index page */
+	ulint			max_ins_size,	/*!< in: value of
+						maximum insert size
+						with reorganize before
+						the latest operation
+						performed to the page */
+	mtr_t*			mtr)		/*!< in/out: mtr */
+{
+	ulint	before;
+	ulint	after;
+
+	ut_a(!buf_block_get_page_zip(block));
+
+	before = ibuf_index_page_calc_free_bits(0, max_ins_size);
+
+	after = ibuf_index_page_calc_free(0, block);
+
+	/* This approach cannot be used on compressed pages, since the
+	computed value of "before" often does not match the current
+	state of the bitmap.
+	This is because the free space may
+	increase or decrease when a compressed page is reorganized. */
+	if (before != after) {
+		ibuf_set_free_bits_low(0, block, after, mtr);
+	}
+}
+
+/**********************************************************************//**
+Updates the free bits for a compressed page to reflect the present
+state. Does this in the mtr given, which means that the latching
+order rules virtually prevent any further operations for this OS
+thread until mtr is committed. NOTE: The free bits in the insert
+buffer bitmap must never exceed the free space on a page. It is safe
+to set the free bits in the same mini-transaction that updated the
+page. */
+UNIV_INTERN
+void
+ibuf_update_free_bits_zip(
+/*======================*/
+	buf_block_t*	block,	/*!< in/out: index page */
+	mtr_t*		mtr)	/*!< in/out: mtr */
+{
+	page_t*	bitmap_page;
+	ulint	space;
+	ulint	page_no;
+	ulint	zip_size;
+	ulint	after;
+
+	space = buf_block_get_space(block);
+	page_no = buf_block_get_page_no(block);
+	zip_size = buf_block_get_zip_size(block);
+
+	ut_a(page_is_leaf(buf_block_get_frame(block)));
+	ut_a(zip_size);
+
+	bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr);
+
+	after = ibuf_index_page_calc_free_zip(zip_size, block);
+
+	if (after == 0) {
+		/* We move the page to the front of the buffer pool LRU list:
+		the purpose of this is to prevent those pages to which we
+		cannot make inserts using the insert buffer from slipping
+		out of the buffer pool */
+
+		buf_page_make_young(&block->page);
+	}
+
+	ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size,
+				  IBUF_BITMAP_FREE, after, mtr);
+}
+
+/**********************************************************************//**
+Updates the free bits for the two pages to reflect the present state.
+Does this in the mtr given, which means that the latching order rules
+virtually prevent any further operations until mtr is committed.
+NOTE: The free bits in the insert buffer bitmap must never exceed the
+free space on a page. It is safe to set the free bits in the same
+mini-transaction that updated the pages. */
+UNIV_INTERN
+void
+ibuf_update_free_bits_for_two_pages_low(
+/*====================================*/
+	ulint		zip_size,/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	buf_block_t*	block1,	/*!< in: index page */
+	buf_block_t*	block2,	/*!< in: index page */
+	mtr_t*		mtr)	/*!< in: mtr */
+{
+	ulint	state;
+
+	/* As we have to x-latch two random bitmap pages, we have to acquire
+	the bitmap mutex to prevent a deadlock with a similar operation
+	performed by another OS thread. */
+
+	mutex_enter(&ibuf_bitmap_mutex);
+
+	state = ibuf_index_page_calc_free(zip_size, block1);
+
+	ibuf_set_free_bits_low(zip_size, block1, state, mtr);
+
+	state = ibuf_index_page_calc_free(zip_size, block2);
+
+	ibuf_set_free_bits_low(zip_size, block2, state, mtr);
+
+	mutex_exit(&ibuf_bitmap_mutex);
+}
+
+/**********************************************************************//**
+Returns TRUE if the page is one of the fixed address ibuf pages.
+@return TRUE if a fixed address ibuf i/o page */
+UNIV_INLINE
+ibool
+ibuf_fixed_addr_page(
+/*=================*/
+	ulint	space,	/*!< in: space id */
+	ulint	zip_size,/*!< in: compressed page size in bytes;
+			0 for uncompressed pages */
+	ulint	page_no)/*!< in: page number */
+{
+	return((space == IBUF_SPACE_ID && page_no == IBUF_TREE_ROOT_PAGE_NO)
+	       || ibuf_bitmap_page(zip_size, page_no));
+}
+
+/***********************************************************************//**
+Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages.
+Must not be called when recv_no_ibuf_operations==TRUE.
+@return TRUE if level 2 or level 3 page */
+UNIV_INTERN
+ibool
+ibuf_page_low(
+/*==========*/
+	ulint		space,	/*!< in: space id */
+	ulint		zip_size,/*!< in: compressed page size in bytes, or 0 */
+	ulint		page_no,/*!< in: page number */
+#ifdef UNIV_DEBUG
+	ibool		x_latch,/*!< in: FALSE if relaxed check
+				(avoid latching the bitmap page) */
+#endif /* UNIV_DEBUG */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr)	/*!< in: mtr which will contain an
+				x-latch to the bitmap page if the page
+				is not one of the fixed address ibuf
+				pages, or NULL, in which case a new
+				transaction is created. */
+{
+	ibool	ret;
+	mtr_t	local_mtr;
+	page_t*	bitmap_page;
+
+	ut_ad(!recv_no_ibuf_operations);
+	ut_ad(x_latch || mtr == NULL);
+
+	if (ibuf_fixed_addr_page(space, zip_size, page_no)) {
+
+		return(TRUE);
+	} else if (space != IBUF_SPACE_ID) {
+
+		return(FALSE);
+	}
+
+	ut_ad(fil_space_get_type(IBUF_SPACE_ID) == FIL_TABLESPACE);
+
+#ifdef UNIV_DEBUG
+	if (!x_latch) {
+		mtr_start(&local_mtr);
+
+		/* Get the bitmap page without a page latch, so that
+		we will not be violating the latching order when
+		another bitmap page has already been latched by this
+		thread. The page will be buffer-fixed, and thus it
+		cannot be removed or relocated while we are looking at
+		it. The contents of the page could change, but the
+		IBUF_BITMAP_IBUF bit that we are interested in should
+		not be modified by any other thread. Nobody should be
+		calling ibuf_add_free_page() or ibuf_remove_free_page()
+		while the page is linked to the insert buffer b-tree. */
+
+		bitmap_page = buf_block_get_frame(
+			buf_page_get_gen(
+				space, zip_size,
+				ibuf_bitmap_page_no_calc(zip_size, page_no),
+				RW_NO_LATCH, NULL, BUF_GET_NO_LATCH,
+				file, line, &local_mtr));
+
+		ret = ibuf_bitmap_page_get_bits_low(
+			bitmap_page, page_no, zip_size,
+			MTR_MEMO_BUF_FIX, &local_mtr, IBUF_BITMAP_IBUF);
+
+		mtr_commit(&local_mtr);
+		return(ret);
+	}
+#endif /* UNIV_DEBUG */
+
+	if (mtr == NULL) {
+		mtr = &local_mtr;
+		mtr_start(mtr);
+	}
+
+	bitmap_page = ibuf_bitmap_get_map_page_func(space, page_no, zip_size,
+						    file, line, mtr);
+
+	ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size,
+					IBUF_BITMAP_IBUF, mtr);
+
+	if (mtr == &local_mtr) {
+		mtr_commit(mtr);
+	}
+
+	return(ret);
+}
+
+#ifdef UNIV_DEBUG
+# define ibuf_rec_get_page_no(mtr,rec)	ibuf_rec_get_page_no_func(mtr,rec)
+#else /* UNIV_DEBUG */
+# define ibuf_rec_get_page_no(mtr,rec)	ibuf_rec_get_page_no_func(rec)
+#endif /* UNIV_DEBUG */
+
+/********************************************************************//**
+Returns the page number field of an ibuf record.
+@return page number */
+static
+ulint
+ibuf_rec_get_page_no_func(
+/*======================*/
+#ifdef UNIV_DEBUG
+	mtr_t*		mtr,	/*!< in: mini-transaction owning rec */
+#endif /* UNIV_DEBUG */
+	const rec_t*	rec)	/*!< in: ibuf record */
+{
+	const byte*	field;
+	ulint		len;
+
+	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+	ut_ad(ibuf_inside(mtr));
+	ut_ad(rec_get_n_fields_old(rec) > 2);
+
+	field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
+
+	ut_a(len == 1);
+
+	field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_PAGE, &len);
+
+	ut_a(len == 4);
+
+	return(mach_read_from_4(field));
+}
+
+#ifdef UNIV_DEBUG
+# define ibuf_rec_get_space(mtr,rec)	ibuf_rec_get_space_func(mtr,rec)
+#else /* UNIV_DEBUG */
+# define ibuf_rec_get_space(mtr,rec)	ibuf_rec_get_space_func(rec)
+#endif /* UNIV_DEBUG */
+
+/********************************************************************//**
+Returns the space id field of an ibuf record. For < 4.1.x format records
+returns 0.
+@return space id */
+static
+ulint
+ibuf_rec_get_space_func(
+/*====================*/
+#ifdef UNIV_DEBUG
+	mtr_t*		mtr,	/*!< in: mini-transaction owning rec */
+#endif /* UNIV_DEBUG */
+	const rec_t*	rec)	/*!< in: ibuf record */
+{
+	const byte*	field;
+	ulint		len;
+
+	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+	ut_ad(ibuf_inside(mtr));
+	ut_ad(rec_get_n_fields_old(rec) > 2);
+
+	field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
+
+	ut_a(len == 1);
+
+	field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len);
+
+	ut_a(len == 4);
+
+	return(mach_read_from_4(field));
+}
+
+#ifdef UNIV_DEBUG
+# define ibuf_rec_get_info(mtr,rec,op,comp,info_len,counter)	\
+	ibuf_rec_get_info_func(mtr,rec,op,comp,info_len,counter)
+#else /* UNIV_DEBUG */
+# define ibuf_rec_get_info(mtr,rec,op,comp,info_len,counter)	\
+	ibuf_rec_get_info_func(rec,op,comp,info_len,counter)
+#endif
+/****************************************************************//**
+Get various information about an ibuf record in >= 4.1.x format. */
+static
+void
+ibuf_rec_get_info_func(
+/*===================*/
+#ifdef UNIV_DEBUG
+	mtr_t*		mtr,	/*!< in: mini-transaction owning rec */
+#endif /* UNIV_DEBUG */
+	const rec_t*	rec,		/*!< in: ibuf record */
+	ibuf_op_t*	op,		/*!< out: operation type, or NULL */
+	ibool*		comp,		/*!< out: compact flag, or NULL */
+	ulint*		info_len,	/*!< out: length of info fields at the
+					start of the fourth field, or
+					NULL */
+	ulint*		counter)	/*!< out: counter value, or NULL */
+{
+	const byte*	types;
+	ulint		fields;
+	ulint		len;
+
+	/* Local variables to shadow arguments. */
+	ibuf_op_t	op_local;
+	ibool		comp_local;
+	ulint		info_len_local;
+	ulint		counter_local;
+
+	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+	ut_ad(ibuf_inside(mtr));
+	fields = rec_get_n_fields_old(rec);
+	ut_a(fields > IBUF_REC_FIELD_USER);
+
+	types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
+
+	info_len_local = len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
+
+	switch (info_len_local) {
+	case 0:
+	case 1:
+		op_local = IBUF_OP_INSERT;
+		comp_local = info_len_local;
+		ut_ad(!counter);
+		counter_local = ULINT_UNDEFINED;
+		break;
+
+	case IBUF_REC_INFO_SIZE:
+		op_local = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE];
+		comp_local = types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT;
+		counter_local = mach_read_from_2(
+			types + IBUF_REC_OFFSET_COUNTER);
+		break;
+
+	default:
+		ut_error;
+	}
+
+	ut_a(op_local < IBUF_OP_COUNT);
+	ut_a((len - info_len_local) ==
+	     (fields - IBUF_REC_FIELD_USER)
+	     * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+
+	if (op) {
+		*op = op_local;
+	}
+
+	if (comp) {
+		*comp = comp_local;
+	}
+
+	if (info_len) {
+		*info_len = info_len_local;
+	}
+
+	if (counter) {
+		*counter = counter_local;
+	}
+}
+
+#ifdef UNIV_DEBUG
+# define ibuf_rec_get_op_type(mtr,rec)	ibuf_rec_get_op_type_func(mtr,rec)
+#else /* UNIV_DEBUG */
+# define ibuf_rec_get_op_type(mtr,rec)	ibuf_rec_get_op_type_func(rec)
+#endif
+
+/****************************************************************//**
+Returns the operation type field of an ibuf record.
+@return operation type */
+static
+ibuf_op_t
+ibuf_rec_get_op_type_func(
+/*======================*/
+#ifdef UNIV_DEBUG
+	mtr_t*		mtr,	/*!< in: mini-transaction owning rec */
+#endif /* UNIV_DEBUG */
+	const rec_t*	rec)	/*!< in: ibuf record */
+{
+	ulint		len;
+
+	ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX));
+	ut_ad(ibuf_inside(mtr));
+	ut_ad(rec_get_n_fields_old(rec) > 2);
+
+	(void) rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
+
+	if (len > 1) {
+		/* This is a < 4.1.x format record */
+
+		return(IBUF_OP_INSERT);
+	} else {
+		ibuf_op_t	op;
+
+		ibuf_rec_get_info(mtr, rec, &op, NULL, NULL, NULL);
+
+		return(op);
+	}
+}
+
+/****************************************************************//**
+Read the first two bytes from a record's fourth field (counter field in new
+records; something else in older records).
+@return "counter" field, or ULINT_UNDEFINED if for some reason it
+can't be read */
+UNIV_INTERN
+ulint
+ibuf_rec_get_counter(
+/*=================*/
+	const rec_t*	rec)	/*!< in: ibuf record */
+{
+	const byte*	ptr;
+	ulint		len;
+
+	if (rec_get_n_fields_old(rec) <= IBUF_REC_FIELD_METADATA) {
+
+		return(ULINT_UNDEFINED);
+	}
+
+	ptr = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
+
+	if (len >= 2) {
+
+		return(mach_read_from_2(ptr));
+	} else {
+
+		return(ULINT_UNDEFINED);
+	}
+}
+
+/****************************************************************//**
+Add accumulated operation counts to a permanent array. Both arrays must be
+of size IBUF_OP_COUNT. */
+static
+void
+ibuf_add_ops(
+/*=========*/
+	ulint*		arr,	/*!< in/out: array to modify */
+	const ulint*	ops)	/*!< in: operation counts */
+
+{
+	ulint	i;
+
+#ifndef HAVE_ATOMIC_BUILTINS
+	ut_ad(mutex_own(&ibuf_mutex));
+#endif /* !HAVE_ATOMIC_BUILTINS */
+
+	for (i = 0; i < IBUF_OP_COUNT; i++) {
+#ifdef HAVE_ATOMIC_BUILTINS
+		os_atomic_increment_ulint(&arr[i], ops[i]);
+#else /* HAVE_ATOMIC_BUILTINS */
+		arr[i] += ops[i];
+#endif /* HAVE_ATOMIC_BUILTINS */
+	}
+}
+
+/****************************************************************//**
+Print operation counts. The array must be of size IBUF_OP_COUNT. */
+static
+void
+ibuf_print_ops(
+/*===========*/
+	const ulint*	ops,	/*!< in: operation counts */
+	FILE*		file)	/*!< in: file where to print */
+{
+	static const char*	op_names[] = {
+		"insert",
+		"delete mark",
+		"delete"
+	};
+	ulint	i;
+
+	ut_a(UT_ARR_SIZE(op_names) == IBUF_OP_COUNT);
+
+	for (i = 0; i < IBUF_OP_COUNT; i++) {
+		fprintf(file, "%s %lu%s", op_names[i],
+			(ulong) ops[i], (i < (IBUF_OP_COUNT - 1)) ? ", " : "");
+	}
+
+	putc('\n', file);
+}
+
+/********************************************************************//**
+Creates a dummy index for inserting a record to a non-clustered index.
+@return dummy index */
+static
+dict_index_t*
+ibuf_dummy_index_create(
+/*====================*/
+	ulint		n,	/*!< in: number of fields */
+	ibool		comp)	/*!< in: TRUE=use compact record format */
+{
+	dict_table_t*	table;
+	dict_index_t*	index;
+
+	table = dict_mem_table_create("IBUF_DUMMY",
+				      DICT_HDR_SPACE, n,
+				      comp ? DICT_TF_COMPACT : 0, 0);
+
+	index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY",
+				      DICT_HDR_SPACE, 0, n);
+
+	index->table = table;
+
+	/* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */
+	index->cached = TRUE;
+
+	return(index);
+}
+/********************************************************************//**
+Add a column to the dummy index */
+static
+void
+ibuf_dummy_index_add_col(
+/*=====================*/
+	dict_index_t*	index,	/*!< in: dummy index */
+	const dtype_t*	type,	/*!< in: the data type of the column */
+	ulint		len)	/*!< in: length of the column */
+{
+	ulint	i	= index->table->n_def;
+	dict_mem_table_add_col(index->table, NULL, NULL,
+			       dtype_get_mtype(type),
+			       dtype_get_prtype(type),
+			       dtype_get_len(type));
+	dict_index_add_col(index, index->table,
+			   dict_table_get_nth_col(index->table, i), len);
+}
+/********************************************************************//**
+Deallocates a dummy index for inserting a record to a non-clustered index. */
+static
+void
+ibuf_dummy_index_free(
+/*==================*/
+	dict_index_t*	index)	/*!< in, own: dummy index */
+{
+	dict_table_t*	table = index->table;
+
+	dict_mem_index_free(index);
+	dict_mem_table_free(table);
+}
+
+#ifdef UNIV_DEBUG
+# define ibuf_build_entry_from_ibuf_rec(mtr,ibuf_rec,heap,pindex)	\
+	ibuf_build_entry_from_ibuf_rec_func(mtr,ibuf_rec,heap,pindex)
+#else /* UNIV_DEBUG */
+# define ibuf_build_entry_from_ibuf_rec(mtr,ibuf_rec,heap,pindex)	\
+	ibuf_build_entry_from_ibuf_rec_func(ibuf_rec,heap,pindex)
+#endif
+
+/*********************************************************************//**
+Builds the entry used to
+
+1) IBUF_OP_INSERT: insert into a non-clustered index
+
+2) IBUF_OP_DELETE_MARK: find the record whose delete-mark flag we need to
+   activate
+
+3) IBUF_OP_DELETE: find the record we need to delete
+
+when we have the corresponding record in an ibuf index.
+
+NOTE that as we copy pointers to fields in ibuf_rec, the caller must
+hold a latch to the ibuf_rec page as long as the entry is used!
+
+@return own: entry to insert to a non-clustered index */
+static
+dtuple_t*
+ibuf_build_entry_from_ibuf_rec_func(
+/*================================*/
+#ifdef UNIV_DEBUG
+	mtr_t*		mtr,	/*!< in: mini-transaction owning rec */
+#endif /* UNIV_DEBUG */
+	const rec_t*	ibuf_rec,	/*!< in: record in an insert buffer */
+	mem_heap_t*	heap,		/*!< in: heap where built */
+	dict_index_t**	pindex)		/*!< out, own: dummy index that
+					describes the entry */
+{
+	dtuple_t*	tuple;
+	dfield_t*	field;
+	ulint		n_fields;
+	const byte*	types;
+	const byte*	data;
+	ulint		len;
+	ulint		info_len;
+	ulint		i;
+	ulint		comp;
+	dict_index_t*	index;
+
+	ut_ad(mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_S_FIX));
+	ut_ad(ibuf_inside(mtr));
+
+	data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len);
+
+	ut_a(len == 1);
+	ut_a(*data == 0);
+	ut_a(rec_get_n_fields_old(ibuf_rec) > IBUF_REC_FIELD_USER);
+
+	n_fields = rec_get_n_fields_old(ibuf_rec) - IBUF_REC_FIELD_USER;
+
+	tuple = dtuple_create(heap, n_fields);
+
+	types = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_METADATA, &len);
+
+	ibuf_rec_get_info(mtr, ibuf_rec, NULL, &comp, &info_len, NULL);
+
+	index = ibuf_dummy_index_create(n_fields, comp);
+
+	len -= info_len;
+	types += info_len;
+
+	ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+
+	for (i = 0; i < n_fields; i++) {
+		field = dtuple_get_nth_field(tuple, i);
+
+		data = rec_get_nth_field_old(
+			ibuf_rec, i + IBUF_REC_FIELD_USER, &len);
+
+		dfield_set_data(field, data, len);
+
+		dtype_new_read_for_order_and_null_size(
+			dfield_get_type(field),
+			types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+
+		ibuf_dummy_index_add_col(index, dfield_get_type(field), len);
+	}
+
+	/* Prevent an ut_ad() failure in page_zip_write_rec() by
+	adding system columns to the dummy table pointed to by the
+	dummy secondary index. The insert buffer is only used for
+	secondary indexes, whose records never contain any system
+	columns, such as DB_TRX_ID. */
+	ut_d(dict_table_add_system_columns(index->table, index->table->heap));
+
+	*pindex = index;
+
+	return(tuple);
+}
+
+/******************************************************************//**
+Get the data size.
+@return size of fields */
+UNIV_INLINE
+ulint
+ibuf_rec_get_size(
+/*==============*/
+	const rec_t*	rec,		/*!< in: ibuf record */
+	const byte*	types,		/*!< in: fields */
+	ulint		n_fields,	/*!< in: number of fields */
+	ulint		comp)		/*!< in: 0=ROW_FORMAT=REDUNDANT,
+					nonzero=ROW_FORMAT=COMPACT */
+{
+	ulint	i;
+	ulint	field_offset;
+	ulint	types_offset;
+	ulint	size = 0;
+
+	field_offset = IBUF_REC_FIELD_USER;
+	types_offset = DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
+
+	for (i = 0; i < n_fields; i++) {
+		ulint		len;
+		dtype_t		dtype;
+
+		rec_get_nth_field_offs_old(rec, i + field_offset, &len);
+
+		if (len != UNIV_SQL_NULL) {
+			size += len;
+		} else {
+			dtype_new_read_for_order_and_null_size(&dtype, types);
+
+			size += dtype_get_sql_null_size(&dtype, comp);
+		}
+
+		types += types_offset;
+	}
+
+	return(size);
+}
+
+#ifdef UNIV_DEBUG
+# define ibuf_rec_get_volume(mtr,rec)	ibuf_rec_get_volume_func(mtr,rec)
+#else /* UNIV_DEBUG */
+# define ibuf_rec_get_volume(mtr,rec)	ibuf_rec_get_volume_func(rec)
+#endif
+
+/********************************************************************//**
+Returns the space taken by a stored non-clustered index entry if converted to
+an index record.
+@return size of index record in bytes + an upper limit of the space
+taken in the page directory */
+static
+ulint
+ibuf_rec_get_volume_func(
+/*=====================*/
+#ifdef UNIV_DEBUG
+	mtr_t*		mtr,	/*!< in: mini-transaction owning rec */
+#endif /* UNIV_DEBUG */
+	const rec_t*	ibuf_rec)/*!< in: ibuf record */
+{
+	ulint		len;
+	const byte*	data;
+	const byte*	types;
+	ulint		n_fields;
+	ulint		data_size;
+	ulint		comp;
+	ibuf_op_t	op;
+	ulint		info_len;
+
+	ut_ad(mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_X_FIX)
+	      || mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_S_FIX));
+	ut_ad(ibuf_inside(mtr));
+	ut_ad(rec_get_n_fields_old(ibuf_rec) > 2);
+
+	data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len);
+	ut_a(len == 1);
+	ut_a(*data == 0);
+
+	types = rec_get_nth_field_old(
+		ibuf_rec, IBUF_REC_FIELD_METADATA, &len);
+
+	ibuf_rec_get_info(mtr, ibuf_rec, &op, &comp, &info_len, NULL);
+
+	if (op == IBUF_OP_DELETE_MARK || op == IBUF_OP_DELETE) {
+		/* Delete-marking a record doesn't take any
+		additional space, and while deleting a record
+		actually frees up space, we have to play it safe and
+		pretend it takes no additional space (the record
+		might not exist, etc.). */
+
+		return(0);
+	} else if (comp) {
+		dtuple_t*	entry;
+		ulint		volume;
+		dict_index_t*	dummy_index;
+		mem_heap_t*	heap = mem_heap_create(500);
+
+		entry = ibuf_build_entry_from_ibuf_rec(mtr, ibuf_rec,
+						       heap, &dummy_index);
+
+		volume = rec_get_converted_size(dummy_index, entry, 0);
+
+		ibuf_dummy_index_free(dummy_index);
+		mem_heap_free(heap);
+
+		return(volume + page_dir_calc_reserved_space(1));
+	}
+
+	types += info_len;
+	n_fields = rec_get_n_fields_old(ibuf_rec)
+		- IBUF_REC_FIELD_USER;
+
+	data_size = ibuf_rec_get_size(ibuf_rec, types, n_fields, comp);
+
+	return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0)
+	       + page_dir_calc_reserved_space(1));
+}
+
+/*********************************************************************//**
+Builds the tuple to insert to an ibuf tree when we have an entry for a
+non-clustered index.
+
+NOTE that the original entry must be kept because we copy pointers to
+its fields.
+
+@return own: entry to insert into an ibuf index tree */
+static
+dtuple_t*
+ibuf_entry_build(
+/*=============*/
+	ibuf_op_t	op,	/*!< in: operation type */
+	dict_index_t*	index,	/*!< in: non-clustered index */
+	const dtuple_t*	entry,	/*!< in: entry for a non-clustered index */
+	ulint		space,	/*!< in: space id */
+	ulint		page_no,/*!< in: index page number where entry should
+				be inserted */
+	ulint		counter,/*!< in: counter value;
+				ULINT_UNDEFINED=not used */
+	mem_heap_t*	heap)	/*!< in: heap into which to build */
+{
+	dtuple_t*	tuple;
+	dfield_t*	field;
+	const dfield_t*	entry_field;
+	ulint		n_fields;
+	byte*		buf;
+	byte*		ti;
+	byte*		type_info;
+	ulint		i;
+
+	ut_ad(counter != ULINT_UNDEFINED || op == IBUF_OP_INSERT);
+	ut_ad(counter == ULINT_UNDEFINED || counter <= 0xFFFF);
+	ut_ad(op < IBUF_OP_COUNT);
+
+	/* We have to build a tuple with the following fields:
+
+	1-4) These are described at the top of this file.
+
+	5) The rest of the fields are copied from the entry.
+
+	All fields in the tuple are ordered like the type binary in our
+	insert buffer tree. */
+
+	n_fields = dtuple_get_n_fields(entry);
+
+	tuple = dtuple_create(heap, n_fields + IBUF_REC_FIELD_USER);
+
+	/* 1) Space Id */
+
+	field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_SPACE);
+
+	buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+
+	mach_write_to_4(buf, space);
+
+	dfield_set_data(field, buf, 4);
+
+	/* 2) Marker byte */
+
+	field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_MARKER);
+
+	buf = static_cast<byte*>(mem_heap_alloc(heap, 1));
+
+	/* We set the marker byte zero */
+
+	mach_write_to_1(buf, 0);
+
+	dfield_set_data(field, buf, 1);
+
+	/* 3) Page number */
+
+	field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_PAGE);
+
+	buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+
+	mach_write_to_4(buf, page_no);
+
+	dfield_set_data(field, buf, 4);
+
+	/* 4) Type info, part #1 */
+
+	if (counter == ULINT_UNDEFINED) {
+		i = dict_table_is_comp(index->table) ? 1 : 0;
+	} else {
+		ut_ad(counter <= 0xFFFF);
+		i = IBUF_REC_INFO_SIZE;
+	}
+
+	ti = type_info = static_cast<byte*>(
+		mem_heap_alloc(
+			heap,
+			i + n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE));
+
+	switch (i) {
+	default:
+		ut_error;
+		break;
+	case 1:
+		/* set the flag for ROW_FORMAT=COMPACT */
+		*ti++ = 0;
+		/* fall through */
+	case 0:
+		/* the old format does not allow delete buffering */
+		ut_ad(op == IBUF_OP_INSERT);
+		break;
+	case IBUF_REC_INFO_SIZE:
+		mach_write_to_2(ti + IBUF_REC_OFFSET_COUNTER, counter);
+
+		ti[IBUF_REC_OFFSET_TYPE] = (byte) op;
+		ti[IBUF_REC_OFFSET_FLAGS] = dict_table_is_comp(index->table)
+			? IBUF_REC_COMPACT : 0;
+		ti += IBUF_REC_INFO_SIZE;
+		break;
+	}
+
+	/* 5+) Fields from the entry */
+
+	for (i = 0; i < n_fields; i++) {
+		ulint			fixed_len;
+		const dict_field_t*	ifield;
+
+		field = dtuple_get_nth_field(tuple, i + IBUF_REC_FIELD_USER);
+		entry_field = dtuple_get_nth_field(entry, i);
+		dfield_copy(field, entry_field);
+
+		ifield = dict_index_get_nth_field(index, i);
+		/* Prefix index columns of fixed-length columns are of
+		fixed length. However, in the function call below,
+		dfield_get_type(entry_field) contains the fixed length
+		of the column in the clustered index. Replace it with
+		the fixed length of the secondary index column. */
 */
+		fixed_len = ifield->fixed_len;
+
+#ifdef UNIV_DEBUG
+		if (fixed_len) {
+			/* dict_index_add_col() should guarantee these */
+			ut_ad(fixed_len <= (ulint)
+			      dfield_get_type(entry_field)->len);
+			if (ifield->prefix_len) {
+				ut_ad(ifield->prefix_len == fixed_len);
+			} else {
+				ut_ad(fixed_len == (ulint)
+				      dfield_get_type(entry_field)->len);
+			}
+		}
+#endif /* UNIV_DEBUG */
+
+		dtype_new_store_for_order_and_null_size(
+			ti, dfield_get_type(entry_field), fixed_len);
+		ti += DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
+	}
+
+	/* 4) Type info, part #2 */
+
+	field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_METADATA);
+
+	dfield_set_data(field, type_info, ti - type_info);
+
+	/* Set all the types in the new tuple binary */
+
+	dtuple_set_types_binary(tuple, n_fields + IBUF_REC_FIELD_USER);
+
+	return(tuple);
+}
+
+/*********************************************************************//**
+Builds a search tuple used to search buffered inserts for an index page.
+This is for >= 4.1.x format records.
+@return own: search tuple */
+static
+dtuple_t*
+ibuf_search_tuple_build(
+/*====================*/
+	ulint	space,	/*!< in: space id */
+	ulint	page_no,/*!< in: index page number */
+	mem_heap_t*	heap)	/*!< in: heap into which to build */
+{
+	dtuple_t*	tuple;
+	dfield_t*	field;
+	byte*	buf;
+
+	tuple = dtuple_create(heap, IBUF_REC_FIELD_METADATA);
+
+	/* Store the space id in tuple */
+
+	field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_SPACE);
+
+	buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+
+	mach_write_to_4(buf, space);
+
+	dfield_set_data(field, buf, 4);
+
+	/* Store the new format record marker byte */
+
+	field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_MARKER);
+
+	buf = static_cast<byte*>(mem_heap_alloc(heap, 1));
+
+	mach_write_to_1(buf, 0);
+
+	dfield_set_data(field, buf, 1);
+
+	/* Store the page number in tuple */
+
+	field = dtuple_get_nth_field(tuple, IBUF_REC_FIELD_PAGE);
+
+	buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
+
+	mach_write_to_4(buf, page_no);
+
+	dfield_set_data(field, buf, 4);
+
+	dtuple_set_types_binary(tuple, IBUF_REC_FIELD_METADATA);
+
+	return(tuple);
+}
+
+/*********************************************************************//**
+Checks if there are enough pages in the free list of the ibuf tree that we
+dare to start a pessimistic insert to the insert buffer.
+@return TRUE if enough free pages in list */
+UNIV_INLINE
+ibool
+ibuf_data_enough_free_for_insert(void)
+/*==================================*/
+{
+	ut_ad(mutex_own(&ibuf_mutex));
+
+	/* We want a big margin of free pages, because a B-tree can sometimes
+	grow in size also if records are deleted from it, as the node pointers
+	can change, and we must make sure that we are able to delete the
+	inserts buffered for pages that we read to the buffer pool, without
+	any risk of running out of free space in the insert buffer. */
+
+	return(ibuf->free_list_len >= (ibuf->size / 2) + 3 * ibuf->height);
+}
+
+/*********************************************************************//**
+Checks if there are so many pages in the free list of the ibuf tree that we
+should remove some of them and free them to the file space management.
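+For example (illustrative figures): with ibuf->size == 100 pages and
+ibuf->height == 3, ibuf_data_enough_free_for_insert() above requires
+free_list_len >= 59 (100 / 2 + 3 * 3), whereas this function reports an
+excess only from free_list_len >= 62 (3 + 100 / 2 + 3 * 3); the
+three-page gap gives some hysteresis between the two decisions.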
+@return TRUE if there are too many free pages in the list */
+UNIV_INLINE
+ibool
+ibuf_data_too_much_free(void)
+/*=========================*/
+{
+	ut_ad(mutex_own(&ibuf_mutex));
+
+	return(ibuf->free_list_len >= 3 + (ibuf->size / 2) + 3 * ibuf->height);
+}
+
+/*********************************************************************//**
+Allocates a new page from the ibuf file segment and adds it to the free
+list.
+@return TRUE on success, FALSE if no space left */
+static
+ibool
+ibuf_add_free_page(void)
+/*====================*/
+{
+	mtr_t	mtr;
+	page_t*	header_page;
+	ulint	flags;
+	ulint	zip_size;
+	buf_block_t*	block;
+	page_t*	page;
+	page_t*	root;
+	page_t*	bitmap_page;
+
+	mtr_start(&mtr);
+
+	/* Acquire the fsp latch before the ibuf header, obeying the latching
+	order */
+	mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
+	zip_size = fsp_flags_get_zip_size(flags);
+
+	header_page = ibuf_header_page_get(&mtr);
+
+	/* Allocate a new page: NOTE that if the page has been a part of a
+	non-clustered index which has subsequently been dropped, then the
+	page may have buffered inserts in the insert buffer, and these
+	should be deleted from there. These get deleted when the page
+	allocation creates the page in the buffer pool. Thus the call below
+	may end up calling the insert buffer routines and, as we do not yet
+	have any latches on insert buffer tree pages, these routines can run
+	without a risk of a deadlock. This is the reason why we created a
+	special ibuf header page apart from the ibuf tree. */
+
+	block = fseg_alloc_free_page(
+	header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, 0, FSP_UP,
+	&mtr);
+
+	if (block == NULL) {
+	mtr_commit(&mtr);
+
+	return(FALSE);
+	}
+
+	ut_ad(rw_lock_get_x_lock_count(&block->lock) == 1);
+	ibuf_enter(&mtr);
+	mutex_enter(&ibuf_mutex);
+	root = ibuf_tree_root_get(&mtr);
+
+	buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE_NEW);
+	page = buf_block_get_frame(block);
+
+	/* Add the page to the free list and update the ibuf size data */
+
+	flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+	page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
+
+	mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_IBUF_FREE_LIST,
+	MLOG_2BYTES, &mtr);
+
+	ibuf->seg_size++;
+	ibuf->free_list_len++;
+
+	/* Set the bit indicating that this page is now an ibuf tree page
+	(level 2 page) */
+
+	bitmap_page = ibuf_bitmap_get_map_page(
+	IBUF_SPACE_ID, buf_block_get_page_no(block), zip_size, &mtr);
+
+	mutex_exit(&ibuf_mutex);
+
+	ibuf_bitmap_page_set_bits(
+	bitmap_page, buf_block_get_page_no(block), zip_size,
+	IBUF_BITMAP_IBUF, TRUE, &mtr);
+
+	ibuf_mtr_commit(&mtr);
+
+	return(TRUE);
+}
+
+/*********************************************************************//**
+Removes a page from the free list and frees it to the fsp system.
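+This is the converse of ibuf_add_free_page() above; the page is taken from
+the end of the free list, see the note on pessimistic inserts in the
+function body.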
 */
+static
+void
+ibuf_remove_free_page(void)
+/*=======================*/
+{
+	mtr_t	mtr;
+	mtr_t	mtr2;
+	page_t*	header_page;
+	ulint	flags;
+	ulint	zip_size;
+	ulint	page_no;
+	page_t*	page;
+	page_t*	root;
+	page_t*	bitmap_page;
+
+	mtr_start(&mtr);
+
+	/* Acquire the fsp latch before the ibuf header, obeying the latching
+	order */
+	mtr_x_lock(fil_space_get_latch(IBUF_SPACE_ID, &flags), &mtr);
+	zip_size = fsp_flags_get_zip_size(flags);
+
+	header_page = ibuf_header_page_get(&mtr);
+
+	/* Prevent pessimistic inserts into insert buffer trees for a while */
+	ibuf_enter(&mtr);
+	mutex_enter(&ibuf_pessimistic_insert_mutex);
+	mutex_enter(&ibuf_mutex);
+
+	if (!ibuf_data_too_much_free()) {
+
+	mutex_exit(&ibuf_mutex);
+	mutex_exit(&ibuf_pessimistic_insert_mutex);
+
+	ibuf_mtr_commit(&mtr);
+
+	return;
+	}
+
+	ibuf_mtr_start(&mtr2);
+
+	root = ibuf_tree_root_get(&mtr2);
+
+	mutex_exit(&ibuf_mutex);
+
+	page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+	&mtr2).page;
+
+	/* NOTE that we must release the latch on the ibuf tree root
+	because in fseg_free_page we access level 1 pages, and the root
+	is a level 2 page. */
+
+	ibuf_mtr_commit(&mtr2);
+	ibuf_exit(&mtr);
+
+	/* Since pessimistic inserts were prevented, we know that the
+	page is still in the free list. NOTE that also deletes may take
+	pages from the free list, but they take them from the start, and
+	the free list was so long that they cannot have taken the last
+	page from it. */
+
+	fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER,
+	IBUF_SPACE_ID, page_no, &mtr);
+
+#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
+	buf_page_reset_file_page_was_freed(IBUF_SPACE_ID, page_no);
+#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+
+	ibuf_enter(&mtr);
+
+	mutex_enter(&ibuf_mutex);
+
+	root = ibuf_tree_root_get(&mtr);
+
+	ut_ad(page_no == flst_get_last(root + PAGE_HEADER
+	+ PAGE_BTR_IBUF_FREE_LIST, &mtr).page);
+
+	{
+	buf_block_t*	block;
+
+	block = buf_page_get(
+	IBUF_SPACE_ID, 0, page_no, RW_X_LATCH, &mtr);
+
+	buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE);
+
+	page = buf_block_get_frame(block);
+	}
+
+	/* Remove the page from the free list and update the ibuf size data */
+
+	flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST,
+	page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr);
+
+	mutex_exit(&ibuf_pessimistic_insert_mutex);
+
+	ibuf->seg_size--;
+	ibuf->free_list_len--;
+
+	/* Set the bit indicating that this page is no longer an ibuf tree
+	page (level 2 page) */
+
+	bitmap_page = ibuf_bitmap_get_map_page(
+	IBUF_SPACE_ID, page_no, zip_size, &mtr);
+
+	mutex_exit(&ibuf_mutex);
+
+	ibuf_bitmap_page_set_bits(
+	bitmap_page, page_no, zip_size, IBUF_BITMAP_IBUF, FALSE, &mtr);
+
+#if defined UNIV_DEBUG_FILE_ACCESSES || defined UNIV_DEBUG
+	buf_page_set_file_page_was_freed(IBUF_SPACE_ID, page_no);
+#endif /* UNIV_DEBUG_FILE_ACCESSES || UNIV_DEBUG */
+	ibuf_mtr_commit(&mtr);
+}
+
+/***********************************************************************//**
+Frees excess pages from the ibuf free list. This function is called when an OS
+thread calls fsp services to allocate a new file segment, or a new page to a
+file segment, and the thread did not own the fsp latch before this call.
*/ +UNIV_INTERN +void +ibuf_free_excess_pages(void) +/*========================*/ +{ + ulint i; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(fil_space_get_latch(IBUF_SPACE_ID, NULL), + RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + + ut_ad(rw_lock_get_x_lock_count( + fil_space_get_latch(IBUF_SPACE_ID, NULL)) == 1); + + /* NOTE: We require that the thread did not own the latch before, + because then we know that we can obey the correct latching order + for ibuf latches */ + + if (!ibuf) { + /* Not yet initialized; not sure if this is possible, but + does no harm to check for it. */ + + return; + } + + /* Free at most a few pages at a time, so that we do not delay the + requested service too much */ + + for (i = 0; i < 4; i++) { + + ibool too_much_free; + + mutex_enter(&ibuf_mutex); + too_much_free = ibuf_data_too_much_free(); + mutex_exit(&ibuf_mutex); + + if (!too_much_free) { + return; + } + + ibuf_remove_free_page(); + } +} + +#ifdef UNIV_DEBUG +# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,vers,pages,n_stored) \ + ibuf_get_merge_page_nos_func(contract,rec,mtr,ids,vers,pages,n_stored) +#else /* UNIV_DEBUG */ +# define ibuf_get_merge_page_nos(contract,rec,mtr,ids,vers,pages,n_stored) \ + ibuf_get_merge_page_nos_func(contract,rec,ids,vers,pages,n_stored) +#endif /* UNIV_DEBUG */ + +/*********************************************************************//** +Reads page numbers from a leaf in an ibuf tree. +@return a lower limit for the combined volume of records which will be +merged */ +static +ulint +ibuf_get_merge_page_nos_func( +/*=========================*/ + ibool contract,/*!< in: TRUE if this function is called to + contract the tree, FALSE if this is called + when a single page becomes full and we look + if it pays to read also nearby pages */ + const rec_t* rec, /*!< in: insert buffer record */ +#ifdef UNIV_DEBUG + mtr_t* mtr, /*!< in: mini-transaction holding rec */ +#endif /* UNIV_DEBUG */ + ulint* space_ids,/*!< in/out: space id's of the pages */ + ib_int64_t* space_versions,/*!< in/out: tablespace version + timestamps; used to prevent reading in old + pages after DISCARD + IMPORT tablespace */ + ulint* page_nos,/*!< in/out: buffer for at least + IBUF_MAX_N_PAGES_MERGED many page numbers; + the page numbers are in an ascending order */ + ulint* n_stored)/*!< out: number of page numbers stored to + page_nos in this function */ +{ + ulint prev_page_no; + ulint prev_space_id; + ulint first_page_no; + ulint first_space_id; + ulint rec_page_no; + ulint rec_space_id; + ulint sum_volumes; + ulint volume_for_page; + ulint rec_volume; + ulint limit; + ulint n_pages; + + ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX) + || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); + ut_ad(ibuf_inside(mtr)); + + *n_stored = 0; + + limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool_get_curr_size() / 4); + + if (page_rec_is_supremum(rec)) { + + rec = page_rec_get_prev_const(rec); + } + + if (page_rec_is_infimum(rec)) { + + rec = page_rec_get_next_const(rec); + } + + if (page_rec_is_supremum(rec)) { + + return(0); + } + + first_page_no = ibuf_rec_get_page_no(mtr, rec); + first_space_id = ibuf_rec_get_space(mtr, rec); + n_pages = 0; + prev_page_no = 0; + prev_space_id = 0; + + /* Go backwards from the first rec until we reach the border of the + 'merge area', or the page start or the limit of storeable pages is + reached */ + + while (!page_rec_is_infimum(rec) && UNIV_LIKELY(n_pages < limit)) { + + rec_page_no = ibuf_rec_get_page_no(mtr, rec); + rec_space_id = 
ibuf_rec_get_space(mtr, rec);
+
+	if (rec_space_id != first_space_id
+	|| (rec_page_no / IBUF_MERGE_AREA)
+	!= (first_page_no / IBUF_MERGE_AREA)) {
+
+	break;
+	}
+
+	if (rec_page_no != prev_page_no
+	|| rec_space_id != prev_space_id) {
+	n_pages++;
+	}
+
+	prev_page_no = rec_page_no;
+	prev_space_id = rec_space_id;
+
+	rec = page_rec_get_prev_const(rec);
+	}
+
+	rec = page_rec_get_next_const(rec);
+
+	/* At the loop start there is no prev page; we mark this with a pair
+	of space id, page no (0, 0) for which there can never be entries in
+	the insert buffer */
+
+	prev_page_no = 0;
+	prev_space_id = 0;
+	sum_volumes = 0;
+	volume_for_page = 0;
+
+	while (*n_stored < limit) {
+	if (page_rec_is_supremum(rec)) {
+	/* When no more records are available, mark this with
+	another 'impossible' pair of space id, page no */
+	rec_page_no = 1;
+	rec_space_id = 0;
+	} else {
+	rec_page_no = ibuf_rec_get_page_no(mtr, rec);
+	rec_space_id = ibuf_rec_get_space(mtr, rec);
+	/* In the system tablespace, the smallest
+	possible secondary index leaf page number is
+	bigger than IBUF_TREE_ROOT_PAGE_NO (4). In
+	other tablespaces, the clustered index tree is
+	created at page 3, which makes page 4 the
+	smallest possible secondary index leaf page
+	(and that only after DROP INDEX). */
+	ut_ad(rec_page_no
+	> (ulint) IBUF_TREE_ROOT_PAGE_NO
+	- (rec_space_id != 0));
+	}
+
+#ifdef UNIV_IBUF_DEBUG
+	ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED);
+#endif
+	if ((rec_space_id != prev_space_id
+	|| rec_page_no != prev_page_no)
+	&& (prev_space_id != 0 || prev_page_no != 0)) {
+
+	if (contract
+	|| (prev_page_no == first_page_no
+	&& prev_space_id == first_space_id)
+	|| (volume_for_page
+	> ((IBUF_MERGE_THRESHOLD - 1)
+	* 4 * UNIV_PAGE_SIZE
+	/ IBUF_PAGE_SIZE_PER_FREE_SPACE)
+	/ IBUF_MERGE_THRESHOLD)) {
+
+	space_ids[*n_stored] = prev_space_id;
+	space_versions[*n_stored]
+	= fil_space_get_version(prev_space_id);
+	page_nos[*n_stored] = prev_page_no;
+
+	(*n_stored)++;
+
+	sum_volumes += volume_for_page;
+	}
+
+	if (rec_space_id != first_space_id
+	|| rec_page_no / IBUF_MERGE_AREA
+	!= first_page_no / IBUF_MERGE_AREA) {
+
+	break;
+	}
+
+	volume_for_page = 0;
+	}
+
+	if (rec_page_no == 1 && rec_space_id == 0) {
+	/* Supremum record */
+
+	break;
+	}
+
+	rec_volume = ibuf_rec_get_volume(mtr, rec);
+
+	volume_for_page += rec_volume;
+
+	prev_page_no = rec_page_no;
+	prev_space_id = rec_space_id;
+
+	rec = page_rec_get_next_const(rec);
+	}
+
+#ifdef UNIV_IBUF_DEBUG
+	ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED);
+#endif
+#if 0
+	fprintf(stderr, "Ibuf merge batch %lu pages %lu volume\n",
+	*n_stored, sum_volumes);
+#endif
+	return(sum_volumes);
+}
+
+/*******************************************************************//**
+Get the matching records for a space id.
+@return current rec or NULL */
+static __attribute__((nonnull, warn_unused_result))
+const rec_t*
+ibuf_get_user_rec(
+/*===============*/
+	btr_pcur_t*	pcur,	/*!< in: the current cursor */
+	mtr_t*	mtr)	/*!< in: mini transaction */
+{
+	do {
+	const rec_t* rec = btr_pcur_get_rec(pcur);
+
+	if (page_rec_is_user_rec(rec)) {
+	return(rec);
+	}
+	} while (btr_pcur_move_to_next(pcur, mtr));
+
+	return(NULL);
+}
+
+/*********************************************************************//**
+Reads page numbers for a space id from an ibuf tree.
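+Consecutive records for the same page contribute a single entry to the
+output arrays, so each page number is stored at most once per run.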
+@return a lower limit for the combined volume of records which will be +merged */ +static __attribute__((nonnull, warn_unused_result)) +ulint +ibuf_get_merge_pages( +/*=================*/ + btr_pcur_t* pcur, /*!< in/out: cursor */ + ulint space, /*!< in: space for which to merge */ + ulint limit, /*!< in: max page numbers to read */ + ulint* pages, /*!< out: pages read */ + ulint* spaces, /*!< out: spaces read */ + ib_int64_t* versions,/*!< out: space versions read */ + ulint* n_pages,/*!< out: number of pages read */ + mtr_t* mtr) /*!< in: mini transaction */ +{ + const rec_t* rec; + ulint volume = 0; + ib_int64_t version = fil_space_get_version(space); + + ut_a(space != ULINT_UNDEFINED); + + *n_pages = 0; + + while ((rec = ibuf_get_user_rec(pcur, mtr)) != 0 + && ibuf_rec_get_space(mtr, rec) == space + && *n_pages < limit) { + + ulint page_no = ibuf_rec_get_page_no(mtr, rec); + + if (*n_pages == 0 || pages[*n_pages - 1] != page_no) { + spaces[*n_pages] = space; + pages[*n_pages] = page_no; + versions[*n_pages] = version; + ++*n_pages; + } + + volume += ibuf_rec_get_volume(mtr, rec); + + btr_pcur_move_to_next(pcur, mtr); + } + + return(volume); +} + +/*********************************************************************//** +Contracts insert buffer trees by reading pages to the buffer pool. +@return a lower limit for the combined size in bytes of entries which +will be merged from ibuf trees to the pages read, 0 if ibuf is +empty */ +static +ulint +ibuf_merge_pages( +/*=============*/ + ulint* n_pages, /*!< out: number of pages to which merged */ + bool sync) /*!< in: true if the caller wants to wait for + the issued read with the highest tablespace + address to complete */ +{ + mtr_t mtr; + btr_pcur_t pcur; + ulint sum_sizes; + ulint page_nos[IBUF_MAX_N_PAGES_MERGED]; + ulint space_ids[IBUF_MAX_N_PAGES_MERGED]; + ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED]; + + *n_pages = 0; + + ibuf_mtr_start(&mtr); + + /* Open a cursor to a randomly chosen leaf of the tree, at a random + position within the leaf */ + + btr_pcur_open_at_rnd_pos(ibuf->index, BTR_SEARCH_LEAF, &pcur, &mtr); + + ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index)); + + if (page_is_empty(btr_pcur_get_page(&pcur))) { + /* If a B-tree page is empty, it must be the root page + and the whole B-tree must be empty. InnoDB does not + allow empty B-tree pages other than the root. */ + ut_ad(ibuf->empty); + ut_ad(page_get_space_id(btr_pcur_get_page(&pcur)) + == IBUF_SPACE_ID); + ut_ad(page_get_page_no(btr_pcur_get_page(&pcur)) + == FSP_IBUF_TREE_ROOT_PAGE_NO); + + ibuf_mtr_commit(&mtr); + btr_pcur_close(&pcur); + + return(0); + } + + sum_sizes = ibuf_get_merge_page_nos(TRUE, + btr_pcur_get_rec(&pcur), &mtr, + space_ids, space_versions, + page_nos, n_pages); +#if 0 /* defined UNIV_IBUF_DEBUG */ + fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n", + sync, *n_pages, sum_sizes); +#endif + ibuf_mtr_commit(&mtr); + btr_pcur_close(&pcur); + + buf_read_ibuf_merge_pages( + sync, space_ids, space_versions, page_nos, *n_pages); + + return(sum_sizes + 1); +} + +/*********************************************************************//** +Get the table instance from the table id. 
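+The table is opened with dict_table_open_on_id() under a shared
+dict_operation_lock; a non-NULL result must be released with
+dict_table_close(), as ibuf_merge() below does.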
+@return table instance */ +static __attribute__((warn_unused_result)) +dict_table_t* +ibuf_get_table( +/*===========*/ + table_id_t table_id) /*!< in: valid table id */ +{ + rw_lock_s_lock_func(&dict_operation_lock, 0, __FILE__, __LINE__); + + dict_table_t* table = dict_table_open_on_id( + table_id, FALSE, DICT_TABLE_OP_NORMAL); + + rw_lock_s_unlock_gen(&dict_operation_lock, 0); + + return(table); +} + +/*********************************************************************//** +Contracts insert buffer trees by reading pages to the buffer pool. +@return a lower limit for the combined size in bytes of entries which +will be merged from ibuf trees to the pages read, 0 if ibuf is +empty */ +static +ulint +ibuf_merge_space( +/*=============*/ + ulint space, /*!< in: tablespace id to merge */ + ulint* n_pages)/*!< out: number of pages to which merged */ +{ + mtr_t mtr; + btr_pcur_t pcur; + mem_heap_t* heap = mem_heap_create(512); + dtuple_t* tuple = ibuf_search_tuple_build(space, 0, heap); + + ibuf_mtr_start(&mtr); + + /* Position the cursor on the first matching record. */ + + btr_pcur_open( + ibuf->index, tuple, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, + &mtr); + + mem_heap_free(heap); + + ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index)); + + ulint sum_sizes = 0; + ulint pages[IBUF_MAX_N_PAGES_MERGED]; + ulint spaces[IBUF_MAX_N_PAGES_MERGED]; + ib_int64_t versions[IBUF_MAX_N_PAGES_MERGED]; + + if (page_is_empty(btr_pcur_get_page(&pcur))) { + /* If a B-tree page is empty, it must be the root page + and the whole B-tree must be empty. InnoDB does not + allow empty B-tree pages other than the root. */ + ut_ad(ibuf->empty); + ut_ad(page_get_space_id(btr_pcur_get_page(&pcur)) + == IBUF_SPACE_ID); + ut_ad(page_get_page_no(btr_pcur_get_page(&pcur)) + == FSP_IBUF_TREE_ROOT_PAGE_NO); + + } else { + + sum_sizes = ibuf_get_merge_pages( + &pcur, space, IBUF_MAX_N_PAGES_MERGED, + &pages[0], &spaces[0], &versions[0], n_pages, + &mtr); + + ++sum_sizes; + } + + ibuf_mtr_commit(&mtr); + + btr_pcur_close(&pcur); + + if (sum_sizes > 0) { + + ut_a(*n_pages > 0 || sum_sizes == 1); + +#ifdef UNIV_DEBUG + ut_ad(*n_pages <= UT_ARR_SIZE(pages)); + + for (ulint i = 0; i < *n_pages; ++i) { + ut_ad(spaces[i] == space); + ut_ad(i == 0 || versions[i] == versions[i - 1]); + } +#endif /* UNIV_DEBUG */ + + buf_read_ibuf_merge_pages( + true, spaces, versions, pages, *n_pages); + } + + return(sum_sizes); +} + +/*********************************************************************//** +Contracts insert buffer trees by reading pages to the buffer pool. +@return a lower limit for the combined size in bytes of entries which +will be merged from ibuf trees to the pages read, 0 if ibuf is +empty */ +static __attribute__((nonnull, warn_unused_result)) +ulint +ibuf_merge( +/*=======*/ + table_id_t table_id, /*!< in: if merge should be + done only for a specific + table, for all tables this + should be 0 */ + ulint* n_pages, /*!< out: number of pages to + which merged */ + bool sync) /*!< in: TRUE if the caller + wants to wait for the issued + read with the highest + tablespace address to complete */ +{ + dict_table_t* table; + + *n_pages = 0; + + /* We perform a dirty read of ibuf->empty, without latching + the insert buffer root page. We trust this dirty read except + when a slow shutdown is being executed. During a slow + shutdown, the insert buffer merge must be completed. 
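+	Hence the dirty read is trusted only while srv_shutdown_state
+	is zero; see the condition below.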
 */
+
+	if (ibuf->empty && !srv_shutdown_state) {
+	return(0);
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+	} else if (ibuf_debug) {
+	return(0);
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+	} else if (table_id == 0) {
+	return(ibuf_merge_pages(n_pages, sync));
+	} else if ((table = ibuf_get_table(table_id)) == 0) {
+	/* Table has been dropped. */
+	return(0);
+	}
+
+	ulint	volume = ibuf_merge_space(table->space, n_pages);
+
+	dict_table_close(table, FALSE, FALSE);
+
+	return(volume);
+}
+
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+static
+ulint
+ibuf_contract(
+/*==========*/
+	ibool	sync)	/*!< in: TRUE if the caller wants to wait for the
+	issued read with the highest tablespace address
+	to complete */
+{
+	ulint	n_pages;
+
+	return(ibuf_merge(0, &n_pages, sync));
+}
+
+/*********************************************************************//**
+Contracts insert buffer trees by reading pages to the buffer pool.
+@return a lower limit for the combined size in bytes of entries which
+will be merged from ibuf trees to the pages read, 0 if ibuf is
+empty */
+UNIV_INTERN
+ulint
+ibuf_contract_in_background(
+/*========================*/
+	table_id_t	table_id,	/*!< in: if merge should be done only
+	for a specific table, for all tables
+	this should be 0 */
+	ibool	full)	/*!< in: TRUE if the caller wants to
+	do a full contract based on PCT_IO(100).
+	If FALSE then the size of contract
+	batch is determined based on the
+	current size of the ibuf tree. */
+{
+	ulint	sum_bytes = 0;
+	ulint	sum_pages = 0;
+	ulint	n_pag2;
+	ulint	n_pages;
+
+#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG
+	if (srv_ibuf_disable_background_merge && table_id == 0) {
+	return(0);
+	}
+#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */
+
+	if (full) {
+	/* Caller has requested a full batch */
+	n_pages = PCT_IO(100);
+	} else {
+	/* By default we do a batch of 5% of the io_capacity */
+	n_pages = PCT_IO(5);
+
+	mutex_enter(&ibuf_mutex);
+
+	/* If the ibuf->size is more than half the max_size
+	then we make more aggressive contraction.
+	+1 is to avoid division by zero. */
+	if (ibuf->size > ibuf->max_size / 2) {
+	ulint diff = ibuf->size - ibuf->max_size / 2;
+	n_pages += PCT_IO((diff * 100)
+	/ (ibuf->max_size + 1));
+	}
+
+	mutex_exit(&ibuf_mutex);
+	}
+
+	while (sum_pages < n_pages) {
+	ulint	n_bytes;
+
+	n_bytes = ibuf_merge(table_id, &n_pag2, FALSE);
+
+	if (n_bytes == 0) {
+	return(sum_bytes);
+	}
+
+	sum_bytes += n_bytes;
+	sum_pages += n_pag2;
+	}
+
+	return(sum_bytes);
+}
+
+/*********************************************************************//**
+Contract insert buffer trees after insert if they are too big. */
+UNIV_INLINE
+void
+ibuf_contract_after_insert(
+/*=======================*/
+	ulint	entry_size)	/*!< in: size of a record which was inserted
+	into an ibuf tree */
+{
+	ibool	sync;
+	ulint	sum_sizes;
+	ulint	size;
+	ulint	max_size;
+
+	/* Perform dirty reads of ibuf->size and ibuf->max_size, to
+	reduce ibuf_mutex contention. ibuf->max_size remains constant
+	after ibuf_init_at_db_start(), but ibuf->size should be
+	protected by ibuf_mutex. Given that ibuf->size fits in a
+	machine word, this should be OK; at worst we are doing some
+	excessive ibuf_contract() or occasionally skipping an
+	ibuf_contract().
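+	Contraction starts once size exceeds (max_size
+	+ IBUF_CONTRACT_ON_INSERT_NON_SYNC) and is made synchronous at
+	(max_size + IBUF_CONTRACT_ON_INSERT_SYNC); the loop below then
+	merges at least entry_size bytes.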
*/ + size = ibuf->size; + max_size = ibuf->max_size; + + if (size < max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) { + return; + } + + sync = (size >= max_size + IBUF_CONTRACT_ON_INSERT_SYNC); + + /* Contract at least entry_size many bytes */ + sum_sizes = 0; + size = 1; + + do { + + size = ibuf_contract(sync); + sum_sizes += size; + } while (size > 0 && sum_sizes < entry_size); +} + +/*********************************************************************//** +Determine if an insert buffer record has been encountered already. +@return TRUE if a new record, FALSE if possible duplicate */ +static +ibool +ibuf_get_volume_buffered_hash( +/*==========================*/ + const rec_t* rec, /*!< in: ibuf record in post-4.1 format */ + const byte* types, /*!< in: fields */ + const byte* data, /*!< in: start of user record data */ + ulint comp, /*!< in: 0=ROW_FORMAT=REDUNDANT, + nonzero=ROW_FORMAT=COMPACT */ + ulint* hash, /*!< in/out: hash array */ + ulint size) /*!< in: number of elements in hash array */ +{ + ulint len; + ulint fold; + ulint bitmask; + + len = ibuf_rec_get_size( + rec, types, + rec_get_n_fields_old(rec) - IBUF_REC_FIELD_USER, comp); + fold = ut_fold_binary(data, len); + + hash += (fold / (CHAR_BIT * sizeof *hash)) % size; + bitmask = static_cast<ulint>( + 1 << (fold % (CHAR_BIT * sizeof(*hash)))); + + if (*hash & bitmask) { + + return(FALSE); + } + + /* We have not seen this record yet. Insert it. */ + *hash |= bitmask; + + return(TRUE); +} + +#ifdef UNIV_DEBUG +# define ibuf_get_volume_buffered_count(mtr,rec,hash,size,n_recs) \ + ibuf_get_volume_buffered_count_func(mtr,rec,hash,size,n_recs) +#else /* UNIV_DEBUG */ +# define ibuf_get_volume_buffered_count(mtr,rec,hash,size,n_recs) \ + ibuf_get_volume_buffered_count_func(rec,hash,size,n_recs) +#endif +/*********************************************************************//** +Update the estimate of the number of records on a page, and +get the space taken by merging the buffered record to the index page. +@return size of index record in bytes + an upper limit of the space +taken in the page directory */ +static +ulint +ibuf_get_volume_buffered_count_func( +/*================================*/ +#ifdef UNIV_DEBUG + mtr_t* mtr, /*!< in: mini-transaction owning rec */ +#endif /* UNIV_DEBUG */ + const rec_t* rec, /*!< in: insert buffer record */ + ulint* hash, /*!< in/out: hash array */ + ulint size, /*!< in: number of elements in hash array */ + lint* n_recs) /*!< in/out: estimated number of records + on the page that rec points to */ +{ + ulint len; + ibuf_op_t ibuf_op; + const byte* types; + ulint n_fields; + + ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX) + || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); + ut_ad(ibuf_inside(mtr)); + + n_fields = rec_get_n_fields_old(rec); + ut_ad(n_fields > IBUF_REC_FIELD_USER); + n_fields -= IBUF_REC_FIELD_USER; + + rec_get_nth_field_offs_old(rec, 1, &len); + /* This function is only invoked when buffering new + operations. All pre-4.1 records should have been merged + when the database was started up. */ + ut_a(len == 1); + + if (rec_get_deleted_flag(rec, 0)) { + /* This record has been merged already, + but apparently the system crashed before + the change was discarded from the buffer. + Pretend that the record does not exist. 
 */
+	return(0);
+	}
+
+	types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
+
+	switch (UNIV_EXPECT(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
+	IBUF_REC_INFO_SIZE)) {
+	default:
+	ut_error;
+	case 0:
+	/* This ROW_FORMAT=REDUNDANT record does not include an
+	operation counter. Exclude it from the *n_recs,
+	because deletes cannot be buffered if there are
+	old-style inserts buffered for the page. */
+
+	len = ibuf_rec_get_size(rec, types, n_fields, 0);
+
+	return(len
+	+ rec_get_converted_extra_size(len, n_fields, 0)
+	+ page_dir_calc_reserved_space(1));
+	case 1:
+	/* This ROW_FORMAT=COMPACT record does not include an
+	operation counter. Exclude it from the *n_recs,
+	because deletes cannot be buffered if there are
+	old-style inserts buffered for the page. */
+	goto get_volume_comp;
+
+	case IBUF_REC_INFO_SIZE:
+	ibuf_op = (ibuf_op_t) types[IBUF_REC_OFFSET_TYPE];
+	break;
+	}
+
+	switch (ibuf_op) {
+	case IBUF_OP_INSERT:
+	/* Inserts can be done by updating a delete-marked record.
+	Because delete-mark and insert operations can be pointing to
+	the same records, we must not count duplicates. */
+	case IBUF_OP_DELETE_MARK:
+	/* There must be a record to delete-mark.
+	See if this record has already been buffered. */
+	if (n_recs && ibuf_get_volume_buffered_hash(
+	rec, types + IBUF_REC_INFO_SIZE,
+	types + len,
+	types[IBUF_REC_OFFSET_FLAGS] & IBUF_REC_COMPACT,
+	hash, size)) {
+	(*n_recs)++;
+	}
+
+	if (ibuf_op == IBUF_OP_DELETE_MARK) {
+	/* Setting the delete-mark flag does not
+	affect the available space on the page. */
+	return(0);
+	}
+	break;
+	case IBUF_OP_DELETE:
+	/* A record will be removed from the page. */
+	if (n_recs) {
+	(*n_recs)--;
+	}
+	/* While deleting a record actually frees up space,
+	we have to play it safe and pretend that it takes no
+	additional space (the record might not exist, etc.). */
+	return(0);
+	default:
+	ut_error;
+	}
+
+	ut_ad(ibuf_op == IBUF_OP_INSERT);
+
+get_volume_comp:
+	{
+	dtuple_t*	entry;
+	ulint	volume;
+	dict_index_t*	dummy_index;
+	mem_heap_t*	heap = mem_heap_create(500);
+
+	entry = ibuf_build_entry_from_ibuf_rec(
+	mtr, rec, heap, &dummy_index);
+
+	volume = rec_get_converted_size(dummy_index, entry, 0);
+
+	ibuf_dummy_index_free(dummy_index);
+	mem_heap_free(heap);
+
+	return(volume + page_dir_calc_reserved_space(1));
+	}
+}
+
+/*********************************************************************//**
+Gets an upper limit for the combined size of entries buffered in the insert
+buffer for a given page.
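+The leaf page under pcur is scanned in both directions, peeking at most
+one neighbouring leaf on each side; if the buffered entries extend
+further than that, the scan gives up and the caller receives the
+pessimistic estimate UNIV_PAGE_SIZE.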
+@return upper limit for the volume of buffered inserts for the index +page, in bytes; UNIV_PAGE_SIZE, if the entries for the index page span +several pages in the insert buffer */ +static +ulint +ibuf_get_volume_buffered( +/*=====================*/ + const btr_pcur_t*pcur, /*!< in: pcur positioned at a place in an + insert buffer tree where we would insert an + entry for the index page whose number is + page_no, latch mode has to be BTR_MODIFY_PREV + or BTR_MODIFY_TREE */ + ulint space, /*!< in: space id */ + ulint page_no,/*!< in: page number of an index page */ + lint* n_recs, /*!< in/out: minimum number of records on the + page after the buffered changes have been + applied, or NULL to disable the counting */ + mtr_t* mtr) /*!< in: mini-transaction of pcur */ +{ + ulint volume; + const rec_t* rec; + const page_t* page; + ulint prev_page_no; + const page_t* prev_page; + ulint next_page_no; + const page_t* next_page; + /* bitmap of buffered recs */ + ulint hash_bitmap[128 / sizeof(ulint)]; + + ut_ad((pcur->latch_mode == BTR_MODIFY_PREV) + || (pcur->latch_mode == BTR_MODIFY_TREE)); + + /* Count the volume of inserts earlier in the alphabetical order than + pcur */ + + volume = 0; + + if (n_recs) { + memset(hash_bitmap, 0, sizeof hash_bitmap); + } + + rec = btr_pcur_get_rec(pcur); + page = page_align(rec); + ut_ad(page_validate(page, ibuf->index)); + + if (page_rec_is_supremum(rec)) { + rec = page_rec_get_prev_const(rec); + } + + for (; !page_rec_is_infimum(rec); + rec = page_rec_get_prev_const(rec)) { + ut_ad(page_align(rec) == page); + + if (page_no != ibuf_rec_get_page_no(mtr, rec) + || space != ibuf_rec_get_space(mtr, rec)) { + + goto count_later; + } + + volume += ibuf_get_volume_buffered_count( + mtr, rec, + hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs); + } + + /* Look at the previous page */ + + prev_page_no = btr_page_get_prev(page, mtr); + + if (prev_page_no == FIL_NULL) { + + goto count_later; + } + + { + buf_block_t* block; + + block = buf_page_get( + IBUF_SPACE_ID, 0, prev_page_no, RW_X_LATCH, + mtr); + + buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE); + + + prev_page = buf_block_get_frame(block); + ut_ad(page_validate(prev_page, ibuf->index)); + } + +#ifdef UNIV_BTR_DEBUG + ut_a(btr_page_get_next(prev_page, mtr) == page_get_page_no(page)); +#endif /* UNIV_BTR_DEBUG */ + + rec = page_get_supremum_rec(prev_page); + rec = page_rec_get_prev_const(rec); + + for (;; rec = page_rec_get_prev_const(rec)) { + ut_ad(page_align(rec) == prev_page); + + if (page_rec_is_infimum(rec)) { + + /* We cannot go to yet a previous page, because we + do not have the x-latch on it, and cannot acquire one + because of the latching order: we have to give up */ + + return(UNIV_PAGE_SIZE); + } + + if (page_no != ibuf_rec_get_page_no(mtr, rec) + || space != ibuf_rec_get_space(mtr, rec)) { + + goto count_later; + } + + volume += ibuf_get_volume_buffered_count( + mtr, rec, + hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs); + } + +count_later: + rec = btr_pcur_get_rec(pcur); + + if (!page_rec_is_supremum(rec)) { + rec = page_rec_get_next_const(rec); + } + + for (; !page_rec_is_supremum(rec); + rec = page_rec_get_next_const(rec)) { + if (page_no != ibuf_rec_get_page_no(mtr, rec) + || space != ibuf_rec_get_space(mtr, rec)) { + + return(volume); + } + + volume += ibuf_get_volume_buffered_count( + mtr, rec, + hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs); + } + + /* Look at the next page */ + + next_page_no = btr_page_get_next(page, mtr); + + if (next_page_no == FIL_NULL) { + + return(volume); + } + + { + 
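+	/* Buffer-fix and x-latch the next leaf page, as was
+	done for the previous page above. */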
buf_block_t* block; + + block = buf_page_get( + IBUF_SPACE_ID, 0, next_page_no, RW_X_LATCH, + mtr); + + buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE); + + + next_page = buf_block_get_frame(block); + ut_ad(page_validate(next_page, ibuf->index)); + } + +#ifdef UNIV_BTR_DEBUG + ut_a(btr_page_get_prev(next_page, mtr) == page_get_page_no(page)); +#endif /* UNIV_BTR_DEBUG */ + + rec = page_get_infimum_rec(next_page); + rec = page_rec_get_next_const(rec); + + for (;; rec = page_rec_get_next_const(rec)) { + ut_ad(page_align(rec) == next_page); + + if (page_rec_is_supremum(rec)) { + + /* We give up */ + + return(UNIV_PAGE_SIZE); + } + + if (page_no != ibuf_rec_get_page_no(mtr, rec) + || space != ibuf_rec_get_space(mtr, rec)) { + + return(volume); + } + + volume += ibuf_get_volume_buffered_count( + mtr, rec, + hash_bitmap, UT_ARR_SIZE(hash_bitmap), n_recs); + } +} + +/*********************************************************************//** +Reads the biggest tablespace id from the high end of the insert buffer +tree and updates the counter in fil_system. */ +UNIV_INTERN +void +ibuf_update_max_tablespace_id(void) +/*===============================*/ +{ + ulint max_space_id; + const rec_t* rec; + const byte* field; + ulint len; + btr_pcur_t pcur; + mtr_t mtr; + + ut_a(!dict_table_is_comp(ibuf->index->table)); + + ibuf_mtr_start(&mtr); + + btr_pcur_open_at_index_side( + false, ibuf->index, BTR_SEARCH_LEAF, &pcur, true, 0, &mtr); + + ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index)); + + btr_pcur_move_to_prev(&pcur, &mtr); + + if (btr_pcur_is_before_first_on_page(&pcur)) { + /* The tree is empty */ + + max_space_id = 0; + } else { + rec = btr_pcur_get_rec(&pcur); + + field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len); + + ut_a(len == 4); + + max_space_id = mach_read_from_4(field); + } + + ibuf_mtr_commit(&mtr); + + /* printf("Maximum space id in insert buffer %lu\n", max_space_id); */ + + fil_set_max_space_id_if_bigger(max_space_id); +} + +#ifdef UNIV_DEBUG +# define ibuf_get_entry_counter_low(mtr,rec,space,page_no) \ + ibuf_get_entry_counter_low_func(mtr,rec,space,page_no) +#else /* UNIV_DEBUG */ +# define ibuf_get_entry_counter_low(mtr,rec,space,page_no) \ + ibuf_get_entry_counter_low_func(rec,space,page_no) +#endif +/****************************************************************//** +Helper function for ibuf_get_entry_counter_func. Checks if rec is for +(space, page_no), and if so, reads counter value from it and returns +that + 1. +@retval ULINT_UNDEFINED if the record does not contain any counter +@retval 0 if the record is not for (space, page_no) +@retval 1 + previous counter value, otherwise */ +static +ulint +ibuf_get_entry_counter_low_func( +/*============================*/ +#ifdef UNIV_DEBUG + mtr_t* mtr, /*!< in: mini-transaction of rec */ +#endif /* UNIV_DEBUG */ + const rec_t* rec, /*!< in: insert buffer record */ + ulint space, /*!< in: space id */ + ulint page_no) /*!< in: page number */ +{ + ulint counter; + const byte* field; + ulint len; + + ut_ad(ibuf_inside(mtr)); + ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX) + || mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_S_FIX)); + ut_ad(rec_get_n_fields_old(rec) > 2); + + field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len); + + ut_a(len == 1); + + /* Check the tablespace identifier. */ + field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len); + + ut_a(len == 4); + + if (mach_read_from_4(field) != space) { + + return(0); + } + + /* Check the page offset. 
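+	Records for several pages of the same tablespace can
+	reside on one insert buffer leaf, so a mismatch here only
+	means that rec is for some other page.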
*/ + field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_PAGE, &len); + ut_a(len == 4); + + if (mach_read_from_4(field) != page_no) { + + return(0); + } + + /* Check if the record contains a counter field. */ + field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len); + + switch (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) { + default: + ut_error; + case 0: /* ROW_FORMAT=REDUNDANT */ + case 1: /* ROW_FORMAT=COMPACT */ + return(ULINT_UNDEFINED); + + case IBUF_REC_INFO_SIZE: + counter = mach_read_from_2(field + IBUF_REC_OFFSET_COUNTER); + ut_a(counter < 0xFFFF); + return(counter + 1); + } +} + +#ifdef UNIV_DEBUG +# define ibuf_get_entry_counter(space,page_no,rec,mtr,exact_leaf) \ + ibuf_get_entry_counter_func(space,page_no,rec,mtr,exact_leaf) +#else /* UNIV_DEBUG */ +# define ibuf_get_entry_counter(space,page_no,rec,mtr,exact_leaf) \ + ibuf_get_entry_counter_func(space,page_no,rec,exact_leaf) +#endif + +/****************************************************************//** +Calculate the counter field for an entry based on the current +last record in ibuf for (space, page_no). +@return the counter field, or ULINT_UNDEFINED +if we should abort this insertion to ibuf */ +static +ulint +ibuf_get_entry_counter_func( +/*========================*/ + ulint space, /*!< in: space id of entry */ + ulint page_no, /*!< in: page number of entry */ + const rec_t* rec, /*!< in: the record preceding the + insertion point */ +#ifdef UNIV_DEBUG + mtr_t* mtr, /*!< in: mini-transaction */ +#endif /* UNIV_DEBUG */ + ibool only_leaf) /*!< in: TRUE if this is the only + leaf page that can contain entries + for (space,page_no), that is, there + was no exact match for (space,page_no) + in the node pointer */ +{ + ut_ad(ibuf_inside(mtr)); + ut_ad(mtr_memo_contains_page(mtr, rec, MTR_MEMO_PAGE_X_FIX)); + ut_ad(page_validate(page_align(rec), ibuf->index)); + + if (page_rec_is_supremum(rec)) { + /* This is just for safety. The record should be a + page infimum or a user record. */ + ut_ad(0); + return(ULINT_UNDEFINED); + } else if (!page_rec_is_infimum(rec)) { + return(ibuf_get_entry_counter_low(mtr, rec, space, page_no)); + } else if (only_leaf + || fil_page_get_prev(page_align(rec)) == FIL_NULL) { + /* The parent node pointer did not contain the + searched for (space, page_no), which means that the + search ended on the correct page regardless of the + counter value, and since we're at the infimum record, + there are no existing records. */ + return(0); + } else { + /* We used to read the previous page here. It would + break the latching order, because the caller has + buffer-fixed an insert buffer bitmap page. */ + return(ULINT_UNDEFINED); + } +} + +/*********************************************************************//** +Buffer an operation in the insert/delete buffer, instead of doing it +directly to the disk page, if this is possible. 
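+The policy decisions (operation type vs. ibuf_use, record size) are made
+by the caller, ibuf_insert() below; this function does the space
+accounting and the actual buffering.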
+@return DB_SUCCESS, DB_STRONG_FAIL or other error */ +static __attribute__((nonnull, warn_unused_result)) +dberr_t +ibuf_insert_low( +/*============*/ + ulint mode, /*!< in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */ + ibuf_op_t op, /*!< in: operation type */ + ibool no_counter, + /*!< in: TRUE=use 5.0.3 format; + FALSE=allow delete buffering */ + const dtuple_t* entry, /*!< in: index entry to insert */ + ulint entry_size, + /*!< in: rec_get_converted_size(index, entry) */ + dict_index_t* index, /*!< in: index where to insert; must not be + unique or clustered */ + ulint space, /*!< in: space id where to insert */ + ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ + ulint page_no,/*!< in: page number where to insert */ + que_thr_t* thr) /*!< in: query thread */ +{ + big_rec_t* dummy_big_rec; + btr_pcur_t pcur; + btr_cur_t* cursor; + dtuple_t* ibuf_entry; + mem_heap_t* offsets_heap = NULL; + mem_heap_t* heap; + ulint* offsets = NULL; + ulint buffered; + lint min_n_recs; + rec_t* ins_rec; + ibool old_bit_value; + page_t* bitmap_page; + buf_block_t* block; + page_t* root; + dberr_t err; + ibool do_merge; + ulint space_ids[IBUF_MAX_N_PAGES_MERGED]; + ib_int64_t space_versions[IBUF_MAX_N_PAGES_MERGED]; + ulint page_nos[IBUF_MAX_N_PAGES_MERGED]; + ulint n_stored; + mtr_t mtr; + mtr_t bitmap_mtr; + + ut_a(!dict_index_is_clust(index)); + ut_ad(dtuple_check_typed(entry)); + ut_ad(ut_is_2pow(zip_size)); + ut_ad(!no_counter || op == IBUF_OP_INSERT); + ut_a(op < IBUF_OP_COUNT); + + do_merge = FALSE; + + /* Perform dirty reads of ibuf->size and ibuf->max_size, to + reduce ibuf_mutex contention. Given that ibuf->max_size and + ibuf->size fit in a machine word, this should be OK; at worst + we are doing some excessive ibuf_contract() or occasionally + skipping an ibuf_contract(). */ + if (ibuf->max_size == 0) { + return(DB_STRONG_FAIL); + } + + if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) { + /* Insert buffer is now too big, contract it but do not try + to insert */ + + +#ifdef UNIV_IBUF_DEBUG + fputs("Ibuf too big\n", stderr); +#endif + /* Use synchronous contract (== TRUE) */ + ibuf_contract(TRUE); + + return(DB_STRONG_FAIL); + } + + heap = mem_heap_create(1024); + + /* Build the entry which contains the space id and the page number + as the first fields and the type information for other fields, and + which will be inserted to the insert buffer. Using a counter value + of 0xFFFF we find the last record for (space, page_no), from which + we can then read the counter value N and use N + 1 in the record we + insert. (We patch the ibuf_entry's counter field to the correct + value just before actually inserting the entry.) */ + + ibuf_entry = ibuf_entry_build( + op, index, entry, space, page_no, + no_counter ? ULINT_UNDEFINED : 0xFFFF, heap); + + /* Open a cursor to the insert buffer tree to calculate if we can add + the new entry to it without exceeding the free space limit for the + page. 
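+	For BTR_MODIFY_TREE, enough free ibuf pages are reserved first (see
+	the loop below), because a pessimistic insert may need to grow the
+	ibuf tree.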
*/ + + if (mode == BTR_MODIFY_TREE) { + for (;;) { + mutex_enter(&ibuf_pessimistic_insert_mutex); + mutex_enter(&ibuf_mutex); + + if (UNIV_LIKELY(ibuf_data_enough_free_for_insert())) { + + break; + } + + mutex_exit(&ibuf_mutex); + mutex_exit(&ibuf_pessimistic_insert_mutex); + + if (UNIV_UNLIKELY(!ibuf_add_free_page())) { + + mem_heap_free(heap); + return(DB_STRONG_FAIL); + } + } + } + + ibuf_mtr_start(&mtr); + + btr_pcur_open(ibuf->index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr); + ut_ad(page_validate(btr_pcur_get_page(&pcur), ibuf->index)); + + /* Find out the volume of already buffered inserts for the same index + page */ + min_n_recs = 0; + buffered = ibuf_get_volume_buffered(&pcur, space, page_no, + op == IBUF_OP_DELETE + ? &min_n_recs + : NULL, &mtr); + + if (op == IBUF_OP_DELETE + && (min_n_recs < 2 + || buf_pool_watch_occurred(space, page_no))) { + /* The page could become empty after the record is + deleted, or the page has been read in to the buffer + pool. Refuse to buffer the operation. */ + + /* The buffer pool watch is needed for IBUF_OP_DELETE + because of latching order considerations. We can + check buf_pool_watch_occurred() only after latching + the insert buffer B-tree pages that contain buffered + changes for the page. We never buffer IBUF_OP_DELETE, + unless some IBUF_OP_INSERT or IBUF_OP_DELETE_MARK have + been previously buffered for the page. Because there + are buffered operations for the page, the insert + buffer B-tree page latches held by mtr will guarantee + that no changes for the user page will be merged + before mtr_commit(&mtr). We must not mtr_commit(&mtr) + until after the IBUF_OP_DELETE has been buffered. */ + +fail_exit: + if (mode == BTR_MODIFY_TREE) { + mutex_exit(&ibuf_mutex); + mutex_exit(&ibuf_pessimistic_insert_mutex); + } + + err = DB_STRONG_FAIL; + goto func_exit; + } + + /* After this point, the page could still be loaded to the + buffer pool, but we do not have to care about it, since we are + holding a latch on the insert buffer leaf page that contains + buffered changes for (space, page_no). If the page enters the + buffer pool, buf_page_io_complete() for (space, page_no) will + have to acquire a latch on the same insert buffer leaf page, + which it cannot do until we have buffered the IBUF_OP_DELETE + and done mtr_commit(&mtr) to release the latch. */ + +#ifdef UNIV_IBUF_COUNT_DEBUG + ut_a((buffered == 0) || ibuf_count_get(space, page_no)); +#endif + ibuf_mtr_start(&bitmap_mtr); + + bitmap_page = ibuf_bitmap_get_map_page(space, page_no, + zip_size, &bitmap_mtr); + + /* We check if the index page is suitable for buffered entries */ + + if (buf_page_peek(space, page_no) + || lock_rec_expl_exist_on_page(space, page_no)) { + + ibuf_mtr_commit(&bitmap_mtr); + goto fail_exit; + } + + if (op == IBUF_OP_INSERT) { + ulint bits = ibuf_bitmap_page_get_bits( + bitmap_page, page_no, zip_size, IBUF_BITMAP_FREE, + &bitmap_mtr); + + if (buffered + entry_size + page_dir_calc_reserved_space(1) + > ibuf_index_page_calc_free_from_bits(zip_size, bits)) { + /* Release the bitmap page latch early. */ + ibuf_mtr_commit(&bitmap_mtr); + + /* It may not fit */ + do_merge = TRUE; + + ibuf_get_merge_page_nos(FALSE, + btr_pcur_get_rec(&pcur), &mtr, + space_ids, space_versions, + page_nos, &n_stored); + + goto fail_exit; + } + } + + if (!no_counter) { + /* Patch correct counter value to the entry to + insert. This can change the insert position, which can + result in the need to abort in some cases. 
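+		For example, if the last buffered record for (space,
+		page_no) carries counter N, the entry is patched to use
+		N + 1; if no counter value can be determined, the
+		operation falls back to fail_exit below.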
*/ + ulint counter = ibuf_get_entry_counter( + space, page_no, btr_pcur_get_rec(&pcur), &mtr, + btr_pcur_get_btr_cur(&pcur)->low_match + < IBUF_REC_FIELD_METADATA); + dfield_t* field; + + if (counter == ULINT_UNDEFINED) { + ibuf_mtr_commit(&bitmap_mtr); + goto fail_exit; + } + + field = dtuple_get_nth_field( + ibuf_entry, IBUF_REC_FIELD_METADATA); + mach_write_to_2( + (byte*) dfield_get_data(field) + + IBUF_REC_OFFSET_COUNTER, counter); + } + + /* Set the bitmap bit denoting that the insert buffer contains + buffered entries for this index page, if the bit is not set yet */ + + old_bit_value = ibuf_bitmap_page_get_bits( + bitmap_page, page_no, zip_size, + IBUF_BITMAP_BUFFERED, &bitmap_mtr); + + if (!old_bit_value) { + ibuf_bitmap_page_set_bits(bitmap_page, page_no, zip_size, + IBUF_BITMAP_BUFFERED, TRUE, + &bitmap_mtr); + } + + ibuf_mtr_commit(&bitmap_mtr); + + cursor = btr_pcur_get_btr_cur(&pcur); + + if (mode == BTR_MODIFY_PREV) { + err = btr_cur_optimistic_insert( + BTR_NO_LOCKING_FLAG, + cursor, &offsets, &offsets_heap, + ibuf_entry, &ins_rec, + &dummy_big_rec, 0, thr, &mtr); + block = btr_cur_get_block(cursor); + ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID); + + /* If this is the root page, update ibuf->empty. */ + if (UNIV_UNLIKELY(buf_block_get_page_no(block) + == FSP_IBUF_TREE_ROOT_PAGE_NO)) { + const page_t* root = buf_block_get_frame(block); + + ut_ad(page_get_space_id(root) == IBUF_SPACE_ID); + ut_ad(page_get_page_no(root) + == FSP_IBUF_TREE_ROOT_PAGE_NO); + + ibuf->empty = page_is_empty(root); + } + } else { + ut_ad(mode == BTR_MODIFY_TREE); + + /* We acquire an x-latch to the root page before the insert, + because a pessimistic insert releases the tree x-latch, + which would cause the x-latching of the root after that to + break the latching order. 
*/ + + root = ibuf_tree_root_get(&mtr); + + err = btr_cur_optimistic_insert( + BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG, + cursor, &offsets, &offsets_heap, + ibuf_entry, &ins_rec, + &dummy_big_rec, 0, thr, &mtr); + + if (err == DB_FAIL) { + err = btr_cur_pessimistic_insert( + BTR_NO_LOCKING_FLAG | BTR_NO_UNDO_LOG_FLAG, + cursor, &offsets, &offsets_heap, + ibuf_entry, &ins_rec, + &dummy_big_rec, 0, thr, &mtr); + } + + mutex_exit(&ibuf_pessimistic_insert_mutex); + ibuf_size_update(root, &mtr); + mutex_exit(&ibuf_mutex); + ibuf->empty = page_is_empty(root); + + block = btr_cur_get_block(cursor); + ut_ad(buf_block_get_space(block) == IBUF_SPACE_ID); + } + + if (offsets_heap) { + mem_heap_free(offsets_heap); + } + + if (err == DB_SUCCESS && op != IBUF_OP_DELETE) { + /* Update the page max trx id field */ + page_update_max_trx_id(block, NULL, + thr_get_trx(thr)->id, &mtr); + } + +func_exit: +#ifdef UNIV_IBUF_COUNT_DEBUG + if (err == DB_SUCCESS) { + fprintf(stderr, + "Incrementing ibuf count of space %lu page %lu\n" + "from %lu by 1\n", space, page_no, + ibuf_count_get(space, page_no)); + + ibuf_count_set(space, page_no, + ibuf_count_get(space, page_no) + 1); + } +#endif + + ibuf_mtr_commit(&mtr); + btr_pcur_close(&pcur); + + mem_heap_free(heap); + + if (err == DB_SUCCESS && mode == BTR_MODIFY_TREE) { + ibuf_contract_after_insert(entry_size); + } + + if (do_merge) { +#ifdef UNIV_IBUF_DEBUG + ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED); +#endif + buf_read_ibuf_merge_pages(false, space_ids, space_versions, + page_nos, n_stored); + } + + return(err); +} + +/*********************************************************************//** +Buffer an operation in the insert/delete buffer, instead of doing it +directly to the disk page, if this is possible. Does not do it if the index +is clustered or unique. +@return TRUE if success */ +UNIV_INTERN +ibool +ibuf_insert( +/*========*/ + ibuf_op_t op, /*!< in: operation type */ + const dtuple_t* entry, /*!< in: index entry to insert */ + dict_index_t* index, /*!< in: index where to insert */ + ulint space, /*!< in: space id where to insert */ + ulint zip_size,/*!< in: compressed page size in bytes, or 0 */ + ulint page_no,/*!< in: page number where to insert */ + que_thr_t* thr) /*!< in: query thread */ +{ + dberr_t err; + ulint entry_size; + ibool no_counter; + /* Read the settable global variable ibuf_use only once in + this function, so that we will have a consistent view of it. 
*/ + ibuf_use_t use = ibuf_use; + DBUG_ENTER("ibuf_insert"); + + DBUG_PRINT("ibuf", ("op: %d, space: %ld, page_no: %ld", + op, space, page_no)); + + ut_ad(dtuple_check_typed(entry)); + ut_ad(ut_is_2pow(zip_size)); + + ut_a(!dict_index_is_clust(index)); + + no_counter = use <= IBUF_USE_INSERT; + + switch (op) { + case IBUF_OP_INSERT: + switch (use) { + case IBUF_USE_NONE: + case IBUF_USE_DELETE: + case IBUF_USE_DELETE_MARK: + DBUG_RETURN(FALSE); + case IBUF_USE_INSERT: + case IBUF_USE_INSERT_DELETE_MARK: + case IBUF_USE_ALL: + goto check_watch; + case IBUF_USE_COUNT: + break; + } + break; + case IBUF_OP_DELETE_MARK: + switch (use) { + case IBUF_USE_NONE: + case IBUF_USE_INSERT: + DBUG_RETURN(FALSE); + case IBUF_USE_DELETE_MARK: + case IBUF_USE_DELETE: + case IBUF_USE_INSERT_DELETE_MARK: + case IBUF_USE_ALL: + ut_ad(!no_counter); + goto check_watch; + case IBUF_USE_COUNT: + break; + } + break; + case IBUF_OP_DELETE: + switch (use) { + case IBUF_USE_NONE: + case IBUF_USE_INSERT: + case IBUF_USE_INSERT_DELETE_MARK: + DBUG_RETURN(FALSE); + case IBUF_USE_DELETE_MARK: + case IBUF_USE_DELETE: + case IBUF_USE_ALL: + ut_ad(!no_counter); + goto skip_watch; + case IBUF_USE_COUNT: + break; + } + break; + case IBUF_OP_COUNT: + break; + } + + /* unknown op or use */ + ut_error; + +check_watch: + /* If a thread attempts to buffer an insert on a page while a + purge is in progress on the same page, the purge must not be + buffered, because it could remove a record that was + re-inserted later. For simplicity, we block the buffering of + all operations on a page that has a purge pending. + + We do not check this in the IBUF_OP_DELETE case, because that + would always trigger the buffer pool watch during purge and + thus prevent the buffering of delete operations. We assume + that the issuer of IBUF_OP_DELETE has called + buf_pool_watch_set(space, page_no). */ + + { + buf_page_t* bpage; + buf_pool_t* buf_pool = buf_pool_get(space, page_no); + bpage = buf_page_get_also_watch(buf_pool, space, page_no); + + if (UNIV_LIKELY_NULL(bpage)) { + /* A buffer pool watch has been set or the + page has been read into the buffer pool. + Do not buffer the request. If a purge operation + is being buffered, have this request executed + directly on the page in the buffer pool after the + buffered entries for this page have been merged. */ + DBUG_RETURN(FALSE); + } + } + +skip_watch: + entry_size = rec_get_converted_size(index, entry, 0); + + if (entry_size + >= page_get_free_space_of_empty(dict_table_is_comp(index->table)) + / 2) { + + DBUG_RETURN(FALSE); + } + + err = ibuf_insert_low(BTR_MODIFY_PREV, op, no_counter, + entry, entry_size, + index, space, zip_size, page_no, thr); + if (err == DB_FAIL) { + err = ibuf_insert_low(BTR_MODIFY_TREE, op, no_counter, + entry, entry_size, + index, space, zip_size, page_no, thr); + } + + if (err == DB_SUCCESS) { +#ifdef UNIV_IBUF_DEBUG + /* fprintf(stderr, "Ibuf insert for page no %lu of index %s\n", + page_no, index->name); */ +#endif + DBUG_RETURN(TRUE); + + } else { + ut_a(err == DB_STRONG_FAIL || err == DB_TOO_BIG_RECORD); + + DBUG_RETURN(FALSE); + } +} + +/********************************************************************//** +During merge, inserts to an index page a secondary index entry extracted +from the insert buffer. 
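+If the entry does not fit even after page reorganization, an error is
+printed and NULL is returned; the free-space bitmap check made before
+buffering should prevent this.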
+@return newly inserted record */ +static __attribute__((nonnull)) +rec_t* +ibuf_insert_to_index_page_low( +/*==========================*/ + const dtuple_t* entry, /*!< in: buffered entry to insert */ + buf_block_t* block, /*!< in/out: index page where the buffered + entry should be placed */ + dict_index_t* index, /*!< in: record descriptor */ + ulint** offsets,/*!< out: offsets on *rec */ + mem_heap_t* heap, /*!< in/out: memory heap */ + mtr_t* mtr, /*!< in/out: mtr */ + page_cur_t* page_cur)/*!< in/out: cursor positioned on the record + after which to insert the buffered entry */ +{ + const page_t* page; + ulint space; + ulint page_no; + ulint zip_size; + const page_t* bitmap_page; + ulint old_bits; + rec_t* rec; + DBUG_ENTER("ibuf_insert_to_index_page_low"); + + rec = page_cur_tuple_insert(page_cur, entry, index, + offsets, &heap, 0, mtr); + if (rec != NULL) { + DBUG_RETURN(rec); + } + + /* Page reorganization or recompression should already have + been attempted by page_cur_tuple_insert(). Besides, per + ibuf_index_page_calc_free_zip() the page should not have been + recompressed or reorganized. */ + ut_ad(!buf_block_get_page_zip(block)); + + /* If the record did not fit, reorganize */ + + btr_page_reorganize(page_cur, index, mtr); + + /* This time the record must fit */ + + rec = page_cur_tuple_insert(page_cur, entry, index, + offsets, &heap, 0, mtr); + if (rec != NULL) { + DBUG_RETURN(rec); + } + + page = buf_block_get_frame(block); + + ut_print_timestamp(stderr); + + fprintf(stderr, + " InnoDB: Error: Insert buffer insert fails;" + " page free %lu, dtuple size %lu\n", + (ulong) page_get_max_insert_size(page, 1), + (ulong) rec_get_converted_size(index, entry, 0)); + fputs("InnoDB: Cannot insert index record ", stderr); + dtuple_print(stderr, entry); + fputs("\nInnoDB: The table where this index record belongs\n" + "InnoDB: is now probably corrupt. Please run CHECK TABLE on\n" + "InnoDB: that table.\n", stderr); + + space = page_get_space_id(page); + zip_size = buf_block_get_zip_size(block); + page_no = page_get_page_no(page); + + bitmap_page = ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr); + old_bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, zip_size, + IBUF_BITMAP_FREE, mtr); + + fprintf(stderr, + "InnoDB: space %lu, page %lu, zip_size %lu, bitmap bits %lu\n", + (ulong) space, (ulong) page_no, + (ulong) zip_size, (ulong) old_bits); + + fputs("InnoDB: Submit a detailed bug report" + " to http://bugs.mysql.com\n", stderr); + ut_ad(0); + DBUG_RETURN(NULL); +} + +/************************************************************************ +During merge, inserts to an index page a secondary index entry extracted +from the insert buffer. 
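+Unlike ibuf_insert_to_index_page_low() above, this function also handles
+the case where the page contains a delete-marked record with a matching
+key: the record is updated in place, or deleted and re-inserted if its
+length would change.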
*/
+static
+void
+ibuf_insert_to_index_page(
+/*======================*/
+ const dtuple_t* entry, /*!< in: buffered entry to insert */
+ buf_block_t* block, /*!< in/out: index page where the buffered entry
+ should be placed */
+ dict_index_t* index, /*!< in: record descriptor */
+ mtr_t* mtr) /*!< in: mtr */
+{
+ page_cur_t page_cur;
+ ulint low_match;
+ page_t* page = buf_block_get_frame(block);
+ rec_t* rec;
+ ulint* offsets;
+ mem_heap_t* heap;
+
+ DBUG_ENTER("ibuf_insert_to_index_page");
+
+ DBUG_PRINT("ibuf", ("page_no: %ld", buf_block_get_page_no(block)));
+ DBUG_PRINT("ibuf", ("index name: %s", index->name));
+ DBUG_PRINT("ibuf", ("online status: %d",
+ dict_index_get_online_status(index)));
+
+ ut_ad(ibuf_inside(mtr));
+ ut_ad(dtuple_check_typed(entry));
+ ut_ad(!buf_block_align(page)->index);
+
+ if (UNIV_UNLIKELY(dict_table_is_comp(index->table)
+ != (ibool)!!page_is_comp(page))) {
+ fputs("InnoDB: Trying to insert a record from"
+ " the insert buffer to an index page\n"
+ "InnoDB: but the 'compact' flag does not match!\n",
+ stderr);
+ goto dump;
+ }
+
+ rec = page_rec_get_next(page_get_infimum_rec(page));
+
+ if (page_rec_is_supremum(rec)) {
+ fputs("InnoDB: Trying to insert a record from"
+ " the insert buffer to an index page\n"
+ "InnoDB: but the index page is empty!\n",
+ stderr);
+ goto dump;
+ }
+
+ if (UNIV_UNLIKELY(rec_get_n_fields(rec, index)
+ != dtuple_get_n_fields(entry))) {
+ fputs("InnoDB: Trying to insert a record from"
+ " the insert buffer to an index page\n"
+ "InnoDB: but the number of fields does not match!\n",
+ stderr);
+dump:
+ buf_page_print(page, 0, BUF_PAGE_PRINT_NO_CRASH);
+
+ dtuple_print(stderr, entry);
+ ut_ad(0);
+
+ fputs("InnoDB: The table where"
+ " this index record belongs\n"
+ "InnoDB: is now probably corrupt."
+ " Please run CHECK TABLE on\n"
+ "InnoDB: your tables.\n"
+ "InnoDB: Submit a detailed bug report to"
+ " http://bugs.mysql.com!\n", stderr);
+
+ DBUG_VOID_RETURN;
+ }
+
+ low_match = page_cur_search(block, index, entry,
+ PAGE_CUR_LE, &page_cur);
+
+ heap = mem_heap_create(
+ sizeof(upd_t)
+ + REC_OFFS_HEADER_SIZE * sizeof(*offsets)
+ + dtuple_get_n_fields(entry)
+ * (sizeof(upd_field_t) + sizeof *offsets));
+
+ if (UNIV_UNLIKELY(low_match == dtuple_get_n_fields(entry))) {
+ upd_t* update;
+ page_zip_des_t* page_zip;
+
+ rec = page_cur_get_rec(&page_cur);
+
+ /* This is based on
+ row_ins_sec_index_entry_by_modify(BTR_MODIFY_LEAF). */
+ ut_ad(rec_get_deleted_flag(rec, page_is_comp(page)));
+
+ offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED,
+ &heap);
+ update = row_upd_build_sec_rec_difference_binary(
+ rec, index, offsets, entry, heap);
+
+ page_zip = buf_block_get_page_zip(block);
+
+ if (update->n_fields == 0) {
+ /* The records only differ in the delete-mark.
+ Clear the delete-mark, like we did before
+ Bug #56680 was fixed. */
+ btr_cur_set_deleted_flag_for_ibuf(
+ rec, page_zip, FALSE, mtr);
+ goto updated_in_place;
+ }
+
+ /* Copy the info bits. Clear the delete-mark. */
+ update->info_bits = rec_get_info_bits(rec, page_is_comp(page));
+ update->info_bits &= ~REC_INFO_DELETED_FLAG;
+
+ /* We cannot invoke btr_cur_optimistic_update() here,
+ because we do not have a btr_cur_t or que_thr_t,
+ as the insert buffer merge occurs at a very low level. */
+ if (!row_upd_changes_field_size_or_external(index, offsets,
+ update)
+ && (!page_zip || btr_cur_update_alloc_zip(
+ page_zip, &page_cur, index, offsets,
+ rec_offs_size(offsets), false, mtr))) {
+ /* This is the easy case. 
Do something similar + to btr_cur_update_in_place(). */ + rec = page_cur_get_rec(&page_cur); + row_upd_rec_in_place(rec, index, offsets, + update, page_zip); + + /* Log the update in place operation. During recovery + MLOG_COMP_REC_UPDATE_IN_PLACE/MLOG_REC_UPDATE_IN_PLACE + expects trx_id, roll_ptr for secondary indexes. So we + just write dummy trx_id(0), roll_ptr(0) */ + btr_cur_update_in_place_log(BTR_KEEP_SYS_FLAG, rec, + index, update, 0, 0, mtr); + DBUG_EXECUTE_IF( + "crash_after_log_ibuf_upd_inplace", + log_buffer_flush_to_disk(); + ib_logf(IB_LOG_LEVEL_INFO, + "Wrote log record for ibuf update in " + "place operation"); + DBUG_SUICIDE(); + ); + + goto updated_in_place; + } + + /* btr_cur_update_alloc_zip() may have changed this */ + rec = page_cur_get_rec(&page_cur); + + /* A collation may identify values that differ in + storage length. + Some examples (1 or 2 bytes): + utf8_turkish_ci: I = U+0131 LATIN SMALL LETTER DOTLESS I + utf8_general_ci: S = U+00DF LATIN SMALL LETTER SHARP S + utf8_general_ci: A = U+00E4 LATIN SMALL LETTER A WITH DIAERESIS + + latin1_german2_ci: SS = U+00DF LATIN SMALL LETTER SHARP S + + Examples of a character (3-byte UTF-8 sequence) + identified with 2 or 4 characters (1-byte UTF-8 sequences): + + utf8_unicode_ci: 'II' = U+2171 SMALL ROMAN NUMERAL TWO + utf8_unicode_ci: '(10)' = U+247D PARENTHESIZED NUMBER TEN + */ + + /* Delete the different-length record, and insert the + buffered one. */ + + lock_rec_store_on_page_infimum(block, rec); + page_cur_delete_rec(&page_cur, index, offsets, mtr); + page_cur_move_to_prev(&page_cur); + rec = ibuf_insert_to_index_page_low(entry, block, index, + &offsets, heap, mtr, + &page_cur); + + ut_ad(!cmp_dtuple_rec(entry, rec, offsets)); + lock_rec_restore_from_page_infimum(block, rec, block); + } else { + offsets = NULL; + ibuf_insert_to_index_page_low(entry, block, index, + &offsets, heap, mtr, + &page_cur); + } +updated_in_place: + mem_heap_free(heap); + + DBUG_VOID_RETURN; +} + +/****************************************************************//** +During merge, sets the delete mark on a record for a secondary index +entry. */ +static +void +ibuf_set_del_mark( +/*==============*/ + const dtuple_t* entry, /*!< in: entry */ + buf_block_t* block, /*!< in/out: block */ + const dict_index_t* index, /*!< in: record descriptor */ + mtr_t* mtr) /*!< in: mtr */ +{ + page_cur_t page_cur; + ulint low_match; + + ut_ad(ibuf_inside(mtr)); + ut_ad(dtuple_check_typed(entry)); + + low_match = page_cur_search( + block, index, entry, PAGE_CUR_LE, &page_cur); + + if (low_match == dtuple_get_n_fields(entry)) { + rec_t* rec; + page_zip_des_t* page_zip; + + rec = page_cur_get_rec(&page_cur); + page_zip = page_cur_get_page_zip(&page_cur); + + /* Delete mark the old index record. According to a + comment in row_upd_sec_index_entry(), it can already + have been delete marked if a lock wait occurred in + row_ins_sec_index_entry() in a previous invocation of + row_upd_sec_index_entry(). 
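+
+One possible sequence (an illustrative reading of that comment):
+
+	row_upd_sec_index_entry():  delete-marks rec
+	row_ins_sec_index_entry():  hits a lock wait; the operation
+	                            is retried
+	row_upd_sec_index_entry():  finds rec already delete-marked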
*/ + + if (UNIV_LIKELY + (!rec_get_deleted_flag( + rec, dict_table_is_comp(index->table)))) { + btr_cur_set_deleted_flag_for_ibuf(rec, page_zip, + TRUE, mtr); + } + } else { + const page_t* page + = page_cur_get_page(&page_cur); + const buf_block_t* block + = page_cur_get_block(&page_cur); + + ut_print_timestamp(stderr); + fputs(" InnoDB: unable to find a record to delete-mark\n", + stderr); + fputs("InnoDB: tuple ", stderr); + dtuple_print(stderr, entry); + fputs("\n" + "InnoDB: record ", stderr); + rec_print(stderr, page_cur_get_rec(&page_cur), index); + fprintf(stderr, "\nspace %u offset %u" + " (%u records, index id %llu)\n" + "InnoDB: Submit a detailed bug report" + " to http://bugs.mysql.com\n", + (unsigned) buf_block_get_space(block), + (unsigned) buf_block_get_page_no(block), + (unsigned) page_get_n_recs(page), + (ulonglong) btr_page_get_index_id(page)); + ut_ad(0); + } +} + +/****************************************************************//** +During merge, delete a record for a secondary index entry. */ +static +void +ibuf_delete( +/*========*/ + const dtuple_t* entry, /*!< in: entry */ + buf_block_t* block, /*!< in/out: block */ + dict_index_t* index, /*!< in: record descriptor */ + mtr_t* mtr) /*!< in/out: mtr; must be committed + before latching any further pages */ +{ + page_cur_t page_cur; + ulint low_match; + + ut_ad(ibuf_inside(mtr)); + ut_ad(dtuple_check_typed(entry)); + + low_match = page_cur_search( + block, index, entry, PAGE_CUR_LE, &page_cur); + + if (low_match == dtuple_get_n_fields(entry)) { + page_zip_des_t* page_zip= buf_block_get_page_zip(block); + page_t* page = buf_block_get_frame(block); + rec_t* rec = page_cur_get_rec(&page_cur); + + /* TODO: the below should probably be a separate function, + it's a bastardized version of btr_cur_optimistic_delete. */ + + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets = offsets_; + mem_heap_t* heap = NULL; + ulint max_ins_size = 0; + + rec_offs_init(offsets_); + + offsets = rec_get_offsets( + rec, index, offsets, ULINT_UNDEFINED, &heap); + + if (page_get_n_recs(page) <= 1 + || !(REC_INFO_DELETED_FLAG + & rec_get_info_bits(rec, page_is_comp(page)))) { + /* Refuse to purge the last record or a + record that has not been marked for deletion. */ + ut_print_timestamp(stderr); + fputs(" InnoDB: unable to purge a record\n", + stderr); + fputs("InnoDB: tuple ", stderr); + dtuple_print(stderr, entry); + fputs("\n" + "InnoDB: record ", stderr); + rec_print_new(stderr, rec, offsets); + fprintf(stderr, "\nspace %u offset %u" + " (%u records, index id %llu)\n" + "InnoDB: Submit a detailed bug report" + " to http://bugs.mysql.com\n", + (unsigned) buf_block_get_space(block), + (unsigned) buf_block_get_page_no(block), + (unsigned) page_get_n_recs(page), + (ulonglong) btr_page_get_index_id(page)); + + ut_ad(0); + return; + } + + lock_update_delete(block, rec); + + if (!page_zip) { + max_ins_size + = page_get_max_insert_size_after_reorganize( + page, 1); + } +#ifdef UNIV_ZIP_DEBUG + ut_a(!page_zip || page_zip_validate(page_zip, page, index)); +#endif /* UNIV_ZIP_DEBUG */ + page_cur_delete_rec(&page_cur, index, offsets, mtr); +#ifdef UNIV_ZIP_DEBUG + ut_a(!page_zip || page_zip_validate(page_zip, page, index)); +#endif /* UNIV_ZIP_DEBUG */ + + if (page_zip) { + ibuf_update_free_bits_zip(block, mtr); + } else { + ibuf_update_free_bits_low(block, max_ins_size, mtr); + } + + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + } else { + /* The record must have been purged already. 
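+Nothing needs to be done to the index page here; the caller,
+ibuf_merge_or_delete_for_page(), still removes the buffered entry
+from the change buffer tree via ibuf_delete_rec().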
*/ + } +} + +/*********************************************************************//** +Restores insert buffer tree cursor position +@return TRUE if the position was restored; FALSE if not */ +static __attribute__((nonnull)) +ibool +ibuf_restore_pos( +/*=============*/ + ulint space, /*!< in: space id */ + ulint page_no,/*!< in: index page number where the record + should belong */ + const dtuple_t* search_tuple, + /*!< in: search tuple for entries of page_no */ + ulint mode, /*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ + btr_pcur_t* pcur, /*!< in/out: persistent cursor whose + position is to be restored */ + mtr_t* mtr) /*!< in/out: mini-transaction */ +{ + ut_ad(mode == BTR_MODIFY_LEAF || mode == BTR_MODIFY_TREE); + + if (btr_pcur_restore_position(mode, pcur, mtr)) { + + return(TRUE); + } + + if (fil_space_get_flags(space) == ULINT_UNDEFINED) { + /* The tablespace has been dropped. It is possible + that another thread has deleted the insert buffer + entry. Do not complain. */ + ibuf_btr_pcur_commit_specify_mtr(pcur, mtr); + } else { + fprintf(stderr, + "InnoDB: ERROR: Submit the output to" + " http://bugs.mysql.com\n" + "InnoDB: ibuf cursor restoration fails!\n" + "InnoDB: ibuf record inserted to page %lu:%lu\n", + (ulong) space, (ulong) page_no); + fflush(stderr); + + rec_print_old(stderr, btr_pcur_get_rec(pcur)); + rec_print_old(stderr, pcur->old_rec); + dtuple_print(stderr, search_tuple); + + rec_print_old(stderr, + page_rec_get_next(btr_pcur_get_rec(pcur))); + fflush(stderr); + + ibuf_btr_pcur_commit_specify_mtr(pcur, mtr); + ut_ad(0); + } + + return(FALSE); +} + +/*********************************************************************//** +Deletes from ibuf the record on which pcur is positioned. If we have to +resort to a pessimistic delete, this function commits mtr and closes +the cursor. +@return TRUE if mtr was committed and pcur closed in this operation */ +static __attribute__((warn_unused_result)) +ibool +ibuf_delete_rec( +/*============*/ + ulint space, /*!< in: space id */ + ulint page_no,/*!< in: index page number that the record + should belong to */ + btr_pcur_t* pcur, /*!< in: pcur positioned on the record to + delete, having latch mode BTR_MODIFY_LEAF */ + const dtuple_t* search_tuple, + /*!< in: search tuple for entries of page_no */ + mtr_t* mtr) /*!< in: mtr */ +{ + ibool success; + page_t* root; + dberr_t err; + + ut_ad(ibuf_inside(mtr)); + ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur))); + ut_ad(ibuf_rec_get_page_no(mtr, btr_pcur_get_rec(pcur)) == page_no); + ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur)) == space); + +#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG + if (ibuf_debug == 2) { + /* Inject a fault (crash). We do this before trying + optimistic delete, because a pessimistic delete in the + change buffer would require a larger test case. */ + + /* Flag the buffered record as processed, to avoid + an assertion failure after crash recovery. */ + btr_cur_set_deleted_flag_for_ibuf( + btr_pcur_get_rec(pcur), NULL, TRUE, mtr); + ibuf_mtr_commit(mtr); + log_write_up_to(LSN_MAX, LOG_WAIT_ALL_GROUPS, TRUE); + DBUG_SUICIDE(); + } +#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ + + success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), + 0, mtr); + + if (success) { + if (page_is_empty(btr_pcur_get_page(pcur))) { + /* If a B-tree page is empty, it must be the root page + and the whole B-tree must be empty. InnoDB does not + allow empty B-tree pages other than the root. 
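+The assertions just below check this invariant: the root of the change
+buffer tree is page FSP_IBUF_TREE_ROOT_PAGE_NO of the system
+tablespace (IBUF_SPACE_ID).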
*/ + root = btr_pcur_get_page(pcur); + + ut_ad(page_get_space_id(root) == IBUF_SPACE_ID); + ut_ad(page_get_page_no(root) + == FSP_IBUF_TREE_ROOT_PAGE_NO); + + /* ibuf->empty is protected by the root page latch. + Before the deletion, it had to be FALSE. */ + ut_ad(!ibuf->empty); + ibuf->empty = true; + } + +#ifdef UNIV_IBUF_COUNT_DEBUG + fprintf(stderr, + "Decrementing ibuf count of space %lu page %lu\n" + "from %lu by 1\n", space, page_no, + ibuf_count_get(space, page_no)); + ibuf_count_set(space, page_no, + ibuf_count_get(space, page_no) - 1); +#endif + return(FALSE); + } + + ut_ad(page_rec_is_user_rec(btr_pcur_get_rec(pcur))); + ut_ad(ibuf_rec_get_page_no(mtr, btr_pcur_get_rec(pcur)) == page_no); + ut_ad(ibuf_rec_get_space(mtr, btr_pcur_get_rec(pcur)) == space); + + /* We have to resort to a pessimistic delete from ibuf. + Delete-mark the record so that it will not be applied again, + in case the server crashes before the pessimistic delete is + made persistent. */ + btr_cur_set_deleted_flag_for_ibuf( + btr_pcur_get_rec(pcur), NULL, TRUE, mtr); + + btr_pcur_store_position(pcur, mtr); + ibuf_btr_pcur_commit_specify_mtr(pcur, mtr); + + ibuf_mtr_start(mtr); + mutex_enter(&ibuf_mutex); + + if (!ibuf_restore_pos(space, page_no, search_tuple, + BTR_MODIFY_TREE, pcur, mtr)) { + + mutex_exit(&ibuf_mutex); + ut_ad(mtr->state == MTR_COMMITTED); + goto func_exit; + } + + root = ibuf_tree_root_get(mtr); + + btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), 0, + RB_NONE, mtr); + ut_a(err == DB_SUCCESS); + +#ifdef UNIV_IBUF_COUNT_DEBUG + ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1); +#endif + ibuf_size_update(root, mtr); + mutex_exit(&ibuf_mutex); + + ibuf->empty = page_is_empty(root); + ibuf_btr_pcur_commit_specify_mtr(pcur, mtr); + +func_exit: + ut_ad(mtr->state == MTR_COMMITTED); + btr_pcur_close(pcur); + + return(TRUE); +} + +/*********************************************************************//** +When an index page is read from a disk to the buffer pool, this function +applies any buffered operations to the page and deletes the entries from the +insert buffer. If the page is not read, but created in the buffer pool, this +function deletes its buffered entries from the insert buffer; there can +exist entries for such a page if the page belonged to an index which +subsequently was dropped. */ +UNIV_INTERN +void +ibuf_merge_or_delete_for_page( +/*==========================*/ + buf_block_t* block, /*!< in: if page has been read from + disk, pointer to the page x-latched, + else NULL */ + ulint space, /*!< in: space id of the index page */ + ulint page_no,/*!< in: page number of the index page */ + ulint zip_size,/*!< in: compressed page size in bytes, + or 0 */ + ibool update_ibuf_bitmap)/*!< in: normally this is set + to TRUE, but if we have deleted or are + deleting the tablespace, then we + naturally do not want to update a + non-existent bitmap page */ +{ + mem_heap_t* heap; + btr_pcur_t pcur; + dtuple_t* search_tuple; +#ifdef UNIV_IBUF_DEBUG + ulint volume = 0; +#endif + page_zip_des_t* page_zip = NULL; + ibool tablespace_being_deleted = FALSE; + ibool corruption_noticed = FALSE; + mtr_t mtr; + + /* Counts for merged & discarded operations. 
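+Both arrays are indexed by ibuf_op_t; they are folded into the global
+statistics with ibuf_add_ops() at the end of this function.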
*/ + ulint mops[IBUF_OP_COUNT]; + ulint dops[IBUF_OP_COUNT]; + + ut_ad(!block || buf_block_get_space(block) == space); + ut_ad(!block || buf_block_get_page_no(block) == page_no); + ut_ad(!block || buf_block_get_zip_size(block) == zip_size); + ut_ad(!block || buf_block_get_io_fix(block) == BUF_IO_READ); + + if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE + || trx_sys_hdr_page(space, page_no)) { + return; + } + + /* We cannot refer to zip_size in the following, because + zip_size is passed as ULINT_UNDEFINED (it is unknown) when + buf_read_ibuf_merge_pages() is merging (discarding) changes + for a dropped tablespace. When block != NULL or + update_ibuf_bitmap is specified, the zip_size must be known. + That is why we will repeat the check below, with zip_size in + place of 0. Passing zip_size as 0 assumes that the + uncompressed page size always is a power-of-2 multiple of the + compressed page size. */ + + if (ibuf_fixed_addr_page(space, 0, page_no) + || fsp_descr_page(0, page_no)) { + return; + } + + if (UNIV_LIKELY(update_ibuf_bitmap)) { + ut_a(ut_is_2pow(zip_size)); + + if (ibuf_fixed_addr_page(space, zip_size, page_no) + || fsp_descr_page(zip_size, page_no)) { + return; + } + + /* If the following returns FALSE, we get the counter + incremented, and must decrement it when we leave this + function. When the counter is > 0, that prevents tablespace + from being dropped. */ + + tablespace_being_deleted = fil_inc_pending_ops(space, true); + + if (UNIV_UNLIKELY(tablespace_being_deleted)) { + /* Do not try to read the bitmap page from space; + just delete the ibuf records for the page */ + + block = NULL; + update_ibuf_bitmap = FALSE; + } else { + page_t* bitmap_page; + ulint bitmap_bits; + + ibuf_mtr_start(&mtr); + + bitmap_page = ibuf_bitmap_get_map_page( + space, page_no, zip_size, &mtr); + bitmap_bits = ibuf_bitmap_page_get_bits( + bitmap_page, page_no, zip_size, + IBUF_BITMAP_BUFFERED, &mtr); + + ibuf_mtr_commit(&mtr); + + if (!bitmap_bits) { + /* No inserts buffered for this page */ + + if (!tablespace_being_deleted) { + fil_decr_pending_ops(space); + } + + return; + } + } + } else if (block + && (ibuf_fixed_addr_page(space, zip_size, page_no) + || fsp_descr_page(zip_size, page_no))) { + + return; + } + + heap = mem_heap_create(512); + + search_tuple = ibuf_search_tuple_build(space, page_no, heap); + + if (block) { + /* Move the ownership of the x-latch on the page to this OS + thread, so that we can acquire a second x-latch on it. This + is needed for the insert operations to the index page to pass + the debug checks. */ + + rw_lock_x_lock_move_ownership(&(block->lock)); + page_zip = buf_block_get_page_zip(block); + + if (UNIV_UNLIKELY(fil_page_get_type(block->frame) + != FIL_PAGE_INDEX) + || UNIV_UNLIKELY(!page_is_leaf(block->frame))) { + + page_t* bitmap_page; + + corruption_noticed = TRUE; + + ut_print_timestamp(stderr); + + ibuf_mtr_start(&mtr); + + fputs(" InnoDB: Dump of the ibuf bitmap page:\n", + stderr); + + bitmap_page = ibuf_bitmap_get_map_page(space, page_no, + zip_size, &mtr); + buf_page_print(bitmap_page, 0, + BUF_PAGE_PRINT_NO_CRASH); + ibuf_mtr_commit(&mtr); + + fputs("\nInnoDB: Dump of the page:\n", stderr); + + buf_page_print(block->frame, 0, + BUF_PAGE_PRINT_NO_CRASH); + + fprintf(stderr, + "InnoDB: Error: corruption in the tablespace." 
+ " Bitmap shows insert\n" + "InnoDB: buffer records to page n:o %lu" + " though the page\n" + "InnoDB: type is %lu, which is" + " not an index leaf page!\n" + "InnoDB: We try to resolve the problem" + " by skipping the insert buffer\n" + "InnoDB: merge for this page." + " Please run CHECK TABLE on your tables\n" + "InnoDB: to determine if they are corrupt" + " after this.\n\n" + "InnoDB: Please submit a detailed bug report" + " to http://bugs.mysql.com\n\n", + (ulong) page_no, + (ulong) + fil_page_get_type(block->frame)); + ut_ad(0); + } + } + + memset(mops, 0, sizeof(mops)); + memset(dops, 0, sizeof(dops)); + +loop: + ibuf_mtr_start(&mtr); + + /* Position pcur in the insert buffer at the first entry for this + index page */ + btr_pcur_open_on_user_rec( + ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF, + &pcur, &mtr); + + if (block) { + ibool success; + + success = buf_page_get_known_nowait( + RW_X_LATCH, block, + BUF_KEEP_OLD, __FILE__, __LINE__, &mtr); + + ut_a(success); + + /* This is a user page (secondary index leaf page), + but we pretend that it is a change buffer page in + order to obey the latching order. This should be OK, + because buffered changes are applied immediately while + the block is io-fixed. Other threads must not try to + latch an io-fixed block. */ + buf_block_dbg_add_level(block, SYNC_IBUF_TREE_NODE); + } + + if (!btr_pcur_is_on_user_rec(&pcur)) { + ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr)); + + goto reset_bit; + } + + for (;;) { + rec_t* rec; + + ut_ad(btr_pcur_is_on_user_rec(&pcur)); + + rec = btr_pcur_get_rec(&pcur); + + /* Check if the entry is for this index page */ + if (ibuf_rec_get_page_no(&mtr, rec) != page_no + || ibuf_rec_get_space(&mtr, rec) != space) { + + if (block) { + page_header_reset_last_insert( + block->frame, page_zip, &mtr); + } + + goto reset_bit; + } + + if (UNIV_UNLIKELY(corruption_noticed)) { + fputs("InnoDB: Discarding record\n ", stderr); + rec_print_old(stderr, rec); + fputs("\nInnoDB: from the insert buffer!\n\n", stderr); + } else if (block && !rec_get_deleted_flag(rec, 0)) { + /* Now we have at pcur a record which should be + applied on the index page; NOTE that the call below + copies pointers to fields in rec, and we must + keep the latch to the rec page until the + insertion is finished! */ + dtuple_t* entry; + trx_id_t max_trx_id; + dict_index_t* dummy_index; + ibuf_op_t op = ibuf_rec_get_op_type(&mtr, rec); + + max_trx_id = page_get_max_trx_id(page_align(rec)); + page_update_max_trx_id(block, page_zip, max_trx_id, + &mtr); + + ut_ad(page_validate(page_align(rec), ibuf->index)); + + entry = ibuf_build_entry_from_ibuf_rec( + &mtr, rec, heap, &dummy_index); + + ut_ad(page_validate(block->frame, dummy_index)); + + switch (op) { + ibool success; + case IBUF_OP_INSERT: +#ifdef UNIV_IBUF_DEBUG + volume += rec_get_converted_size( + dummy_index, entry, 0); + + volume += page_dir_calc_reserved_space(1); + + ut_a(volume <= 4 * UNIV_PAGE_SIZE + / IBUF_PAGE_SIZE_PER_FREE_SPACE); +#endif + ibuf_insert_to_index_page( + entry, block, dummy_index, &mtr); + break; + + case IBUF_OP_DELETE_MARK: + ibuf_set_del_mark( + entry, block, dummy_index, &mtr); + break; + + case IBUF_OP_DELETE: + ibuf_delete(entry, block, dummy_index, &mtr); + /* Because ibuf_delete() will latch an + insert buffer bitmap page, commit mtr + before latching any further pages. + Store and restore the cursor position. 
*/ + ut_ad(rec == btr_pcur_get_rec(&pcur)); + ut_ad(page_rec_is_user_rec(rec)); + ut_ad(ibuf_rec_get_page_no(&mtr, rec) + == page_no); + ut_ad(ibuf_rec_get_space(&mtr, rec) == space); + + /* Mark the change buffer record processed, + so that it will not be merged again in case + the server crashes between the following + mtr_commit() and the subsequent mtr_commit() + of deleting the change buffer record. */ + + btr_cur_set_deleted_flag_for_ibuf( + btr_pcur_get_rec(&pcur), NULL, + TRUE, &mtr); + + btr_pcur_store_position(&pcur, &mtr); + ibuf_btr_pcur_commit_specify_mtr(&pcur, &mtr); + + ibuf_mtr_start(&mtr); + + success = buf_page_get_known_nowait( + RW_X_LATCH, block, + BUF_KEEP_OLD, + __FILE__, __LINE__, &mtr); + ut_a(success); + + /* This is a user page (secondary + index leaf page), but it should be OK + to use too low latching order for it, + as the block is io-fixed. */ + buf_block_dbg_add_level( + block, SYNC_IBUF_TREE_NODE); + + if (!ibuf_restore_pos(space, page_no, + search_tuple, + BTR_MODIFY_LEAF, + &pcur, &mtr)) { + + ut_ad(mtr.state == MTR_COMMITTED); + mops[op]++; + ibuf_dummy_index_free(dummy_index); + goto loop; + } + + break; + default: + ut_error; + } + + mops[op]++; + + ibuf_dummy_index_free(dummy_index); + } else { + dops[ibuf_rec_get_op_type(&mtr, rec)]++; + } + + /* Delete the record from ibuf */ + if (ibuf_delete_rec(space, page_no, &pcur, search_tuple, + &mtr)) { + /* Deletion was pessimistic and mtr was committed: + we start from the beginning again */ + + ut_ad(mtr.state == MTR_COMMITTED); + goto loop; + } else if (btr_pcur_is_after_last_on_page(&pcur)) { + ibuf_mtr_commit(&mtr); + btr_pcur_close(&pcur); + + goto loop; + } + } + +reset_bit: + if (UNIV_LIKELY(update_ibuf_bitmap)) { + page_t* bitmap_page; + + bitmap_page = ibuf_bitmap_get_map_page( + space, page_no, zip_size, &mtr); + + ibuf_bitmap_page_set_bits( + bitmap_page, page_no, zip_size, + IBUF_BITMAP_BUFFERED, FALSE, &mtr); + + if (block) { + ulint old_bits = ibuf_bitmap_page_get_bits( + bitmap_page, page_no, zip_size, + IBUF_BITMAP_FREE, &mtr); + + ulint new_bits = ibuf_index_page_calc_free( + zip_size, block); + + if (old_bits != new_bits) { + ibuf_bitmap_page_set_bits( + bitmap_page, page_no, zip_size, + IBUF_BITMAP_FREE, new_bits, &mtr); + } + } + } + + ibuf_mtr_commit(&mtr); + btr_pcur_close(&pcur); + mem_heap_free(heap); + +#ifdef HAVE_ATOMIC_BUILTINS + os_atomic_increment_ulint(&ibuf->n_merges, 1); + ibuf_add_ops(ibuf->n_merged_ops, mops); + ibuf_add_ops(ibuf->n_discarded_ops, dops); +#else /* HAVE_ATOMIC_BUILTINS */ + /* Protect our statistics keeping from race conditions */ + mutex_enter(&ibuf_mutex); + + ibuf->n_merges++; + ibuf_add_ops(ibuf->n_merged_ops, mops); + ibuf_add_ops(ibuf->n_discarded_ops, dops); + + mutex_exit(&ibuf_mutex); +#endif /* HAVE_ATOMIC_BUILTINS */ + + if (update_ibuf_bitmap && !tablespace_being_deleted) { + + fil_decr_pending_ops(space); + } + +#ifdef UNIV_IBUF_COUNT_DEBUG + ut_a(ibuf_count_get(space, page_no) == 0); +#endif +} + +/*********************************************************************//** +Deletes all entries in the insert buffer for a given space id. This is used +in DISCARD TABLESPACE and IMPORT TABLESPACE. +NOTE: this does not update the page free bitmaps in the space. The space will +become CORRUPT when you call this function! 
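+All buffered entries for the space are removed from the change buffer
+tree by ibuf_delete_rec(), optimistically when possible, and
+pessimistically (restarting the scan) otherwise.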
*/
+UNIV_INTERN
+void
+ibuf_delete_for_discarded_space(
+/*============================*/
+ ulint space) /*!< in: space id */
+{
+ mem_heap_t* heap;
+ btr_pcur_t pcur;
+ dtuple_t* search_tuple;
+ const rec_t* ibuf_rec;
+ ulint page_no;
+ mtr_t mtr;
+
+ /* Counts for discarded operations. */
+ ulint dops[IBUF_OP_COUNT];
+
+ heap = mem_heap_create(512);
+
+ /* Use page number 0 to build the search tuple so that we get the
+ cursor positioned at the first entry for this space id */
+
+ search_tuple = ibuf_search_tuple_build(space, 0, heap);
+
+ memset(dops, 0, sizeof(dops));
+loop:
+ ibuf_mtr_start(&mtr);
+
+ /* Position pcur in the insert buffer at the first entry for the
+ space */
+ btr_pcur_open_on_user_rec(
+ ibuf->index, search_tuple, PAGE_CUR_GE, BTR_MODIFY_LEAF,
+ &pcur, &mtr);
+
+ if (!btr_pcur_is_on_user_rec(&pcur)) {
+ ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr));
+
+ goto leave_loop;
+ }
+
+ for (;;) {
+ ut_ad(btr_pcur_is_on_user_rec(&pcur));
+
+ ibuf_rec = btr_pcur_get_rec(&pcur);
+
+ /* Check if the entry is for this space */
+ if (ibuf_rec_get_space(&mtr, ibuf_rec) != space) {
+
+ goto leave_loop;
+ }
+
+ page_no = ibuf_rec_get_page_no(&mtr, ibuf_rec);
+
+ dops[ibuf_rec_get_op_type(&mtr, ibuf_rec)]++;
+
+ /* Delete the record from ibuf */
+ if (ibuf_delete_rec(space, page_no, &pcur, search_tuple,
+ &mtr)) {
+ /* Deletion was pessimistic and mtr was committed:
+ we start from the beginning again */
+
+ ut_ad(mtr.state == MTR_COMMITTED);
+ goto loop;
+ }
+
+ if (btr_pcur_is_after_last_on_page(&pcur)) {
+ ibuf_mtr_commit(&mtr);
+ btr_pcur_close(&pcur);
+
+ goto loop;
+ }
+ }
+
+leave_loop:
+ ibuf_mtr_commit(&mtr);
+ btr_pcur_close(&pcur);
+
+#ifdef HAVE_ATOMIC_BUILTINS
+ ibuf_add_ops(ibuf->n_discarded_ops, dops);
+#else /* HAVE_ATOMIC_BUILTINS */
+ /* Protect our statistics keeping from race conditions */
+ mutex_enter(&ibuf_mutex);
+ ibuf_add_ops(ibuf->n_discarded_ops, dops);
+ mutex_exit(&ibuf_mutex);
+#endif /* HAVE_ATOMIC_BUILTINS */
+
+ mem_heap_free(heap);
+}
+
+/******************************************************************//**
+Checks whether the insert buffer is empty.
+@return true if empty */
+UNIV_INTERN
+bool
+ibuf_is_empty(void)
+/*===============*/
+{
+ bool is_empty;
+ const page_t* root;
+ mtr_t mtr;
+
+ ibuf_mtr_start(&mtr);
+
+ mutex_enter(&ibuf_mutex);
+ root = ibuf_tree_root_get(&mtr);
+ mutex_exit(&ibuf_mutex);
+
+ is_empty = page_is_empty(root);
+ ut_a(is_empty == ibuf->empty);
+ ibuf_mtr_commit(&mtr);
+
+ return(is_empty);
+}
+
+/******************************************************************//**
+Prints info about the insert buffer. 
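+
+Example of the output format (values illustrative; the per-operation
+lines are produced by ibuf_print_ops(), whose exact format is assumed
+here):
+
+	Ibuf: size 1, free list len 0, seg size 2, 0 merges
+	merged operations:
+	 insert 0, delete mark 0, delete 0
+	discarded operations:
+	 insert 0, delete mark 0, delete 0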
*/
+UNIV_INTERN
+void
+ibuf_print(
+/*=======*/
+ FILE* file) /*!< in: file where to print */
+{
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ ulint i;
+ ulint j;
+#endif
+
+ mutex_enter(&ibuf_mutex);
+
+ fprintf(file,
+ "Ibuf: size %lu, free list len %lu,"
+ " seg size %lu, %lu merges\n",
+ (ulong) ibuf->size,
+ (ulong) ibuf->free_list_len,
+ (ulong) ibuf->seg_size,
+ (ulong) ibuf->n_merges);
+
+ fputs("merged operations:\n ", file);
+ ibuf_print_ops(ibuf->n_merged_ops, file);
+
+ fputs("discarded operations:\n ", file);
+ ibuf_print_ops(ibuf->n_discarded_ops, file);
+
+#ifdef UNIV_IBUF_COUNT_DEBUG
+ for (i = 0; i < IBUF_COUNT_N_SPACES; i++) {
+ for (j = 0; j < IBUF_COUNT_N_PAGES; j++) {
+ ulint count = ibuf_count_get(i, j);
+
+ if (count > 0) {
+ fprintf(stderr,
+ "Ibuf count for space/page %lu/%lu"
+ " is %lu\n",
+ (ulong) i, (ulong) j, (ulong) count);
+ }
+ }
+ }
+#endif /* UNIV_IBUF_COUNT_DEBUG */
+
+ mutex_exit(&ibuf_mutex);
+}
+
+/******************************************************************//**
+Checks the insert buffer bitmaps on IMPORT TABLESPACE.
+@return DB_SUCCESS or error code */
+UNIV_INTERN
+dberr_t
+ibuf_check_bitmap_on_import(
+/*========================*/
+ const trx_t* trx, /*!< in: transaction */
+ ulint space_id) /*!< in: tablespace identifier */
+{
+ ulint zip_size;
+ ulint page_size;
+ ulint size;
+ ulint page_no;
+
+ ut_ad(space_id);
+ ut_ad(trx->mysql_thd);
+
+ zip_size = fil_space_get_zip_size(space_id);
+
+ if (zip_size == ULINT_UNDEFINED) {
+ return(DB_TABLE_NOT_FOUND);
+ }
+
+ size = fil_space_get_size(space_id);
+
+ if (size == 0) {
+ return(DB_TABLE_NOT_FOUND);
+ }
+
+ mutex_enter(&ibuf_mutex);
+
+ page_size = zip_size ? zip_size : UNIV_PAGE_SIZE;
+
+ for (page_no = 0; page_no < size; page_no += page_size) {
+ mtr_t mtr;
+ page_t* bitmap_page;
+ ulint i;
+
+ if (trx_is_interrupted(trx)) {
+ mutex_exit(&ibuf_mutex);
+ return(DB_INTERRUPTED);
+ }
+
+ mtr_start(&mtr);
+
+ mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
+
+ ibuf_enter(&mtr);
+
+ bitmap_page = ibuf_bitmap_get_map_page(
+ space_id, page_no, zip_size, &mtr);
+
+ for (i = FSP_IBUF_BITMAP_OFFSET + 1; i < page_size; i++) {
+ const ulint offset = page_no + i;
+
+ if (ibuf_bitmap_page_get_bits(
+ bitmap_page, offset, zip_size,
+ IBUF_BITMAP_IBUF, &mtr)) {
+
+ mutex_exit(&ibuf_mutex);
+ ibuf_exit(&mtr);
+ mtr_commit(&mtr);
+
+ ib_errf(trx->mysql_thd,
+ IB_LOG_LEVEL_ERROR,
+ ER_INNODB_INDEX_CORRUPT,
+ "Space %u page %u"
+ " is wrongly flagged to belong to the"
+ " insert buffer",
+ (unsigned) space_id,
+ (unsigned) offset);
+
+ return(DB_CORRUPTION);
+ }
+
+ if (ibuf_bitmap_page_get_bits(
+ bitmap_page, offset, zip_size,
+ IBUF_BITMAP_BUFFERED, &mtr)) {
+
+ ib_errf(trx->mysql_thd,
+ IB_LOG_LEVEL_WARN,
+ ER_INNODB_INDEX_CORRUPT,
+ "Buffered changes"
+ " for space %u page %u are lost",
+ (unsigned) space_id,
+ (unsigned) offset);
+
+ /* Tolerate this error, so that
+ slightly corrupted tables can be
+ imported and dumped. Clear the bit. */
+ ibuf_bitmap_page_set_bits(
+ bitmap_page, offset, zip_size,
+ IBUF_BITMAP_BUFFERED, FALSE, &mtr);
+ }
+ }
+
+ ibuf_exit(&mtr);
+ mtr_commit(&mtr);
+ }
+
+ mutex_exit(&ibuf_mutex);
+ return(DB_SUCCESS);
+}
+#endif /* !UNIV_HOTBACKUP */