diff options
Diffstat (limited to 'storage/innobase/include/btr0btr.h')
-rw-r--r-- | storage/innobase/include/btr0btr.h | 509 |
1 files changed, 509 insertions, 0 deletions
diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h new file mode 100644 index 00000000000..d5c8258513c --- /dev/null +++ b/storage/innobase/include/btr0btr.h @@ -0,0 +1,509 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/**************************************************//** +@file include/btr0btr.h +The B-tree + +Created 6/2/1994 Heikki Tuuri +*******************************************************/ + +#ifndef btr0btr_h +#define btr0btr_h + +#include "univ.i" + +#include "dict0dict.h" +#include "data0data.h" +#include "page0cur.h" +#include "mtr0mtr.h" +#include "btr0types.h" + +#ifndef UNIV_HOTBACKUP +/** Maximum record size which can be stored on a page, without using the +special big record storage structure */ +#define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200) + +/** @brief Maximum depth of a B-tree in InnoDB. + +Note that this isn't a maximum as such; none of the tree operations +avoid producing trees bigger than this. It is instead a "max depth +that other code must work with", useful for e.g. fixed-size arrays +that must store some information about each level in a tree. In other +words: if a B-tree with bigger depth than this is encountered, it is +not acceptable for it to lead to mysterious memory corruption, but it +is acceptable for the program to die with a clear assert failure. */ +#define BTR_MAX_LEVELS 100 + +/** Latching modes for btr_cur_search_to_nth_level(). */ +enum btr_latch_mode { + /** Search a record on a leaf page and S-latch it. */ + BTR_SEARCH_LEAF = RW_S_LATCH, + /** (Prepare to) modify a record on a leaf page and X-latch it. */ + BTR_MODIFY_LEAF = RW_X_LATCH, + /** Obtain no latches. */ + BTR_NO_LATCHES = RW_NO_LATCH, + /** Start modifying the entire B-tree. */ + BTR_MODIFY_TREE = 33, + /** Continue modifying the entire B-tree. */ + BTR_CONT_MODIFY_TREE = 34, + /** Search the previous record. */ + BTR_SEARCH_PREV = 35, + /** Modify the previous record. */ + BTR_MODIFY_PREV = 36 +}; + +/** If this is ORed to btr_latch_mode, it means that the search tuple +will be inserted to the index, at the searched position */ +#define BTR_INSERT 512 + +/** This flag ORed to btr_latch_mode says that we do the search in query +optimization */ +#define BTR_ESTIMATE 1024 + +/** This flag ORed to btr_latch_mode says that we can ignore possible +UNIQUE definition on secondary indexes when we decide if we can use +the insert buffer to speed up inserts */ +#define BTR_IGNORE_SEC_UNIQUE 2048 + +/**************************************************************//** +Gets the root node of a tree and x-latches it. +@return root page, x-latched */ +UNIV_INTERN +page_t* +btr_root_get( +/*=========*/ + dict_index_t* index, /*!< in: index tree */ + mtr_t* mtr); /*!< in: mtr */ +/**************************************************************//** +Gets a buffer page and declares its latching order level. */ +UNIV_INLINE +buf_block_t* +btr_block_get( +/*==========*/ + ulint space, /*!< in: space id */ + ulint zip_size, /*!< in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /*!< in: page number */ + ulint mode, /*!< in: latch mode */ + mtr_t* mtr); /*!< in: mtr */ +/**************************************************************//** +Gets a buffer page and declares its latching order level. */ +UNIV_INLINE +page_t* +btr_page_get( +/*=========*/ + ulint space, /*!< in: space id */ + ulint zip_size, /*!< in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /*!< in: page number */ + ulint mode, /*!< in: latch mode */ + mtr_t* mtr); /*!< in: mtr */ +#endif /* !UNIV_HOTBACKUP */ +/**************************************************************//** +Gets the index id field of a page. +@return index id */ +UNIV_INLINE +dulint +btr_page_get_index_id( +/*==================*/ + const page_t* page); /*!< in: index page */ +#ifndef UNIV_HOTBACKUP +/********************************************************//** +Gets the node level field in an index page. +@return level, leaf level == 0 */ +UNIV_INLINE +ulint +btr_page_get_level_low( +/*===================*/ + const page_t* page); /*!< in: index page */ +/********************************************************//** +Gets the node level field in an index page. +@return level, leaf level == 0 */ +UNIV_INLINE +ulint +btr_page_get_level( +/*===============*/ + const page_t* page, /*!< in: index page */ + mtr_t* mtr); /*!< in: mini-transaction handle */ +/********************************************************//** +Gets the next index page number. +@return next page number */ +UNIV_INLINE +ulint +btr_page_get_next( +/*==============*/ + const page_t* page, /*!< in: index page */ + mtr_t* mtr); /*!< in: mini-transaction handle */ +/********************************************************//** +Gets the previous index page number. +@return prev page number */ +UNIV_INLINE +ulint +btr_page_get_prev( +/*==============*/ + const page_t* page, /*!< in: index page */ + mtr_t* mtr); /*!< in: mini-transaction handle */ +/*************************************************************//** +Gets pointer to the previous user record in the tree. It is assumed +that the caller has appropriate latches on the page and its neighbor. +@return previous user record, NULL if there is none */ +UNIV_INTERN +rec_t* +btr_get_prev_user_rec( +/*==================*/ + rec_t* rec, /*!< in: record on leaf level */ + mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if + needed, also to the previous page */ +/*************************************************************//** +Gets pointer to the next user record in the tree. It is assumed +that the caller has appropriate latches on the page and its neighbor. +@return next user record, NULL if there is none */ +UNIV_INTERN +rec_t* +btr_get_next_user_rec( +/*==================*/ + rec_t* rec, /*!< in: record on leaf level */ + mtr_t* mtr); /*!< in: mtr holding a latch on the page, and if + needed, also to the next page */ +/**************************************************************//** +Releases the latch on a leaf page and bufferunfixes it. */ +UNIV_INLINE +void +btr_leaf_page_release( +/*==================*/ + buf_block_t* block, /*!< in: buffer block */ + ulint latch_mode, /*!< in: BTR_SEARCH_LEAF or + BTR_MODIFY_LEAF */ + mtr_t* mtr); /*!< in: mtr */ +/**************************************************************//** +Gets the child node file address in a node pointer. +@return child node address */ +UNIV_INLINE +ulint +btr_node_ptr_get_child_page_no( +/*===========================*/ + const rec_t* rec, /*!< in: node pointer record */ + const ulint* offsets);/*!< in: array returned by rec_get_offsets() */ +/************************************************************//** +Creates the root node for a new index tree. +@return page number of the created root, FIL_NULL if did not succeed */ +UNIV_INTERN +ulint +btr_create( +/*=======*/ + ulint type, /*!< in: type of the index */ + ulint space, /*!< in: space where created */ + ulint zip_size,/*!< in: compressed page size in bytes + or 0 for uncompressed pages */ + dulint index_id,/*!< in: index id */ + dict_index_t* index, /*!< in: index */ + mtr_t* mtr); /*!< in: mini-transaction handle */ +/************************************************************//** +Frees a B-tree except the root page, which MUST be freed after this +by calling btr_free_root. */ +UNIV_INTERN +void +btr_free_but_not_root( +/*==================*/ + ulint space, /*!< in: space where created */ + ulint zip_size, /*!< in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint root_page_no); /*!< in: root page number */ +/************************************************************//** +Frees the B-tree root page. Other tree MUST already have been freed. */ +UNIV_INTERN +void +btr_free_root( +/*==========*/ + ulint space, /*!< in: space where created */ + ulint zip_size, /*!< in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint root_page_no, /*!< in: root page number */ + mtr_t* mtr); /*!< in: a mini-transaction which has already + been started */ +/*************************************************************//** +Makes tree one level higher by splitting the root, and inserts +the tuple. It is assumed that mtr contains an x-latch on the tree. +NOTE that the operation of this function must always succeed, +we cannot reverse it: therefore enough free disk space must be +guaranteed to be available before this function is called. +@return inserted record */ +UNIV_INTERN +rec_t* +btr_root_raise_and_insert( +/*======================*/ + btr_cur_t* cursor, /*!< in: cursor at which to insert: must be + on the root page; when the function returns, + the cursor is positioned on the predecessor + of the inserted record */ + const dtuple_t* tuple, /*!< in: tuple to insert */ + ulint n_ext, /*!< in: number of externally stored columns */ + mtr_t* mtr); /*!< in: mtr */ +/*************************************************************//** +Reorganizes an index page. +IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf +page of a non-clustered index, the caller must update the insert +buffer free bits in the same mini-transaction in such a way that the +modification will be redo-logged. +@return TRUE on success, FALSE on failure */ +UNIV_INTERN +ibool +btr_page_reorganize( +/*================*/ + buf_block_t* block, /*!< in: page to be reorganized */ + dict_index_t* index, /*!< in: record descriptor */ + mtr_t* mtr); /*!< in: mtr */ +/*************************************************************//** +Decides if the page should be split at the convergence point of +inserts converging to left. +@return TRUE if split recommended */ +UNIV_INTERN +ibool +btr_page_get_split_rec_to_left( +/*===========================*/ + btr_cur_t* cursor, /*!< in: cursor at which to insert */ + rec_t** split_rec);/*!< out: if split recommended, + the first record on upper half page, + or NULL if tuple should be first */ +/*************************************************************//** +Decides if the page should be split at the convergence point of +inserts converging to right. +@return TRUE if split recommended */ +UNIV_INTERN +ibool +btr_page_get_split_rec_to_right( +/*============================*/ + btr_cur_t* cursor, /*!< in: cursor at which to insert */ + rec_t** split_rec);/*!< out: if split recommended, + the first record on upper half page, + or NULL if tuple should be first */ +/*************************************************************//** +Splits an index page to halves and inserts the tuple. It is assumed +that mtr holds an x-latch to the index tree. NOTE: the tree x-latch is +released within this function! NOTE that the operation of this +function must always succeed, we cannot reverse it: therefore enough +free disk space (2 pages) must be guaranteed to be available before +this function is called. + +@return inserted record */ +UNIV_INTERN +rec_t* +btr_page_split_and_insert( +/*======================*/ + btr_cur_t* cursor, /*!< in: cursor at which to insert; when the + function returns, the cursor is positioned + on the predecessor of the inserted record */ + const dtuple_t* tuple, /*!< in: tuple to insert */ + ulint n_ext, /*!< in: number of externally stored columns */ + mtr_t* mtr); /*!< in: mtr */ +/*******************************************************//** +Inserts a data tuple to a tree on a non-leaf level. It is assumed +that mtr holds an x-latch on the tree. */ +UNIV_INTERN +void +btr_insert_on_non_leaf_level( +/*=========================*/ + dict_index_t* index, /*!< in: index */ + ulint level, /*!< in: level, must be > 0 */ + dtuple_t* tuple, /*!< in: the record to be inserted */ + mtr_t* mtr); /*!< in: mtr */ +#endif /* !UNIV_HOTBACKUP */ +/****************************************************************//** +Sets a record as the predefined minimum record. */ +UNIV_INTERN +void +btr_set_min_rec_mark( +/*=================*/ + rec_t* rec, /*!< in/out: record */ + mtr_t* mtr); /*!< in: mtr */ +#ifndef UNIV_HOTBACKUP +/*************************************************************//** +Deletes on the upper level the node pointer to a page. */ +UNIV_INTERN +void +btr_node_ptr_delete( +/*================*/ + dict_index_t* index, /*!< in: index tree */ + buf_block_t* block, /*!< in: page whose node pointer is deleted */ + mtr_t* mtr); /*!< in: mtr */ +#ifdef UNIV_DEBUG +/************************************************************//** +Checks that the node pointer to a page is appropriate. +@return TRUE */ +UNIV_INTERN +ibool +btr_check_node_ptr( +/*===============*/ + dict_index_t* index, /*!< in: index tree */ + buf_block_t* block, /*!< in: index page */ + mtr_t* mtr); /*!< in: mtr */ +#endif /* UNIV_DEBUG */ +/*************************************************************//** +Tries to merge the page first to the left immediate brother if such a +brother exists, and the node pointers to the current page and to the +brother reside on the same page. If the left brother does not satisfy these +conditions, looks at the right brother. If the page is the only one on that +level lifts the records of the page to the father page, thus reducing the +tree height. It is assumed that mtr holds an x-latch on the tree and on the +page. If cursor is on the leaf level, mtr must also hold x-latches to +the brothers, if they exist. +@return TRUE on success */ +UNIV_INTERN +ibool +btr_compress( +/*=========*/ + btr_cur_t* cursor, /*!< in: cursor on the page to merge or lift; + the page must not be empty: in record delete + use btr_discard_page if the page would become + empty */ + mtr_t* mtr); /*!< in: mtr */ +/*************************************************************//** +Discards a page from a B-tree. This is used to remove the last record from +a B-tree page: the whole page must be removed at the same time. This cannot +be used for the root page, which is allowed to be empty. */ +UNIV_INTERN +void +btr_discard_page( +/*=============*/ + btr_cur_t* cursor, /*!< in: cursor on the page to discard: not on + the root page */ + mtr_t* mtr); /*!< in: mtr */ +#endif /* !UNIV_HOTBACKUP */ +/****************************************************************//** +Parses the redo log record for setting an index record as the predefined +minimum record. +@return end of log record or NULL */ +UNIV_INTERN +byte* +btr_parse_set_min_rec_mark( +/*=======================*/ + byte* ptr, /*!< in: buffer */ + byte* end_ptr,/*!< in: buffer end */ + ulint comp, /*!< in: nonzero=compact page format */ + page_t* page, /*!< in: page or NULL */ + mtr_t* mtr); /*!< in: mtr or NULL */ +/***********************************************************//** +Parses a redo log record of reorganizing a page. +@return end of log record or NULL */ +UNIV_INTERN +byte* +btr_parse_page_reorganize( +/*======================*/ + byte* ptr, /*!< in: buffer */ + byte* end_ptr,/*!< in: buffer end */ + dict_index_t* index, /*!< in: record descriptor */ + buf_block_t* block, /*!< in: page to be reorganized, or NULL */ + mtr_t* mtr); /*!< in: mtr or NULL */ +#ifndef UNIV_HOTBACKUP +/**************************************************************//** +Gets the number of pages in a B-tree. +@return number of pages */ +UNIV_INTERN +ulint +btr_get_size( +/*=========*/ + dict_index_t* index, /*!< in: index */ + ulint flag); /*!< in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ +/**************************************************************//** +Allocates a new file page to be used in an index tree. NOTE: we assume +that the caller has made the reservation for free extents! +@return new allocated block, x-latched; NULL if out of space */ +UNIV_INTERN +buf_block_t* +btr_page_alloc( +/*===========*/ + dict_index_t* index, /*!< in: index tree */ + ulint hint_page_no, /*!< in: hint of a good page */ + byte file_direction, /*!< in: direction where a possible + page split is made */ + ulint level, /*!< in: level where the page is placed + in the tree */ + mtr_t* mtr); /*!< in: mtr */ +/**************************************************************//** +Frees a file page used in an index tree. NOTE: cannot free field external +storage pages because the page must contain info on its level. */ +UNIV_INTERN +void +btr_page_free( +/*==========*/ + dict_index_t* index, /*!< in: index tree */ + buf_block_t* block, /*!< in: block to be freed, x-latched */ + mtr_t* mtr); /*!< in: mtr */ +/**************************************************************//** +Frees a file page used in an index tree. Can be used also to BLOB +external storage pages, because the page level 0 can be given as an +argument. */ +UNIV_INTERN +void +btr_page_free_low( +/*==============*/ + dict_index_t* index, /*!< in: index tree */ + buf_block_t* block, /*!< in: block to be freed, x-latched */ + ulint level, /*!< in: page level */ + mtr_t* mtr); /*!< in: mtr */ +#ifdef UNIV_BTR_PRINT +/*************************************************************//** +Prints size info of a B-tree. */ +UNIV_INTERN +void +btr_print_size( +/*===========*/ + dict_index_t* index); /*!< in: index tree */ +/**************************************************************//** +Prints directories and other info of all nodes in the index. */ +UNIV_INTERN +void +btr_print_index( +/*============*/ + dict_index_t* index, /*!< in: index */ + ulint width); /*!< in: print this many entries from start + and end */ +#endif /* UNIV_BTR_PRINT */ +/************************************************************//** +Checks the size and number of fields in a record based on the definition of +the index. +@return TRUE if ok */ +UNIV_INTERN +ibool +btr_index_rec_validate( +/*===================*/ + const rec_t* rec, /*!< in: index record */ + const dict_index_t* index, /*!< in: index */ + ibool dump_on_error); /*!< in: TRUE if the function + should print hex dump of record + and page on error */ +/**************************************************************//** +Checks the consistency of an index tree. +@return TRUE if ok */ +UNIV_INTERN +ibool +btr_validate_index( +/*===============*/ + dict_index_t* index, /*!< in: index */ + trx_t* trx); /*!< in: transaction or NULL */ + +#define BTR_N_LEAF_PAGES 1 +#define BTR_TOTAL_SIZE 2 +#endif /* !UNIV_HOTBACKUP */ + +#ifndef UNIV_NONINL +#include "btr0btr.ic" +#endif + +#endif |