diff options
author | unknown <knielsen@knielsen-hq.org> | 2009-06-09 13:16:11 +0200 |
---|---|---|
committer | unknown <knielsen@knielsen-hq.org> | 2009-06-09 13:16:11 +0200 |
commit | a6b7f71329ceb7d0188572f494b5d1a1f0461fc5 (patch) | |
tree | d7e62c1af5118cd3ec9346de436569e907fcc51d /storage/xtradb/include | |
parent | b125770aaadd09e839ad9211047e88095984308b (diff) | |
parent | 107072563d771422c9bbb9aeeedce8ae19c5b838 (diff) | |
download | mariadb-git-a6b7f71329ceb7d0188572f494b5d1a1f0461fc5.tar.gz |
Import Percona XtraDB into the MariaDB source tree.
Diffstat (limited to 'storage/xtradb/include')
192 files changed, 50154 insertions, 0 deletions
diff --git a/storage/xtradb/include/btr0btr.h b/storage/xtradb/include/btr0btr.h new file mode 100644 index 00000000000..298942bd542 --- /dev/null +++ b/storage/xtradb/include/btr0btr.h @@ -0,0 +1,494 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The B-tree + +Created 6/2/1994 Heikki Tuuri +*******************************************************/ + +#ifndef btr0btr_h +#define btr0btr_h + +#include "univ.i" + +#include "dict0dict.h" +#include "data0data.h" +#include "page0cur.h" +#include "mtr0mtr.h" +#include "btr0types.h" + +/* Maximum record size which can be stored on a page, without using the +special big record storage structure */ + +#define BTR_PAGE_MAX_REC_SIZE (UNIV_PAGE_SIZE / 2 - 200) + +/* Maximum depth of a B-tree in InnoDB. Note that this isn't a maximum as +such; none of the tree operations avoid producing trees bigger than this. It +is instead a "max depth that other code must work with", useful for e.g. +fixed-size arrays that must store some information about each level in a +tree. 
In other words: if a B-tree with bigger depth than this is +encountered, it is not acceptable for it to lead to mysterious memory +corruption, but it is acceptable for the program to die with a clear assert +failure. */ +#define BTR_MAX_LEVELS 100 + +/* Latching modes for btr_cur_search_to_nth_level(). */ +#define BTR_SEARCH_LEAF RW_S_LATCH +#define BTR_MODIFY_LEAF RW_X_LATCH +#define BTR_NO_LATCHES RW_NO_LATCH +#define BTR_MODIFY_TREE 33 +#define BTR_CONT_MODIFY_TREE 34 +#define BTR_SEARCH_PREV 35 +#define BTR_MODIFY_PREV 36 + +/* If this is ORed to the latch mode, it means that the search tuple will be +inserted to the index, at the searched position */ +#define BTR_INSERT 512 + +/* This flag ORed to latch mode says that we do the search in query +optimization */ +#define BTR_ESTIMATE 1024 + +/* This flag ORed to latch mode says that we can ignore possible +UNIQUE definition on secondary indexes when we decide if we can use the +insert buffer to speed up inserts */ +#define BTR_IGNORE_SEC_UNIQUE 2048 + +/****************************************************************** +Gets the root node of a tree and x-latches it. */ +UNIV_INTERN +page_t* +btr_root_get( +/*=========*/ + /* out: root page, x-latched */ + dict_index_t* index, /* in: index tree */ + mtr_t* mtr); /* in: mtr */ +/****************************************************************** +Gets a buffer page and declares its latching order level. */ +UNIV_INLINE +buf_block_t* +btr_block_get( +/*==========*/ + ulint space, /* in: space id */ + ulint zip_size, /* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /* in: page number */ + ulint mode, /* in: latch mode */ + mtr_t* mtr); /* in: mtr */ +/****************************************************************** +Gets a buffer page and declares its latching order level. 
*/ +UNIV_INLINE +page_t* +btr_page_get( +/*=========*/ + ulint space, /* in: space id */ + ulint zip_size, /* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /* in: page number */ + ulint mode, /* in: latch mode */ + mtr_t* mtr); /* in: mtr */ +/****************************************************************** +Gets the index id field of a page. */ +UNIV_INLINE +dulint +btr_page_get_index_id( +/*==================*/ + /* out: index id */ + const page_t* page); /* in: index page */ +/************************************************************ +Gets the node level field in an index page. */ +UNIV_INLINE +ulint +btr_page_get_level_low( +/*===================*/ + /* out: level, leaf level == 0 */ + const page_t* page); /* in: index page */ +/************************************************************ +Gets the node level field in an index page. */ +UNIV_INLINE +ulint +btr_page_get_level( +/*===============*/ + /* out: level, leaf level == 0 */ + const page_t* page, /* in: index page */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************ +Gets the next index page number. */ +UNIV_INLINE +ulint +btr_page_get_next( +/*==============*/ + /* out: next page number */ + const page_t* page, /* in: index page */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************ +Gets the previous index page number. */ +UNIV_INLINE +ulint +btr_page_get_prev( +/*==============*/ + /* out: prev page number */ + const page_t* page, /* in: index page */ + mtr_t* mtr); /* in: mini-transaction handle */ +/***************************************************************** +Gets pointer to the previous user record in the tree. It is assumed +that the caller has appropriate latches on the page and its neighbor. 
*/ +UNIV_INTERN +rec_t* +btr_get_prev_user_rec( +/*==================*/ + /* out: previous user record, NULL if there is none */ + rec_t* rec, /* in: record on leaf level */ + mtr_t* mtr); /* in: mtr holding a latch on the page, and if + needed, also to the previous page */ +/***************************************************************** +Gets pointer to the next user record in the tree. It is assumed +that the caller has appropriate latches on the page and its neighbor. */ +UNIV_INTERN +rec_t* +btr_get_next_user_rec( +/*==================*/ + /* out: next user record, NULL if there is none */ + rec_t* rec, /* in: record on leaf level */ + mtr_t* mtr); /* in: mtr holding a latch on the page, and if + needed, also to the next page */ +/****************************************************************** +Releases the latch on a leaf page and bufferunfixes it. */ +UNIV_INLINE +void +btr_leaf_page_release( +/*==================*/ + buf_block_t* block, /* in: buffer block */ + ulint latch_mode, /* in: BTR_SEARCH_LEAF or + BTR_MODIFY_LEAF */ + mtr_t* mtr); /* in: mtr */ +/****************************************************************** +Gets the child node file address in a node pointer. */ +UNIV_INLINE +ulint +btr_node_ptr_get_child_page_no( +/*===========================*/ + /* out: child node address */ + const rec_t* rec, /* in: node pointer record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/**************************************************************** +Creates the root node for a new index tree. 
*/ +UNIV_INTERN +ulint +btr_create( +/*=======*/ + /* out: page number of the created root, + FIL_NULL if did not succeed */ + ulint type, /* in: type of the index */ + ulint space, /* in: space where created */ + ulint zip_size,/* in: compressed page size in bytes + or 0 for uncompressed pages */ + dulint index_id,/* in: index id */ + dict_index_t* index, /* in: index */ + mtr_t* mtr); /* in: mini-transaction handle */ +/**************************************************************** +Frees a B-tree except the root page, which MUST be freed after this +by calling btr_free_root. */ +UNIV_INTERN +void +btr_free_but_not_root( +/*==================*/ + ulint space, /* in: space where created */ + ulint zip_size, /* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint root_page_no); /* in: root page number */ +/**************************************************************** +Frees the B-tree root page. Other tree MUST already have been freed. */ +UNIV_INTERN +void +btr_free_root( +/*==========*/ + ulint space, /* in: space where created */ + ulint zip_size, /* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint root_page_no, /* in: root page number */ + mtr_t* mtr); /* in: a mini-transaction which has already + been started */ +/***************************************************************** +Makes tree one level higher by splitting the root, and inserts +the tuple. It is assumed that mtr contains an x-latch on the tree. +NOTE that the operation of this function must always succeed, +we cannot reverse it: therefore enough free disk space must be +guaranteed to be available before this function is called. 
*/ +UNIV_INTERN +rec_t* +btr_root_raise_and_insert( +/*======================*/ + /* out: inserted record */ + btr_cur_t* cursor, /* in: cursor at which to insert: must be + on the root page; when the function returns, + the cursor is positioned on the predecessor + of the inserted record */ + const dtuple_t* tuple, /* in: tuple to insert */ + ulint n_ext, /* in: number of externally stored columns */ + mtr_t* mtr); /* in: mtr */ +/***************************************************************** +Reorganizes an index page. +IMPORTANT: if btr_page_reorganize() is invoked on a compressed leaf +page of a non-clustered index, the caller must update the insert +buffer free bits in the same mini-transaction in such a way that the +modification will be redo-logged. */ +UNIV_INTERN +ibool +btr_page_reorganize( +/*================*/ + /* out: TRUE on success, FALSE on failure */ + buf_block_t* block, /* in: page to be reorganized */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ +/***************************************************************** +Decides if the page should be split at the convergence point of +inserts converging to left. */ +UNIV_INTERN +ibool +btr_page_get_split_rec_to_left( +/*===========================*/ + /* out: TRUE if split recommended */ + btr_cur_t* cursor, /* in: cursor at which to insert */ + rec_t** split_rec);/* out: if split recommended, + the first record on upper half page, + or NULL if tuple should be first */ +/***************************************************************** +Decides if the page should be split at the convergence point of +inserts converging to right. 
*/ +UNIV_INTERN +ibool +btr_page_get_split_rec_to_right( +/*============================*/ + /* out: TRUE if split recommended */ + btr_cur_t* cursor, /* in: cursor at which to insert */ + rec_t** split_rec);/* out: if split recommended, + the first record on upper half page, + or NULL if tuple should be first */ +/***************************************************************** +Splits an index page to halves and inserts the tuple. It is assumed +that mtr holds an x-latch to the index tree. NOTE: the tree x-latch +is released within this function! NOTE that the operation of this +function must always succeed, we cannot reverse it: therefore +enough free disk space must be guaranteed to be available before +this function is called. */ +UNIV_INTERN +rec_t* +btr_page_split_and_insert( +/*======================*/ + /* out: inserted record; NOTE: the tree + x-latch is released! NOTE: 2 free disk + pages must be available! */ + btr_cur_t* cursor, /* in: cursor at which to insert; when the + function returns, the cursor is positioned + on the predecessor of the inserted record */ + const dtuple_t* tuple, /* in: tuple to insert */ + ulint n_ext, /* in: number of externally stored columns */ + mtr_t* mtr); /* in: mtr */ +/*********************************************************** +Inserts a data tuple to a tree on a non-leaf level. It is assumed +that mtr holds an x-latch on the tree. */ +UNIV_INTERN +void +btr_insert_on_non_leaf_level( +/*=========================*/ + dict_index_t* index, /* in: index */ + ulint level, /* in: level, must be > 0 */ + dtuple_t* tuple, /* in: the record to be inserted */ + mtr_t* mtr); /* in: mtr */ +/******************************************************************** +Sets a record as the predefined minimum record. 
*/ +UNIV_INTERN +void +btr_set_min_rec_mark( +/*=================*/ + rec_t* rec, /* in/out: record */ + mtr_t* mtr); /* in: mtr */ +/***************************************************************** +Deletes on the upper level the node pointer to a page. */ +UNIV_INTERN +void +btr_node_ptr_delete( +/*================*/ + dict_index_t* index, /* in: index tree */ + buf_block_t* block, /* in: page whose node pointer is deleted */ + mtr_t* mtr); /* in: mtr */ +#ifdef UNIV_DEBUG +/**************************************************************** +Checks that the node pointer to a page is appropriate. */ +UNIV_INTERN +ibool +btr_check_node_ptr( +/*===============*/ + /* out: TRUE */ + dict_index_t* index, /* in: index tree */ + buf_block_t* block, /* in: index page */ + mtr_t* mtr); /* in: mtr */ +#endif /* UNIV_DEBUG */ +/***************************************************************** +Tries to merge the page first to the left immediate brother if such a +brother exists, and the node pointers to the current page and to the +brother reside on the same page. If the left brother does not satisfy these +conditions, looks at the right brother. If the page is the only one on that +level lifts the records of the page to the father page, thus reducing the +tree height. It is assumed that mtr holds an x-latch on the tree and on the +page. If cursor is on the leaf level, mtr must also hold x-latches to +the brothers, if they exist. */ +UNIV_INTERN +ibool +btr_compress( +/*=========*/ + /* out: TRUE on success */ + btr_cur_t* cursor, /* in: cursor on the page to merge or lift; + the page must not be empty: in record delete + use btr_discard_page if the page would become + empty */ + mtr_t* mtr); /* in: mtr */ +/***************************************************************** +Discards a page from a B-tree. This is used to remove the last record from +a B-tree page: the whole page must be removed at the same time. 
This cannot +be used for the root page, which is allowed to be empty. */ +UNIV_INTERN +void +btr_discard_page( +/*=============*/ + btr_cur_t* cursor, /* in: cursor on the page to discard: not on + the root page */ + mtr_t* mtr); /* in: mtr */ +/******************************************************************** +Parses the redo log record for setting an index record as the predefined +minimum record. */ +UNIV_INTERN +byte* +btr_parse_set_min_rec_mark( +/*=======================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + ulint comp, /* in: nonzero=compact page format */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ +/*************************************************************** +Parses a redo log record of reorganizing a page. */ +UNIV_INTERN +byte* +btr_parse_page_reorganize( +/*======================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + buf_block_t* block, /* in: page to be reorganized, or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ +/****************************************************************** +Gets the number of pages in a B-tree. */ +UNIV_INTERN +ulint +btr_get_size( +/*=========*/ + /* out: number of pages */ + dict_index_t* index, /* in: index */ + ulint flag); /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ +/****************************************************************** +Allocates a new file page to be used in an index tree. NOTE: we assume +that the caller has made the reservation for free extents! 
*/ +UNIV_INTERN +buf_block_t* +btr_page_alloc( +/*===========*/ + /* out: new allocated block, x-latched; + NULL if out of space */ + dict_index_t* index, /* in: index tree */ + ulint hint_page_no, /* in: hint of a good page */ + byte file_direction, /* in: direction where a possible + page split is made */ + ulint level, /* in: level where the page is placed + in the tree */ + mtr_t* mtr); /* in: mtr */ +/****************************************************************** +Frees a file page used in an index tree. NOTE: cannot free field external +storage pages because the page must contain info on its level. */ +UNIV_INTERN +void +btr_page_free( +/*==========*/ + dict_index_t* index, /* in: index tree */ + buf_block_t* block, /* in: block to be freed, x-latched */ + mtr_t* mtr); /* in: mtr */ +/****************************************************************** +Frees a file page used in an index tree. Can be used also to BLOB +external storage pages, because the page level 0 can be given as an +argument. */ +UNIV_INTERN +void +btr_page_free_low( +/*==============*/ + dict_index_t* index, /* in: index tree */ + buf_block_t* block, /* in: block to be freed, x-latched */ + ulint level, /* in: page level */ + mtr_t* mtr); /* in: mtr */ +#ifdef UNIV_BTR_PRINT +/***************************************************************** +Prints size info of a B-tree. */ +UNIV_INTERN +void +btr_print_size( +/*===========*/ + dict_index_t* index); /* in: index tree */ +/****************************************************************** +Prints directories and other info of all nodes in the index. */ +UNIV_INTERN +void +btr_print_index( +/*============*/ + dict_index_t* index, /* in: index */ + ulint width); /* in: print this many entries from start + and end */ +#endif /* UNIV_BTR_PRINT */ +/**************************************************************** +Checks the size and number of fields in a record based on the definition of +the index. 
*/ +UNIV_INTERN +ibool +btr_index_rec_validate( +/*===================*/ + /* out: TRUE if ok */ + const rec_t* rec, /* in: index record */ + const dict_index_t* index, /* in: index */ + ibool dump_on_error); /* in: TRUE if the function + should print hex dump of record + and page on error */ +/****************************************************************** +Checks the consistency of an index tree. */ +UNIV_INTERN +ibool +btr_validate_index( +/*===============*/ + /* out: TRUE if ok */ + dict_index_t* index, /* in: index */ + trx_t* trx); /* in: transaction or NULL */ + +#define BTR_N_LEAF_PAGES 1 +#define BTR_TOTAL_SIZE 2 + +#ifndef UNIV_NONINL +#include "btr0btr.ic" +#endif + +#endif diff --git a/storage/xtradb/include/btr0btr.ic b/storage/xtradb/include/btr0btr.ic new file mode 100644 index 00000000000..a8d934ecc87 --- /dev/null +++ b/storage/xtradb/include/btr0btr.ic @@ -0,0 +1,301 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The B-tree + +Created 6/2/1994 Heikki Tuuri +*******************************************************/ + +#include "mach0data.h" +#include "mtr0mtr.h" +#include "mtr0log.h" +#include "page0zip.h" + +#define BTR_MAX_NODE_LEVEL 50 /* used in debug checking */ + +/****************************************************************** +Gets a buffer page and declares its latching order level. */ +UNIV_INLINE +buf_block_t* +btr_block_get( +/*==========*/ + ulint space, /* in: space id */ + ulint zip_size, /* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /* in: page number */ + ulint mode, /* in: latch mode */ + mtr_t* mtr) /* in: mtr */ +{ + buf_block_t* block; + + block = buf_page_get(space, zip_size, page_no, mode, mtr); + + if (mode != RW_NO_LATCH) { + + buf_block_dbg_add_level(block, SYNC_TREE_NODE); + } + + return(block); +} + +/****************************************************************** +Gets a buffer page and declares its latching order level. */ +UNIV_INLINE +page_t* +btr_page_get( +/*=========*/ + ulint space, /* in: space id */ + ulint zip_size, /* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /* in: page number */ + ulint mode, /* in: latch mode */ + mtr_t* mtr) /* in: mtr */ +{ + return(buf_block_get_frame(btr_block_get(space, zip_size, page_no, + mode, mtr))); +} + +/****************************************************************** +Sets the index id field of a page. 
*/ +UNIV_INLINE +void +btr_page_set_index_id( +/*==================*/ + page_t* page, /* in: page to be created */ + page_zip_des_t* page_zip,/* in: compressed page whose uncompressed + part will be updated, or NULL */ + dulint id, /* in: index id */ + mtr_t* mtr) /* in: mtr */ +{ + if (UNIV_LIKELY_NULL(page_zip)) { + mach_write_to_8(page + (PAGE_HEADER + PAGE_INDEX_ID), id); + page_zip_write_header(page_zip, + page + (PAGE_HEADER + PAGE_INDEX_ID), + 8, mtr); + } else { + mlog_write_dulint(page + (PAGE_HEADER + PAGE_INDEX_ID), + id, mtr); + } +} + +/****************************************************************** +Gets the index id field of a page. */ +UNIV_INLINE +dulint +btr_page_get_index_id( +/*==================*/ + /* out: index id */ + const page_t* page) /* in: index page */ +{ + return(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)); +} + +/************************************************************ +Gets the node level field in an index page. */ +UNIV_INLINE +ulint +btr_page_get_level_low( +/*===================*/ + /* out: level, leaf level == 0 */ + const page_t* page) /* in: index page */ +{ + ulint level; + + ut_ad(page); + + level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL); + + ut_ad(level <= BTR_MAX_NODE_LEVEL); + + return(level); +} + +/************************************************************ +Gets the node level field in an index page. */ +UNIV_INLINE +ulint +btr_page_get_level( +/*===============*/ + /* out: level, leaf level == 0 */ + const page_t* page, /* in: index page */ + mtr_t* mtr __attribute__((unused))) + /* in: mini-transaction handle */ +{ + ut_ad(page && mtr); + + return(btr_page_get_level_low(page)); +} + +/************************************************************ +Sets the node level field in an index page. 
*/ +UNIV_INLINE +void +btr_page_set_level( +/*===============*/ + page_t* page, /* in: index page */ + page_zip_des_t* page_zip,/* in: compressed page whose uncompressed + part will be updated, or NULL */ + ulint level, /* in: level, leaf level == 0 */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + ut_ad(page && mtr); + ut_ad(level <= BTR_MAX_NODE_LEVEL); + + if (UNIV_LIKELY_NULL(page_zip)) { + mach_write_to_2(page + (PAGE_HEADER + PAGE_LEVEL), level); + page_zip_write_header(page_zip, + page + (PAGE_HEADER + PAGE_LEVEL), + 2, mtr); + } else { + mlog_write_ulint(page + (PAGE_HEADER + PAGE_LEVEL), level, + MLOG_2BYTES, mtr); + } +} + +/************************************************************ +Gets the next index page number. */ +UNIV_INLINE +ulint +btr_page_get_next( +/*==============*/ + /* out: next page number */ + const page_t* page, /* in: index page */ + mtr_t* mtr __attribute__((unused))) + /* in: mini-transaction handle */ +{ + ut_ad(page && mtr); + ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX) + || mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_S_FIX)); + + return(mach_read_from_4(page + FIL_PAGE_NEXT)); +} + +/************************************************************ +Sets the next index page field. */ +UNIV_INLINE +void +btr_page_set_next( +/*==============*/ + page_t* page, /* in: index page */ + page_zip_des_t* page_zip,/* in: compressed page whose uncompressed + part will be updated, or NULL */ + ulint next, /* in: next page number */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + ut_ad(page && mtr); + + if (UNIV_LIKELY_NULL(page_zip)) { + mach_write_to_4(page + FIL_PAGE_NEXT, next); + page_zip_write_header(page_zip, page + FIL_PAGE_NEXT, 4, mtr); + } else { + mlog_write_ulint(page + FIL_PAGE_NEXT, next, MLOG_4BYTES, mtr); + } +} + +/************************************************************ +Gets the previous index page number. 
*/ +UNIV_INLINE +ulint +btr_page_get_prev( +/*==============*/ + /* out: prev page number */ + const page_t* page, /* in: index page */ + mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */ +{ + ut_ad(page && mtr); + + return(mach_read_from_4(page + FIL_PAGE_PREV)); +} + +/************************************************************ +Sets the previous index page field. */ +UNIV_INLINE +void +btr_page_set_prev( +/*==============*/ + page_t* page, /* in: index page */ + page_zip_des_t* page_zip,/* in: compressed page whose uncompressed + part will be updated, or NULL */ + ulint prev, /* in: previous page number */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + ut_ad(page && mtr); + + if (UNIV_LIKELY_NULL(page_zip)) { + mach_write_to_4(page + FIL_PAGE_PREV, prev); + page_zip_write_header(page_zip, page + FIL_PAGE_PREV, 4, mtr); + } else { + mlog_write_ulint(page + FIL_PAGE_PREV, prev, MLOG_4BYTES, mtr); + } +} + +/****************************************************************** +Gets the child node file address in a node pointer. */ +UNIV_INLINE +ulint +btr_node_ptr_get_child_page_no( +/*===========================*/ + /* out: child node address */ + const rec_t* rec, /* in: node pointer record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + const byte* field; + ulint len; + ulint page_no; + + ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec)); + + /* The child address is in the last field */ + field = rec_get_nth_field(rec, offsets, + rec_offs_n_fields(offsets) - 1, &len); + + ut_ad(len == 4); + + page_no = mach_read_from_4(field); + + if (UNIV_UNLIKELY(page_no == 0)) { + fprintf(stderr, + "InnoDB: a nonsensical page number 0" + " in a node ptr record at offset %lu\n", + (ulong) page_offset(rec)); + buf_page_print(page_align(rec), 0); + } + + return(page_no); +} + +/****************************************************************** +Releases the latches on a leaf page and bufferunfixes it. 
*/ +UNIV_INLINE +void +btr_leaf_page_release( +/*==================*/ + buf_block_t* block, /* in: buffer block */ + ulint latch_mode, /* in: BTR_SEARCH_LEAF or + BTR_MODIFY_LEAF */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(latch_mode == BTR_SEARCH_LEAF || latch_mode == BTR_MODIFY_LEAF); + ut_ad(!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)); + + mtr_memo_release(mtr, block, + latch_mode == BTR_SEARCH_LEAF + ? MTR_MEMO_PAGE_S_FIX + : MTR_MEMO_PAGE_X_FIX); +} diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h new file mode 100644 index 00000000000..c3a478c4bb7 --- /dev/null +++ b/storage/xtradb/include/btr0cur.h @@ -0,0 +1,742 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The index tree cursor + +Created 10/16/1994 Heikki Tuuri +*******************************************************/ + +#ifndef btr0cur_h +#define btr0cur_h + +#include "univ.i" +#include "dict0dict.h" +#include "page0cur.h" +#include "btr0types.h" +#include "que0types.h" +#include "row0types.h" +#include "ha0ha.h" + +/* Mode flags for btr_cur operations; these can be ORed */ +#define BTR_NO_UNDO_LOG_FLAG 1 /* do no undo logging */ +#define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */ +#define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the + update vector or inserted entry */ + +#define BTR_CUR_ADAPT +#define BTR_CUR_HASH_ADAPT + +#ifdef UNIV_DEBUG +/************************************************************* +Returns the page cursor component of a tree cursor. */ +UNIV_INLINE +page_cur_t* +btr_cur_get_page_cur( +/*=================*/ + /* out: pointer to page cursor + component */ + const btr_cur_t* cursor);/* in: tree cursor */ +#else /* UNIV_DEBUG */ +# define btr_cur_get_page_cur(cursor) (&(cursor)->page_cur) +#endif /* UNIV_DEBUG */ +/************************************************************* +Returns the buffer block on which the tree cursor is positioned. */ +UNIV_INLINE +buf_block_t* +btr_cur_get_block( +/*==============*/ + /* out: pointer to buffer block */ + btr_cur_t* cursor);/* in: tree cursor */ +/************************************************************* +Returns the record pointer of a tree cursor. 
*/ +UNIV_INLINE +rec_t* +btr_cur_get_rec( +/*============*/ + /* out: pointer to record */ + btr_cur_t* cursor);/* in: tree cursor */ +/************************************************************* +Returns the compressed page on which the tree cursor is positioned. */ +UNIV_INLINE +page_zip_des_t* +btr_cur_get_page_zip( +/*=================*/ + /* out: pointer to compressed page, + or NULL if the page is not compressed */ + btr_cur_t* cursor);/* in: tree cursor */ +/************************************************************* +Invalidates a tree cursor by setting record pointer to NULL. */ +UNIV_INLINE +void +btr_cur_invalidate( +/*===============*/ + btr_cur_t* cursor);/* in: tree cursor */ +/************************************************************* +Returns the page of a tree cursor. */ +UNIV_INLINE +page_t* +btr_cur_get_page( +/*=============*/ + /* out: pointer to page */ + btr_cur_t* cursor);/* in: tree cursor */ +/************************************************************* +Returns the index of a cursor. */ +UNIV_INLINE +dict_index_t* +btr_cur_get_index( +/*==============*/ + /* out: index */ + btr_cur_t* cursor);/* in: B-tree cursor */ +/************************************************************* +Positions a tree cursor at a given record. */ +UNIV_INLINE +void +btr_cur_position( +/*=============*/ + dict_index_t* index, /* in: index */ + rec_t* rec, /* in: record in tree */ + buf_block_t* block, /* in: buffer block of rec */ + btr_cur_t* cursor);/* out: cursor */ +/************************************************************************ +Searches an index tree and positions a tree cursor on a given level. +NOTE: n_fields_cmp in tuple must be set so that it cannot be compared +to node pointer page number fields on the upper levels of the tree! +Note that if mode is PAGE_CUR_LE, which is used in inserts, then +cursor->up_match and cursor->low_match both will have sensible values. +If mode is PAGE_CUR_GE, then up_match will have a sensible value. 
*/ +UNIV_INTERN +void +btr_cur_search_to_nth_level( +/*========================*/ + dict_index_t* index, /* in: index */ + ulint level, /* in: the tree level of search */ + const dtuple_t* tuple, /* in: data tuple; NOTE: n_fields_cmp in + tuple must be set so that it cannot get + compared to the node ptr page number field! */ + ulint mode, /* in: PAGE_CUR_L, ...; + NOTE that if the search is made using a unique + prefix of a record, mode should be PAGE_CUR_LE, + not PAGE_CUR_GE, as the latter may end up on + the previous page of the record! Inserts + should always be made using PAGE_CUR_LE to + search the position! */ + ulint latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with + BTR_INSERT and BTR_ESTIMATE; + cursor->left_block is used to store a pointer + to the left neighbor page, in the cases + BTR_SEARCH_PREV and BTR_MODIFY_PREV; + NOTE that if has_search_latch + is != 0, we may not have a latch set + on the cursor page; we assume + the caller uses its search latch + to protect the record! */ + btr_cur_t* cursor, /* in/out: tree cursor; the cursor page is + s- or x-latched, but see also above! */ + ulint has_search_latch,/* in: latch mode the caller + currently has on btr_search_latch: + RW_S_LATCH, or 0 */ + mtr_t* mtr); /* in: mtr */ +/********************************************************************* +Opens a cursor at either end of an index. */ +UNIV_INTERN +void +btr_cur_open_at_index_side( +/*=======================*/ + ibool from_left, /* in: TRUE if open to the low end, + FALSE if to the high end */ + dict_index_t* index, /* in: index */ + ulint latch_mode, /* in: latch mode */ + btr_cur_t* cursor, /* in: cursor */ + mtr_t* mtr); /* in: mtr */ +/************************************************************************** +Positions a cursor at a randomly chosen position within a B-tree. */ +UNIV_INTERN +void +btr_cur_open_at_rnd_pos( +/*====================*/ + dict_index_t* index, /* in: index */ + ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... 
*/ + btr_cur_t* cursor, /* in/out: B-tree cursor */ + mtr_t* mtr); /* in: mtr */ +/***************************************************************** +Tries to perform an insert to a page in an index tree, next to cursor. +It is assumed that mtr holds an x-latch on the page. The operation does +not succeed if there is too little space on the page. If there is just +one record on the page, the insert will always succeed; this is to +prevent trying to split a page with just one record. */ +UNIV_INTERN +ulint +btr_cur_optimistic_insert( +/*======================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_FAIL, or error number */ + ulint flags, /* in: undo logging and locking flags: if not + zero, the parameter thr should be + specified */ + btr_cur_t* cursor, /* in: cursor on page after which to insert; + cursor stays valid */ + dtuple_t* entry, /* in/out: entry to insert */ + rec_t** rec, /* out: pointer to inserted record on + success */ + big_rec_t** big_rec,/* out: big rec vector whose fields have to + be stored externally by the caller, or + NULL */ + ulint n_ext, /* in: number of externally stored columns */ + que_thr_t* thr, /* in: query thread or NULL */ + mtr_t* mtr); /* in: mtr; if this function returns + DB_SUCCESS on a leaf page of a secondary + index in a compressed tablespace, the + mtr must be committed before latching + any further pages */ +/***************************************************************** +Performs an insert on a page of an index tree. It is assumed that mtr +holds an x-latch on the tree and on the cursor page. If the insert is +made on the leaf level, to avoid deadlocks, mtr must also own x-latches +to brothers of page, if those brothers exist. 
*/ +UNIV_INTERN +ulint +btr_cur_pessimistic_insert( +/*=======================*/ + /* out: DB_SUCCESS or error number */ + ulint flags, /* in: undo logging and locking flags: if not + zero, the parameter thr should be + specified; if no undo logging is specified, + then the caller must have reserved enough + free extents in the file space so that the + insertion will certainly succeed */ + btr_cur_t* cursor, /* in: cursor after which to insert; + cursor stays valid */ + dtuple_t* entry, /* in/out: entry to insert */ + rec_t** rec, /* out: pointer to inserted record if + succeed */ + big_rec_t** big_rec,/* out: big rec vector whose fields have to + be stored externally by the caller, or + NULL */ + ulint n_ext, /* in: number of externally stored columns */ + que_thr_t* thr, /* in: query thread or NULL */ + mtr_t* mtr); /* in: mtr */ +/***************************************************************** +Updates a record when the update causes no size changes in its fields. */ +UNIV_INTERN +ulint +btr_cur_update_in_place( +/*====================*/ + /* out: DB_SUCCESS or error number */ + ulint flags, /* in: undo logging and locking flags */ + btr_cur_t* cursor, /* in: cursor on the record to update; + cursor stays valid and positioned on the + same record */ + const upd_t* update, /* in: update vector */ + ulint cmpl_info,/* in: compiler info on secondary index + updates */ + que_thr_t* thr, /* in: query thread */ + mtr_t* mtr); /* in: mtr; must be committed before + latching any further pages */ +/***************************************************************** +Tries to update a record on a page in an index tree. It is assumed that mtr +holds an x-latch on the page. The operation does not succeed if there is too +little space on the page or if the update would result in too empty a page, +so that tree compression is recommended. 
*/ +UNIV_INTERN +ulint +btr_cur_optimistic_update( +/*======================*/ + /* out: DB_SUCCESS, or DB_OVERFLOW if the + updated record does not fit, DB_UNDERFLOW + if the page would become too empty, or + DB_ZIP_OVERFLOW if there is not enough + space left on the compressed page */ + ulint flags, /* in: undo logging and locking flags */ + btr_cur_t* cursor, /* in: cursor on the record to update; + cursor stays valid and positioned on the + same record */ + const upd_t* update, /* in: update vector; this must also + contain trx id and roll ptr fields */ + ulint cmpl_info,/* in: compiler info on secondary index + updates */ + que_thr_t* thr, /* in: query thread */ + mtr_t* mtr); /* in: mtr; must be committed before + latching any further pages */ +/***************************************************************** +Performs an update of a record on a page of a tree. It is assumed +that mtr holds an x-latch on the tree and on the cursor page. If the +update is made on the leaf level, to avoid deadlocks, mtr must also +own x-latches to brothers of page, if those brothers exist. */ +UNIV_INTERN +ulint +btr_cur_pessimistic_update( +/*=======================*/ + /* out: DB_SUCCESS or error code */ + ulint flags, /* in: undo logging, locking, and rollback + flags */ + btr_cur_t* cursor, /* in: cursor on the record to update */ + mem_heap_t** heap, /* in/out: pointer to memory heap, or NULL */ + big_rec_t** big_rec,/* out: big rec vector whose fields have to + be stored externally by the caller, or NULL */ + const upd_t* update, /* in: update vector; this is also allowed to + contain trx id and roll ptr fields, but + the values in update vector have no effect */ + ulint cmpl_info,/* in: compiler info on secondary index + updates */ + que_thr_t* thr, /* in: query thread */ + mtr_t* mtr); /* in: mtr; must be committed before + latching any further pages */ +/*************************************************************** +Marks a clustered index record deleted. 
Writes an undo log record to +undo log on this delete marking. Writes in the trx id field the id +of the deleting transaction, and in the roll ptr field pointer to the +undo log record created. */ +UNIV_INTERN +ulint +btr_cur_del_mark_set_clust_rec( +/*===========================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, or error + number */ + ulint flags, /* in: undo logging and locking flags */ + btr_cur_t* cursor, /* in: cursor */ + ibool val, /* in: value to set */ + que_thr_t* thr, /* in: query thread */ + mtr_t* mtr); /* in: mtr */ +/*************************************************************** +Sets a secondary index record delete mark to TRUE or FALSE. */ +UNIV_INTERN +ulint +btr_cur_del_mark_set_sec_rec( +/*=========================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, or error + number */ + ulint flags, /* in: locking flag */ + btr_cur_t* cursor, /* in: cursor */ + ibool val, /* in: value to set */ + que_thr_t* thr, /* in: query thread */ + mtr_t* mtr); /* in: mtr */ +/*************************************************************** +Clear a secondary index record's delete mark. This function is only +used by the insert buffer insert merge mechanism. */ +UNIV_INTERN +void +btr_cur_del_unmark_for_ibuf( +/*========================*/ + rec_t* rec, /* in/out: record to delete unmark */ + page_zip_des_t* page_zip, /* in/out: compressed page + corresponding to rec, or NULL + when the tablespace is + uncompressed */ + mtr_t* mtr); /* in: mtr */ +/***************************************************************** +Tries to compress a page of the tree if it seems useful. It is assumed +that mtr holds an x-latch on the tree and on the cursor page. To avoid +deadlocks, mtr must also own x-latches to brothers of page, if those +brothers exist. NOTE: it is assumed that the caller has reserved enough +free extents so that the compression will always succeed if done! 
*/ +UNIV_INTERN +ibool +btr_cur_compress_if_useful( +/*=======================*/ + /* out: TRUE if compression occurred */ + btr_cur_t* cursor, /* in: cursor on the page to compress; + cursor does not stay valid if compression + occurs */ + mtr_t* mtr); /* in: mtr */ +/*********************************************************** +Removes the record on which the tree cursor is positioned. It is assumed +that the mtr has an x-latch on the page where the cursor is positioned, +but no latch on the whole tree. */ +UNIV_INTERN +ibool +btr_cur_optimistic_delete( +/*======================*/ + /* out: TRUE if success, i.e., the page + did not become too empty */ + btr_cur_t* cursor, /* in: cursor on the record to delete; + cursor stays valid: if deletion succeeds, + on function exit it points to the successor + of the deleted record */ + mtr_t* mtr); /* in: mtr; if this function returns + TRUE on a leaf page of a secondary + index, the mtr must be committed + before latching any further pages */ +/***************************************************************** +Removes the record on which the tree cursor is positioned. Tries +to compress the page if its fillfactor drops below a threshold +or if it is the only page on the level. It is assumed that mtr holds +an x-latch on the tree and on the cursor page. To avoid deadlocks, +mtr must also own x-latches to brothers of page, if those brothers +exist. 
*/ +UNIV_INTERN +ibool +btr_cur_pessimistic_delete( +/*=======================*/ + /* out: TRUE if compression occurred */ + ulint* err, /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE; + the latter may occur because we may have + to update node pointers on upper levels, + and in the case of variable length keys + these may actually grow in size */ + ibool has_reserved_extents, /* in: TRUE if the + caller has already reserved enough free + extents so that he knows that the operation + will succeed */ + btr_cur_t* cursor, /* in: cursor on the record to delete; + if compression does not occur, the cursor + stays valid: it points to successor of + deleted record on function exit */ + enum trx_rb_ctx rb_ctx, /* in: rollback context */ + mtr_t* mtr); /* in: mtr */ +/*************************************************************** +Parses a redo log record of updating a record in-place. */ +UNIV_INTERN +byte* +btr_cur_parse_update_in_place( +/*==========================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in/out: page or NULL */ + page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */ + dict_index_t* index); /* in: index corresponding to page */ +/******************************************************************** +Parses the redo log record for delete marking or unmarking of a clustered +index record. */ +UNIV_INTERN +byte* +btr_cur_parse_del_mark_set_clust_rec( +/*=================================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in/out: page or NULL */ + page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */ + dict_index_t* index); /* in: index corresponding to page */ +/******************************************************************** +Parses the redo log record for delete marking or unmarking of a secondary +index record. 
*/ +UNIV_INTERN +byte* +btr_cur_parse_del_mark_set_sec_rec( +/*===============================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in/out: page or NULL */ + page_zip_des_t* page_zip);/* in/out: compressed page, or NULL */ +/*********************************************************************** +Estimates the number of rows in a given index range. */ +UNIV_INTERN +ib_int64_t +btr_estimate_n_rows_in_range( +/*=========================*/ + /* out: estimated number of rows */ + dict_index_t* index, /* in: index */ + const dtuple_t* tuple1, /* in: range start, may also be empty tuple */ + ulint mode1, /* in: search mode for range start */ + const dtuple_t* tuple2, /* in: range end, may also be empty tuple */ + ulint mode2); /* in: search mode for range end */ +/*********************************************************************** +Estimates the number of different key values in a given index, for +each n-column prefix of the index where n <= dict_index_get_n_unique(index). +The estimates are stored in the array index->stat_n_diff_key_vals. */ +UNIV_INTERN +void +btr_estimate_number_of_different_key_vals( +/*======================================*/ + dict_index_t* index); /* in: index */ +/*********************************************************************** +Marks not updated extern fields as not-owned by this record. The ownership +is transferred to the updated record which is inserted elsewhere in the +index tree. In purge only the owner of externally stored field is allowed +to free the field. 
*/ +UNIV_INTERN +void +btr_cur_mark_extern_inherited_fields( +/*=================================*/ + page_zip_des_t* page_zip,/* in/out: compressed page whose uncompressed + part will be updated, or NULL */ + rec_t* rec, /* in/out: record in a clustered index */ + dict_index_t* index, /* in: index of the page */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + const upd_t* update, /* in: update vector */ + mtr_t* mtr); /* in: mtr, or NULL if not logged */ +/*********************************************************************** +The complement of the previous function: in an update entry may inherit +some externally stored fields from a record. We must mark them as inherited +in entry, so that they are not freed in a rollback. */ +UNIV_INTERN +void +btr_cur_mark_dtuple_inherited_extern( +/*=================================*/ + dtuple_t* entry, /* in/out: updated entry to be + inserted to clustered index */ + const upd_t* update); /* in: update vector */ +/*********************************************************************** +Marks all extern fields in a dtuple as owned by the record. */ +UNIV_INTERN +void +btr_cur_unmark_dtuple_extern_fields( +/*================================*/ + dtuple_t* entry); /* in/out: clustered index entry */ +/*********************************************************************** +Stores the fields in big_rec_vec to the tablespace and puts pointers to +them in rec. The extern flags in rec will have to be set beforehand. +The fields are stored on pages allocated from leaf node +file segment of the index tree. 
*/ +UNIV_INTERN +ulint +btr_store_big_rec_extern_fields( +/*============================*/ + /* out: DB_SUCCESS or error */ + dict_index_t* index, /* in: index of rec; the index tree + MUST be X-latched */ + buf_block_t* rec_block, /* in/out: block containing rec */ + rec_t* rec, /* in: record */ + const ulint* offsets, /* in: rec_get_offsets(rec, index); + the "external storage" flags in offsets + will not correspond to rec when + this function returns */ + big_rec_t* big_rec_vec, /* in: vector containing fields + to be stored externally */ + mtr_t* local_mtr); /* in: mtr containing the latch to + rec and to the tree */ +/*********************************************************************** +Frees the space in an externally stored field to the file space +management if the field in data owns the externally stored field; +in a rollback we may have the additional condition that the field must +not be inherited. */ +UNIV_INTERN +void +btr_free_externally_stored_field( +/*=============================*/ + dict_index_t* index, /* in: index of the data, the index + tree MUST be X-latched; if the tree + height is 1, then also the root page + must be X-latched! 
(this is relevant + in the case this function is called + from purge where 'data' is located on + an undo log page, not an index + page) */ + byte* field_ref, /* in/out: field reference */ + const rec_t* rec, /* in: record containing field_ref, for + page_zip_write_blob_ptr(), or NULL */ + const ulint* offsets, /* in: rec_get_offsets(rec, index), + or NULL */ + page_zip_des_t* page_zip, /* in: compressed page corresponding + to rec, or NULL if rec == NULL */ + ulint i, /* in: field number of field_ref; + ignored if rec == NULL */ + enum trx_rb_ctx rb_ctx, /* in: rollback context */ + mtr_t* local_mtr); /* in: mtr containing the latch to + data and an X-latch to the index + tree */ +/*********************************************************************** +Copies the prefix of an externally stored field of a record. The +clustered index record must be protected by a lock or a page latch. */ +UNIV_INTERN +ulint +btr_copy_externally_stored_field_prefix( +/*====================================*/ + /* out: the length of the copied field, + or 0 if the column is being or has been + deleted */ + byte* buf, /* out: the field, or a prefix of it */ + ulint len, /* in: length of buf, in bytes */ + ulint zip_size,/* in: nonzero=compressed BLOB page size, + zero for uncompressed BLOBs */ + const byte* data, /* in: 'internally' stored part of the + field containing also the reference to + the external part; must be protected by + a lock or a page latch */ + ulint local_len);/* in: length of data, in bytes */ +/*********************************************************************** +Copies an externally stored field of a record to mem heap. 
*/ +UNIV_INTERN +byte* +btr_rec_copy_externally_stored_field( +/*=================================*/ + /* out: the field copied to heap */ + const rec_t* rec, /* in: record in a clustered index; + must be protected by a lock or a page latch */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint zip_size,/* in: nonzero=compressed BLOB page size, + zero for uncompressed BLOBs */ + ulint no, /* in: field number */ + ulint* len, /* out: length of the field */ + mem_heap_t* heap); /* in: mem heap */ +/*********************************************************************** +Flags the data tuple fields that are marked as extern storage in the +update vector. We use this function to remember which fields we must +mark as extern storage in a record inserted for an update. */ +UNIV_INTERN +ulint +btr_push_update_extern_fields( +/*==========================*/ + /* out: number of flagged external columns */ + dtuple_t* tuple, /* in/out: data tuple */ + const upd_t* update, /* in: update vector */ + mem_heap_t* heap) /* in: memory heap */ + __attribute__((nonnull)); + +/*######################################################################*/ + +/* In the pessimistic delete, if the page data size drops below this +limit, merging it to a neighbor is tried */ + +#define BTR_CUR_PAGE_COMPRESS_LIMIT (UNIV_PAGE_SIZE / 2) + +/* A slot in the path array. We store here info on a search path down the +tree. Each slot contains data on a single level of the tree. */ + +typedef struct btr_path_struct btr_path_t; +struct btr_path_struct{ + ulint nth_rec; /* index of the record + where the page cursor stopped on + this level (index in alphabetical + order); value ULINT_UNDEFINED + denotes array end */ + ulint n_recs; /* number of records on the page */ +}; + +#define BTR_PATH_ARRAY_N_SLOTS 250 /* size of path array (in slots) */ + +/* The tree cursor: the definition appears here only for the compiler +to know struct size! 
*/ + +struct btr_cur_struct { + dict_index_t* index; /* index where positioned */ + page_cur_t page_cur; /* page cursor */ + buf_block_t* left_block; /* this field is used to store + a pointer to the left neighbor + page, in the cases + BTR_SEARCH_PREV and + BTR_MODIFY_PREV */ + /*------------------------------*/ + que_thr_t* thr; /* this field is only used when + btr_cur_search_... is called for an + index entry insertion: the calling + query thread is passed here to be + used in the insert buffer */ + /*------------------------------*/ + /* The following fields are used in btr_cur_search... to pass + information: */ + ulint flag; /* BTR_CUR_HASH, BTR_CUR_HASH_FAIL, + BTR_CUR_BINARY, or + BTR_CUR_INSERT_TO_IBUF */ + ulint tree_height; /* Tree height if the search is done + for a pessimistic insert or update + operation */ + ulint up_match; /* If the search mode was PAGE_CUR_LE, + the number of matched fields to the + first user record to the right of + the cursor record after + btr_cur_search_...; + for the mode PAGE_CUR_GE, the matched + fields to the first user record AT THE + CURSOR or to the right of it; + NOTE that the up_match and low_match + values may exceed the correct values + for comparison to the adjacent user + record if that record is on a + different leaf page! (See the note in + row_ins_duplicate_key.) */ + ulint up_bytes; /* number of matched bytes to the + right at the time cursor positioned; + only used internally in searches: not + defined after the search */ + ulint low_match; /* if search mode was PAGE_CUR_LE, + the number of matched fields to the + first user record AT THE CURSOR or + to the left of it after + btr_cur_search_...; + NOT defined for PAGE_CUR_GE or any + other search modes; see also the NOTE + in up_match! 
*/ + ulint low_bytes; /* number of matched bytes to the + right at the time cursor positioned; + only used internally in searches: not + defined after the search */ + ulint n_fields; /* prefix length used in a hash + search if hash_node != NULL + (NOTE(review): no hash_node member is + declared in this struct as shown here; + confirm whether this wording is stale) */ + ulint n_bytes; /* hash prefix bytes if hash_node != + NULL */ + ulint fold; /* fold value used in the search if + flag is BTR_CUR_HASH */ + /*------------------------------*/ + btr_path_t* path_arr; /* in estimating the number of + rows in range, we store in this array + information of the path through + the tree */ +}; + +/* Values for the flag documenting the used search method */ +#define BTR_CUR_HASH 1 /* successful shortcut using the hash + index */ +#define BTR_CUR_HASH_FAIL 2 /* failure using hash, success using + binary search: the misleading hash + reference is stored in the field + hash_node, and might be necessary to + update */ +#define BTR_CUR_BINARY 3 /* success using the binary search */ +#define BTR_CUR_INSERT_TO_IBUF 4 /* performed the intended insert to + the insert buffer */ + +/* If pessimistic delete fails because of lack of file space, +there is still a good chance of success a little later: try this many times, +and sleep this many microseconds in between */ +#define BTR_CUR_RETRY_DELETE_N_TIMES 100 +#define BTR_CUR_RETRY_SLEEP_TIME 50000 + +/* The reference in a field for which data is stored on a different page. +The reference is at the end of the 'locally' stored part of the field. +'Locally' means storage in the index record. +We store locally a long enough prefix of each column so that we can determine +the ordering parts of each index record without looking into the externally +stored part. 
*/ + +/*--------------------------------------*/ +#define BTR_EXTERN_SPACE_ID 0 /* space id where stored */ +#define BTR_EXTERN_PAGE_NO 4 /* page no where stored */ +#define BTR_EXTERN_OFFSET 8 /* offset of BLOB header + on that page */ +#define BTR_EXTERN_LEN 12 /* 8 bytes containing the + length of the externally + stored part of the BLOB. + The 2 highest bits are + reserved to the flags below. */ +/*--------------------------------------*/ +/* #define BTR_EXTERN_FIELD_REF_SIZE 20 // moved to btr0types.h */ + +/* The highest bit of BTR_EXTERN_LEN (i.e., the highest bit of the byte +at lowest address) is set to 1 if this field does not 'own' the externally +stored field; only the owner field is allowed to free the field in purge! +If the 2nd highest bit is 1 then it means that the externally stored field +was inherited from an earlier version of the row. In rollback we are not +allowed to free an inherited external field. */ + +#define BTR_EXTERN_OWNER_FLAG 128 +#define BTR_EXTERN_INHERITED_FLAG 64 + +extern ulint btr_cur_n_non_sea; +extern ulint btr_cur_n_sea; +extern ulint btr_cur_n_non_sea_old; +extern ulint btr_cur_n_sea_old; + +#ifndef UNIV_NONINL +#include "btr0cur.ic" +#endif + +#endif diff --git a/storage/xtradb/include/btr0cur.ic b/storage/xtradb/include/btr0cur.ic new file mode 100644 index 00000000000..84a3a5cba0b --- /dev/null +++ b/storage/xtradb/include/btr0cur.ic @@ -0,0 +1,200 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The index tree cursor + +Created 10/16/1994 Heikki Tuuri +*******************************************************/ + +#include "btr0btr.h" + +#ifdef UNIV_DEBUG +/************************************************************* +Returns the page cursor component of a tree cursor. */ +UNIV_INLINE +page_cur_t* +btr_cur_get_page_cur( +/*=================*/ + /* out: pointer to page cursor + component */ + const btr_cur_t* cursor) /* in: tree cursor */ +{ + return(&((btr_cur_t*) cursor)->page_cur); +} +#endif /* UNIV_DEBUG */ +/************************************************************* +Returns the buffer block on which the tree cursor is positioned. */ +UNIV_INLINE +buf_block_t* +btr_cur_get_block( +/*==============*/ + /* out: pointer to buffer block */ + btr_cur_t* cursor) /* in: tree cursor */ +{ + return(page_cur_get_block(btr_cur_get_page_cur(cursor))); +} + +/************************************************************* +Returns the record pointer of a tree cursor. */ +UNIV_INLINE +rec_t* +btr_cur_get_rec( +/*============*/ + /* out: pointer to record */ + btr_cur_t* cursor) /* in: tree cursor */ +{ + return(page_cur_get_rec(&(cursor->page_cur))); +} + +/************************************************************* +Returns the compressed page on which the tree cursor is positioned. 
*/ +UNIV_INLINE +page_zip_des_t* +btr_cur_get_page_zip( +/*=================*/ + /* out: pointer to compressed page, + or NULL if the page is not compressed */ + btr_cur_t* cursor) /* in: tree cursor */ +{ + return(buf_block_get_page_zip(btr_cur_get_block(cursor))); +} + +/************************************************************* +Invalidates a tree cursor by setting record pointer to NULL. */ +UNIV_INLINE +void +btr_cur_invalidate( +/*===============*/ + btr_cur_t* cursor) /* in: tree cursor */ +{ + page_cur_invalidate(&(cursor->page_cur)); +} + +/************************************************************* +Returns the page of a tree cursor. */ +UNIV_INLINE +page_t* +btr_cur_get_page( +/*=============*/ + /* out: pointer to page */ + btr_cur_t* cursor) /* in: tree cursor */ +{ + return(page_align(page_cur_get_rec(&(cursor->page_cur)))); +} + +/************************************************************* +Returns the index of a cursor. */ +UNIV_INLINE +dict_index_t* +btr_cur_get_index( +/*==============*/ + /* out: index */ + btr_cur_t* cursor) /* in: B-tree cursor */ +{ + return(cursor->index); +} + +/************************************************************* +Positions a tree cursor at a given record. */ +UNIV_INLINE +void +btr_cur_position( +/*=============*/ + dict_index_t* index, /* in: index */ + rec_t* rec, /* in: record in tree */ + buf_block_t* block, /* in: buffer block of rec */ + btr_cur_t* cursor) /* out: cursor */ +{ + ut_ad(page_align(rec) == block->frame); + + page_cur_position(rec, block, btr_cur_get_page_cur(cursor)); + + cursor->index = index; +} + +/************************************************************************* +Checks if compressing an index page where a btr cursor is placed makes +sense. 
*/ +UNIV_INLINE +ibool +btr_cur_compress_recommendation( +/*============================*/ + /* out: TRUE if compression is recommended */ + btr_cur_t* cursor, /* in: btr cursor */ + mtr_t* mtr) /* in: mtr */ +{ + page_t* page; + + ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), + MTR_MEMO_PAGE_X_FIX)); + + page = btr_cur_get_page(cursor); + + if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT) + || ((btr_page_get_next(page, mtr) == FIL_NULL) + && (btr_page_get_prev(page, mtr) == FIL_NULL))) { + + /* The page fillfactor has dropped below a predefined + minimum value OR the level in the B-tree contains just + one page: we recommend compression if this is not the + root page. */ + + return(dict_index_get_page(cursor->index) + != page_get_page_no(page)); + } + + return(FALSE); +} + +/************************************************************************* +Checks if the record on which the cursor is placed can be deleted without +making tree compression necessary (or, recommended). */ +UNIV_INLINE +ibool +btr_cur_can_delete_without_compress( +/*================================*/ + /* out: TRUE if can be deleted without + recommended compression */ + btr_cur_t* cursor, /* in: btr cursor */ + ulint rec_size,/* in: rec_get_size(btr_cur_get_rec(cursor))*/ + mtr_t* mtr) /* in: mtr */ +{ + page_t* page; + + ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), + MTR_MEMO_PAGE_X_FIX)); + + page = btr_cur_get_page(cursor); + + if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT) + || ((btr_page_get_next(page, mtr) == FIL_NULL) + && (btr_page_get_prev(page, mtr) == FIL_NULL)) + || (page_get_n_recs(page) < 2)) { + + /* The page fillfactor will drop below a predefined + minimum value, OR the level in the B-tree contains just + one page, OR the page will become empty: we recommend + compression if this is not the root page. 
*/ + + return(dict_index_get_page(cursor->index) + == page_get_page_no(page)); + } + + return(TRUE); +} diff --git a/storage/xtradb/include/btr0pcur.h b/storage/xtradb/include/btr0pcur.h new file mode 100644 index 00000000000..1fdd102d32a --- /dev/null +++ b/storage/xtradb/include/btr0pcur.h @@ -0,0 +1,545 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The index tree persistent cursor + +Created 2/23/1996 Heikki Tuuri +*******************************************************/ + +#ifndef btr0pcur_h +#define btr0pcur_h + +#include "univ.i" +#include "dict0dict.h" +#include "data0data.h" +#include "mtr0mtr.h" +#include "page0cur.h" +#include "btr0cur.h" +#include "btr0btr.h" +#include "btr0types.h" + +/* Relative positions for a stored cursor position */ +#define BTR_PCUR_ON 1 +#define BTR_PCUR_BEFORE 2 +#define BTR_PCUR_AFTER 3 +/* Note that if the tree is not empty, btr_pcur_store_position does not +use the following, but only uses the above three alternatives, where the +position is stored relative to a specific record: this makes implementation +of a scroll cursor easier */ +#define 
BTR_PCUR_BEFORE_FIRST_IN_TREE 4 /* in an empty tree */ +#define BTR_PCUR_AFTER_LAST_IN_TREE 5 /* in an empty tree */ + +/****************************************************************** +Allocates memory for a persistent cursor object and initializes the cursor. */ +UNIV_INTERN +btr_pcur_t* +btr_pcur_create_for_mysql(void); +/*============================*/ + /* out, own: persistent cursor */ +/****************************************************************** +Frees the memory for a persistent cursor object. */ +UNIV_INTERN +void +btr_pcur_free_for_mysql( +/*====================*/ + btr_pcur_t* cursor); /* in, own: persistent cursor */ +/****************************************************************** +Copies the stored position of a pcur to another pcur. */ +UNIV_INTERN +void +btr_pcur_copy_stored_position( +/*==========================*/ + btr_pcur_t* pcur_receive, /* in: pcur which will receive the + position info */ + btr_pcur_t* pcur_donate); /* in: pcur from which the info is + copied */ +/****************************************************************** +Marks the old position as not stored; sets old_rec and old_rec_buf to NULL. */ +UNIV_INLINE +void +btr_pcur_init( +/*==========*/ + btr_pcur_t* pcur); /* in: persistent cursor */ +/****************************************************************** +Initializes and opens a persistent cursor to an index tree. It should be +closed with btr_pcur_close. */ +UNIV_INLINE +void +btr_pcur_open( +/*==========*/ + dict_index_t* index, /* in: index */ + const dtuple_t* tuple, /* in: tuple on which search done */ + ulint mode, /* in: PAGE_CUR_L, ...; + NOTE that if the search is made using a unique + prefix of a record, mode should be + PAGE_CUR_LE, not PAGE_CUR_GE, as the latter + may end up on the previous page from the + record! */ + ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... 
*/ + btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */ + mtr_t* mtr); /* in: mtr */ +/****************************************************************** +Opens a persistent cursor to an index tree without initializing the +cursor. */ +UNIV_INLINE +void +btr_pcur_open_with_no_init( +/*=======================*/ + dict_index_t* index, /* in: index */ + const dtuple_t* tuple, /* in: tuple on which search done */ + ulint mode, /* in: PAGE_CUR_L, ...; + NOTE that if the search is made using a unique + prefix of a record, mode should be + PAGE_CUR_LE, not PAGE_CUR_GE, as the latter + may end up on the previous page of the + record! */ + ulint latch_mode,/* in: BTR_SEARCH_LEAF, ...; + NOTE that if has_search_latch != 0 then + we may not acquire a latch on the cursor + page, but assume that the caller uses his + btr search latch to protect the record! */ + btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */ + ulint has_search_latch,/* in: latch mode the caller + currently has on btr_search_latch: + RW_S_LATCH, or 0 */ + mtr_t* mtr); /* in: mtr */ +/********************************************************************* +Opens a persistent cursor at either end of an index. */ +UNIV_INLINE +void +btr_pcur_open_at_index_side( +/*========================*/ + ibool from_left, /* in: TRUE if open to the low end, + FALSE if to the high end */ + dict_index_t* index, /* in: index */ + ulint latch_mode, /* in: latch mode */ + btr_pcur_t* pcur, /* in: cursor */ + ibool do_init, /* in: TRUE if should be initialized */ + mtr_t* mtr); /* in: mtr */ +/****************************************************************** +Gets the up_match value for a pcur after a search. 
*/ +UNIV_INLINE +ulint +btr_pcur_get_up_match( +/*==================*/ + /* out: number of matched fields at the cursor + or to the right if search mode was PAGE_CUR_GE, + otherwise undefined */ + btr_pcur_t* cursor); /* in: memory buffer for persistent cursor */ +/****************************************************************** +Gets the low_match value for a pcur after a search. */ +UNIV_INLINE +ulint +btr_pcur_get_low_match( +/*===================*/ + /* out: number of matched fields at the cursor + or to the right if search mode was PAGE_CUR_LE, + otherwise undefined */ + btr_pcur_t* cursor); /* in: memory buffer for persistent cursor */ +/****************************************************************** +If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first +user record satisfying the search condition, in the case PAGE_CUR_L or +PAGE_CUR_LE, on the last user record. If no such user record exists, then +in the first case sets the cursor after last in tree, and in the latter case +before first in tree. The latching mode must be BTR_SEARCH_LEAF or +BTR_MODIFY_LEAF. */ +UNIV_INTERN +void +btr_pcur_open_on_user_rec( +/*======================*/ + dict_index_t* index, /* in: index */ + const dtuple_t* tuple, /* in: tuple on which search done */ + ulint mode, /* in: PAGE_CUR_L, ... */ + ulint latch_mode, /* in: BTR_SEARCH_LEAF or + BTR_MODIFY_LEAF */ + btr_pcur_t* cursor, /* in: memory buffer for persistent + cursor */ + mtr_t* mtr); /* in: mtr */ +/************************************************************************** +Positions a cursor at a randomly chosen position within a B-tree. */ +UNIV_INLINE +void +btr_pcur_open_at_rnd_pos( +/*=====================*/ + dict_index_t* index, /* in: index */ + ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... 
*/ + btr_pcur_t* cursor, /* in/out: B-tree pcur */ + mtr_t* mtr); /* in: mtr */ +/****************************************************************** +Frees the possible old_rec_buf buffer of a persistent cursor and sets the +latch mode of the persistent cursor to BTR_NO_LATCHES. */ +UNIV_INLINE +void +btr_pcur_close( +/*===========*/ + btr_pcur_t* cursor); /* in: persistent cursor */ +/****************************************************************** +The position of the cursor is stored by taking an initial segment of the +record the cursor is positioned on, before, or after, and copying it to the +cursor data structure, or just setting a flag if the cursor is before the +first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the +page where the cursor is positioned must not be empty if the index tree is +not totally empty! */ +UNIV_INTERN +void +btr_pcur_store_position( +/*====================*/ + btr_pcur_t* cursor, /* in: persistent cursor */ + mtr_t* mtr); /* in: mtr */ +/****************************************************************** +Restores the stored position of a persistent cursor bufferfixing the page and +obtaining the specified latches. If the cursor position was saved when the +(1) cursor was positioned on a user record: this function restores the position +to the last record LESS OR EQUAL to the stored record; +(2) cursor was positioned on a page infimum record: restores the position to +the last record LESS than the user record which was the successor of the page +infimum; +(3) cursor was positioned on the page supremum: restores to the first record +GREATER than the user record which was the predecessor of the supremum. +(4) cursor was positioned before the first or after the last in an empty tree: +restores to before first or after the last in the tree. 
*/ +UNIV_INTERN +ibool +btr_pcur_restore_position( +/*======================*/ + /* out: TRUE if the cursor position + was stored when it was on a user record + and it can be restored on a user record + whose ordering fields are identical to + the ones of the original user record */ + ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */ + btr_pcur_t* cursor, /* in: detached persistent cursor */ + mtr_t* mtr); /* in: mtr */ +/****************************************************************** +If the latch mode of the cursor is BTR_SEARCH_LEAF or BTR_MODIFY_LEAF, +releases the page latch and bufferfix reserved by the cursor. +NOTE! In the case of BTR_MODIFY_LEAF, there should not exist changes +made by the current mini-transaction to the data protected by the +cursor latch, as then the latch must not be released until mtr_commit. */ +UNIV_INTERN +void +btr_pcur_release_leaf( +/*==================*/ + btr_pcur_t* cursor, /* in: persistent cursor */ + mtr_t* mtr); /* in: mtr */ +/************************************************************* +Gets the rel_pos field for a cursor whose position has been stored. */ +UNIV_INLINE +ulint +btr_pcur_get_rel_pos( +/*=================*/ + /* out: BTR_PCUR_ON, ... */ + const btr_pcur_t* cursor);/* in: persistent cursor */ +/************************************************************* +Sets the mtr field for a pcur. */ +UNIV_INLINE +void +btr_pcur_set_mtr( +/*=============*/ + btr_pcur_t* cursor, /* in: persistent cursor */ + mtr_t* mtr); /* in, own: mtr */ +/************************************************************* +Gets the mtr field for a pcur. */ +UNIV_INLINE +mtr_t* +btr_pcur_get_mtr( +/*=============*/ + /* out: mtr */ + btr_pcur_t* cursor); /* in: persistent cursor */ +/****************************************************************** +Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES, +that is, the cursor becomes detached. 
If there have been modifications +to the page where pcur is positioned, this can be used instead of +btr_pcur_release_leaf. Function btr_pcur_store_position should be used +before calling this, if restoration of cursor is wanted later. */ +UNIV_INLINE +void +btr_pcur_commit( +/*============*/ + btr_pcur_t* pcur); /* in: persistent cursor */ +/****************************************************************** +Differs from btr_pcur_commit in that we can specify the mtr to commit. */ +UNIV_INLINE +void +btr_pcur_commit_specify_mtr( +/*========================*/ + btr_pcur_t* pcur, /* in: persistent cursor */ + mtr_t* mtr); /* in: mtr to commit */ +/****************************************************************** +Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */ +UNIV_INLINE +ibool +btr_pcur_is_detached( +/*=================*/ + /* out: TRUE if detached */ + btr_pcur_t* pcur); /* in: persistent cursor */ +/************************************************************* +Moves the persistent cursor to the next record in the tree. If no records are +left, the cursor stays 'after last in tree'. */ +UNIV_INLINE +ibool +btr_pcur_move_to_next( +/*==================*/ + /* out: TRUE if the cursor was not after last + in tree */ + btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the + function may release the page latch */ + mtr_t* mtr); /* in: mtr */ +/************************************************************* +Moves the persistent cursor to the previous record in the tree. If no records +are left, the cursor stays 'before first in tree'. */ +UNIV_INTERN +ibool +btr_pcur_move_to_prev( +/*==================*/ + /* out: TRUE if the cursor was not before first + in tree */ + btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the + function may release the page latch */ + mtr_t* mtr); /* in: mtr */ +/************************************************************* +Moves the persistent cursor to the last record on the same page. 
*/ +UNIV_INLINE +void +btr_pcur_move_to_last_on_page( +/*==========================*/ + btr_pcur_t* cursor, /* in: persistent cursor */ + mtr_t* mtr); /* in: mtr */ +/************************************************************* +Moves the persistent cursor to the next user record in the tree. If no user +records are left, the cursor ends up 'after last in tree'. */ +UNIV_INLINE +ibool +btr_pcur_move_to_next_user_rec( +/*===========================*/ + /* out: TRUE if the cursor moved forward, + ending on a user record */ + btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the + function may release the page latch */ + mtr_t* mtr); /* in: mtr */ +/************************************************************* +Moves the persistent cursor to the first record on the next page. +Releases the latch on the current page, and bufferunfixes it. +Note that there must not be modifications on the current page, +as then the x-latch can be released only in mtr_commit. */ +UNIV_INTERN +void +btr_pcur_move_to_next_page( +/*=======================*/ + btr_pcur_t* cursor, /* in: persistent cursor; must be on the + last record of the current page */ + mtr_t* mtr); /* in: mtr */ +/************************************************************* +Moves the persistent cursor backward if it is on the first record +of the page. Releases the latch on the current page, and bufferunfixes +it. Note that to prevent a possible deadlock, the operation first +stores the position of the cursor, releases the leaf latch, acquires +necessary latches and restores the cursor position again before returning. +The alphabetical position of the cursor is guaranteed to be sensible +on return, but it may happen that the cursor is not positioned on the +last record of any page, because the structure of the tree may have +changed while the cursor had no latches. 
*/ +UNIV_INTERN +void +btr_pcur_move_backward_from_page( +/*=============================*/ + btr_pcur_t* cursor, /* in: persistent cursor, must be on the + first record of the current page */ + mtr_t* mtr); /* in: mtr */ +#ifdef UNIV_DEBUG +/************************************************************* +Returns the btr cursor component of a persistent cursor. */ +UNIV_INLINE +btr_cur_t* +btr_pcur_get_btr_cur( +/*=================*/ + /* out: pointer to + btr cursor component */ + const btr_pcur_t* cursor); /* in: persistent cursor */ +/************************************************************* +Returns the page cursor component of a persistent cursor. */ +UNIV_INLINE +page_cur_t* +btr_pcur_get_page_cur( +/*==================*/ + /* out: pointer to + page cursor component */ + const btr_pcur_t* cursor); /* in: persistent cursor */ +#else /* UNIV_DEBUG */ +# define btr_pcur_get_btr_cur(cursor) (&(cursor)->btr_cur) +# define btr_pcur_get_page_cur(cursor) (&(cursor)->btr_cur.page_cur) +#endif /* UNIV_DEBUG */ +/************************************************************* +Returns the page of a persistent cursor. */ +UNIV_INLINE +page_t* +btr_pcur_get_page( +/*==============*/ + /* out: pointer to the page */ + btr_pcur_t* cursor);/* in: persistent cursor */ +/************************************************************* +Returns the buffer block of a persistent cursor. */ +UNIV_INLINE +buf_block_t* +btr_pcur_get_block( +/*===============*/ + /* out: pointer to the block */ + btr_pcur_t* cursor);/* in: persistent cursor */ +/************************************************************* +Returns the record of a persistent cursor. */ +UNIV_INLINE +rec_t* +btr_pcur_get_rec( +/*=============*/ + /* out: pointer to the record */ + btr_pcur_t* cursor);/* in: persistent cursor */ +/************************************************************* +Checks if the persistent cursor is on a user record. 
*/ +UNIV_INLINE +ibool +btr_pcur_is_on_user_rec( +/*====================*/ + const btr_pcur_t* cursor);/* in: persistent cursor */ +/************************************************************* +Checks if the persistent cursor is after the last user record on +a page. */ +UNIV_INLINE +ibool +btr_pcur_is_after_last_on_page( +/*===========================*/ + const btr_pcur_t* cursor);/* in: persistent cursor */ +/************************************************************* +Checks if the persistent cursor is before the first user record on +a page. */ +UNIV_INLINE +ibool +btr_pcur_is_before_first_on_page( +/*=============================*/ + const btr_pcur_t* cursor);/* in: persistent cursor */ +/************************************************************* +Checks if the persistent cursor is before the first user record in +the index tree. */ +UNIV_INLINE +ibool +btr_pcur_is_before_first_in_tree( +/*=============================*/ + btr_pcur_t* cursor, /* in: persistent cursor */ + mtr_t* mtr); /* in: mtr */ +/************************************************************* +Checks if the persistent cursor is after the last user record in +the index tree. */ +UNIV_INLINE +ibool +btr_pcur_is_after_last_in_tree( +/*===========================*/ + btr_pcur_t* cursor, /* in: persistent cursor */ + mtr_t* mtr); /* in: mtr */ +/************************************************************* +Moves the persistent cursor to the next record on the same page. */ +UNIV_INLINE +void +btr_pcur_move_to_next_on_page( +/*==========================*/ + btr_pcur_t* cursor);/* in/out: persistent cursor */ +/************************************************************* +Moves the persistent cursor to the previous record on the same page. */ +UNIV_INLINE +void +btr_pcur_move_to_prev_on_page( +/*==========================*/ + btr_pcur_t* cursor);/* in/out: persistent cursor */ + + +/* The persistent B-tree cursor structure. This is used mainly for SQL +selects, updates, and deletes. 
*/ + +struct btr_pcur_struct{ + btr_cur_t btr_cur; /* a B-tree cursor */ + ulint latch_mode; /* see TODO note below! + BTR_SEARCH_LEAF, BTR_MODIFY_LEAF, + BTR_MODIFY_TREE, or BTR_NO_LATCHES, + depending on the latching state of + the page and tree where the cursor is + positioned; the last value means that + the cursor is not currently positioned: + we say then that the cursor is + detached; it can be restored to + attached if the old position was + stored in old_rec */ + ulint old_stored; /* BTR_PCUR_OLD_STORED + or BTR_PCUR_OLD_NOT_STORED */ + rec_t* old_rec; /* if cursor position is stored, + contains an initial segment of the + latest record cursor was positioned + either on, before, or after */ + ulint old_n_fields; /* number of fields in old_rec */ + ulint rel_pos; /* BTR_PCUR_ON, BTR_PCUR_BEFORE, or + BTR_PCUR_AFTER, depending on whether + cursor was on, before, or after the + old_rec record */ + buf_block_t* block_when_stored;/* buffer block when the position was + stored */ + ib_uint64_t modify_clock; /* the modify clock value of the + buffer block when the cursor position + was stored */ + ulint pos_state; /* see TODO note below! + BTR_PCUR_IS_POSITIONED, + BTR_PCUR_WAS_POSITIONED, + BTR_PCUR_NOT_POSITIONED */ + ulint search_mode; /* PAGE_CUR_G, ... */ + trx_t* trx_if_known; /* the transaction, if we know it; + otherwise this field is not defined; + can ONLY BE USED in error prints in + fatal assertion failures! */ + /*-----------------------------*/ + /* NOTE that the following fields may possess dynamically allocated + memory which should be freed if not needed anymore! 
*/ + + mtr_t* mtr; /* NULL, or this field may contain + a mini-transaction which holds the + latch on the cursor page */ + byte* old_rec_buf; /* NULL, or a dynamically allocated + buffer for old_rec */ + ulint buf_size; /* old_rec_buf size if old_rec_buf + is not NULL */ +}; + +#define BTR_PCUR_IS_POSITIONED 1997660512 /* TODO: currently, the state + can be BTR_PCUR_IS_POSITIONED, + though it really should be + BTR_PCUR_WAS_POSITIONED, + because we have no obligation + to commit the cursor with + mtr; similarly latch_mode may + be out of date. This can + lead to problems if btr_pcur + is not used the right way; + all current code should be + ok. */ +#define BTR_PCUR_WAS_POSITIONED 1187549791 +#define BTR_PCUR_NOT_POSITIONED 1328997689 + +#define BTR_PCUR_OLD_STORED 908467085 +#define BTR_PCUR_OLD_NOT_STORED 122766467 + +#ifndef UNIV_NONINL +#include "btr0pcur.ic" +#endif + +#endif diff --git a/storage/xtradb/include/btr0pcur.ic b/storage/xtradb/include/btr0pcur.ic new file mode 100644 index 00000000000..bde7413820a --- /dev/null +++ b/storage/xtradb/include/btr0pcur.ic @@ -0,0 +1,656 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The index tree persistent cursor + +Created 2/23/1996 Heikki Tuuri +*******************************************************/ + + +/************************************************************* +Gets the rel_pos field for a cursor whose position has been stored. */ +UNIV_INLINE +ulint +btr_pcur_get_rel_pos( +/*=================*/ + /* out: BTR_PCUR_ON, ... */ + const btr_pcur_t* cursor) /* in: persistent cursor */ +{ + ut_ad(cursor); + ut_ad(cursor->old_rec); + ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED); + ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED + || cursor->pos_state == BTR_PCUR_IS_POSITIONED); + + return(cursor->rel_pos); +} + +/************************************************************* +Sets the mtr field for a pcur. */ +UNIV_INLINE +void +btr_pcur_set_mtr( +/*=============*/ + btr_pcur_t* cursor, /* in: persistent cursor */ + mtr_t* mtr) /* in, own: mtr */ +{ + ut_ad(cursor); + + cursor->mtr = mtr; +} + +/************************************************************* +Gets the mtr field for a pcur. */ +UNIV_INLINE +mtr_t* +btr_pcur_get_mtr( +/*=============*/ + /* out: mtr */ + btr_pcur_t* cursor) /* in: persistent cursor */ +{ + ut_ad(cursor); + + return(cursor->mtr); +} + +#ifdef UNIV_DEBUG +/************************************************************* +Returns the btr cursor component of a persistent cursor. 
*/ +UNIV_INLINE +btr_cur_t* +btr_pcur_get_btr_cur( +/*=================*/ + /* out: pointer to + btr cursor component */ + const btr_pcur_t* cursor) /* in: persistent cursor */ +{ + const btr_cur_t* btr_cur = &cursor->btr_cur; + return((btr_cur_t*) btr_cur); +} + +/************************************************************* +Returns the page cursor component of a persistent cursor. */ +UNIV_INLINE +page_cur_t* +btr_pcur_get_page_cur( +/*==================*/ + /* out: pointer to page cursor + component */ + const btr_pcur_t* cursor) /* in: persistent cursor */ +{ + return(btr_cur_get_page_cur(btr_pcur_get_btr_cur(cursor))); +} +#endif /* UNIV_DEBUG */ +/************************************************************* +Returns the page of a persistent cursor. */ +UNIV_INLINE +page_t* +btr_pcur_get_page( +/*==============*/ + /* out: pointer to the page */ + btr_pcur_t* cursor) /* in: persistent cursor */ +{ + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + + return(btr_cur_get_page(btr_pcur_get_btr_cur(cursor))); +} + +/************************************************************* +Returns the buffer block of a persistent cursor. */ +UNIV_INLINE +buf_block_t* +btr_pcur_get_block( +/*===============*/ + /* out: pointer to the block */ + btr_pcur_t* cursor) /* in: persistent cursor */ +{ + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + + return(btr_cur_get_block(btr_pcur_get_btr_cur(cursor))); +} + +/************************************************************* +Returns the record of a persistent cursor. */ +UNIV_INLINE +rec_t* +btr_pcur_get_rec( +/*=============*/ + /* out: pointer to the record */ + btr_pcur_t* cursor) /* in: persistent cursor */ +{ + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + ut_ad(cursor->latch_mode != BTR_NO_LATCHES); + + return(btr_cur_get_rec(btr_pcur_get_btr_cur(cursor))); +} + +/****************************************************************** +Gets the up_match value for a pcur after a search. 
*/ +UNIV_INLINE +ulint +btr_pcur_get_up_match( +/*==================*/ + /* out: number of matched fields at the cursor + or to the right if search mode was PAGE_CUR_GE, + otherwise undefined */ + btr_pcur_t* cursor) /* in: memory buffer for persistent cursor */ +{ + btr_cur_t* btr_cursor; + + ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED) + || (cursor->pos_state == BTR_PCUR_IS_POSITIONED)); + + btr_cursor = btr_pcur_get_btr_cur(cursor); + + ut_ad(btr_cursor->up_match != ULINT_UNDEFINED); + + return(btr_cursor->up_match); +} + +/****************************************************************** +Gets the low_match value for a pcur after a search. */ +UNIV_INLINE +ulint +btr_pcur_get_low_match( +/*===================*/ + /* out: number of matched fields at the cursor + or to the right if search mode was PAGE_CUR_LE, + otherwise undefined */ + btr_pcur_t* cursor) /* in: memory buffer for persistent cursor */ +{ + btr_cur_t* btr_cursor; + + ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED) + || (cursor->pos_state == BTR_PCUR_IS_POSITIONED)); + + btr_cursor = btr_pcur_get_btr_cur(cursor); + ut_ad(btr_cursor->low_match != ULINT_UNDEFINED); + + return(btr_cursor->low_match); +} + +/************************************************************* +Checks if the persistent cursor is after the last user record on +a page. */ +UNIV_INLINE +ibool +btr_pcur_is_after_last_on_page( +/*===========================*/ + const btr_pcur_t* cursor) /* in: persistent cursor */ +{ + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + ut_ad(cursor->latch_mode != BTR_NO_LATCHES); + + return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor))); +} + +/************************************************************* +Checks if the persistent cursor is before the first user record on +a page. 
*/ +UNIV_INLINE +ibool +btr_pcur_is_before_first_on_page( +/*=============================*/ + const btr_pcur_t* cursor) /* in: persistent cursor */ +{ + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + ut_ad(cursor->latch_mode != BTR_NO_LATCHES); + + return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor))); +} + +/************************************************************* +Checks if the persistent cursor is on a user record. */ +UNIV_INLINE +ibool +btr_pcur_is_on_user_rec( +/*====================*/ + const btr_pcur_t* cursor) /* in: persistent cursor */ +{ + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + ut_ad(cursor->latch_mode != BTR_NO_LATCHES); + + if (btr_pcur_is_before_first_on_page(cursor) + || btr_pcur_is_after_last_on_page(cursor)) { + + return(FALSE); + } + + return(TRUE); +} + +/************************************************************* +Checks if the persistent cursor is before the first user record in +the index tree. */ +UNIV_INLINE +ibool +btr_pcur_is_before_first_in_tree( +/*=============================*/ + btr_pcur_t* cursor, /* in: persistent cursor */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + ut_ad(cursor->latch_mode != BTR_NO_LATCHES); + + if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) != FIL_NULL) { + + return(FALSE); + } + + return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor))); +} + +/************************************************************* +Checks if the persistent cursor is after the last user record in +the index tree. 
*/ +UNIV_INLINE +ibool +btr_pcur_is_after_last_in_tree( +/*===========================*/ + btr_pcur_t* cursor, /* in: persistent cursor */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + ut_ad(cursor->latch_mode != BTR_NO_LATCHES); + + if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) != FIL_NULL) { + + return(FALSE); + } + + return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor))); +} + +/************************************************************* +Moves the persistent cursor to the next record on the same page. */ +UNIV_INLINE +void +btr_pcur_move_to_next_on_page( +/*==========================*/ + btr_pcur_t* cursor) /* in/out: persistent cursor */ +{ + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + ut_ad(cursor->latch_mode != BTR_NO_LATCHES); + + page_cur_move_to_next(btr_pcur_get_page_cur(cursor)); + + cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; +} + +/************************************************************* +Moves the persistent cursor to the previous record on the same page. */ +UNIV_INLINE +void +btr_pcur_move_to_prev_on_page( +/*==========================*/ + btr_pcur_t* cursor) /* in/out: persistent cursor */ +{ + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + ut_ad(cursor->latch_mode != BTR_NO_LATCHES); + + page_cur_move_to_prev(btr_pcur_get_page_cur(cursor)); + + cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; +} + +/************************************************************* +Moves the persistent cursor to the last record on the same page. 
*/ +UNIV_INLINE +void +btr_pcur_move_to_last_on_page( +/*==========================*/ + btr_pcur_t* cursor, /* in: persistent cursor */ + mtr_t* mtr) /* in: mtr */ +{ + UT_NOT_USED(mtr); + ut_ad(cursor->latch_mode != BTR_NO_LATCHES); + + page_cur_set_after_last(btr_pcur_get_block(cursor), + btr_pcur_get_page_cur(cursor)); + + cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; +} + +/************************************************************* +Moves the persistent cursor to the next user record in the tree. If no user +records are left, the cursor ends up 'after last in tree'. */ +UNIV_INLINE +ibool +btr_pcur_move_to_next_user_rec( +/*===========================*/ + /* out: TRUE if the cursor moved forward, + ending on a user record */ + btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the + function may release the page latch */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + ut_ad(cursor->latch_mode != BTR_NO_LATCHES); + cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; +loop: + if (btr_pcur_is_after_last_on_page(cursor)) { + + if (btr_pcur_is_after_last_in_tree(cursor, mtr)) { + + return(FALSE); + } + + btr_pcur_move_to_next_page(cursor, mtr); + } else { + btr_pcur_move_to_next_on_page(cursor); + } + + if (btr_pcur_is_on_user_rec(cursor)) { + + return(TRUE); + } + + goto loop; +} + +/************************************************************* +Moves the persistent cursor to the next record in the tree. If no records are +left, the cursor stays 'after last in tree'. 
*/ +UNIV_INLINE +ibool +btr_pcur_move_to_next( +/*==================*/ + /* out: TRUE if the cursor was not after last + in tree */ + btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the + function may release the page latch */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + ut_ad(cursor->latch_mode != BTR_NO_LATCHES); + + cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + + if (btr_pcur_is_after_last_on_page(cursor)) { + + if (btr_pcur_is_after_last_in_tree(cursor, mtr)) { + + return(FALSE); + } + + btr_pcur_move_to_next_page(cursor, mtr); + + return(TRUE); + } + + btr_pcur_move_to_next_on_page(cursor); + + return(TRUE); +} + +/****************************************************************** +Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES, +that is, the cursor becomes detached. If there have been modifications +to the page where pcur is positioned, this can be used instead of +btr_pcur_release_leaf. Function btr_pcur_store_position should be used +before calling this, if restoration of cursor is wanted later. */ +UNIV_INLINE +void +btr_pcur_commit( +/*============*/ + btr_pcur_t* pcur) /* in: persistent cursor */ +{ + ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED); + + pcur->latch_mode = BTR_NO_LATCHES; + + mtr_commit(pcur->mtr); + + pcur->pos_state = BTR_PCUR_WAS_POSITIONED; +} + +/****************************************************************** +Differs from btr_pcur_commit in that we can specify the mtr to commit. */ +UNIV_INLINE +void +btr_pcur_commit_specify_mtr( +/*========================*/ + btr_pcur_t* pcur, /* in: persistent cursor */ + mtr_t* mtr) /* in: mtr to commit */ +{ + ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED); + + pcur->latch_mode = BTR_NO_LATCHES; + + mtr_commit(mtr); + + pcur->pos_state = BTR_PCUR_WAS_POSITIONED; +} + +/****************************************************************** +Sets the pcur latch mode to BTR_NO_LATCHES. 
*/ +UNIV_INLINE +void +btr_pcur_detach( +/*============*/ + btr_pcur_t* pcur) /* in: persistent cursor */ +{ + ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED); + + pcur->latch_mode = BTR_NO_LATCHES; + + pcur->pos_state = BTR_PCUR_WAS_POSITIONED; +} + +/****************************************************************** +Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */ +UNIV_INLINE +ibool +btr_pcur_is_detached( +/*=================*/ + /* out: TRUE if detached */ + btr_pcur_t* pcur) /* in: persistent cursor */ +{ + if (pcur->latch_mode == BTR_NO_LATCHES) { + + return(TRUE); + } + + return(FALSE); +} + +/****************************************************************** +Sets the old_rec_buf field to NULL. */ +UNIV_INLINE +void +btr_pcur_init( +/*==========*/ + btr_pcur_t* pcur) /* in: persistent cursor */ +{ + pcur->old_stored = BTR_PCUR_OLD_NOT_STORED; + pcur->old_rec_buf = NULL; + pcur->old_rec = NULL; +} + +/****************************************************************** +Initializes and opens a persistent cursor to an index tree. It should be +closed with btr_pcur_close. */ +UNIV_INLINE +void +btr_pcur_open( +/*==========*/ + dict_index_t* index, /* in: index */ + const dtuple_t* tuple, /* in: tuple on which search done */ + ulint mode, /* in: PAGE_CUR_L, ...; + NOTE that if the search is made using a unique + prefix of a record, mode should be + PAGE_CUR_LE, not PAGE_CUR_GE, as the latter + may end up on the previous page from the + record! */ + ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... 
*/ + btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */ + mtr_t* mtr) /* in: mtr */ +{ + btr_cur_t* btr_cursor; + + /* Initialize the cursor */ + + btr_pcur_init(cursor); + + cursor->latch_mode = latch_mode; + cursor->search_mode = mode; + + /* Search with the tree cursor */ + + btr_cursor = btr_pcur_get_btr_cur(cursor); + + btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode, + btr_cursor, 0, mtr); + cursor->pos_state = BTR_PCUR_IS_POSITIONED; + + cursor->trx_if_known = NULL; +} + +/****************************************************************** +Opens an persistent cursor to an index tree without initializing the +cursor. */ +UNIV_INLINE +void +btr_pcur_open_with_no_init( +/*=======================*/ + dict_index_t* index, /* in: index */ + const dtuple_t* tuple, /* in: tuple on which search done */ + ulint mode, /* in: PAGE_CUR_L, ...; + NOTE that if the search is made using a unique + prefix of a record, mode should be + PAGE_CUR_LE, not PAGE_CUR_GE, as the latter + may end up on the previous page of the + record! */ + ulint latch_mode,/* in: BTR_SEARCH_LEAF, ...; + NOTE that if has_search_latch != 0 then + we maybe do not acquire a latch on the cursor + page, but assume that the caller uses his + btr search latch to protect the record! 
*/ + btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */ + ulint has_search_latch,/* in: latch mode the caller + currently has on btr_search_latch: + RW_S_LATCH, or 0 */ + mtr_t* mtr) /* in: mtr */ +{ + btr_cur_t* btr_cursor; + + cursor->latch_mode = latch_mode; + cursor->search_mode = mode; + + /* Search with the tree cursor */ + + btr_cursor = btr_pcur_get_btr_cur(cursor); + + btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode, + btr_cursor, has_search_latch, mtr); + cursor->pos_state = BTR_PCUR_IS_POSITIONED; + + cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + + cursor->trx_if_known = NULL; +} + +/********************************************************************* +Opens a persistent cursor at either end of an index. */ +UNIV_INLINE +void +btr_pcur_open_at_index_side( +/*========================*/ + ibool from_left, /* in: TRUE if open to the low end, + FALSE if to the high end */ + dict_index_t* index, /* in: index */ + ulint latch_mode, /* in: latch mode */ + btr_pcur_t* pcur, /* in: cursor */ + ibool do_init, /* in: TRUE if should be initialized */ + mtr_t* mtr) /* in: mtr */ +{ + pcur->latch_mode = latch_mode; + + if (from_left) { + pcur->search_mode = PAGE_CUR_G; + } else { + pcur->search_mode = PAGE_CUR_L; + } + + if (do_init) { + btr_pcur_init(pcur); + } + + btr_cur_open_at_index_side(from_left, index, latch_mode, + btr_pcur_get_btr_cur(pcur), mtr); + pcur->pos_state = BTR_PCUR_IS_POSITIONED; + + pcur->old_stored = BTR_PCUR_OLD_NOT_STORED; + + pcur->trx_if_known = NULL; +} + +/************************************************************************** +Positions a cursor at a randomly chosen position within a B-tree. */ +UNIV_INLINE +void +btr_pcur_open_at_rnd_pos( +/*=====================*/ + dict_index_t* index, /* in: index */ + ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... 
*/ + btr_pcur_t* cursor, /* in/out: B-tree pcur */ + mtr_t* mtr) /* in: mtr */ +{ + /* Initialize the cursor */ + + cursor->latch_mode = latch_mode; + cursor->search_mode = PAGE_CUR_G; + + btr_pcur_init(cursor); + + btr_cur_open_at_rnd_pos(index, latch_mode, + btr_pcur_get_btr_cur(cursor), mtr); + cursor->pos_state = BTR_PCUR_IS_POSITIONED; + cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + + cursor->trx_if_known = NULL; +} + +/****************************************************************** +Frees the possible memory heap of a persistent cursor and sets the latch +mode of the persistent cursor to BTR_NO_LATCHES. */ +UNIV_INLINE +void +btr_pcur_close( +/*===========*/ + btr_pcur_t* cursor) /* in: persistent cursor */ +{ + if (cursor->old_rec_buf != NULL) { + + mem_free(cursor->old_rec_buf); + + cursor->old_rec = NULL; + cursor->old_rec_buf = NULL; + } + + cursor->btr_cur.page_cur.rec = NULL; + cursor->btr_cur.page_cur.block = NULL; + cursor->old_rec = NULL; + cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; + + cursor->latch_mode = BTR_NO_LATCHES; + cursor->pos_state = BTR_PCUR_NOT_POSITIONED; + + cursor->trx_if_known = NULL; +} diff --git a/storage/xtradb/include/btr0sea.h b/storage/xtradb/include/btr0sea.h new file mode 100644 index 00000000000..074e6595258 --- /dev/null +++ b/storage/xtradb/include/btr0sea.h @@ -0,0 +1,298 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/************************************************************************ +The index tree adaptive search + +Created 2/17/1996 Heikki Tuuri +*************************************************************************/ + +#ifndef btr0sea_h +#define btr0sea_h + +#include "univ.i" + +#include "rem0rec.h" +#include "dict0dict.h" +#include "btr0types.h" +#include "mtr0mtr.h" +#include "ha0ha.h" + +/********************************************************************* +Creates and initializes the adaptive search system at a database start. */ +UNIV_INTERN +void +btr_search_sys_create( +/*==================*/ + ulint hash_size); /* in: hash index hash table size */ + +/************************************************************************ +Disable the adaptive hash search system and empty the index. */ +UNIV_INTERN +void +btr_search_disable(void); +/*====================*/ +/************************************************************************ +Enable the adaptive hash search system. */ +UNIV_INTERN +void +btr_search_enable(void); +/*====================*/ + +/************************************************************************ +Returns search info for an index. */ +UNIV_INLINE +btr_search_t* +btr_search_get_info( +/*================*/ + /* out: search info; search mutex reserved */ + dict_index_t* index); /* in: index */ +/********************************************************************* +Creates and initializes a search info struct. 
*/ +UNIV_INTERN +btr_search_t* +btr_search_info_create( +/*===================*/ + /* out, own: search info struct */ + mem_heap_t* heap); /* in: heap where created */ +/********************************************************************* +Returns the value of ref_count. The value is protected by +btr_search_latch. */ +UNIV_INTERN +ulint +btr_search_info_get_ref_count( +/*==========================*/ + /* out: ref_count value. */ + btr_search_t* info); /* in: search info. */ +/************************************************************************* +Updates the search info. */ +UNIV_INLINE +void +btr_search_info_update( +/*===================*/ + dict_index_t* index, /* in: index of the cursor */ + btr_cur_t* cursor);/* in: cursor which was just positioned */ +/********************************************************************** +Tries to guess the right search position based on the hash search info +of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts, +and the function returns TRUE, then cursor->up_match and cursor->low_match +both have sensible values. */ +UNIV_INTERN +ibool +btr_search_guess_on_hash( +/*=====================*/ + /* out: TRUE if succeeded */ + dict_index_t* index, /* in: index */ + btr_search_t* info, /* in: index search info */ + const dtuple_t* tuple, /* in: logical record */ + ulint mode, /* in: PAGE_CUR_L, ... */ + ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */ + btr_cur_t* cursor, /* out: tree cursor */ + ulint has_search_latch,/* in: latch mode the caller + currently has on btr_search_latch: + RW_S_LATCH, RW_X_LATCH, or 0 */ + mtr_t* mtr); /* in: mtr */ +/************************************************************************ +Moves or deletes hash entries for moved records. If new_page is already hashed, +then the hash index for page, if any, is dropped. 
If new_page is not hashed, +and page is hashed, then a new hash index is built to new_page with the same +parameters as page (this often happens when a page is split). */ +UNIV_INTERN +void +btr_search_move_or_delete_hash_entries( +/*===================================*/ + buf_block_t* new_block, /* in: records are copied + to this page */ + buf_block_t* block, /* in: index page from which + records were copied, and the + copied records will be deleted + from this page */ + dict_index_t* index); /* in: record descriptor */ +/************************************************************************ +Drops a page hash index. */ +UNIV_INTERN +void +btr_search_drop_page_hash_index( +/*============================*/ + buf_block_t* block); /* in: block containing index page, + s- or x-latched, or an index page + for which we know that + block->buf_fix_count == 0 */ +/************************************************************************ +Drops a page hash index when a page is freed from a fseg to the file system. +Drops possible hash index if the page happens to be in the buffer pool. */ +UNIV_INTERN +void +btr_search_drop_page_hash_when_freed( +/*=================================*/ + ulint space, /* in: space id */ + ulint zip_size, /* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no); /* in: page number */ +/************************************************************************ +Updates the page hash index when a single record is inserted on a page. */ +UNIV_INTERN +void +btr_search_update_hash_node_on_insert( +/*==================================*/ + btr_cur_t* cursor);/* in: cursor which was positioned to the + place to insert using btr_cur_search_..., + and the new record has been inserted next + to the cursor */ +/************************************************************************ +Updates the page hash index when a single record is inserted on a page. 
*/ +UNIV_INTERN +void +btr_search_update_hash_on_insert( +/*=============================*/ + btr_cur_t* cursor);/* in: cursor which was positioned to the + place to insert using btr_cur_search_..., + and the new record has been inserted next + to the cursor */ +/************************************************************************ +Updates the page hash index when a single record is deleted from a page. */ +UNIV_INTERN +void +btr_search_update_hash_on_delete( +/*=============================*/ + btr_cur_t* cursor);/* in: cursor which was positioned on the + record to delete using btr_cur_search_..., + the record is not yet deleted */ +/************************************************************************ +Validates the search system. */ +UNIV_INTERN +ibool +btr_search_validate(void); +/*======================*/ + /* out: TRUE if ok */ + +/* Flag: has the search system been enabled? +Protected by btr_search_latch and btr_search_enabled_mutex. */ +extern char btr_search_enabled; + +/* The search info struct in an index */ + +struct btr_search_struct{ + ulint ref_count; /* Number of blocks in this index tree + that have search index built + i.e. block->index points to this index. + Protected by btr_search_latch except + when during initialization in + btr_search_info_create(). */ + + /* The following fields are not protected by any latch. + Unfortunately, this means that they must be aligned to + the machine word, i.e., they cannot be turned into bit-fields. */ + buf_block_t* root_guess;/* the root page frame when it was last time + fetched, or NULL */ + ulint hash_analysis; /* when this exceeds BTR_SEARCH_HASH_ANALYSIS, + the hash analysis starts; this is reset if no + success noticed */ + ibool last_hash_succ; /* TRUE if the last search would have + succeeded, or did succeed, using the hash + index; NOTE that the value here is not exact: + it is not calculated for every search, and the + calculation itself is not always accurate! 
*/ + ulint n_hash_potential; + /* number of consecutive searches + which would have succeeded, or did succeed, + using the hash index; + the range is 0 .. BTR_SEARCH_BUILD_LIMIT + 5 */ + /*----------------------*/ + ulint n_fields; /* recommended prefix length for hash search: + number of full fields */ + ulint n_bytes; /* recommended prefix: number of bytes in + an incomplete field; + see also BTR_PAGE_MAX_REC_SIZE */ + ibool left_side; /* TRUE or FALSE, depending on whether + the leftmost record of several records with + the same prefix should be indexed in the + hash index */ + /*----------------------*/ +#ifdef UNIV_SEARCH_PERF_STAT + ulint n_hash_succ; /* number of successful hash searches thus + far */ + ulint n_hash_fail; /* number of failed hash searches */ + ulint n_patt_succ; /* number of successful pattern searches thus + far */ + ulint n_searches; /* number of searches */ +#endif /* UNIV_SEARCH_PERF_STAT */ +#ifdef UNIV_DEBUG + ulint magic_n; /* magic number */ +# define BTR_SEARCH_MAGIC_N 1112765 +#endif /* UNIV_DEBUG */ +}; + +/* The hash index system */ + +typedef struct btr_search_sys_struct btr_search_sys_t; + +struct btr_search_sys_struct{ + hash_table_t* hash_index; +}; + +extern btr_search_sys_t* btr_search_sys; + +/* The latch protecting the adaptive search system: this latch protects the +(1) hash index; +(2) columns of a record to which we have a pointer in the hash index; + +but does NOT protect: + +(3) next record offset field in a record; +(4) next or previous records on the same page. + +Bear in mind (3) and (4) when using the hash index. 
+*/ + +extern rw_lock_t* btr_search_latch_temp; + +#define btr_search_latch (*btr_search_latch_temp) + +#ifdef UNIV_SEARCH_PERF_STAT +extern ulint btr_search_n_succ; +extern ulint btr_search_n_hash_fail; +#endif /* UNIV_SEARCH_PERF_STAT */ + +/* After change in n_fields or n_bytes in info, this many rounds are waited +before starting the hash analysis again: this is to save CPU time when there +is no hope in building a hash index. */ + +#define BTR_SEARCH_HASH_ANALYSIS 17 + +/* Limit of consecutive searches for trying a search shortcut on the search +pattern */ + +#define BTR_SEARCH_ON_PATTERN_LIMIT 3 + +/* Limit of consecutive searches for trying a search shortcut using the hash +index */ + +#define BTR_SEARCH_ON_HASH_LIMIT 3 + +/* We do this many searches before trying to keep the search latch over calls +from MySQL. If we notice someone waiting for the latch, we again set this +much timeout. This is to reduce contention. */ + +#define BTR_SEA_TIMEOUT 10000 + +#ifndef UNIV_NONINL +#include "btr0sea.ic" +#endif + +#endif diff --git a/storage/xtradb/include/btr0sea.ic b/storage/xtradb/include/btr0sea.ic new file mode 100644 index 00000000000..c948d7e92af --- /dev/null +++ b/storage/xtradb/include/btr0sea.ic @@ -0,0 +1,83 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/************************************************************************ +The index tree adaptive search + +Created 2/17/1996 Heikki Tuuri +*************************************************************************/ + +#include "dict0mem.h" +#include "btr0cur.h" +#include "buf0buf.h" + +/************************************************************************* +Updates the search info. */ +UNIV_INTERN +void +btr_search_info_update_slow( +/*========================*/ + btr_search_t* info, /* in/out: search info */ + btr_cur_t* cursor);/* in: cursor which was just positioned */ + +/************************************************************************ +Returns search info for an index. */ +UNIV_INLINE +btr_search_t* +btr_search_get_info( +/*================*/ + /* out: search info; search mutex reserved */ + dict_index_t* index) /* in: index */ +{ + ut_ad(index); + + return(index->search_info); +} + +/************************************************************************* +Updates the search info. 
*/ +UNIV_INLINE +void +btr_search_info_update( +/*===================*/ + dict_index_t* index, /* in: index of the cursor */ + btr_cur_t* cursor) /* in: cursor which was just positioned */ +{ + btr_search_t* info; + +#ifdef UNIV_SYNC_DEBUG + ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); + ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + + info = btr_search_get_info(index); + + info->hash_analysis++; + + if (info->hash_analysis < BTR_SEARCH_HASH_ANALYSIS) { + + /* Do nothing */ + + return; + + } + + ut_ad(cursor->flag != BTR_CUR_HASH); + + btr_search_info_update_slow(info, cursor); +} diff --git a/storage/xtradb/include/btr0types.h b/storage/xtradb/include/btr0types.h new file mode 100644 index 00000000000..074b15fa68d --- /dev/null +++ b/storage/xtradb/include/btr0types.h @@ -0,0 +1,47 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/************************************************************************ +The index tree general types + +Created 2/17/1996 Heikki Tuuri +*************************************************************************/ + +#ifndef btr0types_h +#define btr0types_h + +#include "univ.i" + +#include "rem0types.h" +#include "page0types.h" + +typedef struct btr_pcur_struct btr_pcur_t; +typedef struct btr_cur_struct btr_cur_t; +typedef struct btr_search_struct btr_search_t; + +/* The size of a reference to data stored on a different page. +The reference is stored at the end of the prefix of the field +in the index record. */ +#define BTR_EXTERN_FIELD_REF_SIZE 20 + +/* A BLOB field reference full of zero, for use in assertions and tests. +Initially, BLOB field references are set to zero, in +dtuple_convert_big_rec(). */ +extern const byte field_ref_zero[BTR_EXTERN_FIELD_REF_SIZE]; + +#endif diff --git a/storage/xtradb/include/buf0buddy.h b/storage/xtradb/include/buf0buddy.h new file mode 100644 index 00000000000..2afef7913fc --- /dev/null +++ b/storage/xtradb/include/buf0buddy.h @@ -0,0 +1,91 @@ +/***************************************************************************** + +Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Binary buddy allocator for compressed pages + +Created December 2006 by Marko Makela +*******************************************************/ + +#ifndef buf0buddy_h +#define buf0buddy_h + +#ifdef UNIV_MATERIALIZE +# undef UNIV_INLINE +# define UNIV_INLINE +#endif + +#include "univ.i" +#include "buf0types.h" + +/************************************************************************** +Allocate a block. The thread calling this function must hold +buf_pool_mutex and must not hold buf_pool_zip_mutex or any +block->mutex. The buf_pool_mutex may only be released and reacquired +if lru != NULL. This function should only be used for allocating +compressed page frames or control blocks (buf_page_t). Allocated +control blocks must be properly initialized immediately after +buf_buddy_alloc() has returned the memory, before releasing +buf_pool_mutex. */ +UNIV_INLINE +void* +buf_buddy_alloc( +/*============*/ + /* out: allocated block, + possibly NULL if lru == NULL */ + ulint size, /* in: block size, up to UNIV_PAGE_SIZE */ + ibool* lru, /* in: pointer to a variable that will be assigned + TRUE if storage was allocated from the LRU list + and buf_pool_mutex was temporarily released, + or NULL if the LRU list should not be used */ + ibool have_page_hash_mutex) + __attribute__((malloc)); + +/************************************************************************** +Release a block. 
*/ +UNIV_INLINE +void +buf_buddy_free( +/*===========*/ + void* buf, /* in: block to be freed, must not be + pointed to by the buffer pool */ + ulint size, /* in: block size, up to UNIV_PAGE_SIZE */ + ibool have_page_hash_mutex) + __attribute__((nonnull)); + +/** Statistics of buddy blocks of a given size. */ +struct buf_buddy_stat_struct { + /** Number of blocks allocated from the buddy system. */ + ulint used; + /** Number of blocks relocated by the buddy system. */ + ib_uint64_t relocated; + /** Total duration of block relocations, in microseconds. */ + ib_uint64_t relocated_usec; +}; + +typedef struct buf_buddy_stat_struct buf_buddy_stat_t; + +/** Statistics of the buddy system, indexed by block size. +Protected by buf_pool_mutex. */ +extern buf_buddy_stat_t buf_buddy_stat[BUF_BUDDY_SIZES + 1]; + +#ifndef UNIV_NONINL +# include "buf0buddy.ic" +#endif + +#endif /* buf0buddy_h */ diff --git a/storage/xtradb/include/buf0buddy.ic b/storage/xtradb/include/buf0buddy.ic new file mode 100644 index 00000000000..7d46b140449 --- /dev/null +++ b/storage/xtradb/include/buf0buddy.ic @@ -0,0 +1,147 @@ +/***************************************************************************** + +Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Binary buddy allocator for compressed pages + +Created December 2006 by Marko Makela +*******************************************************/ + +#ifdef UNIV_MATERIALIZE +# undef UNIV_INLINE +# define UNIV_INLINE +#endif + +#include "buf0buf.h" +#include "buf0buddy.h" +#include "ut0ut.h" +#include "sync0sync.h" + +/************************************************************************** +Allocate a block. The thread calling this function must hold +buf_pool_mutex and must not hold buf_pool_zip_mutex or any block->mutex. +The buf_pool_mutex may only be released and reacquired if lru != NULL. */ +UNIV_INTERN +void* +buf_buddy_alloc_low( +/*================*/ + /* out: allocated block, + possibly NULL if lru==NULL */ + ulint i, /* in: index of buf_pool->zip_free[], + or BUF_BUDDY_SIZES */ + ibool* lru, /* in: pointer to a variable that will be assigned + TRUE if storage was allocated from the LRU list + and buf_pool_mutex was temporarily released, + or NULL if the LRU list should not be used */ + ibool have_page_hash_mutex) + __attribute__((malloc)); + +/************************************************************************** +Deallocate a block. */ +UNIV_INTERN +void +buf_buddy_free_low( +/*===============*/ + void* buf, /* in: block to be freed, must not be + pointed to by the buffer pool */ + ulint i, /* in: index of buf_pool->zip_free[], + or BUF_BUDDY_SIZES */ + ibool have_page_hash_mutex) + __attribute__((nonnull)); + +/************************************************************************** +Get the index of buf_pool->zip_free[] for a given block size. 
*/ +UNIV_INLINE +ulint +buf_buddy_get_slot( +/*===============*/ + /* out: index of buf_pool->zip_free[], + or BUF_BUDDY_SIZES */ + ulint size) /* in: block size */ +{ + ulint i; + ulint s; + + for (i = 0, s = BUF_BUDDY_LOW; s < size; i++, s <<= 1) { + } + + ut_ad(i <= BUF_BUDDY_SIZES); + return(i); +} + +/************************************************************************** +Allocate a block. The thread calling this function must hold +buf_pool_mutex and must not hold buf_pool_zip_mutex or any +block->mutex. The buf_pool_mutex may only be released and reacquired +if lru != NULL. This function should only be used for allocating +compressed page frames or control blocks (buf_page_t). Allocated +control blocks must be properly initialized immediately after +buf_buddy_alloc() has returned the memory, before releasing +buf_pool_mutex. */ +UNIV_INLINE +void* +buf_buddy_alloc( +/*============*/ + /* out: allocated block, + possibly NULL if lru == NULL */ + ulint size, /* in: block size, up to UNIV_PAGE_SIZE */ + ibool* lru, /* in: pointer to a variable that will be assigned + TRUE if storage was allocated from the LRU list + and buf_pool_mutex was temporarily released, + or NULL if the LRU list should not be used */ + ibool have_page_hash_mutex) +{ + //ut_ad(buf_pool_mutex_own()); + + return(buf_buddy_alloc_low(buf_buddy_get_slot(size), lru, have_page_hash_mutex)); +} + +/************************************************************************** +Deallocate a block. 
*/ +UNIV_INLINE +void +buf_buddy_free( +/*===========*/ + void* buf, /* in: block to be freed, must not be + pointed to by the buffer pool */ + ulint size, /* in: block size, up to UNIV_PAGE_SIZE */ + ibool have_page_hash_mutex) +{ + //ut_ad(buf_pool_mutex_own()); + + if (!have_page_hash_mutex) { + mutex_enter(&LRU_list_mutex); + mutex_enter(&flush_list_mutex); + rw_lock_x_lock(&page_hash_latch); + } + + mutex_enter(&zip_free_mutex); + buf_buddy_free_low(buf, buf_buddy_get_slot(size), TRUE); + mutex_exit(&zip_free_mutex); + + if (!have_page_hash_mutex) { + mutex_exit(&LRU_list_mutex); + mutex_exit(&flush_list_mutex); + rw_lock_x_unlock(&page_hash_latch); + } +} + +#ifdef UNIV_MATERIALIZE +# undef UNIV_INLINE +# define UNIV_INLINE UNIV_INLINE_ORIGINAL +#endif diff --git a/storage/xtradb/include/buf0buf.h b/storage/xtradb/include/buf0buf.h new file mode 100644 index 00000000000..9f94f72e293 --- /dev/null +++ b/storage/xtradb/include/buf0buf.h @@ -0,0 +1,1417 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The database buffer pool high-level routines + +Created 11/5/1995 Heikki Tuuri +*******************************************************/ + +#ifndef buf0buf_h +#define buf0buf_h + +#include "univ.i" +#include "fil0fil.h" +#include "mtr0types.h" +#include "buf0types.h" +#include "sync0rw.h" +#include "hash0hash.h" +#include "ut0byte.h" +#include "os0proc.h" +#include "page0types.h" + +/* Modes for buf_page_get_gen */ +#define BUF_GET 10 /* get always */ +#define BUF_GET_IF_IN_POOL 11 /* get if in pool */ +#define BUF_GET_NO_LATCH 14 /* get and bufferfix, but set no latch; + we have separated this case, because + it is error-prone programming not to + set a latch, and it should be used + with care */ +/* Modes for buf_page_get_known_nowait */ +#define BUF_MAKE_YOUNG 51 +#define BUF_KEEP_OLD 52 +/* Magic value to use instead of checksums when they are disabled */ +#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL + +extern buf_pool_t* buf_pool; /* The buffer pool of the database */ +#ifdef UNIV_DEBUG +extern ibool buf_debug_prints;/* If this is set TRUE, the program + prints info whenever read or flush + occurs */ +#endif /* UNIV_DEBUG */ +extern ulint srv_buf_pool_write_requests; /* variable to count write request + issued */ + +/* States of a control block (@see buf_page_struct). +The enumeration values must be 0..7. 
*/ +enum buf_page_state { + BUF_BLOCK_ZIP_FREE = 0, /* contains a free compressed page */ + BUF_BLOCK_ZIP_PAGE, /* contains a clean compressed page */ + BUF_BLOCK_ZIP_DIRTY, /* contains a compressed page that is + in the buf_pool->flush_list */ + + /* The constants for compressed-only pages must precede + BUF_BLOCK_NOT_USED; @see buf_block_state_valid() */ + + BUF_BLOCK_NOT_USED, /* is in the free list */ + BUF_BLOCK_READY_FOR_USE, /* when buf_LRU_get_free_block returns + a block, it is in this state */ + BUF_BLOCK_FILE_PAGE, /* contains a buffered file page */ + BUF_BLOCK_MEMORY, /* contains some main memory object */ + BUF_BLOCK_REMOVE_HASH /* hash index should be removed + before putting to the free list; + this enumerator has the value 7, the + largest value that fits in the 3-bit + bit-field buf_page_struct::state, so no + further states can be appended without + widening that field */ +}; + +/************************************************************************ +Creates the buffer pool. */ +UNIV_INTERN +buf_pool_t* +buf_pool_init(void); +/*===============*/ + /* out, own: buf_pool object, NULL if not + enough memory or error */ +/************************************************************************ +Frees the buffer pool at shutdown. This must not be invoked before +freeing all mutexes. */ +UNIV_INTERN +void +buf_pool_free(void); +/*===============*/ + +/************************************************************************ +Drops the adaptive hash index. To prevent a livelock, this function +is only to be called while holding btr_search_latch and while +btr_search_enabled == FALSE. */ +UNIV_INTERN +void +buf_pool_drop_hash_index(void); +/*==========================*/ + +/************************************************************************ +Relocate a buffer control block. Relocates the block on the LRU list +and in buf_pool->page_hash. Does not relocate bpage->list. +The caller must take care of relocating bpage->list. 
*/ +UNIV_INTERN +void +buf_relocate( +/*=========*/ + buf_page_t* bpage, /* in/out: control block being relocated; + buf_page_get_state(bpage) must be + BUF_BLOCK_ZIP_DIRTY or BUF_BLOCK_ZIP_PAGE */ + buf_page_t* dpage) /* in/out: destination control block */ + __attribute__((nonnull)); +/************************************************************************ +Resizes the buffer pool. */ +UNIV_INTERN +void +buf_pool_resize(void); +/*=================*/ +/************************************************************************* +Gets the current size of buffer buf_pool in bytes. */ +UNIV_INLINE +ulint +buf_pool_get_curr_size(void); +/*========================*/ + /* out: size in bytes */ +/************************************************************************ +Gets the smallest oldest_modification lsn for any page in the pool. Returns +zero if all modified pages have been flushed to disk. */ +UNIV_INLINE +ib_uint64_t +buf_pool_get_oldest_modification(void); +/*==================================*/ + /* out: oldest modification in pool, + zero if none */ +/************************************************************************ +Allocates a buffer block. */ +UNIV_INLINE +buf_block_t* +buf_block_alloc( +/*============*/ + /* out, own: the allocated block, + in state BUF_BLOCK_MEMORY */ + ulint zip_size); /* in: compressed page size in bytes, + or 0 if uncompressed tablespace */ +/************************************************************************ +Frees a buffer block which does not contain a file page. */ +UNIV_INLINE +void +buf_block_free( +/*===========*/ + buf_block_t* block); /* in, own: block to be freed */ +/************************************************************************* +Copies contents of a buffer frame to a given buffer. 
*/ +UNIV_INLINE +byte* +buf_frame_copy( +/*===========*/ + /* out: buf */ + byte* buf, /* in: buffer to copy to */ + const buf_frame_t* frame); /* in: buffer frame */ +/****************************************************************** +NOTE! The following macros should be used instead of buf_page_get_gen, +to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed +in LA! */ +#define buf_page_get(SP, ZS, OF, LA, MTR) buf_page_get_gen(\ + SP, ZS, OF, LA, NULL,\ + BUF_GET, __FILE__, __LINE__, MTR) +/****************************************************************** +Use these macros to bufferfix a page with no latching. Remember not to +read the contents of the page unless you know it is safe. Do not modify +the contents of the page! We have separated this case, because it is +error-prone programming not to set a latch, and it should be used +with care. */ +#define buf_page_get_with_no_latch(SP, ZS, OF, MTR) buf_page_get_gen(\ + SP, ZS, OF, RW_NO_LATCH, NULL,\ + BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR) +/****************************************************************** +NOTE! The following macros should be used instead of +buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and +RW_X_LATCH are allowed as LA! */ +#define buf_page_optimistic_get(LA, BL, MC, MTR) \ + buf_page_optimistic_get_func(LA, BL, MC, __FILE__, __LINE__, MTR) +/************************************************************************ +This is the general function used to get optimistic access to a database +page. 
*/ +UNIV_INTERN +ibool +buf_page_optimistic_get_func( +/*=========================*/ + /* out: TRUE if success */ + ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ + buf_block_t* block, /* in: guessed block */ + ib_uint64_t modify_clock,/* in: modify clock value if mode is + ..._GUESS_ON_CLOCK */ + const char* file, /* in: file name */ + ulint line, /* in: line where called */ + mtr_t* mtr); /* in: mini-transaction */ +/************************************************************************ +This is used to get access to a known database page, when no waiting can be +done. */ +UNIV_INTERN +ibool +buf_page_get_known_nowait( +/*======================*/ + /* out: TRUE if success */ + ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ + buf_block_t* block, /* in: the known page */ + ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */ + const char* file, /* in: file name */ + ulint line, /* in: line where called */ + mtr_t* mtr); /* in: mini-transaction */ + +/*********************************************************************** +Given a tablespace id and page number tries to get that page. If the +page is not in the buffer pool it is not loaded and NULL is returned. +Suitable for using when holding the kernel mutex. */ + +const buf_block_t* +buf_page_try_get_func( +/*==================*/ + ulint space_id,/* in: tablespace id */ + ulint page_no,/* in: page number */ + const char* file, /* in: file name */ + ulint line, /* in: line where called */ + mtr_t* mtr); /* in: mini-transaction */ + +/* Calls buf_page_try_get_func() with the caller's __FILE__ and __LINE__. +The macro expands to a plain expression without a trailing semicolon, so +it can be used inside conditions and unbraced if/else; the caller supplies +the terminating ';'. */ +#define buf_page_try_get(space_id, page_no, mtr) \ + buf_page_try_get_func(space_id, page_no, __FILE__, __LINE__, mtr) + +/************************************************************************ +Get read access to a compressed page (usually of type +FIL_PAGE_TYPE_ZBLOB or FIL_PAGE_TYPE_ZBLOB2). +The page must be released with buf_page_release_zip(). +NOTE: the page is not protected by any latch. Mutual exclusion has to +be implemented at a higher level. 
In other words, all possible +accesses to a given page through this function must be protected by +the same set of mutexes or latches. */ +UNIV_INTERN +buf_page_t* +buf_page_get_zip( +/*=============*/ + /* out: pointer to the block, + or NULL if not compressed */ + ulint space, /* in: space id */ + ulint zip_size,/* in: compressed page size */ + ulint offset);/* in: page number */ +/************************************************************************ +This is the general function used to get access to a database page. */ +UNIV_INTERN +buf_block_t* +buf_page_get_gen( +/*=============*/ + /* out: pointer to the block or NULL */ + ulint space, /* in: space id */ + ulint zip_size,/* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint offset, /* in: page number */ + ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ + buf_block_t* guess, /* in: guessed block or NULL */ + ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL, + BUF_GET_NO_LATCH */ + const char* file, /* in: file name */ + ulint line, /* in: line where called */ + mtr_t* mtr); /* in: mini-transaction */ +/************************************************************************ +Initializes a page to the buffer buf_pool. The page is usually not read +from a file even if it cannot be found in the buffer buf_pool. This is one +of the functions which perform to a block a state transition NOT_USED => +FILE_PAGE (the other is buf_page_get_gen). */ +UNIV_INTERN +buf_block_t* +buf_page_create( +/*============*/ + /* out: pointer to the block, page bufferfixed */ + ulint space, /* in: space id */ + ulint offset, /* in: offset of the page within space in units of + a page */ + ulint zip_size,/* in: compressed page size, or 0 */ + mtr_t* mtr); /* in: mini-transaction handle */ +#ifdef UNIV_HOTBACKUP +/************************************************************************ +Inits a page to the buffer buf_pool, for use in ibbackup --restore. 
*/ +UNIV_INTERN +void +buf_page_init_for_backup_restore( +/*=============================*/ + ulint space, /* in: space id */ + ulint offset, /* in: offset of the page within space + in units of a page */ + ulint zip_size,/* in: compressed page size in bytes + or 0 for uncompressed pages */ + buf_block_t* block); /* in: block to init */ +#endif /* UNIV_HOTBACKUP */ +/************************************************************************ +Releases a compressed-only page acquired with buf_page_get_zip(). */ +UNIV_INLINE +void +buf_page_release_zip( +/*=================*/ + buf_page_t* bpage); /* in: buffer block */ +/************************************************************************ +Decrements the bufferfix count of a buffer control block and releases +a latch, if specified. */ +UNIV_INLINE +void +buf_page_release( +/*=============*/ + buf_block_t* block, /* in: buffer block */ + ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH, + RW_NO_LATCH */ + mtr_t* mtr); /* in: mtr */ +/************************************************************************ +Moves a page to the start of the buffer pool LRU list. This high-level +function can be used to prevent an important page from slipping out of +the buffer pool. */ +UNIV_INTERN +void +buf_page_make_young( +/*================*/ + buf_page_t* bpage); /* in: buffer block of a file page */ +/************************************************************************ +Returns TRUE if the page can be found in the buffer pool hash table. NOTE +that it is possible that the page is not yet read from disk, though. */ +UNIV_INLINE +ibool +buf_page_peek( +/*==========*/ + /* out: TRUE if found from page hash table, + NOTE that the page is not necessarily yet read + from disk! */ + ulint space, /* in: space id */ + ulint offset);/* in: page number */ +/************************************************************************ +Resets the check_index_page_at_flush field of a page if found in the buffer +pool. 
*/ +UNIV_INTERN +void +buf_reset_check_index_page_at_flush( +/*================================*/ + ulint space, /* in: space id */ + ulint offset);/* in: page number */ +#ifdef UNIV_DEBUG_FILE_ACCESSES +/************************************************************************ +Sets file_page_was_freed TRUE if the page is found in the buffer pool. +This function should be called when we free a file page and want the +debug version to check that it is not accessed any more unless +reallocated. */ +UNIV_INTERN +buf_page_t* +buf_page_set_file_page_was_freed( +/*=============================*/ + /* out: control block if found in page hash table, + otherwise NULL */ + ulint space, /* in: space id */ + ulint offset);/* in: page number */ +/************************************************************************ +Sets file_page_was_freed FALSE if the page is found in the buffer pool. +This function should be called when we free a file page and want the +debug version to check that it is not accessed any more unless +reallocated. */ +UNIV_INTERN +buf_page_t* +buf_page_reset_file_page_was_freed( +/*===============================*/ + /* out: control block if found in page hash table, + otherwise NULL */ + ulint space, /* in: space id */ + ulint offset); /* in: page number */ +#endif /* UNIV_DEBUG_FILE_ACCESSES */ +/************************************************************************ +Reads the freed_page_clock of a buffer block. */ +UNIV_INLINE +ulint +buf_page_get_freed_page_clock( +/*==========================*/ + /* out: freed_page_clock */ + const buf_page_t* bpage) /* in: block */ + __attribute__((pure)); +/************************************************************************ +Reads the freed_page_clock of a buffer block. 
*/ +UNIV_INLINE +ulint +buf_block_get_freed_page_clock( +/*===========================*/ + /* out: freed_page_clock */ + const buf_block_t* block) /* in: block */ + __attribute__((pure)); + +/************************************************************************ +Recommends a move of a block to the start of the LRU list if there is danger +of dropping from the buffer pool. NOTE: does not reserve the buffer pool +mutex. */ +UNIV_INLINE +ibool +buf_page_peek_if_too_old( +/*=====================*/ + /* out: TRUE if should be made + younger */ + const buf_page_t* bpage); /* in: block to make younger */ +/************************************************************************ +Returns the current state of is_hashed of a page. FALSE if the page is +not in the pool. NOTE that this operation does not fix the page in the +pool if it is found there. */ +UNIV_INTERN +ibool +buf_page_peek_if_search_hashed( +/*===========================*/ + /* out: TRUE if page hash index is built in search + system */ + ulint space, /* in: space id */ + ulint offset);/* in: page number */ +/************************************************************************ +Gets the youngest modification log sequence number for a frame. +Returns zero if not file page or no modification occurred yet. */ +UNIV_INLINE +ib_uint64_t +buf_page_get_newest_modification( +/*=============================*/ + /* out: newest modification to page */ + const buf_page_t* bpage); /* in: block containing the + page frame */ +/************************************************************************ +Increments the modify clock of a frame by 1. The caller must (1) own the +buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock +on the block. */ +UNIV_INLINE +void +buf_block_modify_clock_inc( +/*=======================*/ + buf_block_t* block); /* in: block */ +/************************************************************************ +Returns the value of the modify clock. 
The caller must have an s-lock +or x-lock on the block. */ +UNIV_INLINE +ib_uint64_t +buf_block_get_modify_clock( +/*=======================*/ + /* out: value */ + buf_block_t* block); /* in: block */ +/************************************************************************ +Calculates a page checksum which is stored to the page when it is written +to a file. Note that we must be careful to calculate the same value +on 32-bit and 64-bit architectures. */ +UNIV_INTERN +ulint +buf_calc_page_new_checksum( +/*=======================*/ + /* out: checksum */ + const byte* page); /* in: buffer page */ +/************************************************************************ +In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only +looked at the first few bytes of the page. This calculates that old +checksum. +NOTE: we must first store the new formula checksum to +FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum +because this takes that field as an input! */ +UNIV_INTERN +ulint +buf_calc_page_old_checksum( +/*=======================*/ + /* out: checksum */ + const byte* page); /* in: buffer page */ +/************************************************************************ +Checks if a page is corrupt. */ +UNIV_INTERN +ibool +buf_page_is_corrupted( +/*==================*/ + /* out: TRUE if corrupted */ + const byte* read_buf, /* in: a database page */ + ulint zip_size); /* in: size of compressed page; + 0 for uncompressed pages */ +/************************************************************************** +Gets the space id, page offset, and byte offset within page of a +pointer pointing to a buffer frame containing a file page. 
*/ +UNIV_INLINE +void +buf_ptr_get_fsp_addr( +/*=================*/ + const void* ptr, /* in: pointer to a buffer frame */ + ulint* space, /* out: space id */ + fil_addr_t* addr); /* out: page offset and byte offset */ +/************************************************************************** +Gets the hash value of a block. This can be used in searches in the +lock hash table. */ +UNIV_INLINE +ulint +buf_block_get_lock_hash_val( +/*========================*/ + /* out: lock hash value */ + const buf_block_t* block) /* in: block */ + __attribute__((pure)); +#ifdef UNIV_DEBUG +/************************************************************************* +Finds a block in the buffer pool that points to a +given compressed page. */ +UNIV_INTERN +buf_block_t* +buf_pool_contains_zip( +/*==================*/ + /* out: buffer block pointing to + the compressed page, or NULL */ + const void* data); /* in: pointer to compressed page */ +#endif /* UNIV_DEBUG */ +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG +/************************************************************************* +Validates the buffer pool data structure. */ +UNIV_INTERN +ibool +buf_validate(void); +/*==============*/ +#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ +#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG +/************************************************************************* +Prints info of the buffer pool data structure. */ +UNIV_INTERN +void +buf_print(void); +/*============*/ +#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */ +/************************************************************************ +Prints a page to stderr. 
*/ +UNIV_INTERN +void +buf_page_print( +/*===========*/ + const byte* read_buf, /* in: a database page */ + ulint zip_size); /* in: compressed page size, or + 0 for uncompressed pages */ +#ifdef UNIV_DEBUG +/************************************************************************* +Returns the number of latched pages in the buffer pool. */ +UNIV_INTERN +ulint +buf_get_latched_pages_number(void); +/*==============================*/ +#endif /* UNIV_DEBUG */ +/************************************************************************* +Returns the number of pending buf pool ios. */ +UNIV_INTERN +ulint +buf_get_n_pending_ios(void); +/*=======================*/ +/************************************************************************* +Prints info of the buffer i/o. */ +UNIV_INTERN +void +buf_print_io( +/*=========*/ + FILE* file); /* in: file where to print */ +/************************************************************************* +Returns the ratio in percents of modified pages in the buffer pool / +database pages in the buffer pool. */ +UNIV_INTERN +ulint +buf_get_modified_ratio_pct(void); +/*============================*/ +/************************************************************************** +Refreshes the statistics used to print per-second averages. */ +UNIV_INTERN +void +buf_refresh_io_stats(void); +/*======================*/ +/************************************************************************* +Checks that all file pages in the buffer are in a replaceable state. */ +UNIV_INTERN +ibool +buf_all_freed(void); +/*===============*/ +/************************************************************************* +Checks that there currently are no pending i/o-operations for the buffer +pool. 
*/ +UNIV_INTERN +ibool +buf_pool_check_no_pending_io(void); +/*==============================*/ + /* out: TRUE if there is no pending i/o */ +/************************************************************************* +Invalidates the file pages in the buffer pool when an archive recovery is +completed. All the file pages buffered must be in a replaceable state when +this function is called: not latched and not modified. */ +UNIV_INTERN +void +buf_pool_invalidate(void); +/*=====================*/ + +/*======================================================================== +--------------------------- LOWER LEVEL ROUTINES ------------------------- +=========================================================================*/ + +#ifdef UNIV_SYNC_DEBUG +/************************************************************************* +Adds latch level info for the rw-lock protecting the buffer frame. This +should be called in the debug version after a successful latching of a +page if we know the latching order level of the acquired latch. */ +UNIV_INLINE +void +buf_block_dbg_add_level( +/*====================*/ + buf_block_t* block, /* in: buffer page + where we have acquired latch */ + ulint level); /* in: latching order level */ +#else /* UNIV_SYNC_DEBUG */ +# define buf_block_dbg_add_level(block, level) /* nothing */ +#endif /* UNIV_SYNC_DEBUG */ +/************************************************************************* +Gets the state of a block. */ +UNIV_INLINE +enum buf_page_state +buf_page_get_state( +/*===============*/ + /* out: state */ + const buf_page_t* bpage); /* in: pointer to the control block */ +/************************************************************************* +Gets the state of a block. 
*/ +UNIV_INLINE +enum buf_page_state +buf_block_get_state( +/*================*/ + /* out: state */ + const buf_block_t* block) /* in: pointer to the control block */ + __attribute__((pure)); +/************************************************************************* +Sets the state of a block. */ +UNIV_INLINE +void +buf_page_set_state( +/*===============*/ + buf_page_t* bpage, /* in/out: pointer to control block */ + enum buf_page_state state); /* in: state */ +/************************************************************************* +Sets the state of a block. */ +UNIV_INLINE +void +buf_block_set_state( +/*================*/ + buf_block_t* block, /* in/out: pointer to control block */ + enum buf_page_state state); /* in: state */ +/************************************************************************* +Determines if a block is mapped to a tablespace. */ +UNIV_INLINE +ibool +buf_page_in_file( +/*=============*/ + /* out: TRUE if mapped */ + const buf_page_t* bpage) /* in: pointer to control block */ + __attribute__((pure)); +/************************************************************************* +Determines if a block should be on unzip_LRU list. */ +UNIV_INLINE +ibool +buf_page_belongs_to_unzip_LRU( +/*==========================*/ + /* out: TRUE if block belongs + to unzip_LRU */ + const buf_page_t* bpage) /* in: pointer to control block */ + __attribute__((pure)); +/************************************************************************* +Determine the approximate LRU list position of a block. */ +UNIV_INLINE +ulint +buf_page_get_LRU_position( +/*======================*/ + /* out: LRU list position */ + const buf_page_t* bpage) /* in: control block */ + __attribute__((pure)); + +/************************************************************************* +Gets the mutex of a block. 
*/ +UNIV_INLINE +mutex_t* +buf_page_get_mutex( +/*===============*/ + /* out: pointer to mutex + protecting bpage */ + const buf_page_t* bpage) /* in: pointer to control block */ + __attribute__((pure)); + +/************************************************************************* +Get the flush type of a page. */ +UNIV_INLINE +enum buf_flush +buf_page_get_flush_type( +/*====================*/ + /* out: flush type */ + const buf_page_t* bpage) /* in: buffer page */ + __attribute__((pure)); +/************************************************************************* +Set the flush type of a page. */ +UNIV_INLINE +void +buf_page_set_flush_type( +/*====================*/ + buf_page_t* bpage, /* in: buffer page */ + enum buf_flush flush_type); /* in: flush type */ +/************************************************************************* +Map a block to a file page. */ +UNIV_INLINE +void +buf_block_set_file_page( +/*====================*/ + buf_block_t* block, /* in/out: pointer to control block */ + ulint space, /* in: tablespace id */ + ulint page_no);/* in: page number */ +/************************************************************************* +Gets the io_fix state of a block. */ +UNIV_INLINE +enum buf_io_fix +buf_page_get_io_fix( +/*================*/ + /* out: io_fix state */ + const buf_page_t* bpage) /* in: pointer to the control block */ + __attribute__((pure)); +/************************************************************************* +Gets the io_fix state of a block. */ +UNIV_INLINE +enum buf_io_fix +buf_block_get_io_fix( +/*================*/ + /* out: io_fix state */ + const buf_block_t* block) /* in: pointer to the control block */ + __attribute__((pure)); +/************************************************************************* +Sets the io_fix state of a block. 
*/ +UNIV_INLINE +void +buf_page_set_io_fix( +/*================*/ + buf_page_t* bpage, /* in/out: control block */ + enum buf_io_fix io_fix);/* in: io_fix state */ +/************************************************************************* +Sets the io_fix state of a block. */ +UNIV_INLINE +void +buf_block_set_io_fix( +/*=================*/ + buf_block_t* block, /* in/out: control block */ + enum buf_io_fix io_fix);/* in: io_fix state */ + +/************************************************************************ +Determine if a buffer block can be relocated in memory. The block +can be dirty, but it must not be I/O-fixed or bufferfixed. */ +UNIV_INLINE +ibool +buf_page_can_relocate( +/*==================*/ + const buf_page_t* bpage) /* control block being relocated */ + __attribute__((pure)); + +/************************************************************************* +Determine if a block has been flagged old. */ +UNIV_INLINE +ibool +buf_page_is_old( +/*============*/ + /* out: TRUE if old */ + const buf_page_t* bpage) /* in: control block */ + __attribute__((pure)); +/************************************************************************* +Flag a block old. */ +UNIV_INLINE +void +buf_page_set_old( +/*=============*/ + buf_page_t* bpage, /* in/out: control block */ + ibool old); /* in: old */ +/************************************************************************* +Determine if a block has been accessed in the buffer pool. */ +UNIV_INLINE +ibool +buf_page_is_accessed( +/*=================*/ + /* out: TRUE if accessed */ + const buf_page_t* bpage) /* in: control block */ + __attribute__((pure)); +/************************************************************************* +Flag a block accessed. 
*/ +UNIV_INLINE +void +buf_page_set_accessed( +/*==================*/ + buf_page_t* bpage, /* in/out: control block */ + ibool accessed); /* in: accessed */ +/************************************************************************* +Gets the buf_block_t handle of a buffered file block if an uncompressed +page frame exists, or NULL. */ +UNIV_INLINE +buf_block_t* +buf_page_get_block( +/*===============*/ + /* out: control block, or NULL */ + buf_page_t* bpage) /* in: control block, or NULL */ + __attribute__((pure)); +#ifdef UNIV_DEBUG +/************************************************************************* +Gets a pointer to the memory frame of a block. */ +UNIV_INLINE +buf_frame_t* +buf_block_get_frame( +/*================*/ + /* out: pointer to the frame */ + const buf_block_t* block) /* in: pointer to the control block */ + __attribute__((pure)); +#else /* UNIV_DEBUG */ +# define buf_block_get_frame(block) (block)->frame +#endif /* UNIV_DEBUG */ +/************************************************************************* +Gets the space id of a block. */ +UNIV_INLINE +ulint +buf_page_get_space( +/*===============*/ + /* out: space id */ + const buf_page_t* bpage) /* in: pointer to the control block */ + __attribute__((pure)); +/************************************************************************* +Gets the space id of a block. */ +UNIV_INLINE +ulint +buf_block_get_space( +/*================*/ + /* out: space id */ + const buf_block_t* block) /* in: pointer to the control block */ + __attribute__((pure)); +/************************************************************************* +Gets the page number of a block. */ +UNIV_INLINE +ulint +buf_page_get_page_no( +/*=================*/ + /* out: page number */ + const buf_page_t* bpage) /* in: pointer to the control block */ + __attribute__((pure)); +/************************************************************************* +Gets the page number of a block. 
*/ +UNIV_INLINE +ulint +buf_block_get_page_no( +/*==================*/ + /* out: page number */ + const buf_block_t* block) /* in: pointer to the control block */ + __attribute__((pure)); +/************************************************************************* +Gets the compressed page size of a block. */ +UNIV_INLINE +ulint +buf_page_get_zip_size( +/*==================*/ + /* out: compressed page size, or 0 */ + const buf_page_t* bpage) /* in: pointer to the control block */ + __attribute__((pure)); +/************************************************************************* +Gets the compressed page size of a block. */ +UNIV_INLINE +ulint +buf_block_get_zip_size( +/*===================*/ + /* out: compressed page size, or 0 */ + const buf_block_t* block) /* in: pointer to the control block */ + __attribute__((pure)); +/************************************************************************* +Gets the compressed page descriptor corresponding to an uncompressed page +if applicable. */ +#define buf_block_get_page_zip(block) \ + (UNIV_LIKELY_NULL((block)->page.zip.data) ? &(block)->page.zip : NULL) +/*********************************************************************** +Gets the block to whose frame the pointer is pointing to. */ +UNIV_INTERN +buf_block_t* +buf_block_align( +/*============*/ + /* out: pointer to block, never NULL */ + const byte* ptr); /* in: pointer to a frame */ +#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG +/************************************************************************* +Gets the compressed page descriptor corresponding to an uncompressed page +if applicable. 
*/ +UNIV_INLINE +const page_zip_des_t* +buf_frame_get_page_zip( +/*===================*/ + /* out: compressed page descriptor, or NULL */ + const byte* ptr); /* in: pointer to the page */ +#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ +/************************************************************************ +This function is used to get info if there is an io operation +going on on a buffer page. */ +UNIV_INLINE +ibool +buf_page_io_query( +/*==============*/ + /* out: TRUE if io going on */ + buf_page_t* bpage); /* in: pool block, must be bufferfixed */ +/************************************************************************ +Function which inits a page for read to the buffer buf_pool. If the page is +(1) already in buf_pool, or +(2) if we specify to read only ibuf pages and the page is not an ibuf page, or +(3) if the space is deleted or being deleted, +then this function does nothing. +Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock +on the buffer frame. The io-handler must take care that the flag is cleared +and the lock released later. */ +UNIV_INTERN +buf_page_t* +buf_page_init_for_read( +/*===================*/ + /* out: pointer to the block or NULL */ + ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED */ + ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ... */ + ulint space, /* in: space id */ + ulint zip_size,/* in: compressed page size, or 0 */ + ibool unzip, /* in: TRUE=request uncompressed page */ + ib_int64_t tablespace_version,/* in: prevents reading from a wrong + version of the tablespace in case we have done + DISCARD + IMPORT */ + ulint offset);/* in: page number */ +/************************************************************************ +Completes an asynchronous read or write request of a file page to or from +the buffer pool. 
*/ +UNIV_INTERN +void +buf_page_io_complete( +/*=================*/ + buf_page_t* bpage); /* in: pointer to the block in question */ +/************************************************************************ +Calculates a folded value of a file page address to use in the page hash +table. */ +UNIV_INLINE +ulint +buf_page_address_fold( +/*==================*/ + /* out: the folded value */ + ulint space, /* in: space id */ + ulint offset) /* in: offset of the page within space */ + __attribute__((const)); +/********************************************************************** +Returns the control block of a file page, NULL if not found. */ +UNIV_INLINE +buf_page_t* +buf_page_hash_get( +/*==============*/ + /* out: block, NULL if not found */ + ulint space, /* in: space id */ + ulint offset);/* in: offset of the page within space */ +/********************************************************************** +Returns the control block of a file page, NULL if not found +or an uncompressed page frame does not exist. */ +UNIV_INLINE +buf_block_t* +buf_block_hash_get( +/*===============*/ + /* out: block, NULL if not found */ + ulint space, /* in: space id */ + ulint offset);/* in: offset of the page within space */ +/*********************************************************************** +Increments the pool clock by one and returns its new value. Remember that +in the 32 bit version the clock wraps around at 4 billion! */ +UNIV_INLINE +ulint +buf_pool_clock_tic(void); +/*====================*/ + /* out: new clock value */ +/************************************************************************* +Gets the current length of the free list of buffer blocks. 
*/ +UNIV_INTERN +ulint +buf_get_free_list_len(void); +/*=======================*/ + + + +/* The common buffer control block structure +for compressed and uncompressed frames */ + +struct buf_page_struct{ + /* None of the following bit-fields must be modified without + holding buf_page_get_mutex() [block->mutex or buf_pool_zip_mutex], + since they can be stored in the same machine word. Some of them are + additionally protected by buf_pool_mutex. */ + + unsigned space:32; /* tablespace id; also protected + by buf_pool_mutex. */ + unsigned offset:32; /* page number; also protected + by buf_pool_mutex. */ + + unsigned state:3; /* state of the control block + (@see enum buf_page_state); also + protected by buf_pool_mutex. + State transitions from + BUF_BLOCK_READY_FOR_USE to + BUF_BLOCK_MEMORY need not be + protected by buf_page_get_mutex(). */ + unsigned flush_type:2; /* if this block is currently being + flushed to disk, this tells the + flush_type (@see enum buf_flush) */ + unsigned accessed:1; /* TRUE if the page has been accessed + while in the buffer pool: read-ahead + may read in pages which have not been + accessed yet; a thread is allowed to + read this for heuristic purposes + without holding any mutex or latch */ + unsigned io_fix:2; /* type of pending I/O operation + (@see enum buf_io_fix); also + protected by buf_pool_mutex */ + unsigned buf_fix_count:24;/* count of how manyfold this block + is currently bufferfixed */ + + page_zip_des_t zip; /* compressed page; zip.data + (but not the data it points to) is + also protected by buf_pool_mutex */ + buf_page_t* hash; /* node used in chaining to + buf_pool->page_hash or + buf_pool->zip_hash */ +#ifdef UNIV_DEBUG + ibool in_page_hash; /* TRUE if in buf_pool->page_hash */ + ibool in_zip_hash; /* TRUE if in buf_pool->zip_hash */ +#endif /* UNIV_DEBUG */ + + /* 2. 
Page flushing fields; protected by buf_pool_mutex */ + + UT_LIST_NODE_T(buf_page_t) list; + /* based on state, this is a list + node in one of the following lists + in buf_pool: + + BUF_BLOCK_NOT_USED: free + BUF_BLOCK_FILE_PAGE: flush_list + BUF_BLOCK_ZIP_DIRTY: flush_list + BUF_BLOCK_ZIP_PAGE: zip_clean + BUF_BLOCK_ZIP_FREE: zip_free[] */ +#ifdef UNIV_DEBUG + ibool in_flush_list; /* TRUE if in buf_pool->flush_list; + when buf_pool_mutex is free, the + following should hold: in_flush_list + == (state == BUF_BLOCK_FILE_PAGE + || state == BUF_BLOCK_ZIP_DIRTY) */ + ibool in_free_list; /* TRUE if in buf_pool->free; when + buf_pool_mutex is free, the following + should hold: in_free_list + == (state == BUF_BLOCK_NOT_USED) */ +#endif /* UNIV_DEBUG */ + ib_uint64_t newest_modification; + /* log sequence number of the youngest + modification to this block, zero if + not modified */ + ib_uint64_t oldest_modification; + /* log sequence number of the START of + the log entry written of the oldest + modification to this block which has + not yet been flushed on disk; zero if + all modifications are on disk */ + + /* 3. LRU replacement algorithm fields; protected by + buf_pool_mutex only (not buf_pool_zip_mutex or block->mutex) */ + + UT_LIST_NODE_T(buf_page_t) LRU; + /* node of the LRU list */ +//#ifdef UNIV_DEBUG + ibool in_LRU_list; /* TRUE if the page is in the LRU list; + used in debugging */ +//#endif /* UNIV_DEBUG */ + unsigned old:1; /* TRUE if the block is in the old + blocks in the LRU list */ + unsigned LRU_position:31;/* value which monotonically decreases + (or may stay constant if old==TRUE) + toward the end of the LRU list, if + buf_pool->ulint_clock has not wrapped + around: NOTE that this value can only + be used in heuristic algorithms, + because of the possibility of a + wrap-around! 
*/ + unsigned freed_page_clock:32;/* the value of + buf_pool->freed_page_clock when this + block was the last time put to the + head of the LRU list; a thread is + allowed to read this for heuristic + purposes without holding any mutex or + latch */ +#ifdef UNIV_DEBUG_FILE_ACCESSES + ibool file_page_was_freed; + /* this is set to TRUE when fsp + frees a page in buffer pool */ +#endif /* UNIV_DEBUG_FILE_ACCESSES */ +}; + +/* The buffer control block structure */ + +struct buf_block_struct{ + + /* 1. General fields */ + + buf_page_t page; /* page information; this must + be the first field, so that + buf_pool->page_hash can point + to buf_page_t or buf_block_t */ + UT_LIST_NODE_T(buf_block_t) unzip_LRU; + /* node of the decompressed LRU list; + a block is in the unzip_LRU list + if page.state == BUF_BLOCK_FILE_PAGE + and page.zip.data != NULL */ +//#ifdef UNIV_DEBUG + ibool in_unzip_LRU_list;/* TRUE if the page is in the + decompressed LRU list; + used in debugging */ +//#endif /* UNIV_DEBUG */ + byte* frame; /* pointer to buffer frame which + is of size UNIV_PAGE_SIZE, and + aligned to an address divisible by + UNIV_PAGE_SIZE */ + mutex_t mutex; /* mutex protecting this block: + state (also protected by the buffer + pool mutex), io_fix, buf_fix_count, + and accessed; we introduce this new + mutex in InnoDB-5.1 to relieve + contention on the buffer pool mutex */ + rw_lock_t lock; /* read-write lock of the buffer + frame */ + unsigned lock_hash_val:32;/* hashed value of the page address + in the record lock hash table */ + unsigned check_index_page_at_flush:1; + /* TRUE if we know that this is + an index page, and want the database + to check its consistency before flush; + note that there may be pages in the + buffer pool which are index pages, + but this flag is not set because + we do not keep track of all pages */ + + /* 2. 
Optimistic search field */ + + ib_uint64_t modify_clock; /* this clock is incremented every + time a pointer to a record on the + page may become obsolete; this is + used in the optimistic cursor + positioning: if the modify clock has + not changed, we know that the pointer + is still valid; this field may be + changed if the thread (1) owns the + pool mutex and the page is not + bufferfixed, or (2) the thread has an + x-latch on the block */ + + /* 3. Hash search fields: NOTE that the first 4 fields are NOT + protected by any semaphore! */ + + ulint n_hash_helps; /* counter which controls building + of a new hash index for the page */ + ulint n_fields; /* recommended prefix length for hash + search: number of full fields */ + ulint n_bytes; /* recommended prefix: number of bytes + in an incomplete field */ + ibool left_side; /* TRUE or FALSE, depending on + whether the leftmost record of several + records with the same prefix should be + indexed in the hash index */ + + /* These 6 fields may only be modified when we have + an x-latch on btr_search_latch AND + a) we are holding an s-latch or x-latch on block->lock or + b) we know that block->buf_fix_count == 0. + + An exception to this is when we init or create a page + in the buffer pool in buf0buf.c. */ + +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + ulint n_pointers; /* used in debugging: the number of + pointers in the adaptive hash index + pointing to this frame */ +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + unsigned is_hashed:1; /* TRUE if hash index has already been + built on this page; note that it does + not guarantee that the index is + complete, though: there may have been + hash collisions, record deletions, + etc. 
*/ + unsigned curr_n_fields:10;/* prefix length for hash indexing: + number of full fields */ + unsigned curr_n_bytes:15;/* number of bytes in hash indexing */ + unsigned curr_left_side:1;/* TRUE or FALSE in hash indexing */ + dict_index_t* index; /* Index for which the adaptive + hash index has been created. */ + /* 4. Debug fields */ +#ifdef UNIV_SYNC_DEBUG + rw_lock_t debug_latch; /* in the debug version, each thread + which bufferfixes the block acquires + an s-latch here; so we can use the + debug utilities in sync0rw */ +#endif +}; + +/* Check if a buf_block_t object is in a valid state. */ +#define buf_block_state_valid(block) \ +(buf_block_get_state(block) >= BUF_BLOCK_NOT_USED \ + && (buf_block_get_state(block) <= BUF_BLOCK_REMOVE_HASH)) + +/************************************************************************** +Compute the hash fold value for blocks in buf_pool->zip_hash. */ +#define BUF_POOL_ZIP_FOLD_PTR(ptr) ((ulint) (ptr) / UNIV_PAGE_SIZE) +#define BUF_POOL_ZIP_FOLD(b) BUF_POOL_ZIP_FOLD_PTR((b)->frame) +#define BUF_POOL_ZIP_FOLD_BPAGE(b) BUF_POOL_ZIP_FOLD((buf_block_t*) (b)) + +/* The buffer pool structure. NOTE! The definition appears here only for +other modules of this directory (buf) to see it. Do not use from outside! */ + +struct buf_pool_struct{ + + /* 1. 
General fields */ + + ulint n_chunks; /* number of buffer pool chunks */ + buf_chunk_t* chunks; /* buffer pool chunks */ + ulint curr_size; /* current pool size in pages */ + hash_table_t* page_hash; /* hash table of buf_page_t or + buf_block_t file pages, + buf_page_in_file() == TRUE, + indexed by (space_id, offset) */ + hash_table_t* zip_hash; /* hash table of buf_block_t blocks + whose frames are allocated to the + zip buddy system, + indexed by block->frame */ + ulint n_pend_reads; /* number of pending read operations */ + ulint n_pend_unzip; /* number of pending decompressions */ + + time_t last_printout_time; /* when buf_print was last time + called */ + ulint n_pages_read; /* number read operations */ + ulint n_pages_written;/* number write operations */ + ulint n_pages_created;/* number of pages created in the pool + with no read */ + ulint n_page_gets; /* number of page gets performed; + also successful searches through + the adaptive hash index are + counted as page gets; this field + is NOT protected by the buffer + pool mutex */ + ulint n_page_gets_old;/* n_page_gets when buf_print was + last time called: used to calculate + hit rate */ + ulint n_pages_read_old;/* n_pages_read when buf_print was + last time called */ + ulint n_pages_written_old;/* number write operations */ + ulint n_pages_created_old;/* number of pages created in + the pool with no read */ + /* 2. Page flushing algorithm fields */ + + UT_LIST_BASE_NODE_T(buf_page_t) flush_list; + /* base node of the modified block + list */ + ibool init_flush[BUF_FLUSH_N_TYPES]; + /* this is TRUE when a flush of the + given type is being initialized */ + ulint n_flush[BUF_FLUSH_N_TYPES]; + /* this is the number of pending + writes in the given flush type */ + os_event_t no_flush[BUF_FLUSH_N_TYPES]; + /* this is in the set state when there + is no flush batch of the given type + running */ + ulint ulint_clock; /* a sequence number used to count + time. NOTE! 
This counter wraps + around at 4 billion (if ulint == + 32 bits)! */ + ulint freed_page_clock;/* a sequence number used to count the + number of buffer blocks removed from + the end of the LRU list; NOTE that + this counter may wrap around at 4 + billion! A thread is allowed to + read this for heuristic purposes + without holding any mutex or latch */ + ulint LRU_flush_ended;/* when an LRU flush ends for a page, + this is incremented by one; this is + set to zero when a buffer block is + allocated */ + + /* 3. LRU replacement algorithm fields */ + + UT_LIST_BASE_NODE_T(buf_page_t) free; + /* base node of the free block list */ + UT_LIST_BASE_NODE_T(buf_page_t) LRU; + /* base node of the LRU list */ + buf_page_t* LRU_old; /* pointer to the about 3/8 oldest + blocks in the LRU list; NULL if LRU + length less than BUF_LRU_OLD_MIN_LEN; + NOTE: when LRU_old != NULL, its length + should always equal LRU_old_len */ + ulint LRU_old_len; /* length of the LRU list from + the block to which LRU_old points + onward, including that block; + see buf0lru.c for the restrictions + on this value; not defined if + LRU_old == NULL; + NOTE: LRU_old_len must be adjusted + whenever LRU_old shrinks or grows! */ + + UT_LIST_BASE_NODE_T(buf_block_t) unzip_LRU; + /* base node of the unzip_LRU list */ + + /* 4. 
Fields for the buddy allocator of compressed pages */ + UT_LIST_BASE_NODE_T(buf_page_t) zip_clean; + /* unmodified compressed pages */ + UT_LIST_BASE_NODE_T(buf_page_t) zip_free[BUF_BUDDY_SIZES]; + /* buddy free lists */ +#if BUF_BUDDY_HIGH != UNIV_PAGE_SIZE +# error "BUF_BUDDY_HIGH != UNIV_PAGE_SIZE" +#endif +#if BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE +# error "BUF_BUDDY_LOW > PAGE_ZIP_MIN_SIZE" +#endif +}; + +/* mutex protecting the buffer pool struct and control blocks, except the +read-write lock in them */ +extern mutex_t buf_pool_mutex; +extern mutex_t LRU_list_mutex; +extern mutex_t flush_list_mutex; +extern rw_lock_t page_hash_latch; +extern mutex_t free_list_mutex; +extern mutex_t zip_free_mutex; +extern mutex_t zip_hash_mutex; +/* mutex protecting the control blocks of compressed-only pages +(of type buf_page_t, not buf_block_t) */ +extern mutex_t buf_pool_zip_mutex; + +/* Accessors for buf_pool_mutex. Use these instead of accessing +buf_pool_mutex directly. */ + +/* Test if buf_pool_mutex is owned. */ +#define buf_pool_mutex_own() mutex_own(&buf_pool_mutex) +/* Acquire the buffer pool mutex. */ +#define buf_pool_mutex_enter() do { \ + ut_ad(!mutex_own(&buf_pool_zip_mutex)); \ + mutex_enter(&buf_pool_mutex); \ +} while (0) + +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG +/** Flag to forbid the release of the buffer pool mutex. +Protected by buf_pool_mutex. */ +extern ulint buf_pool_mutex_exit_forbidden; +/* Forbid the release of the buffer pool mutex. */ +# define buf_pool_mutex_exit_forbid() do { \ + ut_ad(buf_pool_mutex_own()); \ + buf_pool_mutex_exit_forbidden++; \ +} while (0) +/* Allow the release of the buffer pool mutex. */ +# define buf_pool_mutex_exit_allow() do { \ + ut_ad(buf_pool_mutex_own()); \ + ut_a(buf_pool_mutex_exit_forbidden); \ + buf_pool_mutex_exit_forbidden--; \ +} while (0) +/* Release the buffer pool mutex. 
*/ +# define buf_pool_mutex_exit() do { \ + ut_a(!buf_pool_mutex_exit_forbidden); \ + mutex_exit(&buf_pool_mutex); \ +} while (0) +#else +/* Forbid the release of the buffer pool mutex. */ +# define buf_pool_mutex_exit_forbid() ((void) 0) +/* Allow the release of the buffer pool mutex. */ +# define buf_pool_mutex_exit_allow() ((void) 0) +/* Release the buffer pool mutex. */ +# define buf_pool_mutex_exit() mutex_exit(&buf_pool_mutex) +#endif + +/************************************************************************ +Let us list the consistency conditions for different control block states. + +NOT_USED: is in free list, not in LRU list, not in flush list, nor + page hash table +READY_FOR_USE: is not in free list, LRU list, or flush list, nor page + hash table +MEMORY: is not in free list, LRU list, or flush list, nor page + hash table +FILE_PAGE: space and offset are defined, is in page hash table + if io_fix == BUF_IO_WRITE, + pool: no_flush[flush_type] is in reset state, + pool: n_flush[flush_type] > 0 + + (1) if buf_fix_count == 0, then + is in LRU list, not in free list + is in flush list, + if and only if oldest_modification > 0 + is x-locked, + if and only if io_fix == BUF_IO_READ + is s-locked, + if and only if io_fix == BUF_IO_WRITE + + (2) if buf_fix_count > 0, then + is not in LRU list, not in free list + is in flush list, + if and only if oldest_modification > 0 + if io_fix == BUF_IO_READ, + is x-locked + if io_fix == BUF_IO_WRITE, + is s-locked + +State transitions: + +NOT_USED => READY_FOR_USE +READY_FOR_USE => MEMORY +READY_FOR_USE => FILE_PAGE +MEMORY => NOT_USED +FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if + (1) buf_fix_count == 0, + (2) oldest_modification == 0, and + (3) io_fix == 0. 
+*/ + +#ifndef UNIV_NONINL +#include "buf0buf.ic" +#endif + +#endif diff --git a/storage/xtradb/include/buf0buf.ic b/storage/xtradb/include/buf0buf.ic new file mode 100644 index 00000000000..c56c0ac62f2 --- /dev/null +++ b/storage/xtradb/include/buf0buf.ic @@ -0,0 +1,1095 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2008, Google Inc. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The database buffer buf_pool + +Created 11/5/1995 Heikki Tuuri +*******************************************************/ + +#include "buf0flu.h" +#include "buf0lru.h" +#include "buf0rea.h" +#include "mtr0mtr.h" + +/************************************************************************ +Reads the freed_page_clock of a buffer block. 
*/ +UNIV_INLINE +ulint +buf_page_get_freed_page_clock( +/*==========================*/ + /* out: freed_page_clock */ + const buf_page_t* bpage) /* in: block */ +{ + /* This is sometimes read without holding buf_pool_mutex. */ + return(bpage->freed_page_clock); +} + +/************************************************************************ +Reads the freed_page_clock of a buffer block. */ +UNIV_INLINE +ulint +buf_block_get_freed_page_clock( +/*===========================*/ + /* out: freed_page_clock */ + const buf_block_t* block) /* in: block */ +{ + return(buf_page_get_freed_page_clock(&block->page)); +} + +/************************************************************************ +Recommends a move of a block to the start of the LRU list if there is danger +of dropping from the buffer pool. NOTE: does not reserve the buffer pool +mutex. */ +UNIV_INLINE +ibool +buf_page_peek_if_too_old( +/*=====================*/ + /* out: TRUE if should be made + younger */ + const buf_page_t* bpage) /* in: block to make younger */ +{ + return(buf_pool->freed_page_clock + >= buf_page_get_freed_page_clock(bpage) + + 1 + (buf_pool->curr_size / 4)); +} + +/************************************************************************* +Gets the current size of buffer buf_pool in bytes. */ +UNIV_INLINE +ulint +buf_pool_get_curr_size(void) +/*========================*/ + /* out: size in bytes */ +{ + return(buf_pool->curr_size * UNIV_PAGE_SIZE); +} + +/************************************************************************ +Gets the smallest oldest_modification lsn for any page in the pool. Returns +zero if all modified pages have been flushed to disk. 
*/ +UNIV_INLINE +ib_uint64_t +buf_pool_get_oldest_modification(void) +/*==================================*/ + /* out: oldest modification in pool, + zero if none */ +{ + buf_page_t* bpage; + ib_uint64_t lsn; + + //buf_pool_mutex_enter(); + mutex_enter(&flush_list_mutex); + + bpage = UT_LIST_GET_LAST(buf_pool->flush_list); + + if (bpage == NULL) { + lsn = 0; + } else { + ut_ad(bpage->in_flush_list); + lsn = bpage->oldest_modification; + } + + //buf_pool_mutex_exit(); + mutex_exit(&flush_list_mutex); + + /* The returned answer may be out of date: the flush_list can + change after the mutex has been released. */ + + return(lsn); +} + +/*********************************************************************** +Increments the buf_pool clock by one and returns its new value. Remember +that in the 32 bit version the clock wraps around at 4 billion! */ +UNIV_INLINE +ulint +buf_pool_clock_tic(void) +/*====================*/ + /* out: new clock value */ +{ + //ut_ad(buf_pool_mutex_own()); + ut_ad(mutex_own(&LRU_list_mutex)); + + buf_pool->ulint_clock++; + + return(buf_pool->ulint_clock); +} + +/************************************************************************* +Gets the state of a block. */ +UNIV_INLINE +enum buf_page_state +buf_page_get_state( +/*===============*/ + /* out: state */ + const buf_page_t* bpage) /* in: pointer to the control block */ +{ + enum buf_page_state state = (enum buf_page_state) bpage->state; + +#ifdef UNIV_DEBUG + switch (state) { + case BUF_BLOCK_ZIP_FREE: + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_ZIP_DIRTY: + case BUF_BLOCK_NOT_USED: + case BUF_BLOCK_READY_FOR_USE: + case BUF_BLOCK_FILE_PAGE: + case BUF_BLOCK_MEMORY: + case BUF_BLOCK_REMOVE_HASH: + break; + default: + ut_error; + } +#endif /* UNIV_DEBUG */ + + return(state); +} +/************************************************************************* +Gets the state of a block. 
*/ +UNIV_INLINE +enum buf_page_state +buf_block_get_state( +/*================*/ + /* out: state */ + const buf_block_t* block) /* in: pointer to the control block */ +{ + return(buf_page_get_state(&block->page)); +} +/************************************************************************* +Sets the state of a block. */ +UNIV_INLINE +void +buf_page_set_state( +/*===============*/ + buf_page_t* bpage, /* in/out: pointer to control block */ + enum buf_page_state state) /* in: state */ +{ +#ifdef UNIV_DEBUG + enum buf_page_state old_state = buf_page_get_state(bpage); + + switch (old_state) { + case BUF_BLOCK_ZIP_FREE: + ut_error; + break; + case BUF_BLOCK_ZIP_PAGE: + ut_a(state == BUF_BLOCK_ZIP_DIRTY); + break; + case BUF_BLOCK_ZIP_DIRTY: + ut_a(state == BUF_BLOCK_ZIP_PAGE); + break; + case BUF_BLOCK_NOT_USED: + ut_a(state == BUF_BLOCK_READY_FOR_USE); + break; + case BUF_BLOCK_READY_FOR_USE: + ut_a(state == BUF_BLOCK_MEMORY + || state == BUF_BLOCK_FILE_PAGE + || state == BUF_BLOCK_NOT_USED); + break; + case BUF_BLOCK_MEMORY: + ut_a(state == BUF_BLOCK_NOT_USED); + break; + case BUF_BLOCK_FILE_PAGE: + ut_a(state == BUF_BLOCK_NOT_USED + || state == BUF_BLOCK_REMOVE_HASH); + break; + case BUF_BLOCK_REMOVE_HASH: + ut_a(state == BUF_BLOCK_MEMORY); + break; + } +#endif /* UNIV_DEBUG */ + bpage->state = state; + ut_ad(buf_page_get_state(bpage) == state); +} + +/************************************************************************* +Sets the state of a block. */ +UNIV_INLINE +void +buf_block_set_state( +/*================*/ + buf_block_t* block, /* in/out: pointer to control block */ + enum buf_page_state state) /* in: state */ +{ + buf_page_set_state(&block->page, state); +} + +/************************************************************************* +Determines if a block is mapped to a tablespace. 
*/ +UNIV_INLINE +ibool +buf_page_in_file( +/*=============*/ + /* out: TRUE if mapped */ + const buf_page_t* bpage) /* in: pointer to control block */ +{ + switch (buf_page_get_state(bpage)) { + case BUF_BLOCK_ZIP_FREE: + /* This is a free page in buf_pool->zip_free[]. + Such pages should only be accessed by the buddy allocator. */ + /* ut_error; */ /* optimistic */ + break; + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_ZIP_DIRTY: + case BUF_BLOCK_FILE_PAGE: + return(TRUE); + case BUF_BLOCK_NOT_USED: + case BUF_BLOCK_READY_FOR_USE: + case BUF_BLOCK_MEMORY: + case BUF_BLOCK_REMOVE_HASH: + break; + } + + return(FALSE); +} + +/************************************************************************* +Determines if a block should be on unzip_LRU list. */ +UNIV_INLINE +ibool +buf_page_belongs_to_unzip_LRU( +/*==========================*/ + /* out: TRUE if block belongs + to unzip_LRU */ + const buf_page_t* bpage) /* in: pointer to control block */ +{ + ut_ad(buf_page_in_file(bpage)); + + return(bpage->zip.data + && buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE); +} + +/************************************************************************* +Determine the approximate LRU list position of a block. */ +UNIV_INLINE +ulint +buf_page_get_LRU_position( +/*======================*/ + /* out: LRU list position */ + const buf_page_t* bpage) /* in: control block */ +{ + ut_ad(buf_page_in_file(bpage)); + ut_ad(buf_pool_mutex_own()); + + return(bpage->LRU_position); +} + +/************************************************************************* +Gets the mutex of a block. 
*/ +UNIV_INLINE +mutex_t* +buf_page_get_mutex( +/*===============*/ + /* out: pointer to mutex + protecting bpage */ + const buf_page_t* bpage) /* in: pointer to control block */ +{ + switch (buf_page_get_state(bpage)) { + case BUF_BLOCK_ZIP_FREE: + /* ut_error; */ /* optimistic */ + return(NULL); + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_ZIP_DIRTY: + return(&buf_pool_zip_mutex); + default: + return(&((buf_block_t*) bpage)->mutex); + } +} + +/************************************************************************* +Get the flush type of a page. */ +UNIV_INLINE +enum buf_flush +buf_page_get_flush_type( +/*====================*/ + /* out: flush type */ + const buf_page_t* bpage) /* in: buffer page */ +{ + enum buf_flush flush_type = (enum buf_flush) bpage->flush_type; + +#ifdef UNIV_DEBUG + switch (flush_type) { + case BUF_FLUSH_LRU: + case BUF_FLUSH_SINGLE_PAGE: + case BUF_FLUSH_LIST: + return(flush_type); + case BUF_FLUSH_N_TYPES: + break; + } + ut_error; +#endif /* UNIV_DEBUG */ + return(flush_type); +} +/************************************************************************* +Set the flush type of a page. */ +UNIV_INLINE +void +buf_page_set_flush_type( +/*====================*/ + buf_page_t* bpage, /* in: buffer page */ + enum buf_flush flush_type) /* in: flush type */ +{ + bpage->flush_type = flush_type; + ut_ad(buf_page_get_flush_type(bpage) == flush_type); +} + +/************************************************************************* +Map a block to a file page. */ +UNIV_INLINE +void +buf_block_set_file_page( +/*====================*/ + buf_block_t* block, /* in/out: pointer to control block */ + ulint space, /* in: tablespace id */ + ulint page_no)/* in: page number */ +{ + buf_block_set_state(block, BUF_BLOCK_FILE_PAGE); + block->page.space = space; + block->page.offset = page_no; +} + +/************************************************************************* +Gets the io_fix state of a block. 
*/ +UNIV_INLINE +enum buf_io_fix +buf_page_get_io_fix( +/*================*/ + /* out: io_fix state */ + const buf_page_t* bpage) /* in: pointer to the control block */ +{ + enum buf_io_fix io_fix = (enum buf_io_fix) bpage->io_fix; +#ifdef UNIV_DEBUG + switch (io_fix) { + case BUF_IO_NONE: + case BUF_IO_READ: + case BUF_IO_WRITE: + return(io_fix); + } + ut_error; +#endif /* UNIV_DEBUG */ + return(io_fix); +} + +/************************************************************************* +Gets the io_fix state of a block. */ +UNIV_INLINE +enum buf_io_fix +buf_block_get_io_fix( +/*================*/ + /* out: io_fix state */ + const buf_block_t* block) /* in: pointer to the control block */ +{ + return(buf_page_get_io_fix(&block->page)); +} + +/************************************************************************* +Sets the io_fix state of a block. */ +UNIV_INLINE +void +buf_page_set_io_fix( +/*================*/ + buf_page_t* bpage, /* in/out: control block */ + enum buf_io_fix io_fix) /* in: io_fix state */ +{ + //ut_ad(buf_pool_mutex_own()); + ut_ad(mutex_own(buf_page_get_mutex(bpage))); + + bpage->io_fix = io_fix; + ut_ad(buf_page_get_io_fix(bpage) == io_fix); +} + +/************************************************************************* +Sets the io_fix state of a block. */ +UNIV_INLINE +void +buf_block_set_io_fix( +/*=================*/ + buf_block_t* block, /* in/out: control block */ + enum buf_io_fix io_fix) /* in: io_fix state */ +{ + buf_page_set_io_fix(&block->page, io_fix); +} + +/************************************************************************ +Determine if a buffer block can be relocated in memory. The block +can be dirty, but it must not be I/O-fixed or bufferfixed. 
*/ +UNIV_INLINE +ibool +buf_page_can_relocate( +/*==================*/ + const buf_page_t* bpage) /* control block being relocated */ +{ + //ut_ad(buf_pool_mutex_own()); + /* optimistic */ + //ut_ad(mutex_own(buf_page_get_mutex(bpage))); + //ut_ad(buf_page_in_file(bpage)); + //ut_ad(bpage->in_LRU_list); + + return(bpage->in_LRU_list && bpage->io_fix == BUF_IO_NONE + && bpage->buf_fix_count == 0); +} + +/************************************************************************* +Determine if a block has been flagged old. */ +UNIV_INLINE +ibool +buf_page_is_old( +/*============*/ + /* out: TRUE if old */ + const buf_page_t* bpage) /* in: control block */ +{ + ut_ad(buf_page_in_file(bpage)); + ut_ad(buf_pool_mutex_own()); + + return(bpage->old); +} + +/************************************************************************* +Flag a block old. */ +UNIV_INLINE +void +buf_page_set_old( +/*=============*/ + buf_page_t* bpage, /* in/out: control block */ + ibool old) /* in: old */ +{ + ut_a(buf_page_in_file(bpage)); + //ut_ad(buf_pool_mutex_own()); + ut_ad(mutex_own(&LRU_list_mutex)); + ut_ad(bpage->in_LRU_list); + +#ifdef UNIV_LRU_DEBUG + if (UT_LIST_GET_PREV(LRU, bpage) && UT_LIST_GET_NEXT(LRU, bpage) + && UT_LIST_GET_PREV(LRU, bpage)->old + == UT_LIST_GET_NEXT(LRU, bpage)->old) { + ut_a(UT_LIST_GET_PREV(LRU, bpage)->old == old); + } +#endif /* UNIV_LRU_DEBUG */ + + bpage->old = old; +} + +/************************************************************************* +Determine if a block has been accessed in the buffer pool. */ +UNIV_INLINE +ibool +buf_page_is_accessed( +/*=================*/ + /* out: TRUE if accessed */ + const buf_page_t* bpage) /* in: control block */ +{ + ut_ad(buf_page_in_file(bpage)); + + return(bpage->accessed); +} + +/************************************************************************* +Flag a block accessed. 
*/ +UNIV_INLINE +void +buf_page_set_accessed( +/*==================*/ + buf_page_t* bpage, /* in/out: control block */ + ibool accessed) /* in: accessed */ +{ + ut_a(buf_page_in_file(bpage)); + ut_ad(mutex_own(buf_page_get_mutex(bpage))); + + bpage->accessed = accessed; +} + +/************************************************************************* +Gets the buf_block_t handle of a buffered file block if an uncompressed +page frame exists, or NULL. */ +UNIV_INLINE +buf_block_t* +buf_page_get_block( +/*===============*/ + /* out: control block, or NULL */ + buf_page_t* bpage) /* in: control block, or NULL */ +{ + if (UNIV_LIKELY(bpage != NULL)) { + ut_ad(buf_page_in_file(bpage)); + + if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) { + return((buf_block_t*) bpage); + } + } + + return(NULL); +} + +#ifdef UNIV_DEBUG +/************************************************************************* +Gets a pointer to the memory frame of a block. */ +UNIV_INLINE +buf_frame_t* +buf_block_get_frame( +/*================*/ + /* out: pointer to the frame */ + const buf_block_t* block) /* in: pointer to the control block */ +{ + ut_ad(block); + + switch (buf_block_get_state(block)) { + case BUF_BLOCK_ZIP_FREE: + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_ZIP_DIRTY: + case BUF_BLOCK_NOT_USED: + ut_error; + break; + case BUF_BLOCK_FILE_PAGE: + ut_a(block->page.buf_fix_count > 0); + /* fall through */ + case BUF_BLOCK_READY_FOR_USE: + case BUF_BLOCK_MEMORY: + case BUF_BLOCK_REMOVE_HASH: + goto ok; + } + ut_error; +ok: + return((buf_frame_t*) block->frame); +} +#endif /* UNIV_DEBUG */ + +/************************************************************************* +Gets the space id of a block. 
*/ +UNIV_INLINE +ulint +buf_page_get_space( +/*===============*/ + /* out: space id */ + const buf_page_t* bpage) /* in: pointer to the control block; must satisfy buf_page_in_file() */ +{ + ut_ad(bpage); + ut_a(buf_page_in_file(bpage)); + + return(bpage->space); +} + +/************************************************************************* +Gets the space id of a block. */ +UNIV_INLINE +ulint +buf_block_get_space( +/*================*/ + /* out: space id */ + const buf_block_t* block) /* in: pointer to the control block; must be in state BUF_BLOCK_FILE_PAGE */ +{ + ut_ad(block); + ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + + return(block->page.space); +} + +/************************************************************************* +Gets the page number of a block. */ +UNIV_INLINE +ulint +buf_page_get_page_no( +/*=================*/ + /* out: page number */ + const buf_page_t* bpage) /* in: pointer to the control block; must satisfy buf_page_in_file() */ +{ + ut_ad(bpage); + ut_a(buf_page_in_file(bpage)); + + return(bpage->offset); +} + +/************************************************************************* +Gets the page number of a block. */ +UNIV_INLINE +ulint +buf_block_get_page_no( +/*==================*/ + /* out: page number */ + const buf_block_t* block) /* in: pointer to the control block; must be in state BUF_BLOCK_FILE_PAGE */ +{ + ut_ad(block); + ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + + return(block->page.offset); +} + +/************************************************************************* +Gets the compressed page size of a block. */ +UNIV_INLINE +ulint +buf_page_get_zip_size( +/*==================*/ + /* out: compressed page size (512 << zip.ssize), or 0 if zip.ssize is 0 */ + const buf_page_t* bpage) /* in: pointer to the control block */ +{ + return(bpage->zip.ssize ? 512 << bpage->zip.ssize : 0); +} + +/************************************************************************* +Gets the compressed page size of a block. 
*/ +UNIV_INLINE +ulint +buf_block_get_zip_size( +/*===================*/ + /* out: compressed page size (512 << zip.ssize), or 0 if zip.ssize is 0 */ + const buf_block_t* block) /* in: pointer to the control block */ +{ + return(block->page.zip.ssize ? 512 << block->page.zip.ssize : 0); +} + +#if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG +/************************************************************************* +Gets the compressed page descriptor corresponding to an uncompressed page +if applicable. */ +UNIV_INLINE +const page_zip_des_t* +buf_frame_get_page_zip( +/*===================*/ + /* out: compressed page descriptor, or NULL */ + const byte* ptr) /* in: pointer to the page */ +{ + return(buf_block_get_page_zip(buf_block_align(ptr))); +} +#endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ + +/************************************************************************** +Gets the space id, page offset, and byte offset within page of a +pointer pointing to a buffer frame containing a file page. The space id and page number are read from the page itself at FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID and FIL_PAGE_OFFSET. */ +UNIV_INLINE +void +buf_ptr_get_fsp_addr( +/*=================*/ + const void* ptr, /* in: pointer to a buffer frame */ + ulint* space, /* out: space id */ + fil_addr_t* addr) /* out: page number (addr->page) and byte offset within the page (addr->boffset) */ +{ + const page_t* page = (const page_t*) ut_align_down(ptr, + UNIV_PAGE_SIZE); + + *space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + addr->page = mach_read_from_4(page + FIL_PAGE_OFFSET); + addr->boffset = ut_align_offset(ptr, UNIV_PAGE_SIZE); +} + +/************************************************************************** +Gets the lock hash value stored in the block (block->lock_hash_val). This can be used +in searches in the lock hash table. */ +UNIV_INLINE +ulint +buf_block_get_lock_hash_val( +/*========================*/ + /* out: lock hash value */ + const buf_block_t* block) /* in: block */ +{ + return(block->lock_hash_val); +} + +/************************************************************************ +Allocates a buffer block. 
*/ +UNIV_INLINE +buf_block_t* +buf_block_alloc( +/*============*/ + /* out, own: the allocated block, + in state BUF_BLOCK_MEMORY */ + ulint zip_size) /* in: compressed page size in bytes, + or 0 if uncompressed tablespace */ +{ + buf_block_t* block; + + block = buf_LRU_get_free_block(zip_size); /* declared in buf0lru.h as returning a block in state BUF_BLOCK_READY_FOR_USE */ + + buf_block_set_state(block, BUF_BLOCK_MEMORY); + + return(block); +} + +/************************************************************************ +Frees a buffer block which does not contain a file page. */ +UNIV_INLINE +void +buf_block_free( +/*===========*/ + buf_block_t* block) /* in, own: block to be freed; must not be in state BUF_BLOCK_FILE_PAGE */ +{ + //buf_pool_mutex_enter(); + + mutex_enter(&block->mutex); + + ut_a(buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE); + + buf_LRU_block_free_non_file_page(block, FALSE); /* FALSE is the have_page_hash_mutex argument */ + + mutex_exit(&block->mutex); + + //buf_pool_mutex_exit(); +} + +/************************************************************************* +Copies contents of a buffer frame to a given buffer. */ +UNIV_INLINE +byte* +buf_frame_copy( +/*===========*/ + /* out: buf */ + byte* buf, /* in: buffer to copy to; UNIV_PAGE_SIZE bytes are written */ + const buf_frame_t* frame) /* in: buffer frame */ +{ + ut_ad(buf && frame); + + ut_memcpy(buf, frame, UNIV_PAGE_SIZE); + + return(buf); +} + +/************************************************************************ +Calculates a folded value of a file page address to use in the page hash +table. */ +UNIV_INLINE +ulint +buf_page_address_fold( +/*==================*/ + /* out: the folded value, (space << 20) + space + offset */ + ulint space, /* in: space id */ + ulint offset) /* in: offset of the page within space */ +{ + return((space << 20) + space + offset); +} + +/************************************************************************ +This function is used to get info if there is an io operation +going on on a buffer page. 
*/ +UNIV_INLINE +ibool +buf_page_io_query( +/*==============*/ + /* out: TRUE if io going on, i.e. the io-fix state is not BUF_IO_NONE */ + buf_page_t* bpage) /* in: buf_pool block, must be bufferfixed */ +{ + ibool io_fixed; + mutex_t* block_mutex = buf_page_get_mutex(bpage); + + //buf_pool_mutex_enter(); + mutex_enter(block_mutex); + + ut_ad(buf_page_in_file(bpage)); + ut_ad(bpage->buf_fix_count > 0); + + io_fixed = buf_page_get_io_fix(bpage) != BUF_IO_NONE; + //buf_pool_mutex_exit(); + mutex_exit(block_mutex); + + return(io_fixed); +} + +/************************************************************************ +Gets the youngest modification log sequence number for a frame. +Returns zero if not file page or no modification occurred yet. */ +UNIV_INLINE +ib_uint64_t +buf_page_get_newest_modification( +/*=============================*/ + /* out: lsn of the newest modification to the page, read while holding the block mutex */ + const buf_page_t* bpage) /* in: block containing the + page frame */ +{ + ib_uint64_t lsn; + mutex_t* block_mutex = buf_page_get_mutex(bpage); + + mutex_enter(block_mutex); + + if (buf_page_in_file(bpage)) { + lsn = bpage->newest_modification; + } else { + lsn = 0; + } + + mutex_exit(block_mutex); + + return(lsn); +} + +/************************************************************************ +Increments the modify clock of a frame by 1. The caller must either (1) own +the buf_pool mutex while the block's bufferfix count is zero, or (2) own an +x-lock on the block. */ +UNIV_INLINE +void +buf_block_modify_clock_inc( +/*=======================*/ + buf_block_t* block) /* in: block */ +{ +#ifdef UNIV_SYNC_DEBUG + ut_ad((buf_pool_mutex_own() + && (block->page.buf_fix_count == 0)) + || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE)); +#endif /* UNIV_SYNC_DEBUG */ + + block->modify_clock++; +} + +/************************************************************************ +Returns the value of the modify clock. The caller must have an s-lock +or x-lock on the block. 
*/ +UNIV_INLINE +ib_uint64_t +buf_block_get_modify_clock( +/*=======================*/ + /* out: value of block->modify_clock */ + buf_block_t* block) /* in: block */ +{ +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED) + || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE)); +#endif /* UNIV_SYNC_DEBUG */ + + return(block->modify_clock); +} + +/*********************************************************************** +Increments the bufferfix count. */ +UNIV_INLINE +void +buf_block_buf_fix_inc_func( +/*=======================*/ +#ifdef UNIV_SYNC_DEBUG + const char* file, /* in: file name */ + ulint line, /* in: line */ +#endif /* UNIV_SYNC_DEBUG */ + buf_block_t* block) /* in: block to bufferfix; the caller must own block->mutex */ +{ +#ifdef UNIV_SYNC_DEBUG + ibool ret; + + ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line); + ut_a(ret); +#endif /* UNIV_SYNC_DEBUG */ + ut_ad(mutex_own(&block->mutex)); + + block->page.buf_fix_count++; +} +#ifdef UNIV_SYNC_DEBUG /* buf_block_buf_fix_inc(b,f,l) forwards file and line only in this configuration; otherwise they are dropped */ +# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b) +#else /* UNIV_SYNC_DEBUG */ +# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b) +#endif /* UNIV_SYNC_DEBUG */ + +/*********************************************************************** +Decrements the bufferfix count. */ +UNIV_INLINE +void +buf_block_buf_fix_dec( +/*==================*/ + buf_block_t* block) /* in: block to bufferunfix; the caller must own block->mutex */ +{ + ut_ad(mutex_own(&block->mutex)); + + block->page.buf_fix_count--; +#ifdef UNIV_SYNC_DEBUG + rw_lock_s_unlock(&block->debug_latch); +#endif /* UNIV_SYNC_DEBUG */ +} + +/********************************************************************** +Returns the control block of a file page, NULL if not found. 
*/ +UNIV_INLINE +buf_page_t* +buf_page_hash_get( +/*==============*/ + /* out: block, NULL if not found */ + ulint space, /* in: space id */ + ulint offset) /* in: offset of the page within space */ +{ + buf_page_t* bpage; + ulint fold; + + ut_ad(buf_pool); + //ut_ad(buf_pool_mutex_own()); +#ifdef UNIV_SYNC_DEBUG /* the caller must hold page_hash_latch in shared or exclusive mode */ + ut_ad(rw_lock_own(&page_hash_latch, RW_LOCK_EX) + || rw_lock_own(&page_hash_latch, RW_LOCK_SHARED)); +#endif /* UNIV_SYNC_DEBUG */ + + /* Look for the page in the hash table */ + + fold = buf_page_address_fold(space, offset); + + HASH_SEARCH(hash, buf_pool->page_hash, fold, buf_page_t*, bpage, + ut_ad(bpage->in_page_hash && !bpage->in_zip_hash + && buf_page_in_file(bpage)), + bpage->space == space && bpage->offset == offset); + if (bpage) { + ut_a(buf_page_in_file(bpage)); + ut_ad(bpage->in_page_hash); + ut_ad(!bpage->in_zip_hash); + UNIV_MEM_ASSERT_RW(bpage, sizeof *bpage); + } + + return(bpage); +} + +/********************************************************************** +Returns the control block of a file page, NULL if not found +or an uncompressed page frame does not exist. */ +UNIV_INLINE +buf_block_t* +buf_block_hash_get( +/*===============*/ + /* out: block, NULL if not found or if there is no uncompressed page frame */ + ulint space, /* in: space id */ + ulint offset) /* in: offset of the page within space */ +{ + return(buf_page_get_block(buf_page_hash_get(space, offset))); +} + +/************************************************************************ +Returns TRUE if the page can be found in the buffer pool hash table. NOTE +that it is possible that the page is not yet read from disk, though. */ +UNIV_INLINE +ibool +buf_page_peek( +/*==========*/ + /* out: TRUE if found from page hash table, + NOTE that the page is not necessarily yet read + from disk! 
*/ + ulint space, /* in: space id */ + ulint offset) /* in: page number */ +{ + const buf_page_t* bpage; + + //buf_pool_mutex_enter(); + rw_lock_s_lock(&page_hash_latch); + + bpage = buf_page_hash_get(space, offset); + + //buf_pool_mutex_exit(); + rw_lock_s_unlock(&page_hash_latch); + + return(bpage != NULL); +} + +/************************************************************************ +Releases a compressed-only page acquired with buf_page_get_zip(). A page in state BUF_BLOCK_FILE_PAGE is accepted as well. */ +UNIV_INLINE +void +buf_page_release_zip( +/*=================*/ + buf_page_t* bpage) /* in: buffer block, must be bufferfixed */ +{ + buf_block_t* block; + + ut_ad(bpage); + ut_a(bpage->buf_fix_count > 0); + + switch (buf_page_get_state(bpage)) { + case BUF_BLOCK_ZIP_PAGE: + case BUF_BLOCK_ZIP_DIRTY: + mutex_enter(&buf_pool_zip_mutex); /* compressed-only pages: the fix count is decremented under buf_pool_zip_mutex */ + bpage->buf_fix_count--; + mutex_exit(&buf_pool_zip_mutex); + return; + case BUF_BLOCK_FILE_PAGE: + block = (buf_block_t*) bpage; + mutex_enter(&block->mutex); /* uncompressed file pages: the fix count is decremented under block->mutex */ +#ifdef UNIV_SYNC_DEBUG + rw_lock_s_unlock(&block->debug_latch); +#endif /* UNIV_SYNC_DEBUG */ + bpage->buf_fix_count--; + mutex_exit(&block->mutex); + return; + case BUF_BLOCK_ZIP_FREE: + case BUF_BLOCK_NOT_USED: + case BUF_BLOCK_READY_FOR_USE: + case BUF_BLOCK_MEMORY: + case BUF_BLOCK_REMOVE_HASH: + break; + } + + ut_error; /* every state other than the three handled above ends up here */ +} + +/************************************************************************ +Decrements the bufferfix count of a buffer control block and releases +a latch, if specified. 
*/ +UNIV_INLINE +void +buf_page_release( +/*=============*/ + buf_block_t* block, /* in: buffer block; must be a bufferfixed page in state BUF_BLOCK_FILE_PAGE */ + ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH, + RW_NO_LATCH */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(block); + + ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + ut_a(block->page.buf_fix_count > 0); + + if (rw_latch == RW_X_LATCH && mtr->modifications) { /* modified under an x-latch: note the modification first */ + //buf_pool_mutex_enter(); + mutex_enter(&flush_list_mutex); /* taken before block->mutex */ + mutex_enter(&block->mutex); + ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + buf_flush_note_modification(block, mtr); + //buf_pool_mutex_exit(); + mutex_exit(&flush_list_mutex); /* block->mutex stays held for the fix count update below */ + } + else { + mutex_enter(&block->mutex); + } + +#ifdef UNIV_SYNC_DEBUG + rw_lock_s_unlock(&(block->debug_latch)); +#endif /* UNIV_SYNC_DEBUG */ + block->page.buf_fix_count--; + + /* Dirty blocks should be in the flush list. */ + ut_ad(!block->page.oldest_modification + || block->page.in_flush_list); + + mutex_exit(&block->mutex); + + if (rw_latch == RW_S_LATCH) { + rw_lock_s_unlock(&(block->lock)); + } else if (rw_latch == RW_X_LATCH) { + rw_lock_x_unlock(&(block->lock)); + } +} + +#ifdef UNIV_SYNC_DEBUG +/************************************************************************* +Adds latch level info for the rw-lock protecting the buffer frame. This +should be called in the debug version after a successful latching of a +page if we know the latching order level of the acquired latch. 
*/ +UNIV_INLINE +void +buf_block_dbg_add_level( +/*====================*/ + buf_block_t* block, /* in: buffer page + where we have acquired latch */ + ulint level) /* in: latching order level, passed on to sync_thread_add_level() for block->lock */ +{ + sync_thread_add_level(&block->lock, level); +} +#endif /* UNIV_SYNC_DEBUG */ diff --git a/storage/xtradb/include/buf0flu.h b/storage/xtradb/include/buf0flu.h new file mode 100644 index 00000000000..11a37351479 --- /dev/null +++ b/storage/xtradb/include/buf0flu.h @@ -0,0 +1,151 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The database buffer pool flush algorithm + +Created 11/5/1995 Heikki Tuuri +*******************************************************/ + +#ifndef buf0flu_h +#define buf0flu_h + +#include "univ.i" +#include "buf0types.h" +#include "ut0byte.h" +#include "mtr0types.h" + +/************************************************************************ +Remove a block from the flush list of modified blocks. 
*/ +UNIV_INTERN +void +buf_flush_remove( +/*=============*/ + buf_page_t* bpage); /* in: pointer to the block in question */ +/************************************************************************ +Updates the flush system data structures when a write is completed. */ +UNIV_INTERN +void +buf_flush_write_complete( +/*=====================*/ + buf_page_t* bpage); /* in: pointer to the block in question */ +/************************************************************************* +Flushes pages from the end of the LRU list if there is too small +a margin of replaceable pages there. */ +UNIV_INTERN +void +buf_flush_free_margin( +/*==================*/ + ibool wait); /* in: NOTE(review): undocumented in this header; presumably whether to wait for the flush batch to end - confirm in buf0flu.c */ +/************************************************************************ +Initializes a page for writing to the tablespace. */ +UNIV_INTERN +void +buf_flush_init_for_writing( +/*=======================*/ + byte* page, /* in/out: page */ + void* page_zip_, /* in/out: compressed page, or NULL */ + ib_uint64_t newest_lsn); /* in: newest modification lsn + to the page */ +/*********************************************************************** +This utility flushes dirty blocks from the end of the LRU list or flush_list. +NOTE 1: in the case of an LRU flush the calling thread may own latches to +pages: to avoid deadlocks, this function must be written so that it cannot +end up waiting for these latches! NOTE 2: in the case of a flush list flush, +the calling thread is not allowed to own any latches on pages! 
*/ +UNIV_INTERN +ulint +buf_flush_batch( +/*============*/ + /* out: number of blocks for which the + write request was queued; + ULINT_UNDEFINED if there was a flush + of the same type already running */ + enum buf_flush flush_type, /* in: BUF_FLUSH_LRU or + BUF_FLUSH_LIST; if BUF_FLUSH_LIST, + then the caller must not own any + latches on pages */ + ulint min_n, /* in: wished minimum number of blocks + flushed (it is not guaranteed that the + actual number is that big, though) */ + ib_uint64_t lsn_limit); /* in: in the case BUF_FLUSH_LIST all + blocks whose oldest_modification is + smaller than this should be flushed + (if their number does not exceed + min_n), otherwise ignored */ +/********************************************************************** +Waits until a flush batch of the given type ends. */ +UNIV_INTERN +void +buf_flush_wait_batch_end( +/*=====================*/ + enum buf_flush type); /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ +/************************************************************************ +This function should be called at a mini-transaction commit, if a page was +modified in it. Puts the block to the list of modified blocks, if it is not +already in it. */ +UNIV_INLINE +void +buf_flush_note_modification( +/*========================*/ + buf_block_t* block, /* in: block which is modified */ + mtr_t* mtr); /* in: mtr */ +/************************************************************************ +This function should be called when recovery has modified a buffer page. 
*/ +UNIV_INLINE +void +buf_flush_recv_note_modification( +/*=============================*/ + buf_block_t* block, /* in: block which is modified */ + ib_uint64_t start_lsn, /* in: start lsn of the first mtr in a + set of mtr's */ + ib_uint64_t end_lsn); /* in: end lsn of the last mtr in the + set of mtr's */ +/************************************************************************ +Returns TRUE if the file page block is immediately suitable for replacement, +i.e., the transition FILE_PAGE => NOT_USED is allowed. */ +UNIV_INTERN +ibool +buf_flush_ready_for_replace( +/*========================*/ + /* out: TRUE if can replace immediately */ + buf_page_t* bpage); /* in: buffer control block, must be + buf_page_in_file(bpage) and in the LRU list */ +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG +/********************************************************************** +Validates the flush list. */ +UNIV_INTERN +ibool +buf_flush_validate(void); +/*====================*/ + /* out: TRUE if ok */ +#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ + +/* When buf_flush_free_margin is called, it tries to make this many blocks +available for replacement in the free list and at the end of the LRU list (to +make sure that a read-ahead batch can be read efficiently in a single +sweep). */ + +#define BUF_FLUSH_FREE_BLOCK_MARGIN (5 + BUF_READ_AHEAD_AREA) +#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100) + +#ifndef UNIV_NONINL +#include "buf0flu.ic" +#endif /* !UNIV_NONINL */ + +#endif /* buf0flu_h */ diff --git a/storage/xtradb/include/buf0flu.ic b/storage/xtradb/include/buf0flu.ic new file mode 100644 index 00000000000..26fa7c02ef3 --- /dev/null +++ b/storage/xtradb/include/buf0flu.ic @@ -0,0 +1,123 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The database buffer pool flush algorithm + +Created 11/5/1995 Heikki Tuuri +*******************************************************/ + +#include "buf0buf.h" +#include "mtr0mtr.h" + +/************************************************************************ +Inserts a modified block into the flush list. */ +UNIV_INTERN +void +buf_flush_insert_into_flush_list( +/*=============================*/ + buf_block_t* block); /* in/out: block which is modified; buf_flush_note_modification() calls this with flush_list_mutex held */ +/************************************************************************ +Inserts a modified block into the flush list in the right sorted position. +This function is used by recovery, because there the modifications do not +necessarily come in the order of lsn's. */ +UNIV_INTERN +void +buf_flush_insert_sorted_into_flush_list( +/*====================================*/ + buf_block_t* block); /* in/out: block which is modified; buf_flush_recv_note_modification() calls this with flush_list_mutex held */ + +/************************************************************************ +This function should be called at a mini-transaction commit, if a page was +modified in it. Puts the block to the list of modified blocks, if it is not +already in it. 
*/ +UNIV_INLINE +void +buf_flush_note_modification( +/*========================*/ + buf_block_t* block, /* in: block which is modified; must be a bufferfixed file page, and the caller must own flush_list_mutex */ + mtr_t* mtr) /* in: mtr; its start_lsn and end_lsn are recorded on the page */ +{ + ut_ad(block); + ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + ut_ad(block->page.buf_fix_count > 0); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + //ut_ad(buf_pool_mutex_own()); + ut_ad(mutex_own(&flush_list_mutex)); + + ut_ad(mtr->start_lsn != 0); + ut_ad(mtr->modifications); + ut_ad(block->page.newest_modification <= mtr->end_lsn); + + block->page.newest_modification = mtr->end_lsn; + + if (!block->page.oldest_modification) { /* oldest_modification is still zero: record it and enter the flush list */ + + block->page.oldest_modification = mtr->start_lsn; + ut_ad(block->page.oldest_modification != 0); + + buf_flush_insert_into_flush_list(block); + } else { + ut_ad(block->page.oldest_modification <= mtr->start_lsn); + } + + ++srv_buf_pool_write_requests; +} + +/************************************************************************ +This function should be called when recovery has modified a buffer page. 
*/ +UNIV_INLINE +void +buf_flush_recv_note_modification( +/*=============================*/ + buf_block_t* block, /* in: block which is modified; must be a bufferfixed file page */ + ib_uint64_t start_lsn, /* in: start lsn of the first mtr in a + set of mtr's */ + ib_uint64_t end_lsn) /* in: end lsn of the last mtr in the + set of mtr's */ +{ + ut_ad(block); + ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE); + ut_ad(block->page.buf_fix_count > 0); +#ifdef UNIV_SYNC_DEBUG + ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); +#endif /* UNIV_SYNC_DEBUG */ + + //buf_pool_mutex_enter(); + mutex_enter(&flush_list_mutex); /* acquired here, so the caller must not already hold it */ + + ut_ad(block->page.newest_modification <= end_lsn); + + block->page.newest_modification = end_lsn; + + if (!block->page.oldest_modification) { /* oldest_modification is still zero: record it and enter the flush list in sorted position */ + + block->page.oldest_modification = start_lsn; + + ut_ad(block->page.oldest_modification != 0); + + buf_flush_insert_sorted_into_flush_list(block); + } else { + ut_ad(block->page.oldest_modification <= start_lsn); + } + + //buf_pool_mutex_exit(); + mutex_exit(&flush_list_mutex); +} diff --git a/storage/xtradb/include/buf0lru.h b/storage/xtradb/include/buf0lru.h new file mode 100644 index 00000000000..3fd534a215d --- /dev/null +++ b/storage/xtradb/include/buf0lru.h @@ -0,0 +1,267 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The database buffer pool LRU replacement algorithm + +Created 11/5/1995 Heikki Tuuri +*******************************************************/ + +#ifndef buf0lru_h +#define buf0lru_h + +#include "univ.i" +#include "ut0byte.h" +#include "buf0types.h" + +/** The return type of buf_LRU_free_block() */ +enum buf_lru_free_block_status { + /** the block was freed */ + BUF_LRU_FREED = 0, + /** not freed because the caller asked to remove the + uncompressed frame but the control block cannot be + relocated */ + BUF_LRU_CANNOT_RELOCATE, + /** not freed because of some other reason */ + BUF_LRU_NOT_FREED +}; + +/********************************************************************** +Tries to remove LRU flushed blocks from the end of the LRU list and put them +on the free list. This is beneficial for the efficiency of the insert buffer +operation, as flushed pages from non-unique non-clustered indexes are here +taken out of the buffer pool, and their inserts redirected to the insert +buffer. Otherwise, the flushed blocks could get modified again before read +operations need new buffer blocks, and the i/o work done in flushing would be +wasted. */ +UNIV_INTERN +void +buf_LRU_try_free_flushed_blocks(void); +/*==================================*/ +/********************************************************************** +Returns TRUE if less than 25 % of the buffer pool is available. This can be +used in heuristics to prevent huge transactions eating up the whole buffer +pool for their locks. 
*/ +UNIV_INTERN +ibool +buf_LRU_buf_pool_running_out(void); +/*==============================*/ + /* out: TRUE if less than 25 % of the buffer pool is + left */ + +/*####################################################################### +These are low-level functions +#########################################################################*/ + +/* Minimum LRU list length for which the LRU_old pointer is defined */ + +#define BUF_LRU_OLD_MIN_LEN 80 + +#define BUF_LRU_FREE_SEARCH_LEN (5 + 2 * BUF_READ_AHEAD_AREA) /* NOTE(review): undocumented here; judging by the name, presumably how many blocks are scanned when searching for a free block - confirm in buf0lru.c */ + +/********************************************************************** +Invalidates all pages belonging to a given tablespace when we are deleting +the data file(s) of that tablespace. A PROBLEM: if readahead is being started, +what guarantees that it will not try to read in pages after this operation has +completed? */ +UNIV_INTERN +void +buf_LRU_invalidate_tablespace( +/*==========================*/ + ulint id); /* in: space id */ +/********************************************************************** +Gets the minimum LRU_position field for the blocks in an initial segment +(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not +guaranteed to be precise, because the ulint_clock may wrap around. */ +UNIV_INTERN +ulint +buf_LRU_get_recent_limit(void); +/*==========================*/ + /* out: the limit; zero if it could not be determined */ +/************************************************************************ +Insert a compressed block into buf_pool->zip_clean in the LRU order. */ +UNIV_INTERN +void +buf_LRU_insert_zip_clean( +/*=====================*/ + buf_page_t* bpage); /* in: pointer to the block in question */ + +/********************************************************************** +Try to free a block. If bpage is a descriptor of a compressed-only +page, the descriptor object will be freed as well. + +NOTE: If this function returns BUF_LRU_FREED, it will not temporarily +release buf_pool_mutex. 
Furthermore, the page frame will no longer be +accessible via bpage. + +The caller must hold buf_pool_mutex and buf_page_get_mutex(bpage) and +release these two mutexes after the call. No other +buf_page_get_mutex() may be held when calling this function. */ +UNIV_INTERN +enum buf_lru_free_block_status +buf_LRU_free_block( +/*===============*/ + /* out: BUF_LRU_FREED if freed, + BUF_LRU_CANNOT_RELOCATE or + BUF_LRU_NOT_FREED otherwise. */ + buf_page_t* bpage, /* in: block to be freed */ + ibool zip, /* in: TRUE if should remove also the + compressed page of an uncompressed page */ + ibool* buf_pool_mutex_released, + /* out: pointer to a variable that will + be assigned TRUE if buf_pool_mutex + was temporarily released, or NULL */ + ibool have_LRU_mutex); /* in: NOTE(review): undocumented in this header; presumably TRUE if the caller already holds LRU_list_mutex - confirm in buf0lru.c */ +/********************************************************************** +Try to free a replaceable block. */ +UNIV_INTERN +ibool +buf_LRU_search_and_free_block( +/*==========================*/ + /* out: TRUE if found and freed */ + ulint n_iterations); /* in: how many times this has been called + repeatedly without result: a high value means + that we should search farther; if + n_iterations < 10, then we search + n_iterations / 10 * buf_pool->curr_size + pages from the end of the LRU list; if + n_iterations < 5, then we will also search + n_iterations / 5 of the unzip_LRU list. */ +/********************************************************************** +Returns a free block from the buf_pool. The block is taken off the +free list. If it is empty, returns NULL. */ +UNIV_INTERN +buf_block_t* +buf_LRU_get_free_only(void); +/*=======================*/ + /* out: a free control block, or NULL + if the buf_block->free list is empty */ +/********************************************************************** +Returns a free block from the buf_pool. The block is taken off the +free list. If it is empty, blocks are moved from the end of the +LRU list to the free list. 
*/ +UNIV_INTERN +buf_block_t* +buf_LRU_get_free_block( +/*===================*/ + /* out: the free control block, + in state BUF_BLOCK_READY_FOR_USE */ + ulint zip_size); /* in: compressed page size in bytes, + or 0 if uncompressed tablespace */ + +/********************************************************************** +Puts a block back to the free list. */ +UNIV_INTERN +void +buf_LRU_block_free_non_file_page( +/*=============================*/ + buf_block_t* block, /* in: block, must not contain a file page */ + ibool have_page_hash_mutex); /* in: NOTE(review): undocumented in this header; presumably TRUE if the caller already holds the page hash latch - confirm in buf0lru.c */ +/********************************************************************** +Adds a block to the LRU list. */ +UNIV_INTERN +void +buf_LRU_add_block( +/*==============*/ + buf_page_t* bpage, /* in: control block */ + ibool old); /* in: TRUE if should be put to the old + blocks in the LRU list, else put to the + start; if the LRU list is very short, added to + the start regardless of this parameter */ +/********************************************************************** +Adds a block to the LRU list of decompressed zip pages. */ +UNIV_INTERN +void +buf_unzip_LRU_add_block( +/*====================*/ + buf_block_t* block, /* in: control block */ + ibool old); /* in: TRUE if should be put to the end + of the list, else put to the start */ +/********************************************************************** +Moves a block to the start of the LRU list. */ +UNIV_INTERN +void +buf_LRU_make_block_young( +/*=====================*/ + buf_page_t* bpage); /* in: control block */ +/********************************************************************** +Moves a block to the end of the LRU list. */ +UNIV_INTERN +void +buf_LRU_make_block_old( +/*===================*/ + buf_page_t* bpage); /* in: control block */ +/************************************************************************ +Update the historical stats that we are collecting for LRU eviction +policy at the end of each interval. 
*/ +UNIV_INTERN +void +buf_LRU_stat_update(void); +/*=====================*/ + +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG +/************************************************************************** +Validates the LRU list. */ +UNIV_INTERN +ibool +buf_LRU_validate(void); +/*==================*/ +#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ +#if defined UNIV_DEBUG_PRINT || defined UNIV_DEBUG || defined UNIV_BUF_DEBUG +/************************************************************************** +Prints the LRU list. */ +UNIV_INTERN +void +buf_LRU_print(void); +/*===============*/ +#endif /* UNIV_DEBUG_PRINT || UNIV_DEBUG || UNIV_BUF_DEBUG */ + +/********************************************************************** +These statistics are not 'of' LRU but 'for' LRU. We keep count of I/O +and page_zip_decompress() operations. Based on the statistics we decide +if we want to evict from buf_pool->unzip_LRU or buf_pool->LRU. */ + +/** Statistics for selecting the LRU list for eviction. */ +struct buf_LRU_stat_struct +{ + ulint io; /**< Counter of buffer pool I/O operations. */ + ulint unzip; /**< Counter of page_zip_decompress operations. */ +}; + +typedef struct buf_LRU_stat_struct buf_LRU_stat_t; + +/** Current operation counters. Not protected by any mutex. +Cleared by buf_LRU_stat_update(). */ +extern buf_LRU_stat_t buf_LRU_stat_cur; + +/** Running sum of past values of buf_LRU_stat_cur. +Updated by buf_LRU_stat_update(). Protected by buf_pool_mutex. */ +extern buf_LRU_stat_t buf_LRU_stat_sum; + +/************************************************************************ +Increments the I/O counter in buf_LRU_stat_cur. */ +#define buf_LRU_stat_inc_io() buf_LRU_stat_cur.io++ +/************************************************************************ +Increments the page_zip_decompress() counter in buf_LRU_stat_cur. 
*/ +#define buf_LRU_stat_inc_unzip() buf_LRU_stat_cur.unzip++ + +#ifndef UNIV_NONINL +#include "buf0lru.ic" +#endif + +#endif diff --git a/storage/xtradb/include/buf0lru.ic b/storage/xtradb/include/buf0lru.ic new file mode 100644 index 00000000000..f4c40e0b606 --- /dev/null +++ b/storage/xtradb/include/buf0lru.ic @@ -0,0 +1,24 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The database buffer replacement algorithm + +Created 11/5/1995 Heikki Tuuri +*******************************************************/ + diff --git a/storage/xtradb/include/buf0rea.h b/storage/xtradb/include/buf0rea.h new file mode 100644 index 00000000000..6d138a3a02b --- /dev/null +++ b/storage/xtradb/include/buf0rea.h @@ -0,0 +1,136 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The database buffer read + +Created 11/5/1995 Heikki Tuuri +*******************************************************/ + +#ifndef buf0rea_h +#define buf0rea_h + +#include "univ.i" +#include "buf0types.h" + +/************************************************************************ +High-level function which reads a page asynchronously from a file to the +buffer buf_pool if it is not already there. Sets the io_fix flag and sets +an exclusive lock on the buffer frame. The flag is cleared and the x-lock +released by the i/o-handler thread. Does a random read-ahead if it seems +sensible. */ +UNIV_INTERN +ulint +buf_read_page( +/*==========*/ + /* out: number of page read requests issued: this can + be > 1 if read-ahead occurred */ + ulint space, /* in: space id */ + ulint zip_size,/* in: compressed page size in bytes, or 0 */ + ulint offset);/* in: page number */ +/************************************************************************ +Applies linear read-ahead if in the buf_pool the page is a border page of +a linear read-ahead area and all the pages in the area have been accessed. +Does not read any page if the read-ahead mechanism is not activated. Note +that the algorithm looks at the 'natural' adjacent successor and +predecessor of the page, which on the leaf level of a B-tree are the next +and previous page in the chain of leaves. 
To know these, the page specified +in (space, offset) must already be present in the buf_pool. Thus, the +natural way to use this function is to call it when a page in the buf_pool +is accessed the first time, calling this function just after it has been +bufferfixed. +NOTE 1: as this function looks at the natural predecessor and successor +fields on the page, what happens, if these are not initialized to any +sensible value? No problem, before applying read-ahead we check that the +area to read is within the span of the space, if not, read-ahead is not +applied. An uninitialized value may result in a useless read operation, but +only very improbably. +NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this +function must be written such that it cannot end up waiting for these +latches! +NOTE 3: the calling thread must want access to the page given: this rule is +set to prevent unintended read-aheads performed by ibuf routines, a situation +which could result in a deadlock if the OS does not support asynchronous io. */ +UNIV_INTERN +ulint +buf_read_ahead_linear( +/*==================*/ + /* out: number of page read requests issued */ + ulint space, /* in: space id */ + ulint zip_size,/* in: compressed page size in bytes, or 0 */ + ulint offset);/* in: page number of a page; NOTE: the current thread + must want access to this page (see NOTE 3 above) */ +/************************************************************************ +Issues read requests for pages which the ibuf module wants to read in, in +order to contract the insert buffer tree. Technically, this function is like +a read-ahead function. 
*/ +UNIV_INTERN +void +buf_read_ibuf_merge_pages( +/*======================*/ + ibool sync, /* in: TRUE if the caller + wants this function to wait + for the highest address page + to get read in, before this + function returns */ + const ulint* space_ids, /* in: array of space ids */ + const ib_int64_t* space_versions,/* in: the spaces must have + this version number + (timestamp), otherwise we + discard the read; we use this + to cancel reads if DISCARD + + IMPORT may have changed the + tablespace size */ + const ulint* page_nos, /* in: array of page numbers + to read, with the highest page + number the last in the + array */ + ulint n_stored); /* in: number of elements + in the arrays */ +/************************************************************************ +Issues read requests for pages which recovery wants to read in. */ +UNIV_INTERN +void +buf_read_recv_pages( +/*================*/ + ibool sync, /* in: TRUE if the caller + wants this function to wait + for the highest address page + to get read in, before this + function returns */ + ulint space, /* in: space id */ + ulint zip_size, /* in: compressed page size in + bytes, or 0 */ + const ulint* page_nos, /* in: array of page numbers + to read, with the highest page + number the last in the + array */ + ulint n_stored); /* in: number of page numbers + in the array */ + +/* The size in pages of the area which the read-ahead algorithms read if +invoked */ + +#define BUF_READ_AHEAD_AREA \ + ut_min(64, ut_2_power_up(buf_pool->curr_size / 32)) + +/* Modes used in read-ahead */ +#define BUF_READ_IBUF_PAGES_ONLY 131 +#define BUF_READ_ANY_PAGE 132 + +#endif diff --git a/storage/xtradb/include/buf0types.h b/storage/xtradb/include/buf0types.h new file mode 100644 index 00000000000..f2721da85f9 --- /dev/null +++ b/storage/xtradb/include/buf0types.h @@ -0,0 +1,70 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The database buffer pool global types for the directory + +Created 11/17/1995 Heikki Tuuri +*******************************************************/ + +#ifndef buf0types_h +#define buf0types_h + +typedef struct buf_page_struct buf_page_t; +typedef struct buf_block_struct buf_block_t; +typedef struct buf_chunk_struct buf_chunk_t; +typedef struct buf_pool_struct buf_pool_t; + +/* The 'type' used of a buffer frame */ +typedef byte buf_frame_t; + +/* Flags for flush types */ +enum buf_flush { + BUF_FLUSH_LRU = 0, + BUF_FLUSH_SINGLE_PAGE, + BUF_FLUSH_LIST, + BUF_FLUSH_N_TYPES /* index of last element + 1 */ +}; + +/* Flags for io_fix types */ +enum buf_io_fix { + BUF_IO_NONE = 0, /**< no pending I/O */ + BUF_IO_READ, /**< read pending */ + BUF_IO_WRITE /**< write pending */ +}; + +/* Parameters of binary buddy system for compressed pages (buf0buddy.h) */ +#if UNIV_WORD_SIZE <= 4 /* 32-bit system */ +# define BUF_BUDDY_LOW_SHIFT 6 +#else /* 64-bit system */ +# define BUF_BUDDY_LOW_SHIFT 7 +#endif +#define BUF_BUDDY_LOW (1 << BUF_BUDDY_LOW_SHIFT) + /* minimum block size in the binary + buddy system; must be at least + sizeof(buf_page_t) */ +#define BUF_BUDDY_SIZES (UNIV_PAGE_SIZE_SHIFT - 
BUF_BUDDY_LOW_SHIFT) + /* number of buddy sizes */ + +/* twice the maximum block size of the buddy system; +the underlying memory is aligned by this amount: +this must be equal to UNIV_PAGE_SIZE */ +#define BUF_BUDDY_HIGH (BUF_BUDDY_LOW << BUF_BUDDY_SIZES) + +#endif + diff --git a/storage/xtradb/include/data0data.h b/storage/xtradb/include/data0data.h new file mode 100644 index 00000000000..1190a7ae45a --- /dev/null +++ b/storage/xtradb/include/data0data.h @@ -0,0 +1,480 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/************************************************************************ +SQL data field and tuple + +Created 5/30/1994 Heikki Tuuri +*************************************************************************/ + +#ifndef data0data_h +#define data0data_h + +#include "univ.i" + +#include "data0types.h" +#include "data0type.h" +#include "mem0mem.h" +#include "dict0types.h" + +typedef struct big_rec_struct big_rec_t; + +#ifdef UNIV_DEBUG +/************************************************************************* +Gets pointer to the type struct of SQL data field. 
*/ +UNIV_INLINE +dtype_t* +dfield_get_type( +/*============*/ + /* out: pointer to the type struct */ + const dfield_t* field); /* in: SQL data field */ +/************************************************************************* +Gets pointer to the data in a field. */ +UNIV_INLINE +void* +dfield_get_data( +/*============*/ + /* out: pointer to data */ + const dfield_t* field); /* in: field */ +#else /* UNIV_DEBUG */ +# define dfield_get_type(field) (&(field)->type) +# define dfield_get_data(field) ((field)->data) +#endif /* UNIV_DEBUG */ +/************************************************************************* +Sets the type struct of SQL data field. */ +UNIV_INLINE +void +dfield_set_type( +/*============*/ + dfield_t* field, /* in: SQL data field */ + dtype_t* type); /* in: pointer to data type struct */ +/************************************************************************* +Gets length of field data. */ +UNIV_INLINE +ulint +dfield_get_len( +/*===========*/ + /* out: length of data; UNIV_SQL_NULL if + SQL null data */ + const dfield_t* field); /* in: field */ +/************************************************************************* +Sets length in a field. 
*/ +UNIV_INLINE +void +dfield_set_len( +/*===========*/ + dfield_t* field, /* in: field */ + ulint len); /* in: length or UNIV_SQL_NULL */ +/************************************************************************* +Determines if a field is SQL NULL */ +UNIV_INLINE +ulint +dfield_is_null( +/*===========*/ + /* out: nonzero if SQL null data */ + const dfield_t* field); /* in: field */ +/************************************************************************* +Determines if a field is externally stored */ +UNIV_INLINE +ulint +dfield_is_ext( +/*==========*/ + /* out: nonzero if externally stored */ + const dfield_t* field); /* in: field */ +/************************************************************************* +Sets the "external storage" flag */ +UNIV_INLINE +void +dfield_set_ext( +/*===========*/ + dfield_t* field); /* in/out: field */ +/************************************************************************* +Sets pointer to the data and length in a field. */ +UNIV_INLINE +void +dfield_set_data( +/*============*/ + dfield_t* field, /* in: field */ + const void* data, /* in: data */ + ulint len); /* in: length or UNIV_SQL_NULL */ +/************************************************************************* +Sets a data field to SQL NULL. */ +UNIV_INLINE +void +dfield_set_null( +/*============*/ + dfield_t* field); /* in/out: field */ +/************************************************************************** +Writes an SQL null field full of zeros. */ +UNIV_INLINE +void +data_write_sql_null( +/*================*/ + byte* data, /* in: pointer to a buffer of size len */ + ulint len); /* in: SQL null size in bytes */ +/************************************************************************* +Copies the data and len fields. 
*/ +UNIV_INLINE +void +dfield_copy_data( +/*=============*/ + dfield_t* field1, /* out: field to copy to */ + const dfield_t* field2);/* in: field to copy from */ +/************************************************************************* +Copies a data field to another. */ +UNIV_INLINE +void +dfield_copy( +/*========*/ + dfield_t* field1, /* out: field to copy to */ + const dfield_t* field2);/* in: field to copy from */ +/************************************************************************* +Copies the data pointed to by a data field. */ +UNIV_INLINE +void +dfield_dup( +/*=======*/ + dfield_t* field, /* in/out: data field */ + mem_heap_t* heap); /* in: memory heap where allocated */ +/************************************************************************* +Tests if data length and content is equal for two dfields. */ +UNIV_INLINE +ibool +dfield_datas_are_binary_equal( +/*==========================*/ + /* out: TRUE if equal */ + const dfield_t* field1, /* in: field */ + const dfield_t* field2);/* in: field */ +/************************************************************************* +Tests if dfield data length and content is equal to the given. */ +UNIV_INTERN +ibool +dfield_data_is_binary_equal( +/*========================*/ + /* out: TRUE if equal */ + const dfield_t* field, /* in: field */ + ulint len, /* in: data length or UNIV_SQL_NULL */ + const byte* data); /* in: data */ +/************************************************************************* +Gets number of fields in a data tuple. */ +UNIV_INLINE +ulint +dtuple_get_n_fields( +/*================*/ + /* out: number of fields */ + const dtuple_t* tuple); /* in: tuple */ +#ifdef UNIV_DEBUG +/************************************************************************* +Gets nth field of a tuple. 
*/ +UNIV_INLINE +dfield_t* +dtuple_get_nth_field( +/*=================*/ + /* out: nth field */ + const dtuple_t* tuple, /* in: tuple */ + ulint n); /* in: index of field */ +#else /* UNIV_DEBUG */ +# define dtuple_get_nth_field(tuple, n) ((tuple)->fields + (n)) +#endif /* UNIV_DEBUG */ +/************************************************************************* +Gets info bits in a data tuple. */ +UNIV_INLINE +ulint +dtuple_get_info_bits( +/*=================*/ + /* out: info bits */ + const dtuple_t* tuple); /* in: tuple */ +/************************************************************************* +Sets info bits in a data tuple. */ +UNIV_INLINE +void +dtuple_set_info_bits( +/*=================*/ + dtuple_t* tuple, /* in: tuple */ + ulint info_bits); /* in: info bits */ +/************************************************************************* +Gets number of fields used in record comparisons. */ +UNIV_INLINE +ulint +dtuple_get_n_fields_cmp( +/*====================*/ + /* out: number of fields used in comparisons + in rem0cmp.* */ + const dtuple_t* tuple); /* in: tuple */ +/************************************************************************* +Sets number of fields used in record comparisons. */ +UNIV_INLINE +void +dtuple_set_n_fields_cmp( +/*====================*/ + dtuple_t* tuple, /* in: tuple */ + ulint n_fields_cmp); /* in: number of fields used in + comparisons in rem0cmp.* */ +/************************************************************** +Creates a data tuple to a memory heap. The default value for number +of fields used in record comparisons for this tuple is n_fields. */ +UNIV_INLINE +dtuple_t* +dtuple_create( +/*==========*/ + /* out, own: created tuple */ + mem_heap_t* heap, /* in: memory heap where the tuple + is created */ + ulint n_fields); /* in: number of fields */ + +/************************************************************** +Wrap data fields in a tuple. 
The default value for number +of fields used in record comparisons for this tuple is n_fields. */ +UNIV_INLINE +const dtuple_t* +dtuple_from_fields( +/*===============*/ + /* out: data tuple */ + dtuple_t* tuple, /* in: storage for data tuple */ + const dfield_t* fields, /* in: fields */ + ulint n_fields); /* in: number of fields */ + +/************************************************************************* +Sets number of fields used in a tuple. Normally this is set in +dtuple_create, but if you want later to set it smaller, you can use this. */ +UNIV_INTERN +void +dtuple_set_n_fields( +/*================*/ + dtuple_t* tuple, /* in: tuple */ + ulint n_fields); /* in: number of fields */ +/************************************************************************* +Copies a data tuple to another. This is a shallow copy; if a deep copy +is desired, dfield_dup() will have to be invoked on each field. */ +UNIV_INLINE +dtuple_t* +dtuple_copy( +/*========*/ + /* out, own: copy of tuple */ + const dtuple_t* tuple, /* in: tuple to copy from */ + mem_heap_t* heap); /* in: memory heap + where the tuple is created */ +/************************************************************** +The following function returns the sum of data lengths of a tuple. The space +occupied by the field structs or the tuple struct is not counted. */ +UNIV_INLINE +ulint +dtuple_get_data_size( +/*=================*/ + /* out: sum of data lens */ + const dtuple_t* tuple); /* in: typed data tuple */ +/************************************************************************* +Computes the number of externally stored fields in a data tuple. */ +UNIV_INLINE +ulint +dtuple_get_n_ext( +/*=============*/ + /* out: number of fields */ + const dtuple_t* tuple); /* in: tuple */ +/**************************************************************** +Compare two data tuples, respecting the collation of character fields. 
*/ +UNIV_INTERN +int +dtuple_coll_cmp( +/*============*/ + /* out: 1, 0 , -1 if tuple1 is greater, equal, + less, respectively, than tuple2 */ + const dtuple_t* tuple1, /* in: tuple 1 */ + const dtuple_t* tuple2);/* in: tuple 2 */ +/**************************************************************** +Folds a prefix given as the number of fields of a tuple. */ +UNIV_INLINE +ulint +dtuple_fold( +/*========*/ + /* out: the folded value */ + const dtuple_t* tuple, /* in: the tuple */ + ulint n_fields,/* in: number of complete fields to fold */ + ulint n_bytes,/* in: number of bytes to fold in an + incomplete last field */ + dulint tree_id)/* in: index tree id */ + __attribute__((pure)); +/*********************************************************************** +Sets types of fields binary in a tuple. */ +UNIV_INLINE +void +dtuple_set_types_binary( +/*====================*/ + dtuple_t* tuple, /* in: data tuple */ + ulint n); /* in: number of fields to set */ +/************************************************************************** +Checks if a dtuple contains an SQL null value. */ +UNIV_INLINE +ibool +dtuple_contains_null( +/*=================*/ + /* out: TRUE if some field is SQL null */ + const dtuple_t* tuple); /* in: dtuple */ +/************************************************************** +Checks that a data field is typed. Asserts an error if not. */ +UNIV_INTERN +ibool +dfield_check_typed( +/*===============*/ + /* out: TRUE if ok */ + const dfield_t* field); /* in: data field */ +/************************************************************** +Checks that a data tuple is typed. Asserts an error if not. */ +UNIV_INTERN +ibool +dtuple_check_typed( +/*===============*/ + /* out: TRUE if ok */ + const dtuple_t* tuple); /* in: tuple */ +/************************************************************** +Checks that a data tuple is typed. 
*/ +UNIV_INTERN +ibool +dtuple_check_typed_no_assert( +/*=========================*/ + /* out: TRUE if ok */ + const dtuple_t* tuple); /* in: tuple */ +#ifdef UNIV_DEBUG +/************************************************************** +Validates the consistency of a tuple which must be complete, i.e., +all fields must have been set. */ +UNIV_INTERN +ibool +dtuple_validate( +/*============*/ + /* out: TRUE if ok */ + const dtuple_t* tuple); /* in: tuple */ +#endif /* UNIV_DEBUG */ +/***************************************************************** +Pretty prints a dfield value according to its data type. */ +UNIV_INTERN +void +dfield_print( +/*=========*/ + const dfield_t* dfield);/* in: dfield */ +/***************************************************************** +Pretty prints a dfield value according to its data type. Also the hex string +is printed if a string contains non-printable characters. */ +UNIV_INTERN +void +dfield_print_also_hex( +/*==================*/ + const dfield_t* dfield); /* in: dfield */ +/************************************************************** +The following function prints the contents of a tuple. */ +UNIV_INTERN +void +dtuple_print( +/*=========*/ + FILE* f, /* in: output stream */ + const dtuple_t* tuple); /* in: tuple */ +/****************************************************************** +Moves parts of long fields in entry to the big record vector so that +the size of tuple drops below the maximum record size allowed in the +database. Moves data only from those fields which are not necessary +to determine uniquely the insertion place of the tuple in the index. 
*/ +UNIV_INTERN +big_rec_t* +dtuple_convert_big_rec( +/*===================*/ + /* out, own: created big record vector, + NULL if we are not able to shorten + the entry enough, i.e., if there are + too many fixed-length or short fields + in entry or the index is clustered */ + dict_index_t* index, /* in: index */ + dtuple_t* entry, /* in/out: index entry */ + ulint* n_ext); /* in/out: number of + externally stored columns */ +/****************************************************************** +Puts back to entry the data stored in vector. Note that to ensure the +fields in entry can accommodate the data, vector must have been created +from entry with dtuple_convert_big_rec. */ +UNIV_INTERN +void +dtuple_convert_back_big_rec( +/*========================*/ + dict_index_t* index, /* in: index */ + dtuple_t* entry, /* in: entry whose data was put to vector */ + big_rec_t* vector);/* in, own: big rec vector; it is + freed in this function */ +/****************************************************************** +Frees the memory in a big rec vector. 
*/ +UNIV_INLINE +void +dtuple_big_rec_free( +/*================*/ + big_rec_t* vector); /* in, own: big rec vector; it is + freed in this function */ + +/*######################################################################*/ + +/* Structure for an SQL data field */ +struct dfield_struct{ + void* data; /* pointer to data */ + unsigned ext:1; /* TRUE=externally stored, FALSE=local */ + unsigned len:32; /* data length; UNIV_SQL_NULL if SQL null */ + dtype_t type; /* type of data */ +}; + +struct dtuple_struct { + ulint info_bits; /* info bits of an index record: + the default is 0; this field is used + if an index record is built from + a data tuple */ + ulint n_fields; /* number of fields in dtuple */ + ulint n_fields_cmp; /* number of fields which should + be used in comparison services + of rem0cmp.*; the index search + is performed by comparing only these + fields, others are ignored; the + default value in dtuple creation is + the same value as n_fields */ + dfield_t* fields; /* fields */ + UT_LIST_NODE_T(dtuple_t) tuple_list; + /* data tuples can be linked into a + list using this field */ +#ifdef UNIV_DEBUG + ulint magic_n; +# define DATA_TUPLE_MAGIC_N 65478679 +#endif /* UNIV_DEBUG */ +}; + +/* A slot for a field in a big rec vector */ + +typedef struct big_rec_field_struct big_rec_field_t; +struct big_rec_field_struct { + ulint field_no; /* field number in record */ + ulint len; /* stored data len */ + const void* data; /* stored data */ +}; + +/* Storage format for overflow data in a big record, that is, a record +which needs external storage of data fields */ + +struct big_rec_struct { + mem_heap_t* heap; /* memory heap from which allocated */ + ulint n_fields; /* number of stored fields */ + big_rec_field_t* fields; /* stored fields */ +}; + +#ifndef UNIV_NONINL +#include "data0data.ic" +#endif + +#endif diff --git a/storage/xtradb/include/data0data.ic b/storage/xtradb/include/data0data.ic new file mode 100644 index 00000000000..f11dbd9fce6 --- 
/dev/null +++ b/storage/xtradb/include/data0data.ic @@ -0,0 +1,608 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/************************************************************************ +SQL data field and tuple + +Created 5/30/1994 Heikki Tuuri +*************************************************************************/ + +#include "mem0mem.h" +#include "ut0rnd.h" + +#ifdef UNIV_DEBUG +extern byte data_error; + +/************************************************************************* +Gets pointer to the type struct of SQL data field. */ +UNIV_INLINE +dtype_t* +dfield_get_type( +/*============*/ + /* out: pointer to the type struct */ + const dfield_t* field) /* in: SQL data field */ +{ + ut_ad(field); + + return((dtype_t*) &(field->type)); +} +#endif /* UNIV_DEBUG */ + +/************************************************************************* +Sets the type struct of SQL data field. 
*/ +UNIV_INLINE +void +dfield_set_type( +/*============*/ + dfield_t* field, /* in: SQL data field */ + dtype_t* type) /* in: pointer to data type struct */ +{ + ut_ad(field && type); + + field->type = *type; +} + +#ifdef UNIV_DEBUG +/************************************************************************* +Gets pointer to the data in a field. */ +UNIV_INLINE +void* +dfield_get_data( +/*============*/ + /* out: pointer to data */ + const dfield_t* field) /* in: field */ +{ + ut_ad(field); + ut_ad((field->len == UNIV_SQL_NULL) + || (field->data != &data_error)); + + return((void*) field->data); +} +#endif /* UNIV_DEBUG */ + +/************************************************************************* +Gets length of field data. */ +UNIV_INLINE +ulint +dfield_get_len( +/*===========*/ + /* out: length of data; UNIV_SQL_NULL if + SQL null data */ + const dfield_t* field) /* in: field */ +{ + ut_ad(field); + ut_ad((field->len == UNIV_SQL_NULL) + || (field->data != &data_error)); + + return(field->len); +} + +/************************************************************************* +Sets length in a field. 
*/ +UNIV_INLINE +void +dfield_set_len( +/*===========*/ + dfield_t* field, /* in: field */ + ulint len) /* in: length or UNIV_SQL_NULL */ +{ + ut_ad(field); +#ifdef UNIV_VALGRIND_DEBUG + if (len != UNIV_SQL_NULL) UNIV_MEM_ASSERT_RW(field->data, len); +#endif /* UNIV_VALGRIND_DEBUG */ + + field->ext = 0; + field->len = len; +} + +/************************************************************************* +Determines if a field is SQL NULL */ +UNIV_INLINE +ulint +dfield_is_null( +/*===========*/ + /* out: nonzero if SQL null data */ + const dfield_t* field) /* in: field */ +{ + ut_ad(field); + + return(field->len == UNIV_SQL_NULL); +} + +/************************************************************************* +Determines if a field is externally stored */ +UNIV_INLINE +ulint +dfield_is_ext( +/*==========*/ + /* out: nonzero if externally stored */ + const dfield_t* field) /* in: field */ +{ + ut_ad(field); + + return(UNIV_UNLIKELY(field->ext)); +} + +/************************************************************************* +Sets the "external storage" flag */ +UNIV_INLINE +void +dfield_set_ext( +/*===========*/ + dfield_t* field) /* in/out: field */ +{ + ut_ad(field); + + field->ext = 1; +} + +/************************************************************************* +Sets pointer to the data and length in a field. */ +UNIV_INLINE +void +dfield_set_data( +/*============*/ + dfield_t* field, /* in: field */ + const void* data, /* in: data */ + ulint len) /* in: length or UNIV_SQL_NULL */ +{ + ut_ad(field); + +#ifdef UNIV_VALGRIND_DEBUG + if (len != UNIV_SQL_NULL) UNIV_MEM_ASSERT_RW(data, len); +#endif /* UNIV_VALGRIND_DEBUG */ + field->data = (void*) data; + field->ext = 0; + field->len = len; +} + +/************************************************************************* +Sets a data field to SQL NULL. 
*/ +UNIV_INLINE +void +dfield_set_null( +/*============*/ + dfield_t* field) /* in/out: field; set via dfield_set_data() to a NULL data pointer with len UNIV_SQL_NULL */ +{ + dfield_set_data(field, NULL, UNIV_SQL_NULL); +} + +/************************************************************************* +Copies the data, len and ext fields. The type is not copied. */ +UNIV_INLINE +void +dfield_copy_data( +/*=============*/ + dfield_t* field1, /* out: field to copy to */ + const dfield_t* field2) /* in: field to copy from */ +{ + ut_ad(field1 && field2); + + field1->data = field2->data; + field1->len = field2->len; + field1->ext = field2->ext; +} + +/************************************************************************* +Copies a data field to another. The whole struct is assigned, so every member of field2 is copied. */ +UNIV_INLINE +void +dfield_copy( +/*========*/ + dfield_t* field1, /* out: field to copy to */ + const dfield_t* field2) /* in: field to copy from */ +{ + *field1 = *field2; +} + +/************************************************************************* +Copies the data pointed to by a data field. The field is repointed at the value returned by mem_heap_dup(); an SQL NULL field is left unchanged. */ +UNIV_INLINE +void +dfield_dup( +/*=======*/ + dfield_t* field, /* in/out: data field */ + mem_heap_t* heap) /* in: memory heap where allocated */ +{ + if (!dfield_is_null(field)) { + UNIV_MEM_ASSERT_RW(field->data, field->len); + field->data = mem_heap_dup(heap, field->data, field->len); + } +} + +/************************************************************************* +Tests if data length and content is equal for two dfields. Two SQL NULL fields compare as equal; their data pointers are not read in that case. */ +UNIV_INLINE +ibool +dfield_datas_are_binary_equal( +/*==========================*/ + /* out: TRUE if equal */ + const dfield_t* field1, /* in: field */ + const dfield_t* field2) /* in: field */ +{ + ulint len; + + len = field1->len; + + return(len == field2->len + && (len == UNIV_SQL_NULL + || !memcmp(field1->data, field2->data, len))); +} + +/************************************************************************* +Gets info bits in a data tuple. 
*/ +UNIV_INLINE +ulint +dtuple_get_info_bits( +/*=================*/ + /* out: info bits */ + const dtuple_t* tuple) /* in: tuple */ +{ + ut_ad(tuple); + + return(tuple->info_bits); +} + +/************************************************************************* +Sets info bits in a data tuple. */ +UNIV_INLINE +void +dtuple_set_info_bits( +/*=================*/ + dtuple_t* tuple, /* in/out: tuple */ + ulint info_bits) /* in: info bits */ +{ + ut_ad(tuple); + + tuple->info_bits = info_bits; +} + +/************************************************************************* +Gets number of fields used in record comparisons. */ +UNIV_INLINE +ulint +dtuple_get_n_fields_cmp( +/*====================*/ + /* out: number of fields used in comparisons + in rem0cmp.* */ + const dtuple_t* tuple) /* in: tuple */ +{ + ut_ad(tuple); + + return(tuple->n_fields_cmp); +} + +/************************************************************************* +Sets number of fields used in record comparisons. The new value is asserted to be at most tuple->n_fields. */ +UNIV_INLINE +void +dtuple_set_n_fields_cmp( +/*====================*/ + dtuple_t* tuple, /* in/out: tuple */ + ulint n_fields_cmp) /* in: number of fields used in + comparisons in rem0cmp.* */ +{ + ut_ad(tuple); + ut_ad(n_fields_cmp <= tuple->n_fields); + + tuple->n_fields_cmp = n_fields_cmp; +} + +/************************************************************************* +Gets number of fields in a data tuple. */ +UNIV_INLINE +ulint +dtuple_get_n_fields( +/*================*/ + /* out: number of fields */ + const dtuple_t* tuple) /* in: tuple */ +{ + ut_ad(tuple); + + return(tuple->n_fields); +} + +#ifdef UNIV_DEBUG +/************************************************************************* +Gets nth field of a tuple. 
*/ +UNIV_INLINE +dfield_t* +dtuple_get_nth_field( +/*=================*/ + /* out: nth field */ + const dtuple_t* tuple, /* in: tuple */ + ulint n) /* in: index of field; asserted to be < tuple->n_fields */ +{ + ut_ad(tuple); + ut_ad(n < tuple->n_fields); + + return((dfield_t*) tuple->fields + n); +} +#endif /* UNIV_DEBUG */ + +/************************************************************** +Creates a data tuple in a memory heap. The default value for number +of fields used in record comparisons for this tuple is n_fields. The info bits start as 0. The field array is allocated in the same block as the tuple struct and starts directly after it. */ +UNIV_INLINE +dtuple_t* +dtuple_create( +/*==========*/ + /* out, own: created tuple */ + mem_heap_t* heap, /* in: memory heap where the tuple + is created */ + ulint n_fields) /* in: number of fields */ +{ + dtuple_t* tuple; + + ut_ad(heap); + + tuple = (dtuple_t*) mem_heap_alloc(heap, sizeof(dtuple_t) + + n_fields * sizeof(dfield_t)); + tuple->info_bits = 0; + tuple->n_fields = n_fields; + tuple->n_fields_cmp = n_fields; + tuple->fields = (dfield_t*) &tuple[1]; + +#ifdef UNIV_DEBUG + tuple->magic_n = DATA_TUPLE_MAGIC_N; + + { /* In the debug version, initialize fields to an error value */ + ulint i; + + for (i = 0; i < n_fields; i++) { + dfield_t* field; + + field = dtuple_get_nth_field(tuple, i); + + dfield_set_len(field, UNIV_SQL_NULL); + field->data = &data_error; + dfield_get_type(field)->mtype = DATA_ERROR; + } + } + + UNIV_MEM_INVALID(tuple->fields, n_fields * sizeof *tuple->fields); +#endif + return(tuple); +} + +/************************************************************** +Wrap data fields in a tuple. The default value for number +of fields used in record comparisons for this tuple is n_fields. 
*/ +UNIV_INLINE +const dtuple_t* +dtuple_from_fields( +/*===============*/ + /* out: data tuple (the same pointer as the tuple argument) */ + dtuple_t* tuple, /* in: storage for data tuple; filled in by this function */ + const dfield_t* fields, /* in: fields; the array is referenced, not copied */ + ulint n_fields) /* in: number of fields */ +{ + tuple->info_bits = 0; + tuple->n_fields = tuple->n_fields_cmp = n_fields; + tuple->fields = (dfield_t*) fields; + ut_d(tuple->magic_n = DATA_TUPLE_MAGIC_N); + + return(tuple); +} + +/************************************************************************* +Copies a data tuple to another. This is a shallow copy; if a deep copy +is desired, dfield_dup() will have to be invoked on each field. Only the fields are copied: info_bits and n_fields_cmp of the new tuple keep the values given by dtuple_create(). */ +UNIV_INLINE +dtuple_t* +dtuple_copy( +/*========*/ + /* out, own: copy of tuple */ + const dtuple_t* tuple, /* in: tuple to copy from */ + mem_heap_t* heap) /* in: memory heap + where the tuple is created */ +{ + ulint n_fields = dtuple_get_n_fields(tuple); + dtuple_t* new_tuple = dtuple_create(heap, n_fields); + ulint i; + + for (i = 0; i < n_fields; i++) { + dfield_copy(dtuple_get_nth_field(new_tuple, i), + dtuple_get_nth_field(tuple, i)); + } + + return(new_tuple); +} + +/************************************************************** +The following function returns the sum of data lengths of a tuple. The space +occupied by the field structs or the tuple struct is not counted. Neither +is possible space in externally stored parts of the field. 
*/ +UNIV_INLINE +ulint +dtuple_get_data_size( +/*=================*/ + /* out: sum of data lengths */ + const dtuple_t* tuple) /* in: typed data tuple */ +{ + const dfield_t* field; + ulint n_fields; + ulint len; + ulint i; + ulint sum = 0; + + ut_ad(tuple); + ut_ad(dtuple_check_typed(tuple)); + ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N); + + n_fields = tuple->n_fields; + + for (i = 0; i < n_fields; i++) { + field = dtuple_get_nth_field(tuple, i); + len = dfield_get_len(field); + + if (len == UNIV_SQL_NULL) { /* an SQL NULL field is counted as the SQL NULL size of its type */ + len = dtype_get_sql_null_size(dfield_get_type(field)); + } + + sum += len; + } + + return(sum); +} + +/************************************************************************* +Computes the number of externally stored fields in a data tuple. The result is the sum of the ext flags of all fields. */ +UNIV_INLINE +ulint +dtuple_get_n_ext( +/*=============*/ + /* out: number of externally stored fields */ + const dtuple_t* tuple) /* in: tuple */ +{ + ulint n_ext = 0; + ulint n_fields = tuple->n_fields; + ulint i; + + ut_ad(tuple); + ut_ad(dtuple_check_typed(tuple)); + ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N); + + for (i = 0; i < n_fields; i++) { + n_ext += dtuple_get_nth_field(tuple, i)->ext; + } + + return(n_ext); +} + +/*********************************************************************** +Sets the types of the first n fields of a tuple to DATA_BINARY. Each type is set with dtype_set(type, DATA_BINARY, 0, 0). */ +UNIV_INLINE +void +dtuple_set_types_binary( +/*====================*/ + dtuple_t* tuple, /* in: data tuple */ + ulint n) /* in: number of fields to set */ +{ + dtype_t* dfield_type; + ulint i; + + for (i = 0; i < n; i++) { + dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i)); + dtype_set(dfield_type, DATA_BINARY, 0, 0); + } +} + +/**************************************************************** +Folds a prefix given as the number of fields of a tuple. 
*/ +UNIV_INLINE +ulint +dtuple_fold( +/*========*/ + /* out: the folded value */ + const dtuple_t* tuple, /* in: the tuple */ + ulint n_fields,/* in: number of complete fields to fold */ + ulint n_bytes,/* in: number of bytes to fold in an + incomplete last field */ + dulint tree_id)/* in: index tree id */ +{ + const dfield_t* field; + ulint i; + const byte* data; + ulint len; + ulint fold; + + ut_ad(tuple); + ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N); + ut_ad(dtuple_check_typed(tuple)); + + fold = ut_fold_dulint(tree_id); + + for (i = 0; i < n_fields; i++) { + field = dtuple_get_nth_field(tuple, i); + + data = (const byte*) dfield_get_data(field); + len = dfield_get_len(field); + + if (len != UNIV_SQL_NULL) { /* SQL NULL fields do not contribute to the fold */ + fold = ut_fold_ulint_pair(fold, + ut_fold_binary(data, len)); + } + } + + if (n_bytes > 0) { /* i == n_fields here: fold at most n_bytes bytes of the field after the complete ones */ + field = dtuple_get_nth_field(tuple, i); + + data = (const byte*) dfield_get_data(field); + len = dfield_get_len(field); + + if (len != UNIV_SQL_NULL) { + if (len > n_bytes) { + len = n_bytes; + } + + fold = ut_fold_ulint_pair(fold, + ut_fold_binary(data, len)); + } + } + + return(fold); +} + +/************************************************************************** +Writes an SQL null field full of zeros, i.e. sets len bytes starting at data to 0. */ +UNIV_INLINE +void +data_write_sql_null( +/*================*/ + byte* data, /* in: pointer to a buffer of size len */ + ulint len) /* in: SQL null size in bytes */ +{ + memset(data, 0, len); +} + +/************************************************************************** +Checks if a dtuple contains an SQL null value. 
*/ +UNIV_INLINE +ibool +dtuple_contains_null( +/*=================*/ + /* out: TRUE if some field is SQL null */ + const dtuple_t* tuple) /* in: dtuple */ +{ + ulint n; + ulint i; + + n = dtuple_get_n_fields(tuple); + + for (i = 0; i < n; i++) { + if (dfield_is_null(dtuple_get_nth_field(tuple, i))) { + + return(TRUE); + } + } + + return(FALSE); +} + +/****************************************************************** +Frees the memory in a big rec vector. This is done by passing vector->heap to mem_heap_free(). */ +UNIV_INLINE +void +dtuple_big_rec_free( +/*================*/ + big_rec_t* vector) /* in, own: big rec vector; it is + freed in this function */ +{ + mem_heap_free(vector->heap); +} diff --git a/storage/xtradb/include/data0type.h b/storage/xtradb/include/data0type.h new file mode 100644 index 00000000000..1f10878984b --- /dev/null +++ b/storage/xtradb/include/data0type.h @@ -0,0 +1,471 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Data types + +Created 1/16/1996 Heikki Tuuri +*******************************************************/ + +#ifndef data0type_h +#define data0type_h + +#include "univ.i" + +extern ulint data_mysql_default_charset_coll; +#define DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL 8 +#define DATA_MYSQL_BINARY_CHARSET_COLL 63 + +/* SQL data type struct */ +typedef struct dtype_struct dtype_t; + +/*-------------------------------------------*/ +/* The 'MAIN TYPE' of a column */ +#define DATA_VARCHAR 1 /* character varying of the + latin1_swedish_ci charset-collation; note + that the MySQL format for this, DATA_BINARY, + DATA_VARMYSQL, is also affected by whether the + 'precise type' contains + DATA_MYSQL_TRUE_VARCHAR */ +#define DATA_CHAR 2 /* fixed length character of the + latin1_swedish_ci charset-collation */ +#define DATA_FIXBINARY 3 /* binary string of fixed length */ +#define DATA_BINARY 4 /* binary string */ +#define DATA_BLOB 5 /* binary large object, or a TEXT type; + if prtype & DATA_BINARY_TYPE == 0, then this is + actually a TEXT column (or a BLOB created + with < 4.0.14; since column prefix indexes + came only in 4.0.14, the missing flag in BLOBs + created before that does not cause any harm) */ +#define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */ +#define DATA_SYS_CHILD 7 /* address of the child page in node pointer */ +#define DATA_SYS 8 /* system column */ + +/* Data types >= DATA_FLOAT must be compared using the whole field, not as +binary strings */ + +#define DATA_FLOAT 9 +#define DATA_DOUBLE 10 +#define DATA_DECIMAL 11 /* decimal number stored as an ASCII string */ +#define DATA_VARMYSQL 12 /* any charset varying 
length char */ +#define DATA_MYSQL 13 /* any charset fixed length char */ + /* NOTE that 4.1.1 used DATA_MYSQL and + DATA_VARMYSQL for all character sets, and the + charset-collation for tables created with it + can also be latin1_swedish_ci */ +#define DATA_MTYPE_MAX 63 /* dtype_store_for_order_and_null_size() + requires the values are <= 63 */ +/*-------------------------------------------*/ +/* The 'PRECISE TYPE' of a column */ +/* +Tables created by a MySQL user have the following convention: + +- In the least significant byte in the precise type we store the MySQL type +code (not applicable for system columns). + +- In the second least significant byte we OR flags DATA_NOT_NULL, +DATA_UNSIGNED, DATA_BINARY_TYPE. + +- In the third least significant byte of the precise type of string types we +store the MySQL charset-collation code. In DATA_BLOB columns created with +< 4.0.14 we do not actually know if it is a BLOB or a TEXT column. Since there +are no indexes on prefixes of BLOB or TEXT columns in < 4.0.14, this is no +problem, though. + +Note that versions < 4.1.2 or < 5.0.1 did not store the charset code to the +precise type, since the charset was always the default charset of the MySQL +installation. If the stored charset code is 0 in the system table SYS_COLUMNS +of InnoDB, that means that the default charset of this MySQL installation +should be used. + +When loading a table definition from the system tables to the InnoDB data +dictionary cache in main memory, InnoDB versions >= 4.1.2 and >= 5.0.1 check +if the stored charset-collation is 0, and if that is the case and the type is +a non-binary string, replace that 0 by the default charset-collation code of +this MySQL installation. In short, in old tables, the charset-collation code +in the system tables on disk can be 0, but in in-memory data structures +(dtype_t), the charset-collation code is always != 0 for non-binary string +types. 
+ +In new tables, in binary string types, the charset-collation code is the +MySQL code for the 'binary charset', that is, != 0. + +For binary string types and for DATA_CHAR, DATA_VARCHAR, and for those +DATA_BLOB which are binary or have the charset-collation latin1_swedish_ci, +InnoDB performs all comparisons internally, without resorting to the MySQL +comparison functions. This is to save CPU time. + +InnoDB's own internal system tables have different precise types for their +columns, and for them the precise type is usually not used at all. +*/ + +#define DATA_ENGLISH 4 /* English language character string: this + is a relic from pre-MySQL time and only used + for InnoDB's own system tables */ +#define DATA_ERROR 111 /* another relic from pre-MySQL time */ + +#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL + type from the precise type */ +#define DATA_MYSQL_TRUE_VARCHAR 15 /* MySQL type code for the >= 5.0.3 + format true VARCHAR */ + +/* Precise data types for system columns and the length of those columns; +NOTE: the values must run from 0 up in the order given! 
All codes must +be less than 256 */ +#define DATA_ROW_ID 0 /* row id: a dulint */ +#define DATA_ROW_ID_LEN 6 /* stored length for row id */ + +#define DATA_TRX_ID 1 /* transaction id: 6 bytes */ +#define DATA_TRX_ID_LEN 6 + +#define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */ +#define DATA_ROLL_PTR_LEN 7 + +#define DATA_N_SYS_COLS 3 /* number of system columns defined above */ + +#define DATA_SYS_PRTYPE_MASK 0xF /* mask to extract the above from prtype */ + +/* Flags ORed to the precise data type */ +#define DATA_NOT_NULL 256 /* this is ORed to the precise type when + the column is declared as NOT NULL */ +#define DATA_UNSIGNED 512 /* this is ORed to the precise type when + we have an unsigned integer type */ +#define DATA_BINARY_TYPE 1024 /* if the data type is a binary character + string, this is ORed to the precise type: + this only holds for tables created with + >= MySQL-4.0.14 */ +/* #define DATA_NONLATIN1 2048 This is a relic from < 4.1.2 and < 5.0.1. + In earlier versions this was set for some + BLOB columns. +*/ +#define DATA_LONG_TRUE_VARCHAR 4096 /* this is ORed to the precise data + type when the column is true VARCHAR where + MySQL uses 2 bytes to store the data len; + for shorter VARCHARs MySQL uses only 1 byte */ +/*-------------------------------------------*/ + +/* This many bytes we need to store the type information affecting the +alphabetical order for a single field and decide the storage size of an +SQL null*/ +#define DATA_ORDER_NULL_TYPE_BUF_SIZE 4 +/* In the >= 4.1.x storage format we add 2 bytes more so that we can also +store the charset-collation number; one byte is left unused, though */ +#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE 6 + +/************************************************************************* +Gets the MySQL type code from a dtype. */ +UNIV_INLINE +ulint +dtype_get_mysql_type( +/*=================*/ + /* out: MySQL type code; this is NOT an InnoDB + type code! 
*/ + const dtype_t* type); /* in: type struct */ +/************************************************************************* +Determine how many bytes the first n characters of the given string occupy. +If the string is shorter than n characters, returns the number of bytes +the characters in the string occupy. */ +UNIV_INTERN +ulint +dtype_get_at_most_n_mbchars( +/*========================*/ + /* out: length of the prefix, + in bytes */ + ulint prtype, /* in: precise type */ + ulint mbminlen, /* in: minimum length of a + multi-byte character */ + ulint mbmaxlen, /* in: maximum length of a + multi-byte character */ + ulint prefix_len, /* in: length of the requested + prefix, in characters, multiplied by + dtype_get_mbmaxlen(dtype) */ + ulint data_len, /* in: length of str (in bytes) */ + const char* str); /* in: the string whose prefix + length is being determined */ +/************************************************************************* +Checks if a data main type is a string type. Also a BLOB is considered a +string type. */ +UNIV_INTERN +ibool +dtype_is_string_type( +/*=================*/ + /* out: TRUE if string type */ + ulint mtype); /* in: InnoDB main data type code: DATA_CHAR, ... */ +/************************************************************************* +Checks if a type is a binary string type. Note that for tables created with +< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For +those DATA_BLOB columns this function currently returns FALSE. */ +UNIV_INTERN +ibool +dtype_is_binary_string_type( +/*========================*/ + /* out: TRUE if binary string type */ + ulint mtype, /* in: main data type */ + ulint prtype);/* in: precise type */ +/************************************************************************* +Checks if a type is a non-binary string type. That is, dtype_is_string_type is +TRUE and dtype_is_binary_string_type is FALSE. 
Note that for tables created +with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. +For those DATA_BLOB columns this function currently returns TRUE. */ +UNIV_INTERN +ibool +dtype_is_non_binary_string_type( +/*============================*/ + /* out: TRUE if non-binary string type */ + ulint mtype, /* in: main data type */ + ulint prtype);/* in: precise type */ +/************************************************************************* +Sets a data type structure. */ +UNIV_INLINE +void +dtype_set( +/*======*/ + dtype_t* type, /* in: type struct to init */ + ulint mtype, /* in: main data type */ + ulint prtype, /* in: precise type */ + ulint len); /* in: precision of type */ +/************************************************************************* +Copies a data type structure. */ +UNIV_INLINE +void +dtype_copy( +/*=======*/ + dtype_t* type1, /* in: type struct to copy to */ + const dtype_t* type2); /* in: type struct to copy from */ +/************************************************************************* +Gets the SQL main data type. */ +UNIV_INLINE +ulint +dtype_get_mtype( +/*============*/ + const dtype_t* type); +/************************************************************************* +Gets the precise data type. */ +UNIV_INLINE +ulint +dtype_get_prtype( +/*=============*/ + const dtype_t* type); +/************************************************************************* +Compute the mbminlen and mbmaxlen members of a data type structure. */ +UNIV_INLINE +void +dtype_get_mblen( +/*============*/ + ulint mtype, /* in: main type */ + ulint prtype, /* in: precise type (and collation) */ + ulint* mbminlen, /* out: minimum length of a + multi-byte character */ + ulint* mbmaxlen); /* out: maximum length of a + multi-byte character */ +/************************************************************************* +Gets the MySQL charset-collation code for MySQL string types. 
*/ +UNIV_INLINE +ulint +dtype_get_charset_coll( +/*===================*/ + ulint prtype);/* in: precise data type */ +/************************************************************************* +Forms a precise type from the < 4.1.2 format precise type plus the +charset-collation code. */ +UNIV_INTERN +ulint +dtype_form_prtype( +/*==============*/ + ulint old_prtype, /* in: the MySQL type code and the flags + DATA_BINARY_TYPE etc. */ + ulint charset_coll); /* in: MySQL charset-collation code */ +/************************************************************************* +Determines if a MySQL string type is a subset of UTF-8. This function +may return false negatives, in case further character-set collation +codes are introduced in MySQL later. */ +UNIV_INLINE +ibool +dtype_is_utf8( +/*==========*/ + /* out: TRUE if a subset of UTF-8 */ + ulint prtype);/* in: precise data type */ +/************************************************************************* +Gets the type length. */ +UNIV_INLINE +ulint +dtype_get_len( +/*==========*/ + const dtype_t* type); +/************************************************************************* +Gets the minimum length of a character, in bytes. */ +UNIV_INLINE +ulint +dtype_get_mbminlen( +/*===============*/ + /* out: minimum length of a char, in bytes, + or 0 if this is not a character type */ + const dtype_t* type); /* in: type */ +/************************************************************************* +Gets the maximum length of a character, in bytes. */ +UNIV_INLINE +ulint +dtype_get_mbmaxlen( +/*===============*/ + /* out: maximum length of a char, in bytes, + or 0 if this is not a character type */ + const dtype_t* type); /* in: type */ +/************************************************************************* +Gets the padding character code for the type. 
*/ +UNIV_INLINE +ulint +dtype_get_pad_char( +/*===============*/ + /* out: padding character code, or + ULINT_UNDEFINED if no padding specified */ + ulint mtype, /* in: main type */ + ulint prtype); /* in: precise type */ +/*************************************************************************** +Returns the size of a fixed size data type, 0 if not a fixed size type. */ +UNIV_INLINE +ulint +dtype_get_fixed_size_low( +/*=====================*/ + /* out: fixed size, or 0 */ + ulint mtype, /* in: main type */ + ulint prtype, /* in: precise type */ + ulint len, /* in: length */ + ulint mbminlen, /* in: minimum length of a multibyte char */ + ulint mbmaxlen); /* in: maximum length of a multibyte char */ +/*************************************************************************** +Returns the minimum size of a data type. */ +UNIV_INLINE +ulint +dtype_get_min_size_low( +/*===================*/ + /* out: minimum size */ + ulint mtype, /* in: main type */ + ulint prtype, /* in: precise type */ + ulint len, /* in: length */ + ulint mbminlen, /* in: minimum length of a multibyte char */ + ulint mbmaxlen); /* in: maximum length of a multibyte char */ +/*************************************************************************** +Returns the maximum size of a data type. Note: types in system tables may be +incomplete and return incorrect information. */ +UNIV_INLINE +ulint +dtype_get_max_size_low( +/*===================*/ + /* out: maximum size */ + ulint mtype, /* in: main type */ + ulint len); /* in: length */ +/*************************************************************************** +Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type. +For fixed length types it is the fixed length of the type, otherwise 0. 
*/ +UNIV_INLINE +ulint +dtype_get_sql_null_size( +/*====================*/ + /* out: SQL null storage size + in ROW_FORMAT=REDUNDANT */ + const dtype_t* type); /* in: type */ +/************************************************************************** +Reads to a type the stored information which determines its alphabetical +ordering and the storage size of an SQL NULL value. */ +UNIV_INLINE +void +dtype_read_for_order_and_null_size( +/*===============================*/ + dtype_t* type, /* in: type struct */ + const byte* buf); /* in: buffer for the stored order info */ +/************************************************************************** +Stores for a type the information which determines its alphabetical ordering +and the storage size of an SQL NULL value. This is the >= 4.1.x storage +format. */ +UNIV_INLINE +void +dtype_new_store_for_order_and_null_size( +/*====================================*/ + byte* buf, /* in: buffer for + DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE + bytes where we store the info */ + const dtype_t* type, /* in: type struct */ + ulint prefix_len);/* in: prefix length to + replace type->len, or 0 */ +/************************************************************************** +Reads to a type the stored information which determines its alphabetical +ordering and the storage size of an SQL NULL value. This is the 4.1.x storage +format. */ +UNIV_INLINE +void +dtype_new_read_for_order_and_null_size( +/*===================================*/ + dtype_t* type, /* in: type struct */ + const byte* buf); /* in: buffer for stored type order info */ + +/************************************************************************* +Validates a data type structure. */ +UNIV_INTERN +ibool +dtype_validate( +/*===========*/ + /* out: TRUE if ok */ + const dtype_t* type); /* in: type struct to validate */ +/************************************************************************* +Prints a data type structure. 
*/ +UNIV_INTERN +void +dtype_print( +/*========*/ + const dtype_t* type); /* in: type */ + +/* Structure for an SQL data type. +If you add fields to this structure, be sure to initialize them everywhere. +This structure is initialized in the following functions: +dtype_set() +dtype_read_for_order_and_null_size() +dtype_new_read_for_order_and_null_size() +sym_tab_add_null_lit() */ + +struct dtype_struct{ + unsigned mtype:8; /* main data type (8-bit field) */ + unsigned prtype:24; /* precise type (24-bit field); MySQL data + type, charset code, flags to + indicate nullability, + signedness, whether this is a + binary string, whether this is + a true VARCHAR where MySQL + uses 2 bytes to store the length */ + + /* the remaining fields do not affect alphabetical ordering: */ + + unsigned len:16; /* length (16-bit field); for MySQL data this + is field->pack_length(), + except that for a >= 5.0.3 + type true VARCHAR this is the + maximum byte length of the + string data (in addition to + the string, MySQL uses 1 or 2 + bytes to store the string length) */ + + unsigned mbminlen:2; /* minimum length of a + character, in bytes (2-bit field: 0..3) */ + unsigned mbmaxlen:3; /* maximum length of a + character, in bytes (3-bit field: 0..7) */ +}; + +#ifndef UNIV_NONINL +#include "data0type.ic" +#endif + +#endif diff --git a/storage/xtradb/include/data0type.ic b/storage/xtradb/include/data0type.ic new file mode 100644 index 00000000000..d4c1080bebe --- /dev/null +++ b/storage/xtradb/include/data0type.ic @@ -0,0 +1,587 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Data types + +Created 1/16/1996 Heikki Tuuri +*******************************************************/ + +#include "mach0data.h" +#include "ha_prototypes.h" + +/************************************************************************* +Gets the MySQL charset-collation code for MySQL string types. The code is read from bits 16..23 of the precise type. */ +UNIV_INLINE +ulint +dtype_get_charset_coll( +/*===================*/ + ulint prtype) /* in: precise data type */ +{ + return((prtype >> 16) & 0xFFUL); +} + +/************************************************************************* +Determines if a MySQL string type is a subset of UTF-8. This function +may return false negatives, in case further character-set collation +codes are introduced in MySQL later. */ +UNIV_INLINE +ibool +dtype_is_utf8( +/*==========*/ + /* out: TRUE if a subset of UTF-8, i.e. the charset-collation code of prtype is one of those listed in the switch */ + ulint prtype) /* in: precise data type */ +{ + /* These codes have been copied from strings/ctype-extra.c + and strings/ctype-utf8.c. */ + switch (dtype_get_charset_coll(prtype)) { + case 11: /* ascii_general_ci */ + case 65: /* ascii_bin */ + case 33: /* utf8_general_ci */ + case 83: /* utf8_bin */ + case 254: /* utf8_general_cs */ + return(TRUE); + } + + return(FALSE); +} + +/************************************************************************* +Gets the MySQL type code from a dtype. */ +UNIV_INLINE +ulint +dtype_get_mysql_type( +/*=================*/ + /* out: MySQL type code; this is NOT an InnoDB + type code! 
*/ + const dtype_t* type) /* in: type struct */ +{ + return(type->prtype & 0xFFUL); /* the low 8 bits of the precise type */ +} + +/************************************************************************* +Compute the mbminlen and mbmaxlen values for a main type and precise type. For a non-string main type both are set to 0. For a string type they are obtained from innobase_get_cset_width() using the charset-collation code of prtype, or are both set to 1 when UNIV_HOTBACKUP is defined. */ +UNIV_INLINE +void +dtype_get_mblen( +/*============*/ + ulint mtype, /* in: main type */ + ulint prtype, /* in: precise type (and collation) */ + ulint* mbminlen, /* out: minimum length of a + multi-byte character */ + ulint* mbmaxlen) /* out: maximum length of a + multi-byte character */ +{ + if (dtype_is_string_type(mtype)) { +#ifndef UNIV_HOTBACKUP + innobase_get_cset_width(dtype_get_charset_coll(prtype), + mbminlen, mbmaxlen); + ut_ad(*mbminlen <= *mbmaxlen); + ut_ad(*mbminlen <= 2); /* mbminlen in dtype_t is 0..3 */ + ut_ad(*mbmaxlen < 1 << 3); /* mbmaxlen in dtype_t is 0..7 */ +#else /* !UNIV_HOTBACKUP */ + ut_a(mtype <= DATA_BINARY); + *mbminlen = *mbmaxlen = 1; +#endif /* !UNIV_HOTBACKUP */ + } else { + *mbminlen = *mbmaxlen = 0; + } +} + +/************************************************************************* +Compute the mbminlen and mbmaxlen members of a data type structure from its mtype and prtype, and store them in the structure. */ +UNIV_INLINE +void +dtype_set_mblen( +/*============*/ + dtype_t* type) /* in/out: type */ +{ + ulint mbminlen; + ulint mbmaxlen; + + dtype_get_mblen(type->mtype, type->prtype, &mbminlen, &mbmaxlen); + type->mbminlen = mbminlen; + type->mbmaxlen = mbmaxlen; + + ut_ad(dtype_validate(type)); +} + +/************************************************************************* +Sets a data type structure. 
*/ +UNIV_INLINE +void +dtype_set( +/*======*/ + dtype_t* type, /* in: type struct to init */ + ulint mtype, /* in: main data type */ + ulint prtype, /* in: precise type */ + ulint len) /* in: precision of type */ +{ + ut_ad(type); + ut_ad(mtype <= DATA_MTYPE_MAX); + + type->mtype = mtype; + type->prtype = prtype; + type->len = len; + + dtype_set_mblen(type); +} + +/************************************************************************* +Copies a data type structure. */ +UNIV_INLINE +void +dtype_copy( +/*=======*/ + dtype_t* type1, /* in: type struct to copy to */ + const dtype_t* type2) /* in: type struct to copy from */ +{ + *type1 = *type2; + + ut_ad(dtype_validate(type1)); +} + +/************************************************************************* +Gets the SQL main data type. */ +UNIV_INLINE +ulint +dtype_get_mtype( +/*============*/ + const dtype_t* type) +{ + ut_ad(type); + + return(type->mtype); +} + +/************************************************************************* +Gets the precise data type. */ +UNIV_INLINE +ulint +dtype_get_prtype( +/*=============*/ + const dtype_t* type) +{ + ut_ad(type); + + return(type->prtype); +} + +/************************************************************************* +Gets the type length. */ +UNIV_INLINE +ulint +dtype_get_len( +/*==========*/ + const dtype_t* type) +{ + ut_ad(type); + + return(type->len); +} + +/************************************************************************* +Gets the minimum length of a character, in bytes. */ +UNIV_INLINE +ulint +dtype_get_mbminlen( +/*===============*/ + /* out: minimum length of a char, in bytes, + or 0 if this is not a character type */ + const dtype_t* type) /* in: type */ +{ + ut_ad(type); + return(type->mbminlen); +} +/************************************************************************* +Gets the maximum length of a character, in bytes. 
*/ +UNIV_INLINE +ulint +dtype_get_mbmaxlen( +/*===============*/ + /* out: maximum length of a char, in bytes, + or 0 if this is not a character type */ + const dtype_t* type) /* in: type */ +{ + ut_ad(type); + return(type->mbmaxlen); +} + +/************************************************************************* +Gets the padding character code for a type. */ +UNIV_INLINE +ulint +dtype_get_pad_char( +/*===============*/ + /* out: padding character code, or + ULINT_UNDEFINED if no padding specified */ + ulint mtype, /* in: main type */ + ulint prtype) /* in: precise type */ +{ + switch (mtype) { + case DATA_FIXBINARY: + case DATA_BINARY: + if (UNIV_UNLIKELY(dtype_get_charset_coll(prtype) + == DATA_MYSQL_BINARY_CHARSET_COLL)) { + /* Starting from 5.0.18, do not pad + VARBINARY or BINARY columns. */ + return(ULINT_UNDEFINED); + } + /* Fall through */ + case DATA_CHAR: + case DATA_VARCHAR: + case DATA_MYSQL: + case DATA_VARMYSQL: + /* Space is the padding character for all char and binary + strings, and starting from 5.0.3, also for TEXT strings. */ + + return(0x20); + case DATA_BLOB: + if (!(prtype & DATA_BINARY_TYPE)) { + return(0x20); + } + /* Fall through */ + default: + /* No padding specified */ + return(ULINT_UNDEFINED); + } +} + +/************************************************************************** +Stores for a type the information which determines its alphabetical ordering +and the storage size of an SQL NULL value. This is the >= 4.1.x storage +format. 
*/ +UNIV_INLINE +void +dtype_new_store_for_order_and_null_size( +/*====================================*/ + byte* buf, /* in: buffer for + DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE + bytes where we store the info */ + const dtype_t* type, /* in: type struct */ + ulint prefix_len)/* in: prefix length to + replace type->len, or 0 */ +{ +#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE +#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE" +#endif + ulint len; + + buf[0] = (byte)(type->mtype & 0xFFUL); + + if (type->prtype & DATA_BINARY_TYPE) { + buf[0] = buf[0] | 128; + } + + /* In versions < 4.1.2 we had: if (type->prtype & DATA_NONLATIN1) { + buf[0] = buf[0] | 64; + } + */ + + buf[1] = (byte)(type->prtype & 0xFFUL); + + len = prefix_len ? prefix_len : type->len; + + mach_write_to_2(buf + 2, len & 0xFFFFUL); + + ut_ad(dtype_get_charset_coll(type->prtype) < 256); + mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype)); + + if (type->prtype & DATA_NOT_NULL) { + buf[4] |= 128; + } +} + +/************************************************************************** +Reads to a type the stored information which determines its alphabetical +ordering and the storage size of an SQL NULL value. This is the < 4.1.x +storage format. 
*/ +UNIV_INLINE +void +dtype_read_for_order_and_null_size( +/*===============================*/ + dtype_t* type, /* in: type struct */ + const byte* buf) /* in: buffer for stored type order info */ +{ +#if 4 != DATA_ORDER_NULL_TYPE_BUF_SIZE +# error "4 != DATA_ORDER_NULL_TYPE_BUF_SIZE" +#endif + + type->mtype = buf[0] & 63; + type->prtype = buf[1]; + + if (buf[0] & 128) { + type->prtype = type->prtype | DATA_BINARY_TYPE; + } + + type->len = mach_read_from_2(buf + 2); + + type->prtype = dtype_form_prtype(type->prtype, + data_mysql_default_charset_coll); + dtype_set_mblen(type); +} + +/************************************************************************** +Reads to a type the stored information which determines its alphabetical +ordering and the storage size of an SQL NULL value. This is the >= 4.1.x +storage format. */ +UNIV_INLINE +void +dtype_new_read_for_order_and_null_size( +/*===================================*/ + dtype_t* type, /* in: type struct */ + const byte* buf) /* in: buffer for stored type order info */ +{ + ulint charset_coll; + +#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE +#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE" +#endif + + type->mtype = buf[0] & 63; + type->prtype = buf[1]; + + if (buf[0] & 128) { + type->prtype |= DATA_BINARY_TYPE; + } + + if (buf[4] & 128) { + type->prtype |= DATA_NOT_NULL; + } + + type->len = mach_read_from_2(buf + 2); + + charset_coll = mach_read_from_2(buf + 4) & 0x7fff; + + if (dtype_is_string_type(type->mtype)) { + ut_a(charset_coll < 256); + + if (charset_coll == 0) { + /* This insert buffer record was inserted with MySQL + version < 4.1.2, and the charset-collation code was not + explicitly stored to dtype->prtype at that time. It + must be the default charset-collation of this MySQL + installation. 
*/ + + charset_coll = data_mysql_default_charset_coll; + } + + type->prtype = dtype_form_prtype(type->prtype, charset_coll); + } + dtype_set_mblen(type); +} + +/*************************************************************************** +Returns the size of a fixed size data type, 0 if not a fixed size type. */ +UNIV_INLINE +ulint +dtype_get_fixed_size_low( +/*=====================*/ + /* out: fixed size, or 0 */ + ulint mtype, /* in: main type */ + ulint prtype, /* in: precise type */ + ulint len, /* in: length */ + ulint mbminlen, /* in: minimum length of a multibyte char */ + ulint mbmaxlen) /* in: maximum length of a multibyte char */ +{ + switch (mtype) { + case DATA_SYS: +#ifdef UNIV_DEBUG + switch (prtype & DATA_MYSQL_TYPE_MASK) { + case DATA_ROW_ID: + ut_ad(len == DATA_ROW_ID_LEN); + break; + case DATA_TRX_ID: + ut_ad(len == DATA_TRX_ID_LEN); + break; + case DATA_ROLL_PTR: + ut_ad(len == DATA_ROLL_PTR_LEN); + break; + default: + ut_ad(0); + return(0); + } +#endif /* UNIV_DEBUG */ + case DATA_CHAR: + case DATA_FIXBINARY: + case DATA_INT: + case DATA_FLOAT: + case DATA_DOUBLE: + return(len); + case DATA_MYSQL: + if (prtype & DATA_BINARY_TYPE) { + return(len); + } else { +#ifdef UNIV_HOTBACKUP + if (mbminlen == mbmaxlen) { + return(len); + } +#else /* UNIV_HOTBACKUP */ + /* We play it safe here and ask MySQL for + mbminlen and mbmaxlen. Although + mbminlen and mbmaxlen are + initialized if and only if prtype + is (in one of the 3 functions in this file), + it could be that none of these functions + has been called. 
*/ + + ulint i_mbminlen, i_mbmaxlen; + + innobase_get_cset_width( + dtype_get_charset_coll(prtype), + &i_mbminlen, &i_mbmaxlen); + + if (UNIV_UNLIKELY(mbminlen != i_mbminlen) + || UNIV_UNLIKELY(mbmaxlen != i_mbmaxlen)) { + + ut_print_timestamp(stderr); + fprintf(stderr, " InnoDB: " + "mbminlen=%lu, " + "mbmaxlen=%lu, " + "type->mbminlen=%lu, " + "type->mbmaxlen=%lu\n", + (ulong) i_mbminlen, + (ulong) i_mbmaxlen, + (ulong) mbminlen, + (ulong) mbmaxlen); + } + if (mbminlen == mbmaxlen) { + return(len); + } +#endif /* !UNIV_HOTBACKUP */ + } + /* fall through for variable-length charsets */ + case DATA_VARCHAR: + case DATA_BINARY: + case DATA_DECIMAL: + case DATA_VARMYSQL: + case DATA_BLOB: + return(0); + default: + ut_error; + } + + return(0); +} + +/*************************************************************************** +Returns the minimum size of a data type. */ +UNIV_INLINE +ulint +dtype_get_min_size_low( +/*===================*/ + /* out: minimum size */ + ulint mtype, /* in: main type */ + ulint prtype, /* in: precise type */ + ulint len, /* in: length */ + ulint mbminlen, /* in: minimum length of a multibyte char */ + ulint mbmaxlen) /* in: maximum length of a multibyte char */ +{ + switch (mtype) { + case DATA_SYS: +#ifdef UNIV_DEBUG + switch (prtype & DATA_MYSQL_TYPE_MASK) { + case DATA_ROW_ID: + ut_ad(len == DATA_ROW_ID_LEN); + break; + case DATA_TRX_ID: + ut_ad(len == DATA_TRX_ID_LEN); + break; + case DATA_ROLL_PTR: + ut_ad(len == DATA_ROLL_PTR_LEN); + break; + default: + ut_ad(0); + return(0); + } +#endif /* UNIV_DEBUG */ + case DATA_CHAR: + case DATA_FIXBINARY: + case DATA_INT: + case DATA_FLOAT: + case DATA_DOUBLE: + return(len); + case DATA_MYSQL: + if ((prtype & DATA_BINARY_TYPE) || mbminlen == mbmaxlen) { + return(len); + } + /* this is a variable-length character set */ + ut_a(mbminlen > 0); + ut_a(mbmaxlen > mbminlen); + ut_a(len % mbmaxlen == 0); + return(len * mbminlen / mbmaxlen); + case DATA_VARCHAR: + case DATA_BINARY: + case DATA_DECIMAL: 
+ case DATA_VARMYSQL: + case DATA_BLOB: + return(0); + default: + ut_error; + } + + return(0); +} + +/*************************************************************************** +Returns the maximum size of a data type. Note: types in system tables may be +incomplete and return incorrect information. */ +UNIV_INLINE +ulint +dtype_get_max_size_low( +/*===================*/ + /* out: maximum size */ + ulint mtype, /* in: main type */ + ulint len) /* in: length */ +{ + switch (mtype) { + case DATA_SYS: + case DATA_CHAR: + case DATA_FIXBINARY: + case DATA_INT: + case DATA_FLOAT: + case DATA_DOUBLE: + case DATA_MYSQL: + case DATA_VARCHAR: + case DATA_BINARY: + case DATA_DECIMAL: + case DATA_VARMYSQL: + return(len); + case DATA_BLOB: + break; + default: + ut_error; + } + + return(ULINT_MAX); +} + +/*************************************************************************** +Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type. +For fixed length types it is the fixed length of the type, otherwise 0. */ +UNIV_INLINE +ulint +dtype_get_sql_null_size( +/*====================*/ + /* out: SQL null storage size + in ROW_FORMAT=REDUNDANT */ + const dtype_t* type) /* in: type */ +{ + return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len, + type->mbminlen, type->mbmaxlen)); +} diff --git a/storage/xtradb/include/data0types.h b/storage/xtradb/include/data0types.h new file mode 100644 index 00000000000..9e536478d68 --- /dev/null +++ b/storage/xtradb/include/data0types.h @@ -0,0 +1,35 @@ +/***************************************************************************** + +Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/************************************************************************ +Some type definitions + +Created 9/21/2000 Heikki Tuuri +*************************************************************************/ + +#ifndef data0types_h +#define data0types_h + +/* SQL data field struct */ +typedef struct dfield_struct dfield_t; + +/* SQL data tuple struct */ +typedef struct dtuple_struct dtuple_t; + +#endif + diff --git a/storage/xtradb/include/db0err.h b/storage/xtradb/include/db0err.h new file mode 100644 index 00000000000..d6d2a9785a5 --- /dev/null +++ b/storage/xtradb/include/db0err.h @@ -0,0 +1,104 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Global error codes for the database + +Created 5/24/1996 Heikki Tuuri +*******************************************************/ + +#ifndef db0err_h +#define db0err_h + + +enum db_err { + DB_SUCCESS = 10, + + /* The following are error codes */ + DB_ERROR, + DB_OUT_OF_MEMORY, + DB_OUT_OF_FILE_SPACE, + DB_LOCK_WAIT, + DB_DEADLOCK, + DB_ROLLBACK, + DB_DUPLICATE_KEY, + DB_QUE_THR_SUSPENDED, + DB_MISSING_HISTORY, /* required history data has been + deleted due to lack of space in + rollback segment */ + DB_CLUSTER_NOT_FOUND = 30, + DB_TABLE_NOT_FOUND, + DB_MUST_GET_MORE_FILE_SPACE, /* the database has to be stopped + and restarted with more file space */ + DB_TABLE_IS_BEING_USED, + DB_TOO_BIG_RECORD, /* a record in an index would not fit + on a compressed page, or it would + become bigger than 1/2 free space in + an uncompressed page frame */ + DB_LOCK_WAIT_TIMEOUT, /* lock wait lasted too long */ + DB_NO_REFERENCED_ROW, /* referenced key value not found + for a foreign key in an insert or + update of a row */ + DB_ROW_IS_REFERENCED, /* cannot delete or update a row + because it contains a key value + which is referenced */ + DB_CANNOT_ADD_CONSTRAINT, /* adding a foreign key constraint + to a table failed */ + DB_CORRUPTION, /* data structure corruption noticed */ + DB_COL_APPEARS_TWICE_IN_INDEX, /* InnoDB cannot handle an index + where same column appears twice */ + DB_CANNOT_DROP_CONSTRAINT, /* dropping a foreign key constraint + from a table failed */ + DB_NO_SAVEPOINT, /* no savepoint exists with the given + name */ + DB_TABLESPACE_ALREADY_EXISTS, /* we cannot create a new single-table + tablespace because a file of the same + 
name already exists */ + DB_TABLESPACE_DELETED, /* tablespace does not exist or is + being dropped right now */ + DB_LOCK_TABLE_FULL, /* lock structs have exhausted the + buffer pool (for big transactions, + InnoDB stores the lock structs in the + buffer pool) */ + DB_FOREIGN_DUPLICATE_KEY, /* foreign key constraints + activated by the operation would + lead to a duplicate key in some + table */ + DB_TOO_MANY_CONCURRENT_TRXS, /* when InnoDB runs out of the + preconfigured undo slots, this can + only happen when there are too many + concurrent transactions */ + DB_UNSUPPORTED, /* when InnoDB sees any artefact or + a feature that it can't recoginize or + work with e.g., FT indexes created by + a later version of the engine. */ + + DB_PRIMARY_KEY_IS_NULL, /* a column in the PRIMARY KEY + was found to be NULL */ + + /* The following are partial failure codes */ + DB_FAIL = 1000, + DB_OVERFLOW, + DB_UNDERFLOW, + DB_STRONG_FAIL, + DB_ZIP_OVERFLOW, + DB_RECORD_NOT_FOUND = 1500, + DB_END_OF_INDEX +}; + +#endif diff --git a/storage/xtradb/include/dict0boot.h b/storage/xtradb/include/dict0boot.h new file mode 100644 index 00000000000..e1556bdb16e --- /dev/null +++ b/storage/xtradb/include/dict0boot.h @@ -0,0 +1,150 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Data dictionary creation and booting + +Created 4/18/1996 Heikki Tuuri +*******************************************************/ + +#ifndef dict0boot_h +#define dict0boot_h + +#include "univ.i" + +#include "mtr0mtr.h" +#include "mtr0log.h" +#include "ut0byte.h" +#include "buf0buf.h" +#include "fsp0fsp.h" +#include "dict0dict.h" + +typedef byte dict_hdr_t; + +/************************************************************************** +Gets a pointer to the dictionary header and x-latches its page. */ +UNIV_INTERN +dict_hdr_t* +dict_hdr_get( +/*=========*/ + /* out: pointer to the dictionary header, + page x-latched */ + mtr_t* mtr); /* in: mtr */ +/************************************************************************** +Returns a new row, table, index, or tree id. */ +UNIV_INTERN +dulint +dict_hdr_get_new_id( +/*================*/ + /* out: the new id */ + ulint type); /* in: DICT_HDR_ROW_ID, ... */ +/************************************************************************** +Returns a new row id. */ +UNIV_INLINE +dulint +dict_sys_get_new_row_id(void); +/*=========================*/ + /* out: the new id */ +/************************************************************************** +Reads a row id from a record or other 6-byte stored form. */ +UNIV_INLINE +dulint +dict_sys_read_row_id( +/*=================*/ + /* out: row id */ + byte* field); /* in: record field */ +/************************************************************************** +Writes a row id to a record or other 6-byte stored form. 
*/ +UNIV_INLINE +void +dict_sys_write_row_id( +/*==================*/ + byte* field, /* in: record field */ + dulint row_id);/* in: row id */ +/********************************************************************* +Initializes the data dictionary memory structures when the database is +started. This function is also called when the data dictionary is created. */ +UNIV_INTERN +void +dict_boot(void); +/*===========*/ +/********************************************************************* +Creates and initializes the data dictionary at the database creation. */ +UNIV_INTERN +void +dict_create(void); +/*=============*/ + + +/* Space id and page no where the dictionary header resides */ +#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */ +#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO + +/* The ids for the basic system tables and their indexes */ +#define DICT_TABLES_ID ut_dulint_create(0, 1) +#define DICT_COLUMNS_ID ut_dulint_create(0, 2) +#define DICT_INDEXES_ID ut_dulint_create(0, 3) +#define DICT_FIELDS_ID ut_dulint_create(0, 4) +/* The following is a secondary index on SYS_TABLES */ +#define DICT_TABLE_IDS_ID ut_dulint_create(0, 5) + +#define DICT_HDR_FIRST_ID 10 /* the ids for tables etc. start + from this number, except for basic + system tables and their above defined + indexes; ibuf tables and indexes are + assigned as the id the number + DICT_IBUF_ID_MIN plus the space id */ +#define DICT_IBUF_ID_MIN ut_dulint_create(0xFFFFFFFFUL, 0) + +/* The offset of the dictionary header on the page */ +#define DICT_HDR FSEG_PAGE_DATA + +/*-------------------------------------------------------------*/ +/* Dictionary header offsets */ +#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */ +#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */ +#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */ +#define DICT_HDR_MIX_ID 24 /* Obsolete, always 0. 
*/ +#define DICT_HDR_TABLES 32 /* Root of the table index tree */ +#define DICT_HDR_TABLE_IDS 36 /* Root of the table index tree */ +#define DICT_HDR_COLUMNS 40 /* Root of the column index tree */ +#define DICT_HDR_INDEXES 44 /* Root of the index index tree */ +#define DICT_HDR_FIELDS 48 /* Root of the index field + index tree */ + +#define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace + segment into which the dictionary + header is created */ +/*-------------------------------------------------------------*/ + +/* The field number of the page number field in the sys_indexes table +clustered index */ +#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8 +#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7 +#define DICT_SYS_INDEXES_TYPE_FIELD 6 + +/* When a row id which is zero modulo this number (which must be a power of +two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is +updated */ +#define DICT_HDR_ROW_ID_WRITE_MARGIN 256 + +#ifndef UNIV_NONINL +#include "dict0boot.ic" +#endif + +#endif diff --git a/storage/xtradb/include/dict0boot.ic b/storage/xtradb/include/dict0boot.ic new file mode 100644 index 00000000000..9b45f9e84be --- /dev/null +++ b/storage/xtradb/include/dict0boot.ic @@ -0,0 +1,92 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Data dictionary creation and booting + +Created 4/18/1996 Heikki Tuuri +*******************************************************/ + +/************************************************************************** +Writes the current value of the row id counter to the dictionary header file +page. */ +UNIV_INTERN +void +dict_hdr_flush_row_id(void); +/*=======================*/ + + +/************************************************************************** +Returns a new row id. */ +UNIV_INLINE +dulint +dict_sys_get_new_row_id(void) +/*=========================*/ + /* out: the new id */ +{ + dulint id; + + mutex_enter(&(dict_sys->mutex)); + + id = dict_sys->row_id; + + if (0 == (ut_dulint_get_low(id) % DICT_HDR_ROW_ID_WRITE_MARGIN)) { + + dict_hdr_flush_row_id(); + } + + UT_DULINT_INC(dict_sys->row_id); + + mutex_exit(&(dict_sys->mutex)); + + return(id); +} + +/************************************************************************** +Reads a row id from a record or other 6-byte stored form. */ +UNIV_INLINE +dulint +dict_sys_read_row_id( +/*=================*/ + /* out: row id */ + byte* field) /* in: record field */ +{ +#if DATA_ROW_ID_LEN != 6 +# error "DATA_ROW_ID_LEN != 6" +#endif + + return(mach_read_from_6(field)); +} + +/************************************************************************** +Writes a row id to a record or other 6-byte stored form. 
*/ +UNIV_INLINE +void +dict_sys_write_row_id( +/*==================*/ + byte* field, /* in: record field */ + dulint row_id) /* in: row id */ +{ +#if DATA_ROW_ID_LEN != 6 +# error "DATA_ROW_ID_LEN != 6" +#endif + + mach_write_to_6(field, row_id); +} + + diff --git a/storage/xtradb/include/dict0crea.h b/storage/xtradb/include/dict0crea.h new file mode 100644 index 00000000000..9ac3e408f1f --- /dev/null +++ b/storage/xtradb/include/dict0crea.h @@ -0,0 +1,199 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Database object creation + +Created 1/8/1996 Heikki Tuuri +*******************************************************/ + +#ifndef dict0crea_h +#define dict0crea_h + +#include "univ.i" +#include "dict0types.h" +#include "dict0dict.h" +#include "que0types.h" +#include "row0types.h" +#include "mtr0mtr.h" + +/************************************************************************* +Creates a table create graph. 
*/ +UNIV_INTERN +tab_node_t* +tab_create_graph_create( +/*====================*/ + /* out, own: table create node */ + dict_table_t* table, /* in: table to create, built as a memory data + structure */ + mem_heap_t* heap); /* in: heap where created */ +/************************************************************************* +Creates an index create graph. */ +UNIV_INTERN +ind_node_t* +ind_create_graph_create( +/*====================*/ + /* out, own: index create node */ + dict_index_t* index, /* in: index to create, built as a memory data + structure */ + mem_heap_t* heap); /* in: heap where created */ +/*************************************************************** +Creates a table. This is a high-level function used in SQL execution graphs. */ +UNIV_INTERN +que_thr_t* +dict_create_table_step( +/*===================*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ +/*************************************************************** +Creates an index. This is a high-level function used in SQL execution +graphs. */ +UNIV_INTERN +que_thr_t* +dict_create_index_step( +/*===================*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ +/*********************************************************************** +Truncates the index tree associated with a row in SYS_INDEXES table. */ +UNIV_INTERN +ulint +dict_truncate_index_tree( +/*=====================*/ + /* out: new root page number, or + FIL_NULL on failure */ + dict_table_t* table, /* in: the table the index belongs to */ + ulint space, /* in: 0=truncate, + nonzero=create the index tree in the + given tablespace */ + btr_pcur_t* pcur, /* in/out: persistent cursor pointing to + record in the clustered index of + SYS_INDEXES table. The cursor may be + repositioned in this call. */ + mtr_t* mtr); /* in: mtr having the latch + on the record page. The mtr may be + committed and restarted in this call. 
*/ +/*********************************************************************** +Drops the index tree associated with a row in SYS_INDEXES table. */ +UNIV_INTERN +void +dict_drop_index_tree( +/*=================*/ + rec_t* rec, /* in/out: record in the clustered index + of SYS_INDEXES table */ + mtr_t* mtr); /* in: mtr having the latch on the record page */ +#ifndef UNIV_HOTBACKUP +/******************************************************************** +Creates the foreign key constraints system tables inside InnoDB +at database creation or database start if they are not found or are +not of the right form. */ +UNIV_INTERN +ulint +dict_create_or_check_foreign_constraint_tables(void); +/*================================================*/ + /* out: DB_SUCCESS or error code */ +/************************************************************************ +Adds foreign key definitions to data dictionary tables in the database. We +look at table->foreign_list, and also generate names to constraints that were +not named by the user. A generated constraint has a name of the format +databasename/tablename_ibfk_<number>, where the numbers start from 1, and are +given locally for this table, that is, the number is not global, as in the +old format constraints < 4.0.18 it used to be. 
*/ +UNIV_INTERN +ulint +dict_create_add_foreigns_to_dictionary( +/*===================================*/ + /* out: error code or DB_SUCCESS */ + ulint start_id,/* in: if we are actually doing ALTER TABLE + ADD CONSTRAINT, we want to generate constraint + numbers which are bigger than in the table so + far; we number the constraints from + start_id + 1 up; start_id should be set to 0 if + we are creating a new table, or if the table + so far has no constraints for which the name + was generated here */ + dict_table_t* table, /* in: table */ + trx_t* trx); /* in: transaction */ +#endif /* !UNIV_HOTBACKUP */ + +/* Table create node structure */ + +struct tab_node_struct{ + que_common_t common; /* node type: QUE_NODE_TABLE_CREATE */ + dict_table_t* table; /* table to create, built as a memory data + structure with dict_mem_... functions */ + ins_node_t* tab_def; /* child node which does the insert of + the table definition; the row to be inserted + is built by the parent node */ + ins_node_t* col_def; /* child node which does the inserts of + the column definitions; the row to be inserted + is built by the parent node */ + commit_node_t* commit_node; + /* child node which performs a commit after + a successful table creation */ + /*----------------------*/ + /* Local storage for this graph node */ + ulint state; /* node execution state */ + ulint col_no; /* next column definition to insert */ + mem_heap_t* heap; /* memory heap used as auxiliary storage */ +}; + +/* Table create node states */ +#define TABLE_BUILD_TABLE_DEF 1 +#define TABLE_BUILD_COL_DEF 2 +#define TABLE_COMMIT_WORK 3 +#define TABLE_ADD_TO_CACHE 4 +#define TABLE_COMPLETED 5 + +/* Index create node struct */ + +struct ind_node_struct{ + que_common_t common; /* node type: QUE_NODE_INDEX_CREATE */ + dict_index_t* index; /* index to create, built as a memory data + structure with dict_mem_... 
functions */ + ins_node_t* ind_def; /* child node which does the insert of + the index definition; the row to be inserted + is built by the parent node */ + ins_node_t* field_def; /* child node which does the inserts of + the field definitions; the row to be inserted + is built by the parent node */ + commit_node_t* commit_node; + /* child node which performs a commit after + a successful index creation */ + /*----------------------*/ + /* Local storage for this graph node */ + ulint state; /* node execution state */ + ulint page_no;/* root page number of the index */ + dict_table_t* table; /* table which owns the index */ + dtuple_t* ind_row;/* index definition row built */ + ulint field_no;/* next field definition to insert */ + mem_heap_t* heap; /* memory heap used as auxiliary storage */ +}; + +/* Index create node states */ +#define INDEX_BUILD_INDEX_DEF 1 +#define INDEX_BUILD_FIELD_DEF 2 +#define INDEX_CREATE_INDEX_TREE 3 +#define INDEX_COMMIT_WORK 4 +#define INDEX_ADD_TO_CACHE 5 + +#ifndef UNIV_NONINL +#include "dict0crea.ic" +#endif + +#endif diff --git a/storage/xtradb/include/dict0crea.ic b/storage/xtradb/include/dict0crea.ic new file mode 100644 index 00000000000..b05385fa121 --- /dev/null +++ b/storage/xtradb/include/dict0crea.ic @@ -0,0 +1,24 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Database object creation + +Created 1/8/1996 Heikki Tuuri +*******************************************************/ + diff --git a/storage/xtradb/include/dict0dict.h b/storage/xtradb/include/dict0dict.h new file mode 100644 index 00000000000..82a139a7ff9 --- /dev/null +++ b/storage/xtradb/include/dict0dict.h @@ -0,0 +1,1147 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Data dictionary system + +Created 1/8/1996 Heikki Tuuri +*******************************************************/ + +#ifndef dict0dict_h +#define dict0dict_h + +#include "univ.i" +#include "dict0types.h" +#include "dict0mem.h" +#include "data0type.h" +#include "data0data.h" +#include "sync0sync.h" +#include "sync0rw.h" +#include "mem0mem.h" +#include "rem0types.h" +#include "ut0mem.h" +#include "ut0lst.h" +#include "hash0hash.h" +#include "ut0rnd.h" +#include "ut0byte.h" +#include "trx0types.h" + +#ifndef UNIV_HOTBACKUP +/********************************************************************** +Makes all characters in a NUL-terminated UTF-8 string lower case. */ +UNIV_INTERN +void +dict_casedn_str( +/*============*/ + char* a); /* in/out: string to put in lower case */ +#endif /* !UNIV_HOTBACKUP */ +/************************************************************************ +Get the database name length in a table name. */ +UNIV_INTERN +ulint +dict_get_db_name_len( +/*=================*/ + /* out: database name length */ + const char* name); /* in: table name in the form + dbname '/' tablename */ +/************************************************************************ +Return the end of table name where we have removed dbname and '/'. */ + +const char* +dict_remove_db_name( +/*================*/ + /* out: table name */ + const char* name); /* in: table name in the form + dbname '/' tablename */ +/************************************************************************** +Returns a table object based on table id. 
*/ +UNIV_INTERN +dict_table_t* +dict_table_get_on_id( +/*=================*/ + /* out: table, NULL if does not exist */ + dulint table_id, /* in: table id */ + trx_t* trx); /* in: transaction handle */ +/************************************************************************ +Decrements the count of open MySQL handles to a table. */ +UNIV_INTERN +void +dict_table_decrement_handle_count( +/*==============================*/ + dict_table_t* table, /* in/out: table */ + ibool dict_locked); /* in: TRUE=data dictionary locked */ +/************************************************************************** +Inits the data dictionary module. */ +UNIV_INTERN +void +dict_init(void); +/*===========*/ +/************************************************************************ +Gets the space id of every table of the data dictionary and makes a linear +list and a hash table of them to the data dictionary cache. This function +can be called at database startup if we did not need to do a crash recovery. +In crash recovery we must scan the space id's from the .ibd files in MySQL +database directories. */ +UNIV_INTERN +void +dict_load_space_id_list(void); +/*=========================*/ +/************************************************************************* +Gets the column data type. */ +UNIV_INLINE +void +dict_col_copy_type( +/*===============*/ + const dict_col_t* col, /* in: column */ + dtype_t* type); /* out: data type */ +#ifdef UNIV_DEBUG +/************************************************************************* +Assert that a column and a data type match. */ +UNIV_INLINE +ibool +dict_col_type_assert_equal( +/*=======================*/ + /* out: TRUE */ + const dict_col_t* col, /* in: column */ + const dtype_t* type); /* in: data type */ +#endif /* UNIV_DEBUG */ +/*************************************************************************** +Returns the minimum size of the column. 
*/ +UNIV_INLINE +ulint +dict_col_get_min_size( +/*==================*/ + /* out: minimum size */ + const dict_col_t* col); /* in: column */ +/*************************************************************************** +Returns the maximum size of the column. */ +UNIV_INLINE +ulint +dict_col_get_max_size( +/*==================*/ + /* out: maximum size */ + const dict_col_t* col); /* in: column */ +/*************************************************************************** +Returns the size of a fixed size column, 0 if not a fixed size column. */ +UNIV_INLINE +ulint +dict_col_get_fixed_size( +/*====================*/ + /* out: fixed size, or 0 */ + const dict_col_t* col); /* in: column */ +/*************************************************************************** +Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column. +For fixed length types it is the fixed length of the type, otherwise 0. */ +UNIV_INLINE +ulint +dict_col_get_sql_null_size( +/*=======================*/ + /* out: SQL null storage size + in ROW_FORMAT=REDUNDANT */ + const dict_col_t* col); /* in: column */ + +/************************************************************************* +Gets the column number. */ +UNIV_INLINE +ulint +dict_col_get_no( +/*============*/ + const dict_col_t* col); /* in: column */ +/************************************************************************* +Gets the column position in the clustered index. */ +UNIV_INLINE +ulint +dict_col_get_clust_pos( +/*===================*/ + const dict_col_t* col, /* in: table column */ + const dict_index_t* clust_index); /* in: clustered index */ +/******************************************************************** +If the given column name is reserved for InnoDB system columns, return +TRUE.
*/ +UNIV_INTERN +ibool +dict_col_name_is_reserved( +/*======================*/ + /* out: TRUE if name is reserved */ + const char* name); /* in: column name */ +/************************************************************************ +Acquire the autoinc lock.*/ +UNIV_INTERN +void +dict_table_autoinc_lock( +/*====================*/ + dict_table_t* table); /* in/out: table */ +/************************************************************************ +Unconditionally set the autoinc counter. */ +UNIV_INTERN +void +dict_table_autoinc_initialize( +/*==========================*/ + dict_table_t* table, /* in/out: table */ + ib_uint64_t value); /* in: next value to assign to a row */ +/************************************************************************ +Reads the next autoinc value (== autoinc counter value), 0 if not yet +initialized. */ +UNIV_INTERN +ib_uint64_t +dict_table_autoinc_read( +/*====================*/ + /* out: value for a new row, or 0 */ + const dict_table_t* table); /* in: table */ +/************************************************************************ +Updates the autoinc counter if the value supplied is greater than the +current value. */ +UNIV_INTERN +void +dict_table_autoinc_update_if_greater( +/*=================================*/ + + dict_table_t* table, /* in/out: table */ + ib_uint64_t value); /* in: value which was assigned to a row */ +/************************************************************************ +Release the autoinc lock.*/ +UNIV_INTERN +void +dict_table_autoinc_unlock( +/*======================*/ + dict_table_t* table); /* in/out: table */ +/************************************************************************** +Adds system columns to a table object. 
*/ +UNIV_INTERN +void +dict_table_add_system_columns( +/*==========================*/ + dict_table_t* table, /* in/out: table */ + mem_heap_t* heap); /* in: temporary heap */ +/************************************************************************** +Adds a table object to the dictionary cache. */ +UNIV_INTERN +void +dict_table_add_to_cache( +/*====================*/ + dict_table_t* table, /* in: table */ + mem_heap_t* heap); /* in: temporary heap */ +/************************************************************************** +Removes a table object from the dictionary cache. */ +UNIV_INTERN +void +dict_table_remove_from_cache( +/*=========================*/ + dict_table_t* table); /* in, own: table */ +/************************************************************************** +Renames a table object. */ +UNIV_INTERN +ibool +dict_table_rename_in_cache( +/*=======================*/ + /* out: TRUE if success */ + dict_table_t* table, /* in/out: table */ + const char* new_name, /* in: new name */ + ibool rename_also_foreigns);/* in: in ALTER TABLE we want + to preserve the original table name + in constraints which reference it */ +/************************************************************************** +Removes an index from the dictionary cache. */ +UNIV_INTERN +void +dict_index_remove_from_cache( +/*=========================*/ + dict_table_t* table, /* in/out: table */ + dict_index_t* index); /* in, own: index */ +/************************************************************************** +Change the id of a table object in the dictionary cache. This is used in +DISCARD TABLESPACE. */ +UNIV_INTERN +void +dict_table_change_id_in_cache( +/*==========================*/ + dict_table_t* table, /* in/out: table object already in cache */ + dulint new_id);/* in: new id to set */ +/************************************************************************** +Adds a foreign key constraint object to the dictionary cache. 
May free +the object if there already is an object with the same identifier in. +At least one of foreign table or referenced table must already be in +the dictionary cache! */ +UNIV_INTERN +ulint +dict_foreign_add_to_cache( +/*======================*/ + /* out: DB_SUCCESS or error code */ + dict_foreign_t* foreign, /* in, own: foreign key constraint */ + ibool check_charsets);/* in: TRUE=check charset + compatibility */ +/************************************************************************* +Check if the index is referenced by a foreign key; if TRUE, return the +matching instance, NULL otherwise. */ +UNIV_INTERN +dict_foreign_t* +dict_table_get_referenced_constraint( +/*=================================*/ + /* out: pointer to foreign key struct if index + is defined for foreign key, otherwise NULL */ + dict_table_t* table, /* in: InnoDB table */ + dict_index_t* index); /* in: InnoDB index */ +/************************************************************************* +Checks if a table is referenced by foreign keys. */ +UNIV_INTERN +ibool +dict_table_is_referenced_by_foreign_key( +/*====================================*/ + /* out: TRUE if table is referenced + by a foreign key */ + const dict_table_t* table); /* in: InnoDB table */ +/************************************************************************** +Replace the index in the foreign key list that matches this index's +definition with an equivalent index. */ +UNIV_INTERN +void +dict_table_replace_index_in_foreign_list( +/*=====================================*/ + dict_table_t* table, /* in/out: table */ + dict_index_t* index); /* in: index to be replaced */ +/************************************************************************* +Checks if an index is defined for a foreign key constraint.
An index is a part +of a foreign key constraint if the index is referenced by a foreign key +or the index is a foreign key index. */ +UNIV_INTERN +dict_foreign_t* +dict_table_get_foreign_constraint( +/*==============================*/ + /* out: pointer to foreign key struct if index + is defined for foreign key, otherwise NULL */ + dict_table_t* table, /* in: InnoDB table */ + dict_index_t* index); /* in: InnoDB index */ +/************************************************************************* +Scans a table create SQL string and adds to the data dictionary +the foreign key constraints declared in the string. This function +should be called after the indexes for a table have been created. +Each foreign key constraint must be accompanied with indexes in +both participating tables. The indexes are allowed to contain more +fields than mentioned in the constraint. */ +UNIV_INTERN +ulint +dict_create_foreign_constraints( +/*============================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx, /* in: transaction */ + const char* sql_string, /* in: table create statement where + foreign keys are declared like: + FOREIGN KEY (a, b) REFERENCES + table2(c, d), table2 can be written + also with the database + name before it: test.table2; the + default database is the database of + parameter name */ + const char* name, /* in: table full name in the + normalized form + database_name/table_name */ + ibool reject_fks); /* in: if TRUE, fail with error + code DB_CANNOT_ADD_CONSTRAINT if + any foreign keys are found. */ +/************************************************************************** +Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement.
*/ +UNIV_INTERN +ulint +dict_foreign_parse_drop_constraints( +/*================================*/ + /* out: DB_SUCCESS or + DB_CANNOT_DROP_CONSTRAINT if + syntax error or the constraint + id does not match */ + mem_heap_t* heap, /* in: heap from which we can + allocate memory */ + trx_t* trx, /* in: transaction */ + dict_table_t* table, /* in: table */ + ulint* n, /* out: number of constraints + to drop */ + const char*** constraints_to_drop); /* out: id's of the + constraints to drop */ +/************************************************************************** +Returns a table object and optionally increments its MySQL open handle count. +NOTE! This is a high-level function to be used mainly from outside the +'dict' directory. Inside this directory dict_table_get_low is usually the +appropriate function. */ +UNIV_INTERN +dict_table_t* +dict_table_get( +/*===========*/ + /* out: table, NULL if + does not exist */ + const char* table_name, /* in: table name */ + ibool inc_mysql_count); + /* in: whether to increment the open + handle count on the table */ +/************************************************************************** +Returns an index object, based on table and index id, and memoryfixes it. */ +UNIV_INTERN +dict_index_t* +dict_index_get_on_id_low( +/*=====================*/ + /* out: index, NULL if does not + exist */ + dict_table_t* table, /* in: table */ + dulint index_id); /* in: index id */ +/************************************************************************** +Checks if a table is in the dictionary cache. */ + +UNIV_INLINE +dict_table_t* +dict_table_check_if_in_cache_low( +/*=============================*/ + /* out: table, NULL if not found */ + const char* table_name); /* in: table name */ +/************************************************************************** +Gets a table; loads it to the dictionary cache if necessary. A low-level +function.
*/ +UNIV_INLINE +dict_table_t* +dict_table_get_low( +/*===============*/ + /* out: table, NULL if not found */ + const char* table_name); /* in: table name */ +/************************************************************************** +Returns a table object based on table id. */ +UNIV_INLINE +dict_table_t* +dict_table_get_on_id_low( +/*=====================*/ + /* out: table, NULL if does not exist */ + dulint table_id); /* in: table id */ +/************************************************************************** +Find an index that is equivalent to the one passed in and is not marked +for deletion. */ +UNIV_INTERN +dict_index_t* +dict_foreign_find_equiv_index( +/*==========================*/ + /* out: index equivalent to + foreign->foreign_index, or NULL */ + dict_foreign_t* foreign);/* in: foreign key */ +/************************************************************************** +Returns an index object by matching on the name and column names; if +more than one index is found, returns the index with the highest id. */ +UNIV_INTERN +dict_index_t* +dict_table_get_index_by_max_id( +/*===========================*/ + /* out: matching index, NULL if not found */ + dict_table_t* table, /* in: table */ + const char* name, /* in: the index name to find */ + const char** columns,/* in: array of column names */ + ulint n_cols);/* in: number of columns */ +/************************************************************************** +Returns a column's name. */ + +const char* +dict_table_get_col_name( +/*====================*/ + /* out: column name. NOTE: not + guaranteed to stay valid if table is + modified in any way (columns added, + etc.). */ + const dict_table_t* table, /* in: table */ + ulint col_nr);/* in: column number */ + +/************************************************************************** +Prints a table definition.
*/ +UNIV_INTERN +void +dict_table_print( +/*=============*/ + dict_table_t* table); /* in: table */ +/************************************************************************** +Prints the data of a table. */ +UNIV_INTERN +void +dict_table_print_low( +/*=================*/ + dict_table_t* table); /* in: table */ +/************************************************************************** +Prints the data of a table when we know the table name. */ +UNIV_INTERN +void +dict_table_print_by_name( +/*=====================*/ + const char* name); /* in: table name */ +/************************************************************************** +Outputs info on foreign keys of a table. */ +UNIV_INTERN +void +dict_print_info_on_foreign_keys( +/*============================*/ + ibool create_table_format, /* in: if TRUE then print in + a format suitable to be inserted into + a CREATE TABLE, otherwise in the format + of SHOW TABLE STATUS */ + FILE* file, /* in: file where to print */ + trx_t* trx, /* in: transaction */ + dict_table_t* table); /* in: table */ +/************************************************************************** +Outputs info on a foreign key of a table in a format suitable for +CREATE TABLE. */ +UNIV_INTERN +void +dict_print_info_on_foreign_key_in_create_format( +/*============================================*/ + FILE* file, /* in: file where to print */ + trx_t* trx, /* in: transaction */ + dict_foreign_t* foreign, /* in: foreign key constraint */ + ibool add_newline); /* in: whether to add a newline */ +/************************************************************************ +Displays the names of the index and the table. */ +UNIV_INTERN +void +dict_index_name_print( +/*==================*/ + FILE* file, /* in: output stream */ + trx_t* trx, /* in: transaction */ + const dict_index_t* index); /* in: index to print */ +#ifdef UNIV_DEBUG +/************************************************************************ +Gets the first index on the table (the clustered index).
*/ +UNIV_INLINE +dict_index_t* +dict_table_get_first_index( +/*=======================*/ + /* out: index, NULL if none exists */ + const dict_table_t* table); /* in: table */ +/************************************************************************ +Gets the next index on the table. */ +UNIV_INLINE +dict_index_t* +dict_table_get_next_index( +/*======================*/ + /* out: index, NULL if none left */ + const dict_index_t* index); /* in: index */ +#else /* UNIV_DEBUG */ +# define dict_table_get_first_index(table) UT_LIST_GET_FIRST((table)->indexes) +# define dict_table_get_next_index(index) UT_LIST_GET_NEXT(indexes, index) +#endif /* UNIV_DEBUG */ +/************************************************************************ +Check whether the index is the clustered index. */ +UNIV_INLINE +ulint +dict_index_is_clust( +/*================*/ + /* out: nonzero for clustered index, + zero for other indexes */ + const dict_index_t* index) /* in: index */ + __attribute__((pure)); +/************************************************************************ +Check whether the index is unique. */ +UNIV_INLINE +ulint +dict_index_is_unique( +/*=================*/ + /* out: nonzero for unique index, + zero for other indexes */ + const dict_index_t* index) /* in: index */ + __attribute__((pure)); +/************************************************************************ +Check whether the index is the insert buffer tree. */ +UNIV_INLINE +ulint +dict_index_is_ibuf( +/*===============*/ + /* out: nonzero for insert buffer, + zero for other indexes */ + const dict_index_t* index) /* in: index */ + __attribute__((pure)); + +/************************************************************************ +Gets the number of user-defined columns in a table in the dictionary +cache. 
*/ +UNIV_INLINE +ulint +dict_table_get_n_user_cols( +/*=======================*/ + /* out: number of user-defined + (e.g., not ROW_ID) + columns of a table */ + const dict_table_t* table); /* in: table */ +/************************************************************************ +Gets the number of system columns in a table in the dictionary cache. */ +UNIV_INLINE +ulint +dict_table_get_n_sys_cols( +/*======================*/ + /* out: number of system (e.g., + ROW_ID) columns of a table */ + const dict_table_t* table); /* in: table */ +/************************************************************************ +Gets the number of all columns (also system) in a table in the dictionary +cache. */ +UNIV_INLINE +ulint +dict_table_get_n_cols( +/*==================*/ + /* out: number of columns of a table */ + const dict_table_t* table); /* in: table */ +#ifdef UNIV_DEBUG +/************************************************************************ +Gets the nth column of a table. */ +UNIV_INLINE +dict_col_t* +dict_table_get_nth_col( +/*===================*/ + /* out: pointer to column object */ + const dict_table_t* table, /* in: table */ + ulint pos); /* in: position of column */ +/************************************************************************ +Gets the given system column of a table. */ +UNIV_INLINE +dict_col_t* +dict_table_get_sys_col( +/*===================*/ + /* out: pointer to column object */ + const dict_table_t* table, /* in: table */ + ulint sys); /* in: DATA_ROW_ID, ... */ +#else /* UNIV_DEBUG */ +#define dict_table_get_nth_col(table, pos) \ +((table)->cols + (pos)) +#define dict_table_get_sys_col(table, sys) \ +((table)->cols + (table)->n_cols + (sys) - DATA_N_SYS_COLS) +#endif /* UNIV_DEBUG */ +/************************************************************************ +Gets the given system column number of a table. 
*/ +UNIV_INLINE +ulint +dict_table_get_sys_col_no( +/*======================*/ + /* out: column number */ + const dict_table_t* table, /* in: table */ + ulint sys); /* in: DATA_ROW_ID, ... */ +/************************************************************************ +Returns the minimum data size of an index record. */ +UNIV_INLINE +ulint +dict_index_get_min_size( +/*====================*/ + /* out: minimum data size in bytes */ + const dict_index_t* index); /* in: index */ +/************************************************************************ +Check whether the table uses the compact page format. */ +UNIV_INLINE +ibool +dict_table_is_comp( +/*===============*/ + /* out: TRUE if table uses the + compact page format */ + const dict_table_t* table); /* in: table */ +/************************************************************************ +Determine the file format of a table. */ +UNIV_INLINE +ulint +dict_table_get_format( +/*==================*/ + /* out: file format version */ + const dict_table_t* table); /* in: table */ +/************************************************************************ +Set the file format of a table. */ +UNIV_INLINE +void +dict_table_set_format( +/*==================*/ + dict_table_t* table, /* in/out: table */ + ulint format);/* in: file format version */ +/************************************************************************ +Extract the compressed page size from table flags. */ +UNIV_INLINE +ulint +dict_table_flags_to_zip_size( +/*=========================*/ + /* out: compressed page size, + or 0 if not compressed */ + ulint flags) /* in: flags */ + __attribute__((const)); +/************************************************************************ +Check whether the table uses the compressed compact page format. 
*/ +UNIV_INLINE +ulint +dict_table_zip_size( +/*================*/ + /* out: compressed page size, + or 0 if not compressed */ + const dict_table_t* table); /* in: table */ +/************************************************************************ +Checks if a column is in the ordering columns of the clustered index of a +table. Column prefixes are treated like whole columns. */ +UNIV_INTERN +ibool +dict_table_col_in_clustered_key( +/*============================*/ + /* out: TRUE if the column, or its + prefix, is in the clustered key */ + const dict_table_t* table, /* in: table */ + ulint n); /* in: column number */ +/*********************************************************************** +Copies types of columns contained in table to tuple and sets all +fields of the tuple to the SQL NULL value. This function should +be called right after dtuple_create(). */ +UNIV_INTERN +void +dict_table_copy_types( +/*==================*/ + dtuple_t* tuple, /* in/out: data tuple */ + const dict_table_t* table); /* in: table */ +/************************************************************************** +Looks for an index with the given id. NOTE that we do not reserve +the dictionary mutex: this function is for emergency purposes like +printing info of a corrupt database page! */ +UNIV_INTERN +dict_index_t* +dict_index_find_on_id_low( +/*======================*/ + /* out: index or NULL if not found from cache */ + dulint id); /* in: index id */ +/************************************************************************** +Adds an index to the dictionary cache. */ +UNIV_INTERN +ulint +dict_index_add_to_cache( +/*====================*/ + /* out: DB_SUCCESS or error code */ + dict_table_t* table, /* in: table on which the index is */ + dict_index_t* index, /* in, own: index; NOTE! The index memory + object is freed in this function! 
*/ + ulint page_no,/* in: root page number of the index */ + ibool strict);/* in: TRUE=refuse to create the index + if records could be too big to fit in + a B-tree page */ +/************************************************************************** +Removes an index from the dictionary cache. NOTE(review): this prototype repeats the dict_index_remove_from_cache declaration that appears earlier in this header. */ +UNIV_INTERN +void +dict_index_remove_from_cache( +/*=========================*/ + dict_table_t* table, /* in/out: table */ + dict_index_t* index); /* in, own: index */ +/************************************************************************ +Gets the number of fields in the internal representation of an index, +including fields added by the dictionary system. */ +UNIV_INLINE +ulint +dict_index_get_n_fields( +/*====================*/ + /* out: number of fields */ + const dict_index_t* index); /* in: an internal + representation of index (in + the dictionary cache) */ +/************************************************************************ +Gets the number of fields in the internal representation of an index +that uniquely determine the position of an index entry in the index, if +we do not take multiversioning into account: in the B-tree use the value +returned by dict_index_get_n_unique_in_tree. */ +UNIV_INLINE +ulint +dict_index_get_n_unique( +/*====================*/ + /* out: number of fields */ + const dict_index_t* index); /* in: an internal representation + of index (in the dictionary cache) */ +/************************************************************************ +Gets the number of fields in the internal representation of an index +which uniquely determine the position of an index entry in the index, if +we also take multiversioning into account.
*/ +UNIV_INLINE +ulint +dict_index_get_n_unique_in_tree( +/*============================*/ + /* out: number of fields */ + const dict_index_t* index); /* in: an internal representation + of index (in the dictionary cache) */ +/************************************************************************ +Gets the number of user-defined ordering fields in the index. In the internal +representation we add the row id to the ordering fields to make all indexes +unique, but this function returns the number of fields the user defined +in the index as ordering fields. */ +UNIV_INLINE +ulint +dict_index_get_n_ordering_defined_by_user( +/*======================================*/ + /* out: number of fields */ + const dict_index_t* index); /* in: an internal representation + of index (in the dictionary cache) */ +#ifdef UNIV_DEBUG +/************************************************************************ +Gets the nth field of an index. */ +UNIV_INLINE +dict_field_t* +dict_index_get_nth_field( +/*=====================*/ + /* out: pointer to field object */ + const dict_index_t* index, /* in: index */ + ulint pos); /* in: position of field */ +#else /* UNIV_DEBUG */ +# define dict_index_get_nth_field(index, pos) ((index)->fields + (pos)) +#endif /* UNIV_DEBUG */ +/************************************************************************ +Gets pointer to the nth column in an index. */ +UNIV_INLINE +const dict_col_t* +dict_index_get_nth_col( +/*===================*/ + /* out: column */ + const dict_index_t* index, /* in: index */ + ulint pos); /* in: position of the field */ +/************************************************************************ +Gets the column number of the nth field in an index. 
*/ +UNIV_INLINE +ulint +dict_index_get_nth_col_no( +/*======================*/ + /* out: column number */ + const dict_index_t* index, /* in: index */ + ulint pos); /* in: position of the field */ +/************************************************************************ +Looks for column n in an index. */ +UNIV_INTERN +ulint +dict_index_get_nth_col_pos( +/*=======================*/ + /* out: position in internal + representation of the index; + if not contained, returns + ULINT_UNDEFINED */ + const dict_index_t* index, /* in: index */ + ulint n); /* in: column number */ +/************************************************************************ +Returns TRUE if the index contains a column or a prefix of that column. */ +UNIV_INTERN +ibool +dict_index_contains_col_or_prefix( +/*==============================*/ + /* out: TRUE if contains the column + or its prefix */ + const dict_index_t* index, /* in: index */ + ulint n); /* in: column number */ +/************************************************************************ +Looks for a matching field in an index. The column has to be the same. The +column in index must be complete, or must contain a prefix longer than the +column in index2. That is, we must be able to construct the prefix in index2 +from the prefix in index. */ +UNIV_INTERN +ulint +dict_index_get_nth_field_pos( +/*=========================*/ + /* out: position in internal + representation of the index; + if not contained, returns + ULINT_UNDEFINED */ + const dict_index_t* index, /* in: index from which to search */ + const dict_index_t* index2, /* in: index */ + ulint n); /* in: field number in index2 */ +/************************************************************************ +Looks for column n position in the clustered index. 
*/ +UNIV_INTERN +ulint +dict_table_get_nth_col_pos( +/*=======================*/ + /* out: position in internal + representation of + the clustered index */ + const dict_table_t* table, /* in: table */ + ulint n); /* in: column number */ +/************************************************************************ +Returns the position of a system column in an index. */ +UNIV_INLINE +ulint +dict_index_get_sys_col_pos( +/*=======================*/ + /* out: position, + ULINT_UNDEFINED if not contained */ + const dict_index_t* index, /* in: index */ + ulint type); /* in: DATA_ROW_ID, ... */ +/*********************************************************************** +Adds a column to index. */ +UNIV_INTERN +void +dict_index_add_col( +/*===============*/ + dict_index_t* index, /* in/out: index */ + const dict_table_t* table, /* in: table */ + dict_col_t* col, /* in: column */ + ulint prefix_len); /* in: column prefix length */ +/*********************************************************************** +Copies types of fields contained in index to tuple. */ +UNIV_INTERN +void +dict_index_copy_types( +/*==================*/ + dtuple_t* tuple, /* in/out: data tuple */ + const dict_index_t* index, /* in: index */ + ulint n_fields); /* in: number of + field types to copy */ +/************************************************************************* +Gets the field column. */ +UNIV_INLINE +const dict_col_t* +dict_field_get_col( +/*===============*/ + const dict_field_t* field); + +/************************************************************************** +Returns an index object if it is found in the dictionary cache. +Assumes that dict_sys->mutex is already being held. 
*/ +UNIV_INTERN +dict_index_t* +dict_index_get_if_in_cache_low( +/*===========================*/ + /* out: index, NULL if not found */ + dulint index_id); /* in: index id */ +#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG +/************************************************************************** +Returns an index object if it is found in the dictionary cache. */ +UNIV_INTERN +dict_index_t* +dict_index_get_if_in_cache( +/*=======================*/ + /* out: index, NULL if not found */ + dulint index_id); /* in: index id */ +#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */ +#ifdef UNIV_DEBUG +/************************************************************************** +Checks that a tuple has n_fields_cmp value in a sensible range, so that +no comparison can occur with the page number field in a node pointer. */ +UNIV_INTERN +ibool +dict_index_check_search_tuple( +/*==========================*/ + /* out: TRUE if ok */ + const dict_index_t* index, /* in: index tree */ + const dtuple_t* tuple); /* in: tuple used in a search */ +/************************************************************************** +Check for duplicate index entries in a table [using the index name] */ +UNIV_INTERN +void +dict_table_check_for_dup_indexes( +/*=============================*/ + const dict_table_t* table); /* in: Check for dup indexes + in this table */ + +#endif /* UNIV_DEBUG */ +/************************************************************************** +Builds a node pointer out of a physical record and a page number. 
*/ +UNIV_INTERN +dtuple_t* +dict_index_build_node_ptr( +/*======================*/ + /* out, own: node pointer */ + const dict_index_t* index, /* in: index */ + const rec_t* rec, /* in: record for which to build node + pointer */ + ulint page_no,/* in: page number to put in node + pointer */ + mem_heap_t* heap, /* in: memory heap where pointer + created */ + ulint level); /* in: level of rec in tree: + 0 means leaf level */ +/************************************************************************** +Copies an initial segment of a physical record, long enough to specify an +index entry uniquely. */ +UNIV_INTERN +rec_t* +dict_index_copy_rec_order_prefix( +/*=============================*/ + /* out: pointer to the prefix record */ + const dict_index_t* index, /* in: index */ + const rec_t* rec, /* in: record for which to + copy prefix */ + ulint* n_fields,/* out: number of fields copied */ + byte** buf, /* in/out: memory buffer for the + copied prefix, or NULL */ + ulint* buf_size);/* in/out: buffer size */ +/************************************************************************** +Builds a typed data tuple out of a physical record. */ +UNIV_INTERN +dtuple_t* +dict_index_build_data_tuple( +/*========================*/ + /* out, own: data tuple */ + dict_index_t* index, /* in: index */ + rec_t* rec, /* in: record for which to build data tuple */ + ulint n_fields,/* in: number of data fields */ + mem_heap_t* heap); /* in: memory heap where tuple created */ +/************************************************************************* +Gets the space id of the root of the index tree. */ +UNIV_INLINE +ulint +dict_index_get_space( +/*=================*/ + /* out: space id */ + const dict_index_t* index); /* in: index */ +/************************************************************************* +Sets the space id of the root of the index tree. 
*/ +UNIV_INLINE +void +dict_index_set_space( +/*=================*/ + dict_index_t* index, /* in/out: index */ + ulint space); /* in: space id */ +/************************************************************************* +Gets the page number of the root of the index tree. */ +UNIV_INLINE +ulint +dict_index_get_page( +/*================*/ + /* out: page number */ + const dict_index_t* tree); /* in: index */ +/************************************************************************* +Sets the page number of the root of index tree. */ +UNIV_INLINE +void +dict_index_set_page( +/*================*/ + dict_index_t* index, /* in/out: index */ + ulint page); /* in: page number */ +/************************************************************************* +Gets the read-write lock of the index tree. */ +UNIV_INLINE +rw_lock_t* +dict_index_get_lock( +/*================*/ + /* out: read-write lock */ + dict_index_t* index); /* in: index */ +/************************************************************************ +Returns free space reserved for future updates of records. This is +relevant only in the case of many consecutive inserts, as updates +which make the records bigger might fragment the index. */ +UNIV_INLINE +ulint +dict_index_get_space_reserve(void); +/*==============================*/ + /* out: number of free bytes on page, + reserved for updates */ +/************************************************************************* +Calculates the minimum record length in an index. */ +UNIV_INTERN +ulint +dict_index_calc_min_rec_len( +/*========================*/ + const dict_index_t* index); /* in: index */ +/************************************************************************* +Calculates new estimates for table and index statistics. The statistics +are used in query optimization. 
*/ +UNIV_INTERN +void +dict_update_statistics_low( +/*=======================*/ + dict_table_t* table, /* in/out: table */ + ibool has_dict_mutex);/* in: TRUE if the caller has the + dictionary mutex */ +/************************************************************************* +Calculates new estimates for table and index statistics. The statistics +are used in query optimization. */ +UNIV_INTERN +void +dict_update_statistics( +/*===================*/ + dict_table_t* table); /* in/out: table */ +/************************************************************************ +Reserves the dictionary system mutex for MySQL. */ +UNIV_INTERN +void +dict_mutex_enter_for_mysql(void); +/*============================*/ +/************************************************************************ +Releases the dictionary system mutex for MySQL. */ +UNIV_INTERN +void +dict_mutex_exit_for_mysql(void); +/*===========================*/ +/************************************************************************ +Checks if the database name in two table names is the same. 
*/ +UNIV_INTERN +ibool +dict_tables_have_same_db( +/*=====================*/ + /* out: TRUE if same db name */ + const char* name1, /* in: table name in the form + dbname '/' tablename */ + const char* name2); /* in: table name in the form + dbname '/' tablename */ +/************************************************************************* +Removes an index from the cache */ +UNIV_INTERN +void +dict_index_remove_from_cache( +/*=========================*/ + dict_table_t* table, /* in/out: table */ + dict_index_t* index); /* in, own: index */ +/************************************************************************** +Get index by name */ +UNIV_INTERN +dict_index_t* +dict_table_get_index_on_name( +/*=========================*/ + /* out: index, NULL if does not exist */ + dict_table_t* table, /* in: table */ + const char* name); /* in: name of the index to find */ +/************************************************************************** +In case there is more than one index with the same name return the index +with the min(id). 
*/ +UNIV_INTERN +dict_index_t* +dict_table_get_index_on_name_and_min_id( +/*====================================*/ + /* out: index, NULL if does not exist */ + dict_table_t* table, /* in: table */ + const char* name); /* in: name of the index to find */ +/* Buffers for storing detailed information about the latest foreign key +and unique key errors */ +extern FILE* dict_foreign_err_file; +extern mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */ + +extern dict_sys_t* dict_sys; /* the dictionary system */ +extern rw_lock_t dict_operation_lock; + +/* Dictionary system struct */ +struct dict_sys_struct{ + mutex_t mutex; /* mutex protecting the data + dictionary; protects also the + disk-based dictionary system tables; + this mutex serializes CREATE TABLE + and DROP TABLE, as well as reading + the dictionary data for a table from + system tables */ + dulint row_id; /* the next row id to assign; + NOTE that at a checkpoint this + must be written to the dict system + header and flushed to a file; in + recovery this must be derived from + the log records */ + hash_table_t* table_hash; /* hash table of the tables, based + on name */ + hash_table_t* table_id_hash; /* hash table of the tables, based + on id */ + UT_LIST_BASE_NODE_T(dict_table_t) + table_LRU; /* LRU list of tables */ + ulint size; /* varying space in bytes occupied + by the data dictionary table and + index objects */ + dict_table_t* sys_tables; /* SYS_TABLES table */ + dict_table_t* sys_columns; /* SYS_COLUMNS table */ + dict_table_t* sys_indexes; /* SYS_INDEXES table */ + dict_table_t* sys_fields; /* SYS_FIELDS table */ +}; + +#ifndef UNIV_NONINL +#include "dict0dict.ic" +#endif + +#endif diff --git a/storage/xtradb/include/dict0dict.ic b/storage/xtradb/include/dict0dict.ic new file mode 100644 index 00000000000..628d207b329 --- /dev/null +++ b/storage/xtradb/include/dict0dict.ic @@ -0,0 +1,785 @@ +/***************************************************************************** + +Copyright 
(c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/********************************************************************** +Data dictionary system + +Created 1/8/1996 Heikki Tuuri +***********************************************************************/ + +#include "dict0load.h" +#include "rem0types.h" +#include "data0type.h" + +/************************************************************************* +Gets the column data type. */ +UNIV_INLINE +void +dict_col_copy_type( +/*===============*/ + const dict_col_t* col, /* in: column */ + dtype_t* type) /* out: data type */ +{ + ut_ad(col && type); + + type->mtype = col->mtype; + type->prtype = col->prtype; + type->len = col->len; + type->mbminlen = col->mbminlen; + type->mbmaxlen = col->mbmaxlen; +} + +#ifdef UNIV_DEBUG +/************************************************************************* +Assert that a column and a data type match. 
*/ +UNIV_INLINE +ibool +dict_col_type_assert_equal( +/*=======================*/ + /* out: TRUE */ + const dict_col_t* col, /* in: column */ + const dtype_t* type) /* in: data type */ +{ + ut_ad(col); + ut_ad(type); + + ut_ad(col->mtype == type->mtype); + ut_ad(col->prtype == type->prtype); + ut_ad(col->len == type->len); + ut_ad(col->mbminlen == type->mbminlen); + ut_ad(col->mbmaxlen == type->mbmaxlen); + + return(TRUE); +} +#endif /* UNIV_DEBUG */ + +/*************************************************************************** +Returns the minimum size of the column. */ +UNIV_INLINE +ulint +dict_col_get_min_size( +/*==================*/ + /* out: minimum size */ + const dict_col_t* col) /* in: column */ +{ + return(dtype_get_min_size_low(col->mtype, col->prtype, col->len, + col->mbminlen, col->mbmaxlen)); +} +/*************************************************************************** +Returns the maximum size of the column. */ +UNIV_INLINE +ulint +dict_col_get_max_size( +/*==================*/ + /* out: maximum size */ + const dict_col_t* col) /* in: column */ +{ + return(dtype_get_max_size_low(col->mtype, col->len)); +} +/*************************************************************************** +Returns the size of a fixed size column, 0 if not a fixed size column. */ +UNIV_INLINE +ulint +dict_col_get_fixed_size( +/*====================*/ + /* out: fixed size, or 0 */ + const dict_col_t* col) /* in: column */ +{ + return(dtype_get_fixed_size_low(col->mtype, col->prtype, col->len, + col->mbminlen, col->mbmaxlen)); +} +/*************************************************************************** +Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column. +For fixed length types it is the fixed length of the type, otherwise 0. 
*/ +UNIV_INLINE +ulint +dict_col_get_sql_null_size( +/*=======================*/ + /* out: SQL null storage size + in ROW_FORMAT=REDUNDANT */ + const dict_col_t* col) /* in: column */ +{ + return(dict_col_get_fixed_size(col)); +} + +/************************************************************************* +Gets the column number. */ +UNIV_INLINE +ulint +dict_col_get_no( +/*============*/ + const dict_col_t* col) +{ + ut_ad(col); + + return(col->ind); +} + +/************************************************************************* +Gets the column position in the clustered index. */ +UNIV_INLINE +ulint +dict_col_get_clust_pos( +/*===================*/ + const dict_col_t* col, /* in: table column */ + const dict_index_t* clust_index) /* in: clustered index */ +{ + ulint i; + + ut_ad(col); + ut_ad(clust_index); + ut_ad(dict_index_is_clust(clust_index)); + + for (i = 0; i < clust_index->n_def; i++) { + const dict_field_t* field = &clust_index->fields[i]; + + if (!field->prefix_len && field->col == col) { + return(i); + } + } + + return(ULINT_UNDEFINED); +} + +#ifdef UNIV_DEBUG +/************************************************************************ +Gets the first index on the table (the clustered index). */ +UNIV_INLINE +dict_index_t* +dict_table_get_first_index( +/*=======================*/ + /* out: index, NULL if none exists */ + const dict_table_t* table) /* in: table */ +{ + ut_ad(table); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + return(UT_LIST_GET_FIRST(((dict_table_t*) table)->indexes)); +} + +/************************************************************************ +Gets the next index on the table. 
*/ +UNIV_INLINE +dict_index_t* +dict_table_get_next_index( +/*======================*/ + /* out: index, NULL if none left */ + const dict_index_t* index) /* in: index */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + return(UT_LIST_GET_NEXT(indexes, (dict_index_t*) index)); +} +#endif /* UNIV_DEBUG */ + +/************************************************************************ +Check whether the index is the clustered index. */ +UNIV_INLINE +ulint +dict_index_is_clust( +/*================*/ + /* out: nonzero for clustered index, + zero for other indexes */ + const dict_index_t* index) /* in: index */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + return(UNIV_UNLIKELY(index->type & DICT_CLUSTERED)); +} +/************************************************************************ +Check whether the index is unique. */ +UNIV_INLINE +ulint +dict_index_is_unique( +/*=================*/ + /* out: nonzero for unique index, + zero for other indexes */ + const dict_index_t* index) /* in: index */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + return(UNIV_UNLIKELY(index->type & DICT_UNIQUE)); +} + +/************************************************************************ +Check whether the index is the insert buffer tree. */ +UNIV_INLINE +ulint +dict_index_is_ibuf( +/*===============*/ + /* out: nonzero for insert buffer, + zero for other indexes */ + const dict_index_t* index) /* in: index */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + return(UNIV_UNLIKELY(index->type & DICT_IBUF)); +} + +/************************************************************************ +Gets the number of user-defined columns in a table in the dictionary +cache. 
*/ +UNIV_INLINE +ulint +dict_table_get_n_user_cols( +/*=======================*/ + /* out: number of user-defined + (e.g., not ROW_ID) + columns of a table */ + const dict_table_t* table) /* in: table */ +{ + ut_ad(table); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + return(table->n_cols - DATA_N_SYS_COLS); +} + +/************************************************************************ +Gets the number of system columns in a table in the dictionary cache. */ +UNIV_INLINE +ulint +dict_table_get_n_sys_cols( +/*======================*/ + /* out: number of system (e.g., + ROW_ID) columns of a table */ + const dict_table_t* table __attribute__((unused))) /* in: table */ +{ + ut_ad(table); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + ut_ad(table->cached); + + return(DATA_N_SYS_COLS); +} + +/************************************************************************ +Gets the number of all columns (also system) in a table in the dictionary +cache. */ +UNIV_INLINE +ulint +dict_table_get_n_cols( +/*==================*/ + /* out: number of columns of a table */ + const dict_table_t* table) /* in: table */ +{ + ut_ad(table); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + return(table->n_cols); +} + +#ifdef UNIV_DEBUG +/************************************************************************ +Gets the nth column of a table. */ +UNIV_INLINE +dict_col_t* +dict_table_get_nth_col( +/*===================*/ + /* out: pointer to column object */ + const dict_table_t* table, /* in: table */ + ulint pos) /* in: position of column */ +{ + ut_ad(table); + ut_ad(pos < table->n_def); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + return((dict_col_t*) (table->cols) + pos); +} + +/************************************************************************ +Gets the given system column of a table. 
*/ +UNIV_INLINE +dict_col_t* +dict_table_get_sys_col( +/*===================*/ + /* out: pointer to column object */ + const dict_table_t* table, /* in: table */ + ulint sys) /* in: DATA_ROW_ID, ... */ +{ + dict_col_t* col; + + ut_ad(table); + ut_ad(sys < DATA_N_SYS_COLS); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + col = dict_table_get_nth_col(table, table->n_cols + - DATA_N_SYS_COLS + sys); + ut_ad(col->mtype == DATA_SYS); + ut_ad(col->prtype == (sys | DATA_NOT_NULL)); + + return(col); +} +#endif /* UNIV_DEBUG */ + +/************************************************************************ +Gets the given system column number of a table. */ +UNIV_INLINE +ulint +dict_table_get_sys_col_no( +/*======================*/ + /* out: column number */ + const dict_table_t* table, /* in: table */ + ulint sys) /* in: DATA_ROW_ID, ... */ +{ + ut_ad(table); + ut_ad(sys < DATA_N_SYS_COLS); + ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); + + return(table->n_cols - DATA_N_SYS_COLS + sys); +} + +/************************************************************************ +Check whether the table uses the compact page format. */ +UNIV_INLINE +ibool +dict_table_is_comp( +/*===============*/ + /* out: TRUE if table uses the + compact page format */ + const dict_table_t* table) /* in: table */ +{ + ut_ad(table); + +#if DICT_TF_COMPACT != TRUE +#error +#endif + + return(UNIV_LIKELY(table->flags & DICT_TF_COMPACT)); +} + +/************************************************************************ +Determine the file format of a table. */ +UNIV_INLINE +ulint +dict_table_get_format( +/*==================*/ + /* out: file format version */ + const dict_table_t* table) /* in: table */ +{ + ut_ad(table); + + return((table->flags & DICT_TF_FORMAT_MASK) >> DICT_TF_FORMAT_SHIFT); +} + +/************************************************************************ +Set the file format of a table. 
*/ +UNIV_INLINE +void +dict_table_set_format( +/*==================*/ + dict_table_t* table, /* in/out: table */ + ulint format) /* in: file format version */ +{ + ut_ad(table); + + table->flags = (table->flags & ~DICT_TF_FORMAT_MASK) + | (format << DICT_TF_FORMAT_SHIFT); +} + +/************************************************************************ +Extract the compressed page size from table flags. */ +UNIV_INLINE +ulint +dict_table_flags_to_zip_size( +/*=========================*/ + /* out: compressed page size, + or 0 if not compressed */ + ulint flags) /* in: flags */ +{ + ulint zip_size = flags & DICT_TF_ZSSIZE_MASK; + + if (UNIV_UNLIKELY(zip_size)) { + zip_size = ((PAGE_ZIP_MIN_SIZE >> 1) + << (zip_size >> DICT_TF_ZSSIZE_SHIFT)); + + ut_ad(zip_size <= UNIV_PAGE_SIZE); + } + + return(zip_size); +} + +/************************************************************************ +Check whether the table uses the compressed compact page format. */ +UNIV_INLINE +ulint +dict_table_zip_size( +/*================*/ + /* out: compressed page size, + or 0 if not compressed */ + const dict_table_t* table) /* in: table */ +{ + ut_ad(table); + + return(dict_table_flags_to_zip_size(table->flags)); +} + +/************************************************************************ +Gets the number of fields in the internal representation of an index, +including fields added by the dictionary system. 
*/ +UNIV_INLINE +ulint +dict_index_get_n_fields( +/*====================*/ + /* out: number of fields */ + const dict_index_t* index) /* in: an internal + representation of index (in + the dictionary cache) */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + return(index->n_fields); +} + +/************************************************************************ +Gets the number of fields in the internal representation of an index +that uniquely determine the position of an index entry in the index, if +we do not take multiversioning into account: in the B-tree use the value +returned by dict_index_get_n_unique_in_tree. */ +UNIV_INLINE +ulint +dict_index_get_n_unique( +/*====================*/ + /* out: number of fields */ + const dict_index_t* index) /* in: an internal representation + of index (in the dictionary cache) */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + ut_ad(index->cached); + + return(index->n_uniq); +} + +/************************************************************************ +Gets the number of fields in the internal representation of an index +which uniquely determine the position of an index entry in the index, if +we also take multiversioning into account. */ +UNIV_INLINE +ulint +dict_index_get_n_unique_in_tree( +/*============================*/ + /* out: number of fields */ + const dict_index_t* index) /* in: an internal representation + of index (in the dictionary cache) */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + ut_ad(index->cached); + + if (dict_index_is_clust(index)) { + + return(dict_index_get_n_unique(index)); + } + + return(dict_index_get_n_fields(index)); +} + +/************************************************************************ +Gets the number of user-defined ordering fields in the index. 
In the internal +representation of clustered indexes we add the row id to the ordering fields +to make a clustered index unique, but this function returns the number of +fields the user defined in the index as ordering fields. */ +UNIV_INLINE +ulint +dict_index_get_n_ordering_defined_by_user( +/*======================================*/ + /* out: number of fields */ + const dict_index_t* index) /* in: an internal representation + of index (in the dictionary cache) */ +{ + return(index->n_user_defined_cols); +} + +#ifdef UNIV_DEBUG +/************************************************************************ +Gets the nth field of an index. */ +UNIV_INLINE +dict_field_t* +dict_index_get_nth_field( +/*=====================*/ + /* out: pointer to field object */ + const dict_index_t* index, /* in: index */ + ulint pos) /* in: position of field */ +{ + ut_ad(index); + ut_ad(pos < index->n_def); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + return((dict_field_t*) (index->fields) + pos); +} +#endif /* UNIV_DEBUG */ + +/************************************************************************ +Returns the position of a system column in an index. */ +UNIV_INLINE +ulint +dict_index_get_sys_col_pos( +/*=======================*/ + /* out: position, + ULINT_UNDEFINED if not contained */ + const dict_index_t* index, /* in: index */ + ulint type) /* in: DATA_ROW_ID, ... */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + ut_ad(!(index->type & DICT_UNIVERSAL)); + + if (dict_index_is_clust(index)) { + + return(dict_col_get_clust_pos( + dict_table_get_sys_col(index->table, type), + index)); + } + + return(dict_index_get_nth_col_pos( + index, dict_table_get_sys_col_no(index->table, type))); +} + +/************************************************************************* +Gets the field column. 
*/ +UNIV_INLINE +const dict_col_t* +dict_field_get_col( +/*===============*/ + const dict_field_t* field) +{ + ut_ad(field); + + return(field->col); +} + +/************************************************************************ +Gets pointer to the nth column in an index. */ +UNIV_INLINE +const dict_col_t* +dict_index_get_nth_col( +/*===================*/ + /* out: column */ + const dict_index_t* index, /* in: index */ + ulint pos) /* in: position of the field */ +{ + return(dict_field_get_col(dict_index_get_nth_field(index, pos))); +} + +/************************************************************************ +Gets the column number of the nth field in an index. */ +UNIV_INLINE +ulint +dict_index_get_nth_col_no( +/*======================*/ + /* out: column number */ + const dict_index_t* index, /* in: index */ + ulint pos) /* in: position of the field */ +{ + return(dict_col_get_no(dict_index_get_nth_col(index, pos))); +} + +/************************************************************************ +Returns the minimum data size of an index record. */ +UNIV_INLINE +ulint +dict_index_get_min_size( +/*====================*/ + /* out: minimum data size in bytes */ + const dict_index_t* index) /* in: index */ +{ + ulint n = dict_index_get_n_fields(index); + ulint size = 0; + + while (n--) { + size += dict_col_get_min_size(dict_index_get_nth_col(index, + n)); + } + + return(size); +} + +/************************************************************************* +Gets the space id of the root of the index tree. */ +UNIV_INLINE +ulint +dict_index_get_space( +/*=================*/ + /* out: space id */ + const dict_index_t* index) /* in: index */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + return(index->space); +} + +/************************************************************************* +Sets the space id of the root of the index tree. 
*/ +UNIV_INLINE +void +dict_index_set_space( +/*=================*/ + dict_index_t* index, /* in/out: index */ + ulint space) /* in: space id */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + index->space = space; +} + +/************************************************************************* +Gets the page number of the root of the index tree. */ +UNIV_INLINE +ulint +dict_index_get_page( +/*================*/ + /* out: page number */ + const dict_index_t* index) /* in: index */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + return(index->page); +} + +/************************************************************************* +Sets the page number of the root of index tree. */ +UNIV_INLINE +void +dict_index_set_page( +/*================*/ + dict_index_t* index, /* in/out: index */ + ulint page) /* in: page number */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + index->page = page; +} + +/************************************************************************* +Gets the read-write lock of the index tree. */ +UNIV_INLINE +rw_lock_t* +dict_index_get_lock( +/*================*/ + /* out: read-write lock */ + dict_index_t* index) /* in: index */ +{ + ut_ad(index); + ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); + + return(&(index->lock)); +} + +/************************************************************************ +Returns free space reserved for future updates of records. This is +relevant only in the case of many consecutive inserts, as updates +which make the records bigger might fragment the index. */ +UNIV_INLINE +ulint +dict_index_get_space_reserve(void) +/*==============================*/ + /* out: number of free bytes on page, + reserved for updates */ +{ + return(UNIV_PAGE_SIZE / 16); +} + +/************************************************************************** +Checks if a table is in the dictionary cache. 
*/ +UNIV_INLINE +dict_table_t* +dict_table_check_if_in_cache_low( +/*=============================*/ + /* out: table, NULL if not found */ + const char* table_name) /* in: table name */ +{ + dict_table_t* table; + ulint table_fold; + + ut_ad(table_name); + ut_ad(mutex_own(&(dict_sys->mutex))); + + /* Look for the table name in the hash table */ + table_fold = ut_fold_string(table_name); + + HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, + dict_table_t*, table, ut_ad(table->cached), + !strcmp(table->name, table_name)); + return(table); +} + +/************************************************************************** +Gets a table; loads it to the dictionary cache if necessary. A low-level +function. */ +UNIV_INLINE +dict_table_t* +dict_table_get_low( +/*===============*/ + /* out: table, NULL if not found */ + const char* table_name) /* in: table name */ +{ + dict_table_t* table; + + ut_ad(table_name); + ut_ad(mutex_own(&(dict_sys->mutex))); + + table = dict_table_check_if_in_cache_low(table_name); + + if (table == NULL) { + table = dict_load_table(table_name); + } + + ut_ad(!table || table->cached); + + return(table); +} + +/************************************************************************** +Returns a table object based on table id. 
*/ +UNIV_INLINE +dict_table_t* +dict_table_get_on_id_low( +/*=====================*/ + /* out: table, NULL if does not exist */ + dulint table_id) /* in: table id */ +{ + dict_table_t* table; + ulint fold; + + ut_ad(mutex_own(&(dict_sys->mutex))); + + /* Look for the table id in the hash table */ + fold = ut_fold_dulint(table_id); + + HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold, + dict_table_t*, table, ut_ad(table->cached), + !ut_dulint_cmp(table->id, table_id)); + if (table == NULL) { + table = dict_load_table_on_id(table_id); + } + + ut_ad(!table || table->cached); + + /* TODO: should get the type information from MySQL */ + + return(table); +} + diff --git a/storage/xtradb/include/dict0load.h b/storage/xtradb/include/dict0load.h new file mode 100644 index 00000000000..759cbcdb14a --- /dev/null +++ b/storage/xtradb/include/dict0load.h @@ -0,0 +1,119 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Loads to the memory cache database object definitions +from dictionary tables + +Created 4/24/1996 Heikki Tuuri +*******************************************************/ + +#ifndef dict0load_h +#define dict0load_h + +#include "univ.i" +#include "dict0types.h" +#include "ut0byte.h" +#include "mem0mem.h" + +/************************************************************************ +In a crash recovery we already have all the tablespace objects created. +This function compares the space id information in the InnoDB data dictionary +to what we already read with fil_load_single_table_tablespaces(). + +In a normal startup, we create the tablespace objects for every table in +InnoDB's data dictionary, if the corresponding .ibd file exists. +We also scan the biggest space id, and store it to fil_system. */ +UNIV_INTERN +void +dict_check_tablespaces_and_store_max_id( +/*====================================*/ + ibool in_crash_recovery); /* in: are we doing a crash recovery */ +/************************************************************************ +Finds the first table name in the given database. */ +UNIV_INTERN +char* +dict_get_first_table_name_in_db( +/*============================*/ + /* out, own: table name, NULL if + does not exist; the caller must free + the memory in the string! */ + const char* name); /* in: database name which ends to '/' */ +/************************************************************************ +Loads a table definition and also all its index definitions, and also +the cluster definition if the table is a member in a cluster. 
Also loads +all foreign key constraints where the foreign key is in the table or where +a foreign key references columns in this table. */ +UNIV_INTERN +dict_table_t* +dict_load_table( +/*============*/ + /* out: table, NULL if does not exist; + if the table is stored in an .ibd file, + but the file does not exist, + then we set the ibd_file_missing flag TRUE + in the table object we return */ + const char* name); /* in: table name in the + databasename/tablename format */ +/*************************************************************************** +Loads a table object based on the table id. */ +UNIV_INTERN +dict_table_t* +dict_load_table_on_id( +/*==================*/ + /* out: table; NULL if table does not exist */ + dulint table_id); /* in: table id */ +/************************************************************************ +This function is called when the database is booted. +Loads system table index definitions except for the clustered index which +is added to the dictionary cache at booting before calling this function. */ +UNIV_INTERN +void +dict_load_sys_table( +/*================*/ + dict_table_t* table); /* in: system table */ +#ifndef UNIV_HOTBACKUP +/*************************************************************************** +Loads foreign key constraints where the table is either the foreign key +holder or where the table is referenced by a foreign key. Adds these +constraints to the data dictionary. Note that we know that the dictionary +cache already contains all constraints where the other relevant table is +already in the dictionary cache. 
*/ +UNIV_INTERN +ulint +dict_load_foreigns( +/*===============*/ + /* out: DB_SUCCESS or error code */ + const char* table_name, /* in: table name */ + ibool check_charsets);/* in: TRUE=check charsets + compatibility */ +#endif /* !UNIV_HOTBACKUP */ +/************************************************************************ +Prints to the standard output information on all tables found in the data +dictionary system table. */ +UNIV_INTERN +void +dict_print(void); +/*============*/ + + +#ifndef UNIV_NONINL +#include "dict0load.ic" +#endif + +#endif diff --git a/storage/xtradb/include/dict0load.ic b/storage/xtradb/include/dict0load.ic new file mode 100644 index 00000000000..72eac2f621a --- /dev/null +++ b/storage/xtradb/include/dict0load.ic @@ -0,0 +1,25 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Loads to the memory cache database object definitions +from dictionary tables + +Created 4/24/1996 Heikki Tuuri +*******************************************************/ + diff --git a/storage/xtradb/include/dict0mem.h b/storage/xtradb/include/dict0mem.h new file mode 100644 index 00000000000..e2b3cfa3679 --- /dev/null +++ b/storage/xtradb/include/dict0mem.h @@ -0,0 +1,501 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Data dictionary memory object creation + +Created 1/8/1996 Heikki Tuuri +*******************************************************/ + +#ifndef dict0mem_h +#define dict0mem_h + +#include "univ.i" +#include "dict0types.h" +#include "data0type.h" +#include "mem0mem.h" +#include "rem0types.h" +#include "btr0types.h" +#include "ut0mem.h" +#include "ut0lst.h" +#include "ut0rnd.h" +#include "ut0byte.h" +#include "sync0rw.h" +#include "lock0types.h" +#include "hash0hash.h" +#include "que0types.h" +#include "trx0types.h" + +/* Type flags of an index: OR'ing of the flags is allowed to define a +combination of types */ +#define DICT_CLUSTERED 1 /* clustered index */ +#define DICT_UNIQUE 2 /* unique index */ +#define DICT_UNIVERSAL 4 /* index which can contain records from any + other index */ +#define DICT_IBUF 8 /* insert buffer tree */ + +/* Types for a table object */ +#define DICT_TABLE_ORDINARY 1 +#if 0 /* not implemented */ +#define DICT_TABLE_CLUSTER_MEMBER 2 +#define DICT_TABLE_CLUSTER 3 /* this means that the table is + really a cluster definition */ +#endif + +/* Table flags. All unused bits must be 0. */ +#define DICT_TF_COMPACT 1 /* Compact page format. + This must be set for + new file formats + (later than + DICT_TF_FORMAT_51). 
*/ + +/* compressed page size (0=uncompressed, up to 15 compressed sizes) */ +#define DICT_TF_ZSSIZE_SHIFT 1 +#define DICT_TF_ZSSIZE_MASK (15 << DICT_TF_ZSSIZE_SHIFT) +#define DICT_TF_ZSSIZE_MAX (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 1) + + +#define DICT_TF_FORMAT_SHIFT 5 /* file format */ +#define DICT_TF_FORMAT_MASK (127 << DICT_TF_FORMAT_SHIFT) +#define DICT_TF_FORMAT_51 0 /* InnoDB/MySQL up to 5.1 */ +#define DICT_TF_FORMAT_ZIP 1 /* InnoDB plugin for 5.1: + compressed tables, + new BLOB treatment */ +#define DICT_TF_FORMAT_MAX DICT_TF_FORMAT_ZIP + +#define DICT_TF_BITS 6 /* number of flag bits */ +#if (1 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT)) <= DICT_TF_FORMAT_MAX +# error "DICT_TF_BITS is insufficient for DICT_TF_FORMAT_MAX" +#endif + +/************************************************************************** +Creates a table memory object. */ +UNIV_INTERN +dict_table_t* +dict_mem_table_create( +/*==================*/ + /* out, own: table object */ + const char* name, /* in: table name */ + ulint space, /* in: space where the clustered index + of the table is placed; this parameter + is ignored if the table is made + a member of a cluster */ + ulint n_cols, /* in: number of columns */ + ulint flags); /* in: table flags */ +/******************************************************************** +Free a table memory object. */ +UNIV_INTERN +void +dict_mem_table_free( +/*================*/ + dict_table_t* table); /* in: table */ +/************************************************************************** +Adds a column definition to a table. 
*/ +UNIV_INTERN +void +dict_mem_table_add_col( +/*===================*/ + dict_table_t* table, /* in: table */ + mem_heap_t* heap, /* in: temporary memory heap, or NULL */ + const char* name, /* in: column name, or NULL */ + ulint mtype, /* in: main datatype */ + ulint prtype, /* in: precise type */ + ulint len); /* in: precision */ +/************************************************************************** +Creates an index memory object. */ +UNIV_INTERN +dict_index_t* +dict_mem_index_create( +/*==================*/ + /* out, own: index object */ + const char* table_name, /* in: table name */ + const char* index_name, /* in: index name */ + ulint space, /* in: space where the index tree is + placed, ignored if the index is of + the clustered type */ + ulint type, /* in: DICT_UNIQUE, + DICT_CLUSTERED, ... ORed */ + ulint n_fields); /* in: number of fields */ +/************************************************************************** +Adds a field definition to an index. NOTE: does not take a copy +of the column name if the field is a column. The memory occupied +by the column name may be released only after publishing the index. */ +UNIV_INTERN +void +dict_mem_index_add_field( +/*=====================*/ + dict_index_t* index, /* in: index */ + const char* name, /* in: column name */ + ulint prefix_len); /* in: 0 or the column prefix length + in a MySQL index like + INDEX (textcol(25)) */ +/************************************************************************** +Frees an index memory object. */ +UNIV_INTERN +void +dict_mem_index_free( +/*================*/ + dict_index_t* index); /* in: index */ +/************************************************************************** +Creates and initializes a foreign constraint memory object. 
*/ +UNIV_INTERN +dict_foreign_t* +dict_mem_foreign_create(void); +/*=========================*/ + /* out, own: foreign constraint struct */ + +/* Data structure for a column in a table */ +struct dict_col_struct{ + /*----------------------*/ + /* The following are copied from dtype_t, + so that all bit-fields can be packed tightly. */ + unsigned mtype:8; /* main data type */ + unsigned prtype:24; /* precise type; MySQL data + type, charset code, flags to + indicate nullability, + signedness, whether this is a + binary string, whether this is + a true VARCHAR where MySQL + uses 2 bytes to store the length */ + + /* the remaining fields do not affect alphabetical ordering: */ + + unsigned len:16; /* length; for MySQL data this + is field->pack_length(), + except that for a >= 5.0.3 + type true VARCHAR this is the + maximum byte length of the + string data (in addition to + the string, MySQL uses 1 or 2 + bytes to store the string length) */ + + unsigned mbminlen:2; /* minimum length of a + character, in bytes */ + unsigned mbmaxlen:3; /* maximum length of a + character, in bytes */ + /*----------------------*/ + /* End of definitions copied from dtype_t */ + + unsigned ind:10; /* table column position + (starting from 0) */ + unsigned ord_part:1; /* nonzero if this column + appears in the ordering fields + of an index */ +}; + +/* DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum +indexed column length (or indexed prefix length). It is set to 3*256, +so that one can create a column prefix index on 256 characters of a +TEXT or VARCHAR column also in the UTF-8 charset. In that charset, +a character may take at most 3 bytes. +This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data +files would be at risk! 
*/ + +#define DICT_MAX_INDEX_COL_LEN REC_MAX_INDEX_COL_LEN + +/* Data structure for a field in an index */ +struct dict_field_struct{ + dict_col_t* col; /* pointer to the table column */ + const char* name; /* name of the column */ + unsigned prefix_len:10; /* 0 or the length of the column + prefix in bytes in a MySQL index of + type, e.g., INDEX (textcol(25)); + must be smaller than + DICT_MAX_INDEX_COL_LEN; NOTE that + in the UTF-8 charset, MySQL sets this + to 3 * the prefix len in UTF-8 chars */ + unsigned fixed_len:10; /* 0 or the fixed length of the + column if smaller than + DICT_MAX_INDEX_COL_LEN */ +}; + +/* Data structure for an index. Most fields will be +initialized to 0, NULL or FALSE in dict_mem_index_create(). */ +struct dict_index_struct{ + dulint id; /* id of the index */ + mem_heap_t* heap; /* memory heap */ + const char* name; /* index name */ + const char* table_name; /* table name */ + dict_table_t* table; /* back pointer to table */ + unsigned space:32; + /* space where the index tree is placed */ + unsigned page:32;/* index tree root page number */ + unsigned type:4; /* index type (DICT_CLUSTERED, DICT_UNIQUE, + DICT_UNIVERSAL, DICT_IBUF) */ + unsigned trx_id_offset:10;/* position of the trx id column + in a clustered index record, if the fields + before it are known to be of a fixed size, + 0 otherwise */ + unsigned n_user_defined_cols:10; + /* number of columns the user defined to + be in the index: in the internal + representation we add more columns */ + unsigned n_uniq:10;/* number of fields from the beginning + which are enough to determine an index + entry uniquely */ + unsigned n_def:10;/* number of fields defined so far */ + unsigned n_fields:10;/* number of fields in the index */ + unsigned n_nullable:10;/* number of nullable fields */ + unsigned cached:1;/* TRUE if the index object is in the + dictionary cache */ + unsigned to_be_dropped:1; + /* TRUE if this index is marked to be + dropped in ha_innobase::prepare_drop_index(), + 
otherwise FALSE */ + dict_field_t* fields; /* array of field descriptions */ + UT_LIST_NODE_T(dict_index_t) + indexes;/* list of indexes of the table */ + btr_search_t* search_info; /* info used in optimistic searches */ + /*----------------------*/ + ib_int64_t* stat_n_diff_key_vals; + /* approximate number of different key values + for this index, for each n-column prefix + where n <= dict_get_n_unique(index); we + periodically calculate new estimates */ + ulint stat_index_size; + /* approximate index size in database pages */ + ulint stat_n_leaf_pages; + /* approximate number of leaf pages in the + index tree */ + rw_lock_t lock; /* read-write lock protecting the upper levels + of the index tree */ +#ifdef ROW_MERGE_IS_INDEX_USABLE + dulint trx_id; /* id of the transaction that created this + index, or ut_dulint_zero if the index existed + when InnoDB was started up */ +#endif /* ROW_MERGE_IS_INDEX_USABLE */ +#ifdef UNIV_DEBUG + ulint magic_n;/* magic number */ +# define DICT_INDEX_MAGIC_N 76789786 +#endif +}; + +/* Data structure for a foreign key constraint; an example: +FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D). Most fields will be +initialized to 0, NULL or FALSE in dict_mem_foreign_create(). 
*/ + +struct dict_foreign_struct{ + mem_heap_t* heap; /* this object is allocated from + this memory heap */ + char* id; /* id of the constraint as a + null-terminated string */ + unsigned n_fields:10; /* number of indexes' first fields + for which the foreign key + constraint is defined: we allow the + indexes to contain more fields than + mentioned in the constraint, as long + as the first fields are as mentioned */ + unsigned type:6; /* 0 or DICT_FOREIGN_ON_DELETE_CASCADE + or DICT_FOREIGN_ON_DELETE_SET_NULL */ + char* foreign_table_name;/* foreign table name */ + dict_table_t* foreign_table; /* table where the foreign key is */ + const char** foreign_col_names;/* names of the columns in the + foreign key */ + char* referenced_table_name;/* referenced table name */ + dict_table_t* referenced_table;/* table where the referenced key + is */ + const char** referenced_col_names;/* names of the referenced + columns in the referenced table */ + dict_index_t* foreign_index; /* foreign index; we require that + both tables contain explicitly defined + indexes for the constraint: InnoDB + does not generate new indexes + implicitly */ + dict_index_t* referenced_index;/* referenced index */ + UT_LIST_NODE_T(dict_foreign_t) + foreign_list; /* list node for foreign keys of the + table */ + UT_LIST_NODE_T(dict_foreign_t) + referenced_list;/* list node for referenced keys of the + table */ +}; + +/* The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that +a foreign key constraint is enforced, therefore RESTRICT just means no flag */ +#define DICT_FOREIGN_ON_DELETE_CASCADE 1 +#define DICT_FOREIGN_ON_DELETE_SET_NULL 2 +#define DICT_FOREIGN_ON_UPDATE_CASCADE 4 +#define DICT_FOREIGN_ON_UPDATE_SET_NULL 8 +#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16 +#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32 + + +/* Data structure for a database table. Most fields will be +initialized to 0, NULL or FALSE in dict_mem_table_create(). 
*/ +struct dict_table_struct{ + dulint id; /* id of the table */ + mem_heap_t* heap; /* memory heap */ + const char* name; /* table name */ + const char* dir_path_of_temp_table;/* NULL or the directory path + where a TEMPORARY table that was explicitly + created by a user should be placed if + innodb_file_per_table is defined in my.cnf; + in Unix this is usually /tmp/..., in Windows + \temp\... */ + unsigned space:32; + /* space where the clustered index of the + table is placed */ + unsigned flags:DICT_TF_BITS;/* DICT_TF_COMPACT, ... */ + unsigned ibd_file_missing:1; + /* TRUE if this is in a single-table + tablespace and the .ibd file is missing; then + we must return in ha_innodb.cc an error if the + user tries to query such an orphaned table */ + unsigned tablespace_discarded:1; + /* this flag is set TRUE when the user + calls DISCARD TABLESPACE on this + table, and reset to FALSE in IMPORT + TABLESPACE */ + unsigned cached:1;/* TRUE if the table object has been added + to the dictionary cache */ + unsigned n_def:10;/* number of columns defined so far */ + unsigned n_cols:10;/* number of columns */ + dict_col_t* cols; /* array of column descriptions */ + const char* col_names; + /* Column names packed in a character string + "name1\0name2\0...nameN\0". Until + the string contains n_cols, it will be + allocated from a temporary heap. The final + string will be allocated from table->heap. 
*/ + hash_node_t name_hash; /* hash chain node */ + hash_node_t id_hash; /* hash chain node */ + UT_LIST_BASE_NODE_T(dict_index_t) + indexes; /* list of indexes of the table */ + UT_LIST_BASE_NODE_T(dict_foreign_t) + foreign_list;/* list of foreign key constraints + in the table; these refer to columns + in other tables */ + UT_LIST_BASE_NODE_T(dict_foreign_t) + referenced_list;/* list of foreign key constraints + which refer to this table */ + UT_LIST_NODE_T(dict_table_t) + table_LRU; /* node of the LRU list of tables */ + ulint n_mysql_handles_opened; + /* count of how many handles MySQL has opened + to this table; dropping of the table is + NOT allowed until this count gets to zero; + MySQL does NOT itself check the number of + open handles at drop */ + ulint n_foreign_key_checks_running; + /* count of how many foreign key check + operations are currently being performed + on the table: we cannot drop the table while + there are foreign key checks running on + it! */ + dulint query_cache_inv_trx_id; + /* transactions whose trx id is smaller than this + number are not allowed to store to the MySQL + query cache or retrieve from it; when a trx + with undo logs commits, it sets this to the + value of the trx id counter for the tables it + had an IX lock on */ + UT_LIST_BASE_NODE_T(lock_t) + locks; /* list of locks on the table */ +#ifdef UNIV_DEBUG + /*----------------------*/ + ibool does_not_fit_in_memory; + /* this field is used to specify in simulations + tables which are so big that disk should be + accessed: disk access is simulated by + putting the thread to sleep for a while; + NOTE that this flag is not stored to the data + dictionary on disk, and the database will + forget about value TRUE if it has to reload + the table definition from disk */ +#endif /* UNIV_DEBUG */ + /*----------------------*/ + unsigned big_rows:1; + /* flag: TRUE if the maximum length of + a single row exceeds BIG_ROW_SIZE; + initialized in dict_table_add_to_cache() */ + unsigned 
stat_initialized:1; /* TRUE if statistics have + been calculated the first time + after database startup or table creation */ + ib_int64_t stat_n_rows; + /* approximate number of rows in the table; + we periodically calculate new estimates */ + ulint stat_clustered_index_size; + /* approximate clustered index size in + database pages */ + ulint stat_sum_of_other_index_sizes; + /* other indexes in database pages */ + ulint stat_modified_counter; + /* when a row is inserted, updated, or deleted, + we add 1 to this number; we calculate new + estimates for the stat_... values for the + table and the indexes at an interval of 2 GB + or when about 1 / 16 of table has been + modified; also when the estimate operation is + called for MySQL SHOW TABLE STATUS; the + counter is reset to zero at statistics + calculation; this counter is not protected by + any latch, because this is only used for + heuristics */ + /*----------------------*/ + /* The following fields are used by the + AUTOINC code. The actual collection of + tables locked during AUTOINC read/write is + kept in trx_t. In order to quickly determine + whether a transaction has locked the AUTOINC + lock we keep a pointer to the transaction + here in the autoinc_trx variable. This is to + avoid acquiring the kernel mutex and scanning + the vector in trx_t. 
+ + When an AUTOINC lock has to wait, the + corresponding lock instance is created on + the trx lock heap rather than use the + pre-allocated instance in autoinc_lock below.*/ + lock_t* autoinc_lock; + /* a buffer for an AUTOINC lock + for this table: we allocate the memory here + so that individual transactions can get it + and release it without a need to allocate + space from the lock heap of the trx: + otherwise the lock heap would grow rapidly + if we do a large insert from a select */ + mutex_t autoinc_mutex; + /* mutex protecting the autoincrement + counter */ + ib_uint64_t autoinc;/* autoinc counter value to give to the + next inserted row */ + ulong n_waiting_or_granted_auto_inc_locks; + /* This counter is used to track the number + of granted and pending autoinc locks on this + table. This value is set after acquiring the + kernel mutex but we peek the contents to + determine whether other transactions have + acquired the AUTOINC lock or not. Of course + only one transaction can be granted the + lock but there can be multiple waiters. */ + const trx_t* autoinc_trx; + /* The transaction that currently holds the + AUTOINC lock on this table. */ + /*----------------------*/ + +#ifdef UNIV_DEBUG + ulint magic_n;/* magic number */ +# define DICT_TABLE_MAGIC_N 76333786 +#endif /* UNIV_DEBUG */ +}; + +#ifndef UNIV_NONINL +#include "dict0mem.ic" +#endif + +#endif diff --git a/storage/xtradb/include/dict0mem.ic b/storage/xtradb/include/dict0mem.ic new file mode 100644 index 00000000000..6916393a9cd --- /dev/null +++ b/storage/xtradb/include/dict0mem.ic @@ -0,0 +1,25 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/********************************************************************** +Data dictionary memory object creation + +Created 1/8/1996 Heikki Tuuri +***********************************************************************/ + + diff --git a/storage/xtradb/include/dict0types.h b/storage/xtradb/include/dict0types.h new file mode 100644 index 00000000000..b93e995e01b --- /dev/null +++ b/storage/xtradb/include/dict0types.h @@ -0,0 +1,45 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Data dictionary global types + +Created 1/8/1996 Heikki Tuuri +*******************************************************/ + +#ifndef dict0types_h +#define dict0types_h + +#include "ut0list.h" + +typedef struct dict_sys_struct dict_sys_t; +typedef struct dict_col_struct dict_col_t; +typedef struct dict_field_struct dict_field_t; +typedef struct dict_index_struct dict_index_t; +typedef struct dict_table_struct dict_table_t; +typedef struct dict_foreign_struct dict_foreign_t; + +/* A cluster object is a table object with the type field set to +DICT_CLUSTERED */ + +typedef dict_table_t dict_cluster_t; + +typedef struct ind_node_struct ind_node_t; +typedef struct tab_node_struct tab_node_t; + +#endif diff --git a/storage/xtradb/include/dyn0dyn.h b/storage/xtradb/include/dyn0dyn.h new file mode 100644 index 00000000000..c06d6b88d2f --- /dev/null +++ b/storage/xtradb/include/dyn0dyn.h @@ -0,0 +1,182 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The dynamically allocated array + +Created 2/5/1996 Heikki Tuuri +*******************************************************/ + +#ifndef dyn0dyn_h +#define dyn0dyn_h + +#include "univ.i" +#include "ut0lst.h" +#include "mem0mem.h" + +typedef struct dyn_block_struct dyn_block_t; +typedef dyn_block_t dyn_array_t; + + +/* This is the initial 'payload' size of a dynamic array; +this must be > MLOG_BUF_MARGIN + 30! */ +#define DYN_ARRAY_DATA_SIZE 512 + +/************************************************************************* +Initializes a dynamic array. */ +UNIV_INLINE +dyn_array_t* +dyn_array_create( +/*=============*/ + /* out: initialized dyn array */ + dyn_array_t* arr); /* in: pointer to a memory buffer of + size sizeof(dyn_array_t) */ +/**************************************************************** +Frees a dynamic array. */ +UNIV_INLINE +void +dyn_array_free( +/*===========*/ + dyn_array_t* arr); /* in: dyn array */ +/************************************************************************* +Makes room on top of a dyn array and returns a pointer to a buffer in it. +After copying the elements, the caller must close the buffer using +dyn_array_close. */ +UNIV_INLINE +byte* +dyn_array_open( +/*===========*/ + /* out: pointer to the buffer */ + dyn_array_t* arr, /* in: dynamic array */ + ulint size); /* in: size in bytes of the buffer; MUST be + smaller than DYN_ARRAY_DATA_SIZE! */ +/************************************************************************* +Closes the buffer returned by dyn_array_open. 
*/ +UNIV_INLINE +void +dyn_array_close( +/*============*/ + dyn_array_t* arr, /* in: dynamic array */ + byte* ptr); /* in: buffer space from ptr up was not used */ +/************************************************************************* +Makes room on top of a dyn array and returns a pointer to +the added element. The caller must copy the element to +the pointer returned. */ +UNIV_INLINE +void* +dyn_array_push( +/*===========*/ + /* out: pointer to the element */ + dyn_array_t* arr, /* in: dynamic array */ + ulint size); /* in: size in bytes of the element */ +/**************************************************************** +Returns pointer to an element in dyn array. */ +UNIV_INLINE +void* +dyn_array_get_element( +/*==================*/ + /* out: pointer to element */ + dyn_array_t* arr, /* in: dyn array */ + ulint pos); /* in: position of element as bytes + from array start */ +/**************************************************************** +Returns the size of stored data in a dyn array. */ +UNIV_INLINE +ulint +dyn_array_get_data_size( +/*====================*/ + /* out: data size in bytes */ + dyn_array_t* arr); /* in: dyn array */ +/**************************************************************** +Gets the first block in a dyn array. */ +UNIV_INLINE +dyn_block_t* +dyn_array_get_first_block( +/*======================*/ + dyn_array_t* arr); /* in: dyn array */ +/**************************************************************** +Gets the last block in a dyn array. */ +UNIV_INLINE +dyn_block_t* +dyn_array_get_last_block( +/*=====================*/ + dyn_array_t* arr); /* in: dyn array */ +/************************************************************************ +Gets the next block in a dyn array. 
*/ +UNIV_INLINE +dyn_block_t* +dyn_array_get_next_block( +/*=====================*/ + /* out: pointer to next, NULL if end of list */ + dyn_array_t* arr, /* in: dyn array */ + dyn_block_t* block); /* in: dyn array block */ +/************************************************************************ +Gets the number of used bytes in a dyn array block. */ +UNIV_INLINE +ulint +dyn_block_get_used( +/*===============*/ + /* out: number of bytes used */ + dyn_block_t* block); /* in: dyn array block */ +/************************************************************************ +Gets pointer to the start of data in a dyn array block. */ +UNIV_INLINE +byte* +dyn_block_get_data( +/*===============*/ + /* out: pointer to data */ + dyn_block_t* block); /* in: dyn array block */ +/************************************************************ +Pushes n bytes to a dyn array. */ +UNIV_INLINE +void +dyn_push_string( +/*============*/ + dyn_array_t* arr, /* in: dyn array */ + const byte* str, /* in: string to write */ + ulint len); /* in: string length */ + +/*#################################################################*/ + +/* NOTE! Do not use the fields of the struct directly: the definition +appears here only for the compiler to know its size! 
*/ +struct dyn_block_struct{ + mem_heap_t* heap; /* in the first block this is != NULL + if dynamic allocation has been needed */ + ulint used; /* number of data bytes used in this block */ + byte data[DYN_ARRAY_DATA_SIZE]; + /* storage for array elements */ + UT_LIST_BASE_NODE_T(dyn_block_t) base; + /* linear list of dyn blocks: this node is + used only in the first block */ + UT_LIST_NODE_T(dyn_block_t) list; + /* linear list node: used in all blocks */ +#ifdef UNIV_DEBUG + ulint buf_end;/* only in the debug version: if dyn array is + opened, this is the buffer end offset, else + this is 0 */ + ulint magic_n; +#endif +}; + + +#ifndef UNIV_NONINL +#include "dyn0dyn.ic" +#endif + +#endif diff --git a/storage/xtradb/include/dyn0dyn.ic b/storage/xtradb/include/dyn0dyn.ic new file mode 100644 index 00000000000..1ef8b284a99 --- /dev/null +++ b/storage/xtradb/include/dyn0dyn.ic @@ -0,0 +1,362 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The dynamically allocated array + +Created 2/5/1996 Heikki Tuuri +*******************************************************/ + +#define DYN_BLOCK_MAGIC_N 375767 +#define DYN_BLOCK_FULL_FLAG 0x1000000UL + +/**************************************************************** +Adds a new block to a dyn array. */ +UNIV_INTERN +dyn_block_t* +dyn_array_add_block( +/*================*/ + /* out: created block */ + dyn_array_t* arr); /* in: dyn array */ + + +/**************************************************************** +Gets the first block in a dyn array. */ +UNIV_INLINE +dyn_block_t* +dyn_array_get_first_block( +/*======================*/ + dyn_array_t* arr) /* in: dyn array */ +{ + return(arr); +} + +/**************************************************************** +Gets the last block in a dyn array. */ +UNIV_INLINE +dyn_block_t* +dyn_array_get_last_block( +/*=====================*/ + dyn_array_t* arr) /* in: dyn array */ +{ + if (arr->heap == NULL) { + + return(arr); + } + + return(UT_LIST_GET_LAST(arr->base)); +} + +/************************************************************************ +Gets the next block in a dyn array. */ +UNIV_INLINE +dyn_block_t* +dyn_array_get_next_block( +/*=====================*/ + /* out: pointer to next, NULL if end of list */ + dyn_array_t* arr, /* in: dyn array */ + dyn_block_t* block) /* in: dyn array block */ +{ + ut_ad(arr && block); + + if (arr->heap == NULL) { + ut_ad(arr == block); + + return(NULL); + } + + return(UT_LIST_GET_NEXT(list, block)); +} + +/************************************************************************ +Gets the number of used bytes in a dyn array block. 
*/ +UNIV_INLINE +ulint +dyn_block_get_used( +/*===============*/ + /* out: number of bytes used */ + dyn_block_t* block) /* in: dyn array block */ +{ + ut_ad(block); + + return((block->used) & ~DYN_BLOCK_FULL_FLAG); +} + +/************************************************************************ +Gets pointer to the start of data in a dyn array block. */ +UNIV_INLINE +byte* +dyn_block_get_data( +/*===============*/ + /* out: pointer to data */ + dyn_block_t* block) /* in: dyn array block */ +{ + ut_ad(block); + + return(block->data); +} + +/************************************************************************* +Initializes a dynamic array. */ +UNIV_INLINE +dyn_array_t* +dyn_array_create( +/*=============*/ + /* out: initialized dyn array */ + dyn_array_t* arr) /* in: pointer to a memory buffer of + size sizeof(dyn_array_t) */ +{ + ut_ad(arr); +#if DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG +# error "DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG" +#endif + + arr->heap = NULL; + arr->used = 0; + +#ifdef UNIV_DEBUG + arr->buf_end = 0; + arr->magic_n = DYN_BLOCK_MAGIC_N; +#endif + return(arr); +} + +/**************************************************************** +Frees a dynamic array. */ +UNIV_INLINE +void +dyn_array_free( +/*===========*/ + dyn_array_t* arr) /* in: dyn array */ +{ + if (arr->heap != NULL) { + mem_heap_free(arr->heap); + } + +#ifdef UNIV_DEBUG + arr->magic_n = 0; +#endif +} + +/************************************************************************* +Makes room on top of a dyn array and returns a pointer to the added element. +The caller must copy the element to the pointer returned. 
*/ +UNIV_INLINE +void* +dyn_array_push( +/*===========*/ + /* out: pointer to the element */ + dyn_array_t* arr, /* in: dynamic array */ + ulint size) /* in: size in bytes of the element */ +{ + dyn_block_t* block; + ulint used; + + ut_ad(arr); + ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); + ut_ad(size <= DYN_ARRAY_DATA_SIZE); + ut_ad(size); + + block = arr; + used = block->used; + + if (used + size > DYN_ARRAY_DATA_SIZE) { + /* Get the last array block */ + + block = dyn_array_get_last_block(arr); + used = block->used; + + if (used + size > DYN_ARRAY_DATA_SIZE) { + block = dyn_array_add_block(arr); + used = block->used; + } + } + + block->used = used + size; + ut_ad(block->used <= DYN_ARRAY_DATA_SIZE); + + return((block->data) + used); +} + +/************************************************************************* +Makes room on top of a dyn array and returns a pointer to a buffer in it. +After copying the elements, the caller must close the buffer using +dyn_array_close. */ +UNIV_INLINE +byte* +dyn_array_open( +/*===========*/ + /* out: pointer to the buffer */ + dyn_array_t* arr, /* in: dynamic array */ + ulint size) /* in: size in bytes of the buffer; MUST NOT be + larger than DYN_ARRAY_DATA_SIZE! */ +{ + dyn_block_t* block; + ulint used; + + ut_ad(arr); + ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); + ut_ad(size <= DYN_ARRAY_DATA_SIZE); + ut_ad(size); + + block = arr; + used = block->used; + + if (used + size > DYN_ARRAY_DATA_SIZE) { + /* Get the last array block */ + + block = dyn_array_get_last_block(arr); + used = block->used; + + if (used + size > DYN_ARRAY_DATA_SIZE) { + block = dyn_array_add_block(arr); + used = block->used; + ut_a(size <= DYN_ARRAY_DATA_SIZE); + } + } + + ut_ad(block->used <= DYN_ARRAY_DATA_SIZE); +#ifdef UNIV_DEBUG + ut_ad(arr->buf_end == 0); + + arr->buf_end = used + size; +#endif + return((block->data) + used); +} + +/************************************************************************* +Closes the buffer returned by dyn_array_open. 
*/ +UNIV_INLINE +void +dyn_array_close( +/*============*/ + dyn_array_t* arr, /* in: dynamic array */ + byte* ptr) /* in: buffer space from ptr up was not used */ +{ + dyn_block_t* block; + + ut_ad(arr); + ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); + + block = dyn_array_get_last_block(arr); + + ut_ad(arr->buf_end + block->data >= ptr); + + block->used = ptr - block->data; + + ut_ad(block->used <= DYN_ARRAY_DATA_SIZE); + +#ifdef UNIV_DEBUG + arr->buf_end = 0; +#endif +} + +/**************************************************************** +Returns pointer to an element in dyn array. */ +UNIV_INLINE +void* +dyn_array_get_element( +/*==================*/ + /* out: pointer to element */ + dyn_array_t* arr, /* in: dyn array */ + ulint pos) /* in: position of element as bytes + from array start */ +{ + dyn_block_t* block; + ulint used; + + ut_ad(arr); + ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); + + /* Get the first array block */ + block = dyn_array_get_first_block(arr); + + if (arr->heap != NULL) { + used = dyn_block_get_used(block); + + while (pos >= used) { + pos -= used; + block = UT_LIST_GET_NEXT(list, block); + ut_ad(block); + + used = dyn_block_get_used(block); + } + } + + ut_ad(block); + ut_ad(dyn_block_get_used(block) >= pos); + + return(block->data + pos); +} + +/**************************************************************** +Returns the size of stored data in a dyn array. 
*/ +UNIV_INLINE +ulint +dyn_array_get_data_size( +/*====================*/ + /* out: data size in bytes */ + dyn_array_t* arr) /* in: dyn array */ +{ + dyn_block_t* block; + ulint sum = 0; + + ut_ad(arr); + ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); + + if (arr->heap == NULL) { + + return(arr->used); + } + + /* Get the first array block */ + block = dyn_array_get_first_block(arr); + + while (block != NULL) { + sum += dyn_block_get_used(block); + block = dyn_array_get_next_block(arr, block); + } + + return(sum); +} + +/************************************************************ +Pushes n bytes to a dyn array. */ +UNIV_INLINE +void +dyn_push_string( +/*============*/ + dyn_array_t* arr, /* in: dyn array */ + const byte* str, /* in: string to write */ + ulint len) /* in: string length */ +{ + ulint n_copied; + + while (len > 0) { + if (len > DYN_ARRAY_DATA_SIZE) { + n_copied = DYN_ARRAY_DATA_SIZE; + } else { + n_copied = len; + } + + memcpy(dyn_array_push(arr, n_copied), str, n_copied); + + str += n_copied; + len -= n_copied; + } +} diff --git a/storage/xtradb/include/eval0eval.h b/storage/xtradb/include/eval0eval.h new file mode 100644 index 00000000000..75cf9b38c3a --- /dev/null +++ b/storage/xtradb/include/eval0eval.h @@ -0,0 +1,113 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +SQL evaluator: evaluates simple data structures, like expressions, in +a query graph + +Created 12/29/1997 Heikki Tuuri +*******************************************************/ + +#ifndef eval0eval_h +#define eval0eval_h + +#include "univ.i" +#include "que0types.h" +#include "pars0sym.h" +#include "pars0pars.h" + +/********************************************************************* +Free the buffer from global dynamic memory for a value of a que_node, +if it has been allocated in the above function. The freeing for pushed +column values is done in sel_col_prefetch_buf_free. */ +UNIV_INTERN +void +eval_node_free_val_buf( +/*===================*/ + que_node_t* node); /* in: query graph node */ +/********************************************************************* +Evaluates a symbol table symbol. */ +UNIV_INLINE +void +eval_sym( +/*=====*/ + sym_node_t* sym_node); /* in: symbol table node */ +/********************************************************************* +Evaluates an expression. */ +UNIV_INLINE +void +eval_exp( +/*=====*/ + que_node_t* exp_node); /* in: expression */ +/********************************************************************* +Sets an integer value as the value of an expression node. */ +UNIV_INLINE +void +eval_node_set_int_val( +/*==================*/ + que_node_t* node, /* in: expression node */ + lint val); /* in: value to set */ +/********************************************************************* +Gets an integer value from an expression node. 
*/ +UNIV_INLINE +lint +eval_node_get_int_val( +/*==================*/ + /* out: integer value */ + que_node_t* node); /* in: expression node */ +/********************************************************************* +Copies a binary string value as the value of a query graph node. Allocates a +new buffer if necessary. */ +UNIV_INLINE +void +eval_node_copy_and_alloc_val( +/*=========================*/ + que_node_t* node, /* in: query graph node */ + const byte* str, /* in: binary string */ + ulint len); /* in: string length or UNIV_SQL_NULL */ +/********************************************************************* +Copies a query node value to another node. */ +UNIV_INLINE +void +eval_node_copy_val( +/*===============*/ + que_node_t* node1, /* in: node to copy to */ + que_node_t* node2); /* in: node to copy from */ +/********************************************************************* +Gets a iboolean value from a query node. */ +UNIV_INLINE +ibool +eval_node_get_ibool_val( +/*====================*/ + /* out: iboolean value */ + que_node_t* node); /* in: query graph node */ +/********************************************************************* +Evaluates a comparison node. */ +UNIV_INTERN +ibool +eval_cmp( +/*=====*/ + /* out: the result of the comparison */ + func_node_t* cmp_node); /* in: comparison node */ + + +#ifndef UNIV_NONINL +#include "eval0eval.ic" +#endif + +#endif diff --git a/storage/xtradb/include/eval0eval.ic b/storage/xtradb/include/eval0eval.ic new file mode 100644 index 00000000000..a6330ae441f --- /dev/null +++ b/storage/xtradb/include/eval0eval.ic @@ -0,0 +1,250 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +SQL evaluator: evaluates simple data structures, like expressions, in +a query graph + +Created 12/29/1997 Heikki Tuuri +*******************************************************/ + +#include "que0que.h" +#include "rem0cmp.h" +#include "pars0grm.h" + +/********************************************************************* +Evaluates a function node. */ +UNIV_INTERN +void +eval_func( +/*======*/ + func_node_t* func_node); /* in: function node */ +/********************************************************************* +Allocate a buffer from global dynamic memory for a value of a que_node. +NOTE that this memory must be explicitly freed when the query graph is +freed. If the node already has allocated buffer, that buffer is freed +here. NOTE that this is the only function where dynamic memory should be +allocated for a query node val field. */ +UNIV_INTERN +byte* +eval_node_alloc_val_buf( +/*====================*/ + /* out: pointer to allocated buffer */ + que_node_t* node, /* in: query graph node; sets the val field + data field to point to the new buffer, and + len field equal to size */ + ulint size); /* in: buffer size */ + + +/********************************************************************* +Allocates a new buffer if needed. 
*/ +UNIV_INLINE +byte* +eval_node_ensure_val_buf( +/*=====================*/ + /* out: pointer to buffer */ + que_node_t* node, /* in: query graph node; sets the val field + data field to point to the new buffer, and + len field equal to size */ + ulint size) /* in: buffer size */ +{ + dfield_t* dfield; + byte* data; + + dfield = que_node_get_val(node); + dfield_set_len(dfield, size); + + data = dfield_get_data(dfield); + + if (!data || que_node_get_val_buf_size(node) < size) { + + data = eval_node_alloc_val_buf(node, size); + } + + return(data); +} + +/********************************************************************* +Evaluates a symbol table symbol. */ +UNIV_INLINE +void +eval_sym( +/*=====*/ + sym_node_t* sym_node) /* in: symbol table node */ +{ + + ut_ad(que_node_get_type(sym_node) == QUE_NODE_SYMBOL); + + if (sym_node->indirection) { + /* The symbol table node is an alias for a variable or a + column */ + + dfield_copy_data(que_node_get_val(sym_node), + que_node_get_val(sym_node->indirection)); + } +} + +/********************************************************************* +Evaluates an expression. */ +UNIV_INLINE +void +eval_exp( +/*=====*/ + que_node_t* exp_node) /* in: expression */ +{ + if (que_node_get_type(exp_node) == QUE_NODE_SYMBOL) { + + eval_sym((sym_node_t*)exp_node); + + return; + } + + eval_func(exp_node); +} + +/********************************************************************* +Sets an integer value as the value of an expression node. 
*/ +UNIV_INLINE +void +eval_node_set_int_val( +/*==================*/ + que_node_t* node, /* in: expression node */ + lint val) /* in: value to set */ +{ + dfield_t* dfield; + byte* data; + + dfield = que_node_get_val(node); + + data = dfield_get_data(dfield); + + if (data == NULL) { + data = eval_node_alloc_val_buf(node, 4); + } + + ut_ad(dfield_get_len(dfield) == 4); + + mach_write_to_4(data, (ulint)val); +} + +/********************************************************************* +Gets an integer non-SQL null value from an expression node. */ +UNIV_INLINE +lint +eval_node_get_int_val( +/*==================*/ + /* out: integer value */ + que_node_t* node) /* in: expression node */ +{ + dfield_t* dfield; + + dfield = que_node_get_val(node); + + ut_ad(dfield_get_len(dfield) == 4); + + return((int)mach_read_from_4(dfield_get_data(dfield))); +} + +/********************************************************************* +Gets a iboolean value from a query node. */ +UNIV_INLINE +ibool +eval_node_get_ibool_val( +/*====================*/ + /* out: iboolean value */ + que_node_t* node) /* in: query graph node */ +{ + dfield_t* dfield; + byte* data; + + dfield = que_node_get_val(node); + + data = dfield_get_data(dfield); + + ut_ad(data != NULL); + + return(mach_read_from_1(data)); +} + +/********************************************************************* +Sets a iboolean value as the value of a function node. 
*/ +UNIV_INLINE +void +eval_node_set_ibool_val( +/*====================*/ + func_node_t* func_node, /* in: function node */ + ibool val) /* in: value to set */ +{ + dfield_t* dfield; + byte* data; + + dfield = que_node_get_val(func_node); + + data = dfield_get_data(dfield); + + if (data == NULL) { + /* Allocate 1 byte to hold the value */ + + data = eval_node_alloc_val_buf(func_node, 1); + } + + ut_ad(dfield_get_len(dfield) == 1); + + mach_write_to_1(data, val); +} + +/********************************************************************* +Copies a binary string value as the value of a query graph node. Allocates a +new buffer if necessary. */ +UNIV_INLINE +void +eval_node_copy_and_alloc_val( +/*=========================*/ + que_node_t* node, /* in: query graph node */ + const byte* str, /* in: binary string */ + ulint len) /* in: string length or UNIV_SQL_NULL */ +{ + byte* data; + + if (len == UNIV_SQL_NULL) { + dfield_set_len(que_node_get_val(node), len); + + return; + } + + data = eval_node_ensure_val_buf(node, len); + + ut_memcpy(data, str, len); +} + +/********************************************************************* +Copies a query node value to another node. */ +UNIV_INLINE +void +eval_node_copy_val( +/*===============*/ + que_node_t* node1, /* in: node to copy to */ + que_node_t* node2) /* in: node to copy from */ +{ + dfield_t* dfield2; + + dfield2 = que_node_get_val(node2); + + eval_node_copy_and_alloc_val(node1, dfield_get_data(dfield2), + dfield_get_len(dfield2)); +} diff --git a/storage/xtradb/include/eval0proc.h b/storage/xtradb/include/eval0proc.h new file mode 100644 index 00000000000..58937c18124 --- /dev/null +++ b/storage/xtradb/include/eval0proc.h @@ -0,0 +1,103 @@ +/***************************************************************************** + +Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Executes SQL stored procedures and their control structures + +Created 1/20/1998 Heikki Tuuri +*******************************************************/ + +#ifndef eval0proc_h +#define eval0proc_h + +#include "univ.i" +#include "que0types.h" +#include "pars0sym.h" +#include "pars0pars.h" + +/************************************************************************** +Performs an execution step of a procedure node. */ +UNIV_INLINE +que_thr_t* +proc_step( +/*======*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************** +Performs an execution step of an if-statement node. */ +UNIV_INTERN +que_thr_t* +if_step( +/*====*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************** +Performs an execution step of a while-statement node. 
*/ +UNIV_INTERN +que_thr_t* +while_step( +/*=======*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************** +Performs an execution step of a for-loop node. */ +UNIV_INTERN +que_thr_t* +for_step( +/*=====*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************** +Performs an execution step of an assignment statement node. */ +UNIV_INTERN +que_thr_t* +assign_step( +/*========*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************** +Performs an execution step of a procedure call node. */ +UNIV_INLINE +que_thr_t* +proc_eval_step( +/*===========*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************** +Performs an execution step of an exit statement node. */ +UNIV_INTERN +que_thr_t* +exit_step( +/*======*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************** +Performs an execution step of a return-statement node. */ +UNIV_INTERN +que_thr_t* +return_step( +/*========*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ + + +#ifndef UNIV_NONINL +#include "eval0proc.ic" +#endif + +#endif diff --git a/storage/xtradb/include/eval0proc.ic b/storage/xtradb/include/eval0proc.ic new file mode 100644 index 00000000000..6bd978ad3fc --- /dev/null +++ b/storage/xtradb/include/eval0proc.ic @@ -0,0 +1,87 @@ +/***************************************************************************** + +Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Executes SQL stored procedures and their control structures + +Created 1/20/1998 Heikki Tuuri +*******************************************************/ + +#include "pars0pars.h" +#include "que0que.h" +#include "eval0eval.h" + +/************************************************************************** +Performs an execution step of a procedure node. */ +UNIV_INLINE +que_thr_t* +proc_step( +/*======*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr) /* in: query thread */ +{ + proc_node_t* node; + + ut_ad(thr); + + node = thr->run_node; + ut_ad(que_node_get_type(node) == QUE_NODE_PROC); + + if (thr->prev_node == que_node_get_parent(node)) { + /* Start execution from the first statement in the statement + list */ + + thr->run_node = node->stat_list; + } else { + /* Move to the next statement */ + ut_ad(que_node_get_next(thr->prev_node) == NULL); + + thr->run_node = NULL; + } + + if (thr->run_node == NULL) { + thr->run_node = que_node_get_parent(node); + } + + return(thr); +} + +/************************************************************************** +Performs an execution step of a procedure call node. 
*/ +UNIV_INLINE +que_thr_t* +proc_eval_step( +/*===========*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr) /* in: query thread */ +{ + func_node_t* node; + + ut_ad(thr); + + node = thr->run_node; + ut_ad(que_node_get_type(node) == QUE_NODE_FUNC); + + /* Evaluate the procedure */ + + eval_exp(node); + + thr->run_node = que_node_get_parent(node); + + return(thr); +} diff --git a/storage/xtradb/include/fil0fil.h b/storage/xtradb/include/fil0fil.h new file mode 100644 index 00000000000..587e5ee48a8 --- /dev/null +++ b/storage/xtradb/include/fil0fil.h @@ -0,0 +1,711 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The low-level file system + +Created 10/25/1995 Heikki Tuuri +*******************************************************/ + +#ifndef fil0fil_h +#define fil0fil_h + +#include "univ.i" +#include "sync0rw.h" +#include "dict0types.h" +#include "ut0byte.h" +#include "os0file.h" + +/* When mysqld is run, the default directory "." 
is the mysqld datadir, but in +ibbackup we must set it explicitly; the path must NOT contain the trailing +'/' or '\' */ +extern const char* fil_path_to_mysql_datadir; + +/* Initial size of a single-table tablespace in pages */ +#define FIL_IBD_FILE_INITIAL_SIZE 4 + +/* 'null' (undefined) page offset in the context of file spaces */ +#define FIL_NULL ULINT32_UNDEFINED + +/* Space address data type; this is intended to be used when +addresses accurate to a byte are stored in file pages. If the page part +of the address is FIL_NULL, the address is considered undefined. */ + +typedef byte fil_faddr_t; /* 'type' definition in C: an address + stored in a file page is a string of bytes */ +#define FIL_ADDR_PAGE 0 /* first in address is the page offset */ +#define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/ + +#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */ + +/* A struct for storing a space address FIL_ADDR, when it is used +in C program data structures. */ + +typedef struct fil_addr_struct fil_addr_t; +struct fil_addr_struct{ + ulint page; /* page number within a space */ + ulint boffset; /* byte offset within the page */ +}; + +/* Null file address */ +extern fil_addr_t fil_addr_null; + +/* The byte offsets on a file page for various variables */ +#define FIL_PAGE_SPACE_OR_CHKSUM 0 /* in < MySQL-4.0.14 space id the + page belongs to (== 0) but in later + versions the 'new' checksum of the + page */ +#define FIL_PAGE_OFFSET 4 /* page offset inside space */ +#define FIL_PAGE_PREV 8 /* if there is a 'natural' predecessor + of the page, its offset. + Otherwise FIL_NULL. + This field is not set on BLOB pages, + which are stored as a singly-linked + list. See also FIL_PAGE_NEXT. */ +#define FIL_PAGE_NEXT 12 /* if there is a 'natural' successor + of the page, its offset. + Otherwise FIL_NULL. 
+ B-tree index pages + (FIL_PAGE_TYPE contains FIL_PAGE_INDEX) + on the same PAGE_LEVEL are maintained + as a doubly linked list via + FIL_PAGE_PREV and FIL_PAGE_NEXT + in the collation order of the + smallest user record on each page. */ +#define FIL_PAGE_LSN 16 /* lsn of the end of the newest + modification log record to the page */ +#define FIL_PAGE_TYPE 24 /* file page type: FIL_PAGE_INDEX,..., + 2 bytes. + + The contents of this field can only + be trusted in the following case: + if the page is an uncompressed + B-tree index page, then it is + guaranteed that the value is + FIL_PAGE_INDEX. + The opposite does not hold. + + In tablespaces created by + MySQL/InnoDB 5.1.7 or later, the + contents of this field is valid + for all uncompressed pages. */ +#define FIL_PAGE_FILE_FLUSH_LSN 26 /* this is only defined for the + first page in a data file: the file + has been flushed to disk at least up + to this lsn */ +#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /* starting from 4.1.x this + contains the space id of the page */ +#define FIL_PAGE_DATA 38 /* start of the data on the page */ + +/* File page trailer */ +#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /* the low 4 bytes of this are used + to store the page checksum, the + last 4 bytes should be identical + to the last 4 bytes of FIL_PAGE_LSN */ +#define FIL_PAGE_DATA_END 8 + +/* File page types (values of FIL_PAGE_TYPE) */ +#define FIL_PAGE_INDEX 17855 /* B-tree node */ +#define FIL_PAGE_UNDO_LOG 2 /* Undo log page */ +#define FIL_PAGE_INODE 3 /* Index node */ +#define FIL_PAGE_IBUF_FREE_LIST 4 /* Insert buffer free list */ +/* File page types introduced in MySQL/InnoDB 5.1.7 */ +#define FIL_PAGE_TYPE_ALLOCATED 0 /* Freshly allocated page */ +#define FIL_PAGE_IBUF_BITMAP 5 /* Insert buffer bitmap */ +#define FIL_PAGE_TYPE_SYS 6 /* System page */ +#define FIL_PAGE_TYPE_TRX_SYS 7 /* Transaction system data */ +#define FIL_PAGE_TYPE_FSP_HDR 8 /* File space header */ +#define FIL_PAGE_TYPE_XDES 9 /* Extent descriptor page 
*/ +#define FIL_PAGE_TYPE_BLOB 10 /* Uncompressed BLOB page */ +#define FIL_PAGE_TYPE_ZBLOB 11 /* First compressed BLOB page */ +#define FIL_PAGE_TYPE_ZBLOB2 12 /* Subsequent compressed BLOB page */ + +/* Space types */ +#define FIL_TABLESPACE 501 +#define FIL_LOG 502 + +extern ulint fil_n_log_flushes; + +extern ulint fil_n_pending_log_flushes; +extern ulint fil_n_pending_tablespace_flushes; + + +/*********************************************************************** +Returns the version number of a tablespace, -1 if not found. */ +UNIV_INTERN +ib_int64_t +fil_space_get_version( +/*==================*/ + /* out: version number, -1 if the tablespace does not + exist in the memory cache */ + ulint id); /* in: space id */ +/*********************************************************************** +Returns the latch of a file space. */ +UNIV_INTERN +rw_lock_t* +fil_space_get_latch( +/*================*/ + /* out: latch protecting storage allocation */ + ulint id, /* in: space id */ + ulint* zip_size);/* out: compressed page size, or + 0 for uncompressed tablespaces */ +/*********************************************************************** +Returns the type of a file space. */ +UNIV_INTERN +ulint +fil_space_get_type( +/*===============*/ + /* out: FIL_TABLESPACE or FIL_LOG */ + ulint id); /* in: space id */ +/*********************************************************************** +Appends a new file to the chain of files of a space. File must be closed. */ +UNIV_INTERN +void +fil_node_create( +/*============*/ + const char* name, /* in: file name (file must be closed) */ + ulint size, /* in: file size in database blocks, rounded + downwards to an integer */ + ulint id, /* in: space id where to append */ + ibool is_raw);/* in: TRUE if a raw device or + a raw disk partition */ +#ifdef UNIV_LOG_ARCHIVE +/******************************************************************** +Drops files from the start of a file space, so that its size is cut by +the amount given. 
*/ +UNIV_INTERN +void +fil_space_truncate_start( +/*=====================*/ + ulint id, /* in: space id */ + ulint trunc_len); /* in: truncate by this much; it is an error + if this does not equal the combined size of + some initial files in the space */ +#endif /* UNIV_LOG_ARCHIVE */ +/*********************************************************************** +Creates a space memory object and puts it to the 'fil system' hash table. If +there is an error, prints an error message to the .err log. */ +UNIV_INTERN +ibool +fil_space_create( +/*=============*/ + /* out: TRUE if success */ + const char* name, /* in: space name */ + ulint id, /* in: space id */ + ulint zip_size,/* in: compressed page size, or + 0 for uncompressed tablespaces */ + ulint purpose);/* in: FIL_TABLESPACE, or FIL_LOG if log */ +/*********************************************************************** +Frees a space object from the tablespace memory cache. Closes the files in +the chain but does not delete them. */ +UNIV_INTERN +ibool +fil_space_free( +/*===========*/ + /* out: TRUE if success */ + ulint id); /* in: space id */ +/*********************************************************************** +Returns the size of the space in pages. The tablespace must be cached in the +memory cache. */ +UNIV_INTERN +ulint +fil_space_get_size( +/*===============*/ + /* out: space size, 0 if space not found */ + ulint id); /* in: space id */ +/*********************************************************************** +Returns the flags of the space. The tablespace must be cached +in the memory cache. */ +UNIV_INTERN +ulint +fil_space_get_flags( +/*================*/ + /* out: flags, ULINT_UNDEFINED if space not found */ + ulint id); /* in: space id */ +/*********************************************************************** +Returns the compressed page size of the space, or 0 if the space +is not compressed. The tablespace must be cached in the memory cache. 
*/ +UNIV_INTERN +ulint +fil_space_get_zip_size( +/*===================*/ + /* out: compressed page size, ULINT_UNDEFINED + if space not found */ + ulint id); /* in: space id */ +/*********************************************************************** +Checks if the pair space, page_no refers to an existing page in a tablespace +file space. The tablespace must be cached in the memory cache. */ +UNIV_INTERN +ibool +fil_check_adress_in_tablespace( +/*===========================*/ + /* out: TRUE if the address is meaningful */ + ulint id, /* in: space id */ + ulint page_no);/* in: page number */ +/******************************************************************** +Initializes the tablespace memory cache. */ +UNIV_INTERN +void +fil_init( +/*=====*/ + ulint max_n_open); /* in: max number of open files */ +/*********************************************************************** +Opens all log files and system tablespace data files. They stay open until the +database server shutdown. This should be called at a server startup after the +space objects for the log and the system tablespace have been created. The +purpose of this operation is to make sure we never run out of file descriptors +if we need to read from the insert buffer or to write to the log. */ +UNIV_INTERN +void +fil_open_log_and_system_tablespace_files(void); +/*==========================================*/ +/*********************************************************************** +Closes all open files. There must not be any pending i/o's or not flushed +modifications in the files. */ +UNIV_INTERN +void +fil_close_all_files(void); +/*=====================*/ +/*********************************************************************** +Sets the max tablespace id counter if the given number is bigger than the +previous value. 
*/ +UNIV_INTERN +void +fil_set_max_space_id_if_bigger( +/*===========================*/ + ulint max_id);/* in: maximum known id */ +/******************************************************************** +Writes the flushed lsn and the latest archived log number to the page +header of the first page of each data file in the system tablespace. */ +UNIV_INTERN +ulint +fil_write_flushed_lsn_to_data_files( +/*================================*/ + /* out: DB_SUCCESS or error number */ + ib_uint64_t lsn, /* in: lsn to write */ + ulint arch_log_no); /* in: latest archived log + file number */ +/*********************************************************************** +Reads the flushed lsn and arch no fields from a data file at database +startup. */ +UNIV_INTERN +void +fil_read_flushed_lsn_and_arch_log_no( +/*=================================*/ + os_file_t data_file, /* in: open data file */ + ibool one_read_already, /* in: TRUE if min and max + parameters below already + contain sensible data */ +#ifdef UNIV_LOG_ARCHIVE + ulint* min_arch_log_no, /* in/out: */ + ulint* max_arch_log_no, /* in/out: */ +#endif /* UNIV_LOG_ARCHIVE */ + ib_uint64_t* min_flushed_lsn, /* in/out: */ + ib_uint64_t* max_flushed_lsn); /* in/out: */ +/*********************************************************************** +Increments the count of pending insert buffer page merges, if space is not +being deleted. */ +UNIV_INTERN +ibool +fil_inc_pending_ibuf_merges( +/*========================*/ + /* out: TRUE if being deleted, and ibuf merges should + be skipped */ + ulint id); /* in: space id */ +/*********************************************************************** +Decrements the count of pending insert buffer page merges. */ +UNIV_INTERN +void +fil_decr_pending_ibuf_merges( +/*=========================*/ + ulint id); /* in: space id */ +/*********************************************************************** +Parses the body of a log record written about an .ibd file operation. 
That is, +the log record part after the standard (type, space id, page no) header of the +log record. + +If desired, also replays the delete or rename operation if the .ibd file +exists and the space id in it matches. Replays the create operation if a file +at that path does not exist yet. If the database directory for the file to be +created does not exist, then we create the directory, too. + +Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the +datadir that we should use in replaying the file operations. */ +UNIV_INTERN +byte* +fil_op_log_parse_or_replay( +/*=======================*/ + /* out: end of log record, or NULL if the + record was not completely contained between + ptr and end_ptr */ + byte* ptr, /* in: buffer containing the log record body, + or an initial segment of it, if the record does + not fit completely between ptr and end_ptr */ + byte* end_ptr, /* in: buffer end */ + ulint type, /* in: the type of this log record */ + ulint space_id); /* in: the space id of the tablespace in + question, or 0 if the log record should + only be parsed but not replayed */ +/*********************************************************************** +Deletes a single-table tablespace. The tablespace must be cached in the +memory cache. */ +UNIV_INTERN +ibool +fil_delete_tablespace( +/*==================*/ + /* out: TRUE if success */ + ulint id); /* in: space id */ +/*********************************************************************** +Discards a single-table tablespace. The tablespace must be cached in the +memory cache. Discarding is like deleting a tablespace, but +1) we do not drop the table from the data dictionary; +2) we remove all insert buffer entries for the tablespace immediately; in DROP +TABLE they are only removed gradually in the background; +3) when the user does IMPORT TABLESPACE, the tablespace will have the same id +as it originally had. 
*/ +UNIV_INTERN +ibool +fil_discard_tablespace( +/*===================*/ + /* out: TRUE if success */ + ulint id); /* in: space id */ +/*********************************************************************** +Renames a single-table tablespace. The tablespace must be cached in the +tablespace memory cache. */ +UNIV_INTERN +ibool +fil_rename_tablespace( +/*==================*/ + /* out: TRUE if success */ + const char* old_name, /* in: old table name in the standard + databasename/tablename format of + InnoDB, or NULL if we do the rename + based on the space id only */ + ulint id, /* in: space id */ + const char* new_name); /* in: new table name in the standard + databasename/tablename format + of InnoDB */ + +/*********************************************************************** +Creates a new single-table tablespace to a database directory of MySQL. +Database directories are under the 'datadir' of MySQL. The datadir is the +directory of a running mysqld program. We can refer to it by simply the +path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp +dir of the mysqld server. */ +UNIV_INTERN +ulint +fil_create_new_single_table_tablespace( +/*===================================*/ + /* out: DB_SUCCESS or error code */ + ulint* space_id, /* in/out: space id; if this is != 0, + then this is an input parameter, + otherwise output */ + const char* tablename, /* in: the table name in the usual + databasename/tablename format + of InnoDB, or a dir path to a temp + table */ + ibool is_temp, /* in: TRUE if a table created with + CREATE TEMPORARY TABLE */ + ulint flags, /* in: tablespace flags */ + ulint size); /* in: the initial size of the + tablespace file in pages, + must be >= FIL_IBD_FILE_INITIAL_SIZE */ +/************************************************************************ +Tries to open a single-table tablespace and optionally checks the space id is +right in it. If does not succeed, prints an error message to the .err log. 
This +function is used to open a tablespace when we start up mysqld, and also in +IMPORT TABLESPACE. +NOTE that we assume this operation is used either at the database startup +or under the protection of the dictionary mutex, so that two users cannot +race here. This operation does not leave the file associated with the +tablespace open, but closes it after we have looked at the space id in it. */ +UNIV_INTERN +ibool +fil_open_single_table_tablespace( +/*=============================*/ + /* out: TRUE if success */ + ibool check_space_id, /* in: should we check that the space + id in the file is right; we assume + that this function runs much faster + if no check is made, since accessing + the file inode probably is much + faster (the OS caches them) than + accessing the first page of the file */ + ulint id, /* in: space id */ + ulint flags, /* in: tablespace flags */ + const char* name); /* in: table name in the + databasename/tablename format */ +/************************************************************************ +It is possible, though very improbable, that the lsn's in the tablespace to be +imported have risen above the current system lsn, if a lengthy purge, ibuf +merge, or rollback was performed on a backup taken with ibbackup. If that is +the case, reset page lsn's in the file. We assume that mysqld was shut down +after it performed these cleanup operations on the .ibd file, so that it at +the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the +first page of the .ibd file, and we can determine whether we need to reset the +lsn's just by looking at that flush lsn. 
*/ +UNIV_INTERN +ibool +fil_reset_too_high_lsns( +/*====================*/ + /* out: TRUE if success */ + const char* name, /* in: table name in the + databasename/tablename format */ + ib_uint64_t current_lsn); /* in: reset lsn's if the lsn stamped + to FIL_PAGE_FILE_FLUSH_LSN in the + first page is too high */ +/************************************************************************ +At the server startup, if we need crash recovery, scans the database +directories under the MySQL datadir, looking for .ibd files. Those files are +single-table tablespaces. We need to know the space id in each of them so that +we know into which file we should look to check the contents of a page stored +in the doublewrite buffer, also to know where to apply log records where the +space id is != 0. */ +UNIV_INTERN +ulint +fil_load_single_table_tablespaces(void); +/*===================================*/ + /* out: DB_SUCCESS or error number */ +/************************************************************************ +If we need crash recovery, and we have called +fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(), +we can call this function to print an error message of orphaned .ibd files +for which there is not a data dictionary entry with a matching table name +and space id. */ +UNIV_INTERN +void +fil_print_orphaned_tablespaces(void); +/*================================*/ +/*********************************************************************** +Returns TRUE if a single-table tablespace does not exist in the memory cache, +or is being deleted there. 
*/ +UNIV_INTERN +ibool +fil_tablespace_deleted_or_being_deleted_in_mem( +/*===========================================*/ + /* out: TRUE if does not exist or is being + deleted */ + ulint id, /* in: space id */ + ib_int64_t version);/* in: tablespace_version should be this; if + you pass -1 as the value of this, then this + parameter is ignored */ +/*********************************************************************** +Returns TRUE if a single-table tablespace exists in the memory cache. */ +UNIV_INTERN +ibool +fil_tablespace_exists_in_mem( +/*=========================*/ + /* out: TRUE if exists */ + ulint id); /* in: space id */ +/*********************************************************************** +Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory +cache. Note that if we have not done a crash recovery at the database startup, +there may be many tablespaces which are not yet in the memory cache. */ +UNIV_INTERN +ibool +fil_space_for_table_exists_in_mem( +/*==============================*/ + /* out: TRUE if a matching tablespace + exists in the memory cache */ + ulint id, /* in: space id */ + const char* name, /* in: table name in the standard + 'databasename/tablename' format or + the dir path to a temp table */ + ibool is_temp, /* in: TRUE if created with CREATE + TEMPORARY TABLE */ + ibool mark_space, /* in: in crash recovery, at database + startup we mark all spaces which have + an associated table in the InnoDB + data dictionary, so that + we can print a warning about orphaned + tablespaces */ + ibool print_error_if_does_not_exist); + /* in: print detailed error + information to the .err log if a + matching tablespace is not found from + memory */ +/************************************************************************** +Tries to extend a data file so that it would accommodate the number of pages +given. The tablespace must be cached in the memory cache. If the space is big +enough already, does nothing. 
*/ +UNIV_INTERN +ibool +fil_extend_space_to_desired_size( +/*=============================*/ + /* out: TRUE if success */ + ulint* actual_size, /* out: size of the space after extension; + if we ran out of disk space this may be lower + than the desired size */ + ulint space_id, /* in: space id */ + ulint size_after_extend);/* in: desired size in pages after the + extension; if the current space size is bigger + than this already, the function does nothing */ +#ifdef UNIV_HOTBACKUP +/************************************************************************ +Extends all tablespaces to the size stored in the space header. During the +ibbackup --apply-log phase we extended the spaces on-demand so that log records +could be applied, but that may have left spaces still too small compared to +the size stored in the space header. */ +UNIV_INTERN +void +fil_extend_tablespaces_to_stored_len(void); +/*======================================*/ +#endif +/*********************************************************************** +Tries to reserve free extents in a file space. */ +UNIV_INTERN +ibool +fil_space_reserve_free_extents( +/*===========================*/ + /* out: TRUE if succeed */ + ulint id, /* in: space id */ + ulint n_free_now, /* in: number of free extents now */ + ulint n_to_reserve); /* in: how many one wants to reserve */ +/*********************************************************************** +Releases free extents in a file space. */ +UNIV_INTERN +void +fil_space_release_free_extents( +/*===========================*/ + ulint id, /* in: space id */ + ulint n_reserved); /* in: how many one reserved */ +/*********************************************************************** +Gets the number of reserved extents. If the database is silent, this number +should be zero. 
*/ +UNIV_INTERN +ulint +fil_space_get_n_reserved_extents( +/*=============================*/ + ulint id); /* in: space id */ +/************************************************************************ +Reads or writes data. This operation is asynchronous (aio). */ +UNIV_INTERN +ulint +fil_io( +/*===*/ + /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED + if we are trying to do i/o on a tablespace + which does not exist */ + ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE, + ORed to OS_FILE_LOG, if a log i/o + and ORed to OS_AIO_SIMULATED_WAKE_LATER + if simulated aio and we want to post a + batch of i/os; NOTE that a simulated batch + may introduce hidden chances of deadlocks, + because i/os are not actually handled until + all have been posted: use with great + caution! */ + ibool sync, /* in: TRUE if synchronous aio is desired */ + ulint space_id, /* in: space id */ + ulint zip_size, /* in: compressed page size in bytes; + 0 for uncompressed pages */ + ulint block_offset, /* in: offset in number of blocks */ + ulint byte_offset, /* in: remainder of offset in bytes; in + aio this must be divisible by the OS block + size */ + ulint len, /* in: how many bytes to read or write; this + must not cross a file boundary; in aio this + must be a block size multiple */ + void* buf, /* in/out: buffer where to store read data + or from where to write; in aio this must be + appropriately aligned */ + void* message); /* in: message for aio handler if non-sync + aio used, else ignored */ +/************************************************************************** +Waits for an aio operation to complete. This function is used to write the +handler for completed requests. The aio array of pending requests is divided +into segments (see os0file.c for more info). The thread specifies which +segment it wants to wait for. 
*/ +UNIV_INTERN +void +fil_aio_wait( +/*=========*/ + ulint segment); /* in: the number of the segment in the aio + array to wait for */ +/************************************************************************** +Flushes to disk possible writes cached by the OS. If the space does not exist +or is being dropped, does not do anything. */ +UNIV_INTERN +void +fil_flush( +/*======*/ + ulint space_id); /* in: file space id (this can be a group of + log files or a tablespace of the database) */ +/************************************************************************** +Flushes to disk writes in file spaces of the given type possibly cached by +the OS. */ +UNIV_INTERN +void +fil_flush_file_spaces( +/*==================*/ + ulint purpose); /* in: FIL_TABLESPACE, FIL_LOG */ +/********************************************************************** +Checks the consistency of the tablespace cache. */ +UNIV_INTERN +ibool +fil_validate(void); +/*==============*/ + /* out: TRUE if ok */ +/************************************************************************ +Returns TRUE if file address is undefined. */ +UNIV_INTERN +ibool +fil_addr_is_null( +/*=============*/ + /* out: TRUE if undefined */ + fil_addr_t addr); /* in: address */ +/************************************************************************ +Accessor functions for a file page */ +UNIV_INTERN +ulint +fil_page_get_prev(const byte* page); +ulint +fil_page_get_next(const byte* page); +/************************************************************************* +Sets the file page type. */ +UNIV_INTERN +void +fil_page_set_type( +/*==============*/ + byte* page, /* in: file page */ + ulint type); /* in: type */ +/************************************************************************* +Gets the file page type. 
*/ +UNIV_INTERN +ulint +fil_page_get_type( +/*==============*/ + /* out: type; NOTE that if the type + has not been written to page, the + return value is not defined */ + const byte* page); /* in: file page */ + +/************************************************************************* +Returns local hash table information. */ + +ulint +fil_system_hash_cells(void); +/*========================*/ + +ulint +fil_system_hash_nodes(void); +/*========================*/ + +typedef struct fil_space_struct fil_space_t; + +#endif diff --git a/storage/xtradb/include/fsp0fsp.h b/storage/xtradb/include/fsp0fsp.h new file mode 100644 index 00000000000..1f6ae4b614b --- /dev/null +++ b/storage/xtradb/include/fsp0fsp.h @@ -0,0 +1,433 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +File space management + +Created 12/18/1995 Heikki Tuuri +*******************************************************/ + +#ifndef fsp0fsp_h +#define fsp0fsp_h + +#include "univ.i" + +#include "mtr0mtr.h" +#include "fut0lst.h" +#include "ut0byte.h" +#include "page0types.h" + +/* If records are inserted in order, there are the following +flags to tell this (their type is made byte for the compiler +to warn if direction and hint parameters are switched in +fseg_alloc_free_page): */ +#define FSP_UP ((byte)111) /* alphabetically upwards */ +#define FSP_DOWN ((byte)112) /* alphabetically downwards */ +#define FSP_NO_DIR ((byte)113) /* no order */ + +/* File space extent size (one megabyte) in pages */ +#define FSP_EXTENT_SIZE (1 << (20 - UNIV_PAGE_SIZE_SHIFT)) + +/* On a page of any file segment, data may be put starting from this offset: */ +#define FSEG_PAGE_DATA FIL_PAGE_DATA + +/* File segment header which points to the inode describing the file segment */ +typedef byte fseg_header_t; + +#define FSEG_HDR_SPACE 0 /* space id of the inode */ +#define FSEG_HDR_PAGE_NO 4 /* page number of the inode */ +#define FSEG_HDR_OFFSET 8 /* byte offset of the inode */ + +#define FSEG_HEADER_SIZE 10 + +/************************************************************************** +Initializes the file space system. */ +UNIV_INTERN +void +fsp_init(void); +/*==========*/ +/************************************************************************** +Gets the current free limit of the system tablespace. The free limit +means the place of the first page which has never been put to the +free list for allocation. 
The space above that address is initialized +to zero. Sets also the global variable log_fsp_current_free_limit. */ +UNIV_INTERN +ulint +fsp_header_get_free_limit(void); +/*===========================*/ + /* out: free limit in megabytes */ +/************************************************************************** +Gets the size of the system tablespace from the tablespace header. If +we do not have an auto-extending data file, this should be equal to +the size of the data files. If there is an auto-extending data file, +this can be smaller. */ +UNIV_INTERN +ulint +fsp_header_get_tablespace_size(void); +/*================================*/ + /* out: size in pages */ +/************************************************************************** +Reads the file space size stored in the header page. */ +UNIV_INTERN +ulint +fsp_get_size_low( +/*=============*/ + /* out: tablespace size stored in the space header */ + page_t* page); /* in: header page (page 0 in the tablespace) */ +/************************************************************************** +Reads the space id from the first page of a tablespace. */ +UNIV_INTERN +ulint +fsp_header_get_space_id( +/*====================*/ + /* out: space id, ULINT_UNDEFINED if error */ + const page_t* page); /* in: first page of a tablespace */ +/************************************************************************** +Reads the space flags from the first page of a tablespace. */ +UNIV_INTERN +ulint +fsp_header_get_flags( +/*=================*/ + /* out: flags */ + const page_t* page); /* in: first page of a tablespace */ +/************************************************************************** +Reads the compressed page size from the first page of a tablespace. 
*/ +UNIV_INTERN +ulint +fsp_header_get_zip_size( +/*====================*/ + /* out: compressed page size in bytes, + or 0 if uncompressed */ + const page_t* page); /* in: first page of a tablespace */ +/************************************************************************** +Writes the space id and compressed page size to a tablespace header. +This function is used past the buffer pool when we in fil0fil.c create +a new single-table tablespace. */ +UNIV_INTERN +void +fsp_header_init_fields( +/*===================*/ + page_t* page, /* in/out: first page in the space */ + ulint space_id, /* in: space id */ + ulint flags); /* in: tablespace flags (FSP_SPACE_FLAGS): + 0, or table->flags if newer than COMPACT */ +/************************************************************************** +Initializes the space header of a new created space and creates also the +insert buffer tree root if space == 0. */ +UNIV_INTERN +void +fsp_header_init( +/*============*/ + ulint space, /* in: space id */ + ulint size, /* in: current size in blocks */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************** +Increases the space size field of a space. */ +UNIV_INTERN +void +fsp_header_inc_size( +/*================*/ + ulint space, /* in: space id */ + ulint size_inc,/* in: size increment in pages */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************** +Creates a new segment. 
*/ +UNIV_INTERN +buf_block_t* +fseg_create( +/*========*/ + /* out: the block where the segment header is placed, + x-latched, NULL if could not create segment + because of lack of space */ + ulint space, /* in: space id */ + ulint page, /* in: page where the segment header is placed: if + this is != 0, the page must belong to another segment, + if this is 0, a new page will be allocated and it + will belong to the created segment */ + ulint byte_offset, /* in: byte offset of the created segment header + on the page */ + mtr_t* mtr); /* in: mtr */ +/************************************************************************** +Creates a new segment. */ +UNIV_INTERN +buf_block_t* +fseg_create_general( +/*================*/ + /* out: the block where the segment header is placed, + x-latched, NULL if could not create segment + because of lack of space */ + ulint space, /* in: space id */ + ulint page, /* in: page where the segment header is placed: if + this is != 0, the page must belong to another segment, + if this is 0, a new page will be allocated and it + will belong to the created segment */ + ulint byte_offset, /* in: byte offset of the created segment header + on the page */ + ibool has_done_reservation, /* in: TRUE if the caller has already + done the reservation for the pages with + fsp_reserve_free_extents (at least 2 extents: one for + the inode and the other for the segment) then there is + no need to do the check for this individual + operation */ + mtr_t* mtr); /* in: mtr */ +/************************************************************************** +Calculates the number of pages reserved by a segment, and how many pages are +currently used. 
*/ +UNIV_INTERN +ulint +fseg_n_reserved_pages( +/*==================*/ + /* out: number of reserved pages */ + fseg_header_t* header, /* in: segment header */ + ulint* used, /* out: number of pages used (<= reserved) */ + mtr_t* mtr); /* in: mtr handle */ +/************************************************************************** +Allocates a single free page from a segment. This function implements +the intelligent allocation strategy which tries to minimize +file space fragmentation. */ +UNIV_INTERN +ulint +fseg_alloc_free_page( +/*=================*/ + /* out: the allocated page offset + FIL_NULL if no page could be allocated */ + fseg_header_t* seg_header, /* in: segment header */ + ulint hint, /* in: hint of which page would be desirable */ + byte direction, /* in: if the new page is needed because + of an index page split, and records are + inserted there in order, into which + direction they go alphabetically: FSP_DOWN, + FSP_UP, FSP_NO_DIR */ + mtr_t* mtr); /* in: mtr handle */ +/************************************************************************** +Allocates a single free page from a segment. This function implements +the intelligent allocation strategy which tries to minimize file space +fragmentation. 
*/ +UNIV_INTERN +ulint +fseg_alloc_free_page_general( +/*=========================*/ + /* out: allocated page offset, FIL_NULL if no + page could be allocated */ + fseg_header_t* seg_header,/* in: segment header */ + ulint hint, /* in: hint of which page would be desirable */ + byte direction,/* in: if the new page is needed because + of an index page split, and records are + inserted there in order, into which + direction they go alphabetically: FSP_DOWN, + FSP_UP, FSP_NO_DIR */ + ibool has_done_reservation, /* in: TRUE if the caller has + already done the reservation for the page + with fsp_reserve_free_extents, then there + is no need to do the check for this individual + page */ + mtr_t* mtr); /* in: mtr handle */ +/************************************************************************** +Reserves free pages from a tablespace. All mini-transactions which may +use several pages from the tablespace should call this function beforehand +and reserve enough free extents so that they certainly will be able +to do their operation, like a B-tree page split, fully. Reservations +must be released with function fil_space_release_free_extents! + +The alloc_type below has the following meaning: FSP_NORMAL means an +operation which will probably result in more space usage, like an +insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are +deleting rows, then this allocation will in the long run result in +less space usage (after a purge); FSP_CLEANING means allocation done +in a physical record delete (like in a purge) or other cleaning operation +which will result in less space usage in the long run. We prefer the latter +two types of allocation: when space is scarce, FSP_NORMAL allocations +will not succeed, but the latter two allocations will succeed, if possible. +The purpose is to avoid dead end where the database is full but the +user cannot free any space because these freeing operations temporarily +reserve some space. 
+ +Single-table tablespaces whose size is < 32 pages are a special case. In this +function we would liberally reserve several 64 page extents for every page +split or merge in a B-tree. But we do not want to waste disk space if the table +only occupies < 32 pages. That is why we apply different rules in that special +case, just ensuring that there are 3 free pages available. */ +UNIV_INTERN +ibool +fsp_reserve_free_extents( +/*=====================*/ + /* out: TRUE if we were able to make the reservation */ + ulint* n_reserved,/* out: number of extents actually reserved; if we + return TRUE and the tablespace size is < 64 pages, + then this can be 0, otherwise it is n_ext */ + ulint space, /* in: space id */ + ulint n_ext, /* in: number of extents to reserve */ + ulint alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */ + mtr_t* mtr); /* in: mtr */ +/************************************************************************** +This function should be used to get information on how much we still +will be able to insert new data to the database without running out the +tablespace. Only free extents are taken into account and we also subtract +the safety margin required by the above function fsp_reserve_free_extents. */ +UNIV_INTERN +ullint +fsp_get_available_space_in_free_extents( +/*====================================*/ + /* out: available space in kB */ + ulint space); /* in: space id */ +/************************************************************************** +Frees a single page of a segment. */ +UNIV_INTERN +void +fseg_free_page( +/*===========*/ + fseg_header_t* seg_header, /* in: segment header */ + ulint space, /* in: space id */ + ulint page, /* in: page offset */ + mtr_t* mtr); /* in: mtr handle */ +/*********************************************************************** +Frees a segment. The freeing is performed in several mini-transactions, +so that there is no danger of bufferfixing too many buffer pages. 
*/ +UNIV_INTERN +void +fseg_free( +/*======*/ + ulint space, /* in: space id */ + ulint zip_size,/* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no,/* in: page number where the segment header is + placed */ + ulint offset);/* in: byte offset of the segment header on that + page */ +/************************************************************************** +Frees part of a segment. This function can be used to free a segment +by repeatedly calling this function in different mini-transactions. +Doing the freeing in a single mini-transaction might result in +too big a mini-transaction. */ +UNIV_INTERN +ibool +fseg_free_step( +/*===========*/ + /* out: TRUE if freeing completed */ + fseg_header_t* header, /* in, own: segment header; NOTE: if the header + resides on the first page of the frag list + of the segment, this pointer becomes obsolete + after the last freeing step */ + mtr_t* mtr); /* in: mtr */ +/************************************************************************** +Frees part of a segment. Differs from fseg_free_step because this function +leaves the header page unfreed. */ +UNIV_INTERN +ibool +fseg_free_step_not_header( +/*======================*/ + /* out: TRUE if freeing completed, except the + header page */ + fseg_header_t* header, /* in: segment header which must reside on + the first fragment page of the segment */ + mtr_t* mtr); /* in: mtr */ +/*************************************************************************** +Checks if a page address is an extent descriptor page address. */ +UNIV_INLINE +ibool +fsp_descr_page( +/*===========*/ + /* out: TRUE if a descriptor page */ + ulint zip_size,/* in: compressed page size in bytes; + 0 for uncompressed pages */ + ulint page_no);/* in: page number */ +/*************************************************************** +Parses a redo log record of a file page init. 
*/ +UNIV_INTERN +byte* +fsp_parse_init_file_page( +/*=====================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr, /* in: buffer end */ + buf_block_t* block); /* in: block or NULL */ +/*********************************************************************** +Validates the file space system and its segments. */ +UNIV_INTERN +ibool +fsp_validate( +/*=========*/ + /* out: TRUE if ok */ + ulint space); /* in: space id */ +/*********************************************************************** +Prints info of a file space. */ +UNIV_INTERN +void +fsp_print( +/*======*/ + ulint space); /* in: space id */ +/*********************************************************************** +Validates a segment. */ +UNIV_INTERN +ibool +fseg_validate( +/*==========*/ + /* out: TRUE if ok */ + fseg_header_t* header, /* in: segment header */ + mtr_t* mtr2); /* in: mtr */ +#ifdef UNIV_BTR_PRINT +/*********************************************************************** +Writes info of a segment. */ +UNIV_INTERN +void +fseg_print( +/*=======*/ + fseg_header_t* header, /* in: segment header */ + mtr_t* mtr); /* in: mtr */ +#endif /* UNIV_BTR_PRINT */ + +/* Flags for fsp_reserve_free_extents */ +#define FSP_NORMAL 1000000 +#define FSP_UNDO 2000000 +#define FSP_CLEANING 3000000 + +/* Number of pages described in a single descriptor page: currently each page +description takes less than 1 byte; a descriptor page is repeated every +this many file pages */ +/* #define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE */ +/* This has been replaced with either UNIV_PAGE_SIZE or page_zip->size. */ + +/* The space low address page map */ +/*--------------------------------------*/ + /* The following two pages are repeated + every XDES_DESCRIBED_PER_PAGE pages in + every tablespace. 
*/ +#define FSP_XDES_OFFSET 0 /* extent descriptor */ +#define FSP_IBUF_BITMAP_OFFSET 1 /* insert buffer bitmap */ + /* The ibuf bitmap pages are the ones whose + page number is the number above plus a + multiple of XDES_DESCRIBED_PER_PAGE */ + +#define FSP_FIRST_INODE_PAGE_NO 2 /* in every tablespace */ + /* The following pages exist + in the system tablespace (space 0). */ +#define FSP_IBUF_HEADER_PAGE_NO 3 /* in tablespace 0 */ +#define FSP_IBUF_TREE_ROOT_PAGE_NO 4 /* in tablespace 0 */ + /* The ibuf tree root page number in + tablespace 0; its fseg inode is on the page + number FSP_FIRST_INODE_PAGE_NO */ +#define FSP_TRX_SYS_PAGE_NO 5 /* in tablespace 0 */ +#define FSP_FIRST_RSEG_PAGE_NO 6 /* in tablespace 0 */ +#define FSP_DICT_HDR_PAGE_NO 7 /* in tablespace 0 */ +/*--------------------------------------*/ + +#ifndef UNIV_NONINL +#include "fsp0fsp.ic" +#endif + +#endif diff --git a/storage/xtradb/include/fsp0fsp.ic b/storage/xtradb/include/fsp0fsp.ic new file mode 100644 index 00000000000..f0301cc5e18 --- /dev/null +++ b/storage/xtradb/include/fsp0fsp.ic @@ -0,0 +1,44 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +File space management + +Created 12/18/1995 Heikki Tuuri +*******************************************************/ + +/*************************************************************************** +Checks if a page address is an extent descriptor page address: the page number is masked with (page size - 1), using zip_size for compressed pages and UNIV_PAGE_SIZE otherwise, and the result is compared with FSP_XDES_OFFSET. */ +UNIV_INLINE +ibool +fsp_descr_page( +/*===========*/ + /* out: TRUE if a descriptor page, i.e. the masked page number equals FSP_XDES_OFFSET */ + ulint zip_size,/* in: compressed page size in bytes; + 0 for uncompressed pages; checked with ut_ad(ut_is_2pow()) */ + ulint page_no)/* in: page number to test */ +{ + ut_ad(ut_is_2pow(zip_size)); + + if (!zip_size) { /* zip_size == 0: uncompressed page, mask with UNIV_PAGE_SIZE */ + return(UNIV_UNLIKELY((page_no & (UNIV_PAGE_SIZE - 1)) + == FSP_XDES_OFFSET)); + } + + return(UNIV_UNLIKELY((page_no & (zip_size - 1)) == FSP_XDES_OFFSET)); +} diff --git a/storage/xtradb/include/fut0fut.h b/storage/xtradb/include/fut0fut.h new file mode 100644 index 00000000000..4de0c97294c --- /dev/null +++ b/storage/xtradb/include/fut0fut.h @@ -0,0 +1,54 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/********************************************************************** +File-based utilities + +Created 12/13/1995 Heikki Tuuri +***********************************************************************/ + + +#ifndef fut0fut_h +#define fut0fut_h + +#include "univ.i" + +#include "fil0fil.h" +#include "mtr0mtr.h" + +/************************************************************************ +Gets a pointer to a file address and latches the page. */ +UNIV_INLINE +byte* +fut_get_ptr( +/*========*/ + /* out: pointer to a byte in a frame; the file + page in the frame is bufferfixed and latched */ + ulint space, /* in: space id */ + ulint zip_size,/* in: compressed page size in bytes + or 0 for uncompressed pages */ + fil_addr_t addr, /* in: file address */ + ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */ + mtr_t* mtr); /* in: mtr handle */ + +#ifndef UNIV_NONINL +#include "fut0fut.ic" +#endif + +#endif + diff --git a/storage/xtradb/include/fut0fut.ic b/storage/xtradb/include/fut0fut.ic new file mode 100644 index 00000000000..f7e820da008 --- /dev/null +++ b/storage/xtradb/include/fut0fut.ic @@ -0,0 +1,55 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/********************************************************************** +File-based utilities + +Created 12/13/1995 Heikki Tuuri +***********************************************************************/ + +#include "sync0rw.h" +#include "buf0buf.h" + +/************************************************************************ +Gets a pointer to a file address and latches the page: the page addr.page of the given space is fetched with buf_page_get() in the requested latch mode, and the returned pointer is the frame of that block advanced by addr.boffset. */ +UNIV_INLINE +byte* +fut_get_ptr( +/*========*/ + /* out: pointer to a byte in a frame; the file + page in the frame is bufferfixed and latched */ + ulint space, /* in: space id */ + ulint zip_size,/* in: compressed page size in bytes + or 0 for uncompressed pages */ + fil_addr_t addr, /* in: file address; addr.boffset must be < UNIV_PAGE_SIZE (checked with ut_ad) */ + ulint rw_latch, /* in: latch mode, RW_S_LATCH or RW_X_LATCH (checked with ut_ad) */ + mtr_t* mtr) /* in: mtr handle, passed on to buf_page_get() */ +{ + buf_block_t* block; + byte* ptr; + + ut_ad(addr.boffset < UNIV_PAGE_SIZE); + ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); + + block = buf_page_get(space, zip_size, addr.page, rw_latch, mtr); + ptr = buf_block_get_frame(block) + addr.boffset; + + buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK); + + return(ptr); +} diff --git a/storage/xtradb/include/fut0lst.h b/storage/xtradb/include/fut0lst.h new file mode 100644 index 00000000000..f812874fe00 --- /dev/null +++ b/storage/xtradb/include/fut0lst.h @@ -0,0 +1,214 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/********************************************************************** +File-based list utilities + +Created 11/28/1995 Heikki Tuuri +***********************************************************************/ + +#ifndef fut0lst_h +#define fut0lst_h + +#include "univ.i" + +#include "fil0fil.h" +#include "mtr0mtr.h" + + +/* The C 'types' of base node and list node: these should be used to +write self-documenting code. Of course, the sizeof macro cannot be +applied to these types! */ + +typedef byte flst_base_node_t; +typedef byte flst_node_t; + +/* The physical size of a list base node in bytes */ +#define FLST_BASE_NODE_SIZE (4 + 2 * FIL_ADDR_SIZE) + +/* The physical size of a list node in bytes */ +#define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE) + + +/************************************************************************ +Initializes a list base node. */ +UNIV_INLINE +void +flst_init( +/*======*/ + flst_base_node_t* base, /* in: pointer to base node */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************ +Adds a node as the last node in a list. */ +UNIV_INTERN +void +flst_add_last( +/*==========*/ + flst_base_node_t* base, /* in: pointer to base node of list */ + flst_node_t* node, /* in: node to add */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************ +Adds a node as the first node in a list. 
*/ +UNIV_INTERN +void +flst_add_first( +/*===========*/ + flst_base_node_t* base, /* in: pointer to base node of list */ + flst_node_t* node, /* in: node to add */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************ +Inserts a node after another in a list. */ +UNIV_INTERN +void +flst_insert_after( +/*==============*/ + flst_base_node_t* base, /* in: pointer to base node of list */ + flst_node_t* node1, /* in: node to insert after */ + flst_node_t* node2, /* in: node to add */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************ +Inserts a node before another in a list. */ +UNIV_INTERN +void +flst_insert_before( +/*===============*/ + flst_base_node_t* base, /* in: pointer to base node of list */ + flst_node_t* node2, /* in: node to insert */ + flst_node_t* node3, /* in: node to insert before */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************ +Removes a node. */ +UNIV_INTERN +void +flst_remove( +/*========*/ + flst_base_node_t* base, /* in: pointer to base node of list */ + flst_node_t* node2, /* in: node to remove */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************ +Cuts off the tail of the list, including the node given. The number of +nodes which will be removed must be provided by the caller, as this function +does not measure the length of the tail. */ +UNIV_INTERN +void +flst_cut_end( +/*=========*/ + flst_base_node_t* base, /* in: pointer to base node of list */ + flst_node_t* node2, /* in: first node to remove */ + ulint n_nodes,/* in: number of nodes to remove, + must be >= 1 */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************ +Cuts off the tail of the list, not including the given node. 
The number of +nodes which will be removed must be provided by the caller, as this function +does not measure the length of the tail. */ +UNIV_INTERN +void +flst_truncate_end( +/*==============*/ + flst_base_node_t* base, /* in: pointer to base node of list */ + flst_node_t* node2, /* in: first node not to remove */ + ulint n_nodes,/* in: number of nodes to remove */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************ +Gets list length. */ +UNIV_INLINE +ulint +flst_get_len( +/*=========*/ + /* out: length */ + const flst_base_node_t* base, /* in: pointer to base node */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************ +Gets list first node address. */ +UNIV_INLINE +fil_addr_t +flst_get_first( +/*===========*/ + /* out: file address */ + const flst_base_node_t* base, /* in: pointer to base node */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************ +Gets list last node address. */ +UNIV_INLINE +fil_addr_t +flst_get_last( +/*==========*/ + /* out: file address */ + const flst_base_node_t* base, /* in: pointer to base node */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************ +Gets list next node address. */ +UNIV_INLINE +fil_addr_t +flst_get_next_addr( +/*===============*/ + /* out: file address */ + const flst_node_t* node, /* in: pointer to node */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************ +Gets list prev node address. 
*/ +UNIV_INLINE +fil_addr_t +flst_get_prev_addr( +/*===============*/ + /* out: file address */ + const flst_node_t* node, /* in: pointer to node */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************ +Writes a file address. */ +UNIV_INLINE +void +flst_write_addr( +/*============*/ + fil_faddr_t* faddr, /* in: pointer to file faddress */ + fil_addr_t addr, /* in: file address */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************ +Reads a file address. */ +UNIV_INLINE +fil_addr_t +flst_read_addr( +/*===========*/ + /* out: file address */ + const fil_faddr_t* faddr, /* in: pointer to file faddress */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************ +Validates a file-based list. */ +UNIV_INTERN +ibool +flst_validate( +/*==========*/ + /* out: TRUE if ok */ + const flst_base_node_t* base, /* in: pointer to base node of list */ + mtr_t* mtr1); /* in: mtr */ +/************************************************************************ +Prints info of a file-based list. */ +UNIV_INTERN +void +flst_print( +/*=======*/ + const flst_base_node_t* base, /* in: pointer to base node of list */ + mtr_t* mtr); /* in: mtr */ + + +#ifndef UNIV_NONINL +#include "fut0lst.ic" +#endif + +#endif diff --git a/storage/xtradb/include/fut0lst.ic b/storage/xtradb/include/fut0lst.ic new file mode 100644 index 00000000000..5899e996059 --- /dev/null +++ b/storage/xtradb/include/fut0lst.ic @@ -0,0 +1,166 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/********************************************************************** +File-based list utilities + +Created 11/28/1995 Heikki Tuuri +***********************************************************************/ + +#include "fut0fut.h" +#include "mtr0log.h" +#include "buf0buf.h" + +/* We define the field offsets of a node for the list */ +#define FLST_PREV 0 /* 6-byte address of the previous list element; + the page part of address is FIL_NULL, if no + previous element */ +#define FLST_NEXT FIL_ADDR_SIZE /* 6-byte address of the next + list element; the page part of address + is FIL_NULL, if no next element */ + +/* We define the field offsets of a base node for the list */ +#define FLST_LEN 0 /* 32-bit list length field */ +#define FLST_FIRST 4 /* 6-byte address of the first element + of the list; undefined if empty list */ +#define FLST_LAST (4 + FIL_ADDR_SIZE) /* 6-byte address of the + last element of the list; undefined + if empty list */ + +/************************************************************************ +Writes a file address. 
 */ +UNIV_INLINE +void +flst_write_addr( +/*============*/ + fil_faddr_t* faddr, /* in: pointer to the stored file address (fil_faddr_t) to overwrite */ + fil_addr_t addr, /* in: file address to store: page number and byte offset */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + ut_ad(faddr && mtr); + ut_ad(mtr_memo_contains_page(mtr, faddr, MTR_MEMO_PAGE_X_FIX)); + ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA); + ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA); + + mlog_write_ulint(faddr + FIL_ADDR_PAGE, addr.page, MLOG_4BYTES, mtr); + mlog_write_ulint(faddr + FIL_ADDR_BYTE, addr.boffset, + MLOG_2BYTES, mtr); +} + +/************************************************************************ +Reads a file address: the page number (4 bytes at faddr + FIL_ADDR_PAGE) and the byte offset (2 bytes at faddr + FIL_ADDR_BYTE). The value read is then checked with ut_a: either the page is FIL_NULL or the byte offset is at least FIL_PAGE_DATA. */ +UNIV_INLINE +fil_addr_t +flst_read_addr( +/*===========*/ + /* out: file address */ + const fil_faddr_t* faddr, /* in: pointer to the stored file address (fil_faddr_t) to read */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + fil_addr_t addr; + + ut_ad(faddr && mtr); + + addr.page = mtr_read_ulint(faddr + FIL_ADDR_PAGE, MLOG_4BYTES, mtr); + addr.boffset = mtr_read_ulint(faddr + FIL_ADDR_BYTE, MLOG_2BYTES, + mtr); + ut_a(addr.page == FIL_NULL || addr.boffset >= FIL_PAGE_DATA); + ut_a(ut_align_offset(faddr, UNIV_PAGE_SIZE) >= FIL_PAGE_DATA); + return(addr); +} + +/************************************************************************ +Initializes a list base node: writes length 0 to the FLST_LEN field and sets both the first and the last node address to fil_addr_null. */ +UNIV_INLINE +void +flst_init( +/*======*/ + flst_base_node_t* base, /* in: pointer to base node */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + ut_ad(mtr_memo_contains_page(mtr, base, MTR_MEMO_PAGE_X_FIX)); + + mlog_write_ulint(base + FLST_LEN, 0, MLOG_4BYTES, mtr); + flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr); + flst_write_addr(base + FLST_LAST, fil_addr_null, mtr); +} + +/************************************************************************ +Gets list length. 
 */ +UNIV_INLINE +ulint +flst_get_len( +/*=========*/ + /* out: list length, read as 4 bytes at base + FLST_LEN */ + const flst_base_node_t* base, /* in: pointer to base node */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + return(mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr)); +} + +/************************************************************************ +Gets list first node address, read with flst_read_addr() from the FLST_FIRST field of the base node. */ +UNIV_INLINE +fil_addr_t +flst_get_first( +/*===========*/ + /* out: file address of the first node */ + const flst_base_node_t* base, /* in: pointer to base node */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + return(flst_read_addr(base + FLST_FIRST, mtr)); +} + +/************************************************************************ +Gets list last node address, read with flst_read_addr() from the FLST_LAST field of the base node. */ +UNIV_INLINE +fil_addr_t +flst_get_last( +/*==========*/ + /* out: file address of the last node */ + const flst_base_node_t* base, /* in: pointer to base node */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + return(flst_read_addr(base + FLST_LAST, mtr)); +} + +/************************************************************************ +Gets list next node address, read with flst_read_addr() from the FLST_NEXT field of the node. */ +UNIV_INLINE +fil_addr_t +flst_get_next_addr( +/*===============*/ + /* out: file address of the next node */ + const flst_node_t* node, /* in: pointer to list node */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + return(flst_read_addr(node + FLST_NEXT, mtr)); +} + +/************************************************************************ +Gets list prev node address. 
 */ +UNIV_INLINE +fil_addr_t +flst_get_prev_addr( +/*===============*/ + /* out: file address of the previous node, read with flst_read_addr() from the FLST_PREV field */ + const flst_node_t* node, /* in: pointer to list node */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + return(flst_read_addr(node + FLST_PREV, mtr)); +} diff --git a/storage/xtradb/include/ha0ha.h b/storage/xtradb/include/ha0ha.h new file mode 100644 index 00000000000..768f3d7aca3 --- /dev/null +++ b/storage/xtradb/include/ha0ha.h @@ -0,0 +1,188 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The hash table with external chains + +Created 8/18/1994 Heikki Tuuri +*******************************************************/ + +#ifndef ha0ha_h +#define ha0ha_h + +#include "univ.i" + +#include "hash0hash.h" +#include "page0types.h" +#include "buf0types.h" + +/***************************************************************** +Looks for an element in a hash table. 
*/ +UNIV_INLINE +void* +ha_search_and_get_data( +/*===================*/ + /* out: pointer to the data of the first hash + table node in chain having the fold number, + NULL if not found */ + hash_table_t* table, /* in: hash table */ + ulint fold); /* in: folded value of the searched data */ +/************************************************************* +Looks for an element when we know the pointer to the data and updates +the pointer to data if found. */ +UNIV_INTERN +void +ha_search_and_update_if_found_func( +/*===============================*/ + hash_table_t* table, /* in: hash table */ + ulint fold, /* in: folded value of the searched data */ + void* data, /* in: pointer to the data */ +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + buf_block_t* new_block,/* in: block containing new_data */ +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + void* new_data);/* in: new pointer to the data */ + +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG +# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \ + ha_search_and_update_if_found_func(table,fold,data,new_block,new_data) +#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */ +# define ha_search_and_update_if_found(table,fold,data,new_block,new_data) \ + ha_search_and_update_if_found_func(table,fold,data,new_data) +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ +/***************************************************************** +Creates a hash table with >= n array cells. The actual number of cells is +chosen to be a prime number slightly bigger than n. 
*/ +UNIV_INTERN +hash_table_t* +ha_create_func( +/*===========*/ + /* out, own: created table */ + ulint n, /* in: number of array cells */ +#ifdef UNIV_SYNC_DEBUG + ulint mutex_level, /* in: level of the mutexes in the latching + order: this is used in the debug version */ +#endif /* UNIV_SYNC_DEBUG */ + ulint n_mutexes); /* in: number of mutexes to protect the + hash table: must be a power of 2 */ +#ifdef UNIV_SYNC_DEBUG +# define ha_create(n_c,n_m,level) ha_create_func(n_c,level,n_m) +#else /* UNIV_SYNC_DEBUG */ +# define ha_create(n_c,n_m,level) ha_create_func(n_c,n_m) +#endif /* UNIV_SYNC_DEBUG */ + +/***************************************************************** +Empties a hash table and frees the memory heaps. */ +UNIV_INTERN +void +ha_clear( +/*=====*/ + hash_table_t* table); /* in, own: hash table */ + +/***************************************************************** +Inserts an entry into a hash table. If an entry with the same fold number +is found, its node is updated to point to the new data, and no new node +is inserted. */ +UNIV_INTERN +ibool +ha_insert_for_fold_func( +/*====================*/ + /* out: TRUE if succeed, FALSE if no more + memory could be allocated */ + hash_table_t* table, /* in: hash table */ + ulint fold, /* in: folded value of data; if a node with + the same fold value already exists, it is + updated to point to the same data, and no new + node is created! 
*/ +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + buf_block_t* block, /* in: buffer block containing the data */ +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + void* data); /* in: data, must not be NULL */ + +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG +# define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,b,d) +#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */ +# define ha_insert_for_fold(t,f,b,d) ha_insert_for_fold_func(t,f,d) +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + +/***************************************************************** +Deletes an entry from a hash table. */ +UNIV_INTERN +void +ha_delete( +/*======*/ + hash_table_t* table, /* in: hash table */ + ulint fold, /* in: folded value of data */ + void* data); /* in: data, must not be NULL and must exist + in the hash table */ +/************************************************************* +Looks for an element when we know the pointer to the data and deletes +it from the hash table if found. */ +UNIV_INLINE +ibool +ha_search_and_delete_if_found( +/*==========================*/ + /* out: TRUE if found */ + hash_table_t* table, /* in: hash table */ + ulint fold, /* in: folded value of the searched data */ + void* data); /* in: pointer to the data */ +/********************************************************************* +Removes from the chain determined by fold all nodes whose data pointer +points to the page given. */ +UNIV_INTERN +void +ha_remove_all_nodes_to_page( +/*========================*/ + hash_table_t* table, /* in: hash table */ + ulint fold, /* in: fold value */ + const page_t* page); /* in: buffer page */ +/***************************************************************** +Validates a given range of the cells in hash table. 
*/ +UNIV_INTERN +ibool +ha_validate( +/*========*/ + /* out: TRUE if ok */ + hash_table_t* table, /* in: hash table */ + ulint start_index, /* in: start index */ + ulint end_index); /* in: end index */ +/***************************************************************** +Prints info of a hash table. */ +UNIV_INTERN +void +ha_print_info( +/*==========*/ + FILE* file, /* in: file where to print */ + hash_table_t* table); /* in: hash table */ + +/* The hash table external chain node. The block field exists only when +UNIV_AHI_DEBUG or UNIV_DEBUG is defined, so the struct layout differs +between debug and non-debug builds. */ + +typedef struct ha_node_struct ha_node_t; +struct ha_node_struct { + ha_node_t* next; /* next chain node or NULL if none */ +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + buf_block_t* block; /* buffer block containing the data, or NULL */ +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + void* data; /* pointer to the data */ + ulint fold; /* fold value for the data */ +}; + +#ifndef UNIV_NONINL +#include "ha0ha.ic" +#endif /* !UNIV_NONINL */ + +#endif diff --git a/storage/xtradb/include/ha0ha.ic b/storage/xtradb/include/ha0ha.ic new file mode 100644 index 00000000000..35fd802eaef --- /dev/null +++ b/storage/xtradb/include/ha0ha.ic @@ -0,0 +1,214 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/************************************************************************ +The hash table with external chains + +Created 8/18/1994 Heikki Tuuri +*************************************************************************/ + +#include "ut0rnd.h" +#include "mem0mem.h" + +/*************************************************************** +Deletes a hash node. Only declared here; the inline function +ha_search_and_delete_if_found() in this file calls it. */ +UNIV_INTERN +void +ha_delete_hash_node( +/*================*/ + hash_table_t* table, /* in: hash table */ + ha_node_t* del_node); /* in: node to be deleted */ + +/********************************************************************** +Gets the data pointer stored in a hash node. */ +UNIV_INLINE +void* +ha_node_get_data( +/*=============*/ + /* out: pointer to the data */ + ha_node_t* node) /* in: hash chain node */ +{ + return(node->data); +} + +/********************************************************************** +Sets hash node data.
*/ +UNIV_INLINE +void +ha_node_set_data_func( +/*==================*/ + ha_node_t* node, /* in/out: hash chain node */ +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + buf_block_t* block, /* in: buffer block containing the data */ +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + void* data) /* in: pointer to the data */ +{ +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + node->block = block; +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + node->data = data; +} + +/* ha_node_set_data() always takes (n, b, d); the block argument b is +forwarded only when UNIV_AHI_DEBUG or UNIV_DEBUG is defined. */ +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG +# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,b,d) +#else /* UNIV_AHI_DEBUG || UNIV_DEBUG */ +# define ha_node_set_data(n,b,d) ha_node_set_data_func(n,d) +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + +/********************************************************************** +Gets the next node in a hash chain. */ +UNIV_INLINE +ha_node_t* +ha_chain_get_next( +/*==============*/ + /* out: next node, NULL if none */ + ha_node_t* node) /* in: hash chain node */ +{ + return(node->next); +} + +/********************************************************************** +Gets the first node in a hash chain. */ +UNIV_INLINE +ha_node_t* +ha_chain_get_first( +/*===============*/ + /* out: first node, NULL if none */ + hash_table_t* table, /* in: hash table */ + ulint fold) /* in: fold value determining the chain */ +{ + /* hash_calc_hash() turns the fold into a cell index; the node pointer + of that cell is the head of the chain */ + return((ha_node_t*) + hash_get_nth_cell(table, hash_calc_hash(fold, table))->node); +} + +/***************************************************************** +Looks for an element in a hash table.
*/ +UNIV_INLINE +ha_node_t* +ha_search( +/*======*/ + /* out: pointer to the first hash table node + in chain having the fold number, NULL if not + found */ + hash_table_t* table, /* in: hash table */ + ulint fold) /* in: folded value of the searched data */ +{ + ha_node_t* node; + + /* if the table has mutexes, the one covering this fold must be owned */ + ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold))); + + node = ha_chain_get_first(table, fold); + + while (node) { + if (node->fold == fold) { + + return(node); + } + + node = ha_chain_get_next(node); + } + + return(NULL); +} + +/***************************************************************** +Looks for an element in a hash table and returns its data pointer +instead of the node. */ +UNIV_INLINE +void* +ha_search_and_get_data( +/*===================*/ + /* out: pointer to the data of the first hash + table node in chain having the fold number, + NULL if not found */ + hash_table_t* table, /* in: hash table */ + ulint fold) /* in: folded value of the searched data */ +{ + ha_node_t* node; + + /* if the table has mutexes, the one covering this fold must be owned */ + ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold))); + + node = ha_chain_get_first(table, fold); + + while (node) { + if (node->fold == fold) { + + return(node->data); + } + + node = ha_chain_get_next(node); + } + + return(NULL); +} + +/************************************************************* +Looks for an element when we know the pointer to the data.
*/ +UNIV_INLINE +ha_node_t* +ha_search_with_data( +/*================*/ + /* out: pointer to the hash table node, NULL + if not found in the chain selected by fold */ + hash_table_t* table, /* in: hash table */ + ulint fold, /* in: folded value of the searched data */ + void* data) /* in: pointer to the data */ +{ + ha_node_t* node; + + ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold))); + + node = ha_chain_get_first(table, fold); + + while (node) { + /* nodes are matched on the data pointer only; fold merely + selects the chain and node->fold is not compared */ + if (node->data == data) { + + return(node); + } + + node = ha_chain_get_next(node); + } + + return(NULL); +} + +/************************************************************* +Looks for an element when we know the pointer to the data, and deletes +it from the hash table, if found. */ +UNIV_INLINE +ibool +ha_search_and_delete_if_found( +/*==========================*/ + /* out: TRUE if found and deleted */ + hash_table_t* table, /* in: hash table */ + ulint fold, /* in: folded value of the searched data */ + void* data) /* in: pointer to the data */ +{ + ha_node_t* node; + + ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold))); + + node = ha_search_with_data(table, fold, data); + + if (node) { + ha_delete_hash_node(table, node); + + return(TRUE); + } + + return(FALSE); +} diff --git a/storage/xtradb/include/ha0storage.h b/storage/xtradb/include/ha0storage.h new file mode 100644 index 00000000000..f5a3938f434 --- /dev/null +++ b/storage/xtradb/include/ha0storage.h @@ -0,0 +1,129 @@ +/***************************************************************************** + +Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE.
See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Hash storage. +Provides a data structure that stores chunks of data in +its own storage, avoiding duplicates. + +Created September 22, 2007 Vasil Dimov +*******************************************************/ + +#ifndef ha0storage_h +#define ha0storage_h + +#include "univ.i" + +/* Initial heap size in bytes that ha_storage_create() uses when the caller +passes 0. More memory is allocated later when/if it is needed. */ +#define HA_STORAGE_DEFAULT_HEAP_BYTES 1024 + +/* Number of hash cells that ha_storage_create() uses when the caller +passes 0. It is a constant per ha_storage's lifetime. */ +#define HA_STORAGE_DEFAULT_HASH_CELLS 4096 + +typedef struct ha_storage_struct ha_storage_t; + +/*********************************************************************** +Creates a hash storage. If any of the parameters is 0, then a default +value is used. */ +UNIV_INLINE +ha_storage_t* +ha_storage_create( +/*==============*/ + /* out, own: hash storage */ + ulint initial_heap_bytes, /* in: initial heap's size, 0 = + HA_STORAGE_DEFAULT_HEAP_BYTES */ + ulint initial_hash_cells); /* in: initial number of cells + in the hash table, 0 = + HA_STORAGE_DEFAULT_HASH_CELLS */ + +/*********************************************************************** +Copies data into the storage and returns a pointer to the copy. If the +same data chunk is already present, then pointer to it is returned. +Data chunks are considered to be equal if len1 == len2 and +memcmp(data1, data2, len1) == 0. If "data" is not present (and thus +data_len bytes need to be allocated) and the size of storage is going to +become more than "memlim" then "data" is not added and NULL is returned.
+To disable this behavior "memlim" can be set to 0, which stands for +"no limit". */ + +const void* +ha_storage_put_memlim( +/*==================*/ + /* out: pointer to the copy, or NULL + if memlim would be exceeded */ + ha_storage_t* storage, /* in/out: hash storage */ + const void* data, /* in: data to store */ + ulint data_len, /* in: data length */ + ulint memlim); /* in: memory limit to obey, + 0 = no limit */ + +/*********************************************************************** +Same as ha_storage_put_memlim() but without memory limit. */ + +#define ha_storage_put(storage, data, data_len) \ + ha_storage_put_memlim((storage), (data), (data_len), 0) + +/*********************************************************************** +Copies string into the storage and returns a pointer to the copy. If the +same string is already present, then pointer to it is returned. +Strings are considered to be equal if strcmp(str1, str2) == 0. +The terminating '\0' is stored too (strlen(str) + 1 bytes). Note that the +macro evaluates "str" twice. */ + +#define ha_storage_put_str(storage, str) \ + ((const char*) ha_storage_put((storage), (str), strlen(str) + 1)) + +/*********************************************************************** +Copies string into the storage and returns a pointer to the copy obeying +a memory limit. Like ha_storage_put_str(), this evaluates "str" twice. */ + +#define ha_storage_put_str_memlim(storage, str, memlim) \ + ((const char*) ha_storage_put_memlim((storage), (str), \ + strlen(str) + 1, (memlim))) + +/*********************************************************************** +Empties a hash storage, freeing memory occupied by data chunks. +This invalidates any pointers previously returned by ha_storage_put(). +The hash storage is not invalidated itself and can be used again. */ +UNIV_INLINE +void +ha_storage_empty( +/*=============*/ + ha_storage_t** storage); /* in/out: hash storage; *storage + is reassigned by the call */ + +/*********************************************************************** +Frees a hash storage and everything it contains, it cannot be used after +this call. +This invalidates any pointers previously returned by ha_storage_put().
+*/ +UNIV_INLINE +void +ha_storage_free( +/*============*/ + ha_storage_t* storage); /* in, own: hash storage to free */ + +/*********************************************************************** +Gets the size of the memory used by a storage. */ +UNIV_INLINE +ulint +ha_storage_get_size( +/*================*/ + /* out: bytes used by the heap + and the hash table */ + const ha_storage_t* storage); /* in: hash storage */ + +#ifndef UNIV_NONINL +#include "ha0storage.ic" +#endif /* !UNIV_NONINL */ + +#endif /* ha0storage_h */ diff --git a/storage/xtradb/include/ha0storage.ic b/storage/xtradb/include/ha0storage.ic new file mode 100644 index 00000000000..7ab43bc00ba --- /dev/null +++ b/storage/xtradb/include/ha0storage.ic @@ -0,0 +1,146 @@ +/***************************************************************************** + +Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Hash storage. +Provides a data structure that stores chunks of data in +its own storage, avoiding duplicates.
+ +Created September 24, 2007 Vasil Dimov +*******************************************************/ + +#include "univ.i" +#include "ha0storage.h" +#include "hash0hash.h" +#include "mem0mem.h" + +/* The hash storage object. ha_storage_create() allocates it from its own +heap. */ +struct ha_storage_struct { + mem_heap_t* heap; /* storage from which memory is + allocated */ + hash_table_t* hash; /* hash table used to avoid + duplicates */ +}; + +/* Objects of this type are put in the hash */ +typedef struct ha_storage_node_struct ha_storage_node_t; +struct ha_storage_node_struct { + ulint data_len;/* length of the data */ + const void* data; /* pointer to data */ + ha_storage_node_t* next; /* next node in hash chain */ +}; + +/*********************************************************************** +Creates a hash storage. If any of the parameters is 0, then a default +value is used. */ +UNIV_INLINE +ha_storage_t* +ha_storage_create( +/*==============*/ + /* out, own: hash storage */ + ulint initial_heap_bytes, /* in: initial heap's size, 0 = + HA_STORAGE_DEFAULT_HEAP_BYTES */ + ulint initial_hash_cells) /* in: initial number of cells + in the hash table, 0 = + HA_STORAGE_DEFAULT_HASH_CELLS */ +{ + ha_storage_t* storage; + mem_heap_t* heap; + + if (initial_heap_bytes == 0) { + + initial_heap_bytes = HA_STORAGE_DEFAULT_HEAP_BYTES; + } + + if (initial_hash_cells == 0) { + + initial_hash_cells = HA_STORAGE_DEFAULT_HASH_CELLS; + } + + /* we put "storage" within "storage->heap", so the heap is created + large enough for the ha_storage_t plus the requested initial bytes */ + + heap = mem_heap_create(sizeof(ha_storage_t) + + initial_heap_bytes); + + storage = (ha_storage_t*) mem_heap_alloc(heap, + sizeof(ha_storage_t)); + + storage->heap = heap; + storage->hash = hash_create(initial_hash_cells); + + return(storage); +} + +/*********************************************************************** +Empties a hash storage, freeing memory occupied by data chunks. +This invalidates any pointers previously returned by ha_storage_put(). +The hash storage is not invalidated itself and can be used again.
*/ +UNIV_INLINE +void +ha_storage_empty( +/*=============*/ + ha_storage_t** storage) /* in/out: hash storage; *storage + is reassigned by the call */ +{ + ha_storage_t temp_storage; + + /* ha_storage_create() placed the storage object in its own heap, + which is emptied below, so keep copies of its fields on the stack */ + temp_storage.heap = (*storage)->heap; + temp_storage.hash = (*storage)->hash; + + hash_table_clear(temp_storage.hash); + mem_heap_empty(temp_storage.heap); + + /* allocate the storage object again from the emptied heap */ + *storage = (ha_storage_t*) mem_heap_alloc(temp_storage.heap, + sizeof(ha_storage_t)); + + (*storage)->heap = temp_storage.heap; + (*storage)->hash = temp_storage.hash; +} + +/*********************************************************************** +Frees a hash storage and everything it contains, it cannot be used after +this call. +This invalidates any pointers previously returned by ha_storage_put(). +*/ +UNIV_INLINE +void +ha_storage_free( +/*============*/ + ha_storage_t* storage) /* in, own: hash storage to free */ +{ + /* order is important because the pointer storage->hash is + within the heap: it must be read before the heap is freed */ + hash_table_free(storage->hash); + mem_heap_free(storage->heap); +} + +/*********************************************************************** +Gets the size of the memory used by a storage. */ +UNIV_INLINE +ulint +ha_storage_get_size( +/*================*/ + /* out: bytes used by the heap + and the hash table */ + const ha_storage_t* storage) /* in: hash storage */ +{ + ulint ret; + + ret = mem_heap_get_size(storage->heap); + + /* this assumes hash->heap and hash->heaps are NULL, so only the + table struct and its cell array are counted for the hash */ + ret += sizeof(hash_table_t); + ret += sizeof(hash_cell_t) * hash_get_n_cells(storage->hash); + + return(ret); +} diff --git a/storage/xtradb/include/ha_prototypes.h b/storage/xtradb/include/ha_prototypes.h new file mode 100644 index 00000000000..116242b32e4 --- /dev/null +++ b/storage/xtradb/include/ha_prototypes.h @@ -0,0 +1,249 @@ +/***************************************************************************** + +Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +#ifndef HA_INNODB_PROTOTYPES_H +#define HA_INNODB_PROTOTYPES_H + +#ifndef UNIV_HOTBACKUP + +#include "univ.i" /* ulint, uint */ +#include "m_ctype.h" /* CHARSET_INFO */ + +/* Prototypes for global functions in ha_innodb.cc that are called by +InnoDB's C-code. Everything in this header is compiled out when +UNIV_HOTBACKUP is defined. */ + +/************************************************************************* +Wrapper around MySQL's copy_and_convert function, see it for +documentation of the parameters and the return value. */ +UNIV_INTERN +ulint +innobase_convert_string( +/*====================*/ + void* to, + ulint to_length, + CHARSET_INFO* to_cs, + const void* from, + ulint from_length, + CHARSET_INFO* from_cs, + uint* errors); + +/*********************************************************************** +Formats the raw data in "data" (in InnoDB on-disk format) that is of +type DATA_(CHAR|VARCHAR|MYSQL|VARMYSQL) using "charset_coll" and writes +the result to "buf". The result is converted to "system_charset_info". +Not more than "buf_size" bytes are written to "buf". +The result is always '\0'-terminated (provided buf_size > 0) and the +number of bytes that were written to "buf" is returned (including the +terminating '\0').
*/ +UNIV_INTERN +ulint +innobase_raw_format( +/*================*/ + /* out: number of bytes + that were written, including + the terminating '\0' */ + const char* data, /* in: raw data */ + ulint data_len, /* in: raw data length + in bytes */ + ulint charset_coll, /* in: charset collation */ + char* buf, /* out: output buffer */ + ulint buf_size); /* in: output buffer size + in bytes */ + +/********************************************************************* +Converts a table or index name to the MySQL system_charset_info (UTF-8) +and quotes it if needed. */ +UNIV_INTERN +char* +innobase_convert_name( +/*==================*/ + /* out: pointer to the end of buf */ + char* buf, /* out: buffer for converted identifier */ + ulint buflen, /* in: length of buf, in bytes */ + const char* id, /* in: identifier to convert */ + ulint idlen, /* in: length of id, in bytes */ + void* thd, /* in: MySQL connection thread, or NULL */ + ibool table_id);/* in: TRUE=id is a table or database name; + FALSE=id is an index name */ + +/********************************************************************** +Returns true if the thread is the replication thread on the slave +server. Used in srv_conc_enter_innodb() to determine if the thread +should be allowed to enter InnoDB - the replication thread is treated +differently from other threads. Also used in +srv_conc_force_exit_innodb(). */ +UNIV_INTERN +ibool +thd_is_replication_slave_thread( +/*============================*/ + /* out: true if thd is the replication thread */ + void* thd); /* in: thread handle (THD*) */ + +/********************************************************************** +Returns true if the transaction this thread is processing has edited +non-transactional tables. Used by the deadlock detector when deciding +which transaction to rollback in case of a deadlock - we try to avoid +rolling back transactions that have edited non-transactional tables.
*/ +UNIV_INTERN +ibool +thd_has_edited_nontrans_tables( +/*===========================*/ + /* out: true if non-transactional tables have + been edited */ + void* thd); /* in: thread handle (THD*) */ + +/***************************************************************** +Prints info of a THD object (== user session thread) to the given file. */ +UNIV_INTERN +void +innobase_mysql_print_thd( +/*=====================*/ + FILE* f, /* in: output stream */ + void* thd, /* in: pointer to a MySQL THD object */ + uint max_query_len); /* in: max query length to print, or 0 to + use the default max length */ + +/****************************************************************** +Converts a MySQL type to an InnoDB type. Note that this function returns +the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1 +VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'. +Both pointer arguments are declared nonnull. */ +UNIV_INTERN +ulint +get_innobase_type_from_mysql_type( +/*==============================*/ + /* out: InnoDB mtype: DATA_BINARY, + DATA_VARCHAR, ... */ + ulint* unsigned_flag, /* out: DATA_UNSIGNED if an + 'unsigned type'; + at least ENUM and SET, + and unsigned integer + types are 'unsigned types' */ + const void* field) /* in: MySQL Field */ + __attribute__((nonnull)); + +/***************************************************************** +If you want to print a thd that is not associated with the current thread, +you must call this function before reserving the InnoDB kernel_mutex, to +prevent MySQL from setting thd->query to NULL. If you print a thd of the +current thread, we know that MySQL cannot modify thd->query, and it is not +necessary to call this. Call innobase_mysql_end_print_arbitrary_thd() after +you release the kernel_mutex. */ +UNIV_INTERN +void +innobase_mysql_prepare_print_arbitrary_thd(void); +/*============================================*/ + +/***************************************************************** +Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd().
+In the InnoDB latching order, the mutex sits right above the +kernel_mutex. In debug builds, we assert that the kernel_mutex is +released before this function is invoked. */ +UNIV_INTERN +void +innobase_mysql_end_print_arbitrary_thd(void); +/*========================================*/ + +/********************************************************************** +Gets the variable length bounds of the given character set. */ +UNIV_INTERN +void +innobase_get_cset_width( +/*====================*/ + ulint cset, /* in: MySQL charset-collation code */ + ulint* mbminlen, /* out: minimum length of a char (in bytes) */ + ulint* mbmaxlen); /* out: maximum length of a char (in bytes) */ + +/********************************************************************** +Compares NUL-terminated UTF-8 strings case insensitively. */ +UNIV_INTERN +int +innobase_strcasecmp( +/*================*/ + /* out: 0 if a=b, <0 if a<b, >0 if a>b */ + const char* a, /* in: first string to compare */ + const char* b); /* in: second string to compare */ + +/********************************************************************** +Returns true if the thread is executing a SELECT statement. */ + +ibool +thd_is_select( +/*==========*/ + /* out: true if thd is executing SELECT */ + const void* thd); /* in: thread handle (THD*) */ + +/********************************************************************** +Converts an identifier to a table name. */ +UNIV_INTERN +void +innobase_convert_from_table_id( +/*===========================*/ + struct charset_info_st* cs, /* in: the 'from' character set */ + char* to, /* out: converted identifier */ + const char* from, /* in: identifier to convert */ + ulint len); /* in: length of 'to', in bytes; should + be at least 5 * strlen(from) + 1 */ +/********************************************************************** +Converts an identifier to UTF-8.
*/ +UNIV_INTERN +void +innobase_convert_from_id( +/*=====================*/ + struct charset_info_st* cs, /* in: the 'from' character set */ + char* to, /* out: converted identifier */ + const char* from, /* in: identifier to convert */ + ulint len); /* in: length of 'to', in bytes; should + be at least 3 * strlen(from) + 1 */ +/********************************************************************** +Makes all characters in a NUL-terminated UTF-8 string lower case. */ +UNIV_INTERN +void +innobase_casedn_str( +/*================*/ + char* a); /* in/out: string to put in lower case */ + +/************************************************************************** +Determines the connection character set. */ +struct charset_info_st* +innobase_get_charset( +/*=================*/ + /* out: connection character set */ + void* mysql_thd); /* in: MySQL thread handle */ + +/********************************************************************** +Returns true if the thread supports XA, or the +global value of innodb_supports_xa if thd is NULL. */ + +ibool +thd_supports_xa( +/*============*/ + /* out: true if thd supports XA */ + void* thd); /* in: thread handle (THD*), or NULL to query + the global innodb_supports_xa */ + +/********************************************************************** +Returns the lock wait timeout for the given connection, or the global +innodb_lock_wait_timeout if thd is NULL. */ + +ulong +thd_lock_wait_timeout( +/*==================*/ + /* out: the lock wait timeout, in seconds */ + void* thd); /* in: thread handle (THD*), or NULL to query + the global innodb_lock_wait_timeout */ + +#endif /* !UNIV_HOTBACKUP */ +#endif /* HA_INNODB_PROTOTYPES_H */ diff --git a/storage/xtradb/include/handler0alter.h b/storage/xtradb/include/handler0alter.h new file mode 100644 index 00000000000..69488b67b2b --- /dev/null +++ b/storage/xtradb/include/handler0alter.h @@ -0,0 +1,41 @@ +/***************************************************************************** + +Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Smart ALTER TABLE + +This header contains prototypes only. It has no include guard and includes +nothing itself, so TABLE, rec_t, dict_index_t and ulint must already be +declared where it is included. +*******************************************************/ + +/***************************************************************** +Copies an InnoDB record to table->record[0]. */ +UNIV_INTERN +void +innobase_rec_to_mysql( +/*==================*/ + TABLE* table, /* in/out: MySQL table */ + const rec_t* rec, /* in: record */ + const dict_index_t* index, /* in: index */ + const ulint* offsets); /* in: rec_get_offsets( + rec, index, ...) */ + +/***************************************************************** +Resets table->record[0]. */ +UNIV_INTERN +void +innobase_rec_reset( +/*===============*/ + TABLE* table); /* in/out: MySQL table */ diff --git a/storage/xtradb/include/hash0hash.h b/storage/xtradb/include/hash0hash.h new file mode 100644 index 00000000000..2b3eea62754 --- /dev/null +++ b/storage/xtradb/include/hash0hash.h @@ -0,0 +1,429 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The simple hash table utility + +Created 5/20/1997 Heikki Tuuri +*******************************************************/ + +#ifndef hash0hash_h +#define hash0hash_h + +#include "univ.i" +#include "mem0mem.h" +#include "sync0sync.h" + +typedef struct hash_table_struct hash_table_t; +typedef struct hash_cell_struct hash_cell_t; + +/* Untyped pointer to a chain node */ +typedef void* hash_node_t; + +/* Fix Bug #13859: symbol collision between imap/mysql. Every use of the +name hash_create after this point is replaced by hash0_create. */ +#define hash_create hash0_create + +/***************************************************************** +Creates a hash table with >= n array cells. The actual number +of cells is chosen to be a prime number slightly bigger than n. */ +UNIV_INTERN +hash_table_t* +hash_create( +/*========*/ + /* out, own: created table */ + ulint n); /* in: minimum number of array cells */ +/***************************************************************** +Creates a mutex array to protect a hash table.
*/ +UNIV_INTERN +void +hash_create_mutexes_func( +/*=====================*/ + hash_table_t* table, /* in: hash table */ +#ifdef UNIV_SYNC_DEBUG + ulint sync_level, /* in: latching order level of the + mutexes: used in the debug version */ +#endif /* UNIV_SYNC_DEBUG */ + ulint n_mutexes); /* in: number of mutexes */ +/* hash_create_mutexes() always takes (t, n, level). The latching level is +passed on only when UNIV_SYNC_DEBUG is defined (note the different argument +order); otherwise it is dropped. */ +#ifdef UNIV_SYNC_DEBUG +# define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,level,n) +#else /* UNIV_SYNC_DEBUG */ +# define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,n) +#endif /* UNIV_SYNC_DEBUG */ + +/***************************************************************** +Frees a hash table. */ +UNIV_INTERN +void +hash_table_free( +/*============*/ + hash_table_t* table); /* in, own: hash table */ +/****************************************************************** +Calculates the hash value from a folded value. */ +UNIV_INLINE +ulint +hash_calc_hash( +/*===========*/ + /* out: hashed value */ + ulint fold, /* in: folded value */ + hash_table_t* table); /* in: hash table */ +/************************************************************************ +Assert that the mutex for the table in a hash operation is owned. +Nothing is checked for a table that has no mutexes. The expansion already +ends in a semicolon, so the macros in this file invoke it without one. */ +#define HASH_ASSERT_OWNED(TABLE, FOLD) \ +ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD))); + +/*********************************************************************** +Inserts a struct to a hash table.
*/ +/* TYPE is the struct type and NAME its chain-pointer field. DATA is +appended to the end of the chain of the cell selected by FOLD. */ + +#define HASH_INSERT(TYPE, NAME, TABLE, FOLD, DATA)\ +do {\ + hash_cell_t* cell3333;\ + TYPE* struct3333;\ +\ + HASH_ASSERT_OWNED(TABLE, FOLD)\ +\ + (DATA)->NAME = NULL;\ +\ + cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\ +\ + if (cell3333->node == NULL) {\ + cell3333->node = (DATA);\ + } else {\ + struct3333 = (TYPE*) cell3333->node;\ +\ + while (struct3333->NAME != NULL) {\ +\ + struct3333 = (TYPE*) struct3333->NAME;\ + }\ +\ + struct3333->NAME = (DATA);\ + }\ +} while (0) + +/* With UNIV_HASH_DEBUG, HASH_INVALIDATE poisons the chain pointer of a +struct with (void*) -1 and HASH_ASSERT_VALID fails on that value. Without +it both macros expand to an empty statement. */ +#ifdef UNIV_HASH_DEBUG +# define HASH_ASSERT_VALID(DATA) ut_a((void*) (DATA) != (void*) -1) +# define HASH_INVALIDATE(DATA, NAME) (DATA)->NAME = (void*) -1 +#else +# define HASH_ASSERT_VALID(DATA) do {} while (0) +# define HASH_INVALIDATE(DATA, NAME) do {} while (0) +#endif + +/*********************************************************************** +Deletes a struct from a hash table. DATA must be in the chain selected by +FOLD: the walk asserts (ut_a) if the end of the chain is reached first. +The DATA argument is parenthesized wherever it is used, so it may be any +pointer expression; it is still evaluated more than once. */ + +#define HASH_DELETE(TYPE, NAME, TABLE, FOLD, DATA)\ +do {\ + hash_cell_t* cell3333;\ + TYPE* struct3333;\ +\ + HASH_ASSERT_OWNED(TABLE, FOLD)\ +\ + cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\ +\ + if (cell3333->node == (DATA)) {\ + HASH_ASSERT_VALID((DATA)->NAME);\ + cell3333->node = (DATA)->NAME;\ + } else {\ + struct3333 = (TYPE*) cell3333->node;\ +\ + while (struct3333->NAME != (DATA)) {\ +\ + struct3333 = (TYPE*) struct3333->NAME;\ + ut_a(struct3333);\ + }\ +\ + struct3333->NAME = (DATA)->NAME;\ + }\ + HASH_INVALIDATE(DATA, NAME);\ +} while (0) + +/*********************************************************************** +Gets the first struct in a hash chain, NULL if none. */ + +#define HASH_GET_FIRST(TABLE, HASH_VAL)\ + (hash_get_nth_cell(TABLE, HASH_VAL)->node) + +/*********************************************************************** +Gets the next struct in a hash chain, NULL if none. */ + +#define HASH_GET_NEXT(NAME, DATA) ((DATA)->NAME) + +/************************************************************************ +Looks for a struct in a hash table.
*/ /* Here TYPE is the pointer type of DATA (chain pointers are cast with +(TYPE)), unlike in HASH_INSERT and HASH_DELETE where TYPE is the struct type. +ASSERTION is a statement run on every node visited. Afterwards DATA is the +first node in the chain of FOLD for which TEST holds, or NULL if there is +none. NOTE(review): the expansion is a bare { } block rather than +do { } while (0), so a call followed by a semicolon must not be used as the +unbraced body of an if that has an else. */ +#define HASH_SEARCH(NAME, TABLE, FOLD, TYPE, DATA, ASSERTION, TEST)\ +{\ +\ + HASH_ASSERT_OWNED(TABLE, FOLD)\ +\ + (DATA) = (TYPE) HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\ + HASH_ASSERT_VALID(DATA);\ +\ + while ((DATA) != NULL) {\ + ASSERTION;\ + if (TEST) {\ + break;\ + } else {\ + HASH_ASSERT_VALID(HASH_GET_NEXT(NAME, DATA));\ + (DATA) = (TYPE) HASH_GET_NEXT(NAME, DATA);\ + }\ + }\ +} + +/************************************************************************ +Looks for an item in all hash buckets. The cells are scanned from the last +one down to the first. Afterwards DATA is the first node found for which TEST +holds, or NULL if there is none. Unlike HASH_SEARCH, this macro does not +assert that any mutex is owned. */ +#define HASH_SEARCH_ALL(NAME, TABLE, TYPE, DATA, ASSERTION, TEST) \ +do { \ + ulint i3333; \ + \ + for (i3333 = (TABLE)->n_cells; i3333--; ) { \ + (DATA) = (TYPE) HASH_GET_FIRST(TABLE, i3333); \ + \ + while ((DATA) != NULL) { \ + HASH_ASSERT_VALID(DATA); \ + ASSERTION; \ + \ + if (TEST) { \ + break; \ + } \ + \ + (DATA) = (TYPE) HASH_GET_NEXT(NAME, DATA); \ + } \ + \ + if ((DATA) != NULL) { \ + break; \ + } \ + } \ +} while (0) + +/**************************************************************** +Gets the nth cell in a hash table. */ +UNIV_INLINE +hash_cell_t* +hash_get_nth_cell( +/*==============*/ + /* out: pointer to cell */ + hash_table_t* table, /* in: hash table */ + ulint n); /* in: cell index */ + +/***************************************************************** +Clears a hash table so that all the cells become empty. */ +UNIV_INLINE +void +hash_table_clear( +/*=============*/ + hash_table_t* table); /* in/out: hash table */ + +/***************************************************************** +Returns the number of cells in a hash table. */ +UNIV_INLINE +ulint +hash_get_n_cells( +/*=============*/ + /* out: number of cells */ + hash_table_t* table); /* in: table */ +/*********************************************************************** +Deletes a struct which is stored in the heap of the hash table, and compacts +the heap. The fold value must be stored in the struct NODE in a field named +'fold'. 
*/ /* Steps: NODE is unlinked with HASH_DELETE; the node returned by +mem_heap_get_top() for the heap of NODE's fold value is copied over NODE +(unless it is NODE itself); whichever cell or chain pointer referenced that +top node is redirected to NODE; finally the top of the heap is released with +mem_heap_free_top(). NOTE(review): this presumes that the top sizeof(TYPE) +bytes of that heap always hold a live node of the same table; confirm against +the callers. */ + +#define HASH_DELETE_AND_COMPACT(TYPE, NAME, TABLE, NODE)\ +do {\ + TYPE* node111;\ + TYPE* top_node111;\ + hash_cell_t* cell111;\ + ulint fold111;\ +\ + fold111 = (NODE)->fold;\ +\ + HASH_DELETE(TYPE, NAME, TABLE, fold111, NODE);\ +\ + top_node111 = (TYPE*)mem_heap_get_top(\ + hash_get_heap(TABLE, fold111),\ + sizeof(TYPE));\ +\ + /* If the node to remove is not the top node in the heap, compact the\ + heap of nodes by moving the top node in the place of NODE. */\ +\ + if (NODE != top_node111) {\ +\ + /* Copy the top node in place of NODE */\ +\ + *(NODE) = *top_node111;\ +\ + cell111 = hash_get_nth_cell(TABLE,\ + hash_calc_hash(top_node111->fold, TABLE));\ +\ + /* Look for the pointer to the top node, to update it */\ +\ + if (cell111->node == top_node111) {\ + /* The top node is the first in the chain */\ +\ + cell111->node = NODE;\ + } else {\ + /* We have to look for the predecessor of the top\ + node */\ + node111 = cell111->node;\ +\ + while (top_node111 != HASH_GET_NEXT(NAME, node111)) {\ +\ + node111 = HASH_GET_NEXT(NAME, node111);\ + }\ +\ + /* Now we have the predecessor node */\ +\ + node111->NAME = NODE;\ + }\ + }\ +\ + /* Free the space occupied by the top node */\ +\ + mem_heap_free_top(hash_get_heap(TABLE, fold111), sizeof(TYPE));\ +} while (0) + +/******************************************************************** +Move all hash table entries from OLD_TABLE to NEW_TABLE. FOLD_FUNC(node) must +yield the fold value of a node. The next pointer of each node is saved before +HASH_INSERT resets it. The cells of OLD_TABLE are not cleared by this macro +and still point to the migrated nodes afterwards. */ + +#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \ +do {\ + ulint i2222;\ + ulint cell_count2222;\ +\ + cell_count2222 = hash_get_n_cells(OLD_TABLE);\ +\ + for (i2222 = 0; i2222 < cell_count2222; i2222++) {\ + NODE_TYPE* node2222 = HASH_GET_FIRST((OLD_TABLE), i2222);\ +\ + while (node2222) {\ + NODE_TYPE* next2222 = node2222->PTR_NAME;\ + ulint fold2222 = FOLD_FUNC(node2222);\ +\ + HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\ + fold2222, node2222);\ +\ + node2222 = next2222;\ + }\ + }\ +} while (0) + + 
+/**************************************************************** +Gets the mutex index for a fold value in a hash table. The inline definition +derives it with ut_2pow_remainder() from hash_calc_hash(fold, table) and +table->n_mutexes, which is asserted to be a power of 2. */ +UNIV_INLINE +ulint +hash_get_mutex_no( +/*==============*/ + /* out: mutex number */ + hash_table_t* table, /* in: hash table */ + ulint fold); /* in: fold */ +/**************************************************************** +Gets the nth heap in a hash table. */ +UNIV_INLINE +mem_heap_t* +hash_get_nth_heap( +/*==============*/ + /* out: mem heap */ + hash_table_t* table, /* in: hash table */ + ulint i); /* in: index of the heap */ +/**************************************************************** +Gets the heap for a fold value in a hash table. If table->heap is set, it is +returned for every fold value; otherwise the heap whose index equals +hash_get_mutex_no(table, fold) is returned. */ +UNIV_INLINE +mem_heap_t* +hash_get_heap( +/*==========*/ + /* out: mem heap */ + hash_table_t* table, /* in: hash table */ + ulint fold); /* in: fold */ +/**************************************************************** +Gets the nth mutex in a hash table. */ +UNIV_INLINE +mutex_t* +hash_get_nth_mutex( +/*===============*/ + /* out: mutex */ + hash_table_t* table, /* in: hash table */ + ulint i); /* in: index of the mutex */ +/**************************************************************** +Gets the mutex for a fold value in a hash table. */ +UNIV_INLINE +mutex_t* +hash_get_mutex( +/*===========*/ + /* out: mutex */ + hash_table_t* table, /* in: hash table */ + ulint fold); /* in: fold */ +/**************************************************************** +Reserves the mutex for a fold value in a hash table. */ +UNIV_INTERN +void +hash_mutex_enter( +/*=============*/ + hash_table_t* table, /* in: hash table */ + ulint fold); /* in: fold */ +/**************************************************************** +Releases the mutex for a fold value in a hash table. 
*/ +UNIV_INTERN +void +hash_mutex_exit( +/*============*/ + hash_table_t* table, /* in: hash table */ + ulint fold); /* in: fold */ +/**************************************************************** +Reserves all the mutexes of a hash table, in an ascending order. */ +UNIV_INTERN +void +hash_mutex_enter_all( +/*=================*/ + hash_table_t* table); /* in: hash table */ +/**************************************************************** +Releases all the mutexes of a hash table. */ +UNIV_INTERN +void +hash_mutex_exit_all( +/*================*/ + hash_table_t* table); /* in: hash table */ + + +struct hash_cell_struct{ + void* node; /* hash chain node, NULL if none */ +}; + +/* The hash table structure */ +struct hash_table_struct { +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + ibool adaptive;/* TRUE if this is the hash table of the + adaptive hash index */ +#endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */ + ulint n_cells;/* number of cells in the hash table */ + hash_cell_t* array; /* pointer to cell array */ + ulint n_mutexes;/* if mutexes != NULL, then the number of + mutexes, must be a power of 2 */ + mutex_t* mutexes;/* NULL, or an array of mutexes used to + protect segments of the hash table */ + mem_heap_t** heaps; /* if this is non-NULL, hash chain nodes for + external chaining can be allocated from these + memory heaps; there are then n_mutexes many of + these heaps */ + mem_heap_t* heap; /* if this is non-NULL, hash_get_heap() returns + it for every fold value instead of one of + heaps[] */ + ulint magic_n; /* NOTE(review): presumably set to + HASH_TABLE_MAGIC_N for sanity checks; confirm + in hash0hash.c */ +}; + +#define HASH_TABLE_MAGIC_N 76561114 + +#ifndef UNIV_NONINL +#include "hash0hash.ic" +#endif /* !UNIV_NONINL */ + +#endif diff --git a/storage/xtradb/include/hash0hash.ic b/storage/xtradb/include/hash0hash.ic new file mode 100644 index 00000000000..792fdcbf4f8 --- /dev/null +++ b/storage/xtradb/include/hash0hash.ic @@ -0,0 +1,160 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. 
+This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The simple hash table utility + +Created 5/20/1997 Heikki Tuuri +*******************************************************/ + +#include "ut0rnd.h" + +/**************************************************************** +Gets the nth cell in a hash table. The index is checked against n_cells by a +ut_ad() assertion only. */ +UNIV_INLINE +hash_cell_t* +hash_get_nth_cell( +/*==============*/ + /* out: pointer to cell */ + hash_table_t* table, /* in: hash table */ + ulint n) /* in: cell index */ +{ + ut_ad(n < table->n_cells); + + return(table->array + n); +} + +/***************************************************************** +Clears a hash table so that all the cells become empty. Only the cell array +is zero-filled; nodes that were linked from the cells are not freed here. */ +UNIV_INLINE +void +hash_table_clear( +/*=============*/ + hash_table_t* table) /* in/out: hash table */ +{ + memset(table->array, 0x0, + table->n_cells * sizeof(*table->array)); +} + +/***************************************************************** +Returns the number of cells in a hash table. */ +UNIV_INLINE +ulint +hash_get_n_cells( +/*=============*/ + /* out: number of cells */ + hash_table_t* table) /* in: table */ +{ + return(table->n_cells); +} + +/****************************************************************** +Calculates the hash value from a folded value. 
*/ +UNIV_INLINE +ulint +hash_calc_hash( +/*===========*/ + /* out: hashed value */ + ulint fold, /* in: folded value */ + hash_table_t* table) /* in: hash table */ +{ + return(ut_hash_ulint(fold, table->n_cells)); +} + +/**************************************************************** +Gets the mutex index for a fold value in a hash table. The index is derived +from the hash value of fold, not from fold itself. */ +UNIV_INLINE +ulint +hash_get_mutex_no( +/*==============*/ + /* out: mutex number */ + hash_table_t* table, /* in: hash table */ + ulint fold) /* in: fold */ +{ + ut_ad(ut_is_2pow(table->n_mutexes)); + return(ut_2pow_remainder(hash_calc_hash(fold, table), + table->n_mutexes)); +} + +/**************************************************************** +Gets the nth heap in a hash table. */ +UNIV_INLINE +mem_heap_t* +hash_get_nth_heap( +/*==============*/ + /* out: mem heap */ + hash_table_t* table, /* in: hash table */ + ulint i) /* in: index of the heap */ +{ + ut_ad(i < table->n_mutexes); /* heaps[] is bounded by n_mutexes */ + + return(table->heaps[i]); +} + +/**************************************************************** +Gets the heap for a fold value in a hash table. */ +UNIV_INLINE +mem_heap_t* +hash_get_heap( +/*==========*/ + /* out: mem heap */ + hash_table_t* table, /* in: hash table */ + ulint fold) /* in: fold */ +{ + ulint i; + + if (table->heap) { /* a table-wide heap takes precedence over heaps[] */ + return(table->heap); + } + + i = hash_get_mutex_no(table, fold); + + return(hash_get_nth_heap(table, i)); +} + +/**************************************************************** +Gets the nth mutex in a hash table. */ +UNIV_INLINE +mutex_t* +hash_get_nth_mutex( +/*===============*/ + /* out: mutex */ + hash_table_t* table, /* in: hash table */ + ulint i) /* in: index of the mutex */ +{ + ut_ad(i < table->n_mutexes); + + return(table->mutexes + i); +} + +/**************************************************************** +Gets the mutex for a fold value in a hash table. 
*/ +UNIV_INLINE +mutex_t* +hash_get_mutex( +/*===========*/ + /* out: mutex */ + hash_table_t* table, /* in: hash table */ + ulint fold) /* in: fold */ +{ + ulint i; + + i = hash_get_mutex_no(table, fold); /* index of the mutex for fold */ + + return(hash_get_nth_mutex(table, i)); +} diff --git a/storage/xtradb/include/ibuf0ibuf.h b/storage/xtradb/include/ibuf0ibuf.h new file mode 100644 index 00000000000..41e2392cc4a --- /dev/null +++ b/storage/xtradb/include/ibuf0ibuf.h @@ -0,0 +1,369 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Insert buffer + +Created 7/19/1997 Heikki Tuuri +*******************************************************/ + +#ifndef ibuf0ibuf_h +#define ibuf0ibuf_h + +#include "univ.i" + +#include "dict0mem.h" +#include "mtr0mtr.h" +#include "que0types.h" +#include "ibuf0types.h" +#include "fsp0fsp.h" + +/** Combinations of operations that can be buffered. Because the enum +values are used for indexing innobase_change_buffering_values[], they +should start at 0 and there should not be any gaps. 
*/ +typedef enum { + IBUF_USE_NONE = 0, /* no buffering: ibuf_should_try() returns FALSE */ + IBUF_USE_INSERT, /* insert */ + + IBUF_USE_COUNT /* number of entries in ibuf_use_t */ +} ibuf_use_t; + +/** Operations that can currently be buffered. */ +extern ibuf_use_t ibuf_use; + +/** The insert buffer control structure */ +extern ibuf_t* ibuf; + +/* The purpose of the insert buffer is to reduce random disk access. +When we wish to insert a record into a non-unique secondary index and +the B-tree leaf page to which the record belongs is not in the buffer +pool, we insert the record into the insert buffer B-tree, indexed by +(space_id, page_no). When the page is eventually read into the buffer +pool, we look up the insert buffer B-tree for any modifications to the +page, and apply these upon the completion of the read operation. This +is called the insert buffer merge. */ + +/* The insert buffer merge must always succeed. To guarantee this, +the insert buffer subsystem keeps track of the free space in pages for +which it can buffer operations. Two bits per page in the insert +buffer bitmap indicate the available space in coarse increments. The +free bits in the insert buffer bitmap must never exceed the free space +on a page. It is safe to decrement or reset the bits in the bitmap in +a mini-transaction that is committed before the mini-transaction that +affects the free space. It is unsafe to increment the bits in a +separately committed mini-transaction, because in crash recovery, the +free bits could momentarily be set too high. */ + +/********************************************************************** +Creates the insert buffer data structure at a database startup and +initializes the data structures for the insert buffer of each tablespace. */ +UNIV_INTERN +void +ibuf_init_at_db_start(void); +/*=======================*/ +/************************************************************************* +Reads the biggest tablespace id from the high end of the insert buffer +tree and updates the counter in fil_system. 
*/ +UNIV_INTERN +void +ibuf_update_max_tablespace_id(void); +/*===============================*/ +/************************************************************************* +Initializes an ibuf bitmap page. */ +UNIV_INTERN +void +ibuf_bitmap_page_init( +/*==================*/ + buf_block_t* block, /* in: bitmap page */ + mtr_t* mtr); /* in: mtr */ +/**************************************************************************** +Resets the free bits of the page in the ibuf bitmap. This is done in a +separate mini-transaction, hence this operation does not restrict +further work to only ibuf bitmap operations, which would result if the +latch to the bitmap page were kept. NOTE: The free bits in the insert +buffer bitmap must never exceed the free space on a page. It is safe +to decrement or reset the bits in the bitmap in a mini-transaction +that is committed before the mini-transaction that affects the free +space. */ +UNIV_INTERN +void +ibuf_reset_free_bits( +/*=================*/ + buf_block_t* block); /* in: index page; free bits are set to 0 + if the index is non-clustered and + non-unique, and page level is 0 */ +/**************************************************************************** +Updates the free bits of an uncompressed page in the ibuf bitmap if +there is not enough free space on the page any more. This is done in a +separate mini-transaction, hence this operation does not restrict +further work to only ibuf bitmap operations, which would result if the +latch to the bitmap page were kept. NOTE: The free bits in the insert +buffer bitmap must never exceed the free space on a page. It is +unsafe to increment the bits in a separately committed +mini-transaction, because in crash recovery, the free bits could +momentarily be set too high. It is only safe to use this function for +decrementing the free bits. Should more free space become available, +we must not update the free bits here, because that would break crash +recovery. 
*/ +UNIV_INLINE +void +ibuf_update_free_bits_if_full( +/*==========================*/ + buf_block_t* block, /* in: index page to which we have added new + records; the free bits are updated if the + index is non-clustered and non-unique and + the page level is 0, and the page becomes + fuller */ + ulint max_ins_size,/* in: value of maximum insert size with + reorganize before the latest operation + performed to the page */ + ulint increase);/* in: upper limit for the additional space + used in the latest operation, if known, or + ULINT_UNDEFINED */ +/************************************************************************** +Updates the free bits for an uncompressed page to reflect the present +state. Does this in the mtr given, which means that the latching +order rules virtually prevent any further operations for this OS +thread until mtr is committed. NOTE: The free bits in the insert +buffer bitmap must never exceed the free space on a page. It is safe +to set the free bits in the same mini-transaction that updated the +page. */ +UNIV_INTERN +void +ibuf_update_free_bits_low( +/*======================*/ + const buf_block_t* block, /* in: index page (uncompressed; + ibuf_update_free_bits_zip() is the + counterpart for compressed pages) */ + ulint max_ins_size, /* in: value of + maximum insert size + with reorganize before + the latest operation + performed to the page */ + mtr_t* mtr); /* in/out: mtr */ +/************************************************************************** +Updates the free bits for a compressed page to reflect the present +state. Does this in the mtr given, which means that the latching +order rules virtually prevent any further operations for this OS +thread until mtr is committed. NOTE: The free bits in the insert +buffer bitmap must never exceed the free space on a page. It is safe +to set the free bits in the same mini-transaction that updated the +page. 
*/ +UNIV_INTERN +void +ibuf_update_free_bits_zip( +/*======================*/ + buf_block_t* block, /* in/out: index page */ + mtr_t* mtr); /* in/out: mtr */ +/************************************************************************** +Updates the free bits for the two pages to reflect the present state. +Does this in the mtr given, which means that the latching order rules +virtually prevent any further operations until mtr is committed. +NOTE: The free bits in the insert buffer bitmap must never exceed the +free space on a page. It is safe to set the free bits in the same +mini-transaction that updated the pages. */ +UNIV_INTERN +void +ibuf_update_free_bits_for_two_pages_low( +/*====================================*/ + ulint zip_size,/* in: compressed page size in bytes; + 0 for uncompressed pages */ + buf_block_t* block1, /* in: index page */ + buf_block_t* block2, /* in: index page */ + mtr_t* mtr); /* in: mtr */ +/************************************************************************** +A basic partial test if an insert to the insert buffer could be possible and +recommended. Returns TRUE when ibuf_use is not IBUF_USE_NONE, the index is +not clustered, and the index is either not unique or ignore_sec_unique is +nonzero. As a side effect, each call that returns TRUE increments +ibuf_flush_count, and buf_LRU_try_free_flushed_blocks() is called whenever +the counter becomes a multiple of 4; see the inline definition in +ibuf0ibuf.ic. */ +UNIV_INLINE +ibool +ibuf_should_try( +/*============*/ + dict_index_t* index, /* in: index where to insert */ + ulint ignore_sec_unique); /* in: if != 0, we should + ignore UNIQUE constraint on + a secondary index when we + decide */ +/********************************************************************** +Returns TRUE if the current OS thread is performing an insert buffer +routine. */ +UNIV_INTERN +ibool +ibuf_inside(void); +/*=============*/ + /* out: TRUE if inside an insert buffer routine: for instance, + a read-ahead of non-ibuf pages is then forbidden */ +/*************************************************************************** +Checks if a page address is an ibuf bitmap page (level 3 page) address. 
*/ +UNIV_INLINE +ibool +ibuf_bitmap_page( +/*=============*/ + /* out: TRUE if a bitmap page, that is, if + page_no masked with (zip_size - 1), or with + (UNIV_PAGE_SIZE - 1) when zip_size is 0, + equals FSP_IBUF_BITMAP_OFFSET */ + ulint zip_size,/* in: compressed page size in bytes; + 0 for uncompressed pages */ + ulint page_no);/* in: page number */ +/*************************************************************************** +Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. +Must not be called when recv_no_ibuf_operations==TRUE. */ +UNIV_INTERN +ibool +ibuf_page( +/*======*/ + /* out: TRUE if level 2 or level 3 page */ + ulint space, /* in: space id */ + ulint zip_size,/* in: compressed page size in bytes, or 0 */ + ulint page_no,/* in: page number */ + mtr_t* mtr); /* in: mtr which will contain an x-latch to the + bitmap page if the page is not one of the fixed + address ibuf pages, or NULL, in which case a new + transaction is created. */ +/*************************************************************************** +Frees excess pages from the ibuf free list. This function is called when an OS +thread calls fsp services to allocate a new file segment, or a new page to a +file segment, and the thread did not own the fsp latch before this call. */ +UNIV_INTERN +void +ibuf_free_excess_pages(void); +/*========================*/ +/************************************************************************* +Makes an index insert to the insert buffer, instead of directly to the disk +page, if this is possible. Does not do insert if the index is clustered +or unique. 
*/ +UNIV_INTERN +ibool +ibuf_insert( +/*========*/ + /* out: TRUE if success */ + const dtuple_t* entry, /* in: index entry to insert */ + dict_index_t* index, /* in: index where to insert */ + ulint space, /* in: space id where to insert */ + ulint zip_size,/* in: compressed page size in bytes, or 0 */ + ulint page_no,/* in: page number where to insert */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************* +When an index page is read from a disk to the buffer pool, this function +inserts to the page the possible index entries buffered in the insert buffer. +The entries are deleted from the insert buffer. If the page is not read, but +created in the buffer pool, this function deletes its buffered entries from +the insert buffer; there can exist entries for such a page if the page +belonged to an index which subsequently was dropped. */ +UNIV_INTERN +void +ibuf_merge_or_delete_for_page( +/*==========================*/ + buf_block_t* block, /* in: if page has been read from + disk, pointer to the page x-latched, + else NULL */ + ulint space, /* in: space id of the index page */ + ulint page_no,/* in: page number of the index page */ + ulint zip_size,/* in: compressed page size in bytes, + or 0; NOTE: here zip_size follows page_no, + whereas ibuf_insert() and ibuf_page() take + it before page_no */ + ibool update_ibuf_bitmap);/* in: normally this is set + to TRUE, but if we have deleted or are + deleting the tablespace, then we + naturally do not want to update a + non-existent bitmap page */ +/************************************************************************* +Deletes all entries in the insert buffer for a given space id. This is used +in DISCARD TABLESPACE and IMPORT TABLESPACE. +NOTE: this does not update the page free bitmaps in the space. The space will +become CORRUPT when you call this function! 
*/ +UNIV_INTERN +void +ibuf_delete_for_discarded_space( +/*============================*/ + ulint space); /* in: space id */ +/************************************************************************* +Contracts insert buffer trees by reading pages to the buffer pool. */ +UNIV_INTERN +ulint +ibuf_contract( +/*==========*/ + /* out: a lower limit for the combined size in bytes + of entries which will be merged from ibuf trees to the + pages read, 0 if ibuf is empty */ + ibool sync); /* in: TRUE if the caller wants to wait for the + issued read with the highest tablespace address + to complete */ +/************************************************************************* +Contracts insert buffer trees by reading pages to the buffer pool. Unlike +ibuf_contract(), this variant takes n_pages, a target for the number of pages +to read. */ +UNIV_INTERN +ulint +ibuf_contract_for_n_pages( +/*======================*/ + /* out: a lower limit for the combined size in bytes + of entries which will be merged from ibuf trees to the + pages read, 0 if ibuf is empty */ + ibool sync, /* in: TRUE if the caller wants to wait for the + issued read with the highest tablespace address + to complete */ + ulint n_pages);/* in: try to read at least this many pages to + the buffer pool and merge the ibuf contents to + them */ +/************************************************************************* +Parses a redo log record of an ibuf bitmap page init. */ +UNIV_INTERN +byte* +ibuf_parse_bitmap_init( +/*===================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + buf_block_t* block, /* in: block or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ +#ifdef UNIV_IBUF_COUNT_DEBUG +/********************************************************************** +Gets the ibuf count for a given page. 
*/ +UNIV_INTERN +ulint +ibuf_count_get( +/*===========*/ + /* out: number of entries in the insert buffer + currently buffered for this page */ + ulint space, /* in: space id */ + ulint page_no);/* in: page number */ +#endif /* UNIV_IBUF_COUNT_DEBUG */ +/********************************************************************** +Looks if the insert buffer is empty. */ +UNIV_INTERN +ibool +ibuf_is_empty(void); +/*===============*/ + /* out: TRUE if empty */ +/********************************************************************** +Prints info of ibuf. */ +UNIV_INTERN +void +ibuf_print( +/*=======*/ + FILE* file); /* in: file where to print */ + +#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO +#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO + +/* The ibuf header page currently contains only the file segment header +for the file segment from which the pages for the ibuf tree are allocated */ +#define IBUF_HEADER PAGE_DATA +#define IBUF_TREE_SEG_HEADER 0 /* fseg header for ibuf tree */ + +/* The insert buffer tree itself is always located in space 0. */ +#define IBUF_SPACE_ID 0 + +#ifndef UNIV_NONINL +#include "ibuf0ibuf.ic" +#endif /* !UNIV_NONINL */ + +#endif /* ibuf0ibuf_h */ diff --git a/storage/xtradb/include/ibuf0ibuf.ic b/storage/xtradb/include/ibuf0ibuf.ic new file mode 100644 index 00000000000..170e5dba473 --- /dev/null +++ b/storage/xtradb/include/ibuf0ibuf.ic @@ -0,0 +1,325 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Insert buffer + +Created 7/19/1997 Heikki Tuuri +*******************************************************/ + +#include "buf0lru.h" +#include "page0page.h" +#include "page0zip.h" + +extern ulint ibuf_flush_count; /* incremented by ibuf_should_try() each time +it returns TRUE; whenever the count becomes a multiple of 4, +buf_LRU_try_free_flushed_blocks() is called */ + +/* If this number is n, an index page must contain at least the page size +per n bytes of free space for ibuf to try to buffer inserts to this page. +If there is this much of free space, the corresponding bits are set in the +ibuf bitmap. The bitmap value counts free space in units of (page size / n): +see ibuf_index_page_calc_free_bits(). */ +#define IBUF_PAGE_SIZE_PER_FREE_SPACE 32 + +/* Insert buffer struct */ + +struct ibuf_struct{ + ulint size; /* current size of the ibuf index + tree, in pages */ + ulint max_size; /* recommended maximum size of the + ibuf index tree, in pages */ + ulint seg_size; /* allocated pages of the file + segment containing ibuf header and + tree */ + ibool empty; /* after an insert to the ibuf tree + is performed, this is set to FALSE, + and if a contract operation finds + the tree empty, this is set to + TRUE */ + ulint free_list_len; /* length of the free list */ + ulint height; /* tree height */ + dict_index_t* index; /* insert buffer index */ + + ulint n_inserts; /* number of inserts made to + the insert buffer */ + ulint n_merges; /* number of pages merged */ + ulint n_merged_recs; /* number of records merged */ +}; + +/**************************************************************************** +Sets the free bit of the page in the ibuf bitmap. This is done in a separate +mini-transaction, hence this operation does not restrict further work to only +ibuf bitmap operations, which would result if the latch to the bitmap page +were kept. 
*/ +UNIV_INTERN +void +ibuf_set_free_bits_func( +/*====================*/ + buf_block_t* block, /* in: index page of a non-clustered index; + free bit is reset if page level is 0 */ +#ifdef UNIV_IBUF_DEBUG + ulint max_val,/* in: ULINT_UNDEFINED or a maximum + value which the bits must have before + setting; this is for debugging */ +#endif /* UNIV_IBUF_DEBUG */ + ulint val); /* in: value to set: < 4 */ +#ifdef UNIV_IBUF_DEBUG +# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,max,v) +#else /* UNIV_IBUF_DEBUG */ +# define ibuf_set_free_bits(b,v,max) ibuf_set_free_bits_func(b,v) +#endif /* UNIV_IBUF_DEBUG */ /* ibuf_set_free_bits(b,v,max) is invoked the same +way in both builds; max reaches the function only under UNIV_IBUF_DEBUG. Note +that the macro takes v before max, whereas the function takes max_val before +val. */ + +/************************************************************************** +A basic partial test if an insert to the insert buffer could be possible and +recommended. Besides returning the verdict, a positive answer also bumps +ibuf_flush_count and periodically calls buf_LRU_try_free_flushed_blocks(). */ +UNIV_INLINE +ibool +ibuf_should_try( +/*============*/ + dict_index_t* index, /* in: index where to insert */ + ulint ignore_sec_unique) /* in: if != 0, we should + ignore UNIQUE constraint on + a secondary index when we + decide */ +{ + if (ibuf_use != IBUF_USE_NONE + && !dict_index_is_clust(index) + && (ignore_sec_unique || !dict_index_is_unique(index))) { + + ibuf_flush_count++; /* plain increment of the global counter; no latch is taken in this function */ + + if (ibuf_flush_count % 4 == 0) { /* whenever the counter is a multiple of 4 */ + + buf_LRU_try_free_flushed_blocks(); + } + + return(TRUE); + } + + return(FALSE); +} + +/*************************************************************************** +Checks if a page address is an ibuf bitmap page address. 
*/ +UNIV_INLINE +ibool +ibuf_bitmap_page( +/*=============*/ + /* out: TRUE if a bitmap page */ + ulint zip_size,/* in: compressed page size in bytes; + 0 for uncompressed pages */ + ulint page_no)/* in: page number */ +{ + ut_ad(ut_is_2pow(zip_size)); + + if (!zip_size) { /* uncompressed page: mask with UNIV_PAGE_SIZE - 1 instead */ + return(UNIV_UNLIKELY((page_no & (UNIV_PAGE_SIZE - 1)) + == FSP_IBUF_BITMAP_OFFSET)); + } + + return(UNIV_UNLIKELY((page_no & (zip_size - 1)) + == FSP_IBUF_BITMAP_OFFSET)); +} + +/************************************************************************* +Translates the free space on a page to a value in the ibuf bitmap. The free +space is counted in units of 1/IBUF_PAGE_SIZE_PER_FREE_SPACE of the page size +(zip_size, or UNIV_PAGE_SIZE when zip_size is 0): 0, 1 and 2 units map to +themselves, 3 units is rounded down to 2, and 4 or more units map to 3. */ +UNIV_INLINE +ulint +ibuf_index_page_calc_free_bits( +/*===========================*/ + /* out: value for ibuf bitmap bits */ + ulint zip_size, /* in: compressed page size in bytes; + 0 for uncompressed pages */ + ulint max_ins_size) /* in: maximum insert size after reorganize + for the page */ +{ + ulint n; + ut_ad(ut_is_2pow(zip_size)); + ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE); + ut_ad(zip_size <= UNIV_PAGE_SIZE); + + if (zip_size) { + n = max_ins_size + / (zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE); + } else { + n = max_ins_size + / (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE); + } + + if (n == 3) { /* the value 3 is reserved for 4 or more units, see below */ + n = 2; + } + + if (n > 3) { + n = 3; + } + + return(n); +} + +/************************************************************************* +Translates the ibuf free bits to the free space on a page in bytes. 
*/ +UNIV_INLINE +ulint +ibuf_index_page_calc_free_from_bits( +/*================================*/ + /* out: maximum insert size after reorganize for the + page */ + ulint zip_size,/* in: compressed page size in bytes; + 0 for uncompressed pages */ + ulint bits) /* in: value for ibuf bitmap bits */ +{ + ut_ad(bits < 4); + ut_ad(ut_is_2pow(zip_size)); + ut_ad(!zip_size || zip_size > IBUF_PAGE_SIZE_PER_FREE_SPACE); + ut_ad(zip_size <= UNIV_PAGE_SIZE); + + if (zip_size) { + if (bits == 3) { + return(4 * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE); + } + + return(bits * zip_size / IBUF_PAGE_SIZE_PER_FREE_SPACE); + } + + if (bits == 3) { + return(4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE); + } + + return(bits * (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE)); +} + +/************************************************************************* +Translates the free space on a compressed page to a value in the ibuf bitmap.*/ +UNIV_INLINE +ulint +ibuf_index_page_calc_free_zip( +/*==========================*/ + /* out: value for ibuf bitmap bits */ + ulint zip_size, + /* in: compressed page size in bytes */ + const buf_block_t* block) /* in: buffer block */ +{ + ulint max_ins_size; + const page_zip_des_t* page_zip; + lint zip_max_ins; + + ut_ad(zip_size == buf_block_get_zip_size(block)); + ut_ad(zip_size); + + max_ins_size = page_get_max_insert_size_after_reorganize( + buf_block_get_frame(block), 1); + + page_zip = buf_block_get_page_zip(block); + zip_max_ins = page_zip_max_ins_size(page_zip, + FALSE/* not clustered */); + + if (UNIV_UNLIKELY(zip_max_ins < 0)) { + return(0); + } else if (UNIV_LIKELY(max_ins_size > (ulint) zip_max_ins)) { + max_ins_size = (ulint) zip_max_ins; + } + + return(ibuf_index_page_calc_free_bits(zip_size, max_ins_size)); +} + +/************************************************************************* +Translates the free space on a page to a value in the ibuf bitmap.*/ +UNIV_INLINE +ulint +ibuf_index_page_calc_free( +/*======================*/ + 
/* out: value for ibuf bitmap bits */ + ulint zip_size,/* in: compressed page size in bytes; + 0 for uncompressed pages */ + const buf_block_t* block) /* in: buffer block */ +{ + ut_ad(zip_size == buf_block_get_zip_size(block)); + + if (!zip_size) { + ulint max_ins_size; + + max_ins_size = page_get_max_insert_size_after_reorganize( + buf_block_get_frame(block), 1); + + return(ibuf_index_page_calc_free_bits(0, max_ins_size)); + } else { + return(ibuf_index_page_calc_free_zip(zip_size, block)); + } +} + +/**************************************************************************** +Updates the free bits of an uncompressed page in the ibuf bitmap if +there is not enough free space on the page any more. This is done in a +separate mini-transaction, hence this operation does not restrict +further work to only ibuf bitmap operations, which would result if the +latch to the bitmap page were kept. NOTE: The free bits in the insert +buffer bitmap must never exceed the free space on a page. It is +unsafe to increment the bits in a separately committed +mini-transaction, because in crash recovery, the free bits could +momentarily be set too high. It is only safe to use this function for +decrementing the free bits. Should more free space become available, +we must not update the free bits here, because that would break crash +recovery. 
*/ +UNIV_INLINE +void +ibuf_update_free_bits_if_full( +/*==========================*/ + buf_block_t* block, /* in: index page to which we have added new + records; the free bits are updated if the + index is non-clustered and non-unique and + the page level is 0, and the page becomes + fuller */ + ulint max_ins_size,/* in: value of maximum insert size with + reorganize before the latest operation + performed to the page */ + ulint increase)/* in: upper limit for the additional space + used in the latest operation, if known, or + ULINT_UNDEFINED */ +{ + ulint before; + ulint after; + + ut_ad(!buf_block_get_page_zip(block)); + + before = ibuf_index_page_calc_free_bits(0, max_ins_size); + + if (max_ins_size >= increase) { +#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE +# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE" +#endif + after = ibuf_index_page_calc_free_bits(0, max_ins_size + - increase); +#ifdef UNIV_IBUF_DEBUG + ut_a(after <= ibuf_index_page_calc_free(0, block)); +#endif + } else { + after = ibuf_index_page_calc_free(0, block); + } + + if (after == 0) { + /* We move the page to the front of the buffer pool LRU list: + the purpose of this is to prevent those pages to which we + cannot make inserts using the insert buffer from slipping + out of the buffer pool */ + + buf_page_make_young(&block->page); + } + + if (before > after) { + ibuf_set_free_bits(block, after, before); + } +} diff --git a/storage/xtradb/include/ibuf0types.h b/storage/xtradb/include/ibuf0types.h new file mode 100644 index 00000000000..264415196a1 --- /dev/null +++ b/storage/xtradb/include/ibuf0types.h @@ -0,0 +1,30 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Insert buffer global types + +Created 7/29/1997 Heikki Tuuri +*******************************************************/ + +#ifndef ibuf0types_h +#define ibuf0types_h + +typedef struct ibuf_struct ibuf_t; + +#endif diff --git a/storage/xtradb/include/lock0iter.h b/storage/xtradb/include/lock0iter.h new file mode 100644 index 00000000000..3cd47bb95d2 --- /dev/null +++ b/storage/xtradb/include/lock0iter.h @@ -0,0 +1,68 @@ +/***************************************************************************** + +Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Lock queue iterator type and function prototypes. 
+ +Created July 16, 2007 Vasil Dimov +*******************************************************/ + +#ifndef lock0iter_h +#define lock0iter_h + +#include "univ.i" +#include "lock0types.h" + +typedef struct lock_queue_iterator_struct { + const lock_t* current_lock; + /* In case this is a record lock queue (not table lock queue) + then bit_no is the record number within the heap in which the + record is stored. */ + ulint bit_no; +} lock_queue_iterator_t; + +/*********************************************************************** +Initialize lock queue iterator so that it starts to iterate from +"lock". bit_no specifies the record number within the heap where the +record is stored. It can be undefined (ULINT_UNDEFINED) in two cases: +1. If the lock is a table lock, thus we have a table lock queue; +2. If the lock is a record lock and it is a wait lock. In this case + bit_no is calculated in this function by using + lock_rec_find_set_bit(). There is exactly one bit set in the bitmap + of a wait lock. */ +UNIV_INTERN +void +lock_queue_iterator_reset( +/*======================*/ + lock_queue_iterator_t* iter, /* out: iterator */ + const lock_t* lock, /* in: lock to start from */ + ulint bit_no);/* in: record number in the + heap */ + +/*********************************************************************** +Gets the previous lock in the lock queue, returns NULL if there are no +more locks (i.e. the current lock is the first one). The iterator is +receded (if not-NULL is returned). 
*/ + +const lock_t* +lock_queue_iterator_get_prev( +/*=========================*/ + /* out: previous lock or NULL */ + lock_queue_iterator_t* iter); /* in/out: iterator */ + +#endif /* lock0iter_h */ diff --git a/storage/xtradb/include/lock0lock.h b/storage/xtradb/include/lock0lock.h new file mode 100644 index 00000000000..2deeb804737 --- /dev/null +++ b/storage/xtradb/include/lock0lock.h @@ -0,0 +1,838 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The transaction lock system + +Created 5/7/1996 Heikki Tuuri +*******************************************************/ + +#ifndef lock0lock_h +#define lock0lock_h + +#include "univ.i" +#include "buf0types.h" +#include "trx0types.h" +#include "rem0types.h" +#include "dict0types.h" +#include "que0types.h" +#include "lock0types.h" +#include "read0types.h" +#include "hash0hash.h" +#include "ut0vec.h" + +#ifdef UNIV_DEBUG +extern ibool lock_print_waits; +#endif /* UNIV_DEBUG */ +/* Buffer for storing information about the most recent deadlock error */ +extern FILE* lock_latest_err_file; + 
+/************************************************************************* +Gets the size of a lock struct. */ +UNIV_INTERN +ulint +lock_get_size(void); +/*===============*/ + /* out: size in bytes */ +/************************************************************************* +Creates the lock system at database start. */ +UNIV_INTERN +void +lock_sys_create( +/*============*/ + ulint n_cells); /* in: number of slots in lock hash table */ +/************************************************************************* +Checks if some transaction has an implicit x-lock on a record in a clustered +index. */ +UNIV_INLINE +trx_t* +lock_clust_rec_some_has_impl( +/*=========================*/ + /* out: transaction which has the x-lock, or + NULL */ + const rec_t* rec, /* in: user record */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ +/************************************************************************* +Gets the heap_no of the smallest user record on a page. */ +UNIV_INLINE +ulint +lock_get_min_heap_no( +/*=================*/ + /* out: heap_no of smallest + user record, or + PAGE_HEAP_NO_SUPREMUM */ + const buf_block_t* block); /* in: buffer block */ +/***************************************************************** +Updates the lock table when we have reorganized a page. NOTE: we copy +also the locks set on the infimum of the page; the infimum may carry +locks if an update of a record is occurring on the page, and its locks +were temporarily stored on the infimum. */ +UNIV_INTERN +void +lock_move_reorganize_page( +/*======================*/ + const buf_block_t* block, /* in: old index page, now + reorganized */ + const buf_block_t* oblock);/* in: copy of the old, not + reorganized page */ +/***************************************************************** +Moves the explicit locks on user records to another page if a record +list end is moved to another page. 
*/ +UNIV_INTERN +void +lock_move_rec_list_end( +/*===================*/ + const buf_block_t* new_block, /* in: index page to move to */ + const buf_block_t* block, /* in: index page */ + const rec_t* rec); /* in: record on page: this + is the first record moved */ +/***************************************************************** +Moves the explicit locks on user records to another page if a record +list start is moved to another page. */ +UNIV_INTERN +void +lock_move_rec_list_start( +/*=====================*/ + const buf_block_t* new_block, /* in: index page to move to */ + const buf_block_t* block, /* in: index page */ + const rec_t* rec, /* in: record on page: + this is the first + record NOT copied */ + const rec_t* old_end); /* in: old + previous-to-last + record on new_page + before the records + were copied */ +/***************************************************************** +Updates the lock table when a page is split to the right. */ +UNIV_INTERN +void +lock_update_split_right( +/*====================*/ + const buf_block_t* right_block, /* in: right page */ + const buf_block_t* left_block); /* in: left page */ +/***************************************************************** +Updates the lock table when a page is merged to the right. */ +UNIV_INTERN +void +lock_update_merge_right( +/*====================*/ + const buf_block_t* right_block, /* in: right page to + which merged */ + const rec_t* orig_succ, /* in: original + successor of infimum + on the right page + before merge */ + const buf_block_t* left_block); /* in: merged index + page which will be + discarded */ +/***************************************************************** +Updates the lock table when the root page is copied to another in +btr_root_raise_and_insert. 
Note that we leave lock structs on the +root page, even though they do not make sense on other than leaf +pages: the reason is that in a pessimistic update the infimum record +of the root page will act as a dummy carrier of the locks of the record +to be updated. */ +UNIV_INTERN +void +lock_update_root_raise( +/*===================*/ + const buf_block_t* block, /* in: index page to which copied */ + const buf_block_t* root); /* in: root page */ +/***************************************************************** +Updates the lock table when a page is copied to another and the original page +is removed from the chain of leaf pages, except if page is the root! */ +UNIV_INTERN +void +lock_update_copy_and_discard( +/*=========================*/ + const buf_block_t* new_block, /* in: index page to + which copied */ + const buf_block_t* block); /* in: index page; + NOT the root! */ +/***************************************************************** +Updates the lock table when a page is split to the left. */ +UNIV_INTERN +void +lock_update_split_left( +/*===================*/ + const buf_block_t* right_block, /* in: right page */ + const buf_block_t* left_block); /* in: left page */ +/***************************************************************** +Updates the lock table when a page is merged to the left. */ +UNIV_INTERN +void +lock_update_merge_left( +/*===================*/ + const buf_block_t* left_block, /* in: left page to + which merged */ + const rec_t* orig_pred, /* in: original predecessor + of supremum on the left page + before merge */ + const buf_block_t* right_block); /* in: merged index page + which will be discarded */ +/***************************************************************** +Resets the original locks on heir and replaces them with gap type locks +inherited from rec. 
*/ +UNIV_INTERN +void +lock_rec_reset_and_inherit_gap_locks( +/*=================================*/ + const buf_block_t* heir_block, /* in: block containing the + record which inherits */ + const buf_block_t* block, /* in: block containing the + record from which inherited; + does NOT reset the locks on + this record */ + ulint heir_heap_no, /* in: heap_no of the + inheriting record */ + ulint heap_no); /* in: heap_no of the + donating record */ +/***************************************************************** +Updates the lock table when a page is discarded. */ +UNIV_INTERN +void +lock_update_discard( +/*================*/ + const buf_block_t* heir_block, /* in: index page + which will inherit the locks */ + ulint heir_heap_no, /* in: heap_no of the record + which will inherit the locks */ + const buf_block_t* block); /* in: index page + which will be discarded */ +/***************************************************************** +Updates the lock table when a new user record is inserted. */ +UNIV_INTERN +void +lock_update_insert( +/*===============*/ + const buf_block_t* block, /* in: buffer block containing rec */ + const rec_t* rec); /* in: the inserted record */ +/***************************************************************** +Updates the lock table when a record is removed. */ +UNIV_INTERN +void +lock_update_delete( +/*===============*/ + const buf_block_t* block, /* in: buffer block containing rec */ + const rec_t* rec); /* in: the record to be removed */ +/************************************************************************* +Stores on the page infimum record the explicit locks of another record. +This function is used to store the lock state of a record when it is +updated and the size of the record changes in the update. The record +is in such an update moved, perhaps to another page. The infimum record +acts as a dummy carrier record, taking care of lock releases while the +actual record is being moved. 
*/ +UNIV_INTERN +void +lock_rec_store_on_page_infimum( +/*===========================*/ + const buf_block_t* block, /* in: buffer block containing rec */ + const rec_t* rec); /* in: record whose lock state + is stored on the infimum + record of the same page; lock + bits are reset on the + record */ +/************************************************************************* +Restores the state of explicit lock requests on a single record, where the +state was stored on the infimum of the page. */ +UNIV_INTERN +void +lock_rec_restore_from_page_infimum( +/*===============================*/ + const buf_block_t* block, /* in: buffer block containing rec */ + const rec_t* rec, /* in: record whose lock state + is restored */ + const buf_block_t* donator);/* in: page (rec is not + necessarily on this page) + whose infimum stored the lock + state; lock bits are reset on + the infimum */ +/************************************************************************* +Returns TRUE if there are explicit record locks on a page. */ +UNIV_INTERN +ibool +lock_rec_expl_exist_on_page( +/*========================*/ + /* out: TRUE if there are explicit record locks on + the page */ + ulint space, /* in: space id */ + ulint page_no);/* in: page number */ +/************************************************************************* +Checks if locks of other transactions prevent an immediate insert of +a record. If they do, first tests if the query thread should anyway +be suspended for some reason; if not, then puts the transaction and +the query thread to the lock wait state and inserts a waiting request +for a gap x-lock to the lock queue. 
*/ +UNIV_INTERN +ulint +lock_rec_insert_check_and_lock( +/*===========================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ + ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is + set, does nothing */ + rec_t* rec, /* in: record after which to insert */ + buf_block_t* block, /* in/out: buffer block of rec */ + dict_index_t* index, /* in: index */ + que_thr_t* thr, /* in: query thread */ + ibool* inherit);/* out: set to TRUE if the new + inserted record maybe should inherit + LOCK_GAP type locks from the successor + record */ +/************************************************************************* +Checks if locks of other transactions prevent an immediate modify (update, +delete mark, or delete unmark) of a clustered index record. If they do, +first tests if the query thread should anyway be suspended for some +reason; if not, then puts the transaction and the query thread to the +lock wait state and inserts a waiting request for a record x-lock to the +lock queue. */ +UNIV_INTERN +ulint +lock_clust_rec_modify_check_and_lock( +/*=================================*/ + /* out: DB_SUCCESS, + DB_LOCK_WAIT, DB_DEADLOCK, or + DB_QUE_THR_SUSPENDED */ + ulint flags, /* in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + const buf_block_t* block, /* in: buffer block of rec */ + const rec_t* rec, /* in: record which should be + modified */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************* +Checks if locks of other transactions prevent an immediate modify +(delete mark or delete unmark) of a secondary index record. 
*/ +UNIV_INTERN +ulint +lock_sec_rec_modify_check_and_lock( +/*===============================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ + ulint flags, /* in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + buf_block_t* block, /* in/out: buffer block of rec */ + rec_t* rec, /* in: record which should be + modified; NOTE: as this is a secondary + index, we always have to modify the + clustered index record first: see the + comment below */ + dict_index_t* index, /* in: secondary index */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************* +Like the counterpart for a clustered index below, but now we read a +secondary index record. */ +UNIV_INTERN +ulint +lock_sec_rec_read_check_and_lock( +/*=============================*/ + /* out: DB_SUCCESS, + DB_LOCK_WAIT, DB_DEADLOCK, or + DB_QUE_THR_SUSPENDED */ + ulint flags, /* in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + const buf_block_t* block, /* in: buffer block of rec */ + const rec_t* rec, /* in: user record or page + supremum record which should + be read or passed over by a + read cursor */ + dict_index_t* index, /* in: secondary index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ + enum lock_mode mode, /* in: mode of the lock which + the read cursor should set on + records: LOCK_S or LOCK_X; the + latter is possible in + SELECT FOR UPDATE */ + ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or + LOCK_REC_NOT_GAP */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************* +Checks if locks of other transactions prevent an immediate read, or passing +over by a read cursor, of a clustered index record. 
If they do, first tests +if the query thread should anyway be suspended for some reason; if not, then +puts the transaction and the query thread to the lock wait state and inserts a +waiting request for a record lock to the lock queue. Sets the requested mode +lock on the record. */ +UNIV_INTERN +ulint +lock_clust_rec_read_check_and_lock( +/*===============================*/ + /* out: DB_SUCCESS, + DB_LOCK_WAIT, DB_DEADLOCK, or + DB_QUE_THR_SUSPENDED */ + ulint flags, /* in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + const buf_block_t* block, /* in: buffer block of rec */ + const rec_t* rec, /* in: user record or page + supremum record which should + be read or passed over by a + read cursor */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ + enum lock_mode mode, /* in: mode of the lock which + the read cursor should set on + records: LOCK_S or LOCK_X; the + latter is possible in + SELECT FOR UPDATE */ + ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or + LOCK_REC_NOT_GAP */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************* +Checks if locks of other transactions prevent an immediate read, or passing +over by a read cursor, of a clustered index record. If they do, first tests +if the query thread should anyway be suspended for some reason; if not, then +puts the transaction and the query thread to the lock wait state and inserts a +waiting request for a record lock to the lock queue. Sets the requested mode +lock on the record. This is an alternative version of +lock_clust_rec_read_check_and_lock() that does not require the parameter +"offsets". 
*/ +UNIV_INTERN +ulint +lock_clust_rec_read_check_and_lock_alt( +/*===================================*/ + /* out: DB_SUCCESS, + DB_LOCK_WAIT, DB_DEADLOCK, or + DB_QUE_THR_SUSPENDED */ + ulint flags, /* in: if BTR_NO_LOCKING_FLAG + bit is set, does nothing */ + const buf_block_t* block, /* in: buffer block of rec */ + const rec_t* rec, /* in: user record or page + supremum record which should + be read or passed over by a + read cursor */ + dict_index_t* index, /* in: clustered index */ + enum lock_mode mode, /* in: mode of the lock which + the read cursor should set on + records: LOCK_S or LOCK_X; the + latter is possible in + SELECT FOR UPDATE */ + ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or + LOCK_REC_NOT_GAP */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************* +Checks that a record is seen in a consistent read. */ +UNIV_INTERN +ibool +lock_clust_rec_cons_read_sees( +/*==========================*/ + /* out: TRUE if sees, or FALSE if an earlier + version of the record should be retrieved */ + const rec_t* rec, /* in: user record which should be read or + passed over by a read cursor */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ + read_view_t* view); /* in: consistent read view */ +/************************************************************************* +Checks that a non-clustered index record is seen in a consistent read. 
*/ +UNIV_INTERN +ulint +lock_sec_rec_cons_read_sees( +/*========================*/ + /* out: TRUE if certainly + sees, or FALSE if an earlier + version of the clustered index + record might be needed: NOTE + that a non-clustered index + page contains so little + information on its + modifications that even in the + case FALSE, the present + version of rec may be the + right one, but we must check this + from the clustered index + record */ + const rec_t* rec, /* in: user record which + should be read or passed over + by a read cursor */ + const read_view_t* view); /* in: consistent read view */ +/************************************************************************* +Locks the specified database table in the mode given. If the lock cannot +be granted immediately, the query thread is put to wait. */ +UNIV_INTERN +ulint +lock_table( +/*=======*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ + ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, + does nothing */ + dict_table_t* table, /* in: database table in dictionary cache */ + enum lock_mode mode, /* in: lock mode */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************* +Checks if there are any locks set on the table. */ +UNIV_INTERN +ibool +lock_is_on_table( +/*=============*/ + /* out: TRUE if there are lock(s) */ + dict_table_t* table); /* in: database table in dictionary cache */ +/***************************************************************** +Removes a granted record lock of a transaction from the queue and grants +locks to other transactions waiting in the queue if they now are entitled +to a lock. 
*/ +UNIV_INTERN +void +lock_rec_unlock( +/*============*/ + trx_t* trx, /* in: transaction that has + set a record lock */ + const buf_block_t* block, /* in: buffer block containing rec */ + const rec_t* rec, /* in: record */ + enum lock_mode lock_mode);/* in: LOCK_S or LOCK_X */ +/************************************************************************* +Releases a table lock. +Releases possible other transactions waiting for this lock. */ +UNIV_INTERN +void +lock_table_unlock( +/*==============*/ + lock_t* lock); /* in: lock */ +/************************************************************************* +Releases transaction locks, and releases possible other transactions waiting +because of these locks. */ +UNIV_INTERN +void +lock_release_off_kernel( +/*====================*/ + trx_t* trx); /* in: transaction */ +/************************************************************************* +Cancels a waiting lock request and releases possible other transactions +waiting behind it. */ +UNIV_INTERN +void +lock_cancel_waiting_and_release( +/*============================*/ + lock_t* lock); /* in: waiting lock request */ + +/************************************************************************* +Removes locks on a table to be dropped or truncated. +If remove_also_table_sx_locks is TRUE then table-level S and X locks are +also removed in addition to other table-level and record-level locks. +No lock, that is going to be removed, is allowed to be a wait lock. */ +UNIV_INTERN +void +lock_remove_all_on_table( +/*=====================*/ + dict_table_t* table, /* in: table to be dropped + or truncated */ + ibool remove_also_table_sx_locks);/* in: also removes + table S and X locks */ + +/************************************************************************* +Calculates the fold value of a page file address: used in inserting or +searching for a lock in the hash table. 
*/ +UNIV_INLINE +ulint +lock_rec_fold( +/*==========*/ + /* out: folded value */ + ulint space, /* in: space */ + ulint page_no)/* in: page number */ + __attribute__((const)); +/************************************************************************* +Calculates the hash value of a page file address: used in inserting or +searching for a lock in the hash table. */ +UNIV_INLINE +ulint +lock_rec_hash( +/*==========*/ + /* out: hashed value */ + ulint space, /* in: space */ + ulint page_no);/* in: page number */ + +/************************************************************************** +Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED, +if none found. */ +UNIV_INTERN +ulint +lock_rec_find_set_bit( +/*==================*/ + /* out: bit index == heap number of + the record, or ULINT_UNDEFINED if none found */ + const lock_t* lock); /* in: record lock with at least one bit set */ + +/************************************************************************* +Gets the source table of an ALTER TABLE transaction. The table must be +covered by an IX or IS table lock. */ +UNIV_INTERN +dict_table_t* +lock_get_src_table( +/*===============*/ + /* out: the source table of transaction, + if it is covered by an IX or IS table lock; + dest if there is no source table, and + NULL if the transaction is locking more than + two tables or an inconsistency is found */ + trx_t* trx, /* in: transaction */ + dict_table_t* dest, /* in: destination of ALTER TABLE */ + enum lock_mode* mode); /* out: lock mode of the source table */ +/************************************************************************* +Determine if the given table is exclusively "owned" by the given +transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC +on the table. 
*/ +UNIV_INTERN +ibool +lock_is_table_exclusive( +/*====================*/ + /* out: TRUE if table is only locked by trx, + with LOCK_IX, and possibly LOCK_AUTO_INC */ + dict_table_t* table, /* in: table */ + trx_t* trx); /* in: transaction */ +/************************************************************************* +Checks if a lock request lock1 has to wait for request lock2. */ +UNIV_INTERN +ibool +lock_has_to_wait( +/*=============*/ + /* out: TRUE if lock1 has to wait for + lock2 to be removed */ + const lock_t* lock1, /* in: waiting lock */ + const lock_t* lock2); /* in: another lock; NOTE that it is + assumed that this has a lock bit set + on the same record as in lock1 if the + locks are record locks */ +/************************************************************************* +Checks that a transaction id is sensible, i.e., not in the future. */ +UNIV_INTERN +ibool +lock_check_trx_id_sanity( +/*=====================*/ + /* out: TRUE if ok */ + dulint trx_id, /* in: trx id */ + const rec_t* rec, /* in: user record */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets, /* in: rec_get_offsets(rec, index) */ + ibool has_kernel_mutex);/* in: TRUE if the caller owns the + kernel mutex */ +/************************************************************************* +Prints info of a table lock. */ +UNIV_INTERN +void +lock_table_print( +/*=============*/ + FILE* file, /* in: file where to print */ + const lock_t* lock); /* in: table type lock */ +/************************************************************************* +Prints info of a record lock. */ +UNIV_INTERN +void +lock_rec_print( +/*===========*/ + FILE* file, /* in: file where to print */ + const lock_t* lock); /* in: record type lock */ +/************************************************************************* +Prints info of locks for all transactions. 
*/ +UNIV_INTERN +void +lock_print_info_summary( +/*====================*/ + FILE* file); /* in: file where to print */ +/************************************************************************* +Prints info of locks for each transaction. */ +UNIV_INTERN +void +lock_print_info_all_transactions( +/*=============================*/ + FILE* file); /* in: file where to print */ +/************************************************************************* +Return approximate number of record locks (bits set in the bitmap) for +this transaction. Since delete-marked records may be removed, the +record count will not be precise. */ +UNIV_INTERN +ulint +lock_number_of_rows_locked( +/*=======================*/ + trx_t* trx); /* in: transaction */ +/*********************************************************************** +Release all the transaction's autoinc locks. */ +UNIV_INTERN +void +lock_release_autoinc_locks( +/*=======================*/ + trx_t* trx); /* in/out: transaction */ + +/*********************************************************************** +Gets the type of a lock. Non-inline version for using outside of the +lock module. */ +UNIV_INTERN +ulint +lock_get_type( +/*==========*/ + /* out: LOCK_TABLE or LOCK_REC */ + const lock_t* lock); /* in: lock */ + +/*********************************************************************** +Gets the id of the transaction owning a lock. */ +UNIV_INTERN +ullint +lock_get_trx_id( +/*============*/ + /* out: transaction id */ + const lock_t* lock); /* in: lock */ + +/*********************************************************************** +Gets the mode of a lock in a human readable string. +The string should not be free()'d or modified. */ + +const char* +lock_get_mode_str( +/*==============*/ + /* out: lock mode */ + const lock_t* lock); /* in: lock */ + +/*********************************************************************** +Gets the type of a lock in a human readable string. +The string should not be free()'d or modified. 
*/ + +const char* +lock_get_type_str( +/*==============*/ + /* out: lock type */ + const lock_t* lock); /* in: lock */ + +/*********************************************************************** +Gets the id of the table on which the lock is. */ +UNIV_INTERN +ullint +lock_get_table_id( +/*==============*/ + /* out: id of the table */ + const lock_t* lock); /* in: lock */ + +/*********************************************************************** +Gets the name of the table on which the lock is. +The string should not be free()'d or modified. */ + +const char* +lock_get_table_name( +/*================*/ + /* out: name of the table */ + const lock_t* lock); /* in: lock */ + +/*********************************************************************** +For a record lock, gets the index on which the lock is. */ + +const dict_index_t* +lock_rec_get_index( +/*===============*/ + /* out: index */ + const lock_t* lock); /* in: lock */ + +/*********************************************************************** +For a record lock, gets the name of the index on which the lock is. +The string should not be free()'d or modified. */ + +const char* +lock_rec_get_index_name( +/*====================*/ + /* out: name of the index */ + const lock_t* lock); /* in: lock */ + +/*********************************************************************** +For a record lock, gets the tablespace number on which the lock is. */ +UNIV_INTERN +ulint +lock_rec_get_space_id( +/*==================*/ + /* out: tablespace number */ + const lock_t* lock); /* in: lock */ + +/*********************************************************************** +For a record lock, gets the page number on which the lock is. 
*/ +UNIV_INTERN +ulint +lock_rec_get_page_no( +/*=================*/ + /* out: page number */ + const lock_t* lock); /* in: lock */ + +/* Lock modes and types */ +#define LOCK_MODE_MASK 0xFUL /* mask used to extract mode from the + type_mode field in a lock */ +/* Lock types */ +#define LOCK_TABLE 16 /* these type values should be so high that */ +#define LOCK_REC 32 /* they can be ORed to the lock mode */ +#define LOCK_TYPE_MASK 0xF0UL /* mask used to extract lock type from the + type_mode field in a lock */ +/* Waiting lock flag */ +#define LOCK_WAIT 256 /* this wait bit should be so high that + it can be ORed to the lock mode and type; + when this bit is set, it means that the + lock has not yet been granted, it is just + waiting for its turn in the wait queue */ +/* Precise modes */ +#define LOCK_ORDINARY 0 /* this flag denotes an ordinary next-key lock + in contrast to LOCK_GAP or LOCK_REC_NOT_GAP */ +#define LOCK_GAP 512 /* this gap bit should be so high that + it can be ORed to the other flags; + when this bit is set, it means that the + lock holds only on the gap before the record; + for instance, an x-lock on the gap does not + give permission to modify the record on which + the bit is set; locks of this type are created + when records are removed from the index chain + of records */ +#define LOCK_REC_NOT_GAP 1024 /* this bit means that the lock is only on + the index record and does NOT block inserts + to the gap before the index record; this is + used in the case when we retrieve a record + with a unique key, and is also used in + locking plain SELECTs (not part of UPDATE + or DELETE) when the user has set the READ + COMMITTED isolation level */ +#define LOCK_INSERT_INTENTION 2048 /* this bit is set when we place a waiting + gap type record lock request in order to let + an insert of an index record to wait until + there are no conflicting locks by other + transactions on the gap; note that this flag + remains set when the waiting lock is granted, + or 
if the lock is inherited to a neighboring + record */ + +/* When lock bits are reset, the following flags are available: */ +#define LOCK_RELEASE_WAIT 1 +#define LOCK_NOT_RELEASE_WAIT 2 + +/* Lock operation struct */ +typedef struct lock_op_struct lock_op_t; +struct lock_op_struct{ + dict_table_t* table; /* table to be locked */ + enum lock_mode mode; /* lock mode */ +}; + +#define LOCK_OP_START 1 +#define LOCK_OP_COMPLETE 2 + +/* The lock system struct */ +struct lock_sys_struct{ + hash_table_t* rec_hash; /* hash table of the record locks */ +}; + +/* The lock system */ +extern lock_sys_t* lock_sys; + + +#ifndef UNIV_NONINL +#include "lock0lock.ic" +#endif + +#endif diff --git a/storage/xtradb/include/lock0lock.ic b/storage/xtradb/include/lock0lock.ic new file mode 100644 index 00000000000..f978cc70678 --- /dev/null +++ b/storage/xtradb/include/lock0lock.ic @@ -0,0 +1,123 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The transaction lock system + +Created 5/7/1996 Heikki Tuuri +*******************************************************/ + +#include "sync0sync.h" +#include "srv0srv.h" +#include "dict0dict.h" +#include "row0row.h" +#include "trx0sys.h" +#include "trx0trx.h" +#include "buf0buf.h" +#include "page0page.h" +#include "page0cur.h" +#include "row0vers.h" +#include "que0que.h" +#include "btr0cur.h" +#include "read0read.h" +#include "log0recv.h" + +/************************************************************************* +Calculates the fold value of a page file address: used in inserting or +searching for a lock in the hash table. */ +UNIV_INLINE +ulint +lock_rec_fold( +/*==========*/ + /* out: folded value */ + ulint space, /* in: space */ + ulint page_no)/* in: page number */ +{ + return(ut_fold_ulint_pair(space, page_no)); +} + +/************************************************************************* +Calculates the hash value of a page file address: used in inserting or +searching for a lock in the hash table. */ +UNIV_INLINE +ulint +lock_rec_hash( +/*==========*/ + /* out: hashed value */ + ulint space, /* in: space */ + ulint page_no)/* in: page number */ +{ + return(hash_calc_hash(lock_rec_fold(space, page_no), + lock_sys->rec_hash)); +} + +/************************************************************************* +Checks if some transaction has an implicit x-lock on a record in a clustered +index. 
*/ +UNIV_INLINE +trx_t* +lock_clust_rec_some_has_impl( +/*=========================*/ + /* out: transaction which has the x-lock, or + NULL */ + const rec_t* rec, /* in: user record */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ +{ + dulint trx_id; + + ut_ad(mutex_own(&kernel_mutex)); + ut_ad(dict_index_is_clust(index)); + ut_ad(page_rec_is_user_rec(rec)); + + trx_id = row_get_rec_trx_id(rec, index, offsets); + + if (trx_is_active(trx_id)) { + /* The modifying or inserting transaction is active */ + + return(trx_get_on_id(trx_id)); + } + + return(NULL); +} + +/************************************************************************* +Gets the heap_no of the smallest user record on a page. */ +UNIV_INLINE +ulint +lock_get_min_heap_no( +/*=================*/ + /* out: heap_no of smallest + user record, or + PAGE_HEAP_NO_SUPREMUM */ + const buf_block_t* block) /* in: buffer block */ +{ + const page_t* page = block->frame; + + if (page_is_comp(page)) { + return(rec_get_heap_no_new( + page + + rec_get_next_offs(page + PAGE_NEW_INFIMUM, + TRUE))); + } else { + return(rec_get_heap_no_old( + page + + rec_get_next_offs(page + PAGE_OLD_INFIMUM, + FALSE))); + } +} diff --git a/storage/xtradb/include/lock0priv.h b/storage/xtradb/include/lock0priv.h new file mode 100644 index 00000000000..0a0d41e6aaa --- /dev/null +++ b/storage/xtradb/include/lock0priv.h @@ -0,0 +1,106 @@ +/***************************************************************************** + +Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Lock module internal structures and methods. + +Created July 12, 2007 Vasil Dimov +*******************************************************/ + +#ifndef lock0priv_h +#define lock0priv_h + +#ifndef LOCK_MODULE_IMPLEMENTATION +/* If you need to access members of the structures defined in this +file, please write appropriate functions that retrieve them and put +those functions in lock/ */ +#error Do not include lock0priv.h outside of the lock/ module +#endif + +#include "univ.i" +#include "dict0types.h" +#include "hash0hash.h" +#include "trx0types.h" +#include "ut0lst.h" + +/* A table lock */ +typedef struct lock_table_struct lock_table_t; +struct lock_table_struct { + dict_table_t* table; /* database table in dictionary + cache */ + UT_LIST_NODE_T(lock_t) + locks; /* list of locks on the same + table */ +}; + +/* Record lock for a page */ +typedef struct lock_rec_struct lock_rec_t; +struct lock_rec_struct { + ulint space; /* space id */ + ulint page_no; /* page number */ + ulint n_bits; /* number of bits in the lock + bitmap; NOTE: the lock bitmap is + placed immediately after the + lock struct */ +}; + +/* Lock struct */ +struct lock_struct { + trx_t* trx; /* transaction owning the + lock */ + UT_LIST_NODE_T(lock_t) + trx_locks; /* list of the locks of the + transaction */ + ulint type_mode; /* lock type, mode, LOCK_GAP or + LOCK_REC_NOT_GAP, + LOCK_INSERT_INTENTION, + wait flag, ORed */ + hash_node_t hash; /* hash chain node for a record + lock */ + dict_index_t* index; /* index for a record lock */ + union { + lock_table_t tab_lock;/* table lock */ + lock_rec_t rec_lock;/* 
record lock */ + } un_member; +}; + +/************************************************************************* +Gets the type of a lock. */ +UNIV_INLINE +ulint +lock_get_type_low( +/*==============*/ + /* out: LOCK_TABLE or LOCK_REC */ + const lock_t* lock); /* in: lock */ + +/************************************************************************* +Gets the previous record lock set on a record. */ + +const lock_t* +lock_rec_get_prev( +/*==============*/ + /* out: previous lock on the same + record, NULL if none exists */ + const lock_t* in_lock,/* in: record lock */ + ulint heap_no);/* in: heap number of the record */ + +#ifndef UNIV_NONINL +#include "lock0priv.ic" +#endif + +#endif /* lock0priv_h */ diff --git a/storage/xtradb/include/lock0priv.ic b/storage/xtradb/include/lock0priv.ic new file mode 100644 index 00000000000..ae633a4fc61 --- /dev/null +++ b/storage/xtradb/include/lock0priv.ic @@ -0,0 +1,48 @@ +/***************************************************************************** + +Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Lock module internal inline methods. 
+ +Created July 16, 2007 Vasil Dimov +*******************************************************/ + +/* This file contains only methods which are used in +lock/lock0* files, other than lock/lock0lock.c. +I.e. lock/lock0lock.c contains more internal inline +methods but they are used only in that file. */ + +#ifndef LOCK_MODULE_IMPLEMENTATION +#error Do not include lock0priv.ic outside of the lock/ module +#endif + +/************************************************************************* +Gets the type of a lock. */ +UNIV_INLINE +ulint +lock_get_type_low( +/*==============*/ + /* out: LOCK_TABLE or LOCK_REC */ + const lock_t* lock) /* in: lock */ +{ + ut_ad(lock); + + return(lock->type_mode & LOCK_TYPE_MASK); +} + +/* vim: set filetype=c: */ diff --git a/storage/xtradb/include/lock0types.h b/storage/xtradb/include/lock0types.h new file mode 100644 index 00000000000..52631b56532 --- /dev/null +++ b/storage/xtradb/include/lock0types.h @@ -0,0 +1,44 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The transaction lock system global types + +Created 5/7/1996 Heikki Tuuri +*******************************************************/ + +#ifndef lock0types_h +#define lock0types_h + +#define lock_t ib_lock_t +typedef struct lock_struct lock_t; +typedef struct lock_sys_struct lock_sys_t; + +/* Basic lock modes */ +enum lock_mode { + LOCK_IS = 0, /* intention shared */ + LOCK_IX, /* intention exclusive */ + LOCK_S, /* shared */ + LOCK_X, /* exclusive */ + LOCK_AUTO_INC, /* locks the auto-inc counter of a table + in an exclusive mode */ + LOCK_NONE, /* this is used elsewhere to note consistent read */ + LOCK_NUM = LOCK_NONE/* number of lock modes */ +}; + +#endif diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h new file mode 100644 index 00000000000..51b57ae929c --- /dev/null +++ b/storage/xtradb/include/log0log.h @@ -0,0 +1,893 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Database log + +Created 12/9/1995 Heikki Tuuri +*******************************************************/ + +#ifndef log0log_h +#define log0log_h + +#include "univ.i" +#include "ut0byte.h" +#include "sync0sync.h" +#include "sync0rw.h" + +typedef struct log_struct log_t; +typedef struct log_group_struct log_group_t; + +#ifdef UNIV_DEBUG +extern ibool log_do_write; +extern ibool log_debug_writes; +#else /* UNIV_DEBUG */ +# define log_do_write TRUE +#endif /* UNIV_DEBUG */ + +/* Wait modes for log_write_up_to */ +#define LOG_NO_WAIT 91 +#define LOG_WAIT_ONE_GROUP 92 +#define LOG_WAIT_ALL_GROUPS 93 +#define LOG_MAX_N_GROUPS 32 + +/******************************************************************** +Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint, +so that we know that the limit has been written to a log checkpoint field +on disk. */ +UNIV_INTERN +void +log_fsp_current_free_limit_set_and_checkpoint( +/*==========================================*/ + ulint limit); /* in: limit to set */ +/*********************************************************************** +Calculates where in log files we find a specified lsn. 
*/ +UNIV_INTERN +ulint +log_calc_where_lsn_is( +/*==================*/ + /* out: log file number */ + ib_int64_t* log_file_offset, /* out: offset in that file + (including the header) */ + ib_uint64_t first_header_lsn, /* in: first log file start + lsn */ + ib_uint64_t lsn, /* in: lsn whose position to + determine */ + ulint n_log_files, /* in: total number of log + files */ + ib_int64_t log_file_size); /* in: log file size + (including the header) */ +/**************************************************************** +Writes to the log the string given. The log must be released with +log_release. */ +UNIV_INLINE +ib_uint64_t +log_reserve_and_write_fast( +/*=======================*/ + /* out: end lsn of the log record, + zero if did not succeed */ + byte* str, /* in: string */ + ulint len, /* in: string length */ + ib_uint64_t* start_lsn,/* out: start lsn of the log record */ + ibool* success);/* out: TRUE if success */ +/*************************************************************************** +Releases the log mutex. */ +UNIV_INLINE +void +log_release(void); +/*=============*/ +/*************************************************************************** +Checks if there is need for a log buffer flush or a new checkpoint, and does +this if yes. Any database operation should call this when it has modified +more than about 4 pages. NOTE that this function may only be called when the +OS thread owns no synchronization objects except the dictionary mutex. */ +UNIV_INLINE +void +log_free_check(void); +/*================*/ +/**************************************************************** +Opens the log for log_write_low. The log must be closed with log_close and +released with log_release. */ +UNIV_INTERN +ib_uint64_t +log_reserve_and_open( +/*=================*/ + /* out: start lsn of the log record */ + ulint len); /* in: length of data to be catenated */ +/**************************************************************** +Writes to the log the string given. 
It is assumed that the caller holds the +log mutex. */ +UNIV_INTERN +void +log_write_low( +/*==========*/ + byte* str, /* in: string */ + ulint str_len); /* in: string length */ +/**************************************************************** +Closes the log. */ +UNIV_INTERN +ib_uint64_t +log_close(void); +/*===========*/ + /* out: lsn */ +/**************************************************************** +Gets the current lsn. */ +UNIV_INLINE +ib_uint64_t +log_get_lsn(void); +/*=============*/ + /* out: current lsn */ +/********************************************************** +Initializes the log. */ +UNIV_INTERN +void +log_init(void); +/*==========*/ +/********************************************************************** +Inits a log group to the log system. */ +UNIV_INTERN +void +log_group_init( +/*===========*/ + ulint id, /* in: group id */ + ulint n_files, /* in: number of log files */ + ulint file_size, /* in: log file size in bytes */ + ulint space_id, /* in: space id of the file space + which contains the log files of this + group */ + ulint archive_space_id); /* in: space id of the file space + which contains some archived log + files for this group; currently, only + for the first log group this is + used */ +/********************************************************** +Completes an i/o to a log file. */ +UNIV_INTERN +void +log_io_complete( +/*============*/ + log_group_t* group); /* in: log group */ +/********************************************************** +This function is called, e.g., when a transaction wants to commit. It checks +that the log has been written to the log file up to the last log entry written +by the transaction. If there is a flush running, it waits and checks if the +flush flushed enough. If not, starts a new flush. 
*/ +UNIV_INTERN +void +log_write_up_to( +/*============*/ + ib_uint64_t lsn, /* in: log sequence number up to which + the log should be written, + IB_ULONGLONG_MAX if not specified */ + ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, + or LOG_WAIT_ALL_GROUPS */ + ibool flush_to_disk); + /* in: TRUE if we want the written log + also to be flushed to disk */ +/******************************************************************** +Does a synchronous flush of the log buffer to disk. */ +UNIV_INTERN +void +log_buffer_flush_to_disk(void); +/*==========================*/ +/******************************************************************** +Advances the smallest lsn for which there are unflushed dirty blocks in the +buffer pool and also may make a new checkpoint. NOTE: this function may only +be called if the calling thread owns no synchronization objects! */ +UNIV_INTERN +ibool +log_preflush_pool_modified_pages( +/*=============================*/ + /* out: FALSE if there was a + flush batch of the same type + running, which means that we + could not start this flush + batch */ + ib_uint64_t new_oldest, /* in: try to advance + oldest_modified_lsn at least + to this lsn */ + ibool sync); /* in: TRUE if synchronous + operation is desired */ +/********************************************************** +Makes a checkpoint. Note that this function does not flush dirty +blocks from the buffer pool: it only checks what is lsn of the oldest +modification in the pool, and writes information about the lsn in +log files. Use log_make_checkpoint_at to flush also the pool. 
*/ +UNIV_INTERN +ibool +log_checkpoint( +/*===========*/ + /* out: TRUE if success, FALSE if a checkpoint + write was already running */ + ibool sync, /* in: TRUE if synchronous operation is + desired */ + ibool write_always); /* in: the function normally checks if the + new checkpoint would have a greater + lsn than the previous one: if not, then no + physical write is done; by setting this + parameter TRUE, a physical write will always be + made to log files */ +/******************************************************************** +Makes a checkpoint at a given lsn or later. */ +UNIV_INTERN +void +log_make_checkpoint_at( +/*===================*/ + ib_uint64_t lsn, /* in: make a checkpoint at this or a + later lsn, if IB_ULONGLONG_MAX, makes + a checkpoint at the latest lsn */ + ibool write_always); /* in: the function normally checks if + the new checkpoint would have a + greater lsn than the previous one: if + not, then no physical write is done; + by setting this parameter TRUE, a + physical write will always be made to + log files */ +/******************************************************************** +Makes a checkpoint at the latest lsn and writes it to the first page of each +data file in the database, so that we know that the file spaces contain +all modifications up to that lsn. This can only be called at database +shutdown. This function also writes all log in log files to the log archive. */ +UNIV_INTERN +void +logs_empty_and_mark_files_at_shutdown(void); +/*=======================================*/ +/********************************************************** +Reads checkpoint info from a log group header to log_sys->checkpoint_buf. */ +UNIV_INTERN +void +log_group_read_checkpoint_info( +/*===========================*/ + log_group_t* group, /* in: log group */ + ulint field); /* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */ +/*********************************************************************** +Gets info from a checkpoint about a log group. 
*/ +UNIV_INTERN +void +log_checkpoint_get_nth_group_info( +/*==============================*/ + byte* buf, /* in: buffer containing checkpoint info */ + ulint n, /* in: nth slot */ + ulint* file_no,/* out: archived file number */ + ulint* offset);/* out: archived file offset */ +/********************************************************** +Writes checkpoint info to groups. */ +UNIV_INTERN +void +log_groups_write_checkpoint_info(void); +/*==================================*/ +#ifdef UNIV_HOTBACKUP +/********************************************************** +Writes info to a buffer of a log group when log files are created in +backup restoration. */ +UNIV_INTERN +void +log_reset_first_header_and_checkpoint( +/*==================================*/ + byte* hdr_buf,/* in: buffer which will be written to the + start of the first log file */ + ib_uint64_t start); /* in: lsn of the start of the first log file; + we pretend that there is a checkpoint at + start + LOG_BLOCK_HDR_SIZE */ +#endif /* UNIV_HOTBACKUP */ +/************************************************************************ +Starts an archiving operation. */ +UNIV_INTERN +ibool +log_archive_do( +/*===========*/ + /* out: TRUE if succeed, FALSE if an archiving + operation was already running */ + ibool sync, /* in: TRUE if synchronous operation is desired */ + ulint* n_bytes);/* out: archive log buffer size, 0 if nothing to + archive */ +/******************************************************************** +Writes the log contents to the archive up to the lsn when this function was +called, and stops the archiving. When archiving is started again, the archived +log file numbers start from a number one higher, so that the archiving will +not write again to the archived log files which exist when this function +returns. 
*/ +UNIV_INTERN +ulint +log_archive_stop(void); +/*==================*/ + /* out: DB_SUCCESS or DB_ERROR */ +/******************************************************************** +Starts again archiving which has been stopped. */ +UNIV_INTERN +ulint +log_archive_start(void); +/*===================*/ + /* out: DB_SUCCESS or DB_ERROR */ +/******************************************************************** +Stop archiving the log so that a gap may occur in the archived log files. */ +UNIV_INTERN +ulint +log_archive_noarchivelog(void); +/*==========================*/ + /* out: DB_SUCCESS or DB_ERROR */ +/******************************************************************** +Start archiving the log so that a gap may occur in the archived log files. */ +UNIV_INTERN +ulint +log_archive_archivelog(void); +/*========================*/ + /* out: DB_SUCCESS or DB_ERROR */ +/********************************************************** +Generates an archived log file name. */ +UNIV_INTERN +void +log_archived_file_name_gen( +/*=======================*/ + char* buf, /* in: buffer where to write */ + ulint id, /* in: group id */ + ulint file_no);/* in: file number */ +/************************************************************************ +Checks that there is enough free space in the log to start a new query step. +Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this +function may only be called if the calling thread owns no synchronization +objects! */ +UNIV_INTERN +void +log_check_margins(void); +/*===================*/ +/********************************************************** +Reads a specified log segment to a buffer. 
*/ +UNIV_INTERN +void +log_group_read_log_seg( +/*===================*/ + ulint type, /* in: LOG_ARCHIVE or LOG_RECOVER */ + byte* buf, /* in: buffer where to read */ + log_group_t* group, /* in: log group */ + ib_uint64_t start_lsn, /* in: read area start */ + ib_uint64_t end_lsn); /* in: read area end */ +/********************************************************** +Writes a buffer to a log file group. */ +UNIV_INTERN +void +log_group_write_buf( +/*================*/ + log_group_t* group, /* in: log group */ + byte* buf, /* in: buffer */ + ulint len, /* in: buffer len; must be divisible + by OS_FILE_LOG_BLOCK_SIZE */ + ib_uint64_t start_lsn, /* in: start lsn of the buffer; must + be divisible by + OS_FILE_LOG_BLOCK_SIZE */ + ulint new_data_offset);/* in: start offset of new data in + buf: this parameter is used to decide + if we have to write a new log file + header */ +/************************************************************ +Sets the field values in group to correspond to a given lsn. For this function +to work, the values must already be correctly initialized to correspond to +some lsn, for instance, a checkpoint lsn. */ +UNIV_INTERN +void +log_group_set_fields( +/*=================*/ + log_group_t* group, /* in: group */ + ib_uint64_t lsn); /* in: lsn for which the values should be + set */ +/********************************************************** +Calculates the data capacity of a log group, when the log file headers are not +included. */ +UNIV_INTERN +ulint +log_group_get_capacity( +/*===================*/ + /* out: capacity in bytes */ + log_group_t* group); /* in: log group */ +/**************************************************************** +Gets a log block flush bit. 
*/ +UNIV_INLINE +ibool +log_block_get_flush_bit( +/*====================*/ + /* out: TRUE if this block was the first + to be written in a log flush */ + byte* log_block); /* in: log block */ +/**************************************************************** +Gets a log block number stored in the header. */ +UNIV_INLINE +ulint +log_block_get_hdr_no( +/*=================*/ + /* out: log block number stored in the block + header */ + byte* log_block); /* in: log block */ +/**************************************************************** +Gets a log block data length. */ +UNIV_INLINE +ulint +log_block_get_data_len( +/*===================*/ + /* out: log block data length measured as a + byte offset from the block start */ + byte* log_block); /* in: log block */ +/**************************************************************** +Sets the log block data length. */ +UNIV_INLINE +void +log_block_set_data_len( +/*===================*/ + byte* log_block, /* in: log block */ + ulint len); /* in: data length */ +/**************************************************************** +Calculates the checksum for a log block. */ +UNIV_INLINE +ulint +log_block_calc_checksum( +/*====================*/ + /* out: checksum */ + const byte* block); /* in: log block */ +/**************************************************************** +Gets a log block checksum field value. */ +UNIV_INLINE +ulint +log_block_get_checksum( +/*===================*/ + /* out: checksum */ + const byte* log_block); /* in: log block */ +/**************************************************************** +Sets a log block checksum field value. */ +UNIV_INLINE +void +log_block_set_checksum( +/*===================*/ + byte* log_block, /* in: log block */ + ulint checksum); /* in: checksum */ +/**************************************************************** +Gets a log block first mtr log record group offset. 
*/ +UNIV_INLINE +ulint +log_block_get_first_rec_group( +/*==========================*/ + /* out: first mtr log record group byte offset + from the block start, 0 if none */ + byte* log_block); /* in: log block */ +/**************************************************************** +Sets the log block first mtr log record group offset. */ +UNIV_INLINE +void +log_block_set_first_rec_group( +/*==========================*/ + byte* log_block, /* in: log block */ + ulint offset); /* in: offset, 0 if none */ +/**************************************************************** +Gets a log block checkpoint number field (4 lowest bytes). */ +UNIV_INLINE +ulint +log_block_get_checkpoint_no( +/*========================*/ + /* out: checkpoint no (4 lowest bytes) */ + byte* log_block); /* in: log block */ +/**************************************************************** +Initializes a log block in the log buffer. */ +UNIV_INLINE +void +log_block_init( +/*===========*/ + byte* log_block, /* in: pointer to the log buffer */ + ib_uint64_t lsn); /* in: lsn within the log block */ +/**************************************************************** +Initializes a log block in the log buffer in the old, < 3.23.52 format, where +there was no checksum yet. */ +UNIV_INLINE +void +log_block_init_in_old_format( +/*=========================*/ + byte* log_block, /* in: pointer to the log buffer */ + ib_uint64_t lsn); /* in: lsn within the log block */ +/**************************************************************** +Converts a lsn to a log block number. */ +UNIV_INLINE +ulint +log_block_convert_lsn_to_no( +/*========================*/ + /* out: log block number, + it is > 0 and <= 1G */ + ib_uint64_t lsn); /* in: lsn of a byte within the block */ +/********************************************************** +Prints info of the log. 
*/ +UNIV_INTERN +void +log_print( +/*======*/ + FILE* file); /* in: file where to print */ +/********************************************************** +Peeks the current lsn. */ +UNIV_INTERN +ibool +log_peek_lsn( +/*=========*/ + /* out: TRUE if success, FALSE if + could not get the log system mutex */ + ib_uint64_t* lsn); /* out: if returns TRUE, current lsn is here */ +/************************************************************************** +Refreshes the statistics used to print per-second averages. */ +UNIV_INTERN +void +log_refresh_stats(void); +/*===================*/ + +extern log_t* log_sys; + +/* Values used as flags */ +#define LOG_FLUSH 7652559 +#define LOG_CHECKPOINT 78656949 +#ifdef UNIV_LOG_ARCHIVE +# define LOG_ARCHIVE 11122331 +#endif /* UNIV_LOG_ARCHIVE */ +#define LOG_RECOVER 98887331 + +/* The counting of lsn's starts from this value: this must be non-zero */ +#define LOG_START_LSN ((ib_uint64_t) (16 * OS_FILE_LOG_BLOCK_SIZE)) + +#define LOG_BUFFER_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE) +#define LOG_ARCHIVE_BUF_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE / 4) + +/* Offsets of a log block header */ +#define LOG_BLOCK_HDR_NO 0 /* block number which must be > 0 and + is allowed to wrap around at 2G; the + highest bit is set to 1 if this is the + first log block in a log flush write + segment */ +#define LOG_BLOCK_FLUSH_BIT_MASK 0x80000000UL + /* mask used to get the highest bit in + the preceding field */ +#define LOG_BLOCK_HDR_DATA_LEN 4 /* number of bytes of log written to + this block */ +#define LOG_BLOCK_FIRST_REC_GROUP 6 /* offset of the first start of an + mtr log record group in this log block, + 0 if none; if the value is the same + as LOG_BLOCK_HDR_DATA_LEN, it means + that the first rec group has not yet + been catenated to this log block, but + if it will, it will start at this + offset; an archive recovery can + start parsing the log records starting + from this offset in this log block, + if value not 0 */ +#define 
LOG_BLOCK_CHECKPOINT_NO 8 /* 4 lower bytes of the value of + log_sys->next_checkpoint_no when the + log block was last written to: if the + block has not yet been written full, + this value is only updated before a + log buffer flush */ +#define LOG_BLOCK_HDR_SIZE 12 /* size of the log block header in + bytes */ + +/* Offsets of a log block trailer from the end of the block */ +#define LOG_BLOCK_CHECKSUM 4 /* 4 byte checksum of the log block + contents; in InnoDB versions + < 3.23.52 this did not contain the + checksum but the same value as + .._HDR_NO */ +#define LOG_BLOCK_TRL_SIZE 4 /* trailer size in bytes */ + +/* Offsets for a checkpoint field */ +#define LOG_CHECKPOINT_NO 0 +#define LOG_CHECKPOINT_LSN 8 +#define LOG_CHECKPOINT_OFFSET 16 +#define LOG_CHECKPOINT_LOG_BUF_SIZE 20 +#define LOG_CHECKPOINT_ARCHIVED_LSN 24 +#define LOG_CHECKPOINT_GROUP_ARRAY 32 + +/* For each value < LOG_MAX_N_GROUPS the following 8 bytes: */ + +#define LOG_CHECKPOINT_ARCHIVED_FILE_NO 0 +#define LOG_CHECKPOINT_ARCHIVED_OFFSET 4 + +#define LOG_CHECKPOINT_ARRAY_END (LOG_CHECKPOINT_GROUP_ARRAY\ + + LOG_MAX_N_GROUPS * 8) +#define LOG_CHECKPOINT_CHECKSUM_1 LOG_CHECKPOINT_ARRAY_END +#define LOG_CHECKPOINT_CHECKSUM_2 (4 + LOG_CHECKPOINT_ARRAY_END) +#define LOG_CHECKPOINT_FSP_FREE_LIMIT (8 + LOG_CHECKPOINT_ARRAY_END) + /* current fsp free limit in + tablespace 0, in units of one + megabyte; this information is only used + by ibbackup to decide if it can + truncate unused ends of + non-auto-extending data files in space + 0 */ +#define LOG_CHECKPOINT_FSP_MAGIC_N (12 + LOG_CHECKPOINT_ARRAY_END) + /* this magic number tells if the + checkpoint contains the above field: + the field was added to + InnoDB-3.23.50 */ +#define LOG_CHECKPOINT_SIZE (16 + LOG_CHECKPOINT_ARRAY_END) + +#define LOG_CHECKPOINT_FSP_MAGIC_N_VAL 1441231243 + +/* Offsets of a log file header */ +#define LOG_GROUP_ID 0 /* log group number */ +#define LOG_FILE_START_LSN 4 /* lsn of the start of data in this + log file */ 
+#define LOG_FILE_NO 12 /* 4-byte archived log file number; + this field is only defined in an + archived log file */ +#define LOG_FILE_WAS_CREATED_BY_HOT_BACKUP 16 + /* a 32-byte field which contains + the string 'ibbackup' and the + creation time if the log file was + created by ibbackup --restore; + when mysqld is first time started + on the restored database, it can + print helpful info for the user */ +#define LOG_FILE_ARCH_COMPLETED OS_FILE_LOG_BLOCK_SIZE + /* this 4-byte field is TRUE when + the writing of an archived log file + has been completed; this field is + only defined in an archived log file */ +#define LOG_FILE_END_LSN (OS_FILE_LOG_BLOCK_SIZE + 4) + /* lsn where the archived log file + at least extends: actually the + archived log file may extend to a + later lsn, as long as it is within the + same log block as this lsn; this field + is defined only when an archived log + file has been completely written */ +#define LOG_CHECKPOINT_1 OS_FILE_LOG_BLOCK_SIZE + /* first checkpoint field in the log + header; we write alternately to the + checkpoint fields when we make new + checkpoints; this field is only defined + in the first log file of a log group */ +#define LOG_CHECKPOINT_2 (3 * OS_FILE_LOG_BLOCK_SIZE) + /* second checkpoint field in the log + header */ +#define LOG_FILE_HDR_SIZE (4 * OS_FILE_LOG_BLOCK_SIZE) + +#define LOG_GROUP_OK 301 +#define LOG_GROUP_CORRUPTED 302 + +/* Log group consists of a number of log files, each of the same size; a log +group is implemented as a space in the sense of the module fil0fil. 
*/ + +struct log_group_struct{ + /* The following fields are protected by log_sys->mutex */ + ulint id; /* log group id */ + ulint n_files; /* number of files in the group */ + ulint file_size; /* individual log file size in bytes, + including the log file header */ + ulint space_id; /* file space which implements the log + group */ + ulint state; /* LOG_GROUP_OK or + LOG_GROUP_CORRUPTED */ + ib_uint64_t lsn; /* lsn used to fix coordinates within + the log group */ + ulint lsn_offset; /* the offset of the above lsn */ + ulint n_pending_writes;/* number of currently pending flush + writes for this log group */ + byte** file_header_bufs;/* buffers for each file header in the + group */ + /*-----------------------------*/ + byte** archive_file_header_bufs;/* buffers for each file + header in the group */ + ulint archive_space_id;/* file space which implements the log + group archive */ + ulint archived_file_no;/* file number corresponding to + log_sys->archived_lsn */ + ulint archived_offset;/* file offset corresponding to + log_sys->archived_lsn, 0 if we have + not yet written to the archive file + number archived_file_no */ + ulint next_archived_file_no;/* during an archive write, + until the write is completed, we + store the next value for + archived_file_no here: the write + completion function then sets the new + value to ..._file_no */ + ulint next_archived_offset; /* like the preceding field */ + /*-----------------------------*/ + ib_uint64_t scanned_lsn; /* used only in recovery: recovery scan + succeeded up to this lsn in this log + group */ + byte* checkpoint_buf; /* checkpoint header is written from + this buffer to the group */ + UT_LIST_NODE_T(log_group_t) + log_groups; /* list of log groups */ +}; + +struct log_struct{ + byte pad[64]; /* padding to prevent other memory + update hotspots from residing on the + same memory cache line */ + ib_uint64_t lsn; /* log sequence number */ + ulint buf_free; /* first free offset within the log + buffer */ + 
mutex_t mutex; /* mutex protecting the log */ + byte* buf; /* log buffer */ + ulint buf_size; /* log buffer size in bytes */ + ulint max_buf_free; /* recommended maximum value of + buf_free, after which the buffer is + flushed */ + ulint old_buf_free; /* value of buf free when log was + last time opened; only in the debug + version */ + ib_uint64_t old_lsn; /* value of lsn when log was last time + opened; only in the debug version */ + ibool check_flush_or_checkpoint; + /* this is set to TRUE when there may + be need to flush the log buffer, or + preflush buffer pool pages, or make + a checkpoint; this MUST be TRUE when + lsn - last_checkpoint_lsn > + max_checkpoint_age; this flag is + peeked at by log_free_check(), which + does not reserve the log mutex */ + UT_LIST_BASE_NODE_T(log_group_t) + log_groups; /* log groups */ + + /* The fields involved in the log buffer flush */ + + ulint buf_next_to_write;/* first offset in the log buffer + where the byte content may not exist + written to file, e.g., the start + offset of a log record catenated + later; this is advanced when a flush + operation is completed to all the log + groups */ + ib_uint64_t written_to_some_lsn; + /* first log sequence number not yet + written to any log group; for this to + be advanced, it is enough that the + write i/o has been completed for any + one log group */ + ib_uint64_t written_to_all_lsn; + /* first log sequence number not yet + written to some log group; for this to + be advanced, it is enough that the + write i/o has been completed for all + log groups */ + ib_uint64_t write_lsn; /* end lsn for the current running + write */ + ulint write_end_offset;/* the data in buffer has been written + up to this offset when the current + write ends: this field will then + be copied to buf_next_to_write */ + ib_uint64_t current_flush_lsn;/* end lsn for the current running + write + flush operation */ + ib_uint64_t flushed_to_disk_lsn; + /* how far we have written the log + AND flushed to disk 
*/ + ulint n_pending_writes;/* number of currently pending flushes + or writes */ + /* NOTE on the 'flush' in names of the fields below: starting from + 4.0.14, we separate the write of the log file and the actual fsync() + or other method to flush it to disk. The names below should really + be 'flush_or_write'! */ + os_event_t no_flush_event; /* this event is in the reset state + when a flush or a write is running; + a thread should wait for this without + owning the log mutex, but NOTE that + to set or reset this event, the + thread MUST own the log mutex! */ + ibool one_flushed; /* during a flush, this is first FALSE + and becomes TRUE when one log group + has been written or flushed */ + os_event_t one_flushed_event;/* this event is reset when the + flush or write has not yet completed + for any log group; e.g., this means + that a transaction has been committed + when this is set; a thread should wait + for this without owning the log mutex, + but NOTE that to set or reset this + event, the thread MUST own the log + mutex! 
*/ + ulint n_log_ios; /* number of log i/os initiated thus + far */ + ulint n_log_ios_old; /* number of log i/o's at the + previous printout */ + time_t last_printout_time;/* when log_print was last time + called */ + + /* Fields involved in checkpoints */ + ulint log_group_capacity; /* capacity of the log group; if + the checkpoint age exceeds this, it is + a serious error because it is possible + we will then overwrite log and spoil + crash recovery */ + ulint max_modified_age_async; + /* when this recommended value for lsn + - buf_pool_get_oldest_modification() + is exceeded, we start an asynchronous + preflush of pool pages */ + ulint max_modified_age_sync; + /* when this recommended value for lsn + - buf_pool_get_oldest_modification() + is exceeded, we start a synchronous + preflush of pool pages */ + ulint adm_checkpoint_interval; + /* administrator-specified checkpoint + interval in terms of log growth in + bytes; the interval actually used by + the database can be smaller */ + ulint max_checkpoint_age_async; + /* when this checkpoint age is exceeded + we start an asynchronous writing of a + new checkpoint */ + ulint max_checkpoint_age; + /* this is the maximum allowed value + for lsn - last_checkpoint_lsn when a + new query step is started */ + ib_uint64_t next_checkpoint_no; + /* next checkpoint number */ + ib_uint64_t last_checkpoint_lsn; + /* latest checkpoint lsn */ + ib_uint64_t next_checkpoint_lsn; + /* next checkpoint lsn */ + ulint n_pending_checkpoint_writes; + /* number of currently pending + checkpoint writes */ + rw_lock_t checkpoint_lock;/* this latch is x-locked when a + checkpoint write is running; a thread + should wait for this without owning + the log mutex */ + byte* checkpoint_buf; /* checkpoint header is read to this + buffer */ +#ifdef UNIV_LOG_ARCHIVE + /* Fields involved in archiving */ + ulint archiving_state;/* LOG_ARCH_ON, LOG_ARCH_STOPPING + LOG_ARCH_STOPPED, LOG_ARCH_OFF */ + ib_uint64_t archived_lsn; /* archiving has advanced 
to this + lsn */ + ulint max_archived_lsn_age_async; + /* recommended maximum age of + archived_lsn, before we start + asynchronous copying to the archive */ + ulint max_archived_lsn_age; + /* maximum allowed age for + archived_lsn */ + ib_uint64_t next_archived_lsn;/* during an archive write, + until the write is completed, we + store the next value for + archived_lsn here: the write + completion function then sets the new + value to archived_lsn */ + ulint archiving_phase;/* LOG_ARCHIVE_READ or + LOG_ARCHIVE_WRITE */ + ulint n_pending_archive_ios; + /* number of currently pending reads + or writes in archiving */ + rw_lock_t archive_lock; /* this latch is x-locked when an + archive write is running; a thread + should wait for this without owning + the log mutex */ + ulint archive_buf_size;/* size of archive_buf */ + byte* archive_buf; /* log segment is written to the + archive from this buffer */ + os_event_t archiving_on; /* if archiving has been stopped, + a thread can wait for this event to + become signaled */ +#endif /* UNIV_LOG_ARCHIVE */ +}; + +#define LOG_ARCH_ON 71 +#define LOG_ARCH_STOPPING 72 +#define LOG_ARCH_STOPPING2 73 +#define LOG_ARCH_STOPPED 74 +#define LOG_ARCH_OFF 75 + +#ifndef UNIV_NONINL +#include "log0log.ic" +#endif + +#endif diff --git a/storage/xtradb/include/log0log.ic b/storage/xtradb/include/log0log.ic new file mode 100644 index 00000000000..85eebda4942 --- /dev/null +++ b/storage/xtradb/include/log0log.ic @@ -0,0 +1,407 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Database log + +Created 12/9/1995 Heikki Tuuri +*******************************************************/ + +#include "os0file.h" +#include "mach0data.h" +#include "mtr0mtr.h" + +/********************************************************** +Checks by parsing that the catenated log segment for a single mtr is +consistent. */ +UNIV_INTERN +ibool +log_check_log_recs( +/*===============*/ + byte* buf, /* in: pointer to the start of + the log segment in the + log_sys->buf log buffer */ + ulint len, /* in: segment length in bytes */ + ib_uint64_t buf_start_lsn); /* in: buffer start lsn */ + +/**************************************************************** +Gets a log block flush bit. */ +UNIV_INLINE +ibool +log_block_get_flush_bit( +/*====================*/ + /* out: TRUE if this block was the first + to be written in a log flush */ + byte* log_block) /* in: log block */ +{ + if (LOG_BLOCK_FLUSH_BIT_MASK + & mach_read_from_4(log_block + LOG_BLOCK_HDR_NO)) { + + return(TRUE); + } + + return(FALSE); +} + +/**************************************************************** +Sets the log block flush bit. 
*/ +UNIV_INLINE +void +log_block_set_flush_bit( +/*====================*/ + byte* log_block, /* in: log block */ + ibool val) /* in: value to set */ +{ + ulint field; + + field = mach_read_from_4(log_block + LOG_BLOCK_HDR_NO); + + if (val) { + field = field | LOG_BLOCK_FLUSH_BIT_MASK; + } else { + field = field & ~LOG_BLOCK_FLUSH_BIT_MASK; + } + + mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, field); +} + +/**************************************************************** +Gets a log block number stored in the header. */ +UNIV_INLINE +ulint +log_block_get_hdr_no( +/*=================*/ + /* out: log block number stored in the block + header */ + byte* log_block) /* in: log block */ +{ + return(~LOG_BLOCK_FLUSH_BIT_MASK + & mach_read_from_4(log_block + LOG_BLOCK_HDR_NO)); +} + +/**************************************************************** +Sets the log block number stored in the header; NOTE that this must be set +before the flush bit! */ +UNIV_INLINE +void +log_block_set_hdr_no( +/*=================*/ + byte* log_block, /* in: log block */ + ulint n) /* in: log block number: must be > 0 and + < LOG_BLOCK_FLUSH_BIT_MASK */ +{ + ut_ad(n > 0); + ut_ad(n < LOG_BLOCK_FLUSH_BIT_MASK); + + mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, n); +} + +/**************************************************************** +Gets a log block data length. */ +UNIV_INLINE +ulint +log_block_get_data_len( +/*===================*/ + /* out: log block data length measured as a + byte offset from the block start */ + byte* log_block) /* in: log block */ +{ + return(mach_read_from_2(log_block + LOG_BLOCK_HDR_DATA_LEN)); +} + +/**************************************************************** +Sets the log block data length. 
*/ +UNIV_INLINE +void +log_block_set_data_len( +/*===================*/ + byte* log_block, /* in: log block */ + ulint len) /* in: data length */ +{ + mach_write_to_2(log_block + LOG_BLOCK_HDR_DATA_LEN, len); +} + +/**************************************************************** +Gets a log block first mtr log record group offset. */ +UNIV_INLINE +ulint +log_block_get_first_rec_group( +/*==========================*/ + /* out: first mtr log record group byte offset + from the block start, 0 if none */ + byte* log_block) /* in: log block */ +{ + return(mach_read_from_2(log_block + LOG_BLOCK_FIRST_REC_GROUP)); +} + +/**************************************************************** +Sets the log block first mtr log record group offset. */ +UNIV_INLINE +void +log_block_set_first_rec_group( +/*==========================*/ + byte* log_block, /* in: log block */ + ulint offset) /* in: offset, 0 if none */ +{ + mach_write_to_2(log_block + LOG_BLOCK_FIRST_REC_GROUP, offset); +} + +/**************************************************************** +Gets a log block checkpoint number field (4 lowest bytes). */ +UNIV_INLINE +ulint +log_block_get_checkpoint_no( +/*========================*/ + /* out: checkpoint no (4 lowest bytes) */ + byte* log_block) /* in: log block */ +{ + return(mach_read_from_4(log_block + LOG_BLOCK_CHECKPOINT_NO)); +} + +/**************************************************************** +Sets a log block checkpoint number field (4 lowest bytes). */ +UNIV_INLINE +void +log_block_set_checkpoint_no( +/*========================*/ + byte* log_block, /* in: log block */ + ib_uint64_t no) /* in: checkpoint no */ +{ + mach_write_to_4(log_block + LOG_BLOCK_CHECKPOINT_NO, (ulint) no); +} + +/**************************************************************** +Converts a lsn to a log block number. 
*/ +UNIV_INLINE +ulint +log_block_convert_lsn_to_no( +/*========================*/ + /* out: log block number, + it is > 0 and <= 1G */ + ib_uint64_t lsn) /* in: lsn of a byte within the block */ +{ + return(((ulint) (lsn / OS_FILE_LOG_BLOCK_SIZE) & 0x3FFFFFFFUL) + 1); +} + +/**************************************************************** +Calculates the checksum for a log block. */ +UNIV_INLINE +ulint +log_block_calc_checksum( +/*====================*/ + /* out: checksum */ + const byte* block) /* in: log block */ +{ + ulint sum; + ulint sh; + ulint i; + + sum = 1; + sh = 0; + + for (i = 0; i < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; i++) { + ulint b = (ulint) block[i]; + sum &= 0x7FFFFFFFUL; + sum += b; + sum += b << sh; + sh++; + if (sh > 24) { + sh = 0; + } + } + + return(sum); +} + +/**************************************************************** +Gets a log block checksum field value. */ +UNIV_INLINE +ulint +log_block_get_checksum( +/*===================*/ + /* out: checksum */ + const byte* log_block) /* in: log block */ +{ + return(mach_read_from_4(log_block + OS_FILE_LOG_BLOCK_SIZE + - LOG_BLOCK_CHECKSUM)); +} + +/**************************************************************** +Sets a log block checksum field value. */ +UNIV_INLINE +void +log_block_set_checksum( +/*===================*/ + byte* log_block, /* in: log block */ + ulint checksum) /* in: checksum */ +{ + mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE + - LOG_BLOCK_CHECKSUM, + checksum); +} + +/**************************************************************** +Initializes a log block in the log buffer. 
*/ +UNIV_INLINE +void +log_block_init( +/*===========*/ + byte* log_block, /* in: pointer to the log buffer */ + ib_uint64_t lsn) /* in: lsn within the log block */ +{ + ulint no; + + ut_ad(mutex_own(&(log_sys->mutex))); + + no = log_block_convert_lsn_to_no(lsn); + + log_block_set_hdr_no(log_block, no); + + log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE); + log_block_set_first_rec_group(log_block, 0); +} + +/**************************************************************** +Initializes a log block in the log buffer in the old format, where there +was no checksum yet. */ +UNIV_INLINE +void +log_block_init_in_old_format( +/*=========================*/ + byte* log_block, /* in: pointer to the log buffer */ + ib_uint64_t lsn) /* in: lsn within the log block */ +{ + ulint no; + + ut_ad(mutex_own(&(log_sys->mutex))); + + no = log_block_convert_lsn_to_no(lsn); + + log_block_set_hdr_no(log_block, no); + mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE + - LOG_BLOCK_CHECKSUM, no); + log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE); + log_block_set_first_rec_group(log_block, 0); +} + +/**************************************************************** +Writes to the log the string given. The log must be released with +log_release. 
*/ +UNIV_INLINE +ib_uint64_t +log_reserve_and_write_fast( +/*=======================*/ + /* out: end lsn of the log record, + zero if did not succeed */ + byte* str, /* in: string */ + ulint len, /* in: string length */ + ib_uint64_t* start_lsn,/* out: start lsn of the log record */ + ibool* success)/* out: TRUE if success */ +{ + log_t* log = log_sys; + ulint data_len; + ib_uint64_t lsn; + + *success = TRUE; + + mutex_enter(&(log->mutex)); + + data_len = len + log->buf_free % OS_FILE_LOG_BLOCK_SIZE; + + if (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { + + /* The string does not fit within the current log block + or the log block would become full */ + + *success = FALSE; + + mutex_exit(&(log->mutex)); + + return(0); + } + + *start_lsn = log->lsn; + + ut_memcpy(log->buf + log->buf_free, str, len); + + log_block_set_data_len((byte*) ut_align_down(log->buf + log->buf_free, + OS_FILE_LOG_BLOCK_SIZE), + data_len); +#ifdef UNIV_LOG_DEBUG + log->old_buf_free = log->buf_free; + log->old_lsn = log->lsn; +#endif + log->buf_free += len; + + ut_ad(log->buf_free <= log->buf_size); + + lsn = log->lsn += len; + +#ifdef UNIV_LOG_DEBUG + log_check_log_recs(log->buf + log->old_buf_free, + log->buf_free - log->old_buf_free, log->old_lsn); +#endif + return(lsn); +} + +/*************************************************************************** +Releases the log mutex. */ +UNIV_INLINE +void +log_release(void) +/*=============*/ +{ + mutex_exit(&(log_sys->mutex)); +} + +/**************************************************************** +Gets the current lsn. */ +UNIV_INLINE +ib_uint64_t +log_get_lsn(void) +/*=============*/ + /* out: current lsn */ +{ + ib_uint64_t lsn; + + mutex_enter(&(log_sys->mutex)); + + lsn = log_sys->lsn; + + mutex_exit(&(log_sys->mutex)); + + return(lsn); +} + +/*************************************************************************** +Checks if there is need for a log buffer flush or a new checkpoint, and does +this if yes. 
Any database operation should call this when it has modified +more than about 4 pages. NOTE that this function may only be called when the +OS thread owns no synchronization objects except the dictionary mutex. */ +UNIV_INLINE +void +log_free_check(void) +/*================*/ +{ + /* ut_ad(sync_thread_levels_empty()); */ + + if (log_sys->check_flush_or_checkpoint) { + + log_check_margins(); + } +} diff --git a/storage/xtradb/include/log0recv.h b/storage/xtradb/include/log0recv.h new file mode 100644 index 00000000000..e3fe9ed330a --- /dev/null +++ b/storage/xtradb/include/log0recv.h @@ -0,0 +1,392 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Recovery + +Created 9/20/1997 Heikki Tuuri +*******************************************************/ + +#ifndef log0recv_h +#define log0recv_h + +#include "univ.i" +#include "ut0byte.h" +#include "buf0types.h" +#include "hash0hash.h" +#include "log0log.h" + +#ifdef UNIV_HOTBACKUP +extern ibool recv_replay_file_ops; + +/*********************************************************************** +Reads the checkpoint info needed in hot backup. 
*/ +UNIV_INTERN +ibool +recv_read_cp_info_for_backup( +/*=========================*/ + /* out: TRUE if success */ + byte* hdr, /* in: buffer containing the log group + header */ + ib_uint64_t* lsn, /* out: checkpoint lsn */ + ulint* offset, /* out: checkpoint offset in the log group */ + ulint* fsp_limit,/* out: fsp limit of space 0, + 1000000000 if the database is running + with < version 3.23.50 of InnoDB */ + ib_uint64_t* cp_no, /* out: checkpoint number */ + ib_uint64_t* first_header_lsn); + /* out: lsn of the start of the + first log file */ +/*********************************************************************** +Scans the log segment and n_bytes_scanned is set to the length of valid +log scanned. */ +UNIV_INTERN +void +recv_scan_log_seg_for_backup( +/*=========================*/ + byte* buf, /* in: buffer containing log data */ + ulint buf_len, /* in: data length in that buffer */ + ib_uint64_t* scanned_lsn, /* in/out: lsn of buffer start, + we return scanned lsn */ + ulint* scanned_checkpoint_no, + /* in/out: 4 lowest bytes of the + highest scanned checkpoint number so + far */ + ulint* n_bytes_scanned);/* out: how much we were able to + scan, smaller than buf_len if log + data ended here */ +#endif /* UNIV_HOTBACKUP */ +/*********************************************************************** +Returns TRUE if recovery is currently running. */ +UNIV_INLINE +ibool +recv_recovery_is_on(void); +/*=====================*/ +/*********************************************************************** +Returns TRUE if recovery from backup is currently running. */ +UNIV_INLINE +ibool +recv_recovery_from_backup_is_on(void); +/*=================================*/ +/**************************************************************************** +Applies the hashed log records to the page, if the page lsn is less than the +lsn of a log record. This can be called when a buffer page has just been +read in, or also for a page already in the buffer pool. 
*/ +UNIV_INTERN +void +recv_recover_page( +/*==============*/ + ibool recover_backup, + /* in: TRUE if we are recovering a backup + page: then we do not acquire any latches + since the page was read in outside the + buffer pool */ + ibool just_read_in, + /* in: TRUE if the i/o-handler calls this for + a freshly read page */ + buf_block_t* block); /* in: buffer block */ +/************************************************************ +Recovers from a checkpoint. When this function returns, the database is able +to start processing of new user transactions, but the function +recv_recovery_from_checkpoint_finish should be called later to complete +the recovery and free the resources used in it. */ +UNIV_INTERN +ulint +recv_recovery_from_checkpoint_start_func( +/*=====================================*/ + /* out: error code or DB_SUCCESS */ +#ifdef UNIV_LOG_ARCHIVE + ulint type, /* in: LOG_CHECKPOINT or LOG_ARCHIVE */ + ib_uint64_t limit_lsn, /* in: recover up to this lsn + if possible */ +#endif /* UNIV_LOG_ARCHIVE */ + ib_uint64_t min_flushed_lsn,/* in: min flushed lsn from + data files */ + ib_uint64_t max_flushed_lsn);/* in: max flushed lsn from + data files */ +/* Wrapper macro that always takes four arguments, so that callers look the +same in both build configurations: when UNIV_LOG_ARCHIVE is not defined, the +type and lim arguments are dropped and only min and max are passed on to +recv_recovery_from_checkpoint_start_func(). */ +#ifdef UNIV_LOG_ARCHIVE +# define recv_recovery_from_checkpoint_start(type,lim,min,max) \ + recv_recovery_from_checkpoint_start_func(type,lim,min,max) +#else /* UNIV_LOG_ARCHIVE */ +# define recv_recovery_from_checkpoint_start(type,lim,min,max) \ + recv_recovery_from_checkpoint_start_func(min,max) +#endif /* UNIV_LOG_ARCHIVE */ +/************************************************************ +Completes recovery from a checkpoint. */ +UNIV_INTERN +void +recv_recovery_from_checkpoint_finish(void); +/*======================================*/ +/*********************************************************** +Scans log from a buffer and stores new log data to the parsing buffer. Parses +and hashes the log records if new data found. 
*/ +UNIV_INTERN +ibool +recv_scan_log_recs( +/*===============*/ + /* out: TRUE if limit_lsn has been + reached, or not able to scan any more + in this log group */ + ibool apply_automatically,/* in: TRUE if we want this + function to apply log records + automatically when the hash table + becomes full; in the hot backup tool + the tool does the applying, not this + function */ + ulint available_memory,/* in: we let the hash table of recs + to grow to this size, at the maximum */ + ibool store_to_hash, /* in: TRUE if the records should be + stored to the hash table; this is set + to FALSE if just debug checking is + needed */ + byte* buf, /* in: buffer containing a log segment + or garbage */ + ulint len, /* in: buffer length */ + ib_uint64_t start_lsn, /* in: buffer start lsn */ + ib_uint64_t* contiguous_lsn, /* in/out: it is known that all log + groups contain contiguous log data up + to this lsn */ + ib_uint64_t* group_scanned_lsn);/* out: scanning succeeded up to + this lsn */ +/********************************************************** +Resets the logs. The contents of log files will be lost! */ +UNIV_INTERN +void +recv_reset_logs( +/*============*/ + ib_uint64_t lsn, /* in: reset to this lsn + rounded up to be divisible by + OS_FILE_LOG_BLOCK_SIZE, after + which we add + LOG_BLOCK_HDR_SIZE */ +#ifdef UNIV_LOG_ARCHIVE + ulint arch_log_no, /* in: next archived log file number */ +#endif /* UNIV_LOG_ARCHIVE */ + ibool new_logs_created);/* in: TRUE if resetting logs + is done at the log creation; + FALSE if it is done after + archive recovery */ +#ifdef UNIV_HOTBACKUP +/********************************************************** +Creates new log files after a backup has been restored. 
*/ +UNIV_INTERN +void +recv_reset_log_files_for_backup( +/*============================*/ + const char* log_dir, /* in: log file directory path */ + ulint n_log_files, /* in: number of log files */ + ulint log_file_size, /* in: log file size */ + ib_uint64_t lsn); /* in: new start lsn, must be + divisible by OS_FILE_LOG_BLOCK_SIZE */ +#endif /* UNIV_HOTBACKUP */ +/************************************************************ +Creates the recovery system. */ +UNIV_INTERN +void +recv_sys_create(void); +/*=================*/ +/************************************************************ +Inits the recovery system for a recovery operation. */ +UNIV_INTERN +void +recv_sys_init( +/*==========*/ + ibool recover_from_backup, /* in: TRUE if this is called + to recover from a hot backup */ + ulint available_memory); /* in: available memory in bytes */ +/*********************************************************************** +Empties the hash table of stored log records, applying them to appropriate +pages. */ +UNIV_INTERN +void +recv_apply_hashed_log_recs( +/*=======================*/ + ibool allow_ibuf); /* in: if TRUE, also ibuf operations are + allowed during the application; if FALSE, + no ibuf operations are allowed, and after + the application all file pages are flushed to + disk and invalidated in buffer pool: this + alternative means that no new log records + can be generated during the application */ +#ifdef UNIV_HOTBACKUP +/*********************************************************************** +Applies log records in the hash table to a backup. */ +UNIV_INTERN +void +recv_apply_log_recs_for_backup(void); +/*================================*/ +#endif /* UNIV_HOTBACKUP */ +#ifdef UNIV_LOG_ARCHIVE +/************************************************************ +Recovers from archived log files, and also from log files, if they exist. 
*/ +UNIV_INTERN +ulint +recv_recovery_from_archive_start( +/*=============================*/ + /* out: error code or DB_SUCCESS */ + ib_uint64_t min_flushed_lsn,/* in: min flushed lsn field from the + data files */ + ib_uint64_t limit_lsn, /* in: recover up to this lsn if + possible */ + ulint first_log_no); /* in: number of the first archived + log file to use in the recovery; the + file will be searched from + INNOBASE_LOG_ARCH_DIR specified in + server config file */ +/************************************************************ +Completes recovery from archive. */ +UNIV_INTERN +void +recv_recovery_from_archive_finish(void); +/*===================================*/ +#endif /* UNIV_LOG_ARCHIVE */ + +/* Block of log record data */ +typedef struct recv_data_struct recv_data_t; +struct recv_data_struct{ + recv_data_t* next; /* pointer to the next block or NULL */ + /* the log record data is stored physically + immediately after this struct, max amount + RECV_DATA_BLOCK_SIZE bytes of it */ +}; + +/* Stored log record struct */ +typedef struct recv_struct recv_t; +struct recv_struct{ + byte type; /* log record type */ + ulint len; /* log record body length in bytes */ + recv_data_t* data; /* chain of blocks containing the log record + body */ + ib_uint64_t start_lsn;/* start lsn of the log segment written by + the mtr which generated this log record: NOTE + that this is not necessarily the start lsn of + this log record */ + ib_uint64_t end_lsn;/* end lsn of the log segment written by + the mtr which generated this log record: NOTE + that this is not necessarily the end lsn of + this log record */ + UT_LIST_NODE_T(recv_t) + rec_list;/* list of log records for this page */ +}; + +/* Hashed page file address struct */ +typedef struct recv_addr_struct recv_addr_t; +struct recv_addr_struct{ + ulint state; /* RECV_NOT_PROCESSED, RECV_BEING_READ, + RECV_BEING_PROCESSED, or RECV_PROCESSED */ + ulint space; /* space id */ + ulint page_no;/* page number */ + UT_LIST_BASE_NODE_T(recv_t) + 
rec_list;/* list of log records for this page */ + hash_node_t addr_hash; +}; + +/* Recovery system data structure */ +typedef struct recv_sys_struct recv_sys_t; +struct recv_sys_struct{ + mutex_t mutex; /* mutex protecting the fields apply_log_recs, + n_addrs, and the state field in each recv_addr + struct */ + ibool apply_log_recs; + /* this is TRUE when log rec application to + pages is allowed; this flag tells the + i/o-handler if it should do log record + application */ + ibool apply_batch_on; + /* this is TRUE when a log rec application + batch is running */ + ib_uint64_t lsn; /* log sequence number */ + ulint last_log_buf_size; + /* size of the log buffer when the database + last time wrote to the log */ + byte* last_block; + /* possible incomplete last recovered log + block */ + byte* last_block_buf_start; + /* the nonaligned start address of the + preceding buffer */ + byte* buf; /* buffer for parsing log records */ + ulint len; /* amount of data in buf */ + ib_uint64_t parse_start_lsn; + /* this is the lsn from which we were able to + start parsing log records and adding them to + the hash table; zero if a suitable + start point not found yet */ + ib_uint64_t scanned_lsn; + /* the log data has been scanned up to this + lsn */ + ulint scanned_checkpoint_no; + /* the log data has been scanned up to this + checkpoint number (lowest 4 bytes) */ + ulint recovered_offset; + /* start offset of non-parsed log records in + buf */ + ib_uint64_t recovered_lsn; + /* the log records have been parsed up to + this lsn */ + ib_uint64_t limit_lsn;/* recovery should be made at most up to this + lsn */ + ibool found_corrupt_log; + /* this is set to TRUE if we during log + scan find a corrupt log block, or a corrupt + log record, or there is a log parsing + buffer overflow */ +#ifdef UNIV_LOG_ARCHIVE + log_group_t* archive_group; + /* in archive recovery: the log group whose + archive is read */ +#endif /* UNIV_LOG_ARCHIVE */ + mem_heap_t* heap; /* memory heap of log 
records and file + addresses */ + hash_table_t* addr_hash;/* hash table of file addresses of pages */ + ulint n_addrs;/* number of not processed hashed file + addresses in the hash table */ +}; + +extern recv_sys_t* recv_sys; +extern ibool recv_recovery_on; +extern ibool recv_no_ibuf_operations; +extern ibool recv_needed_recovery; + +extern ibool recv_lsn_checks_on; +#ifdef UNIV_HOTBACKUP +extern ibool recv_is_making_a_backup; +#endif /* UNIV_HOTBACKUP */ +extern ulint recv_max_parsed_page_no; + +/* Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many +times! */ +#define RECV_PARSING_BUF_SIZE (2 * 1024 * 1024) + +/* Size of block reads when the log groups are scanned forward to do a +roll-forward */ +#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE) + +/* States of recv_addr_struct */ +#define RECV_NOT_PROCESSED 71 +#define RECV_BEING_READ 72 +#define RECV_BEING_PROCESSED 73 +#define RECV_PROCESSED 74 + +extern ulint recv_n_pool_free_frames; + +#ifndef UNIV_NONINL +#include "log0recv.ic" +#endif + +#endif diff --git a/storage/xtradb/include/log0recv.ic b/storage/xtradb/include/log0recv.ic new file mode 100644 index 00000000000..e114bede38f --- /dev/null +++ b/storage/xtradb/include/log0recv.ic @@ -0,0 +1,48 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Recovery + +Created 9/20/1997 Heikki Tuuri +*******************************************************/ + +#include "univ.i" + +/* Flag returned by recv_recovery_from_backup_is_on(); declared here, defined +outside this file */ +extern ibool recv_recovery_from_backup_on; + +/*********************************************************************** +Returns TRUE if recovery is currently running. */ +UNIV_INLINE +ibool +recv_recovery_is_on(void) +/*=====================*/ + /* out: current value of the global flag + recv_recovery_on */ +{ + return(UNIV_UNLIKELY(recv_recovery_on)); +} + +/*********************************************************************** +Returns TRUE if recovery from backup is currently running. */ +UNIV_INLINE +ibool +recv_recovery_from_backup_is_on(void) +/*=================================*/ + /* out: current value of the global flag + recv_recovery_from_backup_on */ +{ + return(recv_recovery_from_backup_on); +} + diff --git a/storage/xtradb/include/mach0data.h b/storage/xtradb/include/mach0data.h new file mode 100644 index 00000000000..78b48af0836 --- /dev/null +++ b/storage/xtradb/include/mach0data.h @@ -0,0 +1,398 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/********************************************************************** +Utilities for converting data from the database file +to the machine format. + +Created 11/28/1995 Heikki Tuuri +***********************************************************************/ + +#ifndef mach0data_h +#define mach0data_h + +#include "univ.i" +#include "ut0byte.h" + +/* The data and all fields are always stored in a database file +in the same format: ascii, big-endian, ... . +All data in the files MUST be accessed using the functions in this +module. */ + +/*********************************************************** +The following function is used to store data in one byte. */ +UNIV_INLINE +void +mach_write_to_1( +/*============*/ + byte* b, /* in: pointer to byte where to store */ + ulint n); /* in: ulint integer to be stored, >= 0, < 256 */ +/************************************************************ +The following function is used to fetch data from one byte. */ +UNIV_INLINE +ulint +mach_read_from_1( +/*=============*/ + /* out: ulint integer, >= 0, < 256 */ + const byte* b) /* in: pointer to byte */ + __attribute__((nonnull, pure)); +/*********************************************************** +The following function is used to store data in two consecutive +bytes. We store the most significant byte to the lowest address. */ +UNIV_INLINE +void +mach_write_to_2( +/*============*/ + byte* b, /* in: pointer to two bytes where to store */ + ulint n); /* in: ulint integer to be stored, >= 0, < 64k */ +/************************************************************ +The following function is used to fetch data from two consecutive +bytes. The most significant byte is at the lowest address. 
*/ +UNIV_INLINE +ulint +mach_read_from_2( +/*=============*/ + /* out: ulint integer, >= 0, < 64k */ + const byte* b) /* in: pointer to two bytes */ + __attribute__((nonnull, pure)); + +/************************************************************ +The following function is used to convert a 16-bit data item +to the canonical format, for fast bytewise equality test +against memory. */ +UNIV_INLINE +uint16 +mach_encode_2( +/*==========*/ + /* out: 16-bit integer in canonical format */ + ulint n) /* in: integer in machine-dependent format */ + __attribute__((const)); +/************************************************************ +The following function is used to convert a 16-bit data item +from the canonical format, for fast bytewise equality test +against memory. */ +UNIV_INLINE +ulint +mach_decode_2( +/*==========*/ + /* out: integer in machine-dependent format */ + uint16 n) /* in: 16-bit integer in canonical format */ + __attribute__((const)); +/*********************************************************** +The following function is used to store data in 3 consecutive +bytes. We store the most significant byte to the lowest address. */ +UNIV_INLINE +void +mach_write_to_3( +/*============*/ + byte* b, /* in: pointer to 3 bytes where to store */ + ulint n); /* in: ulint integer to be stored */ +/************************************************************ +The following function is used to fetch data from 3 consecutive +bytes. The most significant byte is at the lowest address. */ +UNIV_INLINE +ulint +mach_read_from_3( +/*=============*/ + /* out: ulint integer */ + const byte* b) /* in: pointer to 3 bytes */ + __attribute__((nonnull, pure)); +/*********************************************************** +The following function is used to store data in four consecutive +bytes. We store the most significant byte to the lowest address. 
*/ +UNIV_INLINE +void +mach_write_to_4( +/*============*/ + byte* b, /* in: pointer to four bytes where to store */ + ulint n); /* in: ulint integer to be stored */ +/************************************************************ +The following function is used to fetch data from 4 consecutive +bytes. The most significant byte is at the lowest address. */ +UNIV_INLINE +ulint +mach_read_from_4( +/*=============*/ + /* out: ulint integer */ + const byte* b) /* in: pointer to four bytes */ + __attribute__((nonnull, pure)); +/************************************************************* +Writes a ulint in a compressed form (1..5 bytes). */ +UNIV_INLINE +ulint +mach_write_compressed( +/*==================*/ + /* out: stored size in bytes */ + byte* b, /* in: pointer to memory where to store */ + ulint n); /* in: ulint integer to be stored */ +/************************************************************* +Returns the size of a ulint when written in the compressed form. */ +UNIV_INLINE +ulint +mach_get_compressed_size( +/*=====================*/ + /* out: compressed size in bytes */ + ulint n) /* in: ulint integer to be stored */ + __attribute__((const)); +/************************************************************* +Reads a ulint in a compressed form. */ +UNIV_INLINE +ulint +mach_read_compressed( +/*=================*/ + /* out: read integer */ + const byte* b) /* in: pointer to memory from where to read */ + __attribute__((nonnull, pure)); +/*********************************************************** +The following function is used to store data in 6 consecutive +bytes. We store the most significant byte to the lowest address. */ +UNIV_INLINE +void +mach_write_to_6( +/*============*/ + byte* b, /* in: pointer to 6 bytes where to store */ + dulint n); /* in: dulint integer to be stored */ +/************************************************************ +The following function is used to fetch data from 6 consecutive +bytes. 
The most significant byte is at the lowest address. */ +UNIV_INLINE +dulint +mach_read_from_6( +/*=============*/ + /* out: dulint integer */ + const byte* b) /* in: pointer to 6 bytes */ + __attribute__((nonnull, pure)); +/*********************************************************** +The following function is used to store data in 7 consecutive +bytes. We store the most significant byte to the lowest address. */ +UNIV_INLINE +void +mach_write_to_7( +/*============*/ + byte* b, /* in: pointer to 7 bytes where to store */ + dulint n); /* in: dulint integer to be stored */ +/************************************************************ +The following function is used to fetch data from 7 consecutive +bytes. The most significant byte is at the lowest address. */ +UNIV_INLINE +dulint +mach_read_from_7( +/*=============*/ + /* out: dulint integer */ + const byte* b) /* in: pointer to 7 bytes */ + __attribute__((nonnull, pure)); +/*********************************************************** +The following function is used to store data in 8 consecutive +bytes. We store the most significant byte to the lowest address. */ +UNIV_INLINE +void +mach_write_to_8( +/*============*/ + byte* b, /* in: pointer to 8 bytes where to store */ + dulint n); /* in: dulint integer to be stored */ +/*********************************************************** +The following function is used to store data in 8 consecutive +bytes. We store the most significant byte to the lowest address. */ +UNIV_INLINE +void +mach_write_ull( +/*===========*/ + byte* b, /* in: pointer to 8 bytes where to store */ + ib_uint64_t n); /* in: 64-bit integer to be stored */ +/************************************************************ +The following function is used to fetch data from 8 consecutive +bytes. The most significant byte is at the lowest address. 
*/ +UNIV_INLINE +dulint +mach_read_from_8( +/*=============*/ + /* out: dulint integer */ + const byte* b) /* in: pointer to 8 bytes */ + __attribute__((nonnull, pure)); +/************************************************************ +The following function is used to fetch data from 8 consecutive +bytes. The most significant byte is at the lowest address. */ +UNIV_INLINE +ib_uint64_t +mach_read_ull( +/*==========*/ + /* out: 64-bit integer */ + const byte* b) /* in: pointer to 8 bytes */ + __attribute__((nonnull, pure)); +/************************************************************* +Writes a dulint in a compressed form (5..9 bytes). */ +UNIV_INLINE +ulint +mach_dulint_write_compressed( +/*=========================*/ + /* out: size in bytes */ + byte* b, /* in: pointer to memory where to store */ + dulint n); /* in: dulint integer to be stored */ +/************************************************************* +Returns the size of a dulint when written in the compressed form. */ +UNIV_INLINE +ulint +mach_dulint_get_compressed_size( +/*============================*/ + /* out: compressed size in bytes */ + dulint n); /* in: dulint integer to be stored */ +/************************************************************* +Reads a dulint in a compressed form. */ +UNIV_INLINE +dulint +mach_dulint_read_compressed( +/*========================*/ + /* out: read dulint */ + const byte* b) /* in: pointer to memory from where to read */ + __attribute__((nonnull, pure)); +/************************************************************* +Writes a dulint in a compressed form (1..11 bytes). */ +UNIV_INLINE +ulint +mach_dulint_write_much_compressed( +/*==============================*/ + /* out: size in bytes */ + byte* b, /* in: pointer to memory where to store */ + dulint n); /* in: dulint integer to be stored */ +/************************************************************* +Returns the size of a dulint when written in the compressed form. 
*/ +UNIV_INLINE +ulint +mach_dulint_get_much_compressed_size( +/*=================================*/ + /* out: compressed size in bytes */ + dulint n) /* in: dulint integer to be stored */ + __attribute__((const)); +/************************************************************* +Reads a dulint in a compressed form. */ +UNIV_INLINE +dulint +mach_dulint_read_much_compressed( +/*=============================*/ + /* out: read dulint */ + const byte* b) /* in: pointer to memory from where to read */ + __attribute__((nonnull, pure)); +/************************************************************* +Reads a ulint in a compressed form if the log record fully contains it. */ +UNIV_INTERN +byte* +mach_parse_compressed( +/*==================*/ + /* out: pointer to end of the stored field, NULL if + not complete */ + byte* ptr, /* in: pointer to buffer from where to read */ + byte* end_ptr,/* in: pointer to end of the buffer */ + ulint* val); /* out: read value */ +/************************************************************* +Reads a dulint in a compressed form if the log record fully contains it. */ +UNIV_INTERN +byte* +mach_dulint_parse_compressed( +/*=========================*/ + /* out: pointer to end of the stored field, NULL if + not complete */ + byte* ptr, /* in: pointer to buffer from where to read */ + byte* end_ptr,/* in: pointer to end of the buffer */ + dulint* val); /* out: read value */ +/************************************************************* +Reads a double. It is stored in a little-endian format. */ +UNIV_INLINE +double +mach_double_read( +/*=============*/ + /* out: double read */ + const byte* b) /* in: pointer to memory from where to read */ + __attribute__((nonnull, pure)); +/************************************************************* +Writes a double. It is stored in a little-endian format. 
*/ +UNIV_INLINE +void +mach_double_write( +/*==============*/ + byte* b, /* in: pointer to memory where to write */ + double d); /* in: double */ +/************************************************************* +Reads a float. It is stored in a little-endian format. */ +UNIV_INLINE +float +mach_float_read( +/*============*/ + /* out: float read */ + const byte* b) /* in: pointer to memory from where to read */ + __attribute__((nonnull, pure)); +/************************************************************* +Writes a float. It is stored in a little-endian format. */ +UNIV_INLINE +void +mach_float_write( +/*=============*/ + byte* b, /* in: pointer to memory where to write */ + float d); /* in: float */ +/************************************************************* +Reads a ulint stored in the little-endian format. */ +UNIV_INLINE +ulint +mach_read_from_n_little_endian( +/*===========================*/ + /* out: unsigned long int */ + const byte* buf, /* in: from where to read */ + ulint buf_size) /* in: from how many bytes to read */ + __attribute__((nonnull, pure)); +/************************************************************* +Writes a ulint in the little-endian format. */ +UNIV_INLINE +void +mach_write_to_n_little_endian( +/*==========================*/ + byte* dest, /* in: where to write */ + ulint dest_size, /* in: into how many bytes to write */ + ulint n); /* in: unsigned long int to write */ +/************************************************************* +Reads a ulint stored in the little-endian format. */ +UNIV_INLINE +ulint +mach_read_from_2_little_endian( +/*===========================*/ + /* out: unsigned long int */ + const byte* buf) /* in: from where to read */ + __attribute__((nonnull, pure)); +/************************************************************* +Writes a ulint in the little-endian format. 
*/ +UNIV_INLINE +void +mach_write_to_2_little_endian( +/*==========================*/ + byte* dest, /* in: where to write */ + ulint n); /* in: unsigned long int to write */ + +/************************************************************* +Convert integral type from storage byte order (big endian) to +host byte order. */ +UNIV_INLINE +ullint +mach_read_int_type( +/*===============*/ + /* out: integer value */ + const byte* src, /* in: where to read from */ + ulint len, /* in: length of src */ + ibool unsigned_type); /* in: signed or unsigned flag */ +#ifndef UNIV_NONINL +#include "mach0data.ic" +#endif + +#endif diff --git a/storage/xtradb/include/mach0data.ic b/storage/xtradb/include/mach0data.ic new file mode 100644 index 00000000000..5dda9aece2f --- /dev/null +++ b/storage/xtradb/include/mach0data.ic @@ -0,0 +1,784 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/********************************************************************** +Utilities for converting data from the database file +to the machine format. 
+ +Created 11/28/1995 Heikki Tuuri +***********************************************************************/ + +#include "ut0mem.h" + +/*********************************************************** +The following function is used to store data in one byte. */ +UNIV_INLINE +void +mach_write_to_1( +/*============*/ + byte* b, /* in: pointer to byte where to store */ + ulint n) /* in: ulint integer to be stored, >= 0, < 256 */ +{ + ut_ad(b); + ut_ad(n <= 0xFFUL); + + b[0] = (byte)n; +} + +/************************************************************ +The following function is used to fetch data from one byte. */ +UNIV_INLINE +ulint +mach_read_from_1( +/*=============*/ + /* out: ulint integer, >= 0, < 256 */ + const byte* b) /* in: pointer to byte */ +{ + ut_ad(b); + return((ulint)(b[0])); +} + +/*********************************************************** +The following function is used to store data in two consecutive +bytes. We store the most significant byte to the lowest address. */ +UNIV_INLINE +void +mach_write_to_2( +/*============*/ + byte* b, /* in: pointer to two bytes where to store */ + ulint n) /* in: ulint integer to be stored, >= 0, < 64k */ +{ + ut_ad(b); + ut_ad(n <= 0xFFFFUL); + + /* high-order byte goes to b[0], low-order byte to b[1] */ + b[0] = (byte)(n >> 8); + b[1] = (byte)(n); +} + +/************************************************************ +The following function is used to fetch data from 2 consecutive +bytes. The most significant byte is at the lowest address. */ +UNIV_INLINE +ulint +mach_read_from_2( +/*=============*/ + /* out: ulint integer, >= 0, < 64k */ + const byte* b) /* in: pointer to 2 bytes */ +{ + ut_ad(b); + return( ((ulint)(b[0]) << 8) + + (ulint)(b[1]) + ); +} + +/************************************************************ +The following function is used to convert a 16-bit data item +to the canonical format, for fast bytewise equality test +against memory. 
*/ +UNIV_INLINE +uint16 +mach_encode_2( +/*==========*/ + /* out: 16-bit integer in canonical format */ + ulint n) /* in: integer in machine-dependent format */ +{ + uint16 ret; + ut_ad(2 == sizeof ret); + /* write n into the two bytes of ret, most significant byte + first, so that the bytes of ret match the stored 2-byte form */ + mach_write_to_2((byte*) &ret, n); + return(ret); +} +/************************************************************ +The following function is used to convert a 16-bit data item +from the canonical format, for fast bytewise equality test +against memory. */ +UNIV_INLINE +ulint +mach_decode_2( +/*==========*/ + /* out: integer in machine-dependent format */ + uint16 n) /* in: 16-bit integer in canonical format */ +{ + ut_ad(2 == sizeof n); + /* read the two bytes of n back, most significant byte first */ + return(mach_read_from_2((const byte*) &n)); +} + +/*********************************************************** +The following function is used to store data in 3 consecutive +bytes. We store the most significant byte to the lowest address. */ +UNIV_INLINE +void +mach_write_to_3( +/*============*/ + byte* b, /* in: pointer to 3 bytes where to store */ + ulint n) /* in: ulint integer to be stored, >= 0, < 2^24 */ +{ + ut_ad(b); + ut_ad(n <= 0xFFFFFFUL); + + b[0] = (byte)(n >> 16); + b[1] = (byte)(n >> 8); + b[2] = (byte)(n); +} + +/************************************************************ +The following function is used to fetch data from 3 consecutive +bytes. The most significant byte is at the lowest address. */ +UNIV_INLINE +ulint +mach_read_from_3( +/*=============*/ + /* out: ulint integer, >= 0, < 2^24 */ + const byte* b) /* in: pointer to 3 bytes */ +{ + ut_ad(b); + return( ((ulint)(b[0]) << 16) + + ((ulint)(b[1]) << 8) + + (ulint)(b[2]) + ); +} + +/*********************************************************** +The following function is used to store data in four consecutive +bytes. We store the most significant byte to the lowest address. 
*/ +UNIV_INLINE +void +mach_write_to_4( +/*============*/ + byte* b, /* in: pointer to four bytes where to store */ + ulint n) /* in: ulint integer to be stored */ +{ + ut_ad(b); + + /* only the four low-order bytes of n are stored */ + b[0] = (byte)(n >> 24); + b[1] = (byte)(n >> 16); + b[2] = (byte)(n >> 8); + b[3] = (byte)n; +} + +/************************************************************ +The following function is used to fetch data from 4 consecutive +bytes. The most significant byte is at the lowest address. */ +UNIV_INLINE +ulint +mach_read_from_4( +/*=============*/ + /* out: ulint integer */ + const byte* b) /* in: pointer to four bytes */ +{ + ut_ad(b); + return( ((ulint)(b[0]) << 24) + + ((ulint)(b[1]) << 16) + + ((ulint)(b[2]) << 8) + + (ulint)(b[3]) + ); +} + +/************************************************************* +Writes a ulint in a compressed form where the first byte codes the +length of the stored ulint. We look at the most significant bits of +the byte. If the most significant bit is zero, it means 1-byte storage, +else if the 2nd bit is 0, it means 2-byte storage, else if 3rd is 0, +it means 3-byte storage, else if 4th is 0, it means 4-byte storage, +else the storage is 5-byte. */ +UNIV_INLINE +ulint +mach_write_compressed( +/*==================*/ + /* out: compressed size in bytes */ + byte* b, /* in: pointer to memory where to store */ + ulint n) /* in: ulint integer (< 2^32) to be stored */ +{ + ut_ad(b); + + if (n < 0x80UL) { + /* fits in 7 bits: one byte, top bit 0 */ + mach_write_to_1(b, n); + return(1); + } else if (n < 0x4000UL) { + /* fits in 14 bits: two bytes, top bits 10 */ + mach_write_to_2(b, n | 0x8000UL); + return(2); + } else if (n < 0x200000UL) { + /* fits in 21 bits: three bytes, top bits 110 */ + mach_write_to_3(b, n | 0xC00000UL); + return(3); + } else if (n < 0x10000000UL) { + /* fits in 28 bits: four bytes, top bits 1110 */ + mach_write_to_4(b, n | 0xE0000000UL); + return(4); + } else { + /* marker byte 0xF0 followed by the value in four + bytes */ + mach_write_to_1(b, 0xF0UL); + mach_write_to_4(b + 1, n); + return(5); + } +} + +/************************************************************* +Returns the size of a ulint when written in the compressed form. 
*/ +UNIV_INLINE +ulint +mach_get_compressed_size( +/*=====================*/ + /* out: compressed size in bytes */ + ulint n) /* in: ulint integer (< 2^32) to be stored */ +{ + if (n < 0x80UL) { + return(1); + } else if (n < 0x4000UL) { + return(2); + } else if (n < 0x200000UL) { + return(3); + } else if (n < 0x10000000UL) { + return(4); + } else { + return(5); + } +} + +/************************************************************* +Reads a ulint in a compressed form. */ +UNIV_INLINE +ulint +mach_read_compressed( +/*=================*/ + /* out: read integer (< 2^32) */ + const byte* b) /* in: pointer to memory from where to read */ +{ + ulint flag; + + ut_ad(b); + + flag = mach_read_from_1(b); + + if (flag < 0x80UL) { + return(flag); + } else if (flag < 0xC0UL) { + return(mach_read_from_2(b) & 0x7FFFUL); + } else if (flag < 0xE0UL) { + return(mach_read_from_3(b) & 0x3FFFFFUL); + } else if (flag < 0xF0UL) { + return(mach_read_from_4(b) & 0x1FFFFFFFUL); + } else { + ut_ad(flag == 0xF0UL); + return(mach_read_from_4(b + 1)); + } +} + +/*********************************************************** +The following function is used to store data in 8 consecutive +bytes. We store the most significant byte to the lowest address. */ +UNIV_INLINE +void +mach_write_to_8( +/*============*/ + byte* b, /* in: pointer to 8 bytes where to store */ + dulint n) /* in: dulint integer to be stored */ +{ + ut_ad(b); + + mach_write_to_4(b, ut_dulint_get_high(n)); + mach_write_to_4(b + 4, ut_dulint_get_low(n)); +} + +/*********************************************************** +The following function is used to store data in 8 consecutive +bytes. We store the most significant byte to the lowest address. 
*/ +UNIV_INLINE +void +mach_write_ull( +/*===========*/ + byte* b, /* in: pointer to 8 bytes where to store */ + ib_uint64_t n) /* in: 64-bit integer to be stored */ +{ + ut_ad(b); + + mach_write_to_4(b, (ulint) (n >> 32)); + mach_write_to_4(b + 4, (ulint) n); +} + +/************************************************************ +The following function is used to fetch data from 8 consecutive +bytes. The most significant byte is at the lowest address. */ +UNIV_INLINE +dulint +mach_read_from_8( +/*=============*/ + /* out: dulint integer */ + const byte* b) /* in: pointer to 8 bytes */ +{ + ulint high; + ulint low; + + ut_ad(b); + + high = mach_read_from_4(b); + low = mach_read_from_4(b + 4); + + return(ut_dulint_create(high, low)); +} + +/************************************************************ +The following function is used to fetch data from 8 consecutive +bytes. The most significant byte is at the lowest address. */ +UNIV_INLINE +ib_uint64_t +mach_read_ull( +/*==========*/ + /* out: 64-bit integer */ + const byte* b) /* in: pointer to 8 bytes */ +{ + ib_uint64_t ull; + + ull = ((ib_uint64_t) mach_read_from_4(b)) << 32; + ull |= (ib_uint64_t) mach_read_from_4(b + 4); + + return(ull); +} + +/*********************************************************** +The following function is used to store data in 7 consecutive +bytes. We store the most significant byte to the lowest address. */ +UNIV_INLINE +void +mach_write_to_7( +/*============*/ + byte* b, /* in: pointer to 7 bytes where to store */ + dulint n) /* in: dulint integer to be stored */ +{ + ut_ad(b); + + mach_write_to_3(b, ut_dulint_get_high(n)); + mach_write_to_4(b + 3, ut_dulint_get_low(n)); +} + +/************************************************************ +The following function is used to fetch data from 7 consecutive +bytes. The most significant byte is at the lowest address. 
*/ +UNIV_INLINE +dulint +mach_read_from_7( +/*=============*/ + /* out: dulint integer */ + const byte* b) /* in: pointer to 7 bytes */ +{ + ulint high; + ulint low; + + ut_ad(b); + + high = mach_read_from_3(b); + low = mach_read_from_4(b + 3); + + return(ut_dulint_create(high, low)); +} + +/*********************************************************** +The following function is used to store data in 6 consecutive +bytes. We store the most significant byte to the lowest address. */ +UNIV_INLINE +void +mach_write_to_6( +/*============*/ + byte* b, /* in: pointer to 6 bytes where to store */ + dulint n) /* in: dulint integer to be stored */ +{ + ut_ad(b); + + mach_write_to_2(b, ut_dulint_get_high(n)); + mach_write_to_4(b + 2, ut_dulint_get_low(n)); +} + +/************************************************************ +The following function is used to fetch data from 6 consecutive +bytes. The most significant byte is at the lowest address. */ +UNIV_INLINE +dulint +mach_read_from_6( +/*=============*/ + /* out: dulint integer */ + const byte* b) /* in: pointer to 6 bytes */ +{ + ulint high; + ulint low; + + ut_ad(b); + + high = mach_read_from_2(b); + low = mach_read_from_4(b + 2); + + return(ut_dulint_create(high, low)); +} + +/************************************************************* +Writes a dulint in a compressed form (5..9 bytes). */ +UNIV_INLINE +ulint +mach_dulint_write_compressed( +/*=========================*/ + /* out: size in bytes */ + byte* b, /* in: pointer to memory where to store */ + dulint n) /* in: dulint integer to be stored */ +{ + ulint size; + + ut_ad(b); + + size = mach_write_compressed(b, ut_dulint_get_high(n)); + mach_write_to_4(b + size, ut_dulint_get_low(n)); + + return(size + 4); +} + +/************************************************************* +Returns the size of a dulint when written in the compressed form. 
*/ +UNIV_INLINE +ulint +mach_dulint_get_compressed_size( +/*============================*/ + /* out: compressed size in bytes */ + dulint n) /* in: dulint integer to be stored */ +{ + return(4 + mach_get_compressed_size(ut_dulint_get_high(n))); +} + +/************************************************************* +Reads a dulint in a compressed form. */ +UNIV_INLINE +dulint +mach_dulint_read_compressed( +/*========================*/ + /* out: read dulint */ + const byte* b) /* in: pointer to memory from where to read */ +{ + ulint high; + ulint low; + ulint size; + + ut_ad(b); + + high = mach_read_compressed(b); + + size = mach_get_compressed_size(high); + + low = mach_read_from_4(b + size); + + return(ut_dulint_create(high, low)); +} + +/************************************************************* +Writes a dulint in a compressed form (1..11 bytes). */ +UNIV_INLINE +ulint +mach_dulint_write_much_compressed( +/*==============================*/ + /* out: size in bytes */ + byte* b, /* in: pointer to memory where to store */ + dulint n) /* in: dulint integer to be stored */ +{ + ulint size; + + ut_ad(b); + + if (ut_dulint_get_high(n) == 0) { + return(mach_write_compressed(b, ut_dulint_get_low(n))); + } + + *b = (byte)0xFF; + size = 1 + mach_write_compressed(b + 1, ut_dulint_get_high(n)); + + size += mach_write_compressed(b + size, ut_dulint_get_low(n)); + + return(size); +} + +/************************************************************* +Returns the size of a dulint when written in the compressed form. 
*/ +UNIV_INLINE +ulint +mach_dulint_get_much_compressed_size( +/*=================================*/ + /* out: compressed size in bytes */ + dulint n) /* in: dulint integer to be stored */ +{ + if (0 == ut_dulint_get_high(n)) { + return(mach_get_compressed_size(ut_dulint_get_low(n))); + } + + return(1 + mach_get_compressed_size(ut_dulint_get_high(n)) + + mach_get_compressed_size(ut_dulint_get_low(n))); +} + +/************************************************************* +Reads a dulint in a compressed form. */ +UNIV_INLINE +dulint +mach_dulint_read_much_compressed( +/*=============================*/ + /* out: read dulint */ + const byte* b) /* in: pointer to memory from where to read */ +{ + ulint high; + ulint low; + ulint size; + + ut_ad(b); + + if (*b != (byte)0xFF) { + high = 0; + size = 0; + } else { + high = mach_read_compressed(b + 1); + + size = 1 + mach_get_compressed_size(high); + } + + low = mach_read_compressed(b + size); + + return(ut_dulint_create(high, low)); +} + +/************************************************************* +Reads a double. It is stored in a little-endian format. */ +UNIV_INLINE +double +mach_double_read( +/*=============*/ + /* out: double read */ + const byte* b) /* in: pointer to memory from where to read */ +{ + double d; + ulint i; + byte* ptr; + + ptr = (byte*)&d; + + for (i = 0; i < sizeof(double); i++) { +#ifdef WORDS_BIGENDIAN + ptr[sizeof(double) - i - 1] = b[i]; +#else + ptr[i] = b[i]; +#endif + } + + return(d); +} + +/************************************************************* +Writes a double. It is stored in a little-endian format. 
*/ +UNIV_INLINE +void +mach_double_write( +/*==============*/ + byte* b, /* in: pointer to memory where to write */ + double d) /* in: double */ +{ + ulint i; + byte* ptr; + + ptr = (byte*)&d; + + for (i = 0; i < sizeof(double); i++) { +#ifdef WORDS_BIGENDIAN + b[i] = ptr[sizeof(double) - i - 1]; +#else + b[i] = ptr[i]; +#endif + } +} + +/************************************************************* +Reads a float. It is stored in a little-endian format. */ +UNIV_INLINE +float +mach_float_read( +/*============*/ + /* out: float read */ + const byte* b) /* in: pointer to memory from where to read */ +{ + float d; + ulint i; + byte* ptr; + + ptr = (byte*)&d; + + for (i = 0; i < sizeof(float); i++) { +#ifdef WORDS_BIGENDIAN + ptr[sizeof(float) - i - 1] = b[i]; +#else + ptr[i] = b[i]; +#endif + } + + return(d); +} + +/************************************************************* +Writes a float. It is stored in a little-endian format. */ +UNIV_INLINE +void +mach_float_write( +/*=============*/ + byte* b, /* in: pointer to memory where to write */ + float d) /* in: float */ +{ + ulint i; + byte* ptr; + + ptr = (byte*)&d; + + for (i = 0; i < sizeof(float); i++) { +#ifdef WORDS_BIGENDIAN + b[i] = ptr[sizeof(float) - i - 1]; +#else + b[i] = ptr[i]; +#endif + } +} + +/************************************************************* +Reads a ulint stored in the little-endian format. */ +UNIV_INLINE +ulint +mach_read_from_n_little_endian( +/*===========================*/ + /* out: unsigned long int */ + const byte* buf, /* in: from where to read */ + ulint buf_size) /* in: from how many bytes to read */ +{ + ulint n = 0; + const byte* ptr; + + ut_ad(buf_size <= sizeof(ulint)); + ut_ad(buf_size > 0); + + ptr = buf + buf_size; + + for (;;) { + ptr--; + + n = n << 8; + + n += (ulint)(*ptr); + + if (ptr == buf) { + break; + } + } + + return(n); +} + +/************************************************************* +Writes a ulint in the little-endian format. 
*/ +UNIV_INLINE +void +mach_write_to_n_little_endian( +/*==========================*/ + byte* dest, /* in: where to write */ + ulint dest_size, /* in: into how many bytes to write */ + ulint n) /* in: unsigned long int to write */ +{ + byte* end; + + ut_ad(dest_size <= sizeof(ulint)); + ut_ad(dest_size > 0); + + end = dest + dest_size; + + for (;;) { + *dest = (byte)(n & 0xFF); + + n = n >> 8; + + dest++; + + if (dest == end) { + break; + } + } + + ut_ad(n == 0); +} + +/************************************************************* +Reads a ulint stored in the little-endian format. */ +UNIV_INLINE +ulint +mach_read_from_2_little_endian( +/*===========================*/ + /* out: unsigned long int */ + const byte* buf) /* in: from where to read */ +{ + return((ulint)(*buf) + ((ulint)(*(buf + 1))) * 256); +} + +/************************************************************* +Writes a ulint in the little-endian format. */ +UNIV_INLINE +void +mach_write_to_2_little_endian( +/*==========================*/ + byte* dest, /* in: where to write */ + ulint n) /* in: unsigned long int to write */ +{ + ut_ad(n < 256 * 256); + + *dest = (byte)(n & 0xFFUL); + + n = n >> 8; + dest++; + + *dest = (byte)(n & 0xFFUL); +} + +/************************************************************* +Convert integral type from storage byte order (big endian) to +host byte order. 
*/ +UNIV_INLINE +ullint +mach_read_int_type( +/*===============*/ + /* out: integer value */ + const byte* src, /* in: where to read from */ + ulint len, /* in: length of src */ + ibool unsigned_type) /* in: signed or unsigned flag */ +{ + /* XXX this can be optimized on big-endian machines */ + + ullint ret; + uint i; + + if (unsigned_type || (src[0] & 0x80)) { + + ret = 0x0000000000000000ULL; + } else { + + ret = 0xFFFFFFFFFFFFFF00ULL; + } + + if (unsigned_type) { + + ret |= src[0]; + } else { + + ret |= src[0] ^ 0x80; + } + + for (i = 1; i < len; i++) { + ret <<= 8; + ret |= src[i]; + } + + return(ret); +} diff --git a/storage/xtradb/include/mem0dbg.h b/storage/xtradb/include/mem0dbg.h new file mode 100644 index 00000000000..0568a595d06 --- /dev/null +++ b/storage/xtradb/include/mem0dbg.h @@ -0,0 +1,142 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The memory management: the debug code. This is not a compilation module, +but is included in mem0mem.* ! 
+ +Created 6/9/1994 Heikki Tuuri +*******************************************************/ + +/* In the debug version each allocated field is surrounded with +check fields whose sizes are given below */ + +#ifdef UNIV_MEM_DEBUG +#define MEM_FIELD_HEADER_SIZE ut_calc_align(2 * sizeof(ulint),\ + UNIV_MEM_ALIGNMENT) +#define MEM_FIELD_TRAILER_SIZE sizeof(ulint) +#else +#define MEM_FIELD_HEADER_SIZE 0 +#endif + + +/* Space needed when allocating for a user a field of +length N. The space is allocated only in multiples of +UNIV_MEM_ALIGNMENT. In the debug version there are also +check fields at the both ends of the field. */ +#ifdef UNIV_MEM_DEBUG +#define MEM_SPACE_NEEDED(N) ut_calc_align((N) + MEM_FIELD_HEADER_SIZE\ + + MEM_FIELD_TRAILER_SIZE, UNIV_MEM_ALIGNMENT) +#else +#define MEM_SPACE_NEEDED(N) ut_calc_align((N), UNIV_MEM_ALIGNMENT) +#endif + +#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG +/******************************************************************* +Checks a memory heap for consistency and prints the contents if requested. +Outputs the sum of sizes of buffers given to the user (only in +the debug version), the physical size of the heap and the number of +blocks in the heap. In case of error returns 0 as sizes and number +of blocks. 
*/ +UNIV_INTERN +void +mem_heap_validate_or_print( +/*=======================*/ + mem_heap_t* heap, /* in: memory heap */ + byte* top, /* in: calculate and validate only until + this top pointer in the heap is reached, + if this pointer is NULL, ignored */ + ibool print, /* in: if TRUE, prints the contents + of the heap; works only in + the debug version */ + ibool* error, /* out: TRUE if error */ + ulint* us_size,/* out: allocated memory + (for the user) in the heap, + if a NULL pointer is passed as this + argument, it is ignored; in the + non-debug version this is always -1 */ + ulint* ph_size,/* out: physical size of the heap, + if a NULL pointer is passed as this + argument, it is ignored */ + ulint* n_blocks); /* out: number of blocks in the heap, + if a NULL pointer is passed as this + argument, it is ignored */ +/****************************************************************** +Validates the contents of a memory heap. */ +UNIV_INTERN +ibool +mem_heap_validate( +/*==============*/ + /* out: TRUE if ok */ + mem_heap_t* heap); /* in: memory heap */ +#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */ +#ifdef UNIV_DEBUG +/****************************************************************** +Checks that an object is a memory heap (or a block of it) */ +UNIV_INTERN +ibool +mem_heap_check( +/*===========*/ + /* out: TRUE if ok */ + mem_heap_t* heap); /* in: memory heap */ +#endif /* UNIV_DEBUG */ +#ifdef UNIV_MEM_DEBUG +/********************************************************************* +TRUE if no memory is currently allocated. 
*/ +UNIV_INTERN +ibool +mem_all_freed(void); +/*===============*/ + /* out: TRUE if no heaps exist */ +/********************************************************************* +Validates the dynamic memory */ +UNIV_INTERN +ibool +mem_validate_no_assert(void); +/*=========================*/ + /* out: TRUE if error */ +/**************************************************************** +Validates the dynamic memory */ +UNIV_INTERN +ibool +mem_validate(void); +/*===============*/ + /* out: TRUE if ok */ +#endif /* UNIV_MEM_DEBUG */ +/**************************************************************** +Tries to find neigboring memory allocation blocks and dumps to stderr +the neighborhood of a given pointer. */ +UNIV_INTERN +void +mem_analyze_corruption( +/*===================*/ + void* ptr); /* in: pointer to place of possible corruption */ +/********************************************************************* +Prints information of dynamic memory usage and currently allocated memory +heaps or buffers. Can only be used in the debug version. */ +UNIV_INTERN +void +mem_print_info(void); +/*================*/ +/********************************************************************* +Prints information of dynamic memory usage and currently allocated memory +heaps or buffers since the last ..._print_info or..._print_new_info. */ +UNIV_INTERN +void +mem_print_new_info(void); +/*====================*/ diff --git a/storage/xtradb/include/mem0dbg.ic b/storage/xtradb/include/mem0dbg.ic new file mode 100644 index 00000000000..bf695fee785 --- /dev/null +++ b/storage/xtradb/include/mem0dbg.ic @@ -0,0 +1,109 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/************************************************************************ +The memory management: the debug code. This is not an independent +compilation module but is included in mem0mem.*. + +Created 6/8/1994 Heikki Tuuri +*************************************************************************/ + +#ifdef UNIV_MEM_DEBUG +extern mutex_t mem_hash_mutex; +extern ulint mem_current_allocated_memory; + +/********************************************************************** +Initializes an allocated memory field in the debug version. */ +UNIV_INTERN +void +mem_field_init( +/*===========*/ + byte* buf, /* in: memory field */ + ulint n); /* in: how many bytes the user requested */ +/********************************************************************** +Erases an allocated memory field in the debug version. */ +UNIV_INTERN +void +mem_field_erase( +/*============*/ + byte* buf, /* in: memory field */ + ulint n); /* in: how many bytes the user requested */ +/******************************************************************* +Initializes a buffer to a random combination of hex BA and BE. +Used to initialize allocated memory. */ +UNIV_INTERN +void +mem_init_buf( +/*=========*/ + byte* buf, /* in: pointer to buffer */ + ulint n); /* in: length of buffer */ +/******************************************************************* +Initializes a buffer to a random combination of hex DE and AD. 
+Used to erase freed memory.*/ +UNIV_INTERN +void +mem_erase_buf( +/*==========*/ + byte* buf, /* in: pointer to buffer */ + ulint n); /* in: length of buffer */ +/******************************************************************* +Inserts a created memory heap to the hash table of +current allocated memory heaps. +Initializes the hash table when first called. */ +UNIV_INTERN +void +mem_hash_insert( +/*============*/ + mem_heap_t* heap, /* in: the created heap */ + const char* file_name, /* in: file name of creation */ + ulint line); /* in: line where created */ +/******************************************************************* +Removes a memory heap (which is going to be freed by the caller) +from the list of live memory heaps. Returns the size of the heap +in terms of how much memory in bytes was allocated for the user of +the heap (not the total space occupied by the heap). +Also validates the heap. +NOTE: This function does not free the storage occupied by the +heap itself, only the node in the list of heaps. */ +UNIV_INTERN +void +mem_hash_remove( +/*============*/ + mem_heap_t* heap, /* in: the heap to be freed */ + const char* file_name, /* in: file name of freeing */ + ulint line); /* in: line where freed */ + + +void +mem_field_header_set_len(byte* field, ulint len); + +ulint +mem_field_header_get_len(byte* field); + +void +mem_field_header_set_check(byte* field, ulint check); + +ulint +mem_field_header_get_check(byte* field); + +void +mem_field_trailer_set_check(byte* field, ulint check); + +ulint +mem_field_trailer_get_check(byte* field); +#endif /* UNIV_MEM_DEBUG */ diff --git a/storage/xtradb/include/mem0mem.h b/storage/xtradb/include/mem0mem.h new file mode 100644 index 00000000000..c20e7815001 --- /dev/null +++ b/storage/xtradb/include/mem0mem.h @@ -0,0 +1,406 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The memory management + +Created 6/9/1994 Heikki Tuuri +*******************************************************/ + +#ifndef mem0mem_h +#define mem0mem_h + +#include "univ.i" +#include "ut0mem.h" +#include "ut0byte.h" +#include "ut0ut.h" +#include "ut0rnd.h" +#include "sync0sync.h" +#include "ut0lst.h" +#include "mach0data.h" + +/* -------------------- MEMORY HEAPS ----------------------------- */ + +/* The info structure stored at the beginning of a heap block */ +typedef struct mem_block_info_struct mem_block_info_t; + +/* A block of a memory heap consists of the info structure +followed by an area of memory */ +typedef mem_block_info_t mem_block_t; + +/* A memory heap is a nonempty linear list of memory blocks */ +typedef mem_block_t mem_heap_t; + +/* Types of allocation for memory heaps: DYNAMIC means allocation from the +dynamic memory pool of the C compiler, BUFFER means allocation from the +buffer pool; the latter method is used for very big heaps */ + +#define MEM_HEAP_DYNAMIC 0 /* the most common type */ +#define MEM_HEAP_BUFFER 1 +#define MEM_HEAP_BTR_SEARCH 2 /* this flag can optionally be + ORed to MEM_HEAP_BUFFER, in which + case heap->free_block is used in + some cases for memory 
allocations, + and if it's NULL, the memory + allocation functions can return + NULL. */ + +/* The following start size is used for the first block in the memory heap if +the size is not specified, i.e., 0 is given as the parameter in the call of +create. The standard size is the maximum (payload) size of the blocks used for +allocations of small buffers. */ + +#define MEM_BLOCK_START_SIZE 64 +#define MEM_BLOCK_STANDARD_SIZE \ + (UNIV_PAGE_SIZE >= 16384 ? 8000 : MEM_MAX_ALLOC_IN_BUF) + +/* If a memory heap is allowed to grow into the buffer pool, the following +is the maximum size for a single allocated buffer: */ +#define MEM_MAX_ALLOC_IN_BUF (UNIV_PAGE_SIZE - 200) + +/********************************************************************** +Initializes the memory system. */ +UNIV_INTERN +void +mem_init( +/*=====*/ + ulint size); /* in: common pool size in bytes */ +/****************************************************************** +Use this macro instead of the corresponding function! Macro for memory +heap creation. */ + +#define mem_heap_create(N) mem_heap_create_func(\ + (N), MEM_HEAP_DYNAMIC, __FILE__, __LINE__) +/****************************************************************** +Use this macro instead of the corresponding function! Macro for memory +heap creation. */ + +#define mem_heap_create_in_buffer(N) mem_heap_create_func(\ + (N), MEM_HEAP_BUFFER, __FILE__, __LINE__) +/****************************************************************** +Use this macro instead of the corresponding function! Macro for memory +heap creation. */ + +#define mem_heap_create_in_btr_search(N) mem_heap_create_func(\ + (N), MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER,\ + __FILE__, __LINE__) + +/****************************************************************** +Use this macro instead of the corresponding function! Macro for memory +heap freeing. 
*/ + +#define mem_heap_free(heap) mem_heap_free_func(\ + (heap), __FILE__, __LINE__) +/********************************************************************* +NOTE: Use the corresponding macros instead of this function. Creates a +memory heap. For debugging purposes, takes also the file name and line as +arguments. */ +UNIV_INLINE +mem_heap_t* +mem_heap_create_func( +/*=================*/ + /* out, own: memory heap, NULL if + did not succeed (only possible for + MEM_HEAP_BTR_SEARCH type heaps)*/ + ulint n, /* in: desired start block size, + this means that a single user buffer + of size n will fit in the block, + 0 creates a default size block */ + ulint type, /* in: heap type */ + const char* file_name, /* in: file name where created */ + ulint line); /* in: line where created */ +/********************************************************************* +NOTE: Use the corresponding macro instead of this function. Frees the space +occupied by a memory heap. In the debug version erases the heap memory +blocks. */ +UNIV_INLINE +void +mem_heap_free_func( +/*===============*/ + mem_heap_t* heap, /* in, own: heap to be freed */ + const char* file_name, /* in: file name where freed */ + ulint line); /* in: line where freed */ +/******************************************************************* +Allocates and zero-fills n bytes of memory from a memory heap. */ +UNIV_INLINE +void* +mem_heap_zalloc( +/*============*/ + /* out: allocated, zero-filled storage */ + mem_heap_t* heap, /* in: memory heap */ + ulint n); /* in: number of bytes; if the heap is allowed + to grow into the buffer pool, this must be + <= MEM_MAX_ALLOC_IN_BUF */ +/******************************************************************* +Allocates n bytes of memory from a memory heap. 
*/ +UNIV_INLINE +void* +mem_heap_alloc( +/*===========*/ + /* out: allocated storage, NULL if did not + succeed (only possible for + MEM_HEAP_BTR_SEARCH type heaps) */ + mem_heap_t* heap, /* in: memory heap */ + ulint n); /* in: number of bytes; if the heap is allowed + to grow into the buffer pool, this must be + <= MEM_MAX_ALLOC_IN_BUF */ +/********************************************************************* +Returns a pointer to the heap top. */ +UNIV_INLINE +byte* +mem_heap_get_heap_top( +/*==================*/ + /* out: pointer to the heap top */ + mem_heap_t* heap); /* in: memory heap */ +/********************************************************************* +Frees the space in a memory heap exceeding the pointer given. The +pointer must have been acquired from mem_heap_get_heap_top. The first +memory block of the heap is not freed. */ +UNIV_INLINE +void +mem_heap_free_heap_top( +/*===================*/ + mem_heap_t* heap, /* in: heap from which to free */ + byte* old_top);/* in: pointer to old top of heap */ +/********************************************************************* +Empties a memory heap. The first memory block of the heap is not freed. */ +UNIV_INLINE +void +mem_heap_empty( +/*===========*/ + mem_heap_t* heap); /* in: heap to empty */ +/********************************************************************* +Returns a pointer to the topmost element in a memory heap. +The size of the element must be given. */ +UNIV_INLINE +void* +mem_heap_get_top( +/*=============*/ + /* out: pointer to the topmost element */ + mem_heap_t* heap, /* in: memory heap */ + ulint n); /* in: size of the topmost element */ +/********************************************************************* +Frees the topmost element in a memory heap. +The size of the element must be given. 
*/ +UNIV_INLINE +void +mem_heap_free_top( +/*==============*/ + mem_heap_t* heap, /* in: memory heap */ + ulint n); /* in: size of the topmost element */ +/********************************************************************* +Returns the space in bytes occupied by a memory heap. */ +UNIV_INLINE +ulint +mem_heap_get_size( +/*==============*/ + mem_heap_t* heap); /* in: heap */ +/****************************************************************** +Use this macro instead of the corresponding function! +Macro for memory buffer allocation */ + +#define mem_zalloc(N) memset(mem_alloc(N), 0, (N)); + +#define mem_alloc(N) mem_alloc_func((N), NULL, __FILE__, __LINE__) +#define mem_alloc2(N,S) mem_alloc_func((N), (S), __FILE__, __LINE__) +/******************************************************************* +NOTE: Use the corresponding macro instead of this function. +Allocates a single buffer of memory from the dynamic memory of +the C compiler. Is like malloc of C. The buffer must be freed +with mem_free. */ +UNIV_INLINE +void* +mem_alloc_func( +/*===========*/ + /* out, own: free storage */ + ulint n, /* in: requested size in bytes */ + ulint* size, /* out: allocated size in bytes, + or NULL */ + const char* file_name, /* in: file name where created */ + ulint line); /* in: line where created */ + +/****************************************************************** +Use this macro instead of the corresponding function! +Macro for memory buffer freeing */ + +#define mem_free(PTR) mem_free_func((PTR), __FILE__, __LINE__) +/******************************************************************* +NOTE: Use the corresponding macro instead of this function. +Frees a single buffer of storage from +the dynamic memory of C compiler. Similar to free of C. 
*/ +UNIV_INLINE +void +mem_free_func( +/*==========*/ + void* ptr, /* in, own: buffer to be freed */ + const char* file_name, /* in: file name where created */ + ulint line /* in: line where created */ +); + +/************************************************************************** +Duplicates a NUL-terminated string. */ +UNIV_INLINE +char* +mem_strdup( +/*=======*/ + /* out, own: a copy of the string, + must be deallocated with mem_free */ + const char* str); /* in: string to be copied */ +/************************************************************************** +Makes a NUL-terminated copy of a nonterminated string. */ +UNIV_INLINE +char* +mem_strdupl( +/*========*/ + /* out, own: a copy of the string, + must be deallocated with mem_free */ + const char* str, /* in: string to be copied */ + ulint len); /* in: length of str, in bytes */ + +/************************************************************************** +Duplicates a NUL-terminated string, allocated from a memory heap. */ +UNIV_INTERN +char* +mem_heap_strdup( +/*============*/ + /* out, own: a copy of the string */ + mem_heap_t* heap, /* in: memory heap where string is allocated */ + const char* str); /* in: string to be copied */ +/************************************************************************** +Makes a NUL-terminated copy of a nonterminated string, +allocated from a memory heap. */ +UNIV_INLINE +char* +mem_heap_strdupl( +/*=============*/ + /* out, own: a copy of the string */ + mem_heap_t* heap, /* in: memory heap where string is allocated */ + const char* str, /* in: string to be copied */ + ulint len); /* in: length of str, in bytes */ + +/************************************************************************** +Concatenate two strings and return the result, using a memory heap. 
*/ +UNIV_INTERN +char* +mem_heap_strcat( +/*============*/ + /* out, own: the result */ + mem_heap_t* heap, /* in: memory heap where string is allocated */ + const char* s1, /* in: string 1 */ + const char* s2); /* in: string 2 */ + +/************************************************************************** +Duplicate a block of data, allocated from a memory heap. */ +UNIV_INTERN +void* +mem_heap_dup( +/*=========*/ + /* out, own: a copy of the data */ + mem_heap_t* heap, /* in: memory heap where copy is allocated */ + const void* data, /* in: data to be copied */ + ulint len); /* in: length of data, in bytes */ + +/************************************************************************** +Concatenate two memory blocks and return the result, using a memory heap. */ +UNIV_INTERN +void* +mem_heap_cat( +/*=========*/ + /* out, own: the result */ + mem_heap_t* heap, /* in: memory heap where result is allocated */ + const void* b1, /* in: block 1 */ + ulint len1, /* in: length of b1, in bytes */ + const void* b2, /* in: block 2 */ + ulint len2); /* in: length of b2, in bytes */ + +/******************************************************************** +A simple (s)printf replacement that dynamically allocates the space for the +formatted string from the given heap. This supports a very limited set of +the printf syntax: types 's' and 'u' and length modifier 'l' (which is +required for the 'u' type). */ +UNIV_INTERN +char* +mem_heap_printf( +/*============*/ + /* out: heap-allocated formatted string */ + mem_heap_t* heap, /* in: memory heap */ + const char* format, /* in: format string */ + ...) __attribute__ ((format (printf, 2, 3))); + +#ifdef MEM_PERIODIC_CHECK +/********************************************************************** +Goes through the list of all allocated mem blocks, checks their magic +numbers, and reports possible corruption. 
*/
+UNIV_INTERN
+void
+mem_validate_all_blocks(void);
+/*=========================*/
+#endif
+
+/*#######################################################################*/
+
+/* The info header of a block in a memory heap. Every block of a heap
+starts with this header; user data follows it. The fields are read and
+written through the mem_block_get_... and mem_block_set_... accessors. */
+
+struct mem_block_info_struct {
+	ulint	magic_n;/* magic number for debugging */
+	char	file_name[8];/* file name where the mem heap was created
+			(NOTE(review): only 8 bytes are reserved, so this is
+			presumably a shortened name - confirm in the code
+			that fills it in) */
+	ulint	line;	/* line number where the mem heap was created */
+	UT_LIST_BASE_NODE_T(mem_block_t) base; /* In the first block of
+			the list this is the base node of the list of blocks;
+			in subsequent blocks this is undefined */
+	UT_LIST_NODE_T(mem_block_t) list; /* This contains pointers to next
+			and prev in the list. The first block allocated
+			to the heap is also the first block in this list,
+			though it also contains the base node of the list. */
+	ulint	len;	/* physical length of this block in bytes */
+	ulint	type;	/* type of heap: MEM_HEAP_DYNAMIC, or
+			MEM_HEAP_BUFFER possibly combined with
+			MEM_HEAP_BTR_SEARCH */
+	ulint	free;	/* offset in bytes of the first free position for
+			user data in the block, counted from the start of
+			the block */
+	ulint	start;	/* the value of the struct field 'free' at the
+			creation of the block */
+	void*	free_block;
+			/* if the MEM_HEAP_BTR_SEARCH bit is set in type,
+			and this is the heap root, this can contain an
+			allocated buffer frame, which can be appended as a
+			free block to the heap, if we need more space;
+			otherwise, this is NULL */
+	void*	buf_block;
+			/* if this block has been allocated from the buffer
+			pool, this contains the buf_block_t handle;
+			otherwise, this is NULL */
+#ifdef MEM_PERIODIC_CHECK
+	UT_LIST_NODE_T(mem_block_t) mem_block_list;
+			/* List of all mem blocks allocated; protected
+			by the mem_comm_pool mutex */
+#endif
+};
+
+#define MEM_BLOCK_MAGIC_N	764741555
+#define MEM_FREED_BLOCK_MAGIC_N	547711122
+
+/* Header size for a memory heap block: the size of the info header
+rounded up with ut_calc_align to UNIV_MEM_ALIGNMENT */
+#define MEM_BLOCK_HEADER_SIZE	ut_calc_align(sizeof(mem_block_info_t),\
+							UNIV_MEM_ALIGNMENT)
+#include "mem0dbg.h"
+
+#ifndef UNIV_NONINL +#include "mem0mem.ic" +#endif + +#endif diff --git a/storage/xtradb/include/mem0mem.ic b/storage/xtradb/include/mem0mem.ic new file mode 100644 index 00000000000..04b4234904a --- /dev/null +++ b/storage/xtradb/include/mem0mem.ic @@ -0,0 +1,646 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/************************************************************************ +The memory management + +Created 6/8/1994 Heikki Tuuri +*************************************************************************/ + +#include "mem0dbg.ic" + +#include "mem0pool.h" + +/******************************************************************* +Creates a memory heap block where data can be allocated. 
*/ +UNIV_INTERN +mem_block_t* +mem_heap_create_block( +/*==================*/ + /* out, own: memory heap block, NULL if + did not succeed (only possible for + MEM_HEAP_BTR_SEARCH type heaps) */ + mem_heap_t* heap, /* in: memory heap or NULL if first block + should be created */ + ulint n, /* in: number of bytes needed for user data */ + ulint type, /* in: type of heap: MEM_HEAP_DYNAMIC or + MEM_HEAP_BUFFER */ + const char* file_name,/* in: file name where created */ + ulint line); /* in: line where created */ +/********************************************************************** +Frees a block from a memory heap. */ +UNIV_INTERN +void +mem_heap_block_free( +/*================*/ + mem_heap_t* heap, /* in: heap */ + mem_block_t* block); /* in: block to free */ +/********************************************************************** +Frees the free_block field from a memory heap. */ +UNIV_INTERN +void +mem_heap_free_block_free( +/*=====================*/ + mem_heap_t* heap); /* in: heap */ +/******************************************************************* +Adds a new block to a memory heap. 
*/ +UNIV_INTERN +mem_block_t* +mem_heap_add_block( +/*===============*/ + /* out: created block, NULL if did not + succeed (only possible for + MEM_HEAP_BTR_SEARCH type heaps)*/ + mem_heap_t* heap, /* in: memory heap */ + ulint n); /* in: number of bytes user needs */ + +UNIV_INLINE +void +mem_block_set_len(mem_block_t* block, ulint len) +{ + ut_ad(len > 0); + + block->len = len; +} + +UNIV_INLINE +ulint +mem_block_get_len(mem_block_t* block) +{ + return(block->len); +} + +UNIV_INLINE +void +mem_block_set_type(mem_block_t* block, ulint type) +{ + ut_ad((type == MEM_HEAP_DYNAMIC) || (type == MEM_HEAP_BUFFER) + || (type == MEM_HEAP_BUFFER + MEM_HEAP_BTR_SEARCH)); + + block->type = type; +} + +UNIV_INLINE +ulint +mem_block_get_type(mem_block_t* block) +{ + return(block->type); +} + +UNIV_INLINE +void +mem_block_set_free(mem_block_t* block, ulint free) +{ + ut_ad(free > 0); + ut_ad(free <= mem_block_get_len(block)); + + block->free = free; +} + +UNIV_INLINE +ulint +mem_block_get_free(mem_block_t* block) +{ + return(block->free); +} + +UNIV_INLINE +void +mem_block_set_start(mem_block_t* block, ulint start) +{ + ut_ad(start > 0); + + block->start = start; +} + +UNIV_INLINE +ulint +mem_block_get_start(mem_block_t* block) +{ + return(block->start); +} + +/******************************************************************* +Allocates and zero-fills n bytes of memory from a memory heap. */ +UNIV_INLINE +void* +mem_heap_zalloc( +/*============*/ + /* out: allocated, zero-filled storage */ + mem_heap_t* heap, /* in: memory heap */ + ulint n) /* in: number of bytes; if the heap is allowed + to grow into the buffer pool, this must be + <= MEM_MAX_ALLOC_IN_BUF */ +{ + ut_ad(heap); + ut_ad(!(heap->type & MEM_HEAP_BTR_SEARCH)); + return(memset(mem_heap_alloc(heap, n), 0, n)); +} + +/******************************************************************* +Allocates n bytes of memory from a memory heap. 
*/ +UNIV_INLINE +void* +mem_heap_alloc( +/*===========*/ + /* out: allocated storage, NULL if did not + succeed (only possible for + MEM_HEAP_BTR_SEARCH type heaps) */ + mem_heap_t* heap, /* in: memory heap */ + ulint n) /* in: number of bytes; if the heap is allowed + to grow into the buffer pool, this must be + <= MEM_MAX_ALLOC_IN_BUF */ +{ + mem_block_t* block; + void* buf; + ulint free; + + ut_ad(mem_heap_check(heap)); + + block = UT_LIST_GET_LAST(heap->base); + + ut_ad(!(block->type & MEM_HEAP_BUFFER) || (n <= MEM_MAX_ALLOC_IN_BUF)); + + /* Check if there is enough space in block. If not, create a new + block to the heap */ + + if (mem_block_get_len(block) + < mem_block_get_free(block) + MEM_SPACE_NEEDED(n)) { + + block = mem_heap_add_block(heap, n); + + if (block == NULL) { + + return(NULL); + } + } + + free = mem_block_get_free(block); + + buf = (byte*)block + free; + + mem_block_set_free(block, free + MEM_SPACE_NEEDED(n)); + +#ifdef UNIV_MEM_DEBUG + UNIV_MEM_ALLOC(buf, + n + MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE); + + /* In the debug version write debugging info to the field */ + mem_field_init((byte*)buf, n); + + /* Advance buf to point at the storage which will be given to the + caller */ + buf = (byte*)buf + MEM_FIELD_HEADER_SIZE; + +#endif +#ifdef UNIV_SET_MEM_TO_ZERO + UNIV_MEM_ALLOC(buf, n); + memset(buf, '\0', n); +#endif + UNIV_MEM_ALLOC(buf, n); + return(buf); +} + +/********************************************************************* +Returns a pointer to the heap top. */ +UNIV_INLINE +byte* +mem_heap_get_heap_top( +/*==================*/ + /* out: pointer to the heap top */ + mem_heap_t* heap) /* in: memory heap */ +{ + mem_block_t* block; + byte* buf; + + ut_ad(mem_heap_check(heap)); + + block = UT_LIST_GET_LAST(heap->base); + + buf = (byte*)block + mem_block_get_free(block); + + return(buf); +} + +/********************************************************************* +Frees the space in a memory heap exceeding the pointer given. 
The
+pointer must have been acquired from mem_heap_get_heap_top. The first
+memory block of the heap is not freed. */
+UNIV_INLINE
+void
+mem_heap_free_heap_top(
+/*===================*/
+	mem_heap_t*	heap,	/* in: heap from which to free */
+	byte*		old_top)/* in: pointer to old top of heap */
+{
+	mem_block_t*	block;
+	mem_block_t*	prev_block;
+#ifdef UNIV_MEM_DEBUG
+	ibool		error;
+	ulint		total_size;
+	ulint		size;
+#endif
+
+	ut_ad(mem_heap_check(heap));
+
+#ifdef UNIV_MEM_DEBUG
+
+	/* Validate the heap and get its total allocated size; the
+	difference between this and the size below old_top is subtracted
+	from mem_current_allocated_memory further down */
+	mem_heap_validate_or_print(heap, NULL, FALSE, &error, &total_size,
+				   NULL, NULL);
+	ut_a(!error);
+
+	/* Get the size below top pointer */
+	mem_heap_validate_or_print(heap, old_top, FALSE, &error, &size, NULL,
+				   NULL);
+	ut_a(!error);
+
+#endif
+
+	block = UT_LIST_GET_LAST(heap->base);
+
+	while (block != NULL) {
+		if (((byte*)block + mem_block_get_free(block) >= old_top)
+		    && ((byte*)block <= old_top)) {
+			/* Found the right block: old_top lies between the
+			start of this block and its current free position.
+			Every block visited before this one was freed. */
+
+			break;
+		}
+
+		/* Store prev_block value before freeing the current block
+		(the current block will be erased in freeing) */
+
+		prev_block = UT_LIST_GET_PREV(list, block);
+
+		mem_heap_block_free(heap, block);
+
+		block = prev_block;
+	}
+
+	ut_ad(block);
+
+	/* Set the free field of block to the offset of old_top from the
+	start of the block */
+	mem_block_set_free(block, old_top - (byte*)block);
+
+#ifdef UNIV_MEM_DEBUG
+	ut_ad(mem_block_get_start(block) <= mem_block_get_free(block));
+
+	/* In the debug version erase block from top up */
+	mem_erase_buf(old_top, (byte*)block + block->len - old_top);
+
+	/* Update allocated memory count */
+	mutex_enter(&mem_hash_mutex);
+	mem_current_allocated_memory -= (total_size - size);
+	mutex_exit(&mem_hash_mutex);
+#else /* UNIV_MEM_DEBUG */
+	UNIV_MEM_ASSERT_W(old_top, (byte*)block + block->len - old_top);
+#endif /* UNIV_MEM_DEBUG */
+	UNIV_MEM_ALLOC(old_top, (byte*)block + block->len - old_top);
+
+	/* If free == start, the block holds no user data any more; we may
+	free the block if it is not the first one (the first block is the
+	heap itself) */
+
+	if ((heap != block) && (mem_block_get_free(block)
+				== mem_block_get_start(block))) {
+		mem_heap_block_free(heap, block);
+	}
+}
+
+/*********************************************************************
+Empties a memory heap. The first memory block of the heap is not freed.
+Calls mem_heap_free_heap_top with the start position of the first block
+as the old top, and then frees heap->free_block if it is set. */
+UNIV_INLINE
+void
+mem_heap_empty(
+/*===========*/
+	mem_heap_t*	heap)	/* in: heap to empty */
+{
+	mem_heap_free_heap_top(heap, (byte*)heap + mem_block_get_start(heap));
+
+	if (heap->free_block) {
+		mem_heap_free_block_free(heap);
+	}
+}
+
+/*********************************************************************
+Returns a pointer to the topmost element in a memory heap. The size of the
+element must be given. The element is looked up in the last block of the
+heap and is not freed by this call. */
+UNIV_INLINE
+void*
+mem_heap_get_top(
+/*=============*/
+				/* out: pointer to the topmost element */
+	mem_heap_t*	heap,	/* in: memory heap */
+	ulint		n)	/* in: size of the topmost element */
+{
+	mem_block_t*	block;
+	void*		buf;
+
+	ut_ad(mem_heap_check(heap));
+
+	block = UT_LIST_GET_LAST(heap->base);
+
+	buf = (byte*)block + mem_block_get_free(block) - MEM_SPACE_NEEDED(n);
+
+#ifdef UNIV_MEM_DEBUG
+	ut_ad(mem_block_get_start(block) <=(ulint)((byte*)buf - (byte*)block));
+
+	/* In the debug version, advance buf to point at the storage which
+	was given to the caller in the allocation */
+
+	buf = (byte*)buf + MEM_FIELD_HEADER_SIZE;
+
+	/* Check that the field lengths agree: n must equal the length
+	stored in the debug header of the field */
+	ut_ad(n == (ulint)mem_field_header_get_len(buf));
+#endif
+
+	return(buf);
+}
+
+/*********************************************************************
+Frees the topmost element in a memory heap. The size of the element must be
+given. 
*/ +UNIV_INLINE +void +mem_heap_free_top( +/*==============*/ + mem_heap_t* heap, /* in: memory heap */ + ulint n) /* in: size of the topmost element */ +{ + mem_block_t* block; + + ut_ad(mem_heap_check(heap)); + + block = UT_LIST_GET_LAST(heap->base); + + /* Subtract the free field of block */ + mem_block_set_free(block, mem_block_get_free(block) + - MEM_SPACE_NEEDED(n)); + UNIV_MEM_ASSERT_W((byte*) block + mem_block_get_free(block), n); +#ifdef UNIV_MEM_DEBUG + + ut_ad(mem_block_get_start(block) <= mem_block_get_free(block)); + + /* In the debug version check the consistency, and erase field */ + mem_field_erase((byte*)block + mem_block_get_free(block), n); +#endif + + /* If free == start, we may free the block if it is not the first + one */ + + if ((heap != block) && (mem_block_get_free(block) + == mem_block_get_start(block))) { + mem_heap_block_free(heap, block); + } else { + /* Avoid a bogus UNIV_MEM_ASSERT_W() warning in a + subsequent invocation of mem_heap_free_top(). + Originally, this was UNIV_MEM_FREE(), to catch writes + to freed memory. */ + UNIV_MEM_ALLOC((byte*) block + mem_block_get_free(block), n); + } +} + +/********************************************************************* +NOTE: Use the corresponding macros instead of this function. Creates a +memory heap. For debugging purposes, takes also the file name and line as +argument. 
*/
+UNIV_INLINE
+mem_heap_t*
+mem_heap_create_func(
+/*=================*/
+					/* out, own: memory heap, NULL if
+					did not succeed (only possible for
+					MEM_HEAP_BTR_SEARCH type heaps)*/
+	ulint		n,		/* in: desired start block size,
+					this means that a single user buffer
+					of size n will fit in the block,
+					0 creates a default size block */
+	ulint		type,		/* in: heap type */
+	const char*	file_name,	/* in: file name where created */
+	ulint		line)		/* in: line where created */
+{
+	mem_block_t*	first;
+
+	/* n == 0 selects the default start size */
+	first = mem_heap_create_block(NULL, n ? n : MEM_BLOCK_START_SIZE,
+				      type, file_name, line);
+
+	if (first != NULL) {
+		/* The first block holds the base node of the block list
+		and is itself added as the first member of that list */
+		UT_LIST_INIT(first->base);
+		UT_LIST_ADD_FIRST(list, first->base, first);
+
+#ifdef UNIV_MEM_DEBUG
+		mem_hash_insert(first, file_name, line);
+#endif
+	}
+
+	return first;
+}
+
+/*********************************************************************
+NOTE: Use the corresponding macro instead of this function. Frees the space
+occupied by a memory heap. In the debug version erases the heap memory
+blocks. 
*/ +UNIV_INLINE +void +mem_heap_free_func( +/*===============*/ + mem_heap_t* heap, /* in, own: heap to be freed */ + const char* file_name __attribute__((unused)), + /* in: file name where freed */ + ulint line __attribute__((unused))) +{ + mem_block_t* block; + mem_block_t* prev_block; + + ut_ad(mem_heap_check(heap)); + + block = UT_LIST_GET_LAST(heap->base); + +#ifdef UNIV_MEM_DEBUG + + /* In the debug version remove the heap from the hash table of heaps + and check its consistency */ + + mem_hash_remove(heap, file_name, line); + +#endif + + if (heap->free_block) { + mem_heap_free_block_free(heap); + } + + while (block != NULL) { + /* Store the contents of info before freeing current block + (it is erased in freeing) */ + + prev_block = UT_LIST_GET_PREV(list, block); + + mem_heap_block_free(heap, block); + + block = prev_block; + } +} + +/******************************************************************* +NOTE: Use the corresponding macro instead of this function. +Allocates a single buffer of memory from the dynamic memory of +the C compiler. Is like malloc of C. The buffer must be freed +with mem_free. */ +UNIV_INLINE +void* +mem_alloc_func( +/*===========*/ + /* out, own: free storage */ + ulint n, /* in: desired number of bytes */ + ulint* size, /* out: allocated size in bytes, + or NULL */ + const char* file_name, /* in: file name where created */ + ulint line) /* in: line where created */ +{ + mem_heap_t* heap; + void* buf; + + heap = mem_heap_create_func(n, MEM_HEAP_DYNAMIC, file_name, line); + + /* Note that as we created the first block in the heap big enough + for the buffer requested by the caller, the buffer will be in the + first block and thus we can calculate the pointer to the heap from + the pointer to the buffer when we free the memory buffer. */ + + if (UNIV_LIKELY_NULL(size)) { + /* Adjust the allocation to the actual size of the + memory block. 
*/ + ulint m = mem_block_get_len(heap) + - mem_block_get_free(heap); +#ifdef UNIV_MEM_DEBUG + m -= MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE; +#endif /* UNIV_MEM_DEBUG */ + ut_ad(m >= n); + *size = n = m; + } + + buf = mem_heap_alloc(heap, n); + + ut_a((byte*)heap == (byte*)buf - MEM_BLOCK_HEADER_SIZE + - MEM_FIELD_HEADER_SIZE); + return(buf); +} + +/******************************************************************* +NOTE: Use the corresponding macro instead of this function. Frees a single +buffer of storage from the dynamic memory of the C compiler. Similar to the +free of C. */ +UNIV_INLINE +void +mem_free_func( +/*==========*/ + void* ptr, /* in, own: buffer to be freed */ + const char* file_name, /* in: file name where created */ + ulint line /* in: line where created */ + ) +{ + mem_heap_t* heap; + + heap = (mem_heap_t*)((byte*)ptr - MEM_BLOCK_HEADER_SIZE + - MEM_FIELD_HEADER_SIZE); + mem_heap_free_func(heap, file_name, line); +} + +/********************************************************************* +Returns the space in bytes occupied by a memory heap. */ +UNIV_INLINE +ulint +mem_heap_get_size( +/*==============*/ + mem_heap_t* heap) /* in: heap */ +{ + mem_block_t* block; + ulint size = 0; + + ut_ad(mem_heap_check(heap)); + + block = heap; + + while (block != NULL) { + + size += mem_block_get_len(block); + block = UT_LIST_GET_NEXT(list, block); + } + + if (heap->free_block) { + size += UNIV_PAGE_SIZE; + } + + return(size); +} + +/************************************************************************** +Duplicates a NUL-terminated string. */ +UNIV_INLINE +char* +mem_strdup( +/*=======*/ + /* out, own: a copy of the string, + must be deallocated with mem_free */ + const char* str) /* in: string to be copied */ +{ + ulint len = strlen(str) + 1; + return((char*) memcpy(mem_alloc(len), str, len)); +} + +/************************************************************************** +Makes a NUL-terminated copy of a nonterminated string. 
*/ +UNIV_INLINE +char* +mem_strdupl( +/*========*/ + /* out, own: a copy of the string, + must be deallocated with mem_free */ + const char* str, /* in: string to be copied */ + ulint len) /* in: length of str, in bytes */ +{ + char* s = (char*) mem_alloc(len + 1); + s[len] = 0; + return((char*) memcpy(s, str, len)); +} + +/************************************************************************** +Makes a NUL-terminated copy of a nonterminated string, +allocated from a memory heap. */ +UNIV_INLINE +char* +mem_heap_strdupl( +/*=============*/ + /* out, own: a copy of the string */ + mem_heap_t* heap, /* in: memory heap where string is allocated */ + const char* str, /* in: string to be copied */ + ulint len) /* in: length of str, in bytes */ +{ + char* s = (char*) mem_heap_alloc(heap, len + 1); + s[len] = 0; + return((char*) memcpy(s, str, len)); +} diff --git a/storage/xtradb/include/mem0pool.h b/storage/xtradb/include/mem0pool.h new file mode 100644 index 00000000000..7e51b07bfe0 --- /dev/null +++ b/storage/xtradb/include/mem0pool.h @@ -0,0 +1,126 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************
+The lowest-level memory management
+
+Created 6/9/1994 Heikki Tuuri
+*******************************************************/
+
+#ifndef mem0pool_h
+#define mem0pool_h
+
+#include "univ.i"
+#include "os0file.h"
+#include "ut0lst.h"
+
+typedef struct mem_area_struct	mem_area_t;
+typedef struct mem_pool_struct	mem_pool_t;
+
+/* The common memory pool */
+extern mem_pool_t*	mem_comm_pool;
+
+/* Memory area header: the control information kept for each memory
+area of a pool */
+
+struct mem_area_struct{
+	ulint		size_and_free;	/* size of the area combined with a
+					free flag: the memory area size is
+					obtained by ANDing with
+					~MEM_AREA_FREE; the area is in
+					a free list if ANDing with
+					MEM_AREA_FREE results in nonzero */
+	UT_LIST_NODE_T(mem_area_t)
+			free_list;	/* free list node */
+};
+
+/* Each memory area takes this many extra bytes for control information:
+the size of the area header rounded up with ut_calc_align to
+UNIV_MEM_ALIGNMENT */
+#define MEM_AREA_EXTRA_SIZE	(ut_calc_align(sizeof(struct mem_area_struct),\
+			UNIV_MEM_ALIGNMENT))
+
+/************************************************************************
+Creates a memory pool. */
+UNIV_INTERN
+mem_pool_t*
+mem_pool_create(
+/*============*/
+			/* out: memory pool */
+	ulint	size);	/* in: pool size in bytes */
+/************************************************************************
+Allocates memory from a pool. NOTE: This low-level function should only be
+used in mem0mem.*! 
*/ +UNIV_INTERN +void* +mem_area_alloc( +/*===========*/ + /* out, own: allocated memory buffer */ + ulint* psize, /* in: requested size in bytes; for optimum + space usage, the size should be a power of 2 + minus MEM_AREA_EXTRA_SIZE; + out: allocated size in bytes (greater than + or equal to the requested size) */ + mem_pool_t* pool); /* in: memory pool */ +/************************************************************************ +Frees memory to a pool. */ +UNIV_INTERN +void +mem_area_free( +/*==========*/ + void* ptr, /* in, own: pointer to allocated memory + buffer */ + mem_pool_t* pool); /* in: memory pool */ +/************************************************************************ +Returns the amount of reserved memory. */ +UNIV_INTERN +ulint +mem_pool_get_reserved( +/*==================*/ + /* out: reserved memory in bytes */ + mem_pool_t* pool); /* in: memory pool */ +/************************************************************************ +Reserves the mem pool mutex. */ +UNIV_INTERN +void +mem_pool_mutex_enter(void); +/*======================*/ +/************************************************************************ +Releases the mem pool mutex. */ +UNIV_INTERN +void +mem_pool_mutex_exit(void); +/*=====================*/ +/************************************************************************ +Validates a memory pool. */ +UNIV_INTERN +ibool +mem_pool_validate( +/*==============*/ + /* out: TRUE if ok */ + mem_pool_t* pool); /* in: memory pool */ +/************************************************************************ +Prints info of a memory pool. 
*/ +UNIV_INTERN +void +mem_pool_print_info( +/*================*/ + FILE* outfile,/* in: output file to write to */ + mem_pool_t* pool); /* in: memory pool */ + + +#ifndef UNIV_NONINL +#include "mem0pool.ic" +#endif + +#endif diff --git a/storage/xtradb/include/mem0pool.ic b/storage/xtradb/include/mem0pool.ic new file mode 100644 index 00000000000..4cc65e754ce --- /dev/null +++ b/storage/xtradb/include/mem0pool.ic @@ -0,0 +1,23 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/************************************************************************ +The lowest-level memory management + +Created 6/8/1994 Heikki Tuuri +*************************************************************************/ diff --git a/storage/xtradb/include/mtr0log.h b/storage/xtradb/include/mtr0log.h new file mode 100644 index 00000000000..44374cdf1a4 --- /dev/null +++ b/storage/xtradb/include/mtr0log.h @@ -0,0 +1,247 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Mini-transaction logging routines + +Created 12/7/1995 Heikki Tuuri +*******************************************************/ + +#ifndef mtr0log_h +#define mtr0log_h + +#include "univ.i" +#include "mtr0mtr.h" +#include "dict0types.h" + +/************************************************************ +Writes 1 - 4 bytes to a file page buffered in the buffer pool. +Writes the corresponding log record to the mini-transaction log. */ +UNIV_INTERN +void +mlog_write_ulint( +/*=============*/ + byte* ptr, /* in: pointer where to write */ + ulint val, /* in: value to write */ + byte type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************ +Writes 8 bytes to a file page buffered in the buffer pool. +Writes the corresponding log record to the mini-transaction log. */ +UNIV_INTERN +void +mlog_write_dulint( +/*==============*/ + byte* ptr, /* in: pointer where to write */ + dulint val, /* in: value to write */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************ +Writes a string to a file page buffered in the buffer pool. 
Writes the +corresponding log record to the mini-transaction log. */ +UNIV_INTERN +void +mlog_write_string( +/*==============*/ + byte* ptr, /* in: pointer where to write */ + const byte* str, /* in: string to write */ + ulint len, /* in: string length */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************ +Logs a write of a string to a file page buffered in the buffer pool. +Writes the corresponding log record to the mini-transaction log. */ +UNIV_INTERN +void +mlog_log_string( +/*============*/ + byte* ptr, /* in: pointer written to */ + ulint len, /* in: string length */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************ +Writes initial part of a log record consisting of one-byte item +type and four-byte space and page numbers. */ +UNIV_INTERN +void +mlog_write_initial_log_record( +/*==========================*/ + const byte* ptr, /* in: pointer to (inside) a buffer + frame holding the file page where + modification is made */ + byte type, /* in: log item type: MLOG_1BYTE, ... */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************ +Writes a log record about an .ibd file create/delete/rename. */ +UNIV_INLINE +byte* +mlog_write_initial_log_record_for_file_op( +/*======================================*/ + /* out: new value of log_ptr */ + ulint type, /* in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or + MLOG_FILE_RENAME */ + ulint space_id,/* in: space id, if applicable */ + ulint page_no,/* in: page number (not relevant currently) */ + byte* log_ptr,/* in: pointer to mtr log which has been opened */ + mtr_t* mtr); /* in: mtr */ +/************************************************************ +Catenates 1 - 4 bytes to the mtr log. 
*/ +UNIV_INLINE +void +mlog_catenate_ulint( +/*================*/ + mtr_t* mtr, /* in: mtr */ + ulint val, /* in: value to write */ + ulint type); /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ +/************************************************************ +Catenates n bytes to the mtr log. */ +UNIV_INTERN +void +mlog_catenate_string( +/*=================*/ + mtr_t* mtr, /* in: mtr */ + const byte* str, /* in: string to write */ + ulint len); /* in: string length */ +/************************************************************ +Catenates a compressed ulint to mlog. */ +UNIV_INLINE +void +mlog_catenate_ulint_compressed( +/*===========================*/ + mtr_t* mtr, /* in: mtr */ + ulint val); /* in: value to write */ +/************************************************************ +Catenates a compressed dulint to mlog. */ +UNIV_INLINE +void +mlog_catenate_dulint_compressed( +/*============================*/ + mtr_t* mtr, /* in: mtr */ + dulint val); /* in: value to write */ +/************************************************************ +Opens a buffer to mlog. It must be closed with mlog_close. */ +UNIV_INLINE +byte* +mlog_open( +/*======*/ + /* out: buffer, NULL if log mode MTR_LOG_NONE */ + mtr_t* mtr, /* in: mtr */ + ulint size); /* in: buffer size in bytes; MUST be + smaller than DYN_ARRAY_DATA_SIZE! */ +/************************************************************ +Closes a buffer opened to mlog. */ +UNIV_INLINE +void +mlog_close( +/*=======*/ + mtr_t* mtr, /* in: mtr */ + byte* ptr); /* in: buffer space from ptr up was not used */ +/************************************************************ +Writes the initial part of a log record (3..11 bytes). +If the implementation of this function is changed, all +size parameters to mlog_open() should be adjusted accordingly! 
*/ +UNIV_INLINE +byte* +mlog_write_initial_log_record_fast( +/*===============================*/ + /* out: new value of log_ptr */ + const byte* ptr, /* in: pointer to (inside) a buffer + frame holding the file page where + modification is made */ + byte type, /* in: log item type: MLOG_1BYTE, ... */ + byte* log_ptr,/* in: pointer to mtr log which has + been opened */ + mtr_t* mtr); /* in: mtr */ +/************************************************************ +Parses an initial log record written by mlog_write_initial_log_record. */ +UNIV_INTERN +byte* +mlog_parse_initial_log_record( +/*==========================*/ + /* out: parsed record end, NULL if not a complete + record */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + byte* type, /* out: log record type: MLOG_1BYTE, ... */ + ulint* space, /* out: space id */ + ulint* page_no);/* out: page number */ +/************************************************************ +Parses a log record written by mlog_write_ulint or mlog_write_dulint. */ +UNIV_INTERN +byte* +mlog_parse_nbytes( +/*==============*/ + /* out: parsed record end, NULL if not a complete + record */ + ulint type, /* in: log record type: MLOG_1BYTE, ... */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + byte* page, /* in: page where to apply the log record, or NULL */ + void* page_zip);/* in/out: compressed page, or NULL */ +/************************************************************ +Parses a log record written by mlog_write_string. 
*/ +UNIV_INTERN +byte* +mlog_parse_string( +/*==============*/ + /* out: parsed record end, NULL if not a complete + record */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + byte* page, /* in: page where to apply the log record, or NULL */ + void* page_zip);/* in/out: compressed page, or NULL */ + + +/************************************************************ +Opens a buffer for mlog, writes the initial log record and, +if needed, the field lengths of an index. Reserves space +for further log entries. The log entry must be closed with +mlog_close(). */ +UNIV_INTERN +byte* +mlog_open_and_write_index( +/*======================*/ + /* out: buffer, NULL if log mode + MTR_LOG_NONE */ + mtr_t* mtr, /* in: mtr */ + byte* rec, /* in: index record or page */ + dict_index_t* index, /* in: record descriptor */ + byte type, /* in: log item type */ + ulint size); /* in: requested buffer size in bytes + (if 0, calls mlog_close() and returns NULL) */ + +/************************************************************ +Parses a log record written by mlog_open_and_write_index. */ +UNIV_INTERN +byte* +mlog_parse_index( +/*=============*/ + /* out: parsed record end, + NULL if not a complete record */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + /* out: new value of log_ptr */ + ibool comp, /* in: TRUE=compact record format */ + dict_index_t** index); /* out, own: dummy index */ + +/* Insert, update, and maybe other functions may use this value to define an +extra mlog buffer size for variable size data */ +#define MLOG_BUF_MARGIN 256 + +#ifndef UNIV_NONINL +#include "mtr0log.ic" +#endif + +#endif diff --git a/storage/xtradb/include/mtr0log.ic b/storage/xtradb/include/mtr0log.ic new file mode 100644 index 00000000000..5f05befb9cc --- /dev/null +++ b/storage/xtradb/include/mtr0log.ic @@ -0,0 +1,247 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Mini-transaction logging routines + +Created 12/7/1995 Heikki Tuuri +*******************************************************/ + +#include "mach0data.h" +#include "ut0lst.h" +#include "buf0buf.h" + +/************************************************************ +Opens a buffer to mlog. It must be closed with mlog_close. */ +UNIV_INLINE +byte* +mlog_open( +/*======*/ + /* out: buffer, NULL if log mode MTR_LOG_NONE */ + mtr_t* mtr, /* in: mtr */ + ulint size) /* in: buffer size in bytes; MUST be + smaller than DYN_ARRAY_DATA_SIZE! */ +{ + dyn_array_t* mlog; + + mtr->modifications = TRUE; + + if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) { + + return(NULL); + } + + mlog = &(mtr->log); + + return(dyn_array_open(mlog, size)); +} + +/************************************************************ +Closes a buffer opened to mlog. */ +UNIV_INLINE +void +mlog_close( +/*=======*/ + mtr_t* mtr, /* in: mtr */ + byte* ptr) /* in: buffer space from ptr up was not used */ +{ + dyn_array_t* mlog; + + ut_ad(mtr_get_log_mode(mtr) != MTR_LOG_NONE); + + mlog = &(mtr->log); + + dyn_array_close(mlog, ptr); +} + +/************************************************************ +Catenates 1 - 4 bytes to the mtr log. 
The value is not compressed. */ +UNIV_INLINE +void +mlog_catenate_ulint( +/*================*/ + mtr_t* mtr, /* in: mtr */ + ulint val, /* in: value to write */ + ulint type) /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ +{ + dyn_array_t* mlog; + byte* ptr; + + if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) { + + return; + } + + mlog = &(mtr->log); + +#if MLOG_1BYTE != 1 +# error "MLOG_1BYTE != 1" +#endif +#if MLOG_2BYTES != 2 +# error "MLOG_2BYTES != 2" +#endif +#if MLOG_4BYTES != 4 +# error "MLOG_4BYTES != 4" +#endif +#if MLOG_8BYTES != 8 +# error "MLOG_8BYTES != 8" +#endif + ptr = (byte*) dyn_array_push(mlog, type); + + if (type == MLOG_4BYTES) { + mach_write_to_4(ptr, val); + } else if (type == MLOG_2BYTES) { + mach_write_to_2(ptr, val); + } else { + ut_ad(type == MLOG_1BYTE); + mach_write_to_1(ptr, val); + } +} + +/************************************************************ +Catenates a compressed ulint to mlog. */ +UNIV_INLINE +void +mlog_catenate_ulint_compressed( +/*===========================*/ + mtr_t* mtr, /* in: mtr */ + ulint val) /* in: value to write */ +{ + byte* log_ptr; + + log_ptr = mlog_open(mtr, 10); + + /* If no logging is requested, we may return now */ + if (log_ptr == NULL) { + + return; + } + + log_ptr += mach_write_compressed(log_ptr, val); + + mlog_close(mtr, log_ptr); +} + +/************************************************************ +Catenates a compressed dulint to mlog. */ +UNIV_INLINE +void +mlog_catenate_dulint_compressed( +/*============================*/ + mtr_t* mtr, /* in: mtr */ + dulint val) /* in: value to write */ +{ + byte* log_ptr; + + log_ptr = mlog_open(mtr, 15); + + /* If no logging is requested, we may return now */ + if (log_ptr == NULL) { + + return; + } + + log_ptr += mach_dulint_write_compressed(log_ptr, val); + + mlog_close(mtr, log_ptr); +} + +/************************************************************ +Writes the initial part of a log record (3..11 bytes). 
+If the implementation of this function is changed, all +size parameters to mlog_open() should be adjusted accordingly! */ +UNIV_INLINE +byte* +mlog_write_initial_log_record_fast( +/*===============================*/ + /* out: new value of log_ptr */ + const byte* ptr, /* in: pointer to (inside) a buffer + frame holding the file page where + modification is made */ + byte type, /* in: log item type: MLOG_1BYTE, ... */ + byte* log_ptr,/* in: pointer to mtr log which has + been opened */ + mtr_t* mtr) /* in: mtr */ +{ +#ifdef UNIV_DEBUG + buf_block_t* block; +#endif + const byte* page; + ulint space; + ulint offset; + + ut_ad(ptr && log_ptr); + ut_ad(mtr_memo_contains_page(mtr, ptr, MTR_MEMO_PAGE_X_FIX)); + ut_ad(type <= MLOG_BIGGEST_TYPE); + + page = (const byte*) ut_align_down(ptr, UNIV_PAGE_SIZE); + space = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + offset = mach_read_from_4(page + FIL_PAGE_OFFSET); + + mach_write_to_1(log_ptr, type); + log_ptr++; + log_ptr += mach_write_compressed(log_ptr, space); + log_ptr += mach_write_compressed(log_ptr, offset); + + mtr->n_log_recs++; + +#ifdef UNIV_LOG_DEBUG + fprintf(stderr, + "Adding to mtr log record type %lu space %lu page no %lu\n", + (ulong) type, (ulong) space, (ulong) offset); +#endif + +#ifdef UNIV_DEBUG + /* We now assume that all x-latched pages have been modified! */ + block = (buf_block_t*) buf_block_align(ptr); + + if (!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)) { + + mtr_memo_push(mtr, block, MTR_MEMO_MODIFY); + } +#endif + return(log_ptr); +} + +/************************************************************ +Writes a log record about an .ibd file create/delete/rename. 
*/ +UNIV_INLINE +byte* +mlog_write_initial_log_record_for_file_op( +/*======================================*/ + /* out: new value of log_ptr */ + ulint type, /* in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or + MLOG_FILE_RENAME */ + ulint space_id,/* in: space id, if applicable */ + ulint page_no,/* in: page number (not relevant currently) */ + byte* log_ptr,/* in: pointer to mtr log which has been opened */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(log_ptr); + + mach_write_to_1(log_ptr, type); + log_ptr++; + + /* We write dummy space id and page number */ + log_ptr += mach_write_compressed(log_ptr, space_id); + log_ptr += mach_write_compressed(log_ptr, page_no); + + mtr->n_log_recs++; + + return(log_ptr); +} diff --git a/storage/xtradb/include/mtr0mtr.h b/storage/xtradb/include/mtr0mtr.h new file mode 100644 index 00000000000..a29f6c73141 --- /dev/null +++ b/storage/xtradb/include/mtr0mtr.h @@ -0,0 +1,380 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Mini-transaction buffer + +Created 11/26/1995 Heikki Tuuri +*******************************************************/ + +#ifndef mtr0mtr_h +#define mtr0mtr_h + +#include "univ.i" +#include "mem0mem.h" +#include "dyn0dyn.h" +#include "buf0types.h" +#include "sync0rw.h" +#include "ut0byte.h" +#include "mtr0types.h" +#include "page0types.h" + +/* Logging modes for a mini-transaction */ +#define MTR_LOG_ALL 21 /* default mode: log all operations + modifying disk-based data */ +#define MTR_LOG_NONE 22 /* log no operations */ +/*#define MTR_LOG_SPACE 23 */ /* log only operations modifying + file space page allocation data + (operations in fsp0fsp.* ) */ +#define MTR_LOG_SHORT_INSERTS 24 /* inserts are logged in a shorter + form */ + +/* Types for the mlock objects to store in the mtr memo; NOTE that the +first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ +#define MTR_MEMO_PAGE_S_FIX RW_S_LATCH +#define MTR_MEMO_PAGE_X_FIX RW_X_LATCH +#define MTR_MEMO_BUF_FIX RW_NO_LATCH +#define MTR_MEMO_MODIFY 54 +#define MTR_MEMO_S_LOCK 55 +#define MTR_MEMO_X_LOCK 56 + +/* Log item types: we have made them to be of the type 'byte' +for the compiler to warn if val and type parameters are switched +in a call to mlog_write_ulint. NOTE! For 1 - 8 bytes, the +flag value must give the length also! */ +#define MLOG_SINGLE_REC_FLAG 128 /* if the mtr contains only + one log record for one page, + i.e., write_initial_log_record + has been called only once, + this flag is ORed to the type + of that first log record */ +#define MLOG_1BYTE (1) /* one byte is written */ +#define MLOG_2BYTES (2) /* 2 bytes ... */ +#define MLOG_4BYTES (4) /* 4 bytes ... 
*/ +#define MLOG_8BYTES (8) /* 8 bytes ... */ +#define MLOG_REC_INSERT ((byte)9) /* record insert */ +#define MLOG_REC_CLUST_DELETE_MARK ((byte)10) /* mark clustered index record + deleted */ +#define MLOG_REC_SEC_DELETE_MARK ((byte)11) /* mark secondary index record + deleted */ +#define MLOG_REC_UPDATE_IN_PLACE ((byte)13) /* update of a record, + preserves record field sizes */ +#define MLOG_REC_DELETE ((byte)14) /* delete a record from a + page */ +#define MLOG_LIST_END_DELETE ((byte)15) /* delete record list end on + index page */ +#define MLOG_LIST_START_DELETE ((byte)16) /* delete record list start on + index page */ +#define MLOG_LIST_END_COPY_CREATED ((byte)17) /* copy record list end to a + new created index page */ +#define MLOG_PAGE_REORGANIZE ((byte)18) /* reorganize an index page */ +#define MLOG_PAGE_CREATE ((byte)19) /* create an index page */ +#define MLOG_UNDO_INSERT ((byte)20) /* insert entry in an undo + log */ +#define MLOG_UNDO_ERASE_END ((byte)21) /* erase an undo log + page end */ +#define MLOG_UNDO_INIT ((byte)22) /* initialize a page in an + undo log */ +#define MLOG_UNDO_HDR_DISCARD ((byte)23) /* discard an update undo log + header */ +#define MLOG_UNDO_HDR_REUSE ((byte)24) /* reuse an insert undo log + header */ +#define MLOG_UNDO_HDR_CREATE ((byte)25) /* create an undo log header */ +#define MLOG_REC_MIN_MARK ((byte)26) /* mark an index record as the + predefined minimum record */ +#define MLOG_IBUF_BITMAP_INIT ((byte)27) /* initialize an ibuf bitmap + page */ +/*#define MLOG_FULL_PAGE ((byte)28) full contents of a page */ +#define MLOG_INIT_FILE_PAGE ((byte)29) /* this means that a file page + is taken into use and the prior + contents of the page should be + ignored: in recovery we must + not trust the lsn values stored + to the file page */ +#define MLOG_WRITE_STRING ((byte)30) /* write a string to a page */ +#define MLOG_MULTI_REC_END ((byte)31) /* if a single mtr writes + log records for several pages, + this log record ends the + 
sequence of these records */ +#define MLOG_DUMMY_RECORD ((byte)32) /* dummy log record used to + pad a log block full */ +#define MLOG_FILE_CREATE ((byte)33) /* log record about an .ibd + file creation */ +#define MLOG_FILE_RENAME ((byte)34) /* log record about an .ibd + file rename */ +#define MLOG_FILE_DELETE ((byte)35) /* log record about an .ibd + file deletion */ +#define MLOG_COMP_REC_MIN_MARK ((byte)36) /* mark a compact index record + as the predefined minimum + record */ +#define MLOG_COMP_PAGE_CREATE ((byte)37) /* create a compact + index page */ +#define MLOG_COMP_REC_INSERT ((byte)38) /* compact record insert */ +#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39) + /* mark compact clustered index + record deleted */ +#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/* mark compact secondary index + record deleted; this log + record type is redundant, as + MLOG_REC_SEC_DELETE_MARK is + independent of the record + format. */ +#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/* update of a compact record, + preserves record field sizes */ +#define MLOG_COMP_REC_DELETE ((byte)42) /* delete a compact record + from a page */ +#define MLOG_COMP_LIST_END_DELETE ((byte)43) /* delete compact record list + end on index page */ +#define MLOG_COMP_LIST_START_DELETE ((byte)44) /* delete compact record list + start on index page */ +#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45) + /* copy compact record list end + to a new created index page */ +#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /* reorganize an index page */ +#define MLOG_FILE_CREATE2 ((byte)47) /* log record about creating + an .ibd file, with format */ +#define MLOG_ZIP_WRITE_NODE_PTR ((byte)48) /* write the node pointer of + a record on a compressed + non-leaf B-tree page */ +#define MLOG_ZIP_WRITE_BLOB_PTR ((byte)49) /* write the BLOB pointer + of an externally stored column + on a compressed page */ +#define MLOG_ZIP_WRITE_HEADER ((byte)50) /* write to compressed page + header */ +#define 
MLOG_ZIP_PAGE_COMPRESS ((byte)51) /* compress an index page */ +#define MLOG_BIGGEST_TYPE ((byte)51) /* biggest value (used in + asserts) */ + +/******************************************************************* +Starts a mini-transaction and creates a mini-transaction handle +and buffer in the memory buffer given by the caller. */ +UNIV_INLINE +mtr_t* +mtr_start( +/*======*/ + /* out: mtr buffer which also acts as + the mtr handle */ + mtr_t* mtr); /* in: memory buffer for the mtr buffer */ +/******************************************************************* +Commits a mini-transaction. */ +UNIV_INTERN +void +mtr_commit( +/*=======*/ + mtr_t* mtr); /* in: mini-transaction */ +/************************************************************** +Sets and returns a savepoint in mtr. */ +UNIV_INLINE +ulint +mtr_set_savepoint( +/*==============*/ + /* out: savepoint */ + mtr_t* mtr); /* in: mtr */ +/************************************************************** +Releases the latches stored in an mtr memo down to a savepoint. +NOTE! The mtr must not have made changes to buffer pages after the +savepoint, as these can be handled only by mtr_commit. */ +UNIV_INTERN +void +mtr_rollback_to_savepoint( +/*======================*/ + mtr_t* mtr, /* in: mtr */ + ulint savepoint); /* in: savepoint */ +/************************************************************** +Releases the (index tree) s-latch stored in an mtr memo after a +savepoint. */ +UNIV_INLINE +void +mtr_release_s_latch_at_savepoint( +/*=============================*/ + mtr_t* mtr, /* in: mtr */ + ulint savepoint, /* in: savepoint */ + rw_lock_t* lock); /* in: latch to release */ +/******************************************************************* +Gets the logging mode of a mini-transaction. */ +UNIV_INLINE +ulint +mtr_get_log_mode( +/*=============*/ + /* out: logging mode: MTR_LOG_NONE, ... 
*/ + mtr_t* mtr); /* in: mtr */ +/******************************************************************* +Changes the logging mode of a mini-transaction. */ +UNIV_INLINE +ulint +mtr_set_log_mode( +/*=============*/ + /* out: old mode */ + mtr_t* mtr, /* in: mtr */ + ulint mode); /* in: logging mode: MTR_LOG_NONE, ... */ +/************************************************************ +Reads 1 - 4 bytes from a file page buffered in the buffer pool. */ +UNIV_INTERN +ulint +mtr_read_ulint( +/*===========*/ + /* out: value read */ + const byte* ptr, /* in: pointer from where to read */ + ulint type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************ +Reads 8 bytes from a file page buffered in the buffer pool. */ +UNIV_INTERN +dulint +mtr_read_dulint( +/*============*/ + /* out: value read */ + const byte* ptr, /* in: pointer from where to read */ + mtr_t* mtr); /* in: mini-transaction handle */ +/************************************************************************* +This macro locks an rw-lock in s-mode. */ +#define mtr_s_lock(B, MTR) mtr_s_lock_func((B), __FILE__, __LINE__,\ + (MTR)) +/************************************************************************* +This macro locks an rw-lock in x-mode. */ +#define mtr_x_lock(B, MTR) mtr_x_lock_func((B), __FILE__, __LINE__,\ + (MTR)) +/************************************************************************* +NOTE! Use the macro above! +Locks a lock in s-mode. */ +UNIV_INLINE +void +mtr_s_lock_func( +/*============*/ + rw_lock_t* lock, /* in: rw-lock */ + const char* file, /* in: file name */ + ulint line, /* in: line number */ + mtr_t* mtr); /* in: mtr */ +/************************************************************************* +NOTE! Use the macro above! +Locks a lock in x-mode. 
*/ +UNIV_INLINE +void +mtr_x_lock_func( +/*============*/ + rw_lock_t* lock, /* in: rw-lock */ + const char* file, /* in: file name */ + ulint line, /* in: line number */ + mtr_t* mtr); /* in: mtr */ + +/******************************************************* +Releases an object in the memo stack. */ +UNIV_INTERN +void +mtr_memo_release( +/*=============*/ + mtr_t* mtr, /* in: mtr */ + void* object, /* in: object */ + ulint type); /* in: object type: MTR_MEMO_S_LOCK, ... */ +#ifdef UNIV_DEBUG +/************************************************************** +Checks if memo contains the given item. */ +UNIV_INLINE +ibool +mtr_memo_contains( +/*==============*/ + /* out: TRUE if contains */ + mtr_t* mtr, /* in: mtr */ + const void* object, /* in: object to search */ + ulint type); /* in: type of object */ + +/************************************************************** +Checks if memo contains the given page. */ +UNIV_INTERN +ibool +mtr_memo_contains_page( +/*===================*/ + /* out: TRUE if contains */ + mtr_t* mtr, /* in: mtr */ + const byte* ptr, /* in: pointer to buffer frame */ + ulint type); /* in: type of object */ +/************************************************************* +Prints info of an mtr handle. */ +UNIV_INTERN +void +mtr_print( +/*======*/ + mtr_t* mtr); /* in: mtr */ +#endif /* UNIV_DEBUG */ +/*######################################################################*/ + +#define MTR_BUF_MEMO_SIZE 200 /* number of slots in memo */ + +/******************************************************************* +Returns the log object of a mini-transaction buffer. */ +UNIV_INLINE +dyn_array_t* +mtr_get_log( +/*========*/ + /* out: log */ + mtr_t* mtr); /* in: mini-transaction */ +/******************************************************* +Pushes an object to an mtr memo stack. */ +UNIV_INLINE +void +mtr_memo_push( +/*==========*/ + mtr_t* mtr, /* in: mtr */ + void* object, /* in: object */ + ulint type); /* in: object type: MTR_MEMO_S_LOCK, ... 
*/ + + +/* Type definition of a mini-transaction memo stack slot. */ +typedef struct mtr_memo_slot_struct mtr_memo_slot_t; +struct mtr_memo_slot_struct{ + ulint type; /* type of the stored object (MTR_MEMO_S_LOCK, ...) */ + void* object; /* pointer to the object */ +}; + +/* Mini-transaction handle and buffer */ +struct mtr_struct{ +#ifdef UNIV_DEBUG + ulint state; /* MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */ +#endif + dyn_array_t memo; /* memo stack for locks etc. */ + dyn_array_t log; /* mini-transaction log */ + ibool modifications; + /* TRUE if the mtr made modifications to + buffer pool pages */ + ulint n_log_recs; + /* count of how many page initial log records + have been written to the mtr log */ + ulint log_mode; /* specifies which operations should be + logged; default value MTR_LOG_ALL */ + ib_uint64_t start_lsn;/* start lsn of the possible log entry for + this mtr */ + ib_uint64_t end_lsn;/* end lsn of the possible log entry for + this mtr */ +#ifdef UNIV_DEBUG + ulint magic_n; +#endif /* UNIV_DEBUG */ +}; + +#ifdef UNIV_DEBUG +# define MTR_MAGIC_N 54551 +#endif /* UNIV_DEBUG */ + +#define MTR_ACTIVE 12231 +#define MTR_COMMITTING 56456 +#define MTR_COMMITTED 34676 + +#ifndef UNIV_NONINL +#include "mtr0mtr.ic" +#endif + +#endif diff --git a/storage/xtradb/include/mtr0mtr.ic b/storage/xtradb/include/mtr0mtr.ic new file mode 100644 index 00000000000..7d6d99917b7 --- /dev/null +++ b/storage/xtradb/include/mtr0mtr.ic @@ -0,0 +1,266 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Mini-transaction buffer + +Created 11/26/1995 Heikki Tuuri +*******************************************************/ + +#include "sync0sync.h" +#include "sync0rw.h" +#include "mach0data.h" + +/******************************************************************* +Starts a mini-transaction and creates a mini-transaction handle +and a buffer in the memory buffer given by the caller. */ +UNIV_INLINE +mtr_t* +mtr_start( +/*======*/ + /* out: mtr buffer which also acts as + the mtr handle */ + mtr_t* mtr) /* in: memory buffer for the mtr buffer */ +{ + dyn_array_create(&(mtr->memo)); + dyn_array_create(&(mtr->log)); + + mtr->log_mode = MTR_LOG_ALL; + mtr->modifications = FALSE; + mtr->n_log_recs = 0; + + ut_d(mtr->state = MTR_ACTIVE); + ut_d(mtr->magic_n = MTR_MAGIC_N); + + return(mtr); +} + +/******************************************************* +Pushes an object to an mtr memo stack. */ +UNIV_INLINE +void +mtr_memo_push( +/*==========*/ + mtr_t* mtr, /* in: mtr */ + void* object, /* in: object */ + ulint type) /* in: object type: MTR_MEMO_S_LOCK, ... */ +{ + dyn_array_t* memo; + mtr_memo_slot_t* slot; + + ut_ad(object); + ut_ad(type >= MTR_MEMO_PAGE_S_FIX); + ut_ad(type <= MTR_MEMO_X_LOCK); + ut_ad(mtr); + ut_ad(mtr->magic_n == MTR_MAGIC_N); + + memo = &(mtr->memo); + + slot = (mtr_memo_slot_t*) dyn_array_push(memo, sizeof *slot); + + slot->object = object; + slot->type = type; +} + +/************************************************************** +Sets and returns a savepoint in mtr. 
*/ +UNIV_INLINE +ulint +mtr_set_savepoint( +/*==============*/ + /* out: savepoint */ + mtr_t* mtr) /* in: mtr */ +{ + dyn_array_t* memo; + + ut_ad(mtr); + ut_ad(mtr->magic_n == MTR_MAGIC_N); + + memo = &(mtr->memo); + + return(dyn_array_get_data_size(memo)); +} + +/************************************************************** +Releases the (index tree) s-latch stored in an mtr memo after a +savepoint. */ +UNIV_INLINE +void +mtr_release_s_latch_at_savepoint( +/*=============================*/ + mtr_t* mtr, /* in: mtr */ + ulint savepoint, /* in: savepoint */ + rw_lock_t* lock) /* in: latch to release */ +{ + mtr_memo_slot_t* slot; + dyn_array_t* memo; + + ut_ad(mtr); + ut_ad(mtr->magic_n == MTR_MAGIC_N); + ut_ad(mtr->state == MTR_ACTIVE); + + memo = &(mtr->memo); + + ut_ad(dyn_array_get_data_size(memo) > savepoint); + + slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, savepoint); + + ut_ad(slot->object == lock); + ut_ad(slot->type == MTR_MEMO_S_LOCK); + + rw_lock_s_unlock(lock); + + slot->object = NULL; +} + +#ifdef UNIV_DEBUG +/************************************************************** +Checks if memo contains the given item. */ +UNIV_INLINE +ibool +mtr_memo_contains( +/*==============*/ + /* out: TRUE if contains */ + mtr_t* mtr, /* in: mtr */ + const void* object, /* in: object to search */ + ulint type) /* in: type of object */ +{ + mtr_memo_slot_t* slot; + dyn_array_t* memo; + ulint offset; + + ut_ad(mtr); + ut_ad(mtr->magic_n == MTR_MAGIC_N); + + memo = &(mtr->memo); + + offset = dyn_array_get_data_size(memo); + + while (offset > 0) { + offset -= sizeof(mtr_memo_slot_t); + + slot = (mtr_memo_slot_t*) dyn_array_get_element(memo, offset); + + if ((object == slot->object) && (type == slot->type)) { + + return(TRUE); + } + } + + return(FALSE); +} +#endif /* UNIV_DEBUG */ + +/******************************************************************* +Returns the log object of a mini-transaction buffer. 
*/ +UNIV_INLINE +dyn_array_t* +mtr_get_log( +/*========*/ + /* out: log */ + mtr_t* mtr) /* in: mini-transaction */ +{ + ut_ad(mtr); + ut_ad(mtr->magic_n == MTR_MAGIC_N); + + return(&(mtr->log)); +} + +/******************************************************************* +Gets the logging mode of a mini-transaction. */ +UNIV_INLINE +ulint +mtr_get_log_mode( +/*=============*/ + /* out: logging mode: MTR_LOG_NONE, ... */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(mtr); + ut_ad(mtr->log_mode >= MTR_LOG_ALL); + ut_ad(mtr->log_mode <= MTR_LOG_SHORT_INSERTS); + + return(mtr->log_mode); +} + +/******************************************************************* +Changes the logging mode of a mini-transaction. */ +UNIV_INLINE +ulint +mtr_set_log_mode( +/*=============*/ + /* out: old mode */ + mtr_t* mtr, /* in: mtr */ + ulint mode) /* in: logging mode: MTR_LOG_NONE, ... */ +{ + ulint old_mode; + + ut_ad(mtr); + ut_ad(mode >= MTR_LOG_ALL); + ut_ad(mode <= MTR_LOG_SHORT_INSERTS); + + old_mode = mtr->log_mode; + + if ((mode == MTR_LOG_SHORT_INSERTS) && (old_mode == MTR_LOG_NONE)) { + /* Do nothing */ + } else { + mtr->log_mode = mode; + } + + ut_ad(old_mode >= MTR_LOG_ALL); + ut_ad(old_mode <= MTR_LOG_SHORT_INSERTS); + + return(old_mode); +} + +/************************************************************************* +Locks a lock in s-mode. */ +UNIV_INLINE +void +mtr_s_lock_func( +/*============*/ + rw_lock_t* lock, /* in: rw-lock */ + const char* file, /* in: file name */ + ulint line, /* in: line number */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(mtr); + ut_ad(lock); + + rw_lock_s_lock_func(lock, 0, file, line); + + mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK); +} + +/************************************************************************* +Locks a lock in x-mode. 
*/ +UNIV_INLINE +void +mtr_x_lock_func( +/*============*/ + rw_lock_t* lock, /* in: rw-lock */ + const char* file, /* in: file name */ + ulint line, /* in: line number */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(mtr); + ut_ad(lock); + + rw_lock_x_lock_func(lock, 0, file, line); + + mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK); +} diff --git a/storage/xtradb/include/mtr0types.h b/storage/xtradb/include/mtr0types.h new file mode 100644 index 00000000000..23634c98827 --- /dev/null +++ b/storage/xtradb/include/mtr0types.h @@ -0,0 +1,30 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Mini-transaction buffer global types + +Created 11/26/1995 Heikki Tuuri +*******************************************************/ + +#ifndef mtr0types_h +#define mtr0types_h + +typedef struct mtr_struct mtr_t; + +#endif diff --git a/storage/xtradb/include/mysql_addons.h b/storage/xtradb/include/mysql_addons.h new file mode 100644 index 00000000000..2e8c87f5962 --- /dev/null +++ b/storage/xtradb/include/mysql_addons.h @@ -0,0 +1,32 @@ +/***************************************************************************** + +Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +This file contains functions that need to be added to +MySQL code but have not been added yet. + +Whenever you add a function here submit a MySQL bug +report (feature request) with the implementation. Then +write the bug number in the comment before the +function in this file. 
+ +When MySQL commits the function it can be deleted from +here. In a perfect world this file exists but is empty. + +Created November 07, 2007 Vasil Dimov +*******************************************************/ diff --git a/storage/xtradb/include/os0file.h b/storage/xtradb/include/os0file.h new file mode 100644 index 00000000000..26897226ff4 --- /dev/null +++ b/storage/xtradb/include/os0file.h @@ -0,0 +1,758 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The interface to the operating system file io + +Created 10/21/1995 Heikki Tuuri +*******************************************************/ + +#ifndef os0file_h +#define os0file_h + +#include "univ.i" + +#ifndef __WIN__ +#include <dirent.h> +#include <sys/stat.h> +#include <time.h> +#endif + +typedef struct fil_node_struct fil_node_t; + +#ifdef UNIV_DO_FLUSH +extern ibool os_do_not_call_flush_at_each_write; +#endif /* UNIV_DO_FLUSH */ +extern ibool os_has_said_disk_full; +extern ibool os_aio_print_debug; + +extern ulint os_file_n_pending_preads; +extern ulint os_file_n_pending_pwrites; + +extern ulint os_n_pending_reads; +extern ulint 
os_n_pending_writes; + +#ifdef __WIN__ + +/* We define always WIN_ASYNC_IO, and check at run-time whether + the OS actually supports it: Win 95 does not, NT does. */ +#define WIN_ASYNC_IO + +#define UNIV_NON_BUFFERED_IO + +#endif + +#ifdef __WIN__ +#define os_file_t HANDLE +#define OS_FILE_FROM_FD(fd) (HANDLE) _get_osfhandle(fd) +#else +typedef int os_file_t; +#define OS_FILE_FROM_FD(fd) fd +#endif + +extern ulint os_innodb_umask; + +/* If this flag is TRUE, then we will use the native aio of the +OS (provided we compiled Innobase with it in), otherwise we will +use simulated aio we build below with threads */ + +extern ibool os_aio_use_native_aio; + +#define OS_FILE_SECTOR_SIZE 512 + +/* The next value should be smaller or equal to the smallest sector size used +on any disk. A log block is required to be a portion of disk which is written +so that if the start and the end of a block get written to disk, then the +whole block gets written. This should be true even in most cases of a crash: +if this fails for a log block, then it is equivalent to a media failure in the +log. 
*/ + +#define OS_FILE_LOG_BLOCK_SIZE 512 + +/* Options for file_create */ +#define OS_FILE_OPEN 51 +#define OS_FILE_CREATE 52 +#define OS_FILE_OVERWRITE 53 +#define OS_FILE_OPEN_RAW 54 +#define OS_FILE_CREATE_PATH 55 +#define OS_FILE_OPEN_RETRY 56 /* for os_file_create() on + the first ibdata file */ + +#define OS_FILE_READ_ONLY 333 +#define OS_FILE_READ_WRITE 444 +#define OS_FILE_READ_ALLOW_DELETE 555 /* for ibbackup */ + +/* Options for file_create */ +#define OS_FILE_AIO 61 +#define OS_FILE_NORMAL 62 + +/* Types for file create */ +#define OS_DATA_FILE 100 +#define OS_LOG_FILE 101 + +/* Error codes from os_file_get_last_error */ +#define OS_FILE_NOT_FOUND 71 +#define OS_FILE_DISK_FULL 72 +#define OS_FILE_ALREADY_EXISTS 73 +#define OS_FILE_PATH_ERROR 74 +#define OS_FILE_AIO_RESOURCES_RESERVED 75 /* wait for OS aio resources + to become available again */ +#define OS_FILE_SHARING_VIOLATION 76 +#define OS_FILE_ERROR_NOT_SPECIFIED 77 + +/* Types for aio operations */ +#define OS_FILE_READ 10 +#define OS_FILE_WRITE 11 + +#define OS_FILE_LOG 256 /* This can be ORed to type */ + +#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /* Win NT does not allow more + than 64 */ + +/* Modes for aio operations */ +#define OS_AIO_NORMAL 21 /* Normal asynchronous i/o not for ibuf + pages or ibuf bitmap pages */ +#define OS_AIO_IBUF 22 /* Asynchronous i/o for ibuf pages or ibuf + bitmap pages */ +#define OS_AIO_LOG 23 /* Asynchronous i/o for the log */ +#define OS_AIO_SYNC 24 /* Asynchronous i/o where the calling thread + will itself wait for the i/o to complete, + doing also the job of the i/o-handler thread; + can be used for any pages, ibuf or non-ibuf. + This is used to save CPU time, as we can do + with fewer thread switches. Plain synchronous + i/o is not as good, because it must serialize + the file seek and read or write, causing a + bottleneck for parallelism. 
*/ + +#define OS_AIO_SIMULATED_WAKE_LATER 512 /* This can be ORed to mode + in the call of os_aio(...), + if the caller wants to post several i/o + requests in a batch, and only after that + wake the i/o-handler thread; this has + effect only in simulated aio */ +#define OS_WIN31 1 +#define OS_WIN95 2 +#define OS_WINNT 3 +#define OS_WIN2000 4 + +extern ulint os_n_file_reads; +extern ulint os_n_file_writes; +extern ulint os_n_fsyncs; + +/* File types for directory entry data type */ + +enum os_file_type_enum{ + OS_FILE_TYPE_UNKNOWN = 0, + OS_FILE_TYPE_FILE, /* regular file */ + OS_FILE_TYPE_DIR, /* directory */ + OS_FILE_TYPE_LINK /* symbolic link */ +}; +typedef enum os_file_type_enum os_file_type_t; + +/* Maximum path string length in bytes when referring to tables with in the +'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers +of this size from the thread stack; that is why this should not be made much +bigger than 4000 bytes */ +#define OS_FILE_MAX_PATH 4000 + +/* Struct used in fetching information of a file in a directory */ +struct os_file_stat_struct{ + char name[OS_FILE_MAX_PATH]; /* path to a file */ + os_file_type_t type; /* file type */ + ib_int64_t size; /* file size */ + time_t ctime; /* creation time */ + time_t mtime; /* modification time */ + time_t atime; /* access time */ +}; +typedef struct os_file_stat_struct os_file_stat_t; + +#ifdef __WIN__ +typedef HANDLE os_file_dir_t; /* directory stream */ +#else +typedef DIR* os_file_dir_t; /* directory stream */ +#endif + +/*************************************************************************** +Gets the operating system version. Currently works only on Windows. */ +UNIV_INTERN +ulint +os_get_os_version(void); +/*===================*/ + /* out: OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */ +/******************************************************************** +Creates the seek mutexes used in positioned reads and writes. 
*/ +UNIV_INTERN +void +os_io_init_simple(void); +/*===================*/ +/*************************************************************************** +Creates a temporary file. This function is like tmpfile(3), but +the temporary file is created in the MySQL temporary directory. +On Netware, this function is like tmpfile(3), because the C run-time +library of Netware does not expose the delete-on-close flag. */ + +FILE* +os_file_create_tmpfile(void); +/*========================*/ + /* out: temporary file handle, or NULL on error */ +/*************************************************************************** +The os_file_opendir() function opens a directory stream corresponding to the +directory named by the dirname argument. The directory stream is positioned +at the first entry. In both Unix and Windows we automatically skip the '.' +and '..' items at the start of the directory listing. */ +UNIV_INTERN +os_file_dir_t +os_file_opendir( +/*============*/ + /* out: directory stream, NULL if + error */ + const char* dirname, /* in: directory name; it must not + contain a trailing '\' or '/' */ + ibool error_is_fatal);/* in: TRUE if we should treat an + error as a fatal error; if we try to + open symlinks then we do not wish a + fatal error if it happens not to be + a directory */ +/*************************************************************************** +Closes a directory stream. */ +UNIV_INTERN +int +os_file_closedir( +/*=============*/ + /* out: 0 if success, -1 if failure */ + os_file_dir_t dir); /* in: directory stream */ +/*************************************************************************** +This function returns information of the next file in the directory. We jump +over the '.' and '..' entries in the directory. 
*/ +UNIV_INTERN +int +os_file_readdir_next_file( +/*======================*/ + /* out: 0 if ok, -1 if error, 1 if at the end + of the directory */ + const char* dirname,/* in: directory name or path */ + os_file_dir_t dir, /* in: directory stream */ + os_file_stat_t* info); /* in/out: buffer where the info is returned */ +/********************************************************************* +This function attempts to create a directory named pathname. The new directory +gets default permissions. On Unix, the permissions are (0770 & ~umask). If the +directory exists already, nothing is done and the call succeeds, unless the +fail_if_exists argument is true. */ +UNIV_INTERN +ibool +os_file_create_directory( +/*=====================*/ + /* out: TRUE if call succeeds, + FALSE on error */ + const char* pathname, /* in: directory name as + null-terminated string */ + ibool fail_if_exists);/* in: if TRUE, pre-existing directory + is treated as an error. */ +/******************************************************************** +A simple function to open or create a file. */ +UNIV_INTERN +os_file_t +os_file_create_simple( +/*==================*/ + /* out, own: handle to the file, not defined + if error, error number can be retrieved with + os_file_get_last_error */ + const char* name, /* in: name of the file or path as a + null-terminated string */ + ulint create_mode,/* in: OS_FILE_OPEN if an existing file is + opened (if does not exist, error), or + OS_FILE_CREATE if a new file is created + (if exists, error), or + OS_FILE_CREATE_PATH if new file + (if exists, error) and subdirectories along + its path are created (if needed)*/ + ulint access_type,/* in: OS_FILE_READ_ONLY or + OS_FILE_READ_WRITE */ + ibool* success);/* out: TRUE if succeed, FALSE if error */ +/******************************************************************** +A simple function to open or create a file. 
*/ +UNIV_INTERN +os_file_t +os_file_create_simple_no_error_handling( +/*====================================*/ + /* out, own: handle to the file, not defined + if error, error number can be retrieved with + os_file_get_last_error */ + const char* name, /* in: name of the file or path as a + null-terminated string */ + ulint create_mode,/* in: OS_FILE_OPEN if an existing file + is opened (if does not exist, error), or + OS_FILE_CREATE if a new file is created + (if exists, error) */ + ulint access_type,/* in: OS_FILE_READ_ONLY, + OS_FILE_READ_WRITE, or + OS_FILE_READ_ALLOW_DELETE; the last option is + used by a backup program reading the file */ + ibool* success);/* out: TRUE if succeed, FALSE if error */ +/******************************************************************** +Tries to disable OS caching on an opened file descriptor. */ +UNIV_INTERN +void +os_file_set_nocache( +/*================*/ + int fd, /* in: file descriptor to alter */ + const char* file_name, /* in: file name, used in the + diagnostic message */ + const char* operation_name);/* in: "open" or "create"; used in the + diagnostic message */ +/******************************************************************** +Opens an existing file or creates a new. */ +UNIV_INTERN +os_file_t +os_file_create( +/*===========*/ + /* out, own: handle to the file, not defined + if error, error number can be retrieved with + os_file_get_last_error */ + const char* name, /* in: name of the file or path as a + null-terminated string */ + ulint create_mode,/* in: OS_FILE_OPEN if an existing file + is opened (if does not exist, error), or + OS_FILE_CREATE if a new file is created + (if exists, error), + OS_FILE_OVERWRITE if a new file is created + or an old overwritten; + OS_FILE_OPEN_RAW, if a raw device or disk + partition should be opened */ + ulint purpose,/* in: OS_FILE_AIO, if asynchronous, + non-buffered i/o is desired, + OS_FILE_NORMAL, if any normal file; + NOTE that it also depends on type, os_aio_.. 
+ and srv_.. variables whether we really use + async i/o or unbuffered i/o: look in the + function source code for the exact rules */ + ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */ + ibool* success);/* out: TRUE if succeed, FALSE if error */ +/*************************************************************************** +Deletes a file. The file has to be closed before calling this. */ +UNIV_INTERN +ibool +os_file_delete( +/*===========*/ + /* out: TRUE if success */ + const char* name); /* in: file path as a null-terminated string */ + +/*************************************************************************** +Deletes a file if it exists. The file has to be closed before calling this. */ +UNIV_INTERN +ibool +os_file_delete_if_exists( +/*=====================*/ + /* out: TRUE if success */ + const char* name); /* in: file path as a null-terminated string */ +/*************************************************************************** +Renames a file (can also move it to another directory). It is safest that the +file is closed before calling this function. */ +UNIV_INTERN +ibool +os_file_rename( +/*===========*/ + /* out: TRUE if success */ + const char* oldpath, /* in: old file path as a + null-terminated string */ + const char* newpath); /* in: new file path */ +/*************************************************************************** +Closes a file handle. In case of error, error number can be retrieved with +os_file_get_last_error. */ +UNIV_INTERN +ibool +os_file_close( +/*==========*/ + /* out: TRUE if success */ + os_file_t file); /* in, own: handle to a file */ +/*************************************************************************** +Closes a file handle. */ +UNIV_INTERN +ibool +os_file_close_no_error_handling( +/*============================*/ + /* out: TRUE if success */ + os_file_t file); /* in, own: handle to a file */ +/*************************************************************************** +Gets a file size. 
*/ +UNIV_INTERN +ibool +os_file_get_size( +/*=============*/ + /* out: TRUE if success */ + os_file_t file, /* in: handle to a file */ + ulint* size, /* out: least significant 32 bits of file + size */ + ulint* size_high);/* out: most significant 32 bits of size */ +/*************************************************************************** +Gets file size as a 64-bit integer ib_int64_t. */ +UNIV_INTERN +ib_int64_t +os_file_get_size_as_iblonglong( +/*===========================*/ + /* out: size in bytes, -1 if error */ + os_file_t file); /* in: handle to a file */ +/*************************************************************************** +Write the specified number of zeros to a newly created file. */ +UNIV_INTERN +ibool +os_file_set_size( +/*=============*/ + /* out: TRUE if success */ + const char* name, /* in: name of the file or path as a + null-terminated string */ + os_file_t file, /* in: handle to a file */ + ulint size, /* in: least significant 32 bits of file + size */ + ulint size_high);/* in: most significant 32 bits of size */ +/*************************************************************************** +Truncates a file at its current position. */ +UNIV_INTERN +ibool +os_file_set_eof( +/*============*/ + /* out: TRUE if success */ + FILE* file); /* in: file to be truncated */ +/*************************************************************************** +Flushes the write buffers of a given file to the disk. */ +UNIV_INTERN +ibool +os_file_flush( +/*==========*/ + /* out: TRUE if success */ + os_file_t file); /* in, own: handle to a file */ +/*************************************************************************** +Retrieves the last error number if an error occurs in a file io function. +The number should be retrieved before any other OS calls (because they may +overwrite the error number). If the number is not known to this program, +the OS error number + 100 is returned. 
*/ +UNIV_INTERN +ulint +os_file_get_last_error( +/*===================*/ + /* out: error number, or OS error + number + 100 */ + ibool report_all_errors); /* in: TRUE if we want an error message + printed of all errors */ +/*********************************************************************** +Requests a synchronous read operation. */ +UNIV_INTERN +ibool +os_file_read( +/*=========*/ + /* out: TRUE if request was + successful, FALSE if fail */ + os_file_t file, /* in: handle to a file */ + void* buf, /* in: buffer where to read */ + ulint offset, /* in: least significant 32 bits of file + offset where to read */ + ulint offset_high,/* in: most significant 32 bits of + offset */ + ulint n); /* in: number of bytes to read */ +/*********************************************************************** +Rewind file to its start, read at most size - 1 bytes from it to str, and +NUL-terminate str. All errors are silently ignored. This function is +mostly meant to be used with temporary files. */ +UNIV_INTERN +void +os_file_read_string( +/*================*/ + FILE* file, /* in: file to read from */ + char* str, /* in: buffer where to read */ + ulint size); /* in: size of buffer */ +/*********************************************************************** +Requests a synchronous positioned read operation. This function does not do +any error handling. In case of error it returns FALSE. */ +UNIV_INTERN +ibool +os_file_read_no_error_handling( +/*===========================*/ + /* out: TRUE if request was + successful, FALSE if fail */ + os_file_t file, /* in: handle to a file */ + void* buf, /* in: buffer where to read */ + ulint offset, /* in: least significant 32 bits of file + offset where to read */ + ulint offset_high,/* in: most significant 32 bits of + offset */ + ulint n); /* in: number of bytes to read */ + +/*********************************************************************** +Requests a synchronous write operation. 
*/ +UNIV_INTERN +ibool +os_file_write( +/*==========*/ + /* out: TRUE if request was + successful, FALSE if fail */ + const char* name, /* in: name of the file or path as a + null-terminated string */ + os_file_t file, /* in: handle to a file */ + const void* buf, /* in: buffer from which to write */ + ulint offset, /* in: least significant 32 bits of file + offset where to write */ + ulint offset_high,/* in: most significant 32 bits of + offset */ + ulint n); /* in: number of bytes to write */ +/*********************************************************************** +Check the existence and type of the given file. */ +UNIV_INTERN +ibool +os_file_status( +/*===========*/ + /* out: TRUE if call succeeded */ + const char* path, /* in: pathname of the file */ + ibool* exists, /* out: TRUE if file exists */ + os_file_type_t* type); /* out: type of the file (if it exists) */ +/******************************************************************** +The function os_file_dirname returns a directory component of a +null-terminated pathname string. In the usual case, dirname returns +the string up to, but not including, the final '/', and basename +is the component following the final '/'. Trailing '/' +characters are not counted as part of the pathname. + +If path does not contain a slash, dirname returns the string ".". + +Concatenating the string returned by dirname, a "/", and the basename +yields a complete pathname. + +The return value is a copy of the directory component of the pathname. +The copy is allocated from the heap. It is the caller's responsibility +to free it after it is no longer needed. + +The following list of examples (taken from SUSv2) shows the strings +returned by dirname and basename for different paths: + + path dirname basename + "/usr/lib" "/usr" "lib" + "/usr/" "/" "usr" + "usr" "." "usr" + "/" "/" "/" + "." "." "." + ".." "." ".." 
+*/ +UNIV_INTERN +char* +os_file_dirname( +/*============*/ + /* out, own: directory component of the + pathname */ + const char* path); /* in: pathname */ +/******************************************************************** +Creates all missing subdirectories along the given path. */ +UNIV_INTERN +ibool +os_file_create_subdirs_if_needed( +/*=============================*/ + /* out: TRUE if call succeeded + FALSE otherwise */ + const char* path); /* in: path name */ +/**************************************************************************** +Initializes the asynchronous io system. Creates separate aio array for +non-ibuf read and write, a third aio array for the ibuf i/o, with just one +segment, two aio arrays for log reads and writes with one segment, and a +synchronous aio array of the specified size. The combined number of segments +in the three first aio arrays is the parameter n_segments given to the +function. The caller must create an i/o handler thread for each segment in +the four first arrays, but not for the sync aio array. */ +UNIV_INTERN +void +os_aio_init( +/*========*/ + ulint n, /* in: maximum number of pending aio operations + allowed; n must be divisible by n_segments */ +// ulint n_segments, /* in: combined number of segments in the four +// first aio arrays; must be >= 4 */ + ulint n_read_threads, /* n_segments == 2 + n_read_threads + n_write_threads */ + ulint n_write_threads, /**/ + ulint n_slots_sync); /* in: number of slots in the sync aio array */ +/*********************************************************************** +Requests an asynchronous i/o operation. 
*/ +UNIV_INTERN +ibool +os_aio( +/*===*/ + /* out: TRUE if request was queued + successfully, FALSE if fail */ + ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */ + ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed + to OS_AIO_SIMULATED_WAKE_LATER: the + last flag advises this function not to wake + i/o-handler threads, but the caller will + do the waking explicitly later, in this + way the caller can post several requests in + a batch; NOTE that the batch must not be + so big that it exhausts the slots in aio + arrays! NOTE that a simulated batch + may introduce hidden chances of deadlocks, + because i/os are not actually handled until + all have been posted: use with great + caution! */ + const char* name, /* in: name of the file or path as a + null-terminated string */ + os_file_t file, /* in: handle to a file */ + void* buf, /* in: buffer where to read or from which + to write */ + ulint offset, /* in: least significant 32 bits of file + offset where to read or write */ + ulint offset_high, /* in: most significant 32 bits of + offset */ + ulint n, /* in: number of bytes to read or write */ + fil_node_t* message1,/* in: messages for the aio handler (these + can be used to identify a completed aio + operation); if mode is OS_AIO_SYNC, these + are ignored */ + void* message2); +/**************************************************************************** +Wakes up all async i/o threads so that they know to exit themselves in +shutdown. */ +UNIV_INTERN +void +os_aio_wake_all_threads_at_shutdown(void); +/*=====================================*/ +/**************************************************************************** +Waits until there are no pending writes in os_aio_write_array. There can +be other, synchronous, pending writes. 
*/ +UNIV_INTERN +void +os_aio_wait_until_no_pending_writes(void); +/*=====================================*/ +/************************************************************************** +Wakes up simulated aio i/o-handler threads if they have something to do. */ +UNIV_INTERN +void +os_aio_simulated_wake_handler_threads(void); +/*=======================================*/ +/************************************************************************** +This function can be called if one wants to post a batch of reads and +prefers an i/o-handler thread to handle them all at once later. You must +call os_aio_simulated_wake_handler_threads later to ensure the threads +are not left sleeping! */ +UNIV_INTERN +void +os_aio_simulated_put_read_threads_to_sleep(void); +/*============================================*/ + +#ifdef WIN_ASYNC_IO +/************************************************************************** +This function is only used in Windows asynchronous i/o. +Waits for an aio operation to complete. This function is used to wait +for the completed requests. The aio array of pending requests is divided +into segments. The thread specifies which segment or slot it wants to wait +for. NOTE: this function will also take care of freeing the aio slot, +therefore no other thread is allowed to do the freeing! 
*/ +UNIV_INTERN +ibool +os_aio_windows_handle( +/*==================*/ + /* out: TRUE if the aio operation succeeded */ + ulint segment, /* in: the number of the segment in the aio + arrays to wait for; segment 0 is the ibuf + i/o thread, segment 1 the log i/o thread, + then follow the non-ibuf read threads, and as + the last are the non-ibuf write threads; if + this is ULINT_UNDEFINED, then it means that + sync aio is used, and this parameter is + ignored */ + ulint pos, /* this parameter is used only in sync aio: + wait for the aio slot at this position */ + fil_node_t**message1, /* out: the messages passed with the aio + request; note that also in the case where + the aio operation failed, these output + parameters are valid and can be used to + restart the operation, for example */ + void** message2, + ulint* type); /* out: OS_FILE_WRITE or ..._READ */ +#endif + +/************************************************************************** +Does simulated aio. This function should be called by an i/o-handler +thread. */ +UNIV_INTERN +ibool +os_aio_simulated_handle( +/*====================*/ + /* out: TRUE if the aio operation succeeded */ + ulint segment, /* in: the number of the segment in the aio + arrays to wait for; segment 0 is the ibuf + i/o thread, segment 1 the log i/o thread, + then follow the non-ibuf read threads, and as + the last are the non-ibuf write threads */ + fil_node_t**message1, /* out: the messages passed with the aio + request; note that also in the case where + the aio operation failed, these output + parameters are valid and can be used to + restart the operation, for example */ + void** message2, + ulint* type); /* out: OS_FILE_WRITE or ..._READ */ +/************************************************************************** +Validates the consistency of the aio system. 
*/ +UNIV_INTERN +ibool +os_aio_validate(void); +/*=================*/ + /* out: TRUE if ok */ +/************************************************************************** +Prints info of the aio arrays. */ +UNIV_INTERN +void +os_aio_print( +/*=========*/ + FILE* file); /* in: file where to print */ +/************************************************************************** +Refreshes the statistics used to print per-second averages. */ +UNIV_INTERN +void +os_aio_refresh_stats(void); +/*======================*/ + +#ifdef UNIV_DEBUG +/************************************************************************** +Checks that all slots in the system have been freed, that is, there are +no pending io operations. */ +UNIV_INTERN +ibool +os_aio_all_slots_free(void); +/*=======================*/ +#endif /* UNIV_DEBUG */ + +/*********************************************************************** +This function returns information about the specified file */ +UNIV_INTERN +ibool +os_file_get_status( +/*===============*/ + /* out: TRUE if stat + information found */ + const char* path, /* in: pathname of the file */ + os_file_stat_t* stat_info); /* information of a file in a + directory */ + +#if !defined(UNIV_HOTBACKUP) && !defined(__NETWARE__) +/************************************************************************* +Creates a temporary file that will be deleted on close. +This function is defined in ha_innodb.cc. */ +UNIV_INTERN +int +innobase_mysql_tmpfile(void); +/*========================*/ + /* out: temporary file descriptor, or < 0 on error */ +#endif /* !UNIV_HOTBACKUP && !__NETWARE__ */ + +#endif diff --git a/storage/xtradb/include/os0proc.h b/storage/xtradb/include/os0proc.h new file mode 100644 index 00000000000..19b0b112638 --- /dev/null +++ b/storage/xtradb/include/os0proc.h @@ -0,0 +1,84 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The interface to the operating system +process control primitives + +Created 9/30/1995 Heikki Tuuri +*******************************************************/ + +#ifndef os0proc_h +#define os0proc_h + +#include "univ.i" + +#ifdef UNIV_LINUX +#include <sys/ipc.h> +#include <sys/shm.h> +#endif + +typedef void* os_process_t; +typedef unsigned long int os_process_id_t; + +extern ibool os_use_large_pages; +/* Large page size. This may be a boot-time option on some platforms */ +extern ulint os_large_page_size; + +/******************************************************************** +Converts the current process id to a number. It is not guaranteed that the +number is unique. In Linux returns the 'process number' of the current +thread. That number is the same as one sees in 'top', for example. In Linux +the thread id is not the same as one sees in 'top'. */ +UNIV_INTERN +ulint +os_proc_get_number(void); +/*====================*/ +/******************************************************************** +Allocates large pages memory. 
*/ +UNIV_INTERN +void* +os_mem_alloc_large( +/*===============*/ + /* out: allocated memory */ + ulint* n); /* in/out: number of bytes */ +/******************************************************************** +Frees large pages memory. */ +UNIV_INTERN +void +os_mem_free_large( +/*==============*/ + void *ptr, /* in: pointer returned by + os_mem_alloc_large() */ + ulint size); /* in: size returned by + os_mem_alloc_large() */ +/******************************************************************** +Sets the priority boost for threads released from waiting within the current +process. */ +UNIV_INTERN +void +os_process_set_priority_boost( +/*==========================*/ + ibool do_boost); /* in: TRUE if priority boost should be done, + FALSE if not */ + +#ifndef UNIV_NONINL +#include "os0proc.ic" +#endif + +#endif diff --git a/storage/xtradb/include/os0proc.ic b/storage/xtradb/include/os0proc.ic new file mode 100644 index 00000000000..9f1fb01866d --- /dev/null +++ b/storage/xtradb/include/os0proc.ic @@ -0,0 +1,26 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The interface to the operating system +process control primitives + +Created 9/30/1995 Heikki Tuuri +*******************************************************/ + + diff --git a/storage/xtradb/include/os0sync.h b/storage/xtradb/include/os0sync.h new file mode 100644 index 00000000000..7e058266762 --- /dev/null +++ b/storage/xtradb/include/os0sync.h @@ -0,0 +1,309 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2008, Google Inc. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The interface to the operating system +synchronization primitives. + +Created 9/6/1995 Heikki Tuuri +*******************************************************/ + +#ifndef os0sync_h +#define os0sync_h + +#include "univ.i" +#include "ut0lst.h" + +#ifdef __WIN__ + +#define os_fast_mutex_t CRITICAL_SECTION + +typedef HANDLE os_native_event_t; + +typedef struct os_event_struct os_event_struct_t; +typedef os_event_struct_t* os_event_t; + +struct os_event_struct { + os_native_event_t handle; + /* Windows event */ + UT_LIST_NODE_T(os_event_struct_t) os_event_list; + /* list of all created events */ +}; +#else +typedef pthread_mutex_t os_fast_mutex_t; + +typedef struct os_event_struct os_event_struct_t; +typedef os_event_struct_t* os_event_t; + +struct os_event_struct { + os_fast_mutex_t os_mutex; /* this mutex protects the next + fields */ + ibool is_set; /* this is TRUE when the event is + in the signaled state, i.e., a thread + does not stop if it tries to wait for + this event */ + ib_int64_t signal_count; /* this is incremented each time + the event becomes signaled */ + pthread_cond_t cond_var; /* condition variable is used in + waiting for the event */ + UT_LIST_NODE_T(os_event_struct_t) os_event_list; + /* list of all created events */ +}; +#endif + +typedef struct os_mutex_struct os_mutex_str_t; +typedef os_mutex_str_t* os_mutex_t; + +#define OS_SYNC_INFINITE_TIME ((ulint)(-1)) + +#define OS_SYNC_TIME_EXCEEDED 1 + +/* Mutex protecting counts and the event and OS 'slow' mutex lists */ +extern os_mutex_t os_sync_mutex; + +/* This is incremented by 1 in os_thread_create and decremented by 1 in +os_thread_exit */ +extern ulint 
os_thread_count; + +extern ulint os_event_count; +extern ulint os_mutex_count; +extern ulint os_fast_mutex_count; + +/************************************************************* +Initializes global event and OS 'slow' mutex lists. */ +UNIV_INTERN +void +os_sync_init(void); +/*==============*/ +/************************************************************* +Frees created events and OS 'slow' mutexes. */ +UNIV_INTERN +void +os_sync_free(void); +/*==============*/ +/************************************************************* +Creates an event semaphore, i.e., a semaphore which may just have two states: +signaled and nonsignaled. The created event is manual reset: it must be reset +explicitly by calling os_event_reset(). */ +UNIV_INTERN +os_event_t +os_event_create( +/*============*/ + /* out: the event handle */ + const char* name); /* in: the name of the event, if NULL + the event is created without a name */ +#ifdef __WIN__ +/************************************************************* +Creates an auto-reset event semaphore, i.e., an event which is automatically +reset when a single thread is released. Works only in Windows. */ +UNIV_INTERN +os_event_t +os_event_create_auto( +/*=================*/ + /* out: the event handle */ + const char* name); /* in: the name of the event, if NULL + the event is created without a name */ +#endif +/************************************************************** +Sets an event semaphore to the signaled state: lets waiting threads +proceed. */ +UNIV_INTERN +void +os_event_set( +/*=========*/ + os_event_t event); /* in: event to set */ +/************************************************************** +Resets an event semaphore to the nonsignaled state. Waiting threads will +stop to wait for the event. +The return value should be passed to os_event_wait_low() if it is desired +that this thread should not wait in case of an intervening call to +os_event_set() between this os_event_reset() and the +os_event_wait_low() call. 
See comments for os_event_wait_low(). */ +UNIV_INTERN +ib_int64_t +os_event_reset( +/*===========*/ + os_event_t event); /* in: event to reset */ +/************************************************************** +Frees an event object. */ +UNIV_INTERN +void +os_event_free( +/*==========*/ + os_event_t event); /* in: event to free */ + +/************************************************************** +Waits for an event object until it is in the signaled state. If +srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the +waiting thread when the event becomes signaled (or immediately if the +event is already in the signaled state). + +Typically, if the event has been signalled after the os_event_reset() +we'll return immediately because event->is_set == TRUE. +There are, however, situations (e.g.: sync_array code) where we may +lose this information. For example: + +thread A calls os_event_reset() +thread B calls os_event_set() [event->is_set == TRUE] +thread C calls os_event_reset() [event->is_set == FALSE] +thread A calls os_event_wait() [infinite wait!] +thread C calls os_event_wait() [infinite wait!] + +Where such a scenario is possible, to avoid infinite wait, the +value returned by os_event_reset() should be passed in as +reset_sig_count. */ +UNIV_INTERN +void +os_event_wait_low( +/*==============*/ + os_event_t event, /* in: event to wait */ + ib_int64_t reset_sig_count);/* in: zero or the value + returned by previous call of + os_event_reset(). */ + +#define os_event_wait(event) os_event_wait_low(event, 0) + +/************************************************************** +Waits for an event object until it is in the signaled state or +a timeout is exceeded. In Unix the timeout is always infinite. 
*/ +UNIV_INTERN +ulint +os_event_wait_time( +/*===============*/ + /* out: 0 if success, + OS_SYNC_TIME_EXCEEDED if timeout + was exceeded */ + os_event_t event, /* in: event to wait */ + ulint time); /* in: timeout in microseconds, or + OS_SYNC_INFINITE_TIME */ +#ifdef __WIN__ +/************************************************************** +Waits for any event in an OS native event array. Returns if even a single +one is signaled or becomes signaled. */ +UNIV_INTERN +ulint +os_event_wait_multiple( +/*===================*/ + /* out: index of the event + which was signaled */ + ulint n, /* in: number of events in the + array */ + os_native_event_t* native_event_array); + /* in: pointer to an array of event + handles */ +#endif +/************************************************************* +Creates an operating system mutex semaphore. Because these are slow, the +mutex semaphore of InnoDB itself (mutex_t) should be used where possible. */ +UNIV_INTERN +os_mutex_t +os_mutex_create( +/*============*/ + /* out: the mutex handle */ + const char* name); /* in: the name of the mutex, if NULL + the mutex is created without a name */ +/************************************************************** +Acquires ownership of a mutex semaphore. */ +UNIV_INTERN +void +os_mutex_enter( +/*===========*/ + os_mutex_t mutex); /* in: mutex to acquire */ +/************************************************************** +Releases ownership of a mutex. */ +UNIV_INTERN +void +os_mutex_exit( +/*==========*/ + os_mutex_t mutex); /* in: mutex to release */ +/************************************************************** +Frees a mutex object. */ +UNIV_INTERN +void +os_mutex_free( +/*==========*/ + os_mutex_t mutex); /* in: mutex to free */ +/************************************************************** +Acquires ownership of a fast mutex. Currently in Windows this is the same +as os_fast_mutex_lock! 
*/ +UNIV_INLINE +ulint +os_fast_mutex_trylock( +/*==================*/ + /* out: 0 if success, != 0 if + was reserved by another + thread */ + os_fast_mutex_t* fast_mutex); /* in: mutex to acquire */ +/************************************************************** +Releases ownership of a fast mutex. */ +UNIV_INTERN +void +os_fast_mutex_unlock( +/*=================*/ + os_fast_mutex_t* fast_mutex); /* in: mutex to release */ +/************************************************************* +Initializes an operating system fast mutex semaphore. */ +UNIV_INTERN +void +os_fast_mutex_init( +/*===============*/ + os_fast_mutex_t* fast_mutex); /* in: fast mutex */ +/************************************************************** +Acquires ownership of a fast mutex. */ +UNIV_INTERN +void +os_fast_mutex_lock( +/*===============*/ + os_fast_mutex_t* fast_mutex); /* in: mutex to acquire */ +/************************************************************** +Frees a fast mutex object. */ +UNIV_INTERN +void +os_fast_mutex_free( +/*===============*/ + os_fast_mutex_t* fast_mutex); /* in: mutex to free */ + +#ifdef HAVE_GCC_ATOMIC_BUILTINS +/************************************************************** +Atomic compare-and-swap for InnoDB. Currently requires GCC atomic builtins. +Returns true if swapped, ptr is pointer to target, old_val is value to +compare to, new_val is the value to swap in. */ +#define os_compare_and_swap(ptr, old_val, new_val) \ + __sync_bool_compare_and_swap(ptr, old_val, new_val) + +/************************************************************** +Atomic increment for InnoDB. Currently requires GCC atomic builtins. +Returns the resulting value, ptr is pointer to target, amount is the +amount of increment. 
*/ +#define os_atomic_increment(ptr, amount) \ + __sync_add_and_fetch(ptr, amount) + +#endif /* HAVE_GCC_ATOMIC_BUILTINS */ + +#ifndef UNIV_NONINL +#include "os0sync.ic" +#endif + +#endif diff --git a/storage/xtradb/include/os0sync.ic b/storage/xtradb/include/os0sync.ic new file mode 100644 index 00000000000..5c03d184c7c --- /dev/null +++ b/storage/xtradb/include/os0sync.ic @@ -0,0 +1,62 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The interface to the operating system synchronization primitives. + +Created 9/6/1995 Heikki Tuuri +*******************************************************/ + +#ifdef __WIN__ +#include <winbase.h> +#endif + +/************************************************************** +Acquires ownership of a fast mutex. Currently in Windows this is the same +as os_fast_mutex_lock! 
*/ +UNIV_INLINE +ulint +os_fast_mutex_trylock( +/*==================*/ + /* out: 0 if success, != 0 if + was reserved by another + thread */ + os_fast_mutex_t* fast_mutex) /* in: mutex to acquire */ +{ +#ifdef __WIN__ + EnterCriticalSection(fast_mutex); + + return(0); +#else +#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10) + /* Since the hot backup version is standalone, MySQL does not redefine + pthread_mutex_trylock for HP-UX-10.20, and consequently we must invert + the return value here */ + + return((ulint) (1 - pthread_mutex_trylock(fast_mutex))); +#else + /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock + so that it returns 0 on success. In the operating system + libraries, HP-UX-10.20 follows the old Posix 1003.4a Draft 4 and + returns 1 on success (but MySQL remaps that to 0), while Linux, + FreeBSD, Solaris, AIX, Tru64 Unix, HP-UX-11.0 return 0 on success. */ + + return((ulint) pthread_mutex_trylock(fast_mutex)); +#endif +#endif +} diff --git a/storage/xtradb/include/os0thread.h b/storage/xtradb/include/os0thread.h new file mode 100644 index 00000000000..863596bfa84 --- /dev/null +++ b/storage/xtradb/include/os0thread.h @@ -0,0 +1,158 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The interface to the operating system +process and thread control primitives + +Created 9/8/1995 Heikki Tuuri +*******************************************************/ + +#ifndef os0thread_h +#define os0thread_h + +#include "univ.i" + +/* Maximum number of threads which can be created in the program; +this is also the size of the wait slot array for MySQL threads which +can wait inside InnoDB */ + +#define OS_THREAD_MAX_N srv_max_n_threads + + +/* Possible fixed priorities for threads */ +#define OS_THREAD_PRIORITY_NONE 100 +#define OS_THREAD_PRIORITY_BACKGROUND 1 +#define OS_THREAD_PRIORITY_NORMAL 2 +#define OS_THREAD_PRIORITY_ABOVE_NORMAL 3 + +#ifdef __WIN__ +typedef void* os_thread_t; +typedef ulint os_thread_id_t; /* In Windows the thread id + is an unsigned long int */ +#else +typedef pthread_t os_thread_t; +typedef os_thread_t os_thread_id_t; /* In Unix we use the thread + handle itself as the id of + the thread */ +#endif + +/* Define a function pointer type to use in a typecast */ +typedef void* (*os_posix_f_t) (void*); + +/******************************************************************* +Compares two thread ids for equality. */ +UNIV_INTERN +ibool +os_thread_eq( +/*=========*/ + /* out: TRUE if equal */ + os_thread_id_t a, /* in: OS thread or thread id */ + os_thread_id_t b); /* in: OS thread or thread id */ +/******************************************************************** +Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is +unique for the thread though! 
*/ +UNIV_INTERN +ulint +os_thread_pf( +/*=========*/ + /* out: unsigned long int */ + os_thread_id_t a); /* in: thread or thread id */ +/******************************************************************** +Creates a new thread of execution. The execution starts from +the function given. The start function takes a void* parameter +and returns a ulint. +NOTE: We count the number of threads in os_thread_exit(). A created +thread should always use that to exit and not use return() to exit. */ +UNIV_INTERN +os_thread_t +os_thread_create( +/*=============*/ + /* out: handle to the thread */ +#ifndef __WIN__ + os_posix_f_t start_f, +#else + ulint (*start_f)(void*), /* in: pointer to function + from which to start */ +#endif + void* arg, /* in: argument to start + function */ + os_thread_id_t* thread_id); /* out: id of the created + thread, or NULL */ + +/********************************************************************* +Exits the current thread. */ +UNIV_INTERN +void +os_thread_exit( +/*===========*/ + void* exit_value); /* in: exit value; in Windows this void* + is cast as a DWORD */ +/********************************************************************* +Returns the thread identifier of current thread. */ +UNIV_INTERN +os_thread_id_t +os_thread_get_curr_id(void); +/*========================*/ +/********************************************************************* +Returns handle to the current thread. */ +UNIV_INTERN +os_thread_t +os_thread_get_curr(void); +/*====================*/ +/********************************************************************* +Advises the os to give up remainder of the thread's time slice. */ +UNIV_INTERN +void +os_thread_yield(void); +/*=================*/ +/********************************************************************* +The thread sleeps at least the time given in microseconds. 
*/ +UNIV_INTERN +void +os_thread_sleep( +/*============*/ + ulint tm); /* in: time in microseconds */ +/********************************************************************** +Gets a thread priority. */ +UNIV_INTERN +ulint +os_thread_get_priority( +/*===================*/ + /* out: priority */ + os_thread_t handle);/* in: OS handle to the thread */ +/********************************************************************** +Sets a thread priority. */ +UNIV_INTERN +void +os_thread_set_priority( +/*===================*/ + os_thread_t handle, /* in: OS handle to the thread */ + ulint pri); /* in: priority: one of OS_THREAD_PRIORITY_... */ +/********************************************************************** +Gets the last operating system error code for the calling thread. */ +UNIV_INTERN +ulint +os_thread_get_last_error(void); +/*==========================*/ + +#ifndef UNIV_NONINL +#include "os0thread.ic" +#endif + +#endif diff --git a/storage/xtradb/include/os0thread.ic b/storage/xtradb/include/os0thread.ic new file mode 100644 index 00000000000..a86b203809c --- /dev/null +++ b/storage/xtradb/include/os0thread.ic @@ -0,0 +1,24 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The interface to the operating system +process and thread control primitives + +Created 9/8/1995 Heikki Tuuri +*******************************************************/ diff --git a/storage/xtradb/include/page0cur.h b/storage/xtradb/include/page0cur.h new file mode 100644 index 00000000000..960ecdddf4e --- /dev/null +++ b/storage/xtradb/include/page0cur.h @@ -0,0 +1,346 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/************************************************************************ +The page cursor + +Created 10/4/1994 Heikki Tuuri +*************************************************************************/ + +#ifndef page0cur_h +#define page0cur_h + +#include "univ.i" + +#include "buf0types.h" +#include "page0page.h" +#include "rem0rec.h" +#include "data0data.h" +#include "mtr0mtr.h" + + +#define PAGE_CUR_ADAPT + +/* Page cursor search modes; the values must be in this order! */ + +#define PAGE_CUR_UNSUPP 0 +#define PAGE_CUR_G 1 +#define PAGE_CUR_GE 2 +#define PAGE_CUR_L 3 +#define PAGE_CUR_LE 4 +/*#define PAGE_CUR_LE_OR_EXTENDS 5*/ /* This is a search mode used in + "column LIKE 'abc%' ORDER BY column DESC"; + we have to find strings which are <= 'abc' or + which extend it */ +#ifdef UNIV_SEARCH_DEBUG +# define PAGE_CUR_DBG 6 /* As PAGE_CUR_LE, but skips search shortcut */ +#endif /* UNIV_SEARCH_DEBUG */ + +#ifdef UNIV_DEBUG +/************************************************************* +Gets pointer to the page frame where the cursor is positioned. */ +UNIV_INLINE +page_t* +page_cur_get_page( +/*==============*/ + /* out: page */ + page_cur_t* cur); /* in: page cursor */ +/************************************************************* +Gets pointer to the buffer block where the cursor is positioned. */ +UNIV_INLINE +buf_block_t* +page_cur_get_block( +/*===============*/ + /* out: page */ + page_cur_t* cur); /* in: page cursor */ +/************************************************************* +Gets pointer to the page frame where the cursor is positioned. 
*/ +UNIV_INLINE +page_zip_des_t* +page_cur_get_page_zip( +/*==================*/ + /* out: page */ + page_cur_t* cur); /* in: page cursor */ +/************************************************************* +Gets the record where the cursor is positioned. */ +UNIV_INLINE +rec_t* +page_cur_get_rec( +/*=============*/ + /* out: record */ + page_cur_t* cur); /* in: page cursor */ +#else /* UNIV_DEBUG */ +# define page_cur_get_page(cur) page_align((cur)->rec) +# define page_cur_get_block(cur) (cur)->block +# define page_cur_get_page_zip(cur) buf_block_get_page_zip((cur)->block) +# define page_cur_get_rec(cur) (cur)->rec +#endif /* UNIV_DEBUG */ +/************************************************************* +Sets the cursor object to point before the first user record +on the page. */ +UNIV_INLINE +void +page_cur_set_before_first( +/*======================*/ + const buf_block_t* block, /* in: index page */ + page_cur_t* cur); /* in: cursor */ +/************************************************************* +Sets the cursor object to point after the last user record on +the page. */ +UNIV_INLINE +void +page_cur_set_after_last( +/*====================*/ + const buf_block_t* block, /* in: index page */ + page_cur_t* cur); /* in: cursor */ +/************************************************************* +Returns TRUE if the cursor is before first user record on page. */ +UNIV_INLINE +ibool +page_cur_is_before_first( +/*=====================*/ + /* out: TRUE if at start */ + const page_cur_t* cur); /* in: cursor */ +/************************************************************* +Returns TRUE if the cursor is after last user record. */ +UNIV_INLINE +ibool +page_cur_is_after_last( +/*===================*/ + /* out: TRUE if at end */ + const page_cur_t* cur); /* in: cursor */ +/************************************************************** +Positions the cursor on the given record. 
*/ +UNIV_INLINE +void +page_cur_position( +/*==============*/ + const rec_t* rec, /* in: record on a page */ + const buf_block_t* block, /* in: buffer block containing + the record */ + page_cur_t* cur); /* out: page cursor */ +/************************************************************** +Invalidates a page cursor by setting the record pointer NULL. */ +UNIV_INLINE +void +page_cur_invalidate( +/*================*/ + page_cur_t* cur); /* out: page cursor */ +/************************************************************** +Moves the cursor to the next record on page. */ +UNIV_INLINE +void +page_cur_move_to_next( +/*==================*/ + page_cur_t* cur); /* in/out: cursor; must not be after last */ +/************************************************************** +Moves the cursor to the previous record on page. */ +UNIV_INLINE +void +page_cur_move_to_prev( +/*==================*/ + page_cur_t* cur); /* in/out: cursor; not before first */ +/*************************************************************** +Inserts a record next to page cursor. Returns pointer to inserted record if +succeed, i.e., enough space available, NULL otherwise. The cursor stays at +the same logical position, but the physical position may change if it is +pointing to a compressed page that was reorganized. */ +UNIV_INLINE +rec_t* +page_cur_tuple_insert( +/*==================*/ + /* out: pointer to record if succeed, NULL + otherwise */ + page_cur_t* cursor, /* in/out: a page cursor */ + const dtuple_t* tuple, /* in: pointer to a data tuple */ + dict_index_t* index, /* in: record descriptor */ + ulint n_ext, /* in: number of externally stored columns */ + mtr_t* mtr); /* in: mini-transaction handle, or NULL */ +/*************************************************************** +Inserts a record next to page cursor. Returns pointer to inserted record if +succeed, i.e., enough space available, NULL otherwise. 
The cursor stays at +the same logical position, but the physical position may change if it is +pointing to a compressed page that was reorganized. */ +UNIV_INLINE +rec_t* +page_cur_rec_insert( +/*================*/ + /* out: pointer to record if succeed, NULL + otherwise */ + page_cur_t* cursor, /* in/out: a page cursor */ + const rec_t* rec, /* in: record to insert */ + dict_index_t* index, /* in: record descriptor */ + ulint* offsets,/* in/out: rec_get_offsets(rec, index) */ + mtr_t* mtr); /* in: mini-transaction handle, or NULL */ +/*************************************************************** +Inserts a record next to page cursor on an uncompressed page. +Returns pointer to inserted record if succeed, i.e., enough +space available, NULL otherwise. The cursor stays at the same position. */ +UNIV_INTERN +rec_t* +page_cur_insert_rec_low( +/*====================*/ + /* out: pointer to record if succeed, NULL + otherwise */ + rec_t* current_rec,/* in: pointer to current record after + which the new record is inserted */ + dict_index_t* index, /* in: record descriptor */ + const rec_t* rec, /* in: pointer to a physical record */ + ulint* offsets,/* in/out: rec_get_offsets(rec, index) */ + mtr_t* mtr); /* in: mini-transaction handle, or NULL */ +/*************************************************************** +Inserts a record next to page cursor on a compressed and uncompressed +page. Returns pointer to inserted record if succeed, i.e., +enough space available, NULL otherwise. +The cursor stays at the same position. 
*/ +UNIV_INTERN +rec_t* +page_cur_insert_rec_zip( +/*====================*/ + /* out: pointer to record if succeed, NULL + otherwise */ + rec_t** current_rec,/* in/out: pointer to current record after + which the new record is inserted */ + buf_block_t* block, /* in: buffer block of *current_rec */ + dict_index_t* index, /* in: record descriptor */ + const rec_t* rec, /* in: pointer to a physical record */ + ulint* offsets,/* in/out: rec_get_offsets(rec, index) */ + mtr_t* mtr); /* in: mini-transaction handle, or NULL */ +/***************************************************************** +Copies records from page to a newly created page, from a given record onward, +including that record. Infimum and supremum records are not copied. */ +UNIV_INTERN +void +page_copy_rec_list_end_to_created_page( +/*===================================*/ + page_t* new_page, /* in/out: index page to copy to */ + rec_t* rec, /* in: first record to copy */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ +/*************************************************************** +Deletes a record at the page cursor. The cursor is moved to the +next record after the deleted one. */ +UNIV_INTERN +void +page_cur_delete_rec( +/*================*/ + page_cur_t* cursor, /* in/out: a page cursor */ + dict_index_t* index, /* in: record descriptor */ + const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */ + mtr_t* mtr); /* in: mini-transaction handle */ +/******************************************************************** +Searches the right position for a page cursor. 
*/ +UNIV_INLINE +ulint +page_cur_search( +/*============*/ + /* out: number of matched + fields on the left */ + const buf_block_t* block, /* in: buffer block */ + const dict_index_t* index, /* in: record descriptor */ + const dtuple_t* tuple, /* in: data tuple */ + ulint mode, /* in: PAGE_CUR_L, + PAGE_CUR_LE, PAGE_CUR_G, or + PAGE_CUR_GE */ + page_cur_t* cursor);/* out: page cursor */ +/******************************************************************** +Searches the right position for a page cursor. */ +UNIV_INTERN +void +page_cur_search_with_match( +/*=======================*/ + const buf_block_t* block, /* in: buffer block */ + const dict_index_t* index, /* in: record descriptor */ + const dtuple_t* tuple, /* in: data tuple */ + ulint mode, /* in: PAGE_CUR_L, + PAGE_CUR_LE, PAGE_CUR_G, or + PAGE_CUR_GE */ + ulint* iup_matched_fields, + /* in/out: already matched + fields in upper limit record */ + ulint* iup_matched_bytes, + /* in/out: already matched + bytes in a field not yet + completely matched */ + ulint* ilow_matched_fields, + /* in/out: already matched + fields in lower limit record */ + ulint* ilow_matched_bytes, + /* in/out: already matched + bytes in a field not yet + completely matched */ + page_cur_t* cursor);/* out: page cursor */ +/*************************************************************** +Positions a page cursor on a randomly chosen user record on a page. If there +are no user records, sets the cursor on the infimum record. */ +UNIV_INTERN +void +page_cur_open_on_rnd_user_rec( +/*==========================*/ + buf_block_t* block, /* in: page */ + page_cur_t* cursor);/* out: page cursor */ +/*************************************************************** +Parses a log record of a record insert on a page. 
*/ +UNIV_INTERN +byte* +page_cur_parse_insert_rec( +/*======================*/ + /* out: end of log record or NULL */ + ibool is_short,/* in: TRUE if short inserts */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + buf_block_t* block, /* in: page or NULL */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr or NULL */ +/************************************************************** +Parses a log record of copying a record list end to a new created page. */ +UNIV_INTERN +byte* +page_parse_copy_rec_list_to_created_page( +/*=====================================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + buf_block_t* block, /* in: page or NULL */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr or NULL */ +/*************************************************************** +Parses log record of a record delete on a page. */ +UNIV_INTERN +byte* +page_cur_parse_delete_rec( +/*======================*/ + /* out: pointer to record end or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + buf_block_t* block, /* in: page or NULL */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr or NULL */ + +/* Index page cursor */ + +struct page_cur_struct{ + byte* rec; /* pointer to a record on page */ + buf_block_t* block; /* pointer to the block containing rec */ +}; + +#ifndef UNIV_NONINL +#include "page0cur.ic" +#endif + +#endif diff --git a/storage/xtradb/include/page0cur.ic b/storage/xtradb/include/page0cur.ic new file mode 100644 index 00000000000..9cf10ea5e3f --- /dev/null +++ b/storage/xtradb/include/page0cur.ic @@ -0,0 +1,300 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/************************************************************************ +The page cursor + +Created 10/4/1994 Heikki Tuuri +*************************************************************************/ + +#include "page0page.h" +#include "buf0types.h" + +#ifdef UNIV_DEBUG +/************************************************************* +Gets pointer to the page frame where the cursor is positioned. */ +UNIV_INLINE +page_t* +page_cur_get_page( +/*==============*/ + /* out: page */ + page_cur_t* cur) /* in: page cursor */ +{ + ut_ad(cur); + ut_ad(page_align(cur->rec) == cur->block->frame); + + return(page_align(cur->rec)); +} + +/************************************************************* +Gets pointer to the buffer block where the cursor is positioned. */ +UNIV_INLINE +buf_block_t* +page_cur_get_block( +/*===============*/ + /* out: page */ + page_cur_t* cur) /* in: page cursor */ +{ + ut_ad(cur); + ut_ad(page_align(cur->rec) == cur->block->frame); + return(cur->block); +} + +/************************************************************* +Gets pointer to the page frame where the cursor is positioned. 
*/ +UNIV_INLINE +page_zip_des_t* +page_cur_get_page_zip( +/*==================*/ + /* out: page */ + page_cur_t* cur) /* in: page cursor */ +{ + return(buf_block_get_page_zip(page_cur_get_block(cur))); +} + +/************************************************************* +Gets the record where the cursor is positioned. */ +UNIV_INLINE +rec_t* +page_cur_get_rec( +/*=============*/ + /* out: record */ + page_cur_t* cur) /* in: page cursor */ +{ + ut_ad(cur); + ut_ad(page_align(cur->rec) == cur->block->frame); + + return(cur->rec); +} +#endif /* UNIV_DEBUG */ + +/************************************************************* +Sets the cursor object to point before the first user record +on the page. */ +UNIV_INLINE +void +page_cur_set_before_first( +/*======================*/ + const buf_block_t* block, /* in: index page */ + page_cur_t* cur) /* in: cursor */ +{ + cur->block = (buf_block_t*) block; + cur->rec = page_get_infimum_rec(buf_block_get_frame(cur->block)); +} + +/************************************************************* +Sets the cursor object to point after the last user record on +the page. */ +UNIV_INLINE +void +page_cur_set_after_last( +/*====================*/ + const buf_block_t* block, /* in: index page */ + page_cur_t* cur) /* in: cursor */ +{ + cur->block = (buf_block_t*) block; + cur->rec = page_get_supremum_rec(buf_block_get_frame(cur->block)); +} + +/************************************************************* +Returns TRUE if the cursor is before first user record on page. */ +UNIV_INLINE +ibool +page_cur_is_before_first( +/*=====================*/ + /* out: TRUE if at start */ + const page_cur_t* cur) /* in: cursor */ +{ + ut_ad(cur); + ut_ad(page_align(cur->rec) == cur->block->frame); + return(page_rec_is_infimum(cur->rec)); +} + +/************************************************************* +Returns TRUE if the cursor is after last user record. 
*/ +UNIV_INLINE +ibool +page_cur_is_after_last( +/*===================*/ + /* out: TRUE if at end */ + const page_cur_t* cur) /* in: cursor */ +{ + ut_ad(cur); + ut_ad(page_align(cur->rec) == cur->block->frame); + return(page_rec_is_supremum(cur->rec)); +} + +/************************************************************** +Positions the cursor on the given record. */ +UNIV_INLINE +void +page_cur_position( +/*==============*/ + const rec_t* rec, /* in: record on a page */ + const buf_block_t* block, /* in: buffer block containing + the record */ + page_cur_t* cur) /* out: page cursor */ +{ + ut_ad(rec && block && cur); + ut_ad(page_align(rec) == block->frame); + + cur->rec = (rec_t*) rec; + cur->block = (buf_block_t*) block; +} + +/************************************************************** +Invalidates a page cursor by setting the record pointer NULL. */ +UNIV_INLINE +void +page_cur_invalidate( +/*================*/ + page_cur_t* cur) /* out: page cursor */ +{ + ut_ad(cur); + + cur->rec = NULL; + cur->block = NULL; +} + +/************************************************************** +Moves the cursor to the next record on page. */ +UNIV_INLINE +void +page_cur_move_to_next( +/*==================*/ + page_cur_t* cur) /* in/out: cursor; must not be after last */ +{ + ut_ad(!page_cur_is_after_last(cur)); + + cur->rec = page_rec_get_next(cur->rec); +} + +/************************************************************** +Moves the cursor to the previous record on page. */ +UNIV_INLINE +void +page_cur_move_to_prev( +/*==================*/ + page_cur_t* cur) /* in/out: page cursor, not before first */ +{ + ut_ad(!page_cur_is_before_first(cur)); + + cur->rec = page_rec_get_prev(cur->rec); +} + +/******************************************************************** +Searches the right position for a page cursor. 
*/
+UNIV_INLINE
+ulint
+page_cur_search(
+/*============*/
+					/* out: number of matched
+					fields on the left */
+	const buf_block_t*	block,	/* in: buffer block */
+	const dict_index_t*	index,	/* in: record descriptor */
+	const dtuple_t*		tuple,	/* in: data tuple */
+	ulint			mode,	/* in: PAGE_CUR_L,
+					PAGE_CUR_LE, PAGE_CUR_G, or
+					PAGE_CUR_GE */
+	page_cur_t*		cursor)	/* out: page cursor */
+{
+	/* Match counters for page_cur_search_with_match(); all start
+	from zero and only the lower-limit field count is reported to
+	the caller. */
+	ulint	up_fields = 0;
+	ulint	up_bytes = 0;
+	ulint	low_fields = 0;
+	ulint	low_bytes = 0;
+
+	ut_ad(dtuple_check_typed(tuple));
+
+	page_cur_search_with_match(block, index, tuple, mode,
+				   &up_fields, &up_bytes,
+				   &low_fields, &low_bytes,
+				   cursor);
+
+	return(low_fields);
+}
+
+/***************************************************************
+Converts a data tuple to a physical record and inserts it next to the
+page cursor. Returns pointer to inserted record if succeed, i.e., enough
+space available, NULL otherwise. The cursor stays at the same logical
+position, but the physical position may change if it is pointing to a
+compressed page that was reorganized. 
*/
+UNIV_INLINE
+rec_t*
+page_cur_tuple_insert(
+/*==================*/
+				/* out: pointer to record if succeed, NULL
+				otherwise */
+	page_cur_t*	cursor,	/* in/out: a page cursor */
+	const dtuple_t*	tuple,	/* in: pointer to a data tuple */
+	dict_index_t*	index,	/* in: record descriptor */
+	ulint		n_ext,	/* in: number of externally stored columns */
+	mtr_t*		mtr)	/* in: mini-transaction handle, or NULL */
+{
+	mem_heap_t*	heap;
+	ulint*		offsets;
+	byte*		buf;
+	rec_t*		conv_rec;
+	rec_t*		inserted;
+	page_zip_des_t*	page_zip;
+	ulint		size;
+	ulint		heap_size;
+
+	size = rec_get_converted_size(index, tuple, n_ext);
+
+	/* Room for the converted record plus the offsets array. */
+	heap_size = size
+		+ (4 + REC_OFFS_HEADER_SIZE
+		   + dtuple_get_n_fields(tuple))
+		* sizeof *offsets;
+
+	heap = mem_heap_create(heap_size);
+
+	buf = (byte*) mem_heap_alloc(heap, size);
+	conv_rec = rec_convert_dtuple_to_rec(buf, index, tuple, n_ext);
+	offsets = rec_get_offsets(conv_rec, index, NULL,
+				  ULINT_UNDEFINED, &heap);
+
+	page_zip = buf_block_get_page_zip(cursor->block);
+
+	if (page_zip) {
+		/* The zip variant takes the address of the cursor
+		record pointer. */
+		inserted = page_cur_insert_rec_zip(&cursor->rec,
+						   cursor->block,
+						   index, conv_rec,
+						   offsets, mtr);
+	} else {
+		inserted = page_cur_insert_rec_low(cursor->rec,
+						   index, conv_rec,
+						   offsets, mtr);
+	}
+
+	/* The temporary record and offsets live in heap. */
+	mem_heap_free(heap);
+
+	return(inserted);
+}
+
+/***************************************************************
+Inserts an already built physical record next to page cursor. Returns
+pointer to inserted record if succeed, i.e., enough space available, NULL
+otherwise. The cursor stays at the same logical position, but the physical
+position may change if it is pointing to a compressed page that was
+reorganized. 
*/ +UNIV_INLINE +rec_t* +page_cur_rec_insert( +/*================*/ + /* out: pointer to record if succeed, NULL + otherwise */ + page_cur_t* cursor, /* in/out: a page cursor */ + const rec_t* rec, /* in: record to insert */ + dict_index_t* index, /* in: record descriptor */ + ulint* offsets,/* in/out: rec_get_offsets(rec, index) */ + mtr_t* mtr) /* in: mini-transaction handle, or NULL */ +{ + if (buf_block_get_page_zip(cursor->block)) { + return(page_cur_insert_rec_zip(&cursor->rec, cursor->block, + index, rec, offsets, mtr)); + } else { + return(page_cur_insert_rec_low(cursor->rec, + index, rec, offsets, mtr)); + } +} + diff --git a/storage/xtradb/include/page0page.h b/storage/xtradb/include/page0page.h new file mode 100644 index 00000000000..e3de6901ee1 --- /dev/null +++ b/storage/xtradb/include/page0page.h @@ -0,0 +1,1019 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Index page routines + +Created 2/2/1994 Heikki Tuuri +*******************************************************/ + +#ifndef page0page_h +#define page0page_h + +#include "univ.i" + +#include "page0types.h" +#include "fil0fil.h" +#include "buf0buf.h" +#include "data0data.h" +#include "dict0dict.h" +#include "rem0rec.h" +#include "fsp0fsp.h" +#include "mtr0mtr.h" + +#ifdef UNIV_MATERIALIZE +#undef UNIV_INLINE +#define UNIV_INLINE +#endif + +/* PAGE HEADER + =========== + +Index page header starts at the first offset left free by the FIL-module */ + +typedef byte page_header_t; + +#define PAGE_HEADER FSEG_PAGE_DATA /* index page header starts at this + offset */ +/*-----------------------------*/ +#define PAGE_N_DIR_SLOTS 0 /* number of slots in page directory */ +#define PAGE_HEAP_TOP 2 /* pointer to record heap top */ +#define PAGE_N_HEAP 4 /* number of records in the heap, + bit 15=flag: new-style compact page format */ +#define PAGE_FREE 6 /* pointer to start of page free record list */ +#define PAGE_GARBAGE 8 /* number of bytes in deleted records */ +#define PAGE_LAST_INSERT 10 /* pointer to the last inserted record, or + NULL if this info has been reset by a delete, + for example */ +#define PAGE_DIRECTION 12 /* last insert direction: PAGE_LEFT, ... 
*/ +#define PAGE_N_DIRECTION 14 /* number of consecutive inserts to the same + direction */ +#define PAGE_N_RECS 16 /* number of user records on the page */ +#define PAGE_MAX_TRX_ID 18 /* highest id of a trx which may have modified + a record on the page; a dulint; defined only + in secondary indexes; specifically, not in an + ibuf tree; NOTE: this may be modified only + when the thread has an x-latch to the page, + and ALSO an x-latch to btr_search_latch + if there is a hash index to the page! */ +#define PAGE_HEADER_PRIV_END 26 /* end of private data structure of the page + header which are set in a page create */ +/*----*/ +#define PAGE_LEVEL 26 /* level of the node in an index tree; the + leaf level is the level 0 */ +#define PAGE_INDEX_ID 28 /* index id where the page belongs */ +#define PAGE_BTR_SEG_LEAF 36 /* file segment header for the leaf pages in + a B-tree: defined only on the root page of a + B-tree, but not in the root of an ibuf tree */ +#define PAGE_BTR_IBUF_FREE_LIST PAGE_BTR_SEG_LEAF +#define PAGE_BTR_IBUF_FREE_LIST_NODE PAGE_BTR_SEG_LEAF + /* in the place of PAGE_BTR_SEG_LEAF and _TOP + there is a free list base node if the page is + the root page of an ibuf tree, and at the same + place is the free list node if the page is in + a free list */ +#define PAGE_BTR_SEG_TOP (36 + FSEG_HEADER_SIZE) + /* file segment header for the non-leaf pages + in a B-tree: defined only on the root page of + a B-tree, but not in the root of an ibuf + tree */ +/*----*/ +#define PAGE_DATA (PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE) + /* start of data on the page */ + +#define PAGE_OLD_INFIMUM (PAGE_DATA + 1 + REC_N_OLD_EXTRA_BYTES) + /* offset of the page infimum record on an + old-style page */ +#define PAGE_OLD_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_OLD_EXTRA_BYTES + 8) + /* offset of the page supremum record on an + old-style page */ +#define PAGE_OLD_SUPREMUM_END (PAGE_OLD_SUPREMUM + 9) + /* offset of the page supremum record end on + an old-style page */ +#define 
PAGE_NEW_INFIMUM (PAGE_DATA + REC_N_NEW_EXTRA_BYTES) + /* offset of the page infimum record on a + new-style compact page */ +#define PAGE_NEW_SUPREMUM (PAGE_DATA + 2 * REC_N_NEW_EXTRA_BYTES + 8) + /* offset of the page supremum record on a + new-style compact page */ +#define PAGE_NEW_SUPREMUM_END (PAGE_NEW_SUPREMUM + 8) + /* offset of the page supremum record end on + a new-style compact page */ +/*-----------------------------*/ + +/* Heap numbers */ +#define PAGE_HEAP_NO_INFIMUM 0 /* page infimum */ +#define PAGE_HEAP_NO_SUPREMUM 1 /* page supremum */ +#define PAGE_HEAP_NO_USER_LOW 2 /* first user record in + creation (insertion) order, + not necessarily collation order; + this record may have been deleted */ + +/* Directions of cursor movement */ +#define PAGE_LEFT 1 +#define PAGE_RIGHT 2 +#define PAGE_SAME_REC 3 +#define PAGE_SAME_PAGE 4 +#define PAGE_NO_DIRECTION 5 + +/* PAGE DIRECTORY + ============== +*/ + +typedef byte page_dir_slot_t; +typedef page_dir_slot_t page_dir_t; + +/* Offset of the directory start down from the page end. We call the +slot with the highest file address directory start, as it points to +the first record in the list of records. */ +#define PAGE_DIR FIL_PAGE_DATA_END + +/* We define a slot in the page directory as two bytes */ +#define PAGE_DIR_SLOT_SIZE 2 + +/* The offset of the physically lower end of the directory, counted from +page end, when the page is empty */ +#define PAGE_EMPTY_DIR_START (PAGE_DIR + 2 * PAGE_DIR_SLOT_SIZE) + +/* The maximum and minimum number of records owned by a directory slot. The +number may drop below the minimum in the first and the last slot in the +directory. */ +#define PAGE_DIR_SLOT_MAX_N_OWNED 8 +#define PAGE_DIR_SLOT_MIN_N_OWNED 4 + +/**************************************************************** +Gets the start of a page. 
*/ +UNIV_INLINE +page_t* +page_align( +/*=======*/ + /* out: start of the page */ + const void* ptr) /* in: pointer to page frame */ + __attribute__((const)); +/**************************************************************** +Gets the offset within a page. */ +UNIV_INLINE +ulint +page_offset( +/*========*/ + /* out: offset from the start of the page */ + const void* ptr) /* in: pointer to page frame */ + __attribute__((const)); +/***************************************************************** +Returns the max trx id field value. */ +UNIV_INLINE +dulint +page_get_max_trx_id( +/*================*/ + const page_t* page); /* in: page */ +/***************************************************************** +Sets the max trx id field value. */ +UNIV_INTERN +void +page_set_max_trx_id( +/*================*/ + buf_block_t* block, /* in/out: page */ + page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */ + dulint trx_id);/* in: transaction id */ +/***************************************************************** +Sets the max trx id field value if trx_id is bigger than the previous +value. */ +UNIV_INLINE +void +page_update_max_trx_id( +/*===================*/ + buf_block_t* block, /* in/out: page */ + page_zip_des_t* page_zip,/* in/out: compressed page whose + uncompressed part will be updated, or NULL */ + dulint trx_id);/* in: transaction id */ +/***************************************************************** +Reads the given header field. */ +UNIV_INLINE +ulint +page_header_get_field( +/*==================*/ + const page_t* page, /* in: page */ + ulint field); /* in: PAGE_N_DIR_SLOTS, ... */ +/***************************************************************** +Sets the given header field. */ +UNIV_INLINE +void +page_header_set_field( +/*==================*/ + page_t* page, /* in/out: page */ + page_zip_des_t* page_zip,/* in/out: compressed page whose + uncompressed part will be updated, or NULL */ + ulint field, /* in: PAGE_N_DIR_SLOTS, ... 
*/ + ulint val); /* in: value */ +/***************************************************************** +Returns the offset stored in the given header field. */ +UNIV_INLINE +ulint +page_header_get_offs( +/*=================*/ + /* out: offset from the start of the page, + or 0 */ + const page_t* page, /* in: page */ + ulint field) /* in: PAGE_FREE, ... */ + __attribute__((nonnull, pure)); + +/***************************************************************** +Returns the pointer stored in the given header field, or NULL. */ +#define page_header_get_ptr(page, field) \ + (page_header_get_offs(page, field) \ + ? page + page_header_get_offs(page, field) : NULL) +/***************************************************************** +Sets the pointer stored in the given header field. */ +UNIV_INLINE +void +page_header_set_ptr( +/*================*/ + page_t* page, /* in/out: page */ + page_zip_des_t* page_zip,/* in/out: compressed page whose + uncompressed part will be updated, or NULL */ + ulint field, /* in/out: PAGE_FREE, ... */ + const byte* ptr); /* in: pointer or NULL*/ +/***************************************************************** +Resets the last insert info field in the page header. Writes to mlog +about this operation. */ +UNIV_INLINE +void +page_header_reset_last_insert( +/*==========================*/ + page_t* page, /* in: page */ + page_zip_des_t* page_zip,/* in/out: compressed page whose + uncompressed part will be updated, or NULL */ + mtr_t* mtr); /* in: mtr */ +/**************************************************************** +Gets the offset of the first record on the page. */ +UNIV_INLINE +ulint +page_get_infimum_offset( +/*====================*/ + /* out: offset of the first record + in record list, relative from page */ + const page_t* page); /* in: page which must have record(s) */ +/**************************************************************** +Gets the offset of the last record on the page. 
*/ +UNIV_INLINE +ulint +page_get_supremum_offset( +/*=====================*/ + /* out: offset of the last record in + record list, relative from page */ + const page_t* page); /* in: page which must have record(s) */ +#define page_get_infimum_rec(page) ((page) + page_get_infimum_offset(page)) +#define page_get_supremum_rec(page) ((page) + page_get_supremum_offset(page)) +/**************************************************************** +Returns the middle record of record list. If there are an even number +of records in the list, returns the first record of upper half-list. */ +UNIV_INTERN +rec_t* +page_get_middle_rec( +/*================*/ + /* out: middle record */ + page_t* page); /* in: page */ +/***************************************************************** +Compares a data tuple to a physical record. Differs from the function +cmp_dtuple_rec_with_match in the way that the record must reside on an +index page, and also page infimum and supremum records can be given in +the parameter rec. These are considered as the negative infinity and +the positive infinity in the alphabetical order. 
*/ +UNIV_INLINE +int +page_cmp_dtuple_rec_with_match( +/*===========================*/ + /* out: 1, 0, -1, if dtuple is greater, equal, + less than rec, respectively, when only the + common first fields are compared */ + const dtuple_t* dtuple, /* in: data tuple */ + const rec_t* rec, /* in: physical record on a page; may also + be page infimum or supremum, in which case + matched-parameter values below are not + affected */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint* matched_fields, /* in/out: number of already completely + matched fields; when function returns + contains the value for current comparison */ + ulint* matched_bytes); /* in/out: number of already matched + bytes within the first field not completely + matched; when function returns contains the + value for current comparison */ +/***************************************************************** +Gets the page number. */ +UNIV_INLINE +ulint +page_get_page_no( +/*=============*/ + /* out: page number */ + const page_t* page); /* in: page */ +/***************************************************************** +Gets the tablespace identifier. */ +UNIV_INLINE +ulint +page_get_space_id( +/*==============*/ + /* out: space id */ + const page_t* page); /* in: page */ +/***************************************************************** +Gets the number of user records on page (the infimum and supremum records +are not user records). */ +UNIV_INLINE +ulint +page_get_n_recs( +/*============*/ + /* out: number of user records */ + const page_t* page); /* in: index page */ +/******************************************************************* +Returns the number of records before the given record in chain. +The number includes infimum and supremum records. 
*/ +UNIV_INTERN +ulint +page_rec_get_n_recs_before( +/*=======================*/ + /* out: number of records */ + const rec_t* rec); /* in: the physical record */ +/***************************************************************** +Gets the number of records in the heap. */ +UNIV_INLINE +ulint +page_dir_get_n_heap( +/*================*/ + /* out: number of user records */ + const page_t* page); /* in: index page */ +/***************************************************************** +Sets the number of records in the heap. */ +UNIV_INLINE +void +page_dir_set_n_heap( +/*================*/ + page_t* page, /* in/out: index page */ + page_zip_des_t* page_zip,/* in/out: compressed page whose + uncompressed part will be updated, or NULL. + Note that the size of the dense page directory + in the compressed page trailer is + n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */ + ulint n_heap);/* in: number of records */ +/***************************************************************** +Gets the number of dir slots in directory. */ +UNIV_INLINE +ulint +page_dir_get_n_slots( +/*=================*/ + /* out: number of slots */ + const page_t* page); /* in: index page */ +/***************************************************************** +Sets the number of dir slots in directory. */ +UNIV_INLINE +void +page_dir_set_n_slots( +/*=================*/ + page_t* page, /* in/out: page */ + page_zip_des_t* page_zip,/* in/out: compressed page whose + uncompressed part will be updated, or NULL */ + ulint n_slots);/* in: number of slots */ +#ifdef UNIV_DEBUG +/***************************************************************** +Gets pointer to nth directory slot. 
*/ +UNIV_INLINE +page_dir_slot_t* +page_dir_get_nth_slot( +/*==================*/ + /* out: pointer to dir slot */ + const page_t* page, /* in: index page */ + ulint n); /* in: position */ +#else /* UNIV_DEBUG */ +# define page_dir_get_nth_slot(page, n) \ + ((page) + UNIV_PAGE_SIZE - PAGE_DIR \ + - (n + 1) * PAGE_DIR_SLOT_SIZE) +#endif /* UNIV_DEBUG */ +/****************************************************************** +Used to check the consistency of a record on a page. */ +UNIV_INLINE +ibool +page_rec_check( +/*===========*/ + /* out: TRUE if succeed */ + const rec_t* rec); /* in: record */ +/******************************************************************* +Gets the record pointed to by a directory slot. */ +UNIV_INLINE +const rec_t* +page_dir_slot_get_rec( +/*==================*/ + /* out: pointer to record */ + const page_dir_slot_t* slot); /* in: directory slot */ +/******************************************************************* +This is used to set the record offset in a directory slot. */ +UNIV_INLINE +void +page_dir_slot_set_rec( +/*==================*/ + page_dir_slot_t* slot, /* in: directory slot */ + rec_t* rec); /* in: record on the page */ +/******************************************************************* +Gets the number of records owned by a directory slot. */ +UNIV_INLINE +ulint +page_dir_slot_get_n_owned( +/*======================*/ + /* out: number of records */ + const page_dir_slot_t* slot); /* in: page directory slot */ +/******************************************************************* +This is used to set the owned records field of a directory slot. 
*/ +UNIV_INLINE +void +page_dir_slot_set_n_owned( +/*======================*/ + page_dir_slot_t*slot, /* in/out: directory slot */ + page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */ + ulint n); /* in: number of records owned by the slot */ +/**************************************************************** +Calculates the space reserved for directory slots of a given +number of records. The exact value is a fraction number +n * PAGE_DIR_SLOT_SIZE / PAGE_DIR_SLOT_MIN_N_OWNED, and it is +rounded upwards to an integer. */ +UNIV_INLINE +ulint +page_dir_calc_reserved_space( +/*=========================*/ + ulint n_recs); /* in: number of records */ +/******************************************************************* +Looks for the directory slot which owns the given record. */ +UNIV_INTERN +ulint +page_dir_find_owner_slot( +/*=====================*/ + /* out: the directory slot number */ + const rec_t* rec); /* in: the physical record */ +/**************************************************************** +Determine whether the page is in new-style compact format. */ +UNIV_INLINE +ulint +page_is_comp( +/*=========*/ + /* out: nonzero if the page is in compact + format, zero if it is in old-style format */ + const page_t* page); /* in: index page */ +/**************************************************************** +TRUE if the record is on a page in compact format. */ +UNIV_INLINE +ulint +page_rec_is_comp( +/*=============*/ + /* out: nonzero if in compact format */ + const rec_t* rec); /* in: record */ +/******************************************************************* +Returns the heap number of a record. */ +UNIV_INLINE +ulint +page_rec_get_heap_no( +/*=================*/ + /* out: heap number */ + const rec_t* rec); /* in: the physical record */ +/**************************************************************** +Determine whether the page is a B-tree leaf. 
*/ +UNIV_INLINE +ibool +page_is_leaf( +/*=========*/ + /* out: TRUE if the page is a B-tree leaf */ + const page_t* page) /* in: page */ + __attribute__((nonnull, pure)); +/**************************************************************** +Gets the pointer to the next record on the page. */ +UNIV_INLINE +const rec_t* +page_rec_get_next_low( +/*==================*/ + /* out: pointer to next record */ + const rec_t* rec, /* in: pointer to record */ + ulint comp); /* in: nonzero=compact page layout */ +/**************************************************************** +Gets the pointer to the next record on the page. */ +UNIV_INLINE +rec_t* +page_rec_get_next( +/*==============*/ + /* out: pointer to next record */ + rec_t* rec); /* in: pointer to record */ +/**************************************************************** +Gets the pointer to the next record on the page. */ +UNIV_INLINE +const rec_t* +page_rec_get_next_const( +/*====================*/ + /* out: pointer to next record */ + const rec_t* rec); /* in: pointer to record */ +/**************************************************************** +Sets the pointer to the next record on the page. */ +UNIV_INLINE +void +page_rec_set_next( +/*==============*/ + rec_t* rec, /* in: pointer to record, + must not be page supremum */ + rec_t* next); /* in: pointer to next record, + must not be page infimum */ +/**************************************************************** +Gets the pointer to the previous record. */ +UNIV_INLINE +const rec_t* +page_rec_get_prev_const( +/*====================*/ + /* out: pointer to previous record */ + const rec_t* rec); /* in: pointer to record, must not be page + infimum */ +/**************************************************************** +Gets the pointer to the previous record. 
*/ +UNIV_INLINE +rec_t* +page_rec_get_prev( +/*==============*/ + /* out: pointer to previous record */ + rec_t* rec); /* in: pointer to record, + must not be page infimum */ +/**************************************************************** +TRUE if the record is a user record on the page. */ +UNIV_INLINE +ibool +page_rec_is_user_rec_low( +/*=====================*/ + /* out: TRUE if a user record */ + ulint offset) /* in: record offset on page */ + __attribute__((const)); +/**************************************************************** +TRUE if the record is the supremum record on a page. */ +UNIV_INLINE +ibool +page_rec_is_supremum_low( +/*=====================*/ + /* out: TRUE if the supremum record */ + ulint offset) /* in: record offset on page */ + __attribute__((const)); +/**************************************************************** +TRUE if the record is the infimum record on a page. */ +UNIV_INLINE +ibool +page_rec_is_infimum_low( +/*====================*/ + /* out: TRUE if the infimum record */ + ulint offset) /* in: record offset on page */ + __attribute__((const)); + +/**************************************************************** +TRUE if the record is a user record on the page. */ +UNIV_INLINE +ibool +page_rec_is_user_rec( +/*=================*/ + /* out: TRUE if a user record */ + const rec_t* rec) /* in: record */ + __attribute__((const)); +/**************************************************************** +TRUE if the record is the supremum record on a page. */ +UNIV_INLINE +ibool +page_rec_is_supremum( +/*=================*/ + /* out: TRUE if the supremum record */ + const rec_t* rec) /* in: record */ + __attribute__((const)); + +/**************************************************************** +TRUE if the record is the infimum record on a page. 
*/ +UNIV_INLINE +ibool +page_rec_is_infimum( +/*================*/ + /* out: TRUE if the infimum record */ + const rec_t* rec) /* in: record */ + __attribute__((const)); +/******************************************************************* +Looks for the record which owns the given record. */ +UNIV_INLINE +rec_t* +page_rec_find_owner_rec( +/*====================*/ + /* out: the owner record */ + rec_t* rec); /* in: the physical record */ +/*************************************************************************** +This is a low-level operation which is used in a database index creation +to update the page number of a created B-tree to a data dictionary +record. */ +UNIV_INTERN +void +page_rec_write_index_page_no( +/*=========================*/ + rec_t* rec, /* in: record to update */ + ulint i, /* in: index of the field to update */ + ulint page_no,/* in: value to write */ + mtr_t* mtr); /* in: mtr */ +/**************************************************************** +Returns the maximum combined size of records which can be inserted on top +of record heap. */ +UNIV_INLINE +ulint +page_get_max_insert_size( +/*=====================*/ + /* out: maximum combined size for + inserted records */ + const page_t* page, /* in: index page */ + ulint n_recs);/* in: number of records */ +/**************************************************************** +Returns the maximum combined size of records which can be inserted on top +of record heap if page is first reorganized. */ +UNIV_INLINE +ulint +page_get_max_insert_size_after_reorganize( +/*======================================*/ + /* out: maximum combined size for + inserted records */ + const page_t* page, /* in: index page */ + ulint n_recs);/* in: number of records */ +/***************************************************************** +Calculates free space if a page is emptied. 
*/ +UNIV_INLINE +ulint +page_get_free_space_of_empty( +/*=========================*/ + /* out: free space */ + ulint comp) /* in: nonzero=compact page format */ + __attribute__((const)); +/************************************************************** +Returns the base extra size of a physical record. This is the +size of the fixed header, independent of the record size. */ +UNIV_INLINE +ulint +page_rec_get_base_extra_size( +/*=========================*/ + /* out: REC_N_NEW_EXTRA_BYTES + or REC_N_OLD_EXTRA_BYTES */ + const rec_t* rec); /* in: physical record */ +/**************************************************************** +Returns the sum of the sizes of the records in the record list +excluding the infimum and supremum records. */ +UNIV_INLINE +ulint +page_get_data_size( +/*===============*/ + /* out: data in bytes */ + const page_t* page); /* in: index page */ +/**************************************************************** +Allocates a block of memory from the head of the free list +of an index page. */ +UNIV_INLINE +void +page_mem_alloc_free( +/*================*/ + page_t* page, /* in/out: index page */ + page_zip_des_t* page_zip,/* in/out: compressed page with enough + space available for inserting the record, + or NULL */ + rec_t* next_rec,/* in: pointer to the new head of the + free record list */ + ulint need); /* in: number of bytes allocated */ +/**************************************************************** +Allocates a block of memory from the heap of an index page. 
*/ +UNIV_INTERN +byte* +page_mem_alloc_heap( +/*================*/ + /* out: pointer to start of allocated + buffer, or NULL if allocation fails */ + page_t* page, /* in/out: index page */ + page_zip_des_t* page_zip,/* in/out: compressed page with enough + space available for inserting the record, + or NULL */ + ulint need, /* in: total number of bytes needed */ + ulint* heap_no);/* out: this contains the heap number + of the allocated record + if allocation succeeds */ +/**************************************************************** +Puts a record to free list. */ +UNIV_INLINE +void +page_mem_free( +/*==========*/ + page_t* page, /* in/out: index page */ + page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */ + rec_t* rec, /* in: pointer to the (origin of) record */ + dict_index_t* index, /* in: index of rec */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** +Create an uncompressed B-tree index page. */ +UNIV_INTERN +page_t* +page_create( +/*========*/ + /* out: pointer to the page */ + buf_block_t* block, /* in: a buffer block where the + page is created */ + mtr_t* mtr, /* in: mini-transaction handle */ + ulint comp); /* in: nonzero=compact page format */ +/************************************************************** +Create a compressed B-tree index page. */ +UNIV_INTERN +page_t* +page_create_zip( +/*============*/ + /* out: pointer to the page */ + buf_block_t* block, /* in/out: a buffer frame where the + page is created */ + dict_index_t* index, /* in: the index of the page */ + ulint level, /* in: the B-tree level of the page */ + mtr_t* mtr); /* in: mini-transaction handle */ + +/***************************************************************** +Differs from page_copy_rec_list_end, because this function does not +touch the lock table and max trx id on page or compress the page. 
*/ +UNIV_INTERN +void +page_copy_rec_list_end_no_locks( +/*============================*/ + buf_block_t* new_block, /* in: index page to copy to */ + buf_block_t* block, /* in: index page of rec */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ +/***************************************************************** +Copies records from page to new_page, from the given record onward, +including that record. Infimum and supremum records are not copied. +The records are copied to the start of the record list on new_page. */ +UNIV_INTERN +rec_t* +page_copy_rec_list_end( +/*===================*/ + /* out: pointer to the original + successor of the infimum record + on new_page, or NULL on zip overflow + (new_block will be decompressed) */ + buf_block_t* new_block, /* in/out: index page to copy to */ + buf_block_t* block, /* in: index page containing rec */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ + __attribute__((nonnull)); +/***************************************************************** +Copies records from page to new_page, up to the given record, NOT +including that record. Infimum and supremum records are not copied. +The records are copied to the end of the record list on new_page. */ +UNIV_INTERN +rec_t* +page_copy_rec_list_start( +/*=====================*/ + /* out: pointer to the original + predecessor of the supremum record + on new_page, or NULL on zip overflow + (new_block will be decompressed) */ + buf_block_t* new_block, /* in/out: index page to copy to */ + buf_block_t* block, /* in: index page containing rec */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ + __attribute__((nonnull)); +/***************************************************************** +Deletes records from a page from a given record onward, including that record. 
+The infimum and supremum records are not deleted. */ +UNIV_INTERN +void +page_delete_rec_list_end( +/*=====================*/ + rec_t* rec, /* in: pointer to record on page */ + buf_block_t* block, /* in: buffer block of the page */ + dict_index_t* index, /* in: record descriptor */ + ulint n_recs, /* in: number of records to delete, + or ULINT_UNDEFINED if not known */ + ulint size, /* in: the sum of the sizes of the + records in the end of the chain to + delete, or ULINT_UNDEFINED if not known */ + mtr_t* mtr) /* in: mtr */ + __attribute__((nonnull)); +/***************************************************************** +Deletes records from page, up to the given record, NOT including +that record. Infimum and supremum records are not deleted. */ +UNIV_INTERN +void +page_delete_rec_list_start( +/*=======================*/ + rec_t* rec, /* in: record on page */ + buf_block_t* block, /* in: buffer block of the page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ + __attribute__((nonnull)); +/***************************************************************** +Moves record list end to another page. Moved records include +split_rec. */ +UNIV_INTERN +ibool +page_move_rec_list_end( +/*===================*/ + /* out: TRUE on success; FALSE on + compression failure + (new_block will be decompressed) */ + buf_block_t* new_block, /* in/out: index page where to move */ + buf_block_t* block, /* in: index page from where to move */ + rec_t* split_rec, /* in: first record to move */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ + __attribute__((nonnull(1, 2, 4, 5))); +/***************************************************************** +Moves record list start to another page. Moved records do not include +split_rec. 
*/ +UNIV_INTERN +ibool +page_move_rec_list_start( +/*=====================*/ + /* out: TRUE on success; FALSE on + compression failure */ + buf_block_t* new_block, /* in/out: index page where to move */ + buf_block_t* block, /* in/out: page containing split_rec */ + rec_t* split_rec, /* in: first record not to move */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ + __attribute__((nonnull(1, 2, 4, 5))); +/******************************************************************** +Splits a directory slot which owns too many records. */ +UNIV_INTERN +void +page_dir_split_slot( +/*================*/ + page_t* page, /* in: index page */ + page_zip_des_t* page_zip,/* in/out: compressed page whose + uncompressed part will be written, or NULL */ + ulint slot_no)/* in: the directory slot */ + __attribute__((nonnull(1))); +/***************************************************************** +Tries to balance the given directory slot with too few records +with the upper neighbor, so that there are at least the minimum number +of records owned by the slot; this may result in the merging of +two slots. */ +UNIV_INTERN +void +page_dir_balance_slot( +/*==================*/ + page_t* page, /* in/out: index page */ + page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */ + ulint slot_no)/* in: the directory slot */ + __attribute__((nonnull(1))); +/************************************************************** +Parses a log record of a record list end or start deletion. 
*/ +UNIV_INTERN +byte* +page_parse_delete_rec_list( +/*=======================*/ + /* out: end of log record or NULL */ + byte type, /* in: MLOG_LIST_END_DELETE, + MLOG_LIST_START_DELETE, + MLOG_COMP_LIST_END_DELETE or + MLOG_COMP_LIST_START_DELETE */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + buf_block_t* block, /* in/out: buffer block or NULL */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr or NULL */ +/*************************************************************** +Parses a redo log record of creating a page. */ +UNIV_INTERN +byte* +page_parse_create( +/*==============*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + ulint comp, /* in: nonzero=compact page format */ + buf_block_t* block, /* in: block or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ +/**************************************************************** +Prints record contents including the data relevant only in +the index page context. */ +UNIV_INTERN +void +page_rec_print( +/*===========*/ + const rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: record descriptor */ +/******************************************************************* +This is used to print the contents of the directory for +debugging purposes. */ +UNIV_INTERN +void +page_dir_print( +/*===========*/ + page_t* page, /* in: index page */ + ulint pr_n); /* in: print n first and n last entries */ +/******************************************************************* +This is used to print the contents of the page record list for +debugging purposes. */ +UNIV_INTERN +void +page_print_list( +/*============*/ + buf_block_t* block, /* in: index page */ + dict_index_t* index, /* in: dictionary index of the page */ + ulint pr_n); /* in: print n first and n last entries */ +/******************************************************************* +Prints the info in a page header. 
*/ +UNIV_INTERN +void +page_header_print( +/*==============*/ + const page_t* page); /* in: index page whose header is printed */ +/******************************************************************* +This is used to print the contents of the page for +debugging purposes. */ +UNIV_INTERN +void +page_print( +/*=======*/ + buf_block_t* block, /* in: index page */ + dict_index_t* index, /* in: dictionary index of the page */ + ulint dn, /* in: print dn first and last entries + in directory */ + ulint rn); /* in: print rn first and last records; + NOTE(review): presumably of the record list + rather than the directory (compare + page_print_list) -- confirm */ +/******************************************************************* +The following is used to validate a record on a page. This function +differs from rec_validate as it can also check the n_owned field and +the heap_no field. */ +UNIV_INTERN +ibool +page_rec_validate( +/*==============*/ + /* out: TRUE if ok */ + rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/******************************************************************* +Checks that the first directory slot points to the infimum record and +the last to the supremum. This function is intended to track if the +bug fixed in 4.0.14 has caused corruption to users' databases. */ +UNIV_INTERN +void +page_check_dir( +/*===========*/ + const page_t* page); /* in: index page */ +/******************************************************************* +This function checks the consistency of an index page when we do not +know the index. This is also resilient so that this should never crash +even if the page is total garbage. */ +UNIV_INTERN +ibool +page_simple_validate_old( +/*=====================*/ + /* out: TRUE if ok */ + page_t* page); /* in: old-style index page */ +/******************************************************************* +This function checks the consistency of an index page when we do not +know the index. This is also resilient so that this should never crash +even if the page is total garbage. 
*/ +UNIV_INTERN +ibool +page_simple_validate_new( +/*=====================*/ + /* out: TRUE if ok */ + page_t* page); /* in: new-style index page */ +/******************************************************************* +This function checks the consistency of an index page. */ +UNIV_INTERN +ibool +page_validate( +/*==========*/ + /* out: TRUE if ok */ + page_t* page, /* in: index page */ + dict_index_t* index); /* in: data dictionary index containing + the page record type definition */ +/******************************************************************* +Looks in the page record list for a record with the given heap number. */ + +const rec_t* +page_find_rec_with_heap_no( +/*=======================*/ + /* out: record, NULL if not found */ + const page_t* page, /* in: index page */ + ulint heap_no);/* in: heap number */ + +#ifdef UNIV_MATERIALIZE +#undef UNIV_INLINE +#define UNIV_INLINE UNIV_INLINE_ORIGINAL +#endif + +#ifndef UNIV_NONINL +#include "page0page.ic" +#endif + +#endif diff --git a/storage/xtradb/include/page0page.ic b/storage/xtradb/include/page0page.ic new file mode 100644 index 00000000000..df0f6f8b360 --- /dev/null +++ b/storage/xtradb/include/page0page.ic @@ -0,0 +1,1060 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Index page routines + +Created 2/2/1994 Heikki Tuuri +*******************************************************/ + +#include "mach0data.h" +#include "rem0cmp.h" +#include "mtr0log.h" +#include "page0zip.h" + +#ifdef UNIV_MATERIALIZE +#undef UNIV_INLINE +#define UNIV_INLINE +#endif + +/**************************************************************** +Gets the start of a page. */ +UNIV_INLINE +page_t* +page_align( +/*=======*/ + /* out: start of the page */ + const void* ptr) /* in: pointer to page frame */ +{ + return((page_t*) ut_align_down(ptr, UNIV_PAGE_SIZE)); +} +/**************************************************************** +Gets the offset within a page. */ +UNIV_INLINE +ulint +page_offset( +/*========*/ + /* out: offset from the start of the page */ + const void* ptr) /* in: pointer to page frame */ +{ + return(ut_align_offset(ptr, UNIV_PAGE_SIZE)); +} +/***************************************************************** +Returns the max trx id field value. */ +UNIV_INLINE +dulint +page_get_max_trx_id( +/*================*/ + const page_t* page) /* in: page */ +{ + ut_ad(page); + + return(mach_read_from_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID)); +} + +/***************************************************************** +Sets the max trx id field value if trx_id is bigger than the previous +value. 
*/ +UNIV_INLINE +void +page_update_max_trx_id( +/*===================*/ + buf_block_t* block, /* in/out: page */ + page_zip_des_t* page_zip,/* in/out: compressed page whose + uncompressed part will be updated, or NULL */ + dulint trx_id) /* in: transaction id */ +{ + ut_ad(block); + /* Write only when the max trx id currently stored in the + page frame of block compares lower than trx_id. */ + if (ut_dulint_cmp(page_get_max_trx_id(buf_block_get_frame(block)), + trx_id) < 0) { + + page_set_max_trx_id(block, page_zip, trx_id); + } +} + +/***************************************************************** +Reads the given header field: returns the result of mach_read_from_2() +at page + PAGE_HEADER + field. */ +UNIV_INLINE +ulint +page_header_get_field( +/*==================*/ + const page_t* page, /* in: page */ + ulint field) /* in: PAGE_LEVEL, ... */ +{ + ut_ad(page); + ut_ad(field <= PAGE_INDEX_ID); + + return(mach_read_from_2(page + PAGE_HEADER + field)); +} + +/***************************************************************** +Sets the given header field. */ +UNIV_INLINE +void +page_header_set_field( +/*==================*/ + page_t* page, /* in/out: page */ + page_zip_des_t* page_zip,/* in/out: compressed page whose + uncompressed part will be updated, or NULL */ + ulint field, /* in: PAGE_N_DIR_SLOTS, ... */ + ulint val) /* in: value; asserted to be below + UNIV_PAGE_SIZE, except that for PAGE_N_HEAP + only the low 15 bits (val & 0x7fff) are + range-checked */ +{ + ut_ad(page); + ut_ad(field <= PAGE_N_RECS); + ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE); + ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE); + + mach_write_to_2(page + PAGE_HEADER + field, val); + if (UNIV_LIKELY_NULL(page_zip)) { /* compressed page given: + hand it the same 2 bytes */ + page_zip_write_header(page_zip, + page + PAGE_HEADER + field, 2, NULL); + } +} + +/***************************************************************** +Returns the offset stored in the given header field. */ +UNIV_INLINE +ulint +page_header_get_offs( +/*=================*/ + /* out: offset from the start of the page, + or 0 */ + const page_t* page, /* in: page */ + ulint field) /* in: PAGE_FREE, ... 
*/ +{ + ulint offs; + + ut_ad(page); + ut_ad((field == PAGE_FREE) + || (field == PAGE_LAST_INSERT) + || (field == PAGE_HEAP_TOP)); + + offs = page_header_get_field(page, field); + /* Only PAGE_HEAP_TOP is asserted to be nonzero; the other + two fields may read as 0. */ + ut_ad((field != PAGE_HEAP_TOP) || offs); + + return(offs); +} + +/***************************************************************** +Sets the pointer stored in the given header field. */ +UNIV_INLINE +void +page_header_set_ptr( +/*================*/ + page_t* page, /* in/out: page */ + page_zip_des_t* page_zip,/* in/out: compressed page whose + uncompressed part will be updated, or NULL */ + ulint field, /* in: PAGE_FREE, ... */ + const byte* ptr) /* in: pointer into page, or NULL */ +{ + ulint offs; + + ut_ad(page); + ut_ad((field == PAGE_FREE) + || (field == PAGE_LAST_INSERT) + || (field == PAGE_HEAP_TOP)); + /* NULL is stored as offset 0; any other pointer is stored + as its distance from the start of page. */ + if (ptr == NULL) { + offs = 0; + } else { + offs = ptr - page; + } + + ut_ad((field != PAGE_HEAP_TOP) || offs); + + page_header_set_field(page, page_zip, field, offs); +} + +/***************************************************************** +Resets the last insert info field in the page header. Writes to mlog +about this operation. */ +UNIV_INLINE +void +page_header_reset_last_insert( +/*==========================*/ + page_t* page, /* in/out: page */ + page_zip_des_t* page_zip,/* in/out: compressed page whose + uncompressed part will be updated, or NULL */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(page && mtr); + /* With a compressed page the field is zeroed in page and then + passed to page_zip_write_header() along with mtr; without + one the zero is written through mlog_write_ulint(). */ + if (UNIV_LIKELY_NULL(page_zip)) { + mach_write_to_2(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0); + page_zip_write_header(page_zip, + page + (PAGE_HEADER + PAGE_LAST_INSERT), + 2, mtr); + } else { + mlog_write_ulint(page + (PAGE_HEADER + PAGE_LAST_INSERT), 0, + MLOG_2BYTES, mtr); + } +} + +/**************************************************************** +Determine whether the page is in new-style compact format. 
*/ +UNIV_INLINE +ulint +page_is_comp( +/*=========*/ + /* out: nonzero if the page is in compact + format, zero if it is in old-style format */ + const page_t* page) /* in: index page */ +{ + return(UNIV_EXPECT(page_header_get_field(page, PAGE_N_HEAP) & 0x8000, + 0x8000)); +} + +/**************************************************************** +TRUE if the record is on a page in compact format. */ +UNIV_INLINE +ulint +page_rec_is_comp( +/*=============*/ + /* out: nonzero if in compact format */ + const rec_t* rec) /* in: record */ +{ + return(page_is_comp(page_align(rec))); +} + +/******************************************************************* +Returns the heap number of a record. */ +UNIV_INLINE +ulint +page_rec_get_heap_no( +/*=================*/ + /* out: heap number */ + const rec_t* rec) /* in: the physical record */ +{ + if (page_rec_is_comp(rec)) { + return(rec_get_heap_no_new(rec)); + } else { + return(rec_get_heap_no_old(rec)); + } +} + +/**************************************************************** +Determine whether the page is a B-tree leaf. +The 2-byte PAGE_LEVEL field is read with mach_read_from_2() rather than +through a uint16 pointer cast into the byte buffer; only whether the +value is zero matters, so the result is the same. */ +UNIV_INLINE +ibool +page_is_leaf( +/*=========*/ + /* out: TRUE if the page is a B-tree leaf */ + const page_t* page) /* in: page */ +{ + return(!mach_read_from_2(page + (PAGE_HEADER + PAGE_LEVEL))); +} + +/**************************************************************** +Gets the offset of the first record on the page. */ +UNIV_INLINE +ulint +page_get_infimum_offset( +/*====================*/ + /* out: offset of the first record + in record list, relative from page */ + const page_t* page) /* in: page which must have record(s) */ +{ + ut_ad(page); + ut_ad(!page_offset(page)); + + if (page_is_comp(page)) { + return(PAGE_NEW_INFIMUM); + } else { + return(PAGE_OLD_INFIMUM); + } +} + +/**************************************************************** +Gets the offset of the last record on the page. 
*/ +UNIV_INLINE +ulint +page_get_supremum_offset( +/*=====================*/ + /* out: offset of the last record in + record list, relative from page */ + const page_t* page) /* in: page which must have record(s); + asserted to be page-aligned */ +{ + ut_ad(page); + ut_ad(!page_offset(page)); + + if (page_is_comp(page)) { + return(PAGE_NEW_SUPREMUM); + } else { + return(PAGE_OLD_SUPREMUM); + } +} + +/**************************************************************** +TRUE if the record is a user record on the page. +Only an offset is given, not the page format, so the offset is compared +against the infimum and supremum offsets of both formats. The #if/#error +blocks in the body are compile-time checks on the relative order of +those offset constants. */ +UNIV_INLINE +ibool +page_rec_is_user_rec_low( +/*=====================*/ + /* out: TRUE if a user record */ + ulint offset) /* in: record offset on page */ +{ + ut_ad(offset >= PAGE_NEW_INFIMUM); +#if PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM +# error "PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM" +#endif +#if PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM +# error "PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM" +#endif +#if PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM +# error "PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM" +#endif +#if PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM +# error "PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM" +#endif +#if PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END +# error "PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END" +#endif +#if PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END +# error "PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END" +#endif + ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START); + /* A user record is any offset that is none of the four + infimum/supremum offsets. */ + return(UNIV_LIKELY(offset != PAGE_NEW_SUPREMUM) + && UNIV_LIKELY(offset != PAGE_NEW_INFIMUM) + && UNIV_LIKELY(offset != PAGE_OLD_INFIMUM) + && UNIV_LIKELY(offset != PAGE_OLD_SUPREMUM)); +} + +/**************************************************************** +TRUE if the record is the supremum record on a page. 
*/ +UNIV_INLINE +ibool +page_rec_is_supremum_low( +/*=====================*/ + /* out: TRUE if the supremum record */ + ulint offset) /* in: record offset on page */ +{ + ut_ad(offset >= PAGE_NEW_INFIMUM); + ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START); + + return(UNIV_UNLIKELY(offset == PAGE_NEW_SUPREMUM) + || UNIV_UNLIKELY(offset == PAGE_OLD_SUPREMUM)); +} + +/**************************************************************** +TRUE if the record is the infimum record on a page. */ +UNIV_INLINE +ibool +page_rec_is_infimum_low( +/*====================*/ + /* out: TRUE if the infimum record */ + ulint offset) /* in: record offset on page */ +{ + ut_ad(offset >= PAGE_NEW_INFIMUM); + ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START); + + return(UNIV_UNLIKELY(offset == PAGE_NEW_INFIMUM) + || UNIV_UNLIKELY(offset == PAGE_OLD_INFIMUM)); +} + +/**************************************************************** +TRUE if the record is a user record on the page. */ +UNIV_INLINE +ibool +page_rec_is_user_rec( +/*=================*/ + /* out: TRUE if a user record */ + const rec_t* rec) /* in: record */ +{ + return(page_rec_is_user_rec_low(page_offset(rec))); +} + +/**************************************************************** +TRUE if the record is the supremum record on a page. */ +UNIV_INLINE +ibool +page_rec_is_supremum( +/*=================*/ + /* out: TRUE if the supremum record */ + const rec_t* rec) /* in: record */ +{ + return(page_rec_is_supremum_low(page_offset(rec))); +} + +/**************************************************************** +TRUE if the record is the infimum record on a page. */ +UNIV_INLINE +ibool +page_rec_is_infimum( +/*================*/ + /* out: TRUE if the infimum record */ + const rec_t* rec) /* in: record */ +{ + return(page_rec_is_infimum_low(page_offset(rec))); +} + +/***************************************************************** +Compares a data tuple to a physical record. 
Differs from the function +cmp_dtuple_rec_with_match in the way that the record must reside on an +index page, and also page infimum and supremum records can be given in +the parameter rec. These are considered as the negative infinity and +the positive infinity in the alphabetical order. */ +UNIV_INLINE +int +page_cmp_dtuple_rec_with_match( +/*===========================*/ + /* out: 1, 0, -1, if dtuple is greater, equal, + less than rec, respectively, when only the + common first fields are compared */ + const dtuple_t* dtuple, /* in: data tuple */ + const rec_t* rec, /* in: physical record on a page; may also + be page infimum or supremum, in which case + matched-parameter values below are not + affected */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint* matched_fields, /* in/out: number of already completely + matched fields; when function returns + contains the value for current comparison */ + ulint* matched_bytes) /* in/out: number of already matched + bytes within the first field not completely + matched; when function returns contains the + value for current comparison */ +{ + ulint rec_offset; + + ut_ad(dtuple_check_typed(dtuple)); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec)); + + rec_offset = page_offset(rec); + + if (UNIV_UNLIKELY(rec_offset == PAGE_NEW_INFIMUM) + || UNIV_UNLIKELY(rec_offset == PAGE_OLD_INFIMUM)) { + return(1); + } + if (UNIV_UNLIKELY(rec_offset == PAGE_NEW_SUPREMUM) + || UNIV_UNLIKELY(rec_offset == PAGE_OLD_SUPREMUM)) { + return(-1); + } + + return(cmp_dtuple_rec_with_match(dtuple, rec, offsets, + matched_fields, + matched_bytes)); +} + +/***************************************************************** +Gets the page number. 
*/ +UNIV_INLINE +ulint +page_get_page_no( +/*=============*/ + /* out: page number */ + const page_t* page) /* in: page */ +{ + ut_ad(page == page_align((page_t*) page)); + return(mach_read_from_4(page + FIL_PAGE_OFFSET)); +} + +/***************************************************************** +Gets the tablespace identifier. */ +UNIV_INLINE +ulint +page_get_space_id( +/*==============*/ + /* out: space id */ + const page_t* page) /* in: page */ +{ + ut_ad(page == page_align((page_t*) page)); + return(mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID)); +} + +/***************************************************************** +Gets the number of user records on page (infimum and supremum records +are not user records). */ +UNIV_INLINE +ulint +page_get_n_recs( +/*============*/ + /* out: number of user records */ + const page_t* page) /* in: index page */ +{ + return(page_header_get_field(page, PAGE_N_RECS)); +} + +/***************************************************************** +Gets the number of dir slots in directory. */ +UNIV_INLINE +ulint +page_dir_get_n_slots( +/*=================*/ + /* out: number of slots */ + const page_t* page) /* in: index page */ +{ + return(page_header_get_field(page, PAGE_N_DIR_SLOTS)); +} +/***************************************************************** +Sets the number of dir slots in directory. */ +UNIV_INLINE +void +page_dir_set_n_slots( +/*=================*/ + page_t* page, /* in/out: page */ + page_zip_des_t* page_zip,/* in/out: compressed page whose + uncompressed part will be updated, or NULL */ + ulint n_slots)/* in: number of slots */ +{ + page_header_set_field(page, page_zip, PAGE_N_DIR_SLOTS, n_slots); +} + +/***************************************************************** +Gets the number of records in the heap. 
*/ +UNIV_INLINE +ulint +page_dir_get_n_heap( +/*================*/ + /* out: number of records in the heap, + i.e. PAGE_N_HEAP with bit 0x8000 masked off */ + const page_t* page) /* in: index page */ +{ + return(page_header_get_field(page, PAGE_N_HEAP) & 0x7fff); +} + +/***************************************************************** +Sets the number of records in the heap. */ +UNIV_INLINE +void +page_dir_set_n_heap( +/*================*/ + page_t* page, /* in/out: index page */ + page_zip_des_t* page_zip,/* in/out: compressed page whose + uncompressed part will be updated, or NULL. + Note that the size of the dense page directory + in the compressed page trailer is + n_heap * PAGE_ZIP_DIR_SLOT_SIZE. */ + ulint n_heap) /* in: number of records; asserted to be + below 0x8000 and, when page_zip is given, + exactly one more than the current value */ +{ + ut_ad(n_heap < 0x8000); + ut_ad(!page_zip || n_heap + == (page_header_get_field(page, PAGE_N_HEAP) & 0x7fff) + 1); + /* Store n_heap in the low 15 bits and carry over the current + 0x8000 bit of PAGE_N_HEAP unchanged. */ + page_header_set_field(page, page_zip, PAGE_N_HEAP, n_heap + | (0x8000 + & page_header_get_field(page, PAGE_N_HEAP))); +} + +#ifdef UNIV_DEBUG +/***************************************************************** +Gets pointer to nth directory slot. */ +UNIV_INLINE +page_dir_slot_t* +page_dir_get_nth_slot( +/*==================*/ + /* out: pointer to dir slot */ + const page_t* page, /* in: index page */ + ulint n) /* in: position */ +{ + ut_ad(page_dir_get_n_slots(page) > n); + /* Slot n lies (n + 1) * PAGE_DIR_SLOT_SIZE below + page + UNIV_PAGE_SIZE - PAGE_DIR. Note that the cast + applies to page alone, before the arithmetic. */ + return((page_dir_slot_t*) + page + UNIV_PAGE_SIZE - PAGE_DIR + - (n + 1) * PAGE_DIR_SLOT_SIZE); +} +#endif /* UNIV_DEBUG */ + +/****************************************************************** +Used to check the consistency of a record on a page. */ +UNIV_INLINE +ibool +page_rec_check( +/*===========*/ + /* out: TRUE; the checks themselves are + ut_a() assertions */ + const rec_t* rec) /* in: record */ +{ + const page_t* page = page_align(rec); + + ut_a(rec); + /* rec must lie between PAGE_DATA and the PAGE_HEAP_TOP + offset of its page, both inclusive. */ + ut_a(page_offset(rec) <= page_header_get_field(page, PAGE_HEAP_TOP)); + ut_a(page_offset(rec) >= PAGE_DATA); + + return(TRUE); +} + +/******************************************************************* +Gets the record pointed to by a directory slot. 
*/ +UNIV_INLINE +const rec_t* +page_dir_slot_get_rec( +/*==================*/ + /* out: pointer to record: the start of the + slot's page plus the offset read from + the slot with mach_read_from_2() */ + const page_dir_slot_t* slot) /* in: directory slot */ +{ + return(page_align(slot) + mach_read_from_2(slot)); +} + +/******************************************************************* +This is used to set the record offset in a directory slot. */ +UNIV_INLINE +void +page_dir_slot_set_rec( +/*==================*/ + page_dir_slot_t* slot, /* in/out: directory slot; receives the + page offset of rec */ + rec_t* rec) /* in: record on the page */ +{ + ut_ad(page_rec_check(rec)); + + mach_write_to_2(slot, page_offset(rec)); +} + +/******************************************************************* +Gets the number of records owned by a directory slot. +The page format is determined from the slot pointer itself: +page_rec_is_comp() page-aligns whatever pointer it is given, and the +slot lies on the same page as the record it points to. */ +UNIV_INLINE +ulint +page_dir_slot_get_n_owned( +/*======================*/ + /* out: number of records */ + const page_dir_slot_t* slot) /* in: page directory slot */ +{ + const rec_t* rec = page_dir_slot_get_rec(slot); + if (page_rec_is_comp(slot)) { + return(rec_get_n_owned_new(rec)); + } else { + return(rec_get_n_owned_old(rec)); + } +} + +/******************************************************************* +This is used to set the owned records field of a directory slot. +The field is written into the record the slot points to; in the +old-style branch page_zip is asserted to be NULL. */ +UNIV_INLINE +void +page_dir_slot_set_n_owned( +/*======================*/ + page_dir_slot_t*slot, /* in/out: directory slot */ + page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */ + ulint n) /* in: number of records owned by the slot */ +{ + rec_t* rec = (rec_t*) page_dir_slot_get_rec(slot); + if (page_rec_is_comp(slot)) { + rec_set_n_owned_new(rec, page_zip, n); + } else { + ut_ad(!page_zip); + rec_set_n_owned_old(rec, n); + } +} + +/**************************************************************** +Calculates the space reserved for directory slots of a given number of +records. The exact value is a fraction number n * PAGE_DIR_SLOT_SIZE / +PAGE_DIR_SLOT_MIN_N_OWNED, and it is rounded upwards to an integer. 
*/ +UNIV_INLINE +ulint +page_dir_calc_reserved_space( +/*=========================*/ + ulint n_recs) /* in: number of records */ +{ + return((PAGE_DIR_SLOT_SIZE * n_recs + PAGE_DIR_SLOT_MIN_N_OWNED - 1) + / PAGE_DIR_SLOT_MIN_N_OWNED); +} + +/**************************************************************** +Gets the pointer to the next record on the page. If the stored next-record +offset is UNIV_PAGE_SIZE or more, a diagnostic is written to stderr, the +page is printed with buf_page_print() and ut_error is invoked. */ +UNIV_INLINE +const rec_t* +page_rec_get_next_low( +/*==================*/ + /* out: pointer to next record, or NULL if the + stored next-record offset is 0 */ + const rec_t* rec, /* in: pointer to record */ + ulint comp) /* in: nonzero=compact page layout */ +{ + ulint offs; + const page_t* page; + + ut_ad(page_rec_check(rec)); + + page = page_align(rec); + + offs = rec_get_next_offs(rec, comp); + + if (UNIV_UNLIKELY(offs >= UNIV_PAGE_SIZE)) { + fprintf(stderr, + "InnoDB: Next record offset is nonsensical %lu" + " in record at offset %lu\n" + "InnoDB: rec address %p, space id %lu, page %lu\n", + (ulong)offs, (ulong) page_offset(rec), + (void*) rec, + (ulong) page_get_space_id(page), + (ulong) page_get_page_no(page)); + buf_page_print(page, 0); + + ut_error; + } + + if (UNIV_UNLIKELY(offs == 0)) { + + return(NULL); + } + + return(page + offs); +} + +/**************************************************************** +Gets the pointer to the next record on the page. Non-const wrapper around +page_rec_get_next_low(); the page layout is taken from page_rec_is_comp(). */ +UNIV_INLINE +rec_t* +page_rec_get_next( +/*==============*/ + /* out: pointer to next record, or NULL if + page_rec_get_next_low() returns NULL */ + rec_t* rec) /* in: pointer to record */ +{ + return((rec_t*) page_rec_get_next_low(rec, page_rec_is_comp(rec))); +} + +/**************************************************************** +Gets the pointer to the next record on the page. Const wrapper around +page_rec_get_next_low(); the page layout is taken from page_rec_is_comp(). */ +UNIV_INLINE +const rec_t* +page_rec_get_next_const( +/*====================*/ + /* out: pointer to next record, or NULL if + page_rec_get_next_low() returns NULL */ + const rec_t* rec) /* in: pointer to record */ +{ + return(page_rec_get_next_low(rec, page_rec_is_comp(rec))); +} + +/**************************************************************** +Sets the pointer to the next record on the page. 
*/ +UNIV_INLINE +void +page_rec_set_next( +/*==============*/ + rec_t* rec, /* in/out: pointer to record, + must not be page supremum */ + rec_t* next) /* in: pointer to next record, + must not be page infimum and must be on the + same page as rec; or NULL, in which case + offset 0 is stored */ +{ + ulint offs; + + ut_ad(page_rec_check(rec)); + ut_ad(!page_rec_is_supremum(rec)); + ut_ad(rec != next); + + ut_ad(!next || !page_rec_is_infimum(next)); + ut_ad(!next || page_align(rec) == page_align(next)); + + if (UNIV_LIKELY(next != NULL)) { + offs = page_offset(next); + } else { + offs = 0; + } + + if (page_rec_is_comp(rec)) { + rec_set_next_offs_new(rec, offs); + } else { + rec_set_next_offs_old(rec, offs); + } +} + +/**************************************************************** +Gets the pointer to the previous record. The search starts from the record +pointed to by the directory slot that precedes the owner slot of rec, and +follows next-record pointers until rec is reached. ut_a() fails if the +owner slot number is 0 or if no predecessor was found. */ +UNIV_INLINE +const rec_t* +page_rec_get_prev_const( +/*====================*/ + /* out: pointer to previous record */ + const rec_t* rec) /* in: pointer to record, must not be page + infimum */ +{ + const page_dir_slot_t* slot; + ulint slot_no; + const rec_t* rec2; + const rec_t* prev_rec = NULL; + const page_t* page; + + ut_ad(page_rec_check(rec)); + + page = page_align(rec); + + ut_ad(!page_rec_is_infimum(rec)); + + slot_no = page_dir_find_owner_slot(rec); + + ut_a(slot_no != 0); + + slot = page_dir_get_nth_slot(page, slot_no - 1); + + rec2 = page_dir_slot_get_rec(slot); + + if (page_is_comp(page)) { + while (rec != rec2) { + prev_rec = rec2; + rec2 = page_rec_get_next_low(rec2, TRUE); + } + } else { + while (rec != rec2) { + prev_rec = rec2; + rec2 = page_rec_get_next_low(rec2, FALSE); + } + } + + ut_a(prev_rec); + + return(prev_rec); +} + +/**************************************************************** +Gets the pointer to the previous record. 
*/ +UNIV_INLINE +rec_t* +page_rec_get_prev( +/*==============*/ + /* out: pointer to previous record; this is the result of + page_rec_get_prev_const() with const cast away */ + rec_t* rec) /* in: pointer to record, must not be page + infimum */ +{ + return((rec_t*) page_rec_get_prev_const(rec)); +} + +/******************************************************************* +Looks for the record which owns the given record. Starting from rec itself, +next-record pointers are followed until a record with a nonzero n_owned +field is found. */ +UNIV_INLINE +rec_t* +page_rec_find_owner_rec( +/*====================*/ + /* out: the owner record */ + rec_t* rec) /* in: the physical record */ +{ + ut_ad(page_rec_check(rec)); + + if (page_rec_is_comp(rec)) { + while (rec_get_n_owned_new(rec) == 0) { + rec = page_rec_get_next(rec); + } + } else { + while (rec_get_n_owned_old(rec) == 0) { + rec = page_rec_get_next(rec); + } + } + + return(rec); +} + +/************************************************************** +Returns the base extra size of a physical record. This is the +size of the fixed header, independent of the record size. The +computation relies on REC_N_OLD_EXTRA_BYTES being exactly +REC_N_NEW_EXTRA_BYTES + 1, which is verified at compile time. */ +UNIV_INLINE +ulint +page_rec_get_base_extra_size( +/*=========================*/ + /* out: REC_N_NEW_EXTRA_BYTES + or REC_N_OLD_EXTRA_BYTES */ + const rec_t* rec) /* in: physical record */ +{ +#if REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES +# error "REC_N_NEW_EXTRA_BYTES + 1 != REC_N_OLD_EXTRA_BYTES" +#endif + return(REC_N_NEW_EXTRA_BYTES + (ulint) !page_rec_is_comp(rec)); +} + +/**************************************************************** +Returns the sum of the sizes of the records in the record list, excluding +the infimum and supremum records. */ +UNIV_INLINE +ulint +page_get_data_size( +/*===============*/ + /* out: data in bytes: PAGE_HEAP_TOP minus + PAGE_NEW_SUPREMUM_END or PAGE_OLD_SUPREMUM_END, + minus PAGE_GARBAGE */ + const page_t* page) /* in: index page */ +{ + ulint ret; + + ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP) + - (page_is_comp(page) + ?
PAGE_NEW_SUPREMUM_END + : PAGE_OLD_SUPREMUM_END) + - page_header_get_field(page, PAGE_GARBAGE)); + + ut_ad(ret < UNIV_PAGE_SIZE); + + return(ret); +} + + +/**************************************************************** +Allocates a block of memory from the free list of an index page. The +PAGE_FREE header pointer is set to next_rec and PAGE_GARBAGE is decreased +by need. */ +UNIV_INTERN +void +page_mem_alloc_free( +/*================*/ + page_t* page, /* in/out: index page */ + page_zip_des_t* page_zip,/* in/out: compressed page with enough + space available for inserting the record, + or NULL */ + rec_t* next_rec,/* in: pointer to the new head of the + free record list; under UNIV_DEBUG it is + checked to be the successor of the current + PAGE_FREE record (or NULL) */ + ulint need) /* in: number of bytes allocated; must not + exceed PAGE_GARBAGE (checked with ut_ad()) */ +{ + ulint garbage; + +#ifdef UNIV_DEBUG + const rec_t* old_rec = page_header_get_ptr(page, PAGE_FREE); + ulint next_offs; + + ut_ad(old_rec); + next_offs = rec_get_next_offs(old_rec, page_is_comp(page)); + ut_ad(next_rec == (next_offs ? page + next_offs : NULL)); +#endif + + page_header_set_ptr(page, page_zip, PAGE_FREE, next_rec); + + garbage = page_header_get_field(page, PAGE_GARBAGE); + ut_ad(garbage >= need); + + page_header_set_field(page, page_zip, PAGE_GARBAGE, garbage - need); +} + +/***************************************************************** +Calculates free space if a page is emptied. */ +UNIV_INLINE +ulint +page_get_free_space_of_empty( +/*=========================*/ + /* out: free space in bytes: UNIV_PAGE_SIZE minus + PAGE_NEW_SUPREMUM_END (or PAGE_OLD_SUPREMUM_END), + PAGE_DIR and two directory slots */ + ulint comp) /* in: nonzero=compact page layout */ +{ + if (UNIV_LIKELY(comp)) { + return((ulint)(UNIV_PAGE_SIZE + - PAGE_NEW_SUPREMUM_END + - PAGE_DIR + - 2 * PAGE_DIR_SLOT_SIZE)); + } + + return((ulint)(UNIV_PAGE_SIZE + - PAGE_OLD_SUPREMUM_END + - PAGE_DIR + - 2 * PAGE_DIR_SLOT_SIZE)); +} + +/**************************************************************** +Each user record on a page, and also the deleted user records in the heap +takes its size plus the fraction of the dir cell size / +PAGE_DIR_SLOT_MIN_N_OWNED bytes for it. 
If the sum of these exceeds the +value of page_get_free_space_of_empty, the insert is impossible, otherwise +it is allowed. This function returns the maximum combined size of records +which can be inserted on top of the record heap. */ +UNIV_INLINE +ulint +page_get_max_insert_size( +/*=====================*/ + /* out: maximum combined size for + inserted records, or 0 if the occupied + space exceeds the free space of an + empty page */ + const page_t* page, /* in: index page */ + ulint n_recs) /* in: number of records to be inserted */ +{ + ulint occupied; + ulint free_space; + + if (page_is_comp(page)) { + occupied = page_header_get_field(page, PAGE_HEAP_TOP) + - PAGE_NEW_SUPREMUM_END + + page_dir_calc_reserved_space( + n_recs + page_dir_get_n_heap(page) - 2); + + free_space = page_get_free_space_of_empty(TRUE); + } else { + occupied = page_header_get_field(page, PAGE_HEAP_TOP) + - PAGE_OLD_SUPREMUM_END + + page_dir_calc_reserved_space( + n_recs + page_dir_get_n_heap(page) - 2); + + free_space = page_get_free_space_of_empty(FALSE); + } + + /* Above, the 'n_recs +' part reserves directory space for the newly + inserted records; the '- 2' excludes the page infimum and supremum + records from the heap record count */ + + if (occupied > free_space) { + + return(0); + } + + return(free_space - occupied); +} + +/**************************************************************** +Returns the maximum combined size of records which can be inserted on top +of the record heap if a page is first reorganized. 
*/ +UNIV_INLINE +ulint +page_get_max_insert_size_after_reorganize( +/*======================================*/ + /* out: maximum combined size for + inserted records, or 0 if the occupied + space exceeds the free space of an + empty page */ + const page_t* page, /* in: index page */ + ulint n_recs) /* in: number of records to be inserted */ +{ + ulint occupied; + ulint free_space; + + occupied = page_get_data_size(page) + + page_dir_calc_reserved_space(n_recs + page_get_n_recs(page)); + + free_space = page_get_free_space_of_empty(page_is_comp(page)); + + if (occupied > free_space) { + + return(0); + } + + return(free_space - occupied); +} + +/**************************************************************** +Puts a record on the free list. The record becomes the new head of the +PAGE_FREE list and its size, rec_offs_size(offsets), is added to +PAGE_GARBAGE. On an uncompressed page PAGE_N_RECS is decremented here; +when page_zip is given, page_zip_dir_delete() is called instead. */ +UNIV_INLINE +void +page_mem_free( +/*==========*/ + page_t* page, /* in/out: index page */ + page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */ + rec_t* rec, /* in/out: pointer to the (origin of) record; + its next-record pointer is overwritten */ + dict_index_t* index, /* in: index of rec */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + rec_t* free; + ulint garbage; + + ut_ad(rec_offs_validate(rec, index, offsets)); + free = page_header_get_ptr(page, PAGE_FREE); + + page_rec_set_next(rec, free); + page_header_set_ptr(page, page_zip, PAGE_FREE, rec); + + garbage = page_header_get_field(page, PAGE_GARBAGE); + + page_header_set_field(page, page_zip, PAGE_GARBAGE, + garbage + rec_offs_size(offsets)); + + if (UNIV_LIKELY_NULL(page_zip)) { + page_zip_dir_delete(page_zip, rec, index, offsets, free); + } else { + page_header_set_field(page, page_zip, PAGE_N_RECS, + page_get_n_recs(page) - 1); + } +} + +#ifdef UNIV_MATERIALIZE +#undef UNIV_INLINE +#define UNIV_INLINE UNIV_INLINE_ORIGINAL +#endif diff --git a/storage/xtradb/include/page0types.h b/storage/xtradb/include/page0types.h new file mode 100644 index 00000000000..06af7a63d58 --- /dev/null +++ b/storage/xtradb/include/page0types.h @@ -0,0 +1,142 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. 
All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Index page routines + +Created 2/2/1994 Heikki Tuuri +*******************************************************/ + +#ifndef page0types_h +#define page0types_h + +#include "univ.i" +#include "dict0types.h" +#include "mtr0types.h" + +/* Type of the index page: a page is handled as an array of bytes */ +/* The following define eliminates a name collision on HP-UX */ +#define page_t ib_page_t +typedef byte page_t; +typedef struct page_search_struct page_search_t; +typedef struct page_cur_struct page_cur_t; + +typedef byte page_zip_t; /* compressed page data, likewise handled as bytes */ +typedef struct page_zip_des_struct page_zip_des_t; + +/* The following definitions would better belong to page0zip.h, +but we cannot include page0zip.h from rem0rec.ic, because +page0*.h includes rem0rec.h and may include rem0rec.ic. 
*/ + +#define PAGE_ZIP_SSIZE_BITS 3 /* width in bits of the ssize bit-field of page_zip_des_struct */ + +#define PAGE_ZIP_MIN_SIZE_SHIFT 10 /* log2 of smallest compressed size */ +#define PAGE_ZIP_MIN_SIZE (1 << PAGE_ZIP_MIN_SIZE_SHIFT) /* smallest compressed size in bytes */ + +#define PAGE_ZIP_NUM_SSIZE (UNIV_PAGE_SIZE_SHIFT - PAGE_ZIP_MIN_SIZE_SHIFT + 2) /* must not exceed 1 << PAGE_ZIP_SSIZE_BITS; verified right below */ +#if PAGE_ZIP_NUM_SSIZE > (1 << PAGE_ZIP_SSIZE_BITS) +# error "PAGE_ZIP_NUM_SSIZE > (1 << PAGE_ZIP_SSIZE_BITS)" +#endif + +/* Compressed page descriptor. With PAGE_ZIP_SSIZE_BITS = 3 the bit-fields + m_end, m_nonempty, n_blobs and ssize take 16 + 1 + 12 + 3 = 32 bits; + m_start is present only when UNIV_DEBUG is defined. */ +struct page_zip_des_struct +{ + page_zip_t* data; /* compressed page data */ + +#ifdef UNIV_DEBUG + unsigned m_start:16; /* start offset of modification log */ +#endif /* UNIV_DEBUG */ + unsigned m_end:16; /* end offset of modification log */ + unsigned m_nonempty:1; /* TRUE if the modification log + is not empty */ + unsigned n_blobs:12; /* number of externally stored + columns on the page; the maximum + is 744 on a 16 KiB page */ + unsigned ssize:PAGE_ZIP_SSIZE_BITS; + /* 0 or compressed page size; + the size in bytes is + PAGE_ZIP_MIN_SIZE << (ssize - 1). */ +}; + +/** Compression statistics for a given page size */ +struct page_zip_stat_struct { + /** Number of page compressions */ + ulint compressed; + /** Number of successful page compressions */ + ulint compressed_ok; + /** Number of page decompressions */ + ulint decompressed; + /** Duration of page compressions in microseconds */ + ib_uint64_t compressed_usec; + /** Duration of page decompressions in microseconds */ + ib_uint64_t decompressed_usec; +}; + +typedef struct page_zip_stat_struct page_zip_stat_t; + +/** Statistics on compression, indexed by page_zip_des_t::ssize - 1; +the array has PAGE_ZIP_NUM_SSIZE - 1 elements */ +extern page_zip_stat_t page_zip_stat[PAGE_ZIP_NUM_SSIZE - 1]; + +/************************************************************************** +Write the "deleted" flag of a record on a compressed page. The flag must +already have been written on the uncompressed page. 
*/ +UNIV_INTERN +void +page_zip_rec_set_deleted( +/*=====================*/ + page_zip_des_t* page_zip,/* in/out: compressed page */ + const byte* rec, /* in: record on the uncompressed page */ + ulint flag) /* in: the deleted flag (nonzero=TRUE) */ + __attribute__((nonnull)); + +/************************************************************************** +Write the "owned" flag of a record on a compressed page. The n_owned field +must already have been written on the uncompressed page. */ +UNIV_INTERN +void +page_zip_rec_set_owned( +/*===================*/ + page_zip_des_t* page_zip,/* in/out: compressed page */ + const byte* rec, /* in: record on the uncompressed page */ + ulint flag) /* in: the owned flag (nonzero=TRUE) */ + __attribute__((nonnull)); + +/************************************************************************** +Shift the dense page directory and the array of BLOB pointers +when a record is deleted. */ +UNIV_INTERN +void +page_zip_dir_delete( +/*================*/ + page_zip_des_t* page_zip,/* in/out: compressed page */ + byte* rec, /* in: deleted record */ + dict_index_t* index, /* in: index of rec */ + const ulint* offsets,/* in: rec_get_offsets(rec) */ + const byte* free) /* in: previous start of the free list; + the only pointer argument not listed in the + nonnull attribute, so presumably it may be + NULL - TODO confirm */ + __attribute__((nonnull(1,2,3,4))); + +/************************************************************************** +Add a slot to the dense page directory. */ +UNIV_INTERN +void +page_zip_dir_add_slot( +/*==================*/ + page_zip_des_t* page_zip, /* in/out: compressed page */ + ulint is_clustered) /* in: nonzero for clustered index, + zero for others */ + __attribute__((nonnull)); +#endif /* page0types_h */ diff --git a/storage/xtradb/include/page0zip.h b/storage/xtradb/include/page0zip.h new file mode 100644 index 00000000000..0183e013d05 --- /dev/null +++ b/storage/xtradb/include/page0zip.h @@ -0,0 +1,455 @@ +/***************************************************************************** + +Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Compressed page interface + +Created June 2005 by Marko Makela +*******************************************************/ + +#ifndef page0zip_h +#define page0zip_h + +#ifdef UNIV_MATERIALIZE +# undef UNIV_INLINE +# define UNIV_INLINE +#endif + +#include "mtr0types.h" +#include "page0types.h" +#include "buf0types.h" +#include "dict0types.h" +#include "mem0mem.h" + +/************************************************************************** +Determine the size of a compressed page in bytes. */ +UNIV_INLINE +ulint +page_zip_get_size( +/*==============*/ + /* out: size in bytes; presumably derived from the + ssize field of the descriptor - TODO confirm in + page0zip.ic */ + const page_zip_des_t* page_zip) /* in: compressed page */ + __attribute__((nonnull, pure)); +/************************************************************************** +Set the size of a compressed page in bytes. */ +UNIV_INLINE +void +page_zip_set_size( +/*==============*/ + page_zip_des_t* page_zip, /* in/out: compressed page */ + ulint size); /* in: size in bytes */ + +/************************************************************************** +Determine if a record is so big that it needs to be stored externally. 
*/ +UNIV_INLINE +ibool +page_zip_rec_needs_ext( +/*===================*/ + /* out: TRUE if the record needs external + storage, FALSE if the entire record + can be stored locally on the page */ + ulint rec_size, /* in: length of the record in bytes */ + ulint comp, /* in: nonzero=compact format */ + ulint n_fields, /* in: number of fields in the record; + ignored if zip_size == 0 */ + ulint zip_size) /* in: compressed page size in bytes, or 0 */ + __attribute__((const)); + +/************************************************************************** +Determine the guaranteed free space on an empty page. */ +UNIV_INTERN +ulint +page_zip_empty_size( +/*================*/ + /* out: minimum payload size on the page */ + ulint n_fields, /* in: number of columns in the index */ + ulint zip_size) /* in: compressed page size in bytes */ + __attribute__((const)); + +/************************************************************************** +Initialize a compressed page descriptor. */ +UNIV_INLINE +void +page_zip_des_init( +/*==============*/ + page_zip_des_t* page_zip); /* in/out: compressed page + descriptor */ + +/************************************************************************** +Configure the zlib allocator to use the given memory heap. */ +UNIV_INTERN +void +page_zip_set_alloc( +/*===============*/ + void* stream, /* in/out: zlib stream, passed as void*; + presumably a z_stream - TODO confirm */ + mem_heap_t* heap); /* in: memory heap to use */ + +/************************************************************************** +Compress a page. */ +UNIV_INTERN +ibool +page_zip_compress( +/*==============*/ + /* out: TRUE on success, FALSE on failure; + page_zip will be left intact on failure.
*/ + page_zip_des_t* page_zip,/* in: size; out: data, n_blobs, + m_start, m_end, m_nonempty */ + const page_t* page, /* in: uncompressed page */ + dict_index_t* index, /* in: index of the B-tree node */ + mtr_t* mtr) /* in: mini-transaction, or NULL; this is + the only pointer argument not listed in the + nonnull attribute */ + __attribute__((nonnull(1,2,3))); + +/************************************************************************** +Decompress a page. This function should tolerate errors on the compressed +page. Instead of letting assertions fail, it will return FALSE if an +inconsistency is detected. */ +UNIV_INTERN +ibool +page_zip_decompress( +/*================*/ + /* out: TRUE on success, FALSE on failure + (an inconsistency was detected) */ + page_zip_des_t* page_zip,/* in: data, ssize; + out: m_start, m_end, m_nonempty, n_blobs */ + page_t* page) /* out: uncompressed page, may be trashed */ + __attribute__((nonnull)); + +#ifdef UNIV_DEBUG +/************************************************************************** +Validate a compressed page descriptor. */ +UNIV_INLINE +ibool +page_zip_simple_validate( +/*=====================*/ + /* out: TRUE if ok */ + const page_zip_des_t* page_zip); /* in: compressed page + descriptor */ +#endif /* UNIV_DEBUG */ + +#ifdef UNIV_ZIP_DEBUG +/************************************************************************** +Check that the compressed and decompressed pages match. */ +UNIV_INTERN +ibool +page_zip_validate_low( +/*==================*/ + /* out: TRUE if valid, FALSE if not */ + const page_zip_des_t* page_zip,/* in: compressed page */ + const page_t* page, /* in: uncompressed page */ + ibool sloppy) /* in: FALSE=strict, + TRUE=ignore the MIN_REC_FLAG */ + __attribute__((nonnull)); +/************************************************************************** +Check that the compressed and decompressed pages match. 
*/ +UNIV_INTERN +ibool +page_zip_validate( +/*==============*/ + const page_zip_des_t* page_zip,/* in: compressed page */ + const page_t* page) /* in: uncompressed page */ + __attribute__((nonnull)); +#endif /* UNIV_ZIP_DEBUG */ + +/************************************************************************** +Determine how big a record can be inserted without recompressing the page. */ +UNIV_INLINE +lint +page_zip_max_ins_size( +/*==================*/ + /* out: a positive number + indicating the maximum size of + a record whose insertion is + guaranteed to succeed, or + zero or a negative number if + no such guarantee can be given */ + const page_zip_des_t* page_zip,/* in: compressed page */ + ibool is_clust)/* in: TRUE if clustered index */ + __attribute__((nonnull, pure)); + +/************************************************************************** +Determine if enough space is available in the modification log. */ +UNIV_INLINE +ibool +page_zip_available( +/*===============*/ + /* out: TRUE if page_zip_write_rec() + will succeed */ + const page_zip_des_t* page_zip,/* in: compressed page */ + ibool is_clust,/* in: TRUE if clustered index */ + ulint length, /* in: combined size of the record */ + ulint create) /* in: nonzero=add the record to + the heap */ + __attribute__((nonnull, pure)); + +/************************************************************************** +Write data to the uncompressed header portion of a page. The data must +already have been written to the uncompressed page. +NOTE(review): this prototype is repeated further down in this header with +a longer description. */ +UNIV_INLINE +void +page_zip_write_header( +/*==================*/ + page_zip_des_t* page_zip,/* in/out: compressed page */ + const byte* str, /* in: address on the uncompressed page */ + ulint length, /* in: length of the data */ + mtr_t* mtr) /* in: mini-transaction, or NULL */ + __attribute__((nonnull(1,2))); + +/************************************************************************** +Write an entire record on the compressed page. The data must already +have been written to the uncompressed page. 
*/ +UNIV_INTERN +void +page_zip_write_rec( +/*===============*/ + page_zip_des_t* page_zip,/* in/out: compressed page */ + const byte* rec, /* in: record being written */ + dict_index_t* index, /* in: the index the record belongs to */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ + ulint create) /* in: nonzero=insert, zero=update */ + __attribute__((nonnull)); + +/*************************************************************** +Parses a log record of writing a BLOB pointer of a record. */ +UNIV_INTERN +byte* +page_zip_parse_write_blob_ptr( +/*==========================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: redo log buffer */ + byte* end_ptr,/* in: redo log buffer end */ + page_t* page, /* in/out: uncompressed page */ + page_zip_des_t* page_zip);/* in/out: compressed page */ + +/************************************************************************** +Write a BLOB pointer of a record on the leaf page of a clustered index. +The information must already have been updated on the uncompressed page. */ +UNIV_INTERN +void +page_zip_write_blob_ptr( +/*====================*/ + page_zip_des_t* page_zip,/* in/out: compressed page */ + const byte* rec, /* in: record whose data is being + written; declared const, so the record + itself is not modified through it */ + dict_index_t* index, /* in: index of the page */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ + ulint n, /* in: column index */ + mtr_t* mtr) /* in: mini-transaction handle, + or NULL if no logging is needed */ + __attribute__((nonnull(1,2,3,4))); + +/*************************************************************** +Parses a log record of writing the node pointer of a record. 
*/ +UNIV_INTERN +byte* +page_zip_parse_write_node_ptr( +/*==========================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: redo log buffer */ + byte* end_ptr,/* in: redo log buffer end */ + page_t* page, /* in/out: uncompressed page */ + page_zip_des_t* page_zip);/* in/out: compressed page */ + +/************************************************************************** +Write the node pointer of a record on a non-leaf compressed page. */ +UNIV_INTERN +void +page_zip_write_node_ptr( +/*====================*/ + page_zip_des_t* page_zip,/* in/out: compressed page */ + byte* rec, /* in/out: record */ + ulint size, /* in: data size of rec */ + ulint ptr, /* in: node pointer, passed as an integer + value rather than as a C pointer */ + mtr_t* mtr) /* in: mini-transaction, or NULL */ + __attribute__((nonnull(1,2))); + +/************************************************************************** +Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */ +UNIV_INTERN +void +page_zip_write_trx_id_and_roll_ptr( +/*===============================*/ + page_zip_des_t* page_zip,/* in/out: compressed page */ + byte* rec, /* in/out: record */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ + ulint trx_id_col,/* in: column number of TRX_ID in rec */ + dulint trx_id, /* in: transaction identifier */ + dulint roll_ptr)/* in: value to write as the roll_ptr of rec */ + __attribute__((nonnull)); + +/************************************************************************** +Write the "deleted" flag of a record on a compressed page. The flag must +already have been written on the uncompressed page. +NOTE(review): the same prototype also appears in page0types.h. */ +UNIV_INTERN +void +page_zip_rec_set_deleted( +/*=====================*/ + page_zip_des_t* page_zip,/* in/out: compressed page */ + const byte* rec, /* in: record on the uncompressed page */ + ulint flag) /* in: the deleted flag (nonzero=TRUE) */ + __attribute__((nonnull)); + +/************************************************************************** +Write the "owned" flag of a record on a compressed page. 
The n_owned field +must already have been written on the uncompressed page. */ +UNIV_INTERN +void +page_zip_rec_set_owned( +/*===================*/ + page_zip_des_t* page_zip,/* in/out: compressed page */ + const byte* rec, /* in: record on the uncompressed page */ + ulint flag) /* in: the owned flag (nonzero=TRUE) */ + __attribute__((nonnull)); + +/************************************************************************** +Insert a record into the dense page directory. */ +UNIV_INTERN +void +page_zip_dir_insert( +/*================*/ + page_zip_des_t* page_zip,/* in/out: compressed page */ + const byte* prev_rec,/* in: record after which to insert */ + const byte* free_rec,/* in: record from which rec was + allocated, or NULL */ + byte* rec); /* in: record to insert */ + +/************************************************************************** +Shift the dense page directory and the array of BLOB pointers +when a record is deleted. */ +UNIV_INTERN +void +page_zip_dir_delete( +/*================*/ + page_zip_des_t* page_zip,/* in/out: compressed page */ + byte* rec, /* in: deleted record */ + dict_index_t* index, /* in: index of rec */ + const ulint* offsets,/* in: rec_get_offsets(rec) */ + const byte* free) /* in: previous start of the free list; + the only pointer argument not listed in the + nonnull attribute, so presumably it may be + NULL - TODO confirm */ + __attribute__((nonnull(1,2,3,4))); + +/************************************************************************** +Add a slot to the dense page directory. */ +UNIV_INTERN +void +page_zip_dir_add_slot( +/*==================*/ + page_zip_des_t* page_zip, /* in/out: compressed page */ + ulint is_clustered) /* in: nonzero for clustered index, + zero for others */ + __attribute__((nonnull)); + +/*************************************************************** +Parses a log record of writing to the header of a page. 
*/ +UNIV_INTERN +byte* +page_zip_parse_write_header( +/*========================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: redo log buffer */ + byte* end_ptr,/* in: redo log buffer end */ + page_t* page, /* in/out: uncompressed page */ + page_zip_des_t* page_zip);/* in/out: compressed page */ + +/************************************************************************** +Write data to the uncompressed header portion of a page. The data must +already have been written to the uncompressed page. +However, the data portion of the uncompressed page may differ from +the compressed page when a record is being inserted in +page_cur_insert_rec_low(). +NOTE(review): this repeats a prototype that already appears earlier in +this header with the same signature. */ +UNIV_INLINE +void +page_zip_write_header( +/*==================*/ + page_zip_des_t* page_zip,/* in/out: compressed page */ + const byte* str, /* in: address on the uncompressed page */ + ulint length, /* in: length of the data */ + mtr_t* mtr) /* in: mini-transaction, or NULL */ + __attribute__((nonnull(1,2))); + +/************************************************************************** +Reorganize and compress a page. This is a low-level operation for +compressed pages, to be used when page_zip_compress() fails. +On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written. +The function btr_page_reorganize() should be preferred whenever possible. +IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a +non-clustered index, the caller must update the insert buffer free +bits in the same mini-transaction in such a way that the modification +will be redo-logged. */ +UNIV_INTERN +ibool +page_zip_reorganize( +/*================*/ + /* out: TRUE on success, FALSE on failure; + page and page_zip will be left intact + on failure.
*/ + buf_block_t* block, /* in/out: page with compressed page; + on the compressed page, in: size; + out: data, n_blobs, + m_start, m_end, m_nonempty */ + dict_index_t* index, /* in: index of the B-tree node */ + mtr_t* mtr) /* in: mini-transaction; the argument-less + nonnull attribute applies to every pointer + argument, so NULL is not allowed here */ + __attribute__((nonnull)); +/************************************************************************** +Copy the records of a page byte for byte. Do not copy the page header +or trailer, except those B-tree header fields that are directly +related to the storage of records. Also copy PAGE_MAX_TRX_ID. +NOTE: The caller must update the lock table and the adaptive hash index. */ +UNIV_INTERN +void +page_zip_copy_recs( +/*===============*/ + page_zip_des_t* page_zip, /* out: copy of src_zip + (n_blobs, m_start, m_end, + m_nonempty, data[0..size-1]) */ + page_t* page, /* out: copy of src */ + const page_zip_des_t* src_zip, /* in: compressed page */ + const page_t* src, /* in: page */ + dict_index_t* index, /* in: index of the B-tree */ + mtr_t* mtr) /* in: mini-transaction; index and + mtr are not listed in the nonnull + attribute */ + __attribute__((nonnull(1,2,3,4))); + +/************************************************************************** +Parses a log record of compressing an index page. */ +UNIV_INTERN +byte* +page_zip_parse_compress( +/*====================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* out: uncompressed page; not listed in the + nonnull attribute, so presumably it may be + NULL - TODO confirm */ + page_zip_des_t* page_zip)/* out: compressed page; not listed in the + nonnull attribute either */ + __attribute__((nonnull(1,2))); + +/************************************************************************** +Calculate the compressed page checksum. 
*/ +UNIV_INTERN +ulint +page_zip_calc_checksum( +/*===================*/ + /* out: page checksum */ + const void* data, /* in: compressed page */ + ulint size) /* in: size of compressed page */ + __attribute__((nonnull)); + +#ifdef UNIV_MATERIALIZE +# undef UNIV_INLINE +# define UNIV_INLINE UNIV_INLINE_ORIGINAL +#endif + +#ifndef UNIV_NONINL +# include "page0zip.ic" +#endif + +#endif /* page0zip_h */ diff --git a/storage/xtradb/include/page0zip.ic b/storage/xtradb/include/page0zip.ic new file mode 100644 index 00000000000..3db5f025c31 --- /dev/null +++ b/storage/xtradb/include/page0zip.ic @@ -0,0 +1,398 @@ +/***************************************************************************** + +Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Compressed page interface + +Created June 2005 by Marko Makela +*******************************************************/ + +#ifdef UNIV_MATERIALIZE +# undef UNIV_INLINE +# define UNIV_INLINE +#endif + +#include "page0zip.h" +#include "page0page.h" + +/* The format of compressed pages is as follows. 
+ +The header and trailer of the uncompressed pages, excluding the page +directory in the trailer, are copied as is to the header and trailer +of the compressed page. + +At the end of the compressed page, there is a dense page directory +pointing to every user record contained on the page, including deleted +records on the free list. The dense directory is indexed in the +collation order, i.e., in the order in which the record list is +linked on the uncompressed page. The infimum and supremum records are +excluded. The two most significant bits of the entries are allocated +for the delete-mark and an n_owned flag indicating the last record in +a chain of records pointed to from the sparse page directory on the +uncompressed page. + +The data between PAGE_ZIP_START and the last page directory entry will +be written in compressed format, starting at offset PAGE_DATA. +Infimum and supremum records are not stored. We exclude the +REC_N_NEW_EXTRA_BYTES in every record header. These can be recovered +from the dense page directory stored at the end of the compressed +page. + +The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and +roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of +externally stored columns are stored separately, in ascending order of +heap_no and column index, starting backwards from the dense page +directory. + +The compressed data stream may be followed by a modification log +covering the compressed portion of the page, as follows. + +MODIFICATION LOG ENTRY FORMAT +- write record: + - (heap_no - 1) << 1 (1..2 bytes) + - extra bytes backwards + - data bytes +- clear record: + - (heap_no - 1) << 1 | 1 (1..2 bytes) + +The integer values are stored in a variable-length format: +- 0xxxxxxx: 0..127 +- 1xxxxxxx xxxxxxxx: 0..32767 + +The end of the modification log is marked by a 0 byte. 
+ +In summary, the compressed page looks like this: + +(1) Uncompressed page header (PAGE_DATA bytes) +(2) Compressed index information +(3) Compressed page data +(4) Page modification log (page_zip->m_start..page_zip->m_end) +(5) Empty zero-filled space +(6) BLOB pointers (on leaf pages) + - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column + - in descending collation order +(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes, + - indexed by heap_no + - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes + - REC_NODE_PTR_SIZE for non-leaf pages + - 0 otherwise +(8) dense page directory, stored backwards + - n_dense = n_heap - 2 + - existing records in ascending collation order + - deleted records (free list) in link order +*/ + +/* Start offset of the area that will be compressed */ +#define PAGE_ZIP_START PAGE_NEW_SUPREMUM_END +/* Size of an compressed page directory entry */ +#define PAGE_ZIP_DIR_SLOT_SIZE 2 +/* Mask of record offsets */ +#define PAGE_ZIP_DIR_SLOT_MASK 0x3fff +/* 'owned' flag */ +#define PAGE_ZIP_DIR_SLOT_OWNED 0x4000 +/* 'deleted' flag */ +#define PAGE_ZIP_DIR_SLOT_DEL 0x8000 + +/************************************************************************** +Determine the size of a compressed page in bytes. */ +UNIV_INLINE +ulint +page_zip_get_size( +/*==============*/ + /* out: size in bytes */ + const page_zip_des_t* page_zip) /* in: compressed page */ +{ + ulint size; + + if (UNIV_UNLIKELY(!page_zip->ssize)) { + return(0); + } + + size = (PAGE_ZIP_MIN_SIZE >> 1) << page_zip->ssize; + + ut_ad(size >= PAGE_ZIP_MIN_SIZE); + ut_ad(size <= UNIV_PAGE_SIZE); + + return(size); +} +/************************************************************************** +Set the size of a compressed page in bytes. 
*/ +UNIV_INLINE +void +page_zip_set_size( +/*==============*/ + page_zip_des_t* page_zip, /* in/out: compressed page */ + ulint size) /* in: size in bytes */ +{ + if (size) { + int ssize; + + ut_ad(ut_is_2pow(size)); + + for (ssize = 1; size > (ulint) (512 << ssize); ssize++) { + } + + page_zip->ssize = ssize; + } else { + page_zip->ssize = 0; + } + + ut_ad(page_zip_get_size(page_zip) == size); +} + +/************************************************************************** +Determine if a record is so big that it needs to be stored externally. */ +UNIV_INLINE +ibool +page_zip_rec_needs_ext( +/*===================*/ + /* out: FALSE if the entire record + can be stored locally on the page */ + ulint rec_size, /* in: length of the record in bytes */ + ulint comp, /* in: nonzero=compact format */ + ulint n_fields, /* in: number of fields in the record; + ignored if zip_size == 0 */ + ulint zip_size) /* in: compressed page size in bytes, or 0 */ +{ + ut_ad(rec_size > comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES); + ut_ad(ut_is_2pow(zip_size)); + ut_ad(comp || !zip_size); + +#if UNIV_PAGE_SIZE > REC_MAX_DATA_SIZE + if (UNIV_UNLIKELY(rec_size >= REC_MAX_DATA_SIZE)) { + return(TRUE); + } +#endif + + if (UNIV_UNLIKELY(zip_size)) { + ut_ad(comp); + /* On a compressed page, there is a two-byte entry in + the dense page directory for every record. But there + is no record header. There should be enough room for + one record on an empty leaf page. Subtract 1 byte for + the encoded heap number. Check also the available space + on the uncompressed page. */ + return(rec_size - (REC_N_NEW_EXTRA_BYTES - 2) + >= (page_zip_empty_size(n_fields, zip_size) - 1) + || rec_size >= page_get_free_space_of_empty(TRUE) / 2); + } + + return(rec_size >= page_get_free_space_of_empty(comp) / 2); +} + +#ifdef UNIV_DEBUG +/************************************************************************** +Validate a compressed page descriptor. 
*/ +UNIV_INLINE +ibool +page_zip_simple_validate( +/*=====================*/ + /* out: TRUE if ok */ + const page_zip_des_t* page_zip)/* in: compressed page descriptor */ +{ + ut_ad(page_zip); + ut_ad(page_zip->data); + ut_ad(page_zip->ssize < PAGE_ZIP_NUM_SSIZE); + ut_ad(page_zip_get_size(page_zip) + > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE); + ut_ad(page_zip->m_start <= page_zip->m_end); + ut_ad(page_zip->m_end < page_zip_get_size(page_zip)); + ut_ad(page_zip->n_blobs + < page_zip_get_size(page_zip) / BTR_EXTERN_FIELD_REF_SIZE); + return(TRUE); +} +#endif /* UNIV_DEBUG */ + +/************************************************************************** +Determine if the length of the page trailer. */ +UNIV_INLINE +ibool +page_zip_get_trailer_len( +/*=====================*/ + /* out: length of the page trailer, + in bytes, not including the terminating + zero byte of the modification log */ + const page_zip_des_t* page_zip,/* in: compressed page */ + ibool is_clust,/* in: TRUE if clustered index */ + ulint* entry_size)/* out: size of the uncompressed + portion of a user record */ +{ + ulint uncompressed_size; + + ut_ad(page_zip_simple_validate(page_zip)); + UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); + + if (UNIV_UNLIKELY(!page_is_leaf(page_zip->data))) { + uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE + + REC_NODE_PTR_SIZE; + ut_ad(!page_zip->n_blobs); + } else if (UNIV_UNLIKELY(is_clust)) { + uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE + + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; + } else { + uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE; + ut_ad(!page_zip->n_blobs); + } + + if (entry_size) { + *entry_size = uncompressed_size; + } + + return((page_dir_get_n_heap(page_zip->data) - 2) + * uncompressed_size + + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE); +} + +/************************************************************************** +Determine how big record can be inserted without recompressing the page. 
*/ +UNIV_INLINE +lint +page_zip_max_ins_size( +/*==================*/ + /* out: a positive number + indicating the maximum size of + a record whose insertion is + guaranteed to succeed, or + zero or negative */ + const page_zip_des_t* page_zip,/* in: compressed page */ + ibool is_clust)/* in: TRUE if clustered index */ +{ + ulint uncompressed_size; + ulint trailer_len; + + trailer_len = page_zip_get_trailer_len(page_zip, is_clust, + &uncompressed_size); + + /* When a record is created, a pointer may be added to + the dense directory. + Likewise, space for the columns that will not be + compressed will be allocated from the page trailer. + Also the BLOB pointers will be allocated from there, but + we may as well count them in the length of the record. */ + + trailer_len += uncompressed_size; + + return((lint) page_zip_get_size(page_zip) + - trailer_len - page_zip->m_end + - (REC_N_NEW_EXTRA_BYTES - 2)); +} + +/************************************************************************** +Determine if enough space is available in the modification log. */ +UNIV_INLINE +ibool +page_zip_available( +/*===============*/ + /* out: TRUE if enough space + is available */ + const page_zip_des_t* page_zip,/* in: compressed page */ + ibool is_clust,/* in: TRUE if clustered index */ + ulint length, /* in: combined size of the record */ + ulint create) /* in: nonzero=add the record to + the heap */ +{ + ulint uncompressed_size; + ulint trailer_len; + + ut_ad(length > REC_N_NEW_EXTRA_BYTES); + + trailer_len = page_zip_get_trailer_len(page_zip, is_clust, + &uncompressed_size); + + /* Subtract the fixed extra bytes and add the maximum + space needed for identifying the record (encoded heap_no). */ + length -= REC_N_NEW_EXTRA_BYTES - 2; + + if (UNIV_UNLIKELY(create)) { + /* When a record is created, a pointer may be added to + the dense directory. + Likewise, space for the columns that will not be + compressed will be allocated from the page trailer. 
+ Also the BLOB pointers will be allocated from there, but + we may as well count them in the length of the record. */ + + trailer_len += uncompressed_size; + } + + return(UNIV_LIKELY(length + + trailer_len + + page_zip->m_end + < page_zip_get_size(page_zip))); +} + +/************************************************************************** +Initialize a compressed page descriptor. */ +UNIV_INLINE +void +page_zip_des_init( +/*==============*/ + page_zip_des_t* page_zip) /* in/out: compressed page + descriptor */ +{ + memset(page_zip, 0, sizeof *page_zip); +} + +/************************************************************************** +Write a log record of writing to the uncompressed header portion of a page. */ +UNIV_INTERN +void +page_zip_write_header_log( +/*======================*/ + const byte* data,/* in: data on the uncompressed page */ + ulint length, /* in: length of the data */ + mtr_t* mtr); /* in: mini-transaction */ + +/************************************************************************** +Write data to the uncompressed header portion of a page. The data must +already have been written to the uncompressed page. +However, the data portion of the uncompressed page may differ from +the compressed page when a record is being inserted in +page_cur_insert_rec_zip(). */ +UNIV_INLINE +void +page_zip_write_header( +/*==================*/ + page_zip_des_t* page_zip,/* in/out: compressed page */ + const byte* str, /* in: address on the uncompressed page */ + ulint length, /* in: length of the data */ + mtr_t* mtr) /* in: mini-transaction, or NULL */ +{ + ulint pos; + + ut_ad(buf_frame_get_page_zip(str) == page_zip); + ut_ad(page_zip_simple_validate(page_zip)); + UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); + + pos = page_offset(str); + + ut_ad(pos < PAGE_DATA); + + memcpy(page_zip->data + pos, str, length); + + /* The following would fail in page_cur_insert_rec_zip(). 
*/ + /* ut_ad(page_zip_validate(page_zip, str - pos)); */ + + if (UNIV_LIKELY_NULL(mtr)) { + page_zip_write_header_log(str, length, mtr); + } +} + +#ifdef UNIV_MATERIALIZE +# undef UNIV_INLINE +# define UNIV_INLINE UNIV_INLINE_ORIGINAL +#endif diff --git a/storage/xtradb/include/pars0grm.h b/storage/xtradb/include/pars0grm.h new file mode 100644 index 00000000000..3de233eed3a --- /dev/null +++ b/storage/xtradb/include/pars0grm.h @@ -0,0 +1,236 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software +Foundation, Inc. + +As a special exception, when this file is copied by Bison into a +Bison output file, you may use that output file without restriction. +This special exception was added by the Free Software Foundation +in version 1.24 of Bison. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/* A Bison parser, made by GNU Bison 1.875d. */ + +/* Tokens. */ +#ifndef YYTOKENTYPE +# define YYTOKENTYPE + /* Put the tokens into the symbol table, so that GDB and other debuggers + know about them. 
*/ + enum yytokentype { + PARS_INT_LIT = 258, + PARS_FLOAT_LIT = 259, + PARS_STR_LIT = 260, + PARS_FIXBINARY_LIT = 261, + PARS_BLOB_LIT = 262, + PARS_NULL_LIT = 263, + PARS_ID_TOKEN = 264, + PARS_AND_TOKEN = 265, + PARS_OR_TOKEN = 266, + PARS_NOT_TOKEN = 267, + PARS_GE_TOKEN = 268, + PARS_LE_TOKEN = 269, + PARS_NE_TOKEN = 270, + PARS_PROCEDURE_TOKEN = 271, + PARS_IN_TOKEN = 272, + PARS_OUT_TOKEN = 273, + PARS_BINARY_TOKEN = 274, + PARS_BLOB_TOKEN = 275, + PARS_INT_TOKEN = 276, + PARS_INTEGER_TOKEN = 277, + PARS_FLOAT_TOKEN = 278, + PARS_CHAR_TOKEN = 279, + PARS_IS_TOKEN = 280, + PARS_BEGIN_TOKEN = 281, + PARS_END_TOKEN = 282, + PARS_IF_TOKEN = 283, + PARS_THEN_TOKEN = 284, + PARS_ELSE_TOKEN = 285, + PARS_ELSIF_TOKEN = 286, + PARS_LOOP_TOKEN = 287, + PARS_WHILE_TOKEN = 288, + PARS_RETURN_TOKEN = 289, + PARS_SELECT_TOKEN = 290, + PARS_SUM_TOKEN = 291, + PARS_COUNT_TOKEN = 292, + PARS_DISTINCT_TOKEN = 293, + PARS_FROM_TOKEN = 294, + PARS_WHERE_TOKEN = 295, + PARS_FOR_TOKEN = 296, + PARS_DDOT_TOKEN = 297, + PARS_READ_TOKEN = 298, + PARS_ORDER_TOKEN = 299, + PARS_BY_TOKEN = 300, + PARS_ASC_TOKEN = 301, + PARS_DESC_TOKEN = 302, + PARS_INSERT_TOKEN = 303, + PARS_INTO_TOKEN = 304, + PARS_VALUES_TOKEN = 305, + PARS_UPDATE_TOKEN = 306, + PARS_SET_TOKEN = 307, + PARS_DELETE_TOKEN = 308, + PARS_CURRENT_TOKEN = 309, + PARS_OF_TOKEN = 310, + PARS_CREATE_TOKEN = 311, + PARS_TABLE_TOKEN = 312, + PARS_INDEX_TOKEN = 313, + PARS_UNIQUE_TOKEN = 314, + PARS_CLUSTERED_TOKEN = 315, + PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316, + PARS_ON_TOKEN = 317, + PARS_ASSIGN_TOKEN = 318, + PARS_DECLARE_TOKEN = 319, + PARS_CURSOR_TOKEN = 320, + PARS_SQL_TOKEN = 321, + PARS_OPEN_TOKEN = 322, + PARS_FETCH_TOKEN = 323, + PARS_CLOSE_TOKEN = 324, + PARS_NOTFOUND_TOKEN = 325, + PARS_TO_CHAR_TOKEN = 326, + PARS_TO_NUMBER_TOKEN = 327, + PARS_TO_BINARY_TOKEN = 328, + PARS_BINARY_TO_NUMBER_TOKEN = 329, + PARS_SUBSTR_TOKEN = 330, + PARS_REPLSTR_TOKEN = 331, + PARS_CONCAT_TOKEN = 332, + PARS_INSTR_TOKEN = 333, + 
PARS_LENGTH_TOKEN = 334, + PARS_SYSDATE_TOKEN = 335, + PARS_PRINTF_TOKEN = 336, + PARS_ASSERT_TOKEN = 337, + PARS_RND_TOKEN = 338, + PARS_RND_STR_TOKEN = 339, + PARS_ROW_PRINTF_TOKEN = 340, + PARS_COMMIT_TOKEN = 341, + PARS_ROLLBACK_TOKEN = 342, + PARS_WORK_TOKEN = 343, + PARS_UNSIGNED_TOKEN = 344, + PARS_EXIT_TOKEN = 345, + PARS_FUNCTION_TOKEN = 346, + PARS_LOCK_TOKEN = 347, + PARS_SHARE_TOKEN = 348, + PARS_MODE_TOKEN = 349, + NEG = 350 + }; +#endif +#define PARS_INT_LIT 258 +#define PARS_FLOAT_LIT 259 +#define PARS_STR_LIT 260 +#define PARS_FIXBINARY_LIT 261 +#define PARS_BLOB_LIT 262 +#define PARS_NULL_LIT 263 +#define PARS_ID_TOKEN 264 +#define PARS_AND_TOKEN 265 +#define PARS_OR_TOKEN 266 +#define PARS_NOT_TOKEN 267 +#define PARS_GE_TOKEN 268 +#define PARS_LE_TOKEN 269 +#define PARS_NE_TOKEN 270 +#define PARS_PROCEDURE_TOKEN 271 +#define PARS_IN_TOKEN 272 +#define PARS_OUT_TOKEN 273 +#define PARS_BINARY_TOKEN 274 +#define PARS_BLOB_TOKEN 275 +#define PARS_INT_TOKEN 276 +#define PARS_INTEGER_TOKEN 277 +#define PARS_FLOAT_TOKEN 278 +#define PARS_CHAR_TOKEN 279 +#define PARS_IS_TOKEN 280 +#define PARS_BEGIN_TOKEN 281 +#define PARS_END_TOKEN 282 +#define PARS_IF_TOKEN 283 +#define PARS_THEN_TOKEN 284 +#define PARS_ELSE_TOKEN 285 +#define PARS_ELSIF_TOKEN 286 +#define PARS_LOOP_TOKEN 287 +#define PARS_WHILE_TOKEN 288 +#define PARS_RETURN_TOKEN 289 +#define PARS_SELECT_TOKEN 290 +#define PARS_SUM_TOKEN 291 +#define PARS_COUNT_TOKEN 292 +#define PARS_DISTINCT_TOKEN 293 +#define PARS_FROM_TOKEN 294 +#define PARS_WHERE_TOKEN 295 +#define PARS_FOR_TOKEN 296 +#define PARS_DDOT_TOKEN 297 +#define PARS_READ_TOKEN 298 +#define PARS_ORDER_TOKEN 299 +#define PARS_BY_TOKEN 300 +#define PARS_ASC_TOKEN 301 +#define PARS_DESC_TOKEN 302 +#define PARS_INSERT_TOKEN 303 +#define PARS_INTO_TOKEN 304 +#define PARS_VALUES_TOKEN 305 +#define PARS_UPDATE_TOKEN 306 +#define PARS_SET_TOKEN 307 +#define PARS_DELETE_TOKEN 308 +#define PARS_CURRENT_TOKEN 309 +#define PARS_OF_TOKEN 310 +#define 
PARS_CREATE_TOKEN 311 +#define PARS_TABLE_TOKEN 312 +#define PARS_INDEX_TOKEN 313 +#define PARS_UNIQUE_TOKEN 314 +#define PARS_CLUSTERED_TOKEN 315 +#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316 +#define PARS_ON_TOKEN 317 +#define PARS_ASSIGN_TOKEN 318 +#define PARS_DECLARE_TOKEN 319 +#define PARS_CURSOR_TOKEN 320 +#define PARS_SQL_TOKEN 321 +#define PARS_OPEN_TOKEN 322 +#define PARS_FETCH_TOKEN 323 +#define PARS_CLOSE_TOKEN 324 +#define PARS_NOTFOUND_TOKEN 325 +#define PARS_TO_CHAR_TOKEN 326 +#define PARS_TO_NUMBER_TOKEN 327 +#define PARS_TO_BINARY_TOKEN 328 +#define PARS_BINARY_TO_NUMBER_TOKEN 329 +#define PARS_SUBSTR_TOKEN 330 +#define PARS_REPLSTR_TOKEN 331 +#define PARS_CONCAT_TOKEN 332 +#define PARS_INSTR_TOKEN 333 +#define PARS_LENGTH_TOKEN 334 +#define PARS_SYSDATE_TOKEN 335 +#define PARS_PRINTF_TOKEN 336 +#define PARS_ASSERT_TOKEN 337 +#define PARS_RND_TOKEN 338 +#define PARS_RND_STR_TOKEN 339 +#define PARS_ROW_PRINTF_TOKEN 340 +#define PARS_COMMIT_TOKEN 341 +#define PARS_ROLLBACK_TOKEN 342 +#define PARS_WORK_TOKEN 343 +#define PARS_UNSIGNED_TOKEN 344 +#define PARS_EXIT_TOKEN 345 +#define PARS_FUNCTION_TOKEN 346 +#define PARS_LOCK_TOKEN 347 +#define PARS_SHARE_TOKEN 348 +#define PARS_MODE_TOKEN 349 +#define NEG 350 + + + + +#if ! defined (YYSTYPE) && ! defined (YYSTYPE_IS_DECLARED) +typedef int YYSTYPE; +# define yystype YYSTYPE /* obsolescent; will be withdrawn */ +# define YYSTYPE_IS_DECLARED 1 +# define YYSTYPE_IS_TRIVIAL 1 +#endif + +extern YYSTYPE yylval; + + + diff --git a/storage/xtradb/include/pars0opt.h b/storage/xtradb/include/pars0opt.h new file mode 100644 index 00000000000..02524e9d893 --- /dev/null +++ b/storage/xtradb/include/pars0opt.h @@ -0,0 +1,74 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Simple SQL optimizer + +Created 12/21/1997 Heikki Tuuri +*******************************************************/ + +#ifndef pars0opt_h +#define pars0opt_h + +#include "univ.i" +#include "que0types.h" +#include "usr0types.h" +#include "pars0sym.h" +#include "dict0types.h" +#include "row0sel.h" + +/*********************************************************************** +Optimizes a select. Decides which indexes to tables to use. The tables +are accessed in the order that they were written to the FROM part in the +select statement. */ +UNIV_INTERN +void +opt_search_plan( +/*============*/ + sel_node_t* sel_node); /* in: parsed select node */ +/*********************************************************************** +Looks for occurrences of the columns of the table in the query subgraph and +adds them to the list of columns if an occurrence of the same column does not +already exist in the list. If the column is already in the list, puts a value +indirection to point to the occurrence in the column list, except if the +column occurrence we are looking at is in the column list, in which case +nothing is done. 
*/ +UNIV_INTERN +void +opt_find_all_cols( +/*==============*/ + ibool copy_val, /* in: if TRUE, new found columns are + added as columns to copy */ + dict_index_t* index, /* in: index to use */ + sym_node_list_t* col_list, /* in: base node of a list where + to add new found columns */ + plan_t* plan, /* in: plan or NULL */ + que_node_t* exp); /* in: expression or condition */ +/************************************************************************ +Prints info of a query plan. */ +UNIV_INTERN +void +opt_print_query_plan( +/*=================*/ + sel_node_t* sel_node); /* in: select node */ + +#ifndef UNIV_NONINL +#include "pars0opt.ic" +#endif + +#endif diff --git a/storage/xtradb/include/pars0opt.ic b/storage/xtradb/include/pars0opt.ic new file mode 100644 index 00000000000..35653453b30 --- /dev/null +++ b/storage/xtradb/include/pars0opt.ic @@ -0,0 +1,23 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Simple SQL optimizer + +Created 12/21/1997 Heikki Tuuri +*******************************************************/ diff --git a/storage/xtradb/include/pars0pars.h b/storage/xtradb/include/pars0pars.h new file mode 100644 index 00000000000..e5693ee5575 --- /dev/null +++ b/storage/xtradb/include/pars0pars.h @@ -0,0 +1,747 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +SQL parser + +Created 11/19/1996 Heikki Tuuri +*******************************************************/ + +#ifndef pars0pars_h +#define pars0pars_h + +#include "univ.i" +#include "que0types.h" +#include "usr0types.h" +#include "pars0types.h" +#include "row0types.h" +#include "trx0types.h" +#include "ut0vec.h" + +/* Type of the user functions. 
The first argument is always InnoDB-supplied +and varies in type, while 'user_arg' is a user-supplied argument. The +meaning of the return type also varies. See the individual use cases, e.g. +the FETCH statement, for details on them. */ +typedef void* (*pars_user_func_cb_t)(void* arg, void* user_arg); + +extern int yydebug; + +/* If the following is set TRUE, the lexer will print the SQL string +as it tokenizes it */ + +#ifdef UNIV_SQL_DEBUG +extern ibool pars_print_lexed; +#endif /* UNIV_SQL_DEBUG */ + +/* Global variable used while parsing a single procedure or query : the code is +NOT re-entrant */ +extern sym_tab_t* pars_sym_tab_global; + +extern pars_res_word_t pars_to_char_token; +extern pars_res_word_t pars_to_number_token; +extern pars_res_word_t pars_to_binary_token; +extern pars_res_word_t pars_binary_to_number_token; +extern pars_res_word_t pars_substr_token; +extern pars_res_word_t pars_replstr_token; +extern pars_res_word_t pars_concat_token; +extern pars_res_word_t pars_length_token; +extern pars_res_word_t pars_instr_token; +extern pars_res_word_t pars_sysdate_token; +extern pars_res_word_t pars_printf_token; +extern pars_res_word_t pars_assert_token; +extern pars_res_word_t pars_rnd_token; +extern pars_res_word_t pars_rnd_str_token; +extern pars_res_word_t pars_count_token; +extern pars_res_word_t pars_sum_token; +extern pars_res_word_t pars_distinct_token; +extern pars_res_word_t pars_binary_token; +extern pars_res_word_t pars_blob_token; +extern pars_res_word_t pars_int_token; +extern pars_res_word_t pars_char_token; +extern pars_res_word_t pars_float_token; +extern pars_res_word_t pars_update_token; +extern pars_res_word_t pars_asc_token; +extern pars_res_word_t pars_desc_token; +extern pars_res_word_t pars_open_token; +extern pars_res_word_t pars_close_token; +extern pars_res_word_t pars_share_token; +extern pars_res_word_t pars_unique_token; +extern pars_res_word_t pars_clustered_token; + +extern ulint pars_star_denoter; + +/* Procedure 
parameter types */ +#define PARS_INPUT 0 +#define PARS_OUTPUT 1 +#define PARS_NOT_PARAM 2 + +int +yyparse(void); + +/***************************************************************** +Parses an SQL string returning the query graph. */ +UNIV_INTERN +que_t* +pars_sql( +/*=====*/ + /* out, own: the query graph */ + pars_info_t* info, /* in: extra information, or NULL */ + const char* str); /* in: SQL string */ +/***************************************************************** +Retrieves characters to the lexical analyzer. */ +UNIV_INTERN +void +pars_get_lex_chars( +/*===============*/ + char* buf, /* in/out: buffer where to copy */ + int* result, /* out: number of characters copied or EOF */ + int max_size); /* in: maximum number of characters which fit + in the buffer */ +/***************************************************************** +Called by yyparse on error. */ +UNIV_INTERN +void +yyerror( +/*====*/ + const char* s); /* in: error message string */ +/************************************************************************* +Parses a variable declaration. */ +UNIV_INTERN +sym_node_t* +pars_variable_declaration( +/*======================*/ + /* out, own: symbol table node of type + SYM_VAR */ + sym_node_t* node, /* in: symbol table node allocated for the + id of the variable */ + pars_res_word_t* type); /* in: pointer to a type token */ +/************************************************************************* +Parses a function expression. */ +UNIV_INTERN +func_node_t* +pars_func( +/*======*/ + /* out, own: function node in a query tree */ + que_node_t* res_word,/* in: function name reserved word */ + que_node_t* arg); /* in: first argument in the argument list */ +/************************************************************************* +Parses an operator expression. 
*/ +UNIV_INTERN +func_node_t* +pars_op( +/*====*/ + /* out, own: function node in a query tree */ + int func, /* in: operator token code */ + que_node_t* arg1, /* in: first argument */ + que_node_t* arg2); /* in: second argument or NULL for a unary + operator */ +/************************************************************************* +Parses an ORDER BY clause. Order by a single column only is supported. */ +UNIV_INTERN +order_node_t* +pars_order_by( +/*==========*/ + /* out, own: order-by node in a query tree */ + sym_node_t* column, /* in: column name */ + pars_res_word_t* asc); /* in: &pars_asc_token or &pars_desc_token */ +/************************************************************************* +Parses a select list; creates a query graph node for the whole SELECT +statement. */ +UNIV_INTERN +sel_node_t* +pars_select_list( +/*=============*/ + /* out, own: select node in a query + tree */ + que_node_t* select_list, /* in: select list */ + sym_node_t* into_list); /* in: variables list or NULL */ +/************************************************************************* +Parses a cursor declaration. */ +UNIV_INTERN +que_node_t* +pars_cursor_declaration( +/*====================*/ + /* out: sym_node */ + sym_node_t* sym_node, /* in: cursor id node in the symbol + table */ + sel_node_t* select_node); /* in: select node */ +/************************************************************************* +Parses a function declaration. */ +UNIV_INTERN +que_node_t* +pars_function_declaration( +/*======================*/ + /* out: sym_node */ + sym_node_t* sym_node); /* in: function id node in the symbol + table */ +/************************************************************************* +Parses a select statement. 
*/ +UNIV_INTERN +sel_node_t* +pars_select_statement( +/*==================*/ + /* out, own: select node in a query + tree */ + sel_node_t* select_node, /* in: select node already containing + the select list */ + sym_node_t* table_list, /* in: table list */ + que_node_t* search_cond, /* in: search condition or NULL */ + pars_res_word_t* for_update, /* in: NULL or &pars_update_token */ + pars_res_word_t* consistent_read,/* in: NULL or + &pars_consistent_token */ + order_node_t* order_by); /* in: NULL or an order-by node */ +/************************************************************************* +Parses a column assignment in an update. */ +UNIV_INTERN +col_assign_node_t* +pars_column_assignment( +/*===================*/ + /* out: column assignment node */ + sym_node_t* column, /* in: column to assign */ + que_node_t* exp); /* in: value to assign */ +/************************************************************************* +Parses a delete or update statement start. */ +UNIV_INTERN +upd_node_t* +pars_update_statement_start( +/*========================*/ + /* out, own: update node in a query + tree */ + ibool is_delete, /* in: TRUE if delete */ + sym_node_t* table_sym, /* in: table name node */ + col_assign_node_t* col_assign_list);/* in: column assignment list, NULL + if delete */ +/************************************************************************* +Parses an update or delete statement. */ +UNIV_INTERN +upd_node_t* +pars_update_statement( +/*==================*/ + /* out, own: update node in a query + tree */ + upd_node_t* node, /* in: update node */ + sym_node_t* cursor_sym, /* in: pointer to a cursor entry in + the symbol table or NULL */ + que_node_t* search_cond); /* in: search condition or NULL */ +/************************************************************************* +Parses an insert statement. 
*/ +UNIV_INTERN +ins_node_t* +pars_insert_statement( +/*==================*/ + /* out, own: insert node in a query + tree */ + sym_node_t* table_sym, /* in: table name node */ + que_node_t* values_list, /* in: value expression list or NULL */ + sel_node_t* select); /* in: select node or NULL */ +/************************************************************************* +Parses a procedure parameter declaration. */ +UNIV_INTERN +sym_node_t* +pars_parameter_declaration( +/*=======================*/ + /* out, own: symbol table node of type + SYM_VAR */ + sym_node_t* node, /* in: symbol table node allocated for the + id of the parameter */ + ulint param_type, + /* in: PARS_INPUT or PARS_OUTPUT */ + pars_res_word_t* type); /* in: pointer to a type token */ +/************************************************************************* +Parses an elsif element. */ +UNIV_INTERN +elsif_node_t* +pars_elsif_element( +/*===============*/ + /* out: elsif node */ + que_node_t* cond, /* in: if-condition */ + que_node_t* stat_list); /* in: statement list */ +/************************************************************************* +Parses an if-statement. */ +UNIV_INTERN +if_node_t* +pars_if_statement( +/*==============*/ + /* out: if-statement node */ + que_node_t* cond, /* in: if-condition */ + que_node_t* stat_list, /* in: statement list */ + que_node_t* else_part); /* in: else-part statement list */ +/************************************************************************* +Parses a for-loop-statement. */ +UNIV_INTERN +for_node_t* +pars_for_statement( +/*===============*/ + /* out: for-statement node */ + sym_node_t* loop_var, /* in: loop variable */ + que_node_t* loop_start_limit,/* in: loop start expression */ + que_node_t* loop_end_limit, /* in: loop end expression */ + que_node_t* stat_list); /* in: statement list */ +/************************************************************************* +Parses a while-statement. 
*/ +UNIV_INTERN +while_node_t* +pars_while_statement( +/*=================*/ + /* out: while-statement node */ + que_node_t* cond, /* in: while-condition */ + que_node_t* stat_list); /* in: statement list */ +/************************************************************************* +Parses an exit statement. */ +UNIV_INTERN +exit_node_t* +pars_exit_statement(void); +/*=====================*/ + /* out: exit statement node */ +/************************************************************************* +Parses a return-statement. */ +UNIV_INTERN +return_node_t* +pars_return_statement(void); +/*=======================*/ + /* out: return-statement node */ +/************************************************************************* +Parses a procedure call. */ +UNIV_INTERN +func_node_t* +pars_procedure_call( +/*================*/ + /* out: function node */ + que_node_t* res_word,/* in: procedure name reserved word */ + que_node_t* args); /* in: argument list */ +/************************************************************************* +Parses an assignment statement. */ +UNIV_INTERN +assign_node_t* +pars_assignment_statement( +/*======================*/ + /* out: assignment statement node */ + sym_node_t* var, /* in: variable to assign */ + que_node_t* val); /* in: value to assign */ +/************************************************************************* +Parses a fetch statement. into_list or user_func (but not both) must be +non-NULL. */ +UNIV_INTERN +fetch_node_t* +pars_fetch_statement( +/*=================*/ + /* out: fetch statement node */ + sym_node_t* cursor, /* in: cursor node */ + sym_node_t* into_list, /* in: variables to set, or NULL */ + sym_node_t* user_func); /* in: user function name, or NULL */ +/************************************************************************* +Parses an open or close cursor statement. 
*/ +UNIV_INTERN +open_node_t* +pars_open_statement( +/*================*/ + /* out: open or close cursor statement node */ + ulint type, /* in: ROW_SEL_OPEN_CURSOR + or ROW_SEL_CLOSE_CURSOR */ + sym_node_t* cursor); /* in: cursor node */ +/************************************************************************* +Parses a row_printf-statement. */ +UNIV_INTERN +row_printf_node_t* +pars_row_printf_statement( +/*======================*/ + /* out: row_printf-statement node */ + sel_node_t* sel_node); /* in: select node */ +/************************************************************************* +Parses a commit statement. */ +UNIV_INTERN +commit_node_t* +pars_commit_statement(void); +/*=======================*/ + /* out: commit statement node */ +/************************************************************************* +Parses a rollback statement. */ +UNIV_INTERN +roll_node_t* +pars_rollback_statement(void); +/*=========================*/ + /* out: rollback statement node */ +/************************************************************************* +Parses a column definition at a table creation. */ +UNIV_INTERN +sym_node_t* +pars_column_def( +/*============*/ + /* out: column sym table + node */ + sym_node_t* sym_node, /* in: column node in the + symbol table */ + pars_res_word_t* type, /* in: data type */ + sym_node_t* len, /* in: length of column, or + NULL */ + void* is_unsigned, /* in: if not NULL, column + is of type UNSIGNED. */ + void* is_not_null); /* in: if not NULL, column + is of type NOT NULL. */ +/************************************************************************* +Parses a table creation operation. 
*/ +UNIV_INTERN +tab_node_t* +pars_create_table( +/*==============*/ + /* out: table create subgraph */ + sym_node_t* table_sym, /* in: table name node in the symbol + table */ + sym_node_t* column_defs, /* in: list of column names */ + void* not_fit_in_memory);/* in: a non-NULL pointer means that + this is a table which in simulations + should be simulated as not fitting + in memory; thread is put to sleep + to simulate disk accesses; NOTE that + this flag is not stored to the data + dictionary on disk, and the database + will forget about non-NULL value if + it has to reload the table definition + from disk */ +/************************************************************************* +Parses an index creation operation. */ +UNIV_INTERN +ind_node_t* +pars_create_index( +/*==============*/ + /* out: index create subgraph */ + pars_res_word_t* unique_def, /* in: not NULL if a unique index */ + pars_res_word_t* clustered_def, /* in: not NULL if a clustered index */ + sym_node_t* index_sym, /* in: index name node in the symbol + table */ + sym_node_t* table_sym, /* in: table name node in the symbol + table */ + sym_node_t* column_list); /* in: list of column names */ +/************************************************************************* +Parses a procedure definition. */ +UNIV_INTERN +que_fork_t* +pars_procedure_definition( +/*======================*/ + /* out: query fork node */ + sym_node_t* sym_node, /* in: procedure id node in the symbol + table */ + sym_node_t* param_list, /* in: parameter declaration list */ + que_node_t* stat_list); /* in: statement list */ + +/***************************************************************** +Parses a stored procedure call, when this is not within another stored +procedure, that is, the client issues a procedure call directly. +In MySQL/InnoDB, stored InnoDB procedures are invoked via the +parsed procedure tree, not via InnoDB SQL, so this function is not used. 
*/ +UNIV_INTERN +que_fork_t* +pars_stored_procedure_call( +/*=======================*/ + /* out: query graph */ + sym_node_t* sym_node); /* in: stored procedure name */ +/********************************************************************** +Completes a query graph by adding query thread and fork nodes +above it and prepares the graph for running. The fork created is of +type QUE_FORK_MYSQL_INTERFACE. */ +UNIV_INTERN +que_thr_t* +pars_complete_graph_for_exec( +/*=========================*/ + /* out: query thread node to run */ + que_node_t* node, /* in: root node for an incomplete + query graph */ + trx_t* trx, /* in: transaction handle */ + mem_heap_t* heap); /* in: memory heap from which allocated */ + +/******************************************************************** +Create parser info struct.*/ +UNIV_INTERN +pars_info_t* +pars_info_create(void); +/*==================*/ + /* out, own: info struct */ + +/******************************************************************** +Free info struct and everything it contains.*/ +UNIV_INTERN +void +pars_info_free( +/*===========*/ + pars_info_t* info); /* in: info struct */ + +/******************************************************************** +Add bound literal. */ +UNIV_INTERN +void +pars_info_add_literal( +/*==================*/ + pars_info_t* info, /* in: info struct */ + const char* name, /* in: name */ + const void* address, /* in: address */ + ulint length, /* in: length of data */ + ulint type, /* in: type, e.g. DATA_FIXBINARY */ + ulint prtype); /* in: precise type, e.g. + DATA_UNSIGNED */ + +/******************************************************************** +Equivalent to pars_info_add_literal(info, name, str, strlen(str), +DATA_VARCHAR, DATA_ENGLISH). 
*/ +UNIV_INTERN +void +pars_info_add_str_literal( +/*======================*/ + pars_info_t* info, /* in: info struct */ + const char* name, /* in: name */ + const char* str); /* in: string */ + +/******************************************************************** +Equivalent to: + +char buf[4]; +mach_write_to_4(buf, val); +pars_info_add_literal(info, name, buf, 4, DATA_INT, 0); + +except that the buffer is dynamically allocated from the info struct's +heap. */ +UNIV_INTERN +void +pars_info_add_int4_literal( +/*=======================*/ + pars_info_t* info, /* in: info struct */ + const char* name, /* in: name */ + lint val); /* in: value */ + +/******************************************************************** +Equivalent to: + +char buf[8]; +mach_write_to_8(buf, val); +pars_info_add_literal(info, name, buf, 8, DATA_BINARY, 0); + +except that the buffer is dynamically allocated from the info struct's +heap. */ +UNIV_INTERN +void +pars_info_add_dulint_literal( +/*=========================*/ + pars_info_t* info, /* in: info struct */ + const char* name, /* in: name */ + dulint val); /* in: value */ +/******************************************************************** +Add user function. */ +UNIV_INTERN +void +pars_info_add_function( +/*===================*/ + pars_info_t* info, /* in: info struct */ + const char* name, /* in: function name */ + pars_user_func_cb_t func, /* in: function address */ + void* arg); /* in: user-supplied argument */ + +/******************************************************************** +Add bound id. 
*/ +UNIV_INTERN +void +pars_info_add_id( +/*=============*/ + pars_info_t* info, /* in: info struct */ + const char* name, /* in: name */ + const char* id); /* in: id */ + +/******************************************************************** +Get user function with the given name. */ +UNIV_INTERN +pars_user_func_t* +pars_info_get_user_func( +/*====================*/ + /* out: user func, or NULL if not + found */ + pars_info_t* info, /* in: info struct */ + const char* name); /* in: function name to find */ + +/******************************************************************** +Get bound literal with the given name. */ +UNIV_INTERN +pars_bound_lit_t* +pars_info_get_bound_lit( +/*====================*/ + /* out: bound literal, or NULL if + not found */ + pars_info_t* info, /* in: info struct */ + const char* name); /* in: bound literal name to find */ + +/******************************************************************** +Get bound id with the given name. */ +UNIV_INTERN +pars_bound_id_t* +pars_info_get_bound_id( +/*===================*/ + /* out: bound id, or NULL if not + found */ + pars_info_t* info, /* in: info struct */ + const char* name); /* in: bound id name to find */ + + +/* Extra information supplied for pars_sql(). */ +struct pars_info_struct { + mem_heap_t* heap; /* our own memory heap */ + + ib_vector_t* funcs; /* user functions, or NULL + (pars_user_func_t*) */ + ib_vector_t* bound_lits; /* bound literals, or NULL + (pars_bound_lit_t*) */ + ib_vector_t* bound_ids; /* bound ids, or NULL + (pars_bound_id_t*) */ + + ibool graph_owns_us; /* if TRUE (which is the default), + que_graph_free() will free us */ +}; + +/* User-supplied function and argument. */ +struct pars_user_func_struct { + const char* name; /* function name */ + pars_user_func_cb_t func; /* function address */ + void* arg; /* user-supplied argument */ +}; + +/* Bound literal. 
*/ +struct pars_bound_lit_struct { + const char* name; /* name */ + const void* address; /* address */ + ulint length; /* length of data */ + ulint type; /* type, e.g. DATA_FIXBINARY */ + ulint prtype; /* precise type, e.g. DATA_UNSIGNED */ +}; + +/* Bound id. */ +struct pars_bound_id_struct { + const char* name; /* name */ + const char* id; /* id */ +}; + +/* Struct used to denote a reserved word in a parsing tree */ +struct pars_res_word_struct{ + int code; /* the token code for the reserved word from + pars0grm.h */ +}; + +/* A predefined function or operator node in a parsing tree; this construct +is also used for some non-functions like the assignment ':=' */ +struct func_node_struct{ + que_common_t common; /* type: QUE_NODE_FUNC */ + int func; /* token code of the function name */ + ulint class; /* class of the function */ + que_node_t* args; /* argument(s) of the function */ + UT_LIST_NODE_T(func_node_t) cond_list; + /* list of comparison conditions; defined + only for comparison operator nodes except, + presently, for OPT_SCROLL_TYPE ones */ + UT_LIST_NODE_T(func_node_t) func_node_list; + /* list of function nodes in a parsed + query graph */ +}; + +/* An order-by node in a select */ +struct order_node_struct{ + que_common_t common; /* type: QUE_NODE_ORDER */ + sym_node_t* column; /* order-by column */ + ibool asc; /* TRUE if ascending, FALSE if descending */ +}; + +/* Procedure definition node */ +struct proc_node_struct{ + que_common_t common; /* type: QUE_NODE_PROC */ + sym_node_t* proc_id; /* procedure name symbol in the symbol + table of this same procedure */ + sym_node_t* param_list; /* input and output parameters */ + que_node_t* stat_list; /* statement list */ + sym_tab_t* sym_tab; /* symbol table of this procedure */ +}; + +/* elsif-element node */ +struct elsif_node_struct{ + que_common_t common; /* type: QUE_NODE_ELSIF */ + que_node_t* cond; /* if condition */ + que_node_t* stat_list; /* statement list */ +}; + +/* if-statement node */ +struct 
if_node_struct{ + que_common_t common; /* type: QUE_NODE_IF */ + que_node_t* cond; /* if condition */ + que_node_t* stat_list; /* statement list */ + que_node_t* else_part; /* else-part statement list */ + elsif_node_t* elsif_list; /* elsif element list */ +}; + +/* while-statement node */ +struct while_node_struct{ + que_common_t common; /* type: QUE_NODE_WHILE */ + que_node_t* cond; /* while condition */ + que_node_t* stat_list; /* statement list */ +}; + +/* for-loop-statement node */ +struct for_node_struct{ + que_common_t common; /* type: QUE_NODE_FOR */ + sym_node_t* loop_var; /* loop variable: this is the + dereferenced symbol from the + variable declarations, not the + symbol occurrence in the for loop + definition */ + que_node_t* loop_start_limit;/* initial value of loop variable */ + que_node_t* loop_end_limit; /* end value of loop variable */ + int loop_end_value; /* evaluated value for the end value: + it is calculated only when the loop + is entered, and will not change within + the loop */ + que_node_t* stat_list; /* statement list */ +}; + +/* exit statement node */ +struct exit_node_struct{ + que_common_t common; /* type: QUE_NODE_EXIT */ +}; + +/* return-statement node */ +struct return_node_struct{ + que_common_t common; /* type: QUE_NODE_RETURN */ +}; + +/* Assignment statement node */ +struct assign_node_struct{ + que_common_t common; /* type: QUE_NODE_ASSIGNMENT */ + sym_node_t* var; /* variable to set */ + que_node_t* val; /* value to assign */ +}; + +/* Column assignment node */ +struct col_assign_node_struct{ + que_common_t common; /* type: QUE_NODE_COL_ASSIGN */ + sym_node_t* col; /* column to set */ + que_node_t* val; /* value to assign */ +}; + +/* Classes of functions */ +#define PARS_FUNC_ARITH 1 /* +, -, *, / */ +#define PARS_FUNC_LOGICAL 2 +#define PARS_FUNC_CMP 3 +#define PARS_FUNC_PREDEFINED 4 /* TO_NUMBER, SUBSTR, ... 
*/ +#define PARS_FUNC_AGGREGATE 5 /* COUNT, DISTINCT, SUM */ +#define PARS_FUNC_OTHER 6 /* these are not real functions, + e.g., := */ + +#ifndef UNIV_NONINL +#include "pars0pars.ic" +#endif + +#endif diff --git a/storage/xtradb/include/pars0pars.ic b/storage/xtradb/include/pars0pars.ic new file mode 100644 index 00000000000..3a55ad86f48 --- /dev/null +++ b/storage/xtradb/include/pars0pars.ic @@ -0,0 +1,23 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +SQL parser + +Created 11/19/1996 Heikki Tuuri +*******************************************************/ diff --git a/storage/xtradb/include/pars0sym.h b/storage/xtradb/include/pars0sym.h new file mode 100644 index 00000000000..69227a2917e --- /dev/null +++ b/storage/xtradb/include/pars0sym.h @@ -0,0 +1,239 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +SQL parser symbol table + +Created 12/15/1997 Heikki Tuuri +*******************************************************/ + +#ifndef pars0sym_h +#define pars0sym_h + +#include "univ.i" +#include "que0types.h" +#include "usr0types.h" +#include "dict0types.h" +#include "pars0types.h" +#include "row0types.h" + +/********************************************************************** +Creates a symbol table for a single stored procedure or query. */ +UNIV_INTERN +sym_tab_t* +sym_tab_create( +/*===========*/ + /* out, own: symbol table */ + mem_heap_t* heap); /* in: memory heap where to create */ +/********************************************************************** +Frees the memory allocated dynamically AFTER parsing phase for variables +etc. in the symbol table. Does not free the mem heap where the table was +originally created. Frees also SQL explicit cursor definitions. */ +UNIV_INTERN +void +sym_tab_free_private( +/*=================*/ + sym_tab_t* sym_tab); /* in, own: symbol table */ +/********************************************************************** +Adds an integer literal to a symbol table. 
*/ +UNIV_INTERN +sym_node_t* +sym_tab_add_int_lit( +/*================*/ + /* out: symbol table node */ + sym_tab_t* sym_tab, /* in: symbol table */ + ulint val); /* in: integer value */ +/********************************************************************** +Adds a string literal to a symbol table. */ +UNIV_INTERN +sym_node_t* +sym_tab_add_str_lit( +/*================*/ + /* out: symbol table node */ + sym_tab_t* sym_tab, /* in: symbol table */ + byte* str, /* in: string with no quotes around + it */ + ulint len); /* in: string length */ +/********************************************************************** +Adds a bound literal to a symbol table. */ +UNIV_INTERN +sym_node_t* +sym_tab_add_bound_lit( +/*==================*/ + /* out: symbol table node */ + sym_tab_t* sym_tab, /* in: symbol table */ + const char* name, /* in: name of bound literal */ + ulint* lit_type); /* out: type of literal (PARS_*_LIT) */ +/********************************************************************** +Adds an SQL null literal to a symbol table. */ +UNIV_INTERN +sym_node_t* +sym_tab_add_null_lit( +/*=================*/ + /* out: symbol table node */ + sym_tab_t* sym_tab); /* in: symbol table */ +/********************************************************************** +Adds an identifier to a symbol table. */ +UNIV_INTERN +sym_node_t* +sym_tab_add_id( +/*===========*/ + /* out: symbol table node */ + sym_tab_t* sym_tab, /* in: symbol table */ + byte* name, /* in: identifier name */ + ulint len); /* in: identifier length */ + +/********************************************************************** +Add a bound identifier to a symbol table. 
*/ +UNIV_INTERN +sym_node_t* +sym_tab_add_bound_id( +/*===========*/ + /* out: symbol table node */ + sym_tab_t* sym_tab, /* in: symbol table */ + const char* name); /* in: name of bound id */ + +#define SYM_CLUST_FIELD_NO 0 +#define SYM_SEC_FIELD_NO 1 + +struct sym_node_struct{ + que_common_t common; /* node type: + QUE_NODE_SYMBOL */ + /* NOTE: if the data field in 'common.val' is not NULL and the symbol + table node is not for a temporary column, the memory for the value has + been allocated from dynamic memory and it should be freed when the + symbol table is discarded */ + + /* 'alias' and 'indirection' are almost the same, but not quite. + 'alias' always points to the primary instance of the variable, while + 'indirection' does the same only if we should use the primary + instance's values for the node's data. This is usually the case, but + when initializing a cursor (e.g., "DECLARE CURSOR c IS SELECT * FROM + t WHERE id = x;"), we copy the values from the primary instance to + the cursor's instance so that they are fixed for the duration of the + cursor, and set 'indirection' to NULL. If we did not, the value of + 'x' could change between fetches and things would break horribly. + + TODO: It would be cleaner to make 'indirection' a boolean field and + always use 'alias' to refer to the primary node. 
*/ + + sym_node_t* indirection; /* pointer to + another symbol table + node which contains + the value for this + node, NULL otherwise */ + sym_node_t* alias; /* pointer to + another symbol table + node for which this + node is an alias, + NULL otherwise */ + UT_LIST_NODE_T(sym_node_t) col_var_list; /* list of table + columns or a list of + input variables for an + explicit cursor */ + ibool copy_val; /* TRUE if a column + and its value should + be copied to dynamic + memory when fetched */ + ulint field_nos[2]; /* if a column, in + the position + SYM_CLUST_FIELD_NO is + the field number in the + clustered index; in + the position + SYM_SEC_FIELD_NO + the field number in the + non-clustered index to + use first; if not found + from the index, then + ULINT_UNDEFINED */ + ibool resolved; /* TRUE if the + meaning of a variable + or a column has been + resolved; for literals + this is always TRUE */ + ulint token_type; /* SYM_VAR, SYM_COLUMN, + SYM_IMPLICIT_VAR, + SYM_LIT, SYM_TABLE, + SYM_CURSOR, ... 
*/ + const char* name; /* name of an id */ + ulint name_len; /* id name length */ + dict_table_t* table; /* table definition + if a table id or a + column id */ + ulint col_no; /* column number if a + column */ + sel_buf_t* prefetch_buf; /* NULL, or a buffer + for cached column + values for prefetched + rows */ + sel_node_t* cursor_def; /* cursor definition + select node if a + named cursor */ + ulint param_type; /* PARS_INPUT, + PARS_OUTPUT, or + PARS_NOT_PARAM if not a + procedure parameter */ + sym_tab_t* sym_table; /* back pointer to + the symbol table */ + UT_LIST_NODE_T(sym_node_t) sym_list; /* list of symbol + nodes */ +}; + +struct sym_tab_struct{ + que_t* query_graph; + /* query graph generated by the + parser */ + const char* sql_string; + /* SQL string to parse */ + size_t string_len; + /* SQL string length */ + int next_char_pos; + /* position of the next character in + sql_string to give to the lexical + analyzer */ + pars_info_t* info; /* extra information, or NULL */ + sym_node_list_t sym_list; + /* list of symbol nodes in the symbol + table */ + UT_LIST_BASE_NODE_T(func_node_t) + func_node_list; + /* list of function nodes in the + parsed query graph */ + mem_heap_t* heap; /* memory heap from which we can + allocate space */ +}; + +/* Types of a symbol table entry */ +#define SYM_VAR 91 /* declared parameter or local + variable of a procedure */ +#define SYM_IMPLICIT_VAR 92 /* storage for an intermediate result + of a calculation */ +#define SYM_LIT 93 /* literal */ +#define SYM_TABLE 94 /* database table name */ +#define SYM_COLUMN 95 /* database table column name */ +#define SYM_CURSOR 96 /* named cursor */ +#define SYM_PROCEDURE_NAME 97 /* stored procedure name */ +#define SYM_INDEX 98 /* database index name */ +#define SYM_FUNCTION 99 /* user function name */ + +#ifndef UNIV_NONINL +#include "pars0sym.ic" +#endif + +#endif diff --git a/storage/xtradb/include/pars0sym.ic b/storage/xtradb/include/pars0sym.ic new file mode 100644 index 
00000000000..235d6819ae9 --- /dev/null +++ b/storage/xtradb/include/pars0sym.ic @@ -0,0 +1,23 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +SQL parser symbol table + +Created 12/15/1997 Heikki Tuuri +*******************************************************/ diff --git a/storage/xtradb/include/pars0types.h b/storage/xtradb/include/pars0types.h new file mode 100644 index 00000000000..e0902d0611a --- /dev/null +++ b/storage/xtradb/include/pars0types.h @@ -0,0 +1,49 @@ +/***************************************************************************** + +Copyright (c) 1998, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +SQL parser global types + +Created 1/11/1998 Heikki Tuuri +*******************************************************/ + +#ifndef pars0types_h +#define pars0types_h + +typedef struct pars_info_struct pars_info_t; +typedef struct pars_user_func_struct pars_user_func_t; +typedef struct pars_bound_lit_struct pars_bound_lit_t; +typedef struct pars_bound_id_struct pars_bound_id_t; +typedef struct sym_node_struct sym_node_t; +typedef struct sym_tab_struct sym_tab_t; +typedef struct pars_res_word_struct pars_res_word_t; +typedef struct func_node_struct func_node_t; +typedef struct order_node_struct order_node_t; +typedef struct proc_node_struct proc_node_t; +typedef struct elsif_node_struct elsif_node_t; +typedef struct if_node_struct if_node_t; +typedef struct while_node_struct while_node_t; +typedef struct for_node_struct for_node_t; +typedef struct exit_node_struct exit_node_t; +typedef struct return_node_struct return_node_t; +typedef struct assign_node_struct assign_node_t; +typedef struct col_assign_node_struct col_assign_node_t; + +typedef UT_LIST_BASE_NODE_T(sym_node_t) sym_node_list_t; + +#endif diff --git a/storage/xtradb/include/que0que.h b/storage/xtradb/include/que0que.h new file mode 100644 index 00000000000..a534cb7e464 --- /dev/null +++ b/storage/xtradb/include/que0que.h @@ -0,0 +1,526 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Query graph + +Created 5/27/1996 Heikki Tuuri +*******************************************************/ + +#ifndef que0que_h +#define que0que_h + +#include "univ.i" +#include "data0data.h" +#include "dict0types.h" +#include "trx0trx.h" +#include "srv0srv.h" +#include "usr0types.h" +#include "que0types.h" +#include "row0types.h" +#include "pars0types.h" + +/* If the following flag is set TRUE, the module will print trace info +of SQL execution in the UNIV_SQL_DEBUG version */ +extern ibool que_trace_on; + +/*************************************************************************** +Adds a query graph to the session's list of graphs. */ +UNIV_INTERN +void +que_graph_publish( +/*==============*/ + que_t* graph, /* in: graph */ + sess_t* sess); /* in: session */ +/*************************************************************************** +Creates a query graph fork node. 
*/ +UNIV_INTERN +que_fork_t* +que_fork_create( +/*============*/ + /* out, own: fork node */ + que_t* graph, /* in: graph, if NULL then this + fork node is assumed to be the + graph root */ + que_node_t* parent, /* in: parent node */ + ulint fork_type, /* in: fork type */ + mem_heap_t* heap); /* in: memory heap where created */ +/*************************************************************************** +Gets the first thr in a fork. */ +UNIV_INLINE +que_thr_t* +que_fork_get_first_thr( +/*===================*/ + que_fork_t* fork); /* in: query fork */ +/*************************************************************************** +Gets the child node of the first thr in a fork. */ +UNIV_INLINE +que_node_t* +que_fork_get_child( +/*===============*/ + que_fork_t* fork); /* in: query fork */ +/*************************************************************************** +Sets the parent of a graph node. */ +UNIV_INLINE +void +que_node_set_parent( +/*================*/ + que_node_t* node, /* in: graph node */ + que_node_t* parent);/* in: parent */ +/*************************************************************************** +Creates a query graph thread node. */ +UNIV_INTERN +que_thr_t* +que_thr_create( +/*===========*/ + /* out, own: query thread node */ + que_fork_t* parent, /* in: parent node, i.e., a fork node */ + mem_heap_t* heap); /* in: memory heap where created */ +/************************************************************************** +Checks if the query graph is in a state where it should be freed, and +frees it in that case. If the session is in a state where it should be +closed, also this is done. */ +UNIV_INTERN +ibool +que_graph_try_free( +/*===============*/ + /* out: TRUE if freed */ + que_t* graph); /* in: query graph */ +/************************************************************************** +Frees a query graph, but not the heap where it was created. Does not free +explicit cursor declarations, they are freed in que_graph_free. 
*/ +UNIV_INTERN +void +que_graph_free_recursive( +/*=====================*/ + que_node_t* node); /* in: query graph node */ +/************************************************************************** +Frees a query graph. */ +UNIV_INTERN +void +que_graph_free( +/*===========*/ + que_t* graph); /* in: query graph; we assume that the memory + heap where this graph was created is private + to this graph: if not, then use + que_graph_free_recursive and free the heap + afterwards! */ +/************************************************************************** +Stops a query thread if graph or trx is in a state requiring it. The +conditions are tested in the order (1) graph, (2) trx. The kernel mutex has +to be reserved. */ +UNIV_INTERN +ibool +que_thr_stop( +/*=========*/ + /* out: TRUE if stopped */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************** +Moves a thread from another state to the QUE_THR_RUNNING state. Increments +the n_active_thrs counters of the query graph and transaction. */ +UNIV_INTERN +void +que_thr_move_to_run_state_for_mysql( +/*================================*/ + que_thr_t* thr, /* in: a query thread */ + trx_t* trx); /* in: transaction */ +/************************************************************************** +A patch for MySQL used to 'stop' a dummy query thread used in MySQL +select, when there is no error or lock wait. */ +UNIV_INTERN +void +que_thr_stop_for_mysql_no_error( +/*============================*/ + que_thr_t* thr, /* in: query thread */ + trx_t* trx); /* in: transaction */ +/************************************************************************** +A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The +query thread is stopped and made inactive, except in the case where +it was put to the lock wait state in lock0lock.c, but the lock has already +been granted or the transaction chosen as a victim in deadlock resolution. 
*/ +UNIV_INTERN +void +que_thr_stop_for_mysql( +/*===================*/ + que_thr_t* thr); /* in: query thread */ +/************************************************************************** +Run a query thread. Handles lock waits. */ +UNIV_INTERN +void +que_run_threads( +/*============*/ + que_thr_t* thr); /* in: query thread */ +/************************************************************************** +After signal handling is finished, returns control to a query graph error +handling routine. (Currently, just returns the control to the root of the +graph so that the graph can communicate an error message to the client.) */ +UNIV_INTERN +void +que_fork_error_handle( +/*==================*/ + trx_t* trx, /* in: trx */ + que_t* fork); /* in: query graph which was run before signal + handling started, NULL not allowed */ +/************************************************************************** +Moves a suspended query thread to the QUE_THR_RUNNING state and releases +a single worker thread to execute it. This function should be used to end +the wait state of a query thread waiting for a lock or a stored procedure +completion. */ +UNIV_INTERN +void +que_thr_end_wait( +/*=============*/ + que_thr_t* thr, /* in: query thread in the + QUE_THR_LOCK_WAIT, + or QUE_THR_PROCEDURE_WAIT, or + QUE_THR_SIG_REPLY_WAIT state */ + que_thr_t** next_thr); /* in/out: next query thread to run; + if the value which is passed in is + a pointer to a NULL pointer, then the + calling function can start running + a new query thread */ +/************************************************************************** +Same as que_thr_end_wait, but no parameter next_thr available. 
*/ +UNIV_INTERN +void +que_thr_end_wait_no_next_thr( +/*=========================*/ + que_thr_t* thr); /* in: query thread in the + QUE_THR_LOCK_WAIT, + or QUE_THR_PROCEDURE_WAIT, or + QUE_THR_SIG_REPLY_WAIT state */ +/************************************************************************** +Starts execution of a command in a query fork. Picks a query thread which +is not in the QUE_THR_RUNNING state and moves it to that state. If none +can be chosen, a situation which may arise in parallelized fetches, NULL +is returned. */ +UNIV_INTERN +que_thr_t* +que_fork_start_command( +/*===================*/ + /* out: a query thread of the graph moved to + QUE_THR_RUNNING state, or NULL; the query + thread should be executed by que_run_threads + by the caller */ + que_fork_t* fork); /* in: a query fork */ +/*************************************************************************** +Gets the trx of a query thread. */ +UNIV_INLINE +trx_t* +thr_get_trx( +/*========*/ + que_thr_t* thr); /* in: query thread */ +/*************************************************************************** +Gets the type of a graph node. */ +UNIV_INLINE +ulint +que_node_get_type( +/*==============*/ + que_node_t* node); /* in: graph node */ +/*************************************************************************** +Gets pointer to the value data type field of a graph node. */ +UNIV_INLINE +dtype_t* +que_node_get_data_type( +/*===================*/ + que_node_t* node); /* in: graph node */ +/*************************************************************************** +Gets pointer to the value dfield of a graph node. */ +UNIV_INLINE +dfield_t* +que_node_get_val( +/*=============*/ + que_node_t* node); /* in: graph node */ +/*************************************************************************** +Gets the value buffer size of a graph node. 
*/ +UNIV_INLINE +ulint +que_node_get_val_buf_size( +/*======================*/ + /* out: val buffer size, not defined if + val.data == NULL in node */ + que_node_t* node); /* in: graph node */ +/*************************************************************************** +Sets the value buffer size of a graph node. */ +UNIV_INLINE +void +que_node_set_val_buf_size( +/*======================*/ + que_node_t* node, /* in: graph node */ + ulint size); /* in: size */ +/************************************************************************* +Gets the next list node in a list of query graph nodes. */ +UNIV_INLINE +que_node_t* +que_node_get_next( +/*==============*/ + que_node_t* node); /* in: node in a list */ +/************************************************************************* +Gets the parent node of a query graph node. */ +UNIV_INLINE +que_node_t* +que_node_get_parent( +/*================*/ + /* out: parent node or NULL */ + que_node_t* node); /* in: node */ +/******************************************************************** +Get the first containing loop node (e.g. while_node_t or for_node_t) for the +given node, or NULL if the node is not within a loop. */ +UNIV_INTERN +que_node_t* +que_node_get_containing_loop_node( +/*==============================*/ + /* out: containing loop node, or NULL. */ + que_node_t* node); /* in: node */ +/************************************************************************* +Catenates a query graph node to a list of them, possibly empty list. */ +UNIV_INLINE +que_node_t* +que_node_list_add_last( +/*===================*/ + /* out: one-way list of nodes */ + que_node_t* node_list, /* in: node list, or NULL */ + que_node_t* node); /* in: node */ +/************************************************************************* +Gets a query graph node list length. 
*/ +UNIV_INLINE +ulint +que_node_list_get_len( +/*==================*/ + /* out: length, for NULL list 0 */ + que_node_t* node_list); /* in: node list, or NULL */ +/************************************************************************** +Checks if graph, trx, or session is in a state where the query thread should +be stopped. */ +UNIV_INLINE +ibool +que_thr_peek_stop( +/*==============*/ + /* out: TRUE if should be stopped; NOTE that + if the peek is made without reserving the + kernel mutex, then another peek with the + mutex reserved is necessary before deciding + the actual stopping */ + que_thr_t* thr); /* in: query thread */ +/*************************************************************************** +Returns TRUE if the query graph is for a SELECT statement. */ +UNIV_INLINE +ibool +que_graph_is_select( +/*================*/ + /* out: TRUE if a select */ + que_t* graph); /* in: graph */ +/************************************************************************** +Prints info of an SQL query graph node. */ +UNIV_INTERN +void +que_node_print_info( +/*================*/ + que_node_t* node); /* in: query graph node */ +/************************************************************************* +Evaluate the given SQL */ +UNIV_INTERN +ulint +que_eval_sql( +/*=========*/ + /* out: error code or DB_SUCCESS */ + pars_info_t* info, /* in: info struct, or NULL */ + const char* sql, /* in: SQL string */ + ibool reserve_dict_mutex, + /* in: if TRUE, acquire/release + dict_sys->mutex around call to pars_sql. 
*/ + trx_t* trx); /* in: trx */ + +/* Query graph query thread node: the fields are protected by the kernel +mutex with the exceptions named below */ + +struct que_thr_struct{ + que_common_t common; /* type: QUE_NODE_THR */ + ulint magic_n; /* magic number to catch memory + corruption */ + que_node_t* child; /* graph child node */ + que_t* graph; /* graph where this node belongs */ + ibool is_active; /* TRUE if the thread has been set + to the run state in + que_thr_move_to_run_state, but not + deactivated in + que_thr_dec_reference_count (NOTE(review): the n_active_thrs comment in que_fork_struct spells this que_thr_dec_refer_count; confirm which name exists) */ + ulint state; /* state of the query thread; presumably one of the QUE_THR_... state values - confirm */ + UT_LIST_NODE_T(que_thr_t) + thrs; /* list of thread nodes of the fork + node */ + UT_LIST_NODE_T(que_thr_t) + trx_thrs; /* lists of threads in wait list of + the trx */ + UT_LIST_NODE_T(que_thr_t) + queue; /* list of runnable thread nodes in + the server task queue */ + /*------------------------------*/ + /* The following fields are private to the OS thread executing the + query thread, and are not protected by the kernel mutex: */ + + que_node_t* run_node; /* pointer to the node where the + subgraph down from this node is + currently executed */ + que_node_t* prev_node; /* pointer to the node from which + the control came */ + ulint resource; /* resource usage of the query thread + thus far */ + ulint lock_state; /* lock state of thread (table or + row); presumably one of the QUE_THR_LOCK_... values - confirm */ +}; + +#define QUE_THR_MAGIC_N 8476583 +#define QUE_THR_MAGIC_FREED 123461526 + +/* Query graph fork node: its fields are protected by the kernel mutex */ +struct que_fork_struct{ + que_common_t common; /* type: QUE_NODE_FORK */ + que_t* graph; /* query graph of this node */ + ulint fork_type; /* fork type; presumably one of the QUE_FORK_... type values - confirm */ + ulint n_active_thrs; /* if this is the root of a graph, the + number of query threads that have been + started in que_thr_move_to_run_state + but for which que_thr_dec_refer_count + has not yet been called */ + trx_t* trx; /* transaction: this is set only in + the root node */ + ulint state; /* state of the fork node; presumably one of the QUE_FORK_... state values - confirm */ + 
que_thr_t* caller; /* pointer to a possible calling query + thread */ + UT_LIST_BASE_NODE_T(que_thr_t) + thrs; /* list of query threads */ + /*------------------------------*/ + /* The fields in this section are defined only in the root node */ + sym_tab_t* sym_tab; /* symbol table of the query, + generated by the parser, or NULL + if the graph was created 'by hand' */ + pars_info_t* info; /* info struct, or NULL */ + /* The following cur_... fields are relevant only in a select graph */ + + ulint cur_end; /* QUE_CUR_NOT_DEFINED, QUE_CUR_START, + QUE_CUR_END */ + ulint cur_pos; /* if there are n rows in the result + set, values 0 and n + 1 mean before + first row, or after last row, depending + on cur_end; values 1...n mean a row + index */ + ibool cur_on_row; /* TRUE if cursor is on a row, i.e., + it is not before the first row or + after the last row */ + dulint n_inserts; /* number of rows inserted */ + dulint n_updates; /* number of rows updated */ + dulint n_deletes; /* number of rows deleted */ + sel_node_t* last_sel_node; /* last executed select node, or NULL + if none */ + UT_LIST_NODE_T(que_fork_t) + graphs; /* list of query graphs of a session + or a stored procedure */ + /*------------------------------*/ + mem_heap_t* heap; /* memory heap where the fork was + created */ + +}; + +/* Query fork (or graph) types */ +#define QUE_FORK_SELECT_NON_SCROLL 1 /* forward-only cursor */ +#define QUE_FORK_SELECT_SCROLL 2 /* scrollable cursor */ +#define QUE_FORK_INSERT 3 +#define QUE_FORK_UPDATE 4 +#define QUE_FORK_ROLLBACK 5 + /* This is really the undo graph used in rollback, + no signal-sending roll_node in this graph */ +#define QUE_FORK_PURGE 6 +#define QUE_FORK_EXECUTE 7 +#define QUE_FORK_PROCEDURE 8 +#define QUE_FORK_PROCEDURE_CALL 9 +#define QUE_FORK_MYSQL_INTERFACE 10 +#define QUE_FORK_RECOVERY 11 + +/* Query fork (or graph) states */ +#define QUE_FORK_ACTIVE 1 +#define QUE_FORK_COMMAND_WAIT 2 +#define QUE_FORK_INVALID 3 +#define QUE_FORK_BEING_FREED 4 
+ +/* Flag which is ORed to control structure statement node types */ +#define QUE_NODE_CONTROL_STAT 1024 + +/* Query graph node types */ +#define QUE_NODE_LOCK 1 +#define QUE_NODE_INSERT 2 +#define QUE_NODE_UPDATE 4 +#define QUE_NODE_CURSOR 5 +#define QUE_NODE_SELECT 6 +#define QUE_NODE_AGGREGATE 7 +#define QUE_NODE_FORK 8 +#define QUE_NODE_THR 9 +#define QUE_NODE_UNDO 10 +#define QUE_NODE_COMMIT 11 +#define QUE_NODE_ROLLBACK 12 +#define QUE_NODE_PURGE 13 +#define QUE_NODE_CREATE_TABLE 14 +#define QUE_NODE_CREATE_INDEX 15 +#define QUE_NODE_SYMBOL 16 +#define QUE_NODE_RES_WORD 17 +#define QUE_NODE_FUNC 18 +#define QUE_NODE_ORDER 19 +#define QUE_NODE_PROC (20 + QUE_NODE_CONTROL_STAT) +#define QUE_NODE_IF (21 + QUE_NODE_CONTROL_STAT) +#define QUE_NODE_WHILE (22 + QUE_NODE_CONTROL_STAT) +#define QUE_NODE_ASSIGNMENT 23 +#define QUE_NODE_FETCH 24 +#define QUE_NODE_OPEN 25 +#define QUE_NODE_COL_ASSIGNMENT 26 +#define QUE_NODE_FOR (27 + QUE_NODE_CONTROL_STAT) +#define QUE_NODE_RETURN 28 +#define QUE_NODE_ROW_PRINTF 29 +#define QUE_NODE_ELSIF 30 +#define QUE_NODE_CALL 31 +#define QUE_NODE_EXIT 32 + +/* Query thread states */ +#define QUE_THR_RUNNING 1 +#define QUE_THR_PROCEDURE_WAIT 2 +#define QUE_THR_COMPLETED 3 /* in selects this means that the + thread is at the end of its result set + (or start, in case of a scroll cursor); + in other statements, this means the + thread has done its task */ +#define QUE_THR_COMMAND_WAIT 4 +#define QUE_THR_LOCK_WAIT 5 +#define QUE_THR_SIG_REPLY_WAIT 6 +#define QUE_THR_SUSPENDED 7 +#define QUE_THR_ERROR 8 + +/* Query thread lock states */ +#define QUE_THR_LOCK_NOLOCK 0 +#define QUE_THR_LOCK_ROW 1 +#define QUE_THR_LOCK_TABLE 2 + +/* From where the cursor position is counted */ +#define QUE_CUR_NOT_DEFINED 1 +#define QUE_CUR_START 2 +#define QUE_CUR_END 3 + + +#ifndef UNIV_NONINL +#include "que0que.ic" +#endif + +#endif diff --git a/storage/xtradb/include/que0que.ic b/storage/xtradb/include/que0que.ic new file mode 100644 index 
00000000000..e9a6b00b9ab --- /dev/null +++ b/storage/xtradb/include/que0que.ic @@ -0,0 +1,275 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Query graph + +Created 5/27/1996 Heikki Tuuri +*******************************************************/ + +#include "usr0sess.h" + +/*************************************************************************** +Gets the trx of a query thread, i.e. the trx field of the graph the thread points to. */ +UNIV_INLINE +trx_t* +thr_get_trx( +/*========*/ + que_thr_t* thr) /* in: query thread */ +{ + ut_ad(thr); + + return(thr->graph->trx); +} + +/*************************************************************************** +Gets the first thr in a fork, i.e. the first element of the fork->thrs list. */ +UNIV_INLINE +que_thr_t* +que_fork_get_first_thr( +/*===================*/ + que_fork_t* fork) /* in: query fork */ +{ + return(UT_LIST_GET_FIRST(fork->thrs)); +} + +/*************************************************************************** +Gets the child node of the first thr in a fork. 
*/ +UNIV_INLINE +que_node_t* +que_fork_get_child( +/*===============*/ + que_fork_t* fork) /* in: query fork */ +{ + que_thr_t* thr; + + thr = UT_LIST_GET_FIRST(fork->thrs); /* not checked for NULL before the dereference below */ + + return(thr->child); +} + +/*************************************************************************** +Gets the type of a graph node; the node is read through a que_common_t* cast. */ +UNIV_INLINE +ulint +que_node_get_type( +/*==============*/ + que_node_t* node) /* in: graph node */ +{ + ut_ad(node); + + return(((que_common_t*)node)->type); +} + +/*************************************************************************** +Gets pointer to the value dfield of a graph node. The pointer is the address of the val field inside the node itself. */ +UNIV_INLINE +dfield_t* +que_node_get_val( +/*=============*/ + que_node_t* node) /* in: graph node */ +{ + ut_ad(node); + + return(&(((que_common_t*)node)->val)); +} + +/*************************************************************************** +Gets the value buffer size of a graph node. */ +UNIV_INLINE +ulint +que_node_get_val_buf_size( +/*======================*/ + /* out: val buffer size, not defined if + val.data == NULL in node */ + que_node_t* node) /* in: graph node */ +{ + ut_ad(node); + + return(((que_common_t*)node)->val_buf_size); +} + +/*************************************************************************** +Sets the value buffer size of a graph node. */ +UNIV_INLINE +void +que_node_set_val_buf_size( +/*======================*/ + que_node_t* node, /* in: graph node */ + ulint size) /* in: size */ +{ + ut_ad(node); + + ((que_common_t*)node)->val_buf_size = size; +} + +/*************************************************************************** +Sets the parent of a graph node. */ +UNIV_INLINE +void +que_node_set_parent( +/*================*/ + que_node_t* node, /* in: graph node */ + que_node_t* parent) /* in: parent */ +{ + ut_ad(node); + + ((que_common_t*)node)->parent = parent; +} + +/*************************************************************************** +Gets pointer to the value data type field of a graph node. 
*/ +UNIV_INLINE +dtype_t* +que_node_get_data_type( +/*===================*/ + que_node_t* node) /* in: graph node */ +{ + ut_ad(node); + + return(dfield_get_type(&((que_common_t*) node)->val)); +} + +/************************************************************************* +Catenates a query graph node to a list of them, possibly empty list. */ +UNIV_INLINE +que_node_t* +que_node_list_add_last( +/*===================*/ + /* out: one-way list of nodes: node itself if node_list is NULL, otherwise node_list */ + que_node_t* node_list, /* in: node list, or NULL */ + que_node_t* node) /* in: node */ +{ + que_common_t* cnode; + que_common_t* cnode2; + + cnode = (que_common_t*) node; + + cnode->brother = NULL; /* node becomes the new end of the list */ + + if (node_list == NULL) { + + return(node); + } + + cnode2 = (que_common_t*) node_list; + + while (cnode2->brother != NULL) { /* walk the brother links to the current last node */ + cnode2 = (que_common_t*) cnode2->brother; + } + + cnode2->brother = node; + + return(node_list); +} + +/************************************************************************* +Gets the next list node in a list of query graph nodes. */ +UNIV_INLINE +que_node_t* +que_node_get_next( +/*==============*/ + /* out: next node in a list of nodes (the brother field of node) */ + que_node_t* node) /* in: node in a list */ +{ + return(((que_common_t*)node)->brother); +} + +/************************************************************************* +Gets a query graph node list length. */ +UNIV_INLINE +ulint +que_node_list_get_len( +/*==================*/ + /* out: length, for NULL list 0 */ + que_node_t* node_list) /* in: node list, or NULL */ +{ + const que_common_t* cnode; + ulint len; + + cnode = (const que_common_t*) node_list; + len = 0; + + while (cnode != NULL) { /* one count per node reached through the brother links */ + len++; + cnode = (const que_common_t*) cnode->brother; + } + + return(len); +} + +/************************************************************************* +Gets the parent node of a query graph node. 
*/ +UNIV_INLINE +que_node_t* +que_node_get_parent( +/*================*/ + /* out: parent node or NULL */ + que_node_t* node) /* in: node */ +{ + return(((que_common_t*)node)->parent); +} + +/************************************************************************** +Checks if graph, trx, or session is in a state where the query thread should +be stopped. NOTE(review): the body reads only graph->state, trx->que_state and trx->signals; no session field is examined here. */ +UNIV_INLINE +ibool +que_thr_peek_stop( +/*==============*/ + /* out: TRUE if should be stopped; NOTE that + if the peek is made without reserving the + kernel mutex, then another peek with the + mutex reserved is necessary before deciding + the actual stopping */ + que_thr_t* thr) /* in: query thread */ +{ + trx_t* trx; + que_t* graph; + + graph = thr->graph; + trx = graph->trx; + + if (graph->state != QUE_FORK_ACTIVE + || trx->que_state == TRX_QUE_LOCK_WAIT + || (UT_LIST_GET_LEN(trx->signals) > 0 + && trx->que_state == TRX_QUE_RUNNING)) { + + return(TRUE); + } + + return(FALSE); +} + +/*************************************************************************** +Returns TRUE if the query graph is for a SELECT statement, i.e. its fork_type is QUE_FORK_SELECT_SCROLL or QUE_FORK_SELECT_NON_SCROLL. */ +UNIV_INLINE +ibool +que_graph_is_select( +/*================*/ + /* out: TRUE if a select */ + que_t* graph) /* in: graph */ +{ + if (graph->fork_type == QUE_FORK_SELECT_SCROLL + || graph->fork_type == QUE_FORK_SELECT_NON_SCROLL) { + + return(TRUE); + } + + return(FALSE); +} diff --git a/storage/xtradb/include/que0types.h b/storage/xtradb/include/que0types.h new file mode 100644 index 00000000000..1d3217fb491 --- /dev/null +++ b/storage/xtradb/include/que0types.h @@ -0,0 +1,59 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Query graph global types + +Created 5/27/1996 Heikki Tuuri +*******************************************************/ + +#ifndef que0types_h +#define que0types_h + +#include "data0data.h" +#include "dict0types.h" + +/* Pseudotype for all graph nodes */ +typedef void que_node_t; + +typedef struct que_fork_struct que_fork_t; + +/* Query graph root is a fork node */ +typedef que_fork_t que_t; + +typedef struct que_thr_struct que_thr_t; +typedef struct que_common_struct que_common_t; + +/* Common struct at the beginning of each query graph node; the name of this +substruct must be 'common' */ + +struct que_common_struct{ + ulint type; /* query node type */ + que_node_t* parent; /* back pointer to parent node, or NULL */ + que_node_t* brother;/* pointer to a possible brother node */ + dfield_t val; /* evaluated value for an expression */ + ulint val_buf_size; + /* buffer size for the evaluated value data, + if the buffer has been allocated dynamically: + if this field is != 0, and the node is a + symbol node or a function node, then we + have to free the data field in val + explicitly */ +}; + +#endif diff --git a/storage/xtradb/include/read0read.h b/storage/xtradb/include/read0read.h new file mode 100644 index 00000000000..7ea8bdaf8dd --- /dev/null +++ b/storage/xtradb/include/read0read.h @@ -0,0 +1,181 @@ +/***************************************************************************** + +Copyright 
(c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Cursor read + +Created 2/16/1997 Heikki Tuuri +*******************************************************/ + +#ifndef read0read_h +#define read0read_h + +#include "univ.i" + + +#include "ut0byte.h" +#include "ut0lst.h" +#include "trx0trx.h" +#include "read0types.h" + +/************************************************************************* +Opens a read view where exactly the transactions serialized before this +point in time are seen in the view. */ +UNIV_INTERN +read_view_t* +read_view_open_now( +/*===============*/ + /* out, own: read view struct */ + dulint cr_trx_id, /* in: trx_id of creating + transaction, or (0, 0) used in + purge */ + mem_heap_t* heap); /* in: memory heap from which + allocated */ +/************************************************************************* +Makes a copy of the oldest existing read view, or opens a new. The view +must be closed with ..._close. 
*/ +UNIV_INTERN +read_view_t* +read_view_oldest_copy_or_open_new( +/*==============================*/ + /* out, own: read view struct */ + dulint cr_trx_id, /* in: trx_id of creating + transaction, or (0, 0) used in + purge */ + mem_heap_t* heap); /* in: memory heap from which + allocated */ +/************************************************************************* +Closes a read view. */ +UNIV_INTERN +void +read_view_close( +/*============*/ + read_view_t* view); /* in: read view */ +/************************************************************************* +Closes a consistent read view for MySQL. This function is called at an SQL +statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */ +UNIV_INTERN +void +read_view_close_for_mysql( +/*======================*/ + trx_t* trx); /* in: trx which has a read view */ +/************************************************************************* +Checks if a read view sees the specified transaction. */ +UNIV_INLINE +ibool +read_view_sees_trx_id( +/*==================*/ + /* out: TRUE if sees */ + read_view_t* view, /* in: read view */ + dulint trx_id);/* in: trx id */ +/************************************************************************* +Prints a read view to stderr. */ +UNIV_INTERN +void +read_view_print( +/*============*/ + read_view_t* view); /* in: read view */ +/************************************************************************* +Create a consistent cursor view for mysql to be used in cursors. In this +consistent read view modifications done by the creating transaction or future +transactions are not visible. */ +UNIV_INTERN +cursor_view_t* +read_cursor_view_create_for_mysql( +/*==============================*/ + trx_t* cr_trx);/* in: trx where cursor view is created */ +/************************************************************************* +Close a given consistent cursor view for mysql and restore global read view +back to a transaction read view. 
*/ +UNIV_INTERN +void +read_cursor_view_close_for_mysql( +/*=============================*/ + trx_t* trx, /* in: trx */ + cursor_view_t* curview); /* in: cursor view to be closed */ +/************************************************************************* +This function sets a given consistent cursor view to a transaction +read view if given consistent cursor view is not NULL. Otherwise, function +restores a global read view to a transaction read view. */ +UNIV_INTERN +void +read_cursor_set_for_mysql( +/*======================*/ + trx_t* trx, /* in: transaction where cursor is set */ + cursor_view_t* curview);/* in: consistent cursor view to be set */ + +/* Read view lists the trx ids of those transactions for which a consistent +read should not see the modifications to the database. */ + +struct read_view_struct{ + ulint type; /* VIEW_NORMAL, VIEW_HIGH_GRANULARITY */ + dulint undo_no; /* (0, 0) or if type is VIEW_HIGH_GRANULARITY + transaction undo_no when this high-granularity + consistent read view was created */ + dulint low_limit_no; /* The view does not need to see the undo + logs for transactions whose transaction number + is strictly smaller (<) than this value: they + can be removed in purge if not needed by other + views */ + dulint low_limit_id; /* The read should not see any transaction + with trx id >= this value */ + dulint up_limit_id; /* The read should see all trx ids which + are strictly smaller (<) than this value */ + ulint n_trx_ids; /* Number of cells in the trx_ids array */ + dulint* trx_ids; /* Additional trx ids which the read should + not see: typically, these are the active + transactions at the time when the read is + serialized, except the reading transaction + itself; the trx ids in this array are in a + descending order */ + dulint creator_trx_id; /* trx id of creating transaction, or + (0, 0) used in purge */ + UT_LIST_NODE_T(read_view_t) view_list; + /* List of read views in trx_sys */ +}; + +/* Read view types */ +#define 
VIEW_NORMAL 1 /* Normal consistent read view + where transaction does not see changes + made by active transactions except + creating transaction. */ +#define VIEW_HIGH_GRANULARITY 2 /* High-granularity read view where + transaction does not see changes + made by active transactions and own + changes after a point in time when this + read view was created. */ + +/* Implement InnoDB framework to support consistent read views in +cursors. This struct holds both heap where consistent read view +is allocated and pointer to a read view. */ + +struct cursor_view_struct{ + mem_heap_t* heap; + /* Memory heap for the cursor view */ + read_view_t* read_view; + /* Consistent read view of the cursor*/ + ulint n_mysql_tables_in_use; + /* number of Innobase tables used in the + processing of this cursor */ +}; + +#ifndef UNIV_NONINL +#include "read0read.ic" +#endif + +#endif diff --git a/storage/xtradb/include/read0read.ic b/storage/xtradb/include/read0read.ic new file mode 100644 index 00000000000..9fc6af04e88 --- /dev/null +++ b/storage/xtradb/include/read0read.ic @@ -0,0 +1,97 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Cursor read + +Created 2/16/1997 Heikki Tuuri +*******************************************************/ + +/************************************************************************* +Gets the nth trx id in a read view. */ +UNIV_INLINE +dulint +read_view_get_nth_trx_id( +/*=====================*/ + /* out: trx id */ + read_view_t* view, /* in: read view */ + ulint n) /* in: position; must be < view->n_trx_ids */ +{ + ut_ad(n < view->n_trx_ids); + + return(*(view->trx_ids + n)); +} + +/************************************************************************* +Sets the nth trx id in a read view. */ +UNIV_INLINE +void +read_view_set_nth_trx_id( +/*=====================*/ + read_view_t* view, /* in: read view */ + ulint n, /* in: position; must be < view->n_trx_ids */ + dulint trx_id) /* in: trx id to set */ +{ + ut_ad(n < view->n_trx_ids); + + *(view->trx_ids + n) = trx_id; +} + +/************************************************************************* +Checks if a read view sees the specified transaction. */ +UNIV_INLINE +ibool +read_view_sees_trx_id( +/*==================*/ + /* out: TRUE if sees */ + read_view_t* view, /* in: read view */ + dulint trx_id) /* in: trx id */ +{ + ulint n_ids; + int cmp; + ulint i; + + if (ut_dulint_cmp(trx_id, view->up_limit_id) < 0) { /* compares below up_limit_id: seen */ + + return(TRUE); + } + + if (ut_dulint_cmp(trx_id, view->low_limit_id) >= 0) { /* compares at or above low_limit_id: not seen */ + + return(FALSE); + } + + /* We go through the trx ids in the array smallest first: this order + may save CPU time, because if there was a very long running + transaction in the trx id array, its trx id is looked at first, and + the first two comparisons may well decide the visibility of trx_id. 
The index n_ids - i - 1 below walks the array from its last cell to its first. */ + + n_ids = view->n_trx_ids; + + for (i = 0; i < n_ids; i++) { + + cmp = ut_dulint_cmp( + trx_id, + read_view_get_nth_trx_id(view, n_ids - i - 1)); + if (cmp <= 0) { /* cmp == 0: trx_id is in the array, return FALSE; cmp < 0: return TRUE without scanning the remaining cells */ + return(cmp < 0); + } + } + + return(TRUE); /* reached only when every comparison in the loop gave cmp > 0 */ +} diff --git a/storage/xtradb/include/read0types.h b/storage/xtradb/include/read0types.h new file mode 100644 index 00000000000..44849cbb498 --- /dev/null +++ b/storage/xtradb/include/read0types.h @@ -0,0 +1,31 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/*********************************************************************** +Comparison services for records + +Created 7/1/1994 Heikki Tuuri +************************************************************************/ + +#ifndef rem0cmp_h +#define rem0cmp_h + +#include "univ.i" +#include "data0data.h" +#include "data0type.h" +#include "dict0dict.h" +#include "rem0rec.h" + +/***************************************************************** +Returns TRUE if two columns are equal for comparison purposes. */ +UNIV_INTERN +ibool +cmp_cols_are_equal( +/*===============*/ + /* out: TRUE if the columns are + considered equal in comparisons */ + const dict_col_t* col1, /* in: column 1 */ + const dict_col_t* col2, /* in: column 2 */ + ibool check_charsets); + /* in: whether to check charsets */ +/***************************************************************** +This function is used to compare two data fields for which we know the +data type. 
*/ +UNIV_INLINE +int +cmp_data_data( +/*==========*/ + /* out: 1, 0, -1, if data1 is greater, equal, + less than data2, respectively */ + ulint mtype, /* in: main type */ + ulint prtype, /* in: precise type */ + const byte* data1, /* in: data field (== a pointer to a memory + buffer) */ + ulint len1, /* in: data field length or UNIV_SQL_NULL */ + const byte* data2, /* in: data field (== a pointer to a memory + buffer) */ + ulint len2); /* in: data field length or UNIV_SQL_NULL */ +/***************************************************************** +This function is used to compare two data fields for which we know the +data type. */ +UNIV_INTERN +int +cmp_data_data_slow( +/*===============*/ + /* out: 1, 0, -1, if data1 is greater, equal, + less than data2, respectively */ + ulint mtype, /* in: main type */ + ulint prtype, /* in: precise type */ + const byte* data1, /* in: data field (== a pointer to a memory + buffer) */ + ulint len1, /* in: data field length or UNIV_SQL_NULL */ + const byte* data2, /* in: data field (== a pointer to a memory + buffer) */ + ulint len2); /* in: data field length or UNIV_SQL_NULL */ +/***************************************************************** +This function is used to compare two dfields where at least the first +has its data type field set. */ +UNIV_INLINE +int +cmp_dfield_dfield( +/*==============*/ + /* out: 1, 0, -1, if dfield1 is greater, equal, + less than dfield2, respectively */ + const dfield_t* dfield1,/* in: data field; must have type field set */ + const dfield_t* dfield2);/* in: data field */ +/***************************************************************** +This function is used to compare a data tuple to a physical record. +Only dtuple->n_fields_cmp first fields are taken into account for +the data tuple! If we denote by n = n_fields_cmp, then rec must +have either m >= n fields, or it must differ from dtuple in some of +the m fields rec has. 
If rec has an externally stored field we do not +compare it but return with value 0 if such a comparison should be +made. */ +UNIV_INTERN +int +cmp_dtuple_rec_with_match( +/*======================*/ + /* out: 1, 0, -1, if dtuple is greater, equal, + less than rec, respectively, when only the + common first fields are compared, or + until the first externally stored field in + rec */ + const dtuple_t* dtuple, /* in: data tuple */ + const rec_t* rec, /* in: physical record which differs from + dtuple in some of the common fields, or which + has an equal number or more fields than + dtuple */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint* matched_fields, /* in/out: number of already completely + matched fields; when function returns, + contains the value for current comparison */ + ulint* matched_bytes); /* in/out: number of already matched + bytes within the first field not completely + matched; when function returns, contains the + value for current comparison */ +/****************************************************************** +Compares a data tuple to a physical record. */ +UNIV_INTERN +int +cmp_dtuple_rec( +/*===========*/ + /* out: 1, 0, -1, if dtuple is greater, equal, + less than rec, respectively; see the comments + for cmp_dtuple_rec_with_match */ + const dtuple_t* dtuple, /* in: data tuple */ + const rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/****************************************************************** +Checks if a dtuple is a prefix of a record. The last field in dtuple +is allowed to be a prefix of the corresponding field in the record. 
*/ +UNIV_INTERN +ibool +cmp_dtuple_is_prefix_of_rec( +/*========================*/ + /* out: TRUE if prefix */ + const dtuple_t* dtuple, /* in: data tuple */ + const rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +#ifndef UNIV_HOTBACKUP +/***************************************************************** +Compare two physical records that contain the same number of columns, +none of which are stored externally. */ +UNIV_INTERN +int +cmp_rec_rec_simple( +/*===============*/ + /* out: 1, 0 , -1 if rec1 is greater, + equal, less, respectively, than rec2 */ + const rec_t* rec1, /* in: physical record */ + const rec_t* rec2, /* in: physical record */ + const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ + const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ + const dict_index_t* index); /* in: data dictionary index */ +#endif /* !UNIV_HOTBACKUP */ +/***************************************************************** +This function is used to compare two physical records. Only the common +first fields are compared, and if an externally stored field is +encountered, then 0 is returned. 
*/ +UNIV_INTERN +int +cmp_rec_rec_with_match( +/*===================*/ + /* out: 1, 0 , -1 if rec1 is greater, equal, + less, respectively, than rec2; only the common + first fields are compared */ + const rec_t* rec1, /* in: physical record */ + const rec_t* rec2, /* in: physical record */ + const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ + const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ + dict_index_t* index, /* in: data dictionary index */ + ulint* matched_fields, /* in/out: number of already completely + matched fields; when the function returns, + contains the value for the current + comparison */ + ulint* matched_bytes);/* in/out: number of already matched + bytes within the first field not completely + matched; when the function returns, contains + the value for the current comparison */ +/***************************************************************** +This function is used to compare two physical records. Only the common +first fields are compared. */ +UNIV_INLINE +int +cmp_rec_rec( +/*========*/ + /* out: 1, 0 , -1 if rec1 is greater, equal, + less, respectively, than rec2; only the common + first fields are compared */ + const rec_t* rec1, /* in: physical record */ + const rec_t* rec2, /* in: physical record */ + const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ + const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ + dict_index_t* index); /* in: data dictionary index */ + + +#ifndef UNIV_NONINL +#include "rem0cmp.ic" +#endif + +#endif diff --git a/storage/xtradb/include/rem0cmp.ic b/storage/xtradb/include/rem0cmp.ic new file mode 100644 index 00000000000..6c58d9e5a25 --- /dev/null +++ b/storage/xtradb/include/rem0cmp.ic @@ -0,0 +1,92 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/*********************************************************************** +Comparison services for records + +Created 7/1/1994 Heikki Tuuri +************************************************************************/ + +/***************************************************************** +This function is used to compare two data fields for which we know the +data type. It forwards all of its arguments unchanged to cmp_data_data_slow(). */ +UNIV_INLINE +int +cmp_data_data( +/*==========*/ + /* out: 1, 0, -1, if data1 is greater, equal, + less than data2, respectively */ + ulint mtype, /* in: main type */ + ulint prtype, /* in: precise type */ + const byte* data1, /* in: data field (== a pointer to a memory + buffer) */ + ulint len1, /* in: data field length or UNIV_SQL_NULL */ + const byte* data2, /* in: data field (== a pointer to a memory + buffer) */ + ulint len2) /* in: data field length or UNIV_SQL_NULL */ +{ + return(cmp_data_data_slow(mtype, prtype, data1, len1, data2, len2)); +} + +/***************************************************************** +This function is used to compare two dfields where at least the first +has its data type field set. 
*/ +UNIV_INLINE +int +cmp_dfield_dfield( +/*==============*/ + /* out: 1, 0, -1, if dfield1 is greater, equal, + less than dfield2, respectively */ + const dfield_t* dfield1,/* in: data field; must have type field set */ + const dfield_t* dfield2)/* in: data field */ +{ + const dtype_t* type; + + ut_ad(dfield_check_typed(dfield1)); + + type = dfield_get_type(dfield1); /* both fields are compared using the type of dfield1 */ + + return(cmp_data_data(type->mtype, type->prtype, + (const byte*) dfield_get_data(dfield1), + dfield_get_len(dfield1), + (const byte*) dfield_get_data(dfield2), + dfield_get_len(dfield2))); +} + +/***************************************************************** +This function is used to compare two physical records. Only the common +first fields are compared. */ +UNIV_INLINE +int +cmp_rec_rec( +/*========*/ + /* out: 1, 0 , -1 if rec1 is greater, equal, + less, respectively, than rec2; only the common + first fields are compared */ + const rec_t* rec1, /* in: physical record */ + const rec_t* rec2, /* in: physical record */ + const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ + const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ + dict_index_t* index) /* in: data dictionary index */ +{ + ulint match_f = 0; /* matched-field counter handed to cmp_rec_rec_with_match(); starts at 0 */ + ulint match_b = 0; /* matched-byte counter handed to cmp_rec_rec_with_match(); starts at 0 */ + + return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index, + &match_f, &match_b)); /* only the comparison result is returned; the counters are discarded */ +} diff --git a/storage/xtradb/include/rem0rec.h b/storage/xtradb/include/rem0rec.h new file mode 100644 index 00000000000..cb72a5fa25b --- /dev/null +++ b/storage/xtradb/include/rem0rec.h @@ -0,0 +1,822 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/************************************************************************ +Record manager + +Created 5/30/1994 Heikki Tuuri +*************************************************************************/ + +#ifndef rem0rec_h +#define rem0rec_h + +#include "univ.i" +#include "data0data.h" +#include "rem0types.h" +#include "mtr0types.h" +#include "page0types.h" + +/* Info bit denoting the predefined minimum record: this bit is set +if and only if the record is the first user record on a non-leaf +B-tree page that is the leftmost page on its level +(PAGE_LEVEL is nonzero and FIL_PAGE_PREV is FIL_NULL). */ +#define REC_INFO_MIN_REC_FLAG 0x10UL +/* The deleted flag in info bits */ +#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the + record has been delete marked */ + +/* Number of extra bytes in an old-style record, +in addition to the data and the offsets */ +#define REC_N_OLD_EXTRA_BYTES 6 +/* Number of extra bytes in a new-style record, +in addition to the data and the offsets */ +#define REC_N_NEW_EXTRA_BYTES 5 + +/* Record status values */ +#define REC_STATUS_ORDINARY 0 +#define REC_STATUS_NODE_PTR 1 +#define REC_STATUS_INFIMUM 2 +#define REC_STATUS_SUPREMUM 3 + +/* The following four constants are needed in page0zip.c in order to +efficiently compress and decompress pages. */ + +/* The offset of heap_no in a compact record */ +#define REC_NEW_HEAP_NO 4 +/* The shift of heap_no in a compact record. 
+The status is stored in the low-order bits. */ +#define REC_HEAP_NO_SHIFT 3 + +/* Length of a B-tree node pointer, in bytes */ +#define REC_NODE_PTR_SIZE 4 + +#ifdef UNIV_DEBUG +/* Length of the rec_get_offsets() header */ +# define REC_OFFS_HEADER_SIZE 4 +#else /* UNIV_DEBUG */ +/* Length of the rec_get_offsets() header */ +# define REC_OFFS_HEADER_SIZE 2 +#endif /* UNIV_DEBUG */ + +/* Number of elements that should be initially allocated for the +offsets[] array, first passed to rec_get_offsets() */ +#define REC_OFFS_NORMAL_SIZE 100 +#define REC_OFFS_SMALL_SIZE 10 + +/********************************************************** +The following function is used to get the pointer of the next chained record +on the same page. */ +UNIV_INLINE +const rec_t* +rec_get_next_ptr_const( +/*===================*/ + /* out: pointer to the next chained record, or + NULL if none */ + const rec_t* rec, /* in: physical record */ + ulint comp); /* in: nonzero=compact page format */ +/********************************************************** +The following function is used to get the pointer of the next chained record +on the same page. */ +UNIV_INLINE +rec_t* +rec_get_next_ptr( +/*=============*/ + /* out: pointer to the next chained record, or + NULL if none */ + rec_t* rec, /* in: physical record */ + ulint comp); /* in: nonzero=compact page format */ +/********************************************************** +The following function is used to get the offset of the +next chained record on the same page. */ +UNIV_INLINE +ulint +rec_get_next_offs( +/*==============*/ + /* out: the page offset of the next + chained record, or 0 if none */ + const rec_t* rec, /* in: physical record */ + ulint comp); /* in: nonzero=compact page format */ +/********************************************************** +The following function is used to set the next record offset field +of an old-style record. 
*/ +UNIV_INLINE +void +rec_set_next_offs_old( +/*==================*/ + rec_t* rec, /* in: old-style physical record */ + ulint next); /* in: offset of the next record */ +/********************************************************** +The following function is used to set the next record offset field +of a new-style record. */ +UNIV_INLINE +void +rec_set_next_offs_new( +/*==================*/ + rec_t* rec, /* in/out: new-style physical record */ + ulint next); /* in: offset of the next record */ +/********************************************************** +The following function is used to get the number of fields +in an old-style record. */ +UNIV_INLINE +ulint +rec_get_n_fields_old( +/*=================*/ + /* out: number of data fields */ + const rec_t* rec); /* in: physical record */ +/********************************************************** +The following function is used to get the number of fields +in a record. */ +UNIV_INLINE +ulint +rec_get_n_fields( +/*=============*/ + /* out: number of data fields */ + const rec_t* rec, /* in: physical record */ + const dict_index_t* index); /* in: record descriptor */ +/********************************************************** +The following function is used to get the number of records owned by the +previous directory record. */ +UNIV_INLINE +ulint +rec_get_n_owned_old( +/*================*/ + /* out: number of owned records */ + const rec_t* rec); /* in: old-style physical record */ +/********************************************************** +The following function is used to set the number of owned records. */ +UNIV_INLINE +void +rec_set_n_owned_old( +/*================*/ + /* no return value */ + rec_t* rec, /* in: old-style physical record */ + ulint n_owned); /* in: the number of owned records */ +/********************************************************** +The following function is used to get the number of records owned by the +previous directory record. 
*/ +UNIV_INLINE +ulint +rec_get_n_owned_new( +/*================*/ + /* out: number of owned records */ + const rec_t* rec); /* in: new-style physical record */ +/********************************************************** +The following function is used to set the number of owned records. */ +UNIV_INLINE +void +rec_set_n_owned_new( +/*================*/ + rec_t* rec, /* in/out: new-style physical record */ + page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */ + ulint n_owned);/* in: the number of owned */ +/********************************************************** +The following function is used to retrieve the info bits of +a record. */ +UNIV_INLINE +ulint +rec_get_info_bits( +/*==============*/ + /* out: info bits */ + const rec_t* rec, /* in: physical record */ + ulint comp); /* in: nonzero=compact page format */ +/********************************************************** +The following function is used to set the info bits of a record. */ +UNIV_INLINE +void +rec_set_info_bits_old( +/*==================*/ + rec_t* rec, /* in: old-style physical record */ + ulint bits); /* in: info bits */ +/********************************************************** +The following function is used to set the info bits of a record. */ +UNIV_INLINE +void +rec_set_info_bits_new( +/*==================*/ + rec_t* rec, /* in/out: new-style physical record */ + ulint bits); /* in: info bits */ +/********************************************************** +The following function retrieves the status bits of a new-style record. */ +UNIV_INLINE +ulint +rec_get_status( +/*===========*/ + /* out: status bits */ + const rec_t* rec); /* in: physical record */ + +/********************************************************** +The following function is used to set the status bits of a new-style record. 
*/ +UNIV_INLINE +void +rec_set_status( +/*===========*/ + rec_t* rec, /* in/out: physical record */ + ulint bits); /* in: status bits */ + +/********************************************************** +The following function is used to retrieve the info and status +bits of a record. (Only compact records have status bits.) */ +UNIV_INLINE +ulint +rec_get_info_and_status_bits( +/*=========================*/ + /* out: info and status bits */ + const rec_t* rec, /* in: physical record */ + ulint comp); /* in: nonzero=compact page format */ +/********************************************************** +The following function is used to set the info and status +bits of a record. (Only compact records have status bits.) */ +UNIV_INLINE +void +rec_set_info_and_status_bits( +/*=========================*/ + rec_t* rec, /* in/out: compact physical record */ + ulint bits); /* in: info and status bits */ + +/********************************************************** +The following function tells if record is delete marked. */ +UNIV_INLINE +ulint +rec_get_deleted_flag( +/*=================*/ + /* out: nonzero if delete marked */ + const rec_t* rec, /* in: physical record */ + ulint comp); /* in: nonzero=compact page format */ +/********************************************************** +The following function is used to set the deleted bit. */ +UNIV_INLINE +void +rec_set_deleted_flag_old( +/*=====================*/ + rec_t* rec, /* in: old-style physical record */ + ulint flag); /* in: nonzero if delete marked */ +/********************************************************** +The following function is used to set the deleted bit. */ +UNIV_INLINE +void +rec_set_deleted_flag_new( +/*=====================*/ + rec_t* rec, /* in/out: new-style physical record */ + page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */ + ulint flag); /* in: nonzero if delete marked */ +/********************************************************** +The following function tells if a new-style record is a node pointer. 
*/ +UNIV_INLINE +ibool +rec_get_node_ptr_flag( +/*==================*/ + /* out: TRUE if node pointer */ + const rec_t* rec); /* in: physical record */ +/********************************************************** +The following function is used to get the order number +of an old-style record in the heap of the index page. */ +UNIV_INLINE +ulint +rec_get_heap_no_old( +/*================*/ + /* out: heap order number */ + const rec_t* rec); /* in: physical record */ +/********************************************************** +The following function is used to set the heap number +field in an old-style record. */ +UNIV_INLINE +void +rec_set_heap_no_old( +/*================*/ + rec_t* rec, /* in: physical record */ + ulint heap_no);/* in: the heap number */ +/********************************************************** +The following function is used to get the order number +of a new-style record in the heap of the index page. */ +UNIV_INLINE +ulint +rec_get_heap_no_new( +/*================*/ + /* out: heap order number */ + const rec_t* rec); /* in: physical record */ +/********************************************************** +The following function is used to set the heap number +field in a new-style record. */ +UNIV_INLINE +void +rec_set_heap_no_new( +/*================*/ + rec_t* rec, /* in/out: physical record */ + ulint heap_no);/* in: the heap number */ +/********************************************************** +The following function is used to test whether the data offsets +in the record are stored in one-byte or two-byte format. */ +UNIV_INLINE +ibool +rec_get_1byte_offs_flag( +/*====================*/ + /* out: TRUE if 1-byte form */ + const rec_t* rec); /* in: physical record */ + +/********************************************************** +Determine how many of the first n columns in a compact +physical record are stored externally. 
*/ +UNIV_INTERN +ulint +rec_get_n_extern_new( +/*=================*/ + /* out: number of externally stored columns */ + const rec_t* rec, /* in: compact physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint n); /* in: number of columns to scan */ + +/********************************************************** +The following function determines the offsets to each field +in the record. It can reuse a previously allocated array. */ +UNIV_INTERN +ulint* +rec_get_offsets_func( +/*=================*/ + /* out: the new offsets */ + const rec_t* rec, /* in: physical record */ + const dict_index_t* index, /* in: record descriptor */ + ulint* offsets,/* in/out: array consisting of + offsets[0] allocated elements, + or an array from rec_get_offsets(), + or NULL */ + ulint n_fields,/* in: maximum number of + initialized fields + (ULINT_UNDEFINED if all fields) */ + mem_heap_t** heap, /* in/out: memory heap */ + const char* file, /* in: file name where called */ + ulint line); /* in: line number where called */ + +#define rec_get_offsets(rec,index,offsets,n,heap) \ + rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__) + +/********************************************************** +Determine the offset to each field in a leaf-page record +in ROW_FORMAT=COMPACT. This is a special case of +rec_init_offsets() and rec_get_offsets_func(). */ +UNIV_INTERN +void +rec_init_offsets_comp_ordinary( +/*===========================*/ + const rec_t* rec, /* in: physical record in + ROW_FORMAT=COMPACT */ + ulint extra, /* in: number of bytes to reserve + between the record header and + the data payload + (usually REC_N_NEW_EXTRA_BYTES) */ + const dict_index_t* index, /* in: record descriptor */ + ulint* offsets);/* in/out: array of offsets; + in: n=rec_offs_n_fields(offsets) */ + +/********************************************************** +The following function determines the offsets to each field +in the record. It can reuse a previously allocated array. 
*/ +UNIV_INTERN +void +rec_get_offsets_reverse( +/*====================*/ + const byte* extra, /* in: the extra bytes of a + compact record in reverse order, + excluding the fixed-size + REC_N_NEW_EXTRA_BYTES */ + const dict_index_t* index, /* in: record descriptor */ + ulint node_ptr,/* in: nonzero=node pointer, + 0=leaf node */ + ulint* offsets);/* in/out: array consisting of + offsets[0] allocated elements */ + +/**************************************************************** +Validates offsets returned by rec_get_offsets(). */ +UNIV_INLINE +ibool +rec_offs_validate( +/*==============*/ + /* out: TRUE if valid */ + const rec_t* rec, /* in: record or NULL */ + const dict_index_t* index, /* in: record descriptor or NULL */ + const ulint* offsets);/* in: array returned by + rec_get_offsets() */ +#ifdef UNIV_DEBUG +/**************************************************************** +Updates debug data in offsets, in order to avoid bogus +rec_offs_validate() failures. */ +UNIV_INLINE +void +rec_offs_make_valid( +/*================*/ + const rec_t* rec, /* in: record */ + const dict_index_t* index, /* in: record descriptor */ + ulint* offsets);/* in: array returned by + rec_get_offsets() */ +#else +# define rec_offs_make_valid(rec, index, offsets) ((void) 0) +#endif /* UNIV_DEBUG */ + +/**************************************************************** +The following function is used to get the offset to the nth +data field in an old-style record. */ +UNIV_INTERN +ulint +rec_get_nth_field_offs_old( +/*=======================*/ + /* out: offset to the field */ + const rec_t* rec, /* in: record */ + ulint n, /* in: index of the field */ + ulint* len); /* out: length of the field; UNIV_SQL_NULL + if SQL null */ +#define rec_get_nth_field_old(rec, n, len) \ +((rec) + rec_get_nth_field_offs_old(rec, n, len)) +/**************************************************************** +Gets the physical size of an old-style field. 
+Also an SQL null may have a field of size > 0, +if the data type is of a fixed size. */ +UNIV_INLINE +ulint +rec_get_nth_field_size( +/*===================*/ + /* out: field size in bytes */ + const rec_t* rec, /* in: record */ + ulint n); /* in: index of the field */ +/**************************************************************** +The following function is used to get an offset to the nth +data field in a record. */ +UNIV_INLINE +ulint +rec_get_nth_field_offs( +/*===================*/ + /* out: offset from the origin of rec */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n, /* in: index of the field */ + ulint* len); /* out: length of the field; UNIV_SQL_NULL + if SQL null */ +#define rec_get_nth_field(rec, offsets, n, len) \ +((rec) + rec_get_nth_field_offs(offsets, n, len)) +/********************************************************** +Determine if the offsets are for a record in the new +compact format. */ +UNIV_INLINE +ulint +rec_offs_comp( +/*==========*/ + /* out: nonzero if compact format */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/********************************************************** +Determine if the offsets are for a record containing +externally stored columns. */ +UNIV_INLINE +ulint +rec_offs_any_extern( +/*================*/ + /* out: nonzero if externally stored */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/********************************************************** +Returns nonzero if the extern bit is set in nth field of rec. */ +UNIV_INLINE +ulint +rec_offs_nth_extern( +/*================*/ + /* out: nonzero if externally stored */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n); /* in: nth field */ +/********************************************************** +Returns nonzero if the SQL NULL bit is set in nth field of rec. 
*/ +UNIV_INLINE +ulint +rec_offs_nth_sql_null( +/*==================*/ + /* out: nonzero if SQL NULL */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n); /* in: nth field */ +/********************************************************** +Gets the physical size of a field. */ +UNIV_INLINE +ulint +rec_offs_nth_size( +/*==============*/ + /* out: length of field */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n); /* in: nth field */ + +/********************************************************** +Returns the number of extern bits set in a record. */ +UNIV_INLINE +ulint +rec_offs_n_extern( +/*==============*/ + /* out: number of externally stored fields */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/*************************************************************** +This is used to modify the value of an already existing field in a record. +The previous value must have exactly the same size as the new value. If len +is UNIV_SQL_NULL then the field is treated as an SQL null. +For records in ROW_FORMAT=COMPACT (new-style records), len must not be +UNIV_SQL_NULL unless the field already is SQL null. */ +UNIV_INLINE +void +rec_set_nth_field( +/*==============*/ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n, /* in: index number of the field */ + const void* data, /* in: pointer to the data if not SQL null */ + ulint len); /* in: length of the data or UNIV_SQL_NULL */ +/************************************************************** +The following function returns the data size of an old-style physical +record, that is the sum of field lengths. SQL null fields +are counted as length 0 fields. The value returned by the function +is the distance from record origin to record end in bytes. 
*/ +UNIV_INLINE +ulint +rec_get_data_size_old( +/*==================*/ + /* out: size */ + const rec_t* rec); /* in: physical record */ +/************************************************************** +The following function returns the number of allocated elements +for an array of offsets. */ +UNIV_INLINE +ulint +rec_offs_get_n_alloc( +/*=================*/ + /* out: number of elements */ + const ulint* offsets);/* in: array for rec_get_offsets() */ +/************************************************************** +The following function sets the number of allocated elements +for an array of offsets. */ +UNIV_INLINE +void +rec_offs_set_n_alloc( +/*=================*/ + ulint* offsets, /* out: array for rec_get_offsets(), + must be allocated */ + ulint n_alloc); /* in: number of elements */ +/* Records the element count of an offsets array by calling +rec_offs_set_n_alloc(). The count is computed with sizeof, so the +argument must be an actual array visible in the current scope; with a +pointer the computed count would be wrong. */ +#define rec_offs_init(offsets) \ + rec_offs_set_n_alloc(offsets, (sizeof offsets) / sizeof *offsets) +/************************************************************** +The following function returns the number of fields in a record. */ +UNIV_INLINE +ulint +rec_offs_n_fields( +/*==============*/ + /* out: number of fields */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** +The following function returns the data size of a physical +record, that is the sum of field lengths. SQL null fields +are counted as length 0 fields. The value returned by the function +is the distance from record origin to record end in bytes. */ +UNIV_INLINE +ulint +rec_offs_data_size( +/*===============*/ + /* out: size */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** +Returns the total size of record minus data size of record. +The value returned by the function is the distance from record +start to record origin in bytes. 
*/ +UNIV_INLINE +ulint +rec_offs_extra_size( +/*================*/ + /* out: size */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** +Returns the total size of a physical record. */ +UNIV_INLINE +ulint +rec_offs_size( +/*==========*/ + /* out: size */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** +Returns a pointer to the start of the record. */ +UNIV_INLINE +byte* +rec_get_start( +/*==========*/ + /* out: pointer to start */ + rec_t* rec, /* in: pointer to record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** +Returns a pointer to the end of the record. */ +UNIV_INLINE +byte* +rec_get_end( +/*========*/ + /* out: pointer to end */ + rec_t* rec, /* in: pointer to record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/******************************************************************* +Copies a physical record to a buffer. */ +UNIV_INLINE +rec_t* +rec_copy( +/*=====*/ + /* out: pointer to the origin of the copy */ + void* buf, /* in: buffer */ + const rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/****************************************************************** +Copies the first n fields of a physical record to a new physical record in +a buffer. 
*/ +UNIV_INTERN +rec_t* +rec_copy_prefix_to_buf( +/*===================*/ + /* out, own: copied record */ + const rec_t* rec, /* in: physical record */ + const dict_index_t* index, /* in: record descriptor */ + ulint n_fields, /* in: number of fields + to copy */ + byte** buf, /* in/out: memory buffer + for the copied prefix, + or NULL */ + ulint* buf_size); /* in/out: buffer size */ +/**************************************************************** +Folds a prefix of a physical record to a ulint. */ +UNIV_INLINE +ulint +rec_fold( +/*=====*/ + /* out: the folded value */ + const rec_t* rec, /* in: the physical record */ + const ulint* offsets, /* in: array returned by + rec_get_offsets() */ + ulint n_fields, /* in: number of complete + fields to fold */ + ulint n_bytes, /* in: number of bytes to fold + in an incomplete last field */ + dulint tree_id) /* in: index tree id */ + __attribute__((pure)); +/************************************************************* +Builds a ROW_FORMAT=COMPACT record out of a data tuple. */ +UNIV_INTERN +void +rec_convert_dtuple_to_rec_comp( +/*===========================*/ + rec_t* rec, /* in: origin of record */ + ulint extra, /* in: number of bytes to + reserve between the record + header and the data payload + (normally REC_N_NEW_EXTRA_BYTES) */ + const dict_index_t* index, /* in: record descriptor */ + ulint status, /* in: status bits of the record */ + const dfield_t* fields, /* in: array of data fields */ + ulint n_fields);/* in: number of data fields */ +/************************************************************* +Builds a physical record out of a data tuple and +stores it into the given buffer. 
*/ +UNIV_INTERN +rec_t* +rec_convert_dtuple_to_rec( +/*======================*/ + /* out: pointer to the origin + of physical record */ + byte* buf, /* in: start address of the + physical record */ + const dict_index_t* index, /* in: record descriptor */ + const dtuple_t* dtuple, /* in: data tuple */ + ulint n_ext); /* in: number of + externally stored columns */ +/************************************************************** +Returns the extra size of an old-style physical record if we know its +data size and number of fields. */ +UNIV_INLINE +ulint +rec_get_converted_extra_size( +/*=========================*/ + /* out: extra size */ + ulint data_size, /* in: data size */ + ulint n_fields, /* in: number of fields */ + ulint n_ext) /* in: number of externally stored columns */ + __attribute__((const)); +/************************************************************** +Determines the size of a data tuple prefix in ROW_FORMAT=COMPACT. */ +UNIV_INTERN +ulint +rec_get_converted_size_comp_prefix( +/*===============================*/ + /* out: total size */ + const dict_index_t* index, /* in: record descriptor; + dict_table_is_comp() is + assumed to hold, even if + it does not */ + const dfield_t* fields, /* in: array of data fields */ + ulint n_fields,/* in: number of data fields */ + ulint* extra); /* out: extra size */ +/************************************************************** +Determines the size of a data tuple in ROW_FORMAT=COMPACT. 
*/ +UNIV_INTERN +ulint +rec_get_converted_size_comp( +/*========================*/ + /* out: total size */ + const dict_index_t* index, /* in: record descriptor; + dict_table_is_comp() is + assumed to hold, even if + it does not */ + ulint status, /* in: status bits of the record */ + const dfield_t* fields, /* in: array of data fields */ + ulint n_fields,/* in: number of data fields */ + ulint* extra); /* out: extra size */ +/************************************************************** +The following function returns the size of a data tuple when converted to +a physical record. */ +UNIV_INLINE +ulint +rec_get_converted_size( +/*===================*/ + /* out: size */ + dict_index_t* index, /* in: record descriptor */ + const dtuple_t* dtuple, /* in: data tuple */ + ulint n_ext); /* in: number of externally stored columns */ +/****************************************************************** +Copies the first n fields of a physical record to a data tuple. +The fields are copied to the memory heap. */ +UNIV_INTERN +void +rec_copy_prefix_to_dtuple( +/*======================*/ + dtuple_t* tuple, /* out: data tuple */ + const rec_t* rec, /* in: physical record */ + const dict_index_t* index, /* in: record descriptor */ + ulint n_fields, /* in: number of fields + to copy */ + mem_heap_t* heap); /* in: memory heap */ +/******************************************************************* +Validates the consistency of a physical record. */ +UNIV_INTERN +ibool +rec_validate( +/*=========*/ + /* out: TRUE if ok */ + const rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/******************************************************************* +Prints an old-style physical record. 
*/ +UNIV_INTERN +void +rec_print_old( +/*==========*/ + FILE* file, /* in: file where to print */ + const rec_t* rec); /* in: physical record */ +/******************************************************************* +Prints a physical record in ROW_FORMAT=COMPACT. Ignores the +record header. */ +UNIV_INTERN +void +rec_print_comp( +/*===========*/ + FILE* file, /* in: file where to print */ + const rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/******************************************************************* +Prints a physical record. */ +UNIV_INTERN +void +rec_print_new( +/*==========*/ + FILE* file, /* in: file where to print */ + const rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/******************************************************************* +Prints a physical record. */ +UNIV_INTERN +void +rec_print( +/*======*/ + FILE* file, /* in: file where to print */ + const rec_t* rec, /* in: physical record */ + dict_index_t* index); /* in: record descriptor */ + +#define REC_INFO_BITS 6 /* This is single byte bit-field */ + +/* Maximum lengths for the data in a physical record if the offsets +are given in one byte (resp. two byte) format. */ +#define REC_1BYTE_OFFS_LIMIT 0x7FUL +#define REC_2BYTE_OFFS_LIMIT 0x7FFFUL + +/* The data size of record must be smaller than this because we reserve +two upmost bits in a two byte offset for special purposes */ +#define REC_MAX_DATA_SIZE (16 * 1024) + +#ifndef UNIV_NONINL +#include "rem0rec.ic" +#endif + +#endif diff --git a/storage/xtradb/include/rem0rec.ic b/storage/xtradb/include/rem0rec.ic new file mode 100644 index 00000000000..0b2b9f4a685 --- /dev/null +++ b/storage/xtradb/include/rem0rec.ic @@ -0,0 +1,1652 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/************************************************************************ +Record manager + +Created 5/30/1994 Heikki Tuuri +*************************************************************************/ + +#include "mach0data.h" +#include "ut0byte.h" +#include "dict0dict.h" + +/* Compact flag ORed to the extra size returned by rec_get_offsets() */ +#define REC_OFFS_COMPACT ((ulint) 1 << 31) +/* SQL NULL flag in offsets returned by rec_get_offsets(). +NOTE: this is the same bit value as REC_OFFS_COMPACT. Judging from the two +descriptions, the compact flag is kept with the extra size and this flag +with the field offsets, so the shared value would not clash (TODO confirm +against rec_get_offsets()). */ +#define REC_OFFS_SQL_NULL ((ulint) 1 << 31) +/* External flag in offsets returned by rec_get_offsets() */ +#define REC_OFFS_EXTERNAL ((ulint) 1 << 30) +/* Mask for offsets returned by rec_get_offsets(): the low 30 bits, that is, +everything below the REC_OFFS_EXTERNAL and REC_OFFS_SQL_NULL flag bits */ +#define REC_OFFS_MASK (REC_OFFS_EXTERNAL - 1) + +/* Offsets of the bit-fields in an old-style record. NOTE! In the table the +most significant bytes and bits are written below less significant. + + (1) byte offset (2) bit usage within byte + downward from + origin -> 1 8 bits pointer to next record + 2 8 bits pointer to next record + 3 1 bit short flag + 7 bits number of fields + 4 3 bits number of fields + 5 bits heap number + 5 8 bits heap number + 6 4 bits n_owned + 4 bits info bits +*/ + +/* Offsets of the bit-fields in a new-style record. NOTE! 
In the table the +most significant bytes and bits are written below less significant. + + (1) byte offset (2) bit usage within byte + downward from + origin -> 1 8 bits relative offset of next record + 2 8 bits relative offset of next record + the relative offset is an unsigned 16-bit + integer: + (offset_of_next_record + - offset_of_this_record) mod 64Ki, + where mod is the modulo as a non-negative + number; + we can calculate the offset of the next + record with the formula: + relative_offset + offset_of_this_record + mod UNIV_PAGE_SIZE + 3 3 bits status: + 000=conventional record + 001=node pointer record (inside B-tree) + 010=infimum record + 011=supremum record + 1xx=reserved + 5 bits heap number + 4 8 bits heap number + 5 4 bits n_owned + 4 bits info bits +*/ + +/* We list the byte offsets from the origin of the record, the mask, +and the shift needed to obtain each bit-field of the record. */ + +#define REC_NEXT 2 +#define REC_NEXT_MASK 0xFFFFUL +#define REC_NEXT_SHIFT 0 + +#define REC_OLD_SHORT 3 /* This is single byte bit-field */ +#define REC_OLD_SHORT_MASK 0x1UL +#define REC_OLD_SHORT_SHIFT 0 + +#define REC_OLD_N_FIELDS 4 +#define REC_OLD_N_FIELDS_MASK 0x7FEUL +#define REC_OLD_N_FIELDS_SHIFT 1 + +#define REC_NEW_STATUS 3 /* This is single byte bit-field */ +#define REC_NEW_STATUS_MASK 0x7UL +#define REC_NEW_STATUS_SHIFT 0 + +#define REC_OLD_HEAP_NO 5 +#define REC_HEAP_NO_MASK 0xFFF8UL +#if 0 /* defined in rem0rec.h for use of page0zip.c */ +#define REC_NEW_HEAP_NO 4 +#define REC_HEAP_NO_SHIFT 3 +#endif + +#define REC_OLD_N_OWNED 6 /* This is single byte bit-field */ +#define REC_NEW_N_OWNED 5 /* This is single byte bit-field */ +#define REC_N_OWNED_MASK 0xFUL +#define REC_N_OWNED_SHIFT 0 + +#define REC_OLD_INFO_BITS 6 /* This is single byte bit-field */ +#define REC_NEW_INFO_BITS 5 /* This is single byte bit-field */ +#define REC_INFO_BITS_MASK 0xF0UL +/* The shift is 0 although the mask is 0xF0: info bits are read and +written in their in-byte position, not moved down to bit 0 */ +#define REC_INFO_BITS_SHIFT 0 + +/* The following masks are used to filter the SQL null bit from 
+one-byte and two-byte offsets */ + +#define REC_1BYTE_SQL_NULL_MASK 0x80UL +#define REC_2BYTE_SQL_NULL_MASK 0x8000UL + +/* In a 2-byte offset the second most significant bit denotes +a field stored to another page: */ + +#define REC_2BYTE_EXTERN_MASK 0x4000UL + +/* Compile-time sanity checks on the header bit-field masks. Each mask is +shifted to the position of its byte within the header and the results are +XORed together; the build is stopped with #error unless the result equals +the all-ones constant. A header bit claimed by no mask, or by two masks, +breaks the equality. The first check covers the old-style masks (32 bits), +the second the new-style masks (24 bits). */ +#if REC_OLD_SHORT_MASK << (8 * (REC_OLD_SHORT - 3)) \ + ^ REC_OLD_N_FIELDS_MASK << (8 * (REC_OLD_N_FIELDS - 4)) \ + ^ REC_HEAP_NO_MASK << (8 * (REC_OLD_HEAP_NO - 4)) \ + ^ REC_N_OWNED_MASK << (8 * (REC_OLD_N_OWNED - 3)) \ + ^ REC_INFO_BITS_MASK << (8 * (REC_OLD_INFO_BITS - 3)) \ + ^ 0xFFFFFFFFUL +# error "sum of old-style masks != 0xFFFFFFFFUL" +#endif +#if REC_NEW_STATUS_MASK << (8 * (REC_NEW_STATUS - 3)) \ + ^ REC_HEAP_NO_MASK << (8 * (REC_NEW_HEAP_NO - 4)) \ + ^ REC_N_OWNED_MASK << (8 * (REC_NEW_N_OWNED - 3)) \ + ^ REC_INFO_BITS_MASK << (8 * (REC_NEW_INFO_BITS - 3)) \ + ^ 0xFFFFFFUL +# error "sum of new-style masks != 0xFFFFFFUL" +#endif + +/*************************************************************** +Sets the value of the ith field SQL null bit of an old-style record. */ +UNIV_INTERN +void +rec_set_nth_field_null_bit( +/*=======================*/ + rec_t* rec, /* in: record */ + ulint i, /* in: ith field */ + ibool val); /* in: value to set */ +/*************************************************************** +Sets an old-style record field to SQL null. +The physical size of the field is not changed. */ +UNIV_INTERN +void +rec_set_nth_field_sql_null( +/*=======================*/ + rec_t* rec, /* in: record */ + ulint n); /* in: index of the field */ + +/********************************************************** +Gets a bit field from within 1 byte. 
*/ +UNIV_INLINE +ulint +rec_get_bit_field_1( +/*================*/ + /* out: the masked bits of the byte at + rec - offs, shifted right by shift */ + const rec_t* rec, /* in: pointer to record origin */ + ulint offs, /* in: offset from the origin down */ + ulint mask, /* in: mask used to filter bits */ + ulint shift) /* in: shift right applied after masking */ +{ + ut_ad(rec); + + return((mach_read_from_1(rec - offs) & mask) >> shift); +} + +/********************************************************** +Sets a bit field within 1 byte. The bits of the byte that are outside +the mask are preserved. */ +UNIV_INLINE +void +rec_set_bit_field_1( +/*================*/ + rec_t* rec, /* in/out: pointer to record origin */ + ulint val, /* in: value to set */ + ulint offs, /* in: offset from the origin down */ + ulint mask, /* in: mask used to filter bits */ + ulint shift) /* in: shift left applied to val before + it is stored under the mask */ +{ + ut_ad(rec); + ut_ad(offs <= REC_N_OLD_EXTRA_BYTES); + ut_ad(mask); + ut_ad(mask <= 0xFFUL); + /* mask must have no bits set below bit 'shift' */ + ut_ad(((mask >> shift) << shift) == mask); + /* the shifted value must fit entirely inside the mask */ + ut_ad(((val << shift) & mask) == (val << shift)); + + mach_write_to_1(rec - offs, + (mach_read_from_1(rec - offs) & ~mask) + | (val << shift)); +} + +/********************************************************** +Gets a bit field from within 2 bytes. */ +UNIV_INLINE +ulint +rec_get_bit_field_2( +/*================*/ + /* out: the masked bits of the 2-byte value + at rec - offs, shifted right by shift */ + const rec_t* rec, /* in: pointer to record origin */ + ulint offs, /* in: offset from the origin down */ + ulint mask, /* in: mask used to filter bits */ + ulint shift) /* in: shift right applied after masking */ +{ + ut_ad(rec); + + return((mach_read_from_2(rec - offs) & mask) >> shift); +} + +/********************************************************** +Sets a bit field within 2 bytes. 
*/ +UNIV_INLINE +void +rec_set_bit_field_2( +/*================*/ + rec_t* rec, /* in/out: pointer to record origin */ + ulint val, /* in: value to set */ + ulint offs, /* in: offset from the origin down */ + ulint mask, /* in: mask used to filter bits */ + ulint shift) /* in: shift left applied to val before + it is stored under the mask */ +{ + ut_ad(rec); + ut_ad(offs <= REC_N_OLD_EXTRA_BYTES); + ut_ad(mask > 0xFFUL); + ut_ad(mask <= 0xFFFFUL); + /* The next two assertions require mask >> shift to be of the + form 2^k - 1, i.e. the mask is one contiguous run of bits whose + lowest bit is bit 'shift' */ + ut_ad((mask >> shift) & 1); + ut_ad(0 == ((mask >> shift) & ((mask >> shift) + 1))); + ut_ad(((mask >> shift) << shift) == mask); + /* the shifted value must fit entirely inside the mask */ + ut_ad(((val << shift) & mask) == (val << shift)); + + mach_write_to_2(rec - offs, + (mach_read_from_2(rec - offs) & ~mask) + | (val << shift)); +} + +/********************************************************** +The following function is used to get the pointer of the next chained record +on the same page. */ +UNIV_INLINE +const rec_t* +rec_get_next_ptr_const( +/*===================*/ + /* out: pointer to the next chained record, or + NULL if none */ + const rec_t* rec, /* in: physical record */ + ulint comp) /* in: nonzero=compact page format */ +{ + ulint field_value; + + ut_ad(REC_NEXT_MASK == 0xFFFFUL); + ut_ad(REC_NEXT_SHIFT == 0); + + field_value = mach_read_from_2(rec - REC_NEXT); + + if (UNIV_UNLIKELY(field_value == 0)) { + + return(NULL); + } + + if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) { +#if UNIV_PAGE_SIZE <= 32768 + /* Note that for 64 KiB pages, field_value can 'wrap around' + and the debug assertion is not valid */ + + /* In the following assertion, field_value is interpreted + as signed 16-bit integer in 2's complement arithmetics. + If all platforms defined int16_t in the standard headers, + the expression could be written simpler as + (int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE + */ + ut_ad((field_value >= 32768 + ? 
field_value - 65536 + : field_value) + + ut_align_offset(rec, UNIV_PAGE_SIZE) + < UNIV_PAGE_SIZE); +#endif + /* There must be at least REC_N_NEW_EXTRA_BYTES + 1 + between each record. */ + ut_ad((field_value > REC_N_NEW_EXTRA_BYTES + && field_value < 32768) + || field_value < (uint16) -REC_N_NEW_EXTRA_BYTES); + + return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE) + + ut_align_offset(rec + field_value, UNIV_PAGE_SIZE)); + } else { + ut_ad(field_value < UNIV_PAGE_SIZE); + + return((byte*) ut_align_down(rec, UNIV_PAGE_SIZE) + + field_value); + } +} + +/********************************************************** +The following function is used to get the pointer of the next chained record +on the same page. */ +UNIV_INLINE +rec_t* +rec_get_next_ptr( +/*=============*/ + /* out: pointer to the next chained record, or + NULL if none */ + rec_t* rec, /* in: physical record */ + ulint comp) /* in: nonzero=compact page format */ +{ + return((rec_t*) rec_get_next_ptr_const(rec, comp)); +} + +/********************************************************** +The following function is used to get the offset of the next chained record +on the same page. */ +UNIV_INLINE +ulint +rec_get_next_offs( +/*==============*/ + /* out: the page offset of the next + chained record, or 0 if none */ + const rec_t* rec, /* in: physical record */ + ulint comp) /* in: nonzero=compact page format */ +{ + ulint field_value; +#if REC_NEXT_MASK != 0xFFFFUL +# error "REC_NEXT_MASK != 0xFFFFUL" +#endif +#if REC_NEXT_SHIFT +# error "REC_NEXT_SHIFT != 0" +#endif + + field_value = mach_read_from_2(rec - REC_NEXT); + + if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) { +#if UNIV_PAGE_SIZE <= 32768 + /* Note that for 64 KiB pages, field_value can 'wrap around' + and the debug assertion is not valid */ + + /* In the following assertion, field_value is interpreted + as signed 16-bit integer in 2's complement arithmetics. 
+ If all platforms defined int16_t in the standard headers, + the expression could be written simpler as + (int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE + */ + ut_ad((field_value >= 32768 + ? field_value - 65536 + : field_value) + + ut_align_offset(rec, UNIV_PAGE_SIZE) + < UNIV_PAGE_SIZE); +#endif + if (UNIV_UNLIKELY(field_value == 0)) { + + return(0); + } + + /* There must be at least REC_N_NEW_EXTRA_BYTES + 1 + between each record. */ + ut_ad((field_value > REC_N_NEW_EXTRA_BYTES + && field_value < 32768) + || field_value < (uint16) -REC_N_NEW_EXTRA_BYTES); + + return(ut_align_offset(rec + field_value, UNIV_PAGE_SIZE)); + } else { + ut_ad(field_value < UNIV_PAGE_SIZE); + + return(field_value); + } +} + +/********************************************************** +The following function is used to set the next record offset field +of an old-style record. */ +UNIV_INLINE +void +rec_set_next_offs_old( +/*==================*/ + rec_t* rec, /* in: old-style physical record */ + ulint next) /* in: offset of the next record */ +{ + ut_ad(rec); + ut_ad(UNIV_PAGE_SIZE > next); +#if REC_NEXT_MASK != 0xFFFFUL +# error "REC_NEXT_MASK != 0xFFFFUL" +#endif +#if REC_NEXT_SHIFT +# error "REC_NEXT_SHIFT != 0" +#endif + + mach_write_to_2(rec - REC_NEXT, next); +} + +/********************************************************** +The following function is used to set the next record offset field +of a new-style record. 
*/ +UNIV_INLINE +void +rec_set_next_offs_new( +/*==================*/ + rec_t* rec, /* in/out: new-style physical record */ + ulint next) /* in: offset of the next record */ +{ + ulint field_value; + + ut_ad(rec); + ut_ad(UNIV_PAGE_SIZE > next); + + if (UNIV_UNLIKELY(!next)) { + field_value = 0; + } else { + /* The following two statements calculate + next - offset_of_rec mod 64Ki, where mod is the modulo + as a non-negative number */ + + field_value = (ulint) + ((lint) next + - (lint) ut_align_offset(rec, UNIV_PAGE_SIZE)); + field_value &= REC_NEXT_MASK; + } + + mach_write_to_2(rec - REC_NEXT, field_value); +} + +/********************************************************** +The following function is used to get the number of fields +in an old-style record. */ +UNIV_INLINE +ulint +rec_get_n_fields_old( +/*=================*/ + /* out: number of data fields */ + const rec_t* rec) /* in: physical record */ +{ + ulint ret; + + ut_ad(rec); + + ret = rec_get_bit_field_2(rec, REC_OLD_N_FIELDS, + REC_OLD_N_FIELDS_MASK, + REC_OLD_N_FIELDS_SHIFT); + ut_ad(ret <= REC_MAX_N_FIELDS); + ut_ad(ret > 0); + + return(ret); +} + +/********************************************************** +The following function is used to set the number of fields +in an old-style record. */ +UNIV_INLINE +void +rec_set_n_fields_old( +/*=================*/ + rec_t* rec, /* in: physical record */ + ulint n_fields) /* in: the number of fields */ +{ + ut_ad(rec); + ut_ad(n_fields <= REC_MAX_N_FIELDS); + ut_ad(n_fields > 0); + + rec_set_bit_field_2(rec, n_fields, REC_OLD_N_FIELDS, + REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT); +} + +/********************************************************** +The following function retrieves the status bits of a new-style record. 
*/ +UNIV_INLINE +ulint +rec_get_status( +/*===========*/ + /* out: status bits */ + const rec_t* rec) /* in: physical record */ +{ + ulint ret; + + ut_ad(rec); + + ret = rec_get_bit_field_1(rec, REC_NEW_STATUS, + REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT); + ut_ad((ret & ~REC_NEW_STATUS_MASK) == 0); + + return(ret); +} + +/********************************************************** +The following function is used to get the number of fields +in a record. */ +UNIV_INLINE +ulint +rec_get_n_fields( +/*=============*/ + /* out: number of data fields */ + const rec_t* rec, /* in: physical record */ + const dict_index_t* index) /* in: record descriptor */ +{ + ut_ad(rec); + ut_ad(index); + + if (!dict_table_is_comp(index->table)) { + return(rec_get_n_fields_old(rec)); + } + + switch (rec_get_status(rec)) { + case REC_STATUS_ORDINARY: + return(dict_index_get_n_fields(index)); + case REC_STATUS_NODE_PTR: + return(dict_index_get_n_unique_in_tree(index) + 1); + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + return(1); + default: + ut_error; + return(ULINT_UNDEFINED); + } +} + +/********************************************************** +The following function is used to get the number of records owned by the +previous directory record. */ +UNIV_INLINE +ulint +rec_get_n_owned_old( +/*================*/ + /* out: number of owned records */ + const rec_t* rec) /* in: old-style physical record */ +{ + return(rec_get_bit_field_1(rec, REC_OLD_N_OWNED, + REC_N_OWNED_MASK, REC_N_OWNED_SHIFT)); +} + +/********************************************************** +The following function is used to set the number of owned records. 
*/ +UNIV_INLINE +void +rec_set_n_owned_old( +/*================*/ + rec_t* rec, /* in/out: old-style physical record */ + ulint n_owned) /* in: the number of owned */ +{ + rec_set_bit_field_1(rec, n_owned, REC_OLD_N_OWNED, + REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); +} + +/********************************************************** +The following function is used to get the number of records owned by the +previous directory record. */ +UNIV_INLINE +ulint +rec_get_n_owned_new( +/*================*/ + /* out: number of owned records */ + const rec_t* rec) /* in: new-style physical record */ +{ + return(rec_get_bit_field_1(rec, REC_NEW_N_OWNED, + REC_N_OWNED_MASK, REC_N_OWNED_SHIFT)); +} + +/********************************************************** +The following function is used to set the number of owned records. */ +UNIV_INLINE +void +rec_set_n_owned_new( +/*================*/ + rec_t* rec, /* in/out: new-style physical record */ + page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */ + ulint n_owned)/* in: the number of owned */ +{ + rec_set_bit_field_1(rec, n_owned, REC_NEW_N_OWNED, + REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); + /* When a compressed page is given, pass the new value on to it + as well, except when rec is the supremum record */ + if (UNIV_LIKELY_NULL(page_zip) + && UNIV_LIKELY(rec_get_status(rec) + != REC_STATUS_SUPREMUM)) { + page_zip_rec_set_owned(page_zip, rec, n_owned); + } +} + +/********************************************************** +The following function is used to retrieve the info bits of a record. */ +UNIV_INLINE +ulint +rec_get_info_bits( +/*==============*/ + /* out: info bits */ + const rec_t* rec, /* in: physical record */ + ulint comp) /* in: nonzero=compact page format */ +{ + return(rec_get_bit_field_1( + rec, comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS, + REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT)); +} + +/********************************************************** +The following function is used to set the info bits of a record. 
*/ +UNIV_INLINE +void +rec_set_info_bits_old( +/*==================*/ + rec_t* rec, /* in/out: old-style physical record */ + ulint bits) /* in: info bits */ +{ + rec_set_bit_field_1(rec, bits, REC_OLD_INFO_BITS, + REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); +} +/********************************************************** +The following function is used to set the info bits of a record. */ +UNIV_INLINE +void +rec_set_info_bits_new( +/*==================*/ + rec_t* rec, /* in/out: new-style physical record */ + ulint bits) /* in: info bits */ +{ + rec_set_bit_field_1(rec, bits, REC_NEW_INFO_BITS, + REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); +} + +/********************************************************** +The following function is used to set the status bits of a new-style record. */ +UNIV_INLINE +void +rec_set_status( +/*===========*/ + rec_t* rec, /* in/out: physical record */ + ulint bits) /* in: status bits */ +{ + rec_set_bit_field_1(rec, bits, REC_NEW_STATUS, + REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT); +} + +/********************************************************** +The following function is used to retrieve the info and status +bits of a record. (Only compact records have status bits.) */ +UNIV_INLINE +ulint +rec_get_info_and_status_bits( +/*=========================*/ + /* out: info bits; for a compact record + the status bits are ORed in */ + const rec_t* rec, /* in: physical record */ + ulint comp) /* in: nonzero=compact page format */ +{ + ulint bits; + /* The two fields are ORed into one value below, so their + masks must not share any bit */ +#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \ +& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT) +# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap" +#endif + if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) { + bits = rec_get_info_bits(rec, TRUE) | rec_get_status(rec); + } else { + bits = rec_get_info_bits(rec, FALSE); + ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT))); + } + return(bits); +} +/********************************************************** +The following function is used to set the info and status +bits of a record. 
(Only compact records have status bits.) */ +UNIV_INLINE +void +rec_set_info_and_status_bits( +/*=========================*/ + rec_t* rec, /* in/out: physical record; both fields are + written at their new-style positions */ + ulint bits) /* in: info bits and status bits, + ORed together */ +{ +#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \ +& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT) +# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap" +#endif + /* Split the combined value: the bits inside REC_NEW_STATUS_MASK + go to the status field, all remaining bits to the info bits */ + rec_set_status(rec, bits & REC_NEW_STATUS_MASK); + rec_set_info_bits_new(rec, bits & ~REC_NEW_STATUS_MASK); +} + +/********************************************************** +The following function tells if record is delete marked. */ +UNIV_INLINE +ulint +rec_get_deleted_flag( +/*=================*/ + /* out: nonzero if delete marked */ + const rec_t* rec, /* in: physical record */ + ulint comp) /* in: nonzero=compact page format */ +{ + if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) { + return(UNIV_UNLIKELY( + rec_get_bit_field_1(rec, REC_NEW_INFO_BITS, + REC_INFO_DELETED_FLAG, + REC_INFO_BITS_SHIFT))); + } else { + return(UNIV_UNLIKELY( + rec_get_bit_field_1(rec, REC_OLD_INFO_BITS, + REC_INFO_DELETED_FLAG, + REC_INFO_BITS_SHIFT))); + } +} + +/********************************************************** +The following function is used to set the deleted bit. */ +UNIV_INLINE +void +rec_set_deleted_flag_old( +/*=====================*/ + rec_t* rec, /* in/out: old-style physical record */ + ulint flag) /* in: nonzero if delete marked */ +{ + ulint val; + + val = rec_get_info_bits(rec, FALSE); + + if (flag) { + val |= REC_INFO_DELETED_FLAG; + } else { + val &= ~REC_INFO_DELETED_FLAG; + } + + rec_set_info_bits_old(rec, val); +} + +/********************************************************** +The following function is used to set the deleted bit. 
*/ +UNIV_INLINE +void +rec_set_deleted_flag_new( +/*=====================*/ + rec_t* rec, /* in/out: new-style physical record */ + page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */ + ulint flag) /* in: nonzero if delete marked */ +{ + ulint val; + + val = rec_get_info_bits(rec, TRUE); + + if (flag) { + val |= REC_INFO_DELETED_FLAG; + } else { + val &= ~REC_INFO_DELETED_FLAG; + } + + rec_set_info_bits_new(rec, val); + + if (UNIV_LIKELY_NULL(page_zip)) { + page_zip_rec_set_deleted(page_zip, rec, flag); + } +} + +/********************************************************** +The following function tells if a new-style record is a node pointer. */ +UNIV_INLINE +ibool +rec_get_node_ptr_flag( +/*==================*/ + /* out: TRUE if node pointer */ + const rec_t* rec) /* in: physical record */ +{ + return(REC_STATUS_NODE_PTR == rec_get_status(rec)); +} + +/********************************************************** +The following function is used to get the order number +of an old-style record in the heap of the index page. */ +UNIV_INLINE +ulint +rec_get_heap_no_old( +/*================*/ + /* out: heap order number */ + const rec_t* rec) /* in: physical record */ +{ + return(rec_get_bit_field_2(rec, REC_OLD_HEAP_NO, + REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT)); +} + +/********************************************************** +The following function is used to set the heap number +field in an old-style record. */ +UNIV_INLINE +void +rec_set_heap_no_old( +/*================*/ + rec_t* rec, /* in: physical record */ + ulint heap_no)/* in: the heap number */ +{ + rec_set_bit_field_2(rec, heap_no, REC_OLD_HEAP_NO, + REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT); +} + +/********************************************************** +The following function is used to get the order number +of a new-style record in the heap of the index page. 
*/ +UNIV_INLINE +ulint +rec_get_heap_no_new( +/*================*/ + /* out: heap order number */ + const rec_t* rec) /* in: physical record */ +{ + return(rec_get_bit_field_2(rec, REC_NEW_HEAP_NO, + REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT)); +} + +/********************************************************** +The following function is used to set the heap number +field in a new-style record. */ +UNIV_INLINE +void +rec_set_heap_no_new( +/*================*/ + rec_t* rec, /* in/out: physical record */ + ulint heap_no)/* in: the heap number */ +{ + rec_set_bit_field_2(rec, heap_no, REC_NEW_HEAP_NO, + REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT); +} + +/********************************************************** +The following function is used to test whether the data offsets in the record +are stored in one-byte or two-byte format. */ +UNIV_INLINE +ibool +rec_get_1byte_offs_flag( +/*====================*/ + /* out: TRUE if 1-byte form */ + const rec_t* rec) /* in: physical record */ +{ +#if TRUE != 1 +#error "TRUE != 1" +#endif + + return(rec_get_bit_field_1(rec, REC_OLD_SHORT, REC_OLD_SHORT_MASK, + REC_OLD_SHORT_SHIFT)); +} + +/********************************************************** +The following function is used to set the 1-byte offsets flag. */ +UNIV_INLINE +void +rec_set_1byte_offs_flag( +/*====================*/ + rec_t* rec, /* in/out: physical record */ + ibool flag) /* in: TRUE if 1-byte form */ +{ +#if TRUE != 1 +#error "TRUE != 1" +#endif + ut_ad(flag <= TRUE); + + rec_set_bit_field_1(rec, flag, REC_OLD_SHORT, REC_OLD_SHORT_MASK, + REC_OLD_SHORT_SHIFT); +} + +/********************************************************** +Returns the offset of nth field end if the record is stored in the 1-byte +offsets form. If the field is SQL null, the flag is ORed in the returned +value. 
*/ +UNIV_INLINE +ulint +rec_1_get_field_end_info( +/*=====================*/ + /* out: offset of the end of the + field, SQL null flag ORed */ + const rec_t* rec, /* in: record */ + ulint n) /* in: field index */ +{ + ut_ad(rec_get_1byte_offs_flag(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); + + return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1))); +} + +/********************************************************** +Returns the offset of nth field end if the record is stored in the 2-byte +offsets form. If the field is SQL null, the flag is ORed in the returned +value. */ +UNIV_INLINE +ulint +rec_2_get_field_end_info( +/*=====================*/ + /* out: offset of the end of the + field, SQL null flag and extern + storage flag ORed */ + const rec_t* rec, /* in: record */ + ulint n) /* in: field index */ +{ + ut_ad(!rec_get_1byte_offs_flag(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); + + return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2))); +} + +/* Get the base address of offsets. The extra_size is stored at +this position, and following positions hold the end offsets of +the fields. */ +#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE) + +/************************************************************** +The following function returns the number of allocated elements +for an array of offsets. */ +UNIV_INLINE +ulint +rec_offs_get_n_alloc( +/*=================*/ + /* out: number of elements */ + const ulint* offsets)/* in: array for rec_get_offsets() */ +{ + ulint n_alloc; + ut_ad(offsets); + n_alloc = offsets[0]; + ut_ad(n_alloc > REC_OFFS_HEADER_SIZE); + UNIV_MEM_ASSERT_W(offsets, n_alloc * sizeof *offsets); + return(n_alloc); +} + +/************************************************************** +The following function sets the number of allocated elements +for an array of offsets. 
*/ +UNIV_INLINE +void +rec_offs_set_n_alloc( +/*=================*/ + ulint* offsets, /* out: array for rec_get_offsets(), + must be allocated */ + ulint n_alloc) /* in: number of elements */ +{ + ut_ad(offsets); + ut_ad(n_alloc > REC_OFFS_HEADER_SIZE); + UNIV_MEM_ASSERT_AND_ALLOC(offsets, n_alloc * sizeof *offsets); + offsets[0] = n_alloc; +} + +/************************************************************** +The following function returns the number of fields in a record. */ +UNIV_INLINE +ulint +rec_offs_n_fields( +/*==============*/ + /* out: number of fields */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ulint n_fields; + ut_ad(offsets); + n_fields = offsets[1]; + ut_ad(n_fields > 0); + ut_ad(n_fields <= REC_MAX_N_FIELDS); + ut_ad(n_fields + REC_OFFS_HEADER_SIZE + <= rec_offs_get_n_alloc(offsets)); + return(n_fields); +} + +/**************************************************************** +Validates offsets returned by rec_get_offsets(). */ +UNIV_INLINE +ibool +rec_offs_validate( +/*==============*/ + /* out: TRUE if valid */ + const rec_t* rec, /* in: record or NULL */ + const dict_index_t* index, /* in: record descriptor or NULL */ + const ulint* offsets)/* in: array returned by + rec_get_offsets() */ +{ + ulint i = rec_offs_n_fields(offsets); + ulint last = ULINT_MAX; + ulint comp = *rec_offs_base(offsets) & REC_OFFS_COMPACT; + + if (rec) { + ut_ad((ulint) rec == offsets[2]); + if (!comp) { + ut_a(rec_get_n_fields_old(rec) >= i); + } + } + if (index) { + ulint max_n_fields; + ut_ad((ulint) index == offsets[3]); + max_n_fields = ut_max( + dict_index_get_n_fields(index), + dict_index_get_n_unique_in_tree(index) + 1); + if (comp && rec) { + switch (rec_get_status(rec)) { + case REC_STATUS_ORDINARY: + break; + case REC_STATUS_NODE_PTR: + max_n_fields = dict_index_get_n_unique_in_tree( + index) + 1; + break; + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + max_n_fields = 1; + break; + default: + ut_error; + } + } + /* 
index->n_def == 0 for dummy indexes if !comp */ + ut_a(!comp || index->n_def); + ut_a(!index->n_def || i <= max_n_fields); + } + while (i--) { + ulint curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK; + ut_a(curr <= last); + last = curr; + } + return(TRUE); +} +#ifdef UNIV_DEBUG +/**************************************************************** +Updates debug data in offsets, in order to avoid bogus +rec_offs_validate() failures. */ +UNIV_INLINE +void +rec_offs_make_valid( +/*================*/ + const rec_t* rec, /* in: record */ + const dict_index_t* index, /* in: record descriptor */ + ulint* offsets)/* in: array returned by + rec_get_offsets() */ +{ + ut_ad(rec); + ut_ad(index); + ut_ad(offsets); + ut_ad(rec_get_n_fields(rec, index) >= rec_offs_n_fields(offsets)); + offsets[2] = (ulint) rec; + offsets[3] = (ulint) index; +} +#endif /* UNIV_DEBUG */ + +/**************************************************************** +The following function is used to get an offset to the nth +data field in a record. */ +UNIV_INLINE +ulint +rec_get_nth_field_offs( +/*===================*/ + /* out: offset from the origin of rec */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n, /* in: index of the field */ + ulint* len) /* out: length of the field; UNIV_SQL_NULL + if SQL null */ +{ + ulint offs; + ulint length; + ut_ad(n < rec_offs_n_fields(offsets)); + ut_ad(len); + + if (UNIV_UNLIKELY(n == 0)) { + offs = 0; + } else { + offs = rec_offs_base(offsets)[n] & REC_OFFS_MASK; + } + + length = rec_offs_base(offsets)[1 + n]; + + if (length & REC_OFFS_SQL_NULL) { + length = UNIV_SQL_NULL; + } else { + length &= REC_OFFS_MASK; + length -= offs; + } + + *len = length; + return(offs); +} + +/********************************************************** +Determine if the offsets are for a record in the new +compact format. 
*/ +UNIV_INLINE +ulint +rec_offs_comp( +/*==========*/ + /* out: nonzero if compact format */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + return(*rec_offs_base(offsets) & REC_OFFS_COMPACT); +} + +/********************************************************** +Determine if the offsets are for a record containing +externally stored columns. */ +UNIV_INLINE +ulint +rec_offs_any_extern( +/*================*/ + /* out: nonzero if externally stored */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + return(UNIV_UNLIKELY(*rec_offs_base(offsets) & REC_OFFS_EXTERNAL)); +} + +/********************************************************** +Returns nonzero if the extern bit is set in nth field of rec. */ +UNIV_INLINE +ulint +rec_offs_nth_extern( +/*================*/ + /* out: nonzero if externally stored */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n) /* in: nth field */ +{ + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + ut_ad(n < rec_offs_n_fields(offsets)); + return(UNIV_UNLIKELY(rec_offs_base(offsets)[1 + n] + & REC_OFFS_EXTERNAL)); +} + +/********************************************************** +Returns nonzero if the SQL NULL bit is set in nth field of rec. */ +UNIV_INLINE +ulint +rec_offs_nth_sql_null( +/*==================*/ + /* out: nonzero if SQL NULL */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n) /* in: nth field */ +{ + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + ut_ad(n < rec_offs_n_fields(offsets)); + return(UNIV_UNLIKELY(rec_offs_base(offsets)[1 + n] + & REC_OFFS_SQL_NULL)); +} + +/********************************************************** +Gets the physical size of a field. 
*/ +UNIV_INLINE +ulint +rec_offs_nth_size( +/*==============*/ + /* out: length of field */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n) /* in: nth field */ +{ + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + ut_ad(n < rec_offs_n_fields(offsets)); + if (!n) { + return(rec_offs_base(offsets)[1 + n] & REC_OFFS_MASK); + } + return((rec_offs_base(offsets)[1 + n] - rec_offs_base(offsets)[n]) + & REC_OFFS_MASK); +} + +/********************************************************** +Returns the number of extern bits set in a record. */ +UNIV_INLINE +ulint +rec_offs_n_extern( +/*==============*/ + /* out: number of externally stored fields */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ulint n = 0; + + if (rec_offs_any_extern(offsets)) { + ulint i; + + for (i = rec_offs_n_fields(offsets); i--; ) { + if (rec_offs_nth_extern(offsets, i)) { + n++; + } + } + } + + return(n); +} + +/********************************************************** +Returns the offset of n - 1th field end if the record is stored in the 1-byte +offsets form. If the field is SQL null, the flag is ORed in the returned +value. This function and the 2-byte counterpart are defined here because the +C-compiler was not able to sum negative and positive constant offsets, and +warned of constant arithmetic overflow within the compiler. */ +UNIV_INLINE +ulint +rec_1_get_prev_field_end_info( +/*==========================*/ + /* out: offset of the end of the + PREVIOUS field, SQL null flag ORed */ + const rec_t* rec, /* in: record */ + ulint n) /* in: field index */ +{ + ut_ad(rec_get_1byte_offs_flag(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); + + return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n))); +} + +/********************************************************** +Returns the offset of n - 1th field end if the record is stored in the 2-byte +offsets form. If the field is SQL null, the flag is ORed in the returned +value. 
*/ +UNIV_INLINE +ulint +rec_2_get_prev_field_end_info( +/*==========================*/ + /* out: offset of the end of the + PREVIOUS field, SQL null flag and + extern storage flag ORed */ + const rec_t* rec, /* in: record */ + ulint n) /* in: field index */ +{ + ut_ad(!rec_get_1byte_offs_flag(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); + + return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n))); +} + +/********************************************************** +Sets the field end info for the nth field if the record is stored in the +1-byte format. */ +UNIV_INLINE +void +rec_1_set_field_end_info( +/*=====================*/ + rec_t* rec, /* in/out: record */ + ulint n, /* in: field index */ + ulint info) /* in: value to set */ +{ + ut_ad(rec_get_1byte_offs_flag(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); + + mach_write_to_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1), info); +} + +/********************************************************** +Sets the field end info for the nth field if the record is stored in the +2-byte format. */ +UNIV_INLINE +void +rec_2_set_field_end_info( +/*=====================*/ + rec_t* rec, /* in/out: record */ + ulint n, /* in: field index */ + ulint info) /* in: value to set */ +{ + ut_ad(!rec_get_1byte_offs_flag(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); + + mach_write_to_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2), info); +} + +/********************************************************** +Returns the offset of nth field start if the record is stored in the 1-byte +offsets form. 
*/ +UNIV_INLINE +ulint +rec_1_get_field_start_offs( +/*=======================*/ + /* out: offset of the start of the field */ + const rec_t* rec, /* in: record */ + ulint n) /* in: field index */ +{ + ut_ad(rec_get_1byte_offs_flag(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); + + if (n == 0) { + + return(0); + } + + return(rec_1_get_prev_field_end_info(rec, n) + & ~REC_1BYTE_SQL_NULL_MASK); +} + +/********************************************************** +Returns the offset of nth field start if the record is stored in the 2-byte +offsets form. */ +UNIV_INLINE +ulint +rec_2_get_field_start_offs( +/*=======================*/ + /* out: offset of the start of the field */ + const rec_t* rec, /* in: record */ + ulint n) /* in: field index */ +{ + ut_ad(!rec_get_1byte_offs_flag(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); + + if (n == 0) { + + return(0); + } + + return(rec_2_get_prev_field_end_info(rec, n) + & ~(REC_2BYTE_SQL_NULL_MASK | REC_2BYTE_EXTERN_MASK)); +} + +/********************************************************** +The following function is used to read the offset of the start of a data field +in the record. The start of an SQL null field is the end offset of the +previous non-null field, or 0, if none exists. If n is the number of the last +field + 1, then the end offset of the last field is returned. */ +UNIV_INLINE +ulint +rec_get_field_start_offs( +/*=====================*/ + /* out: offset of the start of the field */ + const rec_t* rec, /* in: record */ + ulint n) /* in: field index */ +{ + ut_ad(rec); + ut_ad(n <= rec_get_n_fields_old(rec)); + + if (n == 0) { + + return(0); + } + + if (rec_get_1byte_offs_flag(rec)) { + + return(rec_1_get_field_start_offs(rec, n)); + } + + return(rec_2_get_field_start_offs(rec, n)); +} + +/**************************************************************** +Gets the physical size of an old-style field. +Also an SQL null may have a field of size > 0, +if the data type is of a fixed size. 
*/ +UNIV_INLINE +ulint +rec_get_nth_field_size( +/*===================*/ + /* out: field size in bytes */ + const rec_t* rec, /* in: record */ + ulint n) /* in: index of the field */ +{ + ulint os; + ulint next_os; + + os = rec_get_field_start_offs(rec, n); + next_os = rec_get_field_start_offs(rec, n + 1); + + ut_ad(next_os - os < UNIV_PAGE_SIZE); + + return(next_os - os); +} + +/*************************************************************** +This is used to modify the value of an already existing field in a record. +The previous value must have exactly the same size as the new value. If len +is UNIV_SQL_NULL then the field is treated as an SQL null. +For records in ROW_FORMAT=COMPACT (new-style records), len must not be +UNIV_SQL_NULL unless the field already is SQL null. */ +UNIV_INLINE +void +rec_set_nth_field( +/*==============*/ + rec_t* rec, /* in/out: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n, /* in: index number of the field */ + const void* data, /* in: pointer to the data + if not SQL null */ + ulint len) /* in: length of the data or UNIV_SQL_NULL */ +{ + byte* data2; + ulint len2; + + ut_ad(rec); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + + if (UNIV_UNLIKELY(len == UNIV_SQL_NULL)) { + if (!rec_offs_nth_sql_null(offsets, n)) { + ut_a(!rec_offs_comp(offsets)); + rec_set_nth_field_sql_null(rec, n); + } + + return; + } + + data2 = rec_get_nth_field(rec, offsets, n, &len2); + if (len2 == UNIV_SQL_NULL) { + ut_ad(!rec_offs_comp(offsets)); + rec_set_nth_field_null_bit(rec, n, FALSE); + ut_ad(len == rec_get_nth_field_size(rec, n)); + } else { + ut_ad(len2 == len); + } + + ut_memcpy(data2, data, len); +} + +/************************************************************** +The following function returns the data size of an old-style physical +record, that is the sum of field lengths. SQL null fields +are counted as length 0 fields. 
The value returned by the function +is the distance from record origin to record end in bytes. */ +UNIV_INLINE +ulint +rec_get_data_size_old( +/*==================*/ + /* out: size */ + const rec_t* rec) /* in: physical record */ +{ + ut_ad(rec); + + return(rec_get_field_start_offs(rec, rec_get_n_fields_old(rec))); +} + +/************************************************************** +The following function sets the number of fields in offsets. */ +UNIV_INLINE +void +rec_offs_set_n_fields( +/*==================*/ + ulint* offsets, /* in/out: array returned by + rec_get_offsets() */ + ulint n_fields) /* in: number of fields */ +{ + ut_ad(offsets); + ut_ad(n_fields > 0); + ut_ad(n_fields <= REC_MAX_N_FIELDS); + ut_ad(n_fields + REC_OFFS_HEADER_SIZE + <= rec_offs_get_n_alloc(offsets)); + offsets[1] = n_fields; +} + +/************************************************************** +The following function returns the data size of a physical +record, that is the sum of field lengths. SQL null fields +are counted as length 0 fields. The value returned by the function +is the distance from record origin to record end in bytes. */ +UNIV_INLINE +ulint +rec_offs_data_size( +/*===============*/ + /* out: size */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ulint size; + + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + size = rec_offs_base(offsets)[rec_offs_n_fields(offsets)] + & REC_OFFS_MASK; + ut_ad(size < UNIV_PAGE_SIZE); + return(size); +} + +/************************************************************** +Returns the total size of record minus data size of record. The value +returned by the function is the distance from record start to record origin +in bytes. 
*/ +UNIV_INLINE +ulint +rec_offs_extra_size( +/*================*/ + /* out: size */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ulint size; + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + size = *rec_offs_base(offsets) & ~(REC_OFFS_COMPACT | REC_OFFS_EXTERNAL); + ut_ad(size < UNIV_PAGE_SIZE); + return(size); +} + +/************************************************************** +Returns the total size of a physical record. */ +UNIV_INLINE +ulint +rec_offs_size( +/*==========*/ + /* out: size */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + return(rec_offs_data_size(offsets) + rec_offs_extra_size(offsets)); +} + +/************************************************************** +Returns a pointer to the end of the record. */ +UNIV_INLINE +byte* +rec_get_end( +/*========*/ + /* out: pointer to end */ + rec_t* rec, /* in: pointer to record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ut_ad(rec_offs_validate(rec, NULL, offsets)); + return(rec + rec_offs_data_size(offsets)); +} + +/************************************************************** +Returns a pointer to the start of the record. */ +UNIV_INLINE +byte* +rec_get_start( +/*==========*/ + /* out: pointer to start */ + rec_t* rec, /* in: pointer to record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ut_ad(rec_offs_validate(rec, NULL, offsets)); + return(rec - rec_offs_extra_size(offsets)); +} + +/******************************************************************* +Copies a physical record to a buffer. 
*/ +UNIV_INLINE +rec_t* +rec_copy( +/*=====*/ + /* out: pointer to the origin of the copy */ + void* buf, /* out: buffer that receives the copy of the extra bytes followed by the data bytes */ + const rec_t* rec, /* in: physical record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ulint extra_len; + ulint data_len; + + ut_ad(rec && buf); + ut_ad(rec_offs_validate((rec_t*) rec, NULL, offsets)); + ut_ad(rec_validate(rec, offsets)); + + extra_len = rec_offs_extra_size(offsets); + data_len = rec_offs_data_size(offsets); + + ut_memcpy(buf, rec - extra_len, extra_len + data_len); + + return((byte*)buf + extra_len); +} + +/************************************************************** +Returns the extra size of an old-style physical record if we know its +data size and number of fields. */ +UNIV_INLINE +ulint +rec_get_converted_extra_size( +/*=========================*/ + /* out: extra size */ + ulint data_size, /* in: data size */ + ulint n_fields, /* in: number of fields */ + ulint n_ext) /* in: number of externally stored columns */ +{ + if (!n_ext && data_size <= REC_1BYTE_OFFS_LIMIT) { + + return(REC_N_OLD_EXTRA_BYTES + n_fields); + } + + return(REC_N_OLD_EXTRA_BYTES + 2 * n_fields); +} + +/************************************************************** +The following function returns the size of a data tuple when converted to +a physical record. */ +UNIV_INLINE +ulint +rec_get_converted_size( +/*===================*/ + /* out: size */ + dict_index_t* index, /* in: record descriptor */ + const dtuple_t* dtuple, /* in: data tuple */ + ulint n_ext) /* in: number of externally stored columns */ +{ + ulint data_size; + ulint extra_size; + + ut_ad(index); + ut_ad(dtuple); + ut_ad(dtuple_check_typed(dtuple)); + + ut_ad(index->type & DICT_UNIVERSAL + || dtuple_get_n_fields(dtuple) + == (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK) + == REC_STATUS_NODE_PTR) + ? 
dict_index_get_n_unique_in_tree(index) + 1 + : dict_index_get_n_fields(index))); + + if (dict_table_is_comp(index->table)) { + return(rec_get_converted_size_comp(index, + dtuple_get_info_bits(dtuple) + & REC_NEW_STATUS_MASK, + dtuple->fields, + dtuple->n_fields, NULL)); + } + + data_size = dtuple_get_data_size(dtuple); + + extra_size = rec_get_converted_extra_size( + data_size, dtuple_get_n_fields(dtuple), n_ext); + + return(data_size + extra_size); +} + +/**************************************************************** +Folds a prefix of a physical record to a ulint. Folds only existing fields, +that is, checks that we do not run out of the record. */ +UNIV_INLINE +ulint +rec_fold( +/*=====*/ + /* out: the folded value */ + const rec_t* rec, /* in: the physical record */ + const ulint* offsets, /* in: array returned by + rec_get_offsets() */ + ulint n_fields, /* in: number of complete + fields to fold */ + ulint n_bytes, /* in: number of bytes to fold + in an incomplete last field */ + dulint tree_id) /* in: index tree id */ +{ + ulint i; + const byte* data; + ulint len; + ulint fold; + ulint n_fields_rec; + + ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(rec_validate(rec, offsets)); + ut_ad(n_fields + n_bytes > 0); + + n_fields_rec = rec_offs_n_fields(offsets); + ut_ad(n_fields <= n_fields_rec); + ut_ad(n_fields < n_fields_rec || n_bytes == 0); + + if (n_fields > n_fields_rec) { + n_fields = n_fields_rec; + } + + if (n_fields == n_fields_rec) { + n_bytes = 0; + } + + fold = ut_fold_dulint(tree_id); + + for (i = 0; i < n_fields; i++) { + data = rec_get_nth_field(rec, offsets, i, &len); + + if (len != UNIV_SQL_NULL) { + fold = ut_fold_ulint_pair(fold, + ut_fold_binary(data, len)); + } + } + + if (n_bytes > 0) { + data = rec_get_nth_field(rec, offsets, i, &len); + + if (len != UNIV_SQL_NULL) { + if (len > n_bytes) { + len = n_bytes; + } + + fold = ut_fold_ulint_pair(fold, + ut_fold_binary(data, len)); + } + } + + return(fold); +} diff --git 
a/storage/xtradb/include/rem0types.h b/storage/xtradb/include/rem0types.h new file mode 100644 index 00000000000..d0b11b92495 --- /dev/null +++ b/storage/xtradb/include/rem0types.h @@ -0,0 +1,45 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/************************************************************************ +Record manager global types + +Created 5/30/1994 Heikki Tuuri +*************************************************************************/ + +#ifndef rem0types_h +#define rem0types_h + +/* We define the physical record simply as an array of bytes */ +typedef byte rec_t; + +/* Maximum values for various fields (for non-blob tuples) */ +#define REC_MAX_N_FIELDS (1024 - 1) +#define REC_MAX_HEAP_NO (2 * 8192 - 1) +#define REC_MAX_N_OWNED (16 - 1) + +/* REC_MAX_INDEX_COL_LEN is measured in bytes and is the maximum +indexed column length (or indexed prefix length). It is set to 3*256, +so that one can create a column prefix index on 256 characters of a +TEXT or VARCHAR column also in the UTF-8 charset. In that charset, +a character may take at most 3 bytes. +This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data +files would be at risk! 
*/ +#define REC_MAX_INDEX_COL_LEN 768 + +#endif diff --git a/storage/xtradb/include/row0ext.h b/storage/xtradb/include/row0ext.h new file mode 100644 index 00000000000..08ebafa4d98 --- /dev/null +++ b/storage/xtradb/include/row0ext.h @@ -0,0 +1,98 @@ +/***************************************************************************** + +Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Caching of externally stored column prefixes + +Created September 2006 Marko Makela +*******************************************************/ + +#ifndef row0ext_h +#define row0ext_h + +#include "univ.i" +#include "row0types.h" +#include "data0types.h" +#include "mem0mem.h" + +/************************************************************************ +Creates a cache of column prefixes of externally stored columns. 
*/ +UNIV_INTERN +row_ext_t* +row_ext_create( +/*===========*/ + /* out,own: column prefix cache */ + ulint n_ext, /* in: number of externally stored columns */ + const ulint* ext, /* in: col_no's of externally stored columns + in the InnoDB table object, as reported by + dict_col_get_no(); NOT relative to the records + in the clustered index */ + const dtuple_t* tuple, /* in: data tuple containing the field + references of the externally stored + columns; must be indexed by col_no; + the clustered index record must be + covered by a lock or a page latch + to prevent deletion (rollback or purge). */ + ulint zip_size,/* in: compressed page size in bytes, or 0 */ + mem_heap_t* heap); /* in: heap where created */ + +/************************************************************************ +Looks up a column prefix of an externally stored column. */ +UNIV_INLINE +const byte* +row_ext_lookup_ith( +/*===============*/ + /* out: column prefix, or NULL if + the column is not stored externally, + or pointer to field_ref_zero + if the BLOB pointer is unset */ + const row_ext_t* ext, /* in: column prefix cache */ + ulint i, /* in: index of ext->ext[] */ + ulint* len); /* out: length of prefix, in bytes, + at most REC_MAX_INDEX_COL_LEN */ +/************************************************************************ +Looks up a column prefix of an externally stored column. 
*/ +UNIV_INLINE +const byte* +row_ext_lookup( +/*===========*/ + /* out: column prefix, or NULL if + the column is not stored externally, + or pointer to field_ref_zero + if the BLOB pointer is unset */ + const row_ext_t* ext, /* in: column prefix cache */ + ulint col, /* in: column number in the InnoDB + table object, as reported by + dict_col_get_no(); NOT relative to the + records in the clustered index */ + ulint* len); /* out: length of prefix, in bytes, + at most REC_MAX_INDEX_COL_LEN */ + +/* Prefixes of externally stored columns */ +struct row_ext_struct{ + ulint n_ext; /* number of externally stored columns */ + const ulint* ext; /* col_no's of externally stored columns */ + byte* buf; /* backing store of the column prefix cache */ + ulint len[1]; /* prefix lengths; 0 if not cached */ +}; + +#ifndef UNIV_NONINL +#include "row0ext.ic" +#endif + +#endif diff --git a/storage/xtradb/include/row0ext.ic b/storage/xtradb/include/row0ext.ic new file mode 100644 index 00000000000..e56fc175764 --- /dev/null +++ b/storage/xtradb/include/row0ext.ic @@ -0,0 +1,87 @@ +/***************************************************************************** + +Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Caching of externally stored column prefixes + +Created September 2006 Marko Makela +*******************************************************/ + +#include "rem0types.h" +#include "btr0types.h" + +/************************************************************************ +Looks up a column prefix of an externally stored column. */ +UNIV_INLINE +const byte* +row_ext_lookup_ith( +/*===============*/ + /* out: pointer to the cached column + prefix, or pointer to field_ref_zero + if no prefix is cached for entry i + (ext->len[i] == 0) */ + const row_ext_t* ext, /* in: column prefix cache */ + ulint i, /* in: index of ext->ext[] */ + ulint* len) /* out: length of prefix, in bytes, + at most REC_MAX_INDEX_COL_LEN */ +{ + ut_ad(ext); + ut_ad(len); + ut_ad(i < ext->n_ext); + + *len = ext->len[i]; + + if (UNIV_UNLIKELY(*len == 0)) { + /* The BLOB could not be fetched to the cache. */ + return(field_ref_zero); + } else { + return(ext->buf + i * REC_MAX_INDEX_COL_LEN); + } +} + +/************************************************************************ +Looks up a column prefix of an externally stored column. 
*/ +UNIV_INLINE +const byte* +row_ext_lookup( +/*===========*/ + /* out: column prefix, or NULL if + the column is not stored externally, + or pointer to field_ref_zero + if the BLOB pointer is unset */ + const row_ext_t* ext, /* in: column prefix cache */ + ulint col, /* in: column number in the InnoDB + table object, as reported by + dict_col_get_no(); NOT relative to the + records in the clustered index */ + ulint* len) /* out: length of prefix, in bytes, + at most REC_MAX_INDEX_COL_LEN */ +{ + ulint i; + + ut_ad(ext); + ut_ad(len); + + for (i = 0; i < ext->n_ext; i++) { + if (col == ext->ext[i]) { + return(row_ext_lookup_ith(ext, i, len)); + } + } + + return(NULL); +} diff --git a/storage/xtradb/include/row0ins.h b/storage/xtradb/include/row0ins.h new file mode 100644 index 00000000000..6aa83bed0f6 --- /dev/null +++ b/storage/xtradb/include/row0ins.h @@ -0,0 +1,157 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Insert into a table + +Created 4/20/1996 Heikki Tuuri +*******************************************************/ + +#ifndef row0ins_h +#define row0ins_h + +#include "univ.i" +#include "data0data.h" +#include "que0types.h" +#include "dict0types.h" +#include "trx0types.h" +#include "row0types.h" + +/******************************************************************* +Checks if foreign key constraint fails for an index entry. Sets shared locks +which lock either the success or the failure of the constraint. NOTE that +the caller must have a shared latch on dict_foreign_key_check_lock. */ +UNIV_INTERN +ulint +row_ins_check_foreign_constraint( +/*=============================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_NO_REFERENCED_ROW, + or DB_ROW_IS_REFERENCED */ + ibool check_ref,/* in: TRUE if we want to check that + the referenced table is ok, FALSE if we + want to check the foreign key table */ + dict_foreign_t* foreign,/* in: foreign constraint; NOTE that the + tables mentioned in it must be in the + dictionary cache if they exist at all */ + dict_table_t* table, /* in: if check_ref is TRUE, then the foreign + table, else the referenced table */ + dtuple_t* entry, /* in: index entry for index */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************* +Creates an insert node struct. */ +UNIV_INTERN +ins_node_t* +ins_node_create( +/*============*/ + /* out, own: insert node struct */ + ulint ins_type, /* in: INS_VALUES, ...
*/ + dict_table_t* table, /* in: table where to insert */ + mem_heap_t* heap); /* in: mem heap where created */ +/************************************************************************* +Sets a new row to insert for an INS_DIRECT node. This function is only used +if we have constructed the row separately, which is a rare case; this +function is quite slow. */ +UNIV_INTERN +void +ins_node_set_new_row( +/*=================*/ + ins_node_t* node, /* in: insert node */ + dtuple_t* row); /* in: new row (or first row) for the node */ +/******************************************************************* +Inserts an index entry to index. Tries first optimistic, then pessimistic +descent down the tree. If the entry matches enough to a delete marked record, +performs the insert by updating or delete unmarking the delete marked +record. */ +UNIV_INTERN +ulint +row_ins_index_entry( +/*================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_DUPLICATE_KEY, or some other error code */ + dict_index_t* index, /* in: index */ + dtuple_t* entry, /* in: index entry to insert */ + ulint n_ext, /* in: number of externally stored columns */ + ibool foreign,/* in: TRUE=check foreign key constraints */ + que_thr_t* thr); /* in: query thread */ +/*************************************************************** +Inserts a row to a table. This is a high-level function used in +SQL execution graphs. */ +UNIV_INTERN +que_thr_t* +row_ins_step( +/*=========*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ +/*************************************************************** +Creates an entry template for each index of a table. 
*/ +UNIV_INTERN +void +ins_node_create_entry_list( +/*=======================*/ + ins_node_t* node); /* in: row insert node */ + +/* Insert node structure */ + +struct ins_node_struct{ + que_common_t common; /* node type: QUE_NODE_INSERT */ + ulint ins_type;/* INS_VALUES, INS_SEARCHED, or INS_DIRECT */ + dtuple_t* row; /* row to insert */ + dict_table_t* table; /* table where to insert */ + sel_node_t* select; /* select in searched insert */ + que_node_t* values_list;/* list of expressions to evaluate and + insert in an INS_VALUES insert */ + ulint state; /* node execution state */ + dict_index_t* index; /* NULL, or the next index where the index + entry should be inserted */ + dtuple_t* entry; /* NULL, or entry to insert in the index; + after a successful insert of the entry, + this should be reset to NULL */ + UT_LIST_BASE_NODE_T(dtuple_t) + entry_list;/* list of entries, one for each index */ + byte* row_id_buf;/* buffer for the row id sys field in row */ + dulint trx_id; /* trx id of the last trx which executed the + node */ + byte* trx_id_buf;/* buffer for the trx id sys field in row */ + mem_heap_t* entry_sys_heap; + /* memory heap used as auxiliary storage; + entry_list and sys fields are stored here; + if this is NULL, entry list should be created + and buffers for sys fields in row allocated */ + ulint magic_n; +}; + +#define INS_NODE_MAGIC_N 15849075 + +/* Insert node types */ +#define INS_SEARCHED 0 /* INSERT INTO ... SELECT ... */ +#define INS_VALUES 1 /* INSERT INTO ... VALUES ...
*/ +#define INS_DIRECT 2 /* this is for internal use in dict0crea: + insert the row directly */ + +/* Node execution states */ +#define INS_NODE_SET_IX_LOCK 1 /* we should set an IX lock on table */ +#define INS_NODE_ALLOC_ROW_ID 2 /* row id should be allocated */ +#define INS_NODE_INSERT_ENTRIES 3 /* index entries should be built and + inserted */ + +#ifndef UNIV_NONINL +#include "row0ins.ic" +#endif + +#endif diff --git a/storage/xtradb/include/row0ins.ic b/storage/xtradb/include/row0ins.ic new file mode 100644 index 00000000000..b7aeaf97834 --- /dev/null +++ b/storage/xtradb/include/row0ins.ic @@ -0,0 +1,25 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Insert into a table + +Created 4/20/1996 Heikki Tuuri +*******************************************************/ + + diff --git a/storage/xtradb/include/row0merge.h b/storage/xtradb/include/row0merge.h new file mode 100644 index 00000000000..9975497cbeb --- /dev/null +++ b/storage/xtradb/include/row0merge.h @@ -0,0 +1,198 @@ +/***************************************************************************** + +Copyright (c) 2005, 2009, Innobase Oy. 
All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Index build routines using a merge sort + +Created 13/06/2005 Jan Lindstrom +*******************************************************/ + +#ifndef row0merge_h +#define row0merge_h + +#include "univ.i" +#include "data0data.h" +#include "dict0types.h" +#include "trx0types.h" +#include "que0types.h" +#include "mtr0mtr.h" +#include "rem0types.h" +#include "rem0rec.h" +#include "read0types.h" +#include "btr0types.h" +#include "row0mysql.h" +#include "lock0types.h" + +/* This structure holds index field definitions */ + +struct merge_index_field_struct { + ulint prefix_len; /* Prefix len */ + const char* field_name; /* Field name */ +}; + +typedef struct merge_index_field_struct merge_index_field_t; + +/* This structure holds index definitions */ + +struct merge_index_def_struct { + const char* name; /* Index name */ + ulint ind_type; /* 0, DICT_UNIQUE, + or DICT_CLUSTERED */ + ulint n_fields; /* Number of fields in index */ + merge_index_field_t* fields; /* Field definitions */ +}; + +typedef struct merge_index_def_struct merge_index_def_t; + +/************************************************************************* +Sets an exclusive lock on a table, for the duration of creating 
indexes. */ +UNIV_INTERN +ulint +row_merge_lock_table( +/*=================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx, /* in/out: transaction */ + dict_table_t* table, /* in: table to lock */ + enum lock_mode mode); /* in: LOCK_X or LOCK_S */ +/************************************************************************* +Drop an index from the InnoDB system tables. The data dictionary must +have been locked exclusively by the caller, because the transaction +will not be committed. */ +UNIV_INTERN +void +row_merge_drop_index( +/*=================*/ + dict_index_t* index, /* in: index to be removed */ + dict_table_t* table, /* in: table */ + trx_t* trx); /* in: transaction handle */ +/************************************************************************* +Drop those indexes which were created before an error occurred when +building an index. The data dictionary must have been locked +exclusively by the caller, because the transaction will not be +committed. */ +UNIV_INTERN +void +row_merge_drop_indexes( +/*===================*/ + trx_t* trx, /* in: transaction */ + dict_table_t* table, /* in: table containing the indexes */ + dict_index_t** index, /* in: indexes to drop */ + ulint num_created); /* in: number of elements in index[] */ +/************************************************************************* +Drop all partially created indexes during crash recovery. */ +UNIV_INTERN +void +row_merge_drop_temp_indexes(void); +/*=============================*/ +/************************************************************************* +Rename the tables in the data dictionary. The data dictionary must +have been locked exclusively by the caller, because the transaction +will not be committed. 
*/ +UNIV_INTERN +ulint +row_merge_rename_tables( +/*====================*/ + /* out: error code or DB_SUCCESS */ + dict_table_t* old_table, /* in/out: old table, renamed to + tmp_name */ + dict_table_t* new_table, /* in/out: new table, renamed to + old_table->name */ + const char* tmp_name, /* in: new name for old_table */ + trx_t* trx); /* in: transaction handle */ + +/************************************************************************* +Create a temporary table for creating a primary key, using the definition +of an existing table. */ +UNIV_INTERN +dict_table_t* +row_merge_create_temporary_table( +/*=============================*/ + /* out: table, + or NULL on error */ + const char* table_name, /* in: new table name */ + const merge_index_def_t*index_def, /* in: the index definition + of the primary key */ + const dict_table_t* table, /* in: old table definition */ + trx_t* trx); /* in/out: transaction + (sets error_state) */ +/************************************************************************* +Rename the temporary indexes in the dictionary to permanent ones. The +data dictionary must have been locked exclusively by the caller, +because the transaction will not be committed. */ +UNIV_INTERN +ulint +row_merge_rename_indexes( +/*=====================*/ + /* out: DB_SUCCESS if all OK */ + trx_t* trx, /* in/out: transaction */ + dict_table_t* table); /* in/out: table with new indexes */ +/************************************************************************* +Create the index and load in to the dictionary. */ +UNIV_INTERN +dict_index_t* +row_merge_create_index( +/*===================*/ + /* out: index, or NULL on error */ + trx_t* trx, /* in/out: trx (sets error_state) */ + dict_table_t* table, /* in: the index is on this table */ + const merge_index_def_t* /* in: the index definition */ + index_def); +#ifdef ROW_MERGE_IS_INDEX_USABLE +/************************************************************************* +Check if a transaction can use an index. 
*/ +UNIV_INTERN +ibool +row_merge_is_index_usable( +/*======================*/ + /* out: TRUE if index can be used by + the transaction else FALSE*/ + const trx_t* trx, /* in: transaction */ + const dict_index_t* index); /* in: index to check */ +#endif /* ROW_MERGE_IS_INDEX_USABLE */ +/************************************************************************* +If there are views that refer to the old table name then we "attach" to +the new instance of the table else we drop it immediately. */ +UNIV_INTERN +ulint +row_merge_drop_table( +/*=================*/ + /* out: DB_SUCCESS or error code */ + trx_t* trx, /* in: transaction */ + dict_table_t* table); /* in: table instance to drop */ + +/************************************************************************* +Build indexes on a table by reading a clustered index, +creating a temporary file containing index entries, merge sorting +these index entries and inserting sorted index entries to indexes. */ +UNIV_INTERN +ulint +row_merge_build_indexes( +/*====================*/ + /* out: DB_SUCCESS or error code */ + trx_t* trx, /* in: transaction */ + dict_table_t* old_table, /* in: table where rows are + read from */ + dict_table_t* new_table, /* in: table where indexes are + created; identical to old_table + unless creating a PRIMARY KEY */ + dict_index_t** indexes, /* in: indexes to be created */ + ulint n_indexes, /* in: size of indexes[] */ + TABLE* table); /* in/out: MySQL table, for + reporting erroneous key value + if applicable */ +#endif /* row0merge.h */ diff --git a/storage/xtradb/include/row0mysql.h b/storage/xtradb/include/row0mysql.h new file mode 100644 index 00000000000..c1e11124a5d --- /dev/null +++ b/storage/xtradb/include/row0mysql.h @@ -0,0 +1,769 @@ +/***************************************************************************** + +Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Interface between Innobase row operations and MySQL. +Contains also create table and other data dictionary operations. + +Created 9/17/2000 Heikki Tuuri +*******************************************************/ + +#ifndef row0mysql_h +#define row0mysql_h + +#include "univ.i" +#include "data0data.h" +#include "que0types.h" +#include "dict0types.h" +#include "trx0types.h" +#include "row0types.h" +#include "btr0pcur.h" +#include "trx0types.h" + +extern ibool row_rollback_on_timeout; + +typedef struct row_prebuilt_struct row_prebuilt_t; + +/*********************************************************************** +Frees the blob heap in prebuilt when no longer needed. */ +UNIV_INTERN +void +row_mysql_prebuilt_free_blob_heap( +/*==============================*/ + row_prebuilt_t* prebuilt); /* in: prebuilt struct of a + ha_innobase:: table handle */ +/*********************************************************************** +Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row +format. 
*/ +UNIV_INTERN +byte* +row_mysql_store_true_var_len( +/*=========================*/ + /* out: pointer to the data, we skip the 1 or 2 bytes + at the start that are used to store the len */ + byte* dest, /* in: where to store */ + ulint len, /* in: length, must fit in two bytes */ + ulint lenlen);/* in: storage length of len: either 1 or 2 bytes */ +/*********************************************************************** +Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and +returns a pointer to the data. */ + +const byte* +row_mysql_read_true_varchar( +/*========================*/ + /* out: pointer to the data, we skip + the 1 or 2 bytes at the start that are + used to store the len */ + ulint* len, /* out: variable-length field length */ + const byte* field, /* in: field in the MySQL format */ + ulint lenlen);/* in: storage length of len: either 1 + or 2 bytes */ +/*********************************************************************** +Stores a reference to a BLOB in the MySQL format. */ +UNIV_INTERN +void +row_mysql_store_blob_ref( +/*=====================*/ + byte* dest, /* in: where to store */ + ulint col_len,/* in: dest buffer size: determines into + how many bytes the BLOB length is stored, + the space for the length may vary from 1 + to 4 bytes */ + const void* data, /* in: BLOB data; if the value to store + is SQL NULL this should be NULL pointer */ + ulint len); /* in: BLOB length; if the value to store + is SQL NULL this should be 0; remember + also to set the NULL bit in the MySQL record + header! */ +/*********************************************************************** +Reads a reference to a BLOB in the MySQL format. 
*/ + +const byte* +row_mysql_read_blob_ref( +/*====================*/ + /* out: pointer to BLOB data */ + ulint* len, /* out: BLOB length */ + const byte* ref, /* in: BLOB reference in the + MySQL format */ + ulint col_len); /* in: BLOB reference length + (not BLOB length) */ +/****************************************************************** +Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format. +The counterpart of this function is row_sel_field_store_in_mysql_format() in +row0sel.c. */ +UNIV_INTERN +byte* +row_mysql_store_col_in_innobase_format( +/*===================================*/ + /* out: up to which byte we used + buf in the conversion */ + dfield_t* dfield, /* in/out: dfield where dtype + information must be already set when + this function is called! */ + byte* buf, /* in/out: buffer for a converted + integer value; this must be at least + col_len long then! */ + ibool row_format_col, /* TRUE if the mysql_data is from + a MySQL row, FALSE if from a MySQL + key value; + in MySQL, a true VARCHAR storage + format differs in a row and in a + key value: in a key value the length + is always stored in 2 bytes! */ + const byte* mysql_data, /* in: MySQL column value, not + SQL NULL; NOTE that dfield may also + get a pointer to mysql_data, + therefore do not discard this as long + as dfield is used! */ + ulint col_len, /* in: MySQL column length; NOTE that + this is the storage length of the + column in the MySQL format row, not + necessarily the length of the actual + payload data; if the column is a true + VARCHAR then this is irrelevant */ + ulint comp); /* in: nonzero=compact format */ +/******************************************************************** +Handles user errors and lock waits detected by the database engine. 
*/ +UNIV_INTERN +ibool +row_mysql_handle_errors( +/*====================*/ + /* out: TRUE if it was a lock wait and + we should continue running the query thread */ + ulint* new_err,/* out: possible new error encountered in + rollback, or the old error which was + during the function entry */ + trx_t* trx, /* in: transaction */ + que_thr_t* thr, /* in: query thread */ + trx_savept_t* savept);/* in: savepoint */ +/************************************************************************ +Create a prebuilt struct for a MySQL table handle. */ +UNIV_INTERN +row_prebuilt_t* +row_create_prebuilt( +/*================*/ + /* out, own: a prebuilt struct */ + dict_table_t* table); /* in: Innobase table handle */ +/************************************************************************ +Free a prebuilt struct for a MySQL table handle. */ +UNIV_INTERN +void +row_prebuilt_free( +/*==============*/ + row_prebuilt_t* prebuilt, /* in, own: prebuilt struct */ + ibool dict_locked); /* in: TRUE=data dictionary locked */ +/************************************************************************* +Updates the transaction pointers in query graphs stored in the prebuilt +struct. */ +UNIV_INTERN +void +row_update_prebuilt_trx( +/*====================*/ + row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL + handle */ + trx_t* trx); /* in: transaction handle */ +/************************************************************************* +Unlocks AUTO_INC type locks that were possibly reserved by a trx. */ +UNIV_INTERN +void +row_unlock_table_autoinc_for_mysql( +/*===============================*/ + trx_t* trx); /* in/out: transaction */ +/************************************************************************* +Sets an AUTO_INC type lock on the table mentioned in prebuilt. The +AUTO_INC lock gives exclusive access to the auto-inc counter of the +table. The lock is reserved only for the duration of an SQL statement.
+It is not compatible with another AUTO_INC or exclusive lock on the +table. */ +UNIV_INTERN +int +row_lock_table_autoinc_for_mysql( +/*=============================*/ + /* out: error code or DB_SUCCESS */ + row_prebuilt_t* prebuilt); /* in: prebuilt struct in the MySQL + table handle */ +/************************************************************************* +Sets a table lock on the table mentioned in prebuilt. */ +UNIV_INTERN +int +row_lock_table_for_mysql( +/*=====================*/ + /* out: error code or DB_SUCCESS */ + row_prebuilt_t* prebuilt, /* in: prebuilt struct in the MySQL + table handle */ + dict_table_t* table, /* in: table to lock, or NULL + if prebuilt->table should be + locked as + prebuilt->select_lock_type */ + ulint mode); /* in: lock mode of table + (ignored if table==NULL) */ + +/************************************************************************* +Does an insert for MySQL. */ +UNIV_INTERN +int +row_insert_for_mysql( +/*=================*/ + /* out: error code or DB_SUCCESS */ + byte* mysql_rec, /* in: row in the MySQL format */ + row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL + handle */ +/************************************************************************* +Builds a dummy query graph used in selects. */ +UNIV_INTERN +void +row_prebuild_sel_graph( +/*===================*/ + row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL + handle */ +/************************************************************************* +Gets pointer to a prebuilt update vector used in updates. If the update +graph has not yet been built in the prebuilt struct, then this function +first builds it. 
*/ +UNIV_INTERN +upd_t* +row_get_prebuilt_update_vector( +/*===========================*/ + /* out: prebuilt update vector */ + row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL + handle */ +/************************************************************************* +Checks if a table is such that we automatically created a clustered +index on it (on row id). */ +UNIV_INTERN +ibool +row_table_got_default_clust_index( +/*==============================*/ + const dict_table_t* table); +/************************************************************************* +Calculates the key number used inside MySQL for an Innobase index. We have +to take into account if we generated a default clustered index for the table */ +UNIV_INTERN +ulint +row_get_mysql_key_number_for_index( +/*===============================*/ + const dict_index_t* index); +/************************************************************************* +Does an update or delete of a row for MySQL. */ +UNIV_INTERN +int +row_update_for_mysql( +/*=================*/ + /* out: error code or DB_SUCCESS */ + byte* mysql_rec, /* in: the row to be updated, in + the MySQL format */ + row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL + handle */ +/************************************************************************* +This can only be used when srv_locks_unsafe_for_binlog is TRUE or +session is using a READ COMMITTED isolation level. Before +calling this function we must use trx_reset_new_rec_lock_info() and +trx_register_new_rec_lock() to store the information which new record locks +really were set. This function removes a newly set lock under prebuilt->pcur, +and also under prebuilt->clust_pcur. Currently, this is only used and tested +in the case of an UPDATE or a DELETE statement, where the row lock is of the +LOCK_X type. +Thus, this implements a 'mini-rollback' that releases the latest record +locks we set. 
*/ +UNIV_INTERN +int +row_unlock_for_mysql( +/*=================*/ + /* out: error code or DB_SUCCESS */ + row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL + handle */ + ibool has_latches_on_recs);/* in: TRUE if called so that we have + the latches on the records under pcur + and clust_pcur, and we do not need to + reposition the cursors. */ +/************************************************************************* +Creates a query graph node of 'update' type to be used in the MySQL +interface. */ +UNIV_INTERN +upd_node_t* +row_create_update_node_for_mysql( +/*=============================*/ + /* out, own: update node */ + dict_table_t* table, /* in: table to update */ + mem_heap_t* heap); /* in: mem heap from which allocated */ +/************************************************************************** +Does a cascaded delete or set null in a foreign key operation. */ +UNIV_INTERN +ulint +row_update_cascade_for_mysql( +/*=========================*/ + /* out: error code or DB_SUCCESS */ + que_thr_t* thr, /* in: query thread */ + upd_node_t* node, /* in: update node used in the cascade + or set null operation */ + dict_table_t* table); /* in: table where we do the operation */ +/************************************************************************* +Locks the data dictionary exclusively for performing a table create or other +data dictionary modification operation. */ +UNIV_INTERN +void +row_mysql_lock_data_dictionary_func( +/*================================*/ + trx_t* trx, /* in/out: transaction */ + const char* file, /* in: file name */ + ulint line); /* in: line number */ +#define row_mysql_lock_data_dictionary(trx) \ + row_mysql_lock_data_dictionary_func(trx, __FILE__, __LINE__) +/************************************************************************* +Unlocks the data dictionary exclusive lock.
*/ +UNIV_INTERN +void +row_mysql_unlock_data_dictionary( +/*=============================*/ + trx_t* trx); /* in/out: transaction */ +/************************************************************************* +Locks the data dictionary in shared mode from modifications, for performing +foreign key check, rollback, or other operation invisible to MySQL. */ +UNIV_INTERN +void +row_mysql_freeze_data_dictionary_func( +/*==================================*/ + trx_t* trx, /* in/out: transaction */ + const char* file, /* in: file name */ + ulint line); /* in: line number */ +#define row_mysql_freeze_data_dictionary(trx) \ + row_mysql_freeze_data_dictionary_func(trx, __FILE__, __LINE__) +/************************************************************************* +Unlocks the data dictionary shared lock. */ +UNIV_INTERN +void +row_mysql_unfreeze_data_dictionary( +/*===============================*/ + trx_t* trx); /* in/out: transaction */ +#ifndef UNIV_HOTBACKUP +/************************************************************************* +Creates a table for MySQL. If the name of the table ends in +one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor", +"innodb_table_monitor", then this will also start the printing of monitor +output by the master thread. If the table name ends in "innodb_mem_validate", +InnoDB will try to invoke mem_validate(). */ +UNIV_INTERN +int +row_create_table_for_mysql( +/*=======================*/ + /* out: error code or DB_SUCCESS */ + dict_table_t* table, /* in, own: table definition + (will be freed) */ + trx_t* trx); /* in: transaction handle */ +/************************************************************************* +Does an index creation operation for MySQL. TODO: currently failure +to create an index results in dropping the whole table! This is no problem +currently as all indexes must be created at the same time as the table. 
*/ +UNIV_INTERN +int +row_create_index_for_mysql( +/*=======================*/ + /* out: error number or DB_SUCCESS */ + dict_index_t* index, /* in, own: index definition + (will be freed) */ + trx_t* trx, /* in: transaction handle */ + const ulint* field_lengths); /* in: if not NULL, must contain + dict_index_get_n_fields(index) + actual field lengths for the + index columns, which are + then checked for not being too + large. */ +/************************************************************************* +Scans a table create SQL string and adds to the data dictionary +the foreign key constraints declared in the string. This function +should be called after the indexes for a table have been created. +Each foreign key constraint must be accompanied with indexes in +both participating tables. The indexes are allowed to contain more +fields than mentioned in the constraint. */ +UNIV_INTERN +int +row_table_add_foreign_constraints( +/*==============================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx, /* in: transaction */ + const char* sql_string, /* in: table create statement where + foreign keys are declared like: + FOREIGN KEY (a, b) REFERENCES table2(c, d), + table2 can be written also with the + database name before it: test.table2 */ + const char* name, /* in: table full name in the + normalized form + database_name/table_name */ + ibool reject_fks); /* in: if TRUE, fail with error + code DB_CANNOT_ADD_CONSTRAINT if + any foreign keys are found. */ + +/************************************************************************* +The master thread in srv0srv.c calls this regularly to drop tables which +we must drop in background after queries to them have ended. Such lazy +dropping of tables is needed in ALTER TABLE on Unix.
*/ +UNIV_INTERN +ulint +row_drop_tables_for_mysql_in_background(void); +/*=========================================*/ + /* out: how many tables dropped + + remaining tables in list */ +/************************************************************************* +Get the background drop list length. NOTE: the caller must own the kernel +mutex! */ +UNIV_INTERN +ulint +row_get_background_drop_list_len_low(void); +/*======================================*/ + /* out: how many tables in list */ +/************************************************************************* +Truncates a table for MySQL. */ +UNIV_INTERN +int +row_truncate_table_for_mysql( +/*=========================*/ + /* out: error code or DB_SUCCESS */ + dict_table_t* table, /* in: table handle */ + trx_t* trx); /* in: transaction handle */ +/************************************************************************* +Drops a table for MySQL. If the name of the dropped table ends in +one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor", +"innodb_table_monitor", then this will also stop the printing of monitor +output by the master thread. If the data dictionary was not already locked +by the transaction, the transaction will be committed. Otherwise, the +data dictionary will remain locked. */ +UNIV_INTERN +int +row_drop_table_for_mysql( +/*=====================*/ + /* out: error code or DB_SUCCESS */ + const char* name, /* in: table name */ + trx_t* trx, /* in: transaction handle */ + ibool drop_db);/* in: TRUE=dropping whole database */ + +/************************************************************************* +Discards the tablespace of a table which is stored in an .ibd file. Discarding +means that this function deletes the .ibd file and assigns a new table id for +the table. Also the flag table->ibd_file_missing is set TRUE.
*/ +UNIV_INTERN +int +row_discard_tablespace_for_mysql( +/*=============================*/ + /* out: error code or DB_SUCCESS */ + const char* name, /* in: table name */ + trx_t* trx); /* in: transaction handle */ +/********************************************************************* +Imports a tablespace. The space id in the .ibd file must match the space id +of the table in the data dictionary. */ +UNIV_INTERN +int +row_import_tablespace_for_mysql( +/*============================*/ + /* out: error code or DB_SUCCESS */ + const char* name, /* in: table name */ + trx_t* trx); /* in: transaction handle */ +/************************************************************************* +Drops a database for MySQL. */ +UNIV_INTERN +int +row_drop_database_for_mysql( +/*========================*/ + /* out: error code or DB_SUCCESS */ + const char* name, /* in: database name which ends in '/' */ + trx_t* trx); /* in: transaction handle */ +/************************************************************************* +Renames a table for MySQL. */ +UNIV_INTERN +ulint +row_rename_table_for_mysql( +/*=======================*/ + /* out: error code or DB_SUCCESS */ + const char* old_name, /* in: old table name */ + const char* new_name, /* in: new table name */ + trx_t* trx, /* in: transaction handle */ + ibool commit); /* in: if TRUE then commit trx */ +/************************************************************************* +Checks a table for corruption. */ +UNIV_INTERN +ulint +row_check_table_for_mysql( +/*======================*/ + /* out: DB_ERROR or DB_SUCCESS */ + row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL + handle */ +#endif /* !UNIV_HOTBACKUP */ + +/************************************************************************* +Determines if a table is a magic monitor table. 
*/ +UNIV_INTERN +ibool +row_is_magic_monitor_table( +/*=======================*/ + /* out: TRUE if monitor table */ + const char* table_name); /* in: name of the table, in the + form database/table_name */ + +/* A struct describing a place for an individual column in the MySQL +row format which is presented to the table handler in ha_innobase. +This template struct is used to speed up row transformations between +Innobase and MySQL. */ + +typedef struct mysql_row_templ_struct mysql_row_templ_t; +struct mysql_row_templ_struct { + ulint col_no; /* column number of the column */ + ulint rec_field_no; /* field number of the column in an + Innobase record in the current index; + not defined if template_type is + ROW_MYSQL_WHOLE_ROW */ + ulint mysql_col_offset; /* offset of the column in the MySQL + row format */ + ulint mysql_col_len; /* length of the column in the MySQL + row format */ + ulint mysql_null_byte_offset; /* MySQL NULL bit byte offset in a + MySQL record */ + ulint mysql_null_bit_mask; /* bit mask to get the NULL bit, + zero if column cannot be NULL */ + ulint type; /* column type in Innobase mtype + numbers DATA_CHAR... 
*/ + ulint mysql_type; /* MySQL type code; this is always + < 256 */ + ulint mysql_length_bytes; /* if mysql_type + == DATA_MYSQL_TRUE_VARCHAR, this tells + whether we should use 1 or 2 bytes to + store the MySQL true VARCHAR data + length at the start of row in the MySQL + format (NOTE that the MySQL key value + format always uses 2 bytes for the data + len) */ + ulint charset; /* MySQL charset-collation code + of the column, or zero */ + ulint mbminlen; /* minimum length of a char, in bytes, + or zero if not a char type */ + ulint mbmaxlen; /* maximum length of a char, in bytes, + or zero if not a char type */ + ulint is_unsigned; /* if a column type is an integer + type and this field is != 0, then + it is an unsigned integer type */ +}; + +#define MYSQL_FETCH_CACHE_SIZE 8 +/* After fetching this many rows, we start caching them in fetch_cache */ +#define MYSQL_FETCH_CACHE_THRESHOLD 4 + +#define ROW_PREBUILT_ALLOCATED 78540783 +#define ROW_PREBUILT_FREED 26423527 + +/* A struct for (sometimes lazily) prebuilt structures in an Innobase table +handle used within MySQL; these are used to save CPU time. */ + +struct row_prebuilt_struct { + ulint magic_n; /* this magic number is set to + ROW_PREBUILT_ALLOCATED when created, + or ROW_PREBUILT_FREED when the + struct has been freed */ + dict_table_t* table; /* Innobase table handle */ + trx_t* trx; /* current transaction handle */ + ibool sql_stat_start; /* TRUE when we start processing of + an SQL statement: we may have to set + an intention lock on the table, + create a consistent read view etc. 
*/ + ibool mysql_has_locked; /* this is set TRUE when MySQL + calls external_lock on this handle + with a lock flag, and set FALSE when + with the F_UNLOCK flag */ + ibool clust_index_was_generated; + /* if the user did not define a + primary key in MySQL, then Innobase + automatically generated a clustered + index where the ordering column is + the row id: in this case this flag + is set to TRUE */ + dict_index_t* index; /* current index for a search, if + any */ + ulint read_just_key; /* set to 1 when MySQL calls + ha_innobase::extra with the + argument HA_EXTRA_KEYREAD; it is enough + to read just columns defined in + the index (i.e., no read of the + clustered index record necessary) */ + ibool used_in_HANDLER;/* TRUE if we have been using this + handle in a MySQL HANDLER low level + index cursor command: then we must + store the pcur position even in a + unique search from a clustered index, + because HANDLER allows NEXT and PREV + in such a situation */ + ulint template_type; /* ROW_MYSQL_WHOLE_ROW, + ROW_MYSQL_REC_FIELDS, + ROW_MYSQL_DUMMY_TEMPLATE, or + ROW_MYSQL_NO_TEMPLATE */ + ulint n_template; /* number of elements in the + template */ + ulint null_bitmap_len;/* number of bytes in the SQL NULL + bitmap at the start of a row in the + MySQL format */ + ibool need_to_access_clustered; /* if we are fetching + columns through a secondary index + and at least one column is not in + the secondary index, then this is + set to TRUE */ + ibool templ_contains_blob;/* TRUE if the template contains + BLOB column(s) */ + mysql_row_templ_t* mysql_template;/* template used to transform + rows fast between MySQL and Innobase + formats; memory for this template + is not allocated from 'heap' */ + mem_heap_t* heap; /* memory heap from which + these auxiliary structures are + allocated when needed */ + ins_node_t* ins_node; /* Innobase SQL insert node + used to perform inserts + to the table */ + byte* ins_upd_rec_buff;/* buffer for storing data converted + to the Innobase 
format from the MySQL + format */ + const byte* default_rec; /* the default values of all columns + (a "default row") in MySQL format */ + ulint hint_need_to_fetch_extra_cols; + /* normally this is set to 0; if this + is set to ROW_RETRIEVE_PRIMARY_KEY, + then we should at least retrieve all + columns in the primary key; if this + is set to ROW_RETRIEVE_ALL_COLS, then + we must retrieve all columns in the + key (if read_just_key == 1), or all + columns in the table */ + upd_node_t* upd_node; /* Innobase SQL update node used + to perform updates and deletes */ + que_fork_t* ins_graph; /* Innobase SQL query graph used + in inserts */ + que_fork_t* upd_graph; /* Innobase SQL query graph used + in updates or deletes */ + btr_pcur_t* pcur; /* persistent cursor used in selects + and updates */ + btr_pcur_t* clust_pcur; /* persistent cursor used in + some selects and updates */ + que_fork_t* sel_graph; /* dummy query graph used in + selects */ + dtuple_t* search_tuple; /* prebuilt dtuple used in selects */ + byte row_id[DATA_ROW_ID_LEN]; + /* if the clustered index was + generated, the row id of the + last row fetched is stored + here */ + dtuple_t* clust_ref; /* prebuilt dtuple used in + sel/upd/del */ + ulint select_lock_type;/* LOCK_NONE, LOCK_S, or LOCK_X */ + ulint stored_select_lock_type;/* this field is used to + remember the original select_lock_type + that was decided in ha_innodb.cc, + ::store_lock(), ::external_lock(), + etc. */ + ulint row_read_type; /* ROW_READ_WITH_LOCKS if row locks + should be obtained for records + under an UPDATE or DELETE cursor. + If innodb_locks_unsafe_for_binlog + is TRUE, this can be set to + ROW_READ_TRY_SEMI_CONSISTENT, so that + if the row under an UPDATE or DELETE + cursor was locked by another + transaction, InnoDB will resort + to reading the last committed value + ('semi-consistent read'). Then, + this field will be set to + ROW_READ_DID_SEMI_CONSISTENT to + indicate that. 
If the row does not + match the WHERE condition, MySQL will + invoke handler::unlock_row() to + clear the flag back to + ROW_READ_TRY_SEMI_CONSISTENT and + to simply skip the row. If + the row matches, the next call to + row_search_for_mysql() will lock + the row. + This eliminates lock waits in some + cases; note that this breaks + serializability. */ + ulint mysql_prefix_len;/* byte offset of the end of + the last requested column */ + ulint mysql_row_len; /* length in bytes of a row in the + MySQL format */ + ulint n_rows_fetched; /* number of rows fetched after + positioning the current cursor */ + ulint fetch_direction;/* ROW_SEL_NEXT or ROW_SEL_PREV */ + byte* fetch_cache[MYSQL_FETCH_CACHE_SIZE]; + /* a cache for fetched rows if we + fetch many rows from the same cursor: + it saves CPU time to fetch them in a + batch; we reserve mysql_row_len + bytes for each such row; these + pointers point 4 bytes past the + allocated mem buf start, because + there is a 4 byte magic number at the + start and at the end */ + ibool keep_other_fields_on_keyread; /* when using fetch + cache with HA_EXTRA_KEYREAD, don't + overwrite other fields in mysql + row buffer.*/ + ulint fetch_cache_first;/* position of the first not yet + fetched row in fetch_cache */ + ulint n_fetch_cached; /* number of not yet fetched rows + in fetch_cache */ + mem_heap_t* blob_heap; /* in SELECTS BLOB fields are copied + to this heap */ + mem_heap_t* old_vers_heap; /* memory heap where a previous + version is built in consistent read */ + /*----------------------*/ + ulonglong autoinc_last_value;/* last value of AUTO-INC interval */ + ulonglong autoinc_increment;/* The increment step of the auto + increment column. Value must be + greater than or equal to 1. Required to + calculate the next value */ + ulonglong autoinc_offset; /* The offset passed to + get_auto_increment() by MySQL. 
Required + to calculate the next value */ + ulint autoinc_error; /* The actual error code encountered + while trying to init or read the + autoinc value from the table. We + store it here so that we can return + it to MySQL */ + /*----------------------*/ + UT_LIST_NODE_T(row_prebuilt_t) prebuilts; + /* list node of table->prebuilts */ + ulint magic_n2; /* this should be the same as + magic_n */ +}; + +#define ROW_PREBUILT_FETCH_MAGIC_N 465765687 + +#define ROW_MYSQL_WHOLE_ROW 0 +#define ROW_MYSQL_REC_FIELDS 1 +#define ROW_MYSQL_NO_TEMPLATE 2 +#define ROW_MYSQL_DUMMY_TEMPLATE 3 /* dummy template used in + row_scan_and_check_index */ + +/* Values for hint_need_to_fetch_extra_cols */ +#define ROW_RETRIEVE_PRIMARY_KEY 1 +#define ROW_RETRIEVE_ALL_COLS 2 + +/* Values for row_read_type */ +#define ROW_READ_WITH_LOCKS 0 +#define ROW_READ_TRY_SEMI_CONSISTENT 1 +#define ROW_READ_DID_SEMI_CONSISTENT 2 + +#ifndef UNIV_NONINL +#include "row0mysql.ic" +#endif + +#endif diff --git a/storage/xtradb/include/row0mysql.ic b/storage/xtradb/include/row0mysql.ic new file mode 100644 index 00000000000..5260ae17924 --- /dev/null +++ b/storage/xtradb/include/row0mysql.ic @@ -0,0 +1,23 @@ +/***************************************************************************** + +Copyright (c) 2001, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +MySQL interface for Innobase + +Created 1/23/2001 Heikki Tuuri +*******************************************************/ diff --git a/storage/xtradb/include/row0purge.h b/storage/xtradb/include/row0purge.h new file mode 100644 index 00000000000..fbc12f8d389 --- /dev/null +++ b/storage/xtradb/include/row0purge.h @@ -0,0 +1,95 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Purge obsolete records + +Created 3/14/1997 Heikki Tuuri +*******************************************************/ + +#ifndef row0purge_h +#define row0purge_h + +#include "univ.i" +#include "data0data.h" +#include "btr0types.h" +#include "btr0pcur.h" +#include "dict0types.h" +#include "trx0types.h" +#include "que0types.h" +#include "row0types.h" + +/************************************************************************ +Creates a purge node to a query graph. */ +UNIV_INTERN +purge_node_t* +row_purge_node_create( +/*==================*/ + /* out, own: purge node */ + que_thr_t* parent, /* in: parent node, i.e., a thr node */ + mem_heap_t* heap); /* in: memory heap where created */ +/*************************************************************** +Does the purge operation for a single undo log record. This is a high-level +function used in an SQL execution graph. */ +UNIV_INTERN +que_thr_t* +row_purge_step( +/*===========*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ + +/* Purge node structure */ + +struct purge_node_struct{ + que_common_t common; /* node type: QUE_NODE_PURGE */ + /*----------------------*/ + /* Local storage for this graph node */ + dulint roll_ptr;/* roll pointer to undo log record */ + trx_undo_rec_t* undo_rec;/* undo log record */ + trx_undo_inf_t* reservation;/* reservation for the undo log record in + the purge array */ + dulint undo_no;/* undo number of the record */ + ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC, + ... 
*/ + btr_pcur_t pcur; /* persistent cursor used in searching the + clustered index record */ + ibool found_clust;/* TRUE if the clustered index record + determined by ref was found in the clustered + index, and we were able to position pcur on + it */ + dict_table_t* table; /* table where purge is done */ + ulint cmpl_info;/* compiler analysis info of an update */ + upd_t* update; /* update vector for a clustered index + record */ + dtuple_t* ref; /* NULL, or row reference to the next row to + handle */ + dtuple_t* row; /* NULL, or a copy (also fields copied to + heap) of the indexed fields of the row to + handle */ + dict_index_t* index; /* NULL, or the next index whose record should + be handled */ + mem_heap_t* heap; /* memory heap used as auxiliary storage for + row; this must be emptied after a successful + purge of a row */ +}; + +#ifndef UNIV_NONINL +#include "row0purge.ic" +#endif + +#endif diff --git a/storage/xtradb/include/row0purge.ic b/storage/xtradb/include/row0purge.ic new file mode 100644 index 00000000000..5fc665e9d20 --- /dev/null +++ b/storage/xtradb/include/row0purge.ic @@ -0,0 +1,24 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + + +/****************************************************** +Purge obsolete records + +Created 3/14/1997 Heikki Tuuri +*******************************************************/ diff --git a/storage/xtradb/include/row0row.h b/storage/xtradb/include/row0row.h new file mode 100644 index 00000000000..26c4b5e4e71 --- /dev/null +++ b/storage/xtradb/include/row0row.h @@ -0,0 +1,331 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +General row routines + +Created 4/20/1996 Heikki Tuuri +*******************************************************/ + +#ifndef row0row_h +#define row0row_h + +#include "univ.i" +#include "data0data.h" +#include "dict0types.h" +#include "trx0types.h" +#include "que0types.h" +#include "mtr0mtr.h" +#include "rem0types.h" +#include "read0types.h" +#include "row0types.h" +#include "btr0types.h" + +/************************************************************************* +Gets the offset of the trx id field, in bytes relative to the origin of +a clustered index record. */ +UNIV_INTERN +ulint +row_get_trx_id_offset( +/*==================*/ + /* out: offset of DATA_TRX_ID */ + const rec_t* rec, /* in: record */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ +/************************************************************************* +Reads the trx id field from a clustered index record. */ +UNIV_INLINE +dulint +row_get_rec_trx_id( +/*===============*/ + /* out: value of the field */ + const rec_t* rec, /* in: record */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ +/************************************************************************* +Reads the roll pointer field from a clustered index record. 
*/ +UNIV_INLINE +dulint +row_get_rec_roll_ptr( +/*=================*/ + /* out: value of the field */ + const rec_t* rec, /* in: record */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ +/********************************************************************* +When an insert or purge to a table is performed, this function builds +the entry to be inserted into or purged from an index on the table. */ +UNIV_INTERN +dtuple_t* +row_build_index_entry( +/*==================*/ + /* out: index entry which should be + inserted or purged, or NULL if the + externally stored columns in the + clustered index record are unavailable + and ext != NULL */ + const dtuple_t* row, /* in: row which should be + inserted or purged */ + row_ext_t* ext, /* in: externally stored column prefixes, + or NULL */ + dict_index_t* index, /* in: index on the table */ + mem_heap_t* heap); /* in: memory heap from which the memory for + the index entry is allocated */ +/*********************************************************************** +An inverse function to row_build_index_entry. Builds a row from a +record in a clustered index. */ +UNIV_INTERN +dtuple_t* +row_build( +/*======*/ + /* out, own: row built; + see the NOTE below! */ + ulint type, /* in: ROW_COPY_POINTERS or + ROW_COPY_DATA; the latter + copies also the data fields to + heap while the first only + places pointers to data fields + on the index page, and thus is + more efficient */ + const dict_index_t* index, /* in: clustered index */ + const rec_t* rec, /* in: record in the clustered + index; NOTE: in the case + ROW_COPY_POINTERS the data + fields in the row will point + directly into this record, + therefore, the buffer page of + this record must be at least + s-latched and the latch held + as long as the row dtuple is used! 
*/ + const ulint* offsets,/* in: rec_get_offsets(rec,index) + or NULL, in which case this function + will invoke rec_get_offsets() */ + const dict_table_t* col_table, + /* in: table, to check which + externally stored columns + occur in the ordering columns + of an index, or NULL if + index->table should be + consulted instead; the user + columns in this table should be + the same columns as in index->table */ + row_ext_t** ext, /* out, own: cache of + externally stored column + prefixes, or NULL */ + mem_heap_t* heap); /* in: memory heap from which + the memory needed is allocated */ +/*********************************************************************** +Converts an index record to a typed data tuple. */ +UNIV_INTERN +dtuple_t* +row_rec_to_index_entry_low( +/*=======================*/ + /* out: index entry built; does not + set info_bits, and the data fields in + the entry will point directly to rec */ + const rec_t* rec, /* in: record in the index */ + const dict_index_t* index, /* in: index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ + ulint* n_ext, /* out: number of externally + stored columns */ + mem_heap_t* heap); /* in: memory heap from which + the memory needed is allocated */ +/*********************************************************************** +Converts an index record to a typed data tuple. NOTE that externally +stored (often big) fields are NOT copied to heap. */ +UNIV_INTERN +dtuple_t* +row_rec_to_index_entry( +/*===================*/ + /* out, own: index entry + built; see the NOTE below! 
*/ + ulint type, /* in: ROW_COPY_DATA, or + ROW_COPY_POINTERS: the former + copies also the data fields to + heap as the latter only places + pointers to data fields on the + index page */ + const rec_t* rec, /* in: record in the index; + NOTE: in the case + ROW_COPY_POINTERS the data + fields in the row will point + directly into this record, + therefore, the buffer page of + this record must be at least + s-latched and the latch held + as long as the dtuple is used! */ + const dict_index_t* index, /* in: index */ + ulint* offsets,/* in/out: rec_get_offsets(rec) */ + ulint* n_ext, /* out: number of externally + stored columns */ + mem_heap_t* heap); /* in: memory heap from which + the memory needed is allocated */ +/*********************************************************************** +Builds from a secondary index record a row reference with which we can +search the clustered index record. */ +UNIV_INTERN +dtuple_t* +row_build_row_ref( +/*==============*/ + /* out, own: row reference built; see the + NOTE below! */ + ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS: + the former copies also the data fields to + heap, whereas the latter only places pointers + to data fields on the index page */ + dict_index_t* index, /* in: secondary index */ + const rec_t* rec, /* in: record in the index; + NOTE: in the case ROW_COPY_POINTERS + the data fields in the row will point + directly into this record, therefore, + the buffer page of this record must be + at least s-latched and the latch held + as long as the row reference is used! */ + mem_heap_t* heap); /* in: memory heap from which the memory + needed is allocated */ +/*********************************************************************** +Builds from a secondary index record a row reference with which we can +search the clustered index record. */ +UNIV_INTERN +void +row_build_row_ref_in_tuple( +/*=======================*/ + dtuple_t* ref, /* in/out: row reference built; + see the NOTE below! 
*/ + const rec_t* rec, /* in: record in the index; + NOTE: the data fields in ref + will point directly into this + record, therefore, the buffer + page of this record must be at + least s-latched and the latch + held as long as the row + reference is used! */ + const dict_index_t* index, /* in: secondary index */ + ulint* offsets,/* in: rec_get_offsets(rec, index) + or NULL */ + trx_t* trx); /* in: transaction */ +/*********************************************************************** +From a row build a row reference with which we can search the clustered +index record. */ +UNIV_INTERN +void +row_build_row_ref_from_row( +/*=======================*/ + dtuple_t* ref, /* in/out: row reference built; + see the NOTE below! + ref must have the right number + of fields! */ + const dict_table_t* table, /* in: table */ + const dtuple_t* row); /* in: row + NOTE: the data fields in ref will point + directly into data of this row */ +/*********************************************************************** +Builds from a secondary index record a row reference with which we can +search the clustered index record. */ +UNIV_INLINE +void +row_build_row_ref_fast( +/*===================*/ + dtuple_t* ref, /* in/out: typed data tuple where the + reference is built */ + const ulint* map, /* in: array of field numbers in rec + telling how ref should be built from + the fields of rec */ + const rec_t* rec, /* in: record in the index; must be + preserved while ref is used, as we do + not copy field values to heap */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/******************************************************************* +Searches the clustered index record for a row, if we have the row +reference. */ +UNIV_INTERN +ibool +row_search_on_row_ref( +/*==================*/ + /* out: TRUE if found */ + btr_pcur_t* pcur, /* out: persistent cursor, which must + be closed by the caller */ + ulint mode, /* in: BTR_MODIFY_LEAF, ... 
*/ + const dict_table_t* table, /* in: table */ + const dtuple_t* ref, /* in: row reference */ + mtr_t* mtr); /* in/out: mtr */ +/************************************************************************* +Fetches the clustered index record for a secondary index record. The latches +on the secondary index record are preserved. */ +UNIV_INTERN +rec_t* +row_get_clust_rec( +/*==============*/ + /* out: record or NULL, if no record found */ + ulint mode, /* in: BTR_MODIFY_LEAF, ... */ + const rec_t* rec, /* in: record in a secondary index */ + dict_index_t* index, /* in: secondary index */ + dict_index_t** clust_index,/* out: clustered index */ + mtr_t* mtr); /* in: mtr */ +/******************************************************************* +Searches an index record. */ +UNIV_INTERN +ibool +row_search_index_entry( +/*===================*/ + /* out: TRUE if found */ + dict_index_t* index, /* in: index */ + const dtuple_t* entry, /* in: index entry */ + ulint mode, /* in: BTR_MODIFY_LEAF, ... */ + btr_pcur_t* pcur, /* in/out: persistent cursor, which must + be closed by the caller */ + mtr_t* mtr); /* in: mtr */ + + +#define ROW_COPY_DATA 1 +#define ROW_COPY_POINTERS 2 + +/* The allowed latching order of index records is the following: +(1) a secondary index record -> +(2) the clustered index record -> +(3) rollback segment data for the clustered index record. + +No new latches may be obtained while the kernel mutex is reserved. +However, the kernel mutex can be reserved while latches are owned. */ + +/*********************************************************************** +Formats the raw data in "data" (in InnoDB on-disk format) using +"dict_field" and writes the result to "buf". +Not more than "buf_size" bytes are written to "buf". +The result is always '\0'-terminated (provided buf_size > 0) and the +number of bytes that were written to "buf" is returned (including the +terminating '\0'). 
*/ +UNIV_INTERN +ulint +row_raw_format( +/*===========*/ + /* out: number of bytes + that were written */ + const char* data, /* in: raw data */ + ulint data_len, /* in: raw data length + in bytes */ + const dict_field_t* dict_field, /* in: index field */ + char* buf, /* out: output buffer */ + ulint buf_size); /* in: output buffer size + in bytes */ + +#ifndef UNIV_NONINL +#include "row0row.ic" +#endif + +#endif diff --git a/storage/xtradb/include/row0row.ic b/storage/xtradb/include/row0row.ic new file mode 100644 index 00000000000..9947dd43257 --- /dev/null +++ b/storage/xtradb/include/row0row.ic @@ -0,0 +1,119 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +General row routines + +Created 4/20/1996 Heikki Tuuri +*******************************************************/ + +#include "dict0dict.h" +#include "rem0rec.h" +#include "trx0undo.h" + +/************************************************************************* +Reads the trx id field from a clustered index record. 
*/ +UNIV_INLINE +dulint +row_get_rec_trx_id( +/*===============*/ + /* out: value of the field */ + const rec_t* rec, /* in: record */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ +{ + ulint offset; + + ut_ad(dict_index_is_clust(index)); + ut_ad(rec_offs_validate(rec, index, offsets)); + + offset = index->trx_id_offset; + + if (!offset) { + offset = row_get_trx_id_offset(rec, index, offsets); + } + + return(trx_read_trx_id(rec + offset)); +} + +/************************************************************************* +Reads the roll pointer field from a clustered index record. */ +UNIV_INLINE +dulint +row_get_rec_roll_ptr( +/*=================*/ + /* out: value of the field */ + const rec_t* rec, /* in: record */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ +{ + ulint offset; + + ut_ad(dict_index_is_clust(index)); + ut_ad(rec_offs_validate(rec, index, offsets)); + + offset = index->trx_id_offset; + + if (!offset) { + offset = row_get_trx_id_offset(rec, index, offsets); + } + + return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN)); +} + +/*********************************************************************** +Builds from a secondary index record a row reference with which we can +search the clustered index record. 
*/ +UNIV_INLINE +void +row_build_row_ref_fast( +/*===================*/ + dtuple_t* ref, /* in/out: typed data tuple where the + reference is built */ + const ulint* map, /* in: array of field numbers in rec + telling how ref should be built from + the fields of rec */ + const rec_t* rec, /* in: record in the index; must be + preserved while ref is used, as we do + not copy field values to heap */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + dfield_t* dfield; + const byte* field; + ulint len; + ulint ref_len; + ulint field_no; + ulint i; + + ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(!rec_offs_any_extern(offsets)); + ref_len = dtuple_get_n_fields(ref); + /* Field i of ref is built from field map[i] of rec; an entry equal to ULINT_UNDEFINED leaves that field of ref untouched */ + for (i = 0; i < ref_len; i++) { + dfield = dtuple_get_nth_field(ref, i); + + field_no = *(map + i); + + if (field_no != ULINT_UNDEFINED) { + /* The dfield is set to the pointer and length obtained for the field of rec; nothing is copied here (see rec above) */ + field = rec_get_nth_field(rec, offsets, + field_no, &len); + dfield_set_data(dfield, field, len); + } + } +} diff --git a/storage/xtradb/include/row0sel.h b/storage/xtradb/include/row0sel.h new file mode 100644 index 00000000000..2f8574d0691 --- /dev/null +++ b/storage/xtradb/include/row0sel.h @@ -0,0 +1,401 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Select + +Created 12/19/1997 Heikki Tuuri +*******************************************************/ + +#ifndef row0sel_h +#define row0sel_h + +#include "univ.i" +#include "data0data.h" +#include "que0types.h" +#include "dict0types.h" +#include "trx0types.h" +#include "row0types.h" +#include "que0types.h" +#include "pars0sym.h" +#include "btr0pcur.h" +#include "read0read.h" +#include "row0mysql.h" + +/************************************************************************* +Creates a select node struct. */ +UNIV_INTERN +sel_node_t* +sel_node_create( +/*============*/ + /* out, own: select node struct */ + mem_heap_t* heap); /* in: memory heap where created */ +/************************************************************************* +Frees the memory private to a select node when a query graph is freed, +does not free the heap where the node was originally created. */ +UNIV_INTERN +void +sel_node_free_private( +/*==================*/ + sel_node_t* node); /* in: select node struct */ +/************************************************************************* +Frees a prefetch buffer for a column, including the dynamically allocated +memory for data stored there. */ +UNIV_INTERN +void +sel_col_prefetch_buf_free( +/*======================*/ + sel_buf_t* prefetch_buf); /* in, own: prefetch buffer */ +/************************************************************************* +Gets the plan node for the nth table in a join. 
*/ +UNIV_INLINE +plan_t* +sel_node_get_nth_plan( +/*==================*/ + sel_node_t* node, /* in: select node */ + ulint i); /* in: get ith plan node */ +/************************************************************************** +Performs a select step. 
This is a high-level function used in SQL execution +graphs. */ +UNIV_INTERN +que_thr_t* +row_sel_step( +/*=========*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************** +Performs an execution step of an open or close cursor statement node. */ +UNIV_INLINE +que_thr_t* +open_step( +/*======*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************** +Performs a fetch for a cursor. */ +UNIV_INTERN +que_thr_t* +fetch_step( +/*=======*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ +/******************************************************************** +Sample callback function for fetch that prints each row.*/ +UNIV_INTERN +void* +row_fetch_print( +/*============*/ + /* out: always returns non-NULL */ + void* row, /* in: sel_node_t* */ + void* user_arg); /* in: not used */ +/******************************************************************** +Callback function for fetch that stores an unsigned 4 byte integer to the +location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length += 4. */ +UNIV_INTERN +void* +row_fetch_store_uint4( +/*==================*/ + /* out: always returns NULL */ + void* row, /* in: sel_node_t* */ + void* user_arg); /* in: data pointer */ +/*************************************************************** +Prints a row in a select result. */ +UNIV_INTERN +que_thr_t* +row_printf_step( +/*============*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ +/******************************************************************** +Converts a key value stored in MySQL format to an Innobase dtuple. The last +field of the key value may be just a prefix of a fixed length field: hence +the parameter key_len. 
But currently we do not allow search keys where the +last field is only a prefix of the full key field len and print a warning if +such appears. */ +UNIV_INTERN +void +row_sel_convert_mysql_key_to_innobase( +/*==================================*/ + dtuple_t* tuple, /* in/out: tuple where to build; + NOTE: we assume that the type info + in the tuple is already according + to index! */ + byte* buf, /* in: buffer to use in field + conversions */ + ulint buf_len, /* in: buffer length */ + dict_index_t* index, /* in: index of the key value */ + const byte* key_ptr, /* in: MySQL key value */ + ulint key_len, /* in: MySQL key value length */ + trx_t* trx); /* in: transaction */ +/************************************************************************ +Searches for rows in the database. This is used in the interface to +MySQL. This function opens a cursor, and also implements fetch next +and fetch prev. NOTE that if we do a search with a full key value +from a unique index (ROW_SEL_EXACT), then we will not store the cursor +position and fetch next or fetch prev must not be tried to the cursor! */ +UNIV_INTERN +ulint +row_search_for_mysql( +/*=================*/ + /* out: DB_SUCCESS, + DB_RECORD_NOT_FOUND, + DB_END_OF_INDEX, DB_DEADLOCK, + DB_LOCK_TABLE_FULL, + or DB_TOO_BIG_RECORD */ + byte* buf, /* in/out: buffer for the fetched + row in the MySQL format */ + ulint mode, /* in: search mode PAGE_CUR_L, ... */ + row_prebuilt_t* prebuilt, /* in: prebuilt struct for the + table handle; this contains the info + of search_tuple, index; if search + tuple contains 0 fields then we + position the cursor at the start or + the end of the index, depending on + 'mode' */ + ulint match_mode, /* in: 0 or ROW_SEL_EXACT or + ROW_SEL_EXACT_PREFIX */ + ulint direction); /* in: 0 or ROW_SEL_NEXT or + ROW_SEL_PREV; NOTE: if this is != 0, + then prebuilt must have a pcur + with stored position! In opening of a + cursor 'direction' should be 0. 
*/ +/*********************************************************************** +Checks if MySQL at the moment is allowed for this table to retrieve a +consistent read result, or store it to the query cache. */ +UNIV_INTERN +ibool +row_search_check_if_query_cache_permitted( +/*======================================*/ + /* out: TRUE if storing or retrieving + from the query cache is permitted */ + trx_t* trx, /* in: transaction object */ + const char* norm_name); /* in: concatenation of database name, + '/' char, table name */ +/*********************************************************************** +Read the max AUTOINC value from an index. */ +UNIV_INTERN +ulint +row_search_max_autoinc( +/*===================*/ + /* out: DB_SUCCESS if all OK else + error code */ + dict_index_t* index, /* in: index to search */ + const char* col_name, /* in: autoinc column name */ + ib_uint64_t* value); /* out: AUTOINC value read */ + +/* A structure for caching column values for prefetched rows */ +struct sel_buf_struct{ + byte* data; /* data, or NULL; if not NULL, this field + has allocated memory which must be explicitly + freed; can be != NULL even when len is + UNIV_SQL_NULL */ + ulint len; /* data length or UNIV_SQL_NULL */ + ulint val_buf_size; + /* size of memory buffer allocated for data: + this can be more than len; this is defined + when data != NULL */ +}; +/* Plan node: the search plan and the search data structures for one table of a select; sel_node_struct holds an array of these in its plans field */ +struct plan_struct{ + dict_table_t* table; /* table struct in the dictionary + cache */ + dict_index_t* index; /* table index used in the search */ + btr_pcur_t pcur; /* persistent cursor used to search + the index */ + ibool asc; /* TRUE if cursor traveling upwards */ + ibool pcur_is_open; /* TRUE if pcur has been positioned + and we can try to fetch new rows */ + ibool cursor_at_end; /* TRUE if the cursor is open but + we know that there are no more + qualifying rows left to 
retrieve from + the index tree; NOTE though, that + there may still be unprocessed rows in + the prefetch stack; always FALSE when + 
pcur_is_open is FALSE */ + ibool stored_cursor_rec_processed; + /* TRUE if the pcur position has been + stored and the record it is positioned + on has already been processed */ + que_node_t** tuple_exps; /* array of expressions which are used + to calculate the field values in the + search tuple: there is one expression + for each field in the search tuple */ + dtuple_t* tuple; /* search tuple */ + ulint mode; /* search mode: PAGE_CUR_G, ... */ + ulint n_exact_match; /* number of first fields in the search + tuple which must be exactly matched */ + ibool unique_search; /* TRUE if we are searching an + index record with a unique key */ + ulint n_rows_fetched; /* number of rows fetched using pcur + after it was opened */ + ulint n_rows_prefetched;/* number of prefetched rows cached + for fetch: fetching several rows in + the same mtr saves CPU time */ + ulint first_prefetched;/* index of the first cached row in + select buffer arrays for each column */ + ibool no_prefetch; /* no prefetch for this table */ + sym_node_list_t columns; /* symbol table nodes for the columns + to retrieve from the table */ + UT_LIST_BASE_NODE_T(func_node_t) + end_conds; /* conditions which determine the + fetch limit of the index segment we + have to look at: when one of these + fails, the result set has been + exhausted for the cursor in this + index; these conditions are normalized + so that in a comparison the column + for this table is the first argument */ + UT_LIST_BASE_NODE_T(func_node_t) + other_conds; /* the rest of search conditions we can + test at this table in a join */ + ibool must_get_clust; /* TRUE if index is a non-clustered + index and we must also fetch the + clustered index record; this is the + case if the non-clustered record does + not contain all the needed columns, or + if this is a single-table explicit + cursor, or a searched update or + delete */ + ulint* clust_map; /* map telling how clust_ref is built + from the fields of a non-clustered + record */ + 
dtuple_t* clust_ref; /* the reference to the clustered + index entry is built here if index is + a non-clustered index */ + btr_pcur_t clust_pcur; /* if index is non-clustered, we use + this pcur to search the clustered + index */ + mem_heap_t* old_vers_heap; /* memory heap used in building an old + version of a row, or NULL */ +}; +/* Select statement node */ +struct sel_node_struct{ + que_common_t common; /* node type: QUE_NODE_SELECT */ + ulint state; /* node state: one of the SEL_NODE_... values defined after this struct */ + que_node_t* select_list; /* select list */ + sym_node_t* into_list; /* variables list or NULL */ + sym_node_t* table_list; /* table list */ + ibool asc; /* TRUE if the rows should be fetched + in an ascending order */ + ibool set_x_locks; /* TRUE if the cursor is for update or + delete, which means that a row x-lock + should be placed on the cursor row */ + ulint row_lock_mode; /* LOCK_X or LOCK_S */ + ulint n_tables; /* number of tables */ + ulint fetch_table; /* number of the next table to access + in the join */ + plan_t* plans; /* array of n_tables many plan nodes + containing the search plan and the + search data structures */ + que_node_t* search_cond; /* search condition */ + read_view_t* read_view; /* if the query is a non-locking + consistent read, its read view is + placed here, otherwise NULL */ + ibool consistent_read;/* TRUE if the select is a consistent, + non-locking read */ + order_node_t* order_by; /* order by column definition, or + NULL */ + ibool is_aggregate; /* TRUE if the select list consists of + aggregate functions */ + ibool aggregate_already_fetched; + /* TRUE if the aggregate row has + already been fetched for the current + cursor */ + ibool can_get_updated;/* this is TRUE if the select + is in a single-table explicit + cursor which can get updated + within the stored procedure, + or in a searched update or + delete; NOTE that to determine + whether an explicit cursor + can get updated, the parser + checks whether the 
stored + procedure contains positioned + update or delete statements */ + 
sym_node_t* explicit_cursor;/* not NULL if an explicit cursor */ + UT_LIST_BASE_NODE_T(sym_node_t) + copy_variables; /* variables whose values we have to + copy when an explicit cursor is opened, + so that they do not change between + fetches */ +}; + +/* Select node states */ +#define SEL_NODE_CLOSED 0 /* it is a declared cursor which is not + currently open */ +#define SEL_NODE_OPEN 1 /* intention locks not yet set on + tables */ +#define SEL_NODE_FETCH 2 /* intention locks have been set */ +#define SEL_NODE_NO_MORE_ROWS 3 /* cursor has reached the result set + end */ + +/* Fetch statement node */ +struct fetch_node_struct{ + que_common_t common; /* type: QUE_NODE_FETCH */ + sel_node_t* cursor_def; /* cursor definition */ + sym_node_t* into_list; /* variables to set */ + + pars_user_func_t* + func; /* User callback function or NULL. + The first argument to the function + is a sel_node_t*, containing the + results of the SELECT operation for + one row. If the function returns + NULL, it is not interested in + further rows and the cursor is + modified so (cursor % NOTFOUND) is + true. If it returns not-NULL, + continue normally. See + row_fetch_print() for an example + (and a useful debugging tool). 
*/ +}; + +/* Open or close cursor statement node */ +struct open_node_struct{ + que_common_t common; /* type: QUE_NODE_OPEN */ + ulint op_type; /* ROW_SEL_OPEN_CURSOR or + ROW_SEL_CLOSE_CURSOR */ + sel_node_t* cursor_def; /* cursor definition */ +}; + +/* Row printf statement node */ +struct row_printf_node_struct{ + que_common_t common; /* type: QUE_NODE_ROW_PRINTF */ + sel_node_t* sel_node; /* select */ +}; +/* Values of op_type in open_node_struct */ +#define ROW_SEL_OPEN_CURSOR 0 +#define ROW_SEL_CLOSE_CURSOR 1 + +/* Flags for the MySQL interface: ROW_SEL_NEXT and ROW_SEL_PREV are values of the 'direction' argument, ROW_SEL_EXACT and ROW_SEL_EXACT_PREFIX of the 'match_mode' argument, of row_search_for_mysql() */ +#define ROW_SEL_NEXT 1 +#define ROW_SEL_PREV 2 + +#define ROW_SEL_EXACT 1 /* search using a complete key value */ +#define ROW_SEL_EXACT_PREFIX 2 /* search using a key prefix which + must match to rows: the prefix may + contain an incomplete field (the + last field in prefix may be just + a prefix of a fixed length column) */ + +#ifndef UNIV_NONINL +#include "row0sel.ic" +#endif + +#endif diff --git a/storage/xtradb/include/row0sel.ic b/storage/xtradb/include/row0sel.ic new file mode 100644 index 00000000000..a21181e3237 --- /dev/null +++ b/storage/xtradb/include/row0sel.ic @@ -0,0 +1,104 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Select + +Created 12/19/1997 Heikki Tuuri +*******************************************************/ + +#include "que0que.h" + +/************************************************************************* +Gets the plan node for the nth table in a join. The result is the address +of element i of the node->plans array. */ +UNIV_INLINE +plan_t* +sel_node_get_nth_plan( +/*==================*/ + /* out: plan node */ + sel_node_t* node, /* in: select node */ + ulint i) /* in: get ith plan node; asserted below to be < node->n_tables */ +{ + ut_ad(i < node->n_tables); + + return(node->plans + i); +} + +/************************************************************************* +Resets the cursor defined by sel_node to the SEL_NODE_OPEN state, which means +that it will start fetching from the start of the result set again, regardless +of where it was before, and it will set intention locks on the tables. */ +UNIV_INLINE +void +sel_node_reset_cursor( +/*==================*/ + sel_node_t* node) /* in: select node */ +{ + node->state = SEL_NODE_OPEN; +} + +/************************************************************************** +Performs an execution step of an open or close cursor statement node. 
*/ +UNIV_INLINE +que_thr_t* +open_step( +/*======*/ + /* out: query thread to run next or NULL; the only return statement below returns thr */ + que_thr_t* thr) /* in: query thread */ +{ + sel_node_t* sel_node; + open_node_t* node; + ulint err; + + ut_ad(thr); + + node = (open_node_t*) thr->run_node; + ut_ad(que_node_get_type(node) == QUE_NODE_OPEN); + + sel_node = node->cursor_def; + + err = DB_SUCCESS; + + if (node->op_type == ROW_SEL_OPEN_CURSOR) { + /* Open: the cursor is reset whatever its current state, because the state check around the call is commented out */ + /* if (sel_node->state == SEL_NODE_CLOSED) { */ + + sel_node_reset_cursor(sel_node); + /* } else { + err = DB_ERROR; + } */ + } else { + if (sel_node->state != SEL_NODE_CLOSED) { + /* Close: a cursor that is not yet closed is marked closed; closing an already closed cursor sets err to DB_ERROR */ + sel_node->state = SEL_NODE_CLOSED; + } else { + err = DB_ERROR; + } + } + + if (UNIV_EXPECT(err, DB_SUCCESS) != DB_SUCCESS) { + /* SQL error detected: print the code to stderr and invoke ut_error */ + fprintf(stderr, "SQL error %lu\n", (ulong) err); + + ut_error; + } + /* The parent of this open node becomes the node to run next */ + thr->run_node = que_node_get_parent(node); + + return(thr); +} diff --git a/storage/xtradb/include/row0types.h b/storage/xtradb/include/row0types.h new file mode 100644 index 00000000000..f0af7c2bf53 --- /dev/null +++ b/storage/xtradb/include/row0types.h @@ -0,0 +1,58 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Row operation global types + +Created 12/27/1996 Heikki Tuuri +*******************************************************/ + +#ifndef row0types_h +#define row0types_h + +typedef struct plan_struct plan_t; + +typedef struct upd_struct upd_t; + +typedef struct upd_field_struct upd_field_t; + +typedef struct upd_node_struct upd_node_t; + +typedef struct del_node_struct del_node_t; + +typedef struct ins_node_struct ins_node_t; + +typedef struct sel_node_struct sel_node_t; + +typedef struct open_node_struct open_node_t; + +typedef struct fetch_node_struct fetch_node_t; + +typedef struct row_printf_node_struct row_printf_node_t; +typedef struct sel_buf_struct sel_buf_t; + +typedef struct undo_node_struct undo_node_t; + +typedef struct purge_node_struct purge_node_t; + +typedef struct row_ext_struct row_ext_t; + +/* MySQL data types */ +typedef struct st_table TABLE; + +#endif diff --git a/storage/xtradb/include/row0uins.h b/storage/xtradb/include/row0uins.h new file mode 100644 index 00000000000..16bbbbd0d12 --- /dev/null +++ b/storage/xtradb/include/row0uins.h @@ -0,0 +1,53 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Fresh insert undo + +Created 2/25/1997 Heikki Tuuri +*******************************************************/ + +#ifndef row0uins_h +#define row0uins_h + +#include "univ.i" +#include "data0data.h" +#include "dict0types.h" +#include "trx0types.h" +#include "que0types.h" +#include "row0types.h" +#include "mtr0mtr.h" + +/*************************************************************** +Undoes a fresh insert of a row to a table. A fresh insert means that +the same clustered index unique key did not have any record, even delete +marked, at the time of the insert. InnoDB is eager in a rollback: +if it figures out that an index record will be removed in the purge +anyway, it will remove it in the rollback. */ +UNIV_INTERN +ulint +row_undo_ins( +/*=========*/ + /* out: DB_SUCCESS */ + undo_node_t* node); /* in: row undo node */ + +#ifndef UNIV_NONINL +#include "row0uins.ic" +#endif + +#endif diff --git a/storage/xtradb/include/row0uins.ic b/storage/xtradb/include/row0uins.ic new file mode 100644 index 00000000000..75bef8431eb --- /dev/null +++ b/storage/xtradb/include/row0uins.ic @@ -0,0 +1,24 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Fresh insert undo + +Created 2/25/1997 Heikki Tuuri +*******************************************************/ + diff --git a/storage/xtradb/include/row0umod.h b/storage/xtradb/include/row0umod.h new file mode 100644 index 00000000000..3a4e8c2f9a3 --- /dev/null +++ b/storage/xtradb/include/row0umod.h @@ -0,0 +1,51 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Undo modify of a row + +Created 2/27/1997 Heikki Tuuri +*******************************************************/ + +#ifndef row0umod_h +#define row0umod_h + +#include "univ.i" +#include "data0data.h" +#include "dict0types.h" +#include "trx0types.h" +#include "que0types.h" +#include "row0types.h" +#include "mtr0mtr.h" + +/*************************************************************** +Undoes a modify operation on a row of a table. */ +UNIV_INTERN +ulint +row_undo_mod( +/*=========*/ + /* out: DB_SUCCESS or error code */ + undo_node_t* node, /* in: row undo node */ + que_thr_t* thr); /* in: query thread */ + + +#ifndef UNIV_NONINL +#include "row0umod.ic" +#endif + +#endif diff --git a/storage/xtradb/include/row0umod.ic b/storage/xtradb/include/row0umod.ic new file mode 100644 index 00000000000..7ac7bc2fea7 --- /dev/null +++ b/storage/xtradb/include/row0umod.ic @@ -0,0 +1,23 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Undo modify of a row + +Created 2/27/1997 Heikki Tuuri +*******************************************************/ diff --git a/storage/xtradb/include/row0undo.h b/storage/xtradb/include/row0undo.h new file mode 100644 index 00000000000..a17cfb1babd --- /dev/null +++ b/storage/xtradb/include/row0undo.h @@ -0,0 +1,136 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Row undo + +Created 1/8/1997 Heikki Tuuri +*******************************************************/ + +#ifndef row0undo_h +#define row0undo_h + +#include "univ.i" +#include "mtr0mtr.h" +#include "trx0sys.h" +#include "btr0types.h" +#include "btr0pcur.h" +#include "dict0types.h" +#include "trx0types.h" +#include "que0types.h" +#include "row0types.h" + +/************************************************************************ +Creates a row undo node to a query graph. */ +UNIV_INTERN +undo_node_t* +row_undo_node_create( +/*=================*/ + /* out, own: undo node */ + trx_t* trx, /* in: transaction */ + que_thr_t* parent, /* in: parent node, i.e., a thr node */ + mem_heap_t* heap); /* in: memory heap where created */ +/*************************************************************** +Looks for the clustered index record when node has the row reference. +The pcur in node is used in the search. If found, stores the row to node, +and stores the position of pcur, and detaches it. The pcur must be closed +by the caller in any case. */ +UNIV_INTERN +ibool +row_undo_search_clust_to_pcur( +/*==========================*/ + /* out: TRUE if found; NOTE the node->pcur + must be closed by the caller, regardless of + the return value */ + undo_node_t* node); /* in: row undo node */ +/*************************************************************** +Undoes a row operation in a table. This is a high-level function used +in SQL execution graphs. 
*/ +UNIV_INTERN +que_thr_t* +row_undo_step( +/*==========*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ + +/* A single query thread will try to perform the undo for all successive +versions of a clustered index record, if the transaction has modified it +several times during the execution which is rolled back. It may happen +that the task is transferred to another query thread, if the other thread +is assigned to handle an undo log record in the chain of different versions +of the record, and the other thread happens to get the x-latch to the +clustered index record at the right time. + If a query thread notices that the clustered index record it is looking +for is missing, or the roll ptr field in the record does not point to the +undo log record the thread was assigned to handle, then it gives up the undo +task for that undo log record, and fetches the next. This situation can occur +just in the case where the transaction modified the same record several times +and another thread is currently doing the undo for successive versions of +that index record. */ + +/* Undo node structure */ + +struct undo_node_struct{ + que_common_t common; /* node type: QUE_NODE_UNDO */ + ulint state; /* node execution state: one of the UNDO_NODE_... values defined after this struct */ + trx_t* trx; /* trx for which undo is done */ + dulint roll_ptr;/* roll pointer to undo log record */ + trx_undo_rec_t* undo_rec;/* undo log record */ + dulint undo_no;/* undo number of the record */ + ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC, + ... 
*/ + dulint new_roll_ptr; /* roll ptr to restore to clustered index + record */ + dulint new_trx_id; /* trx id to restore to clustered index + record */ + btr_pcur_t pcur; /* persistent cursor used in searching the + clustered index record */ + dict_table_t* table; /* table where undo is done */ + ulint cmpl_info;/* compiler analysis of an update */ + upd_t* update; /* update vector for a clustered index + record */ + dtuple_t* ref; /* row reference to the next row to handle */ + dtuple_t* row; /* a copy (also fields copied to heap) of the + row to handle */ + row_ext_t* ext; /* NULL, or prefixes of the externally + stored columns of the row */ + dtuple_t* undo_row;/* NULL, or the row after undo */ + row_ext_t* undo_ext;/* NULL, or prefixes of the externally + stored columns of undo_row */ + dict_index_t* index; /* the next index whose record should be + handled */ + mem_heap_t* heap; /* memory heap used as auxiliary storage for + row; this must be emptied after undo is tried + on a row */ +}; + +/* Execution states for an undo node (values of undo_node_struct.state) */ +#define UNDO_NODE_FETCH_NEXT 1 /* we should fetch the next undo log + record */ +#define UNDO_NODE_PREV_VERS 2 /* the roll ptr to previous version of + a row is stored in node, and undo + should be done based on it */ +#define UNDO_NODE_INSERT 3 +#define UNDO_NODE_MODIFY 4 + + +#ifndef UNIV_NONINL +#include "row0undo.ic" +#endif + +#endif diff --git a/storage/xtradb/include/row0undo.ic b/storage/xtradb/include/row0undo.ic new file mode 100644 index 00000000000..921e3633b10 --- /dev/null +++ b/storage/xtradb/include/row0undo.ic @@ -0,0 +1,23 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Row undo + +Created 1/8/1997 Heikki Tuuri +*******************************************************/ diff --git a/storage/xtradb/include/row0upd.h b/storage/xtradb/include/row0upd.h new file mode 100644 index 00000000000..71aa20d158c --- /dev/null +++ b/storage/xtradb/include/row0upd.h @@ -0,0 +1,475 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Update of a row + +Created 12/27/1996 Heikki Tuuri +*******************************************************/ + +#ifndef row0upd_h +#define row0upd_h + +#include "univ.i" +#include "data0data.h" +#include "btr0types.h" +#include "btr0pcur.h" +#include "dict0types.h" +#include "trx0types.h" +#include "que0types.h" +#include "row0types.h" +#include "pars0types.h" + +/************************************************************************* +Creates an update vector object. */ +UNIV_INLINE +upd_t* +upd_create( +/*=======*/ + /* out, own: update vector object */ + ulint n, /* in: number of fields */ + mem_heap_t* heap); /* in: heap from which memory allocated */ +/************************************************************************* +Returns the number of fields in the update vector == number of columns +to be updated by an update vector. */ +UNIV_INLINE +ulint +upd_get_n_fields( +/*=============*/ + /* out: number of fields */ + const upd_t* update); /* in: update vector */ +#ifdef UNIV_DEBUG +/************************************************************************* +Returns the nth field of an update vector. */ +UNIV_INLINE +upd_field_t* +upd_get_nth_field( +/*==============*/ + /* out: update vector field */ + const upd_t* update, /* in: update vector */ + ulint n); /* in: field position in update vector */ +#else +# define upd_get_nth_field(update, n) ((update)->fields + (n)) +#endif +/************************************************************************* +Sets an index field number to be updated by an update vector field. 
*/ +UNIV_INLINE +void +upd_field_set_field_no( +/*===================*/ + upd_field_t* upd_field, /* in: update vector field */ + ulint field_no, /* in: field number in a clustered + index */ + dict_index_t* index, /* in: index */ + trx_t* trx); /* in: transaction */ +/************************************************************************* +Returns a field of an update vector by field_no. */ +UNIV_INLINE +const upd_field_t* +upd_get_field_by_field_no( +/*======================*/ + /* out: update vector field, or NULL */ + const upd_t* update, /* in: update vector */ + ulint no) /* in: field_no */ + __attribute__((nonnull, pure)); +/************************************************************************* +Writes into the redo log the values of trx id and roll ptr and enough info +to determine their positions within a clustered index record. */ +UNIV_INTERN +byte* +row_upd_write_sys_vals_to_log( +/*==========================*/ + /* out: new pointer to mlog */ + dict_index_t* index, /* in: clustered index */ + trx_t* trx, /* in: transaction */ + dulint roll_ptr,/* in: roll ptr of the undo log record */ + byte* log_ptr,/* pointer to a buffer of size > 20 opened + in mlog */ + mtr_t* mtr); /* in: mtr */ +/************************************************************************* +Updates the trx id and roll ptr field in a clustered index record when +a row is updated or marked deleted. */ +UNIV_INLINE +void +row_upd_rec_sys_fields( +/*===================*/ + rec_t* rec, /* in/out: record */ + page_zip_des_t* page_zip,/* in/out: compressed page whose + uncompressed part will be updated, or NULL */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ + trx_t* trx, /* in: transaction */ + dulint roll_ptr);/* in: roll ptr of the undo log record */ +/************************************************************************* +Sets the trx id or roll ptr field of a clustered index entry. 
*/ +UNIV_INTERN +void +row_upd_index_entry_sys_field( +/*==========================*/ + const dtuple_t* entry, /* in: index entry, where the memory buffers + for sys fields are already allocated: + the function just copies the new values to + them */ + dict_index_t* index, /* in: clustered index */ + ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ + dulint val); /* in: value to write */ +/************************************************************************* +Creates an update node for a query graph. */ +UNIV_INTERN +upd_node_t* +upd_node_create( +/*============*/ + /* out, own: update node */ + mem_heap_t* heap); /* in: mem heap where created */ +/*************************************************************** +Writes to the redo log the new values of the fields occurring in the index. */ +UNIV_INTERN +void +row_upd_index_write_log( +/*====================*/ + const upd_t* update, /* in: update vector */ + byte* log_ptr,/* in: pointer to mlog buffer: must + contain at least MLOG_BUF_MARGIN bytes + of free space; the buffer is closed + within this function */ + mtr_t* mtr); /* in: mtr into whose log to write */ +/*************************************************************** +Returns TRUE if row update changes size of some field in index or if some +field to be updated is stored externally in rec or update. */ +UNIV_INTERN +ibool +row_upd_changes_field_size_or_external( +/*===================================*/ + /* out: TRUE if the update changes the size of + some field in index or the field is external + in rec or update */ + dict_index_t* index, /* in: index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ + const upd_t* update);/* in: update vector */ +/*************************************************************** +Replaces the new column values stored in the update vector to the record +given. No field size changes are allowed. 
*/ +UNIV_INTERN +void +row_upd_rec_in_place( +/*=================*/ + rec_t* rec, /* in/out: record where replaced */ + dict_index_t* index, /* in: the index the record belongs to */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + const upd_t* update, /* in: update vector */ + page_zip_des_t* page_zip);/* in: compressed page with enough space + available, or NULL */ +/******************************************************************* +Builds an update vector from those fields which in a secondary index entry +differ from a record that has the equal ordering fields. NOTE: we compare +the fields as binary strings! */ +UNIV_INTERN +upd_t* +row_upd_build_sec_rec_difference_binary( +/*====================================*/ + /* out, own: update vector of differing + fields */ + dict_index_t* index, /* in: index */ + const dtuple_t* entry, /* in: entry to insert */ + const rec_t* rec, /* in: secondary index record */ + trx_t* trx, /* in: transaction */ + mem_heap_t* heap); /* in: memory heap from which allocated */ +/******************************************************************* +Builds an update vector from those fields, excluding the roll ptr and +trx id fields, which in an index entry differ from a record that has +the equal ordering fields. NOTE: we compare the fields as binary strings! */ +UNIV_INTERN +upd_t* +row_upd_build_difference_binary( +/*============================*/ + /* out, own: update vector of differing + fields, excluding roll ptr and trx id */ + dict_index_t* index, /* in: clustered index */ + const dtuple_t* entry, /* in: entry to insert */ + const rec_t* rec, /* in: clustered index record */ + trx_t* trx, /* in: transaction */ + mem_heap_t* heap); /* in: memory heap from which allocated */ +/*************************************************************** +Replaces the new column values stored in the update vector to the index entry +given. 
*/ +UNIV_INTERN +void +row_upd_index_replace_new_col_vals_index_pos( +/*=========================================*/ + dtuple_t* entry, /* in/out: index entry where replaced; + the clustered index record must be + covered by a lock or a page latch to + prevent deletion (rollback or purge) */ + dict_index_t* index, /* in: index; NOTE that this may also be a + non-clustered index */ + const upd_t* update, /* in: an update vector built for the index so + that the field number in an upd_field is the + index position */ + ibool order_only, + /* in: if TRUE, limit the replacement to + ordering fields of index; note that this + does not work for non-clustered indexes. */ + mem_heap_t* heap) /* in: memory heap for allocating and + copying the new values */ + __attribute__((nonnull)); +/*************************************************************** +Replaces the new column values stored in the update vector to the index entry +given. */ +UNIV_INTERN +void +row_upd_index_replace_new_col_vals( +/*===============================*/ + dtuple_t* entry, /* in/out: index entry where replaced; + the clustered index record must be + covered by a lock or a page latch to + prevent deletion (rollback or purge) */ + dict_index_t* index, /* in: index; NOTE that this may also be a + non-clustered index */ + const upd_t* update, /* in: an update vector built for the + CLUSTERED index so that the field number in + an upd_field is the clustered index position */ + mem_heap_t* heap) /* in: memory heap for allocating and + copying the new values */ + __attribute__((nonnull)); +/*************************************************************** +Replaces the new column values stored in the update vector. 
*/ +UNIV_INTERN +void +row_upd_replace( +/*============*/ + dtuple_t* row, /* in/out: row where replaced, + indexed by col_no; + the clustered index record must be + covered by a lock or a page latch to + prevent deletion (rollback or purge) */ + row_ext_t** ext, /* out, own: NULL, or externally + stored column prefixes */ + const dict_index_t* index, /* in: clustered index */ + const upd_t* update, /* in: an update vector built for the + clustered index */ + mem_heap_t* heap); /* in: memory heap */ +/*************************************************************** +Checks if an update vector changes an ordering field of an index record. +This function is fast if the update vector is short or the number of ordering +fields in the index is small. Otherwise, this can be quadratic. +NOTE: we compare the fields as binary strings! */ +UNIV_INTERN +ibool +row_upd_changes_ord_field_binary( +/*=============================*/ + /* out: TRUE if update vector changes + an ordering field in the index record; + NOTE: the fields are compared as binary + strings */ + const dtuple_t* row, /* in: old value of row, or NULL if the + row and the data values in update are not + known when this function is called, e.g., at + compile time */ + dict_index_t* index, /* in: index of the record */ + const upd_t* update);/* in: update vector for the row; NOTE: the + field numbers in this MUST be clustered index + positions! */ +/*************************************************************** +Checks if an update vector changes an ordering field of an index record. +This function is fast if the update vector is short or the number of ordering +fields in the index is small. Otherwise, this can be quadratic. +NOTE: we compare the fields as binary strings! 
*/ +UNIV_INTERN +ibool +row_upd_changes_some_index_ord_field_binary( +/*========================================*/ + /* out: TRUE if update vector + may change an ordering field + in an index record */ + const dict_table_t* table, /* in: table */ + const upd_t* update);/* in: update vector for the row */ +/*************************************************************** +Updates a row in a table. This is a high-level function used +in SQL execution graphs. */ +UNIV_INTERN +que_thr_t* +row_upd_step( +/*=========*/ + /* out: query thread to run next or NULL */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************* +Parses the log data of system field values. */ +UNIV_INTERN +byte* +row_upd_parse_sys_vals( +/*===================*/ + /* out: log data end or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + ulint* pos, /* out: TRX_ID position in record */ + dulint* trx_id, /* out: trx id */ + dulint* roll_ptr);/* out: roll ptr */ +/************************************************************************* +Updates the trx id and roll ptr field in a clustered index record in database +recovery. */ +UNIV_INTERN +void +row_upd_rec_sys_fields_in_recovery( +/*===============================*/ + rec_t* rec, /* in/out: record */ + page_zip_des_t* page_zip,/* in/out: compressed page, or NULL */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint pos, /* in: TRX_ID position in rec */ + dulint trx_id, /* in: transaction id */ + dulint roll_ptr);/* in: roll ptr of the undo log record */ +/************************************************************************* +Parses the log data written by row_upd_index_write_log. 
*/ +UNIV_INTERN +byte* +row_upd_index_parse( +/*================*/ + /* out: log data end or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + mem_heap_t* heap, /* in: memory heap where update vector is + built */ + upd_t** update_out);/* out: update vector */ + + +/* Update vector field */ +struct upd_field_struct{ + unsigned field_no:16; /* field number in an index, usually + the clustered index, but in updating + a secondary index record in btr0cur.c + this is the position in the secondary + index */ + unsigned orig_len:16; /* original length of the locally + stored part of an externally stored + column, or 0 */ + que_node_t* exp; /* expression for calculating a new + value: it refers to column values and + constants in the symbol table of the + query graph */ + dfield_t new_val; /* new value for the column */ +}; + +/* Update vector structure */ +struct upd_struct{ + ulint info_bits; /* new value of info bits to record; + default is 0 */ + ulint n_fields; /* number of update fields */ + upd_field_t* fields; /* array of update fields */ +}; + +/* Update node structure which also implements the delete operation +of a row */ + +struct upd_node_struct{ + que_common_t common; /* node type: QUE_NODE_UPDATE */ + ibool is_delete;/* TRUE if delete, FALSE if update */ + ibool searched_update; + /* TRUE if searched update, FALSE if + positioned */ + ibool in_mysql_interface; + /* TRUE if the update node was created + for the MySQL interface */ + dict_foreign_t* foreign;/* NULL or pointer to a foreign key + constraint if this update node is used in + doing an ON DELETE or ON UPDATE operation */ + upd_node_t* cascade_node;/* NULL or an update node template which + is used to implement ON DELETE/UPDATE CASCADE + or ... 
SET NULL for foreign keys */ + mem_heap_t* cascade_heap;/* NULL or a mem heap where the cascade + node is created */ + sel_node_t* select; /* query graph subtree implementing a base + table cursor: the rows returned will be + updated */ + btr_pcur_t* pcur; /* persistent cursor placed on the clustered + index record which should be updated or + deleted; the cursor is stored in the graph + of 'select' field above, except in the case + of the MySQL interface */ + dict_table_t* table; /* table where updated */ + upd_t* update; /* update vector for the row */ + ulint update_n_fields; + /* when this struct is used to implement + a cascade operation for foreign keys, we store + here the size of the buffer allocated for use + as the update vector */ + sym_node_list_t columns;/* symbol table nodes for the columns + to retrieve from the table */ + ibool has_clust_rec_x_lock; + /* TRUE if the select which retrieves the + records to update already sets an x-lock on + the clustered record; note that it must always + set at least an s-lock */ + ulint cmpl_info;/* information extracted during query + compilation; speeds up execution: + UPD_NODE_NO_ORD_CHANGE and + UPD_NODE_NO_SIZE_CHANGE, ORed */ + /*----------------------*/ + /* Local storage for this graph node */ + ulint state; /* node execution state */ + dict_index_t* index; /* NULL, or the next index whose record should + be updated */ + dtuple_t* row; /* NULL, or a copy (also fields copied to + heap) of the row to update; this must be reset + to NULL after a successful update */ + row_ext_t* ext; /* NULL, or prefixes of the externally + stored columns in the old row */ + dtuple_t* upd_row;/* NULL, or a copy of the updated row */ + row_ext_t* upd_ext;/* NULL, or prefixes of the externally + stored columns in upd_row */ + mem_heap_t* heap; /* memory heap used as auxiliary storage; + this must be emptied after a successful + update */ + /*----------------------*/ + sym_node_t* table_sym;/* table node in symbol table */ + 
que_node_t* col_assign_list; + /* column assignment list */ + ulint magic_n; +}; + +#define UPD_NODE_MAGIC_N 1579975 + +/* Node execution states */ +#define UPD_NODE_SET_IX_LOCK 1 /* execution came to the node from + a node above and if the field + has_clust_rec_x_lock is FALSE, we + should set an intention x-lock on + the table */ +#define UPD_NODE_UPDATE_CLUSTERED 2 /* clustered index record should be + updated */ +#define UPD_NODE_INSERT_CLUSTERED 3 /* clustered index record should be + inserted, old record is already delete + marked */ +#define UPD_NODE_UPDATE_ALL_SEC 4 /* an ordering field of the clustered + index record was changed, or this is + a delete operation: should update + all the secondary index records */ +#define UPD_NODE_UPDATE_SOME_SEC 5 /* secondary index entries should be + looked at and updated if an ordering + field changed */ + +/* Compilation info flags: these must fit within 3 bits; see trx0rec.h */ +#define UPD_NODE_NO_ORD_CHANGE 1 /* no secondary index record will be + changed in the update and no ordering + field of the clustered index */ +#define UPD_NODE_NO_SIZE_CHANGE 2 /* no record field size will be + changed in the update */ + +#ifndef UNIV_NONINL +#include "row0upd.ic" +#endif + +#endif diff --git a/storage/xtradb/include/row0upd.ic b/storage/xtradb/include/row0upd.ic new file mode 100644 index 00000000000..a0c23aa6b07 --- /dev/null +++ b/storage/xtradb/include/row0upd.ic @@ -0,0 +1,179 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Update of a row + +Created 12/27/1996 Heikki Tuuri +*******************************************************/ + +#include "mtr0log.h" +#include "trx0trx.h" +#include "trx0undo.h" +#include "row0row.h" +#include "btr0sea.h" +#include "page0zip.h" + +/************************************************************************* +Creates an update vector object. */ +UNIV_INLINE +upd_t* +upd_create( +/*=======*/ + /* out, own: update vector object */ + ulint n, /* in: number of fields */ + mem_heap_t* heap) /* in: heap from which memory allocated */ +{ + upd_t* update; + + update = (upd_t*) mem_heap_alloc(heap, sizeof(upd_t)); + + update->info_bits = 0; + update->n_fields = n; + update->fields = (upd_field_t*) + mem_heap_alloc(heap, sizeof(upd_field_t) * n); + + return(update); +} + +/************************************************************************* +Returns the number of fields in the update vector == number of columns +to be updated by an update vector. */ +UNIV_INLINE +ulint +upd_get_n_fields( +/*=============*/ + /* out: number of fields */ + const upd_t* update) /* in: update vector */ +{ + ut_ad(update); + + return(update->n_fields); +} + +#ifdef UNIV_DEBUG +/************************************************************************* +Returns the nth field of an update vector. 
*/ +UNIV_INLINE +upd_field_t* +upd_get_nth_field( +/*==============*/ + /* out: update vector field */ + const upd_t* update, /* in: update vector */ + ulint n) /* in: field position in update vector */ +{ + ut_ad(update); + ut_ad(n < update->n_fields); + + return((upd_field_t*) update->fields + n); +} +#endif /* UNIV_DEBUG */ + +/************************************************************************* +Sets an index field number to be updated by an update vector field. */ +UNIV_INLINE +void +upd_field_set_field_no( +/*===================*/ + upd_field_t* upd_field, /* in: update vector field */ + ulint field_no, /* in: field number in a clustered + index */ + dict_index_t* index, /* in: index */ + trx_t* trx) /* in: transaction */ +{ + upd_field->field_no = field_no; + upd_field->orig_len = 0; + + if (UNIV_UNLIKELY(field_no >= dict_index_get_n_fields(index))) { + fprintf(stderr, + "InnoDB: Error: trying to access field %lu in ", + (ulong) field_no); + dict_index_name_print(stderr, trx, index); + fprintf(stderr, "\n" + "InnoDB: but index only has %lu fields\n", + (ulong) dict_index_get_n_fields(index)); + } + + dict_col_copy_type(dict_index_get_nth_col(index, field_no), + dfield_get_type(&upd_field->new_val)); +} + +/************************************************************************* +Returns a field of an update vector by field_no. */ +UNIV_INLINE +const upd_field_t* +upd_get_field_by_field_no( +/*======================*/ + /* out: update vector field, or NULL */ + const upd_t* update, /* in: update vector */ + ulint no) /* in: field_no */ +{ + ulint i; + for (i = 0; i < upd_get_n_fields(update); i++) { + const upd_field_t* uf = upd_get_nth_field(update, i); + + if (uf->field_no == no) { + + return(uf); + } + } + + return(NULL); +} + +/************************************************************************* +Updates the trx id and roll ptr field in a clustered index record when +a row is updated or marked deleted. 
*/ +UNIV_INLINE +void +row_upd_rec_sys_fields( +/*===================*/ + rec_t* rec, /* in/out: record */ + page_zip_des_t* page_zip,/* in/out: compressed page whose + uncompressed part will be updated, or NULL */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ + trx_t* trx, /* in: transaction */ + dulint roll_ptr)/* in: roll ptr of the undo log record */ +{ + ut_ad(dict_index_is_clust(index)); + ut_ad(rec_offs_validate(rec, index, offsets)); +#ifdef UNIV_SYNC_DEBUG + if (!rw_lock_own(&btr_search_latch, RW_LOCK_EX)) { + ut_ad(!buf_block_align(rec)->is_hashed); + } +#endif /* UNIV_SYNC_DEBUG */ + + if (UNIV_LIKELY_NULL(page_zip)) { + ulint pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID); + page_zip_write_trx_id_and_roll_ptr(page_zip, rec, offsets, + pos, trx->id, roll_ptr); + } else { + ulint offset = index->trx_id_offset; + + if (!offset) { + offset = row_get_trx_id_offset(rec, index, offsets); + } + +#if DATA_TRX_ID + 1 != DATA_ROLL_PTR +# error "DATA_TRX_ID + 1 != DATA_ROLL_PTR" +#endif + trx_write_trx_id(rec + offset, trx->id); + trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr); + } +} diff --git a/storage/xtradb/include/row0vers.h b/storage/xtradb/include/row0vers.h new file mode 100644 index 00000000000..0feae77e8b5 --- /dev/null +++ b/storage/xtradb/include/row0vers.h @@ -0,0 +1,142 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Row versions + +Created 2/6/1997 Heikki Tuuri +*******************************************************/ + +#ifndef row0vers_h +#define row0vers_h + +#include "univ.i" +#include "data0data.h" +#include "dict0types.h" +#include "trx0types.h" +#include "que0types.h" +#include "rem0types.h" +#include "mtr0mtr.h" +#include "read0types.h" + +/********************************************************************* +Finds out if an active transaction has inserted or modified a secondary +index record. NOTE: the kernel mutex is temporarily released in this +function! */ +UNIV_INTERN +trx_t* +row_vers_impl_x_locked_off_kernel( +/*==============================*/ + /* out: NULL if committed, else the active + transaction; NOTE that the kernel mutex is + temporarily released! */ + const rec_t* rec, /* in: record in a secondary index */ + dict_index_t* index, /* in: the secondary index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ +/********************************************************************* +Finds out if we must preserve a delete marked earlier version of a clustered +index record, because it is >= the purge view. 
*/ +UNIV_INTERN +ibool +row_vers_must_preserve_del_marked( +/*==============================*/ + /* out: TRUE if earlier version should be preserved */ + dulint trx_id, /* in: transaction id in the version */ + mtr_t* mtr); /* in: mtr holding the latch on the clustered index + record; it will also hold the latch on purge_view */ +/********************************************************************* +Finds out if a version of the record, where the version >= the current +purge view, should have ientry as its secondary index entry. We check +if there is any not delete marked version of the record where the trx +id >= purge view, and the secondary index entry == ientry; exactly in +this case we return TRUE. */ +UNIV_INTERN +ibool +row_vers_old_has_index_entry( +/*=========================*/ + /* out: TRUE if earlier version should have */ + ibool also_curr,/* in: TRUE if also rec is included in the + versions to search; otherwise only versions + prior to it are searched */ + const rec_t* rec, /* in: record in the clustered index; the + caller must have a latch on the page */ + mtr_t* mtr, /* in: mtr holding the latch on rec; it will + also hold the latch on purge_view */ + dict_index_t* index, /* in: the secondary index */ + const dtuple_t* ientry);/* in: the secondary index entry */ +/********************************************************************* +Constructs the version of a clustered index record which a consistent +read should see. We assume that the trx id stored in rec is such that +the consistent read should not see rec in its present version. 
*/ +UNIV_INTERN +ulint +row_vers_build_for_consistent_read( +/*===============================*/ + /* out: DB_SUCCESS or DB_MISSING_HISTORY */ + const rec_t* rec, /* in: record in a clustered index; the + caller must have a latch on the page; this + latch locks the top of the stack of versions + of this record */ + mtr_t* mtr, /* in: mtr holding the latch on rec; it will + also hold the latch on purge_view */ + dict_index_t* index, /* in: the clustered index */ + ulint** offsets,/* in/out: offsets returned by + rec_get_offsets(rec, index) */ + read_view_t* view, /* in: the consistent read view */ + mem_heap_t** offset_heap,/* in/out: memory heap from which + the offsets are allocated */ + mem_heap_t* in_heap,/* in: memory heap from which the memory for + *old_vers is allocated; memory for possible + intermediate versions is allocated and freed + locally within the function */ + rec_t** old_vers);/* out, own: old version, or NULL if the + record does not exist in the view, that is, + it was freshly inserted afterwards */ + +/********************************************************************* +Constructs the last committed version of a clustered index record, +which should be seen by a semi-consistent read. 
*/ +UNIV_INTERN +ulint +row_vers_build_for_semi_consistent_read( +/*====================================*/ + /* out: DB_SUCCESS or DB_MISSING_HISTORY */ + const rec_t* rec, /* in: record in a clustered index; the + caller must have a latch on the page; this + latch locks the top of the stack of versions + of this record */ + mtr_t* mtr, /* in: mtr holding the latch on rec */ + dict_index_t* index, /* in: the clustered index */ + ulint** offsets,/* in/out: offsets returned by + rec_get_offsets(rec, index) */ + mem_heap_t** offset_heap,/* in/out: memory heap from which + the offsets are allocated */ + mem_heap_t* in_heap,/* in: memory heap from which the memory for + *old_vers is allocated; memory for possible + intermediate versions is allocated and freed + locally within the function */ + const rec_t** old_vers);/* out: rec, old version, or NULL if the + record does not exist in the view, that is, + it was freshly inserted afterwards */ + + +#ifndef UNIV_NONINL +#include "row0vers.ic" +#endif + +#endif diff --git a/storage/xtradb/include/row0vers.ic b/storage/xtradb/include/row0vers.ic new file mode 100644 index 00000000000..aac95ea6593 --- /dev/null +++ b/storage/xtradb/include/row0vers.ic @@ -0,0 +1,29 @@ +/***************************************************************************** + +Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Row versions + +Created 2/6/1997 Heikki Tuuri +*******************************************************/ + +#include "row0row.h" +#include "dict0dict.h" +#include "read0read.h" +#include "page0page.h" +#include "log0recv.h" diff --git a/storage/xtradb/include/srv0que.h b/storage/xtradb/include/srv0que.h new file mode 100644 index 00000000000..88db1a013f6 --- /dev/null +++ b/storage/xtradb/include/srv0que.h @@ -0,0 +1,68 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Server query execution + +Created 6/5/1996 Heikki Tuuri +*******************************************************/ + +#ifndef srv0que_h +#define srv0que_h + +#include "univ.i" +#include "que0types.h" + +/************************************************************************** +Checks if there is work to do in the server task queue. If there is, the +thread starts processing a task. Before leaving, it again checks the task +queue and picks a new task if any exists. This is called by a SRV_WORKER +thread. */ +UNIV_INTERN +void +srv_que_task_queue_check(void); +/*==========================*/ +/************************************************************************** +Performs round-robin on the server tasks. This is called by a SRV_WORKER +thread every second or so. */ +UNIV_INTERN +que_thr_t* +srv_que_round_robin( +/*================*/ + /* out: the new (may be == thr) query thread + to run */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************** +Enqueues a task to server task queue and releases a worker thread, if +there exists one suspended. */ +UNIV_INTERN +void +srv_que_task_enqueue( +/*=================*/ + que_thr_t* thr); /* in: query thread */ +/************************************************************************** +Enqueues a task to server task queue and releases a worker thread, if +there exists one suspended. 
*/ +UNIV_INTERN +void +srv_que_task_enqueue_low( +/*=====================*/ + que_thr_t* thr); /* in: query thread */ + +#endif + diff --git a/storage/xtradb/include/srv0srv.h b/storage/xtradb/include/srv0srv.h new file mode 100644 index 00000000000..cb78d66da1b --- /dev/null +++ b/storage/xtradb/include/srv0srv.h @@ -0,0 +1,608 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2008, Google Inc. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The server main program + +Created 10/10/1995 Heikki Tuuri +*******************************************************/ + +#ifndef srv0srv_h +#define srv0srv_h + +#include "univ.i" +#include "sync0sync.h" +#include "os0sync.h" +#include "que0types.h" +#include "trx0types.h" + +extern const char* srv_main_thread_op_info; + +/* Prefix used by MySQL to indicate pre-5.1 table name encoding */ +extern const char srv_mysql50_table_name_prefix[9]; + +/* When this event is set the lock timeout and InnoDB monitor +thread starts running */ +extern os_event_t srv_lock_timeout_thread_event; + +/* If the last data file is auto-extended, we add this many pages to it +at a time */ +#define SRV_AUTO_EXTEND_INCREMENT \ + (srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE)) + +/* This is set to TRUE if the MySQL user has set it in MySQL */ +extern ibool srv_lower_case_table_names; + +/* Mutex for locking srv_monitor_file */ +extern mutex_t srv_monitor_file_mutex; +/* Temporary file for innodb monitor output */ +extern FILE* srv_monitor_file; +/* Mutex for locking srv_dict_tmpfile. +This mutex has a very high rank; threads reserving it should not +be holding any InnoDB latches. */ +extern mutex_t srv_dict_tmpfile_mutex; +/* Temporary file for output from the data dictionary */ +extern FILE* srv_dict_tmpfile; +/* Mutex for locking srv_misc_tmpfile. +This mutex has a very low rank; threads reserving it should not +acquire any further latches or sleep before releasing this one. 
*/ +extern mutex_t srv_misc_tmpfile_mutex; +/* Temporary file for miscellaneous diagnostic output */ +extern FILE* srv_misc_tmpfile; + +/* Server parameters which are read from the initfile */ + +extern char* srv_data_home; +#ifdef UNIV_LOG_ARCHIVE +extern char* srv_arch_dir; +#endif /* UNIV_LOG_ARCHIVE */ + +/* store to its own file each table created by a user; data +dictionary tables are in the system tablespace 0 */ +extern my_bool srv_file_per_table; +/* The file format to use on new *.ibd files. */ +extern ulint srv_file_format; +/* Whether to check file format during startup.*/ +extern ulint srv_check_file_format_at_startup; +/* Place locks to records only i.e. do not use next-key locking except +on duplicate key checking and foreign key checking */ +extern ibool srv_locks_unsafe_for_binlog; + +extern ulint srv_n_data_files; +extern char** srv_data_file_names; +extern ulint* srv_data_file_sizes; +extern ulint* srv_data_file_is_raw_partition; + +extern ibool srv_extra_undoslots; + +extern ibool srv_auto_extend_last_data_file; +extern ulint srv_last_file_size_max; +extern ulong srv_auto_extend_increment; + +extern ibool srv_created_new_raw; + +#define SRV_NEW_RAW 1 +#define SRV_OLD_RAW 2 + +extern char** srv_log_group_home_dirs; + +extern ulint srv_n_log_groups; +extern ulint srv_n_log_files; +extern ulint srv_log_file_size; +extern ulint srv_log_buffer_size; +extern ulong srv_flush_log_at_trx_commit; + +extern ulint srv_show_locks_held; +extern ulint srv_show_verbose_locks; + +/* The sort order table of the MySQL latin1_swedish_ci character set +collation */ +extern const byte* srv_latin1_ordering; +extern my_bool srv_use_sys_malloc; +extern ulint srv_buf_pool_size; /* requested size in bytes */ +extern ulint srv_buf_pool_old_size; /* previously requested size */ +extern ulint srv_buf_pool_curr_size; /* current size in bytes */ +extern ulint srv_mem_pool_size; +extern ulint srv_lock_table_size; + +extern ulint srv_n_file_io_threads; +extern ulint 
srv_n_read_io_threads; +extern ulint srv_n_write_io_threads; + +#ifdef UNIV_LOG_ARCHIVE +extern ibool srv_log_archive_on; +extern ibool srv_archive_recovery; +extern dulint srv_archive_recovery_limit_lsn; +#endif /* UNIV_LOG_ARCHIVE */ + +extern char* srv_file_flush_method_str; +extern ulint srv_unix_file_flush_method; +extern ulint srv_win_file_flush_method; + +extern ulint srv_max_n_open_files; + +extern ulint srv_max_dirty_pages_pct; + +extern ulint srv_force_recovery; +extern ulong srv_thread_concurrency; +extern ulong srv_commit_concurrency; + +extern ulint srv_max_n_threads; + +extern lint srv_conc_n_threads; + +extern ulint srv_fast_shutdown; /* If this is 1, do not do a + purge and index buffer merge. + If this is 2, do not even flush the + buffer pool to data files at the + shutdown: we effectively 'crash' + InnoDB (but lose no committed + transactions). */ +extern ibool srv_innodb_status; + +extern unsigned long long srv_stats_sample_pages; + +extern ibool srv_use_doublewrite_buf; +extern ibool srv_use_checksums; + +extern ibool srv_set_thread_priorities; +extern int srv_query_thread_priority; + +extern ulong srv_max_buf_pool_modified_pct; +extern ulong srv_max_purge_lag; + +extern ulong srv_replication_delay; + +extern ulint srv_io_capacity; +extern long long srv_ibuf_max_size; +extern ulint srv_ibuf_active_contract; +extern ulint srv_ibuf_accel_rate; +extern ulint srv_flush_neighbor_pages; +extern ulint srv_enable_unsafe_group_commit; +extern ulint srv_read_ahead; +extern ulint srv_adaptive_checkpoint; + +extern ulint srv_extra_rsegments; + +/*-------------------------------------------*/ + +extern ulint srv_n_rows_inserted; +extern ulint srv_n_rows_updated; +extern ulint srv_n_rows_deleted; +extern ulint srv_n_rows_read; + +extern ibool srv_print_innodb_monitor; +extern ibool srv_print_innodb_lock_monitor; +extern ibool srv_print_innodb_tablespace_monitor; +extern ibool srv_print_verbose_log; +extern ibool srv_print_innodb_table_monitor; + +extern ibool 
srv_lock_timeout_and_monitor_active; +extern ibool srv_error_monitor_active; + +extern ulong srv_n_spin_wait_rounds; +extern ulong srv_n_free_tickets_to_enter; +extern ulong srv_thread_sleep_delay; +extern ulint srv_spin_wait_delay; +extern ibool srv_priority_boost; + +extern ulint srv_mem_pool_size; +extern ulint srv_lock_table_size; + +#ifdef UNIV_DEBUG +extern ibool srv_print_thread_releases; +extern ibool srv_print_lock_waits; +extern ibool srv_print_buf_io; +extern ibool srv_print_log_io; +extern ibool srv_print_latch_waits; +#else /* UNIV_DEBUG */ +# define srv_print_thread_releases FALSE +# define srv_print_lock_waits FALSE +# define srv_print_buf_io FALSE +# define srv_print_log_io FALSE +# define srv_print_latch_waits FALSE +#endif /* UNIV_DEBUG */ + +extern ulint srv_activity_count; +extern ulint srv_fatal_semaphore_wait_threshold; +extern ulint srv_dml_needed_delay; + +extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs, + query threads, and lock table: we allocate + it from dynamic memory to get it to the + same DRAM page as other hotspot semaphores */ +#define kernel_mutex (*kernel_mutex_temp) + +#define SRV_MAX_N_IO_THREADS 100 + +/* Array of English strings describing the current state of an +i/o handler thread */ +extern const char* srv_io_thread_op_info[]; +extern const char* srv_io_thread_function[]; + +/* the number of the log write requests done */ +extern ulint srv_log_write_requests; + +/* the number of physical writes to the log performed */ +extern ulint srv_log_writes; + +/* amount of data written to the log files in bytes */ +extern ulint srv_os_log_written; + +/* amount of writes being done to the log files */ +extern ulint srv_os_log_pending_writes; + +/* we increase this counter when we don't have enough space in the +log buffer and have to flush it */ +extern ulint srv_log_waits; + +/* variable that counts amount of data read in total (in bytes) */ +extern ulint srv_data_read; + +/* here we count the 
amount of data written in total (in bytes) */ +extern ulint srv_data_written; + +/* this variable counts the number of times the doublewrite buffer +was flushed */ +extern ulint srv_dblwr_writes; + +/* here we store the number of pages that have been flushed to the +doublewrite buffer */ +extern ulint srv_dblwr_pages_written; + +/* in this variable we store the number of write requests issued */ +extern ulint srv_buf_pool_write_requests; + +/* here we store the number of times when we had to wait for a free page +in the buffer pool. It happens when the buffer pool is full and we need +to make a flush, in order to be able to read or create a page. */ +extern ulint srv_buf_pool_wait_free; + +/* variable to count the number of pages that were written from the +buffer pool to disk */ +extern ulint srv_buf_pool_flushed; + +/* variable to count the number of buffer pool reads that led to the +reading of a disk page */ +extern ulint srv_buf_pool_reads; + +/* variable to count the number of sequential read-aheads that were done */ +extern ulint srv_read_ahead_seq; + +/* variable to count the number of random read-aheads that were done */ +extern ulint srv_read_ahead_rnd; + +/* In this structure we store status variables to be passed to MySQL */ +typedef struct export_var_struct export_struc; + +extern export_struc export_vars; + +typedef struct srv_sys_struct srv_sys_t; + +/* The server system */ +extern srv_sys_t* srv_sys; + +/* Alternatives for the file flush option in Unix; see the InnoDB manual +about what these mean */ +#define SRV_UNIX_FSYNC 1 /* This is the default */ +#define SRV_UNIX_O_DSYNC 2 +#define SRV_UNIX_LITTLESYNC 3 +#define SRV_UNIX_NOSYNC 4 +#define SRV_UNIX_O_DIRECT 5 + +/* Alternatives for file i/o in Windows */ +#define SRV_WIN_IO_NORMAL 1 +#define SRV_WIN_IO_UNBUFFERED 2 /* This is the default */ + +/* Alternatives for srv_force_recovery. 
Non-zero values are intended +to help the user get a damaged database up so that he can dump intact +tables and rows with SELECT INTO OUTFILE. The database must not otherwise +be used with these options! A bigger number below means that all precautions +of lower numbers are included. */ + +#define SRV_FORCE_IGNORE_CORRUPT 1 /* let the server run even if it + detects a corrupt page */ +#define SRV_FORCE_NO_BACKGROUND 2 /* prevent the main thread from + running: if a crash would occur + in purge, this prevents it */ +#define SRV_FORCE_NO_TRX_UNDO 3 /* do not run trx rollback after + recovery */ +#define SRV_FORCE_NO_IBUF_MERGE 4 /* prevent also ibuf operations: + if they would cause a crash, better + not do them */ +#define SRV_FORCE_NO_UNDO_LOG_SCAN 5 /* do not look at undo logs when + starting the database: InnoDB will + treat even incomplete transactions + as committed */ +#define SRV_FORCE_NO_LOG_REDO 6 /* do not do the log roll-forward + in connection with recovery */ + +/** Types of threads existing in the system. */ +enum srv_thread_type { + SRV_COM = 1, /**< threads serving communication and queries */ + SRV_CONSOLE, /**< thread serving console */ + SRV_WORKER, /**< threads serving parallelized queries and + queries released from lock wait */ +#if 0 + /* Utility threads */ + SRV_BUFFER, /**< thread flushing dirty buffer blocks */ + SRV_RECOVERY, /**< threads finishing a recovery */ + SRV_INSERT, /**< thread flushing the insert buffer to disk */ +#endif + SRV_MASTER /**< the master thread, (whose type number must + be biggest) */ +}; + +/************************************************************************* +Boots Innobase server. */ +UNIV_INTERN +ulint +srv_boot(void); +/*==========*/ + /* out: DB_SUCCESS or error code */ +/************************************************************************* +Initializes the server. 
*/ +UNIV_INTERN +void +srv_init(void); +/*==========*/ +/************************************************************************* +Frees the OS fast mutex created in srv_boot(). */ +UNIV_INTERN +void +srv_free(void); +/*==========*/ +/************************************************************************* +Initializes the synchronization primitives, memory system, and the thread +local storage. */ +UNIV_INTERN +void +srv_general_init(void); +/*==================*/ +/************************************************************************* +Gets the number of threads in the system. */ +UNIV_INTERN +ulint +srv_get_n_threads(void); +/*===================*/ +/************************************************************************* +Returns the calling thread type. */ + +enum srv_thread_type +srv_get_thread_type(void); +/*=====================*/ + /* out: SRV_COM, ... */ +/************************************************************************* +Sets the info describing an i/o thread current state. */ +UNIV_INTERN +void +srv_set_io_thread_op_info( +/*======================*/ + ulint i, /* in: the 'segment' of the i/o thread */ + const char* str); /* in: constant char string describing the + state */ +/************************************************************************* +Releases threads of the type given from suspension in the thread table. +NOTE! The server mutex has to be reserved by the caller! */ +UNIV_INTERN +ulint +srv_release_threads( +/*================*/ + /* out: number of threads + released: this may be < n if + not enough threads were + suspended at the moment */ + enum srv_thread_type type, /* in: thread type */ + ulint n); /* in: number of threads to release */ +/************************************************************************* +The master thread controlling the server. 
*/ +UNIV_INTERN +os_thread_ret_t +srv_master_thread( +/*==============*/ + /* out: a dummy parameter */ + void* arg); /* in: a dummy parameter required by + os_thread_create */ +/*********************************************************************** +Tells the Innobase server that there has been activity in the database +and wakes up the master thread if it is suspended (not sleeping). Used +in the MySQL interface. Note that there is a small chance that the master +thread stays suspended (we do not protect our operation with the kernel +mutex, for performance reasons). */ +UNIV_INTERN +void +srv_active_wake_master_thread(void); +/*===============================*/ +/*********************************************************************** +Wakes up the master thread if it is suspended or being suspended. */ +UNIV_INTERN +void +srv_wake_master_thread(void); +/*========================*/ +/************************************************************************* +Puts an OS thread to wait if there are too many concurrent threads +(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */ +UNIV_INTERN +void +srv_conc_enter_innodb( +/*==================*/ + trx_t* trx); /* in: transaction object associated with the + thread */ +/************************************************************************* +This lets a thread enter InnoDB regardless of the number of threads inside +InnoDB. This must be called when a thread ends a lock wait. */ +UNIV_INTERN +void +srv_conc_force_enter_innodb( +/*========================*/ + trx_t* trx); /* in: transaction object associated with the + thread */ +/************************************************************************* +This must be called when a thread exits InnoDB in a lock wait or at the +end of an SQL statement. 
*/ +UNIV_INTERN +void +srv_conc_force_exit_innodb( +/*=======================*/ + trx_t* trx); /* in: transaction object associated with the + thread */ +/************************************************************************* +This must be called when a thread exits InnoDB. */ +UNIV_INTERN +void +srv_conc_exit_innodb( +/*=================*/ + trx_t* trx); /* in: transaction object associated with the + thread */ +/******************************************************************* +Puts a MySQL OS thread to wait for a lock to be released. If an error +occurs during the wait trx->error_state associated with thr is +!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK +are possible errors. DB_DEADLOCK is returned if selective deadlock +resolution chose this transaction as a victim. */ +UNIV_INTERN +void +srv_suspend_mysql_thread( +/*=====================*/ + que_thr_t* thr); /* in: query thread associated with the MySQL + OS thread */ +/************************************************************************ +Releases a MySQL OS thread waiting for a lock to be released, if the +thread is already suspended. */ +UNIV_INTERN +void +srv_release_mysql_thread_if_suspended( +/*==================================*/ + que_thr_t* thr); /* in: query thread associated with the + MySQL OS thread */ +/************************************************************************* +A thread which wakes up threads whose lock wait may have lasted too long. +This also prints the info output by various InnoDB monitors. */ +UNIV_INTERN +os_thread_ret_t +srv_lock_timeout_and_monitor_thread( +/*================================*/ + /* out: a dummy parameter */ + void* arg); /* in: a dummy parameter required by + os_thread_create */ +/************************************************************************* +A thread which prints warnings about semaphore waits which have lasted +too long. These can be used to track bugs which cause hangs. 
*/ +UNIV_INTERN +os_thread_ret_t +srv_error_monitor_thread( +/*=====================*/ + /* out: a dummy parameter */ + void* arg); /* in: a dummy parameter required by + os_thread_create */ +/********************************************************************** +Outputs to a file the output of the InnoDB Monitor. */ +UNIV_INTERN +void +srv_printf_innodb_monitor( +/*======================*/ + FILE* file, /* in: output stream */ + ulint* trx_start, /* out: file position of the start of + the list of active transactions */ + ulint* trx_end); /* out: file position of the end of + the list of active transactions */ + +/********************************************************************** +Function to pass InnoDB status variables to MySQL */ +UNIV_INTERN +void +srv_export_innodb_status(void); +/*=====================*/ + +/* Thread slot in the thread table */ +typedef struct srv_slot_struct srv_slot_t; + +/* Thread table is an array of slots */ +typedef srv_slot_t srv_table_t; + +/* In this structure we store status variables to be passed to MySQL */ +struct export_var_struct{ + ulint innodb_data_pending_reads; + ulint innodb_data_pending_writes; + ulint innodb_data_pending_fsyncs; + ulint innodb_data_fsyncs; + ulint innodb_data_read; + ulint innodb_data_writes; + ulint innodb_data_written; + ulint innodb_data_reads; + ulint innodb_buffer_pool_pages_total; + ulint innodb_buffer_pool_pages_data; + ulint innodb_buffer_pool_pages_dirty; + ulint innodb_buffer_pool_pages_misc; + ulint innodb_buffer_pool_pages_free; +#ifdef UNIV_DEBUG + ulint innodb_buffer_pool_pages_latched; +#endif /* UNIV_DEBUG */ + ulint innodb_buffer_pool_read_requests; + ulint innodb_buffer_pool_reads; + ulint innodb_buffer_pool_wait_free; + ulint innodb_buffer_pool_pages_flushed; + ulint innodb_buffer_pool_write_requests; + ulint innodb_buffer_pool_read_ahead_seq; + ulint innodb_buffer_pool_read_ahead_rnd; + ulint innodb_dblwr_pages_written; + ulint innodb_dblwr_writes; + ibool 
innodb_have_atomic_builtins; + ulint innodb_log_waits; + ulint innodb_log_write_requests; + ulint innodb_log_writes; + ulint innodb_os_log_written; + ulint innodb_os_log_fsyncs; + ulint innodb_os_log_pending_writes; + ulint innodb_os_log_pending_fsyncs; + ulint innodb_page_size; + ulint innodb_pages_created; + ulint innodb_pages_read; + ulint innodb_pages_written; + ulint innodb_row_lock_waits; + ulint innodb_row_lock_current_waits; + ib_int64_t innodb_row_lock_time; + ulint innodb_row_lock_time_avg; + ulint innodb_row_lock_time_max; + ulint innodb_rows_read; + ulint innodb_rows_inserted; + ulint innodb_rows_updated; + ulint innodb_rows_deleted; +}; + +/* The server system struct */ +struct srv_sys_struct{ + srv_table_t* threads; /* server thread table */ + UT_LIST_BASE_NODE_T(que_thr_t) + tasks; /* task queue */ + dict_index_t* dummy_ind1; /* dummy index for old-style + supremum and infimum records */ + dict_index_t* dummy_ind2; /* dummy index for new-style + supremum and infimum records */ +}; + +extern ulint srv_n_threads_active[]; + +#endif diff --git a/storage/xtradb/include/srv0srv.ic b/storage/xtradb/include/srv0srv.ic new file mode 100644 index 00000000000..93d675f1dca --- /dev/null +++ b/storage/xtradb/include/srv0srv.ic @@ -0,0 +1,23 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Server main program + +Created 10/4/1995 Heikki Tuuri +*******************************************************/ diff --git a/storage/xtradb/include/srv0start.h b/storage/xtradb/include/srv0start.h new file mode 100644 index 00000000000..15fa3b8f95f --- /dev/null +++ b/storage/xtradb/include/srv0start.h @@ -0,0 +1,118 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Starts the Innobase database server + +Created 10/10/1995 Heikki Tuuri +*******************************************************/ + +#ifndef srv0start_h +#define srv0start_h + +#include "univ.i" +#include "ut0byte.h" + +/************************************************************************* +Normalizes a directory path for Windows: converts slashes to backslashes. 
*/ +UNIV_INTERN +void +srv_normalize_path_for_win( +/*=======================*/ + char* str); /* in/out: null-terminated character string */ +/************************************************************************* +Reads the data files and their sizes from a character string given in +the .cnf file. */ +UNIV_INTERN +ibool +srv_parse_data_file_paths_and_sizes( +/*================================*/ + /* out: TRUE if ok, FALSE on parse error */ + char* str); /* in/out: the data file path string */ +/************************************************************************* +Reads log group home directories from a character string given in +the .cnf file. */ +UNIV_INTERN +ibool +srv_parse_log_group_home_dirs( +/*==========================*/ + /* out: TRUE if ok, FALSE on parse error */ + char* str); /* in/out: character string */ +/************************************************************************* +Frees the memory allocated by srv_parse_data_file_paths_and_sizes() +and srv_parse_log_group_home_dirs(). */ +UNIV_INTERN +void +srv_free_paths_and_sizes(void); +/*==========================*/ +/************************************************************************* +Adds a slash or a backslash to the end of a string if it is missing +and the string is not empty. */ +UNIV_INTERN +char* +srv_add_path_separator_if_needed( +/*=============================*/ + /* out: string which has the separator if the + string is not empty */ + char* str); /* in: null-terminated character string */ +/******************************************************************** +Starts Innobase and creates a new database if database files +are not found and the user wants. */ +UNIV_INTERN +int +innobase_start_or_create_for_mysql(void); +/*====================================*/ + /* out: DB_SUCCESS or error code */ +/******************************************************************** +Shuts down the Innobase database. 
*/ +UNIV_INTERN +int +innobase_shutdown_for_mysql(void); +/*=============================*/ + /* out: DB_SUCCESS or error code */ +extern ib_uint64_t srv_shutdown_lsn; +extern ib_uint64_t srv_start_lsn; + +#ifdef __NETWARE__ +void set_panic_flag_for_netware(void); +#endif + +#ifdef HAVE_DARWIN_THREADS +extern ibool srv_have_fullfsync; +#endif + +extern ibool srv_is_being_started; +extern ibool srv_was_started; +extern ibool srv_startup_is_before_trx_rollback_phase; +extern ibool srv_is_being_shut_down; + +extern ibool srv_start_raw_disk_in_use; + +/* At a shutdown the value first climbs from 0 to SRV_SHUTDOWN_CLEANUP +and then to SRV_SHUTDOWN_LAST_PHASE, and so on */ + +extern ulint srv_shutdown_state; + +#define SRV_SHUTDOWN_CLEANUP 1 +#define SRV_SHUTDOWN_LAST_PHASE 2 +#define SRV_SHUTDOWN_EXIT_THREADS 3 + +/* Log 'spaces' have id's >= this */ +#define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0UL + +#endif diff --git a/storage/xtradb/include/sync0arr.h b/storage/xtradb/include/sync0arr.h new file mode 100644 index 00000000000..cc01c9ac5c8 --- /dev/null +++ b/storage/xtradb/include/sync0arr.h @@ -0,0 +1,138 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The wait array used in synchronization primitives + +Created 9/5/1995 Heikki Tuuri +*******************************************************/ + +#ifndef sync0arr_h +#define sync0arr_h + +#include "univ.i" +#include "ut0lst.h" +#include "ut0mem.h" +#include "os0thread.h" + +typedef struct sync_cell_struct sync_cell_t; +typedef struct sync_array_struct sync_array_t; + +#define SYNC_ARRAY_OS_MUTEX 1 +#define SYNC_ARRAY_MUTEX 2 + +/*********************************************************************** +Creates a synchronization wait array. It is protected by a mutex +which is automatically reserved when the functions operating on it +are called. */ +UNIV_INTERN +sync_array_t* +sync_array_create( +/*==============*/ + /* out, own: created wait array */ + ulint n_cells, /* in: number of cells in the array + to create */ + ulint protection); /* in: either SYNC_ARRAY_OS_MUTEX or + SYNC_ARRAY_MUTEX: determines the type + of mutex protecting the data structure */ +/********************************************************************** +Frees the resources in a wait array. */ +UNIV_INTERN +void +sync_array_free( +/*============*/ + sync_array_t* arr); /* in, own: sync wait array */ +/********************************************************************** +Reserves a wait array cell for waiting for an object. +The event of the cell is reset to nonsignalled state. 
*/ +UNIV_INTERN +void +sync_array_reserve_cell( +/*====================*/ + sync_array_t* arr, /* in: wait array */ + void* object, /* in: pointer to the object to wait for */ + ulint type, /* in: lock request type */ + const char* file, /* in: file where requested */ + ulint line, /* in: line where requested */ + ulint* index); /* out: index of the reserved cell */ +/********************************************************************** +This function should be called when a thread starts to wait on +a wait array cell. In the debug version this function checks +if the wait for a semaphore will result in a deadlock, in which +case prints info and asserts. */ +UNIV_INTERN +void +sync_array_wait_event( +/*==================*/ + sync_array_t* arr, /* in: wait array */ + ulint index); /* in: index of the reserved cell */ +/********************************************************************** +Frees the cell. NOTE! sync_array_wait_event frees the cell +automatically! */ +UNIV_INTERN +void +sync_array_free_cell( +/*=================*/ + sync_array_t* arr, /* in: wait array */ + ulint index); /* in: index of the cell in array */ +/************************************************************************** +Note that one of the wait objects was signalled. */ +UNIV_INTERN +void +sync_array_object_signalled( +/*========================*/ + sync_array_t* arr); /* in: wait array */ +/************************************************************************** +If the wakeup algorithm does not work perfectly at semaphore releases, +this function will do the waking (see the comment in mutex_exit). This +function should be called about every 1 second in the server. */ +UNIV_INTERN +void +sync_arr_wake_threads_if_sema_free(void); +/*====================================*/ +/************************************************************************** +Prints warnings of long semaphore waits to stderr. 
*/ +UNIV_INTERN +ibool +sync_array_print_long_waits(void); +/*=============================*/ + /* out: TRUE if fatal semaphore wait threshold + was exceeded */ +/************************************************************************ +Validates the integrity of the wait array. Checks +that the number of reserved cells equals the count variable. */ +UNIV_INTERN +void +sync_array_validate( +/*================*/ + sync_array_t* arr); /* in: sync wait array */ +/************************************************************************** +Prints info of the wait array. */ +UNIV_INTERN +void +sync_array_print_info( +/*==================*/ + FILE* file, /* in: file where to print */ + sync_array_t* arr); /* in: wait array */ + + +#ifndef UNIV_NONINL +#include "sync0arr.ic" +#endif /* !UNIV_NONINL */ + +#endif /* sync0arr_h */ diff --git a/storage/xtradb/include/sync0arr.ic b/storage/xtradb/include/sync0arr.ic new file mode 100644 index 00000000000..09a562a4723 --- /dev/null +++ b/storage/xtradb/include/sync0arr.ic @@ -0,0 +1,26 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The wait array for synchronization primitives + +Inline code + +Created 9/5/1995 Heikki Tuuri +*******************************************************/ + diff --git a/storage/xtradb/include/sync0rw.h b/storage/xtradb/include/sync0rw.h new file mode 100644 index 00000000000..89c63af42b1 --- /dev/null +++ b/storage/xtradb/include/sync0rw.h @@ -0,0 +1,603 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2008, Google Inc. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The read-write lock (for threads, not for database transactions) + +Created 9/11/1995 Heikki Tuuri +*******************************************************/ + +#ifndef sync0rw_h +#define sync0rw_h + +#include "univ.i" +#include "ut0lst.h" +#include "sync0sync.h" +#include "os0sync.h" + +/* The following undef is to prevent a name conflict with a macro +in MySQL: */ +#undef rw_lock_t + +/* Latch types; these are used also in btr0btr.h: keep the numerical values +smaller than 30 and the order of the numerical values like below! */ +#define RW_S_LATCH 1 +#define RW_X_LATCH 2 +#define RW_NO_LATCH 3 + +/* We decrement lock_word by this amount for each x_lock. It is also the +start value for the lock_word, meaning that it limits the maximum number +of concurrent read locks before the rw_lock breaks. The current value of +0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/ +#define X_LOCK_DECR 0x00100000 + +typedef struct rw_lock_struct rw_lock_t; +#ifdef UNIV_SYNC_DEBUG +typedef struct rw_lock_debug_struct rw_lock_debug_t; +#endif /* UNIV_SYNC_DEBUG */ + +typedef UT_LIST_BASE_NODE_T(rw_lock_t) rw_lock_list_t; + +/* Global list of rw-locks; rw_lock_struct carries the matching 'list' node. +NOTE(review): rw_lock_list_mutex presumably protects this list -- confirm +in sync0rw.c */ +extern rw_lock_list_t rw_lock_list; +extern mutex_t rw_lock_list_mutex; + +#ifdef UNIV_SYNC_DEBUG +/* The global mutex which protects debug info lists of all rw-locks. +To modify the debug info list of an rw-lock, this mutex has to be + +acquired in addition to the mutex protecting the lock. 
*/ +extern mutex_t rw_lock_debug_mutex; +extern os_event_t rw_lock_debug_event; /* If deadlock detection does + not get immediately the mutex it + may wait for this event */ +extern ibool rw_lock_debug_waiters; /* This is set to TRUE, if + there may be waiters for the event */ +#endif /* UNIV_SYNC_DEBUG */ + +/* Global rw-lock statistics counters, defined elsewhere. +NOTE(review): the names suggest spin-wait, spin-round, OS-wait and exit +counts for s- and x-locking -- confirm in sync0rw.c */ +extern ib_int64_t rw_s_spin_wait_count; +extern ib_int64_t rw_s_spin_round_count; +extern ib_int64_t rw_s_exit_count; +extern ib_int64_t rw_s_os_wait_count; +extern ib_int64_t rw_x_spin_wait_count; +extern ib_int64_t rw_x_spin_round_count; +extern ib_int64_t rw_x_os_wait_count; +extern ib_int64_t rw_x_exit_count; + +/********************************************************************** +Creates, or rather, initializes an rw-lock object in a specified memory +location (which must be appropriately aligned). The rw-lock is initialized +to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free +is necessary only if the memory block containing it is freed. +The macro always passes __FILE__ and __LINE__ of the call site. The +stringified lock expression (#L) is passed only when UNIV_DEBUG is defined, +and 'level' only when UNIV_SYNC_DEBUG is defined as well; in all other +builds the 'level' argument is dropped. */ +#ifdef UNIV_DEBUG +# ifdef UNIV_SYNC_DEBUG +# define rw_lock_create(L, level) \ + rw_lock_create_func((L), (level), #L, __FILE__, __LINE__) +# else /* UNIV_SYNC_DEBUG */ +# define rw_lock_create(L, level) \ + rw_lock_create_func((L), #L, __FILE__, __LINE__) +# endif /* UNIV_SYNC_DEBUG */ +#else /* UNIV_DEBUG */ +# define rw_lock_create(L, level) \ + rw_lock_create_func((L), __FILE__, __LINE__) +#endif /* UNIV_DEBUG */ + +/********************************************************************** +Creates, or rather, initializes an rw-lock object in a specified memory +location (which must be appropriately aligned). The rw-lock is initialized +to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free +is necessary only if the memory block containing it is freed. 
*/ +UNIV_INTERN +void +rw_lock_create_func( +/*================*/ + rw_lock_t* lock, /* in: pointer to memory */ +#ifdef UNIV_DEBUG +# ifdef UNIV_SYNC_DEBUG + ulint level, /* in: level */ +# endif /* UNIV_SYNC_DEBUG */ + const char* cmutex_name, /* in: mutex name */ +#endif /* UNIV_DEBUG */ + const char* cfile_name, /* in: file name where created */ + ulint cline); /* in: file line where created */ +/********************************************************************** +Calling this function is obligatory only if the memory buffer containing +the rw-lock is freed. Removes an rw-lock object from the global list. The +rw-lock is checked to be in the non-locked state. */ +UNIV_INTERN +void +rw_lock_free( +/*=========*/ + rw_lock_t* lock); /* in: rw-lock */ +#ifdef UNIV_DEBUG +/********************************************************************** +Checks that the rw-lock has been initialized and that there are no +simultaneous shared and exclusive locks. */ +UNIV_INTERN +ibool +rw_lock_validate( +/*=============*/ + rw_lock_t* lock); /* in: rw-lock */ +#endif /* UNIV_DEBUG */ +/****************************************************************** +NOTE! The following macro should be used in rw s-locking, not the +corresponding function. It s-locks M with pass value 0 and records the +__FILE__ and __LINE__ of the call site. */ + +#define rw_lock_s_lock(M) rw_lock_s_lock_func(\ + (M), 0, __FILE__, __LINE__) +/****************************************************************** +NOTE! The following macro should be used in rw s-locking, not the +corresponding function. Like rw_lock_s_lock, but with the caller-supplied +pass value P. */ + +#define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(\ + (M), (P), __FILE__, __LINE__) +/****************************************************************** +NOTE! The following macro should be used in rw s-locking, not the +corresponding function. It expands to rw_lock_s_lock_low with pass value 0; +F and L are the file name and line supplied by the caller. */ + +#define rw_lock_s_lock_nowait(M, F, L) rw_lock_s_lock_low(\ + (M), 0, (F), (L)) +/********************************************************************** +Low-level function which tries to lock an rw-lock in s-mode. Performs no +spinning. 
*/ +UNIV_INLINE +ibool +rw_lock_s_lock_low( +/*===============*/ + /* out: TRUE if success */ + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass __attribute__((unused)), + /* in: pass value; != 0, if the lock will be + passed to another thread to unlock */ + const char* file_name, /* in: file name where lock requested */ + ulint line); /* in: line where requested */ +/********************************************************************** +NOTE! Use the corresponding macro (rw_lock_s_lock or rw_lock_s_lock_gen), +not directly this function, except if +you supply the file name and line number. Lock an rw-lock in shared mode +for the current thread. If the rw-lock is locked in exclusive mode, or +there is an exclusive lock request waiting, the function spins a preset +time (controlled by SYNC_SPIN_ROUNDS), waiting for the lock, before +suspending the thread. */ +UNIV_INLINE +void +rw_lock_s_lock_func( +/*================*/ + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass, /* in: pass value; != 0, if the lock will + be passed to another thread to unlock */ + const char* file_name,/* in: file name where lock requested */ + ulint line); /* in: line where requested */ +/********************************************************************** +NOTE! Use the corresponding macro (rw_lock_x_lock_nowait), not directly +this function! Lock an +rw-lock in exclusive mode for the current thread if the lock can be +obtained immediately. */ +UNIV_INLINE +ibool +rw_lock_x_lock_func_nowait( +/*=======================*/ + /* out: TRUE if success */ + rw_lock_t* lock, /* in: pointer to rw-lock */ + const char* file_name,/* in: file name where lock requested */ + ulint line); /* in: line where requested */ +/********************************************************************** +Releases a shared mode lock. 
*/ +UNIV_INLINE +void +rw_lock_s_unlock_func( +/*==================*/ + rw_lock_t* lock /* in: rw-lock */ +#ifdef UNIV_SYNC_DEBUG + ,ulint pass /* in: pass value; != 0, if the lock may have + been passed to another thread to unlock */ +#endif + ); +/*********************************************************************** +Releases a shared mode lock. The pass value 0 is supplied only when +UNIV_SYNC_DEBUG is defined, because rw_lock_s_unlock_func has no pass +parameter otherwise. */ + +#ifdef UNIV_SYNC_DEBUG +#define rw_lock_s_unlock(L) rw_lock_s_unlock_func(L, 0) +#else +#define rw_lock_s_unlock(L) rw_lock_s_unlock_func(L) +#endif +/*********************************************************************** +Releases a shared mode lock with pass value P. Without UNIV_SYNC_DEBUG +the macro drops P, so the P expression is never evaluated. */ + +#ifdef UNIV_SYNC_DEBUG +#define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L, P) +#else +#define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L) +#endif +/****************************************************************** +NOTE! The following macro should be used in rw x-locking, not the +corresponding function. */ + +#define rw_lock_x_lock(M) rw_lock_x_lock_func(\ + (M), 0, __FILE__, __LINE__) +/****************************************************************** +NOTE! The following macro should be used in rw x-locking, not the +corresponding function. */ + +#define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(\ + (M), (P), __FILE__, __LINE__) +/****************************************************************** +NOTE! The following macro should be used in rw x-locking, not the +corresponding function. */ + +#define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(\ + (M), __FILE__, __LINE__) +/********************************************************************** +NOTE! Use the corresponding macro, not directly this function! Lock an +rw-lock in exclusive mode for the current thread. If the rw-lock is locked +in shared or exclusive mode, or there is an exclusive lock request waiting, +the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting +for the lock, before suspending the thread. 
If the same thread has an x-lock +on the rw-lock, locking succeeds, with the following exception: if pass != 0, +only a single x-lock may be taken on the lock. NOTE: If the same thread has +an s-lock, locking does not succeed! */ +UNIV_INTERN +void +rw_lock_x_lock_func( +/*================*/ + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass, /* in: pass value; != 0, if the lock will + be passed to another thread to unlock */ + const char* file_name,/* in: file name where lock requested */ + ulint line); /* in: line where requested */ +/********************************************************************** +Releases an exclusive mode lock. */ +UNIV_INLINE +void +rw_lock_x_unlock_func( +/*==================*/ + rw_lock_t* lock /* in: rw-lock */ +#ifdef UNIV_SYNC_DEBUG + ,ulint pass /* in: pass value; != 0, if the lock may have + been passed to another thread to unlock */ +#endif + ); +/*********************************************************************** +Releases an exclusive mode lock. The pass value 0 is supplied only when +UNIV_SYNC_DEBUG is defined, because rw_lock_x_unlock_func has no pass +parameter otherwise. */ + +#ifdef UNIV_SYNC_DEBUG +#define rw_lock_x_unlock(L) rw_lock_x_unlock_func(L, 0) +#else +#define rw_lock_x_unlock(L) rw_lock_x_unlock_func(L) +#endif +/*********************************************************************** +Releases an exclusive mode lock with pass value P. Without UNIV_SYNC_DEBUG +the macro drops P, so the P expression is never evaluated. */ + +#ifdef UNIV_SYNC_DEBUG +#define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L, P) +#else +#define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L) +#endif +/********************************************************************** +Low-level function which locks an rw-lock in s-mode when we know that it +is possible and none else is currently accessing the rw-lock structure. +Then we can do the locking without reserving the mutex. 
*/ +UNIV_INLINE +void +rw_lock_s_lock_direct( +/*==================*/ + rw_lock_t* lock, /* in: pointer to rw-lock */ + const char* file_name, /* in: file name where requested */ + ulint line /* in: line where lock requested */ +); +/********************************************************************** +Low-level function which locks an rw-lock in x-mode when we know that it +is not locked and none else is currently accessing the rw-lock structure. +Then we can do the locking without reserving the mutex. */ +UNIV_INLINE +void +rw_lock_x_lock_direct( +/*==================*/ + rw_lock_t* lock, /* in: pointer to rw-lock */ + const char* file_name, /* in: file name where requested */ + ulint line /* in: line where lock requested */ +); +/********************************************************************** +This function is used in the insert buffer to move the ownership of an +x-latch on a buffer frame to the current thread. The x-latch was set by +the buffer read operation and it protected the buffer frame while the +read was done. The ownership is moved because we want the current +thread to be able to acquire a second x-latch which is stored in an mtr. +This, in turn, is needed to pass the debug checks of index page +operations. */ +UNIV_INTERN +void +rw_lock_x_lock_move_ownership( +/*==========================*/ + rw_lock_t* lock); /* in: lock which was x-locked in the + buffer read */ +/********************************************************************** +Releases a shared mode lock when we know there are no waiters and none +else will access the lock during the time this function is executed. */ +UNIV_INLINE +void +rw_lock_s_unlock_direct( +/*====================*/ + rw_lock_t* lock); /* in: rw-lock */ +/********************************************************************** +Releases an exclusive mode lock when we know there are no waiters, and +none else will access the lock during the time this function is executed. 
*/ +UNIV_INLINE +void +rw_lock_x_unlock_direct( +/*====================*/ + rw_lock_t* lock); /* in: rw-lock */ +/********************************************************************** +Returns the value of writer_count for the lock. Does not reserve the lock +mutex, so the caller must be sure it is not changed during the call. */ +UNIV_INLINE +ulint +rw_lock_get_x_lock_count( +/*=====================*/ + /* out: value of writer_count */ + rw_lock_t* lock); /* in: rw-lock */ +/************************************************************************ +Accessor functions for rw lock. */ +UNIV_INLINE +ulint +rw_lock_get_s_waiters( +/*==================*/ + rw_lock_t* lock); /* in: rw-lock; returns lock->s_waiters */ +UNIV_INLINE +ulint +rw_lock_get_x_waiters( +/*==================*/ + rw_lock_t* lock); /* in: rw-lock; returns lock->x_waiters */ +UNIV_INLINE +ulint +rw_lock_get_wx_waiters( +/*================*/ + rw_lock_t* lock); /* in: rw-lock; returns lock->wait_ex_waiters */ +UNIV_INLINE +ulint +rw_lock_get_writer( +/*===============*/ + rw_lock_t* lock); /* in: rw-lock; returns RW_LOCK_NOT_LOCKED, RW_LOCK_WAIT_EX or RW_LOCK_EX */ +UNIV_INLINE +ulint +rw_lock_get_reader_count( +/*=====================*/ + rw_lock_t* lock); /* in: rw-lock; returns the number of readers */ +/********************************************************************** +Decrements lock_word the specified amount if it is greater than 0. +This is used by both s_lock and x_lock operations. */ +UNIV_INLINE +ibool +rw_lock_lock_word_decr( +/*===================*/ + /* out: TRUE if decr occurs */ + rw_lock_t* lock, /* in: rw-lock */ + ulint amount); /* in: amount to decrement */ +/********************************************************************** +Increments lock_word the specified amount and returns new value. */ +UNIV_INLINE +lint +rw_lock_lock_word_incr( +/*===================*/ + /* out: lock->lock_word after increment */ + rw_lock_t* lock, /* in: rw-lock */ + ulint amount); /* in: amount of increment */ +/********************************************************************** +This function sets the lock->writer_thread and lock->recursive fields. 
+For platforms where we are using atomic builtins instead of lock->mutex +it sets the lock->writer_thread field using atomics to ensure memory +ordering. Note that it is assumed that the caller of this function +effectively owns the lock i.e.: nobody else is allowed to modify +lock->writer_thread at this point in time. +The protocol is that lock->writer_thread MUST be updated BEFORE the +lock->recursive flag is set. */ +UNIV_INLINE +void +rw_lock_set_writer_id_and_recursion_flag( +/*=====================================*/ + rw_lock_t* lock, /* in/out: lock to work on */ + ibool recursive); /* in: TRUE if recursion + allowed */ +#ifdef UNIV_SYNC_DEBUG +/********************************************************************** +Checks if the thread has locked the rw-lock in the specified mode, with +the pass value == 0. Declared only when UNIV_SYNC_DEBUG is defined. */ +UNIV_INTERN +ibool +rw_lock_own( +/*========*/ + rw_lock_t* lock, /* in: rw-lock */ + ulint lock_type); /* in: lock type: RW_LOCK_SHARED, + RW_LOCK_EX */ +#endif /* UNIV_SYNC_DEBUG */ +/********************************************************************** +Checks if somebody has locked the rw-lock in the specified mode. +NOTE(review): presumably returns TRUE when the lock is held in that mode; +the implementation is not part of this header -- confirm in sync0rw.c */ +UNIV_INTERN +ibool +rw_lock_is_locked( +/*==============*/ + rw_lock_t* lock, /* in: rw-lock */ + ulint lock_type); /* in: lock type: RW_LOCK_SHARED, + RW_LOCK_EX */ +#ifdef UNIV_SYNC_DEBUG +/******************************************************************* +Prints debug info of an rw-lock. */ +UNIV_INTERN +void +rw_lock_print( +/*==========*/ + rw_lock_t* lock); /* in: rw-lock */ +/******************************************************************* +Prints debug info of currently locked rw-locks. */ +UNIV_INTERN +void +rw_lock_list_print_info( +/*====================*/ + FILE* file); /* in: file where to print */ +/******************************************************************* +Returns the number of currently locked rw-locks. +Works only in the debug version. 
*/ +UNIV_INTERN +ulint +rw_lock_n_locked(void); +/*==================*/ + +/*#####################################################################*/ + +/********************************************************************** +Acquires the debug mutex. We cannot use the mutex defined in sync0sync, +because the debug mutex is also acquired in sync0arr while holding the OS +mutex protecting the sync array, and the ordinary mutex_enter might +recursively call routines in sync0arr, leading to a deadlock on the OS +mutex. */ +UNIV_INTERN +void +rw_lock_debug_mutex_enter(void); +/*==========================*/ +/********************************************************************** +Releases the debug mutex. */ +UNIV_INTERN +void +rw_lock_debug_mutex_exit(void); +/*==========================*/ +/************************************************************************* +Prints info of a debug struct. */ +UNIV_INTERN +void +rw_lock_debug_print( +/*================*/ + rw_lock_debug_t* info); /* in: debug struct */ +#endif /* UNIV_SYNC_DEBUG */ + +/* Compilation is aborted unless INNODB_RW_LOCKS_USE_ATOMICS is defined. +As a consequence the non-atomic branches guarded by that macro further +down in this header are never compiled. */ +#ifndef INNODB_RW_LOCKS_USE_ATOMICS +#error INNODB_RW_LOCKS_USE_ATOMICS is not defined. Do you use enough new GCC or compatibles? +#error Or do you use exact options for CFLAGS? +#error e.g. (for x86_32): "-m32 -march=i586 -mtune=i686" +#error e.g. (for Sparc_64): "-m64 -mcpu=v9" +#error Otherwise, this build may be slower than normal version. +#endif /* !INNODB_RW_LOCKS_USE_ATOMICS */ + +/* NOTE! The structure appears here only for the compiler to know its size. +Do not use its fields directly! The structure used in the spin lock +implementation of a read-write lock. Several threads may have a shared lock +simultaneously in this lock, but only one writer may have an exclusive lock, +in which case no shared locks are allowed. To prevent starving of a writer +blocked by readers, a writer may queue for x-lock by decrementing lock_word: +no new readers will be let in while the thread waits for readers to exit. 
*/ + +struct rw_lock_struct { + volatile lint lock_word; + /* Holds the state of the lock. */ +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + volatile ulint s_waiters; /* 1: there are waiters (s_lock) */ + volatile ulint x_waiters; /* 1: there are waiters (x_lock) */ + volatile ulint wait_ex_waiters; /* 1: there are waiters (wait_ex) */ + volatile ulint reader_count; /* Number of readers who have locked this + lock in the shared mode */ + volatile ulint writer; /* Writer state; rw_lock_get_writer() tests + it against RW_LOCK_NOT_LOCKED */ +#else + volatile ulint waiters;/* 1: there are waiters */ +#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ + volatile ibool recursive;/* Default value FALSE which means the lock + is non-recursive. The value is typically set + to TRUE making normal rw_locks recursive. In + case of asynchronous IO, when a non-zero + value of 'pass' is passed then we keep the + lock non-recursive. + This flag also tells us about the state of + writer_thread field. If this flag is set + then writer_thread MUST contain the thread + id of the current x-holder or wait-x thread. + This flag must be reset in x_unlock + functions before incrementing the lock_word */ + volatile os_thread_id_t writer_thread; + /* Thread id of writer thread. Is only + guaranteed to have sane and non-stale + value iff recursive flag is set. */ +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + volatile ulint writer_count; /* Number of times the same thread has + recursively locked the lock in the exclusive + mode */ + /* Used by sync0arr.c for thread queueing */ + os_event_t s_event; /* Used for s_lock */ + os_event_t x_event; /* Used for x_lock */ +#else + os_event_t event; /* Used by sync0arr.c for thread queueing */ +#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ + os_event_t wait_ex_event; + /* Event for next-writer to wait on. A thread + must decrement lock_word before waiting. 
*/ +#ifndef INNODB_RW_LOCKS_USE_ATOMICS + mutex_t mutex; /* The mutex protecting rw_lock_struct */ +#endif /* !INNODB_RW_LOCKS_USE_ATOMICS */ + + UT_LIST_NODE_T(rw_lock_t) list; + /* All allocated rw locks are put into a + list */ +#ifdef UNIV_SYNC_DEBUG + UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list; + /* In the debug version: pointer to the debug + info list of the lock */ + ulint level; /* Level in the global latching order. */ +#endif /* UNIV_SYNC_DEBUG */ + ulint count_os_wait; /* Count of os_waits. May not be accurate */ + const char* cfile_name;/* File name where lock created */ + /* last s-lock file/line is not guaranteed to be correct */ + const char* last_s_file_name;/* File name where last s-locked */ + const char* last_x_file_name;/* File name where last x-locked */ + volatile ibool writer_is_wait_ex; + /* This is TRUE if the writer field is + RW_LOCK_WAIT_EX; this field is located far + from the memory update hotspot fields which + are at the start of this struct, thus we can + peek this field without causing much memory + bus traffic */ + unsigned cline:14; /* Line where created (14-bit field: values above 16383 do not fit) */ + unsigned last_s_line:14; /* Line number where last time s-locked */ + unsigned last_x_line:14; /* Line number where last time x-locked */ + ulint magic_n; /* NOTE(review): presumably holds RW_LOCK_MAGIC_N for validity checks -- confirm in sync0rw.c */ +}; + +#define RW_LOCK_MAGIC_N 22643 + +#ifdef UNIV_SYNC_DEBUG +/* The structure for storing debug info of an rw-lock */ +struct rw_lock_debug_struct { + + os_thread_id_t thread_id; /* The thread id of the thread which + locked the rw-lock */ + ulint pass; /* Pass value given in the lock operation */ + ulint lock_type; /* Type of the lock: RW_LOCK_EX, + RW_LOCK_SHARED, RW_LOCK_WAIT_EX */ + const char* file_name;/* File name where the lock was obtained */ + ulint line; /* Line where the rw-lock was locked */ + UT_LIST_NODE_T(rw_lock_debug_t) list; + /* Debug structs are linked in a two-way + list */ +}; +#endif /* UNIV_SYNC_DEBUG */ + +#ifndef UNIV_NONINL +#include "sync0rw.ic" +#endif /* !UNIV_NONINL */ + +#endif /* sync0rw_h */ diff --git
a/storage/xtradb/include/sync0rw.ic b/storage/xtradb/include/sync0rw.ic new file mode 100644 index 00000000000..b09e0072725 --- /dev/null +++ b/storage/xtradb/include/sync0rw.ic @@ -0,0 +1,902 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2008, Google Inc. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The read-write lock (for threads) + +Created 9/11/1995 Heikki Tuuri +*******************************************************/ + +/********************************************************************** +Lock an rw-lock in shared mode for the current thread. If the rw-lock is +locked in exclusive mode, or there is an exclusive lock request waiting, +the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), +waiting for the lock before suspending the thread. 
*/ +UNIV_INTERN +void +rw_lock_s_lock_spin( +/*================*/ + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass, /* in: pass value; != 0, if the lock will + be passed to another thread to unlock */ + const char* file_name,/* in: file name where lock requested */ + ulint line); /* in: line where requested */ +#ifdef UNIV_SYNC_DEBUG +/********************************************************************** +Inserts the debug information for an rw-lock. */ +UNIV_INTERN +void +rw_lock_add_debug_info( +/*===================*/ + rw_lock_t* lock, /* in: rw-lock */ + ulint pass, /* in: pass value */ + ulint lock_type, /* in: lock type */ + const char* file_name, /* in: file where requested */ + ulint line); /* in: line where requested */ +/********************************************************************** +Removes a debug information struct for an rw-lock. */ +UNIV_INTERN +void +rw_lock_remove_debug_info( +/*======================*/ + rw_lock_t* lock, /* in: rw-lock */ + ulint pass, /* in: pass value */ + ulint lock_type); /* in: lock type */ +#endif /* UNIV_SYNC_DEBUG */ + +/************************************************************************ +Accessor functions for rw lock. Each one returns the current value of one +waiter flag with a plain read of the field; no mutex is taken and no +atomic builtin is used. */ +UNIV_INLINE +ulint +rw_lock_get_s_waiters( +/*================*/ + /* out: 1 if waiters, 0 otherwise */ + rw_lock_t* lock) /* in: rw-lock */ +{ + return(lock->s_waiters); +} +UNIV_INLINE +ulint +rw_lock_get_x_waiters( +/*================*/ + rw_lock_t* lock) /* in: rw-lock; returns lock->x_waiters */ +{ + return(lock->x_waiters); +} +UNIV_INLINE +ulint +rw_lock_get_wx_waiters( +/*================*/ + rw_lock_t* lock) /* in: rw-lock; returns lock->wait_ex_waiters */ +{ + return(lock->wait_ex_waiters); +} + +/************************************************************************ +Sets lock->waiters to 1. It is not an error if lock->waiters is already +1. On platforms where ATOMIC builtins are used this function enforces a +memory barrier. 
*/ +/* The three setters below store 1 into the s_waiters, x_waiters and +wait_ex_waiters flag respectively. NOTE(review): the GCC manual documents +__sync_lock_test_and_set as an acquire barrier, not a full barrier -- +confirm that this is sufficient for the callers. */ +UNIV_INLINE +void +rw_lock_set_s_waiter_flag( +/*====================*/ + rw_lock_t* lock) /* in: rw-lock */ +{ +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + // os_compare_and_swap(&lock->s_waiters, 0, 1); + __sync_lock_test_and_set(&lock->s_waiters, 1); +#else /* INNODB_RW_LOCKS_USE_ATOMICS */ + lock->s_waiters = 1; +#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ +} +UNIV_INLINE +void +rw_lock_set_x_waiter_flag( +/*====================*/ + rw_lock_t* lock) /* in: rw-lock */ +{ +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + // os_compare_and_swap(&lock->x_waiters, 0, 1); + __sync_lock_test_and_set(&lock->x_waiters, 1); +#else /* INNODB_RW_LOCKS_USE_ATOMICS */ + lock->x_waiters = 1; +#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ +} +UNIV_INLINE +void +rw_lock_set_wx_waiter_flag( +/*====================*/ + rw_lock_t* lock) /* in: rw-lock */ +{ +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + // os_compare_and_swap(&lock->wait_ex_waiters, 0, 1); + __sync_lock_test_and_set(&lock->wait_ex_waiters, 1); +#else /* INNODB_RW_LOCKS_USE_ATOMICS */ + lock->wait_ex_waiters = 1; +#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ +} + +/************************************************************************ +Resets lock->waiters to 0. It is not an error if lock->waiters is already +0. On platforms where ATOMIC builtins are used this function enforces a +memory barrier. 
*/ +UNIV_INLINE +void +rw_lock_reset_s_waiter_flag( +/*======================*/ + rw_lock_t* lock) /* in: rw-lock */ +{ +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + /* Compare-and-swap 1 -> 0: a full memory barrier, and a no-op when + the flag is already 0. __sync_lock_test_and_set() is only an acquire + barrier and is not guaranteed to store any value other than 1. */ + os_compare_and_swap(&lock->s_waiters, 1, 0); +#else /* INNODB_RW_LOCKS_USE_ATOMICS */ + lock->s_waiters = 0; +#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ +} +UNIV_INLINE +void +rw_lock_reset_x_waiter_flag( +/*======================*/ + rw_lock_t* lock) /* in: rw-lock */ +{ +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + /* Compare-and-swap 1 -> 0: a full memory barrier, and a no-op when + the flag is already 0. __sync_lock_test_and_set() is only an acquire + barrier and is not guaranteed to store any value other than 1. */ + os_compare_and_swap(&lock->x_waiters, 1, 0); +#else /* INNODB_RW_LOCKS_USE_ATOMICS */ + lock->x_waiters = 0; +#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ +} +UNIV_INLINE +void +rw_lock_reset_wx_waiter_flag( +/*======================*/ + rw_lock_t* lock) /* in: rw-lock */ +{ +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + /* Compare-and-swap 1 -> 0: a full memory barrier, and a no-op when + the flag is already 0. __sync_lock_test_and_set() is only an acquire + barrier and is not guaranteed to store any value other than 1. */ + os_compare_and_swap(&lock->wait_ex_waiters, 1, 0); +#else /* INNODB_RW_LOCKS_USE_ATOMICS */ + lock->wait_ex_waiters = 0; +#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ +} + +/********************************************************************** +Returns the write-status of the lock - this function made more sense +with the old rw_lock implementation. */ +UNIV_INLINE +ulint +rw_lock_get_writer( +/*===============*/ + /* out: RW_LOCK_NOT_LOCKED, RW_LOCK_WAIT_EX + or RW_LOCK_EX */ + rw_lock_t* lock) /* in: rw-lock */ +{ +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + if (lock->writer == RW_LOCK_NOT_LOCKED) { + return(RW_LOCK_NOT_LOCKED); + } + + if (lock->writer_is_wait_ex) { + return(RW_LOCK_WAIT_EX); + } else { + return(RW_LOCK_EX); + } +#else + lint lock_word = lock->lock_word; + if(lock_word > 0) { + /* return NOT_LOCKED in s-lock state, like the writer + member of the old lock implementation. 
*/ + return(RW_LOCK_NOT_LOCKED); + } else if (((-lock_word) % X_LOCK_DECR) == 0) { + return(RW_LOCK_EX); + } else { + ut_ad(lock_word > -X_LOCK_DECR); + return(RW_LOCK_WAIT_EX); + } +#endif +} + +/********************************************************************** +Returns the number of readers: lock->reader_count when atomics are used, otherwise the count decoded from lock_word. */ +UNIV_INLINE +ulint +rw_lock_get_reader_count( +/*=====================*/ + rw_lock_t* lock) /* in: rw-lock */ +{ +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + return(lock->reader_count); +#else + lint lock_word = lock->lock_word; + if(lock_word > 0) { + /* s-locked or free, no x-waiters: readers = X_LOCK_DECR - lock_word */ + return(X_LOCK_DECR - lock_word); + } else if (lock_word < 0 && lock_word > -X_LOCK_DECR) { + /* s-locked, with x-waiters: readers = -lock_word */ + return((ulint)(-lock_word)); + } + return(0); +#endif +} + +#ifndef INNODB_RW_LOCKS_USE_ATOMICS +UNIV_INLINE +mutex_t* +rw_lock_get_mutex( +/*==============*/ + rw_lock_t* lock) /* in: rw-lock */ +{ + return(&(lock->mutex)); /* address of the mutex member of the lock */ +} +#endif + +/********************************************************************** +Returns the value of writer_count for the lock. Does not reserve the lock +mutex, so the caller must be sure it is not changed during the call. */ +UNIV_INLINE +ulint +rw_lock_get_x_lock_count( +/*=====================*/ + /* out: value of writer_count */ + rw_lock_t* lock) /* in: rw-lock */ +{ +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + return(lock->writer_count); +#else + lint lock_copy = lock->lock_word; + /* If there is a reader, lock_word is not divisible by X_LOCK_DECR */ + if(lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) { + return(0); + } + return(((-lock_copy) / X_LOCK_DECR) + 1); +#endif +} + +/********************************************************************** +Two different implementations for decrementing the lock_word of a rw_lock: +one for systems supporting atomic operations, one for others. This does +not support recursive x-locks: they should be handled by the caller and +need not be atomic since they are performed by the current lock holder. 
+Returns true if the decrement was made, false if not. */ +UNIV_INLINE +ibool +rw_lock_lock_word_decr( +/*===================*/ + /* out: TRUE if decr occurs */ + rw_lock_t* lock, /* in: rw-lock */ + ulint amount) /* in: amount of decrement */ +{ + +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + + lint local_lock_word = lock->lock_word; + while (local_lock_word > 0) { + if(os_compare_and_swap(&(lock->lock_word), + local_lock_word, + local_lock_word - amount)) { + return(TRUE); + } + local_lock_word = lock->lock_word; + } + return(FALSE); + +#else /* INNODB_RW_LOCKS_USE_ATOMICS */ + + ibool success = FALSE; + mutex_enter(&(lock->mutex)); + if(lock->lock_word > 0) { + lock->lock_word -= amount; + success = TRUE; + } + mutex_exit(&(lock->mutex)); + return(success); + +#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ +} + +/********************************************************************** +Two different implementations for incrementing the lock_word of a rw_lock: +one for systems supporting atomic operations, one for others. +Returns the value of lock_word after increment. */ +UNIV_INLINE +lint +rw_lock_lock_word_incr( +/*===================*/ + /* out: lock->lock_word after increment */ + rw_lock_t* lock, /* in: rw-lock */ + ulint amount) /* in: amount of increment */ +{ + +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + + return(os_atomic_increment(&(lock->lock_word), amount)); + +#else /* INNODB_RW_LOCKS_USE_ATOMICS */ + + lint local_lock_word; + + mutex_enter(&(lock->mutex)); + + lock->lock_word += amount; + local_lock_word = lock->lock_word; + + mutex_exit(&(lock->mutex)); + + return(local_lock_word); + +#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ +} + +/********************************************************************** +This function sets the lock->writer_thread and lock->recursive fields. +For platforms where we are using atomic builtins instead of lock->mutex +it sets the lock->writer_thread field using atomics to ensure memory +ordering. 
Note that it is assumed that the caller of this function +effectively owns the lock i.e.: nobody else is allowed to modify +lock->writer_thread at this point in time. +The protocol is that lock->writer_thread MUST be updated BEFORE the +lock->recursive flag is set. */ +UNIV_INLINE +void +rw_lock_set_writer_id_and_recursion_flag( +/*=====================================*/ + rw_lock_t* lock, /* in/out: lock to work on */ + ibool recursive) /* in: TRUE if recursion + allowed */ +{ + os_thread_id_t curr_thread = os_thread_get_curr_id(); + +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + os_thread_id_t local_thread; + ibool success; + + /* Prevent Valgrind warnings about writer_thread being + uninitialized. It does not matter if writer_thread is + uninitialized, because we are comparing writer_thread against + itself, and the operation should always succeed. */ + UNIV_MEM_VALID(&lock->writer_thread, sizeof lock->writer_thread); + + local_thread = lock->writer_thread; + success = os_compare_and_swap(&lock->writer_thread, + local_thread, curr_thread); + ut_a(success); + lock->recursive = recursive; + +#else /* INNODB_RW_LOCKS_USE_ATOMICS */ + + mutex_enter(&lock->mutex); + lock->writer_thread = curr_thread; + lock->recursive = recursive; + mutex_exit(&lock->mutex); + +#endif /* INNODB_RW_LOCKS_USE_ATOMICS */ +} + +/********************************************************************** +Low-level function which tries to lock an rw-lock in s-mode. Performs no +spinning. 
*/ +UNIV_INLINE +ibool +rw_lock_s_lock_low( +/*===============*/ + /* out: TRUE if success */ + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass __attribute__((unused)), + /* in: pass value; != 0, if the lock will be + passed to another thread to unlock */ + const char* file_name, /* in: file name where lock requested */ + ulint line) /* in: line where requested */ +{ +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + if (UNIV_LIKELY(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED)) { + /* try s-lock: atomically take one unit from lock_word */ + if(__sync_sub_and_fetch(&(lock->lock_word),1) <= 0) { + /* fail: lock_word did not stay positive, so undo the decrement */ + __sync_fetch_and_add(&(lock->lock_word),1); + return(FALSE); /* locking did not succeed */ + } + /* success: count this thread as a reader */ + __sync_fetch_and_add(&(lock->reader_count),1); + } else { + return(FALSE); /* locking did not succeed */ + } +#else + /* TODO: study performance of UNIV_LIKELY branch prediction hints. */ + if (!rw_lock_lock_word_decr(lock, 1)) { + /* Locking did not succeed */ + return(FALSE); + } +#endif + +#ifdef UNIV_SYNC_DEBUG + rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line); +#endif + /* These debugging values are not set safely: they may be incorrect + or even refer to a line that is invalid for the file name. */ + lock->last_s_file_name = file_name; + lock->last_s_line = line; + + return(TRUE); /* locking succeeded */ +} + +/********************************************************************** +Low-level function which locks an rw-lock in s-mode when we know that it +is possible and no one else is currently accessing the rw-lock structure. +Then we can do the locking without reserving the mutex. 
*/ +UNIV_INLINE +void +rw_lock_s_lock_direct( +/*==================*/ + rw_lock_t* lock, /* in: pointer to rw-lock */ + const char* file_name, /* in: file name where requested */ + ulint line) /* in: line where lock requested */ +{ +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED); + ut_ad(rw_lock_get_reader_count(lock) == 0); + + __sync_fetch_and_add(&(lock->reader_count),1); +#else + ut_ad(lock->lock_word == X_LOCK_DECR); + + /* Indicate there is a new reader by decrementing lock_word */ + lock->lock_word--; +#endif + + lock->last_s_file_name = file_name; + lock->last_s_line = line; + +#ifdef UNIV_SYNC_DEBUG + rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, line); +#endif +} + +/********************************************************************** +Low-level function which locks an rw-lock in x-mode when we know that it +is not locked and none else is currently accessing the rw-lock structure. +Then we can do the locking without reserving the mutex. */ +UNIV_INLINE +void +rw_lock_x_lock_direct( +/*==================*/ + rw_lock_t* lock, /* in: pointer to rw-lock */ + const char* file_name, /* in: file name where requested */ + ulint line) /* in: line where lock requested */ +{ + ut_ad(rw_lock_validate(lock)); +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + ut_ad(rw_lock_get_reader_count(lock) == 0); + ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED); + + lock->writer = RW_LOCK_EX; + __sync_fetch_and_add(&(lock->writer_count),1); +#else + ut_ad(lock->lock_word == X_LOCK_DECR); + + lock->lock_word -= X_LOCK_DECR; +#endif + lock->writer_thread = os_thread_get_curr_id(); + lock->recursive = TRUE; + + lock->last_x_file_name = file_name; + lock->last_x_line = line; + +#ifdef UNIV_SYNC_DEBUG + rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line); +#endif +} + +/********************************************************************** +NOTE! Use the corresponding macro, not directly this function! 
Lock an +rw-lock in shared mode for the current thread. If the rw-lock is locked +in exclusive mode, or there is an exclusive lock request waiting, the +function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for +the lock, before suspending the thread. */ +UNIV_INLINE +void +rw_lock_s_lock_func( +/*================*/ + rw_lock_t* lock, /* in: pointer to rw-lock */ + ulint pass, /* in: pass value; != 0, if the lock will + be passed to another thread to unlock */ + const char* file_name,/* in: file name where lock requested */ + ulint line) /* in: line where requested */ +{ + /* NOTE: As we do not know the thread ids for threads which have + s-locked a latch, and s-lockers will be served only after waiting + x-lock requests have been fulfilled, then if this thread already + owns an s-lock here, it may end up in a deadlock with another thread + which requests an x-lock here. Therefore, we will forbid recursive + s-locking of a latch: the following assert will warn the programmer + of the possibility of this kind of a deadlock. If we want to implement + safe recursive s-locking, we should keep in a list the thread ids of + the threads which have s-locked a latch. This would use some CPU + time. */ + +#ifdef UNIV_SYNC_DEBUG + ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */ +#endif /* UNIV_SYNC_DEBUG */ + + /* TODO: study performance of UNIV_LIKELY branch prediction hints. */ + if (rw_lock_s_lock_low(lock, pass, file_name, line)) { + + return; /* Success */ + } else { + /* Did not succeed, try spin wait */ + + rw_lock_s_lock_spin(lock, pass, file_name, line); + + return; + } +} + +/********************************************************************** +NOTE! Use the corresponding macro, not directly this function! Lock an +rw-lock in exclusive mode for the current thread if the lock can be +obtained immediately. 
*/ +UNIV_INLINE +ibool +rw_lock_x_lock_func_nowait( +/*=======================*/ + /* out: TRUE if success */ + rw_lock_t* lock, /* in: pointer to rw-lock */ + const char* file_name,/* in: file name where lock requested */ + ulint line) /* in: line where requested */ +{ + os_thread_id_t curr_thread = os_thread_get_curr_id(); + + ibool success; + +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + success = FALSE; + if ((lock->reader_count == 0) + && rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) { +retry_x_lock: + /* try x-lock */ + if(__sync_sub_and_fetch(&(lock->lock_word), + X_LOCK_DECR) == 0) { + /* success */ + /* try to lock writer */ + if(__sync_lock_test_and_set(&(lock->writer),RW_LOCK_EX) + == RW_LOCK_NOT_LOCKED) { + /* success */ + lock->writer_thread = curr_thread; + lock->recursive = TRUE; + lock->writer_is_wait_ex = FALSE; + /* next function may work as memory barrier */ + relock: + __sync_fetch_and_add(&(lock->writer_count),1); + +#ifdef UNIV_SYNC_DEBUG + rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line); +#endif + + lock->last_x_file_name = file_name; + lock->last_x_line = line; + + ut_ad(rw_lock_validate(lock)); + + return(TRUE); + } else { + /* x-unlock */ + __sync_fetch_and_add(&(lock->lock_word), + X_LOCK_DECR); + } + } else { + /* fail (x-lock) */ + if (__sync_fetch_and_add(&(lock->lock_word),X_LOCK_DECR) + == 0) + goto retry_x_lock; + } + } + + if (lock->recursive + && os_thread_eq(lock->writer_thread, curr_thread)) { + goto relock; + } + + //ut_ad(rw_lock_validate(lock)); + + return(FALSE); +#else + + success = FALSE; + mutex_enter(&(lock->mutex)); + if (lock->lock_word == X_LOCK_DECR) { + lock->lock_word = 0; + success = TRUE; + } + mutex_exit(&(lock->mutex)); + + if (success) { + rw_lock_set_writer_id_and_recursion_flag(lock, TRUE); + + } else if (lock->recursive + && os_thread_eq(lock->writer_thread, curr_thread)) { + /* Relock: this lock_word modification is safe since no other + threads can modify (lock, unlock, or reserve) lock_word while 
+ there is an exclusive writer and this is the writer thread. */ + lock->lock_word -= X_LOCK_DECR; + + ut_ad(((-lock->lock_word) % X_LOCK_DECR) == 0); + + } else { + /* Failure */ + return(FALSE); + } +#ifdef UNIV_SYNC_DEBUG + rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line); +#endif + + lock->last_x_file_name = file_name; + lock->last_x_line = line; + + ut_ad(rw_lock_validate(lock)); + + return(TRUE); +#endif +} + +/********************************************************************** +Releases a shared mode lock. */ +UNIV_INLINE +void +rw_lock_s_unlock_func( +/*==================*/ + rw_lock_t* lock /* in: rw-lock */ +#ifdef UNIV_SYNC_DEBUG + ,ulint pass /* in: pass value; != 0, if the lock may have + been passed to another thread to unlock */ +#endif + ) +{ +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + ibool last = FALSE; + + ut_a(lock->reader_count > 0); + + /* unlock lock_word */ + __sync_fetch_and_add(&(lock->lock_word),1); + + if(__sync_sub_and_fetch(&(lock->reader_count),1) == 0) { + last = TRUE; + } + +#ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED); +#endif + + if (UNIV_UNLIKELY(last && __sync_lock_test_and_set(&lock->wait_ex_waiters, 0))) { + os_event_set(lock->wait_ex_event); + sync_array_object_signalled(sync_primary_wait_array); + } + else if (UNIV_UNLIKELY(last && __sync_lock_test_and_set(&lock->x_waiters, 0))) { + os_event_set(lock->x_event); + sync_array_object_signalled(sync_primary_wait_array); + } +#else + ut_ad((lock->lock_word % X_LOCK_DECR) != 0); + +#ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED); +#endif + + /* Increment lock_word to indicate 1 less reader */ + if (rw_lock_lock_word_incr(lock, 1) == 0) { + + /* wait_ex waiter exists. It may not be asleep, but we signal + anyway. 
We do not wake other waiters, because they can't + exist without wait_ex waiter and wait_ex waiter goes first.*/ + os_event_set(lock->wait_ex_event); + sync_array_object_signalled(sync_primary_wait_array); + + } +#endif + + ut_ad(rw_lock_validate(lock)); + +#ifdef UNIV_SYNC_PERF_STAT + rw_s_exit_count++; +#endif +} + +/********************************************************************** +Releases a shared mode lock when we know there are no waiters and none +else will access the lock during the time this function is executed. */ +UNIV_INLINE +void +rw_lock_s_unlock_direct( +/*====================*/ + rw_lock_t* lock) /* in: rw-lock */ +{ +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + ut_ad(lock->reader_count > 0); + + __sync_sub_and_fetch(&(lock->reader_count),1); + +#ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED); +#endif + + ut_ad(!lock->s_waiters); + ut_ad(!lock->x_waiters); + ut_ad(!lock->wait_ex_waiters); +#else + ut_ad(lock->lock_word < X_LOCK_DECR); + +#ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED); +#endif + + /* Decrease reader count by incrementing lock_word */ + lock->lock_word++; + + ut_ad(!lock->waiters); +#endif + ut_ad(rw_lock_validate(lock)); +#ifdef UNIV_SYNC_PERF_STAT + rw_s_exit_count++; +#endif +} + +/********************************************************************** +Releases an exclusive mode lock. 
*/ +UNIV_INLINE +void +rw_lock_x_unlock_func( +/*==================*/ + rw_lock_t* lock /* in: rw-lock */ +#ifdef UNIV_SYNC_DEBUG + ,ulint pass /* in: pass value; != 0, if the lock may have + been passed to another thread to unlock */ +#endif + ) +{ +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + ibool last = FALSE; + ibool s_sg = FALSE; + ibool x_sg = FALSE; + + ut_ad(lock->writer_count > 0); + + if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) { + last = TRUE; + } + + if (last) { + /* last x-lock released: add X_LOCK_DECR back to lock_word */ + __sync_fetch_and_add(&(lock->lock_word),X_LOCK_DECR); + + lock->recursive = FALSE; + /* FIXME: -1 is not a portable value for a pthread thread id (pthread_t need not be an integer type). + It is stored anyway so that the id of a thread that no longer owns the lock is not kept. */ + lock->writer_thread = -1; + __sync_lock_test_and_set(&(lock->writer),RW_LOCK_NOT_LOCKED); + } + +#ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX); +#endif + if (last) { /* fetch and clear the s- and x-waiter flags */ + if(__sync_lock_test_and_set(&lock->s_waiters, 0)){ + s_sg = TRUE; + } + if(__sync_lock_test_and_set(&lock->x_waiters, 0)){ + x_sg = TRUE; + } + } + + if (UNIV_UNLIKELY(s_sg)) { + os_event_set(lock->s_event); + sync_array_object_signalled(sync_primary_wait_array); + } + if (UNIV_UNLIKELY(x_sg)) { + os_event_set(lock->x_event); + sync_array_object_signalled(sync_primary_wait_array); + } +#else + ut_ad((lock->lock_word % X_LOCK_DECR) == 0); + + /* lock->recursive flag also indicates if lock->writer_thread is + valid or stale. If we are the last of the recursive callers + then we must unset lock->recursive flag to indicate that the + lock->writer_thread is now stale. + Note that since we still hold the x-lock we can safely read the + lock_word. */ + if (lock->lock_word == 0) { + /* Last caller in a possible recursive chain. 
*/ + lock->recursive = FALSE; + UNIV_MEM_INVALID(&lock->writer_thread, + sizeof lock->writer_thread); + } + +#ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX); +#endif + + if (rw_lock_lock_word_incr(lock, X_LOCK_DECR) == X_LOCK_DECR) { + /* Lock is now free. May have to signal read/write waiters. + We do not need to signal wait_ex waiters, since they cannot + exist when there is a writer. */ + if (lock->waiters) { + rw_lock_reset_waiter_flag(lock); + os_event_set(lock->event); + sync_array_object_signalled(sync_primary_wait_array); + } + } + +#endif + ut_ad(rw_lock_validate(lock)); + +#ifdef UNIV_SYNC_PERF_STAT + rw_x_exit_count++; +#endif +} + +/********************************************************************** +Releases an exclusive mode lock when we know there are no waiters, and +none else will access the lock during the time this function is executed. */ +UNIV_INLINE +void +rw_lock_x_unlock_direct( +/*====================*/ + rw_lock_t* lock) /* in: rw-lock */ +{ + /* Reset the exclusive lock if this thread no longer has an x-mode + lock */ + +#ifdef INNODB_RW_LOCKS_USE_ATOMICS + if(__sync_sub_and_fetch(&(lock->writer_count),1) == 0) { + lock->writer = RW_LOCK_NOT_LOCKED; + } + +#ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX); +#endif + + ut_ad(!lock->s_waiters); + ut_ad(!lock->x_waiters); + ut_ad(!lock->wait_ex_waiters); +#else + ut_ad((lock->lock_word % X_LOCK_DECR) == 0); + +#ifdef UNIV_SYNC_DEBUG + rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX); +#endif + + if (lock->lock_word == 0) { + lock->recursive = FALSE; + UNIV_MEM_INVALID(&lock->writer_thread, + sizeof lock->writer_thread); + } + + lock->lock_word += X_LOCK_DECR; + + ut_ad(!lock->waiters); +#endif + ut_ad(rw_lock_validate(lock)); + +#ifdef UNIV_SYNC_PERF_STAT + rw_x_exit_count++; +#endif +} diff --git a/storage/xtradb/include/sync0sync.h b/storage/xtradb/include/sync0sync.h new file mode 100644 index 00000000000..ea4abddbbf4 --- /dev/null +++ 
b/storage/xtradb/include/sync0sync.h @@ -0,0 +1,569 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2008, Google Inc. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Mutex, the basic synchronization primitive + +Created 9/5/1995 Heikki Tuuri +*******************************************************/ + +#ifndef sync0sync_h +#define sync0sync_h + +#include "univ.i" +#include "sync0types.h" +#include "ut0lst.h" +#include "ut0mem.h" +#include "os0thread.h" +#include "os0sync.h" +#include "sync0arr.h" + +#ifndef UNIV_HOTBACKUP +extern my_bool timed_mutexes; +#endif /* UNIV_HOTBACKUP */ + +/********************************************************************** +Initializes the synchronization data structures. 
*/ +UNIV_INTERN +void +sync_init(void); +/*===========*/ +/********************************************************************** +Frees the resources in synchronization data structures. */ +UNIV_INTERN +void +sync_close(void); +/*===========*/ +/********************************************************************** +Creates, or rather, initializes a mutex object to a specified memory +location (which must be appropriately aligned). The mutex is initialized +in the reset state. Explicit freeing of the mutex with mutex_free is +necessary only if the memory block containing it is freed. */ + +#ifdef UNIV_DEBUG +# ifdef UNIV_SYNC_DEBUG +# define mutex_create(M, level) \ + mutex_create_func((M), #M, (level), __FILE__, __LINE__) +# else +# define mutex_create(M, level) \ + mutex_create_func((M), #M, __FILE__, __LINE__) +# endif +#else +# define mutex_create(M, level) \ + mutex_create_func((M), __FILE__, __LINE__) +#endif + +/********************************************************************** +Creates, or rather, initializes a mutex object in a specified memory +location (which must be appropriately aligned). The mutex is initialized +in the reset state. Explicit freeing of the mutex with mutex_free is +necessary only if the memory block containing it is freed. */ +UNIV_INTERN +void +mutex_create_func( +/*==============*/ + mutex_t* mutex, /* in: pointer to memory */ +#ifdef UNIV_DEBUG + const char* cmutex_name, /* in: mutex name */ +# ifdef UNIV_SYNC_DEBUG + ulint level, /* in: level */ +# endif /* UNIV_SYNC_DEBUG */ +#endif /* UNIV_DEBUG */ + const char* cfile_name, /* in: file name where created */ + ulint cline); /* in: file line where created */ + +#undef mutex_free /* Fix for MacOS X */ + +/********************************************************************** +Calling this function is obligatory only if the memory buffer containing +the mutex is freed. Removes a mutex object from the mutex list. The mutex +is checked to be in the reset state. 
*/ +UNIV_INTERN +void +mutex_free( +/*=======*/ + mutex_t* mutex); /* in: mutex */ +/****************************************************************** +NOTE! The following macro should be used in mutex locking, not the +corresponding function. */ + +#define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__) +/****************************************************************** +NOTE! The following macro should be used in mutex locking, not the +corresponding function. */ + +/* NOTE! currently same as mutex_enter! */ + +#define mutex_enter_fast(M) mutex_enter_func((M), __FILE__, __LINE__) +/********************************************************************** +NOTE! Use the corresponding macro in the header file, not this function +directly. Locks a mutex for the current thread. If the mutex is reserved +the function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting +for the mutex before suspending the thread. */ +UNIV_INLINE +void +mutex_enter_func( +/*=============*/ + mutex_t* mutex, /* in: pointer to mutex */ + const char* file_name, /* in: file name where locked */ + ulint line); /* in: line where locked */ +/****************************************************************** +NOTE! The following macro should be used in mutex locking, not the +corresponding function. */ + +#define mutex_enter_nowait(M) \ + mutex_enter_nowait_func((M), __FILE__, __LINE__) +/************************************************************************ +NOTE! Use the corresponding macro in the header file, not this function +directly. Tries to lock the mutex for the current thread. If the lock is not +acquired immediately, returns with return value 1. 
*/ +UNIV_INTERN +ulint +mutex_enter_nowait_func( +/*====================*/ + /* out: 0 if succeed, 1 if not */ + mutex_t* mutex, /* in: pointer to mutex */ + const char* file_name, /* in: file name where mutex + requested */ + ulint line); /* in: line where requested */ +/********************************************************************** +Unlocks a mutex owned by the current thread. */ +UNIV_INLINE +void +mutex_exit( +/*=======*/ + mutex_t* mutex); /* in: pointer to mutex */ +/********************************************************************** +Returns TRUE if no mutex or rw-lock is currently locked. +Works only in the debug version. */ +UNIV_INTERN +ibool +sync_all_freed(void); +/*================*/ +/*##################################################################### +FUNCTION PROTOTYPES FOR DEBUGGING */ +/*********************************************************************** +Prints wait info of the sync system. */ +UNIV_INTERN +void +sync_print_wait_info( +/*=================*/ + FILE* file); /* in: file where to print */ +/*********************************************************************** +Prints info of the sync system. */ +UNIV_INTERN +void +sync_print( +/*=======*/ + FILE* file); /* in: file where to print */ +#ifdef UNIV_DEBUG +/********************************************************************** +Checks that the mutex has been initialized. */ +UNIV_INTERN +ibool +mutex_validate( +/*===========*/ + const mutex_t* mutex); +/********************************************************************** +Checks that the current thread owns the mutex. Works only +in the debug version. */ +UNIV_INTERN +ibool +mutex_own( +/*======*/ + /* out: TRUE if owns */ + const mutex_t* mutex); /* in: mutex */ +#endif /* UNIV_DEBUG */ +#ifdef UNIV_SYNC_DEBUG +/********************************************************************** +Adds a latch and its level in the thread level array. Allocates the memory +for the array if called first time for this OS thread. 
Makes the checks +against other latch levels stored in the array for this thread. */ +UNIV_INTERN +void +sync_thread_add_level( +/*==================*/ + void* latch, /* in: pointer to a mutex or an rw-lock */ + ulint level); /* in: level in the latching order; if + SYNC_LEVEL_VARYING, nothing is done */ +/********************************************************************** +Removes a latch from the thread level array if it is found there. */ +UNIV_INTERN +ibool +sync_thread_reset_level( +/*====================*/ + /* out: TRUE if found from the array; it is no error + if the latch is not found, as we presently are not + able to determine the level for every latch + reservation the program does */ + void* latch); /* in: pointer to a mutex or an rw-lock */ +/********************************************************************** +Checks that the level array for the current thread is empty. */ +UNIV_INTERN +ibool +sync_thread_levels_empty(void); +/*==========================*/ + /* out: TRUE if empty */ +/********************************************************************** +Checks that the level array for the current thread is empty. */ +UNIV_INTERN +ibool +sync_thread_levels_empty_gen( +/*=========================*/ + /* out: TRUE if empty except the + exceptions specified below */ + ibool dict_mutex_allowed); /* in: TRUE if dictionary mutex is + allowed to be owned by the thread, + also purge_is_running mutex is + allowed */ +/********************************************************************** +Gets the debug information for a reserved mutex. */ +UNIV_INTERN +void +mutex_get_debug_info( +/*=================*/ + mutex_t* mutex, /* in: mutex */ + const char** file_name, /* out: file where requested */ + ulint* line, /* out: line where requested */ + os_thread_id_t* thread_id); /* out: id of the thread which owns + the mutex */ +/********************************************************************** +Counts currently reserved mutexes. 
Works only in the debug version. */ +UNIV_INTERN +ulint +mutex_n_reserved(void); +/*==================*/ +#endif /* UNIV_SYNC_DEBUG */ +/********************************************************************** +NOT to be used outside this module except in debugging! Gets the value +of the lock word. */ +UNIV_INLINE +byte +mutex_get_lock_word( +/*================*/ + const mutex_t* mutex); /* in: mutex */ +#ifdef UNIV_SYNC_DEBUG +/********************************************************************** +NOT to be used outside this module except in debugging! Gets the waiters +field in a mutex. */ +UNIV_INLINE +ulint +mutex_get_waiters( +/*==============*/ + /* out: value of the waiters field */ + const mutex_t* mutex); /* in: mutex */ +#endif /* UNIV_SYNC_DEBUG */ + +/* + LATCHING ORDER WITHIN THE DATABASE + ================================== + +The mutex or latch in the central memory object, for instance, a rollback +segment object, must be acquired before acquiring the latch or latches to +the corresponding file data structure. In the latching order below, these +file page object latches are placed immediately below the corresponding +central memory object latch or mutex. + +Synchronization object Notes +---------------------- ----- + +Dictionary mutex If we have a pointer to a dictionary +| object, e.g., a table, it can be +| accessed without reserving the +| dictionary mutex. We must have a +| reservation, a memoryfix, to the +| appropriate table object in this case, +| and the table must be explicitly +| released later. +V +Dictionary header +| +V +Secondary index tree latch The tree latch protects also all +| the B-tree non-leaf pages. These +V can be read with the page only +Secondary index non-leaf bufferfixed to save CPU time, +| no s-latch is needed on the page. +| Modification of a page requires an +| x-latch on the page, however. If a +| thread owns an x-latch to the tree, +| it is allowed to latch non-leaf pages +| even after it has acquired the fsp +| latch. 
+V +Secondary index leaf The latch on the secondary index leaf +| can be kept while accessing the +| clustered index, to save CPU time. +V +Clustered index tree latch To increase concurrency, the tree +| latch is usually released when the +| leaf page latch has been acquired. +V +Clustered index non-leaf +| +V +Clustered index leaf +| +V +Transaction system header +| +V +Transaction undo mutex The undo log entry must be written +| before any index page is modified. +| Transaction undo mutex is for the undo +| logs the analogue of the tree latch +| for a B-tree. If a thread has the +| trx undo mutex reserved, it is allowed +| to latch the undo log pages in any +| order, and also after it has acquired +| the fsp latch. +V +Rollback segment mutex The rollback segment mutex must be +| reserved, if, e.g., a new page must +| be added to an undo log. The rollback +| segment and the undo logs in its +| history list can be seen as an +| analogue of a B-tree, and the latches +| reserved similarly, using a version of +| lock-coupling. If an undo log must be +| extended by a page when inserting an +| undo log record, this corresponds to +| a pessimistic insert in a B-tree. +V +Rollback segment header +| +V +Purge system latch +| +V +Undo log pages If a thread owns the trx undo mutex, +| or for a log in the history list, the +| rseg mutex, it is allowed to latch +| undo log pages in any order, and even +| after it has acquired the fsp latch. +| If a thread does not have the +| appropriate mutex, it is allowed to +| latch only a single undo log page in +| a mini-transaction. +V +File space management latch If a mini-transaction must allocate +| several file pages, it can do that, +| because it keeps the x-latch to the +| file space management in its memo. +V +File system pages +| +V +Kernel mutex If a kernel operation needs a file +| page allocation, it must reserve the +| fsp x-latch before acquiring the kernel +| mutex. 
+V +Search system mutex +| +V +Buffer pool mutex +| +V +Log mutex +| +Any other latch +| +V +Memory pool mutex */ + +/* Latching order levels */ + +/* User transaction locks are higher than any of the latch levels below: +no latches are allowed when a thread goes to wait for a normal table +or row lock! */ +#define SYNC_USER_TRX_LOCK 9999 +#define SYNC_NO_ORDER_CHECK 3000 /* this can be used to suppress + latching order checking */ +#define SYNC_LEVEL_VARYING 2000 /* Level is varying. Only used with + buffer pool page locks, which do not + have a fixed level, but instead have + their level set after the page is + locked; see e.g. + ibuf_bitmap_get_map_page(). */ +#define SYNC_TRX_I_S_RWLOCK 1910 /* Used for + trx_i_s_cache_t::rw_lock */ +#define SYNC_TRX_I_S_LAST_READ 1900 /* Used for + trx_i_s_cache_t::last_read_mutex */ +#define SYNC_FILE_FORMAT_TAG 1200 /* Used to serialize access to the + file format tag */ +#define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. reserve + this in X-mode; implicit or background + operations (purge, rollback, foreign + key checks) reserve this in S-mode */ +#define SYNC_DICT 1000 +#define SYNC_DICT_AUTOINC_MUTEX 999 +#define SYNC_DICT_HEADER 995 +#define SYNC_IBUF_HEADER 914 +#define SYNC_IBUF_PESS_INSERT_MUTEX 912 +#define SYNC_IBUF_MUTEX 910 /* ibuf mutex is really below + SYNC_FSP_PAGE: we assign a value this + high only to make the program pass + the debug checks */ +/*-------------------------------*/ +#define SYNC_INDEX_TREE 900 +#define SYNC_TREE_NODE_NEW 892 +#define SYNC_TREE_NODE_FROM_HASH 891 +#define SYNC_TREE_NODE 890 +#define SYNC_PURGE_SYS 810 +#define SYNC_PURGE_LATCH 800 +#define SYNC_TRX_UNDO 700 +#define SYNC_RSEG 600 +#define SYNC_RSEG_HEADER_NEW 591 +#define SYNC_RSEG_HEADER 590 +#define SYNC_TRX_UNDO_PAGE 570 +#define SYNC_EXTERN_STORAGE 500 +#define SYNC_FSP 400 +#define SYNC_FSP_PAGE 395 +/*------------------------------------- Insert buffer headers */ +/*------------------------------------- 
ibuf_mutex */ +/*------------------------------------- Insert buffer tree */ +#define SYNC_IBUF_BITMAP_MUTEX 351 +#define SYNC_IBUF_BITMAP 350 +/*------------------------------------- MySQL query cache mutex */ +/*------------------------------------- MySQL binlog mutex */ +/*-------------------------------*/ +#define SYNC_KERNEL 300 +#define SYNC_REC_LOCK 299 +#define SYNC_TRX_LOCK_HEAP 298 +#define SYNC_TRX_SYS_HEADER 290 +#define SYNC_LOG 170 +#define SYNC_RECV 168 +#define SYNC_WORK_QUEUE 162 +#define SYNC_SEARCH_SYS_CONF 161 /* for assigning btr_search_enabled */ +#define SYNC_SEARCH_SYS 160 /* NOTE that if we have a memory + heap that can be extended to the + buffer pool, its logical level is + SYNC_SEARCH_SYS, as memory allocation + can call routines there! Otherwise + the level is SYNC_MEM_HASH. */ +#define SYNC_BUF_POOL 150 +#define SYNC_BUF_BLOCK 149 +#define SYNC_DOUBLEWRITE 140 +#define SYNC_ANY_LATCH 135 +#define SYNC_THR_LOCAL 133 +#define SYNC_MEM_HASH 131 +#define SYNC_MEM_POOL 130 + +/* Codes used to designate lock operations. RW_LOCK_EX and +RW_LOCK_EXCLUSIVE are two names for the same code. +NOTE(review): 350 and 351 are also used above as latching levels +(SYNC_IBUF_BITMAP, SYNC_IBUF_BITMAP_MUTEX); presumably the two sets of +constants are never compared with each other -- confirm. */ +#define RW_LOCK_NOT_LOCKED 350 +#define RW_LOCK_EX 351 +#define RW_LOCK_EXCLUSIVE 351 +#define RW_LOCK_SHARED 352 +#define RW_LOCK_WAIT_EX 353 +#define SYNC_MUTEX 354 + +/* NOTE! The structure appears here only for the compiler to know its size. +Do not use its fields directly! This is the structure used in the spin lock +implementation of a mutual exclusion semaphore. 
 */ + +struct mutex_struct { + os_event_t event; /* Used by sync0arr.c for the wait queue */ + byte lock_word; /* This byte is the target of the atomic + test-and-set instruction in Win32 and + x86 32/64 with GCC 4.1.0 or later version */ +#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER) +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) +#else + os_fast_mutex_t + os_fast_mutex; /* In other systems we use this OS mutex + in place of the atomic test-and-set; + lock_word is still set and cleared + alongside it by mutex_test_and_set() + and mutex_reset_lock_word() */ +#endif + ulint waiters; /* This ulint is set to 1 if there are (or + may be) threads waiting in the global wait + array for this mutex to be released. + Otherwise, this is 0. */ + UT_LIST_NODE_T(mutex_t) list; /* All allocated mutexes are put into + a list. Pointers to the next and prev. */ +#ifdef UNIV_SYNC_DEBUG + const char* file_name; /* File where the mutex was locked */ + ulint line; /* Line where the mutex was locked */ + ulint level; /* Level in the global latching order */ +#endif /* UNIV_SYNC_DEBUG */ + const char* cfile_name;/* File name where mutex created */ + ulint cline; /* Line where created */ +#ifdef UNIV_DEBUG + os_thread_id_t thread_id; /* The thread id of the thread + which locked the mutex. */ + ulint magic_n; /* presumably holds MUTEX_MAGIC_N while + the mutex is valid -- confirm */ +# define MUTEX_MAGIC_N (ulint)979585 +#endif /* UNIV_DEBUG */ +#ifndef UNIV_HOTBACKUP + ulong count_os_wait; /* count of os_wait */ +# ifdef UNIV_DEBUG + ulong count_using; /* count of times mutex used */ + ulong count_spin_loop; /* count of spin loops */ + ulong count_spin_rounds; /* count of spin rounds */ + ulong count_os_yield; /* presumably the count of OS yields; the + original comment repeated "count of + os_wait" -- confirm */ + ulonglong lspent_time; /* mutex os_wait timer msec */ + ulonglong lmax_spent_time; /* mutex os_wait timer msec; by its + name presumably the maximum, while + lspent_time is the total -- confirm */ + const char* cmutex_name;/* mutex name */ + ulint mutex_type;/* 0 - usual mutex 1 - rw_lock mutex */ +# endif /* UNIV_DEBUG */ +#endif /* !UNIV_HOTBACKUP */ +}; + +/* The global array of wait cells for implementation of the database's own +mutexes and read-write locks. Appears here for debugging purposes only! 
*/ + +extern sync_array_t* sync_primary_wait_array; + +/* Constant determining how long spin wait is continued before suspending +the thread. A value 600 rounds on a 1995 100 MHz Pentium seems to correspond +to 20 microseconds. */ + +#define SYNC_SPIN_ROUNDS srv_n_spin_wait_rounds + +/* The number of system calls made in this module. Intended for performance +monitoring. */ + +extern ib_int64_t mutex_exit_count; + +#ifdef UNIV_SYNC_DEBUG +/* Latching order checks start when this is set TRUE */ +extern ibool sync_order_checks_on; +#endif /* UNIV_SYNC_DEBUG */ + +/* This variable is set to TRUE when sync_init is called */ +extern ibool sync_initialized; + +/* Global list of database mutexes (not OS mutexes) created. */ +typedef UT_LIST_BASE_NODE_T(mutex_t) ut_list_base_node_t; +extern ut_list_base_node_t mutex_list; + +/* Mutex protecting the mutex_list variable */ +extern mutex_t mutex_list_mutex; + + +#ifndef UNIV_NONINL +#include "sync0sync.ic" +#endif + +#endif diff --git a/storage/xtradb/include/sync0sync.ic b/storage/xtradb/include/sync0sync.ic new file mode 100644 index 00000000000..c43121ebd0b --- /dev/null +++ b/storage/xtradb/include/sync0sync.ic @@ -0,0 +1,270 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2008, Google Inc. + +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Mutex, the basic synchronization primitive + +Created 9/5/1995 Heikki Tuuri +*******************************************************/ + +/********************************************************************** +Sets the waiters field in a mutex. */ +UNIV_INTERN +void +mutex_set_waiters( +/*==============*/ + mutex_t* mutex, /* in: mutex */ + ulint n); /* in: value to set */ +/********************************************************************** +Reserves a mutex for the current thread. If the mutex is reserved, the +function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting +for the mutex before suspending the thread. */ +UNIV_INTERN +void +mutex_spin_wait( +/*============*/ + mutex_t* mutex, /* in: pointer to mutex */ + const char* file_name, /* in: file name where mutex + requested */ + ulint line); /* in: line where requested */ +#ifdef UNIV_SYNC_DEBUG +/********************************************************************** +Sets the debug information for a reserved mutex. */ +UNIV_INTERN +void +mutex_set_debug_info( +/*=================*/ + mutex_t* mutex, /* in: mutex */ + const char* file_name, /* in: file where requested */ + ulint line); /* in: line where requested */ +#endif /* UNIV_SYNC_DEBUG */ +/********************************************************************** +Releases the threads waiting in the primary wait array for this mutex. 
 */ +UNIV_INTERN +void +mutex_signal_object( +/*================*/ + mutex_t* mutex); /* in: mutex */ + +/********************************************************************** +Performs an atomic test-and-set instruction to the lock_word field of a +mutex. A return value of 0 means that lock_word was 0 before the call, +i.e. the caller has now reserved the mutex. */ +UNIV_INLINE +byte +mutex_test_and_set( +/*===============*/ + /* out: the previous value of lock_word: 0 or + 1 */ + mutex_t* mutex) /* in: mutex */ +{ +#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER) + byte res; + byte* lw; /* assembler code is used to ensure that + lock_word is loaded from memory */ + ut_ad(mutex); + ut_ad(sizeof(byte) == 1); + + lw = &(mutex->lock_word); + + /* Exchange 1 with the byte at lw; DL, and then res, receives the + old contents of lock_word */ + __asm MOV ECX, lw + __asm MOV EDX, 1 + __asm XCHG DL, BYTE PTR [ECX] + __asm MOV res, DL + + /* The fence below would prevent this thread from + reading the data structure protected by the mutex + before the test-and-set operation is committed, but + the fence is apparently not needed: + + In a posting to comp.arch newsgroup (August 10, 1997) + Andy Glew said that in P6 a LOCKed instruction like + XCHG establishes a fence with respect to memory reads + and writes and thus an explicit fence is not + needed. In P5 he seemed to agree with a previous + newsgroup poster that LOCKed instructions serialize + all instruction execution, and, consequently, also + memory operations. This is confirmed in Intel Software + Dev. Manual, Vol. 3. */ + + /* mutex_fence(); */ + + return(res); +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + /* The builtin stores 1 and returns the value lock_word held + before the store */ + return __sync_lock_test_and_set(&(mutex->lock_word), 1); +#else + ibool ret; + + /* No atomic instruction is used on this path: ownership is decided + by the OS mutex. The code below treats a return value of 0 from + os_fast_mutex_trylock() as "acquired". */ + ret = os_fast_mutex_trylock(&(mutex->os_fast_mutex)); + + if (ret == 0) { + /* Sanity check: we hold the OS mutex, so lock_word + must still be 0; anything else would mean that + os_fast_mutex_trylock let two threads in at once */ + ut_a(mutex->lock_word == 0); + + mutex->lock_word = 1; + } + + /* NOTE(review): on failure this is the nonzero value returned by + os_fast_mutex_trylock(), truncated to a byte -- presumably 1; + confirm that it can never be a multiple of 256 */ + return((byte)ret); +#endif +} + +/********************************************************************** +Performs a reset instruction to the lock_word field of a mutex. 
This +instruction also serializes memory operations to the program order. */ +UNIV_INLINE +void +mutex_reset_lock_word( +/*==================*/ + mutex_t* mutex) /* in: mutex */ +{ +#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER) + byte* lw; /* assembler code is used to ensure that + lock_word is loaded from memory */ + ut_ad(mutex); + + lw = &(mutex->lock_word); + + /* Exchange 0 into lock_word; the old value left in DL is + discarded */ + __asm MOV EDX, 0 + __asm MOV ECX, lw + __asm XCHG DL, BYTE PTR [ECX] +#elif defined(HAVE_GCC_ATOMIC_BUILTINS) + /* In theory __sync_lock_release should be used to release the lock. + Unfortunately, it does not work properly alone. The workaround is + that more conservative __sync_lock_test_and_set is used instead. + NOTE(review): GCC documents __sync_lock_test_and_set as an acquire + barrier only, so the serialization promised in the function comment + presumably relies on the target's exchange instruction being a full + barrier (as x86 XCHG is) -- confirm for other architectures. */ + __sync_lock_test_and_set(&(mutex->lock_word), 0); +#else + /* lock_word is cleared while the OS mutex is still held; the OS + mutex is released afterwards */ + mutex->lock_word = 0; + + os_fast_mutex_unlock(&(mutex->os_fast_mutex)); +#endif +} + +/********************************************************************** +Gets the value of the lock word. */ +UNIV_INLINE +byte +mutex_get_lock_word( +/*================*/ + /* out: current value of lock_word */ + const mutex_t* mutex) /* in: mutex */ +{ + const volatile byte* ptr; /* declared volatile to ensure that + lock_word is loaded from memory */ + ut_ad(mutex); + + ptr = &(mutex->lock_word); + + return(*ptr); +} + +/********************************************************************** +Gets the waiters field in a mutex. */ +UNIV_INLINE +ulint +mutex_get_waiters( +/*==============*/ + /* out: current value of the waiters field */ + const mutex_t* mutex) /* in: mutex */ +{ + const volatile ulint* ptr; /* declared volatile to ensure that + the value is read from memory */ + ut_ad(mutex); + + ptr = &(mutex->waiters); + + return(*ptr); /* Here we assume that the read of a single + word from memory is atomic */ +} + +/********************************************************************** +Unlocks a mutex owned by the current thread. 
 */ +UNIV_INLINE +void +mutex_exit( +/*=======*/ + mutex_t* mutex) /* in: pointer to mutex */ +{ + /* Debug assertion: the caller must own the mutex */ + ut_ad(mutex_own(mutex)); + + /* Forget the owner (thread_id exists only in UNIV_DEBUG builds) */ + ut_d(mutex->thread_id = (os_thread_id_t) ULINT_UNDEFINED); + +#ifdef UNIV_SYNC_DEBUG + sync_thread_reset_level(mutex); +#endif + mutex_reset_lock_word(mutex); + + /* A problem: we assume that mutex_reset_lock_word() + is a memory barrier, that is, when we read the waiters + field next, the read must be serialized in memory + after the reset. A speculative processor might + perform the read first, which could leave a waiting + thread hanging indefinitely. + + Our current solution is that + sync_arr_wake_threads_if_sema_free() + is called every second to wake up possibly hanging + threads if they were missed in mutex_signal_object. */ + + if (mutex_get_waiters(mutex) != 0) { + + mutex_signal_object(mutex); + } + +#ifdef UNIV_SYNC_PERF_STAT + mutex_exit_count++; +#endif +} + +/********************************************************************** +Locks a mutex for the current thread. If the mutex is reserved, the function +spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for the mutex +before suspending the thread. */ +UNIV_INLINE +void +mutex_enter_func( +/*=============*/ + mutex_t* mutex, /* in: pointer to mutex */ + const char* file_name, /* in: file name where locked */ + ulint line) /* in: line where locked */ +{ + ut_ad(mutex_validate(mutex)); + ut_ad(!mutex_own(mutex)); + + /* Note that we do not peek at the value of lock_word before trying + the atomic test_and_set; we could peek, and possibly save time. */ + +#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP + /* Incremented before the mutex is acquired, so this debug counter + is not protected by the mutex itself */ + mutex->count_using++; +#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ + + /* A return value of 0 means that lock_word was free and is now + ours */ + if (!mutex_test_and_set(mutex)) { + ut_d(mutex->thread_id = os_thread_get_curr_id()); +#ifdef UNIV_SYNC_DEBUG + mutex_set_debug_info(mutex, file_name, line); +#endif + return; /* Succeeded! 
 */ + } + + /* The mutex was already reserved: take the slow path */ + mutex_spin_wait(mutex, file_name, line); +} diff --git a/storage/xtradb/include/sync0types.h b/storage/xtradb/include/sync0types.h new file mode 100644 index 00000000000..3c1021b1a30 --- /dev/null +++ b/storage/xtradb/include/sync0types.h @@ -0,0 +1,31 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Global types for sync + +Created 9/5/1995 Heikki Tuuri +*******************************************************/ + +#ifndef sync0types_h +#define sync0types_h + +/* NOTE(review): every later use of the name mutex_t is rewritten to +ib_mutex_t by the preprocessor, presumably to avoid a clash with a +mutex_t provided by system headers -- confirm */ +#define mutex_t ib_mutex_t +typedef struct mutex_struct mutex_t; + +#endif diff --git a/storage/xtradb/include/thr0loc.h b/storage/xtradb/include/thr0loc.h new file mode 100644 index 00000000000..96ec13cc8e4 --- /dev/null +++ b/storage/xtradb/include/thr0loc.h @@ -0,0 +1,94 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The thread local storage + +Created 10/5/1995 Heikki Tuuri +*******************************************************/ + +/* This module implements storage private to each thread, +a capability useful in some situations like storing the +OS handle to the current thread, or its priority. */ + +#ifndef thr0loc_h +#define thr0loc_h + +#include "univ.i" +#include "os0thread.h" + +/******************************************************************** +Initializes the thread local storage module. */ +UNIV_INTERN +void +thr_local_init(void); +/*================*/ +/*********************************************************************** +Creates a local storage struct for the calling new thread. */ +UNIV_INTERN +void +thr_local_create(void); +/*==================*/ +/*********************************************************************** +Frees the local storage struct for the specified thread. */ +UNIV_INTERN +void +thr_local_free( +/*===========*/ + os_thread_id_t id); /* in: thread id */ +/*********************************************************************** +Gets the slot number in the thread table of a thread. 
*/ +UNIV_INTERN +ulint +thr_local_get_slot_no( +/*==================*/ + /* out: slot number */ + os_thread_id_t id); /* in: thread id of the thread */ +/*********************************************************************** +Sets in the local storage the slot number in the thread table of a thread. */ +UNIV_INTERN +void +thr_local_set_slot_no( +/*==================*/ + os_thread_id_t id, /* in: thread id of the thread */ + ulint slot_no);/* in: slot number */ +/*********************************************************************** +Returns pointer to the 'in_ibuf' field within the current thread local +storage. */ +UNIV_INTERN +ibool* +thr_local_get_in_ibuf_field(void); +/*=============================*/ + /* out: pointer to the in_ibuf field */ + +/************************************************************************* +Return local hash table informations. */ + +ulint +thr_local_hash_cells(void); +/*=======================*/ + +ulint +thr_local_hash_nodes(void); +/*=======================*/ + +#ifndef UNIV_NONINL +#include "thr0loc.ic" +#endif + +#endif diff --git a/storage/xtradb/include/thr0loc.ic b/storage/xtradb/include/thr0loc.ic new file mode 100644 index 00000000000..6de183fd857 --- /dev/null +++ b/storage/xtradb/include/thr0loc.ic @@ -0,0 +1,23 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Thread local storage + +Created 10/4/1995 Heikki Tuuri +*******************************************************/ diff --git a/storage/xtradb/include/trx0i_s.h b/storage/xtradb/include/trx0i_s.h new file mode 100644 index 00000000000..cf2865af127 --- /dev/null +++ b/storage/xtradb/include/trx0i_s.h @@ -0,0 +1,212 @@ +/***************************************************************************** + +Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +INFORMATION SCHEMA innodb_trx, innodb_locks and +innodb_lock_waits tables cache structures and public +functions. 
+ +Created July 17, 2007 Vasil Dimov +*******************************************************/ + +#ifndef trx0i_s_h +#define trx0i_s_h + +#include "univ.i" +#include "trx0types.h" +#include "ut0ut.h" + +/* The maximum amount of memory that can be consumed by innodb_trx, +innodb_locks and innodb_lock_waits information schema tables. */ +#define TRX_I_S_MEM_LIMIT 16777216 /* 16 MiB */ + +/* The maximum length of a string that can be stored in +i_s_locks_row_t::lock_data */ +#define TRX_I_S_LOCK_DATA_MAX_LEN 8192 + +/* The maximum length of a string that can be stored in +i_s_trx_row_t::trx_query */ +#define TRX_I_S_TRX_QUERY_MAX_LEN 1024 + +typedef struct i_s_locks_row_struct i_s_locks_row_t; +typedef struct i_s_hash_chain_struct i_s_hash_chain_t; + +/* Objects of this type are added to the hash table +trx_i_s_cache_t::locks_hash */ +struct i_s_hash_chain_struct { + i_s_locks_row_t* value; /* the innodb_locks row this chain + entry refers to */ + i_s_hash_chain_t* next; /* next entry in the chain */ +}; + +/* This structure represents one INFORMATION_SCHEMA.innodb_locks row */ +struct i_s_locks_row_struct { + ullint lock_trx_id; + const char* lock_mode; + const char* lock_type; + const char* lock_table; + const char* lock_index; + ulint lock_space; + ulint lock_page; + ulint lock_rec; + const char* lock_data; /* string limited by + TRX_I_S_LOCK_DATA_MAX_LEN */ + + /* The following are auxiliary and not included in the table */ + ullint lock_table_id; + i_s_hash_chain_t hash_chain; /* this object is added to the hash + table + trx_i_s_cache_t::locks_hash */ +}; + +/* This structure represents one INFORMATION_SCHEMA.innodb_trx row */ +typedef struct i_s_trx_row_struct { + ullint trx_id; + const char* trx_state; + ib_time_t trx_started; + const i_s_locks_row_t* requested_lock_row; + ib_time_t trx_wait_started; + ullint trx_weight; + ulint trx_mysql_thread_id; + const char* trx_query; /* string limited by + TRX_I_S_TRX_QUERY_MAX_LEN */ +} i_s_trx_row_t; + +/* This structure represents one INFORMATION_SCHEMA.innodb_lock_waits row: +a requested lock row paired with a blocking lock row */ +typedef struct i_s_lock_waits_row_struct { + const i_s_locks_row_t* requested_lock_row; + const i_s_locks_row_t* blocking_lock_row; +} 
 i_s_lock_waits_row_t; + +/* This type is opaque and is defined in trx/trx0i_s.c */ +typedef struct trx_i_s_cache_struct trx_i_s_cache_t; + +/* Auxiliary enum used by functions that need to select one of the +INFORMATION_SCHEMA tables */ +enum i_s_table { + I_S_INNODB_TRX, /* innodb_trx */ + I_S_INNODB_LOCKS, /* innodb_locks */ + I_S_INNODB_LOCK_WAITS /* innodb_lock_waits */ +}; + +/* This is the intermediate buffer where data needed to fill the +INFORMATION SCHEMA tables is fetched and later retrieved by the C++ +code in handler/i_s.cc. */ +extern trx_i_s_cache_t* trx_i_s_cache; + +/*********************************************************************** +Initialize INFORMATION SCHEMA trx related cache. */ +UNIV_INTERN +void +trx_i_s_cache_init( +/*===============*/ + trx_i_s_cache_t* cache); /* out: cache to init */ + +/*********************************************************************** +Issue a shared/read lock on the tables cache. */ +UNIV_INTERN +void +trx_i_s_cache_start_read( +/*=====================*/ + trx_i_s_cache_t* cache); /* in: cache */ + +/*********************************************************************** +Release a shared/read lock on the tables cache. */ +UNIV_INTERN +void +trx_i_s_cache_end_read( +/*===================*/ + trx_i_s_cache_t* cache); /* in: cache */ + +/*********************************************************************** +Issue an exclusive/write lock on the tables cache. */ +UNIV_INTERN +void +trx_i_s_cache_start_write( +/*======================*/ + trx_i_s_cache_t* cache); /* in: cache */ + +/*********************************************************************** +Release an exclusive/write lock on the tables cache. */ +UNIV_INTERN +void +trx_i_s_cache_end_write( +/*====================*/ + trx_i_s_cache_t* cache); /* in: cache */ + + +/*********************************************************************** +Retrieves the number of used rows in the cache for a given +INFORMATION SCHEMA table. 
*/ +UNIV_INTERN +ulint +trx_i_s_cache_get_rows_used( +/*========================*/ + /* out: number of rows */ + trx_i_s_cache_t* cache, /* in: cache */ + enum i_s_table table); /* in: which table */ + +/*********************************************************************** +Retrieves the nth row in the cache for a given INFORMATION SCHEMA +table. */ +UNIV_INTERN +void* +trx_i_s_cache_get_nth_row( +/*======================*/ + /* out: row */ + trx_i_s_cache_t* cache, /* in: cache */ + enum i_s_table table, /* in: which table */ + ulint n); /* in: row number */ + +/*********************************************************************** +Update the transactions cache if it has not been read for some time. */ +UNIV_INTERN +int +trx_i_s_possibly_fetch_data_into_cache( +/*===================================*/ + /* out: 0 - fetched, 1 - not */ + trx_i_s_cache_t* cache); /* in/out: cache */ + +/*********************************************************************** +Returns TRUE if the data in the cache is truncated due to the memory +limit posed by TRX_I_S_MEM_LIMIT. */ +UNIV_INTERN +ibool +trx_i_s_cache_is_truncated( +/*=======================*/ + /* out: TRUE if truncated */ + trx_i_s_cache_t* cache); /* in: cache */ + +/* The maximum length of a resulting lock_id_size in +trx_i_s_create_lock_id(), not including the terminating '\0'. +":%lu:%lu:%lu" -> 63 chars */ +#define TRX_I_S_LOCK_ID_MAX_LEN (TRX_ID_MAX_LEN + 63) + +/*********************************************************************** +Crafts a lock id string from a i_s_locks_row_t object. Returns its +second argument. This function aborts if there is not enough space in +lock_id. Be sure to provide at least TRX_I_S_LOCK_ID_MAX_LEN + 1 if you +want to be 100% sure that it will not abort. 
*/ +UNIV_INTERN +char* +trx_i_s_create_lock_id( +/*===================*/ + /* out: resulting lock id */ + const i_s_locks_row_t* row, /* in: innodb_locks row */ + char* lock_id,/* out: resulting lock_id */ + ulint lock_id_size);/* in: size of the lock id + buffer */ + +#endif /* trx0i_s_h */ diff --git a/storage/xtradb/include/trx0purge.h b/storage/xtradb/include/trx0purge.h new file mode 100644 index 00000000000..4921b860485 --- /dev/null +++ b/storage/xtradb/include/trx0purge.h @@ -0,0 +1,185 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Purge old versions + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#ifndef trx0purge_h +#define trx0purge_h + +#include "univ.i" +#include "trx0types.h" +#include "mtr0mtr.h" +#include "trx0sys.h" +#include "que0types.h" +#include "page0page.h" +#include "usr0sess.h" +#include "fil0fil.h" + +/* The global data structure coordinating a purge */ +extern trx_purge_t* purge_sys; + +/* A dummy undo record used as a return value when we have a whole undo log +which needs no purge */ +extern trx_undo_rec_t trx_purge_dummy_rec; + +/************************************************************************ +Calculates the file address of an undo log header when we have the file +address of its history list node. */ +UNIV_INLINE +fil_addr_t +trx_purge_get_log_from_hist( +/*========================*/ + /* out: file address of the log */ + fil_addr_t node_addr); /* in: file address of the history + list node of the log */ +/********************************************************************* +Checks if trx_id is >= purge_view: then it is guaranteed that its update +undo log still exists in the system. */ +UNIV_INTERN +ibool +trx_purge_update_undo_must_exist( +/*=============================*/ + /* out: TRUE if is sure that it is preserved, also + if the function returns FALSE, it is possible that + the undo log still exists in the system */ + dulint trx_id);/* in: transaction id */ +/************************************************************************ +Creates the global purge system control structure and inits the history +mutex. 
 */ +UNIV_INTERN +void +trx_purge_sys_create(void); +/*======================*/ +/************************************************************************ +Adds the update undo log as the first log in the history list. Removes the +update undo log segment from the rseg slot if it is too big for reuse. */ +UNIV_INTERN +void +trx_purge_add_update_undo_to_history( +/*=================================*/ + trx_t* trx, /* in: transaction */ + page_t* undo_page, /* in: update undo log header page, + x-latched */ + mtr_t* mtr); /* in: mtr */ +/************************************************************************ +Fetches the next undo log record from the history list to purge. It must be +released with the corresponding release function (see +trx_purge_rec_release() declared below). */ +UNIV_INTERN +trx_undo_rec_t* +trx_purge_fetch_next_rec( +/*=====================*/ + /* out: copy of an undo log record, or + pointer to the dummy undo log record + &trx_purge_dummy_rec if the whole undo log + can be skipped in purge; NULL if none left */ + dulint* roll_ptr,/* out: roll pointer to undo record */ + trx_undo_inf_t** cell, /* out: storage cell for the record in the + purge array */ + mem_heap_t* heap); /* in: memory heap where copied */ +/*********************************************************************** +Releases a reserved purge undo record. */ +UNIV_INTERN +void +trx_purge_rec_release( +/*==================*/ + trx_undo_inf_t* cell); /* in: storage cell */ +/*********************************************************************** +This function runs a purge batch. */ +UNIV_INTERN +ulint +trx_purge(void); +/*===========*/ + /* out: number of undo log pages handled in + the batch */ +/********************************************************************** +Prints information of the purge system to stderr. 
*/ +UNIV_INTERN +void +trx_purge_sys_print(void); +/*======================*/ + +/* The control structure used in the purge operation */ +struct trx_purge_struct{ + ulint state; /* Purge system state; presumably TRX_PURGE_ON or TRX_STOP_PURGE - TODO confirm */ + sess_t* sess; /* System session running the purge + query */ + trx_t* trx; /* System transaction running the purge + query: this trx is not in the trx list + of the trx system and it never ends */ + que_t* query; /* The query graph which will do the + parallelized purge operation */ + rw_lock_t latch; /* The latch protecting the purge view. + A purge operation must acquire an + x-latch here for the instant at which + it changes the purge view: an undo + log operation can prevent this by + obtaining an s-latch here. */ + read_view_t* view; /* The purge will not remove undo logs + which are >= this view (purge view) */ + mutex_t mutex; /* Mutex protecting the fields below */ + ulint n_pages_handled;/* Approximate number of undo log + pages processed in purge */ + ulint handle_limit; /* Target number of pages to + process in the current purge */ + /*------------------------------*/ + /* The following two fields form the 'purge pointer' which advances + during a purge, and which is used in history list truncation */ + + dulint purge_trx_no; /* Purge has advanced past all + transactions whose number is less + than this */ + dulint purge_undo_no; /* Purge has advanced past all records + whose undo number is less than this */ + /*-----------------------------*/ + ibool next_stored; /* TRUE if the info of the next record + to purge is stored below: if yes, then + the transaction number and the undo + number of the record are stored in + purge_trx_no and purge_undo_no above */ + trx_rseg_t* rseg; /* Rollback segment for the next undo + record to purge */ + ulint page_no; /* Page number for the next undo + record to purge, or the page number of + the log header if it is the dummy record */ + ulint offset; /* Page offset for the next undo + record to purge, 0 if the dummy + record 
*/ + ulint hdr_page_no; /* Header page of the undo log where + the next record to purge belongs */ + ulint hdr_offset; /* Header byte offset on the page */ + /*-----------------------------*/ + trx_undo_arr_t* arr; /* Array of transaction numbers and + undo numbers of the undo records + currently under processing in purge */ + mem_heap_t* heap; /* Temporary storage used during a + purge: can be emptied after purge + completes */ +}; + +#define TRX_PURGE_ON 1 /* purge operation is running */ +#define TRX_STOP_PURGE 2 /* purge operation is stopped, or + it should be stopped */ +#ifndef UNIV_NONINL +#include "trx0purge.ic" +#endif + +#endif diff --git a/storage/xtradb/include/trx0purge.ic b/storage/xtradb/include/trx0purge.ic new file mode 100644 index 00000000000..2c1d2ac75af --- /dev/null +++ b/storage/xtradb/include/trx0purge.ic @@ -0,0 +1,42 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Purge old versions + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#include "trx0undo.h" + +/************************************************************************ +Calculates the file address of an undo log header when we have the file +address of its history list node. */ +UNIV_INLINE +fil_addr_t +trx_purge_get_log_from_hist( +/*========================*/ + /* out: file address of the log, i.e. node_addr with its boffset decreased by TRX_UNDO_HISTORY_NODE */ + fil_addr_t node_addr) /* in: file address of the history + list node of the log; passed by value, so the caller's copy is not modified */ +{ + node_addr.boffset -= TRX_UNDO_HISTORY_NODE; + + return(node_addr); +} + diff --git a/storage/xtradb/include/trx0rec.h b/storage/xtradb/include/trx0rec.h new file mode 100644 index 00000000000..444d39e39db --- /dev/null +++ b/storage/xtradb/include/trx0rec.h @@ -0,0 +1,333 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Transaction undo log record + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#ifndef trx0rec_h +#define trx0rec_h + +#include "univ.i" +#include "trx0types.h" +#include "row0types.h" +#include "mtr0mtr.h" +#include "dict0types.h" +#include "que0types.h" +#include "data0data.h" +#include "rem0types.h" + +/*************************************************************************** +Copies the undo record to the heap. */ +UNIV_INLINE +trx_undo_rec_t* +trx_undo_rec_copy( +/*==============*/ + /* out, own: copy of undo log record */ + trx_undo_rec_t* undo_rec, /* in: undo log record */ + mem_heap_t* heap); /* in: heap where copied */ +/************************************************************************** +Reads the undo log record type. */ +UNIV_INLINE +ulint +trx_undo_rec_get_type( +/*==================*/ + /* out: record type */ + trx_undo_rec_t* undo_rec); /* in: undo log record */ +/************************************************************************** +Reads from an undo log record the record compiler info. */ +UNIV_INLINE +ulint +trx_undo_rec_get_cmpl_info( +/*=======================*/ + /* out: compiler info */ + trx_undo_rec_t* undo_rec); /* in: undo log record */ +/************************************************************************** +Returns TRUE if an undo log record contains an extern storage field. 
*/ +UNIV_INLINE +ibool +trx_undo_rec_get_extern_storage( +/*============================*/ + /* out: TRUE if extern */ + trx_undo_rec_t* undo_rec); /* in: undo log record */ +/************************************************************************** +Reads the undo log record number. */ +UNIV_INLINE +dulint +trx_undo_rec_get_undo_no( +/*=====================*/ + /* out: undo no */ + trx_undo_rec_t* undo_rec); /* in: undo log record */ +/************************************************************************** +Returns the start of the undo record data area. */ + +UNIV_INLINE +byte* +trx_undo_rec_get_ptr( +/*==================*/ + /* out: pointer to the start of the undo record data area */ + trx_undo_rec_t* undo_rec, /* in: undo log record */ + dulint undo_no); /* in: undo no read from node */ + +/************************************************************************** +Reads from an undo log record the general parameters. */ +UNIV_INTERN +byte* +trx_undo_rec_get_pars( +/*==================*/ + /* out: remaining part of undo log + record after reading these values */ + trx_undo_rec_t* undo_rec, /* in: undo log record */ + ulint* type, /* out: undo record type: + TRX_UNDO_INSERT_REC, ... */ + ulint* cmpl_info, /* out: compiler info, relevant only + for update type records */ + ibool* updated_extern, /* out: TRUE if we updated an + externally stored field */ + dulint* undo_no, /* out: undo log record number */ + dulint* table_id); /* out: table id */ +/*********************************************************************** +Builds a row reference from an undo log record. */ +UNIV_INTERN +byte* +trx_undo_rec_get_row_ref( +/*=====================*/ + /* out: pointer to remaining part of undo + record */ + byte* ptr, /* in: remaining part of a copy of an undo log + record, at the start of the row reference; + NOTE that this copy of the undo log record must + be preserved as long as the row reference is + used, as we do NOT copy the data in the + record! 
*/ + dict_index_t* index, /* in: clustered index */ + dtuple_t** ref, /* out, own: row reference */ + mem_heap_t* heap); /* in: memory heap from which the memory + needed is allocated */ +/*********************************************************************** +Skips a row reference from an undo log record. */ +UNIV_INTERN +byte* +trx_undo_rec_skip_row_ref( +/*======================*/ + /* out: pointer to remaining part of undo + record */ + byte* ptr, /* in: remaining part in update undo log + record, at the start of the row reference */ + dict_index_t* index); /* in: clustered index */ +/************************************************************************** +Reads from an undo log update record the system field values of the old +version. */ +UNIV_INTERN +byte* +trx_undo_update_rec_get_sys_cols( +/*=============================*/ + /* out: remaining part of undo log + record after reading these values */ + byte* ptr, /* in: remaining part of undo log + record after reading general + parameters */ + dulint* trx_id, /* out: trx id */ + dulint* roll_ptr, /* out: roll ptr */ + ulint* info_bits); /* out: info bits state */ +/*********************************************************************** +Builds an update vector based on a remaining part of an undo log record. */ +UNIV_INTERN +byte* +trx_undo_update_rec_get_update( +/*===========================*/ + /* out: remaining part of the record, + NULL if an error detected, which means that + the record is corrupted */ + byte* ptr, /* in: remaining part in update undo log + record, after reading the row reference + NOTE that this copy of the undo log record must + be preserved as long as the update vector is + used, as we do NOT copy the data in the + record! 
*/ + dict_index_t* index, /* in: clustered index */ + ulint type, /* in: TRX_UNDO_UPD_EXIST_REC, + TRX_UNDO_UPD_DEL_REC, or + TRX_UNDO_DEL_MARK_REC; in the last case, + only trx id and roll ptr fields are added to + the update vector */ + dulint trx_id, /* in: transaction id from this undo record */ + dulint roll_ptr,/* in: roll pointer from this undo record */ + ulint info_bits,/* in: info bits from this undo record */ + trx_t* trx, /* in: transaction */ + mem_heap_t* heap, /* in: memory heap from which the memory + needed is allocated */ + upd_t** upd); /* out, own: update vector */ +/*********************************************************************** +Builds a partial row from an update undo log record. It contains the +columns which occur as ordering in any index of the table. */ +UNIV_INTERN +byte* +trx_undo_rec_get_partial_row( +/*=========================*/ + /* out: pointer to remaining part of undo + record */ + byte* ptr, /* in: remaining part in update undo log + record of a suitable type, at the start of + the stored index columns; + NOTE that this copy of the undo log record must + be preserved as long as the partial row is + used, as we do NOT copy the data in the + record! */ + dict_index_t* index, /* in: clustered index */ + dtuple_t** row, /* out, own: partial row */ + ibool ignore_prefix, /* in: flag to indicate if we + expect blob prefixes in undo. Used + only in the assertion. */ + mem_heap_t* heap); /* in: memory heap from which the memory + needed is allocated */ +/*************************************************************************** +Writes information to an undo log about an insert, update, or a delete marking +of a clustered index record. This information is used in a rollback of the +transaction and in consistent reads that must look to the history of this +transaction. 
*/ +UNIV_INTERN +ulint +trx_undo_report_row_operation( +/*==========================*/ + /* out: DB_SUCCESS or error code */ + ulint flags, /* in: if BTR_NO_UNDO_LOG_FLAG bit is + set, does nothing */ + ulint op_type, /* in: TRX_UNDO_INSERT_OP or + TRX_UNDO_MODIFY_OP */ + que_thr_t* thr, /* in: query thread */ + dict_index_t* index, /* in: clustered index */ + const dtuple_t* clust_entry, /* in: in the case of an insert, + index entry to insert into the + clustered index, otherwise NULL */ + const upd_t* update, /* in: in the case of an update, + the update vector, otherwise NULL */ + ulint cmpl_info, /* in: compiler info on secondary + index updates */ + const rec_t* rec, /* in: case of an update or delete + marking, the record in the clustered + index, otherwise NULL */ + dulint* roll_ptr); /* out: rollback pointer to the + inserted undo log record, + ut_dulint_zero if BTR_NO_UNDO_LOG + flag was specified */ +/********************************************************************** +Copies an undo record to heap. This function can be called if we know that +the undo log record exists. */ +UNIV_INTERN +trx_undo_rec_t* +trx_undo_get_undo_rec_low( +/*======================*/ + /* out, own: copy of the record */ + dulint roll_ptr, /* in: roll pointer to record */ + mem_heap_t* heap); /* in: memory heap where copied */ +/********************************************************************** +Copies an undo record to heap. 
*/ +UNIV_INTERN +ulint +trx_undo_get_undo_rec( +/*==================*/ + /* out: DB_SUCCESS, or + DB_MISSING_HISTORY if the undo log + has been truncated and we cannot + fetch the old version; NOTE: the + caller must have latches on the + clustered index page and purge_view */ + dulint roll_ptr, /* in: roll pointer to record */ + dulint trx_id, /* in: id of the trx that generated + the roll pointer: it points to an + undo log of this transaction */ + trx_undo_rec_t** undo_rec, /* out, own: copy of the record */ + mem_heap_t* heap); /* in: memory heap where copied */ +/*********************************************************************** +Build a previous version of a clustered index record. This function checks +that the caller has a latch on the index page of the clustered index record +and an s-latch on the purge_view. This guarantees that the stack of versions +is locked. */ +UNIV_INTERN +ulint +trx_undo_prev_version_build( +/*========================*/ + /* out: DB_SUCCESS, or DB_MISSING_HISTORY if + the previous version is not >= purge_view, + which means that it may have been removed, + DB_ERROR if corrupted record */ + const rec_t* index_rec,/* in: clustered index record in the + index tree */ + mtr_t* index_mtr,/* in: mtr which contains the latch to + index_rec page and purge_view */ + const rec_t* rec, /* in: version of a clustered index record */ + dict_index_t* index, /* in: clustered index */ + ulint* offsets,/* in: rec_get_offsets(rec, index) */ + mem_heap_t* heap, /* in: memory heap from which the memory + needed is allocated */ + rec_t** old_vers);/* out, own: previous version, or NULL if + rec is the first inserted version, or if + history data has been deleted */ +/*************************************************************** +Parses a redo log record of adding an undo log record. 
*/ +UNIV_INTERN +byte* +trx_undo_parse_add_undo_rec( +/*========================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page); /* in: page or NULL */ +/*************************************************************** +Parses a redo log record of erasing of an undo page end. */ +UNIV_INTERN +byte* +trx_undo_parse_erase_page_end( +/*==========================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ + +/* Types of an undo log record: these have to be smaller than 16, as the +compilation info multiplied by 16 is ORed to this value in an undo log +record */ + +#define TRX_UNDO_INSERT_REC 11 /* fresh insert into clustered index */ +#define TRX_UNDO_UPD_EXIST_REC 12 /* update of a non-delete-marked + record */ +#define TRX_UNDO_UPD_DEL_REC 13 /* update of a delete marked record to + a not delete marked record; also the + fields of the record can change */ +#define TRX_UNDO_DEL_MARK_REC 14 /* delete marking of a record; fields + do not change */ +#define TRX_UNDO_CMPL_INFO_MULT 16 /* compilation info is multiplied by + this and ORed to the type above */ +#define TRX_UNDO_UPD_EXTERN 128 /* This bit can be ORed to type_cmpl + to denote that we updated external + storage fields: used by purge to + free the external storage */ + +/* Operation type flags used in trx_undo_report_row_operation */ +#define TRX_UNDO_INSERT_OP 1 +#define TRX_UNDO_MODIFY_OP 2 + +#ifndef UNIV_NONINL +#include "trx0rec.ic" +#endif + +#endif diff --git a/storage/xtradb/include/trx0rec.ic b/storage/xtradb/include/trx0rec.ic new file mode 100644 index 00000000000..bfd74eb9dfb --- /dev/null +++ b/storage/xtradb/include/trx0rec.ic @@ -0,0 +1,116 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Transaction undo log record + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +/************************************************************************** +Reads from an undo log record the record type. */ +UNIV_INLINE +ulint +trx_undo_rec_get_type( +/*==================*/ + /* out: record type, i.e. the value at undo_rec + 2 masked with (TRX_UNDO_CMPL_INFO_MULT - 1) */ + trx_undo_rec_t* undo_rec) /* in: undo log record */ +{ + return(mach_read_from_1(undo_rec + 2) & (TRX_UNDO_CMPL_INFO_MULT - 1)); +} + +/************************************************************************** +Reads from an undo log record the record compiler info. */ +UNIV_INLINE +ulint +trx_undo_rec_get_cmpl_info( +/*=======================*/ + /* out: compiler info, i.e. the value at undo_rec + 2 divided by TRX_UNDO_CMPL_INFO_MULT; NOTE(review): the TRX_UNDO_UPD_EXTERN bit is not masked off before the division - confirm callers expect that */ + trx_undo_rec_t* undo_rec) /* in: undo log record */ +{ + return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT); +} + +/************************************************************************** +Returns TRUE if an undo log record contains an extern storage field. 
*/ +UNIV_INLINE +ibool +trx_undo_rec_get_extern_storage( +/*============================*/ + /* out: TRUE if the TRX_UNDO_UPD_EXTERN bit is set in the value at undo_rec + 2, else FALSE */ + trx_undo_rec_t* undo_rec) /* in: undo log record */ +{ + if (mach_read_from_1(undo_rec + 2) & TRX_UNDO_UPD_EXTERN) { + + return(TRUE); + } + + return(FALSE); +} + +/************************************************************************** +Reads the undo log record number. */ +UNIV_INLINE +dulint +trx_undo_rec_get_undo_no( +/*=====================*/ + /* out: undo no, read in much-compressed form starting at undo_rec + 3 */ + trx_undo_rec_t* undo_rec) /* in: undo log record */ +{ + byte* ptr; + + ptr = undo_rec + 3; + + return(mach_dulint_read_much_compressed(ptr)); +} + +/************************************************************************** +Returns the start of the undo record data area. */ +UNIV_INLINE +byte* +trx_undo_rec_get_ptr( +/*=================*/ + /* out: pointer to the start of the undo record data area, i.e. undo_rec + 3 + the much-compressed size of undo_no */ + trx_undo_rec_t* undo_rec, /* in: undo log record */ + dulint undo_no) /* in: undo no read from node */ +{ + return (((byte*) undo_rec) + 3 + + mach_dulint_get_much_compressed_size(undo_no)); +} + +/*************************************************************************** +Copies the undo record to the heap. The number of bytes copied is the 2-byte value read at undo_rec minus ut_align_offset(undo_rec, UNIV_PAGE_SIZE). */ +UNIV_INLINE +trx_undo_rec_t* +trx_undo_rec_copy( +/*==============*/ + /* out, own: copy of undo log record */ + trx_undo_rec_t* undo_rec, /* in: undo log record */ + mem_heap_t* heap) /* in: heap where copied */ +{ + ulint len; + trx_undo_rec_t* rec_copy; + + len = mach_read_from_2(undo_rec) + - ut_align_offset(undo_rec, UNIV_PAGE_SIZE); + rec_copy = mem_heap_alloc(heap, len); + + ut_memcpy(rec_copy, undo_rec, len); + + return(rec_copy); +} diff --git a/storage/xtradb/include/trx0roll.h b/storage/xtradb/include/trx0roll.h new file mode 100644 index 00000000000..3318a5985d7 --- /dev/null +++ b/storage/xtradb/include/trx0roll.h @@ -0,0 +1,340 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Transaction rollback + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#ifndef trx0roll_h +#define trx0roll_h + +#include "univ.i" +#include "trx0trx.h" +#include "trx0types.h" +#include "mtr0mtr.h" +#include "trx0sys.h" + +#define trx_roll_free_all_savepoints(s) trx_roll_savepoints_free((s), NULL) + +/*********************************************************************** +Determines if this transaction is rolling back an incomplete transaction +in crash recovery. */ +UNIV_INTERN +ibool +trx_is_recv( +/*========*/ + /* out: TRUE if trx is an incomplete + transaction that is being rolled back + in crash recovery */ + const trx_t* trx); /* in: transaction */ +/*********************************************************************** +Returns a transaction savepoint taken at this point in time. */ +UNIV_INTERN +trx_savept_t +trx_savept_take( +/*============*/ + /* out: savepoint */ + trx_t* trx); /* in: transaction */ +/*********************************************************************** +Creates an undo number array. 
*/ +UNIV_INTERN +trx_undo_arr_t* +trx_undo_arr_create(void); +/*=====================*/ +/*********************************************************************** +Frees an undo number array. */ +UNIV_INTERN +void +trx_undo_arr_free( +/*==============*/ + trx_undo_arr_t* arr); /* in: undo number array */ +/*********************************************************************** +Returns a pointer to the nth element in an undo number array. */ +UNIV_INLINE +trx_undo_inf_t* +trx_undo_arr_get_nth_info( +/*======================*/ + /* out: pointer to the nth element */ + trx_undo_arr_t* arr, /* in: undo number array */ + ulint n); /* in: position */ +/*************************************************************************** +Tries to truncate the undo logs. */ +UNIV_INTERN +void +trx_roll_try_truncate( +/*==================*/ + trx_t* trx); /* in: transaction */ +/************************************************************************ +Pops the topmost record when the two undo logs of a transaction are seen +as a single stack of records ordered by their undo numbers. Inserts the +undo number of the popped undo record to the array of currently processed +undo numbers in the transaction. When the query thread finishes processing +of this undo record, it must be released with trx_undo_rec_release. */ +UNIV_INTERN +trx_undo_rec_t* +trx_roll_pop_top_rec_of_trx( +/*========================*/ + /* out: undo log record copied to heap, NULL + if none left, or if the undo number of the + top record would be less than the limit */ + trx_t* trx, /* in: transaction */ + dulint limit, /* in: least undo number we need */ + dulint* roll_ptr,/* out: roll pointer to undo record */ + mem_heap_t* heap); /* in: memory heap where copied */ +/************************************************************************ +Reserves an undo log record for a query thread to undo. This should be +called if the query thread gets the undo log record not using the pop +function above. 
*/ +UNIV_INTERN +ibool +trx_undo_rec_reserve( +/*=================*/ + /* out: TRUE if succeeded */ + trx_t* trx, /* in: transaction */ + dulint undo_no);/* in: undo number of the record */ +/*********************************************************************** +Releases a reserved undo record. */ +UNIV_INTERN +void +trx_undo_rec_release( +/*=================*/ + trx_t* trx, /* in: transaction */ + dulint undo_no);/* in: undo number */ +/************************************************************************* +Starts a rollback operation. */ +UNIV_INTERN +void +trx_rollback( +/*=========*/ + trx_t* trx, /* in: transaction */ + trx_sig_t* sig, /* in: signal starting the rollback */ + que_thr_t** next_thr);/* in/out: next query thread to run; + if the value which is passed in is + a pointer to a NULL pointer, then the + calling function can start running + a new query thread */ +/*********************************************************************** +Rollback or clean up any incomplete transactions which were +encountered in crash recovery. If the transaction already was +committed, then we clean up a possible insert undo log. If the +transaction was not yet committed, then we roll it back. +Note: this is done in a background thread. */ +UNIV_INTERN +os_thread_ret_t +trx_rollback_or_clean_all_recovered( +/*================================*/ + /* out: a dummy parameter */ + void* arg __attribute__((unused))); + /* in: a dummy parameter required by + os_thread_create */ +/******************************************************************** +Finishes a transaction rollback. 
*/ +UNIV_INTERN +void +trx_finish_rollback_off_kernel( +/*===========================*/ + que_t* graph, /* in: undo graph which can now be freed */ + trx_t* trx, /* in: transaction */ + que_thr_t** next_thr);/* in/out: next query thread to run; + if the value which is passed in is + a pointer to a NULL pointer, then the + calling function can start running + a new query thread; if this parameter is + NULL, it is ignored */ +/******************************************************************** +Builds an undo 'query' graph for a transaction. The actual rollback is +performed by executing this query graph like a query subprocedure call. +The reply about the completion of the rollback will be sent by this +graph. */ +UNIV_INTERN +que_t* +trx_roll_graph_build( +/*=================*/ + /* out, own: the query graph */ + trx_t* trx); /* in: trx handle */ +/************************************************************************* +Creates a rollback command node struct. */ +UNIV_INTERN +roll_node_t* +roll_node_create( +/*=============*/ + /* out, own: rollback node struct */ + mem_heap_t* heap); /* in: mem heap where created */ +/*************************************************************** +Performs an execution step for a rollback command node in a query graph. */ +UNIV_INTERN +que_thr_t* +trx_rollback_step( +/*==============*/ + /* out: query thread to run next, or NULL */ + que_thr_t* thr); /* in: query thread */ +/*********************************************************************** +Rollback a transaction used in MySQL. */ +UNIV_INTERN +int +trx_rollback_for_mysql( +/*===================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx); /* in: transaction handle */ +/*********************************************************************** +Rollback the latest SQL statement for MySQL. 
*/ +UNIV_INTERN +int +trx_rollback_last_sql_stat_for_mysql( +/*=================================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx); /* in: transaction handle */ +/*********************************************************************** +Rollback a transaction used in MySQL. */ +UNIV_INTERN +int +trx_general_rollback_for_mysql( +/*===========================*/ + /* out: error code or DB_SUCCESS */ + trx_t* trx, /* in: transaction handle */ + ibool partial,/* in: TRUE if partial rollback requested */ + trx_savept_t* savept);/* in: pointer to savepoint undo number, if + partial rollback requested */ +/*********************************************************************** +Rolls a transaction back to a named savepoint. Modifications after the +savepoint are undone but InnoDB does NOT release the corresponding locks +which are stored in memory. If a lock is 'implicit', that is, a newly inserted +row holds a lock where the lock information is carried by the trx id stored in +the row, these locks are naturally released in the rollback. Savepoints which +were set after this savepoint are deleted. */ +UNIV_INTERN +ulint +trx_rollback_to_savepoint_for_mysql( +/*================================*/ + /* out: if no savepoint + of the name found then + DB_NO_SAVEPOINT, + otherwise DB_SUCCESS */ + trx_t* trx, /* in: transaction handle */ + const char* savepoint_name, /* in: savepoint name */ + ib_int64_t* mysql_binlog_cache_pos);/* out: the MySQL binlog cache + position corresponding to this + savepoint; MySQL needs this + information to remove the + binlog entries of the queries + executed after the savepoint */ +/*********************************************************************** +Creates a named savepoint. If the transaction is not yet started, starts it. +If there is already a savepoint of the same name, this call erases that old +savepoint and replaces it with a new. Savepoints are deleted in a transaction +commit or rollback. 
*/ +UNIV_INTERN +ulint +trx_savepoint_for_mysql( +/*====================*/ + /* out: always DB_SUCCESS */ + trx_t* trx, /* in: transaction handle */ + const char* savepoint_name, /* in: savepoint name */ + ib_int64_t binlog_cache_pos); /* in: MySQL binlog cache + position corresponding to this + connection at the time of the + savepoint */ + +/*********************************************************************** +Releases a named savepoint. Savepoints which +were set after this savepoint are deleted. */ +UNIV_INTERN +ulint +trx_release_savepoint_for_mysql( +/*============================*/ + /* out: if no savepoint + of the name found then + DB_NO_SAVEPOINT, + otherwise DB_SUCCESS */ + trx_t* trx, /* in: transaction handle */ + const char* savepoint_name); /* in: savepoint name */ + +/*********************************************************************** +Frees a single savepoint struct. */ +UNIV_INTERN +void +trx_roll_savepoint_free( +/*=====================*/ + trx_t* trx, /* in: transaction handle */ + trx_named_savept_t* savep); /* in: savepoint to free */ + +/*********************************************************************** +Frees savepoint structs starting from savep, if savep == NULL then +free all savepoints. 
*/ + +void +trx_roll_savepoints_free( +/*=====================*/ + trx_t* trx, /* in: transaction handle */ + trx_named_savept_t* savep); /* in: free all savepoints > this one; + if this is NULL, free all savepoints + of trx */ + +/* A cell in the array used during a rollback and a purge */ +struct trx_undo_inf_struct{ + dulint trx_no; /* transaction number: not defined during + a rollback */ + dulint undo_no; /* undo number of an undo record */ + ibool in_use; /* TRUE if the cell is in use */ +}; + +/* During a rollback and a purge, undo numbers of undo records currently being +processed are stored in this array */ + +struct trx_undo_arr_struct{ + ulint n_cells; /* number of cells in the array */ + ulint n_used; /* number of cells currently in use */ + trx_undo_inf_t* infos; /* the array of n_cells undo infos */ + mem_heap_t* heap; /* memory heap from which allocated */ +}; + +/* Rollback command node in a query graph */ +struct roll_node_struct{ + que_common_t common; /* node type: QUE_NODE_ROLLBACK */ + ulint state; /* node execution state: ROLL_NODE_SEND or ROLL_NODE_WAIT */ + ibool partial;/* TRUE if we want a partial rollback */ + trx_savept_t savept; /* savepoint to which to roll back, in the + case of a partial rollback */ +}; + +/* A savepoint set with SQL's "SAVEPOINT savepoint_id" command */ +struct trx_named_savept_struct{ + char* name; /* savepoint name */ + trx_savept_t savept; /* the undo number corresponding to + the savepoint */ + ib_int64_t mysql_binlog_cache_pos; + /* the MySQL binlog cache position + corresponding to this savepoint, not + defined if the MySQL binlogging is not + enabled */ + UT_LIST_NODE_T(trx_named_savept_t) + trx_savepoints; /* the list of savepoints of a + transaction */ +}; + +/* Rollback node states */ +#define ROLL_NODE_SEND 1 +#define ROLL_NODE_WAIT 2 + +#ifndef UNIV_NONINL +#include "trx0roll.ic" +#endif + +#endif diff --git a/storage/xtradb/include/trx0roll.ic b/storage/xtradb/include/trx0roll.ic new file mode 100644 index 00000000000..513b8b44847 --- /dev/null 
+++ b/storage/xtradb/include/trx0roll.ic @@ -0,0 +1,39 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Transaction rollback + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +/*********************************************************************** +Returns pointer to nth element in an undo number array. */ +UNIV_INLINE +trx_undo_inf_t* +trx_undo_arr_get_nth_info( +/*======================*/ + /* out: pointer to the nth element */ + trx_undo_arr_t* arr, /* in: undo number array */ + ulint n) /* in: position */ +{ + ut_ad(arr); + ut_ad(n < arr->n_cells); + + return(arr->infos + n); +} diff --git a/storage/xtradb/include/trx0rseg.h b/storage/xtradb/include/trx0rseg.h new file mode 100644 index 00000000000..f3aa736f788 --- /dev/null +++ b/storage/xtradb/include/trx0rseg.h @@ -0,0 +1,220 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Rollback segment + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#ifndef trx0rseg_h +#define trx0rseg_h + +#include "univ.i" +#include "trx0types.h" +#include "trx0sys.h" + +/********************************************************************** +Gets a rollback segment header. */ +UNIV_INLINE +trx_rsegf_t* +trx_rsegf_get( +/*==========*/ + /* out: rollback segment header, page + x-latched */ + ulint space, /* in: space where placed */ + ulint zip_size, /* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /* in: page number of the header */ + mtr_t* mtr); /* in: mtr */ +/********************************************************************** +Gets a newly created rollback segment header. 
*/ +UNIV_INLINE +trx_rsegf_t* +trx_rsegf_get_new( +/*==============*/ + /* out: rollback segment header, page + x-latched */ + ulint space, /* in: space where placed */ + ulint zip_size, /* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /* in: page number of the header */ + mtr_t* mtr); /* in: mtr */ +/******************************************************************* +Gets the file page number of the nth undo log slot. */ +UNIV_INLINE +ulint +trx_rsegf_get_nth_undo( +/*===================*/ + /* out: page number of the undo log segment */ + trx_rsegf_t* rsegf, /* in: rollback segment header */ + ulint n, /* in: index of slot */ + mtr_t* mtr); /* in: mtr */ +/******************************************************************* +Sets the file page number of the nth undo log slot. */ +UNIV_INLINE +void +trx_rsegf_set_nth_undo( +/*===================*/ + trx_rsegf_t* rsegf, /* in: rollback segment header */ + ulint n, /* in: index of slot */ + ulint page_no,/* in: page number of the undo log segment */ + mtr_t* mtr); /* in: mtr */ +/******************************************************************** +Looks for a free slot for an undo log segment. */ +UNIV_INLINE +ulint +trx_rsegf_undo_find_free( +/*=====================*/ + /* out: slot index or ULINT_UNDEFINED if not + found */ + trx_rsegf_t* rsegf, /* in: rollback segment header */ + mtr_t* mtr); /* in: mtr */ +/********************************************************************** +Looks for a rollback segment, based on the rollback segment id. */ +UNIV_INTERN +trx_rseg_t* +trx_rseg_get_on_id( +/*===============*/ + /* out: rollback segment */ + ulint id); /* in: rollback segment id */ +/******************************************************************** +Creates a rollback segment header. This function is called only when +a new rollback segment is created in the database. 
*/ +UNIV_INTERN +ulint +trx_rseg_header_create( +/*===================*/ + /* out: page number of the created segment, + FIL_NULL if fail */ + ulint space, /* in: space id */ + ulint zip_size, /* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint max_size, /* in: max size in pages */ + ulint* slot_no, /* out: rseg id == slot number in trx sys */ + mtr_t* mtr); /* in: mtr */ +/************************************************************************* +Creates the memory copies for rollback segments and initializes the +rseg list and array in trx_sys at a database startup. */ +UNIV_INTERN +void +trx_rseg_list_and_array_init( +/*=========================*/ + trx_sysf_t* sys_header, /* in: trx system header */ + mtr_t* mtr); /* in: mtr */ +/******************************************************************** +Creates a new rollback segment to the database. */ +UNIV_INTERN +trx_rseg_t* +trx_rseg_create( +/*============*/ + /* out: the created segment object, NULL if + fail */ + ulint space, /* in: space id */ + ulint max_size, /* in: max size in pages */ + ulint* id, /* out: rseg id */ + mtr_t* mtr); /* in: mtr */ + + +/* The real maximum is usually 4076 slots, but 4 slots are reserved as a safety margin. */ +#define TRX_RSEG_N_EXTRA_SLOTS (((UNIV_PAGE_SIZE - (FIL_PAGE_DATA + FIL_PAGE_DATA_END + TRX_RSEG_UNDO_SLOTS)) / TRX_RSEG_SLOT_SIZE) - 4) + +/* Number of undo log slots in a rollback segment file copy */ +#define TRX_RSEG_N_SLOTS (srv_extra_undoslots ? 
TRX_RSEG_N_EXTRA_SLOTS : (UNIV_PAGE_SIZE / 16)) + +/* Maximum number of transactions supported by a single rollback segment */ +#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2) + +/* The rollback segment memory object */ +struct trx_rseg_struct{ + /*--------------------------------------------------------*/ + ulint id; /* rollback segment id == the index of + its slot in the trx system file copy */ + mutex_t mutex; /* mutex protecting the fields in this + struct except id; NOTE that the latching + order must always be kernel mutex -> + rseg mutex */ + ulint space; /* space where the rollback segment + header is placed */ + ulint zip_size;/* compressed page size of space + in bytes, or 0 for uncompressed spaces */ + ulint page_no;/* page number of the rollback segment + header */ + ulint max_size;/* maximum allowed size in pages */ + ulint curr_size;/* current size in pages */ + /*--------------------------------------------------------*/ + /* Fields for update undo logs */ + UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_list; + /* List of update undo logs */ + UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_cached; + /* List of update undo log segments + cached for fast reuse */ + /*--------------------------------------------------------*/ + /* Fields for insert undo logs */ + UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_list; + /* List of insert undo logs */ + UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_cached; + /* List of insert undo log segments + cached for fast reuse */ + /*--------------------------------------------------------*/ + ulint last_page_no; /* Page number of the last not yet + purged log header in the history list; + FIL_NULL if all list purged */ + ulint last_offset; /* Byte offset of the last not yet + purged log header */ + dulint last_trx_no; /* Transaction number of the last not + yet purged log */ + ibool last_del_marks; /* TRUE if the last not yet purged log + needs purging */ + 
/*--------------------------------------------------------*/ + UT_LIST_NODE_T(trx_rseg_t) rseg_list; + /* the list of the rollback segment + memory objects */ +}; + +/* Undo log segment slot in a rollback segment header */ +/*-------------------------------------------------------------*/ +#define TRX_RSEG_SLOT_PAGE_NO 0 /* Page number of the header page of + an undo log segment */ +/*-------------------------------------------------------------*/ +/* Slot size */ +#define TRX_RSEG_SLOT_SIZE 4 + +/* The offset of the rollback segment header on its page */ +#define TRX_RSEG FSEG_PAGE_DATA + +/* Transaction rollback segment header */ +/*-------------------------------------------------------------*/ +#define TRX_RSEG_MAX_SIZE 0 /* Maximum allowed size for rollback + segment in pages */ +#define TRX_RSEG_HISTORY_SIZE 4 /* Number of file pages occupied + by the logs in the history list */ +#define TRX_RSEG_HISTORY 8 /* The update undo logs for committed + transactions */ +#define TRX_RSEG_FSEG_HEADER (8 + FLST_BASE_NODE_SIZE) + /* Header for the file segment where + this page is placed */ +#define TRX_RSEG_UNDO_SLOTS (8 + FLST_BASE_NODE_SIZE + FSEG_HEADER_SIZE) + /* Undo log segment slots */ +/*-------------------------------------------------------------*/ + +#ifndef UNIV_NONINL +#include "trx0rseg.ic" +#endif + +#endif diff --git a/storage/xtradb/include/trx0rseg.ic b/storage/xtradb/include/trx0rseg.ic new file mode 100644 index 00000000000..e665a40fa8b --- /dev/null +++ b/storage/xtradb/include/trx0rseg.ic @@ -0,0 +1,146 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Rollback segment + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#include "srv0srv.h" + +/********************************************************************** +Gets a rollback segment header. */ +UNIV_INLINE +trx_rsegf_t* +trx_rsegf_get( +/*==========*/ + /* out: rollback segment header, page + x-latched */ + ulint space, /* in: space where placed */ + ulint zip_size, /* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /* in: page number of the header */ + mtr_t* mtr) /* in: mtr */ +{ + buf_block_t* block; + trx_rsegf_t* header; + + block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr); + buf_block_dbg_add_level(block, SYNC_RSEG_HEADER); + + header = TRX_RSEG + buf_block_get_frame(block); + + return(header); +} + +/********************************************************************** +Gets a newly created rollback segment header. 
*/ +UNIV_INLINE +trx_rsegf_t* +trx_rsegf_get_new( +/*==============*/ + /* out: rollback segment header, page + x-latched */ + ulint space, /* in: space where placed */ + ulint zip_size, /* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /* in: page number of the header */ + mtr_t* mtr) /* in: mtr */ +{ + buf_block_t* block; + trx_rsegf_t* header; + + block = buf_page_get(space, zip_size, page_no, RW_X_LATCH, mtr); + buf_block_dbg_add_level(block, SYNC_RSEG_HEADER_NEW); + + header = TRX_RSEG + buf_block_get_frame(block); + + return(header); +} + +/******************************************************************* +Gets the file page number of the nth undo log slot. */ +UNIV_INLINE +ulint +trx_rsegf_get_nth_undo( +/*===================*/ + /* out: page number of the undo log segment */ + trx_rsegf_t* rsegf, /* in: rollback segment header */ + ulint n, /* in: index of slot */ + mtr_t* mtr) /* in: mtr */ +{ + if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) { + fprintf(stderr, + "InnoDB: Error: trying to get slot %lu of rseg\n", + (ulong) n); + ut_error; + } + + return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + + n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr)); +} + +/******************************************************************* +Sets the file page number of the nth undo log slot. */ +UNIV_INLINE +void +trx_rsegf_set_nth_undo( +/*===================*/ + trx_rsegf_t* rsegf, /* in: rollback segment header */ + ulint n, /* in: index of slot */ + ulint page_no,/* in: page number of the undo log segment */ + mtr_t* mtr) /* in: mtr */ +{ + if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) { + fprintf(stderr, + "InnoDB: Error: trying to set slot %lu of rseg\n", + (ulong) n); + ut_error; + } + + mlog_write_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE, + page_no, MLOG_4BYTES, mtr); +} + +/******************************************************************** +Looks for a free slot for an undo log segment. 
*/ +UNIV_INLINE +ulint +trx_rsegf_undo_find_free( +/*=====================*/ + /* out: slot index or ULINT_UNDEFINED if not + found */ + trx_rsegf_t* rsegf, /* in: rollback segment header */ + mtr_t* mtr) /* in: mtr */ +{ + ulint i; + ulint page_no; + + for (i = 0; i < TRX_RSEG_N_SLOTS; i++) { + + page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr); + + if (page_no == FIL_NULL) { + + return(i); + } + } + + return(ULINT_UNDEFINED); +} diff --git a/storage/xtradb/include/trx0sys.h b/storage/xtradb/include/trx0sys.h new file mode 100644 index 00000000000..012b34564bc --- /dev/null +++ b/storage/xtradb/include/trx0sys.h @@ -0,0 +1,555 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Transaction system + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#ifndef trx0sys_h +#define trx0sys_h + +#include "univ.i" + +#include "trx0types.h" +#include "mtr0mtr.h" +#include "mtr0log.h" +#include "ut0byte.h" +#include "mem0mem.h" +#include "sync0sync.h" +#include "ut0lst.h" +#include "buf0buf.h" +#include "fil0fil.h" +#include "fut0lst.h" +#include "fsp0fsp.h" +#include "read0types.h" +#include "page0types.h" + +/* In a MySQL replication slave, in crash recovery we store the master log +file name and position here. We have successfully got the updates to InnoDB +up to this position. If .._pos is -1, it means no crash recovery was needed, +or there was no master log position info inside InnoDB. */ + +extern char trx_sys_mysql_master_log_name[]; +extern ib_int64_t trx_sys_mysql_master_log_pos; + +/* If this MySQL server uses binary logging, after InnoDB has been inited +and if it has done a crash recovery, we store the binlog file name and position +here. If .._pos is -1, it means there was no binlog position info inside +InnoDB. */ + +extern char trx_sys_mysql_bin_log_name[]; +extern ib_int64_t trx_sys_mysql_bin_log_pos; + +/* The transaction system */ +extern trx_sys_t* trx_sys; + +/* Doublewrite system */ +extern trx_doublewrite_t* trx_doublewrite; +extern ibool trx_doublewrite_must_reset_space_ids; +extern ibool trx_sys_multiple_tablespace_format; + +/******************************************************************** +Creates the doublewrite buffer to a new InnoDB installation. The header of the +doublewrite buffer is placed on the trx system header page. 
*/ +UNIV_INTERN +void +trx_sys_create_doublewrite_buf(void); +/*================================*/ +/******************************************************************** +At a database startup initializes the doublewrite buffer memory structure if +we already have a doublewrite buffer created in the data files. If we are +upgrading to an InnoDB version which supports multiple tablespaces, then this +function performs the necessary update operations. If we are in a crash +recovery, this function uses a possible doublewrite buffer to restore +half-written pages in the data files. */ +UNIV_INTERN +void +trx_sys_doublewrite_init_or_restore_pages( +/*======================================*/ + ibool restore_corrupt_pages); +/******************************************************************** +Marks the trx sys header when we have successfully upgraded to the >= 4.1.x +multiple tablespace format. */ +UNIV_INTERN +void +trx_sys_mark_upgraded_to_multiple_tablespaces(void); +/*===============================================*/ +/******************************************************************** +Determines if a page number is located inside the doublewrite buffer. */ +UNIV_INTERN +ibool +trx_doublewrite_page_inside( +/*========================*/ + /* out: TRUE if the location is inside + the two blocks of the doublewrite buffer */ + ulint page_no); /* in: page number */ +/******************************************************************* +Checks if a page address is the trx sys header page. */ +UNIV_INLINE +ibool +trx_sys_hdr_page( +/*=============*/ + /* out: TRUE if trx sys header page */ + ulint space, /* in: space */ + ulint page_no);/* in: page number */ +/********************************************************************* +Creates and initializes the central memory structures for the transaction +system. This is called when the database is started. 
*/ +UNIV_INTERN +void +trx_sys_init_at_db_start(void); +/*==========================*/ +/********************************************************************* +Creates and initializes the transaction system at the database creation. */ +UNIV_INTERN +void +trx_sys_create(void); +/*================*/ +/********************************************************************* +Create extra rollback segments when create_new_db */ +UNIV_INTERN +void +trx_sys_create_extra_rseg( +/*======================*/ + ulint num); /* in: number of extra user rollback segments */ +/******************************************************************** +Looks for a free slot for a rollback segment in the trx system file copy. */ +UNIV_INTERN +ulint +trx_sysf_rseg_find_free( +/*====================*/ + /* out: slot index or ULINT_UNDEFINED + if not found */ + mtr_t* mtr); /* in: mtr */ +/******************************************************************* +Gets the pointer in the nth slot of the rseg array. */ +UNIV_INLINE +trx_rseg_t* +trx_sys_get_nth_rseg( +/*=================*/ + /* out: pointer to rseg object, NULL if slot + not in use */ + trx_sys_t* sys, /* in: trx system */ + ulint n); /* in: index of slot */ +/******************************************************************* +Sets the pointer in the nth slot of the rseg array. */ +UNIV_INLINE +void +trx_sys_set_nth_rseg( +/*=================*/ + trx_sys_t* sys, /* in: trx system */ + ulint n, /* in: index of slot */ + trx_rseg_t* rseg); /* in: pointer to rseg object, NULL if slot + not in use */ +/************************************************************************** +Gets a pointer to the transaction system file copy and x-locks its page. */ +UNIV_INLINE +trx_sysf_t* +trx_sysf_get( +/*=========*/ + /* out: pointer to system file copy, page x-locked */ + mtr_t* mtr); /* in: mtr */ +/********************************************************************* +Gets the space of the nth rollback segment slot in the trx system +file copy. 
*/ +UNIV_INLINE +ulint +trx_sysf_rseg_get_space( +/*====================*/ + /* out: space id */ + trx_sysf_t* sys_header, /* in: trx sys file copy */ + ulint i, /* in: slot index == rseg id */ + mtr_t* mtr); /* in: mtr */ +/********************************************************************* +Gets the page number of the nth rollback segment slot in the trx system +file copy. */ +UNIV_INLINE +ulint +trx_sysf_rseg_get_page_no( +/*======================*/ + /* out: page number, FIL_NULL + if slot unused */ + trx_sysf_t* sys_header, /* in: trx sys file copy */ + ulint i, /* in: slot index == rseg id */ + mtr_t* mtr); /* in: mtr */ +/********************************************************************* +Sets the space id of the nth rollback segment slot in the trx system +file copy. */ +UNIV_INLINE +void +trx_sysf_rseg_set_space( +/*====================*/ + trx_sysf_t* sys_header, /* in: trx sys file copy */ + ulint i, /* in: slot index == rseg id */ + ulint space, /* in: space id */ + mtr_t* mtr); /* in: mtr */ +/********************************************************************* +Sets the page number of the nth rollback segment slot in the trx system +file copy. */ +UNIV_INLINE +void +trx_sysf_rseg_set_page_no( +/*======================*/ + trx_sysf_t* sys_header, /* in: trx sys file copy */ + ulint i, /* in: slot index == rseg id */ + ulint page_no, /* in: page number, FIL_NULL if + the slot is reset to unused */ + mtr_t* mtr); /* in: mtr */ +/********************************************************************* +Allocates a new transaction id. */ +UNIV_INLINE +dulint +trx_sys_get_new_trx_id(void); +/*========================*/ + /* out: new, allocated trx id */ +/********************************************************************* +Allocates a new transaction number. 
*/ +UNIV_INLINE +dulint +trx_sys_get_new_trx_no(void); +/*========================*/ + /* out: new, allocated trx number */ +/********************************************************************* +Writes a trx id to an index page. In case that the id size changes in +some future version, this function should be used instead of +mach_write_... */ +UNIV_INLINE +void +trx_write_trx_id( +/*=============*/ + byte* ptr, /* in: pointer to memory where written */ + dulint id); /* in: id */ +/********************************************************************* +Reads a trx id from an index page. In case that the id size changes in +some future version, this function should be used instead of +mach_read_... */ +UNIV_INLINE +dulint +trx_read_trx_id( +/*============*/ + /* out: id */ + const byte* ptr); /* in: pointer to memory from where to read */ +/******************************************************************** +Looks for the trx handle with the given id in trx_list. */ +UNIV_INLINE +trx_t* +trx_get_on_id( +/*==========*/ + /* out: the trx handle or NULL if not found */ + dulint trx_id); /* in: trx id to search for */ +/******************************************************************** +Returns the minimum trx id in trx list. This is the smallest id for which +the trx can possibly be active. (But, you must look at the trx->conc_state to +find out if the minimum trx id transaction itself is active, or already +committed.) */ +UNIV_INLINE +dulint +trx_list_get_min_trx_id(void); +/*=========================*/ + /* out: the minimum trx id, or trx_sys->max_trx_id + if the trx list is empty */ +/******************************************************************** +Checks if a transaction with the given id is active. */ +UNIV_INLINE +ibool +trx_is_active( +/*==========*/ + /* out: TRUE if active */ + dulint trx_id);/* in: trx id of the transaction */ +/******************************************************************** +Checks that trx is in the trx list. 
*/ +UNIV_INTERN +ibool +trx_in_trx_list( +/*============*/ + /* out: TRUE if is in */ + trx_t* in_trx);/* in: trx */ +/********************************************************************* +Updates the offset information about the end of the MySQL binlog entry +which corresponds to the transaction just being committed. In a MySQL +replication slave updates the latest master binlog position up to which +replication has proceeded. */ +UNIV_INTERN +void +trx_sys_update_mysql_binlog_offset( +/*===============================*/ + const char* file_name,/* in: MySQL log file name */ + ib_int64_t offset, /* in: position in that log file */ + ulint field, /* in: offset of the MySQL log info field in + the trx sys header */ + mtr_t* mtr); /* in: mtr */ +/********************************************************************* +Prints to stderr the MySQL binlog offset info in the trx system header if +the magic number shows it valid. */ +UNIV_INTERN +void +trx_sys_print_mysql_binlog_offset(void); +/*===================================*/ +#ifdef UNIV_HOTBACKUP +/********************************************************************* +Prints to stderr the MySQL binlog info in the system header if the +magic number shows it valid. */ +UNIV_INTERN +void +trx_sys_print_mysql_binlog_offset_from_page( +/*========================================*/ + const byte* page); /* in: buffer containing the trx + system header page, i.e., page number + TRX_SYS_PAGE_NO in the tablespace */ +#endif /* UNIV_HOTBACKUP */ +/********************************************************************* +Prints to stderr the MySQL master log offset info in the trx system header if +the magic number shows it valid. */ +UNIV_INTERN +void +trx_sys_print_mysql_master_log_pos(void); +/*====================================*/ +/********************************************************************* +Initializes the tablespace tag system. 
*/ +UNIV_INTERN +void +trx_sys_file_format_init(void); +/*==========================*/ +/********************************************************************* +Closes the tablespace tag system. */ +UNIV_INTERN +void +trx_sys_file_format_close(void); +/*===========================*/ +/************************************************************************ +Tags the system table space with minimum format id if it has not been +tagged yet. +WARNING: This function is only called during the startup and AFTER the +redo log application during recovery has finished. */ +UNIV_INTERN +void +trx_sys_file_format_tag_init(void); +/*==============================*/ +/********************************************************************* +Get the name representation of the file format from its id. */ +UNIV_INTERN +const char* +trx_sys_file_format_id_to_name( +/*===========================*/ + /* out: pointer to the name */ + const ulint id); /* in: id of the file format */ +/********************************************************************* +Set the file format id unconditionally except if it's already the +same value. */ +UNIV_INTERN +ibool +trx_sys_file_format_max_set( +/*========================*/ + /* out: TRUE if value updated */ + ulint format_id, /* in: file format id */ + const char** name); /* out: max file format name or + NULL if not needed. */ +/********************************************************************* +Get the name representation of the max file format. */ +UNIV_INTERN +const char* +trx_sys_file_format_max_get(void); +/*=============================*/ + /* out: pointer to the max format name */ +/********************************************************************* +Check for the max file format tag stored on disk. 
*/ +UNIV_INTERN +ulint +trx_sys_file_format_max_check( +/*==========================*/ + /* out: DB_SUCCESS or error code */ + ulint max_format_id); /* in: the max format id to check */ +/************************************************************************ +Update the file format tag in the system tablespace only if the given +format id is greater than the known max id. */ +UNIV_INTERN +ibool +trx_sys_file_format_max_upgrade( +/*============================*/ + /* out: TRUE if format_id was + bigger than the known max id */ + const char** name, /* out: max file format name */ + ulint format_id); /* in: file format identifier */ +/* The automatically created system rollback segment has this id */ +#define TRX_SYS_SYSTEM_RSEG_ID 0 + +/* Space id and page no where the trx system file copy resides */ +#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */ +#define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO + +/* The offset of the transaction system header on the page */ +#define TRX_SYS FSEG_PAGE_DATA + +/* Transaction system header */ +/*-------------------------------------------------------------*/ +#define TRX_SYS_TRX_ID_STORE 0 /* the maximum trx id or trx number + modulo TRX_SYS_TRX_ID_WRITE_MARGIN + written to a file page by any + transaction; the assignment of + transaction ids continues from this + number rounded up by .._MARGIN plus + .._MARGIN when the database is + started */ +#define TRX_SYS_FSEG_HEADER 8 /* segment header for the tablespace + segment the trx system is created + into */ +#define TRX_SYS_RSEGS (8 + FSEG_HEADER_SIZE) + /* the start of the array of rollback + segment specification slots */ +/*-------------------------------------------------------------*/ + +/* Max number of rollback segments: the number of segment specification slots +in the transaction system array; rollback segment id must fit in one byte, +therefore 256; each slot is currently 8 bytes in size */ +#define TRX_SYS_N_RSEGS 256 + +#define TRX_SYS_MYSQL_LOG_NAME_LEN 512 +#define 
TRX_SYS_MYSQL_LOG_MAGIC_N 873422344 + +#if UNIV_PAGE_SIZE < 4096 +# error "UNIV_PAGE_SIZE < 4096" +#endif +/* The offset of the MySQL replication info in the trx system header; +this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */ +#define TRX_SYS_MYSQL_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 2000) + +/* The offset of the MySQL binlog offset info in the trx system header */ +#define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 1000) +#define TRX_SYS_MYSQL_LOG_MAGIC_N_FLD 0 /* magic number which shows + if we have valid data in the + MySQL binlog info; the value + is ..._MAGIC_N if yes */ +#define TRX_SYS_MYSQL_LOG_OFFSET_HIGH 4 /* high 4 bytes of the offset + within that file */ +#define TRX_SYS_MYSQL_LOG_OFFSET_LOW 8 /* low 4 bytes of the offset + within that file */ +#define TRX_SYS_MYSQL_LOG_NAME 12 /* MySQL log file name */ + +/* The offset of the doublewrite buffer header on the trx system header page */ +#define TRX_SYS_DOUBLEWRITE (UNIV_PAGE_SIZE - 200) +/*-------------------------------------------------------------*/ +#define TRX_SYS_DOUBLEWRITE_FSEG 0 /* fseg header of the fseg + containing the doublewrite + buffer */ +#define TRX_SYS_DOUBLEWRITE_MAGIC FSEG_HEADER_SIZE + /* 4-byte magic number which + shows if we already have + created the doublewrite + buffer */ +#define TRX_SYS_DOUBLEWRITE_BLOCK1 (4 + FSEG_HEADER_SIZE) + /* page number of the + first page in the first + sequence of 64 + (= FSP_EXTENT_SIZE) consecutive + pages in the doublewrite + buffer */ +#define TRX_SYS_DOUBLEWRITE_BLOCK2 (8 + FSEG_HEADER_SIZE) + /* page number of the + first page in the second + sequence of 64 consecutive + pages in the doublewrite + buffer */ +#define TRX_SYS_DOUBLEWRITE_REPEAT 12 /* we repeat the above 3 + numbers so that if the trx + sys header is half-written + to disk, we still may be able + to recover the information */ +#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED (24 + FSEG_HEADER_SIZE) + /* If this is not yet set to + .._N, we must reset the + doublewrite 
buffer, because + starting from 4.1.x the space + id of a data page is stored to + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO */ +/*-------------------------------------------------------------*/ +#define TRX_SYS_DOUBLEWRITE_MAGIC_N 536853855 +#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N 1783657386 + + +#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE + +/* The offset of the file format tag on the trx system header page */ +#define TRX_SYS_FILE_FORMAT_TAG (UNIV_PAGE_SIZE - 16) + +/* We use these random constants to reduce the probability of reading +garbage (from previous versions) that maps to an actual format id. We +use these as bit masks at the time of reading and writing from/to disk. */ +#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_LOW 3645922177UL +#define TRX_SYS_FILE_FORMAT_TAG_MAGIC_N_HIGH 2745987765UL + +/* Doublewrite control struct */ +struct trx_doublewrite_struct{ + mutex_t mutex; /* mutex protecting the first_free field and + write_buf */ + ulint block1; /* the page number of the first + doublewrite block (64 pages) */ + ulint block2; /* page number of the second block */ + ulint first_free; /* first free position in write_buf measured + in units of UNIV_PAGE_SIZE */ + byte* write_buf; /* write buffer used in writing to the + doublewrite buffer, aligned to an + address divisible by UNIV_PAGE_SIZE + (which is required by Windows aio) */ + byte* write_buf_unaligned; /* pointer to write_buf, but unaligned */ + buf_page_t** + buf_block_arr; /* array to store pointers to the buffer + blocks which have been cached to write_buf */ +}; + +/* The transaction system central memory data structure; protected by the +kernel mutex */ +struct trx_sys_struct{ + dulint max_trx_id; /* The smallest number not yet + assigned as a transaction id or + transaction number */ + UT_LIST_BASE_NODE_T(trx_t) trx_list; + /* List of active and committed in + memory transactions, sorted on trx id, + biggest first */ + UT_LIST_BASE_NODE_T(trx_t) mysql_trx_list; + /* List of transactions 
created + for MySQL */ + UT_LIST_BASE_NODE_T(trx_rseg_t) rseg_list; + /* List of rollback segment objects */ + trx_rseg_t* latest_rseg; /* Latest rollback segment in the + round-robin assignment of rollback + segments to transactions */ + trx_rseg_t* rseg_array[TRX_SYS_N_RSEGS]; + /* Pointer array to rollback segments; + NULL if slot not in use */ + ulint rseg_history_len;/* Length of the TRX_RSEG_HISTORY + list (update undo logs for committed + transactions), protected by + rseg->mutex */ + UT_LIST_BASE_NODE_T(read_view_t) view_list; + /* List of read views sorted on trx no, + biggest first */ +}; + +/* When a trx id which is zero modulo this number (which must be a power of +two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system +page is updated */ +#define TRX_SYS_TRX_ID_WRITE_MARGIN 256 + +#ifndef UNIV_NONINL +#include "trx0sys.ic" +#endif + +#endif diff --git a/storage/xtradb/include/trx0sys.ic b/storage/xtradb/include/trx0sys.ic new file mode 100644 index 00000000000..4437133188f --- /dev/null +++ b/storage/xtradb/include/trx0sys.ic @@ -0,0 +1,383 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Transaction system + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#include "srv0srv.h" +#include "trx0trx.h" + +/* The typedef for rseg slot in the file copy */ +typedef byte trx_sysf_rseg_t; + +/* Rollback segment specification slot offsets */ +/*-------------------------------------------------------------*/ +#define TRX_SYS_RSEG_SPACE 0 /* space where the segment + header is placed; starting with + MySQL/InnoDB 5.1.7, this is + UNIV_UNDEFINED if the slot is unused */ +#define TRX_SYS_RSEG_PAGE_NO 4 /* page number where the segment + header is placed; this is FIL_NULL + if the slot is unused */ +/*-------------------------------------------------------------*/ +/* Size of a rollback segment specification slot */ +#define TRX_SYS_RSEG_SLOT_SIZE 8 + +/********************************************************************* +Writes the value of max_trx_id to the file based trx system header. */ +UNIV_INTERN +void +trx_sys_flush_max_trx_id(void); +/*==========================*/ + +/******************************************************************* +Checks if a page address is the trx sys header page. */ +UNIV_INLINE +ibool +trx_sys_hdr_page( +/*=============*/ + /* out: TRUE if trx sys header page */ + ulint space, /* in: space */ + ulint page_no)/* in: page number */ +{ + if ((space == TRX_SYS_SPACE) && (page_no == TRX_SYS_PAGE_NO)) { + + return(TRUE); + } + + return(FALSE); +} + +/******************************************************************* +Gets the pointer in the nth slot of the rseg array. 
*/ +UNIV_INLINE +trx_rseg_t* +trx_sys_get_nth_rseg( +/*=================*/ + /* out: pointer to rseg object, NULL if slot + not in use */ + trx_sys_t* sys, /* in: trx system */ + ulint n) /* in: index of slot */ +{ + ut_ad(mutex_own(&(kernel_mutex))); + ut_ad(n < TRX_SYS_N_RSEGS); + + return(sys->rseg_array[n]); +} + +/******************************************************************* +Sets the pointer in the nth slot of the rseg array. */ +UNIV_INLINE +void +trx_sys_set_nth_rseg( +/*=================*/ + trx_sys_t* sys, /* in: trx system */ + ulint n, /* in: index of slot */ + trx_rseg_t* rseg) /* in: pointer to rseg object, NULL if slot + not in use */ +{ + ut_ad(n < TRX_SYS_N_RSEGS); + + sys->rseg_array[n] = rseg; +} + +/************************************************************************** +Gets a pointer to the transaction system header and x-latches its page. */ +UNIV_INLINE +trx_sysf_t* +trx_sysf_get( +/*=========*/ + /* out: pointer to system header, page x-latched. */ + mtr_t* mtr) /* in: mtr */ +{ + buf_block_t* block; + trx_sysf_t* header; + + ut_ad(mtr); + + block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, + RW_X_LATCH, mtr); + buf_block_dbg_add_level(block, SYNC_TRX_SYS_HEADER); + + header = TRX_SYS + buf_block_get_frame(block); + + return(header); +} + +/********************************************************************* +Gets the space of the nth rollback segment slot in the trx system +file copy. 
*/ +UNIV_INLINE +ulint +trx_sysf_rseg_get_space( +/*====================*/ + /* out: space id */ + trx_sysf_t* sys_header, /* in: trx sys header */ + ulint i, /* in: slot index == rseg id */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(mutex_own(&(kernel_mutex))); + ut_ad(sys_header); + ut_ad(i < TRX_SYS_N_RSEGS); + + return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS + + i * TRX_SYS_RSEG_SLOT_SIZE + + TRX_SYS_RSEG_SPACE, MLOG_4BYTES, mtr)); +} + +/********************************************************************* +Gets the page number of the nth rollback segment slot in the trx system +header. */ +UNIV_INLINE +ulint +trx_sysf_rseg_get_page_no( +/*======================*/ + /* out: page number, FIL_NULL + if slot unused */ + trx_sysf_t* sys_header, /* in: trx system header */ + ulint i, /* in: slot index == rseg id */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(sys_header); + ut_ad(mutex_own(&(kernel_mutex))); + ut_ad(i < TRX_SYS_N_RSEGS); + + return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS + + i * TRX_SYS_RSEG_SLOT_SIZE + + TRX_SYS_RSEG_PAGE_NO, MLOG_4BYTES, mtr)); +} + +/********************************************************************* +Sets the space id of the nth rollback segment slot in the trx system +file copy. */ +UNIV_INLINE +void +trx_sysf_rseg_set_space( +/*====================*/ + trx_sysf_t* sys_header, /* in: trx sys file copy */ + ulint i, /* in: slot index == rseg id */ + ulint space, /* in: space id */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(mutex_own(&(kernel_mutex))); + ut_ad(sys_header); + ut_ad(i < TRX_SYS_N_RSEGS); + + mlog_write_ulint(sys_header + TRX_SYS_RSEGS + + i * TRX_SYS_RSEG_SLOT_SIZE + + TRX_SYS_RSEG_SPACE, + space, + MLOG_4BYTES, mtr); +} + +/********************************************************************* +Sets the page number of the nth rollback segment slot in the trx system +header. 
*/ +UNIV_INLINE +void +trx_sysf_rseg_set_page_no( +/*======================*/ + trx_sysf_t* sys_header, /* in: trx sys header */ + ulint i, /* in: slot index == rseg id */ + ulint page_no, /* in: page number, FIL_NULL if the + slot is reset to unused */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(mutex_own(&(kernel_mutex))); + ut_ad(sys_header); + ut_ad(i < TRX_SYS_N_RSEGS); + + mlog_write_ulint(sys_header + TRX_SYS_RSEGS + + i * TRX_SYS_RSEG_SLOT_SIZE + + TRX_SYS_RSEG_PAGE_NO, + page_no, + MLOG_4BYTES, mtr); +} + +/********************************************************************* +Writes a trx id to an index page. In case that the id size changes in +some future version, this function should be used instead of +mach_write_... */ +UNIV_INLINE +void +trx_write_trx_id( +/*=============*/ + byte* ptr, /* in: pointer to memory where written */ + dulint id) /* in: id */ +{ +#if DATA_TRX_ID_LEN != 6 +# error "DATA_TRX_ID_LEN != 6" +#endif + mach_write_to_6(ptr, id); +} + +/********************************************************************* +Reads a trx id from an index page. In case that the id size changes in +some future version, this function should be used instead of +mach_read_... */ +UNIV_INLINE +dulint +trx_read_trx_id( +/*============*/ + /* out: id */ + const byte* ptr) /* in: pointer to memory from where to read */ +{ +#if DATA_TRX_ID_LEN != 6 +# error "DATA_TRX_ID_LEN != 6" +#endif + return(mach_read_from_6(ptr)); +} + +/******************************************************************** +Looks for the trx handle with the given id in trx_list. 
*/ +UNIV_INLINE +trx_t* +trx_get_on_id( +/*==========*/ + /* out: the trx handle or NULL if not found */ + dulint trx_id) /* in: trx id to search for */ +{ + trx_t* trx; + + ut_ad(mutex_own(&(kernel_mutex))); + + trx = UT_LIST_GET_FIRST(trx_sys->trx_list); + + while (trx != NULL) { + if (0 == ut_dulint_cmp(trx_id, trx->id)) { + + return(trx); + } + + trx = UT_LIST_GET_NEXT(trx_list, trx); + } + + return(NULL); +} + +/******************************************************************** +Returns the minimum trx id in the trx list. This is the smallest id for which +the trx can possibly be active. (But, you must look at the trx->conc_state to +find out if the minimum trx id transaction itself is active, or already +committed.) The minimum is read from the tail of trx_sys->trx_list. */ +UNIV_INLINE +dulint +trx_list_get_min_trx_id(void) +/*=========================*/ + /* out: the minimum trx id, or trx_sys->max_trx_id + if the trx list is empty */ +{ + trx_t* trx; + + ut_ad(mutex_own(&(kernel_mutex))); + + trx = UT_LIST_GET_LAST(trx_sys->trx_list); + + if (trx == NULL) { + + return(trx_sys->max_trx_id); + } + + return(trx->id); +} + +/******************************************************************** +Checks if a transaction with the given id is active. 
*/ +UNIV_INLINE +ibool +trx_is_active( +/*==========*/ + /* out: TRUE if active */ + dulint trx_id) /* in: trx id of the transaction */ +{ + trx_t* trx; + + ut_ad(mutex_own(&(kernel_mutex))); + + if (ut_dulint_cmp(trx_id, trx_list_get_min_trx_id()) < 0) { + + return(FALSE); + } + + if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) { + + /* There must be corruption: we return TRUE because this + function is only called by lock_clust_rec_some_has_impl() + and row_vers_impl_x_locked_off_kernel() and they have + diagnostic prints in this case */ + + return(TRUE); + } + + trx = trx_get_on_id(trx_id); + if (trx && (trx->conc_state == TRX_ACTIVE + || trx->conc_state == TRX_PREPARED)) { + + return(TRUE); + } + + return(FALSE); +} + +/********************************************************************* +Allocates a new transaction id. */ +UNIV_INLINE +dulint +trx_sys_get_new_trx_id(void) +/*========================*/ + /* out: new, allocated trx id */ +{ + dulint id; + + ut_ad(mutex_own(&kernel_mutex)); + + /* VERY important: after the database is started, max_trx_id value is + divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the following if + will evaluate to TRUE when this function is first time called, + and the value for trx id will be written to disk-based header! + Thus trx id values will not overlap when the database is + repeatedly started! */ + + if (ut_dulint_get_low(trx_sys->max_trx_id) + % TRX_SYS_TRX_ID_WRITE_MARGIN == 0) { + + trx_sys_flush_max_trx_id(); + } + + id = trx_sys->max_trx_id; + + UT_DULINT_INC(trx_sys->max_trx_id); + + return(id); +} + +/********************************************************************* +Allocates a new transaction number. 
*/ +UNIV_INLINE +dulint +trx_sys_get_new_trx_no(void) +/*========================*/ + /* out: new, allocated trx number */ +{ + ut_ad(mutex_own(&kernel_mutex)); + + return(trx_sys_get_new_trx_id()); +} diff --git a/storage/xtradb/include/trx0trx.h b/storage/xtradb/include/trx0trx.h new file mode 100644 index 00000000000..8d5803ab353 --- /dev/null +++ b/storage/xtradb/include/trx0trx.h @@ -0,0 +1,845 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The transaction + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#ifndef trx0trx_h +#define trx0trx_h + +#include "univ.i" +#include "trx0types.h" +#include "lock0types.h" +#include "usr0types.h" +#include "que0types.h" +#include "mem0mem.h" +#include "read0types.h" +#include "dict0types.h" +#include "trx0xa.h" +#include "ut0vec.h" + +/* Dummy session used currently in MySQL interface */ +extern sess_t* trx_dummy_sess; + +/* Number of transactions currently allocated for MySQL: protected by +the kernel mutex */ +extern ulint trx_n_mysql_transactions; + 
+/***************************************************************** +Resets the new record lock info in a transaction struct. */ +UNIV_INLINE +void +trx_reset_new_rec_lock_info( +/*========================*/ + trx_t* trx); /* in: transaction struct */ +/***************************************************************** +Registers that we have set a new record lock on an index. We only have space +to store 2 indexes! If this is called to store more than 2 indexes after +trx_reset_new_rec_lock_info(), then this function does nothing. */ +UNIV_INLINE +void +trx_register_new_rec_lock( +/*======================*/ + trx_t* trx, /* in: transaction struct */ + dict_index_t* index); /* in: trx sets a new record lock on this + index */ +/***************************************************************** +Checks if trx has set a new record lock on an index. */ +UNIV_INLINE +ibool +trx_new_rec_locks_contain( +/*======================*/ + /* out: TRUE if trx has set a new record lock + on index */ + trx_t* trx, /* in: transaction struct */ + dict_index_t* index); /* in: index */ +/************************************************************************ +Releases the search latch if trx has reserved it. */ +UNIV_INTERN +void +trx_search_latch_release_if_reserved( +/*=================================*/ + trx_t* trx); /* in: transaction */ +/********************************************************************** +Set detailed error message for the transaction. */ +UNIV_INTERN +void +trx_set_detailed_error( +/*===================*/ + trx_t* trx, /* in: transaction struct */ + const char* msg); /* in: detailed error message */ +/***************************************************************** +Set detailed error message for the transaction from a file. Note that the +file is rewound before reading from it. 
*/ +UNIV_INTERN +void +trx_set_detailed_error_from_file( +/*=============================*/ + trx_t* trx, /* in: transaction struct */ + FILE* file); /* in: file to read message from */ +/******************************************************************** +Retrieves the error_info field from a trx. */ +UNIV_INLINE +const dict_index_t* +trx_get_error_info( +/*===============*/ + /* out: the error info */ + const trx_t* trx); /* in: trx object */ +/******************************************************************** +Creates and initializes a transaction object. */ +UNIV_INTERN +trx_t* +trx_create( +/*=======*/ + /* out, own: the transaction */ + sess_t* sess) /* in: session */ + __attribute__((nonnull)); +/************************************************************************ +Creates a transaction object for MySQL. */ +UNIV_INTERN +trx_t* +trx_allocate_for_mysql(void); +/*========================*/ + /* out, own: transaction object */ +/************************************************************************ +Creates a transaction object for background operations by the master thread. */ +UNIV_INTERN +trx_t* +trx_allocate_for_background(void); +/*=============================*/ + /* out, own: transaction object */ +/************************************************************************ +Frees a transaction object. */ +UNIV_INTERN +void +trx_free( +/*=====*/ + trx_t* trx); /* in, own: trx object */ +/************************************************************************ +Frees a transaction object for MySQL. */ +UNIV_INTERN +void +trx_free_for_mysql( +/*===============*/ + trx_t* trx); /* in, own: trx object */ +/************************************************************************ +Frees a transaction object of a background operation of the master thread. 
*/ +UNIV_INTERN +void +trx_free_for_background( +/*====================*/ + trx_t* trx); /* in, own: trx object */ +/******************************************************************** +Creates trx objects for transactions and initializes the trx list of +trx_sys at database start. Rollback segment and undo log lists must +already exist when this function is called, because the lists of +transactions to be rolled back or cleaned up are built based on the +undo log lists. */ +UNIV_INTERN +void +trx_lists_init_at_db_start(void); +/*============================*/ +/******************************************************************** +Starts a new transaction. */ +UNIV_INTERN +ibool +trx_start( +/*======*/ + /* out: TRUE if success, FALSE if the rollback + segment could not support this many transactions */ + trx_t* trx, /* in: transaction */ + ulint rseg_id);/* in: rollback segment id; if ULINT_UNDEFINED + is passed, the system chooses the rollback segment + automatically in a round-robin fashion */ +/******************************************************************** +Starts a new transaction. */ +UNIV_INTERN +ibool +trx_start_low( +/*==========*/ + /* out: TRUE */ + trx_t* trx, /* in: transaction */ + ulint rseg_id);/* in: rollback segment id; if ULINT_UNDEFINED + is passed, the system chooses the rollback segment + automatically in a round-robin fashion */ +/***************************************************************** +Starts the transaction if it is not yet started. */ +UNIV_INLINE +void +trx_start_if_not_started( +/*=====================*/ + trx_t* trx); /* in: transaction */ +/***************************************************************** +Starts the transaction if it is not yet started. Assumes we have reserved +the kernel mutex! */ +UNIV_INLINE +void +trx_start_if_not_started_low( +/*=========================*/ + trx_t* trx); /* in: transaction */ +/******************************************************************** +Commits a transaction. 
*/ +UNIV_INTERN +void +trx_commit_off_kernel( +/*==================*/ + trx_t* trx); /* in: transaction */ +/******************************************************************** +Cleans up a transaction at database startup. The cleanup is needed if +the transaction already got to the middle of a commit when the database +crashed, and we cannot roll it back. */ +UNIV_INTERN +void +trx_cleanup_at_db_startup( +/*======================*/ + trx_t* trx); /* in: transaction */ +/************************************************************************** +Does the transaction commit for MySQL. */ +UNIV_INTERN +ulint +trx_commit_for_mysql( +/*=================*/ + /* out: DB_SUCCESS or error number */ + trx_t* trx); /* in: trx handle */ +/************************************************************************** +Does the transaction prepare for MySQL. */ +UNIV_INTERN +ulint +trx_prepare_for_mysql( +/*==================*/ + /* out: 0 or error number */ + trx_t* trx); /* in: trx handle */ +/************************************************************************** +This function is used to find the number of prepared transactions and +their transaction objects for a recovery. */ +UNIV_INTERN +int +trx_recover_for_mysql( +/*==================*/ + /* out: number of prepared transactions */ + XID* xid_list, /* in/out: prepared transactions */ + ulint len); /* in: number of slots in xid_list */ +/*********************************************************************** +This function is used to find one X/Open XA distributed transaction +which is in the prepared state */ +UNIV_INTERN +trx_t * +trx_get_trx_by_xid( +/*===============*/ + /* out: trx or NULL */ + XID* xid); /* in: X/Open XA transaction identification */ +/************************************************************************** +If required, flushes the log to disk if we called trx_commit_for_mysql() +with trx->flush_log_later == TRUE. 
*/ +UNIV_INTERN +ulint +trx_commit_complete_for_mysql( +/*==========================*/ + /* out: 0 or error number */ + trx_t* trx); /* in: trx handle */ +/************************************************************************** +Marks the latest SQL statement ended. */ +UNIV_INTERN +void +trx_mark_sql_stat_end( +/*==================*/ + trx_t* trx); /* in: trx handle */ +/************************************************************************ +Assigns a read view for a consistent read query. All the consistent reads +within the same transaction will get the same read view, which is created +when this function is first called for a new started transaction. */ +UNIV_INTERN +read_view_t* +trx_assign_read_view( +/*=================*/ + /* out: consistent read view */ + trx_t* trx); /* in: active transaction */ +/*************************************************************** +The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to +the TRX_QUE_RUNNING state and releases query threads which were +waiting for a lock in the wait_thrs list. */ +UNIV_INTERN +void +trx_end_lock_wait( +/*==============*/ + trx_t* trx); /* in: transaction */ +/******************************************************************** +Sends a signal to a trx object. 
*/ +UNIV_INTERN +void +trx_sig_send( +/*=========*/ + trx_t* trx, /* in: trx handle */ + ulint type, /* in: signal type */ + ulint sender, /* in: TRX_SIG_SELF or + TRX_SIG_OTHER_SESS */ + que_thr_t* receiver_thr, /* in: query thread which wants the + reply, or NULL; if type is + TRX_SIG_END_WAIT, this must be NULL */ + trx_savept_t* savept, /* in: possible rollback savepoint, or + NULL */ + que_thr_t** next_thr); /* in/out: next query thread to run; + if the value which is passed in is + a pointer to a NULL pointer, then the + calling function can start running + a new query thread; if the parameter + is NULL, it is ignored */ +/******************************************************************** +Send the reply message when a signal in the queue of the trx has +been handled. */ +UNIV_INTERN +void +trx_sig_reply( +/*==========*/ + trx_sig_t* sig, /* in: signal */ + que_thr_t** next_thr); /* in/out: next query thread to run; + if the value which is passed in is + a pointer to a NULL pointer, then the + calling function can start running + a new query thread */ +/******************************************************************** +Removes the signal object from a trx signal queue. */ +UNIV_INTERN +void +trx_sig_remove( +/*===========*/ + trx_t* trx, /* in: trx handle */ + trx_sig_t* sig); /* in, own: signal */ +/******************************************************************** +Starts handling of a trx signal. */ +UNIV_INTERN +void +trx_sig_start_handle( +/*=================*/ + trx_t* trx, /* in: trx handle */ + que_thr_t** next_thr); /* in/out: next query thread to run; + if the value which is passed in is + a pointer to a NULL pointer, then the + calling function can start running + a new query thread */ +/******************************************************************** +Ends signal handling. 
If the session is in the error state, and +trx->graph_before_signal_handling != NULL, returns control to the error +handling routine of the graph (currently only returns the control to the +graph root which then sends an error message to the client). */ +UNIV_INTERN +void +trx_end_signal_handling( +/*====================*/ + trx_t* trx); /* in: trx */ +/************************************************************************* +Creates a commit command node struct. */ +UNIV_INTERN +commit_node_t* +commit_node_create( +/*===============*/ + /* out, own: commit node struct */ + mem_heap_t* heap); /* in: mem heap where created */ +/*************************************************************** +Performs an execution step for a commit type node in a query graph. */ +UNIV_INTERN +que_thr_t* +trx_commit_step( +/*============*/ + /* out: query thread to run next, or NULL */ + que_thr_t* thr); /* in: query thread */ + +/************************************************************************** +Prints info about a transaction to the given file. The caller must own the +kernel mutex and must have called +innobase_mysql_prepare_print_arbitrary_thd(), unless he knows that MySQL +or InnoDB cannot meanwhile change the info printed here. */ +UNIV_INTERN +void +trx_print( +/*======*/ + FILE* f, /* in: output stream */ + trx_t* trx, /* in: transaction */ + ulint max_query_len); /* in: max query length to print, or 0 to + use the default max length */ + +/** Type of data dictionary operation */ +enum trx_dict_op { + /** The transaction is not modifying the data dictionary. */ + TRX_DICT_OP_NONE = 0, + /** The transaction is creating a table or an index, or + dropping a table. The table must be dropped in crash + recovery. This and TRX_DICT_OP_NONE are the only possible + operation modes in crash recovery. */ + TRX_DICT_OP_TABLE = 1, + /** The transaction is creating or dropping an index in an + existing table. 
In crash recovery, the data dictionary + must be locked, but the table must not be dropped. */ + TRX_DICT_OP_INDEX = 2 +}; + +/************************************************************************** +Determine if a transaction is a dictionary operation. */ +UNIV_INLINE +enum trx_dict_op +trx_get_dict_operation( +/*===================*/ + /* out: dictionary operation mode */ + const trx_t* trx) /* in: transaction */ + __attribute__((pure)); +/************************************************************************** +Flag a transaction as a dictionary operation. */ +UNIV_INLINE +void +trx_set_dict_operation( +/*===================*/ + trx_t* trx, /* in/out: transaction */ + enum trx_dict_op op); /* in: operation, not + TRX_DICT_OP_NONE */ + +#ifndef UNIV_HOTBACKUP +/************************************************************************** +Determines if the currently running transaction has been interrupted. */ +UNIV_INTERN +ibool +trx_is_interrupted( +/*===============*/ + /* out: TRUE if interrupted */ + trx_t* trx); /* in: transaction */ +#else /* !UNIV_HOTBACKUP */ +#define trx_is_interrupted(trx) FALSE +#endif /* !UNIV_HOTBACKUP */ + +/*********************************************************************** +Calculates the "weight" of a transaction. The weight of one transaction +is estimated as the number of altered rows + the number of locked rows. +*/ + +#define TRX_WEIGHT(t) \ + ut_dulint_add((t)->undo_no, UT_LIST_GET_LEN((t)->trx_locks)) + +/*********************************************************************** +Compares the "weight" (or size) of two transactions. Transactions that +have edited non-transactional tables are considered heavier than ones +that have not. 
*/ +UNIV_INTERN +int +trx_weight_cmp( +/*===========*/ + /* out: <0, 0 or >0; similar to strcmp(3) */ + const trx_t* a, /* in: the first transaction to be compared */ + const trx_t* b); /* in: the second transaction to be compared */ + +/*********************************************************************** +Retrieves transaction's id, represented as unsigned long long. */ +UNIV_INLINE +ullint +trx_get_id( +/*=======*/ + /* out: transaction's id */ + const trx_t* trx); /* in: transaction */ + +/* Maximum length of a string that can be returned by +trx_get_que_state_str(). */ +#define TRX_QUE_STATE_STR_MAX_LEN 12 /* "ROLLING BACK" */ + +/*********************************************************************** +Retrieves transaction's que state in a human readable string. The string +should not be free()'d or modified. */ +UNIV_INLINE +const char* +trx_get_que_state_str( +/*==================*/ + /* out: string in the data segment */ + const trx_t* trx); /* in: transaction */ + +/* Signal to a transaction */ +struct trx_sig_struct{ + unsigned type:3; /* signal type */ + unsigned sender:1; /* TRX_SIG_SELF or + TRX_SIG_OTHER_SESS */ + que_thr_t* receiver; /* non-NULL if the sender of the signal + wants reply after the operation induced + by the signal is completed */ + trx_savept_t savept; /* possible rollback savepoint */ + UT_LIST_NODE_T(trx_sig_t) + signals; /* queue of pending signals to the + transaction */ + UT_LIST_NODE_T(trx_sig_t) + reply_signals; /* list of signals for which the sender + transaction is waiting a reply */ +}; + +#define TRX_MAGIC_N 91118598 + +/* The transaction handle; every session has a trx object which is freed only +when the session is freed; in addition there may be session-less transactions +rolling back after a database recovery */ + +struct trx_struct{ + ulint magic_n; + /* All the next fields are protected by the kernel mutex, except the + undo logs which are protected by undo_mutex */ + const char* op_info; /* English text describing 
the + current operation, or an empty + string */ + unsigned is_purge:1; /* 0=user transaction, 1=purge */ + unsigned is_recovered:1; /* 0=normal transaction, + 1=recovered, must be rolled back */ + unsigned conc_state:2; /* state of the trx from the point + of view of concurrency control: + TRX_ACTIVE, TRX_COMMITTED_IN_MEMORY, + ... */ + unsigned que_state:2; /* valid when conc_state == TRX_ACTIVE: + TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT, + ... */ + unsigned isolation_level:2;/* TRX_ISO_REPEATABLE_READ, ... */ + unsigned check_foreigns:1;/* normally TRUE, but if the user + wants to suppress foreign key checks, + (in table imports, for example) we + set this FALSE */ + unsigned check_unique_secondary:1; + /* normally TRUE, but if the user + wants to speed up inserts by + suppressing unique key checks + for secondary indexes when we decide + if we can use the insert buffer for + them, we set this FALSE */ + unsigned support_xa:1; /* normally we do the XA two-phase + commit steps, but by setting this to + FALSE, one can save CPU time and about + 150 bytes in the undo log size as then + we skip XA steps */ + unsigned flush_log_later:1;/* when we commit the transaction + in MySQL's binlog write, we will + flush the log to disk later in + a separate call */ + unsigned must_flush_log_later:1;/* this flag is set to TRUE in + trx_commit_off_kernel() if + flush_log_later was TRUE, and there + were modifications by the transaction; + in that case we must flush the log + in trx_commit_complete_for_mysql() */ + unsigned dict_operation:2;/**< @see enum trx_dict_op */ + unsigned duplicates:2; /* TRX_DUP_IGNORE | TRX_DUP_REPLACE */ + unsigned active_trans:2; /* 1 - if a transaction in MySQL + is active. 
2 - if prepare_commit_mutex + was taken */ + unsigned has_search_latch:1; + /* TRUE if this trx has latched the + search system latch in S-mode */ + unsigned declared_to_be_inside_innodb:1; + /* this is TRUE if we have declared + this transaction in + srv_conc_enter_innodb to be inside the + InnoDB engine */ + unsigned handling_signals:1;/* this is TRUE as long as the trx + is handling signals */ + unsigned dict_operation_lock_mode:2; + /* 0, RW_S_LATCH, or RW_X_LATCH: + the latch mode trx currently holds + on dict_operation_lock */ + time_t start_time; /* time the trx object was created + or the state last time became + TRX_ACTIVE */ + dulint id; /* transaction id */ + XID xid; /* X/Open XA transaction + identification to identify a + transaction branch */ + dulint no; /* transaction serialization number == + max trx id when the transaction is + moved to COMMITTED_IN_MEMORY state */ + ib_uint64_t commit_lsn; /* lsn at the time of the commit */ + dulint table_id; /* Table to drop iff dict_operation + is TRUE, or ut_dulint_zero. 
*/ + /*------------------------------*/ + void* mysql_thd; /* MySQL thread handle corresponding + to this trx, or NULL */ + char** mysql_query_str;/* pointer to the field in mysqld_thd + which contains the pointer to the + current SQL query string */ + const char* mysql_log_file_name; + /* if MySQL binlog is used, this field + contains a pointer to the latest file + name; this is NULL if binlog is not + used */ + ib_int64_t mysql_log_offset;/* if MySQL binlog is used, this field + contains the end offset of the binlog + entry */ + os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated + with this transaction object */ + ulint mysql_process_no;/* since in Linux, 'top' reports + process id's and not thread id's, we + store the process number too */ + /*------------------------------*/ + ulint n_mysql_tables_in_use; /* number of Innobase tables + used in the processing of the current + SQL statement in MySQL */ + ulint mysql_n_tables_locked; + /* how many tables the current SQL + statement uses, except those + in consistent read */ + ulint search_latch_timeout; + /* If we notice that someone is + waiting for our S-lock on the search + latch to be released, we wait in + row0sel.c for BTR_SEA_TIMEOUT new + searches until we try to keep + the search latch again over + calls from MySQL; this is intended + to reduce contention on the search + latch */ + /*------------------------------*/ + ulint n_tickets_to_enter_innodb; + /* this can be > 0 only when + declared_to_... 
is TRUE; when we come + to srv_conc_innodb_enter, if the value + here is > 0, we decrement this by 1 */ + /*------------------------------*/ + dict_index_t* new_rec_locks[2];/* these are normally NULL; if + srv_locks_unsafe_for_binlog is TRUE + or session is using READ COMMITTED + isolation level, + in a cursor search, if we set a new + record lock on an index, this is set + to point to the index; this is + used in releasing the locks under the + cursors if we are performing an UPDATE + and we determine after retrieving + the row that it does not need to be + locked; thus, these can be used to + implement a 'mini-rollback' that + releases the latest record locks */ + UT_LIST_NODE_T(trx_t) + trx_list; /* list of transactions */ + UT_LIST_NODE_T(trx_t) + mysql_trx_list; /* list of transactions created for + MySQL */ + /*------------------------------*/ + ulint error_state; /* 0 if no error, otherwise error + number; NOTE That ONLY the thread + doing the transaction is allowed to + set this field: this is NOT protected + by the kernel mutex */ + const dict_index_t*error_info; /* if the error number indicates a + duplicate key error, a pointer to + the problematic index is stored here */ + ulint error_key_num; /* if the index creation fails to a + duplicate key error, a mysql key + number of that index is stored here */ + sess_t* sess; /* session of the trx, NULL if none */ + que_t* graph; /* query currently run in the session, + or NULL if none; NOTE that the query + belongs to the session, and it can + survive over a transaction commit, if + it is a stored procedure with a COMMIT + WORK statement, for instance */ + ulint n_active_thrs; /* number of active query threads */ + que_t* graph_before_signal_handling; + /* value of graph when signal handling + for this trx started: this is used to + return control to the original query + graph for error processing */ + trx_sig_t sig; /* one signal object can be allocated + in this space, avoiding mem_alloc */ + 
UT_LIST_BASE_NODE_T(trx_sig_t) + signals; /* queue of processed or pending + signals to the trx */ + UT_LIST_BASE_NODE_T(trx_sig_t) + reply_signals; /* list of signals sent by the query + threads of this trx for which a thread + is waiting for a reply; if this trx is + killed, the reply requests in the list + must be canceled */ + /*------------------------------*/ + lock_t* wait_lock; /* if trx execution state is + TRX_QUE_LOCK_WAIT, this points to + the lock request, otherwise this is + NULL */ + ibool was_chosen_as_deadlock_victim; + /* when the transaction decides to wait + for a lock, it sets this to FALSE; + if another transaction chooses this + transaction as a victim in deadlock + resolution, it sets this to TRUE */ + time_t wait_started; /* lock wait started at this time */ + UT_LIST_BASE_NODE_T(que_thr_t) + wait_thrs; /* query threads belonging to this + trx that are in the QUE_THR_LOCK_WAIT + state */ + ulint deadlock_mark; /* a mark field used in deadlock + checking algorithm. This must be + in its own machine word, because + it can be changed by other + threads while holding kernel_mutex. */ + /*------------------------------*/ + mem_heap_t* lock_heap; /* memory heap for the locks of the + transaction */ + UT_LIST_BASE_NODE_T(lock_t) + trx_locks; /* locks reserved by the transaction */ + /*------------------------------*/ + mem_heap_t* global_read_view_heap; + /* memory heap for the global read + view */ + read_view_t* global_read_view; + /* consistent read view associated + to a transaction or NULL */ + read_view_t* read_view; /* consistent read view used in the + transaction or NULL, this read view + if defined can be normal read view + associated to a transaction (i.e. 
+ same as global_read_view) or read view + associated to a cursor */ + /*------------------------------*/ + UT_LIST_BASE_NODE_T(trx_named_savept_t) + trx_savepoints; /* savepoints set with SAVEPOINT ..., + oldest first */ + /*------------------------------*/ + mutex_t undo_mutex; /* mutex protecting the fields in this + section (down to undo_no_arr), EXCEPT + last_sql_stat_start, which can be + accessed only when we know that there + cannot be any activity in the undo + logs! */ + dulint undo_no; /* next undo log record number to + assign; since the undo log is + private for a transaction, this + is a simple ascending sequence + with no gaps; thus it represents + the number of modified/inserted + rows in a transaction */ + trx_savept_t last_sql_stat_start; + /* undo_no when the last sql statement + was started: in case of an error, trx + is rolled back down to this undo + number; see note at undo_mutex! */ + trx_rseg_t* rseg; /* rollback segment assigned to the + transaction, or NULL if not assigned + yet */ + trx_undo_t* insert_undo; /* pointer to the insert undo log, or + NULL if no inserts performed yet */ + trx_undo_t* update_undo; /* pointer to the update undo log, or + NULL if no update performed yet */ + dulint roll_limit; /* least undo number to undo during + a rollback */ + ulint pages_undone; /* number of undo log pages undone + since the last undo log truncation */ + trx_undo_arr_t* undo_no_arr; /* array of undo numbers of undo log + records which are currently processed + by a rollback operation */ + /*------------------------------*/ + ulint n_autoinc_rows; /* no. of AUTO-INC rows required for + an SQL statement. This is useful for + multi-row INSERTs */ + ib_vector_t* autoinc_locks; /* AUTOINC locks held by this + transaction. Note that these are + also in the lock list trx_locks. 
This + vector needs to be freed explicitly + when the trx_t instance is desrtoyed */ + /*------------------------------*/ + char detailed_error[256]; /* detailed error message for last + error, or empty. */ +}; + +#define TRX_MAX_N_THREADS 32 /* maximum number of + concurrent threads running a + single operation of a + transaction, e.g., a parallel + query */ +/* Transaction concurrency states (trx->conc_state) */ +#define TRX_NOT_STARTED 0 +#define TRX_ACTIVE 1 +#define TRX_COMMITTED_IN_MEMORY 2 +#define TRX_PREPARED 3 /* Support for 2PC/XA */ + +/* Transaction execution states when trx->conc_state == TRX_ACTIVE */ +#define TRX_QUE_RUNNING 0 /* transaction is running */ +#define TRX_QUE_LOCK_WAIT 1 /* transaction is waiting for a lock */ +#define TRX_QUE_ROLLING_BACK 2 /* transaction is rolling back */ +#define TRX_QUE_COMMITTING 3 /* transaction is committing */ + +/* Transaction isolation levels (trx->isolation_level) */ +#define TRX_ISO_READ_UNCOMMITTED 0 /* dirty read: non-locking + SELECTs are performed so that + we do not look at a possible + earlier version of a record; + thus they are not 'consistent' + reads under this isolation + level; otherwise like level + 2 */ + +#define TRX_ISO_READ_COMMITTED 1 /* somewhat Oracle-like + isolation, except that in + range UPDATE and DELETE we + must block phantom rows + with next-key locks; + SELECT ... FOR UPDATE and ... + LOCK IN SHARE MODE only lock + the index records, NOT the + gaps before them, and thus + allow free inserting; + each consistent read reads its + own snapshot */ + +#define TRX_ISO_REPEATABLE_READ 2 /* this is the default; + all consistent reads in the + same trx read the same + snapshot; + full next-key locking used + in locking reads to block + insertions into gaps */ + +#define TRX_ISO_SERIALIZABLE 3 /* all plain SELECTs are + converted to LOCK IN SHARE + MODE reads */ + +/* Treatment of duplicate values (trx->duplicates; for example, in inserts). +Multiple flags can be combined with bitwise OR. 
*/ +#define TRX_DUP_IGNORE 1 /* duplicate rows are to be updated */ +#define TRX_DUP_REPLACE 2 /* duplicate rows are to be replaced */ + + +/* Types of a trx signal */ +#define TRX_SIG_NO_SIGNAL 0 +#define TRX_SIG_TOTAL_ROLLBACK 1 +#define TRX_SIG_ROLLBACK_TO_SAVEPT 2 +#define TRX_SIG_COMMIT 3 +#define TRX_SIG_ERROR_OCCURRED 4 +#define TRX_SIG_BREAK_EXECUTION 5 + +/* Sender types of a signal */ +#define TRX_SIG_SELF 0 /* sent by the session itself, or + by an error occurring within this + session */ +#define TRX_SIG_OTHER_SESS 1 /* sent by another session (which + must hold rights to this) */ + +/* Commit command node in a query graph */ +struct commit_node_struct{ + que_common_t common; /* node type: QUE_NODE_COMMIT */ + ulint state; /* node execution state */ +}; + +/* Commit node states */ +#define COMMIT_NODE_SEND 1 +#define COMMIT_NODE_WAIT 2 + + +#ifndef UNIV_NONINL +#include "trx0trx.ic" +#endif + +#endif diff --git a/storage/xtradb/include/trx0trx.ic b/storage/xtradb/include/trx0trx.ic new file mode 100644 index 00000000000..6da89f002fe --- /dev/null +++ b/storage/xtradb/include/trx0trx.ic @@ -0,0 +1,221 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +The transaction + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +/***************************************************************** +Starts the transaction if it is not yet started. */ +UNIV_INLINE +void +trx_start_if_not_started( +/*=====================*/ + trx_t* trx) /* in: transaction */ +{ + ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY); + + if (trx->conc_state == TRX_NOT_STARTED) { + + trx_start(trx, ULINT_UNDEFINED); + } +} + +/***************************************************************** +Starts the transaction if it is not yet started. Assumes we have reserved +the kernel mutex! */ +UNIV_INLINE +void +trx_start_if_not_started_low( +/*=========================*/ + trx_t* trx) /* in: transaction */ +{ + ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY); + + if (trx->conc_state == TRX_NOT_STARTED) { + + trx_start_low(trx, ULINT_UNDEFINED); + } +} + +/***************************************************************** +Resets the new record lock info in a transaction struct. */ +UNIV_INLINE +void +trx_reset_new_rec_lock_info( +/*========================*/ + trx_t* trx) /* in: transaction struct */ +{ + trx->new_rec_locks[0] = NULL; + trx->new_rec_locks[1] = NULL; +} + +/***************************************************************** +Registers that we have set a new record lock on an index. We only have space +to store 2 indexes! If this is called to store more than 2 indexes after +trx_reset_new_rec_lock_info(), then this function does nothing. 
*/ +UNIV_INLINE +void +trx_register_new_rec_lock( +/*======================*/ + trx_t* trx, /* in: transaction struct */ + dict_index_t* index) /* in: trx sets a new record lock on this + index */ +{ + if (trx->new_rec_locks[0] == NULL) { + trx->new_rec_locks[0] = index; + + return; + } + + if (trx->new_rec_locks[0] == index) { + + return; + } + + if (trx->new_rec_locks[1] != NULL) { + + return; + } + + trx->new_rec_locks[1] = index; +} + +/***************************************************************** +Checks if trx has set a new record lock on an index. */ +UNIV_INLINE +ibool +trx_new_rec_locks_contain( +/*======================*/ + /* out: TRUE if trx has set a new record lock + on index */ + trx_t* trx, /* in: transaction struct */ + dict_index_t* index) /* in: index */ +{ + return(trx->new_rec_locks[0] == index + || trx->new_rec_locks[1] == index); +} + +/******************************************************************** +Retrieves the error_info field from a trx. */ +UNIV_INLINE +const dict_index_t* +trx_get_error_info( +/*===============*/ + /* out: the error info */ + const trx_t* trx) /* in: trx object */ +{ + return(trx->error_info); +} + +/*********************************************************************** +Retrieves transacion's id, represented as unsigned long long. */ +UNIV_INLINE +ullint +trx_get_id( +/*=======*/ + /* out: transaction's id */ + const trx_t* trx) /* in: transaction */ +{ + return((ullint)ut_conv_dulint_to_longlong(trx->id)); +} + +/*********************************************************************** +Retrieves transaction's que state in a human readable string. The string +should not be free()'d or modified. 
*/ +UNIV_INLINE +const char* +trx_get_que_state_str( +/*==================*/ + /* out: string in the data segment */ + const trx_t* trx) /* in: transaction */ +{ + /* be sure to adjust TRX_QUE_STATE_STR_MAX_LEN if you change this */ + switch (trx->que_state) { + case TRX_QUE_RUNNING: + return("RUNNING"); + case TRX_QUE_LOCK_WAIT: + return("LOCK WAIT"); + case TRX_QUE_ROLLING_BACK: + return("ROLLING BACK"); + case TRX_QUE_COMMITTING: + return("COMMITTING"); + default: + return("UNKNOWN"); + } +} + +/************************************************************************** +Determine if a transaction is a dictionary operation. */ +UNIV_INLINE +enum trx_dict_op +trx_get_dict_operation( +/*===================*/ + /* out: dictionary operation mode */ + const trx_t* trx) /* in: transaction */ +{ + enum trx_dict_op op = (enum trx_dict_op) trx->dict_operation; + +#ifdef UNIV_DEBUG + switch (op) { + case TRX_DICT_OP_NONE: + case TRX_DICT_OP_TABLE: + case TRX_DICT_OP_INDEX: + return(op); + } + ut_error; +#endif /* UNIV_DEBUG */ + return((enum trx_dict_op) UNIV_EXPECT(op, TRX_DICT_OP_NONE)); +} +/************************************************************************** +Flag a transaction a dictionary operation. 
*/ +UNIV_INLINE +void +trx_set_dict_operation( +/*===================*/ + trx_t* trx, /* in/out: transaction */ + enum trx_dict_op op) /* in: operation, not + TRX_DICT_OP_NONE */ +{ +#ifdef UNIV_DEBUG + enum trx_dict_op old_op = trx_get_dict_operation(trx); + + switch (op) { + case TRX_DICT_OP_NONE: + ut_error; + break; + case TRX_DICT_OP_TABLE: + switch (old_op) { + case TRX_DICT_OP_NONE: + case TRX_DICT_OP_INDEX: + case TRX_DICT_OP_TABLE: + goto ok; + } + ut_error; + break; + case TRX_DICT_OP_INDEX: + ut_ad(old_op == TRX_DICT_OP_NONE); + break; + } +ok: +#endif /* UNIV_DEBUG */ + + trx->dict_operation = op; +} diff --git a/storage/xtradb/include/trx0types.h b/storage/xtradb/include/trx0types.h new file mode 100644 index 00000000000..896f4e8c0a2 --- /dev/null +++ b/storage/xtradb/include/trx0types.h @@ -0,0 +1,78 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Transaction system global type definitions + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#ifndef trx0types_h +#define trx0types_h + +#include "ut0byte.h" + +/* prepare trx_t::id for being printed via printf(3) */ +#define TRX_ID_PREP_PRINTF(id) (ullint) ut_conv_dulint_to_longlong(id) + +/* printf(3) format used for printing TRX_ID_PRINTF_PREP() */ +#define TRX_ID_FMT "%llX" + +/* maximum length that a formatted trx_t::id could take, not including +the terminating '\0'. */ +#define TRX_ID_MAX_LEN 17 + +/* Memory objects */ +typedef struct trx_struct trx_t; +typedef struct trx_sys_struct trx_sys_t; +typedef struct trx_doublewrite_struct trx_doublewrite_t; +typedef struct trx_sig_struct trx_sig_t; +typedef struct trx_rseg_struct trx_rseg_t; +typedef struct trx_undo_struct trx_undo_t; +typedef struct trx_undo_arr_struct trx_undo_arr_t; +typedef struct trx_undo_inf_struct trx_undo_inf_t; +typedef struct trx_purge_struct trx_purge_t; +typedef struct roll_node_struct roll_node_t; +typedef struct commit_node_struct commit_node_t; +typedef struct trx_named_savept_struct trx_named_savept_t; + +/* Rollback contexts */ +enum trx_rb_ctx { + RB_NONE = 0, /* no rollback */ + RB_NORMAL, /* normal rollback */ + RB_RECOVERY, /* rolling back an incomplete transaction, + in crash recovery */ +}; + +/* Transaction savepoint */ +typedef struct trx_savept_struct trx_savept_t; +struct trx_savept_struct{ + dulint least_undo_no; /* least undo number to undo */ +}; + +/* File objects */ +typedef byte trx_sysf_t; +typedef byte trx_rsegf_t; +typedef byte trx_usegf_t; +typedef byte trx_ulogf_t; +typedef byte 
trx_upagef_t; + +/* Undo log record */ +typedef byte trx_undo_rec_t; + +#endif diff --git a/storage/xtradb/include/trx0undo.h b/storage/xtradb/include/trx0undo.h new file mode 100644 index 00000000000..7f7408931da --- /dev/null +++ b/storage/xtradb/include/trx0undo.h @@ -0,0 +1,526 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Transaction undo log + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#ifndef trx0undo_h +#define trx0undo_h + +#include "univ.i" +#include "trx0types.h" +#include "mtr0mtr.h" +#include "trx0sys.h" +#include "page0types.h" +#include "trx0xa.h" + +/*************************************************************************** +Builds a roll pointer dulint. 
*/ +UNIV_INLINE +dulint +trx_undo_build_roll_ptr( +/*====================*/ + /* out: roll pointer */ + ibool is_insert, /* in: TRUE if insert undo log */ + ulint rseg_id, /* in: rollback segment id */ + ulint page_no, /* in: page number */ + ulint offset); /* in: offset of the undo entry within page */ +/*************************************************************************** +Decodes a roll pointer dulint. */ +UNIV_INLINE +void +trx_undo_decode_roll_ptr( +/*=====================*/ + dulint roll_ptr, /* in: roll pointer */ + ibool* is_insert, /* out: TRUE if insert undo log */ + ulint* rseg_id, /* out: rollback segment id */ + ulint* page_no, /* out: page number */ + ulint* offset); /* out: offset of the undo entry within page */ +/*************************************************************************** +Returns TRUE if the roll pointer is of the insert type. */ +UNIV_INLINE +ibool +trx_undo_roll_ptr_is_insert( +/*========================*/ + /* out: TRUE if insert undo log */ + dulint roll_ptr); /* in: roll pointer */ +/********************************************************************* +Writes a roll ptr to an index page. In case that the size changes in +some future version, this function should be used instead of +mach_write_... */ +UNIV_INLINE +void +trx_write_roll_ptr( +/*===============*/ + byte* ptr, /* in: pointer to memory where written */ + dulint roll_ptr); /* in: roll ptr */ +/********************************************************************* +Reads a roll ptr from an index page. In case that the roll ptr size +changes in some future version, this function should be used instead of +mach_read_... */ +UNIV_INLINE +dulint +trx_read_roll_ptr( +/*==============*/ + /* out: roll ptr */ + const byte* ptr); /* in: pointer to memory from where to read */ +/********************************************************************** +Gets an undo log page and x-latches it. 
*/ +UNIV_INLINE +page_t* +trx_undo_page_get( +/*==============*/ + /* out: pointer to page x-latched */ + ulint space, /* in: space where placed */ + ulint zip_size, /* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /* in: page number */ + mtr_t* mtr); /* in: mtr */ +/********************************************************************** +Gets an undo log page and s-latches it. */ +UNIV_INLINE +page_t* +trx_undo_page_get_s_latched( +/*========================*/ + /* out: pointer to page s-latched */ + ulint space, /* in: space where placed */ + ulint zip_size, /* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /* in: page number */ + mtr_t* mtr); /* in: mtr */ +/********************************************************************** +Returns the previous undo record on the page in the specified log, or +NULL if none exists. */ +UNIV_INLINE +trx_undo_rec_t* +trx_undo_page_get_prev_rec( +/*=======================*/ + /* out: pointer to record, NULL if none */ + trx_undo_rec_t* rec, /* in: undo log record */ + ulint page_no,/* in: undo log header page number */ + ulint offset);/* in: undo log header offset on page */ +/********************************************************************** +Returns the next undo log record on the page in the specified log, or +NULL if none exists. */ +UNIV_INLINE +trx_undo_rec_t* +trx_undo_page_get_next_rec( +/*=======================*/ + /* out: pointer to record, NULL if none */ + trx_undo_rec_t* rec, /* in: undo log record */ + ulint page_no,/* in: undo log header page number */ + ulint offset);/* in: undo log header offset on page */ +/********************************************************************** +Returns the last undo record on the page in the specified undo log, or +NULL if none exists. 
*/ +UNIV_INLINE +trx_undo_rec_t* +trx_undo_page_get_last_rec( +/*=======================*/ + /* out: pointer to record, NULL if none */ + page_t* undo_page,/* in: undo log page */ + ulint page_no,/* in: undo log header page number */ + ulint offset); /* in: undo log header offset on page */ +/********************************************************************** +Returns the first undo record on the page in the specified undo log, or +NULL if none exists. */ +UNIV_INLINE +trx_undo_rec_t* +trx_undo_page_get_first_rec( +/*========================*/ + /* out: pointer to record, NULL if none */ + page_t* undo_page,/* in: undo log page */ + ulint page_no,/* in: undo log header page number */ + ulint offset);/* in: undo log header offset on page */ +/*************************************************************************** +Gets the previous record in an undo log. */ +UNIV_INTERN +trx_undo_rec_t* +trx_undo_get_prev_rec( +/*==================*/ + /* out: undo log record, the page s-latched, + NULL if none */ + trx_undo_rec_t* rec, /* in: undo record */ + ulint page_no,/* in: undo log header page number */ + ulint offset, /* in: undo log header offset on page */ + mtr_t* mtr); /* in: mtr */ +/*************************************************************************** +Gets the next record in an undo log. */ +UNIV_INTERN +trx_undo_rec_t* +trx_undo_get_next_rec( +/*==================*/ + /* out: undo log record, the page s-latched, + NULL if none */ + trx_undo_rec_t* rec, /* in: undo record */ + ulint page_no,/* in: undo log header page number */ + ulint offset, /* in: undo log header offset on page */ + mtr_t* mtr); /* in: mtr */ +/*************************************************************************** +Gets the first record in an undo log. 
*/ +UNIV_INTERN +trx_undo_rec_t* +trx_undo_get_first_rec( +/*===================*/ + /* out: undo log record, the page latched, NULL if + none */ + ulint space, /* in: undo log header space */ + ulint zip_size,/* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no,/* in: undo log header page number */ + ulint offset, /* in: undo log header offset on page */ + ulint mode, /* in: latching mode: RW_S_LATCH or RW_X_LATCH */ + mtr_t* mtr); /* in: mtr */ +/************************************************************************ +Tries to add a page to the undo log segment where the undo log is placed. */ +UNIV_INTERN +ulint +trx_undo_add_page( +/*==============*/ + /* out: page number if success, else + FIL_NULL */ + trx_t* trx, /* in: transaction */ + trx_undo_t* undo, /* in: undo log memory object */ + mtr_t* mtr); /* in: mtr which does not have a latch to any + undo log page; the caller must have reserved + the rollback segment mutex */ +/*************************************************************************** +Truncates an undo log from the end. This function is used during a rollback +to free space from an undo log. */ +UNIV_INTERN +void +trx_undo_truncate_end( +/*==================*/ + trx_t* trx, /* in: transaction whose undo log it is */ + trx_undo_t* undo, /* in: undo log */ + dulint limit); /* in: all undo records with undo number + >= this value should be truncated */ +/*************************************************************************** +Truncates an undo log from the start. This function is used during a purge +operation. 
*/ +UNIV_INTERN +void +trx_undo_truncate_start( +/*====================*/ + trx_rseg_t* rseg, /* in: rollback segment */ + ulint space, /* in: space id of the log */ + ulint hdr_page_no, /* in: header page number */ + ulint hdr_offset, /* in: header offset on the page */ + dulint limit); /* in: all undo pages with undo numbers < + this value should be truncated; NOTE that + the function only frees whole pages; the + header page is not freed, but emptied, if + all the records there are < limit */ +/************************************************************************ +Initializes the undo log lists for a rollback segment memory copy. +This function is only called when the database is started or a new +rollback segment created. */ +UNIV_INTERN +ulint +trx_undo_lists_init( +/*================*/ + /* out: the combined size of undo log segments + in pages */ + trx_rseg_t* rseg); /* in: rollback segment memory object */ +/************************************************************************** +Assigns an undo log for a transaction. A new undo log is created or a cached +undo log reused. */ +UNIV_INTERN +ulint +trx_undo_assign_undo( +/*=================*/ + /* out: DB_SUCCESS if undo log assign + successful, possible error codes are: + DB_TOO_MANY_CONCURRENT_TRXS + DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY*/ + trx_t* trx, /* in: transaction */ + ulint type); /* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */ +/********************************************************************** +Sets the state of the undo log segment at a transaction finish. 
*/ +UNIV_INTERN +page_t* +trx_undo_set_state_at_finish( +/*=========================*/ + /* out: undo log segment header page, + x-latched */ + trx_rseg_t* rseg, /* in: rollback segment memory object */ + trx_t* trx, /* in: transaction */ + trx_undo_t* undo, /* in: undo log memory copy */ + mtr_t* mtr); /* in: mtr */ +/********************************************************************** +Sets the state of the undo log segment at a transaction prepare. */ +UNIV_INTERN +page_t* +trx_undo_set_state_at_prepare( +/*==========================*/ + /* out: undo log segment header page, + x-latched */ + trx_t* trx, /* in: transaction */ + trx_undo_t* undo, /* in: undo log memory copy */ + mtr_t* mtr); /* in: mtr */ + +/************************************************************************** +Adds the update undo log header as the first in the history list, and +frees the memory object, or puts it to the list of cached update undo log +segments. */ +UNIV_INTERN +void +trx_undo_update_cleanup( +/*====================*/ + trx_t* trx, /* in: trx owning the update undo log */ + page_t* undo_page, /* in: update undo log header page, + x-latched */ + mtr_t* mtr); /* in: mtr */ +/********************************************************************** +Frees or caches an insert undo log after a transaction commit or rollback. +Knowledge of inserts is not needed after a commit or rollback, therefore +the data can be discarded. */ +UNIV_INTERN +void +trx_undo_insert_cleanup( +/*====================*/ + trx_t* trx); /* in: transaction handle */ +/*************************************************************** +Parses the redo log entry of an undo log page initialization. 
*/ +UNIV_INTERN +byte* +trx_undo_parse_page_init( +/*=====================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ +/*************************************************************** +Parses the redo log entry of an undo log page header create or reuse. */ +UNIV_INTERN +byte* +trx_undo_parse_page_header( +/*=======================*/ + /* out: end of log record or NULL */ + ulint type, /* in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ +/*************************************************************** +Parses the redo log entry of an undo log page header discard. */ +UNIV_INTERN +byte* +trx_undo_parse_discard_latest( +/*==========================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ + +/* Types of an undo log segment */ +#define TRX_UNDO_INSERT 1 /* contains undo entries for inserts */ +#define TRX_UNDO_UPDATE 2 /* contains undo entries for updates + and delete markings: in short, + modifys (the name 'UPDATE' is a + historical relic) */ +/* States of an undo log segment */ +#define TRX_UNDO_ACTIVE 1 /* contains an undo log of an active + transaction */ +#define TRX_UNDO_CACHED 2 /* cached for quick reuse */ +#define TRX_UNDO_TO_FREE 3 /* insert undo segment can be freed */ +#define TRX_UNDO_TO_PURGE 4 /* update undo segment will not be + reused: it can be freed in purge when + all undo data in it is removed */ +#define TRX_UNDO_PREPARED 5 /* contains an undo log of an + prepared transaction */ + +/* Transaction undo log memory object; this is protected by the undo_mutex +in the corresponding transaction object */ + +struct trx_undo_struct{ 
+ /*-----------------------------*/ + ulint id; /* undo log slot number within the + rollback segment */ + ulint type; /* TRX_UNDO_INSERT or + TRX_UNDO_UPDATE */ + ulint state; /* state of the corresponding undo log + segment */ + ibool del_marks; /* relevant only in an update undo log: + this is TRUE if the transaction may + have delete marked records, because of + a delete of a row or an update of an + indexed field; purge is then + necessary; also TRUE if the transaction + has updated an externally stored + field */ + dulint trx_id; /* id of the trx assigned to the undo + log */ + XID xid; /* X/Open XA transaction + identification */ + ibool dict_operation; /* TRUE if a dict operation trx */ + dulint table_id; /* if a dict operation, then the table + id */ + trx_rseg_t* rseg; /* rseg where the undo log belongs */ + /*-----------------------------*/ + ulint space; /* space id where the undo log + is placed */ + ulint zip_size; /* compressed page size of space + in bytes, or 0 for uncompressed */ + ulint hdr_page_no; /* page number of the header page in + the undo log */ + ulint hdr_offset; /* header offset of the undo log on the + page */ + ulint last_page_no; /* page number of the last page in the + undo log; this may differ from + top_page_no during a rollback */ + ulint size; /* current size in pages */ + /*-----------------------------*/ + ulint empty; /* TRUE if the stack of undo log + records is currently empty */ + ulint top_page_no; /* page number where the latest undo + log record was catenated; during + rollback the page from which the latest + undo record was chosen */ + ulint top_offset; /* offset of the latest undo record, + i.e., the topmost element in the undo + log if we think of it as a stack */ + dulint top_undo_no; /* undo number of the latest record */ + buf_block_t* guess_block; /* guess for the buffer block where + the top page might reside */ + /*-----------------------------*/ + UT_LIST_NODE_T(trx_undo_t) undo_list; + /* undo log objects 
in the rollback + segment are chained into lists */ +}; + +/* The offset of the undo log page header on pages of the undo log */ +#define TRX_UNDO_PAGE_HDR FSEG_PAGE_DATA +/*-------------------------------------------------------------*/ +/* Transaction undo log page header offsets */ +#define TRX_UNDO_PAGE_TYPE 0 /* TRX_UNDO_INSERT or + TRX_UNDO_UPDATE */ +#define TRX_UNDO_PAGE_START 2 /* Byte offset where the undo log + records for the LATEST transaction + start on this page (remember that + in an update undo log, the first page + can contain several undo logs) */ +#define TRX_UNDO_PAGE_FREE 4 /* On each page of the undo log this + field contains the byte offset of the + first free byte on the page */ +#define TRX_UNDO_PAGE_NODE 6 /* The file list node in the chain + of undo log pages */ +/*-------------------------------------------------------------*/ +#define TRX_UNDO_PAGE_HDR_SIZE (6 + FLST_NODE_SIZE) + +/* An update undo segment with just one page can be reused if it has +fewer than this number of bytes used; we must leave space for at least one +new undo log header on the page */ + +#define TRX_UNDO_PAGE_REUSE_LIMIT (3 * UNIV_PAGE_SIZE / 4) + +/* An update undo log segment may contain several undo logs on its first page +if the undo logs took so little space that the segment could be cached and +reused. All the undo log headers are then on the first page, and the last one +owns the undo log records on subsequent pages if the segment is bigger than +one page. If an undo log is stored in a segment, then on the first page it is +allowed to have zero undo records, but if the segment extends to several +pages, then all the rest of the pages must contain at least one undo log +record. */ + +/* The offset of the undo log segment header on the first page of the undo +log segment */ + +#define TRX_UNDO_SEG_HDR (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE) +/*-------------------------------------------------------------*/ +#define TRX_UNDO_STATE 0 /* TRX_UNDO_ACTIVE, ... 
*/ +#define TRX_UNDO_LAST_LOG 2 /* Offset of the last undo log header + on the segment header page, 0 if + none */ +#define TRX_UNDO_FSEG_HEADER 4 /* Header for the file segment which + the undo log segment occupies */ +#define TRX_UNDO_PAGE_LIST (4 + FSEG_HEADER_SIZE) + /* Base node for the list of pages in + the undo log segment; defined only on + the undo log segment's first page */ +/*-------------------------------------------------------------*/ +/* Size of the undo log segment header */ +#define TRX_UNDO_SEG_HDR_SIZE (4 + FSEG_HEADER_SIZE + FLST_BASE_NODE_SIZE) + + +/* The undo log header. There can be several undo log headers on the first +page of an update undo log segment. */ +/*-------------------------------------------------------------*/ +#define TRX_UNDO_TRX_ID 0 /* Transaction id */ +#define TRX_UNDO_TRX_NO 8 /* Transaction number of the + transaction; defined only if the log + is in a history list */ +#define TRX_UNDO_DEL_MARKS 16 /* Defined only in an update undo + log: TRUE if the transaction may have + done delete markings of records, and + thus purge is necessary */ +#define TRX_UNDO_LOG_START 18 /* Offset of the first undo log record + of this log on the header page; purge + may remove undo log record from the + log start, and therefore this is not + necessarily the same as this log + header end offset */ +#define TRX_UNDO_XID_EXISTS 20 /* TRUE if undo log header includes + X/Open XA transaction identification + XID */ +#define TRX_UNDO_DICT_TRANS 21 /* TRUE if the transaction is a table + create, index create, or drop + transaction: in recovery + the transaction cannot be rolled back + in the usual way: a 'rollback' rather + means dropping the created or dropped + table, if it still exists */ +#define TRX_UNDO_TABLE_ID 22 /* Id of the table if the preceding + field is TRUE */ +#define TRX_UNDO_NEXT_LOG 30 /* Offset of the next undo log header + on this page, 0 if none */ +#define TRX_UNDO_PREV_LOG 32 /* Offset of the previous undo log + 
header on this page, 0 if none */ +#define TRX_UNDO_HISTORY_NODE 34 /* If the log is put to the history + list, the file list node is here */ +/*-------------------------------------------------------------*/ +#define TRX_UNDO_LOG_OLD_HDR_SIZE (34 + FLST_NODE_SIZE) + +/* Note: the writing of the undo log old header is coded by a log record +MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE. The appending of an XID to the +header is logged separately. In this sense, the XID is not really a member +of the undo log header. TODO: do not append the XID to the log header if XA +is not needed by the user. The XID wastes about 150 bytes of space in every +undo log. In the history list we may have millions of undo logs, which means +quite a large overhead. */ + +/* X/Open XA Transaction Identification (XID) */ + +#define TRX_UNDO_XA_FORMAT (TRX_UNDO_LOG_OLD_HDR_SIZE) +#define TRX_UNDO_XA_TRID_LEN (TRX_UNDO_XA_FORMAT + 4) +#define TRX_UNDO_XA_BQUAL_LEN (TRX_UNDO_XA_TRID_LEN + 4) +#define TRX_UNDO_XA_XID (TRX_UNDO_XA_BQUAL_LEN + 4) +/*--------------------------------------------------------------*/ +#define TRX_UNDO_LOG_XA_HDR_SIZE (TRX_UNDO_XA_XID + XIDDATASIZE) + /* Total size of the header with the XA XID */ + +#ifndef UNIV_NONINL +#include "trx0undo.ic" +#endif + +#endif diff --git a/storage/xtradb/include/trx0undo.ic b/storage/xtradb/include/trx0undo.ic new file mode 100644 index 00000000000..0bd8b79414b --- /dev/null +++ b/storage/xtradb/include/trx0undo.ic @@ -0,0 +1,344 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Transaction undo log + +Created 3/26/1996 Heikki Tuuri +*******************************************************/ + +#include "data0type.h" +#include "page0page.h" + +/*************************************************************************** +Builds a roll pointer dulint. */ +UNIV_INLINE +dulint +trx_undo_build_roll_ptr( +/*====================*/ + /* out: roll pointer */ + ibool is_insert, /* in: TRUE if insert undo log */ + ulint rseg_id, /* in: rollback segment id */ + ulint page_no, /* in: page number */ + ulint offset) /* in: offset of the undo entry within page */ +{ +#if DATA_ROLL_PTR_LEN != 7 +# error "DATA_ROLL_PTR_LEN != 7" +#endif + ut_ad(rseg_id < 128); + + return(ut_dulint_create(is_insert * 128 * 256 * 256 + + rseg_id * 256 * 256 + + (page_no / 256) / 256, + (page_no % (256 * 256)) * 256 * 256 + + offset)); +} + +/*************************************************************************** +Decodes a roll pointer dulint. 
*/ +UNIV_INLINE +void +trx_undo_decode_roll_ptr( +/*=====================*/ + dulint roll_ptr, /* in: roll pointer */ + ibool* is_insert, /* out: TRUE if insert undo log */ + ulint* rseg_id, /* out: rollback segment id */ + ulint* page_no, /* out: page number */ + ulint* offset) /* out: offset of the undo entry within page */ +{ + ulint low; + ulint high; +#if DATA_ROLL_PTR_LEN != 7 +# error "DATA_ROLL_PTR_LEN != 7" +#endif +#if TRUE != 1 +# error "TRUE != 1" +#endif + high = ut_dulint_get_high(roll_ptr); + low = ut_dulint_get_low(roll_ptr); + + *offset = low % (256 * 256); + + *is_insert = high / (256 * 256 * 128); /* TRUE == 1 */ + *rseg_id = (high / (256 * 256)) % 128; + + *page_no = (high % (256 * 256)) * 256 * 256 + + (low / 256) / 256; +} + +/*************************************************************************** +Returns TRUE if the roll pointer is of the insert type. */ +UNIV_INLINE +ibool +trx_undo_roll_ptr_is_insert( +/*========================*/ + /* out: TRUE if insert undo log */ + dulint roll_ptr) /* in: roll pointer */ +{ + ulint high; +#if DATA_ROLL_PTR_LEN != 7 +# error "DATA_ROLL_PTR_LEN != 7" +#endif +#if TRUE != 1 +# error "TRUE != 1" +#endif + high = ut_dulint_get_high(roll_ptr); + + return(high / (256 * 256 * 128)); +} + +/********************************************************************* +Writes a roll ptr to an index page. In case that the size changes in +some future version, this function should be used instead of +mach_write_... */ +UNIV_INLINE +void +trx_write_roll_ptr( +/*===============*/ + byte* ptr, /* in: pointer to memory where written */ + dulint roll_ptr) /* in: roll ptr */ +{ +#if DATA_ROLL_PTR_LEN != 7 +# error "DATA_ROLL_PTR_LEN != 7" +#endif + mach_write_to_7(ptr, roll_ptr); +} + +/********************************************************************* +Reads a roll ptr from an index page. In case that the roll ptr size +changes in some future version, this function should be used instead of +mach_read_... 
*/ +UNIV_INLINE +dulint +trx_read_roll_ptr( +/*==============*/ + /* out: roll ptr */ + const byte* ptr) /* in: pointer to memory from where to read */ +{ +#if DATA_ROLL_PTR_LEN != 7 +# error "DATA_ROLL_PTR_LEN != 7" +#endif + return(mach_read_from_7(ptr)); +} + +/********************************************************************** +Gets an undo log page and x-latches it. */ +UNIV_INLINE +page_t* +trx_undo_page_get( +/*==============*/ + /* out: pointer to page x-latched */ + ulint space, /* in: space where placed */ + ulint zip_size, /* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /* in: page number */ + mtr_t* mtr) /* in: mtr */ +{ + buf_block_t* block = buf_page_get(space, zip_size, page_no, + RW_X_LATCH, mtr); + buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE); + + return(buf_block_get_frame(block)); +} + +/********************************************************************** +Gets an undo log page and s-latches it. */ +UNIV_INLINE +page_t* +trx_undo_page_get_s_latched( +/*========================*/ + /* out: pointer to page s-latched */ + ulint space, /* in: space where placed */ + ulint zip_size, /* in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /* in: page number */ + mtr_t* mtr) /* in: mtr */ +{ + buf_block_t* block = buf_page_get(space, zip_size, page_no, + RW_S_LATCH, mtr); + buf_block_dbg_add_level(block, SYNC_TRX_UNDO_PAGE); + + return(buf_block_get_frame(block)); +} + +/********************************************************************** +Returns the start offset of the undo log records of the specified undo +log on the page. 
*/ +UNIV_INLINE +ulint +trx_undo_page_get_start( +/*====================*/ + /* out: start offset */ + page_t* undo_page,/* in: undo log page */ + ulint page_no,/* in: undo log header page number */ + ulint offset) /* in: undo log header offset on page */ +{ + ulint start; + + if (page_no == page_get_page_no(undo_page)) { + + start = mach_read_from_2(offset + undo_page + + TRX_UNDO_LOG_START); + } else { + start = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE; + } + + return(start); +} + +/********************************************************************** +Returns the end offset of the undo log records of the specified undo +log on the page. */ +UNIV_INLINE +ulint +trx_undo_page_get_end( +/*==================*/ + /* out: end offset */ + page_t* undo_page,/* in: undo log page */ + ulint page_no,/* in: undo log header page number */ + ulint offset) /* in: undo log header offset on page */ +{ + trx_ulogf_t* log_hdr; + ulint end; + + if (page_no == page_get_page_no(undo_page)) { + + log_hdr = undo_page + offset; + + end = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG); + + if (end == 0) { + end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_FREE); + } + } else { + end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + + TRX_UNDO_PAGE_FREE); + } + + return(end); +} + +/********************************************************************** +Returns the previous undo record on the page in the specified log, or +NULL if none exists. 
*/ +UNIV_INLINE +trx_undo_rec_t* +trx_undo_page_get_prev_rec( +/*=======================*/ + /* out: pointer to record, NULL if none */ + trx_undo_rec_t* rec, /* in: undo log record */ + ulint page_no,/* in: undo log header page number */ + ulint offset) /* in: undo log header offset on page */ +{ + page_t* undo_page; + ulint start; + + undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE); + + start = trx_undo_page_get_start(undo_page, page_no, offset); + + if (start + undo_page == rec) { + + return(NULL); + } + + return(undo_page + mach_read_from_2(rec - 2)); +} + +/********************************************************************** +Returns the next undo log record on the page in the specified log, or +NULL if none exists. */ +UNIV_INLINE +trx_undo_rec_t* +trx_undo_page_get_next_rec( +/*=======================*/ + /* out: pointer to record, NULL if none */ + trx_undo_rec_t* rec, /* in: undo log record */ + ulint page_no,/* in: undo log header page number */ + ulint offset) /* in: undo log header offset on page */ +{ + page_t* undo_page; + ulint end; + ulint next; + + undo_page = (page_t*) ut_align_down(rec, UNIV_PAGE_SIZE); + + end = trx_undo_page_get_end(undo_page, page_no, offset); + + next = mach_read_from_2(rec); + + if (next == end) { + + return(NULL); + } + + return(undo_page + next); +} + +/********************************************************************** +Returns the last undo record on the page in the specified undo log, or +NULL if none exists. 
*/ +UNIV_INLINE +trx_undo_rec_t* +trx_undo_page_get_last_rec( +/*=======================*/ + /* out: pointer to record, NULL if none */ + page_t* undo_page,/* in: undo log page */ + ulint page_no,/* in: undo log header page number */ + ulint offset) /* in: undo log header offset on page */ +{ + ulint start; + ulint end; + + start = trx_undo_page_get_start(undo_page, page_no, offset); + end = trx_undo_page_get_end(undo_page, page_no, offset); + + if (start == end) { + + return(NULL); + } + + return(undo_page + mach_read_from_2(undo_page + end - 2)); +} + +/********************************************************************** +Returns the first undo record on the page in the specified undo log, or +NULL if none exists. */ +UNIV_INLINE +trx_undo_rec_t* +trx_undo_page_get_first_rec( +/*========================*/ + /* out: pointer to record, NULL if none */ + page_t* undo_page,/* in: undo log page */ + ulint page_no,/* in: undo log header page number */ + ulint offset) /* in: undo log header offset on page */ +{ + ulint start; + ulint end; + + start = trx_undo_page_get_start(undo_page, page_no, offset); + end = trx_undo_page_get_end(undo_page, page_no, offset); + + if (start == end) { + + return(NULL); + } + + return(undo_page + start); +} diff --git a/storage/xtradb/include/trx0xa.h b/storage/xtradb/include/trx0xa.h new file mode 100644 index 00000000000..0e040b8d8e5 --- /dev/null +++ b/storage/xtradb/include/trx0xa.h @@ -0,0 +1,61 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/* + * Start of xa.h header + * + * Define a symbol to prevent multiple inclusions of this header file + */ +#ifndef XA_H +#define XA_H + +/* + * Transaction branch identification: XID and NULLXID: + */ +#ifndef XIDDATASIZE + +#define XIDDATASIZE 128 /* size in bytes */ +#define MAXGTRIDSIZE 64 /* maximum size in bytes of gtrid */ +#define MAXBQUALSIZE 64 /* maximum size in bytes of bqual */ + +struct xid_t { + long formatID; /* format identifier; -1 + means that the XID is null */ + long gtrid_length; /* value from 1 through 64 */ + long bqual_length; /* value from 1 through 64 */ + char data[XIDDATASIZE]; +}; +typedef struct xid_t XID; +#endif +#define XA_OK 0 /* normal execution */ +#define XAER_ASYNC -2 /* asynchronous operation already + outstanding */ +#define XAER_RMERR -3 /* a resource manager error occurred in + the transaction branch */ +#define XAER_NOTA -4 /* the XID is not valid */ +#define XAER_INVAL -5 /* invalid arguments were given */ +#define XAER_PROTO -6 /* routine invoked in an improper + context */ +#define XAER_RMFAIL -7 /* resource manager unavailable */ +#define XAER_DUPID -8 /* the XID already exists */ +#define XAER_OUTSIDE -9 /* resource manager doing work outside + transaction */ +#endif /* ifndef XA_H */ +/* + * End of xa.h header + */ diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i new file mode 100644 index 00000000000..122d53c0055 --- /dev/null +++ b/storage/xtradb/include/univ.i @@ -0,0 +1,466 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 2008, Google Inc. 
+ +Portions of this file contain modifications contributed and copyrighted by +Google, Inc. Those modifications are gratefully acknowledged and are described +briefly in the InnoDB documentation. The contributions by Google are +incorporated with their permission, and subject to the conditions contained in +the file COPYING.Google. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/*************************************************************************** +Version control for database, common definitions, and include files + +Created 1/20/1994 Heikki Tuuri +****************************************************************************/ + +#ifndef univ_i +#define univ_i + +#define INNODB_VERSION_MAJOR 1 +#define INNODB_VERSION_MINOR 0 +#define INNODB_VERSION_BUGFIX 3 +#define PERCONA_INNODB_VERSION 3 + +/* The following is the InnoDB version as shown in +SELECT plugin_version FROM information_schema.plugins; +calculated in make_version_string() in sql/sql_show.cc like this: +"version >> 8" . "version & 0xff" +because the version is shown with only one dot, we skip the last +component, i.e. 
we show M.N.P as M.N */ +#define INNODB_VERSION_SHORT \ + (INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR) + +/* auxiliary macros to help creating the version as string */ +#define __INNODB_VERSION(a, b, c, d) (#a "." #b "." #c "-" #d) +#define _INNODB_VERSION(a, b, c, d) __INNODB_VERSION(a, b, c, d) + +#define INNODB_VERSION_STR \ + _INNODB_VERSION(INNODB_VERSION_MAJOR, \ + INNODB_VERSION_MINOR, \ + INNODB_VERSION_BUGFIX, \ + PERCONA_INNODB_VERSION) + +#ifdef MYSQL_DYNAMIC_PLUGIN +/* In the dynamic plugin, redefine some externally visible symbols +in order not to conflict with the symbols of a builtin InnoDB. */ + +/* Rename all C++ classes that contain virtual functions, because we +have not figured out how to apply the visibility=hidden attribute to +the virtual method table (vtable) in GCC 3. */ +# define ha_innobase ha_innodb +#endif /* MYSQL_DYNAMIC_PLUGIN */ + +#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__) +# undef __WIN__ +# define __WIN__ + +# include <windows.h> + +# if !defined(WIN64) && !defined(_WIN64) +# define UNIV_CAN_USE_X86_ASSEMBLER +# endif + +# ifdef _NT_ +# define __NT__ +# endif + +#else +/* The defines used with MySQL */ + +/* Include two header files from MySQL to make the Unix flavor used +in compiling more Posix-compatible. These headers also define __WIN__ +if we are compiling on Windows. */ + +# include <my_global.h> +# include <my_pthread.h> + +/* Include <sys/stat.h> to get S_I... 
macros defined for os0file.c */ +# include <sys/stat.h> +# if !defined(__NETWARE__) && !defined(__WIN__) +# include <sys/mman.h> /* mmap() for os0proc.c */ +# endif + +# undef PACKAGE +# undef VERSION + +/* Include the header file generated by GNU autoconf */ +# ifndef __WIN__ +# include "config.h" +# endif + +# ifdef HAVE_SCHED_H +# include <sched.h> +# endif + +/* When compiling for Itanium IA64, undefine the flag below to prevent use +of the 32-bit x86 assembler in mutex operations. */ + +# if defined(__WIN__) && !defined(WIN64) && !defined(_WIN64) +# define UNIV_CAN_USE_X86_ASSEMBLER +# endif + +/* For InnoDB rw_locks to work with atomics we need the thread_id +to be no more than machine word wide. The following enables using +atomics for InnoDB rw_locks where these conditions are met. */ +#ifdef HAVE_GCC_ATOMIC_BUILTINS +/* if HAVE_ATOMIC_PTHREAD_T is defined at this point that means that +the code from plug.in has defined it and we do not need to include +ut0auxconf.h which would either define HAVE_ATOMIC_PTHREAD_T or will +be empty */ +# ifndef HAVE_ATOMIC_PTHREAD_T +# include "ut0auxconf.h" +# endif /* HAVE_ATOMIC_PTHREAD_T */ +/* now HAVE_ATOMIC_PTHREAD_T is eventually defined either by plug.in or +from Makefile.in->ut0auxconf.h */ +# ifdef HAVE_ATOMIC_PTHREAD_T +# define INNODB_RW_LOCKS_USE_ATOMICS +# endif /* HAVE_ATOMIC_PTHREAD_T */ +#endif /* HAVE_GCC_ATOMIC_BUILTINS */ + +/* We only try to do explicit inlining of functions with gcc and +Microsoft Visual C++ */ + +# if !defined(__GNUC__) +# undef UNIV_MUST_NOT_INLINE /* Remove compiler warning */ +# define UNIV_MUST_NOT_INLINE +# endif + +# ifdef HAVE_PREAD +# define HAVE_PWRITE +# endif + +#endif /* #if (defined(WIN32) || ... */ + +/* DEBUG VERSION CONTROL + ===================== */ + +/* The following flag will make InnoDB to initialize +all memory it allocates to zero. It hides Purify +warnings about reading unallocated memory unless +memory is read outside the allocated blocks. 
*/ +/* +#define UNIV_INIT_MEM_TO_ZERO +*/ + +/* When this macro is defined then additional test functions will be +compiled. These functions live at the end of each relevant source file +and have "test_" prefix. These functions are not called from anywhere in +the code, they can be called from gdb after +innobase_start_or_create_for_mysql() has executed using the call +command. Not tested on Windows. */ +/* +#define UNIV_COMPILE_TEST_FUNCS +*/ + +#if 0 +#define UNIV_DEBUG_VALGRIND /* Enable extra + Valgrind instrumentation */ +#define UNIV_DEBUG_PRINT /* Enable the compilation of + some debug print functions */ +#define UNIV_AHI_DEBUG /* Enable adaptive hash index + debugging without UNIV_DEBUG */ +#define UNIV_BUF_DEBUG /* Enable buffer pool + debugging without UNIV_DEBUG */ +#define UNIV_DEBUG /* Enable ut_ad() assertions + and disable UNIV_INLINE */ +#define UNIV_DEBUG_FILE_ACCESSES /* Debug .ibd file access + (field file_page_was_freed + in buf_page_t) */ +#define UNIV_LRU_DEBUG /* debug the buffer pool LRU */ +#define UNIV_HASH_DEBUG /* debug HASH_ macros */ +#define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */ +#define UNIV_MEM_DEBUG /* detect memory leaks etc */ +#define UNIV_IBUF_DEBUG /* debug the insert buffer */ +#define UNIV_IBUF_COUNT_DEBUG /* debug the insert buffer; +this limits the database to IBUF_COUNT_N_SPACES and IBUF_COUNT_N_PAGES, +and the insert buffer must be empty when the database is started */ +#define UNIV_SYNC_DEBUG /* debug mutex and latch +operations (very slow); also UNIV_DEBUG must be defined */ +#define UNIV_SEARCH_DEBUG /* debug B-tree comparisons */ +#define UNIV_SYNC_PERF_STAT /* operation counts for + rw-locks and mutexes */ +#define UNIV_SEARCH_PERF_STAT /* statistics for the + adaptive hash index */ +#define UNIV_SRV_PRINT_LATCH_WAITS /* enable diagnostic output + in sync0sync.c */ +#define UNIV_BTR_PRINT /* enable functions for + printing B-trees */ +#define UNIV_ZIP_DEBUG /* extensive consistency checks + for compressed 
pages */ +#define UNIV_ZIP_COPY /* call page_zip_copy_recs() + more often */ +#endif + +#define UNIV_BTR_DEBUG /* check B-tree links */ +#define UNIV_LIGHT_MEM_DEBUG /* light memory debugging */ + +#ifdef HAVE_purify +/* The following sets all new allocated memory to zero before use: +this can be used to eliminate unnecessary Purify warnings, but note that +it also masks many bugs Purify could detect. For detailed Purify analysis it +is best to remove the define below and look through the warnings one +by one. */ +#define UNIV_SET_MEM_TO_ZERO +#endif + +/* +#define UNIV_SQL_DEBUG +#define UNIV_LOG_DEBUG +*/ + /* the above option prevents forcing of log to disk + at a buffer page write: it should be tested with this + option off; also some ibuf tests are suppressed */ +/* +#define UNIV_BASIC_LOG_DEBUG +*/ + /* the above option enables basic recovery debugging: + new allocated file pages are reset */ + +/* Linkage specifier for non-static InnoDB symbols (variables and functions) +that are only referenced from within InnoDB, not from MySQL */ +#ifdef __WIN__ +# define UNIV_INTERN +#else +# define UNIV_INTERN __attribute__((visibility ("hidden"))) +#endif + +#if (!defined(UNIV_DEBUG) && !defined(UNIV_MUST_NOT_INLINE)) +/* Definition for inline version */ + +#ifdef __WIN__ +#define UNIV_INLINE __inline +#else +#define UNIV_INLINE static __inline__ +#endif + +#else +/* If we want to compile a noninlined version we use the following macro +definitions: */ + +#define UNIV_NONINL +#define UNIV_INLINE UNIV_INTERN + +#endif /* !UNIV_DEBUG && !UNIV_MUST_NOT_INLINE */ + +/* _WIN64 must be tested before _WIN32: 64-bit Windows compilers predefine +both macros, so testing _WIN32 first would select the 4-byte word size on +64-bit Windows as well. */ +#ifdef _WIN64 +#define UNIV_WORD_SIZE 8 +#elif defined(_WIN32) +#define UNIV_WORD_SIZE 4 +#else +/* MySQL config.h generated by GNU autoconf will define SIZEOF_LONG in Posix */ +#define UNIV_WORD_SIZE SIZEOF_LONG +#endif + +/* The following alignment is used in memory allocations in memory heap +management to ensure correct alignment for doubles etc. 
*/ +#define UNIV_MEM_ALIGNMENT 8 + +/* The following alignment is used in aligning lints etc. */ +#define UNIV_WORD_ALIGNMENT UNIV_WORD_SIZE + +/* + DATABASE VERSION CONTROL + ======================== +*/ + +/* The 2-logarithm of UNIV_PAGE_SIZE: */ +#define UNIV_PAGE_SIZE_SHIFT 14 +/* The universal page size of the database */ +#define UNIV_PAGE_SIZE (1 << UNIV_PAGE_SIZE_SHIFT) + +/* Maximum number of parallel threads in a parallelized operation */ +#define UNIV_MAX_PARALLELISM 32 + +/* + UNIVERSAL TYPE DEFINITIONS + ========================== +*/ + +/* Note that inside MySQL 'byte' is defined as char on Linux! */ +#define byte unsigned char + +/* Define an unsigned integer type that is exactly 32 bits. */ + +#if SIZEOF_INT == 4 +typedef unsigned int ib_uint32_t; +#elif SIZEOF_LONG == 4 +typedef unsigned long ib_uint32_t; +#else +#error "Neither int or long is 4 bytes" +#endif + +/* Another basic type we use is unsigned long integer which should be equal to +the word size of the machine, that is on a 32-bit platform 32 bits, and on a +64-bit platform 64 bits. We also give the printf format for the type as a +macro ULINTPF. */ + +#ifdef _WIN64 +typedef unsigned __int64 ulint; +#define ULINTPF "%I64u" +typedef __int64 lint; +#else +typedef unsigned long int ulint; +#define ULINTPF "%lu" +typedef long int lint; +#endif + +#ifdef __WIN__ +typedef __int64 ib_int64_t; +typedef unsigned __int64 ib_uint64_t; +#else +/* Note: longlong and ulonglong come from MySQL headers. 
*/ +typedef longlong ib_int64_t; +typedef ulonglong ib_uint64_t; +#endif + +typedef unsigned long long int ullint; + +#ifndef __WIN__ +#if SIZEOF_LONG != SIZEOF_VOIDP +#error "Error: InnoDB's ulint must be of the same size as void*" +#endif +#endif + +/* The 'undefined' value for a ulint */ +#define ULINT_UNDEFINED ((ulint)(-1)) + +/* The undefined 32-bit unsigned integer */ +#define ULINT32_UNDEFINED 0xFFFFFFFF + +/* Maximum value for a ulint */ +#define ULINT_MAX ((ulint)(-2)) + +/* Maximum value for ib_uint64_t */ +#define IB_ULONGLONG_MAX ((ib_uint64_t) (~0ULL)) + +/* This 'ibool' type is used within Innobase. Remember that different included +headers may define 'bool' differently. Do not assume that 'bool' is a ulint! */ +#define ibool ulint + +#ifndef TRUE + +#define TRUE 1 +#define FALSE 0 + +#endif + +/* The following number as the length of a logical field means that the field +has the SQL NULL as its value. NOTE that because we assume that the length +of a field is a 32-bit integer when we store it, for example, to an undo log +on disk, we must have also this number fit in 32 bits, also in 64-bit +computers! */ + +#define UNIV_SQL_NULL ULINT32_UNDEFINED + +/* Lengths which are not UNIV_SQL_NULL, but bigger than the following +number indicate that a field contains a reference to an externally +stored part of the field in the tablespace. The length field then +contains the sum of the following flag and the locally stored len. */ + +#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE) + +/* Some macros to improve branch prediction and reduce cache misses */ +#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER) +/* Tell the compiler that 'expr' probably evaluates to 'constant'. 
*/ +# define UNIV_EXPECT(expr,constant) __builtin_expect(expr, constant) +/* Tell the compiler that a pointer is likely to be NULL. The argument is +parenthesized so that an arbitrary pointer expression is cast as a whole. */ +# define UNIV_LIKELY_NULL(ptr) __builtin_expect((ulint) (ptr), 0) +/* Minimize cache-miss latency by moving data at addr into a cache before +it is read. */ +# define UNIV_PREFETCH_R(addr) __builtin_prefetch(addr, 0, 3) +/* Minimize cache-miss latency by moving data at addr into a cache before +it is read or written. */ +# define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3) +#else +/* Dummy versions of the macros */ +# define UNIV_EXPECT(expr,value) (expr) +# define UNIV_LIKELY_NULL(expr) (expr) +# define UNIV_PREFETCH_R(addr) ((void) 0) +# define UNIV_PREFETCH_RW(addr) ((void) 0) +#endif +/* Tell the compiler that cond is likely to hold */ +#define UNIV_LIKELY(cond) UNIV_EXPECT(cond, TRUE) +/* Tell the compiler that cond is unlikely to hold */ +#define UNIV_UNLIKELY(cond) UNIV_EXPECT(cond, FALSE) + +/* Compile-time constant of the given array's size. */ +#define UT_ARR_SIZE(a) (sizeof(a) / sizeof((a)[0])) + +/* The return type from a thread's start function differs between Unix and +Windows, so define a typedef for it and a macro to use at the end of such +functions. 
*/ + +#ifdef __WIN__ +typedef ulint os_thread_ret_t; +#define OS_THREAD_DUMMY_RETURN return(0) +#else +typedef void* os_thread_ret_t; +#define OS_THREAD_DUMMY_RETURN return(NULL) +#endif + +#include <stdio.h> +#include "ut0dbg.h" +#include "ut0ut.h" +#include "db0err.h" +#ifdef UNIV_DEBUG_VALGRIND +# include <valgrind/memcheck.h> +# define UNIV_MEM_VALID(addr, size) VALGRIND_MAKE_MEM_DEFINED(addr, size) +# define UNIV_MEM_INVALID(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size) +# define UNIV_MEM_FREE(addr, size) VALGRIND_MAKE_MEM_NOACCESS(addr, size) +# define UNIV_MEM_ALLOC(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size) +# define UNIV_MEM_DESC(addr, size, b) VALGRIND_CREATE_BLOCK(addr, size, b) +# define UNIV_MEM_UNDESC(b) VALGRIND_DISCARD(b) +# define UNIV_MEM_ASSERT_RW(addr, size) do { \ + const void* _p = (const void*) (ulint) \ + VALGRIND_CHECK_MEM_IS_DEFINED(addr, size); \ + if (UNIV_LIKELY_NULL(_p)) \ + fprintf(stderr, "%s:%d: %p[%u] undefined at %ld\n", \ + __FILE__, __LINE__, \ + (const void*) (addr), (unsigned) (size), (long) \ + (((const char*) _p) - ((const char*) (addr)))); \ + } while (0) +# define UNIV_MEM_ASSERT_W(addr, size) do { \ + const void* _p = (const void*) (ulint) \ + VALGRIND_CHECK_MEM_IS_ADDRESSABLE(addr, size); \ + if (UNIV_LIKELY_NULL(_p)) \ + fprintf(stderr, "%s:%d: %p[%u] unwritable at %ld\n", \ + __FILE__, __LINE__, \ + (const void*) (addr), (unsigned) (size), (long) \ + (((const char*) _p) - ((const char*) (addr)))); \ + } while (0) +#else +# define UNIV_MEM_VALID(addr, size) do {} while(0) +# define UNIV_MEM_INVALID(addr, size) do {} while(0) +# define UNIV_MEM_FREE(addr, size) do {} while(0) +# define UNIV_MEM_ALLOC(addr, size) do {} while(0) +# define UNIV_MEM_DESC(addr, size, b) do {} while(0) +# define UNIV_MEM_UNDESC(b) do {} while(0) +# define UNIV_MEM_ASSERT_RW(addr, size) do {} while(0) +# define UNIV_MEM_ASSERT_W(addr, size) do {} while(0) +#endif +#define UNIV_MEM_ASSERT_AND_FREE(addr, size) do { \ + 
UNIV_MEM_ASSERT_W(addr, size); \ + UNIV_MEM_FREE(addr, size); \ +} while (0) +#define UNIV_MEM_ASSERT_AND_ALLOC(addr, size) do { \ + UNIV_MEM_ASSERT_W(addr, size); \ + UNIV_MEM_ALLOC(addr, size); \ +} while (0) + +#endif diff --git a/storage/xtradb/include/usr0sess.h b/storage/xtradb/include/usr0sess.h new file mode 100644 index 00000000000..08c6c70066f --- /dev/null +++ b/storage/xtradb/include/usr0sess.h @@ -0,0 +1,77 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Sessions + +Created 6/25/1996 Heikki Tuuri +*******************************************************/ + +#ifndef usr0sess_h +#define usr0sess_h + +#include "univ.i" +#include "ut0byte.h" +#include "trx0types.h" +#include "srv0srv.h" +#include "trx0types.h" +#include "usr0types.h" +#include "que0types.h" +#include "data0data.h" +#include "rem0rec.h" + +/************************************************************************* +Opens a session. 
*/ +UNIV_INTERN +sess_t* +sess_open(void); +/*============*/ + /* out, own: session object */ +/************************************************************************* +Closes a session, freeing the memory occupied by it, if it is in a state +where it should be closed. */ +UNIV_INTERN +ibool +sess_try_close( +/*===========*/ + /* out: TRUE if closed */ + sess_t* sess); /* in, own: session object */ + +/* The session handle. All fields are protected by the kernel mutex */ +struct sess_struct{ + ulint state; /* state of the session */ + trx_t* trx; /* transaction object permanently + assigned for the session: the + transaction instance designated by the + trx id changes, but the memory + structure is preserved */ + UT_LIST_BASE_NODE_T(que_t) + graphs; /* query graphs belonging to this + session */ +}; + +/* Session states */ +#define SESS_ACTIVE 1 +#define SESS_ERROR 2 /* session contains an error message + which has not yet been communicated + to the client */ +#ifndef UNIV_NONINL +#include "usr0sess.ic" +#endif + +#endif diff --git a/storage/xtradb/include/usr0sess.ic b/storage/xtradb/include/usr0sess.ic new file mode 100644 index 00000000000..5eefed382da --- /dev/null +++ b/storage/xtradb/include/usr0sess.ic @@ -0,0 +1,23 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Sessions + +Created 6/25/1996 Heikki Tuuri +*******************************************************/ diff --git a/storage/xtradb/include/usr0types.h b/storage/xtradb/include/usr0types.h new file mode 100644 index 00000000000..7f7d12f7bf5 --- /dev/null +++ b/storage/xtradb/include/usr0types.h @@ -0,0 +1,30 @@ +/***************************************************************************** + +Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
 + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************** +Users and sessions global types + +Created 6/25/1996 Heikki Tuuri +*******************************************************/ + +#ifndef usr0types_h +#define usr0types_h + +typedef struct sess_struct sess_t; + +#endif diff --git a/storage/xtradb/include/ut0auxconf.h b/storage/xtradb/include/ut0auxconf.h new file mode 100644 index 00000000000..fb5fae9b399 --- /dev/null +++ b/storage/xtradb/include/ut0auxconf.h @@ -0,0 +1,19 @@ +/* Do not remove this file even though it is empty. +This file is included in univ.i and will cause compilation failure +if not present. +A custom check has been added in the generated +storage/innobase/Makefile.in that is shipped with the InnoDB Plugin +source archive. This check tries to compile a test program and if +successful then adds "#define HAVE_ATOMIC_PTHREAD_T" to this file. +This is a hack that has been developed in order to check for pthread_t +atomicity without the need to regenerate the ./configure script that is +distributed in the MySQL 5.1 official source archives. +If by any chance Makefile.in and ./configure are regenerated and thus +the hack from Makefile.in wiped away then the "real" check from plug.in +will take over. 
 +*/ +/* This is a temporary fix for http://bugs.mysql.com/43740 */ +/* force to enable */ +#ifdef HAVE_GCC_ATOMIC_BUILTINS +#define HAVE_ATOMIC_PTHREAD_T +#endif diff --git a/storage/xtradb/include/ut0byte.h b/storage/xtradb/include/ut0byte.h new file mode 100644 index 00000000000..24aac1678b3 --- /dev/null +++ b/storage/xtradb/include/ut0byte.h @@ -0,0 +1,268 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/********************************************************************** +Utilities for byte operations + +Created 1/20/1994 Heikki Tuuri +***********************************************************************/ + +#ifndef ut0byte_h +#define ut0byte_h + + +#include "univ.i" + +/* Type definition for a 64-bit unsigned integer, which works also +in 32-bit machines. NOTE! Access the fields only with the accessor +functions. This definition appears here only for the compiler to +know the size of a dulint. 
*/ + +typedef struct dulint_struct dulint; +struct dulint_struct{ + ulint high; /* most significant 32 bits */ + ulint low; /* least significant 32 bits */ +}; + +/* Zero value for a dulint */ +extern const dulint ut_dulint_zero; + +/* Maximum value for a dulint */ +extern const dulint ut_dulint_max; + +/*********************************************************** +Creates a 64-bit dulint out of two ulints. */ +UNIV_INLINE +dulint +ut_dulint_create( +/*=============*/ + /* out: created dulint */ + ulint high, /* in: high-order 32 bits */ + ulint low); /* in: low-order 32 bits */ +/*********************************************************** +Gets the high-order 32 bits of a dulint. */ +UNIV_INLINE +ulint +ut_dulint_get_high( +/*===============*/ + /* out: 32 bits in ulint */ + dulint d); /* in: dulint */ +/*********************************************************** +Gets the low-order 32 bits of a dulint. */ +UNIV_INLINE +ulint +ut_dulint_get_low( +/*==============*/ + /* out: 32 bits in ulint */ + dulint d); /* in: dulint */ +/*********************************************************** +Converts a dulint (a struct of 2 ulints) to ib_int64_t, which is a 64-bit +integer type. */ +UNIV_INLINE +ib_int64_t +ut_conv_dulint_to_longlong( +/*=======================*/ + /* out: value in ib_int64_t type */ + dulint d); /* in: dulint */ +/*********************************************************** +Tests if a dulint is zero. */ +UNIV_INLINE +ibool +ut_dulint_is_zero( +/*==============*/ + /* out: TRUE if zero */ + dulint a); /* in: dulint */ +/*********************************************************** +Compares two dulints. */ +UNIV_INLINE +int +ut_dulint_cmp( +/*==========*/ + /* out: -1 if a < b, 0 if a == b, + 1 if a > b */ + dulint a, /* in: dulint */ + dulint b); /* in: dulint */ +/*********************************************************** +Calculates the max of two dulints. 
*/ +UNIV_INLINE +dulint +ut_dulint_get_max( +/*==============*/ + /* out: max(a, b) */ + dulint a, /* in: dulint */ + dulint b); /* in: dulint */ +/*********************************************************** +Calculates the min of two dulints. */ +UNIV_INLINE +dulint +ut_dulint_get_min( +/*==============*/ + /* out: min(a, b) */ + dulint a, /* in: dulint */ + dulint b); /* in: dulint */ +/*********************************************************** +Adds a ulint to a dulint. */ +UNIV_INLINE +dulint +ut_dulint_add( +/*==========*/ + /* out: sum a + b */ + dulint a, /* in: dulint */ + ulint b); /* in: ulint */ +/*********************************************************** +Subtracts a ulint from a dulint. */ +UNIV_INLINE +dulint +ut_dulint_subtract( +/*===============*/ + /* out: a - b */ + dulint a, /* in: dulint */ + ulint b); /* in: ulint, b <= a */ +/*********************************************************** +Subtracts a dulint from another. NOTE that the difference must be +non-negative and smaller than 4G. */ +UNIV_INLINE +ulint +ut_dulint_minus( +/*============*/ + /* out: a - b */ + dulint a, /* in: dulint; NOTE a must be >= b and at most + 2 to power 32 - 1 greater */ + dulint b); /* in: dulint */ +/************************************************************ +Rounds a dulint downward to a multiple of a power of 2. */ +UNIV_INLINE +dulint +ut_dulint_align_down( +/*=================*/ + /* out: rounded value */ + dulint n, /* in: number to be rounded */ + ulint align_no); /* in: align by this number which must be a + power of 2 */ +/************************************************************ +Rounds a dulint upward to a multiple of a power of 2. 
*/ +UNIV_INLINE +dulint +ut_dulint_align_up( +/*===============*/ + /* out: rounded value */ + dulint n, /* in: number to be rounded */ + ulint align_no); /* in: align by this number which must be a + power of 2 */ +/************************************************************ +Rounds ib_uint64_t downward to a multiple of a power of 2. */ +UNIV_INLINE +ib_uint64_t +ut_uint64_align_down( +/*=================*/ + /* out: rounded value */ + ib_uint64_t n, /* in: number to be rounded */ + ulint align_no); /* in: align by this number + which must be a power of 2 */ +/************************************************************ +Rounds ib_uint64_t upward to a multiple of a power of 2. */ +UNIV_INLINE +ib_uint64_t +ut_uint64_align_up( +/*===============*/ + /* out: rounded value */ + ib_uint64_t n, /* in: number to be rounded */ + ulint align_no); /* in: align by this number + which must be a power of 2 */ +/*********************************************************** +Increments a dulint variable by 1. */ +#define UT_DULINT_INC(D)\ +{\ + if ((D).low == 0xFFFFFFFFUL) {\ + (D).high = (D).high + 1;\ + (D).low = 0;\ + } else {\ + (D).low = (D).low + 1;\ + }\ +} +/*********************************************************** +Tests if two dulints are equal. */ +#define UT_DULINT_EQ(D1, D2) (((D1).low == (D2).low)\ + && ((D1).high == (D2).high)) +#ifdef notdefined +/**************************************************************** +Sort function for dulint arrays. */ +UNIV_INTERN +void +ut_dulint_sort(dulint* arr, dulint* aux_arr, ulint low, ulint high); +/*===============================================================*/ +#endif /* notdefined */ + +/************************************************************* +The following function rounds up a pointer to the nearest aligned address. 
*/ +UNIV_INLINE +void* +ut_align( +/*=====*/ + /* out: aligned pointer */ + void* ptr, /* in: pointer */ + ulint align_no); /* in: align by this number */ +/************************************************************* +The following function rounds down a pointer to the nearest +aligned address. */ +UNIV_INLINE +void* +ut_align_down( +/*==========*/ + /* out: aligned pointer */ + const void* ptr, /* in: pointer */ + ulint align_no) /* in: align by this number */ + __attribute__((const)); +/************************************************************* +The following function computes the offset of a pointer from the nearest +aligned address. */ +UNIV_INLINE +ulint +ut_align_offset( +/*============*/ + /* out: distance from aligned + pointer */ + const void* ptr, /* in: pointer */ + ulint align_no) /* in: align by this number */ + __attribute__((const)); +/********************************************************************* +Gets the nth bit of a ulint. */ +UNIV_INLINE +ibool +ut_bit_get_nth( +/*===========*/ + /* out: TRUE if nth bit is 1; 0th bit is defined to + be the least significant */ + ulint a, /* in: ulint */ + ulint n); /* in: nth bit requested */ +/********************************************************************* +Sets the nth bit of a ulint. */ +UNIV_INLINE +ulint +ut_bit_set_nth( +/*===========*/ + /* out: the ulint with the bit set as requested */ + ulint a, /* in: ulint */ + ulint n, /* in: nth bit requested */ + ibool val); /* in: value for the bit to set */ + +#ifndef UNIV_NONINL +#include "ut0byte.ic" +#endif + +#endif diff --git a/storage/xtradb/include/ut0byte.ic b/storage/xtradb/include/ut0byte.ic new file mode 100644 index 00000000000..021a3a15009 --- /dev/null +++ b/storage/xtradb/include/ut0byte.ic @@ -0,0 +1,413 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************************** +Utilities for byte operations + +Created 5/30/1994 Heikki Tuuri +*******************************************************************/ + +/*********************************************************** +Creates a 64-bit dulint out of two ulints. */ +UNIV_INLINE +dulint +ut_dulint_create( +/*=============*/ + /* out: created dulint */ + ulint high, /* in: high-order 32 bits */ + ulint low) /* in: low-order 32 bits */ +{ + dulint res; + + ut_ad(high <= 0xFFFFFFFF); + ut_ad(low <= 0xFFFFFFFF); + + res.high = high; + res.low = low; + + return(res); +} + +/*********************************************************** +Gets the high-order 32 bits of a dulint. */ +UNIV_INLINE +ulint +ut_dulint_get_high( +/*===============*/ + /* out: 32 bits in ulint */ + dulint d) /* in: dulint */ +{ + return(d.high); +} + +/*********************************************************** +Gets the low-order 32 bits of a dulint. 
*/ +UNIV_INLINE +ulint +ut_dulint_get_low( +/*==============*/ + /* out: 32 bits in ulint */ + dulint d) /* in: dulint */ +{ + return(d.low); +} + +/*********************************************************** +Converts a dulint (a struct of 2 ulints) to ib_int64_t, which is a 64-bit +integer type. */ +UNIV_INLINE +ib_int64_t +ut_conv_dulint_to_longlong( +/*=======================*/ + /* out: value in ib_int64_t type */ + dulint d) /* in: dulint */ +{ + return((ib_int64_t)d.low + + (((ib_int64_t)d.high) << 32)); +} + +/*********************************************************** +Tests if a dulint is zero. */ +UNIV_INLINE +ibool +ut_dulint_is_zero( +/*==============*/ + /* out: TRUE if zero */ + dulint a) /* in: dulint */ +{ + if ((a.low == 0) && (a.high == 0)) { + + return(TRUE); + } + + return(FALSE); +} + +/*********************************************************** +Compares two dulints. */ +UNIV_INLINE +int +ut_dulint_cmp( +/*==========*/ + /* out: -1 if a < b, 0 if a == b, + 1 if a > b */ + dulint a, /* in: dulint */ + dulint b) /* in: dulint */ +{ + if (a.high > b.high) { + return(1); + } else if (a.high < b.high) { + return(-1); + } else if (a.low > b.low) { + return(1); + } else if (a.low < b.low) { + return(-1); + } else { + return(0); + } +} + +/*********************************************************** +Calculates the max of two dulints. */ +UNIV_INLINE +dulint +ut_dulint_get_max( +/*==============*/ + /* out: max(a, b) */ + dulint a, /* in: dulint */ + dulint b) /* in: dulint */ +{ + if (ut_dulint_cmp(a, b) > 0) { + + return(a); + } + + return(b); +} + +/*********************************************************** +Calculates the min of two dulints. */ +UNIV_INLINE +dulint +ut_dulint_get_min( +/*==============*/ + /* out: min(a, b) */ + dulint a, /* in: dulint */ + dulint b) /* in: dulint */ +{ + if (ut_dulint_cmp(a, b) > 0) { + + return(b); + } + + return(a); +} + +/*********************************************************** +Adds a ulint to a dulint. 
*/ +UNIV_INLINE +dulint +ut_dulint_add( +/*==========*/ + /* out: sum a + b */ + dulint a, /* in: dulint */ + ulint b) /* in: ulint */ +{ + if (0xFFFFFFFFUL - b >= a.low) { + a.low += b; + + return(a); + } + + a.low = a.low - (0xFFFFFFFFUL - b) - 1; + + a.high++; + + return(a); +} + +/*********************************************************** +Subtracts a ulint from a dulint. */ +UNIV_INLINE +dulint +ut_dulint_subtract( +/*===============*/ + /* out: a - b */ + dulint a, /* in: dulint */ + ulint b) /* in: ulint, b <= a */ +{ + if (a.low >= b) { + a.low -= b; + + return(a); + } + + b -= a.low + 1; + + a.low = 0xFFFFFFFFUL - b; + + ut_ad(a.high > 0); + + a.high--; + + return(a); +} + +/*********************************************************** +Subtracts a dulint from another. NOTE that the difference must be +non-negative and smaller than 4G. */ +UNIV_INLINE +ulint +ut_dulint_minus( +/*============*/ + /* out: a - b */ + dulint a, /* in: dulint; NOTE a must be >= b and at most + 2 to power 32 - 1 greater */ + dulint b) /* in: dulint */ +{ + ulint diff; + + if (a.high == b.high) { + ut_ad(a.low >= b.low); + + return(a.low - b.low); + } + + ut_ad(a.high == b.high + 1); + + diff = (ulint)(0xFFFFFFFFUL - b.low); + diff += 1 + a.low; + + ut_ad(diff > a.low); + + return(diff); +} + +/************************************************************ +Rounds a dulint downward to a multiple of a power of 2. */ +UNIV_INLINE +dulint +ut_dulint_align_down( +/*=================*/ + /* out: rounded value */ + dulint n, /* in: number to be rounded */ + ulint align_no) /* in: align by this number which must be a + power of 2 */ +{ + ulint low, high; + + ut_ad(align_no > 0); + ut_ad(((align_no - 1) & align_no) == 0); + + low = ut_dulint_get_low(n); + high = ut_dulint_get_high(n); + + low = low & ~(align_no - 1); + + return(ut_dulint_create(high, low)); +} + +/************************************************************ +Rounds a dulint upward to a multiple of a power of 2. 
*/ +UNIV_INLINE +dulint +ut_dulint_align_up( +/*===============*/ + /* out: rounded value */ + dulint n, /* in: number to be rounded */ + ulint align_no) /* in: align by this number which must be a + power of 2 */ +{ + return(ut_dulint_align_down(ut_dulint_add(n, align_no - 1), align_no)); +} + +/************************************************************ +Rounds ib_uint64_t downward to a multiple of a power of 2. */ +UNIV_INLINE +ib_uint64_t +ut_uint64_align_down( +/*=================*/ + /* out: rounded value */ + ib_uint64_t n, /* in: number to be rounded */ + ulint align_no) /* in: align by this number + which must be a power of 2 */ +{ + ut_ad(align_no > 0); + ut_ad(ut_is_2pow(align_no)); + + return(n & ~((ib_uint64_t) align_no - 1)); +} + +/************************************************************ +Rounds ib_uint64_t upward to a multiple of a power of 2. */ +UNIV_INLINE +ib_uint64_t +ut_uint64_align_up( +/*===============*/ + /* out: rounded value */ + ib_uint64_t n, /* in: number to be rounded */ + ulint align_no) /* in: align by this number + which must be a power of 2 */ +{ + ib_uint64_t align_1 = (ib_uint64_t) align_no - 1; + + ut_ad(align_no > 0); + ut_ad(ut_is_2pow(align_no)); + + return((n + align_1) & ~align_1); +} + +/************************************************************* +The following function rounds up a pointer to the nearest aligned address. */ +UNIV_INLINE +void* +ut_align( +/*=====*/ + /* out: aligned pointer */ + void* ptr, /* in: pointer */ + ulint align_no) /* in: align by this number */ +{ + ut_ad(align_no > 0); + ut_ad(((align_no - 1) & align_no) == 0); + ut_ad(ptr); + + ut_ad(sizeof(void*) == sizeof(ulint)); + + return((void*)((((ulint)ptr) + align_no - 1) & ~(align_no - 1))); +} + +/************************************************************* +The following function rounds down a pointer to the nearest +aligned address. 
*/ +UNIV_INLINE +void* +ut_align_down( +/*==========*/ + /* out: aligned pointer */ + const void* ptr, /* in: pointer */ + ulint align_no) /* in: align by this number */ +{ + ut_ad(align_no > 0); + ut_ad(((align_no - 1) & align_no) == 0); + ut_ad(ptr); + + ut_ad(sizeof(void*) == sizeof(ulint)); + + return((void*)((((ulint)ptr)) & ~(align_no - 1))); +} + +/************************************************************* +The following function computes the offset of a pointer from the nearest +aligned address. */ +UNIV_INLINE +ulint +ut_align_offset( +/*============*/ + /* out: distance from + aligned pointer */ + const void* ptr, /* in: pointer */ + ulint align_no) /* in: align by this number */ +{ + ut_ad(align_no > 0); + ut_ad(((align_no - 1) & align_no) == 0); + ut_ad(ptr); + + ut_ad(sizeof(void*) == sizeof(ulint)); + + return(((ulint)ptr) & (align_no - 1)); +} + +/********************************************************************* +Gets the nth bit of a ulint. */ +UNIV_INLINE +ibool +ut_bit_get_nth( +/*===========*/ + /* out: TRUE if nth bit is 1; 0th bit is defined to + be the least significant */ + ulint a, /* in: ulint */ + ulint n) /* in: nth bit requested */ +{ + ut_ad(n < 8 * sizeof(ulint)); +#if TRUE != 1 +# error "TRUE != 1" +#endif + return(1 & (a >> n)); +} + +/********************************************************************* +Sets the nth bit of a ulint. 
*/ +UNIV_INLINE +ulint +ut_bit_set_nth( +/*===========*/ + /* out: the ulint with the bit set as requested */ + ulint a, /* in: ulint */ + ulint n, /* in: nth bit requested */ + ibool val) /* in: value for the bit to set */ +{ + ut_ad(n < 8 * sizeof(ulint)); +#if TRUE != 1 +# error "TRUE != 1" +#endif + if (val) { + return(((ulint) 1 << n) | a); + } else { + return(~((ulint) 1 << n) & a); + } +} diff --git a/storage/xtradb/include/ut0dbg.h b/storage/xtradb/include/ut0dbg.h new file mode 100644 index 00000000000..a206789fd4c --- /dev/null +++ b/storage/xtradb/include/ut0dbg.h @@ -0,0 +1,160 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/********************************************************************* +Debug utilities for Innobase + +Created 1/30/1994 Heikki Tuuri +**********************************************************************/ + +#ifndef ut0dbg_h +#define ut0dbg_h + +#include "univ.i" +#include <stdlib.h> +#include "os0thread.h" + +#if defined(__GNUC__) && (__GNUC__ > 2) +# define UT_DBG_FAIL(EXPR) UNIV_UNLIKELY(!((ulint)(EXPR))) +#else +extern ulint ut_dbg_zero; /* This is used to eliminate + compiler warnings */ +# define UT_DBG_FAIL(EXPR) !((ulint)(EXPR) + ut_dbg_zero) +#endif + +/***************************************************************** +Report a failed assertion. */ +UNIV_INTERN +void +ut_dbg_assertion_failed( +/*====================*/ + const char* expr, /* in: the failed assertion */ + const char* file, /* in: source file containing the assertion */ + ulint line); /* in: line number of the assertion */ + +#ifdef __NETWARE__ +/* Flag for ignoring further assertion failures. +On NetWare, have a graceful exit rather than a segfault to avoid abends. */ +extern ibool panic_shutdown; +/* Abort the execution. */ +void ut_dbg_panic(void); +# define UT_DBG_PANIC ut_dbg_panic() +/* Stop threads in ut_a(). 
*/ +# define UT_DBG_STOP do {} while (0) /* We do not do this on NetWare */ +#else /* __NETWARE__ */ +# if defined(__WIN__) || defined(__INTEL_COMPILER) +# undef UT_DBG_USE_ABORT +# elif defined(__GNUC__) && (__GNUC__ > 2) +# define UT_DBG_USE_ABORT +# endif + +# ifndef UT_DBG_USE_ABORT +/* A null pointer that will be dereferenced to trigger a memory trap */ +extern ulint* ut_dbg_null_ptr; +# endif + +# if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT) +/* Flag for indicating that all threads should stop. This will be set +by ut_dbg_assertion_failed(). */ +extern ibool ut_dbg_stop_threads; + +/***************************************************************** +Stop a thread after assertion failure. */ +UNIV_INTERN +void +ut_dbg_stop_thread( +/*===============*/ + const char* file, + ulint line); +# endif + +# ifdef UT_DBG_USE_ABORT +/* Abort the execution. */ +# define UT_DBG_PANIC abort() +/* Stop threads (null operation) */ +# define UT_DBG_STOP do {} while (0) +# else /* UT_DBG_USE_ABORT */ +/* Abort the execution. */ +# define UT_DBG_PANIC \ + if (*(ut_dbg_null_ptr)) ut_dbg_null_ptr = NULL +/* Stop threads in ut_a(). */ +# define UT_DBG_STOP do \ + if (UNIV_UNLIKELY(ut_dbg_stop_threads)) { \ + ut_dbg_stop_thread(__FILE__, (ulint) __LINE__); \ + } while (0) +# endif /* UT_DBG_USE_ABORT */ +#endif /* __NETWARE__ */ + +/* Abort execution if EXPR does not evaluate to nonzero. */ +#define ut_a(EXPR) do { \ + if (UT_DBG_FAIL(EXPR)) { \ + ut_dbg_assertion_failed(#EXPR, \ + __FILE__, (ulint) __LINE__); \ + UT_DBG_PANIC; \ + } \ + UT_DBG_STOP; \ +} while (0) + +/* Abort execution. 
*/ +#define ut_error do { \ + ut_dbg_assertion_failed(0, __FILE__, (ulint) __LINE__); \ + UT_DBG_PANIC; \ +} while (0) + +#ifdef UNIV_DEBUG +#define ut_ad(EXPR) ut_a(EXPR) +#define ut_d(EXPR) do {EXPR;} while (0) +#else +#define ut_ad(EXPR) +#define ut_d(EXPR) +#endif + +#define UT_NOT_USED(A) A = A + +#ifdef UNIV_COMPILE_TEST_FUNCS + +#include <sys/types.h> +#include <sys/time.h> +#include <sys/resource.h> + +/* structure used for recording usage statistics */ +typedef struct speedo_struct { + struct rusage ru; + struct timeval tv; +} speedo_t; + +/*********************************************************************** +Resets a speedo (records the current time in it). */ +UNIV_INTERN +void +speedo_reset( +/*=========*/ + speedo_t* speedo); /* out: speedo */ + +/*********************************************************************** +Shows the time elapsed and usage statistics since the last reset of a +speedo. */ +UNIV_INTERN +void +speedo_show( +/*========*/ + const speedo_t* speedo); /* in: speedo */ + +#endif /* UNIV_COMPILE_TEST_FUNCS */ + +#endif diff --git a/storage/xtradb/include/ut0list.h b/storage/xtradb/include/ut0list.h new file mode 100644 index 00000000000..034aa400af9 --- /dev/null +++ b/storage/xtradb/include/ut0list.h @@ -0,0 +1,165 @@ +/***************************************************************************** + +Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/*********************************************************************** +A double-linked list. This differs from the one in ut0lst.h in that in this +one, each list node contains a pointer to the data, whereas the one in +ut0lst.h uses a strategy where the list pointers are embedded in the data +items themselves. + +Use this one when you need to store arbitrary data in the list where you +can't embed the list pointers in the data, if a data item needs to be +stored in multiple lists, etc. + +Note about the memory management: ib_list_t is a fixed-size struct whose +allocation/deallocation is done through ib_list_create/ib_list_free, but the +memory for the list nodes is allocated through a user-given memory heap, +which can either be the same for all nodes or vary per node. Most users will +probably want to create a memory heap to store the item-specific data, and +pass in this same heap to the list node creation functions, thus +automatically freeing the list node when the item's heap is freed. + +************************************************************************/ + +#ifndef IB_LIST_H +#define IB_LIST_H + +#include "mem0mem.h" + +typedef struct ib_list_struct ib_list_t; +typedef struct ib_list_node_struct ib_list_node_t; +typedef struct ib_list_helper_struct ib_list_helper_t; + +/******************************************************************** +Create a new list using mem_alloc. Lists created with this function must be +freed with ib_list_free. */ +UNIV_INTERN +ib_list_t* +ib_list_create(void); +/*=================*/ + /* out: list */ + + +/******************************************************************** +Create a new list using the given heap. 
ib_list_free MUST NOT BE CALLED for +lists created with this function. */ +UNIV_INTERN +ib_list_t* +ib_list_create_heap( +/*================*/ + /* out: list */ + mem_heap_t* heap); /* in: memory heap to use */ + +/******************************************************************** +Free a list. */ +UNIV_INTERN +void +ib_list_free( +/*=========*/ + ib_list_t* list); /* in: list */ + +/******************************************************************** +Add the data to the start of the list. */ +UNIV_INTERN +ib_list_node_t* +ib_list_add_first( +/*==============*/ + /* out: new list node*/ + ib_list_t* list, /* in: list */ + void* data, /* in: data */ + mem_heap_t* heap); /* in: memory heap to use */ + +/******************************************************************** +Add the data to the end of the list. */ +UNIV_INTERN +ib_list_node_t* +ib_list_add_last( +/*=============*/ + /* out: new list node*/ + ib_list_t* list, /* in: list */ + void* data, /* in: data */ + mem_heap_t* heap); /* in: memory heap to use */ + +/******************************************************************** +Add the data after the indicated node. */ +UNIV_INTERN +ib_list_node_t* +ib_list_add_after( +/*==============*/ + /* out: new list node*/ + ib_list_t* list, /* in: list */ + ib_list_node_t* prev_node, /* in: node preceding new node (can + be NULL) */ + void* data, /* in: data */ + mem_heap_t* heap); /* in: memory heap to use */ + +/******************************************************************** +Remove the node from the list. */ +UNIV_INTERN +void +ib_list_remove( +/*===========*/ + ib_list_t* list, /* in: list */ + ib_list_node_t* node); /* in: node to remove */ + +/******************************************************************** +Get the first node in the list. 
*/ +UNIV_INLINE +ib_list_node_t* +ib_list_get_first( +/*==============*/ + /* out: first node, or NULL */ + ib_list_t* list); /* in: list */ + +/******************************************************************** +Get the last node in the list. */ +UNIV_INLINE +ib_list_node_t* +ib_list_get_last( +/*=============*/ + /* out: last node, or NULL */ + ib_list_t* list); /* in: list */ + +/* List. */ +struct ib_list_struct { + ib_list_node_t* first; /* first node */ + ib_list_node_t* last; /* last node */ + ibool is_heap_list; /* TRUE if this list was + allocated through a heap */ +}; + +/* A list node. */ +struct ib_list_node_struct { + ib_list_node_t* prev; /* previous node */ + ib_list_node_t* next; /* next node */ + void* data; /* user data */ +}; + +/* Quite often, the only additional piece of data you need is the per-item +memory heap, so we have this generic struct available to use in those +cases. */ +struct ib_list_helper_struct { + mem_heap_t* heap; /* memory heap */ + void* data; /* user data */ +}; + +#ifndef UNIV_NONINL +#include "ut0list.ic" +#endif + +#endif diff --git a/storage/xtradb/include/ut0list.ic b/storage/xtradb/include/ut0list.ic new file mode 100644 index 00000000000..c79a0cf18dc --- /dev/null +++ b/storage/xtradb/include/ut0list.ic @@ -0,0 +1,41 @@ +/***************************************************************************** + +Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/******************************************************************** +Get the first node in the list. */ +UNIV_INLINE +ib_list_node_t* +ib_list_get_first( +/*==============*/ + /* out: first node, or NULL */ + ib_list_t* list) /* in: list */ +{ + return(list->first); +} + +/******************************************************************** +Get the last node in the list. */ +UNIV_INLINE +ib_list_node_t* +ib_list_get_last( +/*=============*/ + /* out: last node, or NULL */ + ib_list_t* list) /* in: list */ +{ + return(list->last); +} diff --git a/storage/xtradb/include/ut0lst.h b/storage/xtradb/include/ut0lst.h new file mode 100644 index 00000000000..46ee23a2538 --- /dev/null +++ b/storage/xtradb/include/ut0lst.h @@ -0,0 +1,243 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/********************************************************************** +List utilities + +Created 9/10/1995 Heikki Tuuri +***********************************************************************/ + +#ifndef ut0lst_h +#define ut0lst_h + +#include "univ.i" + +/* This module implements the two-way linear list which should be used +if a list is used in the database. Note that a single struct may belong +to two or more lists, provided that the lists are given different names. +An example of the usage of the lists can be found in fil0fil.c. */ + +/*********************************************************************** +This macro expands to the unnamed type definition of a struct which acts +as the two-way list base node. The base node contains pointers +to both ends of the list and a count of nodes in the list (excluding +the base node from the count). TYPE should be the list node type name. */ + +#define UT_LIST_BASE_NODE_T(TYPE)\ +struct {\ + ulint count; /* count of nodes in list */\ + TYPE * start; /* pointer to list start, NULL if empty */\ + TYPE * end; /* pointer to list end, NULL if empty */\ +}\ + +/*********************************************************************** +This macro expands to the unnamed type definition of a struct which +should be embedded in the nodes of the list; the node type must be a struct. +This struct contains the pointers to next and previous nodes in the list. +The name of the field in the node struct should be the name given +to the list. TYPE should be the list node type name. Example of usage: + +typedef struct LRU_node_struct LRU_node_t; +struct LRU_node_struct { + UT_LIST_NODE_T(LRU_node_t) LRU_list; + ... 
+} +The example implements an LRU list of name LRU_list. Its nodes are of type +LRU_node_t. +*/ + +#define UT_LIST_NODE_T(TYPE)\ +struct {\ + TYPE * prev; /* pointer to the previous node,\ + NULL if start of list */\ + TYPE * next; /* pointer to next node, NULL if end of list */\ +}\ + +/*********************************************************************** +Initializes the base node of a two-way list. */ + +#define UT_LIST_INIT(BASE)\ +{\ + (BASE).count = 0;\ + (BASE).start = NULL;\ + (BASE).end = NULL;\ +}\ + +/*********************************************************************** +Adds the node as the first element in a two-way linked list. +BASE has to be the base node (not a pointer to it). N has to be +the pointer to the node to be added to the list. NAME is the list name. */ + +#define UT_LIST_ADD_FIRST(NAME, BASE, N)\ +{\ + ut_ad(N);\ + ((BASE).count)++;\ + ((N)->NAME).next = (BASE).start;\ + ((N)->NAME).prev = NULL;\ + if (UNIV_LIKELY((BASE).start != NULL)) {\ + ut_ad((BASE).start != (N));\ + (((BASE).start)->NAME).prev = (N);\ + }\ + (BASE).start = (N);\ + if (UNIV_UNLIKELY((BASE).end == NULL)) {\ + (BASE).end = (N);\ + }\ +}\ + +/*********************************************************************** +Adds the node as the last element in a two-way linked list. +BASE has to be the base node (not a pointer to it). N has to be +the pointer to the node to be added to the list. NAME is the list name. */ + +#define UT_LIST_ADD_LAST(NAME, BASE, N)\ +{\ + ut_ad(N);\ + ((BASE).count)++;\ + ((N)->NAME).prev = (BASE).end;\ + ((N)->NAME).next = NULL;\ + if ((BASE).end != NULL) {\ + ut_ad((BASE).end != (N));\ + (((BASE).end)->NAME).next = (N);\ + }\ + (BASE).end = (N);\ + if ((BASE).start == NULL) {\ + (BASE).start = (N);\ + }\ +}\ + +/*********************************************************************** +Inserts a NODE2 after NODE1 in a list. +BASE has to be the base node (not a pointer to it). NAME is the list +name, NODE1 and NODE2 are pointers to nodes. 
*/ + +#define UT_LIST_INSERT_AFTER(NAME, BASE, NODE1, NODE2)\ +{\ + ut_ad(NODE1);\ + ut_ad(NODE2);\ + ut_ad((NODE1) != (NODE2));\ + ((BASE).count)++;\ + ((NODE2)->NAME).prev = (NODE1);\ + ((NODE2)->NAME).next = ((NODE1)->NAME).next;\ + if (((NODE1)->NAME).next != NULL) {\ + ((((NODE1)->NAME).next)->NAME).prev = (NODE2);\ + }\ + ((NODE1)->NAME).next = (NODE2);\ + if ((BASE).end == (NODE1)) {\ + (BASE).end = (NODE2);\ + }\ +}\ + +/* Invalidate the pointers in a list node. */ +#ifdef UNIV_LIST_DEBUG +# define UT_LIST_REMOVE_CLEAR(NAME, N) \ +((N)->NAME.prev = (N)->NAME.next = (void*) -1) +#else +# define UT_LIST_REMOVE_CLEAR(NAME, N) while (0) +#endif + +/*********************************************************************** +Removes a node from a two-way linked list. BASE has to be the base node +(not a pointer to it). N has to be the pointer to the node to be removed +from the list. NAME is the list name. */ + +#define UT_LIST_REMOVE(NAME, BASE, N) \ +do { \ + ut_ad(N); \ + ut_a((BASE).count > 0); \ + ((BASE).count)--; \ + if (((N)->NAME).next != NULL) { \ + ((((N)->NAME).next)->NAME).prev = ((N)->NAME).prev; \ + } else { \ + (BASE).end = ((N)->NAME).prev; \ + } \ + if (((N)->NAME).prev != NULL) { \ + ((((N)->NAME).prev)->NAME).next = ((N)->NAME).next; \ + } else { \ + (BASE).start = ((N)->NAME).next; \ + } \ + UT_LIST_REMOVE_CLEAR(NAME, N); \ +} while (0) + +/************************************************************************ +Gets the next node in a two-way list. NAME is the name of the list +and N is pointer to a node. */ + +#define UT_LIST_GET_NEXT(NAME, N)\ + (((N)->NAME).next) + +/************************************************************************ +Gets the previous node in a two-way list. NAME is the name of the list +and N is pointer to a node. 
*/ + +#define UT_LIST_GET_PREV(NAME, N)\ + (((N)->NAME).prev) + +/************************************************************************ +Alternative macro to get the number of nodes in a two-way list, i.e., +its length. BASE is the base node (not a pointer to it). */ + +#define UT_LIST_GET_LEN(BASE)\ + (BASE).count + +/************************************************************************ +Gets the first node in a two-way list, or returns NULL, +if the list is empty. BASE is the base node (not a pointer to it). */ + +#define UT_LIST_GET_FIRST(BASE)\ + (BASE).start + +/************************************************************************ +Gets the last node in a two-way list, or returns NULL, +if the list is empty. BASE is the base node (not a pointer to it). */ + +#define UT_LIST_GET_LAST(BASE)\ + (BASE).end + +/************************************************************************ +Checks the consistency of a two-way list. NAME is the name of the list, +TYPE is the node type, and BASE is the base node (not a pointer to it). */ + +#define UT_LIST_VALIDATE(NAME, TYPE, BASE)\ +{\ + ulint ut_list_i_313;\ + TYPE * ut_list_node_313;\ +\ + ut_list_node_313 = (BASE).start;\ +\ + for (ut_list_i_313 = 0; ut_list_i_313 < (BASE).count;\ + ut_list_i_313++) {\ + ut_a(ut_list_node_313);\ + ut_list_node_313 = (ut_list_node_313->NAME).next;\ + }\ +\ + ut_a(ut_list_node_313 == NULL);\ +\ + ut_list_node_313 = (BASE).end;\ +\ + for (ut_list_i_313 = 0; ut_list_i_313 < (BASE).count;\ + ut_list_i_313++) {\ + ut_a(ut_list_node_313);\ + ut_list_node_313 = (ut_list_node_313->NAME).prev;\ + }\ +\ + ut_a(ut_list_node_313 == NULL);\ +}\ + + +#endif + diff --git a/storage/xtradb/include/ut0mem.h b/storage/xtradb/include/ut0mem.h new file mode 100644 index 00000000000..f8dec99ed4a --- /dev/null +++ b/storage/xtradb/include/ut0mem.h @@ -0,0 +1,271 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. 
All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/*********************************************************************** +Memory primitives + +Created 5/30/1994 Heikki Tuuri +************************************************************************/ + +#ifndef ut0mem_h +#define ut0mem_h + +#include "univ.i" +#include "os0sync.h" +#include <string.h> + +/* The total amount of memory currently allocated from the operating +system with os_mem_alloc_large() or malloc(). Does not count malloc() +if srv_use_sys_malloc is set. Protected by ut_list_mutex. */ +extern ulint ut_total_allocated_memory; + +/* Mutex protecting ut_total_allocated_memory and ut_mem_block_list */ +extern os_fast_mutex_t ut_list_mutex; + +UNIV_INLINE +void* +ut_memcpy(void* dest, const void* sour, ulint n); + +UNIV_INLINE +void* +ut_memmove(void* dest, const void* sour, ulint n); + +UNIV_INLINE +int +ut_memcmp(const void* str1, const void* str2, ulint n); + +/************************************************************************** +Initializes the mem block list at database startup. */ +UNIV_INTERN +void +ut_mem_init(void); +/*=============*/ + +/************************************************************************** +Allocates memory. 
Sets it also to zero if UNIV_SET_MEM_TO_ZERO is +defined and set_to_zero is TRUE. */ +UNIV_INTERN +void* +ut_malloc_low( +/*==========*/ + /* out, own: allocated memory */ + ulint n, /* in: number of bytes to allocate */ + ibool set_to_zero, /* in: TRUE if allocated memory + should be set to zero if + UNIV_SET_MEM_TO_ZERO is defined */ + ibool assert_on_error); /* in: if TRUE, we crash mysqld if + the memory cannot be allocated */ +/************************************************************************** +Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is +defined. */ +UNIV_INTERN +void* +ut_malloc( +/*======*/ + /* out, own: allocated memory */ + ulint n); /* in: number of bytes to allocate */ +/************************************************************************** +Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs +out. It cannot be used if we want to return an error message. Prints a message to +stderr if it fails. */ +UNIV_INTERN +ibool +ut_test_malloc( +/*===========*/ + /* out: TRUE if succeeded */ + ulint n); /* in: try to allocate this many bytes */ +/************************************************************************** +Frees a memory block allocated with ut_malloc. */ +UNIV_INTERN +void +ut_free( +/*====*/ + void* ptr); /* in, own: memory block */ +/************************************************************************** +Implements realloc. This is needed by /pars/lexyy.c. Otherwise, you should not +use this function because the allocation functions in mem0mem.h are the +recommended ones in InnoDB. + +man realloc in Linux, 2004: + + realloc() changes the size of the memory block pointed to + by ptr to size bytes. The contents will be unchanged to + the minimum of the old and new sizes; newly allocated + memory will be uninitialized. If ptr is NULL, the call is + equivalent to malloc(size); if size is equal to zero, the + call is equivalent to free(ptr). 
Unless ptr is NULL, it + must have been returned by an earlier call to malloc(), + calloc() or realloc(). + +RETURN VALUE + realloc() returns a pointer to the newly allocated memory, + which is suitably aligned for any kind of variable and may + be different from ptr, or NULL if the request fails. If + size was equal to 0, either NULL or a pointer suitable to + be passed to free() is returned. If realloc() fails the + original block is left untouched - it is not freed or + moved. */ +UNIV_INTERN +void* +ut_realloc( +/*=======*/ + /* out, own: pointer to new mem block or NULL */ + void* ptr, /* in: pointer to old block or NULL */ + ulint size); /* in: desired size */ +/************************************************************************** +Frees in shutdown all allocated memory not freed yet. */ +UNIV_INTERN +void +ut_free_all_mem(void); +/*=================*/ + +UNIV_INLINE +char* +ut_strcpy(char* dest, const char* sour); + +UNIV_INLINE +ulint +ut_strlen(const char* str); + +UNIV_INLINE +int +ut_strcmp(const char* str1, const char* str2); + +/************************************************************************** +Copies up to size - 1 characters from the NUL-terminated string src to +dst, NUL-terminating the result. Returns strlen(src), so truncation +occurred if the return value >= size. */ +UNIV_INTERN +ulint +ut_strlcpy( +/*=======*/ + /* out: strlen(src) */ + char* dst, /* in: destination buffer */ + const char* src, /* in: source buffer */ + ulint size); /* in: size of destination buffer */ + +/************************************************************************** +Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last +(size - 1) bytes of src, not the first. 
*/ +UNIV_INTERN +ulint +ut_strlcpy_rev( +/*===========*/ + /* out: strlen(src) */ + char* dst, /* in: destination buffer */ + const char* src, /* in: source buffer */ + ulint size); /* in: size of destination buffer */ + +/************************************************************************** +Compute strlen(ut_strcpyq(str, q)). */ +UNIV_INLINE +ulint +ut_strlenq( +/*=======*/ + /* out: length of the string when quoted */ + const char* str, /* in: null-terminated string */ + char q); /* in: the quote character */ + +/************************************************************************** +Make a quoted copy of a NUL-terminated string. Leading and trailing +quotes will not be included; only embedded quotes will be escaped. +See also ut_strlenq() and ut_memcpyq(). */ +UNIV_INTERN +char* +ut_strcpyq( +/*=======*/ + /* out: pointer to end of dest */ + char* dest, /* in: output buffer */ + char q, /* in: the quote character */ + const char* src); /* in: null-terminated string */ + +/************************************************************************** +Make a quoted copy of a fixed-length string. Leading and trailing +quotes will not be included; only embedded quotes will be escaped. +See also ut_strlenq() and ut_strcpyq(). */ +UNIV_INTERN +char* +ut_memcpyq( +/*=======*/ + /* out: pointer to end of dest */ + char* dest, /* in: output buffer */ + char q, /* in: the quote character */ + const char* src, /* in: string to be quoted */ + ulint len); /* in: length of src */ + +/************************************************************************** +Return the number of times s2 occurs in s1. Overlapping instances of s2 +are only counted once. 
*/ +UNIV_INTERN +ulint +ut_strcount( +/*========*/ + /* out: the number of times s2 occurs in s1 */ + const char* s1, /* in: string to search in */ + const char* s2); /* in: string to search for */ + +/************************************************************************** +Replace every occurrence of s1 in str with s2. Overlapping instances of s1 +are only replaced once. */ +UNIV_INTERN +char* +ut_strreplace( +/*==========*/ + /* out, own: modified string, must be + freed with mem_free() */ + const char* str, /* in: string to operate on */ + const char* s1, /* in: string to replace */ + const char* s2); /* in: string to replace s1 with */ + +/************************************************************************** +Converts a raw binary data to a '\0'-terminated hex string. The output is +truncated if there is not enough space in "hex", make sure "hex_size" is at +least (2 * raw_size + 1) if you do not want this to happen. Returns the +actual number of characters written to "hex" (including the '\0'). */ +UNIV_INLINE +ulint +ut_raw_to_hex( +/*==========*/ + /* out: number of chars written */ + const void* raw, /* in: raw data */ + ulint raw_size, /* in: "raw" length in bytes */ + char* hex, /* out: hex string */ + ulint hex_size); /* in: "hex" size in bytes */ + +/*********************************************************************** +Adds single quotes to the start and end of string and escapes any quotes +by doubling them. Returns the number of bytes that were written to "buf" +(including the terminating '\0'). If buf_size is too small then the +trailing bytes from "str" are discarded. 
*/ +UNIV_INLINE +ulint +ut_str_sql_format( +/*==============*/ + /* out: number of bytes + that were written */ + const char* str, /* in: string */ + ulint str_len, /* in: string length in bytes */ + char* buf, /* out: output buffer */ + ulint buf_size); /* in: output buffer size + in bytes */ + +#ifndef UNIV_NONINL +#include "ut0mem.ic" +#endif + +#endif diff --git a/storage/xtradb/include/ut0mem.ic b/storage/xtradb/include/ut0mem.ic new file mode 100644 index 00000000000..5078c721706 --- /dev/null +++ b/storage/xtradb/include/ut0mem.ic @@ -0,0 +1,308 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/*********************************************************************** +Memory primitives + +Created 5/30/1994 Heikki Tuuri +************************************************************************/ + +#include "ut0byte.h" +#include "mach0data.h" + +UNIV_INLINE +void* +ut_memcpy(void* dest, const void* sour, ulint n) +{ + return(memcpy(dest, sour, n)); +} + +UNIV_INLINE +void* +ut_memmove(void* dest, const void* sour, ulint n) +{ + return(memmove(dest, sour, n)); +} + +UNIV_INLINE +int +ut_memcmp(const void* str1, const void* str2, ulint n) +{ + return(memcmp(str1, str2, n)); +} + +UNIV_INLINE +char* +ut_strcpy(char* dest, const char* sour) +{ + return(strcpy(dest, sour)); +} + +UNIV_INLINE +ulint +ut_strlen(const char* str) +{ + return(strlen(str)); +} + +UNIV_INLINE +int +ut_strcmp(const char* str1, const char* str2) +{ + return(strcmp(str1, str2)); +} + +/************************************************************************** +Compute strlen(ut_strcpyq(str, q)). */ +UNIV_INLINE +ulint +ut_strlenq( +/*=======*/ + /* out: length of the string when quoted */ + const char* str, /* in: null-terminated string */ + char q) /* in: the quote character */ +{ + ulint len; + + for (len = 0; *str; len++, str++) { + if (*str == q) { + len++; + } + } + + return(len); +} + +/************************************************************************** +Converts a raw binary data to a '\0'-terminated hex string. The output is +truncated if there is not enough space in "hex", make sure "hex_size" is at +least (2 * raw_size + 1) if you do not want this to happen. Returns the +actual number of characters written to "hex" (including the '\0'). 
*/ +UNIV_INLINE +ulint +ut_raw_to_hex( +/*==========*/ + /* out: number of chars written */ + const void* raw, /* in: raw data */ + ulint raw_size, /* in: "raw" length in bytes */ + char* hex, /* out: hex string */ + ulint hex_size) /* in: "hex" size in bytes */ +{ + +#ifdef WORDS_BIGENDIAN + +#define MK_UINT16(a, b) (((uint16) (a)) << 8 | (uint16) (b)) + +#define UINT16_GET_A(u) ((unsigned char) ((u) >> 8)) +#define UINT16_GET_B(u) ((unsigned char) ((u) & 0xFF)) + +#else /* WORDS_BIGENDIAN */ + +#define MK_UINT16(a, b) (((uint16) (b)) << 8 | (uint16) (a)) + +#define UINT16_GET_A(u) ((unsigned char) ((u) & 0xFF)) +#define UINT16_GET_B(u) ((unsigned char) ((u) >> 8)) + +#endif /* WORDS_BIGENDIAN */ + +#define MK_ALL_UINT16_WITH_A(a) \ + MK_UINT16(a, '0'), \ + MK_UINT16(a, '1'), \ + MK_UINT16(a, '2'), \ + MK_UINT16(a, '3'), \ + MK_UINT16(a, '4'), \ + MK_UINT16(a, '5'), \ + MK_UINT16(a, '6'), \ + MK_UINT16(a, '7'), \ + MK_UINT16(a, '8'), \ + MK_UINT16(a, '9'), \ + MK_UINT16(a, 'A'), \ + MK_UINT16(a, 'B'), \ + MK_UINT16(a, 'C'), \ + MK_UINT16(a, 'D'), \ + MK_UINT16(a, 'E'), \ + MK_UINT16(a, 'F') + + static const uint16 hex_map[256] = { + MK_ALL_UINT16_WITH_A('0'), + MK_ALL_UINT16_WITH_A('1'), + MK_ALL_UINT16_WITH_A('2'), + MK_ALL_UINT16_WITH_A('3'), + MK_ALL_UINT16_WITH_A('4'), + MK_ALL_UINT16_WITH_A('5'), + MK_ALL_UINT16_WITH_A('6'), + MK_ALL_UINT16_WITH_A('7'), + MK_ALL_UINT16_WITH_A('8'), + MK_ALL_UINT16_WITH_A('9'), + MK_ALL_UINT16_WITH_A('A'), + MK_ALL_UINT16_WITH_A('B'), + MK_ALL_UINT16_WITH_A('C'), + MK_ALL_UINT16_WITH_A('D'), + MK_ALL_UINT16_WITH_A('E'), + MK_ALL_UINT16_WITH_A('F') + }; + const unsigned char* rawc; + ulint read_bytes; + ulint write_bytes; + ulint i; + + rawc = (const unsigned char*) raw; + + if (hex_size == 0) { + + return(0); + } + + if (hex_size <= 2 * raw_size) { + + read_bytes = hex_size / 2; + write_bytes = hex_size; + } else { + + read_bytes = raw_size; + write_bytes = 2 * raw_size + 1; + } + +#define LOOP_READ_BYTES(ASSIGN) \ + for (i = 
0; i < read_bytes; i++) { \ + ASSIGN; \ + hex += 2; \ + rawc++; \ + } + + if (ut_align_offset(hex, 2) == 0) { + + LOOP_READ_BYTES( + *(uint16*) hex = hex_map[*rawc] + ); + } else { + + LOOP_READ_BYTES( + *hex = UINT16_GET_A(hex_map[*rawc]); + *(hex + 1) = UINT16_GET_B(hex_map[*rawc]) + ); + } + + if (hex_size <= 2 * raw_size && hex_size % 2 == 0) { + + hex--; + } + + *hex = '\0'; + + return(write_bytes); +} + +/*********************************************************************** +Adds single quotes to the start and end of string and escapes any quotes +by doubling them. Returns the number of bytes that were written to "buf" +(including the terminating '\0'). If buf_size is too small then the +trailing bytes from "str" are discarded. */ +UNIV_INLINE +ulint +ut_str_sql_format( +/*==============*/ + /* out: number of bytes + that were written */ + const char* str, /* in: string */ + ulint str_len, /* in: string length in bytes */ + char* buf, /* out: output buffer */ + ulint buf_size) /* in: output buffer size + in bytes */ +{ + ulint str_i; + ulint buf_i; + + buf_i = 0; + + switch (buf_size) { + case 3: + + if (str_len == 0) { + + buf[buf_i] = '\''; + buf_i++; + buf[buf_i] = '\''; + buf_i++; + } + /* FALLTHROUGH */ + case 2: + case 1: + + buf[buf_i] = '\0'; + buf_i++; + /* FALLTHROUGH */ + case 0: + + return(buf_i); + } + + /* buf_size >= 4 */ + + buf[0] = '\''; + buf_i = 1; + + for (str_i = 0; str_i < str_len; str_i++) { + + char ch; + + if (buf_size - buf_i == 2) { + + break; + } + + ch = str[str_i]; + + switch (ch) { + case '\0': + + if (UNIV_UNLIKELY(buf_size - buf_i < 4)) { + + goto func_exit; + } + buf[buf_i] = '\\'; + buf_i++; + buf[buf_i] = '0'; + buf_i++; + break; + case '\'': + case '\\': + + if (UNIV_UNLIKELY(buf_size - buf_i < 4)) { + + goto func_exit; + } + buf[buf_i] = ch; + buf_i++; + /* FALLTHROUGH */ + default: + + buf[buf_i] = ch; + buf_i++; + } + } + +func_exit: + + buf[buf_i] = '\''; + buf_i++; + buf[buf_i] = '\0'; + buf_i++; + + return(buf_i); 
+} diff --git a/storage/xtradb/include/ut0rnd.h b/storage/xtradb/include/ut0rnd.h new file mode 100644 index 00000000000..b9e23d7cd14 --- /dev/null +++ b/storage/xtradb/include/ut0rnd.h @@ -0,0 +1,142 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/********************************************************************** +Random numbers and hashing + +Created 1/20/1994 Heikki Tuuri +***********************************************************************/ + +#ifndef ut0rnd_h +#define ut0rnd_h + +#include "univ.i" + +#include "ut0byte.h" + +/* The 'character code' for end of field or string (used +in folding records) */ +#define UT_END_OF_FIELD 257 + +/************************************************************ +This is used to set the random number seed. */ +UNIV_INLINE +void +ut_rnd_set_seed( +/*============*/ + ulint seed); /* in: seed */ +/************************************************************ +The following function generates a series of 'random' ulint integers. 
*/ +UNIV_INLINE +ulint +ut_rnd_gen_next_ulint( +/*==================*/ + /* out: the next 'random' number */ + ulint rnd); /* in: the previous random number value */ +/************************************************************* +The following function generates 'random' ulint integers which +enumerate the value space (let there be N of them) of ulint integers +in a pseudo-random fashion. Note that the same integer is repeated +always after N calls to the generator. */ +UNIV_INLINE +ulint +ut_rnd_gen_ulint(void); +/*==================*/ + /* out: the 'random' number */ +/************************************************************ +Generates a random integer from a given interval. */ +UNIV_INLINE +ulint +ut_rnd_interval( +/*============*/ + /* out: the 'random' number */ + ulint low, /* in: low limit; can generate also this value */ + ulint high); /* in: high limit; can generate also this value */ +/************************************************************* +Generates a random iboolean value. */ +UNIV_INLINE +ibool +ut_rnd_gen_ibool(void); +/*=================*/ + /* out: the random value */ +/*********************************************************** +The following function generates a hash value for a ulint integer +to a hash table of size table_size, which should be a prime or some +random number to work reliably. */ +UNIV_INLINE +ulint +ut_hash_ulint( +/*==========*/ + /* out: hash value */ + ulint key, /* in: value to be hashed */ + ulint table_size); /* in: hash table size */ +/***************************************************************** +Folds a pair of ulints. */ +UNIV_INLINE +ulint +ut_fold_ulint_pair( +/*===============*/ + /* out: folded value */ + ulint n1, /* in: ulint */ + ulint n2) /* in: ulint */ + __attribute__((const)); +/***************************************************************** +Folds a dulint. 
*/ +UNIV_INLINE +ulint +ut_fold_dulint( +/*===========*/ + /* out: folded value */ + dulint d) /* in: dulint */ + __attribute__((const)); +/***************************************************************** +Folds a character string ending in the null character. */ +UNIV_INLINE +ulint +ut_fold_string( +/*===========*/ + /* out: folded value */ + const char* str) /* in: null-terminated string */ + __attribute__((pure)); +/***************************************************************** +Folds a binary string. */ +UNIV_INLINE +ulint +ut_fold_binary( +/*===========*/ + /* out: folded value */ + const byte* str, /* in: string of bytes */ + ulint len) /* in: length */ + __attribute__((pure)); +/*************************************************************** +Looks for a prime number slightly greater than the given argument. +The prime is chosen so that it is not near any power of 2. */ +UNIV_INTERN +ulint +ut_find_prime( +/*==========*/ + /* out: prime */ + ulint n) /* in: positive number > 100 */ + __attribute__((const)); + + +#ifndef UNIV_NONINL +#include "ut0rnd.ic" +#endif + +#endif diff --git a/storage/xtradb/include/ut0rnd.ic b/storage/xtradb/include/ut0rnd.ic new file mode 100644 index 00000000000..d72100d16a1 --- /dev/null +++ b/storage/xtradb/include/ut0rnd.ic @@ -0,0 +1,228 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 
+ +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************************** +Random numbers and hashing + +Created 5/30/1994 Heikki Tuuri +*******************************************************************/ + +#define UT_HASH_RANDOM_MASK 1463735687 +#define UT_HASH_RANDOM_MASK2 1653893711 +#define UT_RND1 151117737 +#define UT_RND2 119785373 +#define UT_RND3 85689495 +#define UT_RND4 76595339 +#define UT_SUM_RND2 98781234 +#define UT_SUM_RND3 126792457 +#define UT_SUM_RND4 63498502 +#define UT_XOR_RND1 187678878 +#define UT_XOR_RND2 143537923 + +extern ulint ut_rnd_ulint_counter; + +/************************************************************ +This is used to set the random number seed. */ +UNIV_INLINE +void +ut_rnd_set_seed( +/*============*/ + ulint seed) /* in: seed */ +{ + ut_rnd_ulint_counter = seed; +} + +/************************************************************ +The following function generates a series of 'random' ulint integers. */ +UNIV_INLINE +ulint +ut_rnd_gen_next_ulint( +/*==================*/ + /* out: the next 'random' number */ + ulint rnd) /* in: the previous random number value */ +{ + ulint n_bits; + + n_bits = 8 * sizeof(ulint); + + rnd = UT_RND2 * rnd + UT_SUM_RND3; + rnd = UT_XOR_RND1 ^ rnd; + rnd = (rnd << 20) + (rnd >> (n_bits - 20)); + rnd = UT_RND3 * rnd + UT_SUM_RND4; + rnd = UT_XOR_RND2 ^ rnd; + rnd = (rnd << 20) + (rnd >> (n_bits - 20)); + rnd = UT_RND1 * rnd + UT_SUM_RND2; + + return(rnd); +} + +/************************************************************ +The following function generates 'random' ulint integers which +enumerate the value space of ulint integers in a pseudo random +fashion. 
Note that the same integer is repeated always after +2 to power 32 calls to the generator (if ulint is 32-bit). */ +UNIV_INLINE +ulint +ut_rnd_gen_ulint(void) +/*==================*/ + /* out: the 'random' number */ +{ + ulint rnd; + ulint n_bits; + + n_bits = 8 * sizeof(ulint); + + ut_rnd_ulint_counter = UT_RND1 * ut_rnd_ulint_counter + UT_RND2; + + rnd = ut_rnd_gen_next_ulint(ut_rnd_ulint_counter); + + return(rnd); +} + +/************************************************************ +Generates a random integer from a given interval. */ +UNIV_INLINE +ulint +ut_rnd_interval( +/*============*/ + /* out: the 'random' number */ + ulint low, /* in: low limit; can generate also this value */ + ulint high) /* in: high limit; can generate also this value */ +{ + ulint rnd; + + ut_ad(high >= low); + + if (low == high) { + + return(low); + } + + rnd = ut_rnd_gen_ulint(); + + return(low + (rnd % (high - low + 1))); +} + +/************************************************************* +Generates a random iboolean value. */ +UNIV_INLINE +ibool +ut_rnd_gen_ibool(void) +/*=================*/ + /* out: the random value */ +{ + ulint x; + + x = ut_rnd_gen_ulint(); + + if (((x >> 20) + (x >> 15)) & 1) { + + return(TRUE); + } + + return(FALSE); +} + +/*********************************************************** +The following function generates a hash value for a ulint integer +to a hash table of size table_size, which should be a prime +or some random number for the hash table to work reliably. */ +UNIV_INLINE +ulint +ut_hash_ulint( +/*==========*/ + /* out: hash value */ + ulint key, /* in: value to be hashed */ + ulint table_size) /* in: hash table size */ +{ + key = key ^ UT_HASH_RANDOM_MASK2; + + return(key % table_size); +} + +/***************************************************************** +Folds a pair of ulints. 
*/ +UNIV_INLINE +ulint +ut_fold_ulint_pair( +/*===============*/ + /* out: folded value */ + ulint n1, /* in: ulint */ + ulint n2) /* in: ulint */ +{ + return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1) + ^ UT_HASH_RANDOM_MASK) + n2); +} + +/***************************************************************** +Folds a dulint. */ +UNIV_INLINE +ulint +ut_fold_dulint( +/*===========*/ + /* out: folded value */ + dulint d) /* in: dulint */ +{ + return(ut_fold_ulint_pair(ut_dulint_get_low(d), + ut_dulint_get_high(d))); +} + +/***************************************************************** +Folds a character string ending in the null character. */ +UNIV_INLINE +ulint +ut_fold_string( +/*===========*/ + /* out: folded value */ + const char* str) /* in: null-terminated string */ +{ + ulint fold = 0; + + ut_ad(str); + + while (*str != '\0') { + fold = ut_fold_ulint_pair(fold, (ulint)(*str)); + str++; + } + + return(fold); +} + +/***************************************************************** +Folds a binary string. */ +UNIV_INLINE +ulint +ut_fold_binary( +/*===========*/ + /* out: folded value */ + const byte* str, /* in: string of bytes */ + ulint len) /* in: length */ +{ + const byte* str_end = str + len; + ulint fold = 0; + + ut_ad(str || !len); + + while (str < str_end) { + fold = ut_fold_ulint_pair(fold, (ulint)(*str)); + + str++; + } + + return(fold); +} diff --git a/storage/xtradb/include/ut0sort.h b/storage/xtradb/include/ut0sort.h new file mode 100644 index 00000000000..5fd5db54832 --- /dev/null +++ b/storage/xtradb/include/ut0sort.h @@ -0,0 +1,105 @@ +/***************************************************************************** + +Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/********************************************************************** +Sort utility + +Created 11/9/1995 Heikki Tuuri +***********************************************************************/ + +#ifndef ut0sort_h +#define ut0sort_h + +#include "univ.i" + +/* This module gives a macro definition of the body of +a standard sort function for an array of elements of any +type. The comparison function is given as a parameter to +the macro. The sort algorithm is mergesort which has logarithmic +worst case. +*/ + +/*********************************************************************** +This macro expands to the body of a standard sort function. +The sort function uses mergesort and must be defined separately +for each type of array. +Also the comparison function has to be defined individually +for each array cell type. SORT_FUN is the sort function name. +The function takes the array to be sorted (ARR), +the array of auxiliary space (AUX_ARR) of same size, +and the low (LOW), inclusive, and high (HIGH), noninclusive, +limits for the sort interval as arguments. +CMP_FUN is the comparison function name. It takes as arguments +two elements from the array and returns 1, if the first is bigger, +0 if equal, and -1 if the second bigger. 
*/ + +#define UT_SORT_FUNCTION_BODY(SORT_FUN, ARR, AUX_ARR, LOW, HIGH, CMP_FUN)\ +{\ + ulint ut_sort_mid77;\ + ulint ut_sort_i77;\ + ulint ut_sort_low77;\ + ulint ut_sort_high77;\ +\ + ut_ad((LOW) < (HIGH));\ + ut_ad(ARR);\ + ut_ad(AUX_ARR);\ +\ + if ((LOW) == (HIGH) - 1) {\ + return;\ + } else if ((LOW) == (HIGH) - 2) {\ + if (CMP_FUN((ARR)[LOW], (ARR)[(HIGH) - 1]) > 0) {\ + (AUX_ARR)[LOW] = (ARR)[LOW];\ + (ARR)[LOW] = (ARR)[(HIGH) - 1];\ + (ARR)[(HIGH) - 1] = (AUX_ARR)[LOW];\ + }\ + return;\ + }\ +\ + ut_sort_mid77 = ((LOW) + (HIGH)) / 2;\ +\ + SORT_FUN((ARR), (AUX_ARR), (LOW), ut_sort_mid77);\ + SORT_FUN((ARR), (AUX_ARR), ut_sort_mid77, (HIGH));\ +\ + ut_sort_low77 = (LOW);\ + ut_sort_high77 = ut_sort_mid77;\ +\ + for (ut_sort_i77 = (LOW); ut_sort_i77 < (HIGH); ut_sort_i77++) {\ +\ + if (ut_sort_low77 >= ut_sort_mid77) {\ + (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\ + ut_sort_high77++;\ + } else if (ut_sort_high77 >= (HIGH)) {\ + (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\ + ut_sort_low77++;\ + } else if (CMP_FUN((ARR)[ut_sort_low77],\ + (ARR)[ut_sort_high77]) > 0) {\ + (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\ + ut_sort_high77++;\ + } else {\ + (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\ + ut_sort_low77++;\ + }\ + }\ +\ + memcpy((void*) ((ARR) + (LOW)), (AUX_ARR) + (LOW),\ + ((HIGH) - (LOW)) * sizeof *(ARR));\ +}\ + + +#endif + diff --git a/storage/xtradb/include/ut0ut.h b/storage/xtradb/include/ut0ut.h new file mode 100644 index 00000000000..3ca14acd2ef --- /dev/null +++ b/storage/xtradb/include/ut0ut.h @@ -0,0 +1,328 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. 
+ +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/********************************************************************** +Various utilities + +Created 1/20/1994 Heikki Tuuri +***********************************************************************/ + +#ifndef ut0ut_h +#define ut0ut_h + +#include "univ.i" +#include <time.h> +#ifndef MYSQL_SERVER +#include <ctype.h> +#endif + +#define TEMP_INDEX_PREFIX '\377' /* Index name prefix in fast index + creation */ + +typedef time_t ib_time_t; + +/************************************************************************* +Delays execution for at most max_wait_us microseconds or returns earlier +if cond becomes true; cond is evaluated every 2 ms. */ + +#define UT_WAIT_FOR(cond, max_wait_us) \ +do { \ + ullint start_us; \ + start_us = ut_time_us(NULL); \ + while (!(cond) \ + && ut_time_us(NULL) - start_us < (max_wait_us)) {\ + \ + os_thread_sleep(2000 /* 2 ms */); \ + } \ +} while (0) + +/************************************************************ +Gets the high 32 bits in a ulint. That is makes a shift >> 32, +but since there seem to be compiler bugs in both gcc and Visual C++, +we do this by a special conversion. */ +UNIV_INTERN +ulint +ut_get_high32( +/*==========*/ + /* out: a >> 32 */ + ulint a); /* in: ulint */ +/********************************************************** +Calculates the minimum of two ulints. 
*/ +UNIV_INLINE +ulint +ut_min( +/*===*/ + /* out: minimum */ + ulint n1, /* in: first number */ + ulint n2); /* in: second number */ +/********************************************************** +Calculates the maximum of two ulints. */ +UNIV_INLINE +ulint +ut_max( +/*===*/ + /* out: maximum */ + ulint n1, /* in: first number */ + ulint n2); /* in: second number */ +/******************************************************************** +Calculates minimum of two ulint-pairs. */ +UNIV_INLINE +void +ut_pair_min( +/*========*/ + ulint* a, /* out: more significant part of minimum */ + ulint* b, /* out: less significant part of minimum */ + ulint a1, /* in: more significant part of first pair */ + ulint b1, /* in: less significant part of first pair */ + ulint a2, /* in: more significant part of second pair */ + ulint b2); /* in: less significant part of second pair */ +/********************************************************** +Compares two ulints. */ +UNIV_INLINE +int +ut_ulint_cmp( +/*=========*/ + /* out: 1 if a > b, 0 if a == b, -1 if a < b */ + ulint a, /* in: ulint */ + ulint b); /* in: ulint */ +/*********************************************************** +Compares two pairs of ulints. */ +UNIV_INLINE +int +ut_pair_cmp( +/*========*/ + /* out: -1 if a < b, 0 if a == b, + 1 if a > b */ + ulint a1, /* in: more significant part of first pair */ + ulint a2, /* in: less significant part of first pair */ + ulint b1, /* in: more significant part of second pair */ + ulint b2); /* in: less significant part of second pair */ +/***************************************************************** +Determines if a number is zero or a power of two. */ +#define ut_is_2pow(n) UNIV_LIKELY(!((n) & ((n) - 1))) +/***************************************************************** +Calculates fast the remainder of n/m when m is a power of two. 
*/ +#define ut_2pow_remainder(n, m) ((n) & ((m) - 1)) +/***************************************************************** +Calculates the biggest multiple of m that is not bigger than n +when m is a power of two. In other words, rounds n down to m * k. */ +#define ut_2pow_round(n, m) ((n) & ~((m) - 1)) +#define ut_calc_align_down(n, m) ut_2pow_round(n, m) +/************************************************************ +Calculates the smallest multiple of m that is not smaller than n +when m is a power of two. In other words, rounds n up to m * k. */ +#define ut_calc_align(n, m) (((n) + ((m) - 1)) & ~((m) - 1)) +/***************************************************************** +Calculates fast the 2-logarithm of a number, rounded upward to an +integer. */ +UNIV_INLINE +ulint +ut_2_log( +/*=====*/ + /* out: logarithm in the base 2, rounded upward */ + ulint n); /* in: number */ +/***************************************************************** +Calculates 2 to power n. */ +UNIV_INLINE +ulint +ut_2_exp( +/*=====*/ + /* out: 2 to power n */ + ulint n); /* in: number */ +/***************************************************************** +Calculates fast the number rounded up to the nearest power of 2. */ +UNIV_INTERN +ulint +ut_2_power_up( +/*==========*/ + /* out: first power of 2 which is >= n */ + ulint n) /* in: number != 0 */ + __attribute__((const)); + +/* Determine how many bytes (groups of 8 bits) are needed to +store the given number of bits. */ +#define UT_BITS_IN_BYTES(b) (((b) + 7) / 8) + +/************************************************************** +Returns system time. We do not specify the format of the time returned: +the only way to manipulate it is to use the function ut_difftime. */ +UNIV_INTERN +ib_time_t +ut_time(void); +/*=========*/ +/************************************************************** +Returns system time. 
+Upon successful completion, the value 0 is returned; otherwise the +value -1 is returned and the global variable errno is set to indicate the +error. */ +UNIV_INTERN +int +ut_usectime( +/*========*/ + /* out: 0 on success, -1 otherwise */ + ulint* sec, /* out: seconds since the Epoch */ + ulint* ms); /* out: microseconds since the Epoch+*sec */ + +/************************************************************** +Returns the number of microseconds since epoch. Similar to +time(3), the return value is also stored in *tloc, provided +that tloc is non-NULL. */ +UNIV_INTERN +ullint +ut_time_us( +/*=======*/ + /* out: us since epoch */ + ullint* tloc); /* out: us since epoch, if non-NULL */ + +/************************************************************** +Returns the difference of two times in seconds. */ +UNIV_INTERN +double +ut_difftime( +/*========*/ + /* out: time2 - time1 expressed in seconds */ + ib_time_t time2, /* in: time */ + ib_time_t time1); /* in: time */ +/************************************************************** +Prints a timestamp to a file. */ +UNIV_INTERN +void +ut_print_timestamp( +/*===============*/ + FILE* file); /* in: file where to print */ +/************************************************************** +Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */ +UNIV_INTERN +void +ut_sprintf_timestamp( +/*=================*/ + char* buf); /* in: buffer where to sprintf */ +#ifdef UNIV_HOTBACKUP +/************************************************************** +Sprintfs a timestamp to a buffer with no spaces and with ':' characters +replaced by '_'. */ +UNIV_INTERN +void +ut_sprintf_timestamp_without_extra_chars( +/*=====================================*/ + char* buf); /* in: buffer where to sprintf */ +/************************************************************** +Returns current year, month, day. 
*/ +UNIV_INTERN +void +ut_get_year_month_day( +/*==================*/ + ulint* year, /* out: current year */ + ulint* month, /* out: month */ + ulint* day); /* out: day */ +#endif /* UNIV_HOTBACKUP */ +/***************************************************************** +Runs an idle loop on CPU. The argument gives the desired delay +in microseconds on 100 MHz Pentium + Visual C++. */ +UNIV_INTERN +ulint +ut_delay( +/*=====*/ + /* out: dummy value */ + ulint delay); /* in: delay in microseconds on 100 MHz Pentium */ +/***************************************************************** +Prints the contents of a memory buffer in hex and ascii. */ +UNIV_INTERN +void +ut_print_buf( +/*=========*/ + FILE* file, /* in: file where to print */ + const void* buf, /* in: memory buffer */ + ulint len); /* in: length of the buffer */ + +/************************************************************************** +Outputs a NUL-terminated file name, quoted with apostrophes. */ +UNIV_INTERN +void +ut_print_filename( +/*==============*/ + FILE* f, /* in: output stream */ + const char* name); /* in: name to print */ + +/* Forward declaration of transaction handle */ +struct trx_struct; + +/************************************************************************** +Outputs a fixed-length string, quoted as an SQL identifier. +If the string contains a slash '/', the string will be +output as two identifiers separated by a period (.), +as in SQL database_name.identifier. */ +UNIV_INTERN +void +ut_print_name( +/*==========*/ + FILE* f, /* in: output stream */ + struct trx_struct*trx, /* in: transaction */ + ibool table_id,/* in: TRUE=print a table name, + FALSE=print other identifier */ + const char* name); /* in: name to print */ + +/************************************************************************** +Outputs a fixed-length string, quoted as an SQL identifier. 
+If the string contains a slash '/', the string will be +output as two identifiers separated by a period (.), +as in SQL database_name.identifier. */ +UNIV_INTERN +void +ut_print_namel( +/*===========*/ + FILE* f, /* in: output stream */ + struct trx_struct*trx, /* in: transaction (NULL=no quotes) */ + ibool table_id,/* in: TRUE=print a table name, + FALSE=print other identifier */ + const char* name, /* in: name to print */ + ulint namelen);/* in: length of name */ + +/************************************************************************** +Catenate files. */ +UNIV_INTERN +void +ut_copy_file( +/*=========*/ + FILE* dest, /* in: output file */ + FILE* src); /* in: input file to be appended to output */ + +/************************************************************************** +snprintf(). */ + +#ifdef __WIN__ +int +ut_snprintf( + /* out: number of characters that would + have been printed if the size were + unlimited, not including the terminating + '\0'. */ + char* str, /* out: string */ + size_t size, /* in: str size */ + const char* fmt, /* in: format */ + ...); /* in: format values */ +#else +#define ut_snprintf snprintf +#endif /* __WIN__ */ + +#ifndef UNIV_NONINL +#include "ut0ut.ic" +#endif + +#endif + diff --git a/storage/xtradb/include/ut0ut.ic b/storage/xtradb/include/ut0ut.ic new file mode 100644 index 00000000000..e4e0a2acce6 --- /dev/null +++ b/storage/xtradb/include/ut0ut.ic @@ -0,0 +1,161 @@ +/***************************************************************************** + +Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/****************************************************************** +Various utilities + +Created 5/30/1994 Heikki Tuuri +*******************************************************************/ + +/********************************************************** +Calculates the minimum of two ulints. */ +UNIV_INLINE +ulint +ut_min( +/*===*/ + /* out: minimum */ + ulint n1, /* in: first number */ + ulint n2) /* in: second number */ +{ + return((n1 <= n2) ? n1 : n2); +} + +/********************************************************** +Calculates the maximum of two ulints. */ +UNIV_INLINE +ulint +ut_max( +/*===*/ + /* out: maximum */ + ulint n1, /* in: first number */ + ulint n2) /* in: second number */ +{ + return((n1 <= n2) ? n2 : n1); +} + +/******************************************************************** +Calculates minimum of two ulint-pairs. */ +UNIV_INLINE +void +ut_pair_min( +/*========*/ + ulint* a, /* out: more significant part of minimum */ + ulint* b, /* out: less significant part of minimum */ + ulint a1, /* in: more significant part of first pair */ + ulint b1, /* in: less significant part of first pair */ + ulint a2, /* in: more significant part of second pair */ + ulint b2) /* in: less significant part of second pair */ +{ + if (a1 == a2) { + *a = a1; + *b = ut_min(b1, b2); + } else if (a1 < a2) { + *a = a1; + *b = b1; + } else { + *a = a2; + *b = b2; + } +} + +/********************************************************** +Compares two ulints. 
*/ +UNIV_INLINE +int +ut_ulint_cmp( +/*=========*/ + /* out: 1 if a > b, 0 if a == b, -1 if a < b */ + ulint a, /* in: ulint */ + ulint b) /* in: ulint */ +{ + if (a < b) { + return(-1); + } else if (a == b) { + return(0); + } else { + return(1); + } +} + +/*********************************************************** +Compares two pairs of ulints. */ +UNIV_INLINE +int +ut_pair_cmp( +/*========*/ + /* out: -1 if a < b, 0 if a == b, 1 if a > b */ + ulint a1, /* in: more significant part of first pair */ + ulint a2, /* in: less significant part of first pair */ + ulint b1, /* in: more significant part of second pair */ + ulint b2) /* in: less significant part of second pair */ +{ + if (a1 > b1) { + return(1); + } else if (a1 < b1) { + return(-1); + } else if (a2 > b2) { + return(1); + } else if (a2 < b2) { + return(-1); + } else { + return(0); + } +} + +/***************************************************************** +Calculates fast the 2-logarithm of a number, rounded upward to an +integer. */ +UNIV_INLINE +ulint +ut_2_log( +/*=====*/ + /* out: logarithm in the base 2, rounded upward */ + ulint n) /* in: number != 0 */ +{ + ulint res; + + res = 0; + + ut_ad(n > 0); + + n = n - 1; + + for (;;) { + n = n / 2; + + if (n == 0) { + break; + } + + res++; + } + + return(res + 1); +} + +/***************************************************************** +Calculates 2 to power n. */ +UNIV_INLINE +ulint +ut_2_exp( +/*=====*/ + /* out: 2 to power n */ + ulint n) /* in: number */ +{ + return((ulint) 1 << n); +} diff --git a/storage/xtradb/include/ut0vec.h b/storage/xtradb/include/ut0vec.h new file mode 100644 index 00000000000..aeb7e168dc6 --- /dev/null +++ b/storage/xtradb/include/ut0vec.h @@ -0,0 +1,117 @@ +/***************************************************************************** + +Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. 
+ +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +#ifndef IB_VECTOR_H +#define IB_VECTOR_H + +#include "univ.i" +#include "mem0mem.h" + +typedef struct ib_vector_struct ib_vector_t; + +/* An automatically resizing vector datatype with the following properties: + + -Contains void* items. + + -The items are owned by the caller. + + -All memory allocation is done through a heap owned by the caller, who is + responsible for freeing it when done with the vector. + + -When the vector is resized, the old memory area is left allocated since it + uses the same heap as the new memory area, so this is best used for + relatively small or short-lived uses. +*/ + +/******************************************************************** +Create a new vector with the given initial size. */ +UNIV_INTERN +ib_vector_t* +ib_vector_create( +/*=============*/ + /* out: vector */ + mem_heap_t* heap, /* in: heap */ + ulint size); /* in: initial size */ + +/******************************************************************** +Push a new element to the vector, increasing its size if necessary. 
*/ +UNIV_INTERN +void +ib_vector_push( +/*===========*/ + ib_vector_t* vec, /* in: vector */ + void* elem); /* in: data element */ + +/******************************************************************** +Get the number of elements in the vector. */ +UNIV_INLINE +ulint +ib_vector_size( +/*===========*/ + /* out: number of elements in vector */ + const ib_vector_t* vec); /* in: vector */ + +/******************************************************************** +Test whether a vector is empty or not. */ +UNIV_INLINE +ibool +ib_vector_is_empty( +/*===============*/ + /* out: TRUE if empty */ + const ib_vector_t* vec); /* in: vector */ + +/******************************************************************** +Get the n'th element. */ +UNIV_INLINE +void* +ib_vector_get( +/*==========*/ + /* out: n'th element */ + ib_vector_t* vec, /* in: vector */ + ulint n); /* in: element index to get */ + +/******************************************************************** +Remove the last element from the vector. */ +UNIV_INLINE +void* +ib_vector_pop( +/*==========*/ + ib_vector_t* vec); /* in: vector */ + +/******************************************************************** +Free the underlying heap of the vector. Note that vec is invalid +after this call. */ +UNIV_INLINE +void +ib_vector_free( +/*===========*/ + ib_vector_t* vec); /* in,own: vector */ + +/* See comment at beginning of file. */ +struct ib_vector_struct { + mem_heap_t* heap; /* heap */ + void** data; /* data elements */ + ulint used; /* number of elements currently used */ + ulint total; /* number of elements allocated */ +}; + +#ifndef UNIV_NONINL +#include "ut0vec.ic" +#endif + +#endif diff --git a/storage/xtradb/include/ut0vec.ic b/storage/xtradb/include/ut0vec.ic new file mode 100644 index 00000000000..b0e853717e3 --- /dev/null +++ b/storage/xtradb/include/ut0vec.ic @@ -0,0 +1,88 @@ +/***************************************************************************** + +Copyright (c) 2006, 2009, Innobase Oy. 
All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/******************************************************************** +Get number of elements in vector. */ +UNIV_INLINE +ulint +ib_vector_size( +/*===========*/ + /* out: number of elements in vector */ + const ib_vector_t* vec) /* in: vector */ +{ + return(vec->used); +} + +/******************************************************************** +Get n'th element. */ +UNIV_INLINE +void* +ib_vector_get( +/*==========*/ + /* out: n'th element */ + ib_vector_t* vec, /* in: vector */ + ulint n) /* in: element index to get */ +{ + ut_a(n < vec->used); + + return(vec->data[n]); +} + +/******************************************************************** +Remove the last element from the vector. */ +UNIV_INLINE +void* +ib_vector_pop( +/*==========*/ + /* out: last vector element */ + ib_vector_t* vec) /* in/out: vector */ +{ + void* elem; + + ut_a(vec->used > 0); + --vec->used; + elem = vec->data[vec->used]; + + ut_d(vec->data[vec->used] = NULL); + UNIV_MEM_INVALID(&vec->data[vec->used], sizeof(*vec->data)); + + return(elem); +} + +/******************************************************************** +Free the underlying heap of the vector. Note that vec is invalid +after this call. 
*/ +UNIV_INLINE +void +ib_vector_free( +/*===========*/ + ib_vector_t* vec) /* in, own: vector */ +{ + mem_heap_free(vec->heap); /* NOTE(review): vec itself presumably lives in this heap, hence invalid afterwards - confirm */ +} + +/******************************************************************** +Test whether a vector is empty or not, i.e. whether ib_vector_size(vec) is 0. */ +UNIV_INLINE +ibool +ib_vector_is_empty( +/*===============*/ /* out: TRUE if empty else FALSE */ + const ib_vector_t* vec) /* in: vector to test */ +{ + return(ib_vector_size(vec) == 0); +} diff --git a/storage/xtradb/include/ut0wqueue.h b/storage/xtradb/include/ut0wqueue.h new file mode 100644 index 00000000000..6bb80dad532 --- /dev/null +++ b/storage/xtradb/include/ut0wqueue.h @@ -0,0 +1,77 @@ +/***************************************************************************** + +Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/*********************************************************************** +A Work queue. Threads can add work items to the queue and other threads can +wait for work items to be available and take them off the queue for +processing.
+************************************************************************/ + +#ifndef IB_WORK_QUEUE_H +#define IB_WORK_QUEUE_H + +#include "ut0list.h" +#include "mem0mem.h" +#include "os0sync.h" +#include "sync0types.h" + +typedef struct ib_wqueue_struct ib_wqueue_t; /* work queue type; the struct is defined at the end of this header */ + +/******************************************************************** +Create a new work queue. */ +UNIV_INTERN +ib_wqueue_t* +ib_wqueue_create(void); +/*===================*/ + /* out: work queue */ + +/******************************************************************** +Free a work queue. */ +UNIV_INTERN +void +ib_wqueue_free( +/*===========*/ + ib_wqueue_t* wq); /* in: work queue */ + +/******************************************************************** +Add a work item to the queue. */ +UNIV_INTERN +void +ib_wqueue_add( +/*==========*/ + ib_wqueue_t* wq, /* in: work queue */ + void* item, /* in: work item (passed as an opaque pointer) */ + mem_heap_t* heap); /* in: memory heap to use for allocating the + list node */ + +/******************************************************************** +Wait for a work item to appear in the queue. NOTE(review): presumably blocks on the queue's event and takes the item off the list - confirm in the implementation. */ +UNIV_INTERN +void* +ib_wqueue_wait( + /* out: work item */ + ib_wqueue_t* wq); /* in: work queue */ + +/* Work queue: a list of work items plus the mutex and event used to coordinate access to it. */ +struct ib_wqueue_struct { + mutex_t mutex; /* mutex protecting everything */ + ib_list_t* items; /* work item list */ + os_event_t event; /* event we use to signal additions to list */ +}; + +#endif |