diff options
Diffstat (limited to 'storage/innobase')
274 files changed, 0 insertions, 165224 deletions
diff --git a/storage/innobase/CMakeLists.txt b/storage/innobase/CMakeLists.txt deleted file mode 100755 index 249a600834d..00000000000 --- a/storage/innobase/CMakeLists.txt +++ /dev/null @@ -1,92 +0,0 @@ -# Copyright (C) 2006 MySQL AB -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - -SET(CMAKE_CXX_FLAGS_DEBUG "${CMAKE_CXX_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") -SET(CMAKE_C_FLAGS_DEBUG "${CMAKE_C_FLAGS_DEBUG} -DSAFEMALLOC -DSAFE_MUTEX") -ADD_DEFINITIONS(-DMYSQL_SERVER -D_WIN32 -D_LIB) - -# Bug 19424 - InnoDB: Possibly a memory overrun of the buffer being freed (64-bit Visual C) -# Removing Win64 compiler optimizations for all innodb/mem/* files. 
-IF(CMAKE_GENERATOR MATCHES "Visual Studio" AND CMAKE_SIZEOF_VOID_P MATCHES 8) - SET_SOURCE_FILES_PROPERTIES(${CMAKE_SOURCE_DIR}/storage/innobase/mem/mem0mem.c - ${CMAKE_SOURCE_DIR}/storage/innobase/mem/mem0pool.c - PROPERTIES COMPILE_FLAGS -Od) -ENDIF(CMAKE_GENERATOR MATCHES "Visual Studio" AND CMAKE_SIZEOF_VOID_P MATCHES 8) - -IF (WIN32) - IF (NOT WITHOUT_ATOMICS) -# Check if this Windows version supports atomic instructions - IF (CMAKE_SIZEOF_VOID_P MATCHES 8) -# Check for 64 bit atomics - TRY_RUN(RUN_RES COMPILE_RES ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR}/storage/innobase/win_atomics64_test.c) - IF (COMPILE_RES AND NOT RUN_RES) - MESSAGE("Adding support for Win64 atomics") - ADD_DEFINITIONS(-DWIN_ATOMICS64) - ENDIF (COMPILE_RES AND NOT RUN_RES) - ELSE (CMAKE_SIZEOF_VOID_P MATCHES 8) -# Check for 32 bit atomics - TRY_RUN(RUN_RES COMPILE_RES ${CMAKE_BINARY_DIR} - ${CMAKE_SOURCE_DIR}/storage/innobase/win_atomics32_test.c) - IF (COMPILE_RES AND NOT RUN_RES) - MESSAGE("Adding support for Win32 atomics") - ADD_DEFINITIONS(-DWIN_ATOMICS32) - ENDIF (COMPILE_RES AND NOT RUN_RES) - ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8) - ENDIF (NOT WITHOUT_ATOMICS) -ENDIF (WIN32) -INCLUDE_DIRECTORIES(${CMAKE_SOURCE_DIR}/include ${CMAKE_SOURCE_DIR}/zlib - ${CMAKE_SOURCE_DIR}/storage/innobase/include - ${CMAKE_SOURCE_DIR}/storage/innobase/handler - ${CMAKE_SOURCE_DIR}/sql - ${CMAKE_SOURCE_DIR}/regex - ${CMAKE_SOURCE_DIR}/extra/yassl/include) - -SET(INNOBASE_SOURCES btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c - buf/buf0buf.c buf/buf0flu.c buf/buf0lru.c buf/buf0rea.c - data/data0data.c data/data0type.c - dict/dict0boot.c dict/dict0crea.c dict/dict0dict.c dict/dict0load.c dict/dict0mem.c - dyn/dyn0dyn.c - eval/eval0eval.c eval/eval0proc.c - fil/fil0fil.c - fsp/fsp0fsp.c - fut/fut0fut.c fut/fut0lst.c - ha/ha0ha.c ha/hash0hash.c - ibuf/ibuf0ibuf.c - pars/lexyy.c pars/pars0grm.c pars/pars0opt.c pars/pars0pars.c pars/pars0sym.c - lock/lock0lock.c - log/log0log.c log/log0recv.c - 
mach/mach0data.c - mem/mem0mem.c mem/mem0pool.c - mtr/mtr0log.c mtr/mtr0mtr.c - os/os0file.c os/os0proc.c os/os0sync.c os/os0thread.c - page/page0cur.c page/page0page.c - que/que0que.c - handler/ha_innodb.cc - read/read0read.c - rem/rem0cmp.c rem/rem0rec.c - row/row0ins.c row/row0mysql.c row/row0purge.c row/row0row.c row/row0sel.c row/row0uins.c - row/row0umod.c row/row0undo.c row/row0upd.c row/row0vers.c - srv/srv0que.c srv/srv0srv.c srv/srv0start.c - sync/sync0arr.c sync/sync0rw.c sync/sync0sync.c - thr/thr0loc.c - trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c - usr/usr0sess.c - ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c ut/ut0list.c ut/ut0wqueue.c) - -IF(NOT SOURCE_SUBLIBS) - ADD_LIBRARY(innobase ${INNOBASE_SOURCES}) - ADD_DEPENDENCIES(innobase GenError) -ENDIF(NOT SOURCE_SUBLIBS) diff --git a/storage/innobase/Makefile.am b/storage/innobase/Makefile.am deleted file mode 100644 index 180d2ca0b87..00000000000 --- a/storage/innobase/Makefile.am +++ /dev/null @@ -1,175 +0,0 @@ -# Copyright (C) 2001, 2004, 2006 MySQL AB & Innobase Oy -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. 
-# -# You should have received a copy of the GNU General Public License -# along with this program; if not, write to the Free Software -# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -# Process this file with automake to create Makefile.in - -MYSQLDATAdir= $(localstatedir) -MYSQLSHAREdir= $(pkgdatadir) -MYSQLBASEdir= $(prefix) -MYSQLLIBdir= $(pkglibdir) -pkgplugindir= $(pkglibdir)/plugin -INCLUDES= -I$(top_srcdir)/include -I$(top_builddir)/include \ - -I$(top_srcdir)/regex \ - -I$(top_srcdir)/storage/innobase/include \ - -I$(top_srcdir)/sql \ - -I$(srcdir) - -DEFS= @DEFS@ - - -noinst_HEADERS= include/btr0btr.h include/btr0btr.ic \ - include/btr0cur.h include/btr0cur.ic \ - include/btr0pcur.h include/btr0pcur.ic \ - include/btr0sea.h include/btr0sea.ic \ - include/btr0types.h include/buf0buf.h \ - include/buf0buf.ic include/buf0flu.h \ - include/buf0flu.ic include/buf0lru.h \ - include/buf0lru.ic include/buf0rea.h \ - include/buf0types.h include/data0data.h \ - include/data0data.ic include/data0type.h \ - include/data0type.ic include/data0types.h \ - include/db0err.h include/dict0boot.h \ - include/dict0boot.ic include/dict0crea.h \ - include/dict0crea.ic include/dict0dict.h \ - include/dict0dict.ic include/dict0load.h \ - include/dict0load.ic include/dict0mem.h \ - include/dict0mem.ic include/dict0types.h \ - include/dyn0dyn.h include/dyn0dyn.ic \ - include/eval0eval.h include/eval0eval.ic \ - include/eval0proc.h include/eval0proc.ic \ - include/fil0fil.h include/fsp0fsp.h \ - include/fsp0fsp.ic include/fut0fut.h \ - include/fut0fut.ic include/fut0lst.h \ - include/fut0lst.ic include/ha0ha.h \ - include/ha0ha.ic include/hash0hash.h \ - include/hash0hash.ic include/ibuf0ibuf.h \ - include/ibuf0ibuf.ic include/ibuf0types.h \ - include/lock0iter.h \ - include/lock0lock.h include/lock0lock.ic \ - include/lock0priv.h include/lock0priv.ic \ - include/lock0types.h include/log0log.h \ - include/log0log.ic include/log0recv.h \ - 
include/log0recv.ic include/mach0data.h \ - include/mach0data.ic include/mem0dbg.h \ - include/mem0dbg.ic mem/mem0dbg.c \ - include/mem0mem.h include/mem0mem.ic \ - include/mem0pool.h include/mem0pool.ic \ - include/mtr0log.h include/mtr0log.ic \ - include/mtr0mtr.h include/mtr0mtr.ic \ - include/mtr0types.h include/os0file.h \ - include/os0proc.h include/os0proc.ic \ - include/os0sync.h include/os0sync.ic \ - include/os0thread.h include/os0thread.ic \ - include/page0cur.h include/page0cur.ic \ - include/page0page.h include/page0page.ic \ - include/page0types.h include/pars0grm.h \ - include/pars0opt.h include/pars0opt.ic \ - include/pars0pars.h include/pars0pars.ic \ - include/pars0sym.h include/pars0sym.ic \ - include/pars0types.h include/que0que.h \ - include/que0que.ic include/que0types.h \ - include/read0read.h include/read0read.ic \ - include/read0types.h include/rem0cmp.h \ - include/rem0cmp.ic include/rem0rec.h \ - include/rem0rec.ic include/rem0types.h \ - include/row0ins.h include/row0ins.ic \ - include/row0mysql.h include/row0mysql.ic \ - include/row0purge.h include/row0purge.ic \ - include/row0row.h include/row0row.ic \ - include/row0sel.h include/row0sel.ic \ - include/row0types.h include/row0uins.h \ - include/row0uins.ic include/row0umod.h \ - include/row0umod.ic include/row0undo.h \ - include/row0undo.ic include/row0upd.h \ - include/row0upd.ic include/row0vers.h \ - include/row0vers.ic include/srv0que.h \ - include/srv0srv.h include/srv0srv.ic \ - include/srv0start.h include/sync0arr.h \ - include/sync0arr.ic include/sync0rw.h \ - include/sync0rw.ic include/sync0sync.h \ - include/sync0sync.ic include/sync0types.h \ - include/thr0loc.h include/thr0loc.ic \ - include/trx0purge.h include/trx0purge.ic \ - include/trx0rec.h include/trx0rec.ic \ - include/trx0roll.h include/trx0roll.ic \ - include/trx0rseg.h include/trx0rseg.ic \ - include/trx0sys.h include/trx0sys.ic \ - include/trx0trx.h include/trx0trx.ic \ - include/trx0types.h include/trx0undo.h \ 
- include/trx0undo.ic include/trx0xa.h \ - include/univ.i include/usr0sess.h \ - include/usr0sess.ic include/usr0types.h \ - include/ut0byte.h include/ut0byte.ic \ - include/ut0dbg.h include/ut0lst.h \ - include/ut0mem.h include/ut0mem.ic \ - include/ut0rnd.h include/ut0rnd.ic \ - include/ut0sort.h include/ut0ut.h \ - include/ut0ut.ic include/ut0vec.h \ - include/ut0vec.ic include/ut0list.h \ - include/ut0list.ic include/ut0wqueue.h \ - include/ha_prototypes.h handler/ha_innodb.h - -EXTRA_LIBRARIES= libinnobase.a -noinst_LIBRARIES= @plugin_innobase_static_target@ -libinnobase_a_SOURCES= btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c \ - btr/btr0sea.c buf/buf0buf.c buf/buf0flu.c \ - buf/buf0lru.c buf/buf0rea.c data/data0data.c \ - data/data0type.c dict/dict0boot.c \ - dict/dict0crea.c dict/dict0dict.c \ - dict/dict0load.c dict/dict0mem.c dyn/dyn0dyn.c \ - eval/eval0eval.c eval/eval0proc.c \ - fil/fil0fil.c fsp/fsp0fsp.c fut/fut0fut.c \ - fut/fut0lst.c ha/ha0ha.c ha/hash0hash.c \ - ibuf/ibuf0ibuf.c lock/lock0iter.c \ - lock/lock0lock.c \ - log/log0log.c log/log0recv.c mach/mach0data.c \ - mem/mem0mem.c mem/mem0pool.c mtr/mtr0log.c \ - mtr/mtr0mtr.c os/os0file.c os/os0proc.c \ - os/os0sync.c os/os0thread.c page/page0cur.c \ - page/page0page.c pars/lexyy.c pars/pars0grm.c \ - pars/pars0opt.c pars/pars0pars.c \ - pars/pars0sym.c que/que0que.c read/read0read.c \ - rem/rem0cmp.c rem/rem0rec.c row/row0ins.c \ - row/row0mysql.c row/row0purge.c row/row0row.c \ - row/row0sel.c row/row0uins.c row/row0umod.c \ - row/row0undo.c row/row0upd.c row/row0vers.c \ - srv/srv0que.c srv/srv0srv.c srv/srv0start.c \ - sync/sync0arr.c sync/sync0rw.c \ - sync/sync0sync.c thr/thr0loc.c trx/trx0purge.c \ - trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c \ - trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c \ - usr/usr0sess.c ut/ut0byte.c ut/ut0dbg.c \ - ut/ut0list.c ut/ut0mem.c ut/ut0rnd.c \ - ut/ut0ut.c ut/ut0vec.c ut/ut0wqueue.c \ - handler/ha_innodb.cc - -libinnobase_a_CXXFLAGS= $(AM_CFLAGS) 
-libinnobase_a_CFLAGS= $(AM_CFLAGS) - -EXTRA_LTLIBRARIES= ha_innodb.la -pkgplugin_LTLIBRARIES= @plugin_innobase_shared_target@ - -ha_innodb_la_LDFLAGS= -module -rpath $(pkgplugindir) -ha_innodb_la_CXXFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) -ha_innodb_la_CFLAGS= $(AM_CFLAGS) $(INNODB_DYNAMIC_CFLAGS) -ha_innodb_la_SOURCES= $(libinnobase_a_SOURCES) - -EXTRA_DIST= CMakeLists.txt plug.in \ - pars/make_bison.sh pars/make_flex.sh \ - pars/pars0grm.y pars/pars0lex.l \ - win_atomics32_test.c win_atomics64_test.c - -# Don't update the files from bitkeeper -%::SCCS/s.% diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c deleted file mode 100644 index 6e8b43aeb8d..00000000000 --- a/storage/innobase/btr/btr0btr.c +++ /dev/null @@ -1,3077 +0,0 @@ -/****************************************************** -The B-tree - -(c) 1994-1996 Innobase Oy - -Created 6/2/1994 Heikki Tuuri -*******************************************************/ - -#include "btr0btr.h" - -#ifdef UNIV_NONINL -#include "btr0btr.ic" -#endif - -#include "fsp0fsp.h" -#include "page0page.h" -#include "btr0cur.h" -#include "btr0sea.h" -#include "btr0pcur.h" -#include "rem0cmp.h" -#include "lock0lock.h" -#include "ibuf0ibuf.h" -#include "trx0trx.h" - -/* -Latching strategy of the InnoDB B-tree --------------------------------------- -A tree latch protects all non-leaf nodes of the tree. Each node of a tree -also has a latch of its own. - -A B-tree operation normally first acquires an S-latch on the tree. It -searches down the tree and releases the tree latch when it has the -leaf node latch. To save CPU time we do not acquire any latch on -non-leaf nodes of the tree during a search, those pages are only bufferfixed. - -If an operation needs to restructure the tree, it acquires an X-latch on -the tree before searching to a leaf node. 
If it needs, for example, to -split a leaf, -(1) InnoDB decides the split point in the leaf, -(2) allocates a new page, -(3) inserts the appropriate node pointer to the first non-leaf level, -(4) releases the tree X-latch, -(5) and then moves records from the leaf to the new allocated page. - -Node pointers -------------- -Leaf pages of a B-tree contain the index records stored in the -tree. On levels n > 0 we store 'node pointers' to pages on level -n - 1. For each page there is exactly one node pointer stored: -thus our tree is an ordinary B-tree, not a B-link tree. - -A node pointer contains a prefix P of an index record. The prefix -is long enough so that it determines an index record uniquely. -The file page number of the child page is added as the last -field. To the child page we can store node pointers or index records -which are >= P in the alphabetical order, but < P1 if there is -a next node pointer on the level, and P1 is its prefix. - -If a node pointer with a prefix P points to a non-leaf child, -then the leftmost record in the child must have the same -prefix P. If it points to a leaf node, the child is not required -to contain any record with a prefix equal to P. The leaf case -is decided this way to allow arbitrary deletions in a leaf node -without touching upper levels of the tree. - -We have predefined a special minimum record which we -define as the smallest record in any alphabetical order. -A minimum record is denoted by setting a bit in the record -header. A minimum record acts as the prefix of a node pointer -which points to a leftmost node on any level of the tree. - -File page allocation --------------------- -In the root node of a B-tree there are two file segment headers. -The leaf pages of a tree are allocated from one file segment, to -make them consecutive on disk if possible. From the other file segment -we allocate pages for the non-leaf levels of the tree. 
-*/ - -/**************************************************************** -Returns the upper level node pointer to a page. It is assumed that -mtr holds an x-latch on the tree. */ -static -rec_t* -btr_page_get_father_node_ptr( -/*=========================*/ - /* out: pointer to node pointer record */ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page: must contain at least one - user record */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Empties an index page. */ -static -void -btr_page_empty( -/*===========*/ - page_t* page, /* in: page to be emptied */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Returns TRUE if the insert fits on the appropriate half-page -with the chosen split_rec. */ -static -ibool -btr_page_insert_fits( -/*=================*/ - /* out: TRUE if fits */ - btr_cur_t* cursor, /* in: cursor at which insert - should be made */ - rec_t* split_rec, /* in: suggestion for first record - on upper half-page, or NULL if - tuple should be first */ - const ulint* offsets, /* in: rec_get_offsets( - split_rec, cursor->index) */ - dtuple_t* tuple, /* in: tuple to insert */ - mem_heap_t* heap); /* in: temporary memory heap */ - -/****************************************************************** -Gets the root node of a tree and x-latches it. */ - -page_t* -btr_root_get( -/*=========*/ - /* out: root page, x-latched */ - dict_index_t* index, /* in: index tree */ - mtr_t* mtr) /* in: mtr */ -{ - ulint space; - ulint root_page_no; - page_t* root; - - space = dict_index_get_space(index); - root_page_no = dict_index_get_page(index); - - root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr); - ut_a((ibool)!!page_is_comp(root) == dict_table_is_comp(index->table)); - - return(root); -} - -/***************************************************************** -Gets pointer to the previous user record in the tree. 
It is assumed that -the caller has appropriate latches on the page and its neighbor. */ - -rec_t* -btr_get_prev_user_rec( -/*==================*/ - /* out: previous user record, NULL if there is none */ - rec_t* rec, /* in: record on leaf level */ - mtr_t* mtr) /* in: mtr holding a latch on the page, and if - needed, also to the previous page */ -{ - page_t* page; - page_t* prev_page; - ulint prev_page_no; - ulint space; - - if (!page_rec_is_infimum(rec)) { - - rec_t* prev_rec = page_rec_get_prev(rec); - - if (!page_rec_is_infimum(prev_rec)) { - - return(prev_rec); - } - } - - page = buf_frame_align(rec); - prev_page_no = btr_page_get_prev(page, mtr); - space = buf_frame_get_space_id(page); - - if (prev_page_no != FIL_NULL) { - - prev_page = buf_page_get_with_no_latch(space, prev_page_no, - mtr); - /* The caller must already have a latch to the brother */ - ut_ad((mtr_memo_contains(mtr, buf_block_align(prev_page), - MTR_MEMO_PAGE_S_FIX)) - || (mtr_memo_contains(mtr, buf_block_align(prev_page), - MTR_MEMO_PAGE_X_FIX))); - ut_a(page_is_comp(prev_page) == page_is_comp(page)); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(prev_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - return(page_rec_get_prev(page_get_supremum_rec(prev_page))); - } - - return(NULL); -} - -/***************************************************************** -Gets pointer to the next user record in the tree. It is assumed that the -caller has appropriate latches on the page and its neighbor. 
*/ - -rec_t* -btr_get_next_user_rec( -/*==================*/ - /* out: next user record, NULL if there is none */ - rec_t* rec, /* in: record on leaf level */ - mtr_t* mtr) /* in: mtr holding a latch on the page, and if - needed, also to the next page */ -{ - page_t* page; - page_t* next_page; - ulint next_page_no; - ulint space; - - if (!page_rec_is_supremum(rec)) { - - rec_t* next_rec = page_rec_get_next(rec); - - if (!page_rec_is_supremum(next_rec)) { - - return(next_rec); - } - } - - page = buf_frame_align(rec); - next_page_no = btr_page_get_next(page, mtr); - space = buf_frame_get_space_id(page); - - if (next_page_no != FIL_NULL) { - - next_page = buf_page_get_with_no_latch(space, next_page_no, - mtr); - /* The caller must already have a latch to the brother */ - ut_ad((mtr_memo_contains(mtr, buf_block_align(next_page), - MTR_MEMO_PAGE_S_FIX)) - || (mtr_memo_contains(mtr, buf_block_align(next_page), - MTR_MEMO_PAGE_X_FIX))); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_prev(next_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - ut_a(page_is_comp(next_page) == page_is_comp(page)); - return(page_rec_get_next(page_get_infimum_rec(next_page))); - } - - return(NULL); -} - -/****************************************************************** -Creates a new index page (not the root, and also not -used in page reorganization). */ -static -void -btr_page_create( -/*============*/ - page_t* page, /* in: page to be created */ - dict_index_t* index, /* in: index */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - page_create(page, mtr, dict_table_is_comp(index->table)); - buf_block_align(page)->check_index_page_at_flush = TRUE; - - btr_page_set_index_id(page, index->id, mtr); -} - -/****************************************************************** -Allocates a new file page to be used in an ibuf tree. Takes the page from -the free list of the tree, which must contain pages! 
*/ -static -page_t* -btr_page_alloc_for_ibuf( -/*====================*/ - /* out: new allocated page, x-latched */ - dict_index_t* index, /* in: index tree */ - mtr_t* mtr) /* in: mtr */ -{ - fil_addr_t node_addr; - page_t* root; - page_t* new_page; - - root = btr_root_get(index, mtr); - - node_addr = flst_get_first(root + PAGE_HEADER - + PAGE_BTR_IBUF_FREE_LIST, mtr); - ut_a(node_addr.page != FIL_NULL); - - new_page = buf_page_get(dict_index_get_space(index), node_addr.page, - RW_X_LATCH, mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW); -#endif /* UNIV_SYNC_DEBUG */ - - flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - new_page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, - mtr); - ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - mtr)); - - return(new_page); -} - -/****************************************************************** -Allocates a new file page to be used in an index tree. NOTE: we assume -that the caller has made the reservation for free extents! 
*/ - -page_t* -btr_page_alloc( -/*===========*/ - /* out: new allocated page, x-latched; - NULL if out of space */ - dict_index_t* index, /* in: index */ - ulint hint_page_no, /* in: hint of a good page */ - byte file_direction, /* in: direction where a possible - page split is made */ - ulint level, /* in: level where the page is placed - in the tree */ - mtr_t* mtr) /* in: mtr */ -{ - fseg_header_t* seg_header; - page_t* root; - page_t* new_page; - ulint new_page_no; - - if (index->type & DICT_IBUF) { - - return(btr_page_alloc_for_ibuf(index, mtr)); - } - - root = btr_root_get(index, mtr); - - if (level == 0) { - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; - } else { - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; - } - - /* Parameter TRUE below states that the caller has made the - reservation for free extents, and thus we know that a page can - be allocated: */ - - new_page_no = fseg_alloc_free_page_general(seg_header, hint_page_no, - file_direction, TRUE, mtr); - if (new_page_no == FIL_NULL) { - - return(NULL); - } - - new_page = buf_page_get(dict_index_get_space(index), new_page_no, - RW_X_LATCH, mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(new_page, SYNC_TREE_NODE_NEW); -#endif /* UNIV_SYNC_DEBUG */ - - return(new_page); -} - -/****************************************************************** -Gets the number of pages in a B-tree. 
*/ - -ulint -btr_get_size( -/*=========*/ - /* out: number of pages */ - dict_index_t* index, /* in: index */ - ulint flag) /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ -{ - fseg_header_t* seg_header; - page_t* root; - ulint n; - ulint dummy; - mtr_t mtr; - - mtr_start(&mtr); - - mtr_s_lock(dict_index_get_lock(index), &mtr); - - root = btr_root_get(index, &mtr); - - if (flag == BTR_N_LEAF_PAGES) { - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; - - fseg_n_reserved_pages(seg_header, &n, &mtr); - - } else if (flag == BTR_TOTAL_SIZE) { - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; - - n = fseg_n_reserved_pages(seg_header, &dummy, &mtr); - - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; - - n += fseg_n_reserved_pages(seg_header, &dummy, &mtr); - } else { - ut_error; - } - - mtr_commit(&mtr); - - return(n); -} - -/****************************************************************** -Frees a page used in an ibuf tree. Puts the page to the free list of the -ibuf tree. */ -static -void -btr_page_free_for_ibuf( -/*===================*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page to be freed, x-latched */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* root; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - root = btr_root_get(index, mtr); - - flst_add_first(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, mtr); - - ut_ad(flst_validate(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - mtr)); -} - -/****************************************************************** -Frees a file page used in an index tree. Can be used also to (BLOB) -external storage pages, because the page level 0 can be given as an -argument. 
*/ - -void -btr_page_free_low( -/*==============*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page to be freed, x-latched */ - ulint level, /* in: page level */ - mtr_t* mtr) /* in: mtr */ -{ - fseg_header_t* seg_header; - page_t* root; - ulint space; - ulint page_no; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - /* The page gets invalid for optimistic searches: increment the frame - modify clock */ - - buf_frame_modify_clock_inc(page); - - if (index->type & DICT_IBUF) { - - btr_page_free_for_ibuf(index, page, mtr); - - return; - } - - root = btr_root_get(index, mtr); - - if (level == 0) { - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; - } else { - seg_header = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; - } - - space = buf_frame_get_space_id(page); - page_no = buf_frame_get_page_no(page); - - fseg_free_page(seg_header, space, page_no, mtr); -} - -/****************************************************************** -Frees a file page used in an index tree. NOTE: cannot free field external -storage pages because the page must contain info on its level. */ - -void -btr_page_free( -/*==========*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page to be freed, x-latched */ - mtr_t* mtr) /* in: mtr */ -{ - ulint level; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - level = btr_page_get_level(page, mtr); - - btr_page_free_low(index, page, level, mtr); -} - -/****************************************************************** -Sets the child node file address in a node pointer. 
*/ -UNIV_INLINE -void -btr_node_ptr_set_child_page_no( -/*===========================*/ - rec_t* rec, /* in: node pointer record */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint page_no,/* in: child node address */ - mtr_t* mtr) /* in: mtr */ -{ - byte* field; - ulint len; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(0 < btr_page_get_level(buf_frame_align(rec), mtr)); - ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec)); - - /* The child address is in the last field */ - field = rec_get_nth_field(rec, offsets, - rec_offs_n_fields(offsets) - 1, &len); - - ut_ad(len == 4); - - mlog_write_ulint(field, page_no, MLOG_4BYTES, mtr); -} - -/**************************************************************** -Returns the child page of a node pointer and x-latches it. */ -static -page_t* -btr_node_ptr_get_child( -/*===================*/ - /* out: child page, x-latched */ - rec_t* node_ptr,/* in: node pointer */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - mtr_t* mtr) /* in: mtr */ -{ - ulint page_no; - ulint space; - page_t* page; - - ut_ad(rec_offs_validate(node_ptr, NULL, offsets)); - space = buf_frame_get_space_id(node_ptr); - page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); - - page = btr_page_get(space, page_no, RW_X_LATCH, mtr); - - return(page); -} - -/**************************************************************** -Returns the upper level node pointer to a page. It is assumed that mtr holds -an x-latch on the tree. 
*/ -static -rec_t* -btr_page_get_father_for_rec( -/*========================*/ - /* out: pointer to node pointer record, - its page x-latched */ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page: must contain at least one - user record */ - rec_t* user_rec,/* in: user_record on page */ - mtr_t* mtr) /* in: mtr */ -{ - mem_heap_t* heap; - dtuple_t* tuple; - btr_cur_t cursor; - rec_t* node_ptr; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_a(page_rec_is_user_rec(user_rec)); - - ut_ad(dict_index_get_page(index) != buf_frame_get_page_no(page)); - - heap = mem_heap_create(100); - - tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, - btr_page_get_level(page, mtr)); - - btr_cur_search_to_nth_level(index, - btr_page_get_level(page, mtr) + 1, - tuple, PAGE_CUR_LE, - BTR_CONT_MODIFY_TREE, &cursor, 0, mtr); - - node_ptr = btr_cur_get_rec(&cursor); - offsets = rec_get_offsets(node_ptr, index, offsets, - ULINT_UNDEFINED, &heap); - - if (UNIV_UNLIKELY(btr_node_ptr_get_child_page_no(node_ptr, offsets) - != buf_frame_get_page_no(page))) { - rec_t* print_rec; - fputs("InnoDB: Dump of the child page:\n", stderr); - buf_page_print(buf_frame_align(page)); - fputs("InnoDB: Dump of the parent page:\n", stderr); - buf_page_print(buf_frame_align(node_ptr)); - - fputs("InnoDB: Corruption of an index tree: table ", stderr); - ut_print_name(stderr, NULL, TRUE, index->table_name); - fputs(", index ", stderr); - ut_print_name(stderr, NULL, FALSE, index->name); - fprintf(stderr, ",\n" - "InnoDB: father ptr page no %lu, child page no %lu\n", - (ulong) - btr_node_ptr_get_child_page_no(node_ptr, offsets), - (ulong) buf_frame_get_page_no(page)); - print_rec = page_rec_get_next(page_get_infimum_rec(page)); - offsets = rec_get_offsets(print_rec, index, - offsets, ULINT_UNDEFINED, &heap); - 
page_rec_print(print_rec, offsets); - offsets = rec_get_offsets(node_ptr, index, offsets, - ULINT_UNDEFINED, &heap); - page_rec_print(node_ptr, offsets); - - fputs("InnoDB: You should dump + drop + reimport the table" - " to fix the\n" - "InnoDB: corruption. If the crash happens at " - "the database startup, see\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "forcing-recovery.html about\n" - "InnoDB: forcing recovery. " - "Then dump + drop + reimport.\n", stderr); - } - - ut_a(btr_node_ptr_get_child_page_no(node_ptr, offsets) - == buf_frame_get_page_no(page)); - mem_heap_free(heap); - - return(node_ptr); -} - -/**************************************************************** -Returns the upper level node pointer to a page. It is assumed that -mtr holds an x-latch on the tree. */ -static -rec_t* -btr_page_get_father_node_ptr( -/*=========================*/ - /* out: pointer to node pointer record */ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page: must contain at least one - user record */ - mtr_t* mtr) /* in: mtr */ -{ - return(btr_page_get_father_for_rec( - index, page, - page_rec_get_next(page_get_infimum_rec(page)), mtr)); -} - -/**************************************************************** -Creates the root node for a new index tree. 
*/ - -ulint -btr_create( -/*=======*/ - /* out: page number of the created root, FIL_NULL if - did not succeed */ - ulint type, /* in: type of the index */ - ulint space, /* in: space where created */ - dulint index_id,/* in: index id */ - ulint comp, /* in: nonzero=compact page format */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ulint page_no; - buf_frame_t* ibuf_hdr_frame; - buf_frame_t* frame; - page_t* page; - - /* Create the two new segments (one, in the case of an ibuf tree) for - the index tree; the segment headers are put on the allocated root page - (for an ibuf tree, not in the root, but on a separate ibuf header - page) */ - - if (type & DICT_IBUF) { - /* Allocate first the ibuf header page */ - ibuf_hdr_frame = fseg_create( - space, 0, IBUF_HEADER + IBUF_TREE_SEG_HEADER, mtr); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(ibuf_hdr_frame, SYNC_TREE_NODE_NEW); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(buf_frame_get_page_no(ibuf_hdr_frame) - == IBUF_HEADER_PAGE_NO); - /* Allocate then the next page to the segment: it will be the - tree root page */ - - page_no = fseg_alloc_free_page(ibuf_hdr_frame + IBUF_HEADER - + IBUF_TREE_SEG_HEADER, - IBUF_TREE_ROOT_PAGE_NO, - FSP_UP, mtr); - ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO); - - frame = buf_page_get(space, page_no, RW_X_LATCH, mtr); - } else { - frame = fseg_create(space, 0, PAGE_HEADER + PAGE_BTR_SEG_TOP, - mtr); - } - - if (frame == NULL) { - - return(FIL_NULL); - } - - page_no = buf_frame_get_page_no(frame); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW); -#endif /* UNIV_SYNC_DEBUG */ - - if (type & DICT_IBUF) { - /* It is an insert buffer tree: initialize the free list */ - - ut_ad(page_no == IBUF_TREE_ROOT_PAGE_NO); - - flst_init(frame + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, mtr); - } else { - /* It is a non-ibuf tree: create a file segment for leaf - pages */ - fseg_create(space, page_no, PAGE_HEADER + PAGE_BTR_SEG_LEAF, - mtr); - /* The fseg create acquires a 
second latch on the page, - therefore we must declare it: */ -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(frame, SYNC_TREE_NODE_NEW); -#endif /* UNIV_SYNC_DEBUG */ - } - - /* Create a new index page on the the allocated segment page */ - page = page_create(frame, mtr, comp); - buf_block_align(page)->check_index_page_at_flush = TRUE; - - /* Set the index id of the page */ - btr_page_set_index_id(page, index_id, mtr); - - /* Set the level of the new index page */ - btr_page_set_level(page, 0, mtr); - - /* Set the next node and previous node fields */ - btr_page_set_next(page, FIL_NULL, mtr); - btr_page_set_prev(page, FIL_NULL, mtr); - - /* We reset the free bits for the page to allow creation of several - trees in the same mtr, otherwise the latch on a bitmap page would - prevent it because of the latching order */ - - ibuf_reset_free_bits_with_type(type, page); - - /* In the following assertion we test that two records of maximum - allowed size fit on the root page: this fact is needed to ensure - correctness of split algorithms */ - - ut_ad(page_get_max_insert_size(page, 2) > 2 * BTR_PAGE_MAX_REC_SIZE); - - return(page_no); -} - -/**************************************************************** -Frees a B-tree except the root page, which MUST be freed after this -by calling btr_free_root. */ - -void -btr_free_but_not_root( -/*==================*/ - ulint space, /* in: space where created */ - ulint root_page_no) /* in: root page number */ -{ - ibool finished; - page_t* root; - mtr_t mtr; - -leaf_loop: - mtr_start(&mtr); - - root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr); - - /* NOTE: page hash indexes are dropped when a page is freed inside - fsp0fsp. 
*/ - - finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_LEAF, - &mtr); - mtr_commit(&mtr); - - if (!finished) { - - goto leaf_loop; - } -top_loop: - mtr_start(&mtr); - - root = btr_page_get(space, root_page_no, RW_X_LATCH, &mtr); - - finished = fseg_free_step_not_header( - root + PAGE_HEADER + PAGE_BTR_SEG_TOP, &mtr); - mtr_commit(&mtr); - - if (!finished) { - - goto top_loop; - } -} - -/**************************************************************** -Frees the B-tree root page. Other tree MUST already have been freed. */ - -void -btr_free_root( -/*==========*/ - ulint space, /* in: space where created */ - ulint root_page_no, /* in: root page number */ - mtr_t* mtr) /* in: a mini-transaction which has already - been started */ -{ - ibool finished; - page_t* root; - - root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr); - - btr_search_drop_page_hash_index(root); -top_loop: - finished = fseg_free_step(root + PAGE_HEADER + PAGE_BTR_SEG_TOP, mtr); - if (!finished) { - - goto top_loop; - } -} - -/***************************************************************** -Reorganizes an index page. */ -static -void -btr_page_reorganize_low( -/*====================*/ - ibool recovery,/* in: TRUE if called in recovery: - locks should not be updated, i.e., - there cannot exist locks on the - page, and a hash index should not be - dropped: it cannot exist */ - page_t* page, /* in: page to be reorganized */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* new_page; - ulint log_mode; - ulint data_size1; - ulint data_size2; - ulint max_ins_size1; - ulint max_ins_size2; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); - data_size1 = page_get_data_size(page); - max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1); - - /* Write the log record */ - mlog_open_and_write_index(mtr, page, index, page_is_comp(page) - ? 
MLOG_COMP_PAGE_REORGANIZE - : MLOG_PAGE_REORGANIZE, 0); - - /* Turn logging off */ - log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); - - new_page = buf_frame_alloc(); - - /* Copy the old page to temporary space */ - buf_frame_copy(new_page, page); - - if (!recovery) { - btr_search_drop_page_hash_index(page); - } - - /* Recreate the page: note that global data on page (possible - segment headers, next page-field, etc.) is preserved intact */ - - page_create(page, mtr, page_is_comp(page)); - buf_block_align(page)->check_index_page_at_flush = TRUE; - - /* Copy the records from the temporary space to the recreated page; - do not copy the lock bits yet */ - - page_copy_rec_list_end_no_locks(page, new_page, - page_get_infimum_rec(new_page), - index, mtr); - /* Copy max trx id to recreated page */ - page_set_max_trx_id(page, page_get_max_trx_id(new_page)); - - if (!recovery) { - /* Update the record lock bitmaps */ - lock_move_reorganize_page(page, new_page); - } - - data_size2 = page_get_data_size(page); - max_ins_size2 = page_get_max_insert_size_after_reorganize(page, 1); - - if (data_size1 != data_size2 || max_ins_size1 != max_ins_size2) { - buf_page_print(page); - buf_page_print(new_page); - fprintf(stderr, - "InnoDB: Error: page old data size %lu" - " new data size %lu\n" - "InnoDB: Error: page old max ins size %lu" - " new max ins size %lu\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", - (unsigned long) data_size1, (unsigned long) data_size2, - (unsigned long) max_ins_size1, - (unsigned long) max_ins_size2); - } - - buf_frame_free(new_page); - - /* Restore logging mode */ - mtr_set_log_mode(mtr, log_mode); -} - -/***************************************************************** -Reorganizes an index page. 
*/ - -void -btr_page_reorganize( -/*================*/ - page_t* page, /* in: page to be reorganized */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr) /* in: mtr */ -{ - btr_page_reorganize_low(FALSE, page, index, mtr); -} - -/*************************************************************** -Parses a redo log record of reorganizing a page. */ - -byte* -btr_parse_page_reorganize( -/*======================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr __attribute__((unused)), - /* in: buffer end */ - dict_index_t* index, /* in: record descriptor */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ -{ - ut_ad(ptr && end_ptr); - - /* The record is empty, except for the record initial part */ - - if (page) { - btr_page_reorganize_low(TRUE, page, index, mtr); - } - - return(ptr); -} - -/***************************************************************** -Empties an index page. */ -static -void -btr_page_empty( -/*===========*/ - page_t* page, /* in: page to be emptied */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - btr_search_drop_page_hash_index(page); - - /* Recreate the page: note that global data on page (possible - segment headers, next page-field, etc.) is preserved intact */ - - page_create(page, mtr, page_is_comp(page)); - buf_block_align(page)->check_index_page_at_flush = TRUE; -} - -/***************************************************************** -Makes tree one level higher by splitting the root, and inserts -the tuple. It is assumed that mtr contains an x-latch on the tree. -NOTE that the operation of this function must always succeed, -we cannot reverse it: therefore enough free disk space must be -guaranteed to be available before this function is called. 
*/ - -rec_t* -btr_root_raise_and_insert( -/*======================*/ - /* out: inserted record */ - btr_cur_t* cursor, /* in: cursor at which to insert: must be - on the root page; when the function returns, - the cursor is positioned on the predecessor - of the inserted record */ - dtuple_t* tuple, /* in: tuple to insert */ - mtr_t* mtr) /* in: mtr */ -{ - dict_index_t* index; - page_t* root; - page_t* new_page; - ulint new_page_no; - rec_t* rec; - mem_heap_t* heap; - dtuple_t* node_ptr; - ulint level; - rec_t* node_ptr_rec; - page_cur_t* page_cursor; - - root = btr_cur_get_page(cursor); - index = btr_cur_get_index(cursor); - - ut_ad(dict_index_get_page(index) == buf_frame_get_page_no(root)); - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(root), - MTR_MEMO_PAGE_X_FIX)); - btr_search_drop_page_hash_index(root); - - /* Allocate a new page to the tree. Root splitting is done by first - moving the root records to the new page, emptying the root, putting - a node pointer to the new page, and then splitting the new page. 
*/ - - new_page = btr_page_alloc(index, 0, FSP_NO_DIR, - btr_page_get_level(root, mtr), mtr); - - btr_page_create(new_page, index, mtr); - - level = btr_page_get_level(root, mtr); - - /* Set the levels of the new index page and root page */ - btr_page_set_level(new_page, level, mtr); - btr_page_set_level(root, level + 1, mtr); - - /* Set the next node and previous node fields of new page */ - btr_page_set_next(new_page, FIL_NULL, mtr); - btr_page_set_prev(new_page, FIL_NULL, mtr); - - /* Move the records from root to the new page */ - - page_move_rec_list_end(new_page, root, page_get_infimum_rec(root), - index, mtr); - /* If this is a pessimistic insert which is actually done to - perform a pessimistic update then we have stored the lock - information of the record to be inserted on the infimum of the - root page: we cannot discard the lock structs on the root page */ - - lock_update_root_raise(new_page, root); - - /* Create a memory heap where the node pointer is stored */ - heap = mem_heap_create(100); - - rec = page_rec_get_next(page_get_infimum_rec(new_page)); - new_page_no = buf_frame_get_page_no(new_page); - - /* Build the node pointer (= node key and page address) for the - child */ - - node_ptr = dict_index_build_node_ptr(index, rec, new_page_no, heap, - level); - /* Reorganize the root to get free space */ - btr_page_reorganize(root, index, mtr); - - page_cursor = btr_cur_get_page_cur(cursor); - - /* Insert node pointer to the root */ - - page_cur_set_before_first(root, page_cursor); - - node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr, - index, mtr); - - ut_ad(node_ptr_rec); - - /* The node pointer must be marked as the predefined minimum record, - as there is no lower alphabetical limit to records in the leftmost - node of a level: */ - - btr_set_min_rec_mark(node_ptr_rec, page_is_comp(root), mtr); - - /* Free the memory heap */ - mem_heap_free(heap); - - /* We play safe and reset the free bits for the new page */ - -#if 0 - fprintf(stderr, 
"Root raise new page no %lu\n", - buf_frame_get_page_no(new_page)); -#endif - - ibuf_reset_free_bits(index, new_page); - /* Reposition the cursor to the child node */ - page_cur_search(new_page, index, tuple, - PAGE_CUR_LE, page_cursor); - - /* Split the child and insert tuple */ - return(btr_page_split_and_insert(cursor, tuple, mtr)); -} - -/***************************************************************** -Decides if the page should be split at the convergence point of inserts -converging to the left. */ - -ibool -btr_page_get_split_rec_to_left( -/*===========================*/ - /* out: TRUE if split recommended */ - btr_cur_t* cursor, /* in: cursor at which to insert */ - rec_t** split_rec) /* out: if split recommended, - the first record on upper half page, - or NULL if tuple to be inserted should - be first */ -{ - page_t* page; - rec_t* insert_point; - rec_t* infimum; - - page = btr_cur_get_page(cursor); - insert_point = btr_cur_get_rec(cursor); - - if (page_header_get_ptr(page, PAGE_LAST_INSERT) - == page_rec_get_next(insert_point)) { - - infimum = page_get_infimum_rec(page); - - /* If the convergence is in the middle of a page, include also - the record immediately before the new insert to the upper - page. Otherwise, we could repeatedly move from page to page - lots of records smaller than the convergence point. */ - - if (infimum != insert_point - && page_rec_get_next(infimum) != insert_point) { - - *split_rec = insert_point; - } else { - *split_rec = page_rec_get_next(insert_point); - } - - return(TRUE); - } - - return(FALSE); -} - -/***************************************************************** -Decides if the page should be split at the convergence point of inserts -converging to the right. 
*/ - -ibool -btr_page_get_split_rec_to_right( -/*============================*/ - /* out: TRUE if split recommended */ - btr_cur_t* cursor, /* in: cursor at which to insert */ - rec_t** split_rec) /* out: if split recommended, - the first record on upper half page, - or NULL if tuple to be inserted should - be first */ -{ - page_t* page; - rec_t* insert_point; - - page = btr_cur_get_page(cursor); - insert_point = btr_cur_get_rec(cursor); - - /* We use eager heuristics: if the new insert would be right after - the previous insert on the same page, we assume that there is a - pattern of sequential inserts here. */ - - if (UNIV_LIKELY(page_header_get_ptr(page, PAGE_LAST_INSERT) - == insert_point)) { - - rec_t* next_rec; - - next_rec = page_rec_get_next(insert_point); - - if (page_rec_is_supremum(next_rec)) { -split_at_new: - /* Split at the new record to insert */ - *split_rec = NULL; - } else { - rec_t* next_next_rec = page_rec_get_next(next_rec); - if (page_rec_is_supremum(next_next_rec)) { - - goto split_at_new; - } - - /* If there are >= 2 user records up from the insert - point, split all but 1 off. We want to keep one because - then sequential inserts can use the adaptive hash - index, as they can do the necessary checks of the right - search position just by looking at the records on this - page. */ - - *split_rec = next_next_rec; - } - - return(TRUE); - } - - return(FALSE); -} - -/***************************************************************** -Calculates a split record such that the tuple will certainly fit on -its half-page when the split is performed. We assume in this function -only that the cursor page has at least one user record. 
*/ -static -rec_t* -btr_page_get_sure_split_rec( -/*========================*/ - /* out: split record, or NULL if - tuple will be the first record on - upper half-page */ - btr_cur_t* cursor, /* in: cursor at which insert - should be made */ - dtuple_t* tuple) /* in: tuple to insert */ -{ - page_t* page; - ulint insert_size; - ulint free_space; - ulint total_data; - ulint total_n_recs; - ulint total_space; - ulint incl_data; - rec_t* ins_rec; - rec_t* rec; - rec_t* next_rec; - ulint n; - mem_heap_t* heap; - ulint* offsets; - - page = btr_cur_get_page(cursor); - - insert_size = rec_get_converted_size(cursor->index, tuple); - free_space = page_get_free_space_of_empty(page_is_comp(page)); - - /* free_space is now the free space of a created new page */ - - total_data = page_get_data_size(page) + insert_size; - total_n_recs = page_get_n_recs(page) + 1; - ut_ad(total_n_recs >= 2); - total_space = total_data + page_dir_calc_reserved_space(total_n_recs); - - n = 0; - incl_data = 0; - ins_rec = btr_cur_get_rec(cursor); - rec = page_get_infimum_rec(page); - - heap = NULL; - offsets = NULL; - - /* We start to include records to the left half, and when the - space reserved by them exceeds half of total_space, then if - the included records fit on the left page, they will be put there - if something was left over also for the right page, - otherwise the last included record will be the first on the right - half page */ - - for (;;) { - /* Decide the next record to include */ - if (rec == ins_rec) { - rec = NULL; /* NULL denotes that tuple is - now included */ - } else if (rec == NULL) { - rec = page_rec_get_next(ins_rec); - } else { - rec = page_rec_get_next(rec); - } - - if (rec == NULL) { - /* Include tuple */ - incl_data += insert_size; - } else { - offsets = rec_get_offsets(rec, cursor->index, - offsets, ULINT_UNDEFINED, - &heap); - incl_data += rec_offs_size(offsets); - } - - n++; - - if (incl_data + page_dir_calc_reserved_space(n) - >= total_space / 2) { - - if 
(incl_data + page_dir_calc_reserved_space(n) - <= free_space) { - /* The next record will be the first on - the right half page if it is not the - supremum record of page */ - - if (rec == ins_rec) { - rec = NULL; - - goto func_exit; - } else if (rec == NULL) { - next_rec = page_rec_get_next(ins_rec); - } else { - next_rec = page_rec_get_next(rec); - } - ut_ad(next_rec); - if (!page_rec_is_supremum(next_rec)) { - rec = next_rec; - } - } - -func_exit: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(rec); - } - } -} - -/***************************************************************** -Returns TRUE if the insert fits on the appropriate half-page with the -chosen split_rec. */ -static -ibool -btr_page_insert_fits( -/*=================*/ - /* out: TRUE if fits */ - btr_cur_t* cursor, /* in: cursor at which insert - should be made */ - rec_t* split_rec, /* in: suggestion for first record - on upper half-page, or NULL if - tuple to be inserted should be first */ - const ulint* offsets, /* in: rec_get_offsets( - split_rec, cursor->index) */ - dtuple_t* tuple, /* in: tuple to insert */ - mem_heap_t* heap) /* in: temporary memory heap */ -{ - page_t* page; - ulint insert_size; - ulint free_space; - ulint total_data; - ulint total_n_recs; - rec_t* rec; - rec_t* end_rec; - ulint* offs; - - page = btr_cur_get_page(cursor); - - ut_ad(!split_rec == !offsets); - ut_ad(!offsets - || !page_is_comp(page) == !rec_offs_comp(offsets)); - ut_ad(!offsets - || rec_offs_validate(split_rec, cursor->index, offsets)); - - insert_size = rec_get_converted_size(cursor->index, tuple); - free_space = page_get_free_space_of_empty(page_is_comp(page)); - - /* free_space is now the free space of a created new page */ - - total_data = page_get_data_size(page) + insert_size; - total_n_recs = page_get_n_recs(page) + 1; - - /* We determine which records (from rec to end_rec, not including - end_rec) will end up on the other half page from tuple when it is - inserted. 
*/ - - if (split_rec == NULL) { - rec = page_rec_get_next(page_get_infimum_rec(page)); - end_rec = page_rec_get_next(btr_cur_get_rec(cursor)); - - } else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) { - - rec = page_rec_get_next(page_get_infimum_rec(page)); - end_rec = split_rec; - } else { - rec = split_rec; - end_rec = page_get_supremum_rec(page); - } - - if (total_data + page_dir_calc_reserved_space(total_n_recs) - <= free_space) { - - /* Ok, there will be enough available space on the - half page where the tuple is inserted */ - - return(TRUE); - } - - offs = NULL; - - while (rec != end_rec) { - /* In this loop we calculate the amount of reserved - space after rec is removed from page. */ - - offs = rec_get_offsets(rec, cursor->index, offs, - ULINT_UNDEFINED, &heap); - - total_data -= rec_offs_size(offs); - total_n_recs--; - - if (total_data + page_dir_calc_reserved_space(total_n_recs) - <= free_space) { - - /* Ok, there will be enough available space on the - half page where the tuple is inserted */ - - return(TRUE); - } - - rec = page_rec_get_next(rec); - } - - return(FALSE); -} - -/*********************************************************** -Inserts a data tuple to a tree on a non-leaf level. It is assumed -that mtr holds an x-latch on the tree. 
*/ - -void -btr_insert_on_non_leaf_level( -/*=========================*/ - dict_index_t* index, /* in: index */ - ulint level, /* in: level, must be > 0 */ - dtuple_t* tuple, /* in: the record to be inserted */ - mtr_t* mtr) /* in: mtr */ -{ - big_rec_t* dummy_big_rec; - btr_cur_t cursor; - ulint err; - rec_t* rec; - - ut_ad(level > 0); - - btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE, - BTR_CONT_MODIFY_TREE, - &cursor, 0, mtr); - - err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG - | BTR_KEEP_SYS_FLAG - | BTR_NO_UNDO_LOG_FLAG, - &cursor, tuple, &rec, - &dummy_big_rec, NULL, mtr); - ut_a(err == DB_SUCCESS); -} - -/****************************************************************** -Attaches the halves of an index page on the appropriate level in an -index tree. */ -static -void -btr_attach_half_pages( -/*==================*/ - dict_index_t* index, /* in: the index tree */ - page_t* page, /* in: page to be split */ - rec_t* split_rec, /* in: first record on upper - half page */ - page_t* new_page, /* in: the new half page */ - ulint direction, /* in: FSP_UP or FSP_DOWN */ - mtr_t* mtr) /* in: mtr */ -{ - ulint space; - rec_t* node_ptr; - page_t* prev_page; - page_t* next_page; - ulint prev_page_no; - ulint next_page_no; - ulint level; - page_t* lower_page; - page_t* upper_page; - ulint lower_page_no; - ulint upper_page_no; - dtuple_t* node_ptr_upper; - mem_heap_t* heap; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(new_page), - MTR_MEMO_PAGE_X_FIX)); - ut_a(page_is_comp(page) == page_is_comp(new_page)); - - /* Create a memory heap where the data tuple is stored */ - heap = mem_heap_create(1024); - - /* Based on split direction, decide upper and lower pages */ - if (direction == FSP_DOWN) { - - lower_page_no = buf_frame_get_page_no(new_page); - upper_page_no = buf_frame_get_page_no(page); - lower_page = new_page; - upper_page = page; - - /* Look up the index for the 
node pointer to page */ - node_ptr = btr_page_get_father_node_ptr(index, page, mtr); - - /* Replace the address of the old child node (= page) with the - address of the new lower half */ - - btr_node_ptr_set_child_page_no(node_ptr, - rec_get_offsets( - node_ptr, index, - NULL, ULINT_UNDEFINED, - &heap), - lower_page_no, mtr); - mem_heap_empty(heap); - } else { - lower_page_no = buf_frame_get_page_no(page); - upper_page_no = buf_frame_get_page_no(new_page); - lower_page = page; - upper_page = new_page; - } - - /* Get the level of the split pages */ - level = btr_page_get_level(page, mtr); - - /* Build the node pointer (= node key and page address) for the upper - half */ - - node_ptr_upper = dict_index_build_node_ptr(index, split_rec, - upper_page_no, heap, level); - - /* Insert it next to the pointer to the lower half. Note that this - may generate recursion leading to a split on the higher level. */ - - btr_insert_on_non_leaf_level(index, level + 1, node_ptr_upper, mtr); - - /* Free the memory heap */ - mem_heap_free(heap); - - /* Get the previous and next pages of page */ - - prev_page_no = btr_page_get_prev(page, mtr); - next_page_no = btr_page_get_next(page, mtr); - space = buf_frame_get_space_id(page); - - /* Update page links of the level */ - - if (prev_page_no != FIL_NULL) { - - prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr); - ut_a(page_is_comp(prev_page) == page_is_comp(page)); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(prev_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - btr_page_set_next(prev_page, lower_page_no, mtr); - } - - if (next_page_no != FIL_NULL) { - - next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr); - ut_a(page_is_comp(next_page) == page_is_comp(page)); - - btr_page_set_prev(next_page, upper_page_no, mtr); - } - - btr_page_set_prev(lower_page, prev_page_no, mtr); - btr_page_set_next(lower_page, upper_page_no, mtr); - btr_page_set_level(lower_page, level, mtr); - - 
btr_page_set_prev(upper_page, lower_page_no, mtr); - btr_page_set_next(upper_page, next_page_no, mtr); - btr_page_set_level(upper_page, level, mtr); -} - -/***************************************************************** -Splits an index page to halves and inserts the tuple. It is assumed -that mtr holds an x-latch to the index tree. NOTE: the tree x-latch -is released within this function! NOTE that the operation of this -function must always succeed, we cannot reverse it: therefore -enough free disk space must be guaranteed to be available before -this function is called. */ - -rec_t* -btr_page_split_and_insert( -/*======================*/ - /* out: inserted record; NOTE: the tree - x-latch is released! NOTE: 2 free disk - pages must be available! */ - btr_cur_t* cursor, /* in: cursor at which to insert; when the - function returns, the cursor is positioned - on the predecessor of the inserted record */ - dtuple_t* tuple, /* in: tuple to insert */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* page; - ulint page_no; - byte direction; - ulint hint_page_no; - page_t* new_page; - rec_t* split_rec; - page_t* left_page; - page_t* right_page; - page_t* insert_page; - page_cur_t* page_cursor; - rec_t* first_rec; - byte* buf = 0; /* remove warning */ - rec_t* move_limit; - ibool insert_will_fit; - ulint n_iterations = 0; - rec_t* rec; - mem_heap_t* heap; - ulint n_uniq; - ulint* offsets; - - heap = mem_heap_create(1024); - n_uniq = dict_index_get_n_unique_in_tree(cursor->index); -func_start: - mem_heap_empty(heap); - offsets = NULL; - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(dict_index_get_lock(cursor->index), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - page = btr_cur_get_page(cursor); - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(page_get_n_recs(page) >= 2); - - page_no = buf_frame_get_page_no(page); - - /* 1. 
Decide the split record; split_rec == NULL means that the - tuple to be inserted should be the first record on the upper - half-page */ - - if (n_iterations > 0) { - direction = FSP_UP; - hint_page_no = page_no + 1; - split_rec = btr_page_get_sure_split_rec(cursor, tuple); - - } else if (btr_page_get_split_rec_to_right(cursor, &split_rec)) { - direction = FSP_UP; - hint_page_no = page_no + 1; - - } else if (btr_page_get_split_rec_to_left(cursor, &split_rec)) { - direction = FSP_DOWN; - hint_page_no = page_no - 1; - } else { - direction = FSP_UP; - hint_page_no = page_no + 1; - split_rec = page_get_middle_rec(page); - } - - /* 2. Allocate a new page to the index */ - new_page = btr_page_alloc(cursor->index, hint_page_no, direction, - btr_page_get_level(page, mtr), mtr); - btr_page_create(new_page, cursor->index, mtr); - - /* 3. Calculate the first record on the upper half-page, and the - first record (move_limit) on original page which ends up on the - upper half */ - - if (split_rec != NULL) { - first_rec = split_rec; - move_limit = split_rec; - } else { - buf = mem_alloc(rec_get_converted_size(cursor->index, tuple)); - - first_rec = rec_convert_dtuple_to_rec(buf, - cursor->index, tuple); - move_limit = page_rec_get_next(btr_cur_get_rec(cursor)); - } - - /* 4. Do first the modifications in the tree structure */ - - btr_attach_half_pages(cursor->index, page, first_rec, - new_page, direction, mtr); - - if (split_rec == NULL) { - mem_free(buf); - } - - /* If the split is made on the leaf level and the insert will fit - on the appropriate half-page, we may release the tree x-latch. - We can then move the records after releasing the tree latch, - thus reducing the tree latch contention. 
*/ - - if (split_rec) { - offsets = rec_get_offsets(split_rec, cursor->index, offsets, - n_uniq, &heap); - - insert_will_fit = btr_page_insert_fits(cursor, - split_rec, offsets, - tuple, heap); - } else { - insert_will_fit = btr_page_insert_fits(cursor, - NULL, NULL, - tuple, heap); - } - - if (insert_will_fit && (btr_page_get_level(page, mtr) == 0)) { - - mtr_memo_release(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK); - } - - /* 5. Move then the records to the new page */ - if (direction == FSP_DOWN) { - /* fputs("Split left\n", stderr); */ - - page_move_rec_list_start(new_page, page, move_limit, - cursor->index, mtr); - left_page = new_page; - right_page = page; - - lock_update_split_left(right_page, left_page); - } else { - /* fputs("Split right\n", stderr); */ - - page_move_rec_list_end(new_page, page, move_limit, - cursor->index, mtr); - left_page = page; - right_page = new_page; - - lock_update_split_right(right_page, left_page); - } - - /* 6. The split and the tree modification is now completed. Decide the - page where the tuple should be inserted */ - - if (split_rec == NULL) { - insert_page = right_page; - - } else { - offsets = rec_get_offsets(first_rec, cursor->index, - offsets, n_uniq, &heap); - - if (cmp_dtuple_rec(tuple, first_rec, offsets) >= 0) { - - insert_page = right_page; - } else { - insert_page = left_page; - } - } - - /* 7. 
Reposition the cursor for insert and try insertion */ - page_cursor = btr_cur_get_page_cur(cursor); - - page_cur_search(insert_page, cursor->index, tuple, - PAGE_CUR_LE, page_cursor); - - rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); - - if (rec != NULL) { - /* Insert fit on the page: update the free bits for the - left and right pages in the same mtr */ - - ibuf_update_free_bits_for_two_pages_low(cursor->index, - left_page, - right_page, mtr); - /* fprintf(stderr, "Split and insert done %lu %lu\n", - buf_frame_get_page_no(left_page), - buf_frame_get_page_no(right_page)); */ - mem_heap_free(heap); - return(rec); - } - - /* 8. If insert did not fit, try page reorganization */ - - btr_page_reorganize(insert_page, cursor->index, mtr); - - page_cur_search(insert_page, cursor->index, tuple, - PAGE_CUR_LE, page_cursor); - rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); - - if (rec == NULL) { - /* The insert did not fit on the page: loop back to the - start of the function for a new split */ - - /* We play safe and reset the free bits for new_page */ - ibuf_reset_free_bits(cursor->index, new_page); - - /* fprintf(stderr, "Split second round %lu\n", - buf_frame_get_page_no(page)); */ - n_iterations++; - ut_ad(n_iterations < 2); - ut_ad(!insert_will_fit); - - goto func_start; - } - - /* Insert fit on the page: update the free bits for the - left and right pages in the same mtr */ - - ibuf_update_free_bits_for_two_pages_low(cursor->index, left_page, - right_page, mtr); -#if 0 - fprintf(stderr, "Split and insert done %lu %lu\n", - buf_frame_get_page_no(left_page), - buf_frame_get_page_no(right_page)); -#endif - - ut_ad(page_validate(left_page, cursor->index)); - ut_ad(page_validate(right_page, cursor->index)); - - mem_heap_free(heap); - return(rec); -} - -/***************************************************************** -Removes a page from the level list of pages. 
*/ -static -void -btr_level_list_remove( -/*==================*/ - page_t* page, /* in: page to remove */ - mtr_t* mtr) /* in: mtr */ -{ - ulint space; - ulint prev_page_no; - page_t* prev_page; - ulint next_page_no; - page_t* next_page; - - ut_ad(page && mtr); - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - /* Get the previous and next page numbers of page */ - - prev_page_no = btr_page_get_prev(page, mtr); - next_page_no = btr_page_get_next(page, mtr); - space = buf_frame_get_space_id(page); - - /* Update page links of the level */ - - if (prev_page_no != FIL_NULL) { - - prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr); - ut_a(page_is_comp(prev_page) == page_is_comp(page)); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(prev_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - btr_page_set_next(prev_page, next_page_no, mtr); - } - - if (next_page_no != FIL_NULL) { - - next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr); - ut_a(page_is_comp(next_page) == page_is_comp(page)); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_prev(next_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - btr_page_set_prev(next_page, prev_page_no, mtr); - } -} - -/******************************************************************** -Writes the redo log record for setting an index record as the predefined -minimum record. */ -UNIV_INLINE -void -btr_set_min_rec_mark_log( -/*=====================*/ - rec_t* rec, /* in: record */ - ulint comp, /* nonzero=compact record format */ - mtr_t* mtr) /* in: mtr */ -{ - mlog_write_initial_log_record( - rec, comp ? MLOG_COMP_REC_MIN_MARK : MLOG_REC_MIN_MARK, mtr); - - /* Write rec offset as a 2-byte ulint */ - mlog_catenate_ulint(mtr, page_offset(rec), MLOG_2BYTES); -} - -/******************************************************************** -Parses the redo log record for setting an index record as the predefined -minimum record. 
*/ - -byte* -btr_parse_set_min_rec_mark( -/*=======================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - ulint comp, /* in: nonzero=compact page format */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ -{ - rec_t* rec; - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - if (page) { - ut_a(!page_is_comp(page) == !comp); - - rec = page + mach_read_from_2(ptr); - - btr_set_min_rec_mark(rec, comp, mtr); - } - - return(ptr + 2); -} - -/******************************************************************** -Sets a record as the predefined minimum record. */ - -void -btr_set_min_rec_mark( -/*=================*/ - rec_t* rec, /* in: record */ - ulint comp, /* in: nonzero=compact page format */ - mtr_t* mtr) /* in: mtr */ -{ - ulint info_bits; - - info_bits = rec_get_info_bits(rec, comp); - - rec_set_info_bits(rec, comp, info_bits | REC_INFO_MIN_REC_FLAG); - - btr_set_min_rec_mark_log(rec, comp, mtr); -} - -/***************************************************************** -Deletes on the upper level the node pointer to a page. 
*/ - -void -btr_node_ptr_delete( -/*================*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page whose node pointer is deleted */ - mtr_t* mtr) /* in: mtr */ -{ - rec_t* node_ptr; - btr_cur_t cursor; - ibool compressed; - ulint err; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - /* Delete node pointer on father page */ - - node_ptr = btr_page_get_father_node_ptr(index, page, mtr); - - btr_cur_position(index, node_ptr, &cursor); - compressed = btr_cur_pessimistic_delete(&err, TRUE, &cursor, FALSE, - mtr); - ut_a(err == DB_SUCCESS); - - if (!compressed) { - btr_cur_compress_if_useful(&cursor, mtr); - } -} - -/***************************************************************** -If page is the only on its level, this function moves its records to the -father page, thus reducing the tree height. */ -static -void -btr_lift_page_up( -/*=============*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page which is the only on its level; - must not be empty: use - btr_discard_only_page_on_level if the last - record from the page should be removed */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* father_page; - page_t* iter_page; - page_t* pages[BTR_MAX_LEVELS]; - ulint page_level; - ulint root_page_no; - ulint ancestors; - ulint i; - - ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); - ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - father_page = buf_frame_align( - btr_page_get_father_node_ptr(index, page, mtr)); - - page_level = btr_page_get_level(page, mtr); - root_page_no = dict_index_get_page(index); - - ancestors = 1; - pages[0] = father_page; - - /* Store all ancestor pages so we can reset their levels later on. - We have to do all the searches on the tree now because later on, - after we've replaced the first level, the tree is in an inconsistent - state and can not be searched. 
*/ - iter_page = father_page; - for (;;) { - if (buf_block_get_page_no(buf_block_align(iter_page)) - == root_page_no) { - - break; - } - - ut_a(ancestors < BTR_MAX_LEVELS); - - iter_page = buf_frame_align( - btr_page_get_father_node_ptr(index, iter_page, mtr)); - - pages[ancestors++] = iter_page; - } - - btr_search_drop_page_hash_index(page); - - /* Make the father empty */ - btr_page_empty(father_page, mtr); - - /* Move records to the father */ - page_copy_rec_list_end(father_page, page, page_get_infimum_rec(page), - index, mtr); - lock_update_copy_and_discard(father_page, page); - - /* Go upward to root page, decreasing levels by one. */ - for (i = 0; i < ancestors; i++) { - iter_page = pages[i]; - - ut_ad(btr_page_get_level(iter_page, mtr) == (page_level + 1)); - - btr_page_set_level(iter_page, page_level, mtr); - page_level++; - } - - /* Free the file page */ - btr_page_free(index, page, mtr); - - /* We play safe and reset the free bits for the father */ - ibuf_reset_free_bits(index, father_page); - ut_ad(page_validate(father_page, index)); - ut_ad(btr_check_node_ptr(index, father_page, mtr)); -} - -/***************************************************************** -Tries to merge the page first to the left immediate brother if such a -brother exists, and the node pointers to the current page and to the brother -reside on the same page. If the left brother does not satisfy these -conditions, looks at the right brother. If the page is the only one on that -level lifts the records of the page to the father page, thus reducing the -tree height. It is assumed that mtr holds an x-latch on the tree and on the -page. If cursor is on the leaf level, mtr must also hold x-latches to the -brothers, if they exist. NOTE: it is assumed that the caller has reserved -enough free extents so that the compression will always succeed if done! 
*/ - -void -btr_compress( -/*=========*/ - btr_cur_t* cursor, /* in: cursor on the page to merge or lift; - the page must not be empty: in record delete - use btr_discard_page if the page would become - empty */ - mtr_t* mtr) /* in: mtr */ -{ - dict_index_t* index; - ulint space; - ulint left_page_no; - ulint right_page_no; - page_t* merge_page; - page_t* father_page; - ibool is_left; - page_t* page; - rec_t* orig_pred; - rec_t* orig_succ; - rec_t* node_ptr; - ulint data_size; - ulint n_recs; - ulint max_ins_size; - ulint max_ins_size_reorg; - ulint level; - ulint comp; - - page = btr_cur_get_page(cursor); - index = btr_cur_get_index(cursor); - comp = page_is_comp(page); - ut_a((ibool)!!comp == dict_table_is_comp(index->table)); - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - level = btr_page_get_level(page, mtr); - space = dict_index_get_space(index); - - left_page_no = btr_page_get_prev(page, mtr); - right_page_no = btr_page_get_next(page, mtr); - -#if 0 - fprintf(stderr, "Merge left page %lu right %lu \n", - left_page_no, right_page_no); -#endif - - node_ptr = btr_page_get_father_node_ptr(index, page, mtr); - ut_ad(!comp || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR); - father_page = buf_frame_align(node_ptr); - ut_a(comp == page_is_comp(father_page)); - - /* Decide the page to which we try to merge and which will inherit - the locks */ - - is_left = left_page_no != FIL_NULL; - - if (is_left) { - - merge_page = btr_page_get(space, left_page_no, RW_X_LATCH, - mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(merge_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - } else if (right_page_no != FIL_NULL) { - - merge_page = btr_page_get(space, right_page_no, RW_X_LATCH, - mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_prev(merge_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - } else { - /* 
The page is the only one on the level, lift the records - to the father */ - btr_lift_page_up(index, page, mtr); - - return; - } - - n_recs = page_get_n_recs(page); - data_size = page_get_data_size(page); - ut_a(page_is_comp(merge_page) == comp); - - max_ins_size_reorg = page_get_max_insert_size_after_reorganize( - merge_page, n_recs); - if (data_size > max_ins_size_reorg) { - - /* No space for merge */ - - return; - } - - ut_ad(page_validate(merge_page, index)); - - max_ins_size = page_get_max_insert_size(merge_page, n_recs); - - if (data_size > max_ins_size) { - - /* We have to reorganize merge_page */ - - btr_page_reorganize(merge_page, index, mtr); - - max_ins_size = page_get_max_insert_size(merge_page, n_recs); - - ut_ad(page_validate(merge_page, index)); - ut_ad(page_get_max_insert_size(merge_page, n_recs) - == max_ins_size_reorg); - } - - if (data_size > max_ins_size) { - - /* Add fault tolerance, though this should never happen */ - - return; - } - - btr_search_drop_page_hash_index(page); - - /* Remove the page from the level list */ - btr_level_list_remove(page, mtr); - - if (is_left) { - btr_node_ptr_delete(index, page, mtr); - } else { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - /* Replace the address of the old child node (= page) with the - address of the merge page to the right */ - - btr_node_ptr_set_child_page_no(node_ptr, - rec_get_offsets( - node_ptr, index, - offsets_, - ULINT_UNDEFINED, - &heap), - right_page_no, mtr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - btr_node_ptr_delete(index, merge_page, mtr); - } - - /* Move records to the merge page */ - if (is_left) { - orig_pred = page_rec_get_prev( - page_get_supremum_rec(merge_page)); - page_copy_rec_list_start(merge_page, page, - page_get_supremum_rec(page), - index, mtr); - - lock_update_merge_left(merge_page, orig_pred, page); - } else { - orig_succ = page_rec_get_next( - 
page_get_infimum_rec(merge_page)); - page_copy_rec_list_end(merge_page, page, - page_get_infimum_rec(page), - index, mtr); - - lock_update_merge_right(orig_succ, page); - } - - /* We have added new records to merge_page: update its free bits */ - ibuf_update_free_bits_if_full(index, merge_page, - UNIV_PAGE_SIZE, ULINT_UNDEFINED); - - ut_ad(page_validate(merge_page, index)); - - /* Free the file page */ - btr_page_free(index, page, mtr); - - ut_ad(btr_check_node_ptr(index, merge_page, mtr)); -} - -/***************************************************************** -Discards a page that is the only page on its level. */ -static -void -btr_discard_only_page_on_level( -/*===========================*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page which is the only on its level */ - mtr_t* mtr) /* in: mtr */ -{ - rec_t* node_ptr; - page_t* father_page; - ulint page_level; - - ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); - ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - btr_search_drop_page_hash_index(page); - - node_ptr = btr_page_get_father_node_ptr(index, page, mtr); - father_page = buf_frame_align(node_ptr); - - page_level = btr_page_get_level(page, mtr); - - lock_update_discard(page_get_supremum_rec(father_page), page); - - btr_page_set_level(father_page, page_level, mtr); - - /* Free the file page */ - btr_page_free(index, page, mtr); - - if (buf_frame_get_page_no(father_page) == dict_index_get_page(index)) { - /* The father is the root page */ - - btr_page_empty(father_page, mtr); - - /* We play safe and reset the free bits for the father */ - ibuf_reset_free_bits(index, father_page); - } else { - ut_ad(page_get_n_recs(father_page) == 1); - - btr_discard_only_page_on_level(index, father_page, mtr); - } -} - -/***************************************************************** -Discards a page from a B-tree. 
This is used to remove the last record from -a B-tree page: the whole page must be removed at the same time. This cannot -be used for the root page, which is allowed to be empty. */ - -void -btr_discard_page( -/*=============*/ - btr_cur_t* cursor, /* in: cursor on the page to discard: not on - the root page */ - mtr_t* mtr) /* in: mtr */ -{ - dict_index_t* index; - ulint space; - ulint left_page_no; - ulint right_page_no; - page_t* merge_page; - page_t* page; - rec_t* node_ptr; - - page = btr_cur_get_page(cursor); - index = btr_cur_get_index(cursor); - - ut_ad(dict_index_get_page(index) != buf_frame_get_page_no(page)); - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - space = dict_index_get_space(index); - - /* Decide the page which will inherit the locks */ - - left_page_no = btr_page_get_prev(page, mtr); - right_page_no = btr_page_get_next(page, mtr); - - if (left_page_no != FIL_NULL) { - merge_page = btr_page_get(space, left_page_no, RW_X_LATCH, - mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(merge_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - } else if (right_page_no != FIL_NULL) { - merge_page = btr_page_get(space, right_page_no, RW_X_LATCH, - mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_prev(merge_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - } else { - btr_discard_only_page_on_level(index, page, mtr); - - return; - } - - ut_a(page_is_comp(merge_page) == page_is_comp(page)); - btr_search_drop_page_hash_index(page); - - if (left_page_no == FIL_NULL && btr_page_get_level(page, mtr) > 0) { - - /* We have to mark the leftmost node pointer on the right - side page as the predefined minimum record */ - - node_ptr = page_rec_get_next(page_get_infimum_rec(merge_page)); - - ut_ad(page_rec_is_user_rec(node_ptr)); - - btr_set_min_rec_mark(node_ptr, page_is_comp(merge_page), mtr); - } - - 
btr_node_ptr_delete(index, page, mtr); - - /* Remove the page from the level list */ - btr_level_list_remove(page, mtr); - - if (left_page_no != FIL_NULL) { - lock_update_discard(page_get_supremum_rec(merge_page), page); - } else { - lock_update_discard(page_rec_get_next( - page_get_infimum_rec(merge_page)), - page); - } - - /* Free the file page */ - btr_page_free(index, page, mtr); - - ut_ad(btr_check_node_ptr(index, merge_page, mtr)); -} - -#ifdef UNIV_BTR_PRINT -/***************************************************************** -Prints size info of a B-tree. */ - -void -btr_print_size( -/*===========*/ - dict_index_t* index) /* in: index tree */ -{ - page_t* root; - fseg_header_t* seg; - mtr_t mtr; - - if (index->type & DICT_IBUF) { - fputs("Sorry, cannot print info of an ibuf tree:" - " use ibuf functions\n", stderr); - - return; - } - - mtr_start(&mtr); - - root = btr_root_get(index, &mtr); - - seg = root + PAGE_HEADER + PAGE_BTR_SEG_TOP; - - fputs("INFO OF THE NON-LEAF PAGE SEGMENT\n", stderr); - fseg_print(seg, &mtr); - - if (!(index->type & DICT_UNIVERSAL)) { - - seg = root + PAGE_HEADER + PAGE_BTR_SEG_LEAF; - - fputs("INFO OF THE LEAF PAGE SEGMENT\n", stderr); - fseg_print(seg, &mtr); - } - - mtr_commit(&mtr); -} - -/**************************************************************** -Prints recursively index tree pages. 
*/ -static -void -btr_print_recursive( -/*================*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: index page */ - ulint width, /* in: print this many entries from start - and end */ - mem_heap_t** heap, /* in/out: heap for rec_get_offsets() */ - ulint** offsets,/* in/out: buffer for rec_get_offsets() */ - mtr_t* mtr) /* in: mtr */ -{ - page_cur_t cursor; - ulint n_recs; - ulint i = 0; - mtr_t mtr2; - rec_t* node_ptr; - page_t* child; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n", - (ulong) btr_page_get_level(page, mtr), - (ulong) buf_frame_get_page_no(page)); - - page_print(page, index, width, width); - - n_recs = page_get_n_recs(page); - - page_cur_set_before_first(page, &cursor); - page_cur_move_to_next(&cursor); - - while (!page_cur_is_after_last(&cursor)) { - - if (0 == btr_page_get_level(page, mtr)) { - - /* If this is the leaf level, do nothing */ - - } else if ((i <= width) || (i >= n_recs - width)) { - - mtr_start(&mtr2); - - node_ptr = page_cur_get_rec(&cursor); - - *offsets = rec_get_offsets(node_ptr, index, *offsets, - ULINT_UNDEFINED, heap); - child = btr_node_ptr_get_child(node_ptr, - *offsets, &mtr2); - btr_print_recursive(index, child, width, - heap, offsets, &mtr2); - mtr_commit(&mtr2); - } - - page_cur_move_to_next(&cursor); - i++; - } -} - -/****************************************************************** -Prints directories and other info of all nodes in the tree. 
*/ - -void -btr_print_index( -/*============*/ - dict_index_t* index, /* in: index */ - ulint width) /* in: print this many entries from start - and end */ -{ - mtr_t mtr; - page_t* root; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - fputs("--------------------------\n" - "INDEX TREE PRINT\n", stderr); - - mtr_start(&mtr); - - root = btr_root_get(index, &mtr); - - btr_print_recursive(index, root, width, &heap, &offsets, &mtr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - mtr_commit(&mtr); - - btr_validate_index(index, NULL); -} -#endif /* UNIV_BTR_PRINT */ - -#ifdef UNIV_DEBUG -/**************************************************************** -Checks that the node pointer to a page is appropriate. */ - -ibool -btr_check_node_ptr( -/*===============*/ - /* out: TRUE */ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: index page */ - mtr_t* mtr) /* in: mtr */ -{ - mem_heap_t* heap; - rec_t* node_ptr; - dtuple_t* node_ptr_tuple; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - if (dict_index_get_page(index) == buf_frame_get_page_no(page)) { - - return(TRUE); - } - - node_ptr = btr_page_get_father_node_ptr(index, page, mtr); - - if (btr_page_get_level(page, mtr) == 0) { - - return(TRUE); - } - - heap = mem_heap_create(256); - - node_ptr_tuple = dict_index_build_node_ptr( - index, page_rec_get_next(page_get_infimum_rec(page)), 0, heap, - btr_page_get_level(page, mtr)); - - ut_a(!cmp_dtuple_rec(node_ptr_tuple, node_ptr, - rec_get_offsets(node_ptr, index, - NULL, ULINT_UNDEFINED, &heap))); - - mem_heap_free(heap); - - return(TRUE); -} -#endif /* UNIV_DEBUG */ - -/**************************************************************** -Display identification information for a record. 
*/ -static -void -btr_index_rec_validate_report( -/*==========================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: index record */ - dict_index_t* index) /* in: index */ -{ - fputs("InnoDB: Record in ", stderr); - dict_index_name_print(stderr, NULL, index); - fprintf(stderr, ", page %lu, at offset %lu\n", - buf_frame_get_page_no(page), (ulint)(rec - page)); -} - -/**************************************************************** -Checks the size and number of fields in a record based on the definition of -the index. */ - -ibool -btr_index_rec_validate( -/*===================*/ - /* out: TRUE if ok */ - rec_t* rec, /* in: index record */ - dict_index_t* index, /* in: index */ - ibool dump_on_error) /* in: TRUE if the function - should print hex dump of record - and page on error */ -{ - ulint len; - ulint n; - ulint i; - page_t* page; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - page = buf_frame_align(rec); - - if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) { - /* The insert buffer index tree can contain records from any - other index: we cannot check the number of fields or - their length */ - - return(TRUE); - } - - if (UNIV_UNLIKELY((ibool)!!page_is_comp(page) - != dict_table_is_comp(index->table))) { - btr_index_rec_validate_report(page, rec, index); - fprintf(stderr, "InnoDB: compact flag=%lu, should be %lu\n", - (ulong) !!page_is_comp(page), - (ulong) dict_table_is_comp(index->table)); - - return(FALSE); - } - - n = dict_index_get_n_fields(index); - - if (!page_is_comp(page) - && UNIV_UNLIKELY(rec_get_n_fields_old(rec) != n)) { - btr_index_rec_validate_report(page, rec, index); - fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n", - (ulong) rec_get_n_fields_old(rec), (ulong) n); - - if (dump_on_error) { - buf_page_print(page); - - fputs("InnoDB: corrupt record ", stderr); - rec_print_old(stderr, rec); - putc('\n', 
stderr); - } - return(FALSE); - } - - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - - for (i = 0; i < n; i++) { - ulint fixed_size = dict_col_get_fixed_size( - dict_index_get_nth_col(index, i)); - - rec_get_nth_field(rec, offsets, i, &len); - - /* Note that if fixed_size != 0, it equals the - length of a fixed-size column in the clustered index. - A prefix index of the column is of fixed, but different - length. When fixed_size == 0, prefix_len is the maximum - length of the prefix index column. */ - - if ((dict_index_get_nth_field(index, i)->prefix_len == 0 - && len != UNIV_SQL_NULL && fixed_size - && len != fixed_size) - || (dict_index_get_nth_field(index, i)->prefix_len > 0 - && len != UNIV_SQL_NULL - && len - > dict_index_get_nth_field(index, i)->prefix_len)) { - - btr_index_rec_validate_report(page, rec, index); - fprintf(stderr, - "InnoDB: field %lu len is %lu," - " should be %lu\n", - (ulong) i, (ulong) len, (ulong) fixed_size); - - if (dump_on_error) { - buf_page_print(page); - - fputs("InnoDB: corrupt record ", stderr); - rec_print_new(stderr, rec, offsets); - putc('\n', stderr); - } - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(FALSE); - } - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(TRUE); -} - -/**************************************************************** -Checks the size and number of fields in records based on the definition of -the index. 
*/ -static -ibool -btr_index_page_validate( -/*====================*/ - /* out: TRUE if ok */ - page_t* page, /* in: index page */ - dict_index_t* index) /* in: index */ -{ - page_cur_t cur; - ibool ret = TRUE; - - page_cur_set_before_first(page, &cur); - page_cur_move_to_next(&cur); - - for (;;) { - if (page_cur_is_after_last(&cur)) { - - break; - } - - if (!btr_index_rec_validate(cur.rec, index, TRUE)) { - - return(FALSE); - } - - page_cur_move_to_next(&cur); - } - - return(ret); -} - -/**************************************************************** -Report an error on one page of an index tree. */ -static -void -btr_validate_report1( -/*=================*/ - /* out: TRUE if ok */ - dict_index_t* index, /* in: index */ - ulint level, /* in: B-tree level */ - page_t* page) /* in: index page */ -{ - fprintf(stderr, "InnoDB: Error in page %lu of ", - buf_frame_get_page_no(page)); - dict_index_name_print(stderr, NULL, index); - if (level) { - fprintf(stderr, ", index tree level %lu", level); - } - putc('\n', stderr); -} - -/**************************************************************** -Report an error on two pages of an index tree. */ -static -void -btr_validate_report2( -/*=================*/ - /* out: TRUE if ok */ - dict_index_t* index, /* in: index */ - ulint level, /* in: B-tree level */ - page_t* page1, /* in: first index page */ - page_t* page2) /* in: second index page */ -{ - fprintf(stderr, "InnoDB: Error in pages %lu and %lu of ", - buf_frame_get_page_no(page1), - buf_frame_get_page_no(page2)); - dict_index_name_print(stderr, NULL, index); - if (level) { - fprintf(stderr, ", index tree level %lu", level); - } - putc('\n', stderr); -} - -/**************************************************************** -Validates index tree level. 
*/ -static -ibool -btr_validate_level( -/*===============*/ - /* out: TRUE if ok */ - dict_index_t* index, /* in: index tree */ - trx_t* trx, /* in: transaction or NULL */ - ulint level) /* in: level number */ -{ - ulint space; - page_t* page; - page_t* right_page = 0; /* remove warning */ - page_t* father_page; - page_t* right_father_page; - rec_t* node_ptr; - rec_t* right_node_ptr; - rec_t* rec; - ulint right_page_no; - ulint left_page_no; - page_cur_t cursor; - dtuple_t* node_ptr_tuple; - ibool ret = TRUE; - mtr_t mtr; - mem_heap_t* heap = mem_heap_create(256); - ulint* offsets = NULL; - ulint* offsets2= NULL; - - mtr_start(&mtr); - - mtr_x_lock(dict_index_get_lock(index), &mtr); - - page = btr_root_get(index, &mtr); - - space = buf_frame_get_space_id(page); - - while (level != btr_page_get_level(page, &mtr)) { - - ut_a(btr_page_get_level(page, &mtr) > 0); - - page_cur_set_before_first(page, &cursor); - page_cur_move_to_next(&cursor); - - node_ptr = page_cur_get_rec(&cursor); - offsets = rec_get_offsets(node_ptr, index, offsets, - ULINT_UNDEFINED, &heap); - page = btr_node_ptr_get_child(node_ptr, offsets, &mtr); - } - - /* Now we are on the desired level. Loop through the pages on that - level. */ -loop: - if (trx_is_interrupted(trx)) { - mtr_commit(&mtr); - mem_heap_free(heap); - return(ret); - } - mem_heap_empty(heap); - offsets = offsets2 = NULL; - mtr_x_lock(dict_index_get_lock(index), &mtr); - - /* Check ordering etc. of records */ - - if (!page_validate(page, index)) { - btr_validate_report1(index, level, page); - - ret = FALSE; - } else if (level == 0) { - /* We are on level 0. Check that the records have the right - number of fields, and field lengths are right. 
*/ - - if (!btr_index_page_validate(page, index)) { - - ret = FALSE; - } - } - - ut_a(btr_page_get_level(page, &mtr) == level); - - right_page_no = btr_page_get_next(page, &mtr); - left_page_no = btr_page_get_prev(page, &mtr); - - ut_a((page_get_n_recs(page) > 0) - || ((level == 0) - && (buf_frame_get_page_no(page) - == dict_index_get_page(index)))); - - if (right_page_no != FIL_NULL) { - rec_t* right_rec; - right_page = btr_page_get(space, right_page_no, RW_X_LATCH, - &mtr); - if (UNIV_UNLIKELY(btr_page_get_prev(right_page, &mtr) - != buf_frame_get_page_no(page))) { - btr_validate_report2(index, level, page, right_page); - fputs("InnoDB: broken FIL_PAGE_NEXT" - " or FIL_PAGE_PREV links\n", stderr); - buf_page_print(page); - buf_page_print(right_page); - - ret = FALSE; - } - - if (UNIV_UNLIKELY(page_is_comp(right_page) - != page_is_comp(page))) { - btr_validate_report2(index, level, page, right_page); - fputs("InnoDB: 'compact' flag mismatch\n", stderr); - buf_page_print(page); - buf_page_print(right_page); - - ret = FALSE; - - goto node_ptr_fails; - } - - rec = page_rec_get_prev(page_get_supremum_rec(page)); - right_rec = page_rec_get_next(page_get_infimum_rec( - right_page)); - offsets = rec_get_offsets(rec, index, - offsets, ULINT_UNDEFINED, &heap); - offsets2 = rec_get_offsets(right_rec, index, - offsets2, ULINT_UNDEFINED, &heap); - if (UNIV_UNLIKELY(cmp_rec_rec(rec, right_rec, - offsets, offsets2, - index) >= 0)) { - - btr_validate_report2(index, level, page, right_page); - - fputs("InnoDB: records in wrong order" - " on adjacent pages\n", stderr); - - buf_page_print(page); - buf_page_print(right_page); - - fputs("InnoDB: record ", stderr); - rec = page_rec_get_prev(page_get_supremum_rec(page)); - rec_print(stderr, rec, index); - putc('\n', stderr); - fputs("InnoDB: record ", stderr); - rec = page_rec_get_next( - page_get_infimum_rec(right_page)); - rec_print(stderr, rec, index); - putc('\n', stderr); - - ret = FALSE; - } - } - - if (level > 0 && left_page_no 
== FIL_NULL) { - ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits( - page_rec_get_next(page_get_infimum_rec(page)), - page_is_comp(page))); - } - - if (buf_frame_get_page_no(page) != dict_index_get_page(index)) { - - /* Check father node pointers */ - - node_ptr = btr_page_get_father_node_ptr(index, page, &mtr); - father_page = buf_frame_align(node_ptr); - offsets = rec_get_offsets(node_ptr, index, - offsets, ULINT_UNDEFINED, &heap); - - if (btr_node_ptr_get_child_page_no(node_ptr, offsets) - != buf_frame_get_page_no(page) - || node_ptr != btr_page_get_father_for_rec( - index, page, - page_rec_get_prev(page_get_supremum_rec(page)), - &mtr)) { - btr_validate_report1(index, level, page); - - fputs("InnoDB: node pointer to the page is wrong\n", - stderr); - - buf_page_print(father_page); - buf_page_print(page); - - fputs("InnoDB: node ptr ", stderr); - rec_print_new(stderr, node_ptr, offsets); - - fprintf(stderr, "\n" - "InnoDB: node ptr child page n:o %lu\n", - (unsigned long) btr_node_ptr_get_child_page_no - (node_ptr, offsets)); - - fputs("InnoDB: record on page ", stderr); - rec = btr_page_get_father_for_rec( - index, page, - page_rec_get_prev(page_get_supremum_rec(page)), - &mtr); - rec_print(stderr, rec, index); - putc('\n', stderr); - ret = FALSE; - - goto node_ptr_fails; - } - - if (btr_page_get_level(page, &mtr) > 0) { - offsets = rec_get_offsets(node_ptr, index, - offsets, ULINT_UNDEFINED, - &heap); - - node_ptr_tuple = dict_index_build_node_ptr( - index, - page_rec_get_next(page_get_infimum_rec(page)), - 0, heap, btr_page_get_level(page, &mtr)); - - if (cmp_dtuple_rec(node_ptr_tuple, node_ptr, - offsets)) { - rec_t* first_rec = page_rec_get_next( - page_get_infimum_rec(page)); - - btr_validate_report1(index, level, page); - - buf_page_print(father_page); - buf_page_print(page); - - fputs("InnoDB: Error: node ptrs differ" - " on levels > 0\n" - "InnoDB: node ptr ", stderr); - rec_print_new(stderr, node_ptr, offsets); - fputs("InnoDB: first rec ", stderr); - 
rec_print(stderr, first_rec, index); - putc('\n', stderr); - ret = FALSE; - - goto node_ptr_fails; - } - } - - if (left_page_no == FIL_NULL) { - ut_a(node_ptr == page_rec_get_next( - page_get_infimum_rec(father_page))); - ut_a(btr_page_get_prev(father_page, &mtr) == FIL_NULL); - } - - if (right_page_no == FIL_NULL) { - ut_a(node_ptr == page_rec_get_prev( - page_get_supremum_rec(father_page))); - ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL); - } else { - right_node_ptr = btr_page_get_father_node_ptr( - index, right_page, &mtr); - if (page_rec_get_next(node_ptr) - != page_get_supremum_rec(father_page)) { - - if (right_node_ptr - != page_rec_get_next(node_ptr)) { - ret = FALSE; - fputs("InnoDB: node pointer to" - " the right page is wrong\n", - stderr); - - btr_validate_report1(index, level, - page); - - buf_page_print(father_page); - buf_page_print(page); - buf_page_print(right_page); - } - } else { - right_father_page = buf_frame_align( - right_node_ptr); - - if (right_node_ptr != page_rec_get_next( - page_get_infimum_rec( - right_father_page))) { - ret = FALSE; - fputs("InnoDB: node pointer 2 to" - " the right page is wrong\n", - stderr); - - btr_validate_report1(index, level, - page); - - buf_page_print(father_page); - buf_page_print(right_father_page); - buf_page_print(page); - buf_page_print(right_page); - } - - if (buf_frame_get_page_no(right_father_page) - != btr_page_get_next(father_page, &mtr)) { - - ret = FALSE; - fputs("InnoDB: node pointer 3 to" - " the right page is wrong\n", - stderr); - - btr_validate_report1(index, level, - page); - - buf_page_print(father_page); - buf_page_print(right_father_page); - buf_page_print(page); - buf_page_print(right_page); - } - } - } - } - -node_ptr_fails: - /* Commit the mini-transaction to release the latch on 'page'. - Re-acquire the latch on right_page, which will become 'page' - on the next loop. The page has already been checked. 
*/ - mtr_commit(&mtr); - - if (right_page_no != FIL_NULL) { - mtr_start(&mtr); - - page = btr_page_get(space, right_page_no, RW_X_LATCH, &mtr); - - goto loop; - } - - mem_heap_free(heap); - return(ret); -} - -/****************************************************************** -Checks the consistency of an index tree. */ - -ibool -btr_validate_index( -/*===============*/ - /* out: TRUE if ok */ - dict_index_t* index, /* in: index */ - trx_t* trx) /* in: transaction or NULL */ -{ - mtr_t mtr; - page_t* root; - ulint i; - ulint n; - - mtr_start(&mtr); - mtr_x_lock(dict_index_get_lock(index), &mtr); - - root = btr_root_get(index, &mtr); - n = btr_page_get_level(root, &mtr); - - for (i = 0; i <= n && !trx_is_interrupted(trx); i++) { - if (!btr_validate_level(index, trx, n - i)) { - - mtr_commit(&mtr); - - return(FALSE); - } - } - - mtr_commit(&mtr); - - return(TRUE); -} diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c deleted file mode 100644 index a2f62255dd6..00000000000 --- a/storage/innobase/btr/btr0cur.c +++ /dev/null @@ -1,3848 +0,0 @@ -/****************************************************** -The index tree cursor - -All changes that row operations make to a B-tree or the records -there must go through this module! Undo log records are written here -of every modify or insert of a clustered index record. - - NOTE!!! -To make sure we do not run out of disk space during a pessimistic -insert or update, we have to reserve 2 x the height of the index tree -many pages in the tablespace before we start the operation, because -if leaf splitting has been started, it is difficult to undo, except -by crashing the database and doing a roll-forward. 
- -(c) 1994-2001 Innobase Oy - -Created 10/16/1994 Heikki Tuuri -*******************************************************/ - -#include "btr0cur.h" - -#ifdef UNIV_NONINL -#include "btr0cur.ic" -#endif - -#include "page0page.h" -#include "rem0rec.h" -#include "rem0cmp.h" -#include "btr0btr.h" -#include "btr0sea.h" -#include "row0upd.h" -#include "trx0rec.h" -#include "que0que.h" -#include "row0row.h" -#include "srv0srv.h" -#include "ibuf0ibuf.h" -#include "lock0lock.h" - -#ifdef UNIV_DEBUG -/* If the following is set to TRUE, this module prints a lot of -trace information of individual record operations */ -ibool btr_cur_print_record_ops = FALSE; -#endif /* UNIV_DEBUG */ - -ulint btr_cur_n_non_sea = 0; -ulint btr_cur_n_sea = 0; -ulint btr_cur_n_non_sea_old = 0; -ulint btr_cur_n_sea_old = 0; - -/* In the optimistic insert, if the insert does not fit, but this much space -can be released by page reorganize, then it is reorganized */ - -#define BTR_CUR_PAGE_REORGANIZE_LIMIT (UNIV_PAGE_SIZE / 32) - -/* When estimating number of different key values in an index, sample -this many index pages */ -#define BTR_KEY_VAL_ESTIMATE_N_PAGES 8 - -/* The structure of a BLOB part header */ -/*--------------------------------------*/ -#define BTR_BLOB_HDR_PART_LEN 0 /* BLOB part len on this - page */ -#define BTR_BLOB_HDR_NEXT_PAGE_NO 4 /* next BLOB part page no, - FIL_NULL if none */ -/*--------------------------------------*/ -#define BTR_BLOB_HDR_SIZE 8 - -/*********************************************************************** -Marks all extern fields in a record as owned by the record. This function -should be called if the delete mark of a record is removed: a not delete -marked record always owns all its extern fields. 
*/ -static -void -btr_cur_unmark_extern_fields( -/*=========================*/ - rec_t* rec, /* in: record in a clustered index */ - mtr_t* mtr, /* in: mtr */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/*********************************************************************** -Adds path information to the cursor for the current page, for which -the binary search has been performed. */ -static -void -btr_cur_add_path_info( -/*==================*/ - btr_cur_t* cursor, /* in: cursor positioned on a page */ - ulint height, /* in: height of the page in tree; - 0 means leaf node */ - ulint root_height); /* in: root node height in tree */ -/*************************************************************** -Frees the externally stored fields for a record, if the field is mentioned -in the update vector. */ -static -void -btr_rec_free_updated_extern_fields( -/*===============================*/ - dict_index_t* index, /* in: index of rec; the index tree MUST be - X-latched */ - rec_t* rec, /* in: record */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - upd_t* update, /* in: update vector */ - ibool do_not_free_inherited,/* in: TRUE if called in a - rollback and we do not want to free - inherited fields */ - mtr_t* mtr); /* in: mini-transaction handle which contains - an X-latch to record page and to the tree */ -/*************************************************************** -Gets the externally stored size of a record, in units of a database page. */ -static -ulint -btr_rec_get_externally_stored_len( -/*==============================*/ - /* out: externally stored part, - in units of a database page */ - rec_t* rec, /* in: record */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ - -/*==================== B-TREE SEARCH =========================*/ - -/************************************************************************ -Latches the leaf page or pages requested. 
*/ -static -void -btr_cur_latch_leaves( -/*=================*/ - page_t* page, /* in: leaf page where the search - converged */ - ulint space, /* in: space id */ - ulint page_no, /* in: page number of the leaf */ - ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */ - btr_cur_t* cursor, /* in: cursor */ - mtr_t* mtr) /* in: mtr */ -{ - ulint left_page_no; - ulint right_page_no; - page_t* get_page; - - ut_ad(page && mtr); - - if (latch_mode == BTR_SEARCH_LEAF) { - - get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr); - ut_a(page_is_comp(get_page) == page_is_comp(page)); - buf_block_align(get_page)->check_index_page_at_flush = TRUE; - - } else if (latch_mode == BTR_MODIFY_LEAF) { - - get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr); - ut_a(page_is_comp(get_page) == page_is_comp(page)); - buf_block_align(get_page)->check_index_page_at_flush = TRUE; - - } else if (latch_mode == BTR_MODIFY_TREE) { - - /* x-latch also brothers from left to right */ - left_page_no = btr_page_get_prev(page, mtr); - - if (left_page_no != FIL_NULL) { - get_page = btr_page_get(space, left_page_no, - RW_X_LATCH, mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(get_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - ut_a(page_is_comp(get_page) == page_is_comp(page)); - buf_block_align(get_page)->check_index_page_at_flush - = TRUE; - } - - get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr); - ut_a(page_is_comp(get_page) == page_is_comp(page)); - buf_block_align(get_page)->check_index_page_at_flush = TRUE; - - right_page_no = btr_page_get_next(page, mtr); - - if (right_page_no != FIL_NULL) { - get_page = btr_page_get(space, right_page_no, - RW_X_LATCH, mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_prev(get_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - buf_block_align(get_page)->check_index_page_at_flush - = TRUE; - } - - } else if (latch_mode == BTR_SEARCH_PREV) { - - /* s-latch also left brother */ - left_page_no = 
btr_page_get_prev(page, mtr); - - if (left_page_no != FIL_NULL) { - cursor->left_page = btr_page_get(space, left_page_no, - RW_S_LATCH, mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(cursor->left_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - ut_a(page_is_comp(cursor->left_page) - == page_is_comp(page)); - buf_block_align(cursor->left_page) - ->check_index_page_at_flush = TRUE; - } - - get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr); - ut_a(page_is_comp(get_page) == page_is_comp(page)); - buf_block_align(get_page)->check_index_page_at_flush = TRUE; - - } else if (latch_mode == BTR_MODIFY_PREV) { - - /* x-latch also left brother */ - left_page_no = btr_page_get_prev(page, mtr); - - if (left_page_no != FIL_NULL) { - cursor->left_page = btr_page_get(space, left_page_no, - RW_X_LATCH, mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(cursor->left_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - ut_a(page_is_comp(cursor->left_page) - == page_is_comp(page)); - buf_block_align(cursor->left_page) - ->check_index_page_at_flush = TRUE; - } - - get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr); - ut_a(page_is_comp(get_page) == page_is_comp(page)); - buf_block_align(get_page)->check_index_page_at_flush = TRUE; - } else { - ut_error; - } -} - -/************************************************************************ -Searches an index tree and positions a tree cursor on a given level. -NOTE: n_fields_cmp in tuple must be set so that it cannot be compared -to node pointer page number fields on the upper levels of the tree! -Note that if mode is PAGE_CUR_LE, which is used in inserts, then -cursor->up_match and cursor->low_match both will have sensible values. -If mode is PAGE_CUR_GE, then up_match will a have a sensible value. - -If mode is PAGE_CUR_LE , cursor is left at the place where an insert of the -search tuple should be performed in the B-tree. 
InnoDB does an insert -immediately after the cursor. Thus, the cursor may end up on a user record, -or on a page infimum record. */ - -void -btr_cur_search_to_nth_level( -/*========================*/ - dict_index_t* index, /* in: index */ - ulint level, /* in: the tree level of search */ - dtuple_t* tuple, /* in: data tuple; NOTE: n_fields_cmp in - tuple must be set so that it cannot get - compared to the node ptr page number field! */ - ulint mode, /* in: PAGE_CUR_L, ...; - Inserts should always be made using - PAGE_CUR_LE to search the position! */ - ulint latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with - BTR_INSERT and BTR_ESTIMATE; - cursor->left_page is used to store a pointer - to the left neighbor page, in the cases - BTR_SEARCH_PREV and BTR_MODIFY_PREV; - NOTE that if has_search_latch - is != 0, we maybe do not have a latch set - on the cursor page, we assume - the caller uses his search latch - to protect the record! */ - btr_cur_t* cursor, /* in/out: tree cursor; the cursor page is - s- or x-latched, but see also above! 
*/ - ulint has_search_latch,/* in: info on the latch mode the - caller currently has on btr_search_latch: - RW_S_LATCH, or 0 */ - mtr_t* mtr) /* in: mtr */ -{ - page_cur_t* page_cursor; - page_t* page; - page_t* guess; - rec_t* node_ptr; - ulint page_no; - ulint space; - ulint up_match; - ulint up_bytes; - ulint low_match; - ulint low_bytes; - ulint height; - ulint savepoint; - ulint rw_latch; - ulint page_mode; - ulint insert_planned; - ulint buf_mode; - ulint estimate; - ulint ignore_sec_unique; - ulint root_height = 0; /* remove warning */ -#ifdef BTR_CUR_ADAPT - btr_search_t* info; -#endif - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - /* Currently, PAGE_CUR_LE is the only search mode used for searches - ending to upper levels */ - - ut_ad(level == 0 || mode == PAGE_CUR_LE); - ut_ad(dict_index_check_search_tuple(index, tuple)); - ut_ad(!(index->type & DICT_IBUF) || ibuf_inside()); - ut_ad(dtuple_check_typed(tuple)); - -#ifdef UNIV_DEBUG - cursor->up_match = ULINT_UNDEFINED; - cursor->low_match = ULINT_UNDEFINED; -#endif - insert_planned = latch_mode & BTR_INSERT; - estimate = latch_mode & BTR_ESTIMATE; - ignore_sec_unique = latch_mode & BTR_IGNORE_SEC_UNIQUE; - latch_mode = latch_mode & ~(BTR_INSERT | BTR_ESTIMATE - | BTR_IGNORE_SEC_UNIQUE); - - ut_ad(!insert_planned || (mode == PAGE_CUR_LE)); - - cursor->flag = BTR_CUR_BINARY; - cursor->index = index; - -#ifndef BTR_CUR_ADAPT - guess = NULL; -#else - info = btr_search_get_info(index); - - guess = info->root_guess; - -#ifdef BTR_CUR_HASH_ADAPT - -#ifdef UNIV_SEARCH_PERF_STAT - info->n_searches++; -#endif - if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED - && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ - && !estimate -#ifdef PAGE_CUR_LE_OR_EXTENDS - && mode != PAGE_CUR_LE_OR_EXTENDS -#endif /* PAGE_CUR_LE_OR_EXTENDS */ - && srv_use_adaptive_hash_indexes - && 
btr_search_guess_on_hash(index, info, tuple, mode, - latch_mode, cursor, - has_search_latch, mtr)) { - - /* Search using the hash index succeeded */ - - ut_ad(cursor->up_match != ULINT_UNDEFINED - || mode != PAGE_CUR_GE); - ut_ad(cursor->up_match != ULINT_UNDEFINED - || mode != PAGE_CUR_LE); - ut_ad(cursor->low_match != ULINT_UNDEFINED - || mode != PAGE_CUR_LE); - btr_cur_n_sea++; - - return; - } -#endif -#endif - btr_cur_n_non_sea++; - - /* If the hash search did not succeed, do binary search down the - tree */ - - if (has_search_latch) { - /* Release possible search latch to obey latching order */ - rw_lock_s_unlock(&btr_search_latch); - } - - /* Store the position of the tree latch we push to mtr so that we - know how to release it when we have latched leaf node(s) */ - - savepoint = mtr_set_savepoint(mtr); - - if (latch_mode == BTR_MODIFY_TREE) { - mtr_x_lock(dict_index_get_lock(index), mtr); - - } else if (latch_mode == BTR_CONT_MODIFY_TREE) { - /* Do nothing */ - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - } else { - mtr_s_lock(dict_index_get_lock(index), mtr); - } - - page_cursor = btr_cur_get_page_cur(cursor); - - space = dict_index_get_space(index); - page_no = dict_index_get_page(index); - - up_match = 0; - up_bytes = 0; - low_match = 0; - low_bytes = 0; - - height = ULINT_UNDEFINED; - rw_latch = RW_NO_LATCH; - buf_mode = BUF_GET; - - /* We use these modified search modes on non-leaf levels of the - B-tree. These let us end up in the right B-tree leaf. In that leaf - we use the original search mode. 
*/ - - switch (mode) { - case PAGE_CUR_GE: - page_mode = PAGE_CUR_L; - break; - case PAGE_CUR_G: - page_mode = PAGE_CUR_LE; - break; - default: -#ifdef PAGE_CUR_LE_OR_EXTENDS - ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE - || mode == PAGE_CUR_LE_OR_EXTENDS); -#else /* PAGE_CUR_LE_OR_EXTENDS */ - ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE); -#endif /* PAGE_CUR_LE_OR_EXTENDS */ - page_mode = mode; - break; - } - - /* Loop and search until we arrive at the desired level */ - - for (;;) { - if ((height == 0) && (latch_mode <= BTR_MODIFY_LEAF)) { - - rw_latch = latch_mode; - - if (insert_planned - && ibuf_should_try(index, ignore_sec_unique)) { - - /* Try insert to the insert buffer if the - page is not in the buffer pool */ - - buf_mode = BUF_GET_IF_IN_POOL; - } - } -retry_page_get: - page = buf_page_get_gen(space, page_no, rw_latch, guess, - buf_mode, - __FILE__, __LINE__, - mtr); - if (page == NULL) { - /* This must be a search to perform an insert; - try insert to the insert buffer */ - - ut_ad(buf_mode == BUF_GET_IF_IN_POOL); - ut_ad(insert_planned); - ut_ad(cursor->thr); - - if (ibuf_should_try(index, ignore_sec_unique) - && ibuf_insert(tuple, index, space, page_no, - cursor->thr)) { - /* Insertion to the insert buffer succeeded */ - cursor->flag = BTR_CUR_INSERT_TO_IBUF; - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - goto func_exit; - } - - /* Insert to the insert buffer did not succeed: - retry page get */ - - buf_mode = BUF_GET; - - goto retry_page_get; - } - - buf_block_align(page)->check_index_page_at_flush = TRUE; - -#ifdef UNIV_SYNC_DEBUG - if (rw_latch != RW_NO_LATCH) { - buf_page_dbg_add_level(page, SYNC_TREE_NODE); - } -#endif - ut_ad(0 == ut_dulint_cmp(index->id, - btr_page_get_index_id(page))); - - if (height == ULINT_UNDEFINED) { - /* We are in the root node */ - - height = btr_page_get_level(page, mtr); - root_height = height; - cursor->tree_height = root_height + 1; -#ifdef BTR_CUR_ADAPT - if (page != guess) { - 
info->root_guess = page; - } -#endif - } - - if (height == 0) { - if (rw_latch == RW_NO_LATCH) { - - btr_cur_latch_leaves(page, space, - page_no, latch_mode, - cursor, mtr); - } - - if ((latch_mode != BTR_MODIFY_TREE) - && (latch_mode != BTR_CONT_MODIFY_TREE)) { - - /* Release the tree s-latch */ - - mtr_release_s_latch_at_savepoint( - mtr, savepoint, - dict_index_get_lock(index)); - } - - page_mode = mode; - } - - page_cur_search_with_match(page, index, tuple, page_mode, - &up_match, &up_bytes, - &low_match, &low_bytes, - page_cursor); - if (estimate) { - btr_cur_add_path_info(cursor, height, root_height); - } - - /* If this is the desired level, leave the loop */ - - ut_ad(height == btr_page_get_level( - page_cur_get_page(page_cursor), mtr)); - - if (level == height) { - - if (level > 0) { - /* x-latch the page */ - page = btr_page_get(space, - page_no, RW_X_LATCH, mtr); - ut_a((ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table)); - } - - break; - } - - ut_ad(height > 0); - - height--; - guess = NULL; - - node_ptr = page_cur_get_rec(page_cursor); - offsets = rec_get_offsets(node_ptr, cursor->index, offsets, - ULINT_UNDEFINED, &heap); - /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - if (level == 0) { - cursor->low_match = low_match; - cursor->low_bytes = low_bytes; - cursor->up_match = up_match; - cursor->up_bytes = up_bytes; - -#ifdef BTR_CUR_ADAPT - if (srv_use_adaptive_hash_indexes) { - - btr_search_info_update(index, cursor); - } -#endif - ut_ad(cursor->up_match != ULINT_UNDEFINED - || mode != PAGE_CUR_GE); - ut_ad(cursor->up_match != ULINT_UNDEFINED - || mode != PAGE_CUR_LE); - ut_ad(cursor->low_match != ULINT_UNDEFINED - || mode != PAGE_CUR_LE); - } - -func_exit: - if (has_search_latch) { - - rw_lock_s_lock(&btr_search_latch); - } -} - -/********************************************************************* -Opens a cursor at either 
end of an index. */ - -void -btr_cur_open_at_index_side( -/*=======================*/ - ibool from_left, /* in: TRUE if open to the low end, - FALSE if to the high end */ - dict_index_t* index, /* in: index */ - ulint latch_mode, /* in: latch mode */ - btr_cur_t* cursor, /* in: cursor */ - mtr_t* mtr) /* in: mtr */ -{ - page_cur_t* page_cursor; - page_t* page; - ulint page_no; - ulint space; - ulint height; - ulint root_height = 0; /* remove warning */ - rec_t* node_ptr; - ulint estimate; - ulint savepoint; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - estimate = latch_mode & BTR_ESTIMATE; - latch_mode = latch_mode & ~BTR_ESTIMATE; - - /* Store the position of the tree latch we push to mtr so that we - know how to release it when we have latched the leaf node */ - - savepoint = mtr_set_savepoint(mtr); - - if (latch_mode == BTR_MODIFY_TREE) { - mtr_x_lock(dict_index_get_lock(index), mtr); - } else { - mtr_s_lock(dict_index_get_lock(index), mtr); - } - - page_cursor = btr_cur_get_page_cur(cursor); - cursor->index = index; - - space = dict_index_get_space(index); - page_no = dict_index_get_page(index); - - height = ULINT_UNDEFINED; - - for (;;) { - page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL, - BUF_GET, - __FILE__, __LINE__, - mtr); - ut_ad(0 == ut_dulint_cmp(index->id, - btr_page_get_index_id(page))); - - buf_block_align(page)->check_index_page_at_flush = TRUE; - - if (height == ULINT_UNDEFINED) { - /* We are in the root node */ - - height = btr_page_get_level(page, mtr); - root_height = height; - } - - if (height == 0) { - btr_cur_latch_leaves(page, space, page_no, - latch_mode, cursor, mtr); - - /* In versions <= 3.23.52 we had forgotten to - release the tree latch here. If in an index scan - we had to scan far to find a record visible to the - current transaction, that could starve others - waiting for the tree latch. 
*/ - - if ((latch_mode != BTR_MODIFY_TREE) - && (latch_mode != BTR_CONT_MODIFY_TREE)) { - - /* Release the tree s-latch */ - - mtr_release_s_latch_at_savepoint( - mtr, savepoint, - dict_index_get_lock(index)); - } - } - - if (from_left) { - page_cur_set_before_first(page, page_cursor); - } else { - page_cur_set_after_last(page, page_cursor); - } - - if (height == 0) { - if (estimate) { - btr_cur_add_path_info(cursor, height, - root_height); - } - - break; - } - - ut_ad(height > 0); - - if (from_left) { - page_cur_move_to_next(page_cursor); - } else { - page_cur_move_to_prev(page_cursor); - } - - if (estimate) { - btr_cur_add_path_info(cursor, height, root_height); - } - - height--; - - node_ptr = page_cur_get_rec(page_cursor); - offsets = rec_get_offsets(node_ptr, cursor->index, offsets, - ULINT_UNDEFINED, &heap); - /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -} - -/************************************************************************** -Positions a cursor at a randomly chosen position within a B-tree. */ - -void -btr_cur_open_at_rnd_pos( -/*====================*/ - dict_index_t* index, /* in: index */ - ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... 
*/ - btr_cur_t* cursor, /* in/out: B-tree cursor */ - mtr_t* mtr) /* in: mtr */ -{ - page_cur_t* page_cursor; - page_t* page; - ulint page_no; - ulint space; - ulint height; - rec_t* node_ptr; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - if (latch_mode == BTR_MODIFY_TREE) { - mtr_x_lock(dict_index_get_lock(index), mtr); - } else { - mtr_s_lock(dict_index_get_lock(index), mtr); - } - - page_cursor = btr_cur_get_page_cur(cursor); - cursor->index = index; - - space = dict_index_get_space(index); - page_no = dict_index_get_page(index); - - height = ULINT_UNDEFINED; - - for (;;) { - page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL, - BUF_GET, - __FILE__, __LINE__, - mtr); - ut_ad(0 == ut_dulint_cmp(index->id, - btr_page_get_index_id(page))); - - if (height == ULINT_UNDEFINED) { - /* We are in the root node */ - - height = btr_page_get_level(page, mtr); - } - - if (height == 0) { - btr_cur_latch_leaves(page, space, page_no, - latch_mode, cursor, mtr); - } - - page_cur_open_on_rnd_user_rec(page, page_cursor); - - if (height == 0) { - - break; - } - - ut_ad(height > 0); - - height--; - - node_ptr = page_cur_get_rec(page_cursor); - offsets = rec_get_offsets(node_ptr, cursor->index, offsets, - ULINT_UNDEFINED, &heap); - /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -} - -/*==================== B-TREE INSERT =========================*/ - -/***************************************************************** -Inserts a record if there is enough space, or if enough space can -be freed by reorganizing. Differs from _optimistic_insert because -no heuristics is applied to whether it pays to use CPU time for -reorganizing the page or not. 
*/ -static -rec_t* -btr_cur_insert_if_possible( -/*=======================*/ - /* out: pointer to inserted record if succeed, - else NULL */ - btr_cur_t* cursor, /* in: cursor on page after which to insert; - cursor stays valid */ - dtuple_t* tuple, /* in: tuple to insert; the size info need not - have been stored to tuple */ - ibool* reorg, /* out: TRUE if reorganization occurred */ - mtr_t* mtr) /* in: mtr */ -{ - page_cur_t* page_cursor; - page_t* page; - rec_t* rec; - - ut_ad(dtuple_check_typed(tuple)); - - *reorg = FALSE; - - page = btr_cur_get_page(cursor); - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - page_cursor = btr_cur_get_page_cur(cursor); - - /* Now, try the insert */ - rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); - - if (!rec) { - /* If record did not fit, reorganize */ - - btr_page_reorganize(page, cursor->index, mtr); - - *reorg = TRUE; - - page_cur_search(page, cursor->index, tuple, - PAGE_CUR_LE, page_cursor); - - rec = page_cur_tuple_insert(page_cursor, tuple, - cursor->index, mtr); - } - - return(rec); -} - -/***************************************************************** -For an insert, checks the locks and does the undo logging if desired. 
*/ -UNIV_INLINE -ulint -btr_cur_ins_lock_and_undo( -/*======================*/ - /* out: DB_SUCCESS, DB_WAIT_LOCK, - DB_FAIL, or error number */ - ulint flags, /* in: undo logging and locking flags: if - not zero, the parameters index and thr - should be specified */ - btr_cur_t* cursor, /* in: cursor on page after which to insert */ - dtuple_t* entry, /* in: entry to insert */ - que_thr_t* thr, /* in: query thread or NULL */ - ibool* inherit)/* out: TRUE if the inserted new record maybe - should inherit LOCK_GAP type locks from the - successor record */ -{ - dict_index_t* index; - ulint err; - rec_t* rec; - dulint roll_ptr; - - /* Check if we have to wait for a lock: enqueue an explicit lock - request if yes */ - - rec = btr_cur_get_rec(cursor); - index = cursor->index; - - err = lock_rec_insert_check_and_lock(flags, rec, index, thr, inherit); - - if (err != DB_SUCCESS) { - - return(err); - } - - if ((index->type & DICT_CLUSTERED) && !(index->type & DICT_IBUF)) { - - err = trx_undo_report_row_operation(flags, TRX_UNDO_INSERT_OP, - thr, index, entry, - NULL, 0, NULL, - &roll_ptr); - if (err != DB_SUCCESS) { - - return(err); - } - - /* Now we can fill in the roll ptr field in entry */ - - if (!(flags & BTR_KEEP_SYS_FLAG)) { - - row_upd_index_entry_sys_field(entry, index, - DATA_ROLL_PTR, roll_ptr); - } - } - - return(DB_SUCCESS); -} - -#ifdef UNIV_DEBUG -/***************************************************************** -Report information about a transaction. 
*/ -static -void -btr_cur_trx_report( -/*===============*/ - trx_t* trx, /* in: transaction */ - const dict_index_t* index, /* in: index */ - const char* op) /* in: operation */ -{ - fprintf(stderr, "Trx with id %lu %lu going to ", - ut_dulint_get_high(trx->id), - ut_dulint_get_low(trx->id)); - fputs(op, stderr); - dict_index_name_print(stderr, trx, index); - putc('\n', stderr); -} -#endif /* UNIV_DEBUG */ - -/***************************************************************** -Tries to perform an insert to a page in an index tree, next to cursor. -It is assumed that mtr holds an x-latch on the page. The operation does -not succeed if there is too little space on the page. If there is just -one record on the page, the insert will always succeed; this is to -prevent trying to split a page with just one record. */ - -ulint -btr_cur_optimistic_insert( -/*======================*/ - /* out: DB_SUCCESS, DB_WAIT_LOCK, - DB_FAIL, or error number */ - ulint flags, /* in: undo logging and locking flags: if not - zero, the parameters index and thr should be - specified */ - btr_cur_t* cursor, /* in: cursor on page after which to insert; - cursor stays valid */ - dtuple_t* entry, /* in: entry to insert */ - rec_t** rec, /* out: pointer to inserted record if - succeed */ - big_rec_t** big_rec,/* out: big rec vector whose fields have to - be stored externally by the caller, or - NULL */ - que_thr_t* thr, /* in: query thread or NULL */ - mtr_t* mtr) /* in: mtr */ -{ - big_rec_t* big_rec_vec = NULL; - dict_index_t* index; - page_cur_t* page_cursor; - page_t* page; - ulint max_size; - rec_t* dummy_rec; - ulint level; - ibool reorg; - ibool inherit; - ulint rec_size; - ulint type; - ulint err; - - *big_rec = NULL; - - page = btr_cur_get_page(cursor); - index = cursor->index; - - if (!dtuple_check_typed_no_assert(entry)) { - fputs("InnoDB: Error in a tuple to insert into ", stderr); - dict_index_name_print(stderr, thr_get_trx(thr), index); - } -#ifdef UNIV_DEBUG - if 
(btr_cur_print_record_ops && thr) { - btr_cur_trx_report(thr_get_trx(thr), index, "insert into "); - dtuple_print(stderr, entry); - } -#endif /* UNIV_DEBUG */ - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - max_size = page_get_max_insert_size_after_reorganize(page, 1); - level = btr_page_get_level(page, mtr); - -calculate_sizes_again: - /* Calculate the record size when entry is converted to a record */ - rec_size = rec_get_converted_size(index, entry); - - if (rec_size - >= ut_min(page_get_free_space_of_empty(page_is_comp(page)) / 2, - REC_MAX_DATA_SIZE)) { - - /* The record is so big that we have to store some fields - externally on separate database pages */ - - big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0); - - if (big_rec_vec == NULL) { - - return(DB_TOO_BIG_RECORD); - } - - goto calculate_sizes_again; - } - - /* If there have been many consecutive inserts, and we are on the leaf - level, check if we have to split the page to reserve enough free space - for future updates of records. 
*/ - - type = index->type; - - if ((type & DICT_CLUSTERED) - && (dict_index_get_space_reserve() + rec_size > max_size) - && (page_get_n_recs(page) >= 2) - && (0 == level) - && (btr_page_get_split_rec_to_right(cursor, &dummy_rec) - || btr_page_get_split_rec_to_left(cursor, &dummy_rec))) { - - if (big_rec_vec) { - dtuple_convert_back_big_rec(index, entry, big_rec_vec); - } - - return(DB_FAIL); - } - - if (!(((max_size >= rec_size) - && (max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT)) - || (page_get_max_insert_size(page, 1) >= rec_size) - || (page_get_n_recs(page) <= 1))) { - - if (big_rec_vec) { - dtuple_convert_back_big_rec(index, entry, big_rec_vec); - } - return(DB_FAIL); - } - - /* Check locks and write to the undo log, if specified */ - err = btr_cur_ins_lock_and_undo(flags, cursor, entry, thr, &inherit); - - if (err != DB_SUCCESS) { - - if (big_rec_vec) { - dtuple_convert_back_big_rec(index, entry, big_rec_vec); - } - return(err); - } - - page_cursor = btr_cur_get_page_cur(cursor); - - reorg = FALSE; - - /* Now, try the insert */ - - *rec = page_cur_insert_rec_low(page_cursor, entry, index, - NULL, NULL, mtr); - if (UNIV_UNLIKELY(!(*rec))) { - /* If the record did not fit, reorganize */ - btr_page_reorganize(page, index, mtr); - - ut_ad(page_get_max_insert_size(page, 1) == max_size); - - reorg = TRUE; - - page_cur_search(page, index, entry, PAGE_CUR_LE, page_cursor); - - *rec = page_cur_tuple_insert(page_cursor, entry, index, mtr); - - if (UNIV_UNLIKELY(!*rec)) { - fputs("InnoDB: Error: cannot insert tuple ", stderr); - dtuple_print(stderr, entry); - fputs(" into ", stderr); - dict_index_name_print(stderr, thr_get_trx(thr), index); - fprintf(stderr, "\nInnoDB: max insert size %lu\n", - (ulong) max_size); - ut_error; - } - } - -#ifdef BTR_CUR_HASH_ADAPT - if (!reorg && (0 == level) && (cursor->flag == BTR_CUR_HASH)) { - btr_search_update_hash_node_on_insert(cursor); - } else { - btr_search_update_hash_on_insert(cursor); - } -#endif - - if (!(flags & 
BTR_NO_LOCKING_FLAG) && inherit) { - - lock_update_insert(*rec); - } - -#if 0 - fprintf(stderr, "Insert into page %lu, max ins size %lu," - " rec %lu ind type %lu\n", - buf_frame_get_page_no(page), max_size, - rec_size + PAGE_DIR_SLOT_SIZE, type); -#endif - if (!(type & DICT_CLUSTERED)) { - /* We have added a record to page: update its free bits */ - ibuf_update_free_bits_if_full(cursor->index, page, max_size, - rec_size + PAGE_DIR_SLOT_SIZE); - } - - *big_rec = big_rec_vec; - - return(DB_SUCCESS); -} - -/***************************************************************** -Performs an insert on a page of an index tree. It is assumed that mtr -holds an x-latch on the tree and on the cursor page. If the insert is -made on the leaf level, to avoid deadlocks, mtr must also own x-latches -to brothers of page, if those brothers exist. */ - -ulint -btr_cur_pessimistic_insert( -/*=======================*/ - /* out: DB_SUCCESS or error number */ - ulint flags, /* in: undo logging and locking flags: if not - zero, the parameter thr should be - specified; if no undo logging is specified, - then the caller must have reserved enough - free extents in the file space so that the - insertion will certainly succeed */ - btr_cur_t* cursor, /* in: cursor after which to insert; - cursor stays valid */ - dtuple_t* entry, /* in: entry to insert */ - rec_t** rec, /* out: pointer to inserted record if - succeed */ - big_rec_t** big_rec,/* out: big rec vector whose fields have to - be stored externally by the caller, or - NULL */ - que_thr_t* thr, /* in: query thread or NULL */ - mtr_t* mtr) /* in: mtr */ -{ - dict_index_t* index = cursor->index; - big_rec_t* big_rec_vec = NULL; - page_t* page; - ulint err; - ibool dummy_inh; - ibool success; - ulint n_extents = 0; - ulint n_reserved; - - ut_ad(dtuple_check_typed(entry)); - - *big_rec = NULL; - - page = btr_cur_get_page(cursor); - - ut_ad(mtr_memo_contains(mtr, - dict_index_get_lock(btr_cur_get_index(cursor)), - MTR_MEMO_X_LOCK)); - 
ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - - /* Try first an optimistic insert; reset the cursor flag: we do not - assume anything of how it was positioned */ - - cursor->flag = BTR_CUR_BINARY; - - err = btr_cur_optimistic_insert(flags, cursor, entry, rec, big_rec, - thr, mtr); - if (err != DB_FAIL) { - - return(err); - } - - /* Retry with a pessimistic insert. Check locks and write to undo log, - if specified */ - - err = btr_cur_ins_lock_and_undo(flags, cursor, entry, thr, &dummy_inh); - - if (err != DB_SUCCESS) { - - return(err); - } - - if (!(flags & BTR_NO_UNDO_LOG_FLAG)) { - /* First reserve enough free space for the file segments - of the index tree, so that the insert will not fail because - of lack of space */ - - n_extents = cursor->tree_height / 16 + 3; - - success = fsp_reserve_free_extents(&n_reserved, index->space, - n_extents, FSP_NORMAL, mtr); - if (!success) { - err = DB_OUT_OF_FILE_SPACE; - - return(err); - } - } - - if (rec_get_converted_size(index, entry) - >= ut_min(page_get_free_space_of_empty(page_is_comp(page)) / 2, - REC_MAX_DATA_SIZE)) { - - /* The record is so big that we have to store some fields - externally on separate database pages */ - - big_rec_vec = dtuple_convert_big_rec(index, entry, NULL, 0); - - if (big_rec_vec == NULL) { - - if (n_extents > 0) { - fil_space_release_free_extents(index->space, - n_reserved); - } - return(DB_TOO_BIG_RECORD); - } - } - - if (dict_index_get_page(index) == buf_frame_get_page_no(page)) { - - /* The page is the root page */ - *rec = btr_root_raise_and_insert(cursor, entry, mtr); - } else { - *rec = btr_page_split_and_insert(cursor, entry, mtr); - } - - btr_cur_position(index, page_rec_get_prev(*rec), cursor); - -#ifdef BTR_CUR_ADAPT - btr_search_update_hash_on_insert(cursor); -#endif - if (!(flags & BTR_NO_LOCKING_FLAG)) { - - lock_update_insert(*rec); - } - - err = DB_SUCCESS; - - if (n_extents > 0) { - fil_space_release_free_extents(index->space, n_reserved); - } 
- - *big_rec = big_rec_vec; - - return(err); -} - -/*==================== B-TREE UPDATE =========================*/ - -/***************************************************************** -For an update, checks the locks and does the undo logging. */ -UNIV_INLINE -ulint -btr_cur_upd_lock_and_undo( -/*======================*/ - /* out: DB_SUCCESS, DB_WAIT_LOCK, or error - number */ - ulint flags, /* in: undo logging and locking flags */ - btr_cur_t* cursor, /* in: cursor on record to update */ - upd_t* update, /* in: update vector */ - ulint cmpl_info,/* in: compiler info on secondary index - updates */ - que_thr_t* thr, /* in: query thread */ - dulint* roll_ptr)/* out: roll pointer */ -{ - dict_index_t* index; - rec_t* rec; - ulint err; - - ut_ad(cursor && update && thr && roll_ptr); - - rec = btr_cur_get_rec(cursor); - index = cursor->index; - - if (!(index->type & DICT_CLUSTERED)) { - /* We do undo logging only when we update a clustered index - record */ - return(lock_sec_rec_modify_check_and_lock(flags, rec, index, - thr)); - } - - /* Check if we have to wait for a lock: enqueue an explicit lock - request if yes */ - - err = DB_SUCCESS; - - if (!(flags & BTR_NO_LOCKING_FLAG)) { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - err = lock_clust_rec_modify_check_and_lock( - flags, rec, index, - rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap), thr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - if (err != DB_SUCCESS) { - - return(err); - } - } - - /* Append the info about the update in the undo log */ - - err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr, - index, NULL, update, - cmpl_info, rec, roll_ptr); - return(err); -} - -/*************************************************************** -Writes a redo log record of updating a record in-place. 
*/ -UNIV_INLINE -void -btr_cur_update_in_place_log( -/*========================*/ - ulint flags, /* in: flags */ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: index where cursor positioned */ - upd_t* update, /* in: update vector */ - trx_t* trx, /* in: transaction */ - dulint roll_ptr, /* in: roll ptr */ - mtr_t* mtr) /* in: mtr */ -{ - byte* log_ptr; - page_t* page = page_align(rec); - ut_ad(flags < 256); - ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); - - log_ptr = mlog_open_and_write_index(mtr, rec, index, page_is_comp(page) - ? MLOG_COMP_REC_UPDATE_IN_PLACE - : MLOG_REC_UPDATE_IN_PLACE, - 1 + DATA_ROLL_PTR_LEN + 14 + 2 - + MLOG_BUF_MARGIN); - - if (!log_ptr) { - /* Logging in mtr is switched off during crash recovery */ - return; - } - - /* The code below assumes index is a clustered index: change index to - the clustered index if we are updating a secondary index record (or we - could as well skip writing the sys col values to the log in this case - because they are not needed for a secondary index record update) */ - - index = dict_table_get_first_index(index->table); - - mach_write_to_1(log_ptr, flags); - log_ptr++; - - log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr, - mtr); - mach_write_to_2(log_ptr, page_offset(rec)); - log_ptr += 2; - - row_upd_index_write_log(update, log_ptr, mtr); -} - -/*************************************************************** -Parses a redo log record of updating a record in-place. 
*/ - -byte* -btr_cur_parse_update_in_place( -/*==========================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - dict_index_t* index) /* in: index corresponding to page */ -{ - ulint flags; - rec_t* rec; - upd_t* update; - ulint pos; - dulint trx_id; - dulint roll_ptr; - ulint rec_offset; - mem_heap_t* heap; - ulint* offsets; - - if (end_ptr < ptr + 1) { - - return(NULL); - } - - flags = mach_read_from_1(ptr); - ptr++; - - ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr); - - if (ptr == NULL) { - - return(NULL); - } - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - rec_offset = mach_read_from_2(ptr); - ptr += 2; - - ut_a(rec_offset <= UNIV_PAGE_SIZE); - - heap = mem_heap_create(256); - - ptr = row_upd_index_parse(ptr, end_ptr, heap, &update); - - if (!ptr || !page) { - - goto func_exit; - } - - ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table)); - rec = page + rec_offset; - - /* We do not need to reserve btr_search_latch, as the page is only - being recovered, and there cannot be a hash index to it. */ - - offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap); - - if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_rec_sys_fields_in_recovery(rec, offsets, - pos, trx_id, roll_ptr); - } - - row_upd_rec_in_place(rec, offsets, update); - -func_exit: - mem_heap_free(heap); - - return(ptr); -} - -/***************************************************************** -Updates a record when the update causes no size changes in its fields. -We assume here that the ordering fields of the record do not change. 
*/ - -ulint -btr_cur_update_in_place( -/*====================*/ - /* out: DB_SUCCESS or error number */ - ulint flags, /* in: undo logging and locking flags */ - btr_cur_t* cursor, /* in: cursor on the record to update; - cursor stays valid and positioned on the - same record */ - upd_t* update, /* in: update vector */ - ulint cmpl_info,/* in: compiler info on secondary index - updates */ - que_thr_t* thr, /* in: query thread */ - mtr_t* mtr) /* in: mtr */ -{ - dict_index_t* index; - buf_block_t* block; - ulint err; - rec_t* rec; - dulint roll_ptr = ut_dulint_zero; - trx_t* trx; - ulint was_delete_marked; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - rec = btr_cur_get_rec(cursor); - index = cursor->index; - ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); - trx = thr_get_trx(thr); - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); -#ifdef UNIV_DEBUG - if (btr_cur_print_record_ops && thr) { - btr_cur_trx_report(trx, index, "update "); - rec_print_new(stderr, rec, offsets); - } -#endif /* UNIV_DEBUG */ - - /* Do lock checking and undo logging */ - err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, - thr, &roll_ptr); - if (UNIV_UNLIKELY(err != DB_SUCCESS)) { - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(err); - } - - block = buf_block_align(rec); - ut_ad(!!page_is_comp(buf_block_get_frame(block)) - == dict_table_is_comp(index->table)); - - if (block->is_hashed) { - /* The function row_upd_changes_ord_field_binary works only - if the update vector was built for a clustered index, we must - NOT call it if index is secondary */ - - if (!(index->type & DICT_CLUSTERED) - || row_upd_changes_ord_field_binary(NULL, index, update)) { - - /* Remove possible hash index pointer to this record */ - btr_search_update_hash_on_delete(cursor); - } - - rw_lock_x_lock(&btr_search_latch); - } - - if 
(!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr); - } - - was_delete_marked = rec_get_deleted_flag( - rec, page_is_comp(buf_block_get_frame(block))); - - row_upd_rec_in_place(rec, offsets, update); - - if (block->is_hashed) { - rw_lock_x_unlock(&btr_search_latch); - } - - btr_cur_update_in_place_log(flags, rec, index, update, trx, roll_ptr, - mtr); - if (was_delete_marked - && !rec_get_deleted_flag(rec, page_is_comp( - buf_block_get_frame(block)))) { - /* The new updated record owns its possible externally - stored fields */ - - btr_cur_unmark_extern_fields(rec, mtr, offsets); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(DB_SUCCESS); -} - -/***************************************************************** -Tries to update a record on a page in an index tree. It is assumed that mtr -holds an x-latch on the page. The operation does not succeed if there is too -little space on the page or if the update would result in too empty a page, -so that tree compression is recommended. We assume here that the ordering -fields of the record do not change. 
*/ - -ulint -btr_cur_optimistic_update( -/*======================*/ - /* out: DB_SUCCESS, or DB_OVERFLOW if the - updated record does not fit, DB_UNDERFLOW - if the page would become too empty */ - ulint flags, /* in: undo logging and locking flags */ - btr_cur_t* cursor, /* in: cursor on the record to update; - cursor stays valid and positioned on the - same record */ - upd_t* update, /* in: update vector; this must also - contain trx id and roll ptr fields */ - ulint cmpl_info,/* in: compiler info on secondary index - updates */ - que_thr_t* thr, /* in: query thread */ - mtr_t* mtr) /* in: mtr */ -{ - dict_index_t* index; - page_cur_t* page_cursor; - ulint err; - page_t* page; - rec_t* rec; - ulint max_size; - ulint new_rec_size; - ulint old_rec_size; - dtuple_t* new_entry; - dulint roll_ptr; - trx_t* trx; - mem_heap_t* heap; - ibool reorganized = FALSE; - ulint i; - ulint* offsets; - - page = btr_cur_get_page(cursor); - rec = btr_cur_get_rec(cursor); - index = cursor->index; - ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); - - heap = mem_heap_create(1024); - offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap); - -#ifdef UNIV_DEBUG - if (btr_cur_print_record_ops && thr) { - btr_cur_trx_report(thr_get_trx(thr), index, "update "); - rec_print_new(stderr, rec, offsets); - } -#endif /* UNIV_DEBUG */ - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - if (!row_upd_changes_field_size_or_external(index, offsets, update)) { - - /* The simplest and the most common case: the update does not - change the size of any field and none of the updated fields is - externally stored in rec or update */ - mem_heap_free(heap); - return(btr_cur_update_in_place(flags, cursor, update, - cmpl_info, thr, mtr)); - } - - for (i = 0; i < upd_get_n_fields(update); i++) { - if (upd_get_nth_field(update, i)->extern_storage) { - - /* Externally stored fields are treated in pessimistic - update */ - - mem_heap_free(heap); - 
return(DB_OVERFLOW); - } - } - - if (rec_offs_any_extern(offsets)) { - /* Externally stored fields are treated in pessimistic - update */ - - mem_heap_free(heap); - return(DB_OVERFLOW); - } - - page_cursor = btr_cur_get_page_cur(cursor); - - new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); - - row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update, - FALSE, NULL); - old_rec_size = rec_offs_size(offsets); - new_rec_size = rec_get_converted_size(index, new_entry); - - if (UNIV_UNLIKELY(new_rec_size - >= (page_get_free_space_of_empty(page_is_comp(page)) - / 2))) { - - mem_heap_free(heap); - - return(DB_OVERFLOW); - } - - max_size = old_rec_size - + page_get_max_insert_size_after_reorganize(page, 1); - - if (UNIV_UNLIKELY(page_get_data_size(page) - - old_rec_size + new_rec_size - < BTR_CUR_PAGE_COMPRESS_LIMIT)) { - - /* The page would become too empty */ - - mem_heap_free(heap); - - return(DB_UNDERFLOW); - } - - if (!(((max_size >= BTR_CUR_PAGE_REORGANIZE_LIMIT) - && (max_size >= new_rec_size)) - || (page_get_n_recs(page) <= 1))) { - - /* There was not enough space, or it did not pay to - reorganize: for simplicity, we decide what to do assuming a - reorganization is needed, though it might not be necessary */ - - mem_heap_free(heap); - - return(DB_OVERFLOW); - } - - /* Do lock checking and undo logging */ - err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, thr, - &roll_ptr); - if (err != DB_SUCCESS) { - - mem_heap_free(heap); - - return(err); - } - - /* Ok, we may do the replacement. Store on the page infimum the - explicit locks on rec, before deleting rec (see the comment in - .._pessimistic_update). 
*/ - - lock_rec_store_on_page_infimum(page, rec); - - btr_search_update_hash_on_delete(cursor); - - page_cur_delete_rec(page_cursor, index, offsets, mtr); - - page_cur_move_to_prev(page_cursor); - - trx = thr_get_trx(thr); - - if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR, - roll_ptr); - row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID, - trx->id); - } - - rec = btr_cur_insert_if_possible(cursor, new_entry, &reorganized, mtr); - - ut_a(rec); /* <- We calculated above the insert would fit */ - - if (!rec_get_deleted_flag(rec, page_is_comp(page))) { - /* The new inserted record owns its possible externally - stored fields */ - - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - btr_cur_unmark_extern_fields(rec, mtr, offsets); - } - - /* Restore the old explicit lock state on the record */ - - lock_rec_restore_from_page_infimum(rec, page); - - page_cur_move_to_next(page_cursor); - - mem_heap_free(heap); - - return(DB_SUCCESS); -} - -/***************************************************************** -If, in a split, a new supremum record was created as the predecessor of the -updated record, the supremum record must inherit exactly the locks on the -updated record. In the split it may have inherited locks from the successor -of the updated record, which is not correct. This function restores the -right locks for the new supremum. 
*/ -static -void -btr_cur_pess_upd_restore_supremum( -/*==============================*/ - rec_t* rec, /* in: updated record */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* page; - page_t* prev_page; - ulint space; - ulint prev_page_no; - - page = buf_frame_align(rec); - - if (page_rec_get_next(page_get_infimum_rec(page)) != rec) { - /* Updated record is not the first user record on its page */ - - return; - } - - space = buf_frame_get_space_id(page); - prev_page_no = btr_page_get_prev(page, mtr); - - ut_ad(prev_page_no != FIL_NULL); - prev_page = buf_page_get_with_no_latch(space, prev_page_no, mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(prev_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - - /* We must already have an x-latch to prev_page! */ - ut_ad(mtr_memo_contains(mtr, buf_block_align(prev_page), - MTR_MEMO_PAGE_X_FIX)); - - lock_rec_reset_and_inherit_gap_locks(page_get_supremum_rec(prev_page), - rec); -} - -/***************************************************************** -Performs an update of a record on a page of a tree. It is assumed -that mtr holds an x-latch on the tree and on the cursor page. If the -update is made on the leaf level, to avoid deadlocks, mtr must also -own x-latches to brothers of page, if those brothers exist. We assume -here that the ordering fields of the record do not change. 
*/ - -ulint -btr_cur_pessimistic_update( -/*=======================*/ - /* out: DB_SUCCESS or error code */ - ulint flags, /* in: undo logging, locking, and rollback - flags */ - btr_cur_t* cursor, /* in: cursor on the record to update */ - big_rec_t** big_rec,/* out: big rec vector whose fields have to - be stored externally by the caller, or NULL */ - upd_t* update, /* in: update vector; this is allowed also - contain trx id and roll ptr fields, but - the values in update vector have no effect */ - ulint cmpl_info,/* in: compiler info on secondary index - updates */ - que_thr_t* thr, /* in: query thread */ - mtr_t* mtr) /* in: mtr */ -{ - big_rec_t* big_rec_vec = NULL; - big_rec_t* dummy_big_rec; - dict_index_t* index; - page_t* page; - rec_t* rec; - page_cur_t* page_cursor; - dtuple_t* new_entry; - mem_heap_t* heap; - ulint err; - ulint optim_err; - ibool dummy_reorganized; - dulint roll_ptr; - trx_t* trx; - ibool was_first; - ibool success; - ulint n_extents = 0; - ulint n_reserved; - ulint* ext_vect; - ulint n_ext_vect; - ulint reserve_flag; - ulint* offsets = NULL; - - *big_rec = NULL; - - page = btr_cur_get_page(cursor); - rec = btr_cur_get_rec(cursor); - index = cursor->index; - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - - optim_err = btr_cur_optimistic_update(flags, cursor, update, - cmpl_info, thr, mtr); - - if (optim_err != DB_UNDERFLOW && optim_err != DB_OVERFLOW) { - - return(optim_err); - } - - /* Do lock checking and undo logging */ - err = btr_cur_upd_lock_and_undo(flags, cursor, update, cmpl_info, - thr, &roll_ptr); - if (err != DB_SUCCESS) { - - return(err); - } - - if (optim_err == DB_OVERFLOW) { - /* First reserve enough free space for the file segments - of the index tree, so that the update will not fail because - of lack of space */ - - n_extents = cursor->tree_height / 16 + 3; - - if (flags & BTR_NO_UNDO_LOG_FLAG) { - 
reserve_flag = FSP_CLEANING; - } else { - reserve_flag = FSP_NORMAL; - } - - success = fsp_reserve_free_extents(&n_reserved, index->space, - n_extents, - reserve_flag, mtr); - if (!success) { - err = DB_OUT_OF_FILE_SPACE; - - return(err); - } - } - - heap = mem_heap_create(1024); - offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap); - - trx = thr_get_trx(thr); - - new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); - - row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update, - FALSE, heap); - if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR, - roll_ptr); - row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID, - trx->id); - } - - if (flags & BTR_NO_UNDO_LOG_FLAG) { - /* We are in a transaction rollback undoing a row - update: we must free possible externally stored fields - which got new values in the update, if they are not - inherited values. They can be inherited if we have - updated the primary key to another value, and then - update it back again. 
*/ - - ut_a(big_rec_vec == NULL); - - btr_rec_free_updated_extern_fields(index, rec, offsets, - update, TRUE, mtr); - } - - /* We have to set appropriate extern storage bits in the new - record to be inserted: we have to remember which fields were such */ - - ext_vect = mem_heap_alloc(heap, sizeof(ulint) - * dict_index_get_n_fields(index)); - ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec)); - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - n_ext_vect = btr_push_update_extern_fields(ext_vect, offsets, update); - - if (UNIV_UNLIKELY(rec_get_converted_size(index, new_entry) - >= ut_min(page_get_free_space_of_empty( - page_is_comp(page)) / 2, - REC_MAX_DATA_SIZE))) { - - big_rec_vec = dtuple_convert_big_rec(index, new_entry, - ext_vect, n_ext_vect); - if (big_rec_vec == NULL) { - - err = DB_TOO_BIG_RECORD; - goto return_after_reservations; - } - } - - page_cursor = btr_cur_get_page_cur(cursor); - - /* Store state of explicit locks on rec on the page infimum record, - before deleting rec. The page infimum acts as a dummy carrier of the - locks, taking care also of lock releases, before we can move the locks - back on the actual record. There is a special case: if we are - inserting on the root page and the insert causes a call of - btr_root_raise_and_insert. Therefore we cannot in the lock system - delete the lock structs set on the root page even if the root - page carries just node pointers. 
*/ - - lock_rec_store_on_page_infimum(buf_frame_align(rec), rec); - - btr_search_update_hash_on_delete(cursor); - - page_cur_delete_rec(page_cursor, index, offsets, mtr); - - page_cur_move_to_prev(page_cursor); - - rec = btr_cur_insert_if_possible(cursor, new_entry, - &dummy_reorganized, mtr); - ut_a(rec || optim_err != DB_UNDERFLOW); - - if (rec) { - lock_rec_restore_from_page_infimum(rec, page); - rec_set_field_extern_bits(rec, index, - ext_vect, n_ext_vect, mtr); - - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - - if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) { - /* The new inserted record owns its possible externally - stored fields */ - btr_cur_unmark_extern_fields(rec, mtr, offsets); - } - - btr_cur_compress_if_useful(cursor, mtr); - - err = DB_SUCCESS; - goto return_after_reservations; - } - - if (page_cur_is_before_first(page_cursor)) { - /* The record to be updated was positioned as the first user - record on its page */ - - was_first = TRUE; - } else { - was_first = FALSE; - } - - /* The first parameter means that no lock checking and undo logging - is made in the insert */ - - err = btr_cur_pessimistic_insert(BTR_NO_UNDO_LOG_FLAG - | BTR_NO_LOCKING_FLAG - | BTR_KEEP_SYS_FLAG, - cursor, new_entry, &rec, - &dummy_big_rec, NULL, mtr); - ut_a(rec); - ut_a(err == DB_SUCCESS); - ut_a(dummy_big_rec == NULL); - - rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr); - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - - if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) { - /* The new inserted record owns its possible externally - stored fields */ - - btr_cur_unmark_extern_fields(rec, mtr, offsets); - } - - lock_rec_restore_from_page_infimum(rec, page); - - /* If necessary, restore also the correct lock state for a new, - preceding supremum record created in a page split. While the old - record was nonexistent, the supremum might have inherited its locks - from a wrong record. 
*/ - - if (!was_first) { - btr_cur_pess_upd_restore_supremum(rec, mtr); - } - -return_after_reservations: - mem_heap_free(heap); - - if (n_extents > 0) { - fil_space_release_free_extents(index->space, n_reserved); - } - - *big_rec = big_rec_vec; - - return(err); -} - -/*==================== B-TREE DELETE MARK AND UNMARK ===============*/ - -/******************************************************************** -Writes the redo log record for delete marking or unmarking of an index -record. */ -UNIV_INLINE -void -btr_cur_del_mark_set_clust_rec_log( -/*===============================*/ - ulint flags, /* in: flags */ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: index of the record */ - ibool val, /* in: value to set */ - trx_t* trx, /* in: deleting transaction */ - dulint roll_ptr,/* in: roll ptr to the undo log record */ - mtr_t* mtr) /* in: mtr */ -{ - byte* log_ptr; - ut_ad(flags < 256); - ut_ad(val <= 1); - - ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); - - log_ptr = mlog_open_and_write_index(mtr, rec, index, - page_rec_is_comp(rec) - ? MLOG_COMP_REC_CLUST_DELETE_MARK - : MLOG_REC_CLUST_DELETE_MARK, - 1 + 1 + DATA_ROLL_PTR_LEN - + 14 + 2); - - if (!log_ptr) { - /* Logging in mtr is switched off during crash recovery */ - return; - } - - mach_write_to_1(log_ptr, flags); - log_ptr++; - mach_write_to_1(log_ptr, val); - log_ptr++; - - log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr, - mtr); - mach_write_to_2(log_ptr, page_offset(rec)); - log_ptr += 2; - - mlog_close(mtr, log_ptr); -} - -/******************************************************************** -Parses the redo log record for delete marking or unmarking of a clustered -index record. 
*/ - -byte* -btr_cur_parse_del_mark_set_clust_rec( -/*=================================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - dict_index_t* index, /* in: index corresponding to page */ - page_t* page) /* in: page or NULL */ -{ - ulint flags; - ulint val; - ulint pos; - dulint trx_id; - dulint roll_ptr; - ulint offset; - rec_t* rec; - - ut_ad(!page - || !!page_is_comp(page) == dict_table_is_comp(index->table)); - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - flags = mach_read_from_1(ptr); - ptr++; - val = mach_read_from_1(ptr); - ptr++; - - ptr = row_upd_parse_sys_vals(ptr, end_ptr, &pos, &trx_id, &roll_ptr); - - if (ptr == NULL) { - - return(NULL); - } - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - offset = mach_read_from_2(ptr); - ptr += 2; - - ut_a(offset <= UNIV_PAGE_SIZE); - - if (page) { - rec = page + offset; - - if (!(flags & BTR_KEEP_SYS_FLAG)) { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - row_upd_rec_sys_fields_in_recovery( - rec, rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap), - pos, trx_id, roll_ptr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } - - /* We do not need to reserve btr_search_latch, as the page - is only being recovered, and there cannot be a hash index to - it. */ - - rec_set_deleted_flag(rec, page_is_comp(page), val); - } - - return(ptr); -} - -/*************************************************************** -Marks a clustered index record deleted. Writes an undo log record to -undo log on this delete marking. Writes in the trx id field the id -of the deleting transaction, and in the roll ptr field pointer to the -undo log record created. 
*/ - -ulint -btr_cur_del_mark_set_clust_rec( -/*===========================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, or error - number */ - ulint flags, /* in: undo logging and locking flags */ - btr_cur_t* cursor, /* in: cursor */ - ibool val, /* in: value to set */ - que_thr_t* thr, /* in: query thread */ - mtr_t* mtr) /* in: mtr */ -{ - dict_index_t* index; - buf_block_t* block; - dulint roll_ptr; - ulint err; - rec_t* rec; - trx_t* trx; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - rec = btr_cur_get_rec(cursor); - index = cursor->index; - ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - -#ifdef UNIV_DEBUG - if (btr_cur_print_record_ops && thr) { - btr_cur_trx_report(thr_get_trx(thr), index, "del mark "); - rec_print_new(stderr, rec, offsets); - } -#endif /* UNIV_DEBUG */ - - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(!rec_get_deleted_flag(rec, rec_offs_comp(offsets))); - - err = lock_clust_rec_modify_check_and_lock(flags, - rec, index, offsets, thr); - - if (err != DB_SUCCESS) { - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(err); - } - - err = trx_undo_report_row_operation(flags, TRX_UNDO_MODIFY_OP, thr, - index, NULL, NULL, 0, rec, - &roll_ptr); - if (err != DB_SUCCESS) { - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(err); - } - - block = buf_block_align(rec); - - if (block->is_hashed) { - rw_lock_x_lock(&btr_search_latch); - } - - rec_set_deleted_flag(rec, rec_offs_comp(offsets), val); - - trx = thr_get_trx(thr); - - if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr); - } - - if (block->is_hashed) { - rw_lock_x_unlock(&btr_search_latch); - } - - btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx, - roll_ptr, mtr); - if (UNIV_LIKELY_NULL(heap)) { - 
mem_heap_free(heap); - } - return(DB_SUCCESS); -} - -/******************************************************************** -Writes the redo log record for a delete mark setting of a secondary -index record. */ -UNIV_INLINE -void -btr_cur_del_mark_set_sec_rec_log( -/*=============================*/ - rec_t* rec, /* in: record */ - ibool val, /* in: value to set */ - mtr_t* mtr) /* in: mtr */ -{ - byte* log_ptr; - ut_ad(val <= 1); - - log_ptr = mlog_open(mtr, 11 + 1 + 2); - - if (!log_ptr) { - /* Logging in mtr is switched off during crash recovery: - in that case mlog_open returns NULL */ - return; - } - - log_ptr = mlog_write_initial_log_record_fast( - rec, MLOG_REC_SEC_DELETE_MARK, log_ptr, mtr); - mach_write_to_1(log_ptr, val); - log_ptr++; - - mach_write_to_2(log_ptr, page_offset(rec)); - log_ptr += 2; - - mlog_close(mtr, log_ptr); -} - -/******************************************************************** -Parses the redo log record for delete marking or unmarking of a secondary -index record. */ - -byte* -btr_cur_parse_del_mark_set_sec_rec( -/*===============================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page) /* in: page or NULL */ -{ - ulint val; - ulint offset; - rec_t* rec; - - if (end_ptr < ptr + 3) { - - return(NULL); - } - - val = mach_read_from_1(ptr); - ptr++; - - offset = mach_read_from_2(ptr); - ptr += 2; - - ut_a(offset <= UNIV_PAGE_SIZE); - - if (page) { - rec = page + offset; - - /* We do not need to reserve btr_search_latch, as the page - is only being recovered, and there cannot be a hash index to - it. */ - - rec_set_deleted_flag(rec, page_is_comp(page), val); - } - - return(ptr); -} - -/*************************************************************** -Sets a secondary index record delete mark to TRUE or FALSE. 
*/ - -ulint -btr_cur_del_mark_set_sec_rec( -/*=========================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, or error - number */ - ulint flags, /* in: locking flag */ - btr_cur_t* cursor, /* in: cursor */ - ibool val, /* in: value to set */ - que_thr_t* thr, /* in: query thread */ - mtr_t* mtr) /* in: mtr */ -{ - buf_block_t* block; - rec_t* rec; - ulint err; - - rec = btr_cur_get_rec(cursor); - -#ifdef UNIV_DEBUG - if (btr_cur_print_record_ops && thr) { - btr_cur_trx_report(thr_get_trx(thr), cursor->index, - "del mark "); - rec_print(stderr, rec, cursor->index); - } -#endif /* UNIV_DEBUG */ - - err = lock_sec_rec_modify_check_and_lock(flags, rec, cursor->index, - thr); - if (err != DB_SUCCESS) { - - return(err); - } - - block = buf_block_align(rec); - ut_ad(!!page_is_comp(buf_block_get_frame(block)) - == dict_table_is_comp(cursor->index->table)); - - if (block->is_hashed) { - rw_lock_x_lock(&btr_search_latch); - } - - rec_set_deleted_flag(rec, page_is_comp(buf_block_get_frame(block)), - val); - - if (block->is_hashed) { - rw_lock_x_unlock(&btr_search_latch); - } - - btr_cur_del_mark_set_sec_rec_log(rec, val, mtr); - - return(DB_SUCCESS); -} - -/*************************************************************** -Sets a secondary index record delete mark to FALSE. This function is only -used by the insert buffer insert merge mechanism. */ - -void -btr_cur_del_unmark_for_ibuf( -/*========================*/ - rec_t* rec, /* in: record to delete unmark */ - mtr_t* mtr) /* in: mtr */ -{ - /* We do not need to reserve btr_search_latch, as the page has just - been read to the buffer pool and there cannot be a hash index to it. */ - - rec_set_deleted_flag(rec, page_is_comp(buf_frame_align(rec)), FALSE); - - btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr); -} - -/*==================== B-TREE RECORD REMOVE =========================*/ - -/***************************************************************** -Tries to compress a page of the tree on the leaf level. 
It is assumed -that mtr holds an x-latch on the tree and on the cursor page. To avoid -deadlocks, mtr must also own x-latches to brothers of page, if those -brothers exist. NOTE: it is assumed that the caller has reserved enough -free extents so that the compression will always succeed if done! */ - -void -btr_cur_compress( -/*=============*/ - btr_cur_t* cursor, /* in: cursor on the page to compress; - cursor does not stay valid */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(mtr_memo_contains(mtr, - dict_index_get_lock(btr_cur_get_index(cursor)), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(btr_page_get_level(btr_cur_get_page(cursor), mtr) == 0); - - btr_compress(cursor, mtr); -} - -/***************************************************************** -Tries to compress a page of the tree if it seems useful. It is assumed -that mtr holds an x-latch on the tree and on the cursor page. To avoid -deadlocks, mtr must also own x-latches to brothers of page, if those -brothers exist. NOTE: it is assumed that the caller has reserved enough -free extents so that the compression will always succeed if done! */ - -ibool -btr_cur_compress_if_useful( -/*=======================*/ - /* out: TRUE if compression occurred */ - btr_cur_t* cursor, /* in: cursor on the page to compress; - cursor does not stay valid if compression - occurs */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(mtr_memo_contains(mtr, - dict_index_get_lock(btr_cur_get_index(cursor)), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)), - MTR_MEMO_PAGE_X_FIX)); - - if (btr_cur_compress_recommendation(cursor, mtr)) { - - btr_compress(cursor, mtr); - - return(TRUE); - } - - return(FALSE); -} - -/*********************************************************** -Removes the record on which the tree cursor is positioned on a leaf page. 
-It is assumed that the mtr has an x-latch on the page where the cursor is -positioned, but no latch on the whole tree. */ - -ibool -btr_cur_optimistic_delete( -/*======================*/ - /* out: TRUE if success, i.e., the page - did not become too empty */ - btr_cur_t* cursor, /* in: cursor on leaf page, on the record to - delete; cursor stays valid: if deletion - succeeds, on function exit it points to the - successor of the deleted record */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* page; - ulint max_ins_size; - rec_t* rec; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - ibool no_compress_needed; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)), - MTR_MEMO_PAGE_X_FIX)); - /* This is intended only for leaf page deletions */ - - page = btr_cur_get_page(cursor); - - ut_ad(btr_page_get_level(page, mtr) == 0); - - rec = btr_cur_get_rec(cursor); - offsets = rec_get_offsets(rec, cursor->index, offsets, - ULINT_UNDEFINED, &heap); - - no_compress_needed = !rec_offs_any_extern(offsets) - && btr_cur_can_delete_without_compress( - cursor, rec_offs_size(offsets), mtr); - - if (no_compress_needed) { - - lock_update_delete(rec); - - btr_search_update_hash_on_delete(cursor); - - max_ins_size = page_get_max_insert_size_after_reorganize( - page, 1); - page_cur_delete_rec(btr_cur_get_page_cur(cursor), - cursor->index, offsets, mtr); - - ibuf_update_free_bits_low(cursor->index, page, max_ins_size, - mtr); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - return(no_compress_needed); -} - -/***************************************************************** -Removes the record on which the tree cursor is positioned. Tries -to compress the page if its fillfactor drops below a threshold -or if it is the only page on the level. It is assumed that mtr holds -an x-latch on the tree and on the cursor page. 
To avoid deadlocks, -mtr must also own x-latches to brothers of page, if those brothers -exist. */ - -ibool -btr_cur_pessimistic_delete( -/*=======================*/ - /* out: TRUE if compression occurred */ - ulint* err, /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE; - the latter may occur because we may have - to update node pointers on upper levels, - and in the case of variable length keys - these may actually grow in size */ - ibool has_reserved_extents, /* in: TRUE if the - caller has already reserved enough free - extents so that he knows that the operation - will succeed */ - btr_cur_t* cursor, /* in: cursor on the record to delete; - if compression does not occur, the cursor - stays valid: it points to successor of - deleted record on function exit */ - ibool in_rollback,/* in: TRUE if called in rollback */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* page; - dict_index_t* index; - rec_t* rec; - dtuple_t* node_ptr; - ulint n_extents = 0; - ulint n_reserved; - ibool success; - ibool ret = FALSE; - ulint level; - mem_heap_t* heap; - ulint* offsets; - - page = btr_cur_get_page(cursor); - index = btr_cur_get_index(cursor); - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - if (!has_reserved_extents) { - /* First reserve enough free space for the file segments - of the index tree, so that the node pointer updates will - not fail because of lack of space */ - - n_extents = cursor->tree_height / 32 + 1; - - success = fsp_reserve_free_extents(&n_reserved, - index->space, - n_extents, - FSP_CLEANING, mtr); - if (!success) { - *err = DB_OUT_OF_FILE_SPACE; - - return(FALSE); - } - } - - heap = mem_heap_create(1024); - rec = btr_cur_get_rec(cursor); - - offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap); - - /* Free externally stored fields if the record is neither - a node pointer nor in two-byte format. - This avoids an unnecessary loop. 
*/ - if (page_is_comp(page) - ? !rec_get_node_ptr_flag(rec) - : !rec_get_1byte_offs_flag(rec)) { - btr_rec_free_externally_stored_fields(index, - rec, offsets, - in_rollback, mtr); - } - - if (UNIV_UNLIKELY(page_get_n_recs(page) < 2) - && UNIV_UNLIKELY(dict_index_get_page(btr_cur_get_index(cursor)) - != buf_frame_get_page_no(page))) { - - /* If there is only one record, drop the whole page in - btr_discard_page, if this is not the root page */ - - btr_discard_page(cursor, mtr); - - *err = DB_SUCCESS; - ret = TRUE; - - goto return_after_reservations; - } - - lock_update_delete(rec); - level = btr_page_get_level(page, mtr); - - if (level > 0 - && UNIV_UNLIKELY(rec == page_rec_get_next( - page_get_infimum_rec(page)))) { - - rec_t* next_rec = page_rec_get_next(rec); - - if (btr_page_get_prev(page, mtr) == FIL_NULL) { - - /* If we delete the leftmost node pointer on a - non-leaf level, we must mark the new leftmost node - pointer as the predefined minimum record */ - - btr_set_min_rec_mark(next_rec, page_is_comp(page), - mtr); - } else { - /* Otherwise, if we delete the leftmost node pointer - on a page, we have to change the father node pointer - so that it is equal to the new leftmost node pointer - on the page */ - - btr_node_ptr_delete(index, page, mtr); - - node_ptr = dict_index_build_node_ptr( - index, next_rec, buf_frame_get_page_no(page), - heap, level); - - btr_insert_on_non_leaf_level(index, - level + 1, node_ptr, mtr); - } - } - - btr_search_update_hash_on_delete(cursor); - - page_cur_delete_rec(btr_cur_get_page_cur(cursor), index, offsets, mtr); - - ut_ad(btr_check_node_ptr(index, page, mtr)); - - *err = DB_SUCCESS; - -return_after_reservations: - mem_heap_free(heap); - - if (ret == FALSE) { - ret = btr_cur_compress_if_useful(cursor, mtr); - } - - if (n_extents > 0) { - fil_space_release_free_extents(index->space, n_reserved); - } - - return(ret); -} - -/*********************************************************************** -Adds path information to the 
cursor for the current page, for which
the binary search has been performed. */
static
void
btr_cur_add_path_info(
/*==================*/
	btr_cur_t*	cursor,		/* in: cursor positioned on a page */
	ulint		height,		/* in: height of the page in tree;
					0 means leaf node */
	ulint		root_height)	/* in: root node height in tree */
{
	btr_path_t*	path;
	btr_path_t*	slot;
	rec_t*		rec;

	ut_a(cursor->path_arr);

	path = cursor->path_arr;

	if (root_height >= BTR_PATH_ARRAY_N_SLOTS - 1) {
		/* Do nothing; return empty path */

		path[0].nth_rec = ULINT_UNDEFINED;

		return;
	}

	if (height == 0) {
		/* Mark end of slots for path */

		path[root_height + 1].nth_rec = ULINT_UNDEFINED;
	}

	/* The slot index is the distance of this page from the root */

	rec = btr_cur_get_rec(cursor);
	slot = &path[root_height - height];

	slot->nth_rec = page_rec_get_n_recs_before(rec);
	slot->n_recs = page_get_n_recs(buf_frame_align(rec));
}

/***********************************************************************
Estimates the number of rows in a given index range.
*/

ib_longlong
btr_estimate_n_rows_in_range(
/*=========================*/
				/* out: estimated number of rows */
	dict_index_t*	index,	/* in: index */
	dtuple_t*	tuple1,	/* in: range start, may also be empty tuple */
	ulint		mode1,	/* in: search mode for range start */
	dtuple_t*	tuple2,	/* in: range end, may also be empty tuple */
	ulint		mode2)	/* in: search mode for range end */
{
	btr_path_t	path1[BTR_PATH_ARRAY_N_SLOTS];
	btr_path_t	path2[BTR_PATH_ARRAY_N_SLOTS];
	btr_cur_t	cursor;
	btr_path_t*	lo;
	btr_path_t*	hi;
	ibool		diverged	= FALSE; /* This becomes true when
						the path is not the same
						any more */
	ibool		diverged_lot	= FALSE; /* This becomes true when
						the paths are not the same
						or adjacent any more */
	ulint		divergence_level = 1000000; /* This is the level
						where paths diverged a lot */
	ib_longlong	n_rows		= 1;
	ulint		level;
	mtr_t		mtr;

	/* Record the search path to the start of the range in path1;
	an empty tuple means the left end of the index */

	mtr_start(&mtr);

	cursor.path_arr = path1;

	if (dtuple_get_n_fields(tuple1) > 0) {

		btr_cur_search_to_nth_level(index, 0, tuple1, mode1,
					    BTR_SEARCH_LEAF | BTR_ESTIMATE,
					    &cursor, 0, &mtr);
	} else {
		btr_cur_open_at_index_side(TRUE, index,
					   BTR_SEARCH_LEAF | BTR_ESTIMATE,
					   &cursor, &mtr);
	}

	mtr_commit(&mtr);

	/* Likewise for the end of the range in path2; an empty tuple
	means the right end of the index */

	mtr_start(&mtr);

	cursor.path_arr = path2;

	if (dtuple_get_n_fields(tuple2) > 0) {

		btr_cur_search_to_nth_level(index, 0, tuple2, mode2,
					    BTR_SEARCH_LEAF | BTR_ESTIMATE,
					    &cursor, 0, &mtr);
	} else {
		btr_cur_open_at_index_side(FALSE, index,
					   BTR_SEARCH_LEAF | BTR_ESTIMATE,
					   &cursor, &mtr);
	}

	mtr_commit(&mtr);

	/* We have the path information for the range in path1 and path2.
	Walk both paths slot by slot until either one ends. */

	for (level = 0; ; level++) {
		ut_ad(level < BTR_PATH_ARRAY_N_SLOTS);

		lo = &path1[level];
		hi = &path2[level];

		if (lo->nth_rec == ULINT_UNDEFINED
		    || hi->nth_rec == ULINT_UNDEFINED) {

			break;
		}

		/* diverged_lot is only ever set after diverged, so the
		three states below are mutually exclusive */

		if (diverged_lot) {

			n_rows = (n_rows * (lo->n_recs + hi->n_recs))
				/ 2;

		} else if (diverged) {

			if (lo->nth_rec < lo->n_recs
			    || hi->nth_rec > 1) {

				diverged_lot = TRUE;
				divergence_level = level;

				n_rows = 0;

				if (lo->nth_rec < lo->n_recs) {
					n_rows += lo->n_recs
						- lo->nth_rec;
				}

				if (hi->nth_rec > 1) {
					n_rows += hi->nth_rec - 1;
				}
			}

		} else if (lo->nth_rec != hi->nth_rec) {

			diverged = TRUE;

			if (!(lo->nth_rec < hi->nth_rec)) {
				/* Maybe the tree has changed between
				searches */

				return(10);
			}

			n_rows = hi->nth_rec - lo->nth_rec;

			if (n_rows > 1) {
				diverged_lot = TRUE;
				divergence_level = level;
			}
		}
	}

	if (level > divergence_level + 1) {
		/* In trees whose height is > 1 our algorithm
		tends to underestimate: multiply the estimate
		by 2: */

		n_rows = n_rows * 2;
	}

	/* Do not estimate the number of rows in the range
	to over 1 / 2 of the estimated rows in the whole
	table */

	if (n_rows > index->table->stat_n_rows / 2) {
		n_rows = index->table->stat_n_rows / 2;

		/* If there are just 0 or 1 rows in the table,
		then we estimate all rows are in the range */

		if (n_rows == 0) {
			n_rows = index->table->stat_n_rows;
		}
	}

	return(n_rows);
}

/***********************************************************************
Estimates the number of different key values in a given index, for
each n-column prefix of the index where n <= dict_index_get_n_unique(index).
The estimates are stored in the array index->stat_n_diff_key_vals.
*/

void
btr_estimate_number_of_different_key_vals(
/*======================================*/
	dict_index_t*	index)	/* in: index */
{
	ulint		offsets_rec_[REC_OFFS_NORMAL_SIZE];
	ulint		offsets_next_rec_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets_rec		= offsets_rec_;
	ulint*		offsets_next_rec	= offsets_next_rec_;
	ulint*		offsets_swap;
	mem_heap_t*	heap			= NULL;
	btr_cur_t	cursor;
	mtr_t		mtr;
	page_t*		page;
	rec_t*		rec;
	rec_t*		next_rec;
	rec_t*		supremum;
	ib_longlong*	n_diff;
	ulint		n_cols;
	ulint		matched_fields;
	ulint		matched_bytes;
	ulint		not_empty_flag		= 0;
	ulint		total_external_size	= 0;
	ulint		add_on;
	ulint		i;
	ulint		j;

	/* The first element of each stack array carries its capacity */
	offsets_rec_[0] = sizeof(offsets_rec_) / sizeof(offsets_rec_[0]);
	offsets_next_rec_[0]
		= sizeof(offsets_next_rec_) / sizeof(offsets_next_rec_[0]);

	n_cols = dict_index_get_n_unique(index);

	/* One zero-initialized counter per prefix length 0 .. n_cols */

	n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong));

	memset(n_diff, 0, (n_cols + 1) * sizeof(ib_longlong));

	/* We sample some pages in the index to get an estimate */

	for (i = 0; i < BTR_KEY_VAL_ESTIMATE_N_PAGES; i++) {
		mtr_start(&mtr);

		btr_cur_open_at_rnd_pos(index, BTR_SEARCH_LEAF, &cursor, &mtr);

		/* Count the number of different key values for each prefix of
		the key on this index page. If the prefix does not determine
		the index record uniquely in the B-tree, then we subtract one
		because otherwise our algorithm would give a wrong estimate
		for an index where there is just one key value. */

		page = btr_cur_get_page(&cursor);

		supremum = page_get_supremum_rec(page);
		rec = page_rec_get_next(page_get_infimum_rec(page));

		if (rec != supremum) {
			not_empty_flag = 1;
			offsets_rec = rec_get_offsets(rec, index, offsets_rec,
						      ULINT_UNDEFINED, &heap);

			/* Compare each record with its successor until the
			successor is the supremum */

			for (next_rec = page_rec_get_next(rec);
			     next_rec != supremum;
			     next_rec = page_rec_get_next(rec)) {

				matched_fields = 0;
				matched_bytes = 0;
				offsets_next_rec = rec_get_offsets(
					next_rec, index, offsets_next_rec,
					n_cols, &heap);

				cmp_rec_rec_with_match(rec, next_rec,
						       offsets_rec,
						       offsets_next_rec,
						       index, &matched_fields,
						       &matched_bytes);

				for (j = matched_fields + 1; j <= n_cols;
				     j++) {
					/* We add one if this index record
					has a different prefix from the
					previous */

					n_diff[j]++;
				}

				total_external_size
					+= btr_rec_get_externally_stored_len(
						rec, offsets_rec);

				rec = next_rec;

				/* Initialize offsets_rec for the next round
				and assign the old offsets_rec buffer to
				offsets_next_rec. */

				offsets_swap = offsets_rec;
				offsets_rec = offsets_next_rec;
				offsets_next_rec = offsets_swap;
			}
		}

		if (n_cols == dict_index_get_n_unique_in_tree(index)) {

			/* If there is more than one leaf page in the tree,
			we add one because we know that the first record
			on the page certainly had a different prefix than the
			last record on the previous index page in the
			alphabetical order. Before this fix, if there was
			just one big record on each clustered index page, the
			algorithm grossly underestimated the number of rows
			in the table. */

			if (btr_page_get_prev(page, &mtr) != FIL_NULL
			    || btr_page_get_next(page, &mtr) != FIL_NULL) {

				n_diff[n_cols]++;
			}
		}

		/* Account for the externally stored part of the record the
		scan stopped on */

		offsets_rec = rec_get_offsets(rec, index, offsets_rec,
					      ULINT_UNDEFINED, &heap);
		total_external_size += btr_rec_get_externally_stored_len(
			rec, offsets_rec);
		mtr_commit(&mtr);
	}

	/* If we saw k borders between different key values on
	BTR_KEY_VAL_ESTIMATE_N_PAGES leaf pages, we can estimate how many
	there will be in index->stat_n_leaf_pages */

	/* We must take into account that our sample actually represents
	also the pages used for external storage of fields (those pages are
	included in index->stat_n_leaf_pages) */

	for (j = 0; j <= n_cols; j++) {
		index->stat_n_diff_key_vals[j]
			= ((n_diff[j]
			    * (ib_longlong)index->stat_n_leaf_pages
			    + BTR_KEY_VAL_ESTIMATE_N_PAGES - 1
			    + total_external_size
			    + not_empty_flag)
			   / (BTR_KEY_VAL_ESTIMATE_N_PAGES
			      + total_external_size));

		/* If the tree is small, smaller than
		10 * BTR_KEY_VAL_ESTIMATE_N_PAGES + total_external_size, then
		the above estimate is ok. For bigger trees it is common that we
		do not see any borders between key values in the few pages
		we pick. But still there may be BTR_KEY_VAL_ESTIMATE_N_PAGES
		different key values, or even more. Let us try to approximate
		that: */

		add_on = index->stat_n_leaf_pages
			/ (10 * (BTR_KEY_VAL_ESTIMATE_N_PAGES
				 + total_external_size));

		if (add_on > BTR_KEY_VAL_ESTIMATE_N_PAGES) {
			add_on = BTR_KEY_VAL_ESTIMATE_N_PAGES;
		}

		index->stat_n_diff_key_vals[j] += add_on;
	}

	mem_free(n_diff);

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
}

/*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/

/***************************************************************
Gets the externally stored size of a record, in units of a database page.
*/ -static -ulint -btr_rec_get_externally_stored_len( -/*==============================*/ - /* out: externally stored part, - in units of a database page */ - rec_t* rec, /* in: record */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - ulint n_fields; - byte* data; - ulint local_len; - ulint extern_len; - ulint total_extern_len = 0; - ulint i; - - ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); - n_fields = rec_offs_n_fields(offsets); - - for (i = 0; i < n_fields; i++) { - if (rec_offs_nth_extern(offsets, i)) { - - data = rec_get_nth_field(rec, offsets, i, &local_len); - - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - - extern_len = mach_read_from_4(data + local_len - + BTR_EXTERN_LEN + 4); - - total_extern_len += ut_calc_align(extern_len, - UNIV_PAGE_SIZE); - } - } - - return(total_extern_len / UNIV_PAGE_SIZE); -} - -/*********************************************************************** -Sets the ownership bit of an externally stored field in a record. */ -static -void -btr_cur_set_ownership_of_extern_field( -/*==================================*/ - rec_t* rec, /* in: clustered index record */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint i, /* in: field number */ - ibool val, /* in: value to set */ - mtr_t* mtr) /* in: mtr */ -{ - byte* data; - ulint local_len; - ulint byte_val; - - data = rec_get_nth_field(rec, offsets, i, &local_len); - - ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - - byte_val = mach_read_from_1(data + local_len + BTR_EXTERN_LEN); - - if (val) { - byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG); - } else { - byte_val = byte_val | BTR_EXTERN_OWNER_FLAG; - } - - mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, byte_val, - MLOG_1BYTE, mtr); -} - -/*********************************************************************** -Marks not updated extern fields as not-owned by this record. 
The ownership -is transferred to the updated record which is inserted elsewhere in the -index tree. In purge only the owner of externally stored field is allowed -to free the field. */ - -void -btr_cur_mark_extern_inherited_fields( -/*=================================*/ - rec_t* rec, /* in: record in a clustered index */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - upd_t* update, /* in: update vector */ - mtr_t* mtr) /* in: mtr */ -{ - ibool is_updated; - ulint n; - ulint j; - ulint i; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); - n = rec_offs_n_fields(offsets); - - for (i = 0; i < n; i++) { - if (rec_offs_nth_extern(offsets, i)) { - - /* Check it is not in updated fields */ - is_updated = FALSE; - - if (update) { - for (j = 0; j < upd_get_n_fields(update); - j++) { - if (upd_get_nth_field(update, j) - ->field_no == i) { - is_updated = TRUE; - } - } - } - - if (!is_updated) { - btr_cur_set_ownership_of_extern_field( - rec, offsets, i, FALSE, mtr); - } - } - } -} - -/*********************************************************************** -The complement of the previous function: in an update entry may inherit -some externally stored fields from a record. We must mark them as inherited -in entry, so that they are not freed in a rollback. 
*/

void
btr_cur_mark_dtuple_inherited_extern(
/*=================================*/
	dtuple_t*	entry,		/* in/out: updated entry to be
					inserted to clustered index; the
					field references of inherited fields
					are modified in place */
	ulint*		ext_vec,	/* in: array of extern fields in the
					original record; if NULL, the
					function does nothing */
	ulint		n_ext_vec,	/* in: number of elements in ext_vec */
	upd_t*		update)		/* in: update vector */
{
	dfield_t*	dfield;
	ulint		byte_val;
	byte*		data;
	ulint		len;
	ibool		is_updated;
	ulint		j;
	ulint		i;

	if (ext_vec == NULL) {

		return;
	}

	for (i = 0; i < n_ext_vec; i++) {

		/* Check ext_vec[i] is in updated fields */
		is_updated = FALSE;

		for (j = 0; j < upd_get_n_fields(update); j++) {
			if (upd_get_nth_field(update, j)->field_no
			    == ext_vec[i]) {
				is_updated = TRUE;

				/* One match decides it; no need to scan
				the rest of the update vector */
				break;
			}
		}

		if (!is_updated) {
			dfield = dtuple_get_nth_field(entry, ext_vec[i]);

			data = (byte*) dfield_get_data(dfield);
			len = dfield_get_len(dfield);

			/* The field reference occupies the last
			BTR_EXTERN_FIELD_REF_SIZE bytes of the field. Check
			the length first: len is unsigned, so a too short
			field would wrap around in the subtraction below. */

			ut_a(len >= BTR_EXTERN_FIELD_REF_SIZE);

			len -= BTR_EXTERN_FIELD_REF_SIZE;

			byte_val = mach_read_from_1(data + len
						    + BTR_EXTERN_LEN);

			byte_val = byte_val | BTR_EXTERN_INHERITED_FLAG;

			mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val);
		}
	}
}

/***********************************************************************
Marks all extern fields in a record as owned by the record. This function
should be called if the delete mark of a record is removed: a not delete
marked record always owns all its extern fields.
*/ -static -void -btr_cur_unmark_extern_fields( -/*=========================*/ - rec_t* rec, /* in: record in a clustered index */ - mtr_t* mtr, /* in: mtr */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - ulint n; - ulint i; - - ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); - n = rec_offs_n_fields(offsets); - - for (i = 0; i < n; i++) { - if (rec_offs_nth_extern(offsets, i)) { - - btr_cur_set_ownership_of_extern_field(rec, offsets, i, - TRUE, mtr); - } - } -} - -/*********************************************************************** -Marks all extern fields in a dtuple as owned by the record. */ - -void -btr_cur_unmark_dtuple_extern_fields( -/*================================*/ - dtuple_t* entry, /* in: clustered index entry */ - ulint* ext_vec, /* in: array of numbers of fields - which have been stored externally */ - ulint n_ext_vec) /* in: number of elements in ext_vec */ -{ - dfield_t* dfield; - ulint byte_val; - byte* data; - ulint len; - ulint i; - - for (i = 0; i < n_ext_vec; i++) { - dfield = dtuple_get_nth_field(entry, ext_vec[i]); - - data = (byte*) dfield_get_data(dfield); - len = dfield_get_len(dfield); - - len -= BTR_EXTERN_FIELD_REF_SIZE; - - byte_val = mach_read_from_1(data + len + BTR_EXTERN_LEN); - - byte_val = byte_val & (~BTR_EXTERN_OWNER_FLAG); - - mach_write_to_1(data + len + BTR_EXTERN_LEN, byte_val); - } -} - -/*********************************************************************** -Stores the positions of the fields marked as extern storage in the update -vector, and also those fields who are marked as extern storage in rec -and not mentioned in updated fields. We use this function to remember -which fields we must mark as extern storage in a record inserted for an -update. 
*/ - -ulint -btr_push_update_extern_fields( -/*==========================*/ - /* out: number of values stored in ext_vect */ - ulint* ext_vect,/* in: array of ulints, must be preallocated - to have space for all fields in rec */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - upd_t* update) /* in: update vector or NULL */ -{ - ulint n_pushed = 0; - ibool is_updated; - ulint n; - ulint j; - ulint i; - - if (update) { - n = upd_get_n_fields(update); - - for (i = 0; i < n; i++) { - - if (upd_get_nth_field(update, i)->extern_storage) { - - ext_vect[n_pushed] = upd_get_nth_field( - update, i)->field_no; - - n_pushed++; - } - } - } - - n = rec_offs_n_fields(offsets); - - for (i = 0; i < n; i++) { - if (rec_offs_nth_extern(offsets, i)) { - - /* Check it is not in updated fields */ - is_updated = FALSE; - - if (update) { - for (j = 0; j < upd_get_n_fields(update); - j++) { - if (upd_get_nth_field(update, j) - ->field_no == i) { - is_updated = TRUE; - } - } - } - - if (!is_updated) { - ext_vect[n_pushed] = i; - n_pushed++; - } - } - } - - return(n_pushed); -} - -/*********************************************************************** -Returns the length of a BLOB part stored on the header page. */ -static -ulint -btr_blob_get_part_len( -/*==================*/ - /* out: part length */ - byte* blob_header) /* in: blob header */ -{ - return(mach_read_from_4(blob_header + BTR_BLOB_HDR_PART_LEN)); -} - -/*********************************************************************** -Returns the page number where the next BLOB part is stored. */ -static -ulint -btr_blob_get_next_page_no( -/*======================*/ - /* out: page number or FIL_NULL if - no more pages */ - byte* blob_header) /* in: blob header */ -{ - return(mach_read_from_4(blob_header + BTR_BLOB_HDR_NEXT_PAGE_NO)); -} - -/*********************************************************************** -Stores the fields in big_rec_vec to the tablespace and puts pointers to -them in rec. 
The fields are stored on pages allocated from leaf node -file segment of the index tree. */ - -ulint -btr_store_big_rec_extern_fields( -/*============================*/ - /* out: DB_SUCCESS or error */ - dict_index_t* index, /* in: index of rec; the index tree - MUST be X-latched */ - rec_t* rec, /* in: record */ - const ulint* offsets, /* in: rec_get_offsets(rec, index); - the "external storage" flags in offsets - will not correspond to rec when - this function returns */ - big_rec_t* big_rec_vec, /* in: vector containing fields - to be stored externally */ - mtr_t* local_mtr __attribute__((unused))) /* in: mtr - containing the latch to rec and to the - tree */ -{ - byte* data; - ulint local_len; - ulint extern_len; - ulint store_len; - ulint page_no; - page_t* page; - ulint space_id; - page_t* prev_page; - page_t* rec_page; - ulint prev_page_no; - ulint hint_page_no; - ulint i; - mtr_t mtr; - - ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(local_mtr, buf_block_align(rec), - MTR_MEMO_PAGE_X_FIX)); - ut_a(index->type & DICT_CLUSTERED); - - space_id = buf_frame_get_space_id(rec); - - /* We have to create a file segment to the tablespace - for each field and put the pointer to the field in rec */ - - for (i = 0; i < big_rec_vec->n_fields; i++) { - - data = rec_get_nth_field(rec, offsets, - big_rec_vec->fields[i].field_no, - &local_len); - ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - extern_len = big_rec_vec->fields[i].len; - - ut_a(extern_len > 0); - - prev_page_no = FIL_NULL; - - while (extern_len > 0) { - mtr_start(&mtr); - - if (prev_page_no == FIL_NULL) { - hint_page_no = buf_frame_get_page_no(rec) + 1; - } else { - hint_page_no = prev_page_no + 1; - } - - page = btr_page_alloc(index, hint_page_no, - FSP_NO_DIR, 0, &mtr); - if (page == NULL) { - - mtr_commit(&mtr); - - return(DB_OUT_OF_FILE_SPACE); - } - - 
mlog_write_ulint(page + FIL_PAGE_TYPE, - FIL_PAGE_TYPE_BLOB, - MLOG_2BYTES, &mtr); - - page_no = buf_frame_get_page_no(page); - - if (prev_page_no != FIL_NULL) { - prev_page = buf_page_get(space_id, - prev_page_no, - RW_X_LATCH, &mtr); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(prev_page, - SYNC_EXTERN_STORAGE); -#endif /* UNIV_SYNC_DEBUG */ - - mlog_write_ulint(prev_page + FIL_PAGE_DATA - + BTR_BLOB_HDR_NEXT_PAGE_NO, - page_no, MLOG_4BYTES, &mtr); - } - - if (extern_len > (UNIV_PAGE_SIZE - FIL_PAGE_DATA - - BTR_BLOB_HDR_SIZE - - FIL_PAGE_DATA_END)) { - store_len = UNIV_PAGE_SIZE - FIL_PAGE_DATA - - BTR_BLOB_HDR_SIZE - - FIL_PAGE_DATA_END; - } else { - store_len = extern_len; - } - - mlog_write_string(page + FIL_PAGE_DATA - + BTR_BLOB_HDR_SIZE, - big_rec_vec->fields[i].data - + big_rec_vec->fields[i].len - - extern_len, - store_len, &mtr); - mlog_write_ulint(page + FIL_PAGE_DATA - + BTR_BLOB_HDR_PART_LEN, - store_len, MLOG_4BYTES, &mtr); - mlog_write_ulint(page + FIL_PAGE_DATA - + BTR_BLOB_HDR_NEXT_PAGE_NO, - FIL_NULL, MLOG_4BYTES, &mtr); - - extern_len -= store_len; - - rec_page = buf_page_get(space_id, - buf_frame_get_page_no(data), - RW_X_LATCH, &mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK); -#endif /* UNIV_SYNC_DEBUG */ - mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, 0, - MLOG_4BYTES, &mtr); - mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4, - big_rec_vec->fields[i].len - - extern_len, - MLOG_4BYTES, &mtr); - - if (prev_page_no == FIL_NULL) { - mlog_write_ulint(data + local_len - + BTR_EXTERN_SPACE_ID, - space_id, - MLOG_4BYTES, &mtr); - - mlog_write_ulint(data + local_len - + BTR_EXTERN_PAGE_NO, - page_no, - MLOG_4BYTES, &mtr); - - mlog_write_ulint(data + local_len - + BTR_EXTERN_OFFSET, - FIL_PAGE_DATA, - MLOG_4BYTES, &mtr); - - /* Set the bit denoting that this field - in rec is stored externally */ - - rec_set_nth_field_extern_bit( - rec, index, - big_rec_vec->fields[i].field_no, - TRUE, &mtr); - 
} - - prev_page_no = page_no; - - mtr_commit(&mtr); - } - } - - return(DB_SUCCESS); -} - -/*********************************************************************** -Frees the space in an externally stored field to the file space -management if the field in data is owned the externally stored field, -in a rollback we may have the additional condition that the field must -not be inherited. */ - -void -btr_free_externally_stored_field( -/*=============================*/ - dict_index_t* index, /* in: index of the data, the index - tree MUST be X-latched; if the tree - height is 1, then also the root page - must be X-latched! (this is relevant - in the case this function is called - from purge where 'data' is located on - an undo log page, not an index - page) */ - byte* data, /* in: internally stored data - + reference to the externally - stored part */ - ulint local_len, /* in: length of data */ - ibool do_not_free_inherited,/* in: TRUE if called in a - rollback and we do not want to free - inherited fields */ - mtr_t* local_mtr __attribute__((unused))) /* in: mtr - containing the latch to data an an - X-latch to the index tree */ -{ - page_t* page; - page_t* rec_page; - ulint space_id; - ulint page_no; - ulint offset; - ulint extern_len; - ulint next_page_no; - ulint part_len; - mtr_t mtr; - - ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(local_mtr, buf_block_align(data), - MTR_MEMO_PAGE_X_FIX)); - ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - - for (;;) { - mtr_start(&mtr); - - rec_page = buf_page_get(buf_frame_get_space_id(data), - buf_frame_get_page_no(data), - RW_X_LATCH, &mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK); -#endif /* UNIV_SYNC_DEBUG */ - space_id = mach_read_from_4(data + local_len - + BTR_EXTERN_SPACE_ID); - - page_no = mach_read_from_4(data + local_len - + 
BTR_EXTERN_PAGE_NO); - - offset = mach_read_from_4(data + local_len - + BTR_EXTERN_OFFSET); - extern_len = mach_read_from_4(data + local_len - + BTR_EXTERN_LEN + 4); - - /* If extern len is 0, then there is no external storage data - at all */ - - if (extern_len == 0) { - - mtr_commit(&mtr); - - return; - } - - if (mach_read_from_1(data + local_len + BTR_EXTERN_LEN) - & BTR_EXTERN_OWNER_FLAG) { - /* This field does not own the externally - stored field: do not free! */ - - mtr_commit(&mtr); - - return; - } - - if (do_not_free_inherited - && mach_read_from_1(data + local_len + BTR_EXTERN_LEN) - & BTR_EXTERN_INHERITED_FLAG) { - /* Rollback and inherited field: do not free! */ - - mtr_commit(&mtr); - - return; - } - - page = buf_page_get(space_id, page_no, RW_X_LATCH, &mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE); -#endif /* UNIV_SYNC_DEBUG */ - next_page_no = mach_read_from_4(page + FIL_PAGE_DATA - + BTR_BLOB_HDR_NEXT_PAGE_NO); - - part_len = btr_blob_get_part_len(page + FIL_PAGE_DATA); - - ut_a(extern_len >= part_len); - - /* We must supply the page level (= 0) as an argument - because we did not store it on the page (we save the space - overhead from an index page header. */ - - btr_page_free_low(index, page, 0, &mtr); - - mlog_write_ulint(data + local_len + BTR_EXTERN_PAGE_NO, - next_page_no, - MLOG_4BYTES, &mtr); - mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4, - extern_len - part_len, - MLOG_4BYTES, &mtr); - if (next_page_no == FIL_NULL) { - ut_a(extern_len - part_len == 0); - } - - if (extern_len - part_len == 0) { - ut_a(next_page_no == FIL_NULL); - } - - mtr_commit(&mtr); - } -} - -/*************************************************************** -Frees the externally stored fields for a record. 
*/

void
btr_rec_free_externally_stored_fields(
/*==================================*/
	dict_index_t*	index,	/* in: index of the data, the index
				tree MUST be X-latched */
	rec_t*		rec,	/* in: record */
	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
	ibool		do_not_free_inherited,/* in: TRUE if called in a
				rollback and we do not want to free
				inherited fields */
	mtr_t*		mtr)	/* in: mini-transaction handle which contains
				an X-latch to record page and to the index
				tree */
{
	byte*	field;
	ulint	field_len;
	ulint	n_fields;
	ulint	field_no;

	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(mtr_memo_contains(mtr, buf_block_align(rec),
				MTR_MEMO_PAGE_X_FIX));
	ut_ad(dict_table_is_comp(index->table) == !!rec_offs_comp(offsets));

	/* Free possible externally stored fields in the record */

	n_fields = rec_offs_n_fields(offsets);

	for (field_no = 0; field_no < n_fields; field_no++) {

		if (!rec_offs_nth_extern(offsets, field_no)) {
			/* Not flagged as externally stored: skip */

			continue;
		}

		field = rec_get_nth_field(rec, offsets, field_no, &field_len);

		btr_free_externally_stored_field(index, field, field_len,
						 do_not_free_inherited, mtr);
	}
}

/***************************************************************
Frees the externally stored fields for a record, if the field is mentioned
in the update vector.
*/ -static -void -btr_rec_free_updated_extern_fields( -/*===============================*/ - dict_index_t* index, /* in: index of rec; the index tree MUST be - X-latched */ - rec_t* rec, /* in: record */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - upd_t* update, /* in: update vector */ - ibool do_not_free_inherited,/* in: TRUE if called in a - rollback and we do not want to free - inherited fields */ - mtr_t* mtr) /* in: mini-transaction handle which contains - an X-latch to record page and to the tree */ -{ - upd_field_t* ufield; - ulint n_fields; - byte* data; - ulint len; - ulint i; - - ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), - MTR_MEMO_PAGE_X_FIX)); - - /* Free possible externally stored fields in the record */ - - n_fields = upd_get_n_fields(update); - - for (i = 0; i < n_fields; i++) { - ufield = upd_get_nth_field(update, i); - - if (rec_offs_nth_extern(offsets, ufield->field_no)) { - - data = rec_get_nth_field(rec, offsets, - ufield->field_no, &len); - btr_free_externally_stored_field(index, data, len, - do_not_free_inherited, - mtr); - } - } -} - -/*********************************************************************** -Copies an externally stored field of a record to mem heap. Parameter -data contains a pointer to 'internally' stored part of the field: -possibly some data, and the reference to the externally stored part in -the last 20 bytes of data. 
*/ - -byte* -btr_copy_externally_stored_field( -/*=============================*/ - /* out: the whole field copied to heap */ - ulint* len, /* out: length of the whole field */ - byte* data, /* in: 'internally' stored part of the - field containing also the reference to - the external part */ - ulint local_len,/* in: length of data */ - mem_heap_t* heap) /* in: mem heap */ -{ - page_t* page; - ulint space_id; - ulint page_no; - ulint offset; - ulint extern_len; - byte* blob_header; - ulint part_len; - byte* buf; - ulint copied_len; - mtr_t mtr; - - ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); - - local_len -= BTR_EXTERN_FIELD_REF_SIZE; - - space_id = mach_read_from_4(data + local_len + BTR_EXTERN_SPACE_ID); - - page_no = mach_read_from_4(data + local_len + BTR_EXTERN_PAGE_NO); - - offset = mach_read_from_4(data + local_len + BTR_EXTERN_OFFSET); - - /* Currently a BLOB cannot be bigger that 4 GB; we - leave the 4 upper bytes in the length field unused */ - - extern_len = mach_read_from_4(data + local_len + BTR_EXTERN_LEN + 4); - - buf = mem_heap_alloc(heap, local_len + extern_len); - - ut_memcpy(buf, data, local_len); - copied_len = local_len; - - if (extern_len == 0) { - *len = copied_len; - - return(buf); - } - - for (;;) { - mtr_start(&mtr); - - page = buf_page_get(space_id, page_no, RW_S_LATCH, &mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_EXTERN_STORAGE); -#endif /* UNIV_SYNC_DEBUG */ - blob_header = page + offset; - - part_len = btr_blob_get_part_len(blob_header); - - ut_memcpy(buf + copied_len, blob_header + BTR_BLOB_HDR_SIZE, - part_len); - copied_len += part_len; - - page_no = btr_blob_get_next_page_no(blob_header); - - mtr_commit(&mtr); - - if (page_no == FIL_NULL) { - ut_a(copied_len == local_len + extern_len); - - *len = copied_len; - - return(buf); - } - - /* On other BLOB pages except the first the BLOB header - always is at the page data start: */ - - offset = FIL_PAGE_DATA; - - ut_a(copied_len < local_len + extern_len); - } -} - 
-/*********************************************************************** -Copies an externally stored field of a record to mem heap. */ - -byte* -btr_rec_copy_externally_stored_field( -/*=================================*/ - /* out: the field copied to heap */ - rec_t* rec, /* in: record */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint no, /* in: field number */ - ulint* len, /* out: length of the field */ - mem_heap_t* heap) /* in: mem heap */ -{ - ulint local_len; - byte* data; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_a(rec_offs_nth_extern(offsets, no)); - - /* An externally stored field can contain some initial - data from the field, and in the last 20 bytes it has the - space id, page number, and offset where the rest of the - field data is stored, and the data length in addition to - the data stored locally. We may need to store some data - locally to get the local record length above the 128 byte - limit so that field offsets are stored in two bytes, and - the extern bit is available in those two bytes. */ - - data = rec_get_nth_field(rec, offsets, no, &local_len); - - return(btr_copy_externally_stored_field(len, data, local_len, heap)); -} diff --git a/storage/innobase/btr/btr0pcur.c b/storage/innobase/btr/btr0pcur.c deleted file mode 100644 index 65b3c90c809..00000000000 --- a/storage/innobase/btr/btr0pcur.c +++ /dev/null @@ -1,565 +0,0 @@ -/****************************************************** -The index tree persistent cursor - -(c) 1996 Innobase Oy - -Created 2/23/1996 Heikki Tuuri -*******************************************************/ - -#include "btr0pcur.h" - -#ifdef UNIV_NONINL -#include "btr0pcur.ic" -#endif - -#include "ut0byte.h" -#include "rem0cmp.h" -#include "trx0trx.h" - -/****************************************************************** -Allocates memory for a persistent cursor object and initializes the cursor. 
*/ - -btr_pcur_t* -btr_pcur_create_for_mysql(void) -/*============================*/ - /* out, own: persistent cursor */ -{ - btr_pcur_t* pcur; - - pcur = mem_alloc(sizeof(btr_pcur_t)); - - pcur->btr_cur.index = NULL; - btr_pcur_init(pcur); - - return(pcur); -} - -/****************************************************************** -Frees the memory for a persistent cursor object. */ - -void -btr_pcur_free_for_mysql( -/*====================*/ - btr_pcur_t* cursor) /* in, own: persistent cursor */ -{ - if (cursor->old_rec_buf != NULL) { - - mem_free(cursor->old_rec_buf); - - cursor->old_rec_buf = NULL; - } - - cursor->btr_cur.page_cur.rec = NULL; - cursor->old_rec = NULL; - cursor->old_n_fields = 0; - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; - - cursor->latch_mode = BTR_NO_LATCHES; - cursor->pos_state = BTR_PCUR_NOT_POSITIONED; - - mem_free(cursor); -} - -/****************************************************************** -The position of the cursor is stored by taking an initial segment of the -record the cursor is positioned on, before, or after, and copying it to the -cursor data structure, or just setting a flag if the cursor id before the -first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the -page where the cursor is positioned must not be empty if the index tree is -not totally empty! 
*/ - -void -btr_pcur_store_position( -/*====================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr) /* in: mtr */ -{ - page_cur_t* page_cursor; - rec_t* rec; - dict_index_t* index; - page_t* page; - ulint offs; - - ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor)); - - page_cursor = btr_pcur_get_page_cur(cursor); - - rec = page_cur_get_rec(page_cursor); - page = page_align(rec); - offs = page_offset(rec); - - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_S_FIX) - || mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - ut_a(cursor->latch_mode != BTR_NO_LATCHES); - - if (UNIV_UNLIKELY(page_get_n_recs(page) == 0)) { - /* It must be an empty index tree; NOTE that in this case - we do not store the modify_clock, but always do a search - if we restore the cursor position */ - - ut_a(btr_page_get_next(page, mtr) == FIL_NULL); - ut_a(btr_page_get_prev(page, mtr) == FIL_NULL); - - cursor->old_stored = BTR_PCUR_OLD_STORED; - - if (page_rec_is_supremum_low(offs)) { - - cursor->rel_pos = BTR_PCUR_AFTER_LAST_IN_TREE; - } else { - cursor->rel_pos = BTR_PCUR_BEFORE_FIRST_IN_TREE; - } - - return; - } - - if (page_rec_is_supremum_low(offs)) { - - rec = page_rec_get_prev(rec); - - cursor->rel_pos = BTR_PCUR_AFTER; - - } else if (page_rec_is_infimum_low(offs)) { - - rec = page_rec_get_next(rec); - - cursor->rel_pos = BTR_PCUR_BEFORE; - } else { - cursor->rel_pos = BTR_PCUR_ON; - } - - cursor->old_stored = BTR_PCUR_OLD_STORED; - cursor->old_rec = dict_index_copy_rec_order_prefix( - index, rec, &cursor->old_n_fields, - &cursor->old_rec_buf, &cursor->buf_size); - - cursor->block_when_stored = buf_block_align(page); - cursor->modify_clock = buf_block_get_modify_clock( - cursor->block_when_stored); -} - -/****************************************************************** -Copies the stored position of a pcur to 
another pcur. */ - -void -btr_pcur_copy_stored_position( -/*==========================*/ - btr_pcur_t* pcur_receive, /* in: pcur which will receive the - position info */ - btr_pcur_t* pcur_donate) /* in: pcur from which the info is - copied */ -{ - if (pcur_receive->old_rec_buf) { - mem_free(pcur_receive->old_rec_buf); - } - - ut_memcpy(pcur_receive, pcur_donate, sizeof(btr_pcur_t)); - - if (pcur_donate->old_rec_buf) { - - pcur_receive->old_rec_buf = mem_alloc(pcur_donate->buf_size); - - ut_memcpy(pcur_receive->old_rec_buf, pcur_donate->old_rec_buf, - pcur_donate->buf_size); - pcur_receive->old_rec = pcur_receive->old_rec_buf - + (pcur_donate->old_rec - pcur_donate->old_rec_buf); - } - - pcur_receive->old_n_fields = pcur_donate->old_n_fields; -} - -/****************************************************************** -Restores the stored position of a persistent cursor bufferfixing the page and -obtaining the specified latches. If the cursor position was saved when the -(1) cursor was positioned on a user record: this function restores the position -to the last record LESS OR EQUAL to the stored record; -(2) cursor was positioned on a page infimum record: restores the position to -the last record LESS than the user record which was the successor of the page -infimum; -(3) cursor was positioned on the page supremum: restores to the first record -GREATER than the user record which was the predecessor of the supremum. -(4) cursor was positioned before the first or after the last in an empty tree: -restores to before first or after the last in the tree. */ - -ibool -btr_pcur_restore_position( -/*======================*/ - /* out: TRUE if the cursor position - was stored when it was on a user record - and it can be restored on a user record - whose ordering fields are identical to - the ones of the original user record */ - ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... 
*/ - btr_pcur_t* cursor, /* in: detached persistent cursor */ - mtr_t* mtr) /* in: mtr */ -{ - dict_index_t* index; - page_t* page; - dtuple_t* tuple; - ulint mode; - ulint old_mode; - mem_heap_t* heap; - - index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor)); - - if (UNIV_UNLIKELY(cursor->old_stored != BTR_PCUR_OLD_STORED) - || UNIV_UNLIKELY(cursor->pos_state != BTR_PCUR_WAS_POSITIONED - && cursor->pos_state != BTR_PCUR_IS_POSITIONED)) { - ut_print_buf(stderr, cursor, sizeof(btr_pcur_t)); - if (cursor->trx_if_known) { - trx_print(stderr, cursor->trx_if_known, 0); - } - - ut_error; - } - - if (UNIV_UNLIKELY( - cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE - || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) { - - /* In these cases we do not try an optimistic restoration, - but always do a search */ - - btr_cur_open_at_index_side( - cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE, - index, latch_mode, btr_pcur_get_btr_cur(cursor), mtr); - - cursor->block_when_stored - = buf_block_align(btr_pcur_get_page(cursor)); - - return(FALSE); - } - - ut_a(cursor->old_rec); - ut_a(cursor->old_n_fields); - - page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor)); - - if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF) - || UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) { - /* Try optimistic restoration */ - - if (UNIV_LIKELY(buf_page_optimistic_get( - latch_mode, - cursor->block_when_stored, page, - cursor->modify_clock, mtr))) { - cursor->pos_state = BTR_PCUR_IS_POSITIONED; -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_TREE_NODE); -#endif /* UNIV_SYNC_DEBUG */ - if (cursor->rel_pos == BTR_PCUR_ON) { -#ifdef UNIV_DEBUG - rec_t* rec; - ulint* offsets1; - ulint* offsets2; -#endif /* UNIV_DEBUG */ - cursor->latch_mode = latch_mode; -#ifdef UNIV_DEBUG - rec = btr_pcur_get_rec(cursor); - - heap = mem_heap_create(256); - offsets1 = rec_get_offsets( - cursor->old_rec, index, NULL, - cursor->old_n_fields, &heap); - offsets2 = rec_get_offsets( - rec, index, NULL, - 
cursor->old_n_fields, &heap); - - ut_ad(!cmp_rec_rec(cursor->old_rec, - rec, offsets1, offsets2, - index)); - mem_heap_free(heap); -#endif /* UNIV_DEBUG */ - return(TRUE); - } - - return(FALSE); - } - } - - /* If optimistic restoration did not succeed, open the cursor anew */ - - heap = mem_heap_create(256); - - tuple = dict_index_build_data_tuple(index, cursor->old_rec, - cursor->old_n_fields, heap); - - /* Save the old search mode of the cursor */ - old_mode = cursor->search_mode; - - if (UNIV_LIKELY(cursor->rel_pos == BTR_PCUR_ON)) { - mode = PAGE_CUR_LE; - } else if (cursor->rel_pos == BTR_PCUR_AFTER) { - mode = PAGE_CUR_G; - } else { - ut_ad(cursor->rel_pos == BTR_PCUR_BEFORE); - mode = PAGE_CUR_L; - } - - btr_pcur_open_with_no_init(index, tuple, mode, latch_mode, - cursor, 0, mtr); - - /* Restore the old search mode */ - cursor->search_mode = old_mode; - - if (cursor->rel_pos == BTR_PCUR_ON - && btr_pcur_is_on_user_rec(cursor, mtr) - && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor), - rec_get_offsets( - btr_pcur_get_rec(cursor), index, - NULL, ULINT_UNDEFINED, &heap))) { - - /* We have to store the NEW value for the modify clock, since - the cursor can now be on a different page! But we can retain - the value of old_rec */ - - cursor->block_when_stored = buf_block_align( - btr_pcur_get_page(cursor)); - cursor->modify_clock = buf_block_get_modify_clock( - cursor->block_when_stored); - cursor->old_stored = BTR_PCUR_OLD_STORED; - - mem_heap_free(heap); - - return(TRUE); - } - - mem_heap_free(heap); - - /* We have to store new position information, modify_clock etc., - to the cursor because it can now be on a different page, the record - under it may have been removed, etc. */ - - btr_pcur_store_position(cursor, mtr); - - return(FALSE); -} - -/****************************************************************** -If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY, -releases the page latch and bufferfix reserved by the cursor. -NOTE! 
In the case of BTR_LEAF_MODIFY, there should not exist changes -made by the current mini-transaction to the data protected by the -cursor latch, as then the latch must not be released until mtr_commit. */ - -void -btr_pcur_release_leaf( -/*==================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* page; - - ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor)); - - btr_leaf_page_release(page, cursor->latch_mode, mtr); - - cursor->latch_mode = BTR_NO_LATCHES; - - cursor->pos_state = BTR_PCUR_WAS_POSITIONED; -} - -/************************************************************* -Moves the persistent cursor to the first record on the next page. Releases the -latch on the current page, and bufferunfixes it. Note that there must not be -modifications on the current page, as then the x-latch can be released only in -mtr_commit. */ - -void -btr_pcur_move_to_next_page( -/*=======================*/ - btr_pcur_t* cursor, /* in: persistent cursor; must be on the - last record of the current page */ - mtr_t* mtr) /* in: mtr */ -{ - ulint next_page_no; - ulint space; - page_t* page; - page_t* next_page; - - ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - ut_ad(btr_pcur_is_after_last_on_page(cursor, mtr)); - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; - - page = btr_pcur_get_page(cursor); - - next_page_no = btr_page_get_next(page, mtr); - space = buf_frame_get_space_id(page); - - ut_ad(next_page_no != FIL_NULL); - - next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_prev(next_page, mtr) == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - ut_a(page_is_comp(next_page) == page_is_comp(page)); - buf_block_align(next_page)->check_index_page_at_flush = TRUE; - - btr_leaf_page_release(page, 
cursor->latch_mode, mtr); - - page_cur_set_before_first(next_page, btr_pcur_get_page_cur(cursor)); - - page_check_dir(next_page); -} - -/************************************************************* -Moves the persistent cursor backward if it is on the first record of the page. -Commits mtr. Note that to prevent a possible deadlock, the operation -first stores the position of the cursor, commits mtr, acquires the necessary -latches and restores the cursor position again before returning. The -alphabetical position of the cursor is guaranteed to be sensible on -return, but it may happen that the cursor is not positioned on the last -record of any page, because the structure of the tree may have changed -during the time when the cursor had no latches. */ - -void -btr_pcur_move_backward_from_page( -/*=============================*/ - btr_pcur_t* cursor, /* in: persistent cursor, must be on the first - record of the current page */ - mtr_t* mtr) /* in: mtr */ -{ - ulint prev_page_no; - ulint space; - page_t* page; - page_t* prev_page; - ulint latch_mode; - ulint latch_mode2; - - ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - ut_ad(btr_pcur_is_before_first_on_page(cursor, mtr)); - ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr)); - - latch_mode = cursor->latch_mode; - - if (latch_mode == BTR_SEARCH_LEAF) { - - latch_mode2 = BTR_SEARCH_PREV; - - } else if (latch_mode == BTR_MODIFY_LEAF) { - - latch_mode2 = BTR_MODIFY_PREV; - } else { - latch_mode2 = 0; /* To eliminate compiler warning */ - ut_error; - } - - btr_pcur_store_position(cursor, mtr); - - mtr_commit(mtr); - - mtr_start(mtr); - - btr_pcur_restore_position(latch_mode2, cursor, mtr); - - page = btr_pcur_get_page(cursor); - - prev_page_no = btr_page_get_prev(page, mtr); - space = buf_frame_get_space_id(page); - - if (btr_pcur_is_before_first_on_page(cursor, mtr) - && (prev_page_no != FIL_NULL)) { - - prev_page = btr_pcur_get_btr_cur(cursor)->left_page; - - 
btr_leaf_page_release(page, latch_mode, mtr); - - page_cur_set_after_last(prev_page, - btr_pcur_get_page_cur(cursor)); - } else if (prev_page_no != FIL_NULL) { - - /* The repositioned cursor did not end on an infimum record on - a page. Cursor repositioning acquired a latch also on the - previous page, but we do not need the latch: release it. */ - - prev_page = btr_pcur_get_btr_cur(cursor)->left_page; - - btr_leaf_page_release(prev_page, latch_mode, mtr); - } - - cursor->latch_mode = latch_mode; - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; -} - -/************************************************************* -Moves the persistent cursor to the previous record in the tree. If no records -are left, the cursor stays 'before first in tree'. */ - -ibool -btr_pcur_move_to_prev( -/*==================*/ - /* out: TRUE if the cursor was not before first - in tree */ - btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the - function may release the page latch */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; - - if (btr_pcur_is_before_first_on_page(cursor, mtr)) { - - if (btr_pcur_is_before_first_in_tree(cursor, mtr)) { - - return(FALSE); - } - - btr_pcur_move_backward_from_page(cursor, mtr); - - return(TRUE); - } - - btr_pcur_move_to_prev_on_page(cursor, mtr); - - return(TRUE); -} - -/****************************************************************** -If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first -user record satisfying the search condition, in the case PAGE_CUR_L or -PAGE_CUR_LE, on the last user record. If no such user record exists, then -in the first case sets the cursor after last in tree, and in the latter case -before first in tree. The latching mode must be BTR_SEARCH_LEAF or -BTR_MODIFY_LEAF. 
*/ - -void -btr_pcur_open_on_user_rec( -/*======================*/ - dict_index_t* index, /* in: index */ - dtuple_t* tuple, /* in: tuple on which search done */ - ulint mode, /* in: PAGE_CUR_L, ... */ - ulint latch_mode, /* in: BTR_SEARCH_LEAF or - BTR_MODIFY_LEAF */ - btr_pcur_t* cursor, /* in: memory buffer for persistent - cursor */ - mtr_t* mtr) /* in: mtr */ -{ - btr_pcur_open(index, tuple, mode, latch_mode, cursor, mtr); - - if ((mode == PAGE_CUR_GE) || (mode == PAGE_CUR_G)) { - - if (btr_pcur_is_after_last_on_page(cursor, mtr)) { - - btr_pcur_move_to_next_user_rec(cursor, mtr); - } - } else { - ut_ad((mode == PAGE_CUR_LE) || (mode == PAGE_CUR_L)); - - /* Not implemented yet */ - - ut_error; - } -} diff --git a/storage/innobase/btr/btr0sea.c b/storage/innobase/btr/btr0sea.c deleted file mode 100644 index 8d296fdd061..00000000000 --- a/storage/innobase/btr/btr0sea.c +++ /dev/null @@ -1,1762 +0,0 @@ -/************************************************************************ -The index tree adaptive search - -(c) 1996 Innobase Oy - -Created 2/17/1996 Heikki Tuuri -*************************************************************************/ - -#include "btr0sea.h" -#ifdef UNIV_NONINL -#include "btr0sea.ic" -#endif - -#include "buf0buf.h" -#include "page0page.h" -#include "page0cur.h" -#include "btr0cur.h" -#include "btr0pcur.h" -#include "btr0btr.h" -#include "ha0ha.h" - -ulint btr_search_this_is_zero = 0; /* A dummy variable to fool the - compiler */ - -#ifdef UNIV_SEARCH_PERF_STAT -ulint btr_search_n_succ = 0; -ulint btr_search_n_hash_fail = 0; -#endif /* UNIV_SEARCH_PERF_STAT */ - -byte btr_sea_pad1[64]; /* padding to prevent other memory update - hotspots from residing on the same memory - cache line as btr_search_latch */ - -/* The latch protecting the adaptive search system: this latch protects the -(1) positions of records on those pages where a hash index has been built. 
-NOTE: It does not protect values of non-ordering fields within a record from -being updated in-place! We can use fact (1) to perform unique searches to -indexes. */ - -rw_lock_t* btr_search_latch_temp; /* We will allocate the latch from - dynamic memory to get it to the - same DRAM page as other hotspot - semaphores */ - -byte btr_sea_pad2[64]; /* padding to prevent other memory update - hotspots from residing on the same memory - cache line */ - -btr_search_sys_t* btr_search_sys; - -/* If the number of records on the page divided by this parameter -would have been successfully accessed using a hash index, the index -is then built on the page, assuming the global limit has been reached */ - -#define BTR_SEARCH_PAGE_BUILD_LIMIT 16 - -/* The global limit for consecutive potentially successful hash searches, -before hash index building is started */ - -#define BTR_SEARCH_BUILD_LIMIT 100 - -/************************************************************************ -Builds a hash index on a page with the given parameters. If the page already -has a hash index with different parameters, the old hash index is removed. -If index is non-NULL, this function checks if n_fields and n_bytes are -sensible values, and does not build a hash index if not. */ -static -void -btr_search_build_page_hash_index( -/*=============================*/ - dict_index_t* index, /* in: index for which to build, or NULL if - not known */ - page_t* page, /* in: index page, s- or x-latched */ - ulint n_fields,/* in: hash this many full fields */ - ulint n_bytes,/* in: hash this many bytes from the next - field */ - ibool left_side);/* in: hash for searches from left side? */ - -/********************************************************************* -This function should be called before reserving any btr search mutex, if -the intended operation might add nodes to the search system hash table. 
-Because of the latching order, once we have reserved the btr search system -latch, we cannot allocate a free frame from the buffer pool. Checks that -there is a free buffer frame allocated for hash table heap in the btr search -system. If not, allocates a free frames for the heap. This check makes it -probable that, when have reserved the btr search system latch and we need to -allocate a new node to the hash table, it will succeed. However, the check -will not guarantee success. */ -static -void -btr_search_check_free_space_in_heap(void) -/*=====================================*/ -{ - buf_frame_t* frame; - hash_table_t* table; - mem_heap_t* heap; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - table = btr_search_sys->hash_index; - - heap = table->heap; - - /* Note that we peek the value of heap->free_block without reserving - the latch: this is ok, because we will not guarantee that there will - be enough free space in the hash table. */ - - if (heap->free_block == NULL) { - frame = buf_frame_alloc(); - - rw_lock_x_lock(&btr_search_latch); - - if (heap->free_block == NULL) { - heap->free_block = frame; - } else { - buf_frame_free(frame); - } - - rw_lock_x_unlock(&btr_search_latch); - } -} - -/********************************************************************* -Creates and initializes the adaptive search system at a database start. 
*/ - -void -btr_search_sys_create( -/*==================*/ - ulint hash_size) /* in: hash index hash table size */ -{ - /* We allocate the search latch from dynamic memory: - see above at the global variable definition */ - - btr_search_latch_temp = mem_alloc(sizeof(rw_lock_t)); - - rw_lock_create(&btr_search_latch, SYNC_SEARCH_SYS); - - btr_search_sys = mem_alloc(sizeof(btr_search_sys_t)); - - btr_search_sys->hash_index = ha_create(TRUE, hash_size, 0, 0); - -} - -/********************************************************************* -Creates and initializes a search info struct. */ - -btr_search_t* -btr_search_info_create( -/*===================*/ - /* out, own: search info struct */ - mem_heap_t* heap) /* in: heap where created */ -{ - btr_search_t* info; - - info = mem_heap_alloc(heap, sizeof(btr_search_t)); - -#ifdef UNIV_DEBUG - info->magic_n = BTR_SEARCH_MAGIC_N; -#endif /* UNIV_DEBUG */ - - info->ref_count = 0; - info->root_guess = NULL; - - info->hash_analysis = 0; - info->n_hash_potential = 0; - - info->last_hash_succ = FALSE; - -#ifdef UNIV_SEARCH_PERF_STAT - info->n_hash_succ = 0; - info->n_hash_fail = 0; - info->n_patt_succ = 0; - info->n_searches = 0; -#endif /* UNIV_SEARCH_PERF_STAT */ - - /* Set some sensible values */ - info->n_fields = 1; - info->n_bytes = 0; - - info->left_side = TRUE; - - return(info); -} - -/********************************************************************* -Returns the value of ref_count. The value is protected by -btr_search_latch. */ -ulint -btr_search_info_get_ref_count( -/*==========================*/ - /* out: ref_count value. */ - btr_search_t* info) /* in: search info. 
*/ -{ - ulint ret; - - ut_ad(info); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - rw_lock_s_lock(&btr_search_latch); - ret = info->ref_count; - rw_lock_s_unlock(&btr_search_latch); - - return(ret); -} - -/************************************************************************* -Updates the search info of an index about hash successes. NOTE that info -is NOT protected by any semaphore, to save CPU time! Do not assume its fields -are consistent. */ -static -void -btr_search_info_update_hash( -/*========================*/ - btr_search_t* info, /* in/out: search info */ - btr_cur_t* cursor) /* in: cursor which was just positioned */ -{ - dict_index_t* index; - ulint n_unique; - int cmp; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - index = cursor->index; - - if (index->type & DICT_IBUF) { - /* So many deletes are performed on an insert buffer tree - that we do not consider a hash index useful on it: */ - - return; - } - - n_unique = dict_index_get_n_unique_in_tree(index); - - if (info->n_hash_potential == 0) { - - goto set_new_recomm; - } - - /* Test if the search would have succeeded using the recommended - hash prefix */ - - if (info->n_fields >= n_unique && cursor->up_match >= n_unique) { -increment_potential: - info->n_hash_potential++; - - return; - } - - cmp = ut_pair_cmp(info->n_fields, info->n_bytes, - cursor->low_match, cursor->low_bytes); - - if (info->left_side ? cmp <= 0 : cmp > 0) { - - goto set_new_recomm; - } - - cmp = ut_pair_cmp(info->n_fields, info->n_bytes, - cursor->up_match, cursor->up_bytes); - - if (info->left_side ? 
cmp <= 0 : cmp > 0) { - - goto increment_potential; - } - -set_new_recomm: - /* We have to set a new recommendation; skip the hash analysis - for a while to avoid unnecessary CPU time usage when there is no - chance for success */ - - info->hash_analysis = 0; - - cmp = ut_pair_cmp(cursor->up_match, cursor->up_bytes, - cursor->low_match, cursor->low_bytes); - if (cmp == 0) { - info->n_hash_potential = 0; - - /* For extra safety, we set some sensible values here */ - - info->n_fields = 1; - info->n_bytes = 0; - - info->left_side = TRUE; - - } else if (cmp > 0) { - info->n_hash_potential = 1; - - if (cursor->up_match >= n_unique) { - - info->n_fields = n_unique; - info->n_bytes = 0; - - } else if (cursor->low_match < cursor->up_match) { - - info->n_fields = cursor->low_match + 1; - info->n_bytes = 0; - } else { - info->n_fields = cursor->low_match; - info->n_bytes = cursor->low_bytes + 1; - } - - info->left_side = TRUE; - } else { - info->n_hash_potential = 1; - - if (cursor->low_match >= n_unique) { - - info->n_fields = n_unique; - info->n_bytes = 0; - - } else if (cursor->low_match > cursor->up_match) { - - info->n_fields = cursor->up_match + 1; - info->n_bytes = 0; - } else { - info->n_fields = cursor->up_match; - info->n_bytes = cursor->up_bytes + 1; - } - - info->left_side = FALSE; - } -} - -/************************************************************************* -Updates the block search info on hash successes. NOTE that info and -block->n_hash_helps, n_fields, n_bytes, side are NOT protected by any -semaphore, to save CPU time! Do not assume the fields are consistent. 
*/ -static -ibool -btr_search_update_block_hash_info( -/*==============================*/ - /* out: TRUE if building a (new) hash index on - the block is recommended */ - btr_search_t* info, /* in: search info */ - buf_block_t* block, /* in: buffer block */ - btr_cur_t* cursor __attribute__((unused))) - /* in: cursor */ -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); - ut_ad(rw_lock_own(&((buf_block_t*) block)->lock, RW_LOCK_SHARED) - || rw_lock_own(&((buf_block_t*) block)->lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(cursor); - - info->last_hash_succ = FALSE; - - ut_a(block->magic_n == BUF_BLOCK_MAGIC_N); - ut_ad(info->magic_n == BTR_SEARCH_MAGIC_N); - - if ((block->n_hash_helps > 0) - && (info->n_hash_potential > 0) - && (block->n_fields == info->n_fields) - && (block->n_bytes == info->n_bytes) - && (block->left_side == info->left_side)) { - - if ((block->is_hashed) - && (block->curr_n_fields == info->n_fields) - && (block->curr_n_bytes == info->n_bytes) - && (block->curr_left_side == info->left_side)) { - - /* The search would presumably have succeeded using - the hash index */ - - info->last_hash_succ = TRUE; - } - - block->n_hash_helps++; - } else { - block->n_hash_helps = 1; - block->n_fields = info->n_fields; - block->n_bytes = info->n_bytes; - block->left_side = info->left_side; - } - -#ifdef UNIV_DEBUG - if (cursor->index->table->does_not_fit_in_memory) { - block->n_hash_helps = 0; - } -#endif /* UNIV_DEBUG */ - - if ((block->n_hash_helps > page_get_n_recs(block->frame) - / BTR_SEARCH_PAGE_BUILD_LIMIT) - && (info->n_hash_potential >= BTR_SEARCH_BUILD_LIMIT)) { - - if ((!block->is_hashed) - || (block->n_hash_helps - > 2 * page_get_n_recs(block->frame)) - || (block->n_fields != block->curr_n_fields) - || (block->n_bytes != block->curr_n_bytes) - || (block->left_side != block->curr_left_side)) { - - /* Build a new hash index on the page */ - - return(TRUE); 
- } - } - - return(FALSE); -} - -/************************************************************************* -Updates a hash node reference when it has been unsuccessfully used in a -search which could have succeeded with the used hash parameters. This can -happen because when building a hash index for a page, we do not check -what happens at page boundaries, and therefore there can be misleading -hash nodes. Also, collisions in the fold value can lead to misleading -references. This function lazily fixes these imperfections in the hash -index. */ -static -void -btr_search_update_hash_ref( -/*=======================*/ - btr_search_t* info, /* in: search info */ - buf_block_t* block, /* in: buffer block where cursor positioned */ - btr_cur_t* cursor) /* in: cursor */ -{ - ulint fold; - rec_t* rec; - dulint index_id; - - ut_ad(cursor->flag == BTR_CUR_HASH_FAIL); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX)); - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED) - || rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(buf_block_align(btr_cur_get_rec(cursor)) == block); - ut_a(!block->is_hashed || block->index == cursor->index); - - if (block->is_hashed - && (info->n_hash_potential > 0) - && (block->curr_n_fields == info->n_fields) - && (block->curr_n_bytes == info->n_bytes) - && (block->curr_left_side == info->left_side)) { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - rec = btr_cur_get_rec(cursor); - - if (!page_rec_is_user_rec(rec)) { - - return; - } - - index_id = cursor->index->id; - fold = rec_fold(rec, - rec_get_offsets(rec, cursor->index, offsets_, - ULINT_UNDEFINED, &heap), - block->curr_n_fields, - block->curr_n_bytes, index_id); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - 
ha_insert_for_fold(btr_search_sys->hash_index, fold, rec); - } -} - -/************************************************************************* -Updates the search info. */ - -void -btr_search_info_update_slow( -/*========================*/ - btr_search_t* info, /* in/out: search info */ - btr_cur_t* cursor) /* in: cursor which was just positioned */ -{ - buf_block_t* block; - ibool build_index; - ulint* params; - ulint* params2; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - block = buf_block_align(btr_cur_get_rec(cursor)); - - /* NOTE that the following two function calls do NOT protect - info or block->n_fields etc. with any semaphore, to save CPU time! - We cannot assume the fields are consistent when we return from - those functions! */ - - btr_search_info_update_hash(info, cursor); - - build_index = btr_search_update_block_hash_info(info, block, cursor); - - if (build_index || (cursor->flag == BTR_CUR_HASH_FAIL)) { - - btr_search_check_free_space_in_heap(); - } - - if (cursor->flag == BTR_CUR_HASH_FAIL) { - /* Update the hash node reference, if appropriate */ - -#ifdef UNIV_SEARCH_PERF_STAT - btr_search_n_hash_fail++; -#endif /* UNIV_SEARCH_PERF_STAT */ - - rw_lock_x_lock(&btr_search_latch); - - btr_search_update_hash_ref(info, block, cursor); - - rw_lock_x_unlock(&btr_search_latch); - } - - if (build_index) { - /* Note that since we did not protect block->n_fields etc. - with any semaphore, the values can be inconsistent. We have - to check inside the function call that they make sense. We - also malloc an array and store the values there to make sure - the compiler does not let the function call parameters change - inside the called function. It might be that the compiler - would optimize the call just to pass pointers to block. 
*/ - - params = mem_alloc(3 * sizeof(ulint)); - params[0] = block->n_fields; - params[1] = block->n_bytes; - params[2] = block->left_side; - - /* Make sure the compiler cannot deduce the values and do - optimizations */ - - params2 = params + btr_search_this_is_zero; - - btr_search_build_page_hash_index(cursor->index, - block->frame, - params2[0], - params2[1], - params2[2]); - mem_free(params); - } -} - -/********************************************************************** -Checks if a guessed position for a tree cursor is right. Note that if -mode is PAGE_CUR_LE, which is used in inserts, and the function returns -TRUE, then cursor->up_match and cursor->low_match both have sensible values. */ -static -ibool -btr_search_check_guess( -/*===================*/ - /* out: TRUE if success */ - btr_cur_t* cursor, /* in: guessed cursor position */ - ibool can_only_compare_to_cursor_rec, - /* in: if we do not have a latch on the page - of cursor, but only a latch on - btr_search_latch, then ONLY the columns - of the record UNDER the cursor are - protected, not the next or previous record - in the chain: we cannot look at the next or - previous record to check our guess! 
*/ - dtuple_t* tuple, /* in: data tuple */ - ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, - or PAGE_CUR_GE */ - mtr_t* mtr) /* in: mtr */ -{ - rec_t* rec; - ulint n_unique; - ulint match; - ulint bytes; - int cmp; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - ibool success = FALSE; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - n_unique = dict_index_get_n_unique_in_tree(cursor->index); - - rec = btr_cur_get_rec(cursor); - - ut_ad(page_rec_is_user_rec(rec)); - - match = 0; - bytes = 0; - - offsets = rec_get_offsets(rec, cursor->index, offsets, - n_unique, &heap); - cmp = page_cmp_dtuple_rec_with_match(tuple, rec, - offsets, &match, &bytes); - - if (mode == PAGE_CUR_GE) { - if (cmp == 1) { - goto exit_func; - } - - cursor->up_match = match; - - if (match >= n_unique) { - success = TRUE; - goto exit_func; - } - } else if (mode == PAGE_CUR_LE) { - if (cmp == -1) { - goto exit_func; - } - - cursor->low_match = match; - - } else if (mode == PAGE_CUR_G) { - if (cmp != -1) { - goto exit_func; - } - } else if (mode == PAGE_CUR_L) { - if (cmp != 1) { - goto exit_func; - } - } - - if (can_only_compare_to_cursor_rec) { - /* Since we could not determine if our guess is right just by - looking at the record under the cursor, return FALSE */ - goto exit_func; - } - - match = 0; - bytes = 0; - - if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE)) { - rec_t* prev_rec; - - ut_ad(!page_rec_is_infimum(rec)); - - prev_rec = page_rec_get_prev(rec); - - if (page_rec_is_infimum(prev_rec)) { - success = btr_page_get_prev( - buf_frame_align(prev_rec), mtr) == FIL_NULL; - - goto exit_func; - } - - offsets = rec_get_offsets(prev_rec, cursor->index, offsets, - n_unique, &heap); - cmp = page_cmp_dtuple_rec_with_match(tuple, prev_rec, - offsets, &match, &bytes); - if (mode == PAGE_CUR_GE) { - success = cmp == 1; - } else { - success = cmp != -1; - } - - goto exit_func; - } else { - rec_t* next_rec; - - 
ut_ad(!page_rec_is_supremum(rec)); - - next_rec = page_rec_get_next(rec); - - if (page_rec_is_supremum(next_rec)) { - if (btr_page_get_next( - buf_frame_align(next_rec), mtr) - == FIL_NULL) { - - cursor->up_match = 0; - success = TRUE; - } - - goto exit_func; - } - - offsets = rec_get_offsets(next_rec, cursor->index, offsets, - n_unique, &heap); - cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, - offsets, &match, &bytes); - if (mode == PAGE_CUR_LE) { - success = cmp == -1; - cursor->up_match = match; - } else { - success = cmp != 1; - } - } -exit_func: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(success); -} - -/********************************************************************** -Tries to guess the right search position based on the hash search info -of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts, -and the function returns TRUE, then cursor->up_match and cursor->low_match -both have sensible values. */ - -ibool -btr_search_guess_on_hash( -/*=====================*/ - /* out: TRUE if succeeded */ - dict_index_t* index, /* in: index */ - btr_search_t* info, /* in: index search info */ - dtuple_t* tuple, /* in: logical record */ - ulint mode, /* in: PAGE_CUR_L, ... */ - ulint latch_mode, /* in: BTR_SEARCH_LEAF, ...; - NOTE that only if has_search_latch - is 0, we will have a latch set on - the cursor page, otherwise we assume - the caller uses his search latch - to protect the record! 
*/ - btr_cur_t* cursor, /* out: tree cursor */ - ulint has_search_latch,/* in: latch mode the caller - currently has on btr_search_latch: - RW_S_LATCH, RW_X_LATCH, or 0 */ - mtr_t* mtr) /* in: mtr */ -{ - buf_block_t* block; - rec_t* rec; - page_t* page; - ulint fold; - ulint tuple_n_fields; - dulint index_id; - ibool can_only_compare_to_cursor_rec = TRUE; -#ifdef notdefined - btr_cur_t cursor2; - btr_pcur_t pcur; -#endif - ut_ad(index && info && tuple && cursor && mtr); - ut_ad((latch_mode == BTR_SEARCH_LEAF) - || (latch_mode == BTR_MODIFY_LEAF)); - - /* Note that, for efficiency, the struct info may not be protected by - any latch here! */ - - if (UNIV_UNLIKELY(info->n_hash_potential == 0)) { - - return(FALSE); - } - - cursor->n_fields = info->n_fields; - cursor->n_bytes = info->n_bytes; - - tuple_n_fields = dtuple_get_n_fields(tuple); - - if (UNIV_UNLIKELY(tuple_n_fields < cursor->n_fields)) { - - return(FALSE); - } - - if (UNIV_UNLIKELY(tuple_n_fields == cursor->n_fields) - && (cursor->n_bytes > 0)) { - - return(FALSE); - } - - index_id = index->id; - -#ifdef UNIV_SEARCH_PERF_STAT - info->n_hash_succ++; -#endif - fold = dtuple_fold(tuple, cursor->n_fields, cursor->n_bytes, index_id); - - cursor->fold = fold; - cursor->flag = BTR_CUR_HASH; - - if (UNIV_LIKELY(!has_search_latch)) { - rw_lock_s_lock(&btr_search_latch); - } - - ut_ad(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_EX); - ut_ad(rw_lock_get_reader_count(&btr_search_latch) > 0); - - rec = ha_search_and_get_data(btr_search_sys->hash_index, fold); - - if (UNIV_UNLIKELY(!rec)) { - goto failure_unlock; - } - - page = buf_frame_align(rec); - - if (UNIV_LIKELY(!has_search_latch)) { - - if (UNIV_UNLIKELY( - !buf_page_get_known_nowait(latch_mode, page, - BUF_MAKE_YOUNG, - __FILE__, __LINE__, - mtr))) { - goto failure_unlock; - } - - rw_lock_s_unlock(&btr_search_latch); - can_only_compare_to_cursor_rec = FALSE; - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH); -#endif /* 
UNIV_SYNC_DEBUG */ - } - - block = buf_block_align(page); - - if (UNIV_UNLIKELY(block->state == BUF_BLOCK_REMOVE_HASH)) { - if (UNIV_LIKELY(!has_search_latch)) { - - btr_leaf_page_release(page, latch_mode, mtr); - } - - goto failure; - } - - ut_ad(block->state == BUF_BLOCK_FILE_PAGE); - ut_ad(page_rec_is_user_rec(rec)); - - btr_cur_position(index, rec, cursor); - - /* Check the validity of the guess within the page */ - - /* If we only have the latch on btr_search_latch, not on the - page, it only protects the columns of the record the cursor - is positioned on. We cannot look at the next of the previous - record to determine if our guess for the cursor position is - right. */ - if (UNIV_EXPECT( - ut_dulint_cmp(index_id, btr_page_get_index_id(page)), 0) - || !btr_search_check_guess(cursor, - can_only_compare_to_cursor_rec, - tuple, mode, mtr)) { - if (UNIV_LIKELY(!has_search_latch)) { - btr_leaf_page_release(page, latch_mode, mtr); - } - - goto failure; - } - - if (UNIV_LIKELY(info->n_hash_potential < BTR_SEARCH_BUILD_LIMIT + 5)) { - - info->n_hash_potential++; - } - -#ifdef notdefined - /* These lines of code can be used in a debug version to check - the correctness of the searched cursor position: */ - - info->last_hash_succ = FALSE; - - /* Currently, does not work if the following fails: */ - ut_ad(!has_search_latch); - - btr_leaf_page_release(page, latch_mode, mtr); - - btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode, - &cursor2, 0, mtr); - if (mode == PAGE_CUR_GE - && page_rec_is_supremum(btr_cur_get_rec(&cursor2))) { - - /* If mode is PAGE_CUR_GE, then the binary search - in the index tree may actually take us to the supremum - of the previous page */ - - info->last_hash_succ = FALSE; - - btr_pcur_open_on_user_rec(index, tuple, mode, latch_mode, - &pcur, mtr); - ut_ad(btr_pcur_get_rec(&pcur) == btr_cur_get_rec(cursor)); - } else { - ut_ad(btr_cur_get_rec(&cursor2) == btr_cur_get_rec(cursor)); - } - - /* NOTE that it is theoretically possible 
that the above assertions - fail if the page of the cursor gets removed from the buffer pool - meanwhile! Thus it might not be a bug. */ -#endif - info->last_hash_succ = TRUE; - -#ifdef UNIV_SEARCH_PERF_STAT - btr_search_n_succ++; -#endif - if (UNIV_LIKELY(!has_search_latch) - && buf_block_peek_if_too_old(block)) { - - buf_page_make_young(page); - } - - /* Increment the page get statistics though we did not really - fix the page: for user info only */ - - buf_pool->n_page_gets++; - - return(TRUE); - - /*-------------------------------------------*/ -failure_unlock: - if (UNIV_LIKELY(!has_search_latch)) { - rw_lock_s_unlock(&btr_search_latch); - } -failure: - cursor->flag = BTR_CUR_HASH_FAIL; - -#ifdef UNIV_SEARCH_PERF_STAT - info->n_hash_fail++; - - if (info->n_hash_succ > 0) { - info->n_hash_succ--; - } -#endif - info->last_hash_succ = FALSE; - - return(FALSE); -} - -/************************************************************************ -Drops a page hash index. */ - -void -btr_search_drop_page_hash_index( -/*============================*/ - page_t* page) /* in: index page, s- or x-latched, or an index page - for which we know that block->buf_fix_count == 0 */ -{ - hash_table_t* table; - buf_block_t* block; - ulint n_fields; - ulint n_bytes; - rec_t* rec; - ulint fold; - ulint prev_fold; - dulint index_id; - ulint n_cached; - ulint n_recs; - ulint* folds; - ulint i; - mem_heap_t* heap; - dict_index_t* index; - ulint* offsets; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ -retry: - rw_lock_s_lock(&btr_search_latch); - - block = buf_block_align(page); - - if (UNIV_LIKELY(!block->is_hashed)) { - - rw_lock_s_unlock(&btr_search_latch); - - return; - } - - table = btr_search_sys->hash_index; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED) - || rw_lock_own(&(block->lock), RW_LOCK_EX) - || (block->buf_fix_count == 0)); 
-#endif /* UNIV_SYNC_DEBUG */ - - n_fields = block->curr_n_fields; - n_bytes = block->curr_n_bytes; - index = block->index; - - /* NOTE: The fields of block must not be accessed after - releasing btr_search_latch, as the index page might only - be s-latched! */ - - rw_lock_s_unlock(&btr_search_latch); - - ut_a(n_fields + n_bytes > 0); - - n_recs = page_get_n_recs(page); - - /* Calculate and cache fold values into an array for fast deletion - from the hash index */ - - folds = mem_alloc(n_recs * sizeof(ulint)); - - n_cached = 0; - - rec = page_get_infimum_rec(page); - rec = page_rec_get_next(rec); - - index_id = btr_page_get_index_id(page); - - ut_a(0 == ut_dulint_cmp(index_id, index->id)); - - prev_fold = 0; - - heap = NULL; - offsets = NULL; - - while (!page_rec_is_supremum(rec)) { - offsets = rec_get_offsets(rec, index, offsets, - n_fields + (n_bytes > 0), &heap); - ut_a(rec_offs_n_fields(offsets) == n_fields + (n_bytes > 0)); - fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id); - - if (fold == prev_fold && prev_fold != 0) { - - goto next_rec; - } - - /* Remove all hash nodes pointing to this page from the - hash chain */ - - folds[n_cached] = fold; - n_cached++; -next_rec: - rec = page_rec_get_next(rec); - prev_fold = fold; - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - rw_lock_x_lock(&btr_search_latch); - - if (UNIV_UNLIKELY(!block->is_hashed)) { - /* Someone else has meanwhile dropped the hash index */ - - goto cleanup; - } - - ut_a(block->index == index); - - if (UNIV_UNLIKELY(block->curr_n_fields != n_fields) - || UNIV_UNLIKELY(block->curr_n_bytes != n_bytes)) { - - /* Someone else has meanwhile built a new hash index on the - page, with different parameters */ - - rw_lock_x_unlock(&btr_search_latch); - - mem_free(folds); - goto retry; - } - - for (i = 0; i < n_cached; i++) { - - ha_remove_all_nodes_to_page(table, folds[i], page); - } - - ut_a(index->search_info->ref_count > 0); - index->search_info->ref_count--; - - 
block->is_hashed = FALSE; - block->index = NULL; - -cleanup: - if (UNIV_UNLIKELY(block->n_pointers)) { - /* Corruption */ - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Corruption of adaptive hash index." - " After dropping\n" - "InnoDB: the hash index to a page of %s," - " still %lu hash nodes remain.\n", - index->name, (ulong) block->n_pointers); - rw_lock_x_unlock(&btr_search_latch); - - btr_search_validate(); - } else { - rw_lock_x_unlock(&btr_search_latch); - } - - mem_free(folds); -} - -/************************************************************************ -Drops a page hash index when a page is freed from a fseg to the file system. -Drops possible hash index if the page happens to be in the buffer pool. */ - -void -btr_search_drop_page_hash_when_freed( -/*=================================*/ - ulint space, /* in: space id */ - ulint page_no) /* in: page number */ -{ - ibool is_hashed; - page_t* page; - mtr_t mtr; - - is_hashed = buf_page_peek_if_search_hashed(space, page_no); - - if (!is_hashed) { - - return; - } - - mtr_start(&mtr); - - /* We assume that if the caller has a latch on the page, then the - caller has already dropped the hash index for the page, and we never - get here. Therefore we can acquire the s-latch to the page without - having to fear a deadlock. */ - - page = buf_page_get_gen(space, page_no, RW_S_LATCH, NULL, - BUF_GET_IF_IN_POOL, __FILE__, __LINE__, - &mtr); - /* Because the buffer pool mutex was released by - buf_page_peek_if_search_hashed(), it is possible that the - block was removed from the buffer pool by another thread - before buf_page_get_gen() got a chance to acquire the buffer - pool mutex again. Thus, we must check for a NULL return. 
*/ - - if (UNIV_LIKELY(page != NULL)) { - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_TREE_NODE_FROM_HASH); -#endif /* UNIV_SYNC_DEBUG */ - - btr_search_drop_page_hash_index(page); - } - - mtr_commit(&mtr); -} - -/************************************************************************ -Builds a hash index on a page with the given parameters. If the page already -has a hash index with different parameters, the old hash index is removed. -If index is non-NULL, this function checks if n_fields and n_bytes are -sensible values, and does not build a hash index if not. */ -static -void -btr_search_build_page_hash_index( -/*=============================*/ - dict_index_t* index, /* in: index for which to build */ - page_t* page, /* in: index page, s- or x-latched */ - ulint n_fields,/* in: hash this many full fields */ - ulint n_bytes,/* in: hash this many bytes from the next - field */ - ibool left_side)/* in: hash for searches from left side? */ -{ - hash_table_t* table; - buf_block_t* block; - rec_t* rec; - rec_t* next_rec; - ulint fold; - ulint next_fold; - dulint index_id; - ulint n_cached; - ulint n_recs; - ulint* folds; - rec_t** recs; - ulint i; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_ad(index); - - block = buf_block_align(page); - table = btr_search_sys->hash_index; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED) - || rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - rw_lock_s_lock(&btr_search_latch); - - if (block->is_hashed && ((block->curr_n_fields != n_fields) - || (block->curr_n_bytes != n_bytes) - || (block->curr_left_side != left_side))) { - - rw_lock_s_unlock(&btr_search_latch); - - btr_search_drop_page_hash_index(page); - } else { - rw_lock_s_unlock(&btr_search_latch); - } - - n_recs = page_get_n_recs(page); - - if 
(n_recs == 0) { - - return; - } - - /* Check that the values for hash index build are sensible */ - - if (n_fields + n_bytes == 0) { - - return; - } - - if (dict_index_get_n_unique_in_tree(index) < n_fields - || (dict_index_get_n_unique_in_tree(index) == n_fields - && n_bytes > 0)) { - return; - } - - /* Calculate and cache fold values and corresponding records into - an array for fast insertion to the hash index */ - - folds = mem_alloc(n_recs * sizeof(ulint)); - recs = mem_alloc(n_recs * sizeof(rec_t*)); - - n_cached = 0; - - index_id = btr_page_get_index_id(page); - - rec = page_get_infimum_rec(page); - rec = page_rec_get_next(rec); - - offsets = rec_get_offsets(rec, index, offsets, - n_fields + (n_bytes > 0), &heap); - - if (!page_rec_is_supremum(rec)) { - ut_a(n_fields <= rec_offs_n_fields(offsets)); - - if (n_bytes > 0) { - ut_a(n_fields < rec_offs_n_fields(offsets)); - } - } - - fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id); - - if (left_side) { - - folds[n_cached] = fold; - recs[n_cached] = rec; - n_cached++; - } - - for (;;) { - next_rec = page_rec_get_next(rec); - - if (page_rec_is_supremum(next_rec)) { - - if (!left_side) { - - folds[n_cached] = fold; - recs[n_cached] = rec; - n_cached++; - } - - break; - } - - offsets = rec_get_offsets(next_rec, index, offsets, - n_fields + (n_bytes > 0), &heap); - next_fold = rec_fold(next_rec, offsets, n_fields, - n_bytes, index_id); - - if (fold != next_fold) { - /* Insert an entry into the hash index */ - - if (left_side) { - - folds[n_cached] = next_fold; - recs[n_cached] = next_rec; - n_cached++; - } else { - folds[n_cached] = fold; - recs[n_cached] = rec; - n_cached++; - } - } - - rec = next_rec; - fold = next_fold; - } - - btr_search_check_free_space_in_heap(); - - rw_lock_x_lock(&btr_search_latch); - - if (block->is_hashed && ((block->curr_n_fields != n_fields) - || (block->curr_n_bytes != n_bytes) - || (block->curr_left_side != left_side))) { - goto exit_func; - } - - /* This counter is 
decremented every time we drop page - hash index entries and is incremented here. Since we can - rebuild hash index for a page that is already hashed, we - have to take care not to increment the counter in that - case. */ - if (!block->is_hashed) { - index->search_info->ref_count++; - } - - block->is_hashed = TRUE; - block->n_hash_helps = 0; - - block->curr_n_fields = n_fields; - block->curr_n_bytes = n_bytes; - block->curr_left_side = left_side; - block->index = index; - - for (i = 0; i < n_cached; i++) { - - ha_insert_for_fold(table, folds[i], recs[i]); - } - -exit_func: - rw_lock_x_unlock(&btr_search_latch); - - mem_free(folds); - mem_free(recs); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -} - -/************************************************************************ -Moves or deletes hash entries for moved records. If new_page is already hashed, -then the hash index for page, if any, is dropped. If new_page is not hashed, -and page is hashed, then a new hash index is built to new_page with the same -parameters as page (this often happens when a page is split). 
*/ - -void -btr_search_move_or_delete_hash_entries( -/*===================================*/ - page_t* new_page, /* in: records are copied - to this page */ - page_t* page, /* in: index page from which - records were copied, and the - copied records will be deleted - from this page */ - dict_index_t* index) /* in: record descriptor */ -{ - buf_block_t* block; - buf_block_t* new_block; - ulint n_fields; - ulint n_bytes; - ibool left_side; - - block = buf_block_align(page); - new_block = buf_block_align(new_page); - ut_a(page_is_comp(page) == page_is_comp(new_page)); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); - ut_ad(rw_lock_own(&(new_block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_a(!new_block->is_hashed || new_block->index == index); - ut_a(!block->is_hashed || block->index == index); - - rw_lock_s_lock(&btr_search_latch); - - if (new_block->is_hashed) { - - rw_lock_s_unlock(&btr_search_latch); - - btr_search_drop_page_hash_index(page); - - return; - } - - if (block->is_hashed) { - - n_fields = block->curr_n_fields; - n_bytes = block->curr_n_bytes; - left_side = block->curr_left_side; - - new_block->n_fields = block->curr_n_fields; - new_block->n_bytes = block->curr_n_bytes; - new_block->left_side = left_side; - - rw_lock_s_unlock(&btr_search_latch); - - ut_a(n_fields + n_bytes > 0); - - btr_search_build_page_hash_index(index, new_page, n_fields, - n_bytes, left_side); -#if 1 /* TODO: safe to remove? */ - ut_a(n_fields == block->curr_n_fields); - ut_a(n_bytes == block->curr_n_bytes); - ut_a(left_side == block->curr_left_side); -#endif - return; - } - - rw_lock_s_unlock(&btr_search_latch); -} - -/************************************************************************ -Updates the page hash index when a single record is deleted from a page. 
*/ - -void -btr_search_update_hash_on_delete( -/*=============================*/ - btr_cur_t* cursor) /* in: cursor which was positioned on the - record to delete using btr_cur_search_..., - the record is not yet deleted */ -{ - hash_table_t* table; - buf_block_t* block; - rec_t* rec; - ulint fold; - dulint index_id; - ibool found; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - mem_heap_t* heap = NULL; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - rec = btr_cur_get_rec(cursor); - - block = buf_block_align(rec); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - if (!block->is_hashed) { - - return; - } - - ut_a(block->index == cursor->index); - ut_a(block->curr_n_fields + block->curr_n_bytes > 0); - - table = btr_search_sys->hash_index; - - index_id = cursor->index->id; - fold = rec_fold(rec, rec_get_offsets(rec, cursor->index, offsets_, - ULINT_UNDEFINED, &heap), - block->curr_n_fields, block->curr_n_bytes, index_id); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - rw_lock_x_lock(&btr_search_latch); - - found = ha_search_and_delete_if_found(table, fold, rec); - - rw_lock_x_unlock(&btr_search_latch); -} - -/************************************************************************ -Updates the page hash index when a single record is inserted on a page. 
*/ - -void -btr_search_update_hash_node_on_insert( -/*==================================*/ - btr_cur_t* cursor) /* in: cursor which was positioned to the - place to insert using btr_cur_search_..., - and the new record has been inserted next - to the cursor */ -{ - hash_table_t* table; - buf_block_t* block; - rec_t* rec; - - rec = btr_cur_get_rec(cursor); - - block = buf_block_align(rec); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - if (!block->is_hashed) { - - return; - } - - ut_a(block->index == cursor->index); - - rw_lock_x_lock(&btr_search_latch); - - if ((cursor->flag == BTR_CUR_HASH) - && (cursor->n_fields == block->curr_n_fields) - && (cursor->n_bytes == block->curr_n_bytes) - && !block->curr_left_side) { - - table = btr_search_sys->hash_index; - - ha_search_and_update_if_found(table, cursor->fold, rec, - page_rec_get_next(rec)); - - rw_lock_x_unlock(&btr_search_latch); - } else { - rw_lock_x_unlock(&btr_search_latch); - - btr_search_update_hash_on_insert(cursor); - } -} - -/************************************************************************ -Updates the page hash index when a single record is inserted on a page. */ - -void -btr_search_update_hash_on_insert( -/*=============================*/ - btr_cur_t* cursor) /* in: cursor which was positioned to the - place to insert using btr_cur_search_..., - and the new record has been inserted next - to the cursor */ -{ - hash_table_t* table; - buf_block_t* block; - rec_t* rec; - rec_t* ins_rec; - rec_t* next_rec; - dulint index_id; - ulint fold; - ulint ins_fold; - ulint next_fold = 0; /* remove warning (??? bug ???) 
*/ - ulint n_fields; - ulint n_bytes; - ibool left_side; - ibool locked = FALSE; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - table = btr_search_sys->hash_index; - - btr_search_check_free_space_in_heap(); - - rec = btr_cur_get_rec(cursor); - - block = buf_block_align(rec); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - if (!block->is_hashed) { - - return; - } - - ut_a(block->index == cursor->index); - - index_id = cursor->index->id; - - n_fields = block->curr_n_fields; - n_bytes = block->curr_n_bytes; - left_side = block->curr_left_side; - - ins_rec = page_rec_get_next(rec); - next_rec = page_rec_get_next(ins_rec); - - offsets = rec_get_offsets(ins_rec, cursor->index, offsets, - ULINT_UNDEFINED, &heap); - ins_fold = rec_fold(ins_rec, offsets, n_fields, n_bytes, index_id); - - if (!page_rec_is_supremum(next_rec)) { - offsets = rec_get_offsets(next_rec, cursor->index, offsets, - n_fields + (n_bytes > 0), &heap); - next_fold = rec_fold(next_rec, offsets, n_fields, - n_bytes, index_id); - } - - if (!page_rec_is_infimum(rec)) { - offsets = rec_get_offsets(rec, cursor->index, offsets, - n_fields + (n_bytes > 0), &heap); - fold = rec_fold(rec, offsets, n_fields, n_bytes, index_id); - } else { - if (left_side) { - - rw_lock_x_lock(&btr_search_latch); - - locked = TRUE; - - ha_insert_for_fold(table, ins_fold, ins_rec); - } - - goto check_next_rec; - } - - if (fold != ins_fold) { - - if (!locked) { - - rw_lock_x_lock(&btr_search_latch); - - locked = TRUE; - } - - if (!left_side) { - ha_insert_for_fold(table, fold, rec); - } else { - ha_insert_for_fold(table, ins_fold, ins_rec); - } - } - -check_next_rec: - if (page_rec_is_supremum(next_rec)) { - - if (!left_side) { - - if (!locked) { - rw_lock_x_lock(&btr_search_latch); - - locked = TRUE; - } - - ha_insert_for_fold(table, ins_fold, ins_rec); - } - - goto 
function_exit; - } - - if (ins_fold != next_fold) { - - if (!locked) { - - rw_lock_x_lock(&btr_search_latch); - - locked = TRUE; - } - - if (!left_side) { - - ha_insert_for_fold(table, ins_fold, ins_rec); - /* - fputs("Hash insert for ", stderr); - dict_index_name_print(stderr, cursor->index); - fprintf(stderr, " fold %lu\n", ins_fold); - */ - } else { - ha_insert_for_fold(table, next_fold, next_rec); - } - } - -function_exit: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - if (locked) { - rw_lock_x_unlock(&btr_search_latch); - } -} - -/************************************************************************ -Validates the search system. */ - -ibool -btr_search_validate(void) -/*=====================*/ - /* out: TRUE if ok */ -{ - buf_block_t* block; - page_t* page; - ha_node_t* node; - ulint n_page_dumps = 0; - ibool ok = TRUE; - ulint i; - ulint cell_count; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - - /* How many cells to check before temporarily releasing - btr_search_latch. */ - ulint chunk_size = 10000; - - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - rw_lock_x_lock(&btr_search_latch); - - cell_count = hash_get_n_cells(btr_search_sys->hash_index); - - for (i = 0; i < cell_count; i++) { - /* We release btr_search_latch every once in a while to - give other queries a chance to run. 
*/ - if ((i != 0) && ((i % chunk_size) == 0)) { - rw_lock_x_unlock(&btr_search_latch); - os_thread_yield(); - rw_lock_x_lock(&btr_search_latch); - } - - node = hash_get_nth_cell(btr_search_sys->hash_index, i)->node; - - while (node != NULL) { - block = buf_block_align(node->data); - page = buf_frame_align(node->data); - offsets = rec_get_offsets((rec_t*) node->data, - block->index, offsets, - block->curr_n_fields - + (block->curr_n_bytes > 0), - &heap); - - if (!block->is_hashed || node->fold - != rec_fold((rec_t*)(node->data), - offsets, - block->curr_n_fields, - block->curr_n_bytes, - btr_page_get_index_id(page))) { - ok = FALSE; - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error in an adaptive hash" - " index pointer to page %lu\n" - "InnoDB: ptr mem address %p" - " index id %lu %lu," - " node fold %lu, rec fold %lu\n", - (ulong) buf_frame_get_page_no(page), - node->data, - (ulong) ut_dulint_get_high( - btr_page_get_index_id(page)), - (ulong) ut_dulint_get_low( - btr_page_get_index_id(page)), - (ulong) node->fold, - (ulong) rec_fold((rec_t*)(node->data), - offsets, - block->curr_n_fields, - block->curr_n_bytes, - btr_page_get_index_id( - page))); - - fputs("InnoDB: Record ", stderr); - rec_print_new(stderr, (rec_t*)node->data, - offsets); - fprintf(stderr, "\nInnoDB: on that page." - " Page mem address %p, is hashed %lu," - " n fields %lu, n bytes %lu\n" - "InnoDB: side %lu\n", - (void*) page, (ulong) block->is_hashed, - (ulong) block->curr_n_fields, - (ulong) block->curr_n_bytes, - (ulong) block->curr_left_side); - - if (n_page_dumps < 20) { - buf_page_print(page); - n_page_dumps++; - } - } - - node = node->next; - } - } - - for (i = 0; i < cell_count; i += chunk_size) { - ulint end_index = ut_min(i + chunk_size - 1, cell_count - 1); - - /* We release btr_search_latch every once in a while to - give other queries a chance to run. 
*/ - if (i != 0) { - rw_lock_x_unlock(&btr_search_latch); - os_thread_yield(); - rw_lock_x_lock(&btr_search_latch); - } - - if (!ha_validate(btr_search_sys->hash_index, i, end_index)) { - ok = FALSE; - } - } - - rw_lock_x_unlock(&btr_search_latch); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - return(ok); -} diff --git a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c deleted file mode 100644 index 5b4f0ee6ecb..00000000000 --- a/storage/innobase/buf/buf0buf.c +++ /dev/null @@ -1,2590 +0,0 @@ -/* Innobase relational database engine; Copyright (C) 2001 Innobase Oy - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License 2 - as published by the Free Software Foundation in June 1991. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License 2 - along with this program (in file COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
*/ -/****************************************************** -The database buffer buf_pool - -(c) 1995 Innobase Oy - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#include "buf0buf.h" - -#ifdef UNIV_NONINL -#include "buf0buf.ic" -#endif - -#include "mem0mem.h" -#include "btr0btr.h" -#include "fil0fil.h" -#include "lock0lock.h" -#include "btr0sea.h" -#include "ibuf0ibuf.h" -#include "dict0dict.h" -#include "log0recv.h" -#include "log0log.h" -#include "trx0undo.h" -#include "srv0srv.h" - -/* - IMPLEMENTATION OF THE BUFFER POOL - ================================= - -Performance improvement: ------------------------- -Thread scheduling in NT may be so slow that the OS wait mechanism should -not be used even in waiting for disk reads to complete. -Rather, we should put waiting query threads to the queue of -waiting jobs, and let the OS thread do something useful while the i/o -is processed. In this way we could remove most OS thread switches in -an i/o-intensive benchmark like TPC-C. - -A possibility is to put a user space thread library between the database -and NT. User space thread libraries might be very fast. - -SQL Server 7.0 can be configured to use 'fibers' which are lightweight -threads in NT. These should be studied. - - Buffer frames and blocks - ------------------------ -Following the terminology of Gray and Reuter, we call the memory -blocks where file pages are loaded buffer frames. For each buffer -frame there is a control block, or shortly, a block, in the buffer -control array. The control info which does not need to be stored -in the file along with the file page, resides in the control block. - - Buffer pool struct - ------------------ -The buffer buf_pool contains a single mutex which protects all the -control data structures of the buf_pool. The content of a buffer frame is -protected by a separate read-write lock in its control block, though. 
-These locks can be locked and unlocked without owning the buf_pool mutex. -The OS events in the buf_pool struct can be waited for without owning the -buf_pool mutex. - -The buf_pool mutex is a hot-spot in main memory, causing a lot of -memory bus traffic on multiprocessor systems when processors -alternately access the mutex. On our Pentium, the mutex is accessed -maybe every 10 microseconds. We gave up the solution to have mutexes -for each control block, for instance, because it seemed to be -complicated. - -A solution to reduce mutex contention of the buf_pool mutex is to -create a separate mutex for the page hash table. On Pentium, -accessing the hash table takes 2 microseconds, about half -of the total buf_pool mutex hold time. - - Control blocks - -------------- - -The control block contains, for instance, the bufferfix count -which is incremented when a thread wants a file page to be fixed -in a buffer frame. The bufferfix operation does not lock the -contents of the frame, however. For this purpose, the control -block contains a read-write lock. - -The buffer frames have to be aligned so that the start memory -address of a frame is divisible by the universal page size, which -is a power of two. - -We intend to make the buffer buf_pool size on-line reconfigurable, -that is, the buf_pool size can be changed without closing the database. -Then the database administrator may adjust it to be bigger -at night, for example. The control block array must -contain enough control blocks for the maximum buffer buf_pool size -which is used in the particular database. -If the buf_pool size is cut, we exploit the virtual memory mechanism of -the OS, and just refrain from using frames at high addresses. Then the OS -can swap them to disk. - -The control blocks containing file pages are put to a hash table -according to the file address of the page. 
-We could speed up the access to an individual page by using -"pointer swizzling": we could replace the page references on -non-leaf index pages by direct pointers to the page, if it exists -in the buf_pool. We could make a separate hash table where we could -chain all the page references in non-leaf pages residing in the buf_pool, -using the page reference as the hash key, -and at the time of reading of a page update the pointers accordingly. -Drawbacks of this solution are added complexity and, -possibly, extra space required on non-leaf pages for memory pointers. -A simpler solution is just to speed up the hash table mechanism -in the database, using tables whose size is a power of 2. - - Lists of blocks - --------------- - -There are several lists of control blocks. The free list contains -blocks which are currently not used. - -The LRU-list contains all the blocks holding a file page -except those for which the bufferfix count is non-zero. -The pages are in the LRU list roughly in the order of the last -access to the page, so that the oldest pages are at the end of the -list. We also keep a pointer to near the end of the LRU list, -which we can use when we want to artificially age a page in the -buf_pool. This is used if we know that some page is not needed -again for some time: we insert the block right after the pointer, -causing it to be replaced sooner than would normally be the case. -Currently this aging mechanism is used for read-ahead mechanism -of pages, and it can also be used when there is a scan of a full -table which cannot fit in the memory. Putting the pages near the -end of the LRU list, we make sure that most of the buf_pool stays in the -main memory, undisturbed. - -The chain of modified blocks contains the blocks -holding file pages that have been modified in the memory -but not written to disk yet. The block with the oldest modification -which has not yet been written to disk is at the end of the chain. 
- - Loading a file page - ------------------- - -First, a victim block for replacement has to be found in the -buf_pool. It is taken from the free list or searched for from the -end of the LRU-list. An exclusive lock is reserved for the frame, -the io_fix field is set in the block fixing the block in buf_pool, -and the io-operation for loading the page is queued. The io-handler thread -releases the X-lock on the frame and resets the io_fix field -when the io operation completes. - -A thread may request the above operation using the function -buf_page_get(). It may then continue to request a lock on the frame. -The lock is granted when the io-handler releases the x-lock. - - Read-ahead - ---------- - -The read-ahead mechanism is intended to be intelligent and -isolated from the semantically higher levels of the database -index management. From the higher level we only need the -information if a file page has a natural successor or -predecessor page. On the leaf level of a B-tree index, -these are the next and previous pages in the natural -order of the pages. - -Let us first explain the read-ahead mechanism when the leafs -of a B-tree are scanned in an ascending or descending order. -When a read page is the first time referenced in the buf_pool, -the buffer manager checks if it is at the border of a so-called -linear read-ahead area. The tablespace is divided into these -areas of size 64 blocks, for example. So if the page is at the -border of such an area, the read-ahead mechanism checks if -all the other blocks in the area have been accessed in an -ascending or descending order. If this is the case, the system -looks at the natural successor or predecessor of the page, -checks if that is at the border of another area, and in this case -issues read-requests for all the pages in that area. Maybe -we could relax the condition that all the pages in the area -have to be accessed: if data is deleted from a table, there may -appear holes of unused pages in the area. 
- -A different read-ahead mechanism is used when there appears -to be a random access pattern to a file. -If a new page is referenced in the buf_pool, and several pages -of its random access area (for instance, 32 consecutive pages -in a tablespace) have recently been referenced, we may predict -that the whole area may be needed in the near future, and issue -the read requests for the whole area. - - AWE implementation - ------------------ - -By a 'block' we mean the buffer header of type buf_block_t. By a 'page' -we mean the physical 16 kB memory area allocated from RAM for that block. -By a 'frame' we mean a 16 kB area in the virtual address space of the -process, in the frame_mem of buf_pool. - -We can map pages to the frames of the buffer pool. - -1) A buffer block allocated to use as a non-data page, e.g., to the lock -table, is always mapped to a frame. -2) A bufferfixed or io-fixed data page is always mapped to a frame. -3) When we need to map a block to frame, we look from the list -awe_LRU_free_mapped and try to unmap its last block, but note that -bufferfixed or io-fixed pages cannot be unmapped. -4) For every frame in the buffer pool there is always a block whose page is -mapped to it. When we create the buffer pool, we map the first elements -in the free list to the frames. -5) When we have AWE enabled, we disable adaptive hash indexes. -*/ - -/* Value in microseconds */ -static const int WAIT_FOR_READ = 20000; - -buf_pool_t* buf_pool = NULL; /* The buffer buf_pool of the database */ - -#ifdef UNIV_DEBUG -ulint buf_dbg_counter = 0; /* This is used to insert validation - operations in excution in the - debug version */ -ibool buf_debug_prints = FALSE; /* If this is set TRUE, - the program prints info whenever - read-ahead or flush occurs */ -#endif /* UNIV_DEBUG */ -/************************************************************************ -Calculates a page checksum which is stored to the page when it is written -to a file. 
Note that we must be careful to calculate the same value on -32-bit and 64-bit architectures. */ - -ulint -buf_calc_page_new_checksum( -/*=======================*/ - /* out: checksum */ - byte* page) /* in: buffer page */ -{ - ulint checksum; - - /* Since the field FIL_PAGE_FILE_FLUSH_LSN, and in versions <= 4.1.x - ..._ARCH_LOG_NO, are written outside the buffer pool to the first - pages of data files, we have to skip them in the page checksum - calculation. - We must also skip the field FIL_PAGE_SPACE_OR_CHKSUM where the - checksum is stored, and also the last 8 bytes of page because - there we store the old formula checksum. */ - - checksum = ut_fold_binary(page + FIL_PAGE_OFFSET, - FIL_PAGE_FILE_FLUSH_LSN - FIL_PAGE_OFFSET) - + ut_fold_binary(page + FIL_PAGE_DATA, - UNIV_PAGE_SIZE - FIL_PAGE_DATA - - FIL_PAGE_END_LSN_OLD_CHKSUM); - checksum = checksum & 0xFFFFFFFFUL; - - return(checksum); -} - -/************************************************************************ -In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only -looked at the first few bytes of the page. This calculates that old -checksum. -NOTE: we must first store the new formula checksum to -FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum -because this takes that field as an input! */ - -ulint -buf_calc_page_old_checksum( -/*=======================*/ - /* out: checksum */ - byte* page) /* in: buffer page */ -{ - ulint checksum; - - checksum = ut_fold_binary(page, FIL_PAGE_FILE_FLUSH_LSN); - - checksum = checksum & 0xFFFFFFFFUL; - - return(checksum); -} - -/************************************************************************ -Checks if a page is corrupt. 
*/ - -ibool -buf_page_is_corrupted( -/*==================*/ - /* out: TRUE if corrupted */ - byte* read_buf) /* in: a database page */ -{ - ulint checksum; - ulint old_checksum; - ulint checksum_field; - ulint old_checksum_field; -#ifndef UNIV_HOTBACKUP - dulint current_lsn; -#endif - if (mach_read_from_4(read_buf + FIL_PAGE_LSN + 4) - != mach_read_from_4(read_buf + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) { - - /* Stored log sequence numbers at the start and the end - of page do not match */ - - return(TRUE); - } - -#ifndef UNIV_HOTBACKUP - if (recv_lsn_checks_on && log_peek_lsn(¤t_lsn)) { - if (ut_dulint_cmp(current_lsn, - mach_read_from_8(read_buf + FIL_PAGE_LSN)) - < 0) { - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: page %lu log sequence number" - " %lu %lu\n" - "InnoDB: is in the future! Current system " - "log sequence number %lu %lu.\n" - "InnoDB: Your database may be corrupt or " - "you may have copied the InnoDB\n" - "InnoDB: tablespace but not the InnoDB " - "log files. See\n" - "InnoDB: http://dev.mysql.com/doc/refman/" - "5.1/en/forcing-recovery.html\n" - "InnoDB: for more information.\n", - (ulong) mach_read_from_4(read_buf - + FIL_PAGE_OFFSET), - (ulong) ut_dulint_get_high - (mach_read_from_8(read_buf + FIL_PAGE_LSN)), - (ulong) ut_dulint_get_low - (mach_read_from_8(read_buf + FIL_PAGE_LSN)), - (ulong) ut_dulint_get_high(current_lsn), - (ulong) ut_dulint_get_low(current_lsn)); - } - } -#endif - - /* If we use checksums validation, make additional check before - returning TRUE to ensure that the checksum is not equal to - BUF_NO_CHECKSUM_MAGIC which might be stored by InnoDB with checksums - disabled. Otherwise, skip checksum calculation and return FALSE */ - - if (srv_use_checksums) { - old_checksum = buf_calc_page_old_checksum(read_buf); - - old_checksum_field = mach_read_from_4( - read_buf + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM); - - /* There are 2 valid formulas for old_checksum_field: - - 1. 
Very old versions of InnoDB only stored 8 byte lsn to the - start and the end of the page. - - 2. Newer InnoDB versions store the old formula checksum - there. */ - - if (old_checksum_field != mach_read_from_4(read_buf - + FIL_PAGE_LSN) - && old_checksum_field != old_checksum - && old_checksum_field != BUF_NO_CHECKSUM_MAGIC) { - - return(TRUE); - } - - checksum = buf_calc_page_new_checksum(read_buf); - checksum_field = mach_read_from_4(read_buf - + FIL_PAGE_SPACE_OR_CHKSUM); - - /* InnoDB versions < 4.0.14 and < 4.1.1 stored the space id - (always equal to 0), to FIL_PAGE_SPACE_SPACE_OR_CHKSUM */ - - if (checksum_field != 0 && checksum_field != checksum - && checksum_field != BUF_NO_CHECKSUM_MAGIC) { - - return(TRUE); - } - } - - return(FALSE); -} - -/************************************************************************ -Prints a page to stderr. */ - -void -buf_page_print( -/*===========*/ - byte* read_buf) /* in: a database page */ -{ - dict_index_t* index; - ulint checksum; - ulint old_checksum; - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Page dump in ascii and hex (%lu bytes):\n", - (ulint)UNIV_PAGE_SIZE); - ut_print_buf(stderr, read_buf, UNIV_PAGE_SIZE); - fputs("InnoDB: End of page dump\n", stderr); - - checksum = srv_use_checksums - ? buf_calc_page_new_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC; - old_checksum = srv_use_checksums - ? 
buf_calc_page_old_checksum(read_buf) : BUF_NO_CHECKSUM_MAGIC; - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Page checksum %lu, prior-to-4.0.14-form" - " checksum %lu\n" - "InnoDB: stored checksum %lu, prior-to-4.0.14-form" - " stored checksum %lu\n" - "InnoDB: Page lsn %lu %lu, low 4 bytes of lsn" - " at page end %lu\n" - "InnoDB: Page number (if stored to page already) %lu,\n" - "InnoDB: space id (if created with >= MySQL-4.1.1" - " and stored already) %lu\n", - (ulong) checksum, (ulong) old_checksum, - (ulong) mach_read_from_4(read_buf + FIL_PAGE_SPACE_OR_CHKSUM), - (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM), - (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN), - (ulong) mach_read_from_4(read_buf + FIL_PAGE_LSN + 4), - (ulong) mach_read_from_4(read_buf + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), - (ulong) mach_read_from_4(read_buf + FIL_PAGE_OFFSET), - (ulong) mach_read_from_4(read_buf - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID)); - - if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE) - == TRX_UNDO_INSERT) { - fprintf(stderr, - "InnoDB: Page may be an insert undo log page\n"); - } else if (mach_read_from_2(read_buf + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_TYPE) - == TRX_UNDO_UPDATE) { - fprintf(stderr, - "InnoDB: Page may be an update undo log page\n"); - } - - switch (fil_page_get_type(read_buf)) { - case FIL_PAGE_INDEX: - fprintf(stderr, - "InnoDB: Page may be an index page where" - " index id is %lu %lu\n", - (ulong) ut_dulint_get_high - (btr_page_get_index_id(read_buf)), - (ulong) ut_dulint_get_low - (btr_page_get_index_id(read_buf))); - - /* If the code is in ibbackup, dict_sys may be uninitialized, - i.e., NULL */ - - if (dict_sys != NULL) { - - index = dict_index_find_on_id_low( - btr_page_get_index_id(read_buf)); - if (index) { - fputs("InnoDB: (", stderr); - dict_index_name_print(stderr, NULL, index); - fputs(")\n", stderr); - } - } - break; - case FIL_PAGE_INODE: - 
fputs("InnoDB: Page may be an 'inode' page\n", stderr); - break; - case FIL_PAGE_IBUF_FREE_LIST: - fputs("InnoDB: Page may be an insert buffer free list page\n", - stderr); - break; - case FIL_PAGE_TYPE_ALLOCATED: - fputs("InnoDB: Page may be a freshly allocated page\n", - stderr); - break; - case FIL_PAGE_IBUF_BITMAP: - fputs("InnoDB: Page may be an insert buffer bitmap page\n", - stderr); - break; - case FIL_PAGE_TYPE_SYS: - fputs("InnoDB: Page may be a system page\n", - stderr); - break; - case FIL_PAGE_TYPE_TRX_SYS: - fputs("InnoDB: Page may be a transaction system page\n", - stderr); - break; - case FIL_PAGE_TYPE_FSP_HDR: - fputs("InnoDB: Page may be a file space header page\n", - stderr); - break; - case FIL_PAGE_TYPE_XDES: - fputs("InnoDB: Page may be an extent descriptor page\n", - stderr); - break; - case FIL_PAGE_TYPE_BLOB: - fputs("InnoDB: Page may be a BLOB page\n", - stderr); - break; - } -} - -/************************************************************************ -Initializes a buffer control block when the buf_pool is created. 
*/ -static -void -buf_block_init( -/*===========*/ - buf_block_t* block, /* in: pointer to control block */ - byte* frame) /* in: pointer to buffer frame, or NULL if in - the case of AWE there is no frame */ -{ - block->magic_n = 0; - - block->state = BUF_BLOCK_NOT_USED; - - block->frame = frame; - - block->awe_info = NULL; - - block->buf_fix_count = 0; - block->io_fix = 0; - - block->modify_clock = ut_dulint_zero; - - block->file_page_was_freed = FALSE; - - block->check_index_page_at_flush = FALSE; - block->index = NULL; - - block->in_free_list = FALSE; - block->in_LRU_list = FALSE; - - block->n_pointers = 0; - - mutex_create(&block->mutex, SYNC_BUF_BLOCK); - - rw_lock_create(&block->lock, SYNC_LEVEL_VARYING); - ut_ad(rw_lock_validate(&(block->lock))); - -#ifdef UNIV_SYNC_DEBUG - rw_lock_create(&block->debug_latch, SYNC_NO_ORDER_CHECK); -#endif /* UNIV_SYNC_DEBUG */ -} - -/************************************************************************ -Creates the buffer pool. */ - -buf_pool_t* -buf_pool_init( -/*==========*/ - /* out, own: buf_pool object, NULL if not - enough memory or error */ - ulint max_size, /* in: maximum size of the buf_pool in - blocks */ - ulint curr_size, /* in: current size to use, must be <= - max_size, currently must be equal to - max_size */ - ulint n_frames) /* in: number of frames; if AWE is used, - this is the size of the address space window - where physical memory pages are mapped; if - AWE is not used then this must be the same - as max_size */ -{ - byte* frame; - ulint i; - buf_block_t* block; - - ut_a(max_size == curr_size); - ut_a(srv_use_awe || n_frames == max_size); - - if (n_frames > curr_size) { - fprintf(stderr, - "InnoDB: AWE: Error: you must specify in my.cnf" - " .._awe_mem_mb larger\n" - "InnoDB: than .._buffer_pool_size. Now the former" - " is %lu pages,\n" - "InnoDB: the latter %lu pages.\n", - (ulong) curr_size, (ulong) n_frames); - - return(NULL); - } - - buf_pool = mem_alloc(sizeof(buf_pool_t)); - - /* 1. 
Initialize general fields - ---------------------------- */ - mutex_create(&buf_pool->mutex, SYNC_BUF_POOL); - - mutex_enter(&(buf_pool->mutex)); - - if (srv_use_awe) { - /*----------------------------------------*/ - /* Allocate the virtual address space window, i.e., the - buffer pool frames */ - - buf_pool->frame_mem = os_awe_allocate_virtual_mem_window( - UNIV_PAGE_SIZE * (n_frames + 1)); - - /* Allocate the physical memory for AWE and the AWE info array - for buf_pool */ - - if ((curr_size % ((1024 * 1024) / UNIV_PAGE_SIZE)) != 0) { - - fprintf(stderr, - "InnoDB: AWE: Error: physical memory must be" - " allocated in full megabytes.\n" - "InnoDB: Trying to allocate %lu" - " database pages.\n", - (ulong) curr_size); - - return(NULL); - } - - if (!os_awe_allocate_physical_mem(&(buf_pool->awe_info), - curr_size - / ((1024 * 1024) - / UNIV_PAGE_SIZE))) { - - return(NULL); - } - /*----------------------------------------*/ - } else { - buf_pool->frame_mem = os_mem_alloc_large( - UNIV_PAGE_SIZE * (n_frames + 1), TRUE, FALSE); - } - - if (buf_pool->frame_mem == NULL) { - - return(NULL); - } - - buf_pool->blocks = ut_malloc(sizeof(buf_block_t) * max_size); - - if (buf_pool->blocks == NULL) { - - return(NULL); - } - - buf_pool->max_size = max_size; - buf_pool->curr_size = curr_size; - - buf_pool->n_frames = n_frames; - - /* Align pointer to the first frame */ - - frame = ut_align(buf_pool->frame_mem, UNIV_PAGE_SIZE); - - buf_pool->frame_zero = frame; - buf_pool->high_end = frame + UNIV_PAGE_SIZE * n_frames; - - if (srv_use_awe) { - /*----------------------------------------*/ - /* Map an initial part of the allocated physical memory to - the window */ - - os_awe_map_physical_mem_to_window(buf_pool->frame_zero, - n_frames - * (UNIV_PAGE_SIZE - / OS_AWE_X86_PAGE_SIZE), - buf_pool->awe_info); - /*----------------------------------------*/ - } - - buf_pool->blocks_of_frames = ut_malloc(sizeof(void*) * n_frames); - - if (buf_pool->blocks_of_frames == NULL) { - - 
return(NULL); - } - - /* Init block structs and assign frames for them; in the case of - AWE there are less frames than blocks. Then we assign the frames - to the first blocks (we already mapped the memory above). We also - init the awe_info for every block. */ - - for (i = 0; i < max_size; i++) { - - block = buf_pool_get_nth_block(buf_pool, i); - - if (i < n_frames) { - frame = buf_pool->frame_zero + i * UNIV_PAGE_SIZE; - *(buf_pool->blocks_of_frames + i) = block; - } else { - frame = NULL; - } - - buf_block_init(block, frame); - - if (srv_use_awe) { - /*----------------------------------------*/ - block->awe_info = buf_pool->awe_info - + i * (UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE); - /*----------------------------------------*/ - } - } - - buf_pool->page_hash = hash_create(2 * max_size); - - buf_pool->n_pend_reads = 0; - - buf_pool->last_printout_time = time(NULL); - - buf_pool->n_pages_read = 0; - buf_pool->n_pages_written = 0; - buf_pool->n_pages_created = 0; - buf_pool->n_pages_awe_remapped = 0; - - buf_pool->n_page_gets = 0; - buf_pool->n_page_gets_old = 0; - buf_pool->n_pages_read_old = 0; - buf_pool->n_pages_written_old = 0; - buf_pool->n_pages_created_old = 0; - buf_pool->n_pages_awe_remapped_old = 0; - - /* 2. Initialize flushing fields - ---------------------------- */ - UT_LIST_INIT(buf_pool->flush_list); - - for (i = BUF_FLUSH_LRU; i <= BUF_FLUSH_LIST; i++) { - buf_pool->n_flush[i] = 0; - buf_pool->init_flush[i] = FALSE; - buf_pool->no_flush[i] = os_event_create(NULL); - } - - buf_pool->LRU_flush_ended = 0; - - buf_pool->ulint_clock = 1; - buf_pool->freed_page_clock = 0; - - /* 3. 
Initialize LRU fields - ---------------------------- */ - UT_LIST_INIT(buf_pool->LRU); - - buf_pool->LRU_old = NULL; - - UT_LIST_INIT(buf_pool->awe_LRU_free_mapped); - - /* Add control blocks to the free list */ - UT_LIST_INIT(buf_pool->free); - - for (i = 0; i < curr_size; i++) { - - block = buf_pool_get_nth_block(buf_pool, i); - - if (block->frame) { - /* Wipe contents of frame to eliminate a Purify - warning */ - -#ifdef HAVE_purify - memset(block->frame, '\0', UNIV_PAGE_SIZE); -#endif - if (srv_use_awe) { - /* Add to the list of blocks mapped to - frames */ - - UT_LIST_ADD_LAST(awe_LRU_free_mapped, - buf_pool->awe_LRU_free_mapped, - block); - } - } - - UT_LIST_ADD_LAST(free, buf_pool->free, block); - block->in_free_list = TRUE; - } - - mutex_exit(&(buf_pool->mutex)); - - if (srv_use_adaptive_hash_indexes) { - btr_search_sys_create(curr_size * UNIV_PAGE_SIZE - / sizeof(void*) / 64); - } else { - /* Create only a small dummy system */ - btr_search_sys_create(1000); - } - - return(buf_pool); -} - -/************************************************************************ -Maps the page of block to a frame, if not mapped yet. Unmaps some page -from the end of the awe_LRU_free_mapped. 
*/

void
buf_awe_map_page_to_frame(
/*======================*/
	buf_block_t*	block,		/* in: block whose page should be
					mapped to a frame */
	ibool		add_to_mapped_list) /* in: TRUE if, in the case the
					page has to be mapped, the block
					should also be added to the
					awe_LRU_free_mapped list */
{
	buf_block_t*	victim;
	ulint		frame_no;

	ut_ad(mutex_own(&(buf_pool->mutex)));
	ut_ad(block);

	if (block->frame) {
		/* Already mapped: nothing to do */

		return;
	}

	/* Walk awe_LRU_free_mapped backwards from its last element, looking
	for the first block which is not a bufferfixed or io-fixed file
	page; the frame of that block is taken over */

	for (victim = UT_LIST_GET_LAST(buf_pool->awe_LRU_free_mapped);
	     victim != NULL;
	     victim = UT_LIST_GET_PREV(awe_LRU_free_mapped, victim)) {

		mutex_enter(&victim->mutex);

		if (victim->state == BUF_BLOCK_FILE_PAGE
		    && (victim->buf_fix_count != 0
			|| victim->io_fix != 0)) {

			/* This one cannot be unmapped: try the previous */

			mutex_exit(&victim->mutex);

			continue;
		}

		/* Map the page of block to the frame of victim */

		os_awe_map_physical_mem_to_window(
			victim->frame,
			UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE,
			block->awe_info);

		block->frame = victim->frame;

		frame_no = ((ulint)(block->frame - buf_pool->frame_zero))
			>> UNIV_PAGE_SIZE_SHIFT;

		buf_pool->blocks_of_frames[frame_no] = block;

		victim->frame = NULL;
		UT_LIST_REMOVE(awe_LRU_free_mapped,
			       buf_pool->awe_LRU_free_mapped,
			       victim);

		if (add_to_mapped_list) {
			UT_LIST_ADD_FIRST(
				awe_LRU_free_mapped,
				buf_pool->awe_LRU_free_mapped,
				block);
		}

		buf_pool->n_pages_awe_remapped++;

		mutex_exit(&victim->mutex);

		return;
	}

	/* The whole list was scanned without finding a usable frame */

	fprintf(stderr,
		"InnoDB: AWE: Fatal error: cannot find a page to unmap\n"
		"InnoDB: awe_LRU_free_mapped list length %lu\n",
		(ulong) UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped));

	ut_a(0);
}

/************************************************************************
Allocates a buffer block.
*/ -UNIV_INLINE -buf_block_t* -buf_block_alloc(void) -/*=================*/ - /* out, own: the allocated block; also if AWE - is used it is guaranteed that the page is - mapped to a frame */ -{ - buf_block_t* block; - - block = buf_LRU_get_free_block(); - - return(block); -} - -/************************************************************************ -Moves to the block to the start of the LRU list if there is a danger -that the block would drift out of the buffer pool. */ -UNIV_INLINE -void -buf_block_make_young( -/*=================*/ - buf_block_t* block) /* in: block to make younger */ -{ - ut_ad(!mutex_own(&(buf_pool->mutex))); - - /* Note that we read freed_page_clock's without holding any mutex: - this is allowed since the result is used only in heuristics */ - - if (buf_block_peek_if_too_old(block)) { - - mutex_enter(&buf_pool->mutex); - /* There has been freeing activity in the LRU list: - best to move to the head of the LRU list */ - - buf_LRU_make_block_young(block); - mutex_exit(&buf_pool->mutex); - } -} - -/************************************************************************ -Moves a page to the start of the buffer pool LRU list. This high-level -function can be used to prevent an important page from from slipping out of -the buffer pool. */ - -void -buf_page_make_young( -/*================*/ - buf_frame_t* frame) /* in: buffer frame of a file page */ -{ - buf_block_t* block; - - mutex_enter(&(buf_pool->mutex)); - - block = buf_block_align(frame); - - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - - buf_LRU_make_block_young(block); - - mutex_exit(&(buf_pool->mutex)); -} - -/************************************************************************ -Frees a buffer block which does not contain a file page. 
*/ -UNIV_INLINE -void -buf_block_free( -/*===========*/ - buf_block_t* block) /* in, own: block to be freed */ -{ - mutex_enter(&(buf_pool->mutex)); - - mutex_enter(&block->mutex); - - ut_a(block->state != BUF_BLOCK_FILE_PAGE); - - buf_LRU_block_free_non_file_page(block); - - mutex_exit(&block->mutex); - - mutex_exit(&(buf_pool->mutex)); -} - -/************************************************************************* -Allocates a buffer frame. */ - -buf_frame_t* -buf_frame_alloc(void) -/*=================*/ - /* out: buffer frame */ -{ - return(buf_block_alloc()->frame); -} - -/************************************************************************* -Frees a buffer frame which does not contain a file page. */ - -void -buf_frame_free( -/*===========*/ - buf_frame_t* frame) /* in: buffer frame */ -{ - buf_block_free(buf_block_align(frame)); -} - -/************************************************************************ -Returns the buffer control block if the page can be found in the buffer -pool. NOTE that it is possible that the page is not yet read -from disk, though. This is a very low-level function: use with care! */ - -buf_block_t* -buf_page_peek_block( -/*================*/ - /* out: control block if found from page hash table, - otherwise NULL; NOTE that the page is not necessarily - yet read from disk! */ - ulint space, /* in: space id */ - ulint offset) /* in: page number */ -{ - buf_block_t* block; - - mutex_enter_fast(&(buf_pool->mutex)); - - block = buf_page_hash_get(space, offset); - - mutex_exit(&(buf_pool->mutex)); - - return(block); -} - -/************************************************************************ -Resets the check_index_page_at_flush field of a page if found in the buffer -pool. 
*/ - -void -buf_reset_check_index_page_at_flush( -/*================================*/ - ulint space, /* in: space id */ - ulint offset) /* in: page number */ -{ - buf_block_t* block; - - mutex_enter_fast(&(buf_pool->mutex)); - - block = buf_page_hash_get(space, offset); - - if (block) { - block->check_index_page_at_flush = FALSE; - } - - mutex_exit(&(buf_pool->mutex)); -} - -/************************************************************************ -Returns the current state of is_hashed of a page. FALSE if the page is -not in the pool. NOTE that this operation does not fix the page in the -pool if it is found there. */ - -ibool -buf_page_peek_if_search_hashed( -/*===========================*/ - /* out: TRUE if page hash index is built in search - system */ - ulint space, /* in: space id */ - ulint offset) /* in: page number */ -{ - buf_block_t* block; - ibool is_hashed; - - mutex_enter_fast(&(buf_pool->mutex)); - - block = buf_page_hash_get(space, offset); - - if (!block) { - is_hashed = FALSE; - } else { - is_hashed = block->is_hashed; - } - - mutex_exit(&(buf_pool->mutex)); - - return(is_hashed); -} - -/************************************************************************ -Returns TRUE if the page can be found in the buffer pool hash table. NOTE -that it is possible that the page is not yet read from disk, though. */ - -ibool -buf_page_peek( -/*==========*/ - /* out: TRUE if found from page hash table, - NOTE that the page is not necessarily yet read - from disk! */ - ulint space, /* in: space id */ - ulint offset) /* in: page number */ -{ - if (buf_page_peek_block(space, offset)) { - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************ -Sets file_page_was_freed TRUE if the page is found in the buffer pool. -This function should be called when we free a file page and want the -debug version to check that it is not accessed any more unless -reallocated. 
*/ - -buf_block_t* -buf_page_set_file_page_was_freed( -/*=============================*/ - /* out: control block if found from page hash table, - otherwise NULL */ - ulint space, /* in: space id */ - ulint offset) /* in: page number */ -{ - buf_block_t* block; - - mutex_enter_fast(&(buf_pool->mutex)); - - block = buf_page_hash_get(space, offset); - - if (block) { - block->file_page_was_freed = TRUE; - } - - mutex_exit(&(buf_pool->mutex)); - - return(block); -} - -/************************************************************************ -Sets file_page_was_freed FALSE if the page is found in the buffer pool. -This function should be called when we free a file page and want the -debug version to check that it is not accessed any more unless -reallocated. */ - -buf_block_t* -buf_page_reset_file_page_was_freed( -/*===============================*/ - /* out: control block if found from page hash table, - otherwise NULL */ - ulint space, /* in: space id */ - ulint offset) /* in: page number */ -{ - buf_block_t* block; - - mutex_enter_fast(&(buf_pool->mutex)); - - block = buf_page_hash_get(space, offset); - - if (block) { - block->file_page_was_freed = FALSE; - } - - mutex_exit(&(buf_pool->mutex)); - - return(block); -} - -/************************************************************************ -This is the general function used to get access to a database page. 
*/ - -buf_frame_t* -buf_page_get_gen( -/*=============*/ - /* out: pointer to the frame; NULL if mode is - BUF_GET_IF_IN_POOL and the page is not in the pool or - is still being read in, or if mode is BUF_GET_NOWAIT - and the latch could not be granted at once */ - ulint space, /* in: space id */ - ulint offset, /* in: page number */ - ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ - buf_frame_t* guess, /* in: guessed frame or NULL */ - ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL, - BUF_GET_NO_LATCH, BUF_GET_NOWAIT */ - const char* file, /* in: file name */ - ulint line, /* in: line where called */ - mtr_t* mtr) /* in: mini-transaction */ -{ - buf_block_t* block; - ibool accessed; - ulint fix_type; - ibool success; - ibool must_read; - - ut_ad(mtr); - ut_ad((rw_latch == RW_S_LATCH) - || (rw_latch == RW_X_LATCH) - || (rw_latch == RW_NO_LATCH)); - ut_ad((mode != BUF_GET_NO_LATCH) || (rw_latch == RW_NO_LATCH)); - ut_ad((mode == BUF_GET) || (mode == BUF_GET_IF_IN_POOL) - || (mode == BUF_GET_NO_LATCH) || (mode == BUF_GET_NOWAIT)); -#ifndef UNIV_LOG_DEBUG - ut_ad(!ibuf_inside() || ibuf_page(space, offset)); -#endif - buf_pool->n_page_gets++; /* NOTE: counted before the buffer pool mutex is taken */ -loop: - block = NULL; - mutex_enter_fast(&(buf_pool->mutex)); - - if (guess) { - block = buf_block_align(guess); - - if ((offset != block->offset) || (space != block->space) - || (block->state != BUF_BLOCK_FILE_PAGE)) { - - block = NULL; /* the guess does not match: use the page hash lookup below */ - } - } - - if (block == NULL) { - block = buf_page_hash_get(space, offset); - } - - if (block == NULL) { - /* Page not in buf_pool: needs to be read from file */ - - mutex_exit(&(buf_pool->mutex)); - - if (mode == BUF_GET_IF_IN_POOL) { - - return(NULL); - } - - buf_read_page(space, offset); - -#ifdef UNIV_DEBUG - buf_dbg_counter++; - - if (buf_dbg_counter % 37 == 0) { - ut_ad(buf_validate()); - } -#endif - goto loop; /* look the page up again after buf_read_page() */ - } - - mutex_enter(&block->mutex); - - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - - must_read = FALSE; - - if (block->io_fix == BUF_IO_READ) { - - must_read = TRUE; - - if (mode == BUF_GET_IF_IN_POOL) { - /* The page is only being read to buffer */ - mutex_exit(&buf_pool->mutex); - mutex_exit(&block->mutex); - - return(NULL); - 
} - } - - /* If AWE is enabled and the page is not mapped to a frame, then - map it */ - - if (block->frame == NULL) { - ut_a(srv_use_awe); - - /* We set second parameter TRUE because the block is in the - LRU list and we must put it to awe_LRU_free_mapped list once - mapped to a frame */ - - buf_awe_map_page_to_frame(block, TRUE); - } - -#ifdef UNIV_SYNC_DEBUG - buf_block_buf_fix_inc_debug(block, file, line); -#else - buf_block_buf_fix_inc(block); -#endif - mutex_exit(&buf_pool->mutex); - - /* Check if this is the first access to the page: the old value of - block->accessed is remembered for the read-ahead decision at the - end of the function */ - - accessed = block->accessed; - - block->accessed = TRUE; - - mutex_exit(&block->mutex); - - buf_block_make_young(block); - -#ifdef UNIV_DEBUG_FILE_ACCESSES - ut_a(block->file_page_was_freed == FALSE); -#endif - -#ifdef UNIV_DEBUG - buf_dbg_counter++; - - if (buf_dbg_counter % 5771 == 0) { - ut_ad(buf_validate()); - } -#endif - ut_ad(block->buf_fix_count > 0); - ut_ad(block->state == BUF_BLOCK_FILE_PAGE); - - if (mode == BUF_GET_NOWAIT) { - if (rw_latch == RW_S_LATCH) { - success = rw_lock_s_lock_nowait(&(block->lock), - file, line); - fix_type = MTR_MEMO_PAGE_S_FIX; - } else { - ut_ad(rw_latch == RW_X_LATCH); - success = rw_lock_x_lock_func_nowait(&(block->lock), - file, line); - fix_type = MTR_MEMO_PAGE_X_FIX; - } - - if (!success) { - mutex_enter(&block->mutex); - - block->buf_fix_count--; /* presumably undoes the buffer-fix taken above */ - - mutex_exit(&block->mutex); -#ifdef UNIV_SYNC_DEBUG - rw_lock_s_unlock(&(block->debug_latch)); -#endif - - return(NULL); - } - } else if (rw_latch == RW_NO_LATCH) { - - if (must_read) { - /* Let us wait until the read operation - completes: io_fix is polled under block->mutex - with a sleep of WAIT_FOR_READ between checks */ - - for (;;) { - mutex_enter(&block->mutex); - - if (block->io_fix == BUF_IO_READ) { - - mutex_exit(&block->mutex); - - os_thread_sleep(WAIT_FOR_READ); - } else { - - mutex_exit(&block->mutex); - - break; - } - } - } - - fix_type = MTR_MEMO_BUF_FIX; - } else if (rw_latch == RW_S_LATCH) { - - rw_lock_s_lock_func(&(block->lock), 0, file, line); - - fix_type = MTR_MEMO_PAGE_S_FIX; - } else { - 
rw_lock_x_lock_func(&(block->lock), 0, file, line); - - fix_type = MTR_MEMO_PAGE_X_FIX; - } - - mtr_memo_push(mtr, block, fix_type); - - if (!accessed) { - /* In the case of a first access, try to apply linear - read-ahead */ - - buf_read_ahead_linear(space, offset); - } - -#ifdef UNIV_IBUF_DEBUG - ut_a(ibuf_count_get(block->space, block->offset) == 0); -#endif - return(block->frame); -} - -/************************************************************************ -This is the general function used to get optimistic access to a database -page. */ - -ibool -buf_page_optimistic_get_func( -/*=========================*/ - /* out: TRUE if success; FALSE if the block is no - longer a file page whose frame is guess, if the latch - could not be granted at once, or if the modify clock - of the block differs from modify_clock */ - ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ - buf_block_t* block, /* in: guessed buffer block */ - buf_frame_t* guess, /* in: guessed frame; note that AWE may move - frames */ - dulint modify_clock,/* in: modify clock value if mode is - ..._GUESS_ON_CLOCK */ - const char* file, /* in: file name */ - ulint line, /* in: line where called */ - mtr_t* mtr) /* in: mini-transaction */ -{ - ibool accessed; - ibool success; - ulint fix_type; - - ut_ad(mtr && block); - ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); - - /* If AWE is used, block may have a different frame now, e.g., NULL */ - - mutex_enter(&block->mutex); - - if (UNIV_UNLIKELY(block->state != BUF_BLOCK_FILE_PAGE) - || UNIV_UNLIKELY(block->frame != guess)) { - - mutex_exit(&block->mutex); - - return(FALSE); - } - -#ifdef UNIV_SYNC_DEBUG - buf_block_buf_fix_inc_debug(block, file, line); -#else - buf_block_buf_fix_inc(block); -#endif - accessed = block->accessed; /* remembered for the read-ahead decision below */ - block->accessed = TRUE; - - mutex_exit(&block->mutex); - - buf_block_make_young(block); - - /* The first-access flag was already sampled into 'accessed' above, - under block->mutex */ - - ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset)); - - if (rw_latch == RW_S_LATCH) { - success = rw_lock_s_lock_nowait(&(block->lock), - file, line); - fix_type = MTR_MEMO_PAGE_S_FIX; - } else { - success = 
rw_lock_x_lock_func_nowait(&(block->lock), - file, line); - fix_type = MTR_MEMO_PAGE_X_FIX; - } - - if (UNIV_UNLIKELY(!success)) { - mutex_enter(&block->mutex); - - block->buf_fix_count--; /* presumably undoes the buffer-fix taken above */ - - mutex_exit(&block->mutex); - -#ifdef UNIV_SYNC_DEBUG - rw_lock_s_unlock(&(block->debug_latch)); -#endif - return(FALSE); - } - - if (UNIV_UNLIKELY(!UT_DULINT_EQ(modify_clock, block->modify_clock))) { /* clock mismatch: release the latch just taken and the buffer-fix */ -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(block->frame, SYNC_NO_ORDER_CHECK); -#endif /* UNIV_SYNC_DEBUG */ - if (rw_latch == RW_S_LATCH) { - rw_lock_s_unlock(&(block->lock)); - } else { - rw_lock_x_unlock(&(block->lock)); - } - - mutex_enter(&block->mutex); - - block->buf_fix_count--; - - mutex_exit(&block->mutex); - -#ifdef UNIV_SYNC_DEBUG - rw_lock_s_unlock(&(block->debug_latch)); -#endif - return(FALSE); - } - - mtr_memo_push(mtr, block, fix_type); - -#ifdef UNIV_DEBUG - buf_dbg_counter++; - - if (buf_dbg_counter % 5771 == 0) { - ut_ad(buf_validate()); - } -#endif - ut_ad(block->buf_fix_count > 0); - ut_ad(block->state == BUF_BLOCK_FILE_PAGE); - -#ifdef UNIV_DEBUG_FILE_ACCESSES - ut_a(block->file_page_was_freed == FALSE); -#endif - if (UNIV_UNLIKELY(!accessed)) { - /* In the case of a first access, try to apply linear - read-ahead; the space id and page number are read from - the frame itself */ - - buf_read_ahead_linear(buf_frame_get_space_id(guess), - buf_frame_get_page_no(guess)); - } - -#ifdef UNIV_IBUF_DEBUG - ut_a(ibuf_count_get(block->space, block->offset) == 0); -#endif - buf_pool->n_page_gets++; /* NOTE: counted only on success, without the buffer pool mutex */ - - return(TRUE); -} - -/************************************************************************ -This is used to get access to a known database page, when no waiting can be -done. For example, if a search in an adaptive hash index leads us to this -frame. 
*/ - -ibool -buf_page_get_known_nowait( -/*======================*/ - /* out: TRUE if success; FALSE if the block is in - state BUF_BLOCK_REMOVE_HASH or the latch could not be - granted at once */ - ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ - buf_frame_t* guess, /* in: the known page frame */ - ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */ - const char* file, /* in: file name */ - ulint line, /* in: line where called */ - mtr_t* mtr) /* in: mini-transaction */ -{ - buf_block_t* block; - ibool success; - ulint fix_type; - - ut_ad(mtr); - ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); - - block = buf_block_align(guess); - - mutex_enter(&block->mutex); - - if (block->state == BUF_BLOCK_REMOVE_HASH) { - /* Another thread is just freeing the block from the LRU list - of the buffer pool: do not try to access this page; this - attempt to access the page can only come through the hash - index because when the buffer block state is ..._REMOVE_HASH, - we have already removed it from the page address hash table - of the buffer pool. */ - - mutex_exit(&block->mutex); - - return(FALSE); - } - - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - -#ifdef UNIV_SYNC_DEBUG - buf_block_buf_fix_inc_debug(block, file, line); -#else - buf_block_buf_fix_inc(block); -#endif - mutex_exit(&block->mutex); - - if (mode == BUF_MAKE_YOUNG) { - buf_block_make_young(block); - } - - ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD)); - - if (rw_latch == RW_S_LATCH) { - success = rw_lock_s_lock_nowait(&(block->lock), - file, line); - fix_type = MTR_MEMO_PAGE_S_FIX; - } else { - success = rw_lock_x_lock_func_nowait(&(block->lock), - file, line); - fix_type = MTR_MEMO_PAGE_X_FIX; - } - - if (!success) { - mutex_enter(&block->mutex); - - block->buf_fix_count--; /* presumably undoes the buffer-fix taken above */ - - mutex_exit(&block->mutex); - -#ifdef UNIV_SYNC_DEBUG - rw_lock_s_unlock(&(block->debug_latch)); -#endif - - return(FALSE); - } - - mtr_memo_push(mtr, block, fix_type); - -#ifdef UNIV_DEBUG - buf_dbg_counter++; - - if (buf_dbg_counter % 5771 == 0) { - ut_ad(buf_validate()); - } -#endif - ut_ad(block->buf_fix_count > 0); - 
ut_ad(block->state == BUF_BLOCK_FILE_PAGE); -#ifdef UNIV_DEBUG_FILE_ACCESSES - ut_a(block->file_page_was_freed == FALSE); -#endif - -#ifdef UNIV_IBUF_DEBUG - ut_a((mode == BUF_KEEP_OLD) - || (ibuf_count_get(block->space, block->offset) == 0)); -#endif - buf_pool->n_page_gets++; /* NOTE: counted only on success, without the buffer pool mutex */ - - return(TRUE); -} - -/************************************************************************ -Inits a page to the buffer buf_pool, for use in ibbackup --restore. Only the -fields of the control block are set: no mutex is taken or asserted here and -the block is not inserted into the page hash table. */ - -void -buf_page_init_for_backup_restore( -/*=============================*/ - ulint space, /* in: space id */ - ulint offset, /* in: offset of the page within space - in units of a page */ - buf_block_t* block) /* in: block to init */ -{ - /* Set the state of the block */ - block->magic_n = BUF_BLOCK_MAGIC_N; - - block->state = BUF_BLOCK_FILE_PAGE; - block->space = space; - block->offset = offset; - - block->lock_hash_val = 0; - - block->freed_page_clock = 0; - - block->newest_modification = ut_dulint_zero; - block->oldest_modification = ut_dulint_zero; - - block->accessed = FALSE; - block->buf_fix_count = 0; - block->io_fix = 0; - - block->n_hash_helps = 0; - block->is_hashed = FALSE; - block->n_fields = 1; - block->n_bytes = 0; - block->left_side = TRUE; - - block->file_page_was_freed = FALSE; -} - -/************************************************************************ -Inits a page to the buffer buf_pool. 
*/ -static -void -buf_page_init( -/*==========*/ - ulint space, /* in: space id */ - ulint offset, /* in: offset of the page within space - in units of a page */ - buf_block_t* block) /* in: block to init; must not be in state - BUF_BLOCK_FILE_PAGE; the caller holds both the buffer - pool mutex and the block mutex */ -{ - - ut_ad(mutex_own(&(buf_pool->mutex))); - ut_ad(mutex_own(&(block->mutex))); - ut_a(block->state != BUF_BLOCK_FILE_PAGE); - - /* Set the state of the block */ - block->magic_n = BUF_BLOCK_MAGIC_N; - - block->state = BUF_BLOCK_FILE_PAGE; - block->space = space; - block->offset = offset; - - block->check_index_page_at_flush = FALSE; - block->index = NULL; - - block->lock_hash_val = lock_rec_hash(space, offset); - -#ifdef UNIV_DEBUG_VALGRIND - if (!space) { - /* Silence valid Valgrind warnings about uninitialized - data being written to data files. There are some unused - bytes on some pages that InnoDB does not initialize. */ - UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE); - } -#endif /* UNIV_DEBUG_VALGRIND */ - - /* Insert into the hash table of file pages; finding the page - there already is treated as a fatal error */ - - if (buf_page_hash_get(space, offset)) { - fprintf(stderr, - "InnoDB: Error: page %lu %lu already found" - " in the hash table\n", - (ulong) space, - (ulong) offset); -#ifdef UNIV_DEBUG - buf_print(); - buf_LRU_print(); - buf_validate(); - buf_LRU_validate(); -#endif /* UNIV_DEBUG */ - ut_a(0); - } - - HASH_INSERT(buf_block_t, hash, buf_pool->page_hash, - buf_page_address_fold(space, offset), block); - - block->freed_page_clock = 0; - - block->newest_modification = ut_dulint_zero; - block->oldest_modification = ut_dulint_zero; - - block->accessed = FALSE; - block->buf_fix_count = 0; - block->io_fix = 0; - - block->n_hash_helps = 0; - block->is_hashed = FALSE; - block->n_fields = 1; - block->n_bytes = 0; - block->left_side = TRUE; - - block->file_page_was_freed = FALSE; -} - -/************************************************************************ -Function which inits a page for read to the buffer buf_pool. 
If the page is -(1) already in buf_pool, or -(2) if we specify to read only ibuf pages and the page is not an ibuf page, or -(3) if the space is deleted or being deleted, -then this function does nothing. -Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock -on the buffer frame. The io-handler must take care that the flag is cleared -and the lock released later. This is one of the functions which perform the -state transition NOT_USED => FILE_PAGE to a block (the other is -buf_page_create). */ - -buf_block_t* -buf_page_init_for_read( -/*===================*/ - /* out: pointer to the block or NULL; NULL is returned - in the cases (1) - (3) listed above */ - ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED */ - ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ... */ - ulint space, /* in: space id */ - ib_longlong tablespace_version,/* in: prevents reading from a wrong - version of the tablespace in case we have done - DISCARD + IMPORT */ - ulint offset) /* in: page number */ -{ - buf_block_t* block; - mtr_t mtr; - - ut_ad(buf_pool); - - *err = DB_SUCCESS; - - if (mode == BUF_READ_IBUF_PAGES_ONLY) { - /* It is a read-ahead within an ibuf routine */ - - ut_ad(!ibuf_bitmap_page(offset)); - ut_ad(ibuf_inside()); - - mtr_start(&mtr); /* committed on every return path below in this mode */ - - if (!ibuf_page_low(space, offset, &mtr)) { - - mtr_commit(&mtr); - - return(NULL); - } - } else { - ut_ad(mode == BUF_READ_ANY_PAGE); - } - - block = buf_block_alloc(); /* allocated before the mutexes are taken; freed again below if it turns out not to be needed */ - - ut_a(block); - - mutex_enter(&(buf_pool->mutex)); - mutex_enter(&block->mutex); - - if (fil_tablespace_deleted_or_being_deleted_in_mem( - space, tablespace_version)) { - *err = DB_TABLESPACE_DELETED; - } - - if (*err == DB_TABLESPACE_DELETED - || NULL != buf_page_hash_get(space, offset)) { - - /* The page belongs to a space which has been - deleted or is being deleted, or the page is - already in buf_pool, return */ - - mutex_exit(&block->mutex); - mutex_exit(&(buf_pool->mutex)); - - buf_block_free(block); /* takes both mutexes itself, hence they are released first */ - - if (mode == BUF_READ_IBUF_PAGES_ONLY) { - - mtr_commit(&mtr); - } - - return(NULL); - } 
- - ut_ad(block); - - buf_page_init(space, offset, block); - - /* The block must be put to the LRU list, to the old blocks */ - - buf_LRU_add_block(block, TRUE); /* TRUE == to old blocks */ - - block->io_fix = BUF_IO_READ; - - buf_pool->n_pend_reads++; - - /* We set a pass-type x-lock on the frame because then the same - thread which called for the read operation (and is running now at - this point of code) can wait for the read to complete by waiting - for the x-lock on the frame; if the x-lock were recursive, the - same thread would illegally get the x-lock before the page read - is completed. The x-lock is cleared by the io-handler thread. */ - - rw_lock_x_lock_gen(&(block->lock), BUF_IO_READ); - - mutex_exit(&block->mutex); - mutex_exit(&(buf_pool->mutex)); - - if (mode == BUF_READ_IBUF_PAGES_ONLY) { - - mtr_commit(&mtr); - } - - return(block); -} - -/************************************************************************ -Initializes a page to the buffer buf_pool. The page is usually not read -from a file even if it cannot be found in the buffer buf_pool. This is one -of the functions which perform to a block a state transition NOT_USED => -FILE_PAGE (the other is buf_page_init_for_read above). 
*/ - -buf_frame_t* -buf_page_create( -/*============*/ - /* out: pointer to the frame, page bufferfixed */ - ulint space, /* in: space id */ - ulint offset, /* in: offset of the page within space in units of - a page */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - buf_frame_t* frame; - buf_block_t* block; - buf_block_t* free_block = NULL; - - ut_ad(mtr); - - free_block = buf_LRU_get_free_block(); /* obtained before the buffer pool mutex is taken; given back below if the page is already in the pool */ - - mutex_enter(&(buf_pool->mutex)); - - block = buf_page_hash_get(space, offset); - - if (block != NULL) { -#ifdef UNIV_IBUF_DEBUG - ut_a(ibuf_count_get(block->space, block->offset) == 0); -#endif - block->file_page_was_freed = FALSE; - - /* Page can be found in buf_pool */ - mutex_exit(&(buf_pool->mutex)); - - buf_block_free(free_block); - - frame = buf_page_get_with_no_latch(space, offset, mtr); - - return(frame); - } - - /* If we get here, the page was not in buf_pool: init it there */ - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, "Creating space %lu page %lu to buffer\n", - (ulong) space, (ulong) offset); - } -#endif /* UNIV_DEBUG */ - - block = free_block; - - mutex_enter(&block->mutex); - - buf_page_init(space, offset, block); - - /* The block must be put to the LRU list */ - buf_LRU_add_block(block, FALSE); - -#ifdef UNIV_SYNC_DEBUG - buf_block_buf_fix_inc_debug(block, __FILE__, __LINE__); -#else - buf_block_buf_fix_inc(block); -#endif - buf_pool->n_pages_created++; - - mutex_exit(&(buf_pool->mutex)); - - mtr_memo_push(mtr, block, MTR_MEMO_BUF_FIX); - - block->accessed = TRUE; - - mutex_exit(&block->mutex); - - /* Delete possible entries for the page from the insert buffer: - such can exist if the page belonged to an index which was dropped */ - - ibuf_merge_or_delete_for_page(NULL, space, offset, TRUE); - - /* Flush pages from the end of the LRU list if necessary */ - buf_flush_free_margin(); - - frame = block->frame; - - /* Fill the 4-byte previous-page and next-page fields with 0xff - bytes and mark the page type as allocated */ - - memset(frame + FIL_PAGE_PREV, 0xff, 4); - memset(frame + FIL_PAGE_NEXT, 0xff, 4); - mach_write_to_2(frame + 
FIL_PAGE_TYPE, FIL_PAGE_TYPE_ALLOCATED); - - /* Reset to zero the file flush lsn field in the page; if the first - page of an ibdata file is 'created' in this function into the buffer - pool then we lose the original contents of the file flush lsn stamp. - Then InnoDB could in a crash recovery print a big, false, corruption - warning if the stamp contains an lsn bigger than the ib_logfile lsn. */ - - memset(frame + FIL_PAGE_FILE_FLUSH_LSN, 0, 8); - -#ifdef UNIV_DEBUG - buf_dbg_counter++; - - if (buf_dbg_counter % 357 == 0) { - ut_ad(buf_validate()); - } -#endif -#ifdef UNIV_IBUF_DEBUG - ut_a(ibuf_count_get(block->space, block->offset) == 0); -#endif - return(frame); -} - -/************************************************************************ -Completes an asynchronous read or write request of a file page to or from -the buffer pool. For a read the page is checked first (page number and -space id stamped on the page, corruption check), then recovery and insert -buffer merge are applied where enabled; after that io_fix is cleared and -the i/o latch on the block is released under the buffer pool mutex and the -block mutex. */ - -void -buf_page_io_complete( -/*=================*/ - buf_block_t* block) /* in: pointer to the block in question; must be - in state BUF_BLOCK_FILE_PAGE */ -{ - ulint io_type; - - ut_ad(block); - - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - - /* We do not need protect block->io_fix here by block->mutex to read - it because this is the only function where we can change the value - from BUF_IO_READ or BUF_IO_WRITE to some other value, and our code - ensures that this is the only thread that handles the i/o for this - block. */ - - io_type = block->io_fix; - - if (io_type == BUF_IO_READ) { - /* If this page is not uninitialized and not in the - doublewrite buffer, then the page number and space id - should be the same as in block. 
*/ - ulint read_page_no = mach_read_from_4( - block->frame + FIL_PAGE_OFFSET); - ulint read_space_id = mach_read_from_4( - block->frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - - if (!block->space - && trx_doublewrite_page_inside(block->offset)) { - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: reading page %lu\n" - "InnoDB: which is in the" - " doublewrite buffer!\n", - (ulong) block->offset); - } else if (!read_space_id && !read_page_no) { - /* This is likely an uninitialized page. */ - } else if ((block->space && block->space != read_space_id) - || block->offset != read_page_no) { - /* We did not compare space_id to read_space_id - if block->space == 0, because the field on the - page may contain garbage in MySQL < 4.1.1, - which only supported block->space == 0. */ - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: space id and page n:o" - " stored in the page\n" - "InnoDB: read in are %lu:%lu," - " should be %lu:%lu!\n", - (ulong) read_space_id, (ulong) read_page_no, - (ulong) block->space, (ulong) block->offset); - } - /* From version 3.23.38 up we store the page checksum - to the 4 first bytes of the page end lsn field */ - - if (buf_page_is_corrupted(block->frame)) { - fprintf(stderr, - "InnoDB: Database page corruption on disk" - " or a failed\n" - "InnoDB: file read of page %lu.\n", - (ulong) block->offset); - - fputs("InnoDB: You may have to recover" - " from a backup.\n", stderr); - - buf_page_print(block->frame); /* the two messages above are printed once more below, after the page dump */ - - fprintf(stderr, - "InnoDB: Database page corruption on disk" - " or a failed\n" - "InnoDB: file read of page %lu.\n", - (ulong) block->offset); - fputs("InnoDB: You may have to recover" - " from a backup.\n", stderr); - fputs("InnoDB: It is also possible that" - " your operating\n" - "InnoDB: system has corrupted its" - " own file cache\n" - "InnoDB: and rebooting your computer" - " removes the\n" - "InnoDB: error.\n" - "InnoDB: If the corrupt page is an index page\n" - "InnoDB: you can also try to" - " 
fix the corruption\n" - "InnoDB: by dumping, dropping," - " and reimporting\n" - "InnoDB: the corrupt table." - " You can use CHECK\n" - "InnoDB: TABLE to scan your" - " table for corruption.\n" - "InnoDB: See also" - " http://dev.mysql.com/doc/refman/5.1/en/" - "forcing-recovery.html\n" - "InnoDB: about forcing recovery.\n", stderr); - - if (srv_force_recovery < SRV_FORCE_IGNORE_CORRUPT) { - fputs("InnoDB: Ending processing because of" - " a corrupt database page.\n", - stderr); - exit(1); /* the process is terminated unless srv_force_recovery is at least SRV_FORCE_IGNORE_CORRUPT */ - } - } - - if (recv_recovery_is_on()) { - recv_recover_page(FALSE, TRUE, block->frame, - block->space, block->offset); - } - - if (!recv_no_ibuf_operations) { - ibuf_merge_or_delete_for_page( - block->frame, block->space, block->offset, - TRUE); - } - } - - mutex_enter(&(buf_pool->mutex)); - mutex_enter(&block->mutex); - -#ifdef UNIV_IBUF_DEBUG - ut_a(ibuf_count_get(block->space, block->offset) == 0); -#endif - /* Because this thread which does the unlocking is not the same that - did the locking, we use a pass value != 0 in unlock, which simply - removes the newest lock debug record, without checking the thread - id. */ - - block->io_fix = 0; - - if (io_type == BUF_IO_READ) { - /* NOTE that the call to ibuf may have moved the ownership of - the x-latch to this OS thread: do not let this confuse you in - debugging! 
*/ - - ut_ad(buf_pool->n_pend_reads > 0); - buf_pool->n_pend_reads--; - buf_pool->n_pages_read++; - - rw_lock_x_unlock_gen(&(block->lock), BUF_IO_READ); - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fputs("Has read ", stderr); - } -#endif /* UNIV_DEBUG */ - } else { - ut_ad(io_type == BUF_IO_WRITE); - - /* Write means a flush operation: call the completion - routine in the flush system */ - - buf_flush_write_complete(block); - - rw_lock_s_unlock_gen(&(block->lock), BUF_IO_WRITE); - - buf_pool->n_pages_written++; - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fputs("Has written ", stderr); - } -#endif /* UNIV_DEBUG */ - } - - mutex_exit(&block->mutex); - mutex_exit(&(buf_pool->mutex)); - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, "page space %lu page no %lu\n", - (ulong) block->space, (ulong) block->offset); - } -#endif /* UNIV_DEBUG */ -} - -/************************************************************************* -Invalidates the file pages in the buffer pool when an archive recovery is -completed. All the file pages buffered must be in a replaceable state when -this function is called: not latched and not modified. */ - -void -buf_pool_invalidate(void) -/*=====================*/ -{ - ibool freed; - - ut_ad(buf_all_freed()); - - freed = TRUE; - - /* Keep calling buf_LRU_search_and_free_block() until it reports - that nothing was freed */ - - while (freed) { - freed = buf_LRU_search_and_free_block(100); - } - - mutex_enter(&(buf_pool->mutex)); - - ut_ad(UT_LIST_GET_LEN(buf_pool->LRU) == 0); - - mutex_exit(&(buf_pool->mutex)); -} - -#ifdef UNIV_DEBUG -/************************************************************************* -Validates the buffer buf_pool data structure. 
*/ - -ibool -buf_validate(void) -/*==============*/ -{ - buf_block_t* block; - ulint i; - ulint n_single_flush = 0; - ulint n_lru_flush = 0; - ulint n_list_flush = 0; - ulint n_lru = 0; - ulint n_flush = 0; - ulint n_free = 0; - ulint n_page = 0; - - ut_ad(buf_pool); - - mutex_enter(&(buf_pool->mutex)); - - for (i = 0; i < buf_pool->curr_size; i++) { - - block = buf_pool_get_nth_block(buf_pool, i); - - mutex_enter(&block->mutex); - - if (block->state == BUF_BLOCK_FILE_PAGE) { - - ut_a(buf_page_hash_get(block->space, - block->offset) == block); - n_page++; - -#ifdef UNIV_IBUF_DEBUG - ut_a((block->io_fix == BUF_IO_READ) - || ibuf_count_get(block->space, block->offset) - == 0); -#endif - if (block->io_fix == BUF_IO_WRITE) { - - if (block->flush_type == BUF_FLUSH_LRU) { - n_lru_flush++; - ut_a(rw_lock_is_locked( - &block->lock, - RW_LOCK_SHARED)); - } else if (block->flush_type - == BUF_FLUSH_LIST) { - n_list_flush++; - } else if (block->flush_type - == BUF_FLUSH_SINGLE_PAGE) { - n_single_flush++; - } else { - ut_error; - } - - } else if (block->io_fix == BUF_IO_READ) { - - ut_a(rw_lock_is_locked(&(block->lock), - RW_LOCK_EX)); - } - - n_lru++; - - if (ut_dulint_cmp(block->oldest_modification, - ut_dulint_zero) > 0) { - n_flush++; - } - - } else if (block->state == BUF_BLOCK_NOT_USED) { - n_free++; - } - - mutex_exit(&block->mutex); - } - - if (n_lru + n_free > buf_pool->curr_size) { - fprintf(stderr, "n LRU %lu, n free %lu\n", - (ulong) n_lru, (ulong) n_free); - ut_error; - } - - ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == n_lru); - if (UT_LIST_GET_LEN(buf_pool->free) != n_free) { - fprintf(stderr, "Free list len %lu, free blocks %lu\n", - (ulong) UT_LIST_GET_LEN(buf_pool->free), - (ulong) n_free); - ut_error; - } - ut_a(UT_LIST_GET_LEN(buf_pool->flush_list) == n_flush); - - ut_a(buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE] == n_single_flush); - ut_a(buf_pool->n_flush[BUF_FLUSH_LIST] == n_list_flush); - ut_a(buf_pool->n_flush[BUF_FLUSH_LRU] == n_lru_flush); - - 
mutex_exit(&(buf_pool->mutex)); - - ut_a(buf_LRU_validate()); - ut_a(buf_flush_validate()); - - return(TRUE); -} - -/************************************************************************* -Prints info of the buffer buf_pool data structure. */ - -void -buf_print(void) -/*===========*/ -{ - dulint* index_ids; - ulint* counts; - ulint size; - ulint i; - ulint j; - dulint id; - ulint n_found; - buf_frame_t* frame; - dict_index_t* index; - - ut_ad(buf_pool); - - size = buf_pool->curr_size; - - index_ids = mem_alloc(sizeof(dulint) * size); - counts = mem_alloc(sizeof(ulint) * size); - - mutex_enter(&(buf_pool->mutex)); - - fprintf(stderr, - "buf_pool size %lu\n" - "database pages %lu\n" - "free pages %lu\n" - "modified database pages %lu\n" - "n pending reads %lu\n" - "n pending flush LRU %lu list %lu single page %lu\n" - "pages read %lu, created %lu, written %lu\n", - (ulong) size, - (ulong) UT_LIST_GET_LEN(buf_pool->LRU), - (ulong) UT_LIST_GET_LEN(buf_pool->free), - (ulong) UT_LIST_GET_LEN(buf_pool->flush_list), - (ulong) buf_pool->n_pend_reads, - (ulong) buf_pool->n_flush[BUF_FLUSH_LRU], - (ulong) buf_pool->n_flush[BUF_FLUSH_LIST], - (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE], - (ulong) buf_pool->n_pages_read, buf_pool->n_pages_created, - (ulong) buf_pool->n_pages_written); - - /* Count the number of blocks belonging to each index in the buffer */ - - n_found = 0; - - for (i = 0; i < size; i++) { - frame = buf_pool_get_nth_block(buf_pool, i)->frame; - - if (fil_page_get_type(frame) == FIL_PAGE_INDEX) { - - id = btr_page_get_index_id(frame); - - /* Look for the id in the index_ids array */ - j = 0; - - while (j < n_found) { - - if (ut_dulint_cmp(index_ids[j], id) == 0) { - (counts[j])++; - - break; - } - j++; - } - - if (j == n_found) { - n_found++; - index_ids[j] = id; - counts[j] = 1; - } - } - } - - mutex_exit(&(buf_pool->mutex)); - - for (i = 0; i < n_found; i++) { - index = dict_index_get_if_in_cache(index_ids[i]); - - fprintf(stderr, - "Block count for 
index %lu in buffer is about %lu", - (ulong) ut_dulint_get_low(index_ids[i]), - (ulong) counts[i]); - - if (index) { - putc(' ', stderr); - dict_index_name_print(stderr, NULL, index); - } - - putc('\n', stderr); - } - - mem_free(index_ids); - mem_free(counts); - - ut_a(buf_validate()); -} - -/************************************************************************* -Returns the number of latched pages in the buffer pool. */ - -ulint -buf_get_latched_pages_number(void) -{ - buf_block_t* block; - ulint i; - ulint fixed_pages_number = 0; - - mutex_enter(&(buf_pool->mutex)); - - for (i = 0; i < buf_pool->curr_size; i++) { - - block = buf_pool_get_nth_block(buf_pool, i); - - if (block->magic_n == BUF_BLOCK_MAGIC_N) { - mutex_enter(&block->mutex); - - if (block->buf_fix_count != 0 || block->io_fix != 0) { - fixed_pages_number++; - } - - mutex_exit(&block->mutex); - } - } - - mutex_exit(&(buf_pool->mutex)); - - return(fixed_pages_number); -} -#endif /* UNIV_DEBUG */ - -/************************************************************************* -Returns the number of pending buf pool ios. */ - -ulint -buf_get_n_pending_ios(void) -/*=======================*/ -{ - return(buf_pool->n_pend_reads - + buf_pool->n_flush[BUF_FLUSH_LRU] - + buf_pool->n_flush[BUF_FLUSH_LIST] - + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]); -} - -/************************************************************************* -Returns the ratio in percents of modified pages in the buffer pool / -database pages in the buffer pool. */ - -ulint -buf_get_modified_ratio_pct(void) -/*============================*/ -{ - ulint ratio; - - mutex_enter(&(buf_pool->mutex)); - - ratio = (100 * UT_LIST_GET_LEN(buf_pool->flush_list)) - / (1 + UT_LIST_GET_LEN(buf_pool->LRU) - + UT_LIST_GET_LEN(buf_pool->free)); - - /* 1 + is there to avoid division by zero */ - - mutex_exit(&(buf_pool->mutex)); - - return(ratio); -} - -/************************************************************************* -Prints info of the buffer i/o. 
*/ - -void -buf_print_io( -/*=========*/ - FILE* file) /* in/out: buffer where to print */ -{ - time_t current_time; - double time_elapsed; - ulint size; - - ut_ad(buf_pool); - size = buf_pool->curr_size; - - mutex_enter(&(buf_pool->mutex)); - - if (srv_use_awe) { - fprintf(stderr, - "AWE: Buffer pool memory frames %lu\n", - (ulong) buf_pool->n_frames); - - fprintf(stderr, - "AWE: Database pages and free buffers" - " mapped in frames %lu\n", - (ulong) - UT_LIST_GET_LEN(buf_pool->awe_LRU_free_mapped)); - } - fprintf(file, - "Buffer pool size %lu\n" - "Free buffers %lu\n" - "Database pages %lu\n" - "Modified db pages %lu\n" - "Pending reads %lu\n" - "Pending writes: LRU %lu, flush list %lu, single page %lu\n", - (ulong) size, - (ulong) UT_LIST_GET_LEN(buf_pool->free), - (ulong) UT_LIST_GET_LEN(buf_pool->LRU), - (ulong) UT_LIST_GET_LEN(buf_pool->flush_list), - (ulong) buf_pool->n_pend_reads, - (ulong) buf_pool->n_flush[BUF_FLUSH_LRU] - + buf_pool->init_flush[BUF_FLUSH_LRU], - (ulong) buf_pool->n_flush[BUF_FLUSH_LIST] - + buf_pool->init_flush[BUF_FLUSH_LIST], - (ulong) buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]); - - current_time = time(NULL); - time_elapsed = 0.001 + difftime(current_time, - buf_pool->last_printout_time); - buf_pool->last_printout_time = current_time; - - fprintf(file, - "Pages read %lu, created %lu, written %lu\n" - "%.2f reads/s, %.2f creates/s, %.2f writes/s\n", - (ulong) buf_pool->n_pages_read, - (ulong) buf_pool->n_pages_created, - (ulong) buf_pool->n_pages_written, - (buf_pool->n_pages_read - buf_pool->n_pages_read_old) - / time_elapsed, - (buf_pool->n_pages_created - buf_pool->n_pages_created_old) - / time_elapsed, - (buf_pool->n_pages_written - buf_pool->n_pages_written_old) - / time_elapsed); - - if (srv_use_awe) { - fprintf(file, "AWE: %.2f page remaps/s\n", - (buf_pool->n_pages_awe_remapped - - buf_pool->n_pages_awe_remapped_old) - / time_elapsed); - } - - if (buf_pool->n_page_gets > buf_pool->n_page_gets_old) { - fprintf(file, "Buffer pool 
hit rate %lu / 1000\n", - (ulong) - (1000 - ((1000 * (buf_pool->n_pages_read - - buf_pool->n_pages_read_old)) - / (buf_pool->n_page_gets - - buf_pool->n_page_gets_old)))); - } else { - fputs("No buffer pool page gets since the last printout\n", - file); - } - - buf_pool->n_page_gets_old = buf_pool->n_page_gets; - buf_pool->n_pages_read_old = buf_pool->n_pages_read; - buf_pool->n_pages_created_old = buf_pool->n_pages_created; - buf_pool->n_pages_written_old = buf_pool->n_pages_written; - buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped; - - mutex_exit(&(buf_pool->mutex)); -} - -/************************************************************************** -Refreshes the statistics used to print per-second averages. */ - -void -buf_refresh_io_stats(void) -/*======================*/ -{ - buf_pool->last_printout_time = time(NULL); - buf_pool->n_page_gets_old = buf_pool->n_page_gets; - buf_pool->n_pages_read_old = buf_pool->n_pages_read; - buf_pool->n_pages_created_old = buf_pool->n_pages_created; - buf_pool->n_pages_written_old = buf_pool->n_pages_written; - buf_pool->n_pages_awe_remapped_old = buf_pool->n_pages_awe_remapped; -} - -/************************************************************************* -Checks that all file pages in the buffer are in a replaceable state. 
*/ - -ibool -buf_all_freed(void) -/*===============*/ -{ - buf_block_t* block; - ulint i; - - ut_ad(buf_pool); - - mutex_enter(&(buf_pool->mutex)); - - for (i = 0; i < buf_pool->curr_size; i++) { - - block = buf_pool_get_nth_block(buf_pool, i); - - mutex_enter(&block->mutex); - - if (block->state == BUF_BLOCK_FILE_PAGE) { - - if (!buf_flush_ready_for_replace(block)) { - - fprintf(stderr, - "Page %lu %lu still fixed or dirty\n", - (ulong) block->space, - (ulong) block->offset); - ut_error; - } - } - - mutex_exit(&block->mutex); - } - - mutex_exit(&(buf_pool->mutex)); - - return(TRUE); -} - -/************************************************************************* -Checks that there currently are no pending i/o-operations for the buffer -pool. */ - -ibool -buf_pool_check_no_pending_io(void) -/*==============================*/ - /* out: TRUE if there is no pending i/o */ -{ - ibool ret; - - mutex_enter(&(buf_pool->mutex)); - - if (buf_pool->n_pend_reads + buf_pool->n_flush[BUF_FLUSH_LRU] - + buf_pool->n_flush[BUF_FLUSH_LIST] - + buf_pool->n_flush[BUF_FLUSH_SINGLE_PAGE]) { - ret = FALSE; - } else { - ret = TRUE; - } - - mutex_exit(&(buf_pool->mutex)); - - return(ret); -} - -/************************************************************************* -Gets the current length of the free list of buffer blocks. 
*/ - -ulint -buf_get_free_list_len(void) -/*=======================*/ -{ - ulint len; - - mutex_enter(&(buf_pool->mutex)); - - len = UT_LIST_GET_LEN(buf_pool->free); - - mutex_exit(&(buf_pool->mutex)); - - return(len); -} diff --git a/storage/innobase/buf/buf0flu.c b/storage/innobase/buf/buf0flu.c deleted file mode 100644 index 423c08c0569..00000000000 --- a/storage/innobase/buf/buf0flu.c +++ /dev/null @@ -1,1115 +0,0 @@ -/****************************************************** -The database buffer buf_pool flush algorithm - -(c) 1995-2001 Innobase Oy - -Created 11/11/1995 Heikki Tuuri -*******************************************************/ - -#include "buf0flu.h" - -#ifdef UNIV_NONINL -#include "buf0flu.ic" -#include "trx0sys.h" -#endif - -#include "ut0byte.h" -#include "ut0lst.h" -#include "page0page.h" -#include "fil0fil.h" -#include "buf0buf.h" -#include "buf0lru.h" -#include "buf0rea.h" -#include "ibuf0ibuf.h" -#include "log0log.h" -#include "os0file.h" -#include "trx0sys.h" -#include "srv0srv.h" - -/* When flushed, dirty blocks are searched in neighborhoods of this size, and -flushed along with the original page. */ - -#define BUF_FLUSH_AREA ut_min(BUF_READ_AHEAD_AREA,\ - buf_pool->curr_size / 16) - -/********************************************************************** -Validates the flush list. */ -static -ibool -buf_flush_validate_low(void); -/*========================*/ - /* out: TRUE if ok */ - -/************************************************************************ -Inserts a modified block into the flush list. 
*/ - -void -buf_flush_insert_into_flush_list( -/*=============================*/ - buf_block_t* block) /* in: block which is modified */ -{ - ut_ad(mutex_own(&(buf_pool->mutex))); - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - - ut_ad((UT_LIST_GET_FIRST(buf_pool->flush_list) == NULL) - || (ut_dulint_cmp((UT_LIST_GET_FIRST(buf_pool->flush_list)) - ->oldest_modification, - block->oldest_modification) <= 0)); - - UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block); - - ut_ad(buf_flush_validate_low()); -} - -/************************************************************************ -Inserts a modified block into the flush list in the right sorted position. -This function is used by recovery, because there the modifications do not -necessarily come in the order of lsn's. */ - -void -buf_flush_insert_sorted_into_flush_list( -/*====================================*/ - buf_block_t* block) /* in: block which is modified */ -{ - buf_block_t* prev_b; - buf_block_t* b; - - ut_ad(mutex_own(&(buf_pool->mutex))); - - prev_b = NULL; - b = UT_LIST_GET_FIRST(buf_pool->flush_list); - - while (b && (ut_dulint_cmp(b->oldest_modification, - block->oldest_modification) > 0)) { - prev_b = b; - b = UT_LIST_GET_NEXT(flush_list, b); - } - - if (prev_b == NULL) { - UT_LIST_ADD_FIRST(flush_list, buf_pool->flush_list, block); - } else { - UT_LIST_INSERT_AFTER(flush_list, buf_pool->flush_list, prev_b, - block); - } - - ut_ad(buf_flush_validate_low()); -} - -/************************************************************************ -Returns TRUE if the file page block is immediately suitable for replacement, -i.e., the transition FILE_PAGE => NOT_USED allowed. 
*/ - -ibool -buf_flush_ready_for_replace( -/*========================*/ - /* out: TRUE if can replace immediately */ - buf_block_t* block) /* in: buffer control block, must be in state - BUF_BLOCK_FILE_PAGE and in the LRU list */ -{ - ut_ad(mutex_own(&(buf_pool->mutex))); - ut_ad(mutex_own(&block->mutex)); - if (block->state != BUF_BLOCK_FILE_PAGE) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: buffer block state %lu" - " in the LRU list!\n", - (ulong)block->state); - ut_print_buf(stderr, block, sizeof(buf_block_t)); - - return(FALSE); - } - - if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0) - || (block->buf_fix_count != 0) - || (block->io_fix != 0)) { - - return(FALSE); - } - - return(TRUE); -} - -/************************************************************************ -Returns TRUE if the block is modified and ready for flushing. */ -UNIV_INLINE -ibool -buf_flush_ready_for_flush( -/*======================*/ - /* out: TRUE if can flush immediately */ - buf_block_t* block, /* in: buffer control block, must be in state - BUF_BLOCK_FILE_PAGE */ - ulint flush_type)/* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ -{ - ut_ad(mutex_own(&(buf_pool->mutex))); - ut_ad(mutex_own(&(block->mutex))); - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - - if ((ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) > 0) - && (block->io_fix == 0)) { - if (flush_type != BUF_FLUSH_LRU) { - - return(TRUE); - - } else if (block->buf_fix_count == 0) { - - /* If we are flushing the LRU list, to avoid deadlocks - we require the block not to be bufferfixed, and hence - not latched. */ - - return(TRUE); - } - } - - return(FALSE); -} - -/************************************************************************ -Updates the flush system data structures when a write is completed. 
*/ - -void -buf_flush_write_complete( -/*=====================*/ - buf_block_t* block) /* in: pointer to the block in question */ -{ - ut_ad(block); -#ifdef UNIV_SYNC_DEBUG - ut_ad(mutex_own(&(buf_pool->mutex))); -#endif /* UNIV_SYNC_DEBUG */ - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - - block->oldest_modification = ut_dulint_zero; - - UT_LIST_REMOVE(flush_list, buf_pool->flush_list, block); - - ut_d(UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list)); - - (buf_pool->n_flush[block->flush_type])--; - - if (block->flush_type == BUF_FLUSH_LRU) { - /* Put the block to the end of the LRU list to wait to be - moved to the free list */ - - buf_LRU_make_block_old(block); - - buf_pool->LRU_flush_ended++; - } - - /* fprintf(stderr, "n pending flush %lu\n", - buf_pool->n_flush[block->flush_type]); */ - - if ((buf_pool->n_flush[block->flush_type] == 0) - && (buf_pool->init_flush[block->flush_type] == FALSE)) { - - /* The running flush batch has ended */ - - os_event_set(buf_pool->no_flush[block->flush_type]); - } -} - -/************************************************************************ -Flushes possible buffered writes from the doublewrite memory buffer to disk, -and also wakes up the aio thread if simulated aio is used. It is very -important to call this function after a batch of writes has been posted, -and also when we may have to wait for a page latch! Otherwise a deadlock -of threads can occur. */ -static -void -buf_flush_buffered_writes(void) -/*===========================*/ -{ - buf_block_t* block; - byte* write_buf; - ulint len; - ulint len2; - ulint i; - - if (!srv_use_doublewrite_buf || trx_doublewrite == NULL) { - os_aio_simulated_wake_handler_threads(); - - return; - } - - mutex_enter(&(trx_doublewrite->mutex)); - - /* Write first to doublewrite buffer blocks. We use synchronous - aio and thus know that file write has been completed when the - control returns. 
*/ - - if (trx_doublewrite->first_free == 0) { - - mutex_exit(&(trx_doublewrite->mutex)); - - return; - } - - for (i = 0; i < trx_doublewrite->first_free; i++) { - - block = trx_doublewrite->buf_block_arr[i]; - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - - if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4) - != mach_read_from_4(block->frame + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: ERROR: The page to be written" - " seems corrupt!\n" - "InnoDB: The lsn fields do not match!" - " Noticed in the buffer pool\n" - "InnoDB: before posting to the" - " doublewrite buffer.\n"); - } - - if (block->check_index_page_at_flush - && !page_simple_validate(block->frame)) { - - buf_page_print(block->frame); - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Apparent corruption of an" - " index page n:o %lu in space %lu\n" - "InnoDB: to be written to data file." - " We intentionally crash server\n" - "InnoDB: to prevent corrupt data" - " from ending up in data\n" - "InnoDB: files.\n", - (ulong) block->offset, (ulong) block->space); - - ut_error; - } - } - - /* increment the doublewrite flushed pages counter */ - srv_dblwr_pages_written+= trx_doublewrite->first_free; - srv_dblwr_writes++; - - if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { - len = TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; - } else { - len = trx_doublewrite->first_free * UNIV_PAGE_SIZE; - } - - fil_io(OS_FILE_WRITE, - TRUE, TRX_SYS_SPACE, - trx_doublewrite->block1, 0, len, - (void*)trx_doublewrite->write_buf, NULL); - - write_buf = trx_doublewrite->write_buf; - - for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len; len2 += UNIV_PAGE_SIZE) { - if (mach_read_from_4(write_buf + len2 + FIL_PAGE_LSN + 4) - != mach_read_from_4(write_buf + len2 + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: ERROR: The page to be written" - " seems corrupt!\n" - 
"InnoDB: The lsn fields do not match!" - " Noticed in the doublewrite block1.\n"); - } - } - - if (trx_doublewrite->first_free > TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { - len = (trx_doublewrite->first_free - - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE; - - fil_io(OS_FILE_WRITE, - TRUE, TRX_SYS_SPACE, - trx_doublewrite->block2, 0, len, - (void*)(trx_doublewrite->write_buf - + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE - * UNIV_PAGE_SIZE), - NULL); - - write_buf = trx_doublewrite->write_buf - + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE; - for (len2 = 0; len2 + UNIV_PAGE_SIZE <= len; - len2 += UNIV_PAGE_SIZE) { - if (mach_read_from_4(write_buf + len2 - + FIL_PAGE_LSN + 4) - != mach_read_from_4(write_buf + len2 - + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM - + 4)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: ERROR: The page to be" - " written seems corrupt!\n" - "InnoDB: The lsn fields do not match!" - " Noticed in" - " the doublewrite block2.\n"); - } - } - } - - /* Now flush the doublewrite buffer data to disk */ - - fil_flush(TRX_SYS_SPACE); - - /* We know that the writes have been flushed to disk now - and in recovery we will find them in the doublewrite buffer - blocks. Next do the writes to the intended positions. */ - - for (i = 0; i < trx_doublewrite->first_free; i++) { - block = trx_doublewrite->buf_block_arr[i]; - - if (mach_read_from_4(block->frame + FIL_PAGE_LSN + 4) - != mach_read_from_4(block->frame + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM + 4)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: ERROR: The page to be written" - " seems corrupt!\n" - "InnoDB: The lsn fields do not match!" 
- " Noticed in the buffer pool\n" - "InnoDB: after posting and flushing" - " the doublewrite buffer.\n" - "InnoDB: Page buf fix count %lu," - " io fix %lu, state %lu\n", - (ulong)block->buf_fix_count, - (ulong)block->io_fix, - (ulong)block->state); - } - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - - fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, - FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE, - (void*)block->frame, (void*)block); - } - - /* Wake possible simulated aio thread to actually post the - writes to the operating system */ - - os_aio_simulated_wake_handler_threads(); - - /* Wait that all async writes to tablespaces have been posted to - the OS */ - - os_aio_wait_until_no_pending_writes(); - - /* Now we flush the data to disk (for example, with fsync) */ - - fil_flush_file_spaces(FIL_TABLESPACE); - - /* We can now reuse the doublewrite memory buffer: */ - - trx_doublewrite->first_free = 0; - - mutex_exit(&(trx_doublewrite->mutex)); -} - -/************************************************************************ -Posts a buffer page for writing. If the doublewrite memory buffer is -full, calls buf_flush_buffered_writes and waits for for free space to -appear. 
*/ -static -void -buf_flush_post_to_doublewrite_buf( -/*==============================*/ - buf_block_t* block) /* in: buffer block to write */ -{ -try_again: - mutex_enter(&(trx_doublewrite->mutex)); - - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - - if (trx_doublewrite->first_free - >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { - mutex_exit(&(trx_doublewrite->mutex)); - - buf_flush_buffered_writes(); - - goto try_again; - } - - ut_memcpy(trx_doublewrite->write_buf - + UNIV_PAGE_SIZE * trx_doublewrite->first_free, - block->frame, UNIV_PAGE_SIZE); - - trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = block; - - trx_doublewrite->first_free++; - - if (trx_doublewrite->first_free - >= 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { - mutex_exit(&(trx_doublewrite->mutex)); - - buf_flush_buffered_writes(); - - return; - } - - mutex_exit(&(trx_doublewrite->mutex)); -} - -/************************************************************************ -Initializes a page for writing to the tablespace. */ - -void -buf_flush_init_for_writing( -/*=======================*/ - byte* page, /* in: page */ - dulint newest_lsn, /* in: newest modification lsn to the page */ - ulint space, /* in: space id */ - ulint page_no) /* in: page number */ -{ - /* Write the newest modification lsn to the page header and trailer */ - mach_write_to_8(page + FIL_PAGE_LSN, newest_lsn); - - mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, - newest_lsn); - /* Write the page number and the space id */ - - mach_write_to_4(page + FIL_PAGE_OFFSET, page_no); - mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space); - - /* Store the new formula checksum */ - - mach_write_to_4(page + FIL_PAGE_SPACE_OR_CHKSUM, - srv_use_checksums - ? buf_calc_page_new_checksum(page) - : BUF_NO_CHECKSUM_MAGIC); - - /* We overwrite the first 4 bytes of the end lsn field to store - the old formula checksum. 
Since it depends also on the field - FIL_PAGE_SPACE_OR_CHKSUM, it has to be calculated after storing the - new formula checksum. */ - - mach_write_to_4(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, - srv_use_checksums - ? buf_calc_page_old_checksum(page) - : BUF_NO_CHECKSUM_MAGIC); -} - -/************************************************************************ -Does an asynchronous write of a buffer page. NOTE: in simulated aio and -also when the doublewrite buffer is used, we must call -buf_flush_buffered_writes after we have posted a batch of writes! */ -static -void -buf_flush_write_block_low( -/*======================*/ - buf_block_t* block) /* in: buffer block to write */ -{ -#ifdef UNIV_LOG_DEBUG - static ibool univ_log_debug_warned; -#endif /* UNIV_LOG_DEBUG */ - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - -#ifdef UNIV_IBUF_DEBUG - ut_a(ibuf_count_get(block->space, block->offset) == 0); -#endif - ut_ad(!ut_dulint_is_zero(block->newest_modification)); - -#ifdef UNIV_LOG_DEBUG - if (!univ_log_debug_warned) { - univ_log_debug_warned = TRUE; - fputs("Warning: cannot force log to disk if" - " UNIV_LOG_DEBUG is defined!\n" - "Crash recovery will not work!\n", - stderr); - } -#else - /* Force the log to the disk before writing the modified block */ - log_write_up_to(block->newest_modification, LOG_WAIT_ALL_GROUPS, TRUE); -#endif - buf_flush_init_for_writing(block->frame, block->newest_modification, - block->space, block->offset); - if (!srv_use_doublewrite_buf || !trx_doublewrite) { - fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER, - FALSE, block->space, block->offset, 0, UNIV_PAGE_SIZE, - (void*)block->frame, (void*)block); - } else { - buf_flush_post_to_doublewrite_buf(block); - } -} - -/************************************************************************ -Writes a page asynchronously from the buffer buf_pool to a file, if it can be -found in the buf_pool and it is in a flushable state. 
NOTE: in simulated aio -we must call os_aio_simulated_wake_handler_threads after we have posted a batch -of writes! */ -static -ulint -buf_flush_try_page( -/*===============*/ - /* out: 1 if a page was flushed, 0 otherwise */ - ulint space, /* in: space id */ - ulint offset, /* in: page offset */ - ulint flush_type) /* in: BUF_FLUSH_LRU, BUF_FLUSH_LIST, or - BUF_FLUSH_SINGLE_PAGE */ -{ - buf_block_t* block; - ibool locked; - - ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST - || flush_type == BUF_FLUSH_SINGLE_PAGE); - - mutex_enter(&(buf_pool->mutex)); - - block = buf_page_hash_get(space, offset); - - ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE); - - if (!block) { - mutex_exit(&(buf_pool->mutex)); - return(0); - } - - mutex_enter(&block->mutex); - - if (flush_type == BUF_FLUSH_LIST - && buf_flush_ready_for_flush(block, flush_type)) { - - block->io_fix = BUF_IO_WRITE; - - /* If AWE is enabled and the page is not mapped to a frame, - then map it */ - - if (block->frame == NULL) { - ut_a(srv_use_awe); - - /* We set second parameter TRUE because the block is - in the LRU list and we must put it to - awe_LRU_free_mapped list once mapped to a frame */ - - buf_awe_map_page_to_frame(block, TRUE); - } - - block->flush_type = flush_type; - - if (buf_pool->n_flush[flush_type] == 0) { - - os_event_reset(buf_pool->no_flush[flush_type]); - } - - (buf_pool->n_flush[flush_type])++; - - locked = FALSE; - - /* If the simulated aio thread is not running, we must - not wait for any latch, as we may end up in a deadlock: - if buf_fix_count == 0, then we know we need not wait */ - - if (block->buf_fix_count == 0) { - rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE); - - locked = TRUE; - } - - mutex_exit(&block->mutex); - mutex_exit(&(buf_pool->mutex)); - - if (!locked) { - buf_flush_buffered_writes(); - - rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE); - } - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Flushing page space %lu, page no %lu 
\n", - (ulong) block->space, (ulong) block->offset); - } -#endif /* UNIV_DEBUG */ - - buf_flush_write_block_low(block); - - return(1); - - } else if (flush_type == BUF_FLUSH_LRU - && buf_flush_ready_for_flush(block, flush_type)) { - - /* VERY IMPORTANT: - Because any thread may call the LRU flush, even when owning - locks on pages, to avoid deadlocks, we must make sure that the - s-lock is acquired on the page without waiting: this is - accomplished because in the if-condition above we require - the page not to be bufferfixed (in function - ..._ready_for_flush). */ - - block->io_fix = BUF_IO_WRITE; - - /* If AWE is enabled and the page is not mapped to a frame, - then map it */ - - if (block->frame == NULL) { - ut_a(srv_use_awe); - - /* We set second parameter TRUE because the block is - in the LRU list and we must put it to - awe_LRU_free_mapped list once mapped to a frame */ - - buf_awe_map_page_to_frame(block, TRUE); - } - - block->flush_type = flush_type; - - if (buf_pool->n_flush[flush_type] == 0) { - - os_event_reset(buf_pool->no_flush[flush_type]); - } - - (buf_pool->n_flush[flush_type])++; - - rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE); - - /* Note that the s-latch is acquired before releasing the - buf_pool mutex: this ensures that the latch is acquired - immediately. 
*/ - - mutex_exit(&block->mutex); - mutex_exit(&(buf_pool->mutex)); - - buf_flush_write_block_low(block); - - return(1); - - } else if (flush_type == BUF_FLUSH_SINGLE_PAGE - && buf_flush_ready_for_flush(block, flush_type)) { - - block->io_fix = BUF_IO_WRITE; - - /* If AWE is enabled and the page is not mapped to a frame, - then map it */ - - if (block->frame == NULL) { - ut_a(srv_use_awe); - - /* We set second parameter TRUE because the block is - in the LRU list and we must put it to - awe_LRU_free_mapped list once mapped to a frame */ - - buf_awe_map_page_to_frame(block, TRUE); - } - - block->flush_type = flush_type; - - if (buf_pool->n_flush[block->flush_type] == 0) { - - os_event_reset(buf_pool->no_flush[block->flush_type]); - } - - (buf_pool->n_flush[flush_type])++; - - mutex_exit(&block->mutex); - mutex_exit(&(buf_pool->mutex)); - - rw_lock_s_lock_gen(&(block->lock), BUF_IO_WRITE); - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Flushing single page space %lu," - " page no %lu \n", - (ulong) block->space, - (ulong) block->offset); - } -#endif /* UNIV_DEBUG */ - - buf_flush_write_block_low(block); - - return(1); - } - - mutex_exit(&block->mutex); - mutex_exit(&(buf_pool->mutex)); - - return(0); -} - -/*************************************************************** -Flushes to disk all flushable pages within the flush area. 
*/ -static -ulint -buf_flush_try_neighbors( -/*====================*/ - /* out: number of pages flushed */ - ulint space, /* in: space id */ - ulint offset, /* in: page offset */ - ulint flush_type) /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ -{ - buf_block_t* block; - ulint low, high; - ulint count = 0; - ulint i; - - ut_ad(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST); - - low = (offset / BUF_FLUSH_AREA) * BUF_FLUSH_AREA; - high = (offset / BUF_FLUSH_AREA + 1) * BUF_FLUSH_AREA; - - if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) { - /* If there is little space, it is better not to flush any - block except from the end of the LRU list */ - - low = offset; - high = offset + 1; - } - - /* fprintf(stderr, "Flush area: low %lu high %lu\n", low, high); */ - - if (high > fil_space_get_size(space)) { - high = fil_space_get_size(space); - } - - mutex_enter(&(buf_pool->mutex)); - - for (i = low; i < high; i++) { - - block = buf_page_hash_get(space, i); - ut_a(!block || block->state == BUF_BLOCK_FILE_PAGE); - - if (!block) { - - continue; - - } else if (flush_type == BUF_FLUSH_LRU && i != offset - && !block->old) { - - /* We avoid flushing 'non-old' blocks in an LRU flush, - because the flushed blocks are soon freed */ - - continue; - } else { - - mutex_enter(&block->mutex); - - if (buf_flush_ready_for_flush(block, flush_type) - && (i == offset || block->buf_fix_count == 0)) { - /* We only try to flush those - neighbors != offset where the buf fix count is - zero, as we then know that we probably can - latch the page without a semaphore wait. - Semaphore waits are expensive because we must - flush the doublewrite buffer before we start - waiting. */ - - mutex_exit(&block->mutex); - - mutex_exit(&(buf_pool->mutex)); - - /* Note: as we release the buf_pool mutex - above, in buf_flush_try_page we cannot be sure - the page is still in a flushable state: - therefore we check it again inside that - function. 
*/ - - count += buf_flush_try_page(space, i, - flush_type); - - mutex_enter(&(buf_pool->mutex)); - } else { - mutex_exit(&block->mutex); - } - } - } - - mutex_exit(&(buf_pool->mutex)); - - return(count); -} - -/*********************************************************************** -This utility flushes dirty blocks from the end of the LRU list or flush_list. -NOTE 1: in the case of an LRU flush the calling thread may own latches to -pages: to avoid deadlocks, this function must be written so that it cannot -end up waiting for these latches! NOTE 2: in the case of a flush list flush, -the calling thread is not allowed to own any latches on pages! */ - -ulint -buf_flush_batch( -/*============*/ - /* out: number of blocks for which the write - request was queued; ULINT_UNDEFINED if there - was a flush of the same type already running */ - ulint flush_type, /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if - BUF_FLUSH_LIST, then the caller must not own - any latches on pages */ - ulint min_n, /* in: wished minimum mumber of blocks flushed - (it is not guaranteed that the actual number - is that big, though) */ - dulint lsn_limit) /* in the case BUF_FLUSH_LIST all blocks whose - oldest_modification is smaller than this - should be flushed (if their number does not - exceed min_n), otherwise ignored */ -{ - buf_block_t* block; - ulint page_count = 0; - ulint old_page_count; - ulint space; - ulint offset; - ibool found; - - ut_ad((flush_type == BUF_FLUSH_LRU) - || (flush_type == BUF_FLUSH_LIST)); -#ifdef UNIV_SYNC_DEBUG - ut_ad((flush_type != BUF_FLUSH_LIST) - || sync_thread_levels_empty_gen(TRUE)); -#endif /* UNIV_SYNC_DEBUG */ - mutex_enter(&(buf_pool->mutex)); - - if ((buf_pool->n_flush[flush_type] > 0) - || (buf_pool->init_flush[flush_type] == TRUE)) { - - /* There is already a flush batch of the same type running */ - - mutex_exit(&(buf_pool->mutex)); - - return(ULINT_UNDEFINED); - } - - (buf_pool->init_flush)[flush_type] = TRUE; - - for (;;) { - /* If we have flushed 
enough, leave the loop */ - if (page_count >= min_n) { - - break; - } - - /* Start from the end of the list looking for a suitable - block to be flushed. */ - - if (flush_type == BUF_FLUSH_LRU) { - block = UT_LIST_GET_LAST(buf_pool->LRU); - } else { - ut_ad(flush_type == BUF_FLUSH_LIST); - - block = UT_LIST_GET_LAST(buf_pool->flush_list); - if (!block - || (ut_dulint_cmp(block->oldest_modification, - lsn_limit) >= 0)) { - /* We have flushed enough */ - - break; - } - } - - found = FALSE; - - /* Note that after finding a single flushable page, we try to - flush also all its neighbors, and after that start from the - END of the LRU list or flush list again: the list may change - during the flushing and we cannot safely preserve within this - function a pointer to a block in the list! */ - - while ((block != NULL) && !found) { - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - - mutex_enter(&block->mutex); - - if (buf_flush_ready_for_flush(block, flush_type)) { - - found = TRUE; - space = block->space; - offset = block->offset; - - mutex_exit(&block->mutex); - mutex_exit(&(buf_pool->mutex)); - - old_page_count = page_count; - - /* Try to flush also all the neighbors */ - page_count += buf_flush_try_neighbors( - space, offset, flush_type); - /* fprintf(stderr, - "Flush type %lu, page no %lu, neighb %lu\n", - flush_type, offset, - page_count - old_page_count); */ - - mutex_enter(&(buf_pool->mutex)); - - } else if (flush_type == BUF_FLUSH_LRU) { - - mutex_exit(&block->mutex); - - block = UT_LIST_GET_PREV(LRU, block); - } else { - ut_ad(flush_type == BUF_FLUSH_LIST); - - mutex_exit(&block->mutex); - - block = UT_LIST_GET_PREV(flush_list, block); - } - } - - /* If we could not find anything to flush, leave the loop */ - - if (!found) { - break; - } - } - - (buf_pool->init_flush)[flush_type] = FALSE; - - if ((buf_pool->n_flush[flush_type] == 0) - && (buf_pool->init_flush[flush_type] == FALSE)) { - - /* The running flush batch has ended */ - - 
os_event_set(buf_pool->no_flush[flush_type]); - } - - mutex_exit(&(buf_pool->mutex)); - - buf_flush_buffered_writes(); - -#ifdef UNIV_DEBUG - if (buf_debug_prints && page_count > 0) { - ut_a(flush_type == BUF_FLUSH_LRU - || flush_type == BUF_FLUSH_LIST); - fprintf(stderr, flush_type == BUF_FLUSH_LRU - ? "Flushed %lu pages in LRU flush\n" - : "Flushed %lu pages in flush list flush\n", - (ulong) page_count); - } -#endif /* UNIV_DEBUG */ - - srv_buf_pool_flushed += page_count; - - return(page_count); -} - -/********************************************************************** -Waits until a flush batch of the given type ends */ - -void -buf_flush_wait_batch_end( -/*=====================*/ - ulint type) /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ -{ - ut_ad((type == BUF_FLUSH_LRU) || (type == BUF_FLUSH_LIST)); - - os_event_wait(buf_pool->no_flush[type]); -} - -/********************************************************************** -Gives a recommendation of how many blocks should be flushed to establish -a big enough margin of replaceable blocks near the end of the LRU list -and in the free list. 
*/ -static -ulint -buf_flush_LRU_recommendation(void) -/*==============================*/ - /* out: number of blocks which should be flushed - from the end of the LRU list */ -{ - buf_block_t* block; - ulint n_replaceable; - ulint distance = 0; - - mutex_enter(&(buf_pool->mutex)); - - n_replaceable = UT_LIST_GET_LEN(buf_pool->free); - - block = UT_LIST_GET_LAST(buf_pool->LRU); - - while ((block != NULL) - && (n_replaceable < BUF_FLUSH_FREE_BLOCK_MARGIN - + BUF_FLUSH_EXTRA_MARGIN) - && (distance < BUF_LRU_FREE_SEARCH_LEN)) { - - mutex_enter(&block->mutex); - - if (buf_flush_ready_for_replace(block)) { - n_replaceable++; - } - - mutex_exit(&block->mutex); - - distance++; - - block = UT_LIST_GET_PREV(LRU, block); - } - - mutex_exit(&(buf_pool->mutex)); - - if (n_replaceable >= BUF_FLUSH_FREE_BLOCK_MARGIN) { - - return(0); - } - - return(BUF_FLUSH_FREE_BLOCK_MARGIN + BUF_FLUSH_EXTRA_MARGIN - - n_replaceable); -} - -/************************************************************************* -Flushes pages from the end of the LRU list if there is too small a margin -of replaceable pages there or in the free list. VERY IMPORTANT: this function -is called also by threads which have locks on pages. To avoid deadlocks, we -flush only pages such that the s-lock required for flushing can be acquired -immediately, without waiting. */ - -void -buf_flush_free_margin(void) -/*=======================*/ -{ - ulint n_to_flush; - ulint n_flushed; - - n_to_flush = buf_flush_LRU_recommendation(); - - if (n_to_flush > 0) { - n_flushed = buf_flush_batch(BUF_FLUSH_LRU, n_to_flush, - ut_dulint_zero); - if (n_flushed == ULINT_UNDEFINED) { - /* There was an LRU type flush batch already running; - let us wait for it to end */ - - buf_flush_wait_batch_end(BUF_FLUSH_LRU); - } - } -} - -/********************************************************************** -Validates the flush list. 
*/ -static -ibool -buf_flush_validate_low(void) -/*========================*/ - /* out: TRUE if ok */ -{ - buf_block_t* block; - dulint om; - - UT_LIST_VALIDATE(flush_list, buf_block_t, buf_pool->flush_list); - - block = UT_LIST_GET_FIRST(buf_pool->flush_list); - - while (block != NULL) { - om = block->oldest_modification; - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - ut_a(ut_dulint_cmp(om, ut_dulint_zero) > 0); - - block = UT_LIST_GET_NEXT(flush_list, block); - - if (block) { - ut_a(ut_dulint_cmp(om, block->oldest_modification) - >= 0); - } - } - - return(TRUE); -} - -/********************************************************************** -Validates the flush list. */ - -ibool -buf_flush_validate(void) -/*====================*/ - /* out: TRUE if ok */ -{ - ibool ret; - - mutex_enter(&(buf_pool->mutex)); - - ret = buf_flush_validate_low(); - - mutex_exit(&(buf_pool->mutex)); - - return(ret); -} diff --git a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c deleted file mode 100644 index d3c787d1578..00000000000 --- a/storage/innobase/buf/buf0lru.c +++ /dev/null @@ -1,1237 +0,0 @@ -/****************************************************** -The database buffer replacement algorithm - -(c) 1995 Innobase Oy - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#include "buf0lru.h" - -#ifdef UNIV_NONINL -#include "buf0lru.ic" -#include "srv0srv.h" /* Needed to getsrv_print_innodb_monitor */ -#endif - -#include "ut0byte.h" -#include "ut0lst.h" -#include "ut0rnd.h" -#include "sync0sync.h" -#include "sync0rw.h" -#include "hash0hash.h" -#include "os0sync.h" -#include "fil0fil.h" -#include "btr0btr.h" -#include "buf0buf.h" -#include "buf0flu.h" -#include "buf0rea.h" -#include "btr0sea.h" -#include "os0file.h" -#include "log0recv.h" - -/* The number of blocks from the LRU_old pointer onward, including the block -pointed to, must be 3/8 of the whole LRU list length, except that the -tolerance defined below is allowed. 
Note that the tolerance must be small -enough such that for even the BUF_LRU_OLD_MIN_LEN long LRU list, the -LRU_old pointer is not allowed to point to either end of the LRU list. */ - -#define BUF_LRU_OLD_TOLERANCE 20 - -/* The whole LRU list length is divided by this number to determine an -initial segment in buf_LRU_get_recent_limit */ - -#define BUF_LRU_INITIAL_RATIO 8 - -/* When dropping the search hash index entries before deleting an ibd -file, we build a local array of pages belonging to that tablespace -in the buffer pool. Following is the size of that array. */ -#define BUF_LRU_DROP_SEARCH_HASH_SIZE 1024 - -/* If we switch on the InnoDB monitor because there are too few available -frames in the buffer pool, we set this to TRUE */ -ibool buf_lru_switched_on_innodb_mon = FALSE; - -/********************************************************************** -Takes a block out of the LRU list and page hash table and sets the block -state to BUF_BLOCK_REMOVE_HASH. */ -static -void -buf_LRU_block_remove_hashed_page( -/*=============================*/ - buf_block_t* block); /* in: block, must contain a file page and - be in a state where it can be freed; there - may or may not be a hash index to the page */ -/********************************************************************** -Puts a file page whose has no hash index to the free list. */ -static -void -buf_LRU_block_free_hashed_page( -/*===========================*/ - buf_block_t* block); /* in: block, must contain a file page and - be in a state where it can be freed */ - -/********************************************************************** -Attempts to drop page hash index on a batch of pages belonging to a -particular space id. 
*/ -static -void -buf_LRU_drop_page_hash_batch( -/*=========================*/ - ulint id, /* in: space id */ - const ulint* arr, /* in: array of page_no */ - ulint count) /* in: number of entries in array */ -{ - ulint i; - - ut_ad(arr != NULL); - ut_ad(count <= BUF_LRU_DROP_SEARCH_HASH_SIZE); - - for (i = 0; i < count; ++i) { - btr_search_drop_page_hash_when_freed(id, arr[i]); - } -} - -/********************************************************************** -When doing a DROP TABLE/DISCARD TABLESPACE we have to drop all page -hash index entries belonging to that table. This function tries to -do that in batch. Note that this is a 'best effort' attempt and does -not guarantee that ALL hash entries will be removed. */ -static -void -buf_LRU_drop_page_hash_for_tablespace( -/*==================================*/ - ulint id) /* in: space id */ -{ - buf_block_t* block; - ulint* page_arr; - ulint num_entries; - - page_arr = ut_malloc(sizeof(ulint) - * BUF_LRU_DROP_SEARCH_HASH_SIZE); - mutex_enter(&buf_pool->mutex); - -scan_again: - num_entries = 0; - block = UT_LIST_GET_LAST(buf_pool->LRU); - - while (block != NULL) { - buf_block_t* prev_block; - - mutex_enter(&block->mutex); - prev_block = UT_LIST_GET_PREV(LRU, block); - - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - - if (block->space != id - || block->buf_fix_count > 0 - || block->io_fix != 0) { - /* We leave the fixed pages as is in this scan. - To be dealt with later in the final scan. */ - mutex_exit(&block->mutex); - goto next_page; - } - - ut_ad(block->space == id); - if (block->is_hashed) { - - /* Store the offset(i.e.: page_no) in the array - so that we can drop hash index in a batch - later. */ - page_arr[num_entries] = block->offset; - mutex_exit(&block->mutex); - ut_a(num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE); - ++num_entries; - - if (num_entries < BUF_LRU_DROP_SEARCH_HASH_SIZE) { - goto next_page; - } - /* Array full. We release the buf_pool->mutex to - obey the latching order. 
*/ - mutex_exit(&buf_pool->mutex); - - buf_LRU_drop_page_hash_batch(id, page_arr, - num_entries); - num_entries = 0; - mutex_enter(&buf_pool->mutex); - } else { - mutex_exit(&block->mutex); - } - -next_page: - /* Note that we may have released the buf_pool->mutex - above after reading the prev_block during processing - of a page_hash_batch (i.e.: when the array was full). - This means that prev_block can change in LRU list. - This is OK because this function is a 'best effort' - to drop as many search hash entries as possible and - it does not guarantee that ALL such entries will be - dropped. */ - block = prev_block; - - /* If, however, block has been removed from LRU list - to the free list then we should restart the scan. - block->state is protected by buf_pool->mutex. */ - if (block && block->state != BUF_BLOCK_FILE_PAGE) { - ut_a(num_entries == 0); - goto scan_again; - } - } - - mutex_exit(&buf_pool->mutex); - - /* Drop any remaining batch of search hashed pages. */ - buf_LRU_drop_page_hash_batch(id, page_arr, num_entries); - ut_free(page_arr); -} - -/********************************************************************** -Invalidates all pages belonging to a given tablespace when we are deleting -the data file(s) of that tablespace. */ - -void -buf_LRU_invalidate_tablespace( -/*==========================*/ - ulint id) /* in: space id */ -{ - buf_block_t* block; - ulint page_no; - ibool all_freed; - - /* Before we attempt to drop pages one by one we first - attempt to drop page hash index entries in batches to make - it more efficient. The batching attempt is a best effort - attempt and does not guarantee that all pages hash entries - will be dropped. We get rid of remaining page hash entries - one by one below. 
*/ - buf_LRU_drop_page_hash_for_tablespace(id); - -scan_again: - mutex_enter(&(buf_pool->mutex)); - - all_freed = TRUE; - - block = UT_LIST_GET_LAST(buf_pool->LRU); - - while (block != NULL) { - buf_block_t* prev_block; - - mutex_enter(&block->mutex); - prev_block = UT_LIST_GET_PREV(LRU, block); - - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - - if (block->space == id - && (block->buf_fix_count > 0 || block->io_fix != 0)) { - - /* We cannot remove this page during this scan yet; - maybe the system is currently reading it in, or - flushing the modifications to the file */ - - all_freed = FALSE; - - goto next_page; - } - - if (block->space == id) { -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Dropping space %lu page %lu\n", - (ulong) block->space, - (ulong) block->offset); - } -#endif - if (block->is_hashed) { - page_no = block->offset; - - mutex_exit(&block->mutex); - - mutex_exit(&(buf_pool->mutex)); - - /* Note that the following call will acquire - an S-latch on the page */ - - btr_search_drop_page_hash_when_freed(id, - page_no); - goto scan_again; - } - - if (0 != ut_dulint_cmp(block->oldest_modification, - ut_dulint_zero)) { - - /* Remove from the flush list of modified - blocks */ - block->oldest_modification = ut_dulint_zero; - - UT_LIST_REMOVE(flush_list, - buf_pool->flush_list, block); - } - - /* Remove from the LRU list */ - buf_LRU_block_remove_hashed_page(block); - buf_LRU_block_free_hashed_page(block); - } -next_page: - mutex_exit(&block->mutex); - block = prev_block; - } - - mutex_exit(&(buf_pool->mutex)); - - if (!all_freed) { - os_thread_sleep(20000); - - goto scan_again; - } -} - -/********************************************************************** -Gets the minimum LRU_position field for the blocks in an initial segment -(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not -guaranteed to be precise, because the ulint_clock may wrap around. 
*/ - -ulint -buf_LRU_get_recent_limit(void) -/*==========================*/ - /* out: the limit; zero if could not determine it */ -{ - buf_block_t* block; - ulint len; - ulint limit; - - mutex_enter(&(buf_pool->mutex)); - - len = UT_LIST_GET_LEN(buf_pool->LRU); - - if (len < BUF_LRU_OLD_MIN_LEN) { - /* The LRU list is too short to do read-ahead */ - - mutex_exit(&(buf_pool->mutex)); - - return(0); - } - - block = UT_LIST_GET_FIRST(buf_pool->LRU); - - limit = block->LRU_position - len / BUF_LRU_INITIAL_RATIO; - - mutex_exit(&(buf_pool->mutex)); - - return(limit); -} - -/********************************************************************** -Look for a replaceable block from the end of the LRU list and put it to -the free list if found. */ - -ibool -buf_LRU_search_and_free_block( -/*==========================*/ - /* out: TRUE if freed */ - ulint n_iterations) /* in: how many times this has been called - repeatedly without result: a high value means - that we should search farther; if value is - k < 10, then we only search k/10 * [number - of pages in the buffer pool] from the end - of the LRU list */ -{ - buf_block_t* block; - ulint distance = 0; - ibool freed; - - mutex_enter(&(buf_pool->mutex)); - - freed = FALSE; - block = UT_LIST_GET_LAST(buf_pool->LRU); - - while (block != NULL) { - ut_a(block->in_LRU_list); - - mutex_enter(&block->mutex); - - if (buf_flush_ready_for_replace(block)) { - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Putting space %lu page %lu" - " to free list\n", - (ulong) block->space, - (ulong) block->offset); - } -#endif /* UNIV_DEBUG */ - - buf_LRU_block_remove_hashed_page(block); - - mutex_exit(&(buf_pool->mutex)); - mutex_exit(&block->mutex); - - /* Remove possible adaptive hash index built on the - page; in the case of AWE the block may not have a - frame at all */ - - if (block->frame) { - /* The page was declared uninitialized - by buf_LRU_block_remove_hashed_page(). 
- We need to flag the contents of the - page valid (which it still is) in - order to avoid bogus Valgrind - warnings. */ - UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE); - btr_search_drop_page_hash_index(block->frame); - UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE); - } - - ut_a(block->buf_fix_count == 0); - - mutex_enter(&(buf_pool->mutex)); - mutex_enter(&block->mutex); - - buf_LRU_block_free_hashed_page(block); - freed = TRUE; - mutex_exit(&block->mutex); - - break; - } - - mutex_exit(&block->mutex); - - block = UT_LIST_GET_PREV(LRU, block); - distance++; - - if (!freed && n_iterations <= 10 - && distance > 100 + (n_iterations * buf_pool->curr_size) - / 10) { - buf_pool->LRU_flush_ended = 0; - - mutex_exit(&(buf_pool->mutex)); - - return(FALSE); - } - } - if (buf_pool->LRU_flush_ended > 0) { - buf_pool->LRU_flush_ended--; - } - if (!freed) { - buf_pool->LRU_flush_ended = 0; - } - mutex_exit(&(buf_pool->mutex)); - - return(freed); -} - -/********************************************************************** -Tries to remove LRU flushed blocks from the end of the LRU list and put them -to the free list. This is beneficial for the efficiency of the insert buffer -operation, as flushed pages from non-unique non-clustered indexes are here -taken out of the buffer pool, and their inserts redirected to the insert -buffer. Otherwise, the flushed blocks could get modified again before read -operations need new buffer blocks, and the i/o work done in flushing would be -wasted. */ - -void -buf_LRU_try_free_flushed_blocks(void) -/*=================================*/ -{ - mutex_enter(&(buf_pool->mutex)); - - while (buf_pool->LRU_flush_ended > 0) { - - mutex_exit(&(buf_pool->mutex)); - - buf_LRU_search_and_free_block(1); - - mutex_enter(&(buf_pool->mutex)); - } - - mutex_exit(&(buf_pool->mutex)); -} - -/********************************************************************** -Returns TRUE if less than 25 % of the buffer pool is available. 
This can be -used in heuristics to prevent huge transactions eating up the whole buffer -pool for their locks. */ - -ibool -buf_LRU_buf_pool_running_out(void) -/*==============================*/ - /* out: TRUE if less than 25 % of buffer pool - left */ -{ - ibool ret = FALSE; - - mutex_enter(&(buf_pool->mutex)); - - if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) - + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 4) { - - ret = TRUE; - } - - mutex_exit(&(buf_pool->mutex)); - - return(ret); -} - -/********************************************************************** -Returns a free block from buf_pool. The block is taken off the free list. -If it is empty, blocks are moved from the end of the LRU list to the free -list. */ - -buf_block_t* -buf_LRU_get_free_block(void) -/*========================*/ - /* out: the free control block; also if AWE is - used, it is guaranteed that the block has its - page mapped to a frame when we return */ -{ - buf_block_t* block = NULL; - ibool freed; - ulint n_iterations = 1; - ibool mon_value_was = FALSE; - ibool started_monitor = FALSE; -loop: - mutex_enter(&(buf_pool->mutex)); - - if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) - + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 20) { - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: ERROR: over 95 percent of the buffer pool" - " is occupied by\n" - "InnoDB: lock heaps or the adaptive hash index!" - " Check that your\n" - "InnoDB: transactions do not set too many row locks.\n" - "InnoDB: Your buffer pool size is %lu MB." 
- " Maybe you should make\n" - "InnoDB: the buffer pool bigger?\n" - "InnoDB: We intentionally generate a seg fault" - " to print a stack trace\n" - "InnoDB: on Linux!\n", - (ulong) (buf_pool->curr_size - / (1024 * 1024 / UNIV_PAGE_SIZE))); - - ut_error; - - } else if (!recv_recovery_on && UT_LIST_GET_LEN(buf_pool->free) - + UT_LIST_GET_LEN(buf_pool->LRU) < buf_pool->max_size / 3) { - - if (!buf_lru_switched_on_innodb_mon) { - - /* Over 67 % of the buffer pool is occupied by lock - heaps or the adaptive hash index. This may be a memory - leak! */ - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: WARNING: over 67 percent of" - " the buffer pool is occupied by\n" - "InnoDB: lock heaps or the adaptive" - " hash index! Check that your\n" - "InnoDB: transactions do not set too many" - " row locks.\n" - "InnoDB: Your buffer pool size is %lu MB." - " Maybe you should make\n" - "InnoDB: the buffer pool bigger?\n" - "InnoDB: Starting the InnoDB Monitor to print" - " diagnostics, including\n" - "InnoDB: lock heap and hash index sizes.\n", - (ulong) (buf_pool->curr_size - / (1024 * 1024 / UNIV_PAGE_SIZE))); - - buf_lru_switched_on_innodb_mon = TRUE; - srv_print_innodb_monitor = TRUE; - os_event_set(srv_lock_timeout_thread_event); - } - } else if (buf_lru_switched_on_innodb_mon) { - - /* Switch off the InnoDB Monitor; this is a simple way - to stop the monitor if the situation becomes less urgent, - but may also surprise users if the user also switched on the - monitor! 
*/ - - buf_lru_switched_on_innodb_mon = FALSE; - srv_print_innodb_monitor = FALSE; - } - - /* If there is a block in the free list, take it */ - if (UT_LIST_GET_LEN(buf_pool->free) > 0) { - - block = UT_LIST_GET_FIRST(buf_pool->free); - ut_a(block->in_free_list); - UT_LIST_REMOVE(free, buf_pool->free, block); - block->in_free_list = FALSE; - ut_a(block->state != BUF_BLOCK_FILE_PAGE); - ut_a(!block->in_LRU_list); - - if (srv_use_awe) { - if (block->frame) { - /* Remove from the list of mapped pages */ - - UT_LIST_REMOVE(awe_LRU_free_mapped, - buf_pool->awe_LRU_free_mapped, - block); - } else { - /* We map the page to a frame; second param - FALSE below because we do not want it to be - added to the awe_LRU_free_mapped list */ - - buf_awe_map_page_to_frame(block, FALSE); - } - } - - mutex_enter(&block->mutex); - - block->state = BUF_BLOCK_READY_FOR_USE; - UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE); - - mutex_exit(&block->mutex); - - mutex_exit(&(buf_pool->mutex)); - - if (started_monitor) { - srv_print_innodb_monitor = mon_value_was; - } - - return(block); - } - - /* If no block was in the free list, search from the end of the LRU - list and try to free a block there */ - - mutex_exit(&(buf_pool->mutex)); - - freed = buf_LRU_search_and_free_block(n_iterations); - - if (freed > 0) { - goto loop; - } - - if (n_iterations > 30) { - ut_print_timestamp(stderr); - fprintf(stderr, - "InnoDB: Warning: difficult to find free blocks from\n" - "InnoDB: the buffer pool (%lu search iterations)!" - " Consider\n" - "InnoDB: increasing the buffer pool size.\n" - "InnoDB: It is also possible that" - " in your Unix version\n" - "InnoDB: fsync is very slow, or" - " completely frozen inside\n" - "InnoDB: the OS kernel. Then upgrading to" - " a newer version\n" - "InnoDB: of your operating system may help." 
- " Look at the\n" - "InnoDB: number of fsyncs in diagnostic info below.\n" - "InnoDB: Pending flushes (fsync) log: %lu;" - " buffer pool: %lu\n" - "InnoDB: %lu OS file reads, %lu OS file writes," - " %lu OS fsyncs\n" - "InnoDB: Starting InnoDB Monitor to print further\n" - "InnoDB: diagnostics to the standard output.\n", - (ulong) n_iterations, - (ulong) fil_n_pending_log_flushes, - (ulong) fil_n_pending_tablespace_flushes, - (ulong) os_n_file_reads, (ulong) os_n_file_writes, - (ulong) os_n_fsyncs); - - mon_value_was = srv_print_innodb_monitor; - started_monitor = TRUE; - srv_print_innodb_monitor = TRUE; - os_event_set(srv_lock_timeout_thread_event); - } - - /* No free block was found: try to flush the LRU list */ - - buf_flush_free_margin(); - ++srv_buf_pool_wait_free; - - os_aio_simulated_wake_handler_threads(); - - mutex_enter(&(buf_pool->mutex)); - - if (buf_pool->LRU_flush_ended > 0) { - /* We have written pages in an LRU flush. To make the insert - buffer more efficient, we try to move these pages to the free - list. */ - - mutex_exit(&(buf_pool->mutex)); - - buf_LRU_try_free_flushed_blocks(); - } else { - mutex_exit(&(buf_pool->mutex)); - } - - if (n_iterations > 10) { - - os_thread_sleep(500000); - } - - n_iterations++; - - goto loop; -} - -/*********************************************************************** -Moves the LRU_old pointer so that the length of the old blocks list -is inside the allowed limits. 
*/ -UNIV_INLINE -void -buf_LRU_old_adjust_len(void) -/*========================*/ -{ - ulint old_len; - ulint new_len; - - ut_a(buf_pool->LRU_old); - ut_ad(mutex_own(&(buf_pool->mutex))); - ut_ad(3 * (BUF_LRU_OLD_MIN_LEN / 8) > BUF_LRU_OLD_TOLERANCE + 5); - - for (;;) { - old_len = buf_pool->LRU_old_len; - new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8); - - ut_a(buf_pool->LRU_old->in_LRU_list); - - /* Update the LRU_old pointer if necessary */ - - if (old_len < new_len - BUF_LRU_OLD_TOLERANCE) { - - buf_pool->LRU_old = UT_LIST_GET_PREV( - LRU, buf_pool->LRU_old); - (buf_pool->LRU_old)->old = TRUE; - buf_pool->LRU_old_len++; - - } else if (old_len > new_len + BUF_LRU_OLD_TOLERANCE) { - - (buf_pool->LRU_old)->old = FALSE; - buf_pool->LRU_old = UT_LIST_GET_NEXT( - LRU, buf_pool->LRU_old); - buf_pool->LRU_old_len--; - } else { - ut_a(buf_pool->LRU_old); /* Check that we did not - fall out of the LRU list */ - return; - } - } -} - -/*********************************************************************** -Initializes the old blocks pointer in the LRU list. This function should be -called when the LRU list grows to BUF_LRU_OLD_MIN_LEN length. */ -static -void -buf_LRU_old_init(void) -/*==================*/ -{ - buf_block_t* block; - - ut_ad(mutex_own(&(buf_pool->mutex))); - ut_a(UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN); - - /* We first initialize all blocks in the LRU list as old and then use - the adjust function to move the LRU_old pointer to the right - position */ - - block = UT_LIST_GET_FIRST(buf_pool->LRU); - - while (block != NULL) { - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - ut_a(block->in_LRU_list); - block->old = TRUE; - block = UT_LIST_GET_NEXT(LRU, block); - } - - buf_pool->LRU_old = UT_LIST_GET_FIRST(buf_pool->LRU); - buf_pool->LRU_old_len = UT_LIST_GET_LEN(buf_pool->LRU); - - buf_LRU_old_adjust_len(); -} - -/********************************************************************** -Removes a block from the LRU list. 
*/ -UNIV_INLINE -void -buf_LRU_remove_block( -/*=================*/ - buf_block_t* block) /* in: control block */ -{ - ut_ad(buf_pool); - ut_ad(block); - ut_ad(mutex_own(&(buf_pool->mutex))); - - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - ut_a(block->in_LRU_list); - - /* If the LRU_old pointer is defined and points to just this block, - move it backward one step */ - - if (block == buf_pool->LRU_old) { - - /* Below: the previous block is guaranteed to exist, because - the LRU_old pointer is only allowed to differ by the - tolerance value from strict 3/8 of the LRU list length. */ - - buf_pool->LRU_old = UT_LIST_GET_PREV(LRU, block); - (buf_pool->LRU_old)->old = TRUE; - - buf_pool->LRU_old_len++; - ut_a(buf_pool->LRU_old); - } - - /* Remove the block from the LRU list */ - UT_LIST_REMOVE(LRU, buf_pool->LRU, block); - block->in_LRU_list = FALSE; - - if (srv_use_awe && block->frame) { - /* Remove from the list of mapped pages */ - - UT_LIST_REMOVE(awe_LRU_free_mapped, - buf_pool->awe_LRU_free_mapped, block); - } - - /* If the LRU list is so short that LRU_old not defined, return */ - if (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN) { - - buf_pool->LRU_old = NULL; - - return; - } - - ut_ad(buf_pool->LRU_old); - - /* Update the LRU_old_len field if necessary */ - if (block->old) { - - buf_pool->LRU_old_len--; - } - - /* Adjust the length of the old block list if necessary */ - buf_LRU_old_adjust_len(); -} - -/********************************************************************** -Adds a block to the LRU list end. 
*/ -UNIV_INLINE -void -buf_LRU_add_block_to_end_low( -/*=========================*/ - buf_block_t* block) /* in: control block */ -{ - buf_block_t* last_block; - - ut_ad(buf_pool); - ut_ad(block); - ut_ad(mutex_own(&(buf_pool->mutex))); - - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - - block->old = TRUE; - - last_block = UT_LIST_GET_LAST(buf_pool->LRU); - - if (last_block) { - block->LRU_position = last_block->LRU_position; - } else { - block->LRU_position = buf_pool_clock_tic(); - } - - ut_a(!block->in_LRU_list); - UT_LIST_ADD_LAST(LRU, buf_pool->LRU, block); - block->in_LRU_list = TRUE; - - if (srv_use_awe && block->frame) { - /* Add to the list of mapped pages */ - - UT_LIST_ADD_LAST(awe_LRU_free_mapped, - buf_pool->awe_LRU_free_mapped, block); - } - - if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) { - - buf_pool->LRU_old_len++; - } - - if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) { - - ut_ad(buf_pool->LRU_old); - - /* Adjust the length of the old block list if necessary */ - - buf_LRU_old_adjust_len(); - - } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) { - - /* The LRU list is now long enough for LRU_old to become - defined: init it */ - - buf_LRU_old_init(); - } -} - -/********************************************************************** -Adds a block to the LRU list. 
*/ -UNIV_INLINE -void -buf_LRU_add_block_low( -/*==================*/ - buf_block_t* block, /* in: control block */ - ibool old) /* in: TRUE if should be put to the old blocks - in the LRU list, else put to the start; if the - LRU list is very short, the block is added to - the start, regardless of this parameter */ -{ - ulint cl; - - ut_ad(buf_pool); - ut_ad(block); - ut_ad(mutex_own(&(buf_pool->mutex))); - - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - ut_a(!block->in_LRU_list); - - block->old = old; - cl = buf_pool_clock_tic(); - - if (srv_use_awe && block->frame) { - /* Add to the list of mapped pages; for simplicity we always - add to the start, even if the user would have set 'old' - TRUE */ - - UT_LIST_ADD_FIRST(awe_LRU_free_mapped, - buf_pool->awe_LRU_free_mapped, block); - } - - if (!old || (UT_LIST_GET_LEN(buf_pool->LRU) < BUF_LRU_OLD_MIN_LEN)) { - - UT_LIST_ADD_FIRST(LRU, buf_pool->LRU, block); - - block->LRU_position = cl; - block->freed_page_clock = buf_pool->freed_page_clock; - } else { - UT_LIST_INSERT_AFTER(LRU, buf_pool->LRU, buf_pool->LRU_old, - block); - buf_pool->LRU_old_len++; - - /* We copy the LRU position field of the previous block - to the new block */ - - block->LRU_position = (buf_pool->LRU_old)->LRU_position; - } - - block->in_LRU_list = TRUE; - - if (UT_LIST_GET_LEN(buf_pool->LRU) > BUF_LRU_OLD_MIN_LEN) { - - ut_ad(buf_pool->LRU_old); - - /* Adjust the length of the old block list if necessary */ - - buf_LRU_old_adjust_len(); - - } else if (UT_LIST_GET_LEN(buf_pool->LRU) == BUF_LRU_OLD_MIN_LEN) { - - /* The LRU list is now long enough for LRU_old to become - defined: init it */ - - buf_LRU_old_init(); - } -} - -/********************************************************************** -Adds a block to the LRU list. 
*/ - -void -buf_LRU_add_block( -/*==============*/ - buf_block_t* block, /* in: control block */ - ibool old) /* in: TRUE if should be put to the old - blocks in the LRU list, else put to the start; - if the LRU list is very short, the block is - added to the start, regardless of this - parameter */ -{ - buf_LRU_add_block_low(block, old); -} - -/********************************************************************** -Moves a block to the start of the LRU list. */ - -void -buf_LRU_make_block_young( -/*=====================*/ - buf_block_t* block) /* in: control block */ -{ - buf_LRU_remove_block(block); - buf_LRU_add_block_low(block, FALSE); -} - -/********************************************************************** -Moves a block to the end of the LRU list. */ - -void -buf_LRU_make_block_old( -/*===================*/ - buf_block_t* block) /* in: control block */ -{ - buf_LRU_remove_block(block); - buf_LRU_add_block_to_end_low(block); -} - -/********************************************************************** -Puts a block back to the free list. 
*/ - -void -buf_LRU_block_free_non_file_page( -/*=============================*/ - buf_block_t* block) /* in: block, must not contain a file page */ -{ - - ut_ad(mutex_own(&(buf_pool->mutex))); - ut_ad(mutex_own(&block->mutex)); - ut_ad(block); - - ut_a((block->state == BUF_BLOCK_MEMORY) - || (block->state == BUF_BLOCK_READY_FOR_USE)); - - ut_a(block->n_pointers == 0); - ut_a(!block->in_free_list); - - block->state = BUF_BLOCK_NOT_USED; - - UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE); -#ifdef UNIV_DEBUG - /* Wipe contents of page to reveal possible stale pointers to it */ - memset(block->frame, '\0', UNIV_PAGE_SIZE); -#endif - UT_LIST_ADD_FIRST(free, buf_pool->free, block); - block->in_free_list = TRUE; - - UNIV_MEM_ASSERT_AND_FREE(block->frame, UNIV_PAGE_SIZE); - - if (srv_use_awe && block->frame) { - /* Add to the list of mapped pages */ - - UT_LIST_ADD_FIRST(awe_LRU_free_mapped, - buf_pool->awe_LRU_free_mapped, block); - } -} - -/********************************************************************** -Takes a block out of the LRU list and page hash table and sets the block -state to BUF_BLOCK_REMOVE_HASH. 
*/ -static -void -buf_LRU_block_remove_hashed_page( -/*=============================*/ - buf_block_t* block) /* in: block, must contain a file page and - be in a state where it can be freed; there - may or may not be a hash index to the page */ -{ - ut_ad(mutex_own(&(buf_pool->mutex))); - ut_ad(mutex_own(&block->mutex)); - ut_ad(block); - - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - ut_a(block->io_fix == 0); - ut_a(block->buf_fix_count == 0); - ut_a(ut_dulint_cmp(block->oldest_modification, ut_dulint_zero) == 0); - - buf_LRU_remove_block(block); - - buf_pool->freed_page_clock += 1; - - /* Note that if AWE is enabled the block may not have a frame at all */ - - buf_block_modify_clock_inc(block); - - if (block != buf_page_hash_get(block->space, block->offset)) { - fprintf(stderr, - "InnoDB: Error: page %lu %lu not found" - " in the hash table\n", - (ulong) block->space, - (ulong) block->offset); - if (buf_page_hash_get(block->space, block->offset)) { - fprintf(stderr, - "InnoDB: In hash table we find block" - " %p of %lu %lu which is not %p\n", - (void*) buf_page_hash_get - (block->space, block->offset), - (ulong) buf_page_hash_get - (block->space, block->offset)->space, - (ulong) buf_page_hash_get - (block->space, block->offset)->offset, - (void*) block); - } - -#ifdef UNIV_DEBUG - buf_print(); - buf_LRU_print(); - buf_validate(); - buf_LRU_validate(); -#endif - ut_a(0); - } - - HASH_DELETE(buf_block_t, hash, buf_pool->page_hash, - buf_page_address_fold(block->space, block->offset), - block); - - UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE); - block->state = BUF_BLOCK_REMOVE_HASH; -} - -/********************************************************************** -Puts a file page whose has no hash index to the free list. 
*/ -static -void -buf_LRU_block_free_hashed_page( -/*===========================*/ - buf_block_t* block) /* in: block, must contain a file page and - be in a state where it can be freed */ -{ - ut_ad(mutex_own(&(buf_pool->mutex))); - ut_ad(mutex_own(&block->mutex)); - - ut_a(block->state == BUF_BLOCK_REMOVE_HASH); - - block->state = BUF_BLOCK_MEMORY; - - buf_LRU_block_free_non_file_page(block); -} - -#ifdef UNIV_DEBUG -/************************************************************************** -Validates the LRU list. */ - -ibool -buf_LRU_validate(void) -/*==================*/ -{ - buf_block_t* block; - ulint old_len; - ulint new_len; - ulint LRU_pos; - - ut_ad(buf_pool); - mutex_enter(&(buf_pool->mutex)); - - if (UT_LIST_GET_LEN(buf_pool->LRU) >= BUF_LRU_OLD_MIN_LEN) { - - ut_a(buf_pool->LRU_old); - old_len = buf_pool->LRU_old_len; - new_len = 3 * (UT_LIST_GET_LEN(buf_pool->LRU) / 8); - ut_a(old_len >= new_len - BUF_LRU_OLD_TOLERANCE); - ut_a(old_len <= new_len + BUF_LRU_OLD_TOLERANCE); - } - - UT_LIST_VALIDATE(LRU, buf_block_t, buf_pool->LRU); - - block = UT_LIST_GET_FIRST(buf_pool->LRU); - - old_len = 0; - - while (block != NULL) { - - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - - if (block->old) { - old_len++; - } - - if (buf_pool->LRU_old && (old_len == 1)) { - ut_a(buf_pool->LRU_old == block); - } - - LRU_pos = block->LRU_position; - - block = UT_LIST_GET_NEXT(LRU, block); - - if (block) { - /* If the following assert fails, it may - not be an error: just the buf_pool clock - has wrapped around */ - ut_a(LRU_pos >= block->LRU_position); - } - } - - if (buf_pool->LRU_old) { - ut_a(buf_pool->LRU_old_len == old_len); - } - - UT_LIST_VALIDATE(free, buf_block_t, buf_pool->free); - - block = UT_LIST_GET_FIRST(buf_pool->free); - - while (block != NULL) { - ut_a(block->state == BUF_BLOCK_NOT_USED); - - block = UT_LIST_GET_NEXT(free, block); - } - - mutex_exit(&(buf_pool->mutex)); - return(TRUE); -} - 
-/************************************************************************** -Prints the LRU list. */ - -void -buf_LRU_print(void) -/*===============*/ -{ - buf_block_t* block; - buf_frame_t* frame; - ulint len; - - ut_ad(buf_pool); - mutex_enter(&(buf_pool->mutex)); - - fprintf(stderr, "Pool ulint clock %lu\n", - (ulong) buf_pool->ulint_clock); - - block = UT_LIST_GET_FIRST(buf_pool->LRU); - - len = 0; - - while (block != NULL) { - - fprintf(stderr, "BLOCK %lu ", (ulong) block->offset); - - if (block->old) { - fputs("old ", stderr); - } - - if (block->buf_fix_count) { - fprintf(stderr, "buffix count %lu ", - (ulong) block->buf_fix_count); - } - - if (block->io_fix) { - fprintf(stderr, "io_fix %lu ", (ulong) block->io_fix); - } - - if (ut_dulint_cmp(block->oldest_modification, - ut_dulint_zero) > 0) { - fputs("modif. ", stderr); - } - - frame = buf_block_get_frame(block); - - fprintf(stderr, "LRU pos %lu type %lu index id %lu ", - (ulong) block->LRU_position, - (ulong) fil_page_get_type(frame), - (ulong) ut_dulint_get_low - (btr_page_get_index_id(frame))); - - block = UT_LIST_GET_NEXT(LRU, block); - if (++len == 10) { - len = 0; - putc('\n', stderr); - } - } - - mutex_exit(&(buf_pool->mutex)); -} -#endif /* UNIV_DEBUG */ diff --git a/storage/innobase/buf/buf0rea.c b/storage/innobase/buf/buf0rea.c deleted file mode 100644 index fdec0206990..00000000000 --- a/storage/innobase/buf/buf0rea.c +++ /dev/null @@ -1,728 +0,0 @@ -/****************************************************** -The database buffer read - -(c) 1995 Innobase Oy - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#include "buf0rea.h" - -#include "fil0fil.h" -#include "mtr0mtr.h" - -#include "buf0buf.h" -#include "buf0flu.h" -#include "buf0lru.h" -#include "ibuf0ibuf.h" -#include "log0recv.h" -#include "trx0sys.h" -#include "os0file.h" -#include "srv0start.h" - -extern ulint srv_read_ahead_rnd; -extern ulint srv_read_ahead_seq; -extern ulint srv_buf_pool_reads; 
- -/* The size in blocks of the area where the random read-ahead algorithm counts -the accessed pages when deciding whether to read-ahead */ -#define BUF_READ_AHEAD_RANDOM_AREA BUF_READ_AHEAD_AREA - -/* There must be at least this many pages in buf_pool in the area to start -a random read-ahead */ -#define BUF_READ_AHEAD_RANDOM_THRESHOLD (5 + BUF_READ_AHEAD_RANDOM_AREA / 8) - -/* The linear read-ahead area size */ -#define BUF_READ_AHEAD_LINEAR_AREA BUF_READ_AHEAD_AREA - -/* The linear read-ahead threshold */ -#define BUF_READ_AHEAD_LINEAR_THRESHOLD (3 * BUF_READ_AHEAD_LINEAR_AREA / 8) - -/* If there are buf_pool->curr_size per the number below pending reads, then -read-ahead is not done: this is to prevent flooding the buffer pool with -i/o-fixed buffer blocks */ -#define BUF_READ_AHEAD_PEND_LIMIT 2 - -/************************************************************************ -Low-level function which reads a page asynchronously from a file to the -buffer buf_pool if it is not already there, in which case does nothing. -Sets the io_fix flag and sets an exclusive lock on the buffer frame. The -flag is cleared and the x-lock released by an i/o-handler thread. 
*/ -static -ulint -buf_read_page_low( -/*==============*/ - /* out: 1 if a read request was queued, 0 if the page - already resided in buf_pool, or if the page is in - the doublewrite buffer blocks in which case it is never - read into the pool, or if the tablespace does not - exist or is being dropped */ - ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED if we are - trying to read from a non-existent tablespace, or a - tablespace which is just now being dropped */ - ibool sync, /* in: TRUE if synchronous aio is desired */ - ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ..., - ORed to OS_AIO_SIMULATED_WAKE_LATER (see below - at read-ahead functions) */ - ulint space, /* in: space id */ - ib_longlong tablespace_version, /* in: if the space memory object has - this timestamp different from what we are giving here, - treat the tablespace as dropped; this is a timestamp we - use to stop dangling page reads from a tablespace - which we have DISCARDed + IMPORTed back */ - ulint offset) /* in: page number */ -{ - buf_block_t* block; - ulint wake_later; - - *err = DB_SUCCESS; - - wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; - mode = mode & ~OS_AIO_SIMULATED_WAKE_LATER; - - if (trx_doublewrite && space == TRX_SYS_SPACE - && ( (offset >= trx_doublewrite->block1 - && offset < trx_doublewrite->block1 - + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) - || (offset >= trx_doublewrite->block2 - && offset < trx_doublewrite->block2 - + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE))) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: trying to read" - " doublewrite buffer page %lu\n", - (ulong) offset); - - return(0); - } - - if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) { - - /* Trx sys header is so low in the latching order that we play - safe and do not leave the i/o-completion to an asynchronous - i/o-thread. Ibuf bitmap pages must always be read with - syncronous i/o, to make sure they do not get involved in - thread deadlocks. 
*/ - - sync = TRUE; - } - - /* The following call will also check if the tablespace does not exist - or is being dropped; if we succeed in initing the page in the buffer - pool for read, then DISCARD cannot proceed until the read has - completed */ - block = buf_page_init_for_read(err, mode, space, tablespace_version, - offset); - if (block == NULL) { - - return(0); - } - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Posting read request for page %lu, sync %lu\n", - (ulong) offset, - (ulong) sync); - } -#endif - - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - - *err = fil_io(OS_FILE_READ | wake_later, - sync, space, - offset, 0, UNIV_PAGE_SIZE, - (void*)block->frame, (void*)block); - ut_a(*err == DB_SUCCESS); - - if (sync) { - /* The i/o is already completed when we arrive from - fil_read */ - buf_page_io_complete(block); - } - - return(1); -} - -/************************************************************************ -Applies a random read-ahead in buf_pool if there are at least a threshold -value of accessed pages from the random read-ahead area. Does not read any -page, not even the one at the position (space, offset), if the read-ahead -mechanism is not activated. NOTE 1: the calling thread may own latches on -pages: to avoid deadlocks this function must be written such that it cannot -end up waiting for these latches! NOTE 2: the calling thread must want -access to the page given: this rule is set to prevent unintended read-aheads -performed by ibuf routines, a situation which could result in a deadlock if -the OS does not support asynchronous i/o. */ -static -ulint -buf_read_ahead_random( -/*==================*/ - /* out: number of page read requests issued; NOTE - that if we read ibuf pages, it may happen that - the page at the given page number does not get - read even if we return a value > 0! 
*/ - ulint space, /* in: space id */ - ulint offset) /* in: page number of a page which the current thread - wants to access */ -{ - ib_longlong tablespace_version; - buf_block_t* block; - ulint recent_blocks = 0; - ulint count; - ulint LRU_recent_limit; - ulint ibuf_mode; - ulint low, high; - ulint err; - ulint i; - - if (srv_startup_is_before_trx_rollback_phase) { - /* No read-ahead to avoid thread deadlocks */ - return(0); - } - - if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) { - - /* If it is an ibuf bitmap page or trx sys hdr, we do - no read-ahead, as that could break the ibuf page access - order */ - - return(0); - } - - /* Remember the tablespace version before we ask te tablespace size - below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we - do not try to read outside the bounds of the tablespace! */ - - tablespace_version = fil_space_get_version(space); - - low = (offset / BUF_READ_AHEAD_RANDOM_AREA) - * BUF_READ_AHEAD_RANDOM_AREA; - high = (offset / BUF_READ_AHEAD_RANDOM_AREA + 1) - * BUF_READ_AHEAD_RANDOM_AREA; - if (high > fil_space_get_size(space)) { - - high = fil_space_get_size(space); - } - - /* Get the minimum LRU_position field value for an initial segment - of the LRU list, to determine which blocks have recently been added - to the start of the list. */ - - LRU_recent_limit = buf_LRU_get_recent_limit(); - - mutex_enter(&(buf_pool->mutex)); - - if (buf_pool->n_pend_reads - > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { - mutex_exit(&(buf_pool->mutex)); - - return(0); - } - - /* Count how many blocks in the area have been recently accessed, - that is, reside near the start of the LRU list. 
*/ - - for (i = low; i < high; i++) { - block = buf_page_hash_get(space, i); - - if ((block) - && (block->LRU_position > LRU_recent_limit) - && block->accessed) { - - recent_blocks++; - } - } - - mutex_exit(&(buf_pool->mutex)); - - if (recent_blocks < BUF_READ_AHEAD_RANDOM_THRESHOLD) { - /* Do nothing */ - - return(0); - } - - /* Read all the suitable blocks within the area */ - - if (ibuf_inside()) { - ibuf_mode = BUF_READ_IBUF_PAGES_ONLY; - } else { - ibuf_mode = BUF_READ_ANY_PAGE; - } - - count = 0; - - for (i = low; i < high; i++) { - /* It is only sensible to do read-ahead in the non-sync aio - mode: hence FALSE as the first parameter */ - - if (!ibuf_bitmap_page(i)) { - count += buf_read_page_low( - &err, FALSE, - ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER, - space, tablespace_version, i); - if (err == DB_TABLESPACE_DELETED) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: in random" - " readahead trying to access\n" - "InnoDB: tablespace %lu page %lu,\n" - "InnoDB: but the tablespace does not" - " exist or is just being dropped.\n", - (ulong) space, (ulong) i); - } - } - } - - /* In simulated aio we wake the aio handler threads only after - queuing all aio requests, in native aio the following call does - nothing: */ - - os_aio_simulated_wake_handler_threads(); - -#ifdef UNIV_DEBUG - if (buf_debug_prints && (count > 0)) { - fprintf(stderr, - "Random read-ahead space %lu offset %lu pages %lu\n", - (ulong) space, (ulong) offset, - (ulong) count); - } -#endif /* UNIV_DEBUG */ - - ++srv_read_ahead_rnd; - return(count); -} - -/************************************************************************ -High-level function which reads a page asynchronously from a file to the -buffer buf_pool if it is not already there. Sets the io_fix flag and sets -an exclusive lock on the buffer frame. The flag is cleared and the x-lock -released by the i/o-handler thread. Does a random read-ahead if it seems -sensible. 
*/

ulint
buf_read_page(
/*==========*/
			/* out: number of page read requests issued: this can
			be > 1 if read-ahead occurred */
	ulint	space,	/* in: space id */
	ulint	offset)	/* in: page number */
{
	ib_longlong	version;
	ulint		n_ahead;
	ulint		n_sync;
	ulint		err;

	/* The version is sampled before any read is posted */
	version = fil_space_get_version(space);

	n_ahead = buf_read_ahead_random(space, offset);

	/* The requested page itself is read in the synchronous aio mode
	(TRUE below) to save thread switches */

	n_sync = buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, space,
				   version, offset);
	srv_buf_pool_reads += n_sync;

	if (err == DB_TABLESPACE_DELETED) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
			"  InnoDB: Error: trying to access"
			" tablespace %lu page no. %lu,\n"
			"InnoDB: but the tablespace does not exist"
			" or is just being dropped.\n",
			(ulong) space, (ulong) offset);
	}

	/* Flush pages from the end of the LRU list if necessary */
	buf_flush_free_margin();

	return(n_ahead + n_sync);
}

/************************************************************************
Applies linear read-ahead if in the buf_pool the page is a border page of
a linear read-ahead area and all the pages in the area have been accessed.
Does not read any page if the read-ahead mechanism is not activated. Note
that the algorithm looks at the 'natural' adjacent successor and
predecessor of the page, which on the leaf level of a B-tree are the next
and previous page in the chain of leaves. To know these, the page specified
in (space, offset) must already be present in the buf_pool. Thus, the
natural way to use this function is to call it when a page in the buf_pool
is accessed the first time, calling this function just after it has been
bufferfixed.
NOTE 1: as this function looks at the natural predecessor and successor
fields on the page, what happens, if these are not initialized to any
sensible value?
No problem, before applying read-ahead we check that the -area to read is within the span of the space, if not, read-ahead is not -applied. An uninitialized value may result in a useless read operation, but -only very improbably. -NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this -function must be written such that it cannot end up waiting for these -latches! -NOTE 3: the calling thread must want access to the page given: this rule is -set to prevent unintended read-aheads performed by ibuf routines, a situation -which could result in a deadlock if the OS does not support asynchronous io. */ - -ulint -buf_read_ahead_linear( -/*==================*/ - /* out: number of page read requests issued */ - ulint space, /* in: space id */ - ulint offset) /* in: page number of a page; NOTE: the current thread - must want access to this page (see NOTE 3 above) */ -{ - ib_longlong tablespace_version; - buf_block_t* block; - buf_frame_t* frame; - buf_block_t* pred_block = NULL; - ulint pred_offset; - ulint succ_offset; - ulint count; - int asc_or_desc; - ulint new_offset; - ulint fail_count; - ulint ibuf_mode; - ulint low, high; - ulint err; - ulint i; - - if (srv_startup_is_before_trx_rollback_phase) { - /* No read-ahead to avoid thread deadlocks */ - return(0); - } - - if (ibuf_bitmap_page(offset) || trx_sys_hdr_page(space, offset)) { - - /* If it is an ibuf bitmap page or trx sys hdr, we do - no read-ahead, as that could break the ibuf page access - order */ - - return(0); - } - - low = (offset / BUF_READ_AHEAD_LINEAR_AREA) - * BUF_READ_AHEAD_LINEAR_AREA; - high = (offset / BUF_READ_AHEAD_LINEAR_AREA + 1) - * BUF_READ_AHEAD_LINEAR_AREA; - - if ((offset != low) && (offset != high - 1)) { - /* This is not a border page of the area: return */ - - return(0); - } - - /* Remember the tablespace version before we ask te tablespace size - below: if DISCARD + IMPORT changes the actual .ibd file meanwhile, we - do not try to read outside the bounds of the 
tablespace! */ - - tablespace_version = fil_space_get_version(space); - - mutex_enter(&(buf_pool->mutex)); - - if (high > fil_space_get_size(space)) { - mutex_exit(&(buf_pool->mutex)); - /* The area is not whole, return */ - - return(0); - } - - if (buf_pool->n_pend_reads - > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { - mutex_exit(&(buf_pool->mutex)); - - return(0); - } - - /* Check that almost all pages in the area have been accessed; if - offset == low, the accesses must be in a descending order, otherwise, - in an ascending order. */ - - asc_or_desc = 1; - - if (offset == low) { - asc_or_desc = -1; - } - - fail_count = 0; - - for (i = low; i < high; i++) { - block = buf_page_hash_get(space, i); - - if ((block == NULL) || !block->accessed) { - /* Not accessed */ - fail_count++; - - } else if (pred_block - && (ut_ulint_cmp(block->LRU_position, - pred_block->LRU_position) - != asc_or_desc)) { - /* Accesses not in the right order */ - - fail_count++; - pred_block = block; - } - } - - if (fail_count > BUF_READ_AHEAD_LINEAR_AREA - - BUF_READ_AHEAD_LINEAR_THRESHOLD) { - /* Too many failures: return */ - - mutex_exit(&(buf_pool->mutex)); - - return(0); - } - - /* If we got this far, we know that enough pages in the area have - been accessed in the right order: linear read-ahead can be sensible */ - - block = buf_page_hash_get(space, offset); - - if (block == NULL) { - mutex_exit(&(buf_pool->mutex)); - - return(0); - } - - frame = block->frame; - - /* Read the natural predecessor and successor page addresses from - the page; NOTE that because the calling thread may have an x-latch - on the page, we do not acquire an s-latch on the page, this is to - prevent deadlocks. Even if we read values which are nonsense, the - algorithm will work. 
*/ - - pred_offset = fil_page_get_prev(frame); - succ_offset = fil_page_get_next(frame); - - mutex_exit(&(buf_pool->mutex)); - - if ((offset == low) && (succ_offset == offset + 1)) { - - /* This is ok, we can continue */ - new_offset = pred_offset; - - } else if ((offset == high - 1) && (pred_offset == offset - 1)) { - - /* This is ok, we can continue */ - new_offset = succ_offset; - } else { - /* Successor or predecessor not in the right order */ - - return(0); - } - - low = (new_offset / BUF_READ_AHEAD_LINEAR_AREA) - * BUF_READ_AHEAD_LINEAR_AREA; - high = (new_offset / BUF_READ_AHEAD_LINEAR_AREA + 1) - * BUF_READ_AHEAD_LINEAR_AREA; - - if ((new_offset != low) && (new_offset != high - 1)) { - /* This is not a border page of the area: return */ - - return(0); - } - - if (high > fil_space_get_size(space)) { - /* The area is not whole, return */ - - return(0); - } - - /* If we got this far, read-ahead can be sensible: do it */ - - if (ibuf_inside()) { - ibuf_mode = BUF_READ_IBUF_PAGES_ONLY; - } else { - ibuf_mode = BUF_READ_ANY_PAGE; - } - - count = 0; - - /* Since Windows XP seems to schedule the i/o handler thread - very eagerly, and consequently it does not wait for the - full read batch to be posted, we use special heuristics here */ - - os_aio_simulated_put_read_threads_to_sleep(); - - for (i = low; i < high; i++) { - /* It is only sensible to do read-ahead in the non-sync - aio mode: hence FALSE as the first parameter */ - - if (!ibuf_bitmap_page(i)) { - count += buf_read_page_low( - &err, FALSE, - ibuf_mode | OS_AIO_SIMULATED_WAKE_LATER, - space, tablespace_version, i); - if (err == DB_TABLESPACE_DELETED) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: in" - " linear readahead trying to access\n" - "InnoDB: tablespace %lu page %lu,\n" - "InnoDB: but the tablespace does not" - " exist or is just being dropped.\n", - (ulong) space, (ulong) i); - } - } - } - - /* In simulated aio we wake the aio handler threads only after - queuing all aio 
requests, in native aio the following call does - nothing: */ - - os_aio_simulated_wake_handler_threads(); - - /* Flush pages from the end of the LRU list if necessary */ - buf_flush_free_margin(); - -#ifdef UNIV_DEBUG - if (buf_debug_prints && (count > 0)) { - fprintf(stderr, - "LINEAR read-ahead space %lu offset %lu pages %lu\n", - (ulong) space, (ulong) offset, (ulong) count); - } -#endif /* UNIV_DEBUG */ - - ++srv_read_ahead_seq; - return(count); -} - -/************************************************************************ -Issues read requests for pages which the ibuf module wants to read in, in -order to contract the insert buffer tree. Technically, this function is like -a read-ahead function. */ - -void -buf_read_ibuf_merge_pages( -/*======================*/ - ibool sync, /* in: TRUE if the caller wants this function - to wait for the highest address page to get - read in, before this function returns */ - ulint* space_ids, /* in: array of space ids */ - ib_longlong* space_versions,/* in: the spaces must have this version - number (timestamp), otherwise we discard the - read; we use this to cancel reads if - DISCARD + IMPORT may have changed the - tablespace size */ - ulint* page_nos, /* in: array of page numbers to read, with the - highest page number the last in the array */ - ulint n_stored) /* in: number of page numbers in the array */ -{ - ulint err; - ulint i; - - ut_ad(!ibuf_inside()); -#ifdef UNIV_IBUF_DEBUG - ut_a(n_stored < UNIV_PAGE_SIZE); -#endif - while (buf_pool->n_pend_reads - > buf_pool->curr_size / BUF_READ_AHEAD_PEND_LIMIT) { - os_thread_sleep(500000); - } - - for (i = 0; i < n_stored; i++) { - buf_read_page_low(&err, - (i + 1 == n_stored) && sync, - BUF_READ_ANY_PAGE, - space_ids[i], space_versions[i], - page_nos[i]); - - if (err == DB_TABLESPACE_DELETED) { - /* We have deleted or are deleting the single-table - tablespace: remove the entries for that page */ - - ibuf_merge_or_delete_for_page(NULL, space_ids[i], - page_nos[i], FALSE); - 
} - } - - os_aio_simulated_wake_handler_threads(); - - /* Flush pages from the end of the LRU list if necessary */ - buf_flush_free_margin(); - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Ibuf merge read-ahead space %lu pages %lu\n", - (ulong) space_ids[0], (ulong) n_stored); - } -#endif /* UNIV_DEBUG */ -} - -/************************************************************************ -Issues read requests for pages which recovery wants to read in. */ - -void -buf_read_recv_pages( -/*================*/ - ibool sync, /* in: TRUE if the caller wants this function - to wait for the highest address page to get - read in, before this function returns */ - ulint space, /* in: space id */ - ulint* page_nos, /* in: array of page numbers to read, with the - highest page number the last in the array */ - ulint n_stored) /* in: number of page numbers in the array */ -{ - ib_longlong tablespace_version; - ulint count; - ulint err; - ulint i; - - tablespace_version = fil_space_get_version(space); - - for (i = 0; i < n_stored; i++) { - - count = 0; - - os_aio_print_debug = FALSE; - - while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) { - - os_aio_simulated_wake_handler_threads(); - os_thread_sleep(500000); - - count++; - - if (count > 100) { - fprintf(stderr, - "InnoDB: Error: InnoDB has waited for" - " 50 seconds for pending\n" - "InnoDB: reads to the buffer pool to" - " be finished.\n" - "InnoDB: Number of pending reads %lu," - " pending pread calls %lu\n", - (ulong) buf_pool->n_pend_reads, - (ulong)os_file_n_pending_preads); - - os_aio_print_debug = TRUE; - } - } - - os_aio_print_debug = FALSE; - - if ((i + 1 == n_stored) && sync) { - buf_read_page_low(&err, TRUE, BUF_READ_ANY_PAGE, - space, tablespace_version, - page_nos[i]); - } else { - buf_read_page_low(&err, FALSE, BUF_READ_ANY_PAGE - | OS_AIO_SIMULATED_WAKE_LATER, - space, tablespace_version, - page_nos[i]); - } - } - - os_aio_simulated_wake_handler_threads(); - - /* Flush pages from the 
end of the LRU list if necessary */ - buf_flush_free_margin(); - -#ifdef UNIV_DEBUG - if (buf_debug_prints) { - fprintf(stderr, - "Recovery applies read-ahead pages %lu\n", - (ulong) n_stored); - } -#endif /* UNIV_DEBUG */ -} diff --git a/storage/innobase/data/data0data.c b/storage/innobase/data/data0data.c deleted file mode 100644 index 0f03de4ca9d..00000000000 --- a/storage/innobase/data/data0data.c +++ /dev/null @@ -1,681 +0,0 @@ -/************************************************************************ -SQL data field and tuple - -(c) 1994-1996 Innobase Oy - -Created 5/30/1994 Heikki Tuuri -*************************************************************************/ - -#include "data0data.h" - -#ifdef UNIV_NONINL -#include "data0data.ic" -#endif - -#include "rem0rec.h" -#include "rem0cmp.h" -#include "page0page.h" -#include "dict0dict.h" -#include "btr0cur.h" - -#include <ctype.h> - -#ifdef UNIV_DEBUG -byte data_error; /* data pointers of tuple fields are initialized - to point here for error checking */ - -ulint data_dummy; /* this is used to fool the compiler in - dtuple_validate */ -#endif /* UNIV_DEBUG */ - -/* Some non-inlined functions used in the MySQL interface: */ -void -dfield_set_data_noninline( - dfield_t* field, /* in: field */ - void* data, /* in: data */ - ulint len) /* in: length or UNIV_SQL_NULL */ -{ - dfield_set_data(field, data, len); -} -void* -dfield_get_data_noninline( - dfield_t* field) /* in: field */ -{ - return(dfield_get_data(field)); -} -ulint -dfield_get_len_noninline( - dfield_t* field) /* in: field */ -{ - return(dfield_get_len(field)); -} -ulint -dtuple_get_n_fields_noninline( - dtuple_t* tuple) /* in: tuple */ -{ - return(dtuple_get_n_fields(tuple)); -} -dfield_t* -dtuple_get_nth_field_noninline( - dtuple_t* tuple, /* in: tuple */ - ulint n) /* in: index of field */ -{ - return(dtuple_get_nth_field(tuple, n)); -} - -/************************************************************************* -Tests if dfield data length and content 
is equal to the given. */ - -ibool -dfield_data_is_binary_equal( -/*========================*/ - /* out: TRUE if equal */ - dfield_t* field, /* in: field */ - ulint len, /* in: data length or UNIV_SQL_NULL */ - byte* data) /* in: data */ -{ - if (len != field->len) { - - return(FALSE); - } - - if (len == UNIV_SQL_NULL) { - - return(TRUE); - } - - if (0 != ut_memcmp(field->data, data, len)) { - - return(FALSE); - } - - return(TRUE); -} - -/**************************************************************** -Returns TRUE if lengths of two dtuples are equal and respective data fields -in them are equal when compared with collation in char fields (not as binary -strings). */ - -ibool -dtuple_datas_are_ordering_equal( -/*============================*/ - /* out: TRUE if length and fieds are equal - when compared with cmp_data_data: - NOTE: in character type fields some letters - are identified with others! (collation) */ - dtuple_t* tuple1, /* in: tuple 1 */ - dtuple_t* tuple2) /* in: tuple 2 */ -{ - dfield_t* field1; - dfield_t* field2; - ulint n_fields; - ulint i; - - ut_ad(tuple1 && tuple2); - ut_ad(tuple1->magic_n == DATA_TUPLE_MAGIC_N); - ut_ad(tuple2->magic_n == DATA_TUPLE_MAGIC_N); - ut_ad(dtuple_check_typed(tuple1)); - ut_ad(dtuple_check_typed(tuple2)); - - n_fields = dtuple_get_n_fields(tuple1); - - if (n_fields != dtuple_get_n_fields(tuple2)) { - - return(FALSE); - } - - for (i = 0; i < n_fields; i++) { - - field1 = dtuple_get_nth_field(tuple1, i); - field2 = dtuple_get_nth_field(tuple2, i); - - if (0 != cmp_dfield_dfield(field1, field2)) { - - return(FALSE); - } - } - - return(TRUE); -} - -/************************************************************************* -Creates a dtuple for use in MySQL. 
*/ - -dtuple_t* -dtuple_create_for_mysql( -/*====================*/ - /* out, own created dtuple */ - void** heap, /* out: created memory heap */ - ulint n_fields) /* in: number of fields */ -{ - *heap = (void*)mem_heap_create(500); - - return(dtuple_create(*((mem_heap_t**)heap), n_fields)); -} - -/************************************************************************* -Frees a dtuple used in MySQL. */ - -void -dtuple_free_for_mysql( -/*==================*/ - void* heap) /* in: memory heap where tuple was created */ -{ - mem_heap_free((mem_heap_t*)heap); -} - -/************************************************************************* -Sets number of fields used in a tuple. Normally this is set in -dtuple_create, but if you want later to set it smaller, you can use this. */ - -void -dtuple_set_n_fields( -/*================*/ - dtuple_t* tuple, /* in: tuple */ - ulint n_fields) /* in: number of fields */ -{ - ut_ad(tuple); - - tuple->n_fields = n_fields; - tuple->n_fields_cmp = n_fields; -} - -/************************************************************** -Checks that a data field is typed. */ -static -ibool -dfield_check_typed_no_assert( -/*=========================*/ - /* out: TRUE if ok */ - dfield_t* field) /* in: data field */ -{ - if (dfield_get_type(field)->mtype > DATA_MYSQL - || dfield_get_type(field)->mtype < DATA_VARCHAR) { - - fprintf(stderr, - "InnoDB: Error: data field type %lu, len %lu\n", - (ulong) dfield_get_type(field)->mtype, - (ulong) dfield_get_len(field)); - return(FALSE); - } - - return(TRUE); -} - -/************************************************************** -Checks that a data tuple is typed. 
*/ - -ibool -dtuple_check_typed_no_assert( -/*=========================*/ - /* out: TRUE if ok */ - dtuple_t* tuple) /* in: tuple */ -{ - dfield_t* field; - ulint i; - - if (dtuple_get_n_fields(tuple) > REC_MAX_N_FIELDS) { - fprintf(stderr, - "InnoDB: Error: index entry has %lu fields\n", - (ulong) dtuple_get_n_fields(tuple)); -dump: - fputs("InnoDB: Tuple contents: ", stderr); - dtuple_print(stderr, tuple); - putc('\n', stderr); - - return(FALSE); - } - - for (i = 0; i < dtuple_get_n_fields(tuple); i++) { - - field = dtuple_get_nth_field(tuple, i); - - if (!dfield_check_typed_no_assert(field)) { - goto dump; - } - } - - return(TRUE); -} - -/************************************************************** -Checks that a data field is typed. Asserts an error if not. */ - -ibool -dfield_check_typed( -/*===============*/ - /* out: TRUE if ok */ - dfield_t* field) /* in: data field */ -{ - if (dfield_get_type(field)->mtype > DATA_MYSQL - || dfield_get_type(field)->mtype < DATA_VARCHAR) { - - fprintf(stderr, - "InnoDB: Error: data field type %lu, len %lu\n", - (ulong) dfield_get_type(field)->mtype, - (ulong) dfield_get_len(field)); - - ut_error; - } - - return(TRUE); -} - -/************************************************************** -Checks that a data tuple is typed. Asserts an error if not. */ - -ibool -dtuple_check_typed( -/*===============*/ - /* out: TRUE if ok */ - dtuple_t* tuple) /* in: tuple */ -{ - dfield_t* field; - ulint i; - - for (i = 0; i < dtuple_get_n_fields(tuple); i++) { - - field = dtuple_get_nth_field(tuple, i); - - ut_a(dfield_check_typed(field)); - } - - return(TRUE); -} - -#ifdef UNIV_DEBUG -/************************************************************** -Validates the consistency of a tuple which must be complete, i.e, -all fields must have been set. 
*/ - -ibool -dtuple_validate( -/*============*/ - /* out: TRUE if ok */ - dtuple_t* tuple) /* in: tuple */ -{ - dfield_t* field; - byte* data; - ulint n_fields; - ulint len; - ulint i; - ulint j; - - ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N); - - n_fields = dtuple_get_n_fields(tuple); - - /* We dereference all the data of each field to test - for memory traps */ - - for (i = 0; i < n_fields; i++) { - - field = dtuple_get_nth_field(tuple, i); - len = dfield_get_len(field); - - if (len != UNIV_SQL_NULL) { - - data = field->data; - - for (j = 0; j < len; j++) { - - data_dummy += *data; /* fool the compiler not - to optimize out this - code */ - data++; - } - } - } - - ut_a(dtuple_check_typed(tuple)); - - return(TRUE); -} -#endif /* UNIV_DEBUG */ - -/***************************************************************** -Pretty prints a dfield value according to its data type. */ - -void -dfield_print( -/*=========*/ - dfield_t* dfield) /* in: dfield */ -{ - byte* data; - ulint len; - ulint mtype; - ulint i; - - len = dfield_get_len(dfield); - data = dfield_get_data(dfield); - - if (len == UNIV_SQL_NULL) { - fputs("NULL", stderr); - - return; - } - - mtype = dtype_get_mtype(dfield_get_type(dfield)); - - if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) { - - for (i = 0; i < len; i++) { - int c = *data++; - putc(isprint(c) ? c : ' ', stderr); - } - } else if (mtype == DATA_INT) { - ut_a(len == 4); /* only works for 32-bit integers */ - fprintf(stderr, "%d", (int)mach_read_from_4(data)); - } else { - ut_error; - } -} - -/***************************************************************** -Pretty prints a dfield value according to its data type. Also the hex string -is printed if a string contains non-printable characters. 
*/ - -void -dfield_print_also_hex( -/*==================*/ - dfield_t* dfield) /* in: dfield */ -{ - byte* data; - ulint len; - ulint mtype; - ulint i; - ibool print_also_hex; - - len = dfield_get_len(dfield); - data = dfield_get_data(dfield); - - if (len == UNIV_SQL_NULL) { - fputs("NULL", stderr); - - return; - } - - mtype = dtype_get_mtype(dfield_get_type(dfield)); - - if ((mtype == DATA_CHAR) || (mtype == DATA_VARCHAR)) { - - print_also_hex = FALSE; - - for (i = 0; i < len; i++) { - int c = *data++; - if (!isprint(c)) { - print_also_hex = TRUE; - c = ' '; - } - putc(c, stderr); - } - - if (!print_also_hex) { - - return; - } - - fputs(" Hex: ", stderr); - - data = dfield_get_data(dfield); - - for (i = 0; i < len; i++) { - fprintf(stderr, "%02lx", (ulint)*data); - - data++; - } - } else if (mtype == DATA_INT) { - ut_a(len == 4); /* only works for 32-bit integers */ - fprintf(stderr, "%d", (int)mach_read_from_4(data)); - } else { - ut_error; - } -} - -/***************************************************************** -Print a dfield value using ut_print_buf. */ -static -void -dfield_print_raw( -/*=============*/ - FILE* f, /* in: output stream */ - dfield_t* dfield) /* in: dfield */ -{ - ulint len = dfield->len; - if (len != UNIV_SQL_NULL) { - ulint print_len = ut_min(len, 1000); - ut_print_buf(f, dfield->data, print_len); - if (len != print_len) { - fprintf(f, "(total %lu bytes)", (ulong) len); - } - } else { - fputs(" SQL NULL", f); - } -} - -/************************************************************** -The following function prints the contents of a tuple. 
*/ - -void -dtuple_print( -/*=========*/ - FILE* f, /* in: output stream */ - dtuple_t* tuple) /* in: tuple */ -{ - ulint n_fields; - ulint i; - - n_fields = dtuple_get_n_fields(tuple); - - fprintf(f, "DATA TUPLE: %lu fields;\n", (ulong) n_fields); - - for (i = 0; i < n_fields; i++) { - fprintf(f, " %lu:", (ulong) i); - - dfield_print_raw(f, dtuple_get_nth_field(tuple, i)); - - putc(';', f); - } - - putc('\n', f); - ut_ad(dtuple_validate(tuple)); -} - -/****************************************************************** -Moves parts of long fields in entry to the big record vector so that -the size of tuple drops below the maximum record size allowed in the -database. Moves data only from those fields which are not necessary -to determine uniquely the insertion place of the tuple in the index. */ - -big_rec_t* -dtuple_convert_big_rec( -/*===================*/ - /* out, own: created big record vector, - NULL if we are not able to shorten - the entry enough, i.e., if there are - too many short fields in entry */ - dict_index_t* index, /* in: index */ - dtuple_t* entry, /* in: index entry */ - ulint* ext_vec,/* in: array of externally stored fields, - or NULL: if a field already is externally - stored, then we cannot move it to the vector - this function returns */ - ulint n_ext_vec)/* in: number of elements is ext_vec */ -{ - mem_heap_t* heap; - big_rec_t* vector; - dfield_t* dfield; - ulint size; - ulint n_fields; - ulint longest; - ulint longest_i = ULINT_MAX; - ibool is_externally_stored; - ulint i; - ulint j; - - ut_a(dtuple_check_typed_no_assert(entry)); - - size = rec_get_converted_size(index, entry); - - if (UNIV_UNLIKELY(size > 1000000000)) { - fprintf(stderr, - "InnoDB: Warning: tuple size very big: %lu\n", - (ulong) size); - fputs("InnoDB: Tuple contents: ", stderr); - dtuple_print(stderr, entry); - putc('\n', stderr); - } - - heap = mem_heap_create(size + dtuple_get_n_fields(entry) - * sizeof(big_rec_field_t) + 1000); - - vector = mem_heap_alloc(heap, 
sizeof(big_rec_t)); - - vector->heap = heap; - vector->fields = mem_heap_alloc(heap, dtuple_get_n_fields(entry) - * sizeof(big_rec_field_t)); - - /* Decide which fields to shorten: the algorithm is to look for - the longest field whose type is DATA_BLOB */ - - n_fields = 0; - - while (rec_get_converted_size(index, entry) - >= ut_min(page_get_free_space_of_empty( - dict_table_is_comp(index->table)) / 2, - REC_MAX_DATA_SIZE)) { - - longest = 0; - for (i = dict_index_get_n_unique_in_tree(index); - i < dtuple_get_n_fields(entry); i++) { - - /* Skip over fields which already are externally - stored */ - - is_externally_stored = FALSE; - - if (ext_vec) { - for (j = 0; j < n_ext_vec; j++) { - if (ext_vec[j] == i) { - is_externally_stored = TRUE; - } - } - } - - if (!is_externally_stored) { - - dfield = dtuple_get_nth_field(entry, i); - - if (dfield->len != UNIV_SQL_NULL - && dfield->len > longest) { - - longest = dfield->len; - - longest_i = i; - } - } - } - - /* We do not store externally fields which are smaller than - DICT_MAX_INDEX_COL_LEN */ - -#if DICT_MAX_INDEX_COL_LEN <= REC_1BYTE_OFFS_LIMIT -# error "DICT_MAX_INDEX_COL_LEN <= REC_1BYTE_OFFS_LIMIT" -#endif - - if (longest < BTR_EXTERN_FIELD_REF_SIZE + 10 - + DICT_MAX_INDEX_COL_LEN) { - /* Cannot shorten more */ - - mem_heap_free(heap); - - return(NULL); - } - - /* Move data from field longest_i to big rec vector; - we do not let data size of the remaining entry - drop below 128 which is the limit for the 2-byte - offset storage format in a physical record. This - we accomplish by storing 128 bytes of data in entry - itself, and only the remaining part to big rec vec. - - We store the first bytes locally to the record. Then - we can calculate all ordering fields in all indexes - from locally stored data. 
*/ - - dfield = dtuple_get_nth_field(entry, longest_i); - vector->fields[n_fields].field_no = longest_i; - - ut_a(dfield->len > DICT_MAX_INDEX_COL_LEN); - - vector->fields[n_fields].len = dfield->len - - DICT_MAX_INDEX_COL_LEN; - - vector->fields[n_fields].data = mem_heap_alloc( - heap, vector->fields[n_fields].len); - - /* Copy data (from the end of field) to big rec vector */ - - ut_memcpy(vector->fields[n_fields].data, - ((byte*)dfield->data) + dfield->len - - vector->fields[n_fields].len, - vector->fields[n_fields].len); - dfield->len = dfield->len - vector->fields[n_fields].len - + BTR_EXTERN_FIELD_REF_SIZE; - - /* Set the extern field reference in dfield to zero */ - memset(((byte*)dfield->data) - + dfield->len - BTR_EXTERN_FIELD_REF_SIZE, - 0, BTR_EXTERN_FIELD_REF_SIZE); - n_fields++; - } - - vector->n_fields = n_fields; - return(vector); -} - -/****************************************************************** -Puts back to entry the data stored in vector. Note that to ensure the -fields in entry can accommodate the data, vector must have been created -from entry with dtuple_convert_big_rec. */ - -void -dtuple_convert_back_big_rec( -/*========================*/ - dict_index_t* index __attribute__((unused)), /* in: index */ - dtuple_t* entry, /* in: entry whose data was put to vector */ - big_rec_t* vector) /* in, own: big rec vector; it is - freed in this function */ -{ - dfield_t* dfield; - ulint i; - - for (i = 0; i < vector->n_fields; i++) { - - dfield = dtuple_get_nth_field(entry, - vector->fields[i].field_no); - /* Copy data from big rec vector */ - - ut_memcpy(((byte*)dfield->data) - + dfield->len - BTR_EXTERN_FIELD_REF_SIZE, - vector->fields[i].data, - vector->fields[i].len); - dfield->len = dfield->len + vector->fields[i].len - - BTR_EXTERN_FIELD_REF_SIZE; - } - - mem_heap_free(vector->heap); -} - -/****************************************************************** -Frees the memory in a big rec vector. 
*/ - -void -dtuple_big_rec_free( -/*================*/ - big_rec_t* vector) /* in, own: big rec vector; it is - freed in this function */ -{ - mem_heap_free(vector->heap); -} diff --git a/storage/innobase/data/data0type.c b/storage/innobase/data/data0type.c deleted file mode 100644 index 305000d7c0a..00000000000 --- a/storage/innobase/data/data0type.c +++ /dev/null @@ -1,295 +0,0 @@ -/****************************************************** -Data types - -(c) 1996 Innobase Oy - -Created 1/16/1996 Heikki Tuuri -*******************************************************/ - -#include "data0type.h" - -#ifdef UNIV_NONINL -#include "data0type.ic" -#endif - -/********************************************************************** -This function is used to find the storage length in bytes of the first n -characters for prefix indexes using a multibyte character set. The function -finds charset information and returns length of prefix_len characters in the -index field in bytes. - -NOTE: the prototype of this function is copied from ha_innodb.cc! If you change -this function, you MUST change also the prototype here! */ - -ulint -innobase_get_at_most_n_mbchars( -/*===========================*/ - /* out: number of bytes occupied by the first - n characters */ - ulint charset_id, /* in: character set id */ - ulint prefix_len, /* in: prefix length in bytes of the index - (this has to be divided by mbmaxlen to get the - number of CHARACTERS n in the prefix) */ - ulint data_len, /* in: length of the string in bytes */ - const char* str); /* in: character string */ - -/* At the database startup we store the default-charset collation number of -this MySQL installation to this global variable. If we have < 4.1.2 format -column definitions, or records in the insert buffer, we use this -charset-collation code for them. 
*/ - -ulint data_mysql_default_charset_coll = 99999999; - -/************************************************************************* -Determine how many bytes the first n characters of the given string occupy. -If the string is shorter than n characters, returns the number of bytes -the characters in the string occupy. */ - -ulint -dtype_get_at_most_n_mbchars( -/*========================*/ - /* out: length of the prefix, - in bytes */ - ulint prtype, /* in: precise type */ - ulint mbminlen, /* in: minimum length of a - multi-byte character */ - ulint mbmaxlen, /* in: maximum length of a - multi-byte character */ - ulint prefix_len, /* in: length of the requested - prefix, in characters, multiplied by - dtype_get_mbmaxlen(dtype) */ - ulint data_len, /* in: length of str (in bytes) */ - const char* str) /* in: the string whose prefix - length is being determined */ -{ -#ifndef UNIV_HOTBACKUP - ut_a(data_len != UNIV_SQL_NULL); - ut_ad(!mbmaxlen || !(prefix_len % mbmaxlen)); - - if (mbminlen != mbmaxlen) { - ut_a(!(prefix_len % mbmaxlen)); - return(innobase_get_at_most_n_mbchars( - dtype_get_charset_coll(prtype), - prefix_len, data_len, str)); - } - - if (prefix_len < data_len) { - - return(prefix_len); - - } - - return(data_len); -#else /* UNIV_HOTBACKUP */ - /* This function depends on MySQL code that is not included in - InnoDB Hot Backup builds. Besides, this function should never - be called in InnoDB Hot Backup. */ - ut_error; -#endif /* UNIV_HOTBACKUP */ -} - -/************************************************************************* -Checks if a data main type is a string type. Also a BLOB is considered a -string type. */ - -ibool -dtype_is_string_type( -/*=================*/ - /* out: TRUE if string type */ - ulint mtype) /* in: InnoDB main data type code: DATA_CHAR, ... 
*/ -{ - if (mtype <= DATA_BLOB - || mtype == DATA_MYSQL - || mtype == DATA_VARMYSQL) { - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************* -Checks if a type is a binary string type. Note that for tables created with -< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For -those DATA_BLOB columns this function currently returns FALSE. */ - -ibool -dtype_is_binary_string_type( -/*========================*/ - /* out: TRUE if binary string type */ - ulint mtype, /* in: main data type */ - ulint prtype) /* in: precise type */ -{ - if ((mtype == DATA_FIXBINARY) - || (mtype == DATA_BINARY) - || (mtype == DATA_BLOB && (prtype & DATA_BINARY_TYPE))) { - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************* -Checks if a type is a non-binary string type. That is, dtype_is_string_type is -TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created -with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. -For those DATA_BLOB columns this function currently returns TRUE. */ - -ibool -dtype_is_non_binary_string_type( -/*============================*/ - /* out: TRUE if non-binary string type */ - ulint mtype, /* in: main data type */ - ulint prtype) /* in: precise type */ -{ - if (dtype_is_string_type(mtype) == TRUE - && dtype_is_binary_string_type(mtype, prtype) == FALSE) { - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************* -Gets the MySQL charset-collation code for MySQL string types. 
*/ - -ulint -dtype_get_charset_coll_noninline( -/*=============================*/ - ulint prtype) /* in: precise data type */ -{ - return(dtype_get_charset_coll(prtype)); -} - -/************************************************************************* -Forms a precise type from the < 4.1.2 format precise type plus the -charset-collation code. */ - -ulint -dtype_form_prtype( -/*==============*/ - ulint old_prtype, /* in: the MySQL type code and the flags - DATA_BINARY_TYPE etc. */ - ulint charset_coll) /* in: MySQL charset-collation code */ -{ - ut_a(old_prtype < 256 * 256); - ut_a(charset_coll < 256); - - return(old_prtype + (charset_coll << 16)); -} - -/************************************************************************* -Validates a data type structure. */ - -ibool -dtype_validate( -/*===========*/ - /* out: TRUE if ok */ - dtype_t* type) /* in: type struct to validate */ -{ - ut_a(type); - ut_a(type->mtype >= DATA_VARCHAR); - ut_a(type->mtype <= DATA_MYSQL); - - if (type->mtype == DATA_SYS) { - ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS); - } - - ut_a(type->mbminlen <= type->mbmaxlen); - - return(TRUE); -} - -/************************************************************************* -Prints a data type structure. 
*/ - -void -dtype_print( -/*========*/ - dtype_t* type) /* in: type */ -{ - ulint mtype; - ulint prtype; - ulint len; - - ut_a(type); - - mtype = type->mtype; - prtype = type->prtype; - - switch (mtype) { - case DATA_VARCHAR: - fputs("DATA_VARCHAR", stderr); - break; - - case DATA_CHAR: - fputs("DATA_CHAR", stderr); - break; - - case DATA_BINARY: - fputs("DATA_BINARY", stderr); - break; - - case DATA_FIXBINARY: - fputs("DATA_FIXBINARY", stderr); - break; - - case DATA_BLOB: - fputs("DATA_BLOB", stderr); - break; - - case DATA_INT: - fputs("DATA_INT", stderr); - break; - - case DATA_MYSQL: - fputs("DATA_MYSQL", stderr); - break; - - case DATA_SYS: - fputs("DATA_SYS", stderr); - break; - - default: - fprintf(stderr, "type %lu", (ulong) mtype); - break; - } - - len = type->len; - - if ((type->mtype == DATA_SYS) - || (type->mtype == DATA_VARCHAR) - || (type->mtype == DATA_CHAR)) { - putc(' ', stderr); - if (prtype == DATA_ROW_ID) { - fputs("DATA_ROW_ID", stderr); - len = DATA_ROW_ID_LEN; - } else if (prtype == DATA_ROLL_PTR) { - fputs("DATA_ROLL_PTR", stderr); - len = DATA_ROLL_PTR_LEN; - } else if (prtype == DATA_TRX_ID) { - fputs("DATA_TRX_ID", stderr); - len = DATA_TRX_ID_LEN; - } else if (prtype == DATA_ENGLISH) { - fputs("DATA_ENGLISH", stderr); - } else { - fprintf(stderr, "prtype %lu", (ulong) prtype); - } - } else { - if (prtype & DATA_UNSIGNED) { - fputs(" DATA_UNSIGNED", stderr); - } - - if (prtype & DATA_BINARY_TYPE) { - fputs(" DATA_BINARY_TYPE", stderr); - } - - if (prtype & DATA_NOT_NULL) { - fputs(" DATA_NOT_NULL", stderr); - } - } - - fprintf(stderr, " len %lu", (ulong) len); -} diff --git a/storage/innobase/dict/dict0boot.c b/storage/innobase/dict/dict0boot.c deleted file mode 100644 index 5f9aaf71e18..00000000000 --- a/storage/innobase/dict/dict0boot.c +++ /dev/null @@ -1,425 +0,0 @@ -/****************************************************** -Data dictionary creation and booting - -(c) 1996 Innobase Oy - -Created 4/18/1996 Heikki Tuuri 
-*******************************************************/ - -#include "dict0boot.h" - -#ifdef UNIV_NONINL -#include "dict0boot.ic" -#endif - -#include "dict0crea.h" -#include "btr0btr.h" -#include "dict0load.h" -#include "dict0load.h" -#include "trx0trx.h" -#include "srv0srv.h" -#include "ibuf0ibuf.h" -#include "buf0flu.h" -#include "log0recv.h" -#include "os0file.h" - -/************************************************************************** -Gets a pointer to the dictionary header and x-latches its page. */ - -dict_hdr_t* -dict_hdr_get( -/*=========*/ - /* out: pointer to the dictionary header, - page x-latched */ - mtr_t* mtr) /* in: mtr */ -{ - dict_hdr_t* header; - - ut_ad(mtr); - - header = DICT_HDR + buf_page_get(DICT_HDR_SPACE, DICT_HDR_PAGE_NO, - RW_X_LATCH, mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(header, SYNC_DICT_HEADER); -#endif /* UNIV_SYNC_DEBUG */ - return(header); -} - -/************************************************************************** -Returns a new table, index, or tree id. */ - -dulint -dict_hdr_get_new_id( -/*================*/ - /* out: the new id */ - ulint type) /* in: DICT_HDR_ROW_ID, ... */ -{ - dict_hdr_t* dict_hdr; - dulint id; - mtr_t mtr; - - ut_ad((type == DICT_HDR_TABLE_ID) || (type == DICT_HDR_INDEX_ID)); - - mtr_start(&mtr); - - dict_hdr = dict_hdr_get(&mtr); - - id = mtr_read_dulint(dict_hdr + type, &mtr); - id = ut_dulint_add(id, 1); - - mlog_write_dulint(dict_hdr + type, id, &mtr); - - mtr_commit(&mtr); - - return(id); -} - -/************************************************************************** -Writes the current value of the row id counter to the dictionary header file -page. 
*/ - -void -dict_hdr_flush_row_id(void) -/*=======================*/ -{ - dict_hdr_t* dict_hdr; - dulint id; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - id = dict_sys->row_id; - - mtr_start(&mtr); - - dict_hdr = dict_hdr_get(&mtr); - - mlog_write_dulint(dict_hdr + DICT_HDR_ROW_ID, id, &mtr); - - mtr_commit(&mtr); -} - -/********************************************************************* -Creates the file page for the dictionary header. This function is -called only at the database creation. */ -static -ibool -dict_hdr_create( -/*============*/ - /* out: TRUE if succeed */ - mtr_t* mtr) /* in: mtr */ -{ - dict_hdr_t* dict_header; - ulint hdr_page_no; - ulint root_page_no; - page_t* page; - - ut_ad(mtr); - - /* Create the dictionary header file block in a new, allocated file - segment in the system tablespace */ - page = fseg_create(DICT_HDR_SPACE, 0, - DICT_HDR + DICT_HDR_FSEG_HEADER, mtr); - - hdr_page_no = buf_frame_get_page_no(page); - - ut_a(DICT_HDR_PAGE_NO == hdr_page_no); - - dict_header = dict_hdr_get(mtr); - - /* Start counting row, table, index, and tree ids from - DICT_HDR_FIRST_ID */ - mlog_write_dulint(dict_header + DICT_HDR_ROW_ID, - ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr); - - mlog_write_dulint(dict_header + DICT_HDR_TABLE_ID, - ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr); - - mlog_write_dulint(dict_header + DICT_HDR_INDEX_ID, - ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr); - - /* Obsolete, but we must initialize it to 0 anyway. 
*/ - mlog_write_dulint(dict_header + DICT_HDR_MIX_ID, - ut_dulint_create(0, DICT_HDR_FIRST_ID), mtr); - - /* Create the B-tree roots for the clustered indexes of the basic - system tables */ - - /*--------------------------*/ - root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, DICT_TABLES_ID, FALSE, mtr); - if (root_page_no == FIL_NULL) { - - return(FALSE); - } - - mlog_write_ulint(dict_header + DICT_HDR_TABLES, root_page_no, - MLOG_4BYTES, mtr); - /*--------------------------*/ - root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE, - DICT_TABLE_IDS_ID, FALSE, mtr); - if (root_page_no == FIL_NULL) { - - return(FALSE); - } - - mlog_write_ulint(dict_header + DICT_HDR_TABLE_IDS, root_page_no, - MLOG_4BYTES, mtr); - /*--------------------------*/ - root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, DICT_COLUMNS_ID, FALSE, mtr); - if (root_page_no == FIL_NULL) { - - return(FALSE); - } - - mlog_write_ulint(dict_header + DICT_HDR_COLUMNS, root_page_no, - MLOG_4BYTES, mtr); - /*--------------------------*/ - root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, DICT_INDEXES_ID, FALSE, mtr); - if (root_page_no == FIL_NULL) { - - return(FALSE); - } - - mlog_write_ulint(dict_header + DICT_HDR_INDEXES, root_page_no, - MLOG_4BYTES, mtr); - /*--------------------------*/ - root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, DICT_FIELDS_ID, FALSE, mtr); - if (root_page_no == FIL_NULL) { - - return(FALSE); - } - - mlog_write_ulint(dict_header + DICT_HDR_FIELDS, root_page_no, - MLOG_4BYTES, mtr); - /*--------------------------*/ - - return(TRUE); -} - -/********************************************************************* -Initializes the data dictionary memory structures when the database is -started. This function is also called when the data dictionary is created. 
*/ - -void -dict_boot(void) -/*===========*/ -{ - dict_table_t* table; - dict_index_t* index; - dict_hdr_t* dict_hdr; - mem_heap_t* heap; - mtr_t mtr; - - mtr_start(&mtr); - - /* Create the hash tables etc. */ - dict_init(); - - heap = mem_heap_create(450); - - mutex_enter(&(dict_sys->mutex)); - - /* Get the dictionary header */ - dict_hdr = dict_hdr_get(&mtr); - - /* Because we only write new row ids to disk-based data structure - (dictionary header) when it is divisible by - DICT_HDR_ROW_ID_WRITE_MARGIN, in recovery we will not recover - the latest value of the row id counter. Therefore we advance - the counter at the database startup to avoid overlapping values. - Note that when a user after database startup first time asks for - a new row id, then because the counter is now divisible by - ..._MARGIN, it will immediately be updated to the disk-based - header. */ - - dict_sys->row_id = ut_dulint_add( - ut_dulint_align_up(mtr_read_dulint(dict_hdr + DICT_HDR_ROW_ID, - &mtr), - DICT_HDR_ROW_ID_WRITE_MARGIN), - DICT_HDR_ROW_ID_WRITE_MARGIN); - - /* Insert into the dictionary cache the descriptions of the basic - system tables */ - /*-------------------------*/ - table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, 0); - - dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "N_COLS", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4); - - table->id = DICT_TABLES_ID; - - dict_table_add_to_cache(table, heap); - dict_sys->sys_tables = table; - mem_heap_empty(heap); - - index = dict_mem_index_create("SYS_TABLES", "CLUST_IND", - DICT_HDR_SPACE, - 
DICT_UNIQUE | DICT_CLUSTERED, 1); - - dict_mem_index_add_field(index, "NAME", 0); - - index->id = DICT_TABLES_ID; - - dict_index_add_to_cache(table, index, - mtr_read_ulint(dict_hdr + DICT_HDR_TABLES, - MLOG_4BYTES, &mtr)); - - /*-------------------------*/ - index = dict_mem_index_create("SYS_TABLES", "ID_IND", - DICT_HDR_SPACE, DICT_UNIQUE, 1); - dict_mem_index_add_field(index, "ID", 0); - - index->id = DICT_TABLE_IDS_ID; - dict_index_add_to_cache(table, index, - mtr_read_ulint(dict_hdr + DICT_HDR_TABLE_IDS, - MLOG_4BYTES, &mtr)); - - /*-------------------------*/ - table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, 0); - - dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "MTYPE", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "PRTYPE", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "LEN", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "PREC", DATA_INT, 0, 4); - - table->id = DICT_COLUMNS_ID; - - dict_table_add_to_cache(table, heap); - dict_sys->sys_columns = table; - mem_heap_empty(heap); - - index = dict_mem_index_create("SYS_COLUMNS", "CLUST_IND", - DICT_HDR_SPACE, - DICT_UNIQUE | DICT_CLUSTERED, 2); - - dict_mem_index_add_field(index, "TABLE_ID", 0); - dict_mem_index_add_field(index, "POS", 0); - - index->id = DICT_COLUMNS_ID; - dict_index_add_to_cache(table, index, - mtr_read_ulint(dict_hdr + DICT_HDR_COLUMNS, - MLOG_4BYTES, &mtr)); - - /*-------------------------*/ - table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, 0); - - dict_mem_table_add_col(table, heap, "TABLE_ID", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "ID", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "NAME", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "N_FIELDS", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, 
"TYPE", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4); - - /* The '+ 2' below comes from the 2 system fields */ -#if DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2 -#error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2" -#endif -#if DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2 -#error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 5 + 2" -#endif -#if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2 -#error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2" -#endif - - table->id = DICT_INDEXES_ID; - dict_table_add_to_cache(table, heap); - dict_sys->sys_indexes = table; - mem_heap_empty(heap); - - index = dict_mem_index_create("SYS_INDEXES", "CLUST_IND", - DICT_HDR_SPACE, - DICT_UNIQUE | DICT_CLUSTERED, 2); - - dict_mem_index_add_field(index, "TABLE_ID", 0); - dict_mem_index_add_field(index, "ID", 0); - - index->id = DICT_INDEXES_ID; - dict_index_add_to_cache(table, index, - mtr_read_ulint(dict_hdr + DICT_HDR_INDEXES, - MLOG_4BYTES, &mtr)); - - /*-------------------------*/ - table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, 0); - - dict_mem_table_add_col(table, heap, "INDEX_ID", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "POS", DATA_INT, 0, 4); - dict_mem_table_add_col(table, heap, "COL_NAME", DATA_BINARY, 0, 0); - - table->id = DICT_FIELDS_ID; - dict_table_add_to_cache(table, heap); - dict_sys->sys_fields = table; - mem_heap_free(heap); - - index = dict_mem_index_create("SYS_FIELDS", "CLUST_IND", - DICT_HDR_SPACE, - DICT_UNIQUE | DICT_CLUSTERED, 2); - - dict_mem_index_add_field(index, "INDEX_ID", 0); - dict_mem_index_add_field(index, "POS", 0); - - index->id = DICT_FIELDS_ID; - dict_index_add_to_cache(table, index, - mtr_read_ulint(dict_hdr + DICT_HDR_FIELDS, - MLOG_4BYTES, &mtr)); - - mtr_commit(&mtr); - /*-------------------------*/ - - /* Initialize the insert buffer table and index for each tablespace */ - - ibuf_init_at_db_start(); - - /* Load definitions of other indexes on system 
tables */ - - dict_load_sys_table(dict_sys->sys_tables); - dict_load_sys_table(dict_sys->sys_columns); - dict_load_sys_table(dict_sys->sys_indexes); - dict_load_sys_table(dict_sys->sys_fields); - - mutex_exit(&(dict_sys->mutex)); -} - -/********************************************************************* -Inserts the basic system table data into themselves in the database -creation. */ -static -void -dict_insert_initial_data(void) -/*==========================*/ -{ - /* Does nothing yet */ -} - -/********************************************************************* -Creates and initializes the data dictionary at the database creation. */ - -void -dict_create(void) -/*=============*/ -{ - mtr_t mtr; - - mtr_start(&mtr); - - dict_hdr_create(&mtr); - - mtr_commit(&mtr); - - dict_boot(); - - dict_insert_initial_data(); -} diff --git a/storage/innobase/dict/dict0crea.c b/storage/innobase/dict/dict0crea.c deleted file mode 100644 index 4116230347d..00000000000 --- a/storage/innobase/dict/dict0crea.c +++ /dev/null @@ -1,1450 +0,0 @@ -/****************************************************** -Database object creation - -(c) 1996 Innobase Oy - -Created 1/8/1996 Heikki Tuuri -*******************************************************/ - -#include "dict0crea.h" - -#ifdef UNIV_NONINL -#include "dict0crea.ic" -#endif - -#include "btr0pcur.h" -#include "btr0btr.h" -#include "page0page.h" -#include "mach0data.h" -#include "dict0boot.h" -#include "dict0dict.h" -#include "que0que.h" -#include "row0ins.h" -#include "row0mysql.h" -#include "pars0pars.h" -#include "trx0roll.h" -#include "usr0sess.h" -#include "ut0vec.h" - -/********************************************************************* -Based on a table object, this function builds the entry to be inserted -in the SYS_TABLES system table. 
*/ -static -dtuple_t* -dict_create_sys_tables_tuple( -/*=========================*/ - /* out: the tuple which should be inserted */ - dict_table_t* table, /* in: table */ - mem_heap_t* heap) /* in: memory heap from which the memory for - the built tuple is allocated */ -{ - dict_table_t* sys_tables; - dtuple_t* entry; - dfield_t* dfield; - byte* ptr; - - ut_ad(table && heap); - - sys_tables = dict_sys->sys_tables; - - entry = dtuple_create(heap, 8 + DATA_N_SYS_COLS); - - /* 0: NAME -----------------------------*/ - dfield = dtuple_get_nth_field(entry, 0); - - dfield_set_data(dfield, table->name, ut_strlen(table->name)); - /* 3: ID -------------------------------*/ - dfield = dtuple_get_nth_field(entry, 1); - - ptr = mem_heap_alloc(heap, 8); - mach_write_to_8(ptr, table->id); - - dfield_set_data(dfield, ptr, 8); - /* 4: N_COLS ---------------------------*/ - dfield = dtuple_get_nth_field(entry, 2); - -#if DICT_TF_COMPACT != 1 -#error -#endif - - ptr = mem_heap_alloc(heap, 4); - mach_write_to_4(ptr, table->n_def - | ((table->flags & DICT_TF_COMPACT) << 31)); - dfield_set_data(dfield, ptr, 4); - /* 5: TYPE -----------------------------*/ - dfield = dtuple_get_nth_field(entry, 3); - - ptr = mem_heap_alloc(heap, 4); - mach_write_to_4(ptr, DICT_TABLE_ORDINARY); - - dfield_set_data(dfield, ptr, 4); - /* 6: MIX_ID (obsolete) ---------------------------*/ - dfield = dtuple_get_nth_field(entry, 4); - - ptr = mem_heap_alloc(heap, 8); - memset(ptr, 0, 8); - - dfield_set_data(dfield, ptr, 8); - /* 7: MIX_LEN (obsolete) --------------------------*/ - - dfield = dtuple_get_nth_field(entry, 5); - - ptr = mem_heap_alloc(heap, 4); - memset(ptr, 0, 4); - - dfield_set_data(dfield, ptr, 4); - /* 8: CLUSTER_NAME ---------------------*/ - dfield = dtuple_get_nth_field(entry, 6); - dfield_set_data(dfield, NULL, UNIV_SQL_NULL); /* not supported */ - - /* 9: SPACE ----------------------------*/ - dfield = dtuple_get_nth_field(entry, 7); - - ptr = mem_heap_alloc(heap, 4); - 
mach_write_to_4(ptr, table->space); - - dfield_set_data(dfield, ptr, 4); - /*----------------------------------*/ - - dict_table_copy_types(entry, sys_tables); - - return(entry); -} - -/********************************************************************* -Based on a table object, this function builds the entry to be inserted -in the SYS_COLUMNS system table. */ -static -dtuple_t* -dict_create_sys_columns_tuple( -/*==========================*/ - /* out: the tuple which should be inserted */ - dict_table_t* table, /* in: table */ - ulint i, /* in: column number */ - mem_heap_t* heap) /* in: memory heap from which the memory for - the built tuple is allocated */ -{ - dict_table_t* sys_columns; - dtuple_t* entry; - const dict_col_t* column; - dfield_t* dfield; - byte* ptr; - const char* col_name; - - ut_ad(table && heap); - - column = dict_table_get_nth_col(table, i); - - sys_columns = dict_sys->sys_columns; - - entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS); - - /* 0: TABLE_ID -----------------------*/ - dfield = dtuple_get_nth_field(entry, 0); - - ptr = mem_heap_alloc(heap, 8); - mach_write_to_8(ptr, table->id); - - dfield_set_data(dfield, ptr, 8); - /* 1: POS ----------------------------*/ - dfield = dtuple_get_nth_field(entry, 1); - - ptr = mem_heap_alloc(heap, 4); - mach_write_to_4(ptr, i); - - dfield_set_data(dfield, ptr, 4); - /* 4: NAME ---------------------------*/ - dfield = dtuple_get_nth_field(entry, 2); - - col_name = dict_table_get_col_name(table, i); - dfield_set_data(dfield, col_name, ut_strlen(col_name)); - /* 5: MTYPE --------------------------*/ - dfield = dtuple_get_nth_field(entry, 3); - - ptr = mem_heap_alloc(heap, 4); - mach_write_to_4(ptr, column->mtype); - - dfield_set_data(dfield, ptr, 4); - /* 6: PRTYPE -------------------------*/ - dfield = dtuple_get_nth_field(entry, 4); - - ptr = mem_heap_alloc(heap, 4); - mach_write_to_4(ptr, column->prtype); - - dfield_set_data(dfield, ptr, 4); - /* 7: LEN ----------------------------*/ - dfield = 
dtuple_get_nth_field(entry, 5); - - ptr = mem_heap_alloc(heap, 4); - mach_write_to_4(ptr, column->len); - - dfield_set_data(dfield, ptr, 4); - /* 8: PREC ---------------------------*/ - dfield = dtuple_get_nth_field(entry, 6); - - ptr = mem_heap_alloc(heap, 4); - mach_write_to_4(ptr, 0/* unused */); - - dfield_set_data(dfield, ptr, 4); - /*---------------------------------*/ - - dict_table_copy_types(entry, sys_columns); - - return(entry); -} - -/******************************************************************* -Builds a table definition to insert. */ -static -ulint -dict_build_table_def_step( -/*======================*/ - /* out: DB_SUCCESS or error code */ - que_thr_t* thr, /* in: query thread */ - tab_node_t* node) /* in: table create node */ -{ - dict_table_t* table; - dtuple_t* row; - ulint error; - const char* path_or_name; - ibool is_path; - mtr_t mtr; - ulint i; - ulint row_len; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - table = node->table; - - table->id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); - - thr_get_trx(thr)->table_id = table->id; - - row_len = 0; - for (i = 0; i < table->n_def; i++) { - row_len += dict_col_get_min_size(&table->cols[i]); - } - if (row_len > BTR_PAGE_MAX_REC_SIZE) { - return(DB_TOO_BIG_RECORD); - } - - if (srv_file_per_table) { - /* We create a new single-table tablespace for the table. - We initially let it be 4 pages: - - page 0 is the fsp header and an extent descriptor page, - - page 1 is an ibuf bitmap page, - - page 2 is the first inode page, - - page 3 will contain the root of the clustered index of the - table we create here. 
*/ - - ulint space = 0; /* reset to zero for the call below */ - - if (table->dir_path_of_temp_table) { - /* We place tables created with CREATE TEMPORARY - TABLE in the tmp dir of mysqld server */ - - path_or_name = table->dir_path_of_temp_table; - is_path = TRUE; - } else { - path_or_name = table->name; - is_path = FALSE; - } - - error = fil_create_new_single_table_tablespace( - &space, path_or_name, is_path, - FIL_IBD_FILE_INITIAL_SIZE); - table->space = (unsigned int) space; - - if (error != DB_SUCCESS) { - - return(error); - } - - mtr_start(&mtr); - - fsp_header_init(table->space, FIL_IBD_FILE_INITIAL_SIZE, &mtr); - - mtr_commit(&mtr); - } - - row = dict_create_sys_tables_tuple(table, node->heap); - - ins_node_set_new_row(node->tab_def, row); - - return(DB_SUCCESS); -} - -/******************************************************************* -Builds a column definition to insert. */ -static -ulint -dict_build_col_def_step( -/*====================*/ - /* out: DB_SUCCESS */ - tab_node_t* node) /* in: table create node */ -{ - dtuple_t* row; - - row = dict_create_sys_columns_tuple(node->table, node->col_no, - node->heap); - ins_node_set_new_row(node->col_def, row); - - return(DB_SUCCESS); -} - -/********************************************************************* -Based on an index object, this function builds the entry to be inserted -in the SYS_INDEXES system table. 
*/ -static -dtuple_t* -dict_create_sys_indexes_tuple( -/*==========================*/ - /* out: the tuple which should be inserted */ - dict_index_t* index, /* in: index */ - mem_heap_t* heap) /* in: memory heap from which the memory for - the built tuple is allocated */ -{ - dict_table_t* sys_indexes; - dict_table_t* table; - dtuple_t* entry; - dfield_t* dfield; - byte* ptr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_ad(index && heap); - - sys_indexes = dict_sys->sys_indexes; - - table = dict_table_get_low(index->table_name); - - entry = dtuple_create(heap, 7 + DATA_N_SYS_COLS); - - /* 0: TABLE_ID -----------------------*/ - dfield = dtuple_get_nth_field(entry, 0); - - ptr = mem_heap_alloc(heap, 8); - mach_write_to_8(ptr, table->id); - - dfield_set_data(dfield, ptr, 8); - /* 1: ID ----------------------------*/ - dfield = dtuple_get_nth_field(entry, 1); - - ptr = mem_heap_alloc(heap, 8); - mach_write_to_8(ptr, index->id); - - dfield_set_data(dfield, ptr, 8); - /* 4: NAME --------------------------*/ - dfield = dtuple_get_nth_field(entry, 2); - - dfield_set_data(dfield, index->name, ut_strlen(index->name)); - /* 5: N_FIELDS ----------------------*/ - dfield = dtuple_get_nth_field(entry, 3); - - ptr = mem_heap_alloc(heap, 4); - mach_write_to_4(ptr, index->n_fields); - - dfield_set_data(dfield, ptr, 4); - /* 6: TYPE --------------------------*/ - dfield = dtuple_get_nth_field(entry, 4); - - ptr = mem_heap_alloc(heap, 4); - mach_write_to_4(ptr, index->type); - - dfield_set_data(dfield, ptr, 4); - /* 7: SPACE --------------------------*/ - -#if DICT_SYS_INDEXES_SPACE_NO_FIELD != 7 -#error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 7" -#endif - - dfield = dtuple_get_nth_field(entry, 5); - - ptr = mem_heap_alloc(heap, 4); - mach_write_to_4(ptr, index->space); - - dfield_set_data(dfield, ptr, 4); - /* 8: PAGE_NO --------------------------*/ - -#if DICT_SYS_INDEXES_PAGE_NO_FIELD != 8 -#error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 8" -#endif - - dfield = 
dtuple_get_nth_field(entry, 6); - - ptr = mem_heap_alloc(heap, 4); - mach_write_to_4(ptr, FIL_NULL); - - dfield_set_data(dfield, ptr, 4); - /*--------------------------------*/ - - dict_table_copy_types(entry, sys_indexes); - - return(entry); -} - -/********************************************************************* -Based on an index object, this function builds the entry to be inserted -in the SYS_FIELDS system table. */ -static -dtuple_t* -dict_create_sys_fields_tuple( -/*=========================*/ - /* out: the tuple which should be inserted */ - dict_index_t* index, /* in: index */ - ulint i, /* in: field number */ - mem_heap_t* heap) /* in: memory heap from which the memory for - the built tuple is allocated */ -{ - dict_table_t* sys_fields; - dtuple_t* entry; - dict_field_t* field; - dfield_t* dfield; - byte* ptr; - ibool index_contains_column_prefix_field = FALSE; - ulint j; - - ut_ad(index && heap); - - for (j = 0; j < index->n_fields; j++) { - if (dict_index_get_nth_field(index, j)->prefix_len > 0) { - index_contains_column_prefix_field = TRUE; - } - } - - field = dict_index_get_nth_field(index, i); - - sys_fields = dict_sys->sys_fields; - - entry = dtuple_create(heap, 3 + DATA_N_SYS_COLS); - - /* 0: INDEX_ID -----------------------*/ - dfield = dtuple_get_nth_field(entry, 0); - - ptr = mem_heap_alloc(heap, 8); - mach_write_to_8(ptr, index->id); - - dfield_set_data(dfield, ptr, 8); - /* 1: POS + PREFIX LENGTH ----------------------------*/ - - dfield = dtuple_get_nth_field(entry, 1); - - ptr = mem_heap_alloc(heap, 4); - - if (index_contains_column_prefix_field) { - /* If there are column prefix fields in the index, then - we store the number of the field to the 2 HIGH bytes - and the prefix length to the 2 low bytes, */ - - mach_write_to_4(ptr, (i << 16) + field->prefix_len); - } else { - /* Else we store the number of the field to the 2 LOW bytes. - This is to keep the storage format compatible with - InnoDB versions < 4.0.14. 
*/ - - mach_write_to_4(ptr, i); - } - - dfield_set_data(dfield, ptr, 4); - /* 4: COL_NAME -------------------------*/ - dfield = dtuple_get_nth_field(entry, 2); - - dfield_set_data(dfield, field->name, - ut_strlen(field->name)); - /*---------------------------------*/ - - dict_table_copy_types(entry, sys_fields); - - return(entry); -} - -/********************************************************************* -Creates the tuple with which the index entry is searched for writing the index -tree root page number, if such a tree is created. */ -static -dtuple_t* -dict_create_search_tuple( -/*=====================*/ - /* out: the tuple for search */ - dtuple_t* tuple, /* in: the tuple inserted in the SYS_INDEXES - table */ - mem_heap_t* heap) /* in: memory heap from which the memory for - the built tuple is allocated */ -{ - dtuple_t* search_tuple; - dfield_t* field1; - dfield_t* field2; - - ut_ad(tuple && heap); - - search_tuple = dtuple_create(heap, 2); - - field1 = dtuple_get_nth_field(tuple, 0); - field2 = dtuple_get_nth_field(search_tuple, 0); - - dfield_copy(field2, field1); - - field1 = dtuple_get_nth_field(tuple, 1); - field2 = dtuple_get_nth_field(search_tuple, 1); - - dfield_copy(field2, field1); - - ut_ad(dtuple_validate(search_tuple)); - - return(search_tuple); -} - -/******************************************************************* -Builds an index definition row to insert. 
*/ -static -ulint -dict_build_index_def_step( -/*======================*/ - /* out: DB_SUCCESS or error code */ - que_thr_t* thr, /* in: query thread */ - ind_node_t* node) /* in: index create node */ -{ - dict_table_t* table; - dict_index_t* index; - dtuple_t* row; - trx_t* trx; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - trx = thr_get_trx(thr); - - index = node->index; - - table = dict_table_get_low(index->table_name); - - if (table == NULL) { - return(DB_TABLE_NOT_FOUND); - } - - trx->table_id = table->id; - - node->table = table; - - ut_ad((UT_LIST_GET_LEN(table->indexes) > 0) - || (index->type & DICT_CLUSTERED)); - - index->id = dict_hdr_get_new_id(DICT_HDR_INDEX_ID); - - /* Inherit the space id from the table; we store all indexes of a - table in the same tablespace */ - - index->space = table->space; - node->page_no = FIL_NULL; - row = dict_create_sys_indexes_tuple(index, node->heap); - node->ind_row = row; - - ins_node_set_new_row(node->ind_def, row); - - return(DB_SUCCESS); -} - -/******************************************************************* -Builds a field definition row to insert. */ -static -ulint -dict_build_field_def_step( -/*======================*/ - /* out: DB_SUCCESS */ - ind_node_t* node) /* in: index create node */ -{ - dict_index_t* index; - dtuple_t* row; - - index = node->index; - - row = dict_create_sys_fields_tuple(index, node->field_no, node->heap); - - ins_node_set_new_row(node->field_def, row); - - return(DB_SUCCESS); -} - -/******************************************************************* -Creates an index tree for the index if it is not a member of a cluster. 
*/ -static -ulint -dict_create_index_tree_step( -/*========================*/ - /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ - ind_node_t* node) /* in: index create node */ -{ - dict_index_t* index; - dict_table_t* sys_indexes; - dict_table_t* table; - dtuple_t* search_tuple; - btr_pcur_t pcur; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - index = node->index; - table = node->table; - - sys_indexes = dict_sys->sys_indexes; - - /* Run a mini-transaction in which the index tree is allocated for - the index and its root address is written to the index entry in - sys_indexes */ - - mtr_start(&mtr); - - search_tuple = dict_create_search_tuple(node->ind_row, node->heap); - - btr_pcur_open(UT_LIST_GET_FIRST(sys_indexes->indexes), - search_tuple, PAGE_CUR_L, BTR_MODIFY_LEAF, - &pcur, &mtr); - - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - - node->page_no = btr_create(index->type, index->space, index->id, - dict_table_is_comp(table), &mtr); - /* printf("Created a new index tree in space %lu root page %lu\n", - index->space, index->page_no); */ - - page_rec_write_index_page_no(btr_pcur_get_rec(&pcur), - DICT_SYS_INDEXES_PAGE_NO_FIELD, - node->page_no, &mtr); - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - if (node->page_no == FIL_NULL) { - - return(DB_OUT_OF_FILE_SPACE); - } - - return(DB_SUCCESS); -} - -/*********************************************************************** -Drops the index tree associated with a row in SYS_INDEXES table. 
*/ - -void -dict_drop_index_tree( -/*=================*/ - rec_t* rec, /* in: record in the clustered index of SYS_INDEXES - table */ - mtr_t* mtr) /* in: mtr having the latch on the record page */ -{ - ulint root_page_no; - ulint space; - byte* ptr; - ulint len; - - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_a(!dict_table_is_comp(dict_sys->sys_indexes)); - ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len); - - ut_ad(len == 4); - - root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); - - if (root_page_no == FIL_NULL) { - /* The tree has already been freed */ - - return; - } - - ptr = rec_get_nth_field_old(rec, - DICT_SYS_INDEXES_SPACE_NO_FIELD, &len); - - ut_ad(len == 4); - - space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); - - if (!fil_tablespace_exists_in_mem(space)) { - /* It is a single table tablespace and the .ibd file is - missing: do nothing */ - - return; - } - - /* We free all the pages but the root page first; this operation - may span several mini-transactions */ - - btr_free_but_not_root(space, root_page_no); - - /* Then we free the root page in the same mini-transaction where - we write FIL_NULL to the appropriate field in the SYS_INDEXES - record: this mini-transaction marks the B-tree totally freed */ - - /* printf("Dropping index tree in space %lu root page %lu\n", space, - root_page_no); */ - btr_free_root(space, root_page_no, mtr); - - page_rec_write_index_page_no(rec, - DICT_SYS_INDEXES_PAGE_NO_FIELD, - FIL_NULL, mtr); -} - -/*********************************************************************** -Truncates the index tree associated with a row in SYS_INDEXES table. */ - -ulint -dict_truncate_index_tree( -/*=====================*/ - /* out: new root page number, or - FIL_NULL on failure */ - dict_table_t* table, /* in: the table the index belongs to */ - btr_pcur_t* pcur, /* in/out: persistent cursor pointing to - record in the clustered index of - SYS_INDEXES table. The cursor may be - repositioned in this call. 
*/ - mtr_t* mtr) /* in: mtr having the latch - on the record page. The mtr may be - committed and restarted in this call. */ -{ - ulint root_page_no; - ulint space; - ulint type; - dulint index_id; - rec_t* rec; - byte* ptr; - ulint len; - ulint comp; - dict_index_t* index; - - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_a(!dict_table_is_comp(dict_sys->sys_indexes)); - rec = btr_pcur_get_rec(pcur); - ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len); - - ut_ad(len == 4); - - root_page_no = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); - - if (root_page_no == FIL_NULL) { - /* The tree has been freed. */ - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Trying to TRUNCATE" - " a missing index of table %s!\n", table->name); - return(FIL_NULL); - } - - ptr = rec_get_nth_field_old(rec, - DICT_SYS_INDEXES_SPACE_NO_FIELD, &len); - - ut_ad(len == 4); - - space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); - - if (!fil_tablespace_exists_in_mem(space)) { - /* It is a single table tablespace and the .ibd file is - missing: do nothing */ - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Trying to TRUNCATE" - " a missing .ibd file of table %s!\n", table->name); - return(FIL_NULL); - } - - ptr = rec_get_nth_field_old(rec, - DICT_SYS_INDEXES_TYPE_FIELD, &len); - ut_ad(len == 4); - type = mach_read_from_4(ptr); - - ptr = rec_get_nth_field_old(rec, 1, &len); - ut_ad(len == 8); - index_id = mach_read_from_8(ptr); - - /* We free all the pages but the root page first; this operation - may span several mini-transactions */ - - btr_free_but_not_root(space, root_page_no); - - /* Then we free the root page in the same mini-transaction where - we create the b-tree and write its new root page number to the - appropriate field in the SYS_INDEXES record: this mini-transaction - marks the B-tree totally truncated */ - - comp = page_is_comp(btr_page_get(space, root_page_no, RW_X_LATCH, - mtr)); - - btr_free_root(space, root_page_no, mtr); - /* We will temporarily 
write FIL_NULL to the PAGE_NO field - in SYS_INDEXES, so that the database will not get into an - inconsistent state in case it crashes between the mtr_commit() - below and the following mtr_commit() call. */ - page_rec_write_index_page_no(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, - FIL_NULL, mtr); - - /* We will need to commit the mini-transaction in order to avoid - deadlocks in the btr_create() call, because otherwise we would - be freeing and allocating pages in the same mini-transaction. */ - btr_pcur_store_position(pcur, mtr); - mtr_commit(mtr); - - mtr_start(mtr); - btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr); - - /* Find the index corresponding to this SYS_INDEXES record. */ - for (index = UT_LIST_GET_FIRST(table->indexes); - index; - index = UT_LIST_GET_NEXT(indexes, index)) { - if (!ut_dulint_cmp(index->id, index_id)) { - break; - } - } - - root_page_no = btr_create(type, space, index_id, comp, mtr); - if (index) { - index->page = (unsigned int) root_page_no; - } else { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Index %lu %lu of table %s is missing\n" - "InnoDB: from the data dictionary during TRUNCATE!\n", - ut_dulint_get_high(index_id), - ut_dulint_get_low(index_id), - table->name); - } - - return(root_page_no); -} - -/************************************************************************* -Creates a table create graph. 
*/ - -tab_node_t* -tab_create_graph_create( -/*====================*/ - /* out, own: table create node */ - dict_table_t* table, /* in: table to create, built as a memory data - structure */ - mem_heap_t* heap) /* in: heap where created */ -{ - tab_node_t* node; - - node = mem_heap_alloc(heap, sizeof(tab_node_t)); - - node->common.type = QUE_NODE_CREATE_TABLE; - - node->table = table; - - node->state = TABLE_BUILD_TABLE_DEF; - node->heap = mem_heap_create(256); - - node->tab_def = ins_node_create(INS_DIRECT, dict_sys->sys_tables, - heap); - node->tab_def->common.parent = node; - - node->col_def = ins_node_create(INS_DIRECT, dict_sys->sys_columns, - heap); - node->col_def->common.parent = node; - - node->commit_node = commit_node_create(heap); - node->commit_node->common.parent = node; - - return(node); -} - -/************************************************************************* -Creates an index create graph. */ - -ind_node_t* -ind_create_graph_create( -/*====================*/ - /* out, own: index create node */ - dict_index_t* index, /* in: index to create, built as a memory data - structure */ - mem_heap_t* heap) /* in: heap where created */ -{ - ind_node_t* node; - - node = mem_heap_alloc(heap, sizeof(ind_node_t)); - - node->common.type = QUE_NODE_CREATE_INDEX; - - node->index = index; - - node->state = INDEX_BUILD_INDEX_DEF; - node->page_no = FIL_NULL; - node->heap = mem_heap_create(256); - - node->ind_def = ins_node_create(INS_DIRECT, - dict_sys->sys_indexes, heap); - node->ind_def->common.parent = node; - - node->field_def = ins_node_create(INS_DIRECT, - dict_sys->sys_fields, heap); - node->field_def->common.parent = node; - - node->commit_node = commit_node_create(heap); - node->commit_node->common.parent = node; - - return(node); -} - -/*************************************************************** -Creates a table. This is a high-level function used in SQL execution graphs. 
*/ - -que_thr_t* -dict_create_table_step( -/*===================*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - tab_node_t* node; - ulint err = DB_ERROR; - trx_t* trx; - - ut_ad(thr); - ut_ad(mutex_own(&(dict_sys->mutex))); - - trx = thr_get_trx(thr); - - node = thr->run_node; - - ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_TABLE); - - if (thr->prev_node == que_node_get_parent(node)) { - node->state = TABLE_BUILD_TABLE_DEF; - } - - if (node->state == TABLE_BUILD_TABLE_DEF) { - - /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */ - - err = dict_build_table_def_step(thr, node); - - if (err != DB_SUCCESS) { - - goto function_exit; - } - - node->state = TABLE_BUILD_COL_DEF; - node->col_no = 0; - - thr->run_node = node->tab_def; - - return(thr); - } - - if (node->state == TABLE_BUILD_COL_DEF) { - - if (node->col_no < (node->table)->n_def) { - - err = dict_build_col_def_step(node); - - if (err != DB_SUCCESS) { - - goto function_exit; - } - - node->col_no++; - - thr->run_node = node->col_def; - - return(thr); - } else { - node->state = TABLE_COMMIT_WORK; - } - } - - if (node->state == TABLE_COMMIT_WORK) { - - /* Table was correctly defined: do NOT commit the transaction - (CREATE TABLE does NOT do an implicit commit of the current - transaction) */ - - node->state = TABLE_ADD_TO_CACHE; - - /* thr->run_node = node->commit_node; - - return(thr); */ - } - - if (node->state == TABLE_ADD_TO_CACHE) { - - dict_table_add_to_cache(node->table, node->heap); - - err = DB_SUCCESS; - } - -function_exit: - trx->error_state = err; - - if (err == DB_SUCCESS) { - /* Ok: do nothing */ - - } else if (err == DB_LOCK_WAIT) { - - return(NULL); - } else { - /* SQL error detected */ - - return(NULL); - } - - thr->run_node = que_node_get_parent(node); - - return(thr); -} - -/*************************************************************** -Creates an index. This is a high-level function used in SQL execution -graphs. 
*/ - -que_thr_t* -dict_create_index_step( -/*===================*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - ind_node_t* node; - ulint err = DB_ERROR; - trx_t* trx; - - ut_ad(thr); - ut_ad(mutex_own(&(dict_sys->mutex))); - - trx = thr_get_trx(thr); - - node = thr->run_node; - - ut_ad(que_node_get_type(node) == QUE_NODE_CREATE_INDEX); - - if (thr->prev_node == que_node_get_parent(node)) { - node->state = INDEX_BUILD_INDEX_DEF; - } - - if (node->state == INDEX_BUILD_INDEX_DEF) { - /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */ - err = dict_build_index_def_step(thr, node); - - if (err != DB_SUCCESS) { - - goto function_exit; - } - - node->state = INDEX_BUILD_FIELD_DEF; - node->field_no = 0; - - thr->run_node = node->ind_def; - - return(thr); - } - - if (node->state == INDEX_BUILD_FIELD_DEF) { - - if (node->field_no < (node->index)->n_fields) { - - err = dict_build_field_def_step(node); - - if (err != DB_SUCCESS) { - - goto function_exit; - } - - node->field_no++; - - thr->run_node = node->field_def; - - return(thr); - } else { - node->state = INDEX_CREATE_INDEX_TREE; - } - } - - if (node->state == INDEX_CREATE_INDEX_TREE) { - - err = dict_create_index_tree_step(node); - - if (err != DB_SUCCESS) { - - goto function_exit; - } - - node->state = INDEX_COMMIT_WORK; - } - - if (node->state == INDEX_COMMIT_WORK) { - - /* Index was correctly defined: do NOT commit the transaction - (CREATE INDEX does NOT currently do an implicit commit of - the current transaction) */ - - node->state = INDEX_ADD_TO_CACHE; - - /* thr->run_node = node->commit_node; - - return(thr); */ - } - - if (node->state == INDEX_ADD_TO_CACHE) { - - dict_index_add_to_cache(node->table, node->index, - node->page_no); - - err = DB_SUCCESS; - } - -function_exit: - trx->error_state = err; - - if (err == DB_SUCCESS) { - /* Ok: do nothing */ - - } else if (err == DB_LOCK_WAIT) { - - return(NULL); - } else { - /* SQL error detected */ - - return(NULL); - } 
- - thr->run_node = que_node_get_parent(node); - - return(thr); -} - -/******************************************************************** -Creates the foreign key constraints system tables inside InnoDB -at database creation or database start if they are not found or are -not of the right form. */ - -ulint -dict_create_or_check_foreign_constraint_tables(void) -/*================================================*/ - /* out: DB_SUCCESS or error code */ -{ - dict_table_t* table1; - dict_table_t* table2; - ulint error; - trx_t* trx; - - mutex_enter(&(dict_sys->mutex)); - - table1 = dict_table_get_low("SYS_FOREIGN"); - table2 = dict_table_get_low("SYS_FOREIGN_COLS"); - - if (table1 && table2 - && UT_LIST_GET_LEN(table1->indexes) == 3 - && UT_LIST_GET_LEN(table2->indexes) == 1) { - - /* Foreign constraint system tables have already been - created, and they are ok */ - - mutex_exit(&(dict_sys->mutex)); - - return(DB_SUCCESS); - } - - mutex_exit(&(dict_sys->mutex)); - - trx = trx_allocate_for_mysql(); - - trx->op_info = "creating foreign key sys tables"; - - row_mysql_lock_data_dictionary(trx); - - if (table1) { - fprintf(stderr, - "InnoDB: dropping incompletely created" - " SYS_FOREIGN table\n"); - row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE); - } - - if (table2) { - fprintf(stderr, - "InnoDB: dropping incompletely created" - " SYS_FOREIGN_COLS table\n"); - row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE); - } - - fprintf(stderr, - "InnoDB: Creating foreign key constraint system tables\n"); - - /* NOTE: in dict_load_foreigns we use the fact that - there are 2 secondary indexes on SYS_FOREIGN, and they - are defined just like below */ - - /* NOTE: when designing InnoDB's foreign key support in 2001, we made - an error and made the table names and the foreign key id of type - 'CHAR' (internally, really a VARCHAR). We should have made the type - VARBINARY, like in other InnoDB system tables, to get a clean - design. 
*/ - - error = que_eval_sql(NULL, - "PROCEDURE CREATE_FOREIGN_SYS_TABLES_PROC () IS\n" - "BEGIN\n" - "CREATE TABLE\n" - "SYS_FOREIGN(ID CHAR, FOR_NAME CHAR," - " REF_NAME CHAR, N_COLS INT);\n" - "CREATE UNIQUE CLUSTERED INDEX ID_IND" - " ON SYS_FOREIGN (ID);\n" - "CREATE INDEX FOR_IND" - " ON SYS_FOREIGN (FOR_NAME);\n" - "CREATE INDEX REF_IND" - " ON SYS_FOREIGN (REF_NAME);\n" - "CREATE TABLE\n" - "SYS_FOREIGN_COLS(ID CHAR, POS INT," - " FOR_COL_NAME CHAR, REF_COL_NAME CHAR);\n" - "CREATE UNIQUE CLUSTERED INDEX ID_IND" - " ON SYS_FOREIGN_COLS (ID, POS);\n" - "COMMIT WORK;\n" - "END;\n" - , FALSE, trx); - - if (error != DB_SUCCESS) { - fprintf(stderr, "InnoDB: error %lu in creation\n", - (ulong) error); - - ut_a(error == DB_OUT_OF_FILE_SPACE - || error == DB_TOO_MANY_CONCURRENT_TRXS); - - fprintf(stderr, - "InnoDB: creation failed\n" - "InnoDB: tablespace is full\n" - "InnoDB: dropping incompletely created" - " SYS_FOREIGN tables\n"); - - row_drop_table_for_mysql("SYS_FOREIGN", trx, TRUE); - row_drop_table_for_mysql("SYS_FOREIGN_COLS", trx, TRUE); - - error = DB_MUST_GET_MORE_FILE_SPACE; - } - - trx->op_info = ""; - - row_mysql_unlock_data_dictionary(trx); - - trx_free_for_mysql(trx); - - if (error == DB_SUCCESS) { - fprintf(stderr, - "InnoDB: Foreign key constraint system tables" - " created\n"); - } - - return(error); -} - -/******************************************************************** -Evaluate the given foreign key SQL statement. 
*/ - -ulint -dict_foreign_eval_sql( -/*==================*/ - /* out: error code or DB_SUCCESS */ - pars_info_t* info, /* in: info struct, or NULL */ - const char* sql, /* in: SQL string to evaluate */ - dict_table_t* table, /* in: table */ - dict_foreign_t* foreign,/* in: foreign */ - trx_t* trx) /* in: transaction */ -{ - ulint error; - FILE* ef = dict_foreign_err_file; - - error = que_eval_sql(info, sql, FALSE, trx); - - if (error == DB_DUPLICATE_KEY) { - mutex_enter(&dict_foreign_err_mutex); - rewind(ef); - ut_print_timestamp(ef); - fputs(" Error in foreign key constraint creation for table ", - ef); - ut_print_name(ef, trx, TRUE, table->name); - fputs(".\nA foreign key constraint of name ", ef); - ut_print_name(ef, trx, FALSE, foreign->id); - fputs("\nalready exists." - " (Note that internally InnoDB adds 'databasename/'\n" - "in front of the user-defined constraint name).\n", - ef); - fputs("Note that InnoDB's FOREIGN KEY system tables store\n" - "constraint names as case-insensitive, with the\n" - "MySQL standard latin1_swedish_ci collation. If you\n" - "create tables or databases whose names differ only in\n" - "the character case, then collisions in constraint\n" - "names can occur. 
Workaround: name your constraints\n" - "explicitly with unique names.\n", - ef); - - mutex_exit(&dict_foreign_err_mutex); - - return(error); - } - - if (error != DB_SUCCESS) { - fprintf(stderr, - "InnoDB: Foreign key constraint creation failed:\n" - "InnoDB: internal error number %lu\n", (ulong) error); - - mutex_enter(&dict_foreign_err_mutex); - ut_print_timestamp(ef); - fputs(" Internal error in foreign key constraint creation" - " for table ", ef); - ut_print_name(ef, trx, TRUE, table->name); - fputs(".\n" - "See the MySQL .err log in the datadir" - " for more information.\n", ef); - mutex_exit(&dict_foreign_err_mutex); - - return(error); - } - - return(DB_SUCCESS); -} - -/************************************************************************ -Add a single foreign key field definition to the data dictionary tables in -the database. */ -static -ulint -dict_create_add_foreign_field_to_dictionary( -/*========================================*/ - /* out: error code or DB_SUCCESS */ - ulint field_nr, /* in: foreign field number */ - dict_table_t* table, /* in: table */ - dict_foreign_t* foreign, /* in: foreign */ - trx_t* trx) /* in: transaction */ -{ - pars_info_t* info = pars_info_create(); - - pars_info_add_str_literal(info, "id", foreign->id); - - pars_info_add_int4_literal(info, "pos", field_nr); - - pars_info_add_str_literal(info, "for_col_name", - foreign->foreign_col_names[field_nr]); - - pars_info_add_str_literal(info, "ref_col_name", - foreign->referenced_col_names[field_nr]); - - return(dict_foreign_eval_sql( - info, - "PROCEDURE P () IS\n" - "BEGIN\n" - "INSERT INTO SYS_FOREIGN_COLS VALUES" - "(:id, :pos, :for_col_name, :ref_col_name);\n" - "END;\n", - table, foreign, trx)); -} - -/************************************************************************ -Add a single foreign key definition to the data dictionary tables in the -database. We also generate names to constraints that were not named by the -user. 
A generated constraint has a name of the format -databasename/tablename_ibfk_<number>, where the numbers start from 1, and -are given locally for this table, that is, the number is not global, as in -the old format constraints < 4.0.18 it used to be. */ -static -ulint -dict_create_add_foreign_to_dictionary( -/*==================================*/ - /* out: error code or DB_SUCCESS */ - ulint* id_nr, /* in/out: number to use in id generation; - incremented if used */ - dict_table_t* table, /* in: table */ - dict_foreign_t* foreign,/* in: foreign */ - trx_t* trx) /* in: transaction */ -{ - ulint error; - ulint i; - - pars_info_t* info = pars_info_create(); - - if (foreign->id == NULL) { - /* Generate a new constraint id */ - ulint namelen = strlen(table->name); - char* id = mem_heap_alloc(foreign->heap, namelen + 20); - /* no overflow if number < 1e13 */ - sprintf(id, "%s_ibfk_%lu", table->name, (ulong) (*id_nr)++); - foreign->id = id; - } - - pars_info_add_str_literal(info, "id", foreign->id); - - pars_info_add_str_literal(info, "for_name", table->name); - - pars_info_add_str_literal(info, "ref_name", - foreign->referenced_table_name); - - pars_info_add_int4_literal(info, "n_cols", - foreign->n_fields + (foreign->type << 24)); - - error = dict_foreign_eval_sql(info, - "PROCEDURE P () IS\n" - "BEGIN\n" - "INSERT INTO SYS_FOREIGN VALUES" - "(:id, :for_name, :ref_name, :n_cols);\n" - "END;\n" - , table, foreign, trx); - - if (error != DB_SUCCESS) { - - return(error); - } - - for (i = 0; i < foreign->n_fields; i++) { - error = dict_create_add_foreign_field_to_dictionary( - i, table, foreign, trx); - - if (error != DB_SUCCESS) { - - return(error); - } - } - - error = dict_foreign_eval_sql(NULL, - "PROCEDURE P () IS\n" - "BEGIN\n" - "COMMIT WORK;\n" - "END;\n" - , table, foreign, trx); - - return(error); -} - -/************************************************************************ -Adds foreign key definitions to data dictionary tables in the database. 
*/ - -ulint -dict_create_add_foreigns_to_dictionary( -/*===================================*/ - /* out: error code or DB_SUCCESS */ - ulint start_id,/* in: if we are actually doing ALTER TABLE - ADD CONSTRAINT, we want to generate constraint - numbers which are bigger than in the table so - far; we number the constraints from - start_id + 1 up; start_id should be set to 0 if - we are creating a new table, or if the table - so far has no constraints for which the name - was generated here */ - dict_table_t* table, /* in: table */ - trx_t* trx) /* in: transaction */ -{ - dict_foreign_t* foreign; - ulint number = start_id + 1; - ulint error; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - if (NULL == dict_table_get_low("SYS_FOREIGN")) { - fprintf(stderr, - "InnoDB: table SYS_FOREIGN not found" - " in internal data dictionary\n"); - - return(DB_ERROR); - } - - for (foreign = UT_LIST_GET_FIRST(table->foreign_list); - foreign; - foreign = UT_LIST_GET_NEXT(foreign_list, foreign)) { - - error = dict_create_add_foreign_to_dictionary(&number, table, - foreign, trx); - - if (error != DB_SUCCESS) { - - return(error); - } - } - - return(DB_SUCCESS); -} diff --git a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c deleted file mode 100644 index c7a57d6a2b8..00000000000 --- a/storage/innobase/dict/dict0dict.c +++ /dev/null @@ -1,4253 +0,0 @@ -/********************************************************************** -Data dictionary system - -(c) 1996 Innobase Oy - -Created 1/8/1996 Heikki Tuuri -***********************************************************************/ - -#include "dict0dict.h" - -#ifdef UNIV_NONINL -#include "dict0dict.ic" -#endif - -#include "buf0buf.h" -#include "data0type.h" -#include "mach0data.h" -#include "dict0boot.h" -#include "dict0mem.h" -#include "dict0crea.h" -#include "trx0undo.h" -#include "btr0btr.h" -#include "btr0cur.h" -#include "btr0sea.h" -#include "pars0pars.h" -#include "pars0sym.h" -#include "que0que.h" -#include "rem0cmp.h" 
-#ifndef UNIV_HOTBACKUP -# include "m_ctype.h" /* my_isspace() */ -#endif /* !UNIV_HOTBACKUP */ - -#include <ctype.h> - -dict_sys_t* dict_sys = NULL; /* the dictionary system */ - -rw_lock_t dict_operation_lock; /* table create, drop, etc. reserve - this in X-mode; implicit or backround - operations purge, rollback, foreign - key checks reserve this in S-mode; we - cannot trust that MySQL protects - implicit or background operations - a table drop since MySQL does not - know of them; therefore we need this; - NOTE: a transaction which reserves - this must keep book on the mode in - trx->dict_operation_lock_mode */ - -#define DICT_HEAP_SIZE 100 /* initial memory heap size when - creating a table or index object */ -#define DICT_POOL_PER_TABLE_HASH 512 /* buffer pool max size per table - hash table fixed size in bytes */ -#define DICT_POOL_PER_VARYING 4 /* buffer pool max size per data - dictionary varying size in bytes */ - -/* Identifies generated InnoDB foreign key names */ -static char dict_ibfk[] = "_ibfk_"; - -#ifndef UNIV_HOTBACKUP -/********************************************************************** -Converts an identifier to a table name. - -NOTE: the prototype of this function is copied from ha_innodb.cc! If you change -this function, you MUST change also the prototype here! */ -extern -void -innobase_convert_from_table_id( -/*===========================*/ - char* to, /* out: converted identifier */ - const char* from, /* in: identifier to convert */ - ulint len); /* in: length of 'to', in bytes; - should be at least 5 * strlen(to) + 1 */ -/********************************************************************** -Converts an identifier to UTF-8. - -NOTE: the prototype of this function is copied from ha_innodb.cc! If you change -this function, you MUST change also the prototype here! 
*/ -extern -void -innobase_convert_from_id( -/*=====================*/ - char* to, /* out: converted identifier */ - const char* from, /* in: identifier to convert */ - ulint len); /* in: length of 'to', in bytes; - should be at least 3 * strlen(to) + 1 */ -/********************************************************************** -Compares NUL-terminated UTF-8 strings case insensitively. - -NOTE: the prototype of this function is copied from ha_innodb.cc! If you change -this function, you MUST change also the prototype here! */ -extern -int -innobase_strcasecmp( -/*================*/ - /* out: 0 if a=b, <0 if a<b, >1 if a>b */ - const char* a, /* in: first string to compare */ - const char* b); /* in: second string to compare */ - -/********************************************************************** -Makes all characters in a NUL-terminated UTF-8 string lower case. - -NOTE: the prototype of this function is copied from ha_innodb.cc! If you change -this function, you MUST change also the prototype here! */ -extern -void -innobase_casedn_str( -/*================*/ - char* a); /* in/out: string to put in lower case */ - -/************************************************************************** -Determines the connection character set. - -NOTE: the prototype of this function is copied from ha_innodb.cc! If you change -this function, you MUST change also the prototype here! */ -struct charset_info_st* -innobase_get_charset( -/*=================*/ - /* out: connection character set */ - void* mysql_thd); /* in: MySQL thread handle */ -#endif /* !UNIV_HOTBACKUP */ - -/************************************************************************** -Removes an index from the dictionary cache. */ -static -void -dict_index_remove_from_cache( -/*=========================*/ - dict_table_t* table, /* in: table */ - dict_index_t* index); /* in, own: index */ -/*********************************************************************** -Copies fields contained in index2 to index1. 
*/ -static -void -dict_index_copy( -/*============*/ - dict_index_t* index1, /* in: index to copy to */ - dict_index_t* index2, /* in: index to copy from */ - dict_table_t* table, /* in: table */ - ulint start, /* in: first position to copy */ - ulint end); /* in: last position to copy */ -/*********************************************************************** -Tries to find column names for the index and sets the col field of the -index. */ -static -void -dict_index_find_cols( -/*=================*/ - dict_table_t* table, /* in: table */ - dict_index_t* index); /* in: index */ -/*********************************************************************** -Builds the internal dictionary cache representation for a clustered -index, containing also system fields not defined by the user. */ -static -dict_index_t* -dict_index_build_internal_clust( -/*============================*/ - /* out, own: the internal representation - of the clustered index */ - dict_table_t* table, /* in: table */ - dict_index_t* index); /* in: user representation of a clustered - index */ -/*********************************************************************** -Builds the internal dictionary cache representation for a non-clustered -index, containing also system fields not defined by the user. */ -static -dict_index_t* -dict_index_build_internal_non_clust( -/*================================*/ - /* out, own: the internal representation - of the non-clustered index */ - dict_table_t* table, /* in: table */ - dict_index_t* index); /* in: user representation of a non-clustered - index */ -/************************************************************************** -Removes a foreign constraint struct from the dictionary cache. */ -static -void -dict_foreign_remove_from_cache( -/*===========================*/ - dict_foreign_t* foreign); /* in, own: foreign constraint */ -/************************************************************************** -Prints a column data. 
*/ -static -void -dict_col_print_low( -/*===============*/ - const dict_table_t* table, /* in: table */ - const dict_col_t* col); /* in: column */ -/************************************************************************** -Prints an index data. */ -static -void -dict_index_print_low( -/*=================*/ - dict_index_t* index); /* in: index */ -/************************************************************************** -Prints a field data. */ -static -void -dict_field_print_low( -/*=================*/ - dict_field_t* field); /* in: field */ -/************************************************************************* -Frees a foreign key struct. */ -static -void -dict_foreign_free( -/*==============*/ - dict_foreign_t* foreign); /* in, own: foreign key struct */ - -/* Stream for storing detailed information about the latest foreign key -and unique key errors */ -FILE* dict_foreign_err_file = NULL; -mutex_t dict_foreign_err_mutex; /* mutex protecting the foreign - and unique error buffers */ - -#ifndef UNIV_HOTBACKUP -/********************************************************************** -Makes all characters in a NUL-terminated UTF-8 string lower case. */ - -void -dict_casedn_str( -/*============*/ - char* a) /* in/out: string to put in lower case */ -{ - innobase_casedn_str(a); -} -#endif /* !UNIV_HOTBACKUP */ - -/************************************************************************ -Checks if the database name in two table names is the same. 
*/ - -ibool -dict_tables_have_same_db( -/*=====================*/ - /* out: TRUE if same db name */ - const char* name1, /* in: table name in the form - dbname '/' tablename */ - const char* name2) /* in: table name in the form - dbname '/' tablename */ -{ - for (; *name1 == *name2; name1++, name2++) { - if (*name1 == '/') { - return(TRUE); - } - ut_a(*name1); /* the names must contain '/' */ - } - return(FALSE); -} - -/************************************************************************ -Return the end of table name where we have removed dbname and '/'. */ - -const char* -dict_remove_db_name( -/*================*/ - /* out: table name */ - const char* name) /* in: table name in the form - dbname '/' tablename */ -{ - const char* s = strchr(name, '/'); - ut_a(s); - - return(s + 1); -} - -/************************************************************************ -Get the database name length in a table name. */ - -ulint -dict_get_db_name_len( -/*=================*/ - /* out: database name length */ - const char* name) /* in: table name in the form - dbname '/' tablename */ -{ - const char* s; - s = strchr(name, '/'); - ut_a(s); - return(s - name); -} - -/************************************************************************ -Reserves the dictionary system mutex for MySQL. */ - -void -dict_mutex_enter_for_mysql(void) -/*============================*/ -{ - mutex_enter(&(dict_sys->mutex)); -} - -/************************************************************************ -Releases the dictionary system mutex for MySQL. */ - -void -dict_mutex_exit_for_mysql(void) -/*===========================*/ -{ - mutex_exit(&(dict_sys->mutex)); -} - -/************************************************************************ -Decrements the count of open MySQL handles to a table. 
*/ - -void -dict_table_decrement_handle_count( -/*==============================*/ - dict_table_t* table) /* in: table */ -{ - mutex_enter(&(dict_sys->mutex)); - - ut_a(table->n_mysql_handles_opened > 0); - - table->n_mysql_handles_opened--; - - mutex_exit(&(dict_sys->mutex)); -} - -/************************************************************************* -Gets the column data type. */ - -void -dict_col_copy_type_noninline( -/*=========================*/ - const dict_col_t* col, /* in: column */ - dtype_t* type) /* out: data type */ -{ - dict_col_copy_type(col, type); -} - -/************************************************************************ -Gets the nth column of a table. */ - -const dict_col_t* -dict_table_get_nth_col_noninline( -/*=============================*/ - /* out: pointer to column object */ - const dict_table_t* table, /* in: table */ - ulint pos) /* in: position of column */ -{ - return(dict_table_get_nth_col(table, pos)); -} - -/************************************************************************ -Gets the first index on the table (the clustered index). */ - -dict_index_t* -dict_table_get_first_index_noninline( -/*=================================*/ - /* out: index, NULL if none exists */ - dict_table_t* table) /* in: table */ -{ - return(dict_table_get_first_index(table)); -} - -/************************************************************************ -Gets the next index on the table. */ - -dict_index_t* -dict_table_get_next_index_noninline( -/*================================*/ - /* out: index, NULL if none left */ - dict_index_t* index) /* in: index */ -{ - return(dict_table_get_next_index(index)); -} - -/************************************************************************** -Returns an index object. 
*/ - -dict_index_t* -dict_table_get_index_noninline( -/*===========================*/ - /* out: index, NULL if does not exist */ - dict_table_t* table, /* in: table */ - const char* name) /* in: index name */ -{ - return(dict_table_get_index(table, name)); -} - -/************************************************************************** -Returns a column's name. */ - -const char* -dict_table_get_col_name( -/*====================*/ - /* out: column name. NOTE: not - guaranteed to stay valid if table is - modified in any way (columns added, - etc.). */ - const dict_table_t* table, /* in: table */ - ulint col_nr) /* in: column number */ -{ - ulint i; - const char* s; - - ut_ad(table); - ut_ad(col_nr < table->n_def); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - s = table->col_names; - if (s) { - for (i = 0; i < col_nr; i++) { - s += strlen(s) + 1; - } - } - - return(s); -} - - -/************************************************************************ -Acquire the autoinc lock.*/ - -void -dict_table_autoinc_lock( -/*====================*/ - dict_table_t* table) -{ - mutex_enter(&table->autoinc_mutex); -} - -/************************************************************************ -Unconditionally set the autoinc counter. */ - -void -dict_table_autoinc_initialize( -/*==========================*/ - dict_table_t* table, /* in: table */ - ib_ulonglong value) /* in: next value to assign to a row */ -{ - ut_ad(mutex_own(&table->autoinc_mutex)); - - table->autoinc = value; -} - -/************************************************************************ -Reads the next autoinc value (== autoinc counter value), 0 if not yet -initialized. 
*/ - -ib_ulonglong -dict_table_autoinc_read( -/*====================*/ - /* out: value for a new row, or 0 */ - dict_table_t* table) /* in: table */ -{ - ut_ad(mutex_own(&table->autoinc_mutex)); - - return(table->autoinc); -} - -/************************************************************************ -Updates the autoinc counter if the value supplied is greater than the -current value. */ - -void -dict_table_autoinc_update_if_greater( -/*=================================*/ - - dict_table_t* table, /* in: table */ - ib_ulonglong value) /* in: value which was assigned to a row */ -{ - ut_ad(mutex_own(&table->autoinc_mutex)); - - if (value > table->autoinc) { - - table->autoinc = value; - } -} - -/************************************************************************ -Release the autoinc lock.*/ - -void -dict_table_autoinc_unlock( -/*======================*/ - dict_table_t* table) /* in: release autoinc lock for this table */ -{ - mutex_exit(&table->autoinc_mutex); -} - -/************************************************************************ -Looks for column n in an index. 
*/ - -ulint -dict_index_get_nth_col_pos( -/*=======================*/ - /* out: position in internal representation - of the index; if not contained, returns - ULINT_UNDEFINED */ - dict_index_t* index, /* in: index */ - ulint n) /* in: column number */ -{ - const dict_field_t* field; - const dict_col_t* col; - ulint pos; - ulint n_fields; - - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - col = dict_table_get_nth_col(index->table, n); - - if (index->type & DICT_CLUSTERED) { - - return(dict_col_get_clust_pos(col, index)); - } - - n_fields = dict_index_get_n_fields(index); - - for (pos = 0; pos < n_fields; pos++) { - field = dict_index_get_nth_field(index, pos); - - if (col == field->col && field->prefix_len == 0) { - - return(pos); - } - } - - return(ULINT_UNDEFINED); -} - -/************************************************************************ -Returns TRUE if the index contains a column or a prefix of that column. */ - -ibool -dict_index_contains_col_or_prefix( -/*==============================*/ - /* out: TRUE if contains the column or its - prefix */ - dict_index_t* index, /* in: index */ - ulint n) /* in: column number */ -{ - const dict_field_t* field; - const dict_col_t* col; - ulint pos; - ulint n_fields; - - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - if (index->type & DICT_CLUSTERED) { - - return(TRUE); - } - - col = dict_table_get_nth_col(index->table, n); - - n_fields = dict_index_get_n_fields(index); - - for (pos = 0; pos < n_fields; pos++) { - field = dict_index_get_nth_field(index, pos); - - if (col == field->col) { - - return(TRUE); - } - } - - return(FALSE); -} - -/************************************************************************ -Looks for a matching field in an index. The column has to be the same. The -column in index must be complete, or must contain a prefix longer than the -column in index2. That is, we must be able to construct the prefix in index2 -from the prefix in index. 
*/ - -ulint -dict_index_get_nth_field_pos( -/*=========================*/ - /* out: position in internal representation - of the index; if not contained, returns - ULINT_UNDEFINED */ - dict_index_t* index, /* in: index from which to search */ - dict_index_t* index2, /* in: index */ - ulint n) /* in: field number in index2 */ -{ - dict_field_t* field; - dict_field_t* field2; - ulint n_fields; - ulint pos; - - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - field2 = dict_index_get_nth_field(index2, n); - - n_fields = dict_index_get_n_fields(index); - - for (pos = 0; pos < n_fields; pos++) { - field = dict_index_get_nth_field(index, pos); - - if (field->col == field2->col - && (field->prefix_len == 0 - || (field->prefix_len >= field2->prefix_len - && field2->prefix_len != 0))) { - - return(pos); - } - } - - return(ULINT_UNDEFINED); -} - -/************************************************************************** -Returns a table object based on table id. */ - -dict_table_t* -dict_table_get_on_id( -/*=================*/ - /* out: table, NULL if does not exist */ - dulint table_id, /* in: table id */ - trx_t* trx) /* in: transaction handle */ -{ - dict_table_t* table; - - if (ut_dulint_cmp(table_id, DICT_FIELDS_ID) <= 0 - || trx->dict_operation_lock_mode == RW_X_LATCH) { - /* It is a system table which will always exist in the table - cache: we avoid acquiring the dictionary mutex, because - if we are doing a rollback to handle an error in TABLE - CREATE, for example, we already have the mutex! */ - - ut_ad(mutex_own(&(dict_sys->mutex)) - || trx->dict_operation_lock_mode == RW_X_LATCH); - - return(dict_table_get_on_id_low(table_id)); - } - - mutex_enter(&(dict_sys->mutex)); - - table = dict_table_get_on_id_low(table_id); - - mutex_exit(&(dict_sys->mutex)); - - return(table); -} - -/************************************************************************ -Looks for column n position in the clustered index. 
*/ - -ulint -dict_table_get_nth_col_pos( -/*=======================*/ - /* out: position in internal representation - of the clustered index */ - dict_table_t* table, /* in: table */ - ulint n) /* in: column number */ -{ - return(dict_index_get_nth_col_pos(dict_table_get_first_index(table), - n)); -} - -/************************************************************************ -Check whether the table uses the compact page format. */ - -ibool -dict_table_is_comp_noninline( -/*=========================*/ - /* out: TRUE if table uses the - compact page format */ - const dict_table_t* table) /* in: table */ -{ - return(dict_table_is_comp(table)); -} - -/************************************************************************ -Checks if a column is in the ordering columns of the clustered index of a -table. Column prefixes are treated like whole columns. */ - -ibool -dict_table_col_in_clustered_key( -/*============================*/ - /* out: TRUE if the column, or its prefix, is - in the clustered key */ - dict_table_t* table, /* in: table */ - ulint n) /* in: column number */ -{ - dict_index_t* index; - const dict_field_t* field; - const dict_col_t* col; - ulint pos; - ulint n_fields; - - ut_ad(table); - - col = dict_table_get_nth_col(table, n); - - index = dict_table_get_first_index(table); - - n_fields = dict_index_get_n_unique(index); - - for (pos = 0; pos < n_fields; pos++) { - field = dict_index_get_nth_field(index, pos); - - if (col == field->col) { - - return(TRUE); - } - } - - return(FALSE); -} - -/************************************************************************** -Inits the data dictionary module. 
*/ - -void -dict_init(void) -/*===========*/ -{ - dict_sys = mem_alloc(sizeof(dict_sys_t)); - - mutex_create(&dict_sys->mutex, SYNC_DICT); - - dict_sys->table_hash = hash_create(buf_pool_get_max_size() - / (DICT_POOL_PER_TABLE_HASH - * UNIV_WORD_SIZE)); - dict_sys->table_id_hash = hash_create(buf_pool_get_max_size() - / (DICT_POOL_PER_TABLE_HASH - * UNIV_WORD_SIZE)); - dict_sys->size = 0; - - UT_LIST_INIT(dict_sys->table_LRU); - - rw_lock_create(&dict_operation_lock, SYNC_DICT_OPERATION); - - dict_foreign_err_file = os_file_create_tmpfile(); - ut_a(dict_foreign_err_file); - - mutex_create(&dict_foreign_err_mutex, SYNC_ANY_LATCH); -} - -/************************************************************************** -Returns a table object and optionally increment its MySQL open handle count. -NOTE! This is a high-level function to be used mainly from outside the -'dict' directory. Inside this directory dict_table_get_low is usually the -appropriate function. */ - -dict_table_t* -dict_table_get( -/*===========*/ - /* out: table, NULL if - does not exist */ - const char* table_name, /* in: table name */ - ibool inc_mysql_count) - /* in: whether to increment the open - handle count on the table */ -{ - dict_table_t* table; - - mutex_enter(&(dict_sys->mutex)); - - table = dict_table_get_low(table_name); - - if (inc_mysql_count && table) { - table->n_mysql_handles_opened++; - } - - mutex_exit(&(dict_sys->mutex)); - - if (table != NULL) { - if (!table->stat_initialized) { - /* If table->ibd_file_missing == TRUE, this will - print an error message and return without doing - anything. */ - dict_update_statistics(table); - } - } - - return(table); -} - -/************************************************************************** -Adds system columns to a table object. 
*/ - -void -dict_table_add_system_columns( -/*==========================*/ - dict_table_t* table, /* in/out: table */ - mem_heap_t* heap) /* in: temporary heap */ -{ - ut_ad(table); - ut_ad(table->n_def == table->n_cols - DATA_N_SYS_COLS); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - ut_ad(!table->cached); - - /* NOTE: the system columns MUST be added in the following order - (so that they can be indexed by the numerical value of DATA_ROW_ID, - etc.) and as the last columns of the table memory object. - The clustered index will not always physically contain all - system columns. */ - - dict_mem_table_add_col(table, heap, "DB_ROW_ID", DATA_SYS, - DATA_ROW_ID | DATA_NOT_NULL, - DATA_ROW_ID_LEN); -#if DATA_ROW_ID != 0 -#error "DATA_ROW_ID != 0" -#endif - dict_mem_table_add_col(table, heap, "DB_TRX_ID", DATA_SYS, - DATA_TRX_ID | DATA_NOT_NULL, - DATA_TRX_ID_LEN); -#if DATA_TRX_ID != 1 -#error "DATA_TRX_ID != 1" -#endif - dict_mem_table_add_col(table, heap, "DB_ROLL_PTR", DATA_SYS, - DATA_ROLL_PTR | DATA_NOT_NULL, - DATA_ROLL_PTR_LEN); -#if DATA_ROLL_PTR != 2 -#error "DATA_ROLL_PTR != 2" -#endif - - /* This check reminds that if a new system column is added to - the program, it should be dealt with here */ -#if DATA_N_SYS_COLS != 3 -#error "DATA_N_SYS_COLS != 3" -#endif -} - -/************************************************************************** -Adds a table object to the dictionary cache. 
*/ - -void -dict_table_add_to_cache( -/*====================*/ - dict_table_t* table, /* in: table */ - mem_heap_t* heap) /* in: temporary heap */ -{ - ulint fold; - ulint id_fold; - ulint i; - ulint row_len; - - /* The lower limit for what we consider a "big" row */ -#define BIG_ROW_SIZE 1024 - - ut_ad(mutex_own(&(dict_sys->mutex))); - - dict_table_add_system_columns(table, heap); - - table->cached = TRUE; - - fold = ut_fold_string(table->name); - id_fold = ut_fold_dulint(table->id); - - row_len = 0; - for (i = 0; i < table->n_def; i++) { - ulint col_len = dict_col_get_max_size( - dict_table_get_nth_col(table, i)); - - row_len += col_len; - - /* If we have a single unbounded field, or several gigantic - fields, mark the maximum row size as BIG_ROW_SIZE. */ - if (row_len >= BIG_ROW_SIZE || col_len >= BIG_ROW_SIZE) { - row_len = BIG_ROW_SIZE; - - break; - } - } - - table->big_rows = row_len >= BIG_ROW_SIZE; - - /* Look for a table with the same name: error if such exists */ - { - dict_table_t* table2; - HASH_SEARCH(name_hash, dict_sys->table_hash, fold, table2, - (ut_strcmp(table2->name, table->name) == 0)); - ut_a(table2 == NULL); - } - - /* Look for a table with the same id: error if such exists */ - { - dict_table_t* table2; - HASH_SEARCH(id_hash, dict_sys->table_id_hash, id_fold, table2, - (ut_dulint_cmp(table2->id, table->id) == 0)); - ut_a(table2 == NULL); - } - - /* Add table to hash table of tables */ - HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, - table); - - /* Add table to hash table of tables based on table id */ - HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, id_fold, - table); - /* Add table to LRU list of tables */ - UT_LIST_ADD_FIRST(table_LRU, dict_sys->table_LRU, table); - - dict_sys->size += mem_heap_get_size(table->heap); -} - -/************************************************************************** -Looks for an index with the given id. 
NOTE that we do not reserve -the dictionary mutex: this function is for emergency purposes like -printing info of a corrupt database page! */ - -dict_index_t* -dict_index_find_on_id_low( -/*======================*/ - /* out: index or NULL if not found from cache */ - dulint id) /* in: index id */ -{ - dict_table_t* table; - dict_index_t* index; - - table = UT_LIST_GET_FIRST(dict_sys->table_LRU); - - while (table) { - index = dict_table_get_first_index(table); - - while (index) { - if (0 == ut_dulint_cmp(id, index->id)) { - /* Found */ - - return(index); - } - - index = dict_table_get_next_index(index); - } - - table = UT_LIST_GET_NEXT(table_LRU, table); - } - - return(NULL); -} - -/************************************************************************** -Renames a table object. */ - -ibool -dict_table_rename_in_cache( -/*=======================*/ - /* out: TRUE if success */ - dict_table_t* table, /* in: table */ - const char* new_name, /* in: new name */ - ibool rename_also_foreigns)/* in: in ALTER TABLE we want - to preserve the original table name - in constraints which reference it */ -{ - dict_foreign_t* foreign; - dict_index_t* index; - ulint fold; - ulint old_size; - char* old_name; - ibool success; - - ut_ad(table); - ut_ad(mutex_own(&(dict_sys->mutex))); - - old_size = mem_heap_get_size(table->heap); - - fold = ut_fold_string(new_name); - - /* Look for a table with the same name: error if such exists */ - { - dict_table_t* table2; - HASH_SEARCH(name_hash, dict_sys->table_hash, fold, table2, - (ut_strcmp(table2->name, new_name) == 0)); - if (table2) { - fprintf(stderr, - "InnoDB: Error: dictionary cache" - " already contains a table of name %s\n", - new_name); - return(FALSE); - } - } - - /* If the table is stored in a single-table tablespace, rename the - .ibd file */ - - if (table->space != 0) { - if (table->dir_path_of_temp_table != NULL) { - fprintf(stderr, - "InnoDB: Error: trying to rename a table" - " %s (%s) created with CREATE\n" - "InnoDB: 
TEMPORARY TABLE\n", - table->name, table->dir_path_of_temp_table); - success = FALSE; - } else { - success = fil_rename_tablespace( - table->name, table->space, new_name); - } - - if (!success) { - - return(FALSE); - } - } - - /* Remove table from the hash tables of tables */ - HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash, - ut_fold_string(table->name), table); - old_name = mem_heap_strdup(table->heap, table->name); - table->name = mem_heap_strdup(table->heap, new_name); - - /* Add table to hash table of tables */ - HASH_INSERT(dict_table_t, name_hash, dict_sys->table_hash, fold, - table); - dict_sys->size += (mem_heap_get_size(table->heap) - old_size); - - /* Update the table_name field in indexes */ - index = dict_table_get_first_index(table); - - while (index != NULL) { - index->table_name = table->name; - - index = dict_table_get_next_index(index); - } - - if (!rename_also_foreigns) { - /* In ALTER TABLE we think of the rename table operation - in the direction table -> temporary table (#sql...) - as dropping the table with the old name and creating - a new with the new name. Thus we kind of drop the - constraints from the dictionary cache here. The foreign key - constraints will be inherited to the new table from the - system tables through a call of dict_load_foreigns. 
*/ - - /* Remove the foreign constraints from the cache */ - foreign = UT_LIST_GET_LAST(table->foreign_list); - - while (foreign != NULL) { - dict_foreign_remove_from_cache(foreign); - foreign = UT_LIST_GET_LAST(table->foreign_list); - } - - /* Reset table field in referencing constraints */ - - foreign = UT_LIST_GET_FIRST(table->referenced_list); - - while (foreign != NULL) { - foreign->referenced_table = NULL; - foreign->referenced_index = NULL; - - foreign = UT_LIST_GET_NEXT(referenced_list, foreign); - } - - /* Make the list of referencing constraints empty */ - - UT_LIST_INIT(table->referenced_list); - - return(TRUE); - } - - /* Update the table name fields in foreign constraints, and update also - the constraint id of new format >= 4.0.18 constraints. Note that at - this point we have already changed table->name to the new name. */ - - foreign = UT_LIST_GET_FIRST(table->foreign_list); - - while (foreign != NULL) { - if (ut_strlen(foreign->foreign_table_name) - < ut_strlen(table->name)) { - /* Allocate a longer name buffer; - TODO: store buf len to save memory */ - - foreign->foreign_table_name - = mem_heap_alloc(foreign->heap, - ut_strlen(table->name) + 1); - } - - strcpy(foreign->foreign_table_name, table->name); - - if (strchr(foreign->id, '/')) { - ulint db_len; - char* old_id; - - /* This is a >= 4.0.18 format id */ - - old_id = mem_strdup(foreign->id); - - if (ut_strlen(foreign->id) > ut_strlen(old_name) - + ((sizeof dict_ibfk) - 1) - && !memcmp(foreign->id, old_name, - ut_strlen(old_name)) - && !memcmp(foreign->id + ut_strlen(old_name), - dict_ibfk, (sizeof dict_ibfk) - 1)) { - - /* This is a generated >= 4.0.18 format id */ - - if (strlen(table->name) > strlen(old_name)) { - foreign->id = mem_heap_alloc( - foreign->heap, - strlen(table->name) - + strlen(old_id) + 1); - } - - /* Replace the prefix 'databasename/tablename' - with the new names */ - strcpy(foreign->id, table->name); - strcat(foreign->id, - old_id + ut_strlen(old_name)); - } else { - /* 
This is a >= 4.0.18 format id where the user - gave the id name */ - db_len = dict_get_db_name_len(table->name) + 1; - - if (dict_get_db_name_len(table->name) - > dict_get_db_name_len(foreign->id)) { - - foreign->id = mem_heap_alloc( - foreign->heap, - db_len + strlen(old_id) + 1); - } - - /* Replace the database prefix in id with the - one from table->name */ - - ut_memcpy(foreign->id, table->name, db_len); - - strcpy(foreign->id + db_len, - dict_remove_db_name(old_id)); - } - - mem_free(old_id); - } - - foreign = UT_LIST_GET_NEXT(foreign_list, foreign); - } - - foreign = UT_LIST_GET_FIRST(table->referenced_list); - - while (foreign != NULL) { - if (ut_strlen(foreign->referenced_table_name) - < ut_strlen(table->name)) { - /* Allocate a longer name buffer; - TODO: store buf len to save memory */ - - foreign->referenced_table_name = mem_heap_alloc( - foreign->heap, strlen(table->name) + 1); - } - - strcpy(foreign->referenced_table_name, table->name); - - foreign = UT_LIST_GET_NEXT(referenced_list, foreign); - } - - return(TRUE); -} - -/************************************************************************** -Change the id of a table object in the dictionary cache. This is used in -DISCARD TABLESPACE. */ - -void -dict_table_change_id_in_cache( -/*==========================*/ - dict_table_t* table, /* in: table object already in cache */ - dulint new_id) /* in: new id to set */ -{ - ut_ad(table); - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - /* Remove the table from the hash table of id's */ - - HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash, - ut_fold_dulint(table->id), table); - table->id = new_id; - - /* Add the table back to the hash table */ - HASH_INSERT(dict_table_t, id_hash, dict_sys->table_id_hash, - ut_fold_dulint(table->id), table); -} - -/************************************************************************** -Removes a table object from the dictionary cache. 
*/ - -void -dict_table_remove_from_cache( -/*=========================*/ - dict_table_t* table) /* in, own: table */ -{ - dict_foreign_t* foreign; - dict_index_t* index; - ulint size; - - ut_ad(table); - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - -#if 0 - fputs("Removing table ", stderr); - ut_print_name(stderr, table->name, ULINT_UNDEFINED); - fputs(" from dictionary cache\n", stderr); -#endif - - /* Remove the foreign constraints from the cache */ - foreign = UT_LIST_GET_LAST(table->foreign_list); - - while (foreign != NULL) { - dict_foreign_remove_from_cache(foreign); - foreign = UT_LIST_GET_LAST(table->foreign_list); - } - - /* Reset table field in referencing constraints */ - - foreign = UT_LIST_GET_FIRST(table->referenced_list); - - while (foreign != NULL) { - foreign->referenced_table = NULL; - foreign->referenced_index = NULL; - - foreign = UT_LIST_GET_NEXT(referenced_list, foreign); - } - - /* Remove the indexes from the cache */ - index = UT_LIST_GET_LAST(table->indexes); - - while (index != NULL) { - dict_index_remove_from_cache(table, index); - index = UT_LIST_GET_LAST(table->indexes); - } - - /* Remove table from the hash tables of tables */ - HASH_DELETE(dict_table_t, name_hash, dict_sys->table_hash, - ut_fold_string(table->name), table); - HASH_DELETE(dict_table_t, id_hash, dict_sys->table_id_hash, - ut_fold_dulint(table->id), table); - - /* Remove table from LRU list of tables */ - UT_LIST_REMOVE(table_LRU, dict_sys->table_LRU, table); - - size = mem_heap_get_size(table->heap); - - ut_ad(dict_sys->size >= size); - - dict_sys->size -= size; - - dict_mem_table_free(table); -} - -/************************************************************************* -Gets the column position in the clustered index. 
*/ - -ulint -dict_col_get_clust_pos_noninline( -/*=============================*/ - const dict_col_t* col, /* in: table column */ - const dict_index_t* clust_index) /* in: clustered index */ -{ - return(dict_col_get_clust_pos(col, clust_index)); -} - -/******************************************************************** -If the given column name is reserved for InnoDB system columns, return -TRUE. */ - -ibool -dict_col_name_is_reserved( -/*======================*/ - /* out: TRUE if name is reserved */ - const char* name) /* in: column name */ -{ - /* This check reminds that if a new system column is added to - the program, it should be dealt with here. */ -#if DATA_N_SYS_COLS != 3 -#error "DATA_N_SYS_COLS != 3" -#endif - - static const char* reserved_names[] = { - "DB_ROW_ID", "DB_TRX_ID", "DB_ROLL_PTR" - }; - - ulint i; - - for (i = 0; i < UT_ARR_SIZE(reserved_names); i++) { - if (strcmp(name, reserved_names[i]) == 0) { - - return(TRUE); - } - } - - return(FALSE); -} - -/************************************************************************** -Adds an index to the dictionary cache. */ - -void -dict_index_add_to_cache( -/*====================*/ - dict_table_t* table, /* in: table on which the index is */ - dict_index_t* index, /* in, own: index; NOTE! The index memory - object is freed in this function! 
*/ - ulint page_no)/* in: root page number of the index */ -{ - dict_index_t* new_index; - ulint n_ord; - ulint i; - - ut_ad(index); - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_ad(index->n_def == index->n_fields); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - ut_ad(mem_heap_validate(index->heap)); - -#ifdef UNIV_DEBUG - { - dict_index_t* index2; - index2 = UT_LIST_GET_FIRST(table->indexes); - - while (index2 != NULL) { - ut_ad(ut_strcmp(index->name, index2->name) != 0); - - index2 = UT_LIST_GET_NEXT(indexes, index2); - } - } -#endif /* UNIV_DEBUG */ - - ut_a(!(index->type & DICT_CLUSTERED) - || UT_LIST_GET_LEN(table->indexes) == 0); - - dict_index_find_cols(table, index); - - /* Build the cache internal representation of the index, - containing also the added system fields */ - - if (index->type & DICT_CLUSTERED) { - new_index = dict_index_build_internal_clust(table, index); - } else { - new_index = dict_index_build_internal_non_clust(table, index); - } - - new_index->search_info = btr_search_info_create(new_index->heap); - - /* Set the n_fields value in new_index to the actual defined - number of fields in the cache internal representation */ - - new_index->n_fields = new_index->n_def; - - /* Add the new index as the last index for the table */ - - UT_LIST_ADD_LAST(indexes, table->indexes, new_index); - new_index->table = table; - new_index->table_name = table->name; - - /* Increment the ord_part counts in columns which are ordering */ - - if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) { - n_ord = new_index->n_fields; - } else { - n_ord = dict_index_get_n_unique(new_index); - } - - for (i = 0; i < n_ord; i++) { - - dict_index_get_nth_field(new_index, i)->col->ord_part = 1; - } - - new_index->page = (unsigned int) page_no; - rw_lock_create(&new_index->lock, SYNC_INDEX_TREE); - - if (!UNIV_UNLIKELY(new_index->type & DICT_UNIVERSAL)) { - - new_index->stat_n_diff_key_vals = mem_heap_alloc( - new_index->heap, - (1 + dict_index_get_n_unique(new_index)) - * 
sizeof(ib_longlong)); - /* Give some sensible values to stat_n_... in case we do - not calculate statistics quickly enough */ - - for (i = 0; i <= dict_index_get_n_unique(new_index); i++) { - - new_index->stat_n_diff_key_vals[i] = 100; - } - } - - dict_sys->size += mem_heap_get_size(new_index->heap); - - dict_mem_index_free(index); -} - -/************************************************************************** -Removes an index from the dictionary cache. */ -static -void -dict_index_remove_from_cache( -/*=========================*/ - dict_table_t* table, /* in: table */ - dict_index_t* index) /* in, own: index */ -{ - ulint size; - ulint retries = 0; - btr_search_t* info; - - ut_ad(table && index); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(mutex_own(&(dict_sys->mutex))); - - /* We always create search info whether or not adaptive - hash index is enabled or not. */ - info = index->search_info; - ut_ad(info); - - /* We are not allowed to free the in-memory index struct - dict_index_t until all entries in the adaptive hash index - that point to any of the page belonging to his b-tree index - are dropped. This is so because dropping of these entries - require access to dict_index_t struct. To avoid such scenario - We keep a count of number of such pages in the search_info and - only free the dict_index_t struct when this count drops to - zero. */ - - for (;;) { - ulint ref_count = btr_search_info_get_ref_count(info); - if (ref_count == 0) { - break; - } - - /* Sleep for 10ms before trying again. */ - os_thread_sleep(10000); - ++retries; - - if (retries % 500 == 0) { - /* No luck after 5 seconds of wait. 
*/ - fprintf(stderr, "InnoDB: Error: Waited for" - " %lu secs for hash index" - " ref_count (%lu) to drop" - " to 0.\n" - "index: \"%s\"" - " table: \"%s\"\n", - retries/100, - ref_count, - index->name, - table->name); - } - - /* To avoid a hang here we commit suicide if the - ref_count doesn't drop to zero in 600 seconds. */ - if (retries >= 60000) { - ut_error; - } - } - - rw_lock_free(&index->lock); - - /* Remove the index from the list of indexes of the table */ - UT_LIST_REMOVE(indexes, table->indexes, index); - - size = mem_heap_get_size(index->heap); - - ut_ad(dict_sys->size >= size); - - dict_sys->size -= size; - - dict_mem_index_free(index); -} - -/*********************************************************************** -Tries to find column names for the index and sets the col field of the -index. */ -static -void -dict_index_find_cols( -/*=================*/ - dict_table_t* table, /* in: table */ - dict_index_t* index) /* in: index */ -{ - ulint i; - - ut_ad(table && index); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - ut_ad(mutex_own(&(dict_sys->mutex))); - - for (i = 0; i < index->n_fields; i++) { - ulint j; - dict_field_t* field = dict_index_get_nth_field(index, i); - - for (j = 0; j < table->n_cols; j++) { - if (!strcmp(dict_table_get_col_name(table, j), - field->name)) { - field->col = (dict_col_t*) - dict_table_get_nth_col(table, j); - - goto found; - } - } - - /* It is an error not to find a matching column. */ - ut_error; - - found: - ; - } -} - -/*********************************************************************** -Adds a column to index. 
*/
-
-void
-dict_index_add_col(
-/*===============*/
-	dict_index_t*	index,		/* in: index */
-	dict_table_t*	table,		/* in: table */
-	dict_col_t*	col,		/* in: column */
-	ulint		prefix_len)	/* in: column prefix length; 0 leaves
-					the fixed length of the field
-					unclamped */
-{
-	dict_field_t*	field;
-	const char*	col_name;
-
-	col_name = dict_table_get_col_name(table, dict_col_get_no(col));
-
-	dict_mem_index_add_field(index, col_name, prefix_len);
-
-	field = dict_index_get_nth_field(index, index->n_def - 1); /* presumably the field appended just above */
-
-	field->col = col;
-	field->fixed_len = (unsigned int) dict_col_get_fixed_size(col);
-
-	if (prefix_len && field->fixed_len > prefix_len) { /* a prefix caps the fixed length */
-		field->fixed_len = (unsigned int) prefix_len;
-	}
-
-	/* Long fixed-length fields that need external storage are treated as
-	variable-length fields, so that the extern flag can be embedded in
-	the length word. */
-
-	if (field->fixed_len > DICT_MAX_INDEX_COL_LEN) {
-		field->fixed_len = 0;
-	}
-#if DICT_MAX_INDEX_COL_LEN != 768
-	/* The comparison limit above must be constant. If it were
-	changed, the disk format of some fixed-length columns would
-	change, which would be a disaster. */
-# error "DICT_MAX_INDEX_COL_LEN != 768"
-#endif
-
-	if (!(col->prtype & DATA_NOT_NULL)) { /* column may be NULL */
-		index->n_nullable++;
-	}
-}
-
-/***********************************************************************
-Copies fields contained in index2 to index1. Each copied field is appended
-to index1 through dict_index_add_col() with the column and prefix length
-of the source field. */
-static
-void
-dict_index_copy(
-/*============*/
-	dict_index_t*	index1,	/* in: index to copy to */
-	dict_index_t*	index2,	/* in: index to copy from */
-	dict_table_t*	table,	/* in: table */
-	ulint		start,	/* in: first position to copy */
-	ulint		end)	/* in: one past the last position to copy;
-				positions start <= i < end are copied */
-{
-	dict_field_t*	field;
-	ulint		i;
-
-	/* Copy fields contained in index2 */
-
-	for (i = start; i < end; i++) {
-
-		field = dict_index_get_nth_field(index2, i);
-		dict_index_add_col(index1, table, field->col,
-				   field->prefix_len);
-	}
-}
-
-/***********************************************************************
-Copies types of fields contained in index to tuple.
*/
-
-void
-dict_index_copy_types(
-/*==================*/
-	dtuple_t*	tuple,		/* in: data tuple */
-	dict_index_t*	index,		/* in: index */
-	ulint		n_fields)	/* in: number of field types to copy */
-{
-	ulint		pos;
-
-	if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
-		/* A universal index carries no per-column types: the
-		first n_fields tuple fields are all typed as binary */
-		dtuple_set_types_binary(tuple, n_fields);
-	} else {
-		/* Copy the type of the column behind the n'th index field
-		into the n'th tuple field */
-		for (pos = 0; pos < n_fields; pos++) {
-			dict_field_t*	index_field
-				= dict_index_get_nth_field(index, pos);
-			dtype_t*	tuple_type
-				= dfield_get_type(
-					dtuple_get_nth_field(tuple, pos));
-
-			dict_col_copy_type(dict_field_get_col(index_field),
-					   tuple_type);
-		}
-	}
-}
-
-/***********************************************************************
-Copies types of columns contained in table to tuple. The n'th tuple field
-receives the type of the n'th table column, for every field of the tuple. */
-
-void
-dict_table_copy_types(
-/*==================*/
-	dtuple_t*	tuple,	/* in: data tuple */
-	dict_table_t*	table)	/* in: table */
-{
-	ulint		pos;
-
-	for (pos = 0; pos < dtuple_get_n_fields(tuple); pos++) {
-		dtype_t*	tuple_type
-			= dfield_get_type(dtuple_get_nth_field(tuple, pos));
-
-		dict_col_copy_type(dict_table_get_nth_col(table, pos),
-				   tuple_type);
-	}
-}
-
-/***********************************************************************
-Builds the internal dictionary cache representation for a clustered
-index, containing also system fields not defined by the user.
*/
-static
-dict_index_t*
-dict_index_build_internal_clust(
-/*============================*/
-				/* out, own: the internal representation
-				of the clustered index */
-	dict_table_t*	table,	/* in: table */
-	dict_index_t*	index)	/* in: user representation of a clustered
-				index */
-{
-	dict_index_t*	new_index;
-	dict_field_t*	field;
-	ulint		fixed_size;
-	ulint		trx_id_pos;
-	ulint		i;
-	ibool*		indexed;
-
-	ut_ad(table && index);
-	ut_ad(index->type & DICT_CLUSTERED);
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
-	/* Create a new index object with certainly enough fields:
-	room for the user-defined fields plus every column of the table */
-	new_index = dict_mem_index_create(table->name,
-					  index->name, table->space,
-					  index->type,
-					  index->n_fields + table->n_cols);
-
-	/* Copy other relevant data from the old index struct to the new
-	struct: it inherits the values */
-
-	new_index->n_user_defined_cols = index->n_fields;
-
-	new_index->id = index->id;
-
-	/* Copy the fields of index */
-	dict_index_copy(new_index, index, table, 0, index->n_fields);
-
-	if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
-		/* No fixed number of fields determines an entry uniquely */
-
-		new_index->n_uniq = REC_MAX_N_FIELDS;
-
-	} else if (index->type & DICT_UNIQUE) {
-		/* Only the fields defined so far are needed to identify
-		the index entry uniquely */
-
-		new_index->n_uniq = new_index->n_def;
-	} else {
-		/* Also the row id is needed to identify the entry */
-		new_index->n_uniq = 1 + new_index->n_def;
-	}
-
-	new_index->trx_id_offset = 0;
-
-	if (!(index->type & DICT_IBUF)) {
-		/* Add system columns; first remember the position that
-		the trx id column will occupy. The row id column is added
-		before it only when the index is not unique. */
-
-		trx_id_pos = new_index->n_def;
-
-#if DATA_ROW_ID != 0
-# error "DATA_ROW_ID != 0"
-#endif
-#if DATA_TRX_ID != 1
-# error "DATA_TRX_ID != 1"
-#endif
-#if DATA_ROLL_PTR != 2
-# error "DATA_ROLL_PTR != 2"
-#endif
-
-		if (!(index->type & DICT_UNIQUE)) {
-			dict_index_add_col(new_index, table, (dict_col_t*)
-					   dict_table_get_sys_col(
-						   table, DATA_ROW_ID),
-					   0);
-			trx_id_pos++;
-		}
-
-		dict_index_add_col(new_index, table, (dict_col_t*)
-				   dict_table_get_sys_col(table, DATA_TRX_ID),
-				   0);
-
-		dict_index_add_col(new_index, table, (dict_col_t*)
-				   dict_table_get_sys_col(table,
-							  DATA_ROLL_PTR),
-				   0);
-
-		for (i = 0; i < trx_id_pos; i++) { /* sum the fixed sizes of the fields before trx id */
-
-			fixed_size = dict_col_get_fixed_size(
-				dict_index_get_nth_col(new_index, i));
-
-			if (fixed_size == 0) { /* not fixed-size: the offset is left as 0 */
-				new_index->trx_id_offset = 0;
-
-				break;
-			}
-
-			if (dict_index_get_nth_field(new_index, i)->prefix_len
-			    > 0) { /* column prefix: the offset is left as 0 */
-				new_index->trx_id_offset = 0;
-
-				break;
-			}
-
-			new_index->trx_id_offset += (unsigned int) fixed_size;
-		}
-
-	}
-
-	/* Remember the table columns already contained in new_index;
-	the array has one zero-initialized flag per table column */
-	indexed = mem_alloc(table->n_cols * sizeof *indexed);
-	memset(indexed, 0, table->n_cols * sizeof *indexed);
-
-	/* Mark as TRUE the table columns already contained in new_index */
-	for (i = 0; i < new_index->n_def; i++) {
-
-		field = dict_index_get_nth_field(new_index, i);
-
-		/* If there is only a prefix of the column in the index
-		field, do not mark the column as contained in the index */
-
-		if (field->prefix_len == 0) {
-
-			indexed[field->col->ind] = TRUE;
-		}
-	}
-
-	/* Add to new_index non-system columns of table not yet included
-	there; the loop bound skips the last DATA_N_SYS_COLS columns */
-	for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) {
-
-		dict_col_t*	col = (dict_col_t*)
-			dict_table_get_nth_col(table, i);
-		ut_ad(col->mtype != DATA_SYS);
-
-		if (!indexed[col->ind]) {
-			dict_index_add_col(new_index, table, col, 0);
-		}
-	}
-
-	mem_free(indexed);
-
-	ut_ad((index->type & DICT_IBUF)
-	      || (UT_LIST_GET_LEN(table->indexes) == 0));
-
-	new_index->cached = TRUE;
-
-	return(new_index);
-}
-
-/***********************************************************************
-Builds the internal dictionary cache representation for a non-clustered
-index, containing also system fields not defined by the user.
*/
-static
-dict_index_t*
-dict_index_build_internal_non_clust(
-/*================================*/
-				/* out, own: the internal representation
-				of the non-clustered index */
-	dict_table_t*	table,	/* in: table */
-	dict_index_t*	index)	/* in: user representation of a non-clustered
-				index */
-{
-	dict_field_t*	field;
-	dict_index_t*	new_index;
-	dict_index_t*	clust_index;
-	ulint		i;
-	ibool*		indexed;
-
-	ut_ad(table && index);
-	ut_ad(0 == (index->type & DICT_CLUSTERED));
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-	ut_ad(table->magic_n == DICT_TABLE_MAGIC_N);
-
-	/* The clustered index should be the first in the list of indexes */
-	clust_index = UT_LIST_GET_FIRST(table->indexes);
-
-	ut_ad(clust_index);
-	ut_ad(clust_index->type & DICT_CLUSTERED);
-	ut_ad(!(clust_index->type & DICT_UNIVERSAL));
-
-	/* Create a new index with room for the user-defined fields, one
-	more field, and the unique fields of the clustered index */
-	new_index = dict_mem_index_create(
-		table->name, index->name, index->space, index->type,
-		index->n_fields + 1 + clust_index->n_uniq);
-
-	/* Copy other relevant data from the old index
-	struct to the new struct: it inherits the values */
-
-	new_index->n_user_defined_cols = index->n_fields;
-
-	new_index->id = index->id;
-
-	/* Copy fields from index to new_index */
-	dict_index_copy(new_index, index, table, 0, index->n_fields);
-
-	/* Remember the table columns already contained in new_index;
-	the array has one zero-initialized flag per table column */
-	indexed = mem_alloc(table->n_cols * sizeof *indexed);
-	memset(indexed, 0, table->n_cols * sizeof *indexed);
-
-	/* Mark as TRUE the table columns already contained in new_index */
-	for (i = 0; i < new_index->n_def; i++) {
-
-		field = dict_index_get_nth_field(new_index, i);
-
-		/* If there is only a prefix of the column in the index
-		field, do not mark the column as contained in the index */
-
-		if (field->prefix_len == 0) {
-
-			indexed[field->col->ind] = TRUE;
-		}
-	}
-
-	/* Add to new_index the columns necessary to determine the clustered
-	index entry uniquely */
-
-	for (i = 0; i < clust_index->n_uniq; i++) {
-
-		field = dict_index_get_nth_field(clust_index, i);
-
-		if (!indexed[field->col->ind]) {
-			dict_index_add_col(new_index, table, field->col,
-					   field->prefix_len);
-		}
-	}
-
-	mem_free(indexed);
-
-	if ((index->type) & DICT_UNIQUE) { /* the user-defined fields alone are unique */
-		new_index->n_uniq = index->n_fields;
-	} else {
-		new_index->n_uniq = new_index->n_def;
-	}
-
-	/* Set the n_fields value in new_index to the actual defined
-	number of fields */
-
-	new_index->n_fields = new_index->n_def;
-
-	new_index->cached = TRUE;
-
-	return(new_index);
-}
-
-/*====================== FOREIGN KEY PROCESSING ========================*/
-
-/*************************************************************************
-Checks if a table is referenced by foreign keys, i.e. whether its
-referenced_list is non-empty. */
-
-ibool
-dict_table_referenced_by_foreign_key(
-/*=================================*/
-				/* out: TRUE if table is referenced by a
-				foreign key */
-	dict_table_t*	table)	/* in: InnoDB table */
-{
-	if (UT_LIST_GET_LEN(table->referenced_list) > 0) {
-
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/*************************************************************************
-Frees a foreign key struct by freeing the memory heap stored in it. */
-static
-void
-dict_foreign_free(
-/*==============*/
-	dict_foreign_t*	foreign)	/* in, own: foreign key struct */
-{
-	mem_heap_free(foreign->heap);
-}
-
-/**************************************************************************
-Removes a foreign constraint struct from the dictionary cache.
*/
-static
-void
-dict_foreign_remove_from_cache(
-/*===========================*/
-	dict_foreign_t*	foreign)	/* in, own: foreign constraint */
-{
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-	ut_a(foreign);
-
-	if (foreign->referenced_table) { /* unlink from the referenced table, if one is set */
-		UT_LIST_REMOVE(referenced_list,
-			       foreign->referenced_table->referenced_list,
-			       foreign);
-	}
-
-	if (foreign->foreign_table) { /* unlink from the foreign table, if one is set */
-		UT_LIST_REMOVE(foreign_list,
-			       foreign->foreign_table->foreign_list,
-			       foreign);
-	}
-
-	dict_foreign_free(foreign); /* frees foreign->heap */
-}
-
-/**************************************************************************
-Looks for the foreign constraint from the foreign and referenced lists
-of a table. The foreign_list is searched first, then the referenced_list;
-ids are compared with ut_strcmp(). */
-static
-dict_foreign_t*
-dict_foreign_find(
-/*==============*/
-				/* out: foreign constraint, or NULL if
-				no constraint with this id is in
-				either list */
-	dict_table_t*	table,	/* in: table object */
-	const char*	id)	/* in: foreign constraint id */
-{
-	dict_foreign_t*	foreign;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	foreign = UT_LIST_GET_FIRST(table->foreign_list);
-
-	while (foreign) {
-		if (ut_strcmp(id, foreign->id) == 0) {
-
-			return(foreign);
-		}
-
-		foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
-	}
-
-	foreign = UT_LIST_GET_FIRST(table->referenced_list);
-
-	while (foreign) {
-		if (ut_strcmp(id, foreign->id) == 0) {
-
-			return(foreign);
-		}
-
-		foreign = UT_LIST_GET_NEXT(referenced_list, foreign);
-	}
-
-	return(NULL);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
-Tries to find an index whose first fields are the columns in the array,
-in the same order. Column names are compared without case sensitivity and
-column prefix fields never match. When check_null is nonzero, the search
-is abandoned with NULL as soon as a name-matching column is declared
-NOT NULL. */
-static
-dict_index_t*
-dict_foreign_find_index(
-/*====================*/
-				/* out: matching index, NULL if not found */
-	dict_table_t*	table,	/* in: table */
-	const char**	columns,/* in: array of column names */
-	ulint		n_cols,	/* in: number of columns */
-	dict_index_t*	types_idx, /* in: NULL or an index to whose types the
-				   column types must match */
-	ibool		check_charsets,
-				/* in: whether to check charsets.
-				only has an effect if types_idx != NULL */
-	ulint		check_null)
-				/* in: nonzero if none of the columns must
-				be declared NOT NULL */
-{
-	dict_index_t*	index;
-	dict_field_t*	field;
-	const char*	col_name;
-	ulint		i;
-
-	index = dict_table_get_first_index(table);
-
-	while (index != NULL) {
-		if (dict_index_get_n_fields(index) >= n_cols) {
-
-			for (i = 0; i < n_cols; i++) {
-				field = dict_index_get_nth_field(index, i);
-
-				col_name = dict_table_get_col_name(
-					table, dict_col_get_no(field->col));
-
-				if (field->prefix_len != 0) {
-					/* We do not accept column prefix
-					indexes here */
-
-					break;
-				}
-
-				if (0 != innobase_strcasecmp(columns[i],
-							     col_name)) {
-					break;
-				}
-
-				if (check_null
-				    && (field->col->prtype & DATA_NOT_NULL)) {
-
-					return(NULL);
-				}
-
-				if (types_idx && !cmp_cols_are_equal(
-					    dict_index_get_nth_col(index, i),
-					    dict_index_get_nth_col(types_idx,
-								   i),
-					    check_charsets)) {
-
-					break;
-				}
-			}
-
-			if (i == n_cols) {
-				/* We found a matching index: the loop ran
-				to completion without a break */
-
-				return(index);
-			}
-		}
-
-		index = dict_table_get_next_index(index);
-	}
-
-	return(NULL);
-}
-
-/**************************************************************************
-Report an error in a foreign key definition. Rewinds the stream and writes
-a timestamp followed by the heading line naming the table. */
-static
-void
-dict_foreign_error_report_low(
-/*==========================*/
-	FILE*		file,	/* in: output stream */
-	const char*	name)	/* in: table name */
-{
-	rewind(file);
-	ut_print_timestamp(file);
-	fprintf(file, " Error in foreign key constraint of table %s:\n",
-		name);
-}
-
-/**************************************************************************
-Report an error in a foreign key definition.
The whole report is written while holding dict_foreign_err_mutex: the
heading, the message, the constraint in CREATE format and, when the
constraint has a foreign index, the name of that index. */
-static
-void
-dict_foreign_error_report(
-/*======================*/
-	FILE*		file,	/* in: output stream */
-	dict_foreign_t*	fk,	/* in: foreign key constraint */
-	const char*	msg)	/* in: the error message */
-{
-	mutex_enter(&dict_foreign_err_mutex);
-	dict_foreign_error_report_low(file, fk->foreign_table_name);
-	fputs(msg, file);
-	fputs(" Constraint:\n", file);
-	dict_print_info_on_foreign_key_in_create_format(file, NULL, fk, TRUE);
-	putc('\n', file);
-	if (fk->foreign_index) {
-		fputs("The index in the foreign key in table is ", file);
-		ut_print_name(file, NULL, FALSE, fk->foreign_index->name);
-		fputs("\n"
-		      "See http://dev.mysql.com/doc/refman/5.1/en/"
-		      "innodb-foreign-key-constraints.html\n"
-		      "for correct foreign key definition.\n",
-		      file);
-	}
-	mutex_exit(&dict_foreign_err_mutex);
-}
-
-/**************************************************************************
-Adds a foreign key constraint object to the dictionary cache. May free
-the object if there already is an object with the same identifier in.
-At least one of the foreign table and the referenced table must already
-be in the dictionary cache!
*/
-
-ulint
-dict_foreign_add_to_cache(
-/*======================*/
-					/* out: DB_SUCCESS or error code;
-					the visible error paths return
-					DB_CANNOT_ADD_CONSTRAINT */
-	dict_foreign_t*	foreign,	/* in, own: foreign key constraint */
-	ibool		check_charsets)	/* in: TRUE=check charset
-					compatibility */
-{
-	dict_table_t*	for_table;
-	dict_table_t*	ref_table;
-	dict_foreign_t*	for_in_cache		= NULL;
-	dict_index_t*	index;
-	ibool		added_to_referenced_list= FALSE;
-	FILE*		ef			= dict_foreign_err_file;
-
-	ut_ad(mutex_own(&(dict_sys->mutex)));
-
-	for_table = dict_table_check_if_in_cache_low(
-		foreign->foreign_table_name);
-
-	ref_table = dict_table_check_if_in_cache_low(
-		foreign->referenced_table_name);
-	ut_a(for_table || ref_table);
-
-	if (for_table) {
-		for_in_cache = dict_foreign_find(for_table, foreign->id);
-	}
-
-	if (!for_in_cache && ref_table) {
-		for_in_cache = dict_foreign_find(ref_table, foreign->id);
-	}
-
-	if (for_in_cache) {
-		/* Free the foreign object: a constraint with the same id
-		is already cached, so the cached one is used from here on */
-		mem_heap_free(foreign->heap);
-	} else {
-		for_in_cache = foreign;
-	}
-
-	if (for_in_cache->referenced_table == NULL && ref_table) {
-		index = dict_foreign_find_index(
-			ref_table,
-			(const char**) for_in_cache->referenced_col_names,
-			for_in_cache->n_fields, for_in_cache->foreign_index,
-			check_charsets, FALSE);
-
-		if (index == NULL) {
-			dict_foreign_error_report(
-				ef, for_in_cache,
-				"there is no index in referenced table"
-				" which would contain\n"
-				"the columns as the first columns,"
-				" or the data types in the\n"
-				"referenced table do not match"
-				" the ones in table.");
-
-			if (for_in_cache == foreign) { /* free only the caller's object, never a cached one */
-				mem_heap_free(foreign->heap);
-			}
-
-			return(DB_CANNOT_ADD_CONSTRAINT);
-		}
-
-		for_in_cache->referenced_table = ref_table;
-		for_in_cache->referenced_index = index;
-		UT_LIST_ADD_LAST(referenced_list,
-				 ref_table->referenced_list,
-				 for_in_cache);
-		added_to_referenced_list = TRUE;
-	}
-
-	if (for_in_cache->foreign_table == NULL && for_table) {
-		index = dict_foreign_find_index(
-			for_table,
-			(const char**) for_in_cache->foreign_col_names,
-			for_in_cache->n_fields,
-			for_in_cache->referenced_index, check_charsets,
-			for_in_cache->type
-			& (DICT_FOREIGN_ON_DELETE_SET_NULL
-			   | DICT_FOREIGN_ON_UPDATE_SET_NULL));
-
-		if (index == NULL) {
-			dict_foreign_error_report(
-				ef, for_in_cache,
-				"there is no index in the table"
-				" which would contain\n"
-				"the columns as the first columns,"
-				" or the data types in the\n"
-				"table do not match"
-				" the ones in the referenced table\n"
-				"or one of the ON ... SET NULL columns"
-				" is declared NOT NULL.");
-
-			if (for_in_cache == foreign) {
-				if (added_to_referenced_list) { /* undo the list insertion made above */
-					UT_LIST_REMOVE(
-						referenced_list,
-						ref_table->referenced_list,
-						for_in_cache);
-				}
-
-				mem_heap_free(foreign->heap);
-			}
-
-			return(DB_CANNOT_ADD_CONSTRAINT);
-		}
-
-		for_in_cache->foreign_table = for_table;
-		for_in_cache->foreign_index = index;
-		UT_LIST_ADD_LAST(foreign_list,
-				 for_table->foreign_list,
-				 for_in_cache);
-	}
-
-	return(DB_SUCCESS);
-}
-
-/*************************************************************************
-Scans from pointer onwards. Stops if is at the start of a copy of
-'string' where characters are compared without case sensitivity, and
-only outside `` or "" quotes. Stops also at '\0'. */
-
-const char*
-dict_scan_to(
-/*=========*/
-				/* out: scanned up to this */
-	const char*	ptr,	/* in: scan from */
-	const char*	string)	/* in: look for this */
-{
-	char		quote	= '\0';
-
-	for (; *ptr; ptr++) {
-		if (*ptr == quote) {
-			/* Closing quote character: do not look for
-			starting quote or the keyword. */
-			quote = '\0';
-		} else if (quote) {
-			/* Within quotes: do nothing. */
-		} else if (*ptr == '`' || *ptr == '"') {
-			/* Starting quote: remember the quote character. */
-			quote = *ptr;
-		} else {
-			/* Outside quotes: look for the keyword. The
-			comparison upper-cases both characters; the '\0'
-			that ends ptr never equals a character of string,
-			so the compare does not read past the end of ptr. */
-			ulint	i;
-			for (i = 0; string[i]; i++) {
-				if (toupper((int)(unsigned char)(ptr[i]))
-				    != toupper((int)(unsigned char)
-					       (string[i]))) {
-					goto nomatch;
-				}
-			}
-			break;
-nomatch:
-			;
-		}
-	}
-
-	return(ptr);
-}
-
-/*************************************************************************
-Accepts a specified string. Comparisons are case-insensitive. Leading
-whitespace is skipped, and the string is accepted only if dict_scan_to()
-finds it exactly at the first non-whitespace position. */
-static
-const char*
-dict_accept(
-/*========*/
-				/* out: if string was accepted, the pointer
-				is moved after that, else ptr is returned */
-	struct charset_info_st*	cs,/* in: the character set of ptr */
-	const char*	ptr,	/* in: scan from this */
-	const char*	string,	/* in: accept only this string as the next
-				non-whitespace string */
-	ibool*		success)/* out: TRUE if accepted */
-{
-	const char*	old_ptr = ptr;
-	const char*	old_ptr2;
-
-	*success = FALSE;
-
-	while (my_isspace(cs, *ptr)) {
-		ptr++;
-	}
-
-	old_ptr2 = ptr;
-
-	ptr = dict_scan_to(ptr, string);
-
-	if (*ptr == '\0' || old_ptr2 != ptr) { /* not found, or found only further on */
-		return(old_ptr);
-	}
-
-	*success = TRUE;
-
-	return(ptr + ut_strlen(string));
-}
-
-/*************************************************************************
-Scans an id. For the lexical definition of an 'id', see the code below.
-Strips backquotes or double quotes from around the id.
*/
-static
-const char*
-dict_scan_id(
-/*=========*/
-				/* out: scanned to */
-	struct charset_info_st*	cs,/* in: the character set of ptr */
-	const char*	ptr,	/* in: scanned to */
-	mem_heap_t*	heap,	/* in: heap where to allocate the id
-				(NULL=id will not be allocated, but it
-				will point to string near ptr) */
-	const char**	id,	/* out,own: the id; NULL if no id was
-				scannable */
-	ibool		table_id,/* in: TRUE=convert the allocated id
-				as a table name; FALSE=convert to UTF-8 */
-	ibool		accept_also_dot)
-				/* in: TRUE if also a dot can appear in a
-				non-quoted id; in a quoted id it can appear
-				always */
-{
-	char		quote	= '\0';
-	ulint		len	= 0;
-	const char*	s;
-	char*		str;
-	char*		dst;
-
-	*id = NULL;
-
-	while (my_isspace(cs, *ptr)) {
-		ptr++;
-	}
-
-	if (*ptr == '\0') {
-
-		return(ptr);
-	}
-
-	if (*ptr == '`' || *ptr == '"') {
-		quote = *ptr++;
-	}
-
-	s = ptr;
-
-	if (quote) {
-		for (;;) {
-			if (!*ptr) {
-				/* Syntax error: the closing quote is
-				missing; *id is left as NULL */
-				return(ptr);
-			}
-			if (*ptr == quote) {
-				ptr++;
-				if (*ptr != quote) { /* a doubled quote stays part of the id */
-					break;
-				}
-			}
-			ptr++;
-			len++;
-		}
-	} else {
-		while (!my_isspace(cs, *ptr) && *ptr != '(' && *ptr != ')'
-		       && (accept_also_dot || *ptr != '.')
-		       && *ptr != ',' && *ptr != '\0') {
-
-			ptr++;
-		}
-
-		len = ptr - s;
-	}
-
-	if (UNIV_UNLIKELY(!heap)) {
-		/* no heap given: id will point to source string */
-		*id = s;
-		return(ptr);
-	}
-
-	if (quote) {
-		char*	d;
-		str = d = mem_heap_alloc(heap, len + 1);
-		while (len--) {
-			if ((*d++ = *s++) == quote) { /* skip the second of a doubled quote */
-				s++;
-			}
-		}
-		*d++ = 0;
-		len = d - str;
-		ut_ad(*s == quote);
-		ut_ad(s + 1 == ptr);
-	} else {
-		str = mem_heap_strdupl(heap, s, len);
-	}
-
-	if (!table_id) {
-convert_id:
-		/* Convert the identifier from connection character set
-		to UTF-8. The output buffer is sized at 3 * len + 1 bytes. */
-		len = 3 * len + 1;
-		*id = dst = mem_heap_alloc(heap, len);
-
-		innobase_convert_from_id(dst, str, len);
-	} else if (!strncmp(str, srv_mysql50_table_name_prefix,
-			    sizeof srv_mysql50_table_name_prefix)) {
-		/* This is a pre-5.1 table name
-		containing chars other than [A-Za-z0-9].
-		Discard the prefix and use raw UTF-8 encoding.
-		NOTE(review): the sizeof arithmetic presumably relies on
-		the prefix array having no terminating NUL - confirm
-		against its declaration. */
-		str += sizeof srv_mysql50_table_name_prefix;
-		len -= sizeof srv_mysql50_table_name_prefix;
-		goto convert_id;
-	} else {
-		/* Encode using filename-safe characters. The output
-		buffer is sized at 5 * len + 1 bytes. */
-		len = 5 * len + 1;
-		*id = dst = mem_heap_alloc(heap, len);
-
-		innobase_convert_from_table_id(dst, str, len);
-	}
-
-	return(ptr);
-}
-
-/*************************************************************************
-Tries to scan a column name. When a table is given, the name is looked up
-among its columns without case sensitivity and, on a match, the scanned
-name is overwritten with the column name as stored in the table. */
-static
-const char*
-dict_scan_col(
-/*==========*/
-					/* out: scanned to */
-	struct charset_info_st*	cs,	/* in: the character set of ptr */
-	const char*		ptr,	/* in: scanned to */
-	ibool*			success,/* out: TRUE if success */
-	dict_table_t*		table,	/* in: table in which the column is */
-	const dict_col_t**	column,	/* out: pointer to column if success */
-	mem_heap_t*		heap,	/* in: heap where to allocate */
-	const char**		name)	/* out,own: the column name;
-					NULL if no name was scannable */
-{
-	ulint		i;
-
-	*success = FALSE;
-
-	ptr = dict_scan_id(cs, ptr, heap, name, FALSE, TRUE);
-
-	if (*name == NULL) {
-
-		return(ptr);	/* Syntax error */
-	}
-
-	if (table == NULL) { /* no table to look the name up in: accept it as scanned */
-		*success = TRUE;
-		*column = NULL;
-	} else {
-		for (i = 0; i < dict_table_get_n_cols(table); i++) {
-
-			const char*	col_name = dict_table_get_col_name(
-				table, i);
-
-			if (0 == innobase_strcasecmp(col_name, *name)) {
-				/* Found */
-
-				*success = TRUE;
-				*column = dict_table_get_nth_col(table, i);
-				strcpy((char*) *name, col_name);
-
-				break;
-			}
-		}
-	}
-
-	return(ptr);
-}
-
-/*************************************************************************
-Scans a table name from an SQL string.
*/
-static
-const char*
-dict_scan_table_name(
-/*=================*/
-				/* out: scanned to */
-	struct charset_info_st*	cs,/* in: the character set of ptr */
-	const char*	ptr,	/* in: scanned to */
-	dict_table_t**	table,	/* out: table object or NULL */
-	const char*	name,	/* in: foreign key table name */
-	ibool*		success,/* out: TRUE if ok name found */
-	mem_heap_t*	heap,	/* in: heap where to allocate the id */
-	const char**	ref_name)/* out,own: the table name;
-				NULL if no name was scannable */
-{
-	const char*	database_name	= NULL;
-	ulint		database_name_len = 0;
-	const char*	table_name	= NULL;
-	ulint		table_name_len;
-	const char*	scan_name;
-	char*		ref;
-
-	*success = FALSE;
-	*table = NULL;
-
-	ptr = dict_scan_id(cs, ptr, heap, &scan_name, TRUE, FALSE);
-
-	if (scan_name == NULL) {
-
-		return(ptr);	/* Syntax error */
-	}
-
-	if (*ptr == '.') {
-		/* We scanned the database name; scan also the table name */
-
-		ptr++;
-
-		database_name = scan_name;
-		database_name_len = strlen(database_name);
-
-		ptr = dict_scan_id(cs, ptr, heap, &table_name, TRUE, FALSE);
-
-		if (table_name == NULL) {
-
-			return(ptr);	/* Syntax error */
-		}
-	} else {
-		/* To be able to read table dumps made with InnoDB-4.0.17 or
-		earlier, we must allow the dot separator between the database
-		name and the table name also to appear within a quoted
-		identifier! InnoDB used to print a constraint as:
-		... REFERENCES `databasename.tablename` ...
-		starting from 4.0.18 it is
-		... REFERENCES `databasename`.`tablename` ... */
-		const char* s;
-
-		for (s = scan_name; *s; s++) {
-			if (*s == '.') {
-				database_name = scan_name;
-				database_name_len = s - scan_name;
-				scan_name = ++s;
-				break;/* to do: multiple dots? Only the
-				first dot splits the name here. */
-			}
-		}
-
-		table_name = scan_name;
-	}
-
-	if (database_name == NULL) {
-		/* Use the database name of the foreign key table */
-
-		database_name = name;
-		database_name_len = dict_get_db_name_len(name);
-	}
-
-	table_name_len = strlen(table_name);
-
-	/* Copy database_name, '/', table_name, '\0' */
-	ref = mem_heap_alloc(heap, database_name_len + table_name_len + 2);
-	memcpy(ref, database_name, database_name_len);
-	ref[database_name_len] = '/';
-	memcpy(ref + database_name_len + 1, table_name, table_name_len + 1);
-#ifndef __WIN__
-	if (srv_lower_case_table_names) {
-#endif /* !__WIN__ */
-		/* The table name is always put to lower case on Windows. */
-		innobase_casedn_str(ref);
-#ifndef __WIN__
-	}
-#endif /* !__WIN__ */
-
-	*success = TRUE;
-	*ref_name = ref;
-	*table = dict_table_get_low(ref);
-
-	return(ptr);
-}
-
-/*************************************************************************
-Skips one id. The id is allowed to contain also '.'. Nothing is allocated:
-dict_scan_id() is called with a NULL heap. */
-static
-const char*
-dict_skip_word(
-/*===========*/
-				/* out: scanned to */
-	struct charset_info_st*	cs,/* in: the character set of ptr */
-	const char*	ptr,	/* in: scanned to */
-	ibool*		success)/* out: TRUE if success, FALSE if just spaces
-				left in string or a syntax error */
-{
-	const char*	start;
-
-	*success = FALSE;
-
-	ptr = dict_scan_id(cs, ptr, NULL, &start, FALSE, TRUE);
-
-	if (start) {
-		*success = TRUE;
-	}
-
-	return(ptr);
-}
-
-/*************************************************************************
-Removes MySQL comments from an SQL string. A comment is either
-(a) '#' to the end of the line,
-(b) '--<space>' to the end of the line, or
-(c) '<slash><asterisk>' till the next '<asterisk><slash>' (like the familiar
-C comment syntax).
-Comment markers inside `` or "" quotes are copied through unchanged. */
-static
-char*
-dict_strip_comments(
-/*================*/
-					/* out, own: SQL string stripped from
-					comments; the caller must free this
-					with mem_free()! */
-	const char*	sql_string)	/* in: SQL string */
-{
-	char*		str;
-	const char*	sptr;
-	char*		ptr;
-	/* unclosed quote character (0 if none) */
-	char		quote = 0;
-
-	str = mem_alloc(strlen(sql_string) + 1);
-
-	sptr = sql_string;
-	ptr = str;
-
-	for (;;) {
-scan_more:
-		if (*sptr == '\0') {
-			*ptr = '\0';
-
-			ut_a(ptr <= str + strlen(sql_string));
-
-			return(str);
-		}
-
-		if (*sptr == quote) {
-			/* Closing quote character: do not look for
-			starting quote or comments. */
-			quote = 0;
-		} else if (quote) {
-			/* Within quotes: do not look for
-			starting quotes or comments. */
-		} else if (*sptr == '"' || *sptr == '`') {
-			/* Starting quote: remember the quote character. */
-			quote = *sptr;
-		} else if (*sptr == '#'
-			   || (sptr[0] == '-' && sptr[1] == '-'
-			       && sptr[2] == ' ')) {
-			for (;;) {
-				/* In Unix a newline is 0x0A while in Windows
-				it is 0x0D followed by 0x0A. The line
-				terminator itself is not skipped; it is
-				copied once scanning resumes. */
-
-				if (*sptr == (char)0x0A
-				    || *sptr == (char)0x0D
-				    || *sptr == '\0') {
-
-					goto scan_more;
-				}
-
-				sptr++;
-			}
-		} else if (!quote && *sptr == '/' && *(sptr + 1) == '*') {
-			for (;;) {
-				if (*sptr == '*' && *(sptr + 1) == '/') {
-
-					sptr += 2;
-
-					goto scan_more;
-				}
-
-				if (*sptr == '\0') {
-
-					goto scan_more;
-				}
-
-				sptr++;
-			}
-		}
-
-		*ptr = *sptr;
-
-		ptr++;
-		sptr++;
-	}
-}
-
-/*************************************************************************
-Finds the highest <number> for foreign key constraints of the table. Looks
-only at the >= 4.0.18-format id's, which are of the form
-databasename/tablename_ibfk_<number>.
*/ -static -ulint -dict_table_get_highest_foreign_id( -/*==============================*/ - /* out: highest number, 0 if table has no new - format foreign key constraints */ - dict_table_t* table) /* in: table in the dictionary memory cache */ -{ - dict_foreign_t* foreign; - char* endp; - ulint biggest_id = 0; - ulint id; - ulint len; - - ut_a(table); - - len = ut_strlen(table->name); - foreign = UT_LIST_GET_FIRST(table->foreign_list); - - while (foreign) { - if (ut_strlen(foreign->id) > ((sizeof dict_ibfk) - 1) + len - && 0 == ut_memcmp(foreign->id, table->name, len) - && 0 == ut_memcmp(foreign->id + len, - dict_ibfk, (sizeof dict_ibfk) - 1) - && foreign->id[len + ((sizeof dict_ibfk) - 1)] != '0') { - /* It is of the >= 4.0.18 format */ - - id = strtoul(foreign->id + len - + ((sizeof dict_ibfk) - 1), - &endp, 10); - if (*endp == '\0') { - ut_a(id != biggest_id); - - if (id > biggest_id) { - biggest_id = id; - } - } - } - - foreign = UT_LIST_GET_NEXT(foreign_list, foreign); - } - - return(biggest_id); -} - -/************************************************************************* -Reports a simple foreign key create clause syntax error. */ -static -void -dict_foreign_report_syntax_err( -/*===========================*/ - const char* name, /* in: table name */ - const char* start_of_latest_foreign, - /* in: start of the foreign key clause - in the SQL string */ - const char* ptr) /* in: place of the syntax error */ -{ - FILE* ef = dict_foreign_err_file; - - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, name); - fprintf(ef, "%s:\nSyntax error close to:\n%s\n", - start_of_latest_foreign, ptr); - mutex_exit(&dict_foreign_err_mutex); -} - -/************************************************************************* -Scans a table create SQL string and adds to the data dictionary the foreign -key constraints declared in the string. This function should be called after -the indexes for a table have been created. 
Each foreign key constraint must -be accompanied with indexes in both participating tables. The indexes are -allowed to contain more fields than mentioned in the constraint. */ -static -ulint -dict_create_foreign_constraints_low( -/*================================*/ - /* out: error code or DB_SUCCESS */ - trx_t* trx, /* in: transaction */ - mem_heap_t* heap, /* in: memory heap */ - struct charset_info_st* cs,/* in: the character set of sql_string */ - const char* sql_string, - /* in: CREATE TABLE or ALTER TABLE statement - where foreign keys are declared like: - FOREIGN KEY (a, b) REFERENCES table2(c, d), - table2 can be written also with the database - name before it: test.table2; the default - database is the database of parameter name */ - const char* name, /* in: table full name in the normalized form - database_name/table_name */ - ibool reject_fks) - /* in: if TRUE, fail with error code - DB_CANNOT_ADD_CONSTRAINT if any foreign - keys are found. */ -{ - dict_table_t* table; - dict_table_t* referenced_table; - dict_table_t* table_to_alter; - ulint highest_id_so_far = 0; - dict_index_t* index; - dict_foreign_t* foreign; - const char* ptr = sql_string; - const char* start_of_latest_foreign = sql_string; - FILE* ef = dict_foreign_err_file; - const char* constraint_name; - ibool success; - ulint error; - const char* ptr1; - const char* ptr2; - ulint i; - ulint j; - ibool is_on_delete; - ulint n_on_deletes; - ulint n_on_updates; - const dict_col_t*columns[500]; - const char* column_names[500]; - const char* referenced_table_name; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - table = dict_table_get_low(name); - - if (table == NULL) { - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, name); - fprintf(ef, - "Cannot find the table in the internal" - " data dictionary of InnoDB.\n" - "Create table statement:\n%s\n", sql_string); - mutex_exit(&dict_foreign_err_mutex); - - return(DB_ERROR); - } - - /* First check if we are actually doing an 
ALTER TABLE, and in that - case look for the table being altered */ - - ptr = dict_accept(cs, ptr, "ALTER", &success); - - if (!success) { - - goto loop; - } - - ptr = dict_accept(cs, ptr, "TABLE", &success); - - if (!success) { - - goto loop; - } - - /* We are doing an ALTER TABLE: scan the table name we are altering */ - - ptr = dict_scan_table_name(cs, ptr, &table_to_alter, name, - &success, heap, &referenced_table_name); - if (!success) { - fprintf(stderr, - "InnoDB: Error: could not find" - " the table being ALTERED in:\n%s\n", - sql_string); - - return(DB_ERROR); - } - - /* Starting from 4.0.18 and 4.1.2, we generate foreign key id's in the - format databasename/tablename_ibfk_<number>, where <number> is local - to the table; look for the highest <number> for table_to_alter, so - that we can assign to new constraints higher numbers. */ - - /* If we are altering a temporary table, the table name after ALTER - TABLE does not correspond to the internal table name, and - table_to_alter is NULL. TODO: should we fix this somehow? */ - - if (table_to_alter == NULL) { - highest_id_so_far = 0; - } else { - highest_id_so_far = dict_table_get_highest_foreign_id( - table_to_alter); - } - - /* Scan for foreign key declarations in a loop */ -loop: - /* Scan either to "CONSTRAINT" or "FOREIGN", whichever is closer */ - - ptr1 = dict_scan_to(ptr, "CONSTRAINT"); - ptr2 = dict_scan_to(ptr, "FOREIGN"); - - constraint_name = NULL; - - if (ptr1 < ptr2) { - /* The user may have specified a constraint name. Pick it so - that we can store 'databasename/constraintname' as the id of - of the constraint to system tables. 
*/ - ptr = ptr1; - - ptr = dict_accept(cs, ptr, "CONSTRAINT", &success); - - ut_a(success); - - if (!my_isspace(cs, *ptr) && *ptr != '"' && *ptr != '`') { - goto loop; - } - - while (my_isspace(cs, *ptr)) { - ptr++; - } - - /* read constraint name unless got "CONSTRAINT FOREIGN" */ - if (ptr != ptr2) { - ptr = dict_scan_id(cs, ptr, heap, - &constraint_name, FALSE, FALSE); - } - } else { - ptr = ptr2; - } - - if (*ptr == '\0') { - /* The proper way to reject foreign keys for temporary - tables would be to split the lexing and syntactical - analysis of foreign key clauses from the actual adding - of them, so that ha_innodb.cc could first parse the SQL - command, determine if there are any foreign keys, and - if so, immediately reject the command if the table is a - temporary one. For now, this kludge will work. */ - if (reject_fks && (UT_LIST_GET_LEN(table->foreign_list) > 0)) { - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - /**********************************************************/ - /* The following call adds the foreign key constraints - to the data dictionary system tables on disk */ - - error = dict_create_add_foreigns_to_dictionary( - highest_id_so_far, table, trx); - return(error); - } - - start_of_latest_foreign = ptr; - - ptr = dict_accept(cs, ptr, "FOREIGN", &success); - - if (!success) { - goto loop; - } - - if (!my_isspace(cs, *ptr)) { - goto loop; - } - - ptr = dict_accept(cs, ptr, "KEY", &success); - - if (!success) { - goto loop; - } - - ptr = dict_accept(cs, ptr, "(", &success); - - if (!success) { - /* MySQL allows also an index id before the '('; we - skip it */ - ptr = dict_skip_word(cs, ptr, &success); - - if (!success) { - dict_foreign_report_syntax_err( - name, start_of_latest_foreign, ptr); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - ptr = dict_accept(cs, ptr, "(", &success); - - if (!success) { - /* We do not flag a syntax error here because in an - ALTER TABLE we may also have DROP FOREIGN KEY abc */ - - goto loop; - } - } - - i = 0; - - 
/* Scan the columns in the first list */ -col_loop1: - ut_a(i < (sizeof column_names) / sizeof *column_names); - ptr = dict_scan_col(cs, ptr, &success, table, columns + i, - heap, column_names + i); - if (!success) { - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, name); - fprintf(ef, "%s:\nCannot resolve column name close to:\n%s\n", - start_of_latest_foreign, ptr); - mutex_exit(&dict_foreign_err_mutex); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - i++; - - ptr = dict_accept(cs, ptr, ",", &success); - - if (success) { - goto col_loop1; - } - - ptr = dict_accept(cs, ptr, ")", &success); - - if (!success) { - dict_foreign_report_syntax_err( - name, start_of_latest_foreign, ptr); - return(DB_CANNOT_ADD_CONSTRAINT); - } - - /* Try to find an index which contains the columns - as the first fields and in the right order */ - - index = dict_foreign_find_index(table, column_names, i, - NULL, TRUE, FALSE); - - if (!index) { - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, name); - fputs("There is no index in table ", ef); - ut_print_name(ef, NULL, TRUE, name); - fprintf(ef, " where the columns appear\n" - "as the first columns. Constraint:\n%s\n" - "See http://dev.mysql.com/doc/refman/5.1/en/" - "innodb-foreign-key-constraints.html\n" - "for correct foreign key definition.\n", - start_of_latest_foreign); - mutex_exit(&dict_foreign_err_mutex); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - ptr = dict_accept(cs, ptr, "REFERENCES", &success); - - if (!success || !my_isspace(cs, *ptr)) { - dict_foreign_report_syntax_err( - name, start_of_latest_foreign, ptr); - return(DB_CANNOT_ADD_CONSTRAINT); - } - - /* Let us create a constraint struct */ - - foreign = dict_mem_foreign_create(); - - if (constraint_name) { - ulint db_len; - - /* Catenate 'databasename/' to the constraint name specified - by the user: we conceive the constraint as belonging to the - same MySQL 'database' as the table itself. 
We store the name - to foreign->id. */ - - db_len = dict_get_db_name_len(table->name); - - foreign->id = mem_heap_alloc( - foreign->heap, db_len + strlen(constraint_name) + 2); - - ut_memcpy(foreign->id, table->name, db_len); - foreign->id[db_len] = '/'; - strcpy(foreign->id + db_len + 1, constraint_name); - } - - foreign->foreign_table = table; - foreign->foreign_table_name = mem_heap_strdup(foreign->heap, - table->name); - foreign->foreign_index = index; - foreign->n_fields = (unsigned int) i; - foreign->foreign_col_names = mem_heap_alloc(foreign->heap, - i * sizeof(void*)); - for (i = 0; i < foreign->n_fields; i++) { - foreign->foreign_col_names[i] = mem_heap_strdup( - foreign->heap, - dict_table_get_col_name(table, - dict_col_get_no(columns[i]))); - } - - ptr = dict_scan_table_name(cs, ptr, &referenced_table, name, - &success, heap, &referenced_table_name); - - /* Note that referenced_table can be NULL if the user has suppressed - checking of foreign key constraints! */ - - if (!success || (!referenced_table && trx->check_foreigns)) { - dict_foreign_free(foreign); - - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, name); - fprintf(ef, "%s:\nCannot resolve table name close to:\n" - "%s\n", - start_of_latest_foreign, ptr); - mutex_exit(&dict_foreign_err_mutex); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - ptr = dict_accept(cs, ptr, "(", &success); - - if (!success) { - dict_foreign_free(foreign); - dict_foreign_report_syntax_err(name, start_of_latest_foreign, - ptr); - return(DB_CANNOT_ADD_CONSTRAINT); - } - - /* Scan the columns in the second list */ - i = 0; - -col_loop2: - ptr = dict_scan_col(cs, ptr, &success, referenced_table, columns + i, - heap, column_names + i); - i++; - - if (!success) { - dict_foreign_free(foreign); - - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, name); - fprintf(ef, "%s:\nCannot resolve column name close to:\n" - "%s\n", - start_of_latest_foreign, ptr); - 
mutex_exit(&dict_foreign_err_mutex); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - ptr = dict_accept(cs, ptr, ",", &success); - - if (success) { - goto col_loop2; - } - - ptr = dict_accept(cs, ptr, ")", &success); - - if (!success || foreign->n_fields != i) { - dict_foreign_free(foreign); - - dict_foreign_report_syntax_err(name, start_of_latest_foreign, - ptr); - return(DB_CANNOT_ADD_CONSTRAINT); - } - - n_on_deletes = 0; - n_on_updates = 0; - -scan_on_conditions: - /* Loop here as long as we can find ON ... conditions */ - - ptr = dict_accept(cs, ptr, "ON", &success); - - if (!success) { - - goto try_find_index; - } - - ptr = dict_accept(cs, ptr, "DELETE", &success); - - if (!success) { - ptr = dict_accept(cs, ptr, "UPDATE", &success); - - if (!success) { - dict_foreign_free(foreign); - - dict_foreign_report_syntax_err( - name, start_of_latest_foreign, ptr); - return(DB_CANNOT_ADD_CONSTRAINT); - } - - is_on_delete = FALSE; - n_on_updates++; - } else { - is_on_delete = TRUE; - n_on_deletes++; - } - - ptr = dict_accept(cs, ptr, "RESTRICT", &success); - - if (success) { - goto scan_on_conditions; - } - - ptr = dict_accept(cs, ptr, "CASCADE", &success); - - if (success) { - if (is_on_delete) { - foreign->type |= DICT_FOREIGN_ON_DELETE_CASCADE; - } else { - foreign->type |= DICT_FOREIGN_ON_UPDATE_CASCADE; - } - - goto scan_on_conditions; - } - - ptr = dict_accept(cs, ptr, "NO", &success); - - if (success) { - ptr = dict_accept(cs, ptr, "ACTION", &success); - - if (!success) { - dict_foreign_free(foreign); - dict_foreign_report_syntax_err( - name, start_of_latest_foreign, ptr); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - if (is_on_delete) { - foreign->type |= DICT_FOREIGN_ON_DELETE_NO_ACTION; - } else { - foreign->type |= DICT_FOREIGN_ON_UPDATE_NO_ACTION; - } - - goto scan_on_conditions; - } - - ptr = dict_accept(cs, ptr, "SET", &success); - - if (!success) { - dict_foreign_free(foreign); - dict_foreign_report_syntax_err(name, start_of_latest_foreign, - ptr); - 
return(DB_CANNOT_ADD_CONSTRAINT); - } - - ptr = dict_accept(cs, ptr, "NULL", &success); - - if (!success) { - dict_foreign_free(foreign); - dict_foreign_report_syntax_err(name, start_of_latest_foreign, - ptr); - return(DB_CANNOT_ADD_CONSTRAINT); - } - - for (j = 0; j < foreign->n_fields; j++) { - if ((dict_index_get_nth_col(foreign->foreign_index, j)->prtype) - & DATA_NOT_NULL) { - - /* It is not sensible to define SET NULL - if the column is not allowed to be NULL! */ - - dict_foreign_free(foreign); - - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, name); - fprintf(ef, "%s:\n" - "You have defined a SET NULL condition" - " though some of the\n" - "columns are defined as NOT NULL.\n", - start_of_latest_foreign); - mutex_exit(&dict_foreign_err_mutex); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - } - - if (is_on_delete) { - foreign->type |= DICT_FOREIGN_ON_DELETE_SET_NULL; - } else { - foreign->type |= DICT_FOREIGN_ON_UPDATE_SET_NULL; - } - - goto scan_on_conditions; - -try_find_index: - if (n_on_deletes > 1 || n_on_updates > 1) { - /* It is an error to define more than 1 action */ - - dict_foreign_free(foreign); - - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, name); - fprintf(ef, "%s:\n" - "You have twice an ON DELETE clause" - " or twice an ON UPDATE clause.\n", - start_of_latest_foreign); - mutex_exit(&dict_foreign_err_mutex); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - - /* Try to find an index which contains the columns as the first fields - and in the right order, and the types are the same as in - foreign->foreign_index */ - - if (referenced_table) { - index = dict_foreign_find_index(referenced_table, - column_names, i, - foreign->foreign_index, - TRUE, FALSE); - if (!index) { - dict_foreign_free(foreign); - mutex_enter(&dict_foreign_err_mutex); - dict_foreign_error_report_low(ef, name); - fprintf(ef, "%s:\n" - "Cannot find an index in the" - " referenced table where the\n" - "referenced columns appear as 
the" - " first columns, or column types\n" - "in the table and the referenced table" - " do not match for constraint.\n" - "Note that the internal storage type of" - " ENUM and SET changed in\n" - "tables created with >= InnoDB-4.1.12," - " and such columns in old tables\n" - "cannot be referenced by such columns" - " in new tables.\n" - "See http://dev.mysql.com/doc/refman/5.1/en/" - "innodb-foreign-key-constraints.html\n" - "for correct foreign key definition.\n", - start_of_latest_foreign); - mutex_exit(&dict_foreign_err_mutex); - - return(DB_CANNOT_ADD_CONSTRAINT); - } - } else { - ut_a(trx->check_foreigns == FALSE); - index = NULL; - } - - foreign->referenced_index = index; - foreign->referenced_table = referenced_table; - - foreign->referenced_table_name - = mem_heap_strdup(foreign->heap, referenced_table_name); - - foreign->referenced_col_names = mem_heap_alloc(foreign->heap, - i * sizeof(void*)); - for (i = 0; i < foreign->n_fields; i++) { - foreign->referenced_col_names[i] - = mem_heap_strdup(foreign->heap, column_names[i]); - } - - /* We found an ok constraint definition: add to the lists */ - - UT_LIST_ADD_LAST(foreign_list, table->foreign_list, foreign); - - if (referenced_table) { - UT_LIST_ADD_LAST(referenced_list, - referenced_table->referenced_list, - foreign); - } - - goto loop; -} - -/************************************************************************** -Determines whether a string starts with the specified keyword. 
*/ - -ibool -dict_str_starts_with_keyword( -/*=========================*/ - /* out: TRUE if str starts - with keyword */ - void* mysql_thd, /* in: MySQL thread handle */ - const char* str, /* in: string to scan for keyword */ - const char* keyword) /* in: keyword to look for */ -{ - struct charset_info_st* cs = innobase_get_charset(mysql_thd); - ibool success; - - dict_accept(cs, str, keyword, &success); - return(success); -} - -/************************************************************************* -Scans a table create SQL string and adds to the data dictionary the foreign -key constraints declared in the string. This function should be called after -the indexes for a table have been created. Each foreign key constraint must -be accompanied with indexes in both participating tables. The indexes are -allowed to contain more fields than mentioned in the constraint. */ - -ulint -dict_create_foreign_constraints( -/*============================*/ - /* out: error code or DB_SUCCESS */ - trx_t* trx, /* in: transaction */ - const char* sql_string, /* in: table create statement where - foreign keys are declared like: - FOREIGN KEY (a, b) REFERENCES - table2(c, d), table2 can be written - also with the database - name before it: test.table2; the - default database id the database of - parameter name */ - const char* name, /* in: table full name in the - normalized form - database_name/table_name */ - ibool reject_fks) /* in: if TRUE, fail with error - code DB_CANNOT_ADD_CONSTRAINT if - any foreign keys are found. 
*/ -{ - char* str; - ulint err; - mem_heap_t* heap; - - ut_a(trx); - ut_a(trx->mysql_thd); - - str = dict_strip_comments(sql_string); - heap = mem_heap_create(10000); - - err = dict_create_foreign_constraints_low( - trx, heap, innobase_get_charset(trx->mysql_thd), str, name, - reject_fks); - - mem_heap_free(heap); - mem_free(str); - - return(err); -} - -/************************************************************************** -Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. */ - -ulint -dict_foreign_parse_drop_constraints( -/*================================*/ - /* out: DB_SUCCESS or - DB_CANNOT_DROP_CONSTRAINT if - syntax error or the constraint - id does not match */ - mem_heap_t* heap, /* in: heap from which we can - allocate memory */ - trx_t* trx, /* in: transaction */ - dict_table_t* table, /* in: table */ - ulint* n, /* out: number of constraints - to drop */ - const char*** constraints_to_drop) /* out: id's of the - constraints to drop */ -{ - dict_foreign_t* foreign; - ibool success; - char* str; - const char* ptr; - const char* id; - FILE* ef = dict_foreign_err_file; - struct charset_info_st* cs; - - ut_a(trx); - ut_a(trx->mysql_thd); - - cs = innobase_get_charset(trx->mysql_thd); - - *n = 0; - - *constraints_to_drop = mem_heap_alloc(heap, 1000 * sizeof(char*)); - - str = dict_strip_comments(*(trx->mysql_query_str)); - ptr = str; - - ut_ad(mutex_own(&(dict_sys->mutex))); -loop: - ptr = dict_scan_to(ptr, "DROP"); - - if (*ptr == '\0') { - mem_free(str); - - return(DB_SUCCESS); - } - - ptr = dict_accept(cs, ptr, "DROP", &success); - - if (!my_isspace(cs, *ptr)) { - - goto loop; - } - - ptr = dict_accept(cs, ptr, "FOREIGN", &success); - - if (!success || !my_isspace(cs, *ptr)) { - - goto loop; - } - - ptr = dict_accept(cs, ptr, "KEY", &success); - - if (!success) { - - goto syntax_error; - } - - ptr = dict_scan_id(cs, ptr, heap, &id, FALSE, TRUE); - - if (id == NULL) { - - goto syntax_error; - } - - ut_a(*n < 1000); - 
(*constraints_to_drop)[*n] = id; - (*n)++; - - /* Look for the given constraint id */ - - foreign = UT_LIST_GET_FIRST(table->foreign_list); - - while (foreign != NULL) { - if (0 == strcmp(foreign->id, id) - || (strchr(foreign->id, '/') - && 0 == strcmp(id, - dict_remove_db_name(foreign->id)))) { - /* Found */ - break; - } - - foreign = UT_LIST_GET_NEXT(foreign_list, foreign); - } - - if (foreign == NULL) { - mutex_enter(&dict_foreign_err_mutex); - rewind(ef); - ut_print_timestamp(ef); - fputs(" Error in dropping of a foreign key constraint" - " of table ", ef); - ut_print_name(ef, NULL, TRUE, table->name); - fputs(",\n" - "in SQL command\n", ef); - fputs(str, ef); - fputs("\nCannot find a constraint with the given id ", ef); - ut_print_name(ef, NULL, FALSE, id); - fputs(".\n", ef); - mutex_exit(&dict_foreign_err_mutex); - - mem_free(str); - - return(DB_CANNOT_DROP_CONSTRAINT); - } - - goto loop; - -syntax_error: - mutex_enter(&dict_foreign_err_mutex); - rewind(ef); - ut_print_timestamp(ef); - fputs(" Syntax error in dropping of a" - " foreign key constraint of table ", ef); - ut_print_name(ef, NULL, TRUE, table->name); - fprintf(ef, ",\n" - "close to:\n%s\n in SQL command\n%s\n", ptr, str); - mutex_exit(&dict_foreign_err_mutex); - - mem_free(str); - - return(DB_CANNOT_DROP_CONSTRAINT); -} -#endif /* UNIV_HOTBACKUP */ - -/*==================== END OF FOREIGN KEY PROCESSING ====================*/ - -#ifdef UNIV_DEBUG -/************************************************************************** -Returns an index object if it is found in the dictionary cache. 
*/ - -dict_index_t* -dict_index_get_if_in_cache( -/*=======================*/ - /* out: index, NULL if not found */ - dulint index_id) /* in: index id */ -{ - dict_index_t* index; - - if (dict_sys == NULL) { - return(NULL); - } - - mutex_enter(&(dict_sys->mutex)); - - index = dict_index_find_on_id_low(index_id); - - mutex_exit(&(dict_sys->mutex)); - - return(index); -} -#endif /* UNIV_DEBUG */ - -#ifdef UNIV_DEBUG -/************************************************************************** -Checks that a tuple has n_fields_cmp value in a sensible range, so that -no comparison can occur with the page number field in a node pointer. */ - -ibool -dict_index_check_search_tuple( -/*==========================*/ - /* out: TRUE if ok */ - dict_index_t* index, /* in: index tree */ - dtuple_t* tuple) /* in: tuple used in a search */ -{ - ut_a(index); - ut_a(dtuple_get_n_fields_cmp(tuple) - <= dict_index_get_n_unique_in_tree(index)); - return(TRUE); -} -#endif /* UNIV_DEBUG */ - -/************************************************************************** -Builds a node pointer out of a physical record and a page number. 
*/ - -dtuple_t* -dict_index_build_node_ptr( -/*======================*/ - /* out, own: node pointer */ - dict_index_t* index, /* in: index tree */ - rec_t* rec, /* in: record for which to build node - pointer */ - ulint page_no,/* in: page number to put in node pointer */ - mem_heap_t* heap, /* in: memory heap where pointer created */ - ulint level) /* in: level of rec in tree: 0 means leaf - level */ -{ - dtuple_t* tuple; - dfield_t* field; - byte* buf; - ulint n_unique; - - if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) { - /* In a universal index tree, we take the whole record as - the node pointer if the record is on the leaf level, - on non-leaf levels we remove the last field, which - contains the page number of the child page */ - - ut_a(!dict_table_is_comp(index->table)); - n_unique = rec_get_n_fields_old(rec); - - if (level > 0) { - ut_a(n_unique > 1); - n_unique--; - } - } else { - n_unique = dict_index_get_n_unique_in_tree(index); - } - - tuple = dtuple_create(heap, n_unique + 1); - - /* When searching in the tree for the node pointer, we must not do - comparison on the last field, the page number field, as on upper - levels in the tree there may be identical node pointers with a - different page number; therefore, we set the n_fields_cmp to one - less: */ - - dtuple_set_n_fields_cmp(tuple, n_unique); - - dict_index_copy_types(tuple, index, n_unique); - - buf = mem_heap_alloc(heap, 4); - - mach_write_to_4(buf, page_no); - - field = dtuple_get_nth_field(tuple, n_unique); - dfield_set_data(field, buf, 4); - - dtype_set(dfield_get_type(field), DATA_SYS_CHILD, DATA_NOT_NULL, 4); - - rec_copy_prefix_to_dtuple(tuple, rec, index, n_unique, heap); - dtuple_set_info_bits(tuple, dtuple_get_info_bits(tuple) - | REC_STATUS_NODE_PTR); - - ut_ad(dtuple_check_typed(tuple)); - - return(tuple); -} - -/************************************************************************** -Copies an initial segment of a physical record, long enough to specify an -index entry 
uniquely. */ - -rec_t* -dict_index_copy_rec_order_prefix( -/*=============================*/ - /* out: pointer to the prefix record */ - dict_index_t* index, /* in: index tree */ - rec_t* rec, /* in: record for which to copy prefix */ - ulint* n_fields,/* out: number of fields copied */ - byte** buf, /* in/out: memory buffer for the copied prefix, - or NULL */ - ulint* buf_size)/* in/out: buffer size */ -{ - ulint n; - - UNIV_PREFETCH_R(rec); - - if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) { - ut_a(!dict_table_is_comp(index->table)); - n = rec_get_n_fields_old(rec); - } else { - n = dict_index_get_n_unique_in_tree(index); - } - - *n_fields = n; - return(rec_copy_prefix_to_buf(rec, index, n, buf, buf_size)); -} - -/************************************************************************** -Builds a typed data tuple out of a physical record. */ - -dtuple_t* -dict_index_build_data_tuple( -/*========================*/ - /* out, own: data tuple */ - dict_index_t* index, /* in: index tree */ - rec_t* rec, /* in: record for which to build data tuple */ - ulint n_fields,/* in: number of data fields */ - mem_heap_t* heap) /* in: memory heap where tuple created */ -{ - dtuple_t* tuple; - - ut_ad(dict_table_is_comp(index->table) - || n_fields <= rec_get_n_fields_old(rec)); - - tuple = dtuple_create(heap, n_fields); - - dict_index_copy_types(tuple, index, n_fields); - - rec_copy_prefix_to_dtuple(tuple, rec, index, n_fields, heap); - - ut_ad(dtuple_check_typed(tuple)); - - return(tuple); -} - -/************************************************************************* -Calculates the minimum record length in an index. 
*/ - -ulint -dict_index_calc_min_rec_len( -/*========================*/ - dict_index_t* index) /* in: index */ -{ - ulint sum = 0; - ulint i; - - if (dict_table_is_comp(index->table)) { - ulint nullable = 0; - sum = REC_N_NEW_EXTRA_BYTES; - for (i = 0; i < dict_index_get_n_fields(index); i++) { - const dict_col_t* col - = dict_index_get_nth_col(index, i); - ulint size = dict_col_get_fixed_size(col); - sum += size; - if (!size) { - size = col->len; - sum += size < 128 ? 1 : 2; - } - if (!(col->prtype & DATA_NOT_NULL)) { - nullable++; - } - } - - /* round the NULL flags up to full bytes */ - sum += UT_BITS_IN_BYTES(nullable); - - return(sum); - } - - for (i = 0; i < dict_index_get_n_fields(index); i++) { - sum += dict_col_get_fixed_size( - dict_index_get_nth_col(index, i)); - } - - if (sum > 127) { - sum += 2 * dict_index_get_n_fields(index); - } else { - sum += dict_index_get_n_fields(index); - } - - sum += REC_N_OLD_EXTRA_BYTES; - - return(sum); -} - -/************************************************************************* -Calculates new estimates for table and index statistics. The statistics -are used in query optimization. */ - -void -dict_update_statistics_low( -/*=======================*/ - dict_table_t* table, /* in: table */ - ibool has_dict_mutex __attribute__((unused))) - /* in: TRUE if the caller has the - dictionary mutex */ -{ - dict_index_t* index; - ulint size; - ulint sum_of_index_sizes = 0; - - if (table->ibd_file_missing) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: cannot calculate statistics for table %s\n" - "InnoDB: because the .ibd file is missing. For help," - " please refer to\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "innodb-troubleshooting.html\n", - table->name); - - return; - } - - /* If we have set a high innodb_force_recovery level, do not calculate - statistics, as a badly corrupted index can cause a crash in it. 
*/ - - if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { - - return; - } - - /* Find out the sizes of the indexes and how many different values - for the key they approximately have */ - - index = dict_table_get_first_index(table); - - if (index == NULL) { - /* Table definition is corrupt */ - - return; - } - - while (index) { - size = btr_get_size(index, BTR_TOTAL_SIZE); - - index->stat_index_size = size; - - sum_of_index_sizes += size; - - size = btr_get_size(index, BTR_N_LEAF_PAGES); - - if (size == 0) { - /* The root node of the tree is a leaf */ - size = 1; - } - - index->stat_n_leaf_pages = size; - - btr_estimate_number_of_different_key_vals(index); - - index = dict_table_get_next_index(index); - } - - index = dict_table_get_first_index(table); - - table->stat_n_rows = index->stat_n_diff_key_vals[ - dict_index_get_n_unique(index)]; - - table->stat_clustered_index_size = index->stat_index_size; - - table->stat_sum_of_other_index_sizes = sum_of_index_sizes - - index->stat_index_size; - - table->stat_initialized = TRUE; - - table->stat_modified_counter = 0; -} - -/************************************************************************* -Calculates new estimates for table and index statistics. The statistics -are used in query optimization. */ - -void -dict_update_statistics( -/*===================*/ - dict_table_t* table) /* in: table */ -{ - dict_update_statistics_low(table, FALSE); -} - -/************************************************************************** -A noninlined version of dict_table_get_low. */ - -dict_table_t* -dict_table_get_low_noninlined( -/*==========================*/ - /* out: table, NULL if not found */ - const char* table_name) /* in: table name */ -{ - return(dict_table_get_low(table_name)); -} - -/************************************************************************** -Prints info of a foreign key constraint. 
*/ -static -void -dict_foreign_print_low( -/*===================*/ - dict_foreign_t* foreign) /* in: foreign key constraint */ -{ - ulint i; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - fprintf(stderr, " FOREIGN KEY CONSTRAINT %s: %s (", - foreign->id, foreign->foreign_table_name); - - for (i = 0; i < foreign->n_fields; i++) { - fprintf(stderr, " %s", foreign->foreign_col_names[i]); - } - - fprintf(stderr, " )\n" - " REFERENCES %s (", - foreign->referenced_table_name); - - for (i = 0; i < foreign->n_fields; i++) { - fprintf(stderr, " %s", foreign->referenced_col_names[i]); - } - - fputs(" )\n", stderr); -} - -/************************************************************************** -Prints a table data. */ - -void -dict_table_print( -/*=============*/ - dict_table_t* table) /* in: table */ -{ - mutex_enter(&(dict_sys->mutex)); - dict_table_print_low(table); - mutex_exit(&(dict_sys->mutex)); -} - -/************************************************************************** -Prints a table data when we know the table name. */ - -void -dict_table_print_by_name( -/*=====================*/ - const char* name) -{ - dict_table_t* table; - - mutex_enter(&(dict_sys->mutex)); - - table = dict_table_get_low(name); - - ut_a(table); - - dict_table_print_low(table); - mutex_exit(&(dict_sys->mutex)); -} - -/************************************************************************** -Prints a table data. 
*/ - -void -dict_table_print_low( -/*=================*/ - dict_table_t* table) /* in: table */ -{ - dict_index_t* index; - dict_foreign_t* foreign; - ulint i; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - dict_update_statistics_low(table, TRUE); - - fprintf(stderr, - "--------------------------------------\n" - "TABLE: name %s, id %lu %lu, columns %lu, indexes %lu," - " appr.rows %lu\n" - " COLUMNS: ", - table->name, - (ulong) ut_dulint_get_high(table->id), - (ulong) ut_dulint_get_low(table->id), - (ulong) table->n_cols, - (ulong) UT_LIST_GET_LEN(table->indexes), - (ulong) table->stat_n_rows); - - for (i = 0; i + 1 < (ulint) table->n_cols; i++) { - dict_col_print_low(table, dict_table_get_nth_col(table, i)); - fputs("; ", stderr); - } - - putc('\n', stderr); - - index = UT_LIST_GET_FIRST(table->indexes); - - while (index != NULL) { - dict_index_print_low(index); - index = UT_LIST_GET_NEXT(indexes, index); - } - - foreign = UT_LIST_GET_FIRST(table->foreign_list); - - while (foreign != NULL) { - dict_foreign_print_low(foreign); - foreign = UT_LIST_GET_NEXT(foreign_list, foreign); - } - - foreign = UT_LIST_GET_FIRST(table->referenced_list); - - while (foreign != NULL) { - dict_foreign_print_low(foreign); - foreign = UT_LIST_GET_NEXT(referenced_list, foreign); - } -} - -/************************************************************************** -Prints a column data. */ -static -void -dict_col_print_low( -/*===============*/ - const dict_table_t* table, /* in: table */ - const dict_col_t* col) /* in: column */ -{ - dtype_t type; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - dict_col_copy_type(col, &type); - fprintf(stderr, "%s: ", dict_table_get_col_name(table, - dict_col_get_no(col))); - - dtype_print(&type); -} - -/************************************************************************** -Prints an index data. 
*/ -static -void -dict_index_print_low( -/*=================*/ - dict_index_t* index) /* in: index */ -{ - ib_longlong n_vals; - ulint i; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - if (index->n_user_defined_cols > 0) { - n_vals = index->stat_n_diff_key_vals[ - index->n_user_defined_cols]; - } else { - n_vals = index->stat_n_diff_key_vals[1]; - } - - fprintf(stderr, - " INDEX: name %s, id %lu %lu, fields %lu/%lu," - " uniq %lu, type %lu\n" - " root page %lu, appr.key vals %lu," - " leaf pages %lu, size pages %lu\n" - " FIELDS: ", - index->name, - (ulong) ut_dulint_get_high(index->id), - (ulong) ut_dulint_get_low(index->id), - (ulong) index->n_user_defined_cols, - (ulong) index->n_fields, - (ulong) index->n_uniq, - (ulong) index->type, - (ulong) index->page, - (ulong) n_vals, - (ulong) index->stat_n_leaf_pages, - (ulong) index->stat_index_size); - - for (i = 0; i < index->n_fields; i++) { - dict_field_print_low(dict_index_get_nth_field(index, i)); - } - - putc('\n', stderr); - -#ifdef UNIV_BTR_PRINT - btr_print_size(index); - - btr_print_index(index, 7); -#endif /* UNIV_BTR_PRINT */ -} - -/************************************************************************** -Prints a field data. */ -static -void -dict_field_print_low( -/*=================*/ - dict_field_t* field) /* in: field */ -{ - ut_ad(mutex_own(&(dict_sys->mutex))); - - fprintf(stderr, " %s", field->name); - - if (field->prefix_len != 0) { - fprintf(stderr, "(%lu)", (ulong) field->prefix_len); - } -} - -/************************************************************************** -Outputs info on a foreign key of a table in a format suitable for -CREATE TABLE. 
*/ - -void -dict_print_info_on_foreign_key_in_create_format( -/*============================================*/ - FILE* file, /* in: file where to print */ - trx_t* trx, /* in: transaction */ - dict_foreign_t* foreign, /* in: foreign key constraint */ - ibool add_newline) /* in: whether to add a newline */ -{ - const char* stripped_id; - ulint i; - - if (strchr(foreign->id, '/')) { - /* Strip the preceding database name from the constraint id */ - stripped_id = foreign->id + 1 - + dict_get_db_name_len(foreign->id); - } else { - stripped_id = foreign->id; - } - - putc(',', file); - - if (add_newline) { - /* SHOW CREATE TABLE wants constraints each printed nicely - on its own line, while error messages want no newlines - inserted. */ - fputs("\n ", file); - } - - fputs(" CONSTRAINT ", file); - ut_print_name(file, trx, FALSE, stripped_id); - fputs(" FOREIGN KEY (", file); - - for (i = 0;;) { - ut_print_name(file, trx, FALSE, foreign->foreign_col_names[i]); - if (++i < foreign->n_fields) { - fputs(", ", file); - } else { - break; - } - } - - fputs(") REFERENCES ", file); - - if (dict_tables_have_same_db(foreign->foreign_table_name, - foreign->referenced_table_name)) { - /* Do not print the database name of the referenced table */ - ut_print_name(file, trx, TRUE, - dict_remove_db_name( - foreign->referenced_table_name)); - } else { - ut_print_name(file, trx, TRUE, - foreign->referenced_table_name); - } - - putc(' ', file); - putc('(', file); - - for (i = 0;;) { - ut_print_name(file, trx, FALSE, - foreign->referenced_col_names[i]); - if (++i < foreign->n_fields) { - fputs(", ", file); - } else { - break; - } - } - - putc(')', file); - - if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE) { - fputs(" ON DELETE CASCADE", file); - } - - if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) { - fputs(" ON DELETE SET NULL", file); - } - - if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) { - fputs(" ON DELETE NO ACTION", file); - } - - if (foreign->type & 
DICT_FOREIGN_ON_UPDATE_CASCADE) { - fputs(" ON UPDATE CASCADE", file); - } - - if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) { - fputs(" ON UPDATE SET NULL", file); - } - - if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) { - fputs(" ON UPDATE NO ACTION", file); - } -} - -/************************************************************************** -Outputs info on foreign keys of a table. */ - -void -dict_print_info_on_foreign_keys( -/*============================*/ - ibool create_table_format, /* in: if TRUE then print in - a format suitable to be inserted into - a CREATE TABLE, otherwise in the format - of SHOW TABLE STATUS */ - FILE* file, /* in: file where to print */ - trx_t* trx, /* in: transaction */ - dict_table_t* table) /* in: table */ -{ - dict_foreign_t* foreign; - - mutex_enter(&(dict_sys->mutex)); - - foreign = UT_LIST_GET_FIRST(table->foreign_list); - - if (foreign == NULL) { - mutex_exit(&(dict_sys->mutex)); - - return; - } - - while (foreign != NULL) { - if (create_table_format) { - dict_print_info_on_foreign_key_in_create_format( - file, trx, foreign, TRUE); - } else { - ulint i; - fputs("; (", file); - - for (i = 0; i < foreign->n_fields; i++) { - if (i) { - putc(' ', file); - } - - ut_print_name(file, trx, FALSE, - foreign->foreign_col_names[i]); - } - - fputs(") REFER ", file); - ut_print_name(file, trx, TRUE, - foreign->referenced_table_name); - putc('(', file); - - for (i = 0; i < foreign->n_fields; i++) { - if (i) { - putc(' ', file); - } - ut_print_name( - file, trx, FALSE, - foreign->referenced_col_names[i]); - } - - putc(')', file); - - if (foreign->type == DICT_FOREIGN_ON_DELETE_CASCADE) { - fputs(" ON DELETE CASCADE", file); - } - - if (foreign->type == DICT_FOREIGN_ON_DELETE_SET_NULL) { - fputs(" ON DELETE SET NULL", file); - } - - if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) { - fputs(" ON DELETE NO ACTION", file); - } - - if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) { - fputs(" ON UPDATE CASCADE", file); - 
} - - if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) { - fputs(" ON UPDATE SET NULL", file); - } - - if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) { - fputs(" ON UPDATE NO ACTION", file); - } - } - - foreign = UT_LIST_GET_NEXT(foreign_list, foreign); - } - - mutex_exit(&(dict_sys->mutex)); -} - -/************************************************************************ -Displays the names of the index and the table. */ -void -dict_index_name_print( -/*==================*/ - FILE* file, /* in: output stream */ - trx_t* trx, /* in: transaction */ - const dict_index_t* index) /* in: index to print */ -{ - fputs("index ", file); - ut_print_name(file, trx, FALSE, index->name); - fputs(" of table ", file); - ut_print_name(file, trx, TRUE, index->table_name); -} diff --git a/storage/innobase/dict/dict0load.c b/storage/innobase/dict/dict0load.c deleted file mode 100644 index 65f1c9536bd..00000000000 --- a/storage/innobase/dict/dict0load.c +++ /dev/null @@ -1,1360 +0,0 @@ -/****************************************************** -Loads to the memory cache database object definitions -from dictionary tables - -(c) 1996 Innobase Oy - -Created 4/24/1996 Heikki Tuuri -*******************************************************/ - -#include "dict0load.h" -#ifndef UNIV_HOTBACKUP -#include "mysql_version.h" -#endif /* !UNIV_HOTBACKUP */ - -#ifdef UNIV_NONINL -#include "dict0load.ic" -#endif - -#include "btr0pcur.h" -#include "btr0btr.h" -#include "page0page.h" -#include "mach0data.h" -#include "dict0dict.h" -#include "dict0boot.h" -#include "rem0cmp.h" -#include "srv0start.h" -#include "srv0srv.h" - -/******************************************************************** -Returns TRUE if index's i'th column's name is 'name' .*/ -static -ibool -name_of_col_is( -/*===========*/ - /* out: */ - dict_table_t* table, /* in: table */ - dict_index_t* index, /* in: index */ - ulint i, /* in: */ - const char* name) /* in: name to compare to */ -{ - ulint tmp = 
dict_col_get_no(dict_field_get_col( - dict_index_get_nth_field( - index, i))); - - return(strcmp(name, dict_table_get_col_name(table, tmp)) == 0); -} - -/************************************************************************ -Finds the first table name in the given database. */ - -char* -dict_get_first_table_name_in_db( -/*============================*/ - /* out, own: table name, NULL if - does not exist; the caller must - free the memory in the string! */ - const char* name) /* in: database name which ends in '/' */ -{ - dict_table_t* sys_tables; - btr_pcur_t pcur; - dict_index_t* sys_index; - dtuple_t* tuple; - mem_heap_t* heap; - dfield_t* dfield; - rec_t* rec; - byte* field; - ulint len; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - heap = mem_heap_create(1000); - - mtr_start(&mtr); - - sys_tables = dict_table_get_low("SYS_TABLES"); - sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); - ut_a(!dict_table_is_comp(sys_tables)); - - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(dfield, name, ut_strlen(name)); - dict_index_copy_types(tuple, sys_index, 1); - - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); -loop: - rec = btr_pcur_get_rec(&pcur); - - if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { - /* Not found */ - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); - } - - field = rec_get_nth_field_old(rec, 0, &len); - - if (len < strlen(name) - || ut_memcmp(name, field, strlen(name)) != 0) { - /* Not found */ - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); - } - - if (!rec_get_deleted_flag(rec, 0)) { - - /* We found one */ - - char* table_name = mem_strdupl((char*) field, len); - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(table_name); - } - - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - - goto loop; -} - 
-/************************************************************************ -Prints to the standard output information on all tables found in the data -dictionary system table. */ - -void -dict_print(void) -/*============*/ -{ - dict_table_t* sys_tables; - dict_index_t* sys_index; - dict_table_t* table; - btr_pcur_t pcur; - rec_t* rec; - byte* field; - ulint len; - mtr_t mtr; - - /* Enlarge the fatal semaphore wait timeout during the InnoDB table - monitor printout */ - - mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */ - mutex_exit(&kernel_mutex); - - mutex_enter(&(dict_sys->mutex)); - - mtr_start(&mtr); - - sys_tables = dict_table_get_low("SYS_TABLES"); - sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); - - btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur, - TRUE, &mtr); -loop: - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - - rec = btr_pcur_get_rec(&pcur); - - if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { - /* end of index */ - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - mutex_exit(&(dict_sys->mutex)); - - /* Restore the fatal semaphore wait timeout */ - - mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */ - mutex_exit(&kernel_mutex); - - return; - } - - field = rec_get_nth_field_old(rec, 0, &len); - - if (!rec_get_deleted_flag(rec, 0)) { - - /* We found one */ - - char* table_name = mem_strdupl((char*) field, len); - - btr_pcur_store_position(&pcur, &mtr); - - mtr_commit(&mtr); - - table = dict_table_get_low(table_name); - mem_free(table_name); - - if (table == NULL) { - fputs("InnoDB: Failed to load table ", stderr); - ut_print_namel(stderr, NULL, TRUE, (char*) field, len); - putc('\n', stderr); - } else { - /* The table definition was corrupt if there - is no index */ - - if (dict_table_get_first_index(table)) { - dict_update_statistics_low(table, TRUE); - } - - dict_table_print_low(table); - } - - mtr_start(&mtr); - - 
btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr); - } - - goto loop; -} - -/************************************************************************ -In a crash recovery we already have all the tablespace objects created. -This function compares the space id information in the InnoDB data dictionary -to what we already read with fil_load_single_table_tablespaces(). - -In a normal startup, we create the tablespace objects for every table in -InnoDB's data dictionary, if the corresponding .ibd file exists. -We also scan the biggest space id, and store it to fil_system. */ - -void -dict_check_tablespaces_and_store_max_id( -/*====================================*/ - ibool in_crash_recovery) /* in: are we doing a crash recovery */ -{ - dict_table_t* sys_tables; - dict_index_t* sys_index; - btr_pcur_t pcur; - rec_t* rec; - byte* field; - ulint len; - ulint space_id; - ulint max_space_id = 0; - mtr_t mtr; - - mutex_enter(&(dict_sys->mutex)); - - mtr_start(&mtr); - - sys_tables = dict_table_get_low("SYS_TABLES"); - sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); - ut_a(!dict_table_is_comp(sys_tables)); - - btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur, - TRUE, &mtr); -loop: - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - - rec = btr_pcur_get_rec(&pcur); - - if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { - /* end of index */ - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - /* We must make the tablespace cache aware of the biggest - known space id */ - - /* printf("Biggest space id in data dictionary %lu\n", - max_space_id); */ - fil_set_max_space_id_if_bigger(max_space_id); - - mutex_exit(&(dict_sys->mutex)); - - return; - } - - field = rec_get_nth_field_old(rec, 0, &len); - - if (!rec_get_deleted_flag(rec, 0)) { - - /* We found one */ - - char* name = mem_strdupl((char*) field, len); - - field = rec_get_nth_field_old(rec, 9, &len); - ut_a(len == 4); - - space_id = mach_read_from_4(field); - - btr_pcur_store_position(&pcur, &mtr); - - 
mtr_commit(&mtr); - - if (space_id != 0 && in_crash_recovery) { - /* Check that the tablespace (the .ibd file) really - exists; print a warning to the .err log if not */ - - fil_space_for_table_exists_in_mem(space_id, name, - FALSE, TRUE, TRUE); - } - - if (space_id != 0 && !in_crash_recovery) { - /* It is a normal database startup: create the space - object and check that the .ibd file exists. */ - - fil_open_single_table_tablespace(FALSE, space_id, - name); - } - - mem_free(name); - - if (space_id > max_space_id) { - max_space_id = space_id; - } - - mtr_start(&mtr); - - btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr); - } - - goto loop; -} - -/************************************************************************ -Loads definitions for table columns. */ -static -void -dict_load_columns( -/*==============*/ - dict_table_t* table, /* in: table */ - mem_heap_t* heap) /* in: memory heap for temporary storage */ -{ - dict_table_t* sys_columns; - dict_index_t* sys_index; - btr_pcur_t pcur; - dtuple_t* tuple; - dfield_t* dfield; - rec_t* rec; - byte* field; - ulint len; - byte* buf; - char* name; - ulint mtype; - ulint prtype; - ulint col_len; - ulint i; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - mtr_start(&mtr); - - sys_columns = dict_table_get_low("SYS_COLUMNS"); - sys_index = UT_LIST_GET_FIRST(sys_columns->indexes); - ut_a(!dict_table_is_comp(sys_columns)); - - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - buf = mem_heap_alloc(heap, 8); - mach_write_to_8(buf, table->id); - - dfield_set_data(dfield, buf, 8); - dict_index_copy_types(tuple, sys_index, 1); - - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - for (i = 0; i + DATA_N_SYS_COLS < (ulint) table->n_cols; i++) { - - rec = btr_pcur_get_rec(&pcur); - - ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr)); - - ut_a(!rec_get_deleted_flag(rec, 0)); - - field = rec_get_nth_field_old(rec, 0, &len); - ut_ad(len == 8); - 
ut_a(ut_dulint_cmp(table->id, mach_read_from_8(field)) == 0); - - field = rec_get_nth_field_old(rec, 1, &len); - ut_ad(len == 4); - ut_a(i == mach_read_from_4(field)); - - ut_a(name_of_col_is(sys_columns, sys_index, 4, "NAME")); - - field = rec_get_nth_field_old(rec, 4, &len); - name = mem_heap_strdupl(heap, (char*) field, len); - - field = rec_get_nth_field_old(rec, 5, &len); - mtype = mach_read_from_4(field); - - field = rec_get_nth_field_old(rec, 6, &len); - prtype = mach_read_from_4(field); - - if (dtype_get_charset_coll(prtype) == 0 - && dtype_is_string_type(mtype)) { - /* The table was created with < 4.1.2. */ - - if (dtype_is_binary_string_type(mtype, prtype)) { - /* Use the binary collation for - string columns of binary type. */ - - prtype = dtype_form_prtype( - prtype, - DATA_MYSQL_BINARY_CHARSET_COLL); - } else { - /* Use the default charset for - other than binary columns. */ - - prtype = dtype_form_prtype( - prtype, - data_mysql_default_charset_coll); - } - } - - field = rec_get_nth_field_old(rec, 7, &len); - col_len = mach_read_from_4(field); - - ut_a(name_of_col_is(sys_columns, sys_index, 8, "PREC")); - - dict_mem_table_add_col(table, heap, name, - mtype, prtype, col_len); - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); -} - -/************************************************************************ -Report that an index field or index for a table has been delete marked. 
*/ -static -void -dict_load_report_deleted_index( -/*===========================*/ - const char* name, /* in: table name */ - ulint field) /* in: index field, or ULINT_UNDEFINED */ -{ - fprintf(stderr, "InnoDB: Error: data dictionary entry" - " for table %s is corrupt!\n", name); - if (field != ULINT_UNDEFINED) { - fprintf(stderr, - "InnoDB: Index field %lu is delete marked.\n", field); - } else { - fputs("InnoDB: An index is delete marked.\n", stderr); - } -} - -/************************************************************************ -Loads definitions for index fields. */ -static -void -dict_load_fields( -/*=============*/ - dict_table_t* table, /* in: table */ - dict_index_t* index, /* in: index whose fields to load */ - mem_heap_t* heap) /* in: memory heap for temporary storage */ -{ - dict_table_t* sys_fields; - dict_index_t* sys_index; - btr_pcur_t pcur; - dtuple_t* tuple; - dfield_t* dfield; - ulint pos_and_prefix_len; - ulint prefix_len; - rec_t* rec; - byte* field; - ulint len; - byte* buf; - ulint i; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - mtr_start(&mtr); - - sys_fields = dict_table_get_low("SYS_FIELDS"); - sys_index = UT_LIST_GET_FIRST(sys_fields->indexes); - ut_a(!dict_table_is_comp(sys_fields)); - - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - buf = mem_heap_alloc(heap, 8); - mach_write_to_8(buf, index->id); - - dfield_set_data(dfield, buf, 8); - dict_index_copy_types(tuple, sys_index, 1); - - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - for (i = 0; i < index->n_fields; i++) { - - rec = btr_pcur_get_rec(&pcur); - - ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr)); - if (rec_get_deleted_flag(rec, 0)) { - dict_load_report_deleted_index(table->name, i); - } - - field = rec_get_nth_field_old(rec, 0, &len); - ut_ad(len == 8); - ut_a(ut_memcmp(buf, field, len) == 0); - - field = rec_get_nth_field_old(rec, 1, &len); - ut_a(len == 4); - - /* The next field stores 
the field position in the index - and a possible column prefix length if the index field - does not contain the whole column. The storage format is - like this: if there is at least one prefix field in the index, - then the HIGH 2 bytes contain the field number (== i) and the - low 2 bytes the prefix length for the field. Otherwise the - field number (== i) is contained in the 2 LOW bytes. */ - - pos_and_prefix_len = mach_read_from_4(field); - - ut_a((pos_and_prefix_len & 0xFFFFUL) == i - || (pos_and_prefix_len & 0xFFFF0000UL) == (i << 16)); - - if ((i == 0 && pos_and_prefix_len > 0) - || (pos_and_prefix_len & 0xFFFF0000UL) > 0) { - - prefix_len = pos_and_prefix_len & 0xFFFFUL; - } else { - prefix_len = 0; - } - - ut_a(name_of_col_is(sys_fields, sys_index, 4, "COL_NAME")); - - field = rec_get_nth_field_old(rec, 4, &len); - - dict_mem_index_add_field(index, - mem_heap_strdupl(heap, - (char*) field, len), - prefix_len); - - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); -} - -/************************************************************************ -Loads definitions for table indexes. Adds them to the data dictionary -cache. 
*/
static
ulint
dict_load_indexes(
/*==============*/
				/* out: DB_SUCCESS if ok, DB_CORRUPTION
				if corruption of dictionary table or
				DB_UNSUPPORTED if table has unknown index
				type */
	dict_table_t*	table,	/* in: table */
	mem_heap_t*	heap)	/* in: memory heap for temporary storage */
{
	dict_table_t*	sys_indexes;
	dict_index_t*	sys_index;
	dict_index_t*	index;
	btr_pcur_t	pcur;
	dtuple_t*	search_tuple;
	dfield_t*	dfield;
	rec_t*		rec;
	byte*		field;
	ulint		len;
	ulint		name_len;
	char*		name_buf;
	ulint		type;
	ulint		space;
	ulint		page_no;
	ulint		n_fields;
	byte*		id_buf;
	ibool		is_sys_table;
	dulint		id;
	mtr_t		mtr;
	ulint		error = DB_SUCCESS;

	ut_ad(mutex_own(&(dict_sys->mutex)));

	/* Table ids below DICT_HDR_FIRST_ID belong to system tables */
	is_sys_table = (ut_dulint_get_high(table->id) == 0)
		&& (ut_dulint_get_low(table->id) < DICT_HDR_FIRST_ID);

	mtr_start(&mtr);

	sys_indexes = dict_table_get_low("SYS_INDEXES");
	sys_index = UT_LIST_GET_FIRST(sys_indexes->indexes);
	ut_a(!dict_table_is_comp(sys_indexes));

	search_tuple = dtuple_create(heap, 1);
	dfield = dtuple_get_nth_field(search_tuple, 0);

	/* Search key: the 8-byte table id */
	id_buf = mem_heap_alloc(heap, 8);
	mach_write_to_8(id_buf, table->id);

	dfield_set_data(dfield, id_buf, 8);
	dict_index_copy_types(search_tuple, sys_index, 1);

	btr_pcur_open_on_user_rec(sys_index, search_tuple, PAGE_CUR_GE,
				  BTR_SEARCH_LEAF, &pcur, &mtr);

	while (btr_pcur_is_on_user_rec(&pcur, &mtr)) {

		rec = btr_pcur_get_rec(&pcur);

		field = rec_get_nth_field_old(rec, 0, &len);
		ut_ad(len == 8);

		if (ut_memcmp(id_buf, field, len) != 0) {
			/* The record belongs to another table: done */
			break;
		}

		if (rec_get_deleted_flag(rec, 0)) {
			dict_load_report_deleted_index(table->name,
						       ULINT_UNDEFINED);

			error = DB_CORRUPTION;
			goto func_exit;
		}

		field = rec_get_nth_field_old(rec, 1, &len);
		ut_ad(len == 8);
		id = mach_read_from_8(field);

		ut_a(name_of_col_is(sys_indexes, sys_index, 4, "NAME"));

		field = rec_get_nth_field_old(rec, 4, &name_len);
		name_buf = mem_heap_strdupl(heap, (char*) field, name_len);

		field = rec_get_nth_field_old(rec, 5, &len);
		n_fields = mach_read_from_4(field);

		field = rec_get_nth_field_old(rec, 6, &len);
		type = mach_read_from_4(field);

		field = rec_get_nth_field_old(rec, 7, &len);
		space = mach_read_from_4(field);

		ut_a(name_of_col_is(sys_indexes, sys_index, 8, "PAGE_NO"));

		field = rec_get_nth_field_old(rec, 8, &len);
		page_no = mach_read_from_4(field);

		/* We check for unsupported types first, so that the
		subsequent checks are relevant for the supported types. */
		if (type & ~(DICT_CLUSTERED | DICT_UNIQUE)) {

			fprintf(stderr,
				"InnoDB: Error: unknown type %lu"
				" of index %s of table %s\n",
				(ulong) type, name_buf, table->name);

			error = DB_UNSUPPORTED;
			goto func_exit;
		}

		if (page_no == FIL_NULL) {

			fprintf(stderr,
				"InnoDB: Error: trying to load index %s"
				" for table %s\n"
				"InnoDB: but the index tree has been freed!\n",
				name_buf, table->name);

			error = DB_CORRUPTION;
			goto func_exit;
		}

		if ((type & DICT_CLUSTERED) == 0
		    && NULL == dict_table_get_first_index(table)) {

			fprintf(stderr,
				"InnoDB: Error: trying to load index %s"
				" for table %s\n"
				"InnoDB: but the first index"
				" is not clustered!\n",
				name_buf, table->name);

			error = DB_CORRUPTION;
			goto func_exit;
		}

		if (is_sys_table
		    && ((type & DICT_CLUSTERED)
			|| ((table == dict_sys->sys_tables)
			    && (name_len == (sizeof "ID_IND") - 1)
			    && (0 == ut_memcmp(name_buf,
					       "ID_IND", name_len))))) {

			/* The index was created in memory already at booting
			of the database server */
		} else {
			index = dict_mem_index_create(table->name, name_buf,
						      space, type, n_fields);
			index->id = id;

			dict_load_fields(table, index, heap);
			dict_index_add_to_cache(table, index, page_no);
		}

		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
	}

func_exit:
	btr_pcur_close(&pcur);
	mtr_commit(&mtr);

	return(error);
}

-/************************************************************************ -Loads a table definition and also all its index definitions, and also -the cluster definition if the table is a member in a cluster. Also loads -all foreign key constraints where the foreign key is in the table or where -a foreign key references columns in this table. Adds all these to the data -dictionary cache. */ - -dict_table_t* -dict_load_table( -/*============*/ - /* out: table, NULL if does not exist; - if the table is stored in an .ibd file, - but the file does not exist, - then we set the ibd_file_missing flag TRUE - in the table object we return */ - const char* name) /* in: table name in the - databasename/tablename format */ -{ - ibool ibd_file_missing = FALSE; - dict_table_t* table; - dict_table_t* sys_tables; - btr_pcur_t pcur; - dict_index_t* sys_index; - dtuple_t* tuple; - mem_heap_t* heap; - dfield_t* dfield; - rec_t* rec; - byte* field; - ulint len; - ulint space; - ulint n_cols; - ulint flags; - ulint err; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - heap = mem_heap_create(32000); - - mtr_start(&mtr); - - sys_tables = dict_table_get_low("SYS_TABLES"); - sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); - ut_a(!dict_table_is_comp(sys_tables)); - - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(dfield, name, ut_strlen(name)); - dict_index_copy_types(tuple, sys_index, 1); - - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - rec = btr_pcur_get_rec(&pcur); - - if (!btr_pcur_is_on_user_rec(&pcur, &mtr) - || rec_get_deleted_flag(rec, 0)) { - /* Not found */ -err_exit: - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); - } - - field = rec_get_nth_field_old(rec, 0, &len); - - /* Check if the table name in record is the searched one */ - if (len != ut_strlen(name) || ut_memcmp(name, field, len) != 0) { - - goto err_exit; - } - - 
ut_a(name_of_col_is(sys_tables, sys_index, 9, "SPACE")); - - field = rec_get_nth_field_old(rec, 9, &len); - space = mach_read_from_4(field); - - /* Check if the tablespace exists and has the right name */ - if (space != 0) { - if (fil_space_for_table_exists_in_mem(space, name, FALSE, - FALSE, FALSE)) { - /* Ok; (if we did a crash recovery then the tablespace - can already be in the memory cache) */ - } else { - /* In >= 4.1.9, InnoDB scans the data dictionary also - at a normal mysqld startup. It is an error if the - space object does not exist in memory. */ - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: error: space object of table %s,\n" - "InnoDB: space id %lu did not exist in memory." - " Retrying an open.\n", - name, (ulong)space); - /* Try to open the tablespace */ - if (!fil_open_single_table_tablespace(TRUE, - space, name)) { - /* We failed to find a sensible tablespace - file */ - - ibd_file_missing = TRUE; - } - } - } - - ut_a(name_of_col_is(sys_tables, sys_index, 4, "N_COLS")); - - field = rec_get_nth_field_old(rec, 4, &len); - n_cols = mach_read_from_4(field); - - flags = 0; - - /* The high-order bit of N_COLS is the "compact format" flag. 
*/ - if (n_cols & 0x80000000UL) { - flags |= DICT_TF_COMPACT; - } - - table = dict_mem_table_create(name, space, n_cols & ~0x80000000UL, - flags); - - table->ibd_file_missing = (unsigned int) ibd_file_missing; - - ut_a(name_of_col_is(sys_tables, sys_index, 3, "ID")); - - field = rec_get_nth_field_old(rec, 3, &len); - table->id = mach_read_from_8(field); - - field = rec_get_nth_field_old(rec, 5, &len); - if (UNIV_UNLIKELY(mach_read_from_4(field) != DICT_TABLE_ORDINARY)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: table %s: unknown table type %lu\n", - name, (ulong) mach_read_from_4(field)); - goto err_exit; - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - dict_load_columns(table, heap); - - dict_table_add_to_cache(table, heap); - - mem_heap_empty(heap); - - err = dict_load_indexes(table, heap); - - /* If the force recovery flag is set, we open the table irrespective - of the error condition, since the user may want to dump data from the - clustered index. However we load the foreign key information only if - all indexes were loaded. */ - if (err == DB_SUCCESS) { - err = dict_load_foreigns(table->name, TRUE); - } else if (!srv_force_recovery) { - dict_table_remove_from_cache(table); - table = NULL; - } -#if 0 - if (err != DB_SUCCESS && table != NULL) { - - mutex_enter(&dict_foreign_err_mutex); - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: could not make a foreign key" - " definition to match\n" - "InnoDB: the foreign key table" - " or the referenced table!\n" - "InnoDB: The data dictionary of InnoDB is corrupt." 
- " You may need to drop\n" - "InnoDB: and recreate the foreign key table" - " or the referenced table.\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n" - "InnoDB: Latest foreign key error printout:\n%s\n", - dict_foreign_err_buf); - - mutex_exit(&dict_foreign_err_mutex); - } -#endif /* 0 */ - mem_heap_free(heap); - - return(table); -} - -/*************************************************************************** -Loads a table object based on the table id. */ - -dict_table_t* -dict_load_table_on_id( -/*==================*/ - /* out: table; NULL if table does not exist */ - dulint table_id) /* in: table id */ -{ - byte id_buf[8]; - btr_pcur_t pcur; - mem_heap_t* heap; - dtuple_t* tuple; - dfield_t* dfield; - dict_index_t* sys_table_ids; - dict_table_t* sys_tables; - rec_t* rec; - byte* field; - ulint len; - dict_table_t* table; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - /* NOTE that the operation of this function is protected by - the dictionary mutex, and therefore no deadlocks can occur - with other dictionary operations. 
*/ - - mtr_start(&mtr); - /*---------------------------------------------------*/ - /* Get the secondary index based on ID for table SYS_TABLES */ - sys_tables = dict_sys->sys_tables; - sys_table_ids = dict_table_get_next_index( - dict_table_get_first_index(sys_tables)); - ut_a(!dict_table_is_comp(sys_tables)); - heap = mem_heap_create(256); - - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - /* Write the table id in byte format to id_buf */ - mach_write_to_8(id_buf, table_id); - - dfield_set_data(dfield, id_buf, 8); - dict_index_copy_types(tuple, sys_table_ids, 1); - - btr_pcur_open_on_user_rec(sys_table_ids, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - rec = btr_pcur_get_rec(&pcur); - - if (!btr_pcur_is_on_user_rec(&pcur, &mtr) - || rec_get_deleted_flag(rec, 0)) { - /* Not found */ - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); - } - - /*---------------------------------------------------*/ - /* Now we have the record in the secondary index containing the - table ID and NAME */ - - rec = btr_pcur_get_rec(&pcur); - field = rec_get_nth_field_old(rec, 0, &len); - ut_ad(len == 8); - - /* Check if the table id in record is the one searched for */ - if (ut_dulint_cmp(table_id, mach_read_from_8(field)) != 0) { - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(NULL); - } - - /* Now we get the table name from the record */ - field = rec_get_nth_field_old(rec, 1, &len); - /* Load the table definition to memory */ - table = dict_load_table(mem_heap_strdupl(heap, (char*) field, len)); - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - return(table); -} - -/************************************************************************ -This function is called when the database is booted. Loads system table -index definitions except for the clustered index which is added to the -dictionary cache at booting before calling this function. 
*/ - -void -dict_load_sys_table( -/*================*/ - dict_table_t* table) /* in: system table */ -{ - mem_heap_t* heap; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - heap = mem_heap_create(1000); - - dict_load_indexes(table, heap); - - mem_heap_free(heap); -} - -/************************************************************************ -Loads foreign key constraint col names (also for the referenced table). */ -static -void -dict_load_foreign_cols( -/*===================*/ - const char* id, /* in: foreign constraint id as a - null-terminated string */ - dict_foreign_t* foreign)/* in: foreign constraint object */ -{ - dict_table_t* sys_foreign_cols; - dict_index_t* sys_index; - btr_pcur_t pcur; - dtuple_t* tuple; - dfield_t* dfield; - rec_t* rec; - byte* field; - ulint len; - ulint i; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - foreign->foreign_col_names = mem_heap_alloc( - foreign->heap, foreign->n_fields * sizeof(void*)); - - foreign->referenced_col_names = mem_heap_alloc( - foreign->heap, foreign->n_fields * sizeof(void*)); - mtr_start(&mtr); - - sys_foreign_cols = dict_table_get_low("SYS_FOREIGN_COLS"); - sys_index = UT_LIST_GET_FIRST(sys_foreign_cols->indexes); - ut_a(!dict_table_is_comp(sys_foreign_cols)); - - tuple = dtuple_create(foreign->heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(dfield, id, ut_strlen(id)); - dict_index_copy_types(tuple, sys_index, 1); - - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - for (i = 0; i < foreign->n_fields; i++) { - - rec = btr_pcur_get_rec(&pcur); - - ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr)); - ut_a(!rec_get_deleted_flag(rec, 0)); - - field = rec_get_nth_field_old(rec, 0, &len); - ut_a(len == ut_strlen(id)); - ut_a(ut_memcmp(id, field, len) == 0); - - field = rec_get_nth_field_old(rec, 1, &len); - ut_a(len == 4); - ut_a(i == mach_read_from_4(field)); - - field = rec_get_nth_field_old(rec, 4, &len); - foreign->foreign_col_names[i] = 
mem_heap_strdupl( - foreign->heap, (char*) field, len); - - field = rec_get_nth_field_old(rec, 5, &len); - foreign->referenced_col_names[i] = mem_heap_strdupl( - foreign->heap, (char*) field, len); - - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); -} - -/*************************************************************************** -Loads a foreign key constraint to the dictionary cache. */ -static -ulint -dict_load_foreign( -/*==============*/ - /* out: DB_SUCCESS or error code */ - const char* id, /* in: foreign constraint id as a - null-terminated string */ - ibool check_charsets) - /* in: TRUE=check charset compatibility */ -{ - dict_foreign_t* foreign; - dict_table_t* sys_foreign; - btr_pcur_t pcur; - dict_index_t* sys_index; - dtuple_t* tuple; - mem_heap_t* heap2; - dfield_t* dfield; - rec_t* rec; - byte* field; - ulint len; - ulint n_fields_and_type; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - heap2 = mem_heap_create(1000); - - mtr_start(&mtr); - - sys_foreign = dict_table_get_low("SYS_FOREIGN"); - sys_index = UT_LIST_GET_FIRST(sys_foreign->indexes); - ut_a(!dict_table_is_comp(sys_foreign)); - - tuple = dtuple_create(heap2, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(dfield, id, ut_strlen(id)); - dict_index_copy_types(tuple, sys_index, 1); - - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); - rec = btr_pcur_get_rec(&pcur); - - if (!btr_pcur_is_on_user_rec(&pcur, &mtr) - || rec_get_deleted_flag(rec, 0)) { - /* Not found */ - - fprintf(stderr, - "InnoDB: Error A: cannot load foreign constraint %s\n", - id); - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap2); - - return(DB_ERROR); - } - - field = rec_get_nth_field_old(rec, 0, &len); - - /* Check if the id in record is the searched one */ - if (len != ut_strlen(id) || ut_memcmp(id, field, len) != 0) { - - fprintf(stderr, - "InnoDB: Error B: cannot load foreign 
constraint %s\n", - id); - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap2); - - return(DB_ERROR); - } - - /* Read the table names and the number of columns associated - with the constraint */ - - mem_heap_free(heap2); - - foreign = dict_mem_foreign_create(); - - n_fields_and_type = mach_read_from_4( - rec_get_nth_field_old(rec, 5, &len)); - - ut_a(len == 4); - - /* We store the type in the bits 24..29 of n_fields_and_type. */ - - foreign->type = (unsigned int) (n_fields_and_type >> 24); - foreign->n_fields = (unsigned int) (n_fields_and_type & 0x3FFUL); - - foreign->id = mem_heap_strdup(foreign->heap, id); - - field = rec_get_nth_field_old(rec, 3, &len); - foreign->foreign_table_name = mem_heap_strdupl( - foreign->heap, (char*) field, len); - - field = rec_get_nth_field_old(rec, 4, &len); - foreign->referenced_table_name = mem_heap_strdupl( - foreign->heap, (char*) field, len); - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - dict_load_foreign_cols(id, foreign); - - /* If the foreign table is not yet in the dictionary cache, we - have to load it so that we are able to make type comparisons - in the next function call. */ - - dict_table_get_low(foreign->foreign_table_name); - - /* Note that there may already be a foreign constraint object in - the dictionary cache for this constraint: then the following - call only sets the pointers in it to point to the appropriate table - and index objects and frees the newly created object foreign. - Adding to the cache should always succeed since we are not creating - a new foreign key constraint but loading one from the data - dictionary. */ - - return(dict_foreign_add_to_cache(foreign, check_charsets)); -} - -/*************************************************************************** -Loads foreign key constraints where the table is either the foreign key -holder or where the table is referenced by a foreign key. Adds these -constraints to the data dictionary. 
Note that we know that the dictionary -cache already contains all constraints where the other relevant table is -already in the dictionary cache. */ - -ulint -dict_load_foreigns( -/*===============*/ - /* out: DB_SUCCESS or error code */ - const char* table_name, /* in: table name */ - ibool check_charsets) /* in: TRUE=check charset - compatibility */ -{ - btr_pcur_t pcur; - mem_heap_t* heap; - dtuple_t* tuple; - dfield_t* dfield; - dict_index_t* sec_index; - dict_table_t* sys_foreign; - rec_t* rec; - byte* field; - ulint len; - char* id ; - ulint err; - mtr_t mtr; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - sys_foreign = dict_table_get_low("SYS_FOREIGN"); - - if (sys_foreign == NULL) { - /* No foreign keys defined yet in this database */ - - fprintf(stderr, - "InnoDB: Error: no foreign key system tables" - " in the database\n"); - - return(DB_ERROR); - } - - ut_a(!dict_table_is_comp(sys_foreign)); - mtr_start(&mtr); - - /* Get the secondary index based on FOR_NAME from table - SYS_FOREIGN */ - - sec_index = dict_table_get_next_index( - dict_table_get_first_index(sys_foreign)); -start_load: - heap = mem_heap_create(256); - - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(dfield, table_name, ut_strlen(table_name)); - dict_index_copy_types(tuple, sec_index, 1); - - btr_pcur_open_on_user_rec(sec_index, tuple, PAGE_CUR_GE, - BTR_SEARCH_LEAF, &pcur, &mtr); -loop: - rec = btr_pcur_get_rec(&pcur); - - if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { - /* End of index */ - - goto load_next_index; - } - - /* Now we have the record in the secondary index containing a table - name and a foreign constraint ID */ - - rec = btr_pcur_get_rec(&pcur); - field = rec_get_nth_field_old(rec, 0, &len); - - /* Check if the table name in the record is the one searched for; the - following call does the comparison in the latin1_swedish_ci - charset-collation, in a case-insensitive way. 
*/ - - if (0 != cmp_data_data(dfield_get_type(dfield)->mtype, - dfield_get_type(dfield)->prtype, - dfield_get_data(dfield), dfield_get_len(dfield), - field, len)) { - - goto load_next_index; - } - - /* Since table names in SYS_FOREIGN are stored in a case-insensitive - order, we have to check that the table name matches also in a binary - string comparison. On Unix, MySQL allows table names that only differ - in character case. */ - - if (0 != ut_memcmp(field, table_name, len)) { - - goto next_rec; - } - - if (rec_get_deleted_flag(rec, 0)) { - - goto next_rec; - } - - /* Now we get a foreign key constraint id */ - field = rec_get_nth_field_old(rec, 1, &len); - id = mem_heap_strdupl(heap, (char*) field, len); - - btr_pcur_store_position(&pcur, &mtr); - - mtr_commit(&mtr); - - /* Load the foreign constraint definition to the dictionary cache */ - - err = dict_load_foreign(id, check_charsets); - - if (err != DB_SUCCESS) { - btr_pcur_close(&pcur); - mem_heap_free(heap); - - return(err); - } - - mtr_start(&mtr); - - btr_pcur_restore_position(BTR_SEARCH_LEAF, &pcur, &mtr); -next_rec: - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - - goto loop; - -load_next_index: - btr_pcur_close(&pcur); - mtr_commit(&mtr); - mem_heap_free(heap); - - sec_index = dict_table_get_next_index(sec_index); - - if (sec_index != NULL) { - - mtr_start(&mtr); - - goto start_load; - } - - return(DB_SUCCESS); -} diff --git a/storage/innobase/dict/dict0mem.c b/storage/innobase/dict/dict0mem.c deleted file mode 100644 index 168771ca307..00000000000 --- a/storage/innobase/dict/dict0mem.c +++ /dev/null @@ -1,344 +0,0 @@ -/********************************************************************** -Data dictionary memory object creation - -(c) 1996 Innobase Oy - -Created 1/8/1996 Heikki Tuuri -***********************************************************************/ - -#include "dict0mem.h" - -#ifdef UNIV_NONINL -#include "dict0mem.ic" -#endif - -#include "rem0rec.h" -#include "data0type.h" -#include 
"mach0data.h" -#include "dict0dict.h" -#include "que0que.h" -#include "pars0pars.h" -#include "lock0lock.h" - -#define DICT_HEAP_SIZE 100 /* initial memory heap size when - creating a table or index object */ - -/************************************************************************** -Creates a table memory object. */ - -dict_table_t* -dict_mem_table_create( -/*==================*/ - /* out, own: table object */ - const char* name, /* in: table name */ - ulint space, /* in: space where the clustered index of - the table is placed; this parameter is - ignored if the table is made a member of - a cluster */ - ulint n_cols, /* in: number of columns */ - ulint flags) /* in: table flags */ -{ - dict_table_t* table; - mem_heap_t* heap; - - ut_ad(name); - ut_ad(!(flags & ~DICT_TF_COMPACT)); - - heap = mem_heap_create(DICT_HEAP_SIZE); - - table = mem_heap_alloc(heap, sizeof(dict_table_t)); - - table->heap = heap; - - table->flags = (unsigned int) flags; - table->name = mem_heap_strdup(heap, name); - table->dir_path_of_temp_table = NULL; - table->space = (unsigned int) space; - table->ibd_file_missing = FALSE; - table->tablespace_discarded = FALSE; - table->n_def = 0; - table->n_cols = (unsigned int) (n_cols + DATA_N_SYS_COLS); - - table->n_mysql_handles_opened = 0; - table->n_foreign_key_checks_running = 0; - - table->cached = FALSE; - - table->cols = mem_heap_alloc(heap, (n_cols + DATA_N_SYS_COLS) - * sizeof(dict_col_t)); - table->col_names = NULL; - UT_LIST_INIT(table->indexes); - - table->auto_inc_lock = mem_heap_alloc(heap, lock_get_size()); - - table->query_cache_inv_trx_id = ut_dulint_zero; - - UT_LIST_INIT(table->locks); - UT_LIST_INIT(table->foreign_list); - UT_LIST_INIT(table->referenced_list); - -#ifdef UNIV_DEBUG - table->does_not_fit_in_memory = FALSE; -#endif /* UNIV_DEBUG */ - - table->stat_initialized = FALSE; - - table->stat_modified_counter = 0; - - table->big_rows = 0; - - mutex_create(&table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX); - - table->autoinc 
= 0; - - /* The number of transactions that are either waiting on the - AUTOINC lock or have been granted the lock. */ - table->n_waiting_or_granted_auto_inc_locks = 0; - -#ifdef UNIV_DEBUG - table->magic_n = DICT_TABLE_MAGIC_N; -#endif /* UNIV_DEBUG */ - return(table); -} - -/******************************************************************** -Free a table memory object. */ - -void -dict_mem_table_free( -/*================*/ - dict_table_t* table) /* in: table */ -{ - ut_ad(table); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - mutex_free(&(table->autoinc_mutex)); - mem_heap_free(table->heap); -} - -/******************************************************************** -Append 'name' to 'col_names' (@see dict_table_t::col_names). */ -static -const char* -dict_add_col_name( -/*==============*/ - /* out: new column names array */ - const char* col_names, /* in: existing column names, or - NULL */ - ulint cols, /* in: number of existing columns */ - const char* name, /* in: new column name */ - mem_heap_t* heap) /* in: heap */ -{ - ulint old_len; - ulint new_len; - ulint total_len; - char* res; - - ut_ad(!cols == !col_names); - - /* Find out length of existing array. */ - if (col_names) { - const char* s = col_names; - ulint i; - - for (i = 0; i < cols; i++) { - s += strlen(s) + 1; - } - - old_len = s - col_names; - } else { - old_len = 0; - } - - new_len = strlen(name) + 1; - total_len = old_len + new_len; - - res = mem_heap_alloc(heap, total_len); - - if (old_len > 0) { - memcpy(res, col_names, old_len); - } - - memcpy(res + old_len, name, new_len); - - return(res); -} - -/************************************************************************** -Adds a column definition to a table. 
*/ - -void -dict_mem_table_add_col( -/*===================*/ - dict_table_t* table, /* in: table */ - mem_heap_t* heap, /* in: temporary memory heap, or NULL */ - const char* name, /* in: column name, or NULL */ - ulint mtype, /* in: main datatype */ - ulint prtype, /* in: precise type */ - ulint len) /* in: precision */ -{ - dict_col_t* col; - ulint mbminlen; - ulint mbmaxlen; - ulint i; - - ut_ad(table); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - ut_ad(!heap == !name); - - i = table->n_def++; - - if (name) { - if (UNIV_UNLIKELY(table->n_def == table->n_cols)) { - heap = table->heap; - } - if (UNIV_LIKELY(i) && UNIV_UNLIKELY(!table->col_names)) { - /* All preceding column names are empty. */ - char* s = mem_heap_alloc(heap, table->n_def); - memset(s, 0, table->n_def); - table->col_names = s; - } - - table->col_names = dict_add_col_name(table->col_names, - i, name, heap); - } - - col = (dict_col_t*) dict_table_get_nth_col(table, i); - - col->ind = (unsigned int) i; - col->ord_part = 0; - - col->mtype = (unsigned int) mtype; - col->prtype = (unsigned int) prtype; - col->len = (unsigned int) len; - - dtype_get_mblen(mtype, prtype, &mbminlen, &mbmaxlen); - - col->mbminlen = (unsigned int) mbminlen; - col->mbmaxlen = (unsigned int) mbmaxlen; -} - -/************************************************************************** -Creates an index memory object. */ - -dict_index_t* -dict_mem_index_create( -/*==================*/ - /* out, own: index object */ - const char* table_name, /* in: table name */ - const char* index_name, /* in: index name */ - ulint space, /* in: space where the index tree is - placed, ignored if the index is of - the clustered type */ - ulint type, /* in: DICT_UNIQUE, - DICT_CLUSTERED, ... 
ORed */ - ulint n_fields) /* in: number of fields */ -{ - dict_index_t* index; - mem_heap_t* heap; - - ut_ad(table_name && index_name); - - heap = mem_heap_create(DICT_HEAP_SIZE); - index = mem_heap_alloc(heap, sizeof(dict_index_t)); - - index->heap = heap; - - index->type = type; - index->space = (unsigned int) space; - index->page = 0; - index->name = mem_heap_strdup(heap, index_name); - index->table_name = table_name; - index->table = NULL; - index->n_def = index->n_nullable = 0; - index->n_fields = (unsigned int) n_fields; - index->fields = mem_heap_alloc(heap, 1 + n_fields - * sizeof(dict_field_t)); - /* The '1 +' above prevents allocation - of an empty mem block */ - index->stat_n_diff_key_vals = NULL; - - index->cached = FALSE; - memset(&index->lock, 0, sizeof index->lock); -#ifdef UNIV_DEBUG - index->magic_n = DICT_INDEX_MAGIC_N; -#endif /* UNIV_DEBUG */ - return(index); -} - -/************************************************************************** -Creates and initializes a foreign constraint memory object. */ - -dict_foreign_t* -dict_mem_foreign_create(void) -/*=========================*/ - /* out, own: foreign constraint struct */ -{ - dict_foreign_t* foreign; - mem_heap_t* heap; - - heap = mem_heap_create(100); - - foreign = mem_heap_alloc(heap, sizeof(dict_foreign_t)); - - foreign->heap = heap; - - foreign->id = NULL; - - foreign->type = 0; - foreign->foreign_table_name = NULL; - foreign->foreign_table = NULL; - foreign->foreign_col_names = NULL; - - foreign->referenced_table_name = NULL; - foreign->referenced_table = NULL; - foreign->referenced_col_names = NULL; - - foreign->n_fields = 0; - - foreign->foreign_index = NULL; - foreign->referenced_index = NULL; - - return(foreign); -} - -/************************************************************************** -Adds a field definition to an index. NOTE: does not take a copy -of the column name if the field is a column. 
The memory occupied -by the column name may be released only after publishing the index. */ - -void -dict_mem_index_add_field( -/*=====================*/ - dict_index_t* index, /* in: index */ - const char* name, /* in: column name */ - ulint prefix_len) /* in: 0 or the column prefix length - in a MySQL index like - INDEX (textcol(25)) */ -{ - dict_field_t* field; - - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - index->n_def++; - - field = dict_index_get_nth_field(index, index->n_def - 1); - - field->name = name; - field->prefix_len = (unsigned int) prefix_len; -} - -/************************************************************************** -Frees an index memory object. */ - -void -dict_mem_index_free( -/*================*/ - dict_index_t* index) /* in: index */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - mem_heap_free(index->heap); -} diff --git a/storage/innobase/dyn/dyn0dyn.c b/storage/innobase/dyn/dyn0dyn.c deleted file mode 100644 index bcf2fda2b08..00000000000 --- a/storage/innobase/dyn/dyn0dyn.c +++ /dev/null @@ -1,48 +0,0 @@ -/****************************************************** -The dynamically allocated array - -(c) 1996 Innobase Oy - -Created 2/5/1996 Heikki Tuuri -*******************************************************/ - -#include "dyn0dyn.h" -#ifdef UNIV_NONINL -#include "dyn0dyn.ic" -#endif - -/**************************************************************** -Adds a new block to a dyn array. 
*/ - -dyn_block_t* -dyn_array_add_block( -/*================*/ - /* out: created block */ - dyn_array_t* arr) /* in: dyn array */ -{ - mem_heap_t* heap; - dyn_block_t* block; - - ut_ad(arr); - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - - if (arr->heap == NULL) { - UT_LIST_INIT(arr->base); - UT_LIST_ADD_FIRST(list, arr->base, arr); - - arr->heap = mem_heap_create(sizeof(dyn_block_t)); - } - - block = dyn_array_get_last_block(arr); - block->used = block->used | DYN_BLOCK_FULL_FLAG; - - heap = arr->heap; - - block = mem_heap_alloc(heap, sizeof(dyn_block_t)); - - block->used = 0; - - UT_LIST_ADD_LAST(list, arr->base, block); - - return(block); -} diff --git a/storage/innobase/eval/eval0eval.c b/storage/innobase/eval/eval0eval.c deleted file mode 100644 index cbc47ec508f..00000000000 --- a/storage/innobase/eval/eval0eval.c +++ /dev/null @@ -1,836 +0,0 @@ -/****************************************************** -SQL evaluator: evaluates simple data structures, like expressions, in -a query graph - -(c) 1997 Innobase Oy - -Created 12/29/1997 Heikki Tuuri -*******************************************************/ - -#include "eval0eval.h" - -#ifdef UNIV_NONINL -#include "eval0eval.ic" -#endif - -#include "data0data.h" -#include "row0sel.h" - -/* The RND function seed */ -ulint eval_rnd = 128367121; - -/* Dummy adress used when we should allocate a buffer of size 0 in -the function below */ - -byte eval_dummy; - -/********************************************************************* -Allocate a buffer from global dynamic memory for a value of a que_node. -NOTE that this memory must be explicitly freed when the query graph is -freed. If the node already has an allocated buffer, that buffer is freed -here. NOTE that this is the only function where dynamic memory should be -allocated for a query node val field. 
*/ - -byte* -eval_node_alloc_val_buf( -/*====================*/ - /* out: pointer to allocated buffer */ - que_node_t* node, /* in: query graph node; sets the val field - data field to point to the new buffer, and - len field equal to size */ - ulint size) /* in: buffer size */ -{ - dfield_t* dfield; - byte* data; - - ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL - || que_node_get_type(node) == QUE_NODE_FUNC); - - dfield = que_node_get_val(node); - - data = dfield_get_data(dfield); - - if (data && data != &eval_dummy) { - mem_free(data); - } - - if (size == 0) { - data = &eval_dummy; - } else { - data = mem_alloc(size); - } - - que_node_set_val_buf_size(node, size); - - dfield_set_data(dfield, data, size); - - return(data); -} - -/********************************************************************* -Free the buffer from global dynamic memory for a value of a que_node, -if it has been allocated in the above function. The freeing for pushed -column values is done in sel_col_prefetch_buf_free. */ - -void -eval_node_free_val_buf( -/*===================*/ - que_node_t* node) /* in: query graph node */ -{ - dfield_t* dfield; - byte* data; - - ut_ad(que_node_get_type(node) == QUE_NODE_SYMBOL - || que_node_get_type(node) == QUE_NODE_FUNC); - - dfield = que_node_get_val(node); - - data = dfield_get_data(dfield); - - if (que_node_get_val_buf_size(node) > 0) { - ut_a(data); - - mem_free(data); - } -} - -/********************************************************************* -Evaluates a comparison node. 
*/ - -ibool -eval_cmp( -/*=====*/ - /* out: the result of the comparison */ - func_node_t* cmp_node) /* in: comparison node */ -{ - que_node_t* arg1; - que_node_t* arg2; - int res; - ibool val; - int func; - - ut_ad(que_node_get_type(cmp_node) == QUE_NODE_FUNC); - - arg1 = cmp_node->args; - arg2 = que_node_get_next(arg1); - - res = cmp_dfield_dfield(que_node_get_val(arg1), - que_node_get_val(arg2)); - val = TRUE; - - func = cmp_node->func; - - if (func == '=') { - if (res != 0) { - val = FALSE; - } - } else if (func == '<') { - if (res != -1) { - val = FALSE; - } - } else if (func == PARS_LE_TOKEN) { - if (res == 1) { - val = FALSE; - } - } else if (func == PARS_NE_TOKEN) { - if (res == 0) { - val = FALSE; - } - } else if (func == PARS_GE_TOKEN) { - if (res == -1) { - val = FALSE; - } - } else { - ut_ad(func == '>'); - - if (res != 1) { - val = FALSE; - } - } - - eval_node_set_ibool_val(cmp_node, val); - - return(val); -} - -/********************************************************************* -Evaluates a logical operation node. */ -UNIV_INLINE -void -eval_logical( -/*=========*/ - func_node_t* logical_node) /* in: logical operation node */ -{ - que_node_t* arg1; - que_node_t* arg2; - ibool val1; - ibool val2 = 0; /* remove warning */ - ibool val = 0; /* remove warning */ - int func; - - ut_ad(que_node_get_type(logical_node) == QUE_NODE_FUNC); - - arg1 = logical_node->args; - arg2 = que_node_get_next(arg1); /* arg2 is NULL if func is 'NOT' */ - - val1 = eval_node_get_ibool_val(arg1); - - if (arg2) { - val2 = eval_node_get_ibool_val(arg2); - } - - func = logical_node->func; - - if (func == PARS_AND_TOKEN) { - val = val1 & val2; - } else if (func == PARS_OR_TOKEN) { - val = val1 | val2; - } else if (func == PARS_NOT_TOKEN) { - val = TRUE - val1; - } else { - ut_error; - } - - eval_node_set_ibool_val(logical_node, val); -} - -/********************************************************************* -Evaluates an arithmetic operation node. 
*/ -UNIV_INLINE -void -eval_arith( -/*=======*/ - func_node_t* arith_node) /* in: arithmetic operation node */ -{ - que_node_t* arg1; - que_node_t* arg2; - lint val1; - lint val2 = 0; /* remove warning */ - lint val; - int func; - - ut_ad(que_node_get_type(arith_node) == QUE_NODE_FUNC); - - arg1 = arith_node->args; - arg2 = que_node_get_next(arg1); /* arg2 is NULL if func is unary '-' */ - - val1 = eval_node_get_int_val(arg1); - - if (arg2) { - val2 = eval_node_get_int_val(arg2); - } - - func = arith_node->func; - - if (func == '+') { - val = val1 + val2; - } else if ((func == '-') && arg2) { - val = val1 - val2; - } else if (func == '-') { - val = -val1; - } else if (func == '*') { - val = val1 * val2; - } else { - ut_ad(func == '/'); - val = val1 / val2; - } - - eval_node_set_int_val(arith_node, val); -} - -/********************************************************************* -Evaluates an aggregate operation node. */ -UNIV_INLINE -void -eval_aggregate( -/*===========*/ - func_node_t* node) /* in: aggregate operation node */ -{ - que_node_t* arg; - lint val; - lint arg_val; - int func; - - ut_ad(que_node_get_type(node) == QUE_NODE_FUNC); - - val = eval_node_get_int_val(node); - - func = node->func; - - if (func == PARS_COUNT_TOKEN) { - - val = val + 1; - } else { - ut_ad(func == PARS_SUM_TOKEN); - - arg = node->args; - arg_val = eval_node_get_int_val(arg); - - val = val + arg_val; - } - - eval_node_set_int_val(node, val); -} - -/********************************************************************* -Evaluates a predefined function node where the function is not relevant -in benchmarks. */ -static -void -eval_predefined_2( -/*==============*/ - func_node_t* func_node) /* in: predefined function node */ -{ - que_node_t* arg; - que_node_t* arg1; - que_node_t* arg2 = 0; /* remove warning (??? bug ???) 
*/ - lint int_val; - byte* data; - ulint len1; - ulint len2; - int func; - ulint i; - - ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC); - - arg1 = func_node->args; - - if (arg1) { - arg2 = que_node_get_next(arg1); - } - - func = func_node->func; - - if (func == PARS_PRINTF_TOKEN) { - - arg = arg1; - - while (arg) { - dfield_print(que_node_get_val(arg)); - - arg = que_node_get_next(arg); - } - - putc('\n', stderr); - - } else if (func == PARS_ASSERT_TOKEN) { - - if (!eval_node_get_ibool_val(arg1)) { - fputs("SQL assertion fails in a stored procedure!\n", - stderr); - } - - ut_a(eval_node_get_ibool_val(arg1)); - - /* This function, or more precisely, a debug procedure, - returns no value */ - - } else if (func == PARS_RND_TOKEN) { - - len1 = (ulint)eval_node_get_int_val(arg1); - len2 = (ulint)eval_node_get_int_val(arg2); - - ut_ad(len2 >= len1); - - if (len2 > len1) { - int_val = (lint) (len1 - + (eval_rnd % (len2 - len1 + 1))); - } else { - int_val = (lint) len1; - } - - eval_rnd = ut_rnd_gen_next_ulint(eval_rnd); - - eval_node_set_int_val(func_node, int_val); - - } else if (func == PARS_RND_STR_TOKEN) { - - len1 = (ulint)eval_node_get_int_val(arg1); - - data = eval_node_ensure_val_buf(func_node, len1); - - for (i = 0; i < len1; i++) { - data[i] = (byte)(97 + (eval_rnd % 3)); - - eval_rnd = ut_rnd_gen_next_ulint(eval_rnd); - } - } else { - ut_error; - } -} - -/********************************************************************* -Evaluates a notfound-function node. 
*/ -UNIV_INLINE -void -eval_notfound( -/*==========*/ - func_node_t* func_node) /* in: function node */ -{ - que_node_t* arg1; - que_node_t* arg2; - sym_node_t* cursor; - sel_node_t* sel_node; - ibool ibool_val; - - arg1 = func_node->args; - arg2 = que_node_get_next(arg1); - - ut_ad(func_node->func == PARS_NOTFOUND_TOKEN); - - cursor = arg1; - - ut_ad(que_node_get_type(cursor) == QUE_NODE_SYMBOL); - - if (cursor->token_type == SYM_LIT) { - - ut_ad(ut_memcmp(dfield_get_data(que_node_get_val(cursor)), - "SQL", 3) == 0); - - sel_node = cursor->sym_table->query_graph->last_sel_node; - } else { - sel_node = cursor->alias->cursor_def; - } - - if (sel_node->state == SEL_NODE_NO_MORE_ROWS) { - ibool_val = TRUE; - } else { - ibool_val = FALSE; - } - - eval_node_set_ibool_val(func_node, ibool_val); -} - -/********************************************************************* -Evaluates a substr-function node. */ -UNIV_INLINE -void -eval_substr( -/*========*/ - func_node_t* func_node) /* in: function node */ -{ - que_node_t* arg1; - que_node_t* arg2; - que_node_t* arg3; - dfield_t* dfield; - byte* str1; - ulint len1; - ulint len2; - - arg1 = func_node->args; - arg2 = que_node_get_next(arg1); - - ut_ad(func_node->func == PARS_SUBSTR_TOKEN); - - arg3 = que_node_get_next(arg2); - - str1 = dfield_get_data(que_node_get_val(arg1)); - - len1 = (ulint)eval_node_get_int_val(arg2); - len2 = (ulint)eval_node_get_int_val(arg3); - - dfield = que_node_get_val(func_node); - - dfield_set_data(dfield, str1 + len1, len2); -} - -/********************************************************************* -Evaluates a replstr-procedure node. 
*/ -static -void -eval_replstr( -/*=========*/ - func_node_t* func_node) /* in: function node */ -{ - que_node_t* arg1; - que_node_t* arg2; - que_node_t* arg3; - que_node_t* arg4; - byte* str1; - byte* str2; - ulint len1; - ulint len2; - - arg1 = func_node->args; - arg2 = que_node_get_next(arg1); - - ut_ad(que_node_get_type(arg1) == QUE_NODE_SYMBOL); - - arg3 = que_node_get_next(arg2); - arg4 = que_node_get_next(arg3); - - str1 = dfield_get_data(que_node_get_val(arg1)); - str2 = dfield_get_data(que_node_get_val(arg2)); - - len1 = (ulint)eval_node_get_int_val(arg3); - len2 = (ulint)eval_node_get_int_val(arg4); - - if ((dfield_get_len(que_node_get_val(arg1)) < len1 + len2) - || (dfield_get_len(que_node_get_val(arg2)) < len2)) { - - ut_error; - } - - ut_memcpy(str1 + len1, str2, len2); -} - -/********************************************************************* -Evaluates an instr-function node. */ -static -void -eval_instr( -/*=======*/ - func_node_t* func_node) /* in: function node */ -{ - que_node_t* arg1; - que_node_t* arg2; - dfield_t* dfield1; - dfield_t* dfield2; - lint int_val; - byte* str1; - byte* str2; - byte match_char; - ulint len1; - ulint len2; - ulint i; - ulint j; - - arg1 = func_node->args; - arg2 = que_node_get_next(arg1); - - dfield1 = que_node_get_val(arg1); - dfield2 = que_node_get_val(arg2); - - str1 = dfield_get_data(dfield1); - str2 = dfield_get_data(dfield2); - - len1 = dfield_get_len(dfield1); - len2 = dfield_get_len(dfield2); - - if (len2 == 0) { - ut_error; - } - - match_char = str2[0]; - - for (i = 0; i < len1; i++) { - /* In this outer loop, the number of matched characters is 0 */ - - if (str1[i] == match_char) { - - if (i + len2 > len1) { - - break; - } - - for (j = 1;; j++) { - /* We have already matched j characters */ - - if (j == len2) { - int_val = i + 1; - - goto match_found; - } - - if (str1[i + j] != str2[j]) { - - break; - } - } - } - } - - int_val = 0; - -match_found: - eval_node_set_int_val(func_node, int_val); -} - 
-/********************************************************************* -Evaluates a predefined function node. */ -UNIV_INLINE -void -eval_binary_to_number( -/*==================*/ - func_node_t* func_node) /* in: function node */ -{ - que_node_t* arg1; - dfield_t* dfield; - byte* str1; - byte* str2; - ulint len1; - ulint int_val; - - arg1 = func_node->args; - - dfield = que_node_get_val(arg1); - - str1 = dfield_get_data(dfield); - len1 = dfield_get_len(dfield); - - if (len1 > 4) { - ut_error; - } - - if (len1 == 4) { - str2 = str1; - } else { - int_val = 0; - str2 = (byte*)&int_val; - - ut_memcpy(str2 + (4 - len1), str1, len1); - } - - eval_node_copy_and_alloc_val(func_node, str2, 4); -} - -/********************************************************************* -Evaluates a predefined function node. */ -static -void -eval_concat( -/*========*/ - func_node_t* func_node) /* in: function node */ -{ - que_node_t* arg; - dfield_t* dfield; - byte* data; - ulint len; - ulint len1; - - arg = func_node->args; - len = 0; - - while (arg) { - len1 = dfield_get_len(que_node_get_val(arg)); - - len += len1; - - arg = que_node_get_next(arg); - } - - data = eval_node_ensure_val_buf(func_node, len); - - arg = func_node->args; - len = 0; - - while (arg) { - dfield = que_node_get_val(arg); - len1 = dfield_get_len(dfield); - - ut_memcpy(data + len, dfield_get_data(dfield), len1); - - len += len1; - - arg = que_node_get_next(arg); - } -} - -/********************************************************************* -Evaluates a predefined function node. If the first argument is an integer, -this function looks at the second argument which is the integer length in -bytes, and converts the integer to a VARCHAR. -If the first argument is of some other type, this function converts it to -BINARY. 
*/ -UNIV_INLINE -void -eval_to_binary( -/*===========*/ - func_node_t* func_node) /* in: function node */ -{ - que_node_t* arg1; - que_node_t* arg2; - dfield_t* dfield; - byte* str1; - ulint len; - ulint len1; - - arg1 = func_node->args; - - str1 = dfield_get_data(que_node_get_val(arg1)); - - if (dtype_get_mtype(que_node_get_data_type(arg1)) != DATA_INT) { - - len = dfield_get_len(que_node_get_val(arg1)); - - dfield = que_node_get_val(func_node); - - dfield_set_data(dfield, str1, len); - - return; - } - - arg2 = que_node_get_next(arg1); - - len1 = (ulint)eval_node_get_int_val(arg2); - - if (len1 > 4) { - - ut_error; - } - - dfield = que_node_get_val(func_node); - - dfield_set_data(dfield, str1 + (4 - len1), len1); -} - -/********************************************************************* -Evaluates a predefined function node. */ -UNIV_INLINE -void -eval_predefined( -/*============*/ - func_node_t* func_node) /* in: function node */ -{ - que_node_t* arg1; - lint int_val; - byte* data; - int func; - - func = func_node->func; - - arg1 = func_node->args; - - if (func == PARS_LENGTH_TOKEN) { - - int_val = (lint)dfield_get_len(que_node_get_val(arg1)); - - } else if (func == PARS_TO_CHAR_TOKEN) { - - /* Convert number to character string as a - signed decimal integer. */ - - ulint uint_val; - int int_len; - - int_val = eval_node_get_int_val(arg1); - - /* Determine the length of the string. 
*/ - - if (int_val == 0) { - int_len = 1; /* the number 0 occupies 1 byte */ - } else { - int_len = 0; - if (int_val < 0) { - uint_val = ((ulint) -int_val - 1) + 1; - int_len++; /* reserve space for minus sign */ - } else { - uint_val = (ulint) int_val; - } - for (; uint_val > 0; int_len++) { - uint_val /= 10; - } - } - - /* allocate the string */ - data = eval_node_ensure_val_buf(func_node, int_len + 1); - - /* add terminating NUL character */ - data[int_len] = 0; - - /* convert the number */ - - if (int_val == 0) { - data[0] = '0'; - } else { - int tmp; - if (int_val < 0) { - data[0] = '-'; /* preceding minus sign */ - uint_val = ((ulint) -int_val - 1) + 1; - } else { - uint_val = (ulint) int_val; - } - for (tmp = int_len; uint_val > 0; uint_val /= 10) { - data[--tmp] = (byte) - ('0' + (byte)(uint_val % 10)); - } - } - - dfield_set_len((dfield_t*) que_node_get_val(func_node), - int_len); - - return; - - } else if (func == PARS_TO_NUMBER_TOKEN) { - - int_val = atoi((char*) - dfield_get_data(que_node_get_val(arg1))); - - } else if (func == PARS_SYSDATE_TOKEN) { - int_val = (lint)ut_time(); - } else { - eval_predefined_2(func_node); - - return; - } - - eval_node_set_int_val(func_node, int_val); -} - -/********************************************************************* -Evaluates a function node. 
*/

void
eval_func(
/*======*/
	func_node_t*	func_node)	/* in: function node */
{
	que_node_t*	arg;
	ulint		func_class;
	ulint		func;
	ibool		null_allowed;

	ut_ad(que_node_get_type(func_node) == QUE_NODE_FUNC);

	func_class = func_node->class;
	func = func_node->func;

	/* The functions are not defined for SQL null argument
	values, except for eval_cmp, notfound and printf */

	null_allowed = (func_class == PARS_FUNC_CMP)
		|| (func == PARS_NOTFOUND_TOKEN)
		|| (func == PARS_PRINTF_TOKEN);

	/* Evaluate first the argument list */

	for (arg = func_node->args; arg; arg = que_node_get_next(arg)) {

		eval_exp(arg);

		if ((dfield_get_len(que_node_get_val(arg)) == UNIV_SQL_NULL)
		    && !null_allowed) {
			ut_error;
		}
	}

	/* Dispatch on the function class */

	if (func_class == PARS_FUNC_CMP) {
		eval_cmp(func_node);

		return;
	}

	if (func_class == PARS_FUNC_ARITH) {
		eval_arith(func_node);

		return;
	}

	if (func_class == PARS_FUNC_AGGREGATE) {
		eval_aggregate(func_node);

		return;
	}

	if (func_class != PARS_FUNC_PREDEFINED) {
		ut_ad(func_class == PARS_FUNC_LOGICAL);

		eval_logical(func_node);

		return;
	}

	/* A predefined function: dispatch on the function token */

	if (func == PARS_NOTFOUND_TOKEN) {
		eval_notfound(func_node);
	} else if (func == PARS_SUBSTR_TOKEN) {
		eval_substr(func_node);
	} else if (func == PARS_REPLSTR_TOKEN) {
		eval_replstr(func_node);
	} else if (func == PARS_INSTR_TOKEN) {
		eval_instr(func_node);
	} else if (func == PARS_BINARY_TO_NUMBER_TOKEN) {
		eval_binary_to_number(func_node);
	} else if (func == PARS_CONCAT_TOKEN) {
		eval_concat(func_node);
	} else if (func == PARS_TO_BINARY_TOKEN) {
		eval_to_binary(func_node);
	} else {
		eval_predefined(func_node);
	}
}
diff --git a/storage/innobase/eval/eval0proc.c b/storage/innobase/eval/eval0proc.c
deleted file mode 100644
index a513e8e4024..00000000000
--- a/storage/innobase/eval/eval0proc.c
+++ /dev/null
@@ -1,278 +0,0 @@
/******************************************************
Executes SQL stored procedures and their control structures

(c) 1998 Innobase Oy

Created 1/20/1998 Heikki Tuuri
*******************************************************/

-#include "eval0proc.h" - -#ifdef UNIV_NONINL -#include "eval0proc.ic" -#endif - -/************************************************************************** -Performs an execution step of an if-statement node. */ - -que_thr_t* -if_step( -/*====*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - if_node_t* node; - elsif_node_t* elsif_node; - - ut_ad(thr); - - node = thr->run_node; - ut_ad(que_node_get_type(node) == QUE_NODE_IF); - - if (thr->prev_node == que_node_get_parent(node)) { - - /* Evaluate the condition */ - - eval_exp(node->cond); - - if (eval_node_get_ibool_val(node->cond)) { - - /* The condition evaluated to TRUE: start execution - from the first statement in the statement list */ - - thr->run_node = node->stat_list; - - } else if (node->else_part) { - thr->run_node = node->else_part; - - } else if (node->elsif_list) { - elsif_node = node->elsif_list; - - for (;;) { - eval_exp(elsif_node->cond); - - if (eval_node_get_ibool_val( - elsif_node->cond)) { - - /* The condition evaluated to TRUE: - start execution from the first - statement in the statement list */ - - thr->run_node = elsif_node->stat_list; - - break; - } - - elsif_node = que_node_get_next(elsif_node); - - if (elsif_node == NULL) { - thr->run_node = NULL; - - break; - } - } - } else { - thr->run_node = NULL; - } - } else { - /* Move to the next statement */ - ut_ad(que_node_get_next(thr->prev_node) == NULL); - - thr->run_node = NULL; - } - - if (thr->run_node == NULL) { - thr->run_node = que_node_get_parent(node); - } - - return(thr); -} - -/************************************************************************** -Performs an execution step of a while-statement node. 
*/ - -que_thr_t* -while_step( -/*=======*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - while_node_t* node; - - ut_ad(thr); - - node = thr->run_node; - ut_ad(que_node_get_type(node) == QUE_NODE_WHILE); - - ut_ad((thr->prev_node == que_node_get_parent(node)) - || (que_node_get_next(thr->prev_node) == NULL)); - - /* Evaluate the condition */ - - eval_exp(node->cond); - - if (eval_node_get_ibool_val(node->cond)) { - - /* The condition evaluated to TRUE: start execution - from the first statement in the statement list */ - - thr->run_node = node->stat_list; - } else { - thr->run_node = que_node_get_parent(node); - } - - return(thr); -} - -/************************************************************************** -Performs an execution step of an assignment statement node. */ - -que_thr_t* -assign_step( -/*========*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - assign_node_t* node; - - ut_ad(thr); - - node = thr->run_node; - ut_ad(que_node_get_type(node) == QUE_NODE_ASSIGNMENT); - - /* Evaluate the value to assign */ - - eval_exp(node->val); - - eval_node_copy_val(node->var->alias, node->val); - - thr->run_node = que_node_get_parent(node); - - return(thr); -} - -/************************************************************************** -Performs an execution step of a for-loop node. 
*/ - -que_thr_t* -for_step( -/*=====*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - for_node_t* node; - que_node_t* parent; - lint loop_var_value; - - ut_ad(thr); - - node = thr->run_node; - - ut_ad(que_node_get_type(node) == QUE_NODE_FOR); - - parent = que_node_get_parent(node); - - if (thr->prev_node != parent) { - - /* Move to the next statement */ - thr->run_node = que_node_get_next(thr->prev_node); - - if (thr->run_node != NULL) { - - return(thr); - } - - /* Increment the value of loop_var */ - - loop_var_value = 1 + eval_node_get_int_val(node->loop_var); - } else { - /* Initialize the loop */ - - eval_exp(node->loop_start_limit); - eval_exp(node->loop_end_limit); - - loop_var_value = eval_node_get_int_val(node->loop_start_limit); - - node->loop_end_value - = (int) eval_node_get_int_val(node->loop_end_limit); - } - - /* Check if we should do another loop */ - - if (loop_var_value > node->loop_end_value) { - - /* Enough loops done */ - - thr->run_node = parent; - } else { - eval_node_set_int_val(node->loop_var, loop_var_value); - - thr->run_node = node->stat_list; - } - - return(thr); -} - -/************************************************************************** -Performs an execution step of an exit statement node. */ - -que_thr_t* -exit_step( -/*======*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - exit_node_t* node; - que_node_t* loop_node; - - ut_ad(thr); - - node = thr->run_node; - - ut_ad(que_node_get_type(node) == QUE_NODE_EXIT); - - /* Loops exit by setting thr->run_node as the loop node's parent, so - find our containing loop node and get its parent. */ - - loop_node = que_node_get_containing_loop_node(node); - - /* If someone uses an EXIT statement outside of a loop, this will - trigger. 
*/ - ut_a(loop_node); - - thr->run_node = que_node_get_parent(loop_node); - - return(thr); -} - -/************************************************************************** -Performs an execution step of a return-statement node. */ - -que_thr_t* -return_step( -/*========*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - return_node_t* node; - que_node_t* parent; - - ut_ad(thr); - - node = thr->run_node; - - ut_ad(que_node_get_type(node) == QUE_NODE_RETURN); - - parent = node; - - while (que_node_get_type(parent) != QUE_NODE_PROC) { - - parent = que_node_get_parent(parent); - } - - ut_a(parent); - - thr->run_node = que_node_get_parent(parent); - - return(thr); -} diff --git a/storage/innobase/fil/fil0fil.c b/storage/innobase/fil/fil0fil.c deleted file mode 100644 index c63d67cae60..00000000000 --- a/storage/innobase/fil/fil0fil.c +++ /dev/null @@ -1,4566 +0,0 @@ -/****************************************************** -The tablespace memory cache - -(c) 1995 Innobase Oy - -Created 10/25/1995 Heikki Tuuri -*******************************************************/ - -#include "fil0fil.h" - -#include "mem0mem.h" -#include "sync0sync.h" -#include "hash0hash.h" -#include "os0file.h" -#include "os0sync.h" -#include "mach0data.h" -#include "ibuf0ibuf.h" -#include "buf0buf.h" -#include "buf0flu.h" -#include "buf0lru.h" -#include "log0log.h" -#include "log0recv.h" -#include "fsp0fsp.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "mtr0mtr.h" -#include "mtr0log.h" -#include "dict0dict.h" - - -/* - IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE - ============================================= - -The tablespace cache is responsible for providing fast read/write access to -tablespaces and logs of the database. File creation and deletion is done -in other modules which know more of the logic of the operation, however. - -A tablespace consists of a chain of files. 
The size of the files does not -have to be divisible by the database block size, because we may just leave -the last incomplete block unused. When a new file is appended to the -tablespace, the maximum size of the file is also specified. At the moment, -we think that it is best to extend the file to its maximum size already at -the creation of the file, because then we can avoid dynamically extending -the file when more space is needed for the tablespace. - -A block's position in the tablespace is specified with a 32-bit unsigned -integer. The files in the chain are thought to be catenated, and the block -corresponding to an address n is the nth block in the catenated file (where -the first block is named the 0th block, and the incomplete block fragments -at the end of files are not taken into account). A tablespace can be extended -by appending a new file at the end of the chain. - -Our tablespace concept is similar to the one of Oracle. - -To acquire more speed in disk transfers, a technique called disk striping is -sometimes used. This means that logical block addresses are divided in a -round-robin fashion across several disks. Windows NT supports disk striping, -so there we do not need to support it in the database. Disk striping is -implemented in hardware in RAID disks. We conclude that it is not necessary -to implement it in the database. Oracle 7 does not support disk striping, -either. - -Another trick used at some database sites is replacing tablespace files by -raw disks, that is, the whole physical disk drive, or a partition of it, is -opened as a single file, and it is accessed through byte offsets calculated -from the start of the disk or the partition. This is recommended in some -books on database tuning to achieve more speed in i/o. Using raw disk -certainly prevents the OS from fragmenting disk space, but it is not clear -if it really adds speed. 
We measured on the Pentium 100 MHz + NT + NTFS file -system + EIDE Conner disk only a negligible difference in speed when reading -from a file, versus reading from a raw disk. - -To have fast access to a tablespace or a log file, we put the data structures -to a hash table. Each tablespace and log file is given an unique 32-bit -identifier. - -Some operating systems do not support many open files at the same time, -though NT seems to tolerate at least 900 open files. Therefore, we put the -open files in an LRU-list. If we need to open another file, we may close the -file at the end of the LRU-list. When an i/o-operation is pending on a file, -the file cannot be closed. We take the file nodes with pending i/o-operations -out of the LRU-list and keep a count of pending operations. When an operation -completes, we decrement the count and return the file node to the LRU-list if -the count drops to zero. */ - -/* When mysqld is run, the default directory "." is the mysqld datadir, -but in the MySQL Embedded Server Library and ibbackup it is not the default -directory, and we must set the base file path explicitly */ -const char* fil_path_to_mysql_datadir = "."; - -/* The number of fsyncs done to the log */ -ulint fil_n_log_flushes = 0; - -ulint fil_n_pending_log_flushes = 0; -ulint fil_n_pending_tablespace_flushes = 0; - -/* Null file address */ -fil_addr_t fil_addr_null = {FIL_NULL, 0}; - -/* File node of a tablespace or the log data space */ -struct fil_node_struct { - fil_space_t* space; /* backpointer to the space where this node - belongs */ - char* name; /* path to the file */ - ibool open; /* TRUE if file open */ - os_file_t handle; /* OS handle to the file, if file open */ - ibool is_raw_disk;/* TRUE if the 'file' is actually a raw - device or a raw disk partition */ - ulint size; /* size of the file in database pages, 0 if - not known yet; the possible last incomplete - megabyte may be ignored if space == 0 */ - ulint n_pending; - /* count of pending i/o's on 
this file; - closing of the file is not allowed if - this is > 0 */ - ulint n_pending_flushes; - /* count of pending flushes on this file; - closing of the file is not allowed if - this is > 0 */ - ib_longlong modification_counter;/* when we write to the file we - increment this by one */ - ib_longlong flush_counter;/* up to what modification_counter value - we have flushed the modifications to disk */ - UT_LIST_NODE_T(fil_node_t) chain; - /* link field for the file chain */ - UT_LIST_NODE_T(fil_node_t) LRU; - /* link field for the LRU list */ - ulint magic_n; -}; - -#define FIL_NODE_MAGIC_N 89389 - -/* Tablespace or log data space: let us call them by a common name space */ -struct fil_space_struct { - char* name; /* space name = the path to the first file in - it */ - ulint id; /* space id */ - ib_longlong tablespace_version; - /* in DISCARD/IMPORT this timestamp is used to - check if we should ignore an insert buffer - merge request for a page because it actually - was for the previous incarnation of the - space */ - ibool mark; /* this is set to TRUE at database startup if - the space corresponds to a table in the InnoDB - data dictionary; so we can print a warning of - orphaned tablespaces */ - ibool stop_ios;/* TRUE if we want to rename the .ibd file of - tablespace and want to stop temporarily - posting of new i/o requests on the file */ - ibool stop_ibuf_merges; - /* we set this TRUE when we start deleting a - single-table tablespace */ - ibool is_being_deleted; - /* this is set to TRUE when we start - deleting a single-table tablespace and its - file; when this flag is set no further i/o - or flush requests can be placed on this space, - though there may be such requests still being - processed on this space */ - ulint purpose;/* FIL_TABLESPACE, FIL_LOG, or FIL_ARCH_LOG */ - UT_LIST_BASE_NODE_T(fil_node_t) chain; - /* base node for the file chain */ - ulint size; /* space size in pages; 0 if a single-table - tablespace whose size we do not know yet; - last 
incomplete megabytes in data files may be - ignored if space == 0 */ - ulint n_reserved_extents; - /* number of reserved free extents for - ongoing operations like B-tree page split */ - ulint n_pending_flushes; /* this is > 0 when flushing - the tablespace to disk; dropping of the - tablespace is forbidden if this is > 0 */ - ulint n_pending_ibuf_merges;/* this is > 0 when merging - insert buffer entries to a page so that we - may need to access the ibuf bitmap page in the - tablespade: dropping of the tablespace is - forbidden if this is > 0 */ - hash_node_t hash; /* hash chain node */ - hash_node_t name_hash;/* hash chain the name_hash table */ - rw_lock_t latch; /* latch protecting the file space storage - allocation */ - UT_LIST_NODE_T(fil_space_t) unflushed_spaces; - /* list of spaces with at least one unflushed - file we have written to */ - ibool is_in_unflushed_spaces; /* TRUE if this space is - currently in the list above */ - UT_LIST_NODE_T(fil_space_t) space_list; - /* list of all spaces */ - ibuf_data_t* ibuf_data; - /* insert buffer data */ - ulint magic_n; -}; - -#define FIL_SPACE_MAGIC_N 89472 - -/* The tablespace memory cache; also the totality of logs = the log data space, -is stored here; below we talk about tablespaces, but also the ib_logfiles -form a 'space' and it is handled here */ - -typedef struct fil_system_struct fil_system_t; -struct fil_system_struct { - mutex_t mutex; /* The mutex protecting the cache */ - hash_table_t* spaces; /* The hash table of spaces in the - system; they are hashed on the space - id */ - hash_table_t* name_hash; /* hash table based on the space - name */ - UT_LIST_BASE_NODE_T(fil_node_t) LRU; - /* base node for the LRU list of the - most recently used open files with no - pending i/o's; if we start an i/o on - the file, we first remove it from this - list, and return it to the start of - the list when the i/o ends; - log files and the system tablespace are - not put to this list: they are opened - after the 
startup, and kept open until - shutdown */ - UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces; - /* base node for the list of those - tablespaces whose files contain - unflushed writes; those spaces have - at least one file node where - modification_counter > flush_counter */ - ulint n_open; /* number of files currently open */ - ulint max_n_open; /* n_open is not allowed to exceed - this */ - ib_longlong modification_counter;/* when we write to a file we - increment this by one */ - ulint max_assigned_id;/* maximum space id in the existing - tables, or assigned during the time - mysqld has been up; at an InnoDB - startup we scan the data dictionary - and set here the maximum of the - space id's of the tables there */ - ib_longlong tablespace_version; - /* a counter which is incremented for - every space object memory creation; - every space mem object gets a - 'timestamp' from this; in DISCARD/ - IMPORT this is used to check if we - should ignore an insert buffer merge - request */ - UT_LIST_BASE_NODE_T(fil_space_t) space_list; - /* list of all file spaces */ -}; - -/* The tablespace memory cache. This variable is NULL before the module is -initialized. */ -fil_system_t* fil_system = NULL; - - -/************************************************************************ -NOTE: you must call fil_mutex_enter_and_prepare_for_io() first! - -Prepares a file node for i/o. Opens the file if it is closed. Updates the -pending i/o's field in the node and the system appropriately. Takes the node -off the LRU list if it is in the LRU list. The caller must hold the fil_sys -mutex. */ -static -void -fil_node_prepare_for_io( -/*====================*/ - fil_node_t* node, /* in: file node */ - fil_system_t* system, /* in: tablespace memory cache */ - fil_space_t* space); /* in: space */ -/************************************************************************ -Updates the data structures when an i/o operation finishes. Updates the -pending i/o's field in the node appropriately. 
*/ -static -void -fil_node_complete_io( -/*=================*/ - fil_node_t* node, /* in: file node */ - fil_system_t* system, /* in: tablespace memory cache */ - ulint type); /* in: OS_FILE_WRITE or OS_FILE_READ; marks - the node as modified if - type == OS_FILE_WRITE */ -/*********************************************************************** -Checks if a single-table tablespace for a given table name exists in the -tablespace memory cache. */ -static -ulint -fil_get_space_id_for_table( -/*=======================*/ - /* out: space id, ULINT_UNDEFINED if not - found */ - const char* name); /* in: table name in the standard - 'databasename/tablename' format */ - - -/*********************************************************************** -Returns the version number of a tablespace, -1 if not found. */ - -ib_longlong -fil_space_get_version( -/*==================*/ - /* out: version number, -1 if the tablespace does not - exist in the memory cache */ - ulint id) /* in: space id */ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - ib_longlong version = -1; - - ut_ad(system); - - mutex_enter(&(system->mutex)); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - if (space) { - version = space->tablespace_version; - } - - mutex_exit(&(system->mutex)); - - return(version); -} - -/*********************************************************************** -Returns the latch of a file space. */ - -rw_lock_t* -fil_space_get_latch( -/*================*/ - /* out: latch protecting storage allocation */ - ulint id) /* in: space id */ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - - ut_ad(system); - - mutex_enter(&(system->mutex)); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - ut_a(space); - - mutex_exit(&(system->mutex)); - - return(&(space->latch)); -} - -/*********************************************************************** -Returns the type of a file space. 
*/ - -ulint -fil_space_get_type( -/*===============*/ - /* out: FIL_TABLESPACE or FIL_LOG */ - ulint id) /* in: space id */ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - - ut_ad(system); - - mutex_enter(&(system->mutex)); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - ut_a(space); - - mutex_exit(&(system->mutex)); - - return(space->purpose); -} - -/*********************************************************************** -Returns the ibuf data of a file space. */ - -ibuf_data_t* -fil_space_get_ibuf_data( -/*====================*/ - /* out: ibuf data for this space */ - ulint id) /* in: space id */ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - - ut_ad(system); - - ut_a(id == 0); - - mutex_enter(&(system->mutex)); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - mutex_exit(&(system->mutex)); - - ut_a(space); - - return(space->ibuf_data); -} - -/************************************************************************** -Checks if all the file nodes in a space are flushed. The caller must hold -the fil_system mutex. */ -static -ibool -fil_space_is_flushed( -/*=================*/ - /* out: TRUE if all are flushed */ - fil_space_t* space) /* in: space */ -{ - fil_node_t* node; - - ut_ad(mutex_own(&(fil_system->mutex))); - - node = UT_LIST_GET_FIRST(space->chain); - - while (node) { - if (node->modification_counter > node->flush_counter) { - - return(FALSE); - } - - node = UT_LIST_GET_NEXT(chain, node); - } - - return(TRUE); -} - -/*********************************************************************** -Appends a new file to the chain of files of a space. File must be closed. 
*/

void
fil_node_create(
/*============*/
	const char*	name,	/* in: file name (file must be closed) */
	ulint		size,	/* in: file size in database blocks, rounded
				downwards to an integer */
	ulint		id,	/* in: space id where to append */
	ibool		is_raw)	/* in: TRUE if a raw device or
				a raw disk partition */
{
	fil_system_t*	cache	= fil_system;
	fil_node_t*	file;
	fil_space_t*	owner;

	ut_a(cache);
	ut_a(name);

	mutex_enter(&(cache->mutex));

	/* Build the node object; the file itself is not opened here */

	file = mem_alloc(sizeof(fil_node_t));

	file->name = mem_strdup(name);
	file->open = FALSE;

	ut_a(!is_raw || srv_start_raw_disk_in_use);

	file->is_raw_disk = is_raw;
	file->size = size;
	file->magic_n = FIL_NODE_MAGIC_N;
	file->n_pending = 0;
	file->n_pending_flushes = 0;

	file->modification_counter = 0;
	file->flush_counter = 0;

	HASH_SEARCH(hash, cache->spaces, id, owner, owner->id == id);

	if (owner) {
		/* Link the node to the end of the file chain of its space
		and account for the new pages in the space size */

		owner->size += size;

		file->space = owner;

		UT_LIST_ADD_LAST(chain, owner->chain, file);
	} else {
		/* No such space: report the error and discard the node */

		ut_print_timestamp(stderr);
		fprintf(stderr,
			" InnoDB: Error: Could not find tablespace %lu for\n"
			"InnoDB: file ", (ulong) id);
		ut_print_filename(stderr, name);
		fputs(" in the tablespace memory cache.\n", stderr);

		mem_free(file->name);

		mem_free(file);
	}

	mutex_exit(&(cache->mutex));
}

/************************************************************************
Opens the file of a node of a tablespace. The caller must own the fil_system
mutex.
*/ -static -void -fil_node_open_file( -/*===============*/ - fil_node_t* node, /* in: file node */ - fil_system_t* system, /* in: tablespace memory cache */ - fil_space_t* space) /* in: space */ -{ - ib_longlong size_bytes; - ulint size_low; - ulint size_high; - ibool ret; - ibool success; -#ifndef UNIV_HOTBACKUP - byte* buf2; - byte* page; - ulint space_id; -#endif /* !UNIV_HOTBACKUP */ - - ut_ad(mutex_own(&(system->mutex))); - ut_a(node->n_pending == 0); - ut_a(node->open == FALSE); - - if (node->size == 0) { - /* It must be a single-table tablespace and we do not know the - size of the file yet. First we open the file in the normal - mode, no async I/O here, for simplicity. Then do some checks, - and close the file again. - NOTE that we could not use the simple file read function - os_file_read() in Windows to read from a file opened for - async I/O! */ - - node->handle = os_file_create_simple_no_error_handling( - node->name, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success); - if (!success) { - /* The following call prints an error message */ - os_file_get_last_error(TRUE); - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Fatal error: cannot open %s\n." 
- "InnoDB: Have you deleted .ibd files" - " under a running mysqld server?\n", - node->name); - ut_a(0); - } - - os_file_get_size(node->handle, &size_low, &size_high); - - size_bytes = (((ib_longlong)size_high) << 32) - + (ib_longlong)size_low; -#ifdef UNIV_HOTBACKUP - node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE); - -#else - ut_a(space->purpose != FIL_LOG); - ut_a(space->id != 0); - - if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { - fprintf(stderr, - "InnoDB: Error: the size of single-table" - " tablespace file %s\n" - "InnoDB: is only %lu %lu," - " should be at least %lu!\n", - node->name, - (ulong) size_high, - (ulong) size_low, - (ulong) (FIL_IBD_FILE_INITIAL_SIZE - * UNIV_PAGE_SIZE)); - - ut_a(0); - } - - /* Read the first page of the tablespace */ - - buf2 = ut_malloc(2 * UNIV_PAGE_SIZE); - /* Align the memory for file i/o if we might have O_DIRECT - set */ - page = ut_align(buf2, UNIV_PAGE_SIZE); - - success = os_file_read(node->handle, page, 0, 0, - UNIV_PAGE_SIZE); - space_id = fsp_header_get_space_id(page); - - ut_free(buf2); - - /* Close the file now that we have read the space id from it */ - - os_file_close(node->handle); - - if (space_id == ULINT_UNDEFINED || space_id == 0) { - fprintf(stderr, - "InnoDB: Error: tablespace id %lu" - " in file %s is not sensible\n", - (ulong) space_id, node->name); - - ut_a(0); - } - - if (space_id != space->id) { - fprintf(stderr, - "InnoDB: Error: tablespace id is %lu" - " in the data dictionary\n" - "InnoDB: but in file %s it is %lu!\n", - space->id, node->name, space_id); - - ut_a(0); - } - - if (size_bytes >= FSP_EXTENT_SIZE * UNIV_PAGE_SIZE) { - node->size = (ulint) - ((size_bytes / (1024 * 1024)) - * ((1024 * 1024) / UNIV_PAGE_SIZE)); - } else { - node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE); - } -#endif - space->size += node->size; - } - - /* printf("Opening file %s\n", node->name); */ - - /* Open the file for reading and writing, in Windows normally in the - unbuffered async I/O mode, 
though global variables may make - os_file_create() to fall back to the normal file I/O mode. */ - - if (space->purpose == FIL_LOG) { - node->handle = os_file_create(node->name, OS_FILE_OPEN, - OS_FILE_AIO, OS_LOG_FILE, &ret); - } else if (node->is_raw_disk) { - node->handle = os_file_create(node->name, - OS_FILE_OPEN_RAW, - OS_FILE_AIO, OS_DATA_FILE, &ret); - } else { - node->handle = os_file_create(node->name, OS_FILE_OPEN, - OS_FILE_AIO, OS_DATA_FILE, &ret); - } - - ut_a(ret); - - node->open = TRUE; - - system->n_open++; - - if (space->purpose == FIL_TABLESPACE && space->id != 0) { - /* Put the node to the LRU list */ - UT_LIST_ADD_FIRST(LRU, system->LRU, node); - } -} - -/************************************************************************** -Closes a file. */ -static -void -fil_node_close_file( -/*================*/ - fil_node_t* node, /* in: file node */ - fil_system_t* system) /* in: tablespace memory cache */ -{ - ibool ret; - - ut_ad(node && system); - ut_ad(mutex_own(&(system->mutex))); - ut_a(node->open); - ut_a(node->n_pending == 0); - ut_a(node->n_pending_flushes == 0); - ut_a(node->modification_counter == node->flush_counter); - - ret = os_file_close(node->handle); - ut_a(ret); - - /* printf("Closing file %s\n", node->name); */ - - node->open = FALSE; - ut_a(system->n_open > 0); - system->n_open--; - - if (node->space->purpose == FIL_TABLESPACE && node->space->id != 0) { - ut_a(UT_LIST_GET_LEN(system->LRU) > 0); - - /* The node is in the LRU list, remove it */ - UT_LIST_REMOVE(LRU, system->LRU, node); - } -} - -/************************************************************************ -Tries to close a file in the LRU list. The caller must hold the fil_sys -mutex. 
*/
static
ibool
fil_try_to_close_file_in_LRU(
/*=========================*/
				/* out: TRUE if one file was closed; FALSE
				if nothing in the LRU list could be closed
				and the caller should retry later. A retry
				has a good chance of succeeding because
				i/o's generally complete in < 100 ms, and
				InnoDB writes at most 128 pages from the
				buffer pool in a batch and then immediately
				flushes the files */
	ibool	print_info)	/* in: if TRUE, report to stderr why each
				skipped file cannot be closed */
{
	fil_system_t*	system	= fil_system;
	fil_node_t*	candidate;

	ut_ad(mutex_own(&(system->mutex)));

	if (print_info) {
		fprintf(stderr,
			"InnoDB: fil_sys open file LRU len %lu\n",
			(ulong) UT_LIST_GET_LEN(system->LRU));
	}

	/* Walk from the tail of the LRU list towards its head and close the
	first file that has neither unflushed modifications nor flushes in
	progress */

	for (candidate = UT_LIST_GET_LAST(system->LRU);
	     candidate != NULL;
	     candidate = UT_LIST_GET_PREV(LRU, candidate)) {

		if (candidate->modification_counter
		    == candidate->flush_counter
		    && candidate->n_pending_flushes == 0) {

			fil_node_close_file(candidate, system);

			return(TRUE);
		}

		if (!print_info) {

			continue;
		}

		/* Diagnostics only: explain why this file was skipped */

		if (candidate->n_pending_flushes > 0) {
			fputs("InnoDB: cannot close file ", stderr);
			ut_print_filename(stderr, candidate->name);
			fprintf(stderr, ", because n_pending_flushes %lu\n",
				(ulong) candidate->n_pending_flushes);
		}

		if (candidate->modification_counter
		    != candidate->flush_counter) {
			fputs("InnoDB: cannot close file ", stderr);
			ut_print_filename(stderr, candidate->name);
			fprintf(stderr,
				", because mod_count %ld != fl_count %ld\n",
				(long) candidate->modification_counter,
				(long) candidate->flush_counter);
		}
	}

	return(FALSE);
}

/***********************************************************************
Reserves the fil_system mutex and tries to make sure we can open at least one
file while holding it. This should be called before calling
fil_node_prepare_for_io(), because that function may need to open a file.
*/ -static -void -fil_mutex_enter_and_prepare_for_io( -/*===============================*/ - ulint space_id) /* in: space id */ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - ibool success; - ibool print_info = FALSE; - ulint count = 0; - ulint count2 = 0; - - ut_ad(!mutex_own(&(system->mutex))); -retry: - mutex_enter(&(system->mutex)); - - if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) { - /* We keep log files and system tablespace files always open; - this is important in preventing deadlocks in this module, as - a page read completion often performs another read from the - insert buffer. The insert buffer is in tablespace 0, and we - cannot end up waiting in this function. */ - - return; - } - - if (system->n_open < system->max_n_open) { - - return; - } - - HASH_SEARCH(hash, system->spaces, space_id, space, - space->id == space_id); - if (space != NULL && space->stop_ios) { - /* We are going to do a rename file and want to stop new i/o's - for a while */ - - if (count2 > 20000) { - fputs("InnoDB: Warning: tablespace ", stderr); - ut_print_filename(stderr, space->name); - fprintf(stderr, - " has i/o ops stopped for a long time %lu\n", - (ulong) count2); - } - - mutex_exit(&(system->mutex)); - - os_thread_sleep(20000); - - count2++; - - goto retry; - } - - /* If the file is already open, no need to do anything; if the space - does not exist, we handle the situation in the function which called - this function */ - - if (!space || UT_LIST_GET_FIRST(space->chain)->open) { - - return; - } - - if (count > 1) { - print_info = TRUE; - } - - /* Too many files are open, try to close some */ -close_more: - success = fil_try_to_close_file_in_LRU(print_info); - - if (success && system->n_open >= system->max_n_open) { - - goto close_more; - } - - if (system->n_open < system->max_n_open) { - /* Ok */ - - return; - } - - if (count >= 2) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: too many (%lu) files stay open" - " while the 
maximum\n" - "InnoDB: allowed value would be %lu.\n" - "InnoDB: You may need to raise the value of" - " innodb_max_files_open in\n" - "InnoDB: my.cnf.\n", - (ulong) system->n_open, (ulong) system->max_n_open); - - return; - } - - mutex_exit(&(system->mutex)); - -#ifndef UNIV_HOTBACKUP - /* Wake the i/o-handler threads to make sure pending i/o's are - performed */ - os_aio_simulated_wake_handler_threads(); - - os_thread_sleep(20000); -#endif - /* Flush tablespaces so that we can close modified files in the LRU - list */ - - fil_flush_file_spaces(FIL_TABLESPACE); - - count++; - - goto retry; -} - -/*********************************************************************** -Frees a file node object from a tablespace memory cache. */ -static -void -fil_node_free( -/*==========*/ - fil_node_t* node, /* in, own: file node */ - fil_system_t* system, /* in: tablespace memory cache */ - fil_space_t* space) /* in: space where the file node is chained */ -{ - ut_ad(node && system && space); - ut_ad(mutex_own(&(system->mutex))); - ut_a(node->magic_n == FIL_NODE_MAGIC_N); - ut_a(node->n_pending == 0); - - if (node->open) { - /* We fool the assertion in fil_node_close_file() to think - there are no unflushed modifications in the file */ - - node->modification_counter = node->flush_counter; - - if (space->is_in_unflushed_spaces - && fil_space_is_flushed(space)) { - - space->is_in_unflushed_spaces = FALSE; - - UT_LIST_REMOVE(unflushed_spaces, - system->unflushed_spaces, - space); - } - - fil_node_close_file(node, system); - } - - space->size -= node->size; - - UT_LIST_REMOVE(chain, space->chain, node); - - mem_free(node->name); - mem_free(node); -} - -/******************************************************************** -Drops files from the start of a file space, so that its size is cut by -the amount given. 
*/ - -void -fil_space_truncate_start( -/*=====================*/ - ulint id, /* in: space id */ - ulint trunc_len) /* in: truncate by this much; it is an error - if this does not equal to the combined size of - some initial files in the space */ -{ - fil_system_t* system = fil_system; - fil_node_t* node; - fil_space_t* space; - - mutex_enter(&(system->mutex)); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - ut_a(space); - - while (trunc_len > 0) { - node = UT_LIST_GET_FIRST(space->chain); - - ut_a(node->size * UNIV_PAGE_SIZE >= trunc_len); - - trunc_len -= node->size * UNIV_PAGE_SIZE; - - fil_node_free(node, system, space); - } - - mutex_exit(&(system->mutex)); -} - -/*********************************************************************** -Creates a space memory object and puts it to the tablespace memory cache. If -there is an error, prints an error message to the .err log. */ - -ibool -fil_space_create( -/*=============*/ - /* out: TRUE if success */ - const char* name, /* in: space name */ - ulint id, /* in: space id */ - ulint purpose)/* in: FIL_TABLESPACE, or FIL_LOG if log */ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - ulint namesake_id; -try_again: - /*printf( - "InnoDB: Adding tablespace %lu of name %s, purpose %lu\n", id, name, - purpose);*/ - - ut_a(system); - ut_a(name); - - mutex_enter(&(system->mutex)); - - HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(name), space, - 0 == strcmp(name, space->name)); - if (space != NULL) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: trying to init to the" - " tablespace memory cache\n" - "InnoDB: a tablespace %lu of name ", (ulong) id); - ut_print_filename(stderr, name); - fprintf(stderr, ",\n" - "InnoDB: but a tablespace %lu of the same name\n" - "InnoDB: already exists in the" - " tablespace memory cache!\n", - (ulong) space->id); - - if (id == 0 || purpose != FIL_TABLESPACE) { - - mutex_exit(&(system->mutex)); - - return(FALSE); - } - - 
fprintf(stderr, - "InnoDB: We assume that InnoDB did a crash recovery," - " and you had\n" - "InnoDB: an .ibd file for which the table" - " did not exist in the\n" - "InnoDB: InnoDB internal data dictionary in the" - " ibdata files.\n" - "InnoDB: We assume that you later removed the" - " .ibd and .frm files,\n" - "InnoDB: and are now trying to recreate the table." - " We now remove the\n" - "InnoDB: conflicting tablespace object" - " from the memory cache and try\n" - "InnoDB: the init again.\n"); - - namesake_id = space->id; - - mutex_exit(&(system->mutex)); - - fil_space_free(namesake_id); - - goto try_again; - } - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - if (space != NULL) { - fprintf(stderr, - "InnoDB: Error: trying to add tablespace %lu" - " of name ", (ulong) id); - ut_print_filename(stderr, name); - fprintf(stderr, "\n" - "InnoDB: to the tablespace memory cache," - " but tablespace\n" - "InnoDB: %lu of name ", (ulong) space->id); - ut_print_filename(stderr, space->name); - fputs(" already exists in the tablespace\n" - "InnoDB: memory cache!\n", stderr); - - mutex_exit(&(system->mutex)); - - return(FALSE); - } - - space = mem_alloc(sizeof(fil_space_t)); - - space->name = mem_strdup(name); - space->id = id; - - system->tablespace_version++; - space->tablespace_version = system->tablespace_version; - space->mark = FALSE; - - if (purpose == FIL_TABLESPACE && id > system->max_assigned_id) { - system->max_assigned_id = id; - } - - space->stop_ios = FALSE; - space->stop_ibuf_merges = FALSE; - space->is_being_deleted = FALSE; - space->purpose = purpose; - space->size = 0; - - space->n_reserved_extents = 0; - - space->n_pending_flushes = 0; - space->n_pending_ibuf_merges = 0; - - UT_LIST_INIT(space->chain); - space->magic_n = FIL_SPACE_MAGIC_N; - - space->ibuf_data = NULL; - - rw_lock_create(&space->latch, SYNC_FSP); - - HASH_INSERT(fil_space_t, hash, system->spaces, id, space); - - HASH_INSERT(fil_space_t, name_hash, system->name_hash, - 
ut_fold_string(name), space); - space->is_in_unflushed_spaces = FALSE; - - UT_LIST_ADD_LAST(space_list, system->space_list, space); - - mutex_exit(&(system->mutex)); - - return(TRUE); -} - -/*********************************************************************** -Assigns a new space id for a new single-table tablespace. This works simply by -incrementing the global counter. If 4 billion id's is not enough, we may need -to recycle id's. */ -static -ulint -fil_assign_new_space_id(void) -/*=========================*/ - /* out: new tablespace id; ULINT_UNDEFINED if could - not assign an id */ -{ - fil_system_t* system = fil_system; - ulint id; - - mutex_enter(&(system->mutex)); - - system->max_assigned_id++; - - id = system->max_assigned_id; - - if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) { - ut_print_timestamp(stderr); - fprintf(stderr, - "InnoDB: Warning: you are running out of new" - " single-table tablespace id's.\n" - "InnoDB: Current counter is %lu and it" - " must not exceed %lu!\n" - "InnoDB: To reset the counter to zero" - " you have to dump all your tables and\n" - "InnoDB: recreate the whole InnoDB installation.\n", - (ulong) id, - (ulong) SRV_LOG_SPACE_FIRST_ID); - } - - if (id >= SRV_LOG_SPACE_FIRST_ID) { - ut_print_timestamp(stderr); - fprintf(stderr, - "InnoDB: You have run out of single-table" - " tablespace id's!\n" - "InnoDB: Current counter is %lu.\n" - "InnoDB: To reset the counter to zero you" - " have to dump all your tables and\n" - "InnoDB: recreate the whole InnoDB installation.\n", - (ulong) id); - system->max_assigned_id--; - - id = ULINT_UNDEFINED; - } - - mutex_exit(&(system->mutex)); - - return(id); -} - -/*********************************************************************** -Frees a space object from the tablespace memory cache. Closes the files in -the chain but does not delete them. There must not be any pending i/o's or -flushes on the files. 
*/ - -ibool -fil_space_free( -/*===========*/ - /* out: TRUE if success */ - ulint id) /* in: space id */ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - fil_space_t* namespace; - fil_node_t* fil_node; - - mutex_enter(&(system->mutex)); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - if (!space) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: trying to remove tablespace %lu" - " from the cache but\n" - "InnoDB: it is not there.\n", (ulong) id); - - mutex_exit(&(system->mutex)); - - return(FALSE); - } - - HASH_DELETE(fil_space_t, hash, system->spaces, id, space); - - HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(space->name), - namespace, 0 == strcmp(space->name, namespace->name)); - ut_a(namespace); - ut_a(space == namespace); - - HASH_DELETE(fil_space_t, name_hash, system->name_hash, - ut_fold_string(space->name), space); - - if (space->is_in_unflushed_spaces) { - space->is_in_unflushed_spaces = FALSE; - - UT_LIST_REMOVE(unflushed_spaces, system->unflushed_spaces, - space); - } - - UT_LIST_REMOVE(space_list, system->space_list, space); - - ut_a(space->magic_n == FIL_SPACE_MAGIC_N); - ut_a(0 == space->n_pending_flushes); - - fil_node = UT_LIST_GET_FIRST(space->chain); - - while (fil_node != NULL) { - fil_node_free(fil_node, system, space); - - fil_node = UT_LIST_GET_FIRST(space->chain); - } - - ut_a(0 == UT_LIST_GET_LEN(space->chain)); - - mutex_exit(&(system->mutex)); - - rw_lock_free(&(space->latch)); - - mem_free(space->name); - mem_free(space); - - return(TRUE); -} - -#ifdef UNIV_HOTBACKUP -/*********************************************************************** -Returns the tablespace object for a given id, or NULL if not found from the -tablespace memory cache. */ -static -fil_space_t* -fil_get_space_for_id_low( -/*=====================*/ - /* out: tablespace object or NULL; NOTE that you must - own &(fil_system->mutex) to call this function! 
*/ - ulint id) /* in: space id */ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - - ut_ad(system); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - return(space); -} -#endif - -/*********************************************************************** -Returns the size of the space in pages. The tablespace must be cached in the -memory cache. */ - -ulint -fil_space_get_size( -/*===============*/ - /* out: space size, 0 if space not found */ - ulint id) /* in: space id */ -{ - fil_system_t* system = fil_system; - fil_node_t* node; - fil_space_t* space; - ulint size; - - ut_ad(system); - - fil_mutex_enter_and_prepare_for_io(id); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - if (space == NULL) { - mutex_exit(&(system->mutex)); - - return(0); - } - - if (space->size == 0 && space->purpose == FIL_TABLESPACE) { - ut_a(id != 0); - - ut_a(1 == UT_LIST_GET_LEN(space->chain)); - - node = UT_LIST_GET_FIRST(space->chain); - - /* It must be a single-table tablespace and we have not opened - the file yet; the following calls will open it and update the - size fields */ - - fil_node_prepare_for_io(node, system, space); - fil_node_complete_io(node, system, OS_FILE_READ); - } - - size = space->size; - - mutex_exit(&(system->mutex)); - - return(size); -} - -/*********************************************************************** -Checks if the pair space, page_no refers to an existing page in a tablespace -file space. The tablespace must be cached in the memory cache. */ - -ibool -fil_check_adress_in_tablespace( -/*===========================*/ - /* out: TRUE if the address is meaningful */ - ulint id, /* in: space id */ - ulint page_no)/* in: page number */ -{ - if (fil_space_get_size(id) > page_no) { - - return(TRUE); - } - - return(FALSE); -} - -/******************************************************************** -Creates a the tablespace memory cache. 
*/ -static -fil_system_t* -fil_system_create( -/*==============*/ - /* out, own: tablespace memory cache */ - ulint hash_size, /* in: hash table size */ - ulint max_n_open) /* in: maximum number of open files; must be - > 10 */ -{ - fil_system_t* system; - - ut_a(hash_size > 0); - ut_a(max_n_open > 0); - - system = mem_alloc(sizeof(fil_system_t)); - - mutex_create(&system->mutex, SYNC_ANY_LATCH); - - system->spaces = hash_create(hash_size); - system->name_hash = hash_create(hash_size); - - UT_LIST_INIT(system->LRU); - - system->n_open = 0; - system->max_n_open = max_n_open; - - system->modification_counter = 0; - system->max_assigned_id = 0; - - system->tablespace_version = 0; - - UT_LIST_INIT(system->unflushed_spaces); - UT_LIST_INIT(system->space_list); - - return(system); -} - -/******************************************************************** -Initializes the tablespace memory cache. */ - -void -fil_init( -/*=====*/ - ulint max_n_open) /* in: max number of open files */ -{ - ulint hash_size; - - ut_a(fil_system == NULL); - - if (srv_file_per_table) { - hash_size = 50000; - } else { - hash_size = 5000; - } - - fil_system = fil_system_create(hash_size, max_n_open); -} - -/*********************************************************************** -Opens all log files and system tablespace data files. They stay open until the -database server shutdown. This should be called at a server startup after the -space objects for the log and the system tablespace have been created. The -purpose of this operation is to make sure we never run out of file descriptors -if we need to read from the insert buffer or to write to the log. 
*/

void
fil_open_log_and_system_tablespace_files(void)
/*==========================================*/
{
	fil_system_t*	system = fil_system;
	fil_space_t*	space;
	fil_node_t*	node;

	mutex_enter(&(system->mutex));

	for (space = UT_LIST_GET_FIRST(system->space_list);
	     space != NULL;
	     space = UT_LIST_GET_NEXT(space_list, space)) {

		/* Single-table tablespaces are opened on demand; skip them */

		if (space->purpose == FIL_TABLESPACE && space->id != 0) {

			continue;
		}

		for (node = UT_LIST_GET_FIRST(space->chain);
		     node != NULL;
		     node = UT_LIST_GET_NEXT(chain, node)) {

			if (!node->open) {
				fil_node_open_file(node, system, space);
			}

			/* Warn when fewer than 10 slots would be left for
			.ibd files. The limit is set with innodb_open_files. */

			if (system->max_n_open < 10 + system->n_open) {
				fprintf(stderr,
					"InnoDB: Warning: you must"
					" raise the value of"
					" innodb_open_files in\n"
					"InnoDB: my.cnf! Remember that"
					" InnoDB keeps all log files"
					" and all system\n"
					"InnoDB: tablespace files open"
					" for the whole time mysqld is"
					" running, and\n"
					"InnoDB: needs to open also"
					" some .ibd files if the"
					" file-per-table storage\n"
					"InnoDB: model is used."
					" Current open files %lu,"
					" max allowed"
					" open files %lu.\n",
					(ulong) system->n_open,
					(ulong) system->max_n_open);
			}
		}
	}

	mutex_exit(&(system->mutex));
}

/***********************************************************************
Closes all open files. There must not be any pending i/o's or not flushed
modifications in the files.
*/ - -void -fil_close_all_files(void) -/*=====================*/ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - fil_node_t* node; - - mutex_enter(&(system->mutex)); - - space = UT_LIST_GET_FIRST(system->space_list); - - while (space != NULL) { - node = UT_LIST_GET_FIRST(space->chain); - - while (node != NULL) { - if (node->open) { - fil_node_close_file(node, system); - } - node = UT_LIST_GET_NEXT(chain, node); - } - space = UT_LIST_GET_NEXT(space_list, space); - } - - mutex_exit(&(system->mutex)); -} - -/*********************************************************************** -Sets the max tablespace id counter if the given number is bigger than the -previous value. */ - -void -fil_set_max_space_id_if_bigger( -/*===========================*/ - ulint max_id) /* in: maximum known id */ -{ - fil_system_t* system = fil_system; - - if (max_id >= SRV_LOG_SPACE_FIRST_ID) { - fprintf(stderr, - "InnoDB: Fatal error: max tablespace id" - " is too high, %lu\n", (ulong) max_id); - ut_a(0); - } - - mutex_enter(&(system->mutex)); - - if (system->max_assigned_id < max_id) { - - system->max_assigned_id = max_id; - } - - mutex_exit(&(system->mutex)); -} - -/******************************************************************** -Initializes the ibuf data structure for space 0 == the system tablespace. -This can be called after the file space headers have been created and the -dictionary system has been initialized. */ - -void -fil_ibuf_init_at_db_start(void) -/*===========================*/ -{ - fil_space_t* space; - - space = UT_LIST_GET_FIRST(fil_system->space_list); - - ut_a(space); - ut_a(space->purpose == FIL_TABLESPACE); - - space->ibuf_data = ibuf_data_init_for_space(space->id); -} - -/******************************************************************** -Writes the flushed lsn and the latest archived log number to the page header -of the first page of a data file. 
*/ -static -ulint -fil_write_lsn_and_arch_no_to_file( -/*==============================*/ - ulint space_id, /* in: space number */ - ulint sum_of_sizes, /* in: combined size of previous files in - space, in database pages */ - dulint lsn, /* in: lsn to write */ - ulint arch_log_no /* in: archived log number to write */ - __attribute__((unused))) -{ - byte* buf1; - byte* buf; - - buf1 = mem_alloc(2 * UNIV_PAGE_SIZE); - buf = ut_align(buf1, UNIV_PAGE_SIZE); - - fil_read(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); - - mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn); - - fil_write(TRUE, space_id, sum_of_sizes, 0, UNIV_PAGE_SIZE, buf, NULL); - - return(DB_SUCCESS); -} - -/******************************************************************** -Writes the flushed lsn and the latest archived log number to the page -header of the first page of each data file in the system tablespace. */ - -ulint -fil_write_flushed_lsn_to_data_files( -/*================================*/ - /* out: DB_SUCCESS or error number */ - dulint lsn, /* in: lsn to write */ - ulint arch_log_no) /* in: latest archived log file number */ -{ - fil_space_t* space; - fil_node_t* node; - ulint sum_of_sizes; - ulint err; - - mutex_enter(&(fil_system->mutex)); - - space = UT_LIST_GET_FIRST(fil_system->space_list); - - while (space) { - /* We only write the lsn to all existing data files which have - been open during the lifetime of the mysqld process; they are - represented by the space objects in the tablespace memory - cache. Note that all data files in the system tablespace 0 are - always open. 
*/ - - if (space->purpose == FIL_TABLESPACE - && space->id == 0) { - sum_of_sizes = 0; - - node = UT_LIST_GET_FIRST(space->chain); - while (node) { - mutex_exit(&(fil_system->mutex)); - - err = fil_write_lsn_and_arch_no_to_file( - space->id, sum_of_sizes, lsn, - arch_log_no); - if (err != DB_SUCCESS) { - - return(err); - } - - mutex_enter(&(fil_system->mutex)); - - sum_of_sizes += node->size; - node = UT_LIST_GET_NEXT(chain, node); - } - } - space = UT_LIST_GET_NEXT(space_list, space); - } - - mutex_exit(&(fil_system->mutex)); - - return(DB_SUCCESS); -} - -/*********************************************************************** -Reads the flushed lsn and arch no fields from a data file at database -startup. */ - -void -fil_read_flushed_lsn_and_arch_log_no( -/*=================================*/ - os_file_t data_file, /* in: open data file */ - ibool one_read_already, /* in: TRUE if min and max parameters - below already contain sensible data */ -#ifdef UNIV_LOG_ARCHIVE - ulint* min_arch_log_no, /* in/out: */ - ulint* max_arch_log_no, /* in/out: */ -#endif /* UNIV_LOG_ARCHIVE */ - dulint* min_flushed_lsn, /* in/out: */ - dulint* max_flushed_lsn) /* in/out: */ -{ - byte* buf; - byte* buf2; - dulint flushed_lsn; - - buf2 = ut_malloc(2 * UNIV_PAGE_SIZE); - /* Align the memory for a possible read from a raw device */ - buf = ut_align(buf2, UNIV_PAGE_SIZE); - - os_file_read(data_file, buf, 0, 0, UNIV_PAGE_SIZE); - - flushed_lsn = mach_read_from_8(buf + FIL_PAGE_FILE_FLUSH_LSN); - - ut_free(buf2); - - if (!one_read_already) { - *min_flushed_lsn = flushed_lsn; - *max_flushed_lsn = flushed_lsn; -#ifdef UNIV_LOG_ARCHIVE - *min_arch_log_no = arch_log_no; - *max_arch_log_no = arch_log_no; -#endif /* UNIV_LOG_ARCHIVE */ - return; - } - - if (ut_dulint_cmp(*min_flushed_lsn, flushed_lsn) > 0) { - *min_flushed_lsn = flushed_lsn; - } - if (ut_dulint_cmp(*max_flushed_lsn, flushed_lsn) < 0) { - *max_flushed_lsn = flushed_lsn; - } -#ifdef UNIV_LOG_ARCHIVE - if (*min_arch_log_no > 
arch_log_no) { - *min_arch_log_no = arch_log_no; - } - if (*max_arch_log_no < arch_log_no) { - *max_arch_log_no = arch_log_no; - } -#endif /* UNIV_LOG_ARCHIVE */ -} - -/*================ SINGLE-TABLE TABLESPACES ==========================*/ - -/*********************************************************************** -Increments the count of pending insert buffer page merges, if space is not -being deleted. */ - -ibool -fil_inc_pending_ibuf_merges( -/*========================*/ - /* out: TRUE if being deleted, and ibuf merges should - be skipped */ - ulint id) /* in: space id */ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - - mutex_enter(&(system->mutex)); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - if (space == NULL) { - fprintf(stderr, - "InnoDB: Error: trying to do ibuf merge to a" - " dropped tablespace %lu\n", - (ulong) id); - } - - if (space == NULL || space->stop_ibuf_merges) { - mutex_exit(&(system->mutex)); - - return(TRUE); - } - - space->n_pending_ibuf_merges++; - - mutex_exit(&(system->mutex)); - - return(FALSE); -} - -/*********************************************************************** -Decrements the count of pending insert buffer page merges. */ - -void -fil_decr_pending_ibuf_merges( -/*=========================*/ - ulint id) /* in: space id */ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - - mutex_enter(&(system->mutex)); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - if (space == NULL) { - fprintf(stderr, - "InnoDB: Error: decrementing ibuf merge of a" - " dropped tablespace %lu\n", - (ulong) id); - } - - if (space != NULL) { - space->n_pending_ibuf_merges--; - } - - mutex_exit(&(system->mutex)); -} - -/************************************************************ -Creates the database directory for a table if it does not exist yet. 
*/ -static -void -fil_create_directory_for_tablename( -/*===============================*/ - const char* name) /* in: name in the standard - 'databasename/tablename' format */ -{ - const char* namend; - char* path; - ulint len; - - len = strlen(fil_path_to_mysql_datadir); - namend = strchr(name, '/'); - ut_a(namend); - path = mem_alloc(len + (namend - name) + 2); - - memcpy(path, fil_path_to_mysql_datadir, len); - path[len] = '/'; - memcpy(path + len + 1, name, namend - name); - path[len + (namend - name) + 1] = 0; - - srv_normalize_path_for_win(path); - - ut_a(os_file_create_directory(path, FALSE)); - mem_free(path); -} - -#ifndef UNIV_HOTBACKUP -/************************************************************ -Writes a log record about an .ibd file create/rename/delete. */ -static -void -fil_op_write_log( -/*=============*/ - ulint type, /* in: MLOG_FILE_CREATE, - MLOG_FILE_DELETE, or - MLOG_FILE_RENAME */ - ulint space_id, /* in: space id */ - const char* name, /* in: table name in the familiar - 'databasename/tablename' format, or - the file path in the case of - MLOG_FILE_DELETE */ - const char* new_name, /* in: if type is MLOG_FILE_RENAME, - the new table name in the - 'databasename/tablename' format */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - byte* log_ptr; - ulint len; - - log_ptr = mlog_open(mtr, 11 + 2); - - if (!log_ptr) { - /* Logging in mtr is switched off during crash recovery: - in that case mlog_open returns NULL */ - return; - } - - log_ptr = mlog_write_initial_log_record_for_file_op(type, space_id, 0, - log_ptr, mtr); - /* Let us store the strings as null-terminated for easier readability - and handling */ - - len = strlen(name) + 1; - - mach_write_to_2(log_ptr, len); - log_ptr += 2; - mlog_close(mtr, log_ptr); - - mlog_catenate_string(mtr, (byte*) name, len); - - if (type == MLOG_FILE_RENAME) { - ulint len = strlen(new_name) + 1; - log_ptr = mlog_open(mtr, 2 + len); - ut_a(log_ptr); - mach_write_to_2(log_ptr, len); - log_ptr += 2; - 
mlog_close(mtr, log_ptr); - - mlog_catenate_string(mtr, (byte*) new_name, len); - } -} -#endif - -/*********************************************************************** -Parses the body of a log record written about an .ibd file operation. That is, -the log record part after the standard (type, space id, page no) header of the -log record. - -If desired, also replays the delete or rename operation if the .ibd file -exists and the space id in it matches. Replays the create operation if a file -at that path does not exist yet. If the database directory for the file to be -created does not exist, then we create the directory, too. - -Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the -datadir that we should use in replaying the file operations. */ - -byte* -fil_op_log_parse_or_replay( -/*=======================*/ - /* out: end of log record, or NULL if the - record was not completely contained between - ptr and end_ptr */ - byte* ptr, /* in: buffer containing the log record body, - or an initial segment of it, if the record does - not fir completely between ptr and end_ptr */ - byte* end_ptr, /* in: buffer end */ - ulint type, /* in: the type of this log record */ - ibool do_replay, /* in: TRUE if we want to replay the - operation, and not just parse the log record */ - ulint space_id) /* in: if do_replay is TRUE, the space id of - the tablespace in question; otherwise - ignored */ -{ - ulint name_len; - ulint new_name_len; - const char* name; - const char* new_name = NULL; - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - name_len = mach_read_from_2(ptr); - - ptr += 2; - - if (end_ptr < ptr + name_len) { - - return(NULL); - } - - name = (const char*) ptr; - - ptr += name_len; - - if (type == MLOG_FILE_RENAME) { - if (end_ptr < ptr + 2) { - - return(NULL); - } - - new_name_len = mach_read_from_2(ptr); - - ptr += 2; - - if (end_ptr < ptr + new_name_len) { - - return(NULL); - } - - new_name = (const char*) ptr; - - ptr += new_name_len; 
- } - - /* We managed to parse a full log record body */ - /* - printf("Parsed log rec of type %lu space %lu\n" - "name %s\n", type, space_id, name); - - if (type == MLOG_FILE_RENAME) { - printf("new name %s\n", new_name); - } - */ - if (do_replay == FALSE) { - - return(ptr); - } - - /* Let us try to perform the file operation, if sensible. Note that - ibbackup has at this stage already read in all space id info to the - fil0fil.c data structures. - - NOTE that our algorithm is not guaranteed to work correctly if there - were renames of tables during the backup. See ibbackup code for more - on the problem. */ - - if (type == MLOG_FILE_DELETE) { - if (fil_tablespace_exists_in_mem(space_id)) { - ut_a(fil_delete_tablespace(space_id)); - } - } else if (type == MLOG_FILE_RENAME) { - /* We do the rename based on space id, not old file name; - this should guarantee that after the log replay each .ibd file - has the correct name for the latest log sequence number; the - proof is left as an exercise :) */ - - if (fil_tablespace_exists_in_mem(space_id)) { - /* Create the database directory for the new name, if - it does not exist yet */ - fil_create_directory_for_tablename(new_name); - - /* Rename the table if there is not yet a tablespace - with the same name */ - - if (fil_get_space_id_for_table(new_name) - == ULINT_UNDEFINED) { - /* We do not care of the old name, that is - why we pass NULL as the first argument */ - if (!fil_rename_tablespace(NULL, space_id, - new_name)) { - ut_error; - } - } - } - } else { - ut_a(type == MLOG_FILE_CREATE); - - if (fil_tablespace_exists_in_mem(space_id)) { - /* Do nothing */ - } else if (fil_get_space_id_for_table(name) - != ULINT_UNDEFINED) { - /* Do nothing */ - } else { - /* Create the database directory for name, if it does - not exist yet */ - fil_create_directory_for_tablename(name); - - ut_a(space_id != 0); - - if (fil_create_new_single_table_tablespace( - &space_id, name, FALSE, - FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) { - 
ut_error; - } - } - } - - return(ptr); -} - -/*********************************************************************** -Deletes a single-table tablespace. The tablespace must be cached in the -memory cache. */ - -ibool -fil_delete_tablespace( -/*==================*/ - /* out: TRUE if success */ - ulint id) /* in: space id */ -{ - fil_system_t* system = fil_system; - ibool success; - fil_space_t* space; - fil_node_t* node; - ulint count = 0; - char* path; - - ut_a(id != 0); -stop_ibuf_merges: - mutex_enter(&(system->mutex)); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - if (space != NULL) { - space->stop_ibuf_merges = TRUE; - - if (space->n_pending_ibuf_merges == 0) { - mutex_exit(&(system->mutex)); - - count = 0; - - goto try_again; - } else { - if (count > 5000) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Warning: trying to" - " delete tablespace ", stderr); - ut_print_filename(stderr, space->name); - fprintf(stderr, ",\n" - "InnoDB: but there are %lu pending" - " ibuf merges on it.\n" - "InnoDB: Loop %lu.\n", - (ulong) space->n_pending_ibuf_merges, - (ulong) count); - } - - mutex_exit(&(system->mutex)); - - os_thread_sleep(20000); - count++; - - goto stop_ibuf_merges; - } - } - - mutex_exit(&(system->mutex)); - count = 0; - -try_again: - mutex_enter(&(system->mutex)); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - if (space == NULL) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: cannot delete tablespace %lu\n" - "InnoDB: because it is not found in the" - " tablespace memory cache.\n", - (ulong) id); - - mutex_exit(&(system->mutex)); - - return(FALSE); - } - - ut_a(space); - ut_a(space->n_pending_ibuf_merges == 0); - - space->is_being_deleted = TRUE; - - ut_a(UT_LIST_GET_LEN(space->chain) == 1); - node = UT_LIST_GET_FIRST(space->chain); - - if (space->n_pending_flushes > 0 || node->n_pending > 0) { - if (count > 1000) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Warning: trying to" - 
" delete tablespace ", stderr); - ut_print_filename(stderr, space->name); - fprintf(stderr, ",\n" - "InnoDB: but there are %lu flushes" - " and %lu pending i/o's on it\n" - "InnoDB: Loop %lu.\n", - (ulong) space->n_pending_flushes, - (ulong) node->n_pending, - (ulong) count); - } - mutex_exit(&(system->mutex)); - os_thread_sleep(20000); - - count++; - - goto try_again; - } - - path = mem_strdup(space->name); - - mutex_exit(&(system->mutex)); -#ifndef UNIV_HOTBACKUP - /* Invalidate in the buffer pool all pages belonging to the - tablespace. Since we have set space->is_being_deleted = TRUE, readahead - or ibuf merge can no longer read more pages of this tablespace to the - buffer pool. Thus we can clean the tablespace out of the buffer pool - completely and permanently. The flag is_being_deleted also prevents - fil_flush() from being applied to this tablespace. */ - - buf_LRU_invalidate_tablespace(id); -#endif - /* printf("Deleting tablespace %s id %lu\n", space->name, id); */ - - success = fil_space_free(id); - - if (success) { - success = os_file_delete(path); - - if (!success) { - success = os_file_delete_if_exists(path); - } - } - - if (success) { -#ifndef UNIV_HOTBACKUP - /* Write a log record about the deletion of the .ibd - file, so that ibbackup can replay it in the - --apply-log phase. We use a dummy mtr and the familiar - log write mechanism. */ - mtr_t mtr; - - /* When replaying the operation in ibbackup, do not try - to write any log record */ - mtr_start(&mtr); - - fil_op_write_log(MLOG_FILE_DELETE, id, path, NULL, &mtr); - mtr_commit(&mtr); -#endif - mem_free(path); - - return(TRUE); - } - - mem_free(path); - - return(FALSE); -} - -/*********************************************************************** -Discards a single-table tablespace. The tablespace must be cached in the -memory cache. 
Discarding is like deleting a tablespace, but -1) we do not drop the table from the data dictionary; -2) we remove all insert buffer entries for the tablespace immediately; in DROP -TABLE they are only removed gradually in the background; -3) when the user does IMPORT TABLESPACE, the tablespace will have the same id -as it originally had. */ - -ibool -fil_discard_tablespace( -/*===================*/ - /* out: TRUE if success */ - ulint id) /* in: space id */ -{ - ibool success; - - success = fil_delete_tablespace(id); - - if (!success) { - fprintf(stderr, - "InnoDB: Warning: cannot delete tablespace %lu" - " in DISCARD TABLESPACE.\n" - "InnoDB: But let us remove the" - " insert buffer entries for this tablespace.\n", - (ulong) id); - } - - /* Remove all insert buffer entries for the tablespace */ - - ibuf_delete_for_discarded_space(id); - - return(TRUE); -} - -/*********************************************************************** -Renames the memory cache structures of a single-table tablespace. 
*/ -static -ibool -fil_rename_tablespace_in_mem( -/*=========================*/ - /* out: TRUE if success */ - fil_space_t* space, /* in: tablespace memory object */ - fil_node_t* node, /* in: file node of that tablespace */ - const char* path) /* in: new name */ -{ - fil_system_t* system = fil_system; - fil_space_t* space2; - const char* old_name = space->name; - - HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(old_name), - space2, 0 == strcmp(old_name, space2->name)); - if (space != space2) { - fputs("InnoDB: Error: cannot find ", stderr); - ut_print_filename(stderr, old_name); - fputs(" in tablespace memory cache\n", stderr); - - return(FALSE); - } - - HASH_SEARCH(name_hash, system->name_hash, ut_fold_string(path), - space2, 0 == strcmp(path, space2->name)); - if (space2 != NULL) { - fputs("InnoDB: Error: ", stderr); - ut_print_filename(stderr, path); - fputs(" is already in tablespace memory cache\n", stderr); - - return(FALSE); - } - - HASH_DELETE(fil_space_t, name_hash, system->name_hash, - ut_fold_string(space->name), space); - mem_free(space->name); - mem_free(node->name); - - space->name = mem_strdup(path); - node->name = mem_strdup(path); - - HASH_INSERT(fil_space_t, name_hash, system->name_hash, - ut_fold_string(path), space); - return(TRUE); -} - -/*********************************************************************** -Allocates a file name for a single-table tablespace. The string must be freed -by caller with mem_free(). 
*/ -static -char* -fil_make_ibd_name( -/*==============*/ - /* out, own: file name */ - const char* name, /* in: table name or a dir path of a - TEMPORARY table */ - ibool is_temp) /* in: TRUE if it is a dir path */ -{ - ulint namelen = strlen(name); - ulint dirlen = strlen(fil_path_to_mysql_datadir); - char* filename = mem_alloc(namelen + dirlen + sizeof "/.ibd"); - - if (is_temp) { - memcpy(filename, name, namelen); - memcpy(filename + namelen, ".ibd", sizeof ".ibd"); - } else { - memcpy(filename, fil_path_to_mysql_datadir, dirlen); - filename[dirlen] = '/'; - - memcpy(filename + dirlen + 1, name, namelen); - memcpy(filename + dirlen + namelen + 1, ".ibd", sizeof ".ibd"); - } - - srv_normalize_path_for_win(filename); - - return(filename); -} - -/*********************************************************************** -Renames a single-table tablespace. The tablespace must be cached in the -tablespace memory cache. */ - -ibool -fil_rename_tablespace( -/*==================*/ - /* out: TRUE if success */ - const char* old_name, /* in: old table name in the standard - databasename/tablename format of - InnoDB, or NULL if we do the rename - based on the space id only */ - ulint id, /* in: space id */ - const char* new_name) /* in: new table name in the standard - databasename/tablename format - of InnoDB */ -{ - fil_system_t* system = fil_system; - ibool success; - fil_space_t* space; - fil_node_t* node; - ulint count = 0; - char* path; - ibool old_name_was_specified = TRUE; - char* old_path; - - ut_a(id != 0); - - if (old_name == NULL) { - old_name = "(name not specified)"; - old_name_was_specified = FALSE; - } -retry: - count++; - - if (count > 1000) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Warning: problems renaming ", stderr); - ut_print_filename(stderr, old_name); - fputs(" to ", stderr); - ut_print_filename(stderr, new_name); - fprintf(stderr, ", %lu iterations\n", (ulong) count); - } - - mutex_enter(&(system->mutex)); - - HASH_SEARCH(hash, 
system->spaces, id, space, space->id == id); - - if (space == NULL) { - fprintf(stderr, - "InnoDB: Error: cannot find space id %lu" - " in the tablespace memory cache\n" - "InnoDB: though the table ", (ulong) id); - ut_print_filename(stderr, old_name); - fputs(" in a rename operation should have that id\n", stderr); - mutex_exit(&(system->mutex)); - - return(FALSE); - } - - if (count > 25000) { - space->stop_ios = FALSE; - mutex_exit(&(system->mutex)); - - return(FALSE); - } - - /* We temporarily close the .ibd file because we do not trust that - operating systems can rename an open file. For the closing we have to - wait until there are no pending i/o's or flushes on the file. */ - - space->stop_ios = TRUE; - - ut_a(UT_LIST_GET_LEN(space->chain) == 1); - node = UT_LIST_GET_FIRST(space->chain); - - if (node->n_pending > 0 || node->n_pending_flushes > 0) { - /* There are pending i/o's or flushes, sleep for a while and - retry */ - - mutex_exit(&(system->mutex)); - - os_thread_sleep(20000); - - goto retry; - - } else if (node->modification_counter > node->flush_counter) { - /* Flush the space */ - - mutex_exit(&(system->mutex)); - - os_thread_sleep(20000); - - fil_flush(id); - - goto retry; - - } else if (node->open) { - /* Close the file */ - - fil_node_close_file(node, system); - } - - /* Check that the old name in the space is right */ - - if (old_name_was_specified) { - old_path = fil_make_ibd_name(old_name, FALSE); - - ut_a(strcmp(space->name, old_path) == 0); - ut_a(strcmp(node->name, old_path) == 0); - } else { - old_path = mem_strdup(space->name); - } - - /* Rename the tablespace and the node in the memory cache */ - path = fil_make_ibd_name(new_name, FALSE); - success = fil_rename_tablespace_in_mem(space, node, path); - - if (success) { - success = os_file_rename(old_path, path); - - if (!success) { - /* We have to revert the changes we made - to the tablespace memory cache */ - - ut_a(fil_rename_tablespace_in_mem(space, node, - old_path)); - } - } - - 
mem_free(path); - mem_free(old_path); - - space->stop_ios = FALSE; - - mutex_exit(&(system->mutex)); - -#ifndef UNIV_HOTBACKUP - if (success) { - mtr_t mtr; - - mtr_start(&mtr); - - fil_op_write_log(MLOG_FILE_RENAME, id, old_name, new_name, - &mtr); - mtr_commit(&mtr); - } -#endif - return(success); -} - -/*********************************************************************** -Creates a new single-table tablespace to a database directory of MySQL. -Database directories are under the 'datadir' of MySQL. The datadir is the -directory of a running mysqld program. We can refer to it by simply the -path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp -dir of the mysqld server. */ - -ulint -fil_create_new_single_table_tablespace( -/*===================================*/ - /* out: DB_SUCCESS or error code */ - ulint* space_id, /* in/out: space id; if this is != 0, - then this is an input parameter, - otherwise output */ - const char* tablename, /* in: the table name in the usual - databasename/tablename format - of InnoDB, or a dir path to a temp - table */ - ibool is_temp, /* in: TRUE if a table created with - CREATE TEMPORARY TABLE */ - ulint size) /* in: the initial size of the - tablespace file in pages, - must be >= FIL_IBD_FILE_INITIAL_SIZE */ -{ - os_file_t file; - ibool ret; - ulint err; - byte* buf2; - byte* page; - ibool success; - char* path; - - ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE); - - path = fil_make_ibd_name(tablename, is_temp); - - file = os_file_create(path, OS_FILE_CREATE, OS_FILE_NORMAL, - OS_DATA_FILE, &ret); - if (ret == FALSE) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error creating file ", stderr); - ut_print_filename(stderr, path); - fputs(".\n", stderr); - - /* The following call will print an error message */ - - err = os_file_get_last_error(TRUE); - - if (err == OS_FILE_ALREADY_EXISTS) { - fputs("InnoDB: The file already exists though" - " the corresponding table did not\n" - "InnoDB: exist in the InnoDB data 
dictionary." - " Have you moved InnoDB\n" - "InnoDB: .ibd files around without using the" - " SQL commands\n" - "InnoDB: DISCARD TABLESPACE and" - " IMPORT TABLESPACE, or did\n" - "InnoDB: mysqld crash in the middle of" - " CREATE TABLE? You can\n" - "InnoDB: resolve the problem by" - " removing the file ", stderr); - ut_print_filename(stderr, path); - fputs("\n" - "InnoDB: under the 'datadir' of MySQL.\n", - stderr); - - mem_free(path); - return(DB_TABLESPACE_ALREADY_EXISTS); - } - - if (err == OS_FILE_DISK_FULL) { - - mem_free(path); - return(DB_OUT_OF_FILE_SPACE); - } - - mem_free(path); - return(DB_ERROR); - } - - buf2 = ut_malloc(2 * UNIV_PAGE_SIZE); - /* Align the memory for file i/o if we might have O_DIRECT set */ - page = ut_align(buf2, UNIV_PAGE_SIZE); - - ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE, 0); - - if (!ret) { - ut_free(buf2); - os_file_close(file); - os_file_delete(path); - - mem_free(path); - return(DB_OUT_OF_FILE_SPACE); - } - - if (*space_id == 0) { - *space_id = fil_assign_new_space_id(); - } - - /* printf("Creating tablespace %s id %lu\n", path, *space_id); */ - - if (*space_id == ULINT_UNDEFINED) { - ut_free(buf2); -error_exit: - os_file_close(file); -error_exit2: - os_file_delete(path); - - mem_free(path); - return(DB_ERROR); - } - - /* We have to write the space id to the file immediately and flush the - file to disk. This is because in crash recovery we must be aware what - tablespaces exist and what are their space id's, so that we can apply - the log records to the right file. It may take quite a while until - buffer pool flush algorithms write anything to the file and flush it to - disk. If we would not write here anything, the file would be filled - with zeros from the call of os_file_set_size(), until a buffer pool - flush would write to it. 
*/ - - memset(page, '\0', UNIV_PAGE_SIZE); - - fsp_header_write_space_id(page, *space_id); - - buf_flush_init_for_writing(page, ut_dulint_zero, *space_id, 0); - - ret = os_file_write(path, file, page, 0, 0, UNIV_PAGE_SIZE); - - ut_free(buf2); - - if (!ret) { - fputs("InnoDB: Error: could not write the first page" - " to tablespace ", stderr); - ut_print_filename(stderr, path); - putc('\n', stderr); - goto error_exit; - } - - ret = os_file_flush(file); - - if (!ret) { - fputs("InnoDB: Error: file flush of tablespace ", stderr); - ut_print_filename(stderr, path); - fputs(" failed\n", stderr); - goto error_exit; - } - - os_file_close(file); - - if (*space_id == ULINT_UNDEFINED) { - goto error_exit2; - } - - success = fil_space_create(path, *space_id, FIL_TABLESPACE); - - if (!success) { - goto error_exit2; - } - - fil_node_create(path, size, *space_id, FALSE); - -#ifndef UNIV_HOTBACKUP - { - mtr_t mtr; - - mtr_start(&mtr); - - fil_op_write_log(MLOG_FILE_CREATE, *space_id, tablename, - NULL, &mtr); - - mtr_commit(&mtr); - } -#endif - mem_free(path); - return(DB_SUCCESS); -} - -/************************************************************************ -It is possible, though very improbable, that the lsn's in the tablespace to be -imported have risen above the current system lsn, if a lengthy purge, ibuf -merge, or rollback was performed on a backup taken with ibbackup. If that is -the case, reset page lsn's in the file. We assume that mysqld was shut down -after it performed these cleanup operations on the .ibd file, so that it at -the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the -first page of the .ibd file, and we can determine whether we need to reset the -lsn's just by looking at that flush lsn. 
*/ - -ibool -fil_reset_too_high_lsns( -/*====================*/ - /* out: TRUE if success */ - const char* name, /* in: table name in the - databasename/tablename format */ - dulint current_lsn) /* in: reset lsn's if the lsn stamped - to FIL_PAGE_FILE_FLUSH_LSN in the - first page is too high */ -{ - os_file_t file; - char* filepath; - byte* page; - byte* buf2; - dulint flush_lsn; - ulint space_id; - ib_longlong file_size; - ib_longlong offset; - ulint page_no; - ibool success; - - filepath = fil_make_ibd_name(name, FALSE); - - file = os_file_create_simple_no_error_handling( - filepath, OS_FILE_OPEN, OS_FILE_READ_WRITE, &success); - if (!success) { - /* The following call prints an error message */ - os_file_get_last_error(TRUE); - - ut_print_timestamp(stderr); - - fputs(" InnoDB: Error: trying to open a table," - " but could not\n" - "InnoDB: open the tablespace file ", stderr); - ut_print_filename(stderr, filepath); - fputs("!\n", stderr); - mem_free(filepath); - - return(FALSE); - } - - /* Read the first page of the tablespace */ - - buf2 = ut_malloc(2 * UNIV_PAGE_SIZE); - /* Align the memory for file i/o if we might have O_DIRECT set */ - page = ut_align(buf2, UNIV_PAGE_SIZE); - - success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE); - if (!success) { - - goto func_exit; - } - - /* We have to read the file flush lsn from the header of the file */ - - flush_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN); - - if (ut_dulint_cmp(current_lsn, flush_lsn) >= 0) { - /* Ok */ - success = TRUE; - - goto func_exit; - } - - space_id = fsp_header_get_space_id(page); - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Flush lsn in the tablespace file %lu" - " to be imported\n" - "InnoDB: is %lu %lu, which exceeds current" - " system lsn %lu %lu.\n" - "InnoDB: We reset the lsn's in the file ", - (ulong) space_id, - (ulong) ut_dulint_get_high(flush_lsn), - (ulong) ut_dulint_get_low(flush_lsn), - (ulong) ut_dulint_get_high(current_lsn), - (ulong) 
ut_dulint_get_low(current_lsn)); - ut_print_filename(stderr, filepath); - fputs(".\n", stderr); - - /* Loop through all the pages in the tablespace and reset the lsn and - the page checksum if necessary */ - - file_size = os_file_get_size_as_iblonglong(file); - - for (offset = 0; offset < file_size; offset += UNIV_PAGE_SIZE) { - success = os_file_read(file, page, - (ulint)(offset & 0xFFFFFFFFUL), - (ulint)(offset >> 32), UNIV_PAGE_SIZE); - if (!success) { - - goto func_exit; - } - if (ut_dulint_cmp(mach_read_from_8(page + FIL_PAGE_LSN), - current_lsn) > 0) { - /* We have to reset the lsn */ - space_id = mach_read_from_4( - page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - page_no = mach_read_from_4(page + FIL_PAGE_OFFSET); - - buf_flush_init_for_writing(page, current_lsn, space_id, - page_no); - success = os_file_write(filepath, file, page, - (ulint)(offset & 0xFFFFFFFFUL), - (ulint)(offset >> 32), - UNIV_PAGE_SIZE); - if (!success) { - - goto func_exit; - } - } - } - - success = os_file_flush(file); - if (!success) { - - goto func_exit; - } - - /* We now update the flush_lsn stamp at the start of the file */ - success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE); - if (!success) { - - goto func_exit; - } - - mach_write_to_8(page + FIL_PAGE_FILE_FLUSH_LSN, current_lsn); - - success = os_file_write(filepath, file, page, 0, 0, UNIV_PAGE_SIZE); - if (!success) { - - goto func_exit; - } - success = os_file_flush(file); -func_exit: - os_file_close(file); - ut_free(buf2); - mem_free(filepath); - - return(success); -} - -/************************************************************************ -Tries to open a single-table tablespace and optionally checks the space id is -right in it. If does not succeed, prints an error message to the .err log. This -function is used to open a tablespace when we start up mysqld, and also in -IMPORT TABLESPACE. 
-NOTE that we assume this operation is used either at the database startup -or under the protection of the dictionary mutex, so that two users cannot -race here. This operation does not leave the file associated with the -tablespace open, but closes it after we have looked at the space id in it. */ - -ibool -fil_open_single_table_tablespace( -/*=============================*/ - /* out: TRUE if success */ - ibool check_space_id, /* in: should we check that the space - id in the file is right; we assume - that this function runs much faster - if no check is made, since accessing - the file inode probably is much - faster (the OS caches them) than - accessing the first page of the file */ - ulint id, /* in: space id */ - const char* name) /* in: table name in the - databasename/tablename format */ -{ - os_file_t file; - char* filepath; - ibool success; - byte* buf2; - byte* page; - ulint space_id; - ibool ret = TRUE; - - filepath = fil_make_ibd_name(name, FALSE); - - file = os_file_create_simple_no_error_handling( - filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success); - if (!success) { - /* The following call prints an error message */ - os_file_get_last_error(TRUE); - - ut_print_timestamp(stderr); - - fputs(" InnoDB: Error: trying to open a table," - " but could not\n" - "InnoDB: open the tablespace file ", stderr); - ut_print_filename(stderr, filepath); - fputs("!\n" - "InnoDB: Have you moved InnoDB .ibd files around" - " without using the\n" - "InnoDB: commands DISCARD TABLESPACE and" - " IMPORT TABLESPACE?\n" - "InnoDB: It is also possible that this is" - " a temporary table #sql...,\n" - "InnoDB: and MySQL removed the .ibd file for this.\n" - "InnoDB: Please refer to\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "innodb-troubleshooting.html\n" - "InnoDB: for how to resolve the issue.\n", stderr); - - mem_free(filepath); - - return(FALSE); - } - - if (!check_space_id) { - space_id = id; - - goto skip_check; - } - - /* Read the first page of the 
tablespace */ - - buf2 = ut_malloc(2 * UNIV_PAGE_SIZE); - /* Align the memory for file i/o if we might have O_DIRECT set */ - page = ut_align(buf2, UNIV_PAGE_SIZE); - - success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE); - - /* We have to read the tablespace id from the file */ - - space_id = fsp_header_get_space_id(page); - - ut_free(buf2); - - if (space_id != id) { - ut_print_timestamp(stderr); - - fputs(" InnoDB: Error: tablespace id in file ", stderr); - ut_print_filename(stderr, filepath); - fprintf(stderr, " is %lu, but in the InnoDB\n" - "InnoDB: data dictionary it is %lu.\n" - "InnoDB: Have you moved InnoDB .ibd files" - " around without using the\n" - "InnoDB: commands DISCARD TABLESPACE and" - " IMPORT TABLESPACE?\n" - "InnoDB: Please refer to\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "innodb-troubleshooting.html\n" - "InnoDB: for how to resolve the issue.\n", - (ulong) space_id, (ulong) id); - - ret = FALSE; - - goto func_exit; - } - -skip_check: - success = fil_space_create(filepath, space_id, FIL_TABLESPACE); - - if (!success) { - goto func_exit; - } - - /* We do not measure the size of the file, that is why we pass the 0 - below */ - - fil_node_create(filepath, 0, space_id, FALSE); -func_exit: - os_file_close(file); - mem_free(filepath); - - return(ret); -} - -#ifdef UNIV_HOTBACKUP -/*********************************************************************** -Allocates a file name for an old version of a single-table tablespace. -The string must be freed by caller with mem_free()! 
*/ -static -char* -fil_make_ibbackup_old_name( -/*=======================*/ - /* out, own: file name */ - const char* name) /* in: original file name */ -{ - static const char suffix[] = "_ibbackup_old_vers_"; - ulint len = strlen(name); - char* path = mem_alloc(len + (15 + sizeof suffix)); - - memcpy(path, name, len); - memcpy(path + len, suffix, (sizeof suffix) - 1); - ut_sprintf_timestamp_without_extra_chars(path + len + sizeof suffix); - return(path); -} -#endif /* UNIV_HOTBACKUP */ - -/************************************************************************ -Opens an .ibd file and adds the associated single-table tablespace to the -InnoDB fil0fil.c data structures. */ -static -void -fil_load_single_table_tablespace( -/*=============================*/ - const char* dbname, /* in: database name */ - const char* filename) /* in: file name (not a path), - including the .ibd extension */ -{ - os_file_t file; - char* filepath; - ibool success; - byte* buf2; - byte* page; - ulint space_id; - ulint size_low; - ulint size_high; - ib_longlong size; -#ifdef UNIV_HOTBACKUP - fil_space_t* space; -#endif - filepath = mem_alloc(strlen(dbname) + strlen(filename) - + strlen(fil_path_to_mysql_datadir) + 3); - - sprintf(filepath, "%s/%s/%s", fil_path_to_mysql_datadir, dbname, - filename); - srv_normalize_path_for_win(filepath); -#ifdef __WIN__ -# ifndef UNIV_HOTBACKUP - /* If lower_case_table_names is 0 or 2, then MySQL allows database - directory names with upper case letters. On Windows, all table and - database names in InnoDB are internally always in lower case. Put the - file path to lower case, so that we are consistent with InnoDB's - internal data dictionary. 
*/ - - dict_casedn_str(filepath); -# endif /* !UNIV_HOTBACKUP */ -#endif - file = os_file_create_simple_no_error_handling( - filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success); - if (!success) { - /* The following call prints an error message */ - os_file_get_last_error(TRUE); - - fprintf(stderr, - "InnoDB: Error: could not open single-table tablespace" - " file\n" - "InnoDB: %s!\n" - "InnoDB: We do not continue the crash recovery," - " because the table may become\n" - "InnoDB: corrupt if we cannot apply the log records" - " in the InnoDB log to it.\n" - "InnoDB: To fix the problem and start mysqld:\n" - "InnoDB: 1) If there is a permission problem" - " in the file and mysqld cannot\n" - "InnoDB: open the file, you should" - " modify the permissions.\n" - "InnoDB: 2) If the table is not needed, or you can" - " restore it from a backup,\n" - "InnoDB: then you can remove the .ibd file," - " and InnoDB will do a normal\n" - "InnoDB: crash recovery and ignore that table.\n" - "InnoDB: 3) If the file system or the" - " disk is broken, and you cannot remove\n" - "InnoDB: the .ibd file, you can set" - " innodb_force_recovery > 0 in my.cnf\n" - "InnoDB: and force InnoDB to continue crash" - " recovery here.\n", filepath); - - mem_free(filepath); - - if (srv_force_recovery > 0) { - fprintf(stderr, - "InnoDB: innodb_force_recovery" - " was set to %lu. 
Continuing crash recovery\n" - "InnoDB: even though we cannot access" - " the .ibd file of this table.\n", - srv_force_recovery); - return; - } - - exit(1); - } - - success = os_file_get_size(file, &size_low, &size_high); - - if (!success) { - /* The following call prints an error message */ - os_file_get_last_error(TRUE); - - fprintf(stderr, - "InnoDB: Error: could not measure the size" - " of single-table tablespace file\n" - "InnoDB: %s!\n" - "InnoDB: We do not continue crash recovery," - " because the table will become\n" - "InnoDB: corrupt if we cannot apply the log records" - " in the InnoDB log to it.\n" - "InnoDB: To fix the problem and start mysqld:\n" - "InnoDB: 1) If there is a permission problem" - " in the file and mysqld cannot\n" - "InnoDB: access the file, you should" - " modify the permissions.\n" - "InnoDB: 2) If the table is not needed," - " or you can restore it from a backup,\n" - "InnoDB: then you can remove the .ibd file," - " and InnoDB will do a normal\n" - "InnoDB: crash recovery and ignore that table.\n" - "InnoDB: 3) If the file system or the disk is broken," - " and you cannot remove\n" - "InnoDB: the .ibd file, you can set" - " innodb_force_recovery > 0 in my.cnf\n" - "InnoDB: and force InnoDB to continue" - " crash recovery here.\n", filepath); - - os_file_close(file); - mem_free(filepath); - - if (srv_force_recovery > 0) { - fprintf(stderr, - "InnoDB: innodb_force_recovery" - " was set to %lu. Continuing crash recovery\n" - "InnoDB: even though we cannot access" - " the .ibd file of this table.\n", - srv_force_recovery); - return; - } - - exit(1); - } - - /* TODO: What to do in other cases where we cannot access an .ibd - file during a crash recovery? */ - - /* Every .ibd file is created >= 4 pages in size. Smaller files - cannot be ok. 
*/ - - size = (((ib_longlong)size_high) << 32) + (ib_longlong)size_low; -#ifndef UNIV_HOTBACKUP - if (size < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { - fprintf(stderr, - "InnoDB: Error: the size of single-table tablespace" - " file %s\n" - "InnoDB: is only %lu %lu, should be at least %lu!", - filepath, - (ulong) size_high, - (ulong) size_low, (ulong) (4 * UNIV_PAGE_SIZE)); - os_file_close(file); - mem_free(filepath); - - return; - } -#endif - /* Read the first page of the tablespace if the size big enough */ - - buf2 = ut_malloc(2 * UNIV_PAGE_SIZE); - /* Align the memory for file i/o if we might have O_DIRECT set */ - page = ut_align(buf2, UNIV_PAGE_SIZE); - - if (size >= FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) { - success = os_file_read(file, page, 0, 0, UNIV_PAGE_SIZE); - - /* We have to read the tablespace id from the file */ - - space_id = fsp_header_get_space_id(page); - } else { - space_id = ULINT_UNDEFINED; - } - -#ifndef UNIV_HOTBACKUP - if (space_id == ULINT_UNDEFINED || space_id == 0) { - fprintf(stderr, - "InnoDB: Error: tablespace id %lu in file %s" - " is not sensible\n", - (ulong) space_id, - filepath); - goto func_exit; - } -#else - if (space_id == ULINT_UNDEFINED || space_id == 0) { - char* new_path; - - fprintf(stderr, - "InnoDB: Renaming tablespace %s of id %lu,\n" - "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n" - "InnoDB: because its size %lld is too small" - " (< 4 pages 16 kB each),\n" - "InnoDB: or the space id in the file header" - " is not sensible.\n" - "InnoDB: This can happen in an ibbackup run," - " and is not dangerous.\n", - filepath, space_id, filepath, size); - os_file_close(file); - - new_path = fil_make_ibbackup_old_name(filepath); - ut_a(os_file_rename(filepath, new_path)); - - ut_free(buf2); - mem_free(filepath); - mem_free(new_path); - - return; - } - - /* A backup may contain the same space several times, if the space got - renamed at a sensitive time. 
Since it is enough to have one version of - the space, we rename the file if a space with the same space id - already exists in the tablespace memory cache. We rather rename the - file than delete it, because if there is a bug, we do not want to - destroy valuable data. */ - - mutex_enter(&(fil_system->mutex)); - - space = fil_get_space_for_id_low(space_id); - - if (space) { - char* new_path; - - fprintf(stderr, - "InnoDB: Renaming tablespace %s of id %lu,\n" - "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n" - "InnoDB: because space %s with the same id\n" - "InnoDB: was scanned earlier. This can happen" - " if you have renamed tables\n" - "InnoDB: during an ibbackup run.\n", - filepath, space_id, filepath, - space->name); - os_file_close(file); - - new_path = fil_make_ibbackup_old_name(filepath); - - mutex_exit(&(fil_system->mutex)); - - ut_a(os_file_rename(filepath, new_path)); - - ut_free(buf2); - mem_free(filepath); - mem_free(new_path); - - return; - } - mutex_exit(&(fil_system->mutex)); -#endif - success = fil_space_create(filepath, space_id, FIL_TABLESPACE); - - if (!success) { - - goto func_exit; - } - - /* We do not use the size information we have about the file, because - the rounding formula for extents and pages is somewhat complex; we - let fil_node_open() do that task. */ - - fil_node_create(filepath, 0, space_id, FALSE); -func_exit: - os_file_close(file); - ut_free(buf2); - mem_free(filepath); -} - -/*************************************************************************** -A fault-tolerant function that tries to read the next file name in the -directory. We retry 100 times if os_file_readdir_next_file() returns -1. The -idea is to read as much good data as we can and jump over bad data. 
*/ -static -int -fil_file_readdir_next_file( -/*=======================*/ - /* out: 0 if ok, -1 if error even after the - retries, 1 if at the end of the directory */ - ulint* err, /* out: this is set to DB_ERROR if an error - was encountered, otherwise not changed */ - const char* dirname,/* in: directory name or path */ - os_file_dir_t dir, /* in: directory stream */ - os_file_stat_t* info) /* in/out: buffer where the info is returned */ -{ - ulint i; - int ret; - - for (i = 0; i < 100; i++) { - ret = os_file_readdir_next_file(dirname, dir, info); - - if (ret != -1) { - - return(ret); - } - - fprintf(stderr, - "InnoDB: Error: os_file_readdir_next_file()" - " returned -1 in\n" - "InnoDB: directory %s\n" - "InnoDB: Crash recovery may have failed" - " for some .ibd files!\n", dirname); - - *err = DB_ERROR; - } - - return(-1); -} - -/************************************************************************ -At the server startup, if we need crash recovery, scans the database -directories under the MySQL datadir, looking for .ibd files. Those files are -single-table tablespaces. We need to know the space id in each of them so that -we know into which file we should look to check the contents of a page stored -in the doublewrite buffer, also to know where to apply log records where the -space id is != 0. */ - -ulint -fil_load_single_table_tablespaces(void) -/*===================================*/ - /* out: DB_SUCCESS or error number */ -{ - int ret; - char* dbpath = NULL; - ulint dbpath_len = 100; - os_file_dir_t dir; - os_file_dir_t dbdir; - os_file_stat_t dbinfo; - os_file_stat_t fileinfo; - ulint err = DB_SUCCESS; - - /* The datadir of MySQL is always the default directory of mysqld */ - - dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE); - - if (dir == NULL) { - - return(DB_ERROR); - } - - dbpath = mem_alloc(dbpath_len); - - /* Scan all directories under the datadir. They are the database - directories of MySQL. 
*/ - - ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir, - &dbinfo); - while (ret == 0) { - ulint len; - /* printf("Looking at %s in datadir\n", dbinfo.name); */ - - if (dbinfo.type == OS_FILE_TYPE_FILE - || dbinfo.type == OS_FILE_TYPE_UNKNOWN) { - - goto next_datadir_item; - } - - /* We found a symlink or a directory; try opening it to see - if a symlink is a directory */ - - len = strlen(fil_path_to_mysql_datadir) - + strlen (dbinfo.name) + 2; - if (len > dbpath_len) { - dbpath_len = len; - - if (dbpath) { - mem_free(dbpath); - } - - dbpath = mem_alloc(dbpath_len); - } - sprintf(dbpath, "%s/%s", fil_path_to_mysql_datadir, - dbinfo.name); - srv_normalize_path_for_win(dbpath); - - dbdir = os_file_opendir(dbpath, FALSE); - - if (dbdir != NULL) { - /* printf("Opened dir %s\n", dbinfo.name); */ - - /* We found a database directory; loop through it, - looking for possible .ibd files in it */ - - ret = fil_file_readdir_next_file(&err, dbpath, dbdir, - &fileinfo); - while (ret == 0) { - /* printf( - " Looking at file %s\n", fileinfo.name); */ - - if (fileinfo.type == OS_FILE_TYPE_DIR) { - - goto next_file_item; - } - - /* We found a symlink or a file */ - if (strlen(fileinfo.name) > 4 - && 0 == strcmp(fileinfo.name - + strlen(fileinfo.name) - 4, - ".ibd")) { - /* The name ends in .ibd; try opening - the file */ - fil_load_single_table_tablespace( - dbinfo.name, fileinfo.name); - } -next_file_item: - ret = fil_file_readdir_next_file(&err, - dbpath, dbdir, - &fileinfo); - } - - if (0 != os_file_closedir(dbdir)) { - fputs("InnoDB: Warning: could not" - " close database directory ", stderr); - ut_print_filename(stderr, dbpath); - putc('\n', stderr); - - err = DB_ERROR; - } - } - -next_datadir_item: - ret = fil_file_readdir_next_file(&err, - fil_path_to_mysql_datadir, - dir, &dbinfo); - } - - mem_free(dbpath); - - if (0 != os_file_closedir(dir)) { - fprintf(stderr, - "InnoDB: Error: could not close MySQL datadir\n"); - - return(DB_ERROR); - } - - 
return(err); -} - -/************************************************************************ -If we need crash recovery, and we have called -fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(), -we can call this function to print an error message of orphaned .ibd files -for which there is not a data dictionary entry with a matching table name -and space id. */ - -void -fil_print_orphaned_tablespaces(void) -/*================================*/ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - - mutex_enter(&(system->mutex)); - - space = UT_LIST_GET_FIRST(system->space_list); - - while (space) { - if (space->purpose == FIL_TABLESPACE && space->id != 0 - && !space->mark) { - fputs("InnoDB: Warning: tablespace ", stderr); - ut_print_filename(stderr, space->name); - fprintf(stderr, " of id %lu has no matching table in\n" - "InnoDB: the InnoDB data dictionary.\n", - (ulong) space->id); - } - - space = UT_LIST_GET_NEXT(space_list, space); - } - - mutex_exit(&(system->mutex)); -} - -/*********************************************************************** -Returns TRUE if a single-table tablespace does not exist in the memory cache, -or is being deleted there. 
*/ - -ibool -fil_tablespace_deleted_or_being_deleted_in_mem( -/*===========================================*/ - /* out: TRUE if does not exist or is being\ - deleted */ - ulint id, /* in: space id */ - ib_longlong version)/* in: tablespace_version should be this; if - you pass -1 as the value of this, then this - parameter is ignored */ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - - ut_ad(system); - - mutex_enter(&(system->mutex)); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - if (space == NULL || space->is_being_deleted) { - mutex_exit(&(system->mutex)); - - return(TRUE); - } - - if (version != ((ib_longlong)-1) - && space->tablespace_version != version) { - mutex_exit(&(system->mutex)); - - return(TRUE); - } - - mutex_exit(&(system->mutex)); - - return(FALSE); -} - -/*********************************************************************** -Returns TRUE if a single-table tablespace exists in the memory cache. */ - -ibool -fil_tablespace_exists_in_mem( -/*=========================*/ - /* out: TRUE if exists */ - ulint id) /* in: space id */ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - - ut_ad(system); - - mutex_enter(&(system->mutex)); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - if (space == NULL) { - mutex_exit(&(system->mutex)); - - return(FALSE); - } - - mutex_exit(&(system->mutex)); - - return(TRUE); -} - -/*********************************************************************** -Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory -cache. Note that if we have not done a crash recovery at the database startup, -there may be many tablespaces which are not yet in the memory cache. 
*/

ibool
fil_space_for_table_exists_in_mem(
/*==============================*/
					/* out: TRUE if a matching tablespace
					exists in the memory cache */
	ulint		id,		/* in: space id */
	const char*	name,		/* in: table name in the standard
					'databasename/tablename' format or
					the dir path to a temp table */
	ibool		is_temp,	/* in: TRUE if created with CREATE
					TEMPORARY TABLE */
	ibool		mark_space,	/* in: in crash recovery, at database
					startup we mark all spaces which have
					an associated table in the InnoDB
					data dictionary, so that
					we can print a warning about orphaned
					tablespaces */
	ibool		print_error_if_does_not_exist)
					/* in: print detailed error
					information to the .err log if a
					matching tablespace is not found from
					memory */
{
	fil_system_t*	system		= fil_system;
	fil_space_t*	namespace;	/* space found by name, or NULL */
	fil_space_t*	space;		/* space found by id, or NULL */
	char*		path;		/* .ibd path built from name; freed
					on every return path below */

	ut_ad(system);

	mutex_enter(&(system->mutex));

	path = fil_make_ibd_name(name, is_temp);

	/* Look if there is a space with the same id */

	HASH_SEARCH(hash, system->spaces, id, space, space->id == id);

	/* Look if there is a space with the same name; the name is the
	directory path from the datadir to the file */

	HASH_SEARCH(name_hash, system->name_hash,
		    ut_fold_string(path), namespace,
		    0 == strcmp(namespace->name, path));
	if (space && space == namespace) {
		/* Found: the id lookup and the name lookup agree on the
		same cached space */

		if (mark_space) {
			space->mark = TRUE;
		}

		mem_free(path);
		mutex_exit(&(system->mutex));

		return(TRUE);
	}

	/* From here on the result is always FALSE; the remaining code only
	decides which diagnostic, if any, is printed */

	if (!print_error_if_does_not_exist) {

		mem_free(path);
		mutex_exit(&(system->mutex));

		return(FALSE);
	}

	if (space == NULL) {
		if (namespace == NULL) {
			/* Neither the id nor the name is cached */
			ut_print_timestamp(stderr);
			fputs(" InnoDB: Error: table ", stderr);
			ut_print_filename(stderr, name);
			fprintf(stderr, "\n"
				"InnoDB: in InnoDB data dictionary"
				" has tablespace id %lu,\n"
				"InnoDB: but tablespace with that id"
				" or name does not exist. Have\n"
				"InnoDB: you deleted or moved .ibd files?\n"
				"InnoDB: This may also be a table created with"
				" CREATE TEMPORARY TABLE\n"
				"InnoDB: whose .ibd and .frm files"
				" MySQL automatically removed, but the\n"
				"InnoDB: table still exists in the"
				" InnoDB internal data dictionary.\n",
				(ulong) id);
		} else {
			/* The name is cached, but under a different id */
			ut_print_timestamp(stderr);
			fputs(" InnoDB: Error: table ", stderr);
			ut_print_filename(stderr, name);
			fprintf(stderr, "\n"
				"InnoDB: in InnoDB data dictionary has"
				" tablespace id %lu,\n"
				"InnoDB: but a tablespace with that id"
				" does not exist. There is\n"
				"InnoDB: a tablespace of name %s and id %lu,"
				" though. Have\n"
				"InnoDB: you deleted or moved .ibd files?\n",
				(ulong) id, namespace->name,
				(ulong) namespace->id);
		}
error_exit:
		/* Shared tail for all diagnostics; also reached by the
		goto in the name-mismatch branch further down */
		fputs("InnoDB: Please refer to\n"
		      "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/"
		      "innodb-troubleshooting.html\n"
		      "InnoDB: for how to resolve the issue.\n", stderr);

		mem_free(path);
		mutex_exit(&(system->mutex));

		return(FALSE);
	}

	if (0 != strcmp(space->name, path)) {
		/* The id is cached, but under a different name */
		ut_print_timestamp(stderr);
		fputs(" InnoDB: Error: table ", stderr);
		ut_print_filename(stderr, name);
		fprintf(stderr, "\n"
			"InnoDB: in InnoDB data dictionary has"
			" tablespace id %lu,\n"
			"InnoDB: but the tablespace with that id"
			" has name %s.\n"
			"InnoDB: Have you deleted or moved .ibd files?\n",
			(ulong) id, space->name);

		if (namespace != NULL) {
			fputs("InnoDB: There is a tablespace"
			      " with the right name\n"
			      "InnoDB: ", stderr);
			ut_print_filename(stderr, namespace->name);
			fprintf(stderr, ", but its id is %lu.\n",
				(ulong) namespace->id);
		}

		goto error_exit;
	}

	/* NOTE(review): reached only when the space found by id has the
	right name yet differs from the result of the name lookup; no
	message is printed in that case - confirm this is intended */

	mem_free(path);
	mutex_exit(&(system->mutex));

	return(FALSE);
}

/***********************************************************************
Checks if a single-table tablespace for a given table name exists in the
tablespace memory cache.
*/
static
ulint
fil_get_space_id_for_table(
/*=======================*/
				/* out: id of the cached space whose name
				matches, or ULINT_UNDEFINED if there is
				none */
	const char*	name)	/* in: table name in the standard
				'databasename/tablename' format */
{
	fil_system_t*	system	= fil_system;
	fil_space_t*	match;
	char*		ibd_path;
	ulint		found_id;

	ut_ad(system);

	mutex_enter(&(system->mutex));

	ibd_path = fil_make_ibd_name(name, FALSE);

	/* Spaces are hashed by name, the name being the directory path to
	the file */

	HASH_SEARCH(name_hash, system->name_hash,
		    ut_fold_string(ibd_path), match,
		    0 == strcmp(match->name, ibd_path));

	found_id = match ? match->id : ULINT_UNDEFINED;

	mem_free(ibd_path);

	mutex_exit(&(system->mutex));

	return(found_id);
}

/**************************************************************************
Tries to extend a data file so that it would accommodate the number of pages
given. The tablespace must be cached in the memory cache. If the space is big
enough already, does nothing.
*/

ibool
fil_extend_space_to_desired_size(
/*=============================*/
				/* out: TRUE if success */
	ulint*	actual_size,	/* out: size of the space after extension;
				if we ran out of disk space this may be lower
				than the desired size */
	ulint	space_id,	/* in: space id */
	ulint	size_after_extend)/* in: desired size in pages after the
				extension; if the current space size is bigger
				than this already, the function does nothing */
{
	fil_system_t*	system		= fil_system;
	fil_node_t*	node;
	fil_space_t*	space;
	byte*		buf2;		/* raw allocation, freed at the end */
	byte*		buf;		/* buf2 aligned to UNIV_PAGE_SIZE */
	ulint		buf_size;
	ulint		start_page_no;	/* next page number to write, counted
					within the whole space */
	ulint		file_start_page_no;	/* page number, within the
					space, where the last file begins */
	ulint		offset_high;
	ulint		offset_low;
	ibool		success		= TRUE;

	/* The system mutex is held from here until the mutex_exit() calls
	below */
	fil_mutex_enter_and_prepare_for_io(space_id);

	HASH_SEARCH(hash, system->spaces, space_id, space,
		    space->id == space_id);
	ut_a(space);

	if (space->size >= size_after_extend) {
		/* Space already big enough */

		*actual_size = space->size;

		mutex_exit(&(system->mutex));

		return(TRUE);
	}

	/* Only the last file of the space is extended */
	node = UT_LIST_GET_LAST(space->chain);

	fil_node_prepare_for_io(node, system, space);

	start_page_no = space->size;
	file_start_page_no = space->size - node->size;

	/* Extend at most 64 pages at a time */
	buf_size = ut_min(64, size_after_extend - start_page_no)
		* UNIV_PAGE_SIZE;
	/* One extra page is allocated so that the aligned pointer still has
	buf_size bytes behind it */
	buf2 = mem_alloc(buf_size + UNIV_PAGE_SIZE);
	buf = ut_align(buf2, UNIV_PAGE_SIZE);

	memset(buf, 0, buf_size);

	while (start_page_no < size_after_extend) {
		ulint	n_pages = ut_min(buf_size / UNIV_PAGE_SIZE,
					 size_after_extend - start_page_no);

		/* Split the byte offset within the file into a count of
		4096 MB units (offset_high) and the remaining bytes
		(offset_low) */
		offset_high = (start_page_no - file_start_page_no)
			/ (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE));
		offset_low = ((start_page_no - file_start_page_no)
			      % (4096 * ((1024 * 1024) / UNIV_PAGE_SIZE)))
			* UNIV_PAGE_SIZE;
#ifdef UNIV_HOTBACKUP
		success = os_file_write(node->name, node->handle, buf,
					offset_low, offset_high,
					UNIV_PAGE_SIZE * n_pages);
#else
		success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
				 node->name, node->handle, buf,
				 offset_low, offset_high,
				 UNIV_PAGE_SIZE * n_pages,
				 NULL, NULL);
#endif
		if (success) {
			node->size += n_pages;
			space->size += n_pages;

			os_has_said_disk_full = FALSE;
		} else {
			/* Let us measure the size of the file to determine
			how much we were able to extend it */

			/* NOTE(review): this is unsigned arithmetic; it
			presumes the measured file size is never smaller
			than node->size - confirm */
			n_pages = ((ulint)
				   (os_file_get_size_as_iblonglong
				    (node->handle)
				    / UNIV_PAGE_SIZE)) - node->size;

			node->size += n_pages;
			space->size += n_pages;

			break;
		}

		start_page_no += n_pages;
	}

	mem_free(buf2);

	fil_node_complete_io(node, system, OS_FILE_WRITE);

	*actual_size = space->size;

#ifndef UNIV_HOTBACKUP
	if (space_id == 0) {
		ulint pages_per_mb = (1024 * 1024) / UNIV_PAGE_SIZE;

		/* Keep the last data file size info up to date, rounded to
		full megabytes */

		srv_data_file_sizes[srv_n_data_files - 1]
			= (node->size / pages_per_mb) * pages_per_mb;
	}
#endif /* !UNIV_HOTBACKUP */

	/*
	printf("Extended %s to %lu, actual size %lu pages\n", space->name,
	size_after_extend, *actual_size); */
	mutex_exit(&(system->mutex));

	/* The flush is requested only after the system mutex has been
	released */
	fil_flush(space_id);

	return(success);
}

#ifdef UNIV_HOTBACKUP
/************************************************************************
Extends all tablespaces to the size stored in the space header. During the
ibbackup --apply-log phase we extended the spaces on-demand so that log records
could be applied, but that may have left spaces still too small compared to
the size stored in the space header.
*/ - -void -fil_extend_tablespaces_to_stored_len(void) -/*======================================*/ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - byte* buf; - ulint actual_size; - ulint size_in_header; - ulint error; - ibool success; - - buf = mem_alloc(UNIV_PAGE_SIZE); - - mutex_enter(&(system->mutex)); - - space = UT_LIST_GET_FIRST(system->space_list); - - while (space) { - ut_a(space->purpose == FIL_TABLESPACE); - - mutex_exit(&(system->mutex)); /* no need to protect with a - mutex, because this is a - single-threaded operation */ - error = fil_read(TRUE, space->id, 0, 0, UNIV_PAGE_SIZE, buf, - NULL); - ut_a(error == DB_SUCCESS); - - size_in_header = fsp_get_size_low(buf); - - success = fil_extend_space_to_desired_size( - &actual_size, space->id, size_in_header); - if (!success) { - fprintf(stderr, - "InnoDB: Error: could not extend the" - " tablespace of %s\n" - "InnoDB: to the size stored in header," - " %lu pages;\n" - "InnoDB: size after extension %lu pages\n" - "InnoDB: Check that you have free disk space" - " and retry!\n", - space->name, size_in_header, actual_size); - exit(1); - } - - mutex_enter(&(system->mutex)); - - space = UT_LIST_GET_NEXT(space_list, space); - } - - mutex_exit(&(system->mutex)); - - mem_free(buf); -} -#endif - -/*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/ - -/*********************************************************************** -Tries to reserve free extents in a file space. 
*/ - -ibool -fil_space_reserve_free_extents( -/*===========================*/ - /* out: TRUE if succeed */ - ulint id, /* in: space id */ - ulint n_free_now, /* in: number of free extents now */ - ulint n_to_reserve) /* in: how many one wants to reserve */ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - ibool success; - - ut_ad(system); - - mutex_enter(&(system->mutex)); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - ut_a(space); - - if (space->n_reserved_extents + n_to_reserve > n_free_now) { - success = FALSE; - } else { - space->n_reserved_extents += n_to_reserve; - success = TRUE; - } - - mutex_exit(&(system->mutex)); - - return(success); -} - -/*********************************************************************** -Releases free extents in a file space. */ - -void -fil_space_release_free_extents( -/*===========================*/ - ulint id, /* in: space id */ - ulint n_reserved) /* in: how many one reserved */ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - - ut_ad(system); - - mutex_enter(&(system->mutex)); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - ut_a(space); - ut_a(space->n_reserved_extents >= n_reserved); - - space->n_reserved_extents -= n_reserved; - - mutex_exit(&(system->mutex)); -} - -/*********************************************************************** -Gets the number of reserved extents. If the database is silent, this number -should be zero. 
*/ - -ulint -fil_space_get_n_reserved_extents( -/*=============================*/ - ulint id) /* in: space id */ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - ulint n; - - ut_ad(system); - - mutex_enter(&(system->mutex)); - - HASH_SEARCH(hash, system->spaces, id, space, space->id == id); - - ut_a(space); - - n = space->n_reserved_extents; - - mutex_exit(&(system->mutex)); - - return(n); -} - -/*============================ FILE I/O ================================*/ - -/************************************************************************ -NOTE: you must call fil_mutex_enter_and_prepare_for_io() first! - -Prepares a file node for i/o. Opens the file if it is closed. Updates the -pending i/o's field in the node and the system appropriately. Takes the node -off the LRU list if it is in the LRU list. The caller must hold the fil_sys -mutex. */ -static -void -fil_node_prepare_for_io( -/*====================*/ - fil_node_t* node, /* in: file node */ - fil_system_t* system, /* in: tablespace memory cache */ - fil_space_t* space) /* in: space */ -{ - ut_ad(node && system && space); - ut_ad(mutex_own(&(system->mutex))); - - if (system->n_open > system->max_n_open + 5) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: open files %lu" - " exceeds the limit %lu\n", - (ulong) system->n_open, - (ulong) system->max_n_open); - } - - if (node->open == FALSE) { - /* File is closed: open it */ - ut_a(node->n_pending == 0); - - fil_node_open_file(node, system, space); - } - - if (node->n_pending == 0 && space->purpose == FIL_TABLESPACE - && space->id != 0) { - /* The node is in the LRU list, remove it */ - - ut_a(UT_LIST_GET_LEN(system->LRU) > 0); - - UT_LIST_REMOVE(LRU, system->LRU, node); - } - - node->n_pending++; -} - -/************************************************************************ -Updates the data structures when an i/o operation finishes. Updates the -pending i/o's field in the node appropriately. 
*/ -static -void -fil_node_complete_io( -/*=================*/ - fil_node_t* node, /* in: file node */ - fil_system_t* system, /* in: tablespace memory cache */ - ulint type) /* in: OS_FILE_WRITE or OS_FILE_READ; marks - the node as modified if - type == OS_FILE_WRITE */ -{ - ut_ad(node); - ut_ad(system); - ut_ad(mutex_own(&(system->mutex))); - - ut_a(node->n_pending > 0); - - node->n_pending--; - - if (type == OS_FILE_WRITE) { - system->modification_counter++; - node->modification_counter = system->modification_counter; - - if (!node->space->is_in_unflushed_spaces) { - - node->space->is_in_unflushed_spaces = TRUE; - UT_LIST_ADD_FIRST(unflushed_spaces, - system->unflushed_spaces, - node->space); - } - } - - if (node->n_pending == 0 && node->space->purpose == FIL_TABLESPACE - && node->space->id != 0) { - /* The node must be put back to the LRU list */ - UT_LIST_ADD_FIRST(LRU, system->LRU, node); - } -} - -/************************************************************************ -Report information about an invalid page access. */ -static -void -fil_report_invalid_page_access( -/*===========================*/ - ulint block_offset, /* in: block offset */ - ulint space_id, /* in: space id */ - const char* space_name, /* in: space name */ - ulint byte_offset, /* in: byte offset */ - ulint len, /* in: I/O length */ - ulint type) /* in: I/O type */ -{ - fprintf(stderr, - "InnoDB: Error: trying to access page number %lu" - " in space %lu,\n" - "InnoDB: space name %s,\n" - "InnoDB: which is outside the tablespace bounds.\n" - "InnoDB: Byte offset %lu, len %lu, i/o type %lu.\n" - "InnoDB: If you get this error at mysqld startup," - " please check that\n" - "InnoDB: your my.cnf matches the ibdata files" - " that you have in the\n" - "InnoDB: MySQL server.\n", - (ulong) block_offset, (ulong) space_id, space_name, - (ulong) byte_offset, (ulong) len, (ulong) type); -} - -/************************************************************************ -Reads or writes data. 
This operation is asynchronous (aio). */

ulint
fil_io(
/*===*/
				/* out: DB_SUCCESS, or DB_TABLESPACE_DELETED
				if we are trying to do i/o on a tablespace
				which does not exist */
	ulint	type,		/* in: OS_FILE_READ or OS_FILE_WRITE,
				ORed to OS_FILE_LOG, if a log i/o
				and ORed to OS_AIO_SIMULATED_WAKE_LATER
				if simulated aio and we want to post a
				batch of i/os; NOTE that a simulated batch
				may introduce hidden chances of deadlocks,
				because i/os are not actually handled until
				all have been posted: use with great
				caution! */
	ibool	sync,		/* in: TRUE if synchronous aio is desired */
	ulint	space_id,	/* in: space id */
	ulint	block_offset,	/* in: offset in number of blocks */
	ulint	byte_offset,	/* in: remainder of offset in bytes; in
				aio this must be divisible by the OS block
				size */
	ulint	len,		/* in: how many bytes to read or write; this
				must not cross a file boundary; in aio this
				must be a block size multiple */
	void*	buf,		/* in/out: buffer where to store read data
				or from where to write; in aio this must be
				appropriately aligned */
	void*	message)	/* in: message for aio handler if non-sync
				aio used, else ignored */
{
	fil_system_t*	system		= fil_system;
	ulint		mode;
	fil_space_t*	space;
	fil_node_t*	node;
	ulint		offset_high;
	ulint		offset_low;
	ibool		ret;
	ulint		is_log;
	ulint		wake_later;

	/* Peel the two modifier flags off type so that it can be compared
	directly with OS_FILE_READ and OS_FILE_WRITE below */
	is_log = type & OS_FILE_LOG;
	type = type & ~OS_FILE_LOG;

	wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
	type = type & ~OS_AIO_SIMULATED_WAKE_LATER;

	ut_ad(byte_offset < UNIV_PAGE_SIZE);
	ut_ad(buf);
	ut_ad(len > 0);
	ut_a((1 << UNIV_PAGE_SIZE_SHIFT) == UNIV_PAGE_SIZE);
	ut_ad(fil_validate());
#ifndef UNIV_LOG_DEBUG
	/* ibuf bitmap pages must be read in the sync aio mode: */
	ut_ad(recv_no_ibuf_operations || (type == OS_FILE_WRITE)
	      || !ibuf_bitmap_page(block_offset) || sync || is_log);
#ifdef UNIV_SYNC_DEBUG
	ut_ad(!ibuf_inside() || is_log || (type == OS_FILE_WRITE)
	      || ibuf_page(space_id, block_offset));
#endif
#endif
	/* Choose the aio mode: sync wins, then non-log reads of ibuf pages,
	then log i/o, and everything else is normal */
	if (sync) {
		mode = OS_AIO_SYNC;
	} else if (type == OS_FILE_READ && !is_log
		   && ibuf_page(space_id, block_offset)) {
		mode = OS_AIO_IBUF;
	} else if (is_log) {
		mode = OS_AIO_LOG;
	} else {
		mode = OS_AIO_NORMAL;
	}

	/* Byte counters; they are updated before the system mutex is
	taken */
	if (type == OS_FILE_READ) {
		srv_data_read+= len;
	} else if (type == OS_FILE_WRITE) {
		srv_data_written+= len;
	}

	/* Reserve the fil_system mutex and make sure that we can open at
	least one file while holding it, if the file is not already open */

	fil_mutex_enter_and_prepare_for_io(space_id);

	HASH_SEARCH(hash, system->spaces, space_id, space,
		    space->id == space_id);
	if (!space) {
		mutex_exit(&(system->mutex));

		ut_print_timestamp(stderr);
		fprintf(stderr,
			" InnoDB: Error: trying to do i/o"
			" to a tablespace which does not exist.\n"
			"InnoDB: i/o type %lu, space id %lu,"
			" page no. %lu, i/o length %lu bytes\n",
			(ulong) type, (ulong) space_id, (ulong) block_offset,
			(ulong) len);

		return(DB_TABLESPACE_DELETED);
	}

	ut_ad((mode != OS_AIO_IBUF) || (space->purpose == FIL_TABLESPACE));

	node = UT_LIST_GET_FIRST(space->chain);

	/* Walk the files of the space, reducing block_offset by the size of
	each file passed, until the file containing the block is reached;
	afterwards block_offset is relative to the start of that file */
	for (;;) {
		if (node == NULL) {
			/* Ran off the end of the chain: the offset is
			beyond the space */
			fil_report_invalid_page_access(
				block_offset, space_id, space->name,
				byte_offset, len, type);

			ut_error;
		}

		if (space->id != 0 && node->size == 0) {
			/* We do not know the size of a single-table tablespace
			before we open the file */

			break;
		}

		if (node->size > block_offset) {
			/* Found! */
			break;
		} else {
			block_offset -= node->size;
			node = UT_LIST_GET_NEXT(chain, node);
		}
	}

	/* Open file if closed */
	fil_node_prepare_for_io(node, system, space);

	/* Check that at least the start offset is within the bounds of a
	single-table tablespace */
	if (space->purpose == FIL_TABLESPACE && space->id != 0
	    && node->size <= block_offset) {

		fil_report_invalid_page_access(
			block_offset, space_id, space->name, byte_offset,
			len, type);

		ut_error;
	}

	/* Now we have made the changes in the data structures of system */
	mutex_exit(&(system->mutex));

	/* Calculate the low 32 bits and the high 32 bits of the file offset */

	offset_high = (block_offset >> (32 - UNIV_PAGE_SIZE_SHIFT));
	offset_low = ((block_offset << UNIV_PAGE_SIZE_SHIFT) & 0xFFFFFFFFUL)
		+ byte_offset;

	/* The whole request, rounded up to full pages, must fit into what
	remains of the file after block_offset */
	ut_a(node->size - block_offset
	     >= (byte_offset + len + (UNIV_PAGE_SIZE - 1)) / UNIV_PAGE_SIZE);

	/* Do aio */

	ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
	ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);

#ifdef UNIV_HOTBACKUP
	/* In ibbackup do normal i/o, not aio */
	if (type == OS_FILE_READ) {
		ret = os_file_read(node->handle, buf, offset_low, offset_high,
				   len);
	} else {
		ret = os_file_write(node->name, node->handle, buf,
				    offset_low, offset_high, len);
	}
#else
	/* Queue the aio request */
	ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
		     offset_low, offset_high, len, node, message);
#endif
	ut_a(ret);

	if (mode == OS_AIO_SYNC) {
		/* The i/o operation is already completed when we return from
		os_aio: */

		mutex_enter(&(system->mutex));

		fil_node_complete_io(node, system, type);

		mutex_exit(&(system->mutex));

		ut_ad(fil_validate());
	}

	return(DB_SUCCESS);
}

/************************************************************************
Reads data from a space to a buffer.
Remember that the possible incomplete
blocks at the end of file are ignored: they are not taken into account when
calculating the byte offset within a space. */

ulint
fil_read(
/*=====*/
				/* out: the value returned by fil_io():
				DB_SUCCESS, or DB_TABLESPACE_DELETED if the
				tablespace does not exist */
	ibool	sync,		/* in: TRUE if synchronous aio is desired */
	ulint	space_id,	/* in: space id */
	ulint	block_offset,	/* in: offset in number of blocks */
	ulint	byte_offset,	/* in: remainder of offset in bytes; in aio
				this must be divisible by the OS block size */
	ulint	len,		/* in: how many bytes to read; this must not
				cross a file boundary; in aio this must be a
				block size multiple */
	void*	buf,		/* in/out: buffer where to store data read;
				in aio this must be appropriately aligned */
	void*	message)	/* in: message for aio handler if non-sync
				aio used, else ignored */
{
	ulint	err;

	/* A read is fil_io() with the type fixed to OS_FILE_READ */
	err = fil_io(OS_FILE_READ, sync, space_id, block_offset,
		     byte_offset, len, buf, message);

	return(err);
}

/************************************************************************
Writes data to a space from a buffer. Remember that the possible incomplete
blocks at the end of file are ignored: they are not taken into account when
calculating the byte offset within a space.
*/

ulint
fil_write(
/*======*/
				/* out: the value returned by fil_io():
				DB_SUCCESS, or DB_TABLESPACE_DELETED if the
				tablespace does not exist */
	ibool	sync,		/* in: TRUE if synchronous aio is desired */
	ulint	space_id,	/* in: space id */
	ulint	block_offset,	/* in: offset in number of blocks */
	ulint	byte_offset,	/* in: remainder of offset in bytes; in aio
				this must be divisible by the OS block size */
	ulint	len,		/* in: how many bytes to write; this must
				not cross a file boundary; in aio this must
				be a block size multiple */
	void*	buf,		/* in: buffer from which to write; in aio
				this must be appropriately aligned */
	void*	message)	/* in: message for aio handler if non-sync
				aio used, else ignored */
{
	ulint	err;

	/* A write is fil_io() with the type fixed to OS_FILE_WRITE */
	err = fil_io(OS_FILE_WRITE, sync, space_id, block_offset,
		     byte_offset, len, buf, message);

	return(err);
}

/**************************************************************************
Waits for an aio operation to complete. This function is used to write the
handler for completed requests. The aio array of pending requests is divided
into segments (see os0file.c for more info). The thread specifies which
segment it wants to wait for.
*/

void
fil_aio_wait(
/*=========*/
	ulint	segment)	/* in: the number of the segment in the aio
				array to wait for */
{
	fil_system_t*	system		= fil_system;
	ibool		ret;
	fil_node_t*	fil_node;	/* node of the completed request */
	void*		message;	/* message of the completed request */
	ulint		type;		/* i/o type of the completed request */

	ut_ad(fil_validate());

	/* Obtain one completed request from whichever aio implementation
	is in use */
	if (os_aio_use_native_aio) {
		srv_set_io_thread_op_info(segment, "native aio handle");
#ifdef WIN_ASYNC_IO
		ret = os_aio_windows_handle(segment, 0, &fil_node,
					    &message, &type);
#elif defined(POSIX_ASYNC_IO)
		/* NOTE(review): this call is not handed &type, so type
		appears to be still unset when it is passed to
		fil_node_complete_io() below - confirm */
		ret = os_aio_posix_handle(segment, &fil_node, &message);
#else
		ret = 0; /* Eliminate compiler warning */
		ut_error;
#endif
	} else {
		srv_set_io_thread_op_info(segment, "simulated aio handle");

		ret = os_aio_simulated_handle(segment, &fil_node,
					      &message, &type);
	}

	ut_a(ret);

	srv_set_io_thread_op_info(segment, "complete io for fil node");

	mutex_enter(&(system->mutex));

	/* fil_system is the same pointer the local system was initialized
	from */
	fil_node_complete_io(fil_node, fil_system, type);

	mutex_exit(&(system->mutex));

	ut_ad(fil_validate());

	/* Do the i/o handling */
	/* IMPORTANT: since i/o handling for reads will read also the insert
	buffer in tablespace 0, you have to be very careful not to introduce
	deadlocks in the i/o system. We keep tablespace 0 data files always
	open, and use a special i/o thread to serve insert buffer requests. */

	/* A message that is a buffer pool block goes to the buffer pool
	completion routine; any other message is treated as log i/o */
	if (buf_pool_is_block(message)) {
		srv_set_io_thread_op_info(segment, "complete io for buf page");
		buf_page_io_complete(message);
	} else {
		srv_set_io_thread_op_info(segment, "complete io for log");
		log_io_complete(message);
	}
}

/**************************************************************************
Flushes to disk possible writes cached by the OS. If the space does not exist
or is being dropped, does not do anything.
*/

void
fil_flush(
/*======*/
	ulint	space_id)	/* in: file space id (this can be a group of
				log files or a tablespace of the database) */
{
	fil_system_t*	system	= fil_system;
	fil_space_t*	space;
	fil_node_t*	node;
	os_file_t	file;
	ib_longlong	old_mod_counter;	/* assigned per node, inside
						the loop, before any use */

	mutex_enter(&(system->mutex));

	HASH_SEARCH(hash, system->spaces, space_id, space,
		    space->id == space_id);
	if (!space || space->is_being_deleted) {
		mutex_exit(&(system->mutex));

		return;
	}

	space->n_pending_flushes++;	/* prevent dropping of the space while
					we are flushing */
	node = UT_LIST_GET_FIRST(space->chain);

	while (node) {
		/* Only files written to since their last flush need work */
		if (node->modification_counter > node->flush_counter) {
			ut_a(node->open);

			/* We want to flush the changes at least up to
			old_mod_counter */
			old_mod_counter = node->modification_counter;

			if (space->purpose == FIL_TABLESPACE) {
				fil_n_pending_tablespace_flushes++;
			} else {
				fil_n_pending_log_flushes++;
				fil_n_log_flushes++;
			}
#ifdef __WIN__
			if (node->is_raw_disk) {
				/* No os_file_flush() call is made for a
				raw disk here; only the bookkeeping at
				skip_flush runs */

				goto skip_flush;
			}
#endif
retry:
			if (node->n_pending_flushes > 0) {
				/* We want to avoid calling os_file_flush() on
				the file twice at the same time, because we do
				not know what bugs OS's may contain in file
				i/o; sleep for a while */

				mutex_exit(&(system->mutex));

				os_thread_sleep(20000);

				mutex_enter(&(system->mutex));

				if (node->flush_counter >= old_mod_counter) {
					/* The flush counter has caught up
					with our target while we slept */

					goto skip_flush;
				}

				goto retry;
			}

			ut_a(node->open);
			file = node->handle;
			node->n_pending_flushes++;

			/* The flush itself runs without the system mutex */
			mutex_exit(&(system->mutex));

			/* fprintf(stderr, "Flushing to file %s\n",
			node->name); */

			os_file_flush(file);

			mutex_enter(&(system->mutex));

			node->n_pending_flushes--;
skip_flush:
			if (node->flush_counter < old_mod_counter) {
				node->flush_counter = old_mod_counter;

				if (space->is_in_unflushed_spaces
				    && fil_space_is_flushed(space)) {

					space->is_in_unflushed_spaces = FALSE;

					UT_LIST_REMOVE(
						unflushed_spaces,
						system->unflushed_spaces,
						space);
				}
			}

			/* Undo the pending-flush count taken above */
			if (space->purpose == FIL_TABLESPACE) {
				fil_n_pending_tablespace_flushes--;
			} else {
				fil_n_pending_log_flushes--;
			}
		}

		node = UT_LIST_GET_NEXT(chain, node);
	}

	space->n_pending_flushes--;

	mutex_exit(&(system->mutex));
}

/**************************************************************************
Flushes to disk the writes in file spaces of the given type possibly cached by
the OS. */

void
fil_flush_file_spaces(
/*==================*/
	ulint	purpose)	/* in: FIL_TABLESPACE, FIL_LOG */
{
	fil_system_t*	system	= fil_system;
	fil_space_t*	space;
	ulint*		space_ids;	/* ids collected under the mutex */
	ulint		n_space_ids;	/* first the list length, then the
					number of ids actually collected */
	ulint		i;

	mutex_enter(&(system->mutex));

	n_space_ids = UT_LIST_GET_LEN(system->unflushed_spaces);
	if (n_space_ids == 0) {

		mutex_exit(&system->mutex);
		return;
	}

	/* Assemble a list of space ids to flush.  Previously, we
	traversed system->unflushed_spaces and called UT_LIST_GET_NEXT()
	on a space that was just removed from the list by fil_flush().
	Thus, the space could be dropped and the memory overwritten. */
	space_ids = mem_alloc(n_space_ids * sizeof *space_ids);

	n_space_ids = 0;

	for (space = UT_LIST_GET_FIRST(system->unflushed_spaces);
	     space;
	     space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {

		if (space->purpose == purpose && !space->is_being_deleted) {

			space_ids[n_space_ids++] = space->id;
		}
	}

	mutex_exit(&system->mutex);

	/* Flush the spaces.  It will not hurt to call fil_flush() on
	a non-existing space id. */
	for (i = 0; i < n_space_ids; i++) {

		fil_flush(space_ids[i]);
	}

	mem_free(space_ids);
}

/**********************************************************************
Checks the consistency of the tablespace cache.
*/ - -ibool -fil_validate(void) -/*==============*/ - /* out: TRUE if ok */ -{ - fil_system_t* system = fil_system; - fil_space_t* space; - fil_node_t* fil_node; - ulint n_open = 0; - ulint i; - - mutex_enter(&(system->mutex)); - - /* Look for spaces in the hash table */ - - for (i = 0; i < hash_get_n_cells(system->spaces); i++) { - - space = HASH_GET_FIRST(system->spaces, i); - - while (space != NULL) { - UT_LIST_VALIDATE(chain, fil_node_t, space->chain); - - fil_node = UT_LIST_GET_FIRST(space->chain); - - while (fil_node != NULL) { - if (fil_node->n_pending > 0) { - ut_a(fil_node->open); - } - - if (fil_node->open) { - n_open++; - } - fil_node = UT_LIST_GET_NEXT(chain, fil_node); - } - space = HASH_GET_NEXT(hash, space); - } - } - - ut_a(system->n_open == n_open); - - UT_LIST_VALIDATE(LRU, fil_node_t, system->LRU); - - fil_node = UT_LIST_GET_FIRST(system->LRU); - - while (fil_node != NULL) { - ut_a(fil_node->n_pending == 0); - ut_a(fil_node->open); - ut_a(fil_node->space->purpose == FIL_TABLESPACE); - ut_a(fil_node->space->id != 0); - - fil_node = UT_LIST_GET_NEXT(LRU, fil_node); - } - - mutex_exit(&(system->mutex)); - - return(TRUE); -} - -/************************************************************************ -Returns TRUE if file address is undefined. */ -ibool -fil_addr_is_null( -/*=============*/ - /* out: TRUE if undefined */ - fil_addr_t addr) /* in: address */ -{ - if (addr.page == FIL_NULL) { - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************ -Accessor functions for a file page */ - -ulint -fil_page_get_prev(byte* page) -{ - return(mach_read_from_4(page + FIL_PAGE_PREV)); -} - -ulint -fil_page_get_next(byte* page) -{ - return(mach_read_from_4(page + FIL_PAGE_NEXT)); -} - -/************************************************************************* -Sets the file page type. 
*/ - -void -fil_page_set_type( -/*==============*/ - byte* page, /* in: file page */ - ulint type) /* in: type */ -{ - ut_ad(page); - - mach_write_to_2(page + FIL_PAGE_TYPE, type); -} - -/************************************************************************* -Gets the file page type. */ - -ulint -fil_page_get_type( -/*==============*/ - /* out: type; NOTE that if the type has not been - written to page, the return value not defined */ - byte* page) /* in: file page */ -{ - ut_ad(page); - - return(mach_read_from_2(page + FIL_PAGE_TYPE)); -} diff --git a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c deleted file mode 100644 index e1074933fe8..00000000000 --- a/storage/innobase/fsp/fsp0fsp.c +++ /dev/null @@ -1,3990 +0,0 @@ -/********************************************************************** -File space management - -(c) 1995 Innobase Oy - -Created 11/29/1995 Heikki Tuuri -***********************************************************************/ - -#include "fsp0fsp.h" - -#ifdef UNIV_NONINL -#include "fsp0fsp.ic" -#endif - -#include "buf0buf.h" -#include "fil0fil.h" -#include "sync0sync.h" -#include "mtr0log.h" -#include "fut0fut.h" -#include "ut0byte.h" -#include "srv0srv.h" -#include "page0types.h" -#include "ibuf0ibuf.h" -#include "btr0btr.h" -#include "btr0sea.h" -#include "dict0boot.h" -#include "dict0mem.h" -#include "log0log.h" - - -#define FSP_HEADER_OFFSET FIL_PAGE_DATA /* Offset of the space header - within a file page */ - -/* The data structures in files are defined just as byte strings in C */ -typedef byte fsp_header_t; -typedef byte xdes_t; - -/* SPACE HEADER - ============ - -File space header data structure: this data structure is contained in the -first page of a space. The space for this header is reserved in every extent -descriptor page, but used only in the first. 
*/ - -/*-------------------------------------*/ -#define FSP_SPACE_ID 0 /* space id */ -#define FSP_NOT_USED 4 /* this field contained a value up to - which we know that the modifications - in the database have been flushed to - the file space; not used now */ -#define FSP_SIZE 8 /* Current size of the space in - pages */ -#define FSP_FREE_LIMIT 12 /* Minimum page number for which the - free list has not been initialized: - the pages >= this limit are, by - definition, free; note that in a - single-table tablespace where size - < 64 pages, this number is 64, i.e., - we have initialized the space - about the first extent, but have not - physically allocted those pages to the - file */ -#define FSP_LOWEST_NO_WRITE 16 /* The lowest page offset for which - the page has not been written to disk - (if it has been written, we know that - the OS has really reserved the - physical space for the page) */ -#define FSP_FRAG_N_USED 20 /* number of used pages in the - FSP_FREE_FRAG list */ -#define FSP_FREE 24 /* list of free extents */ -#define FSP_FREE_FRAG (24 + FLST_BASE_NODE_SIZE) - /* list of partially free extents not - belonging to any segment */ -#define FSP_FULL_FRAG (24 + 2 * FLST_BASE_NODE_SIZE) - /* list of full extents not belonging - to any segment */ -#define FSP_SEG_ID (24 + 3 * FLST_BASE_NODE_SIZE) - /* 8 bytes which give the first unused - segment id */ -#define FSP_SEG_INODES_FULL (32 + 3 * FLST_BASE_NODE_SIZE) - /* list of pages containing segment - headers, where all the segment inode - slots are reserved */ -#define FSP_SEG_INODES_FREE (32 + 4 * FLST_BASE_NODE_SIZE) - /* list of pages containing segment - headers, where not all the segment - header slots are reserved */ -/*-------------------------------------*/ -/* File space header size */ -#define FSP_HEADER_SIZE (32 + 5 * FLST_BASE_NODE_SIZE) - -#define FSP_FREE_ADD 4 /* this many free extents are added - to the free list from above - FSP_FREE_LIMIT at a time */ - -/* FILE SEGMENT INODE - 
================== - -Segment inode which is created for each segment in a tablespace. NOTE: in -purge we assume that a segment having only one currently used page can be -freed in a few steps, so that the freeing cannot fill the file buffer with -bufferfixed file pages. */ - -typedef byte fseg_inode_t; - -#define FSEG_INODE_PAGE_NODE FSEG_PAGE_DATA - /* the list node for linking - segment inode pages */ - -#define FSEG_ARR_OFFSET (FSEG_PAGE_DATA + FLST_NODE_SIZE) -/*-------------------------------------*/ -#define FSEG_ID 0 /* 8 bytes of segment id: if this is - ut_dulint_zero, it means that the - header is unused */ -#define FSEG_NOT_FULL_N_USED 8 - /* number of used segment pages in - the FSEG_NOT_FULL list */ -#define FSEG_FREE 12 - /* list of free extents of this - segment */ -#define FSEG_NOT_FULL (12 + FLST_BASE_NODE_SIZE) - /* list of partially free extents */ -#define FSEG_FULL (12 + 2 * FLST_BASE_NODE_SIZE) - /* list of full extents */ -#define FSEG_MAGIC_N (12 + 3 * FLST_BASE_NODE_SIZE) - /* magic number used in debugging */ -#define FSEG_FRAG_ARR (16 + 3 * FLST_BASE_NODE_SIZE) - /* array of individual pages - belonging to this segment in fsp - fragment extent lists */ -#define FSEG_FRAG_ARR_N_SLOTS (FSP_EXTENT_SIZE / 2) - /* number of slots in the array for - the fragment pages */ -#define FSEG_FRAG_SLOT_SIZE 4 /* a fragment page slot contains its - page number within space, FIL_NULL - means that the slot is not in use */ -/*-------------------------------------*/ -#define FSEG_INODE_SIZE \ - (16 + 3 * FLST_BASE_NODE_SIZE \ - + FSEG_FRAG_ARR_N_SLOTS * FSEG_FRAG_SLOT_SIZE) - -#define FSP_SEG_INODES_PER_PAGE \ - ((UNIV_PAGE_SIZE - FSEG_ARR_OFFSET - 10) / FSEG_INODE_SIZE) - /* Number of segment inodes which fit on a - single page */ - -#define FSEG_MAGIC_N_VALUE 97937874 - -#define FSEG_FILLFACTOR 8 /* If this value is x, then if - the number of unused but reserved - pages in a segment is less than - reserved pages * 1/x, and there are - at least 
FSEG_FRAG_LIMIT used pages, - then we allow a new empty extent to - be added to the segment in - fseg_alloc_free_page. Otherwise, we - use unused pages of the segment. */ - -#define FSEG_FRAG_LIMIT FSEG_FRAG_ARR_N_SLOTS - /* If the segment has >= this many - used pages, it may be expanded by - allocating extents to the segment; - until that only individual fragment - pages are allocated from the space */ - -#define FSEG_FREE_LIST_LIMIT 40 /* If the reserved size of a segment - is at least this many extents, we - allow extents to be put to the free - list of the extent: at most - FSEG_FREE_LIST_MAX_LEN many */ -#define FSEG_FREE_LIST_MAX_LEN 4 - - -/* EXTENT DESCRIPTOR - ================= - -File extent descriptor data structure: contains bits to tell which pages in -the extent are free and which contain old tuple version to clean. */ - -/*-------------------------------------*/ -#define XDES_ID 0 /* The identifier of the segment - to which this extent belongs */ -#define XDES_FLST_NODE 8 /* The list node data structure - for the descriptors */ -#define XDES_STATE (FLST_NODE_SIZE + 8) - /* contains state information - of the extent */ -#define XDES_BITMAP (FLST_NODE_SIZE + 12) - /* Descriptor bitmap of the pages - in the extent */ -/*-------------------------------------*/ - -#define XDES_BITS_PER_PAGE 2 /* How many bits are there per page */ -#define XDES_FREE_BIT 0 /* Index of the bit which tells if - the page is free */ -#define XDES_CLEAN_BIT 1 /* NOTE: currently not used! - Index of the bit which tells if - there are old versions of tuples - on the page */ -/* States of a descriptor */ -#define XDES_FREE 1 /* extent is in free list of space */ -#define XDES_FREE_FRAG 2 /* extent is in free fragment list of - space */ -#define XDES_FULL_FRAG 3 /* extent is in full fragment list of - space */ -#define XDES_FSEG 4 /* extent belongs to a segment */ - -/* File extent data structure size in bytes. 
*/ -#define XDES_SIZE \ - (XDES_BITMAP + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE)) - -/* Offset of the descriptor array on a descriptor page */ -#define XDES_ARR_OFFSET (FSP_HEADER_OFFSET + FSP_HEADER_SIZE) - -/************************************************************************** -Returns an extent to the free list of a space. */ -static -void -fsp_free_extent( -/*============*/ - ulint space, /* in: space id */ - ulint page, /* in: page offset in the extent */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************** -Frees an extent of a segment to the space free list. */ -static -void -fseg_free_extent( -/*=============*/ - fseg_inode_t* seg_inode, /* in: segment inode */ - ulint space, /* in: space id */ - ulint page, /* in: page offset in the extent */ - mtr_t* mtr); /* in: mtr handle */ -/************************************************************************** -Calculates the number of pages reserved by a segment, and how -many pages are currently used. */ -static -ulint -fseg_n_reserved_pages_low( -/*======================*/ - /* out: number of reserved pages */ - fseg_inode_t* header, /* in: segment inode */ - ulint* used, /* out: number of pages used (<= reserved) */ - mtr_t* mtr); /* in: mtr handle */ -/************************************************************************ -Marks a page used. The page must reside within the extents of the given -segment. */ -static -void -fseg_mark_page_used( -/*================*/ - fseg_inode_t* seg_inode,/* in: segment inode */ - ulint space, /* in: space id */ - ulint page, /* in: page offset */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************** -Returns the first extent descriptor for a segment. We think of the extent -lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL --> FSEG_FREE. 
*/ -static -xdes_t* -fseg_get_first_extent( -/*==================*/ - /* out: the first extent descriptor, or NULL if - none */ - fseg_inode_t* inode, /* in: segment inode */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************** -Puts new extents to the free list if -there are free extents above the free limit. If an extent happens -to contain an extent descriptor page, the extent is put to -the FSP_FREE_FRAG list with the page marked as used. */ -static -void -fsp_fill_free_list( -/*===============*/ - ibool init_space, /* in: TRUE if this is a single-table - tablespace and we are only initing - the tablespace's first extent - descriptor page and ibuf bitmap page; - then we do not allocate more extents */ - ulint space, /* in: space */ - fsp_header_t* header, /* in: space header */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************** -Allocates a single free page from a segment. This function implements -the intelligent allocation strategy which tries to minimize file space -fragmentation. */ -static -ulint -fseg_alloc_free_page_low( -/*=====================*/ - /* out: the allocated page number, FIL_NULL - if no page could be allocated */ - ulint space, /* in: space */ - fseg_inode_t* seg_inode, /* in: segment inode */ - ulint hint, /* in: hint of which page would be desirable */ - byte direction, /* in: if the new page is needed because - of an index page split, and records are - inserted there in order, into which - direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR */ - mtr_t* mtr); /* in: mtr handle */ - - -/************************************************************************** -Reads the file space size stored in the header page. 
*/ - -ulint -fsp_get_size_low( -/*=============*/ - /* out: tablespace size stored in the space header */ - page_t* page) /* in: header page (page 0 in the tablespace) */ -{ - return(mach_read_from_4(page + FSP_HEADER_OFFSET + FSP_SIZE)); -} - -/************************************************************************** -Gets a pointer to the space header and x-locks its page. */ -UNIV_INLINE -fsp_header_t* -fsp_get_space_header( -/*=================*/ - /* out: pointer to the space header, page x-locked */ - ulint id, /* in: space id */ - mtr_t* mtr) /* in: mtr */ -{ - fsp_header_t* header; - - ut_ad(mtr); - - header = FSP_HEADER_OFFSET + buf_page_get(id, 0, RW_X_LATCH, mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(header, SYNC_FSP_PAGE); -#endif /* UNIV_SYNC_DEBUG */ - return(header); -} - -/************************************************************************** -Gets a descriptor bit of a page. */ -UNIV_INLINE -ibool -xdes_get_bit( -/*=========*/ - /* out: TRUE if free */ - xdes_t* descr, /* in: descriptor */ - ulint bit, /* in: XDES_FREE_BIT or XDES_CLEAN_BIT */ - ulint offset, /* in: page offset within extent: - 0 ... FSP_EXTENT_SIZE - 1 */ - mtr_t* mtr) /* in: mtr */ -{ - ulint index; - ulint byte_index; - ulint bit_index; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(descr), - MTR_MEMO_PAGE_X_FIX)); - ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT)); - ut_ad(offset < FSP_EXTENT_SIZE); - - index = bit + XDES_BITS_PER_PAGE * offset; - - byte_index = index / 8; - bit_index = index % 8; - - return(ut_bit_get_nth(mtr_read_ulint(descr + XDES_BITMAP + byte_index, - MLOG_1BYTE, mtr), - bit_index)); -} - -/************************************************************************** -Sets a descriptor bit of a page. */ -UNIV_INLINE -void -xdes_set_bit( -/*=========*/ - xdes_t* descr, /* in: descriptor */ - ulint bit, /* in: XDES_FREE_BIT or XDES_CLEAN_BIT */ - ulint offset, /* in: page offset within extent: - 0 ... 
FSP_EXTENT_SIZE - 1 */ - ibool val, /* in: bit value */ - mtr_t* mtr) /* in: mtr */ -{ - ulint index; - ulint byte_index; - ulint bit_index; - ulint descr_byte; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(descr), - MTR_MEMO_PAGE_X_FIX)); - ut_ad((bit == XDES_FREE_BIT) || (bit == XDES_CLEAN_BIT)); - ut_ad(offset < FSP_EXTENT_SIZE); - - index = bit + XDES_BITS_PER_PAGE * offset; - - byte_index = index / 8; - bit_index = index % 8; - - descr_byte = mtr_read_ulint(descr + XDES_BITMAP + byte_index, - MLOG_1BYTE, mtr); - descr_byte = ut_bit_set_nth(descr_byte, bit_index, val); - - mlog_write_ulint(descr + XDES_BITMAP + byte_index, descr_byte, - MLOG_1BYTE, mtr); -} - -/************************************************************************** -Looks for a descriptor bit having the desired value. Starts from hint -and scans upward; at the end of the extent the search is wrapped to -the start of the extent. */ -UNIV_INLINE -ulint -xdes_find_bit( -/*==========*/ - /* out: bit index of the bit, ULINT_UNDEFINED if not - found */ - xdes_t* descr, /* in: descriptor */ - ulint bit, /* in: XDES_FREE_BIT or XDES_CLEAN_BIT */ - ibool val, /* in: desired bit value */ - ulint hint, /* in: hint of which bit position would be desirable */ - mtr_t* mtr) /* in: mtr */ -{ - ulint i; - - ut_ad(descr && mtr); - ut_ad(val <= TRUE); - ut_ad(hint < FSP_EXTENT_SIZE); - ut_ad(mtr_memo_contains(mtr, buf_block_align(descr), - MTR_MEMO_PAGE_X_FIX)); - for (i = hint; i < FSP_EXTENT_SIZE; i++) { - if (val == xdes_get_bit(descr, bit, i, mtr)) { - - return(i); - } - } - - for (i = 0; i < hint; i++) { - if (val == xdes_get_bit(descr, bit, i, mtr)) { - - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/************************************************************************** -Looks for a descriptor bit having the desired value. Scans the extent in -a direction opposite to xdes_find_bit. 
*/ -UNIV_INLINE -ulint -xdes_find_bit_downward( -/*===================*/ - /* out: bit index of the bit, ULINT_UNDEFINED if not - found */ - xdes_t* descr, /* in: descriptor */ - ulint bit, /* in: XDES_FREE_BIT or XDES_CLEAN_BIT */ - ibool val, /* in: desired bit value */ - ulint hint, /* in: hint of which bit position would be desirable */ - mtr_t* mtr) /* in: mtr */ -{ - ulint i; - - ut_ad(descr && mtr); - ut_ad(val <= TRUE); - ut_ad(hint < FSP_EXTENT_SIZE); - ut_ad(mtr_memo_contains(mtr, buf_block_align(descr), - MTR_MEMO_PAGE_X_FIX)); - for (i = hint + 1; i > 0; i--) { - if (val == xdes_get_bit(descr, bit, i - 1, mtr)) { - - return(i - 1); - } - } - - for (i = FSP_EXTENT_SIZE - 1; i > hint; i--) { - if (val == xdes_get_bit(descr, bit, i, mtr)) { - - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/************************************************************************** -Returns the number of used pages in a descriptor. */ -UNIV_INLINE -ulint -xdes_get_n_used( -/*============*/ - /* out: number of pages used */ - xdes_t* descr, /* in: descriptor */ - mtr_t* mtr) /* in: mtr */ -{ - ulint i; - ulint count = 0; - - ut_ad(descr && mtr); - ut_ad(mtr_memo_contains(mtr, buf_block_align(descr), - MTR_MEMO_PAGE_X_FIX)); - for (i = 0; i < FSP_EXTENT_SIZE; i++) { - if (FALSE == xdes_get_bit(descr, XDES_FREE_BIT, i, mtr)) { - count++; - } - } - - return(count); -} - -/************************************************************************** -Returns true if extent contains no used pages. */ -UNIV_INLINE -ibool -xdes_is_free( -/*=========*/ - /* out: TRUE if totally free */ - xdes_t* descr, /* in: descriptor */ - mtr_t* mtr) /* in: mtr */ -{ - if (0 == xdes_get_n_used(descr, mtr)) { - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************** -Returns true if extent contains no free pages. 
*/ -UNIV_INLINE -ibool -xdes_is_full( -/*=========*/ - /* out: TRUE if full */ - xdes_t* descr, /* in: descriptor */ - mtr_t* mtr) /* in: mtr */ -{ - if (FSP_EXTENT_SIZE == xdes_get_n_used(descr, mtr)) { - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************** -Sets the state of an xdes. */ -UNIV_INLINE -void -xdes_set_state( -/*===========*/ - xdes_t* descr, /* in: descriptor */ - ulint state, /* in: state to set */ - mtr_t* mtr) /* in: mtr handle */ -{ - ut_ad(descr && mtr); - ut_ad(state >= XDES_FREE); - ut_ad(state <= XDES_FSEG); - ut_ad(mtr_memo_contains(mtr, buf_block_align(descr), - MTR_MEMO_PAGE_X_FIX)); - - mlog_write_ulint(descr + XDES_STATE, state, MLOG_4BYTES, mtr); -} - -/************************************************************************** -Gets the state of an xdes. */ -UNIV_INLINE -ulint -xdes_get_state( -/*===========*/ - /* out: state */ - xdes_t* descr, /* in: descriptor */ - mtr_t* mtr) /* in: mtr handle */ -{ - ut_ad(descr && mtr); - ut_ad(mtr_memo_contains(mtr, buf_block_align(descr), - MTR_MEMO_PAGE_X_FIX)); - - return(mtr_read_ulint(descr + XDES_STATE, MLOG_4BYTES, mtr)); -} - -/************************************************************************** -Inits an extent descriptor to the free and clean state. */ -UNIV_INLINE -void -xdes_init( -/*======*/ - xdes_t* descr, /* in: descriptor */ - mtr_t* mtr) /* in: mtr */ -{ - ulint i; - - ut_ad(descr && mtr); - ut_ad(mtr_memo_contains(mtr, buf_block_align(descr), - MTR_MEMO_PAGE_X_FIX)); - ut_ad((XDES_SIZE - XDES_BITMAP) % 4 == 0); - - for (i = XDES_BITMAP; i < XDES_SIZE; i += 4) { - mlog_write_ulint(descr + i, 0xFFFFFFFFUL, MLOG_4BYTES, mtr); - } - - xdes_set_state(descr, XDES_FREE, mtr); -} - -/************************************************************************ -Calculates the page where the descriptor of a page resides. 
*/ -UNIV_INLINE -ulint -xdes_calc_descriptor_page( -/*======================*/ - /* out: descriptor page offset */ - ulint offset) /* in: page offset */ -{ -#if UNIV_PAGE_SIZE <= XDES_ARR_OFFSET \ - + (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE) * XDES_SIZE -# error -#endif - - return(ut_2pow_round(offset, XDES_DESCRIBED_PER_PAGE)); -} - -/************************************************************************ -Calculates the descriptor index within a descriptor page. */ -UNIV_INLINE -ulint -xdes_calc_descriptor_index( -/*=======================*/ - /* out: descriptor index */ - ulint offset) /* in: page offset */ -{ - return(ut_2pow_remainder(offset, XDES_DESCRIBED_PER_PAGE) - / FSP_EXTENT_SIZE); -} - -/************************************************************************ -Gets pointer to a the extent descriptor of a page. The page where the extent -descriptor resides is x-locked. If the page offset is equal to the free limit -of the space, adds new extents from above the free limit to the space free -list, if not free limit == space size. This adding is necessary to make the -descriptor defined, as they are uninitialized above the free limit. 
*/ -UNIV_INLINE -xdes_t* -xdes_get_descriptor_with_space_hdr( -/*===============================*/ - /* out: pointer to the extent descriptor, - NULL if the page does not exist in the - space or if offset > free limit */ - fsp_header_t* sp_header,/* in: space header, x-latched */ - ulint space, /* in: space id */ - ulint offset, /* in: page offset; - if equal to the free limit, - we try to add new extents to - the space free list */ - mtr_t* mtr) /* in: mtr handle */ -{ - ulint limit; - ulint size; - ulint descr_page_no; - page_t* descr_page; - - ut_ad(mtr); - ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space), - MTR_MEMO_X_LOCK)); - /* Read free limit and space size */ - limit = mtr_read_ulint(sp_header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr); - size = mtr_read_ulint(sp_header + FSP_SIZE, MLOG_4BYTES, mtr); - - /* If offset is >= size or > limit, return NULL */ - - if ((offset >= size) || (offset > limit)) { - - return(NULL); - } - - /* If offset is == limit, fill free list of the space. */ - - if (offset == limit) { - fsp_fill_free_list(FALSE, space, sp_header, mtr); - } - - descr_page_no = xdes_calc_descriptor_page(offset); - - if (descr_page_no == 0) { - /* It is on the space header page */ - - descr_page = buf_frame_align(sp_header); - } else { - descr_page = buf_page_get(space, descr_page_no, RW_X_LATCH, - mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(descr_page, SYNC_FSP_PAGE); -#endif /* UNIV_SYNC_DEBUG */ - } - - return(descr_page + XDES_ARR_OFFSET - + XDES_SIZE * xdes_calc_descriptor_index(offset)); -} - -/************************************************************************ -Gets pointer to a the extent descriptor of a page. The page where the -extent descriptor resides is x-locked. If the page offset is equal to -the free limit of the space, adds new extents from above the free limit -to the space free list, if not free limit == space size. 
This adding -is necessary to make the descriptor defined, as they are uninitialized -above the free limit. */ -static -xdes_t* -xdes_get_descriptor( -/*================*/ - /* out: pointer to the extent descriptor, NULL if the - page does not exist in the space or if offset > free - limit */ - ulint space, /* in: space id */ - ulint offset, /* in: page offset; if equal to the free limit, - we try to add new extents to the space free list */ - mtr_t* mtr) /* in: mtr handle */ -{ - fsp_header_t* sp_header; - - sp_header = FSP_HEADER_OFFSET - + buf_page_get(space, 0, RW_X_LATCH, mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(sp_header, SYNC_FSP_PAGE); -#endif /* UNIV_SYNC_DEBUG */ - return(xdes_get_descriptor_with_space_hdr(sp_header, space, offset, - mtr)); -} - -/************************************************************************ -Gets pointer to a the extent descriptor if the file address -of the descriptor list node is known. The page where the -extent descriptor resides is x-locked. */ -UNIV_INLINE -xdes_t* -xdes_lst_get_descriptor( -/*====================*/ - /* out: pointer to the extent descriptor */ - ulint space, /* in: space id */ - fil_addr_t lst_node,/* in: file address of the list node - contained in the descriptor */ - mtr_t* mtr) /* in: mtr handle */ -{ - xdes_t* descr; - - ut_ad(mtr); - ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space), - MTR_MEMO_X_LOCK)); - descr = fut_get_ptr(space, lst_node, RW_X_LATCH, mtr) - XDES_FLST_NODE; - - return(descr); -} - -/************************************************************************ -Gets pointer to the next descriptor in a descriptor list and x-locks its -page. 
*/ -UNIV_INLINE -xdes_t* -xdes_lst_get_next( -/*==============*/ - xdes_t* descr, /* in: pointer to a descriptor */ - mtr_t* mtr) /* in: mtr handle */ -{ - ulint space; - - ut_ad(mtr && descr); - - space = buf_frame_get_space_id(descr); - - return(xdes_lst_get_descriptor( - space, - flst_get_next_addr(descr + XDES_FLST_NODE, mtr), mtr)); -} - -/************************************************************************ -Returns page offset of the first page in extent described by a descriptor. */ -UNIV_INLINE -ulint -xdes_get_offset( -/*============*/ - /* out: offset of the first page in extent */ - xdes_t* descr) /* in: extent descriptor */ -{ - ut_ad(descr); - - return(buf_frame_get_page_no(descr) - + ((descr - buf_frame_align(descr) - XDES_ARR_OFFSET) - / XDES_SIZE) - * FSP_EXTENT_SIZE); -} - -/*************************************************************** -Inits a file page whose prior contents should be ignored. */ -static -void -fsp_init_file_page_low( -/*===================*/ - byte* ptr) /* in: pointer to a page */ -{ - page_t* page; - page = buf_frame_align(ptr); - - buf_block_align(page)->check_index_page_at_flush = FALSE; - -#ifdef UNIV_BASIC_LOG_DEBUG - memset(page, 0xff, UNIV_PAGE_SIZE); -#endif - mach_write_to_8(page + UNIV_PAGE_SIZE - FIL_PAGE_END_LSN_OLD_CHKSUM, - ut_dulint_zero); - mach_write_to_8(page + FIL_PAGE_LSN, ut_dulint_zero); -} - -/*************************************************************** -Inits a file page whose prior contents should be ignored. */ -static -void -fsp_init_file_page( -/*===============*/ - page_t* page, /* in: page */ - mtr_t* mtr) /* in: mtr */ -{ - fsp_init_file_page_low(page); - - mlog_write_initial_log_record(page, MLOG_INIT_FILE_PAGE, mtr); -} - -/*************************************************************** -Parses a redo log record of a file page init. 
*/ - -byte* -fsp_parse_init_file_page( -/*=====================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr __attribute__((unused)), /* in: buffer end */ - page_t* page) /* in: page or NULL */ -{ - ut_ad(ptr && end_ptr); - - if (page) { - fsp_init_file_page_low(page); - } - - return(ptr); -} - -/************************************************************************** -Initializes the fsp system. */ - -void -fsp_init(void) -/*==========*/ -{ - /* Does nothing at the moment */ -} - -/************************************************************************** -Writes the space id to a tablespace header. This function is used past the -buffer pool when we in fil0fil.c create a new single-table tablespace. */ - -void -fsp_header_write_space_id( -/*======================*/ - page_t* page, /* in: first page in the space */ - ulint space_id) /* in: space id */ -{ - mach_write_to_4(page + FSP_HEADER_OFFSET + FSP_SPACE_ID, space_id); -} - -/************************************************************************** -Initializes the space header of a new created space and creates also the -insert buffer tree root if space == 0. 
*/ - -void -fsp_header_init( -/*============*/ - ulint space, /* in: space id */ - ulint size, /* in: current size in blocks */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - fsp_header_t* header; - page_t* page; - - ut_ad(mtr); - - mtr_x_lock(fil_space_get_latch(space), mtr); - - page = buf_page_create(space, 0, mtr); - buf_page_get(space, 0, RW_X_LATCH, mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_FSP_PAGE); -#endif /* UNIV_SYNC_DEBUG */ - - /* The prior contents of the file page should be ignored */ - - fsp_init_file_page(page, mtr); - - mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_FSP_HDR, - MLOG_2BYTES, mtr); - - header = FSP_HEADER_OFFSET + page; - - mlog_write_ulint(header + FSP_SPACE_ID, space, MLOG_4BYTES, mtr); - mlog_write_ulint(header + FSP_NOT_USED, 0, MLOG_4BYTES, mtr); - - mlog_write_ulint(header + FSP_SIZE, size, MLOG_4BYTES, mtr); - mlog_write_ulint(header + FSP_FREE_LIMIT, 0, MLOG_4BYTES, mtr); - mlog_write_ulint(header + FSP_LOWEST_NO_WRITE, 0, MLOG_4BYTES, mtr); - mlog_write_ulint(header + FSP_FRAG_N_USED, 0, MLOG_4BYTES, mtr); - - flst_init(header + FSP_FREE, mtr); - flst_init(header + FSP_FREE_FRAG, mtr); - flst_init(header + FSP_FULL_FRAG, mtr); - flst_init(header + FSP_SEG_INODES_FULL, mtr); - flst_init(header + FSP_SEG_INODES_FREE, mtr); - - mlog_write_dulint(header + FSP_SEG_ID, ut_dulint_create(0, 1), mtr); - if (space == 0) { - fsp_fill_free_list(FALSE, space, header, mtr); - btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, space, - ut_dulint_add(DICT_IBUF_ID_MIN, space), FALSE, mtr); - } else { - fsp_fill_free_list(TRUE, space, header, mtr); - } -} - -/************************************************************************** -Reads the space id from the first page of a tablespace. 
*/ - -ulint -fsp_header_get_space_id( -/*====================*/ - /* out: space id, ULINT UNDEFINED if error */ - page_t* page) /* in: first page of a tablespace */ -{ - ulint fsp_id; - ulint id; - - fsp_id = mach_read_from_4(FSP_HEADER_OFFSET + page + FSP_SPACE_ID); - - id = mach_read_from_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - - if (id != fsp_id) { - fprintf(stderr, - "InnoDB: Error: space id in fsp header %lu," - " but in the page header %lu\n", - (ulong) fsp_id, (ulong) id); - - return(ULINT_UNDEFINED); - } - - return(id); -} - -/************************************************************************** -Increases the space size field of a space. */ - -void -fsp_header_inc_size( -/*================*/ - ulint space, /* in: space id */ - ulint size_inc,/* in: size increment in pages */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - fsp_header_t* header; - ulint size; - - ut_ad(mtr); - - mtr_x_lock(fil_space_get_latch(space), mtr); - - header = fsp_get_space_header(space, mtr); - - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - - mlog_write_ulint(header + FSP_SIZE, size + size_inc, MLOG_4BYTES, - mtr); -} - -/************************************************************************** -Gets the current free limit of a tablespace. The free limit means the -place of the first page which has never been put to the the free list -for allocation. The space above that address is initialized to zero. -Sets also the global variable log_fsp_current_free_limit. */ - -ulint -fsp_header_get_free_limit( -/*======================*/ - /* out: free limit in megabytes */ - ulint space) /* in: space id, must be 0 */ -{ - fsp_header_t* header; - ulint limit; - mtr_t mtr; - - ut_a(space == 0); /* We have only one log_fsp_current_... 
variable */ - - mtr_start(&mtr); - - mtr_x_lock(fil_space_get_latch(space), &mtr); - - header = fsp_get_space_header(space, &mtr); - - limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, &mtr); - - limit = limit / ((1024 * 1024) / UNIV_PAGE_SIZE); - - log_fsp_current_free_limit_set_and_checkpoint(limit); - - mtr_commit(&mtr); - - return(limit); -} - -/************************************************************************** -Gets the size of the tablespace from the tablespace header. If we do not -have an auto-extending data file, this should be equal to the size of the -data files. If there is an auto-extending data file, this can be smaller. */ - -ulint -fsp_header_get_tablespace_size( -/*===========================*/ - /* out: size in pages */ - ulint space) /* in: space id, must be 0 */ -{ - fsp_header_t* header; - ulint size; - mtr_t mtr; - - ut_a(space == 0); /* We have only one log_fsp_current_... variable */ - - mtr_start(&mtr); - - mtr_x_lock(fil_space_get_latch(space), &mtr); - - header = fsp_get_space_header(space, &mtr); - - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr); - - mtr_commit(&mtr); - - return(size); -} - -/*************************************************************************** -Tries to extend a single-table tablespace so that a page would fit in the -data file. 
*/ -static -ibool -fsp_try_extend_data_file_with_pages( -/*================================*/ - /* out: TRUE if success */ - ulint space, /* in: space */ - ulint page_no, /* in: page number */ - fsp_header_t* header, /* in: space header */ - mtr_t* mtr) /* in: mtr */ -{ - ibool success; - ulint actual_size; - ulint size; - - ut_a(space != 0); - - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - - ut_a(page_no >= size); - - success = fil_extend_space_to_desired_size(&actual_size, space, - page_no + 1); - /* actual_size now has the space size in pages; it may be less than - we wanted if we ran out of disk space */ - - mlog_write_ulint(header + FSP_SIZE, actual_size, MLOG_4BYTES, mtr); - - return(success); -} - -/*************************************************************************** -Tries to extend the last data file of a tablespace if it is auto-extending. */ -static -ibool -fsp_try_extend_data_file( -/*=====================*/ - /* out: FALSE if not auto-extending */ - ulint* actual_increase,/* out: actual increase in pages, where - we measure the tablespace size from - what the header field says; it may be - the actual file size rounded down to - megabyte */ - ulint space, /* in: space */ - fsp_header_t* header, /* in: space header */ - mtr_t* mtr) /* in: mtr */ -{ - ulint size; - ulint new_size; - ulint old_size; - ulint size_increase; - ulint actual_size; - ibool success; - - *actual_increase = 0; - - if (space == 0 && !srv_auto_extend_last_data_file) { - - return(FALSE); - } - - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - - old_size = size; - - if (space == 0 && srv_last_file_size_max != 0) { - if (srv_last_file_size_max - < srv_data_file_sizes[srv_n_data_files - 1]) { - - fprintf(stderr, - "InnoDB: Error: Last data file size is %lu," - " max size allowed %lu\n", - (ulong) srv_data_file_sizes[ - srv_n_data_files - 1], - (ulong) srv_last_file_size_max); - } - - size_increase = srv_last_file_size_max - - 
srv_data_file_sizes[srv_n_data_files - 1]; - if (size_increase > SRV_AUTO_EXTEND_INCREMENT) { - size_increase = SRV_AUTO_EXTEND_INCREMENT; - } - } else { - if (space == 0) { - size_increase = SRV_AUTO_EXTEND_INCREMENT; - } else { - /* We extend single-table tablespaces first one extent - at a time, but for bigger tablespaces more. It is not - enough to extend always by one extent, because some - extents are frag page extents. */ - - if (size < FSP_EXTENT_SIZE) { - /* Let us first extend the file to 64 pages */ - success = fsp_try_extend_data_file_with_pages( - space, FSP_EXTENT_SIZE - 1, - header, mtr); - if (!success) { - new_size = mtr_read_ulint( - header + FSP_SIZE, - MLOG_4BYTES, mtr); - - *actual_increase = new_size - old_size; - - return(FALSE); - } - - size = FSP_EXTENT_SIZE; - } - - if (size < 32 * FSP_EXTENT_SIZE) { - size_increase = FSP_EXTENT_SIZE; - } else { - /* Below in fsp_fill_free_list() we assume - that we add at most FSP_FREE_ADD extents at - a time */ - size_increase = FSP_FREE_ADD * FSP_EXTENT_SIZE; - } - } - } - - if (size_increase == 0) { - - return(TRUE); - } - - success = fil_extend_space_to_desired_size(&actual_size, space, - size + size_increase); - /* We ignore any fragments of a full megabyte when storing the size - to the space header */ - - mlog_write_ulint(header + FSP_SIZE, - ut_calc_align_down(actual_size, - (1024 * 1024) / UNIV_PAGE_SIZE), - MLOG_4BYTES, mtr); - new_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - - *actual_increase = new_size - old_size; - - return(TRUE); -} - -/************************************************************************** -Puts new extents to the free list if there are free extents above the free -limit. If an extent happens to contain an extent descriptor page, the extent -is put to the FSP_FREE_FRAG list with the page marked as used. 
*/ -static -void -fsp_fill_free_list( -/*===============*/ - ibool init_space, /* in: TRUE if this is a single-table - tablespace and we are only initing - the tablespace's first extent - descriptor page and ibuf bitmap page; - then we do not allocate more extents */ - ulint space, /* in: space */ - fsp_header_t* header, /* in: space header */ - mtr_t* mtr) /* in: mtr */ -{ - ulint limit; - ulint size; - xdes_t* descr; - ulint count = 0; - ulint frag_n_used; - page_t* descr_page; - page_t* ibuf_page; - ulint actual_increase; - ulint i; - mtr_t ibuf_mtr; - - ut_ad(header && mtr); - - /* Check if we can fill free list from above the free list limit */ - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, mtr); - - if (space == 0 && srv_auto_extend_last_data_file - && size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) { - - /* Try to increase the last data file size */ - fsp_try_extend_data_file(&actual_increase, space, header, mtr); - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - } - - if (space != 0 && !init_space - && size < limit + FSP_EXTENT_SIZE * FSP_FREE_ADD) { - - /* Try to increase the .ibd file size */ - fsp_try_extend_data_file(&actual_increase, space, header, mtr); - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - } - - i = limit; - - while ((init_space && i < 1) - || ((i + FSP_EXTENT_SIZE <= size) && (count < FSP_FREE_ADD))) { - - mlog_write_ulint(header + FSP_FREE_LIMIT, i + FSP_EXTENT_SIZE, - MLOG_4BYTES, mtr); - - /* Update the free limit info in the log system and make - a checkpoint */ - if (space == 0) { - log_fsp_current_free_limit_set_and_checkpoint( - (i + FSP_EXTENT_SIZE) - / ((1024 * 1024) / UNIV_PAGE_SIZE)); - } - - if (0 == i % XDES_DESCRIBED_PER_PAGE) { - - /* We are going to initialize a new descriptor page - and a new ibuf bitmap page: the prior contents of the - pages should be ignored. 
*/ - - if (i > 0) { - descr_page = buf_page_create(space, i, mtr); - buf_page_get(space, i, RW_X_LATCH, mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(descr_page, - SYNC_FSP_PAGE); -#endif /* UNIV_SYNC_DEBUG */ - fsp_init_file_page(descr_page, mtr); - mlog_write_ulint(descr_page + FIL_PAGE_TYPE, - FIL_PAGE_TYPE_XDES, - MLOG_2BYTES, mtr); - } - - /* Initialize the ibuf bitmap page in a separate - mini-transaction because it is low in the latching - order, and we must be able to release its latch - before returning from the fsp routine */ - - mtr_start(&ibuf_mtr); - - ibuf_page = buf_page_create(space, - i + FSP_IBUF_BITMAP_OFFSET, - &ibuf_mtr); - buf_page_get(space, i + FSP_IBUF_BITMAP_OFFSET, - RW_X_LATCH, &ibuf_mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(ibuf_page, SYNC_FSP_PAGE); -#endif /* UNIV_SYNC_DEBUG */ - fsp_init_file_page(ibuf_page, &ibuf_mtr); - - ibuf_bitmap_page_init(ibuf_page, &ibuf_mtr); - - mtr_commit(&ibuf_mtr); - } - - descr = xdes_get_descriptor_with_space_hdr(header, space, i, - mtr); - xdes_init(descr, mtr); - -#if XDES_DESCRIBED_PER_PAGE % FSP_EXTENT_SIZE -# error "XDES_DESCRIBED_PER_PAGE % FSP_EXTENT_SIZE != 0" -#endif - - if (0 == i % XDES_DESCRIBED_PER_PAGE) { - - /* The first page in the extent is a descriptor page - and the second is an ibuf bitmap page: mark them - used */ - - xdes_set_bit(descr, XDES_FREE_BIT, 0, FALSE, mtr); - xdes_set_bit(descr, XDES_FREE_BIT, - FSP_IBUF_BITMAP_OFFSET, FALSE, mtr); - xdes_set_state(descr, XDES_FREE_FRAG, mtr); - - flst_add_last(header + FSP_FREE_FRAG, - descr + XDES_FLST_NODE, mtr); - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, - MLOG_4BYTES, mtr); - mlog_write_ulint(header + FSP_FRAG_N_USED, - frag_n_used + 2, MLOG_4BYTES, mtr); - } else { - flst_add_last(header + FSP_FREE, - descr + XDES_FLST_NODE, mtr); - count++; - } - - i += FSP_EXTENT_SIZE; - } -} - -/************************************************************************** -Allocates a new free extent. 
*/
static
xdes_t*
fsp_alloc_free_extent(
/*==================*/
			/* out: extent descriptor, NULL if cannot be
			allocated */
	ulint	space,	/* in: space id */
	ulint	hint,	/* in: hint of which extent would be desirable: any
			page offset in the extent goes; the hint must not
			be > FSP_FREE_LIMIT */
	mtr_t*	mtr)	/* in: mtr */
{
	fsp_header_t*	header;
	fil_addr_t	first_addr;
	xdes_t*		descr;

	ut_ad(mtr);

	header = fsp_get_space_header(space, mtr);

	descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr);

	if (descr == NULL || xdes_get_state(descr, mtr) != XDES_FREE) {
		/* The hinted extent is not available: take the first
		extent in the free list instead */

		first_addr = flst_get_first(header + FSP_FREE, mtr);

		if (fil_addr_is_null(first_addr)) {
			/* The list is empty: try to refill it and look
			again */
			fsp_fill_free_list(FALSE, space, header, mtr);

			first_addr = flst_get_first(header + FSP_FREE, mtr);
		}

		if (fil_addr_is_null(first_addr)) {

			return(NULL);	/* No free extents left */
		}

		descr = xdes_lst_get_descriptor(space, first_addr, mtr);
	}

	/* In both cases the chosen extent is still in the FSP_FREE list:
	unlink it */
	flst_remove(header + FSP_FREE, descr + XDES_FLST_NODE, mtr);

	return(descr);
}

/**************************************************************************
Allocates a single free page from a space. The page is marked as used.
*/ -static -ulint -fsp_alloc_free_page( -/*================*/ - /* out: the page offset, FIL_NULL if no page could - be allocated */ - ulint space, /* in: space id */ - ulint hint, /* in: hint of which page would be desirable */ - mtr_t* mtr) /* in: mtr handle */ -{ - fsp_header_t* header; - fil_addr_t first; - xdes_t* descr; - page_t* page; - ulint free; - ulint frag_n_used; - ulint page_no; - ulint space_size; - ibool success; - - ut_ad(mtr); - - header = fsp_get_space_header(space, mtr); - - /* Get the hinted descriptor */ - descr = xdes_get_descriptor_with_space_hdr(header, space, hint, mtr); - - if (descr && (xdes_get_state(descr, mtr) == XDES_FREE_FRAG)) { - /* Ok, we can take this extent */ - } else { - /* Else take the first extent in free_frag list */ - first = flst_get_first(header + FSP_FREE_FRAG, mtr); - - if (fil_addr_is_null(first)) { - /* There are no partially full fragments: allocate - a free extent and add it to the FREE_FRAG list. NOTE - that the allocation may have as a side-effect that an - extent containing a descriptor page is added to the - FREE_FRAG list. But we will allocate our page from the - the free extent anyway. */ - - descr = fsp_alloc_free_extent(space, hint, mtr); - - if (descr == NULL) { - /* No free space left */ - - return(FIL_NULL); - } - - xdes_set_state(descr, XDES_FREE_FRAG, mtr); - flst_add_last(header + FSP_FREE_FRAG, - descr + XDES_FLST_NODE, mtr); - } else { - descr = xdes_lst_get_descriptor(space, first, mtr); - } - - /* Reset the hint */ - hint = 0; - } - - /* Now we have in descr an extent with at least one free page. Look - for a free page in the extent. 
*/ - - free = xdes_find_bit(descr, XDES_FREE_BIT, TRUE, - hint % FSP_EXTENT_SIZE, mtr); - if (free == ULINT_UNDEFINED) { - - ut_print_buf(stderr, ((byte*)descr) - 500, 1000); - - ut_error; - } - - page_no = xdes_get_offset(descr) + free; - - space_size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, mtr); - - if (space_size <= page_no) { - /* It must be that we are extending a single-table tablespace - whose size is still < 64 pages */ - - ut_a(space != 0); - if (page_no >= FSP_EXTENT_SIZE) { - fprintf(stderr, - "InnoDB: Error: trying to extend a" - " single-table tablespace %lu\n" - "InnoDB: by single page(s) though the" - " space size %lu. Page no %lu.\n", - (ulong) space, (ulong) space_size, - (ulong) page_no); - return(FIL_NULL); - } - success = fsp_try_extend_data_file_with_pages(space, page_no, - header, mtr); - if (!success) { - /* No disk space left */ - return(FIL_NULL); - } - } - - xdes_set_bit(descr, XDES_FREE_BIT, free, FALSE, mtr); - - /* Update the FRAG_N_USED field */ - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, - mtr); - frag_n_used++; - mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used, MLOG_4BYTES, - mtr); - if (xdes_is_full(descr, mtr)) { - /* The fragment is full: move it to another list */ - flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, - mtr); - xdes_set_state(descr, XDES_FULL_FRAG, mtr); - - flst_add_last(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, - mtr); - mlog_write_ulint(header + FSP_FRAG_N_USED, - frag_n_used - FSP_EXTENT_SIZE, MLOG_4BYTES, - mtr); - } - - /* Initialize the allocated page to the buffer pool, so that it can - be obtained immediately with buf_page_get without need for a disk - read. 
*/ - - buf_page_create(space, page_no, mtr); - - page = buf_page_get(space, page_no, RW_X_LATCH, mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_FSP_PAGE); -#endif /* UNIV_SYNC_DEBUG */ - - /* Prior contents of the page should be ignored */ - fsp_init_file_page(page, mtr); - - return(page_no); -} - -/************************************************************************** -Frees a single page of a space. The page is marked as free and clean. */ -static -void -fsp_free_page( -/*==========*/ - ulint space, /* in: space id */ - ulint page, /* in: page offset */ - mtr_t* mtr) /* in: mtr handle */ -{ - fsp_header_t* header; - xdes_t* descr; - ulint state; - ulint frag_n_used; - - ut_ad(mtr); - - /* fprintf(stderr, "Freeing page %lu in space %lu\n", page, space); */ - - header = fsp_get_space_header(space, mtr); - - descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr); - - state = xdes_get_state(descr, mtr); - - if (state != XDES_FREE_FRAG && state != XDES_FULL_FRAG) { - fprintf(stderr, - "InnoDB: Error: File space extent descriptor" - " of page %lu has state %lu\n", - (ulong) page, - (ulong) state); - fputs("InnoDB: Dump of descriptor: ", stderr); - ut_print_buf(stderr, ((byte*)descr) - 50, 200); - putc('\n', stderr); - - if (state == XDES_FREE) { - /* We put here some fault tolerance: if the page - is already free, return without doing anything! */ - - return; - } - - ut_error; - } - - if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)) { - fprintf(stderr, - "InnoDB: Error: File space extent descriptor" - " of page %lu says it is free\n" - "InnoDB: Dump of descriptor: ", (ulong) page); - ut_print_buf(stderr, ((byte*)descr) - 50, 200); - putc('\n', stderr); - - /* We put here some fault tolerance: if the page - is already free, return without doing anything! 
*/ - - return; - } - - xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr); - xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr); - - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, - mtr); - if (state == XDES_FULL_FRAG) { - /* The fragment was full: move it to another list */ - flst_remove(header + FSP_FULL_FRAG, descr + XDES_FLST_NODE, - mtr); - xdes_set_state(descr, XDES_FREE_FRAG, mtr); - flst_add_last(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, - mtr); - mlog_write_ulint(header + FSP_FRAG_N_USED, - frag_n_used + FSP_EXTENT_SIZE - 1, - MLOG_4BYTES, mtr); - } else { - ut_a(frag_n_used > 0); - mlog_write_ulint(header + FSP_FRAG_N_USED, frag_n_used - 1, - MLOG_4BYTES, mtr); - } - - if (xdes_is_free(descr, mtr)) { - /* The extent has become free: move it to another list */ - flst_remove(header + FSP_FREE_FRAG, descr + XDES_FLST_NODE, - mtr); - fsp_free_extent(space, page, mtr); - } -} - -/************************************************************************** -Returns an extent to the free list of a space. */ -static -void -fsp_free_extent( -/*============*/ - ulint space, /* in: space id */ - ulint page, /* in: page offset in the extent */ - mtr_t* mtr) /* in: mtr */ -{ - fsp_header_t* header; - xdes_t* descr; - - ut_ad(mtr); - - header = fsp_get_space_header(space, mtr); - - descr = xdes_get_descriptor_with_space_hdr(header, space, page, mtr); - - if (xdes_get_state(descr, mtr) == XDES_FREE) { - - ut_print_buf(stderr, (byte*)descr - 500, 1000); - - ut_error; - } - - xdes_init(descr, mtr); - - flst_add_last(header + FSP_FREE, descr + XDES_FLST_NODE, mtr); -} - -/************************************************************************** -Returns the nth inode slot on an inode page. 
*/ -UNIV_INLINE -fseg_inode_t* -fsp_seg_inode_page_get_nth_inode( -/*=============================*/ - /* out: segment inode */ - page_t* page, /* in: segment inode page */ - ulint i, /* in: inode index on page */ - mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */ -{ - ut_ad(i < FSP_SEG_INODES_PER_PAGE); - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - - return(page + FSEG_ARR_OFFSET + FSEG_INODE_SIZE * i); -} - -/************************************************************************** -Looks for a used segment inode on a segment inode page. */ -static -ulint -fsp_seg_inode_page_find_used( -/*=========================*/ - /* out: segment inode index, or ULINT_UNDEFINED - if not found */ - page_t* page, /* in: segment inode page */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ulint i; - fseg_inode_t* inode; - - for (i = 0; i < FSP_SEG_INODES_PER_PAGE; i++) { - - inode = fsp_seg_inode_page_get_nth_inode(page, i, mtr); - - if (ut_dulint_cmp(mach_read_from_8(inode + FSEG_ID), - ut_dulint_zero) != 0) { - /* This is used */ - - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/************************************************************************** -Looks for an unused segment inode on a segment inode page. 
*/ -static -ulint -fsp_seg_inode_page_find_free( -/*=========================*/ - /* out: segment inode index, or ULINT_UNDEFINED - if not found */ - page_t* page, /* in: segment inode page */ - ulint j, /* in: search forward starting from this index */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ulint i; - fseg_inode_t* inode; - - for (i = j; i < FSP_SEG_INODES_PER_PAGE; i++) { - - inode = fsp_seg_inode_page_get_nth_inode(page, i, mtr); - - if (ut_dulint_cmp(mach_read_from_8(inode + FSEG_ID), - ut_dulint_zero) == 0) { - /* This is unused */ - - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/************************************************************************** -Allocates a new file segment inode page. */ -static -ibool -fsp_alloc_seg_inode_page( -/*=====================*/ - /* out: TRUE if could be allocated */ - fsp_header_t* space_header, /* in: space header */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - fseg_inode_t* inode; - page_t* page; - ulint page_no; - ulint space; - ulint i; - - space = buf_frame_get_space_id(space_header); - - page_no = fsp_alloc_free_page(space, 0, mtr); - - if (page_no == FIL_NULL) { - - return(FALSE); - } - - page = buf_page_get(space, page_no, RW_X_LATCH, mtr); - - buf_block_align(page)->check_index_page_at_flush = FALSE; - - mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_INODE, - MLOG_2BYTES, mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_FSP_PAGE); -#endif /* UNIV_SYNC_DEBUG */ - - for (i = 0; i < FSP_SEG_INODES_PER_PAGE; i++) { - - inode = fsp_seg_inode_page_get_nth_inode(page, i, mtr); - - mlog_write_dulint(inode + FSEG_ID, ut_dulint_zero, mtr); - } - - flst_add_last(space_header + FSP_SEG_INODES_FREE, - page + FSEG_INODE_PAGE_NODE, mtr); - return(TRUE); -} - -/************************************************************************** -Allocates a new file segment inode. 
*/ -static -fseg_inode_t* -fsp_alloc_seg_inode( -/*================*/ - /* out: segment inode, or NULL if - not enough space */ - fsp_header_t* space_header, /* in: space header */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ulint page_no; - page_t* page; - fseg_inode_t* inode; - ibool success; - ulint n; - - if (flst_get_len(space_header + FSP_SEG_INODES_FREE, mtr) == 0) { - /* Allocate a new segment inode page */ - - success = fsp_alloc_seg_inode_page(space_header, mtr); - - if (!success) { - - return(NULL); - } - } - - page_no = flst_get_first(space_header + FSP_SEG_INODES_FREE, mtr).page; - - page = buf_page_get(buf_frame_get_space_id(space_header), page_no, - RW_X_LATCH, mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_FSP_PAGE); -#endif /* UNIV_SYNC_DEBUG */ - - n = fsp_seg_inode_page_find_free(page, 0, mtr); - - ut_a(n != ULINT_UNDEFINED); - - inode = fsp_seg_inode_page_get_nth_inode(page, n, mtr); - - if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, n + 1, - mtr)) { - /* There are no other unused headers left on the page: move it - to another list */ - - flst_remove(space_header + FSP_SEG_INODES_FREE, - page + FSEG_INODE_PAGE_NODE, mtr); - - flst_add_last(space_header + FSP_SEG_INODES_FULL, - page + FSEG_INODE_PAGE_NODE, mtr); - } - - return(inode); -} - -/************************************************************************** -Frees a file segment inode. 
*/ -static -void -fsp_free_seg_inode( -/*===============*/ - ulint space, /* in: space id */ - fseg_inode_t* inode, /* in: segment inode */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - page_t* page; - fsp_header_t* space_header; - - page = buf_frame_align(inode); - - space_header = fsp_get_space_header(space, mtr); - - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); - - if (ULINT_UNDEFINED == fsp_seg_inode_page_find_free(page, 0, mtr)) { - - /* Move the page to another list */ - - flst_remove(space_header + FSP_SEG_INODES_FULL, - page + FSEG_INODE_PAGE_NODE, mtr); - - flst_add_last(space_header + FSP_SEG_INODES_FREE, - page + FSEG_INODE_PAGE_NODE, mtr); - } - - mlog_write_dulint(inode + FSEG_ID, ut_dulint_zero, mtr); - mlog_write_ulint(inode + FSEG_MAGIC_N, 0, MLOG_4BYTES, mtr); - - if (ULINT_UNDEFINED == fsp_seg_inode_page_find_used(page, mtr)) { - - /* There are no other used headers left on the page: free it */ - - flst_remove(space_header + FSP_SEG_INODES_FREE, - page + FSEG_INODE_PAGE_NODE, mtr); - - fsp_free_page(space, buf_frame_get_page_no(page), mtr); - } -} - -/************************************************************************** -Returns the file segment inode, page x-latched. */ -static -fseg_inode_t* -fseg_inode_get( -/*===========*/ - /* out: segment inode, page x-latched */ - fseg_header_t* header, /* in: segment header */ - mtr_t* mtr) /* in: mtr handle */ -{ - fil_addr_t inode_addr; - fseg_inode_t* inode; - - inode_addr.page = mach_read_from_4(header + FSEG_HDR_PAGE_NO); - inode_addr.boffset = mach_read_from_2(header + FSEG_HDR_OFFSET); - - inode = fut_get_ptr(mach_read_from_4(header + FSEG_HDR_SPACE), - inode_addr, RW_X_LATCH, mtr); - - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); - - return(inode); -} - -/************************************************************************** -Gets the page number from the nth fragment page slot. 
*/ -UNIV_INLINE -ulint -fseg_get_nth_frag_page_no( -/*======================*/ - /* out: page number, FIL_NULL if not in use */ - fseg_inode_t* inode, /* in: segment inode */ - ulint n, /* in: slot index */ - mtr_t* mtr __attribute__((unused))) /* in: mtr handle */ -{ - ut_ad(inode && mtr); - ut_ad(n < FSEG_FRAG_ARR_N_SLOTS); - ut_ad(mtr_memo_contains(mtr, buf_block_align(inode), - MTR_MEMO_PAGE_X_FIX)); - return(mach_read_from_4(inode + FSEG_FRAG_ARR - + n * FSEG_FRAG_SLOT_SIZE)); -} - -/************************************************************************** -Sets the page number in the nth fragment page slot. */ -UNIV_INLINE -void -fseg_set_nth_frag_page_no( -/*======================*/ - fseg_inode_t* inode, /* in: segment inode */ - ulint n, /* in: slot index */ - ulint page_no,/* in: page number to set */ - mtr_t* mtr) /* in: mtr handle */ -{ - ut_ad(inode && mtr); - ut_ad(n < FSEG_FRAG_ARR_N_SLOTS); - ut_ad(mtr_memo_contains(mtr, buf_block_align(inode), - MTR_MEMO_PAGE_X_FIX)); - - mlog_write_ulint(inode + FSEG_FRAG_ARR + n * FSEG_FRAG_SLOT_SIZE, - page_no, MLOG_4BYTES, mtr); -} - -/************************************************************************** -Finds a fragment page slot which is free. */ -static -ulint -fseg_find_free_frag_page_slot( -/*==========================*/ - /* out: slot index; ULINT_UNDEFINED if none - found */ - fseg_inode_t* inode, /* in: segment inode */ - mtr_t* mtr) /* in: mtr handle */ -{ - ulint i; - ulint page_no; - - ut_ad(inode && mtr); - - for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) { - page_no = fseg_get_nth_frag_page_no(inode, i, mtr); - - if (page_no == FIL_NULL) { - - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/************************************************************************** -Finds a fragment page slot which is used and last in the array. 
*/ -static -ulint -fseg_find_last_used_frag_page_slot( -/*===============================*/ - /* out: slot index; ULINT_UNDEFINED if none - found */ - fseg_inode_t* inode, /* in: segment inode */ - mtr_t* mtr) /* in: mtr handle */ -{ - ulint i; - ulint page_no; - - ut_ad(inode && mtr); - - for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) { - page_no = fseg_get_nth_frag_page_no( - inode, FSEG_FRAG_ARR_N_SLOTS - i - 1, mtr); - - if (page_no != FIL_NULL) { - - return(FSEG_FRAG_ARR_N_SLOTS - i - 1); - } - } - - return(ULINT_UNDEFINED); -} - -/************************************************************************** -Calculates reserved fragment page slots. */ -static -ulint -fseg_get_n_frag_pages( -/*==================*/ - /* out: number of fragment pages */ - fseg_inode_t* inode, /* in: segment inode */ - mtr_t* mtr) /* in: mtr handle */ -{ - ulint i; - ulint count = 0; - - ut_ad(inode && mtr); - - for (i = 0; i < FSEG_FRAG_ARR_N_SLOTS; i++) { - if (FIL_NULL != fseg_get_nth_frag_page_no(inode, i, mtr)) { - count++; - } - } - - return(count); -} - -/************************************************************************** -Creates a new segment. 
*/

page_t*
fseg_create_general(
/*================*/
			/* out: the page where the segment header is placed,
			x-latched, NULL if could not create segment
			because of lack of space */
	ulint	space,	/* in: space id */
	ulint	page,	/* in: page where the segment header is placed: if
			this is != 0, the page must belong to another segment,
			if this is 0, a new page will be allocated and it
			will belong to the created segment */
	ulint	byte_offset, /* in: byte offset of the created segment header
			on the page */
	ibool	has_done_reservation, /* in: TRUE if the caller has already
			done the reservation for the pages with
			fsp_reserve_free_extents (at least 2 extents: one for
			the inode and the other for the segment) then there is
			no need to do the check for this individual
			operation */
	mtr_t*	mtr)	/* in: mtr */
{
	fsp_header_t*	space_header;
	fseg_inode_t*	inode;
	dulint		seg_id;
	fseg_header_t*	seg_header	= 0; /* remove warning */
	page_t*		hdr_frame;
	rw_lock_t*	latch;
	ulint		n_reserved;
	page_t*		ret		= NULL;
	ulint		slot;

	ut_ad(mtr);

	if (page != 0) {
		/* The header goes onto an existing page: x-latch it */
		seg_header = byte_offset + buf_page_get(space, page,
							RW_X_LATCH, mtr);
	}

	ut_ad(!mutex_own(&kernel_mutex)
	      || mtr_memo_contains(mtr, fil_space_get_latch(space),
				   MTR_MEMO_X_LOCK));
	latch = fil_space_get_latch(space);

	mtr_x_lock(latch, mtr);

	if (rw_lock_get_x_lock_count(latch) == 1 && space == 0) {
		/* This thread did not own the latch before this call: free
		excess pages from the insert buffer free list */

		ibuf_free_excess_pages(space);
	}

	if (!has_done_reservation
	    && !fsp_reserve_free_extents(&n_reserved, space, 2,
					 FSP_NORMAL, mtr)) {
		/* The reservation failed, so there is nothing to release */

		return(NULL);
	}

	space_header = fsp_get_space_header(space, mtr);

	inode = fsp_alloc_seg_inode(space_header, mtr);

	if (inode == NULL) {

		goto funct_exit;
	}

	/* Read the next segment id from space header and increment the
	value in space header */

	seg_id = mtr_read_dulint(space_header + FSP_SEG_ID, mtr);

	mlog_write_dulint(space_header + FSP_SEG_ID, ut_dulint_add(seg_id, 1),
			  mtr);

	/* Initialize the inode: id, use counter, extent lists, magic
	number, and an empty fragment page array */

	mlog_write_dulint(inode + FSEG_ID, seg_id, mtr);
	mlog_write_ulint(inode + FSEG_NOT_FULL_N_USED, 0, MLOG_4BYTES, mtr);

	flst_init(inode + FSEG_FREE, mtr);
	flst_init(inode + FSEG_NOT_FULL, mtr);
	flst_init(inode + FSEG_FULL, mtr);

	mlog_write_ulint(inode + FSEG_MAGIC_N, FSEG_MAGIC_N_VALUE,
			 MLOG_4BYTES, mtr);

	for (slot = 0; slot < FSEG_FRAG_ARR_N_SLOTS; slot++) {
		fseg_set_nth_frag_page_no(inode, slot, FIL_NULL, mtr);
	}

	if (page == 0) {
		/* No page was given: allocate the header page from the new
		segment itself */
		page = fseg_alloc_free_page_low(space, inode, 0, FSP_UP, mtr);

		if (page == FIL_NULL) {
			/* The page could not be allocated: give the inode
			back */

			fsp_free_seg_inode(space, inode, mtr);

			goto funct_exit;
		}

		hdr_frame = buf_page_get(space, page, RW_X_LATCH, mtr);
		seg_header = byte_offset + hdr_frame;

		mlog_write_ulint(hdr_frame + FIL_PAGE_TYPE,
				 FIL_PAGE_TYPE_SYS, MLOG_2BYTES, mtr);
	}

	/* Store the location of the inode in the segment header: byte
	offset within its page, page number, and space id */

	mlog_write_ulint(seg_header + FSEG_HDR_OFFSET,
			 inode - buf_frame_align(inode), MLOG_2BYTES, mtr);

	mlog_write_ulint(seg_header + FSEG_HDR_PAGE_NO,
			 buf_frame_get_page_no(inode), MLOG_4BYTES, mtr);

	mlog_write_ulint(seg_header + FSEG_HDR_SPACE, space, MLOG_4BYTES,
			 mtr);

	ret = buf_frame_align(seg_header);

funct_exit:
	if (!has_done_reservation) {
		/* Release the reservation that was made above */

		fil_space_release_free_extents(space, n_reserved);
	}

	return(ret);
}

/**************************************************************************
Creates a new segment.
*/ - -page_t* -fseg_create( -/*========*/ - /* out: the page where the segment header is placed, - x-latched, NULL if could not create segment - because of lack of space */ - ulint space, /* in: space id */ - ulint page, /* in: page where the segment header is placed: if - this is != 0, the page must belong to another segment, - if this is 0, a new page will be allocated and it - will belong to the created segment */ - ulint byte_offset, /* in: byte offset of the created segment header - on the page */ - mtr_t* mtr) /* in: mtr */ -{ - return(fseg_create_general(space, page, byte_offset, FALSE, mtr)); -} - -/************************************************************************** -Calculates the number of pages reserved by a segment, and how many pages are -currently used. */ -static -ulint -fseg_n_reserved_pages_low( -/*======================*/ - /* out: number of reserved pages */ - fseg_inode_t* inode, /* in: segment inode */ - ulint* used, /* out: number of pages used (<= reserved) */ - mtr_t* mtr) /* in: mtr handle */ -{ - ulint ret; - - ut_ad(inode && used && mtr); - ut_ad(mtr_memo_contains(mtr, buf_block_align(inode), - MTR_MEMO_PAGE_X_FIX)); - - *used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr) - + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr) - + fseg_get_n_frag_pages(inode, mtr); - - ret = fseg_get_n_frag_pages(inode, mtr) - + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FREE, mtr) - + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_NOT_FULL, mtr) - + FSP_EXTENT_SIZE * flst_get_len(inode + FSEG_FULL, mtr); - - return(ret); -} - -/************************************************************************** -Calculates the number of pages reserved by a segment, and how many pages are -currently used. 
*/ - -ulint -fseg_n_reserved_pages( -/*==================*/ - /* out: number of reserved pages */ - fseg_header_t* header, /* in: segment header */ - ulint* used, /* out: number of pages used (<= reserved) */ - mtr_t* mtr) /* in: mtr handle */ -{ - ulint ret; - fseg_inode_t* inode; - ulint space; - - space = buf_frame_get_space_id(header); - - ut_ad(!mutex_own(&kernel_mutex) - || mtr_memo_contains(mtr, fil_space_get_latch(space), - MTR_MEMO_X_LOCK)); - - mtr_x_lock(fil_space_get_latch(space), mtr); - - inode = fseg_inode_get(header, mtr); - - ret = fseg_n_reserved_pages_low(inode, used, mtr); - - return(ret); -} - -/************************************************************************* -Tries to fill the free list of a segment with consecutive free extents. -This happens if the segment is big enough to allow extents in the free list, -the free list is empty, and the extents can be allocated consecutively from -the hint onward. */ -static -void -fseg_fill_free_list( -/*================*/ - fseg_inode_t* inode, /* in: segment inode */ - ulint space, /* in: space id */ - ulint hint, /* in: hint which extent would be good as - the first extent */ - mtr_t* mtr) /* in: mtr */ -{ - xdes_t* descr; - ulint i; - dulint seg_id; - ulint reserved; - ulint used; - - ut_ad(inode && mtr); - - reserved = fseg_n_reserved_pages_low(inode, &used, mtr); - - if (reserved < FSEG_FREE_LIST_LIMIT * FSP_EXTENT_SIZE) { - - /* The segment is too small to allow extents in free list */ - - return; - } - - if (flst_get_len(inode + FSEG_FREE, mtr) > 0) { - /* Free list is not empty */ - - return; - } - - for (i = 0; i < FSEG_FREE_LIST_MAX_LEN; i++) { - descr = xdes_get_descriptor(space, hint, mtr); - - if ((descr == NULL) - || (XDES_FREE != xdes_get_state(descr, mtr))) { - - /* We cannot allocate the desired extent: stop */ - - return; - } - - descr = fsp_alloc_free_extent(space, hint, mtr); - - xdes_set_state(descr, XDES_FSEG, mtr); - - seg_id = mtr_read_dulint(inode + FSEG_ID, mtr); - 
mlog_write_dulint(descr + XDES_ID, seg_id, mtr); - - flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr); - hint += FSP_EXTENT_SIZE; - } -} - -/************************************************************************* -Allocates a free extent for the segment: looks first in the free list of the -segment, then tries to allocate from the space free list. NOTE that the extent -returned still resides in the segment free list, it is not yet taken off it! */ -static -xdes_t* -fseg_alloc_free_extent( -/*===================*/ - /* out: allocated extent, still placed in the - segment free list, NULL if could - not be allocated */ - fseg_inode_t* inode, /* in: segment inode */ - ulint space, /* in: space id */ - mtr_t* mtr) /* in: mtr */ -{ - xdes_t* descr; - dulint seg_id; - fil_addr_t first; - - if (flst_get_len(inode + FSEG_FREE, mtr) > 0) { - /* Segment free list is not empty, allocate from it */ - - first = flst_get_first(inode + FSEG_FREE, mtr); - - descr = xdes_lst_get_descriptor(space, first, mtr); - } else { - /* Segment free list was empty, allocate from space */ - descr = fsp_alloc_free_extent(space, 0, mtr); - - if (descr == NULL) { - - return(NULL); - } - - seg_id = mtr_read_dulint(inode + FSEG_ID, mtr); - - xdes_set_state(descr, XDES_FSEG, mtr); - mlog_write_dulint(descr + XDES_ID, seg_id, mtr); - flst_add_last(inode + FSEG_FREE, descr + XDES_FLST_NODE, mtr); - - /* Try to fill the segment free list */ - fseg_fill_free_list(inode, space, - xdes_get_offset(descr) + FSP_EXTENT_SIZE, - mtr); - } - - return(descr); -} - -/************************************************************************** -Allocates a single free page from a segment. This function implements -the intelligent allocation strategy which tries to minimize file space -fragmentation. 
*/ -static -ulint -fseg_alloc_free_page_low( -/*=====================*/ - /* out: the allocated page number, FIL_NULL - if no page could be allocated */ - ulint space, /* in: space */ - fseg_inode_t* seg_inode, /* in: segment inode */ - ulint hint, /* in: hint of which page would be desirable */ - byte direction, /* in: if the new page is needed because - of an index page split, and records are - inserted there in order, into which - direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR */ - mtr_t* mtr) /* in: mtr handle */ -{ - fsp_header_t* space_header; - ulint space_size; - dulint seg_id; - ulint used; - ulint reserved; - xdes_t* descr; /* extent of the hinted page */ - ulint ret_page; /* the allocated page offset, FIL_NULL - if could not be allocated */ - xdes_t* ret_descr; /* the extent of the allocated page */ - page_t* page; - ibool frag_page_allocated = FALSE; - ibool success; - ulint n; - - ut_ad(mtr); - ut_ad((direction >= FSP_UP) && (direction <= FSP_NO_DIR)); - ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) - == FSEG_MAGIC_N_VALUE); - seg_id = mtr_read_dulint(seg_inode + FSEG_ID, mtr); - - ut_ad(ut_dulint_cmp(seg_id, ut_dulint_zero) > 0); - - reserved = fseg_n_reserved_pages_low(seg_inode, &used, mtr); - - space_header = fsp_get_space_header(space, mtr); - - descr = xdes_get_descriptor_with_space_hdr(space_header, space, - hint, mtr); - if (descr == NULL) { - /* Hint outside space or too high above free limit: reset - hint */ - hint = 0; - descr = xdes_get_descriptor(space, hint, mtr); - } - - /* In the big if-else below we look for ret_page and ret_descr */ - /*-------------------------------------------------------------*/ - if ((xdes_get_state(descr, mtr) == XDES_FSEG) - && (0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID, - mtr), seg_id)) - && (xdes_get_bit(descr, XDES_FREE_BIT, - hint % FSP_EXTENT_SIZE, mtr) == TRUE)) { - - /* 1. 
We can take the hinted page - =================================*/ - ret_descr = descr; - ret_page = hint; - /*-----------------------------------------------------------*/ - } else if ((xdes_get_state(descr, mtr) == XDES_FREE) - && ((reserved - used) < reserved / FSEG_FILLFACTOR) - && (used >= FSEG_FRAG_LIMIT)) { - - /* 2. We allocate the free extent from space and can take - ========================================================= - the hinted page - ===============*/ - ret_descr = fsp_alloc_free_extent(space, hint, mtr); - - ut_a(ret_descr == descr); - - xdes_set_state(ret_descr, XDES_FSEG, mtr); - mlog_write_dulint(ret_descr + XDES_ID, seg_id, mtr); - flst_add_last(seg_inode + FSEG_FREE, - ret_descr + XDES_FLST_NODE, mtr); - - /* Try to fill the segment free list */ - fseg_fill_free_list(seg_inode, space, - hint + FSP_EXTENT_SIZE, mtr); - ret_page = hint; - /*-----------------------------------------------------------*/ - } else if ((direction != FSP_NO_DIR) - && ((reserved - used) < reserved / FSEG_FILLFACTOR) - && (used >= FSEG_FRAG_LIMIT) - && (!!(ret_descr - = fseg_alloc_free_extent(seg_inode, space, mtr)))) { - - /* 3. We take any free extent (which was already assigned above - =============================================================== - in the if-condition to ret_descr) and take the lowest or - ======================================================== - highest page in it, depending on the direction - ==============================================*/ - ret_page = xdes_get_offset(ret_descr); - - if (direction == FSP_DOWN) { - ret_page += FSP_EXTENT_SIZE - 1; - } - /*-----------------------------------------------------------*/ - } else if ((xdes_get_state(descr, mtr) == XDES_FSEG) - && (0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID, - mtr), seg_id)) - && (!xdes_is_full(descr, mtr))) { - - /* 4. 
We can take the page from the same extent as the - ====================================================== - hinted page (and the extent already belongs to the - ================================================== - segment) - ========*/ - ret_descr = descr; - ret_page = xdes_get_offset(ret_descr) - + xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE, - hint % FSP_EXTENT_SIZE, mtr); - /*-----------------------------------------------------------*/ - } else if (reserved - used > 0) { - /* 5. We take any unused page from the segment - ==============================================*/ - fil_addr_t first; - - if (flst_get_len(seg_inode + FSEG_NOT_FULL, mtr) > 0) { - first = flst_get_first(seg_inode + FSEG_NOT_FULL, - mtr); - } else if (flst_get_len(seg_inode + FSEG_FREE, mtr) > 0) { - first = flst_get_first(seg_inode + FSEG_FREE, mtr); - } else { - ut_error; - return(FIL_NULL); - } - - ret_descr = xdes_lst_get_descriptor(space, first, mtr); - ret_page = xdes_get_offset(ret_descr) - + xdes_find_bit(ret_descr, XDES_FREE_BIT, TRUE, - 0, mtr); - /*-----------------------------------------------------------*/ - } else if (used < FSEG_FRAG_LIMIT) { - /* 6. We allocate an individual page from the space - ===================================================*/ - ret_page = fsp_alloc_free_page(space, hint, mtr); - ret_descr = NULL; - - frag_page_allocated = TRUE; - - if (ret_page != FIL_NULL) { - /* Put the page in the fragment page array of the - segment */ - n = fseg_find_free_frag_page_slot(seg_inode, mtr); - ut_a(n != FIL_NULL); - - fseg_set_nth_frag_page_no(seg_inode, n, ret_page, - mtr); - } - /*-----------------------------------------------------------*/ - } else { - /* 7. 
We allocate a new extent and take its first page - ======================================================*/ - ret_descr = fseg_alloc_free_extent(seg_inode, space, mtr); - - if (ret_descr == NULL) { - ret_page = FIL_NULL; - } else { - ret_page = xdes_get_offset(ret_descr); - } - } - - if (ret_page == FIL_NULL) { - /* Page could not be allocated */ - - return(FIL_NULL); - } - - if (space != 0) { - space_size = fil_space_get_size(space); - - if (space_size <= ret_page) { - /* It must be that we are extending a single-table - tablespace whose size is still < 64 pages */ - - if (ret_page >= FSP_EXTENT_SIZE) { - fprintf(stderr, - "InnoDB: Error (2): trying to extend" - " a single-table tablespace %lu\n" - "InnoDB: by single page(s) though" - " the space size %lu. Page no %lu.\n", - (ulong) space, (ulong) space_size, - (ulong) ret_page); - return(FIL_NULL); - } - - success = fsp_try_extend_data_file_with_pages( - space, ret_page, space_header, mtr); - if (!success) { - /* No disk space left */ - return(FIL_NULL); - } - } - } - - if (!frag_page_allocated) { - /* Initialize the allocated page to buffer pool, so that it - can be obtained immediately with buf_page_get without need - for a disk read */ - - page = buf_page_create(space, ret_page, mtr); - - ut_a(page == buf_page_get(space, ret_page, RW_X_LATCH, mtr)); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_FSP_PAGE); -#endif /* UNIV_SYNC_DEBUG */ - - /* The prior contents of the page should be ignored */ - fsp_init_file_page(page, mtr); - - /* At this point we know the extent and the page offset. - The extent is still in the appropriate list (FSEG_NOT_FULL - or FSEG_FREE), and the page is not yet marked as used. 
*/ - - ut_ad(xdes_get_descriptor(space, ret_page, mtr) == ret_descr); - ut_ad(xdes_get_bit(ret_descr, XDES_FREE_BIT, - ret_page % FSP_EXTENT_SIZE, mtr) == TRUE); - - fseg_mark_page_used(seg_inode, space, ret_page, mtr); - } - - buf_reset_check_index_page_at_flush(space, ret_page); - - return(ret_page); -} - -/************************************************************************** -Allocates a single free page from a segment. This function implements -the intelligent allocation strategy which tries to minimize file space -fragmentation. */ - -ulint -fseg_alloc_free_page_general( -/*=========================*/ - /* out: allocated page offset, FIL_NULL if no - page could be allocated */ - fseg_header_t* seg_header,/* in: segment header */ - ulint hint, /* in: hint of which page would be desirable */ - byte direction,/* in: if the new page is needed because - of an index page split, and records are - inserted there in order, into which - direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR */ - ibool has_done_reservation, /* in: TRUE if the caller has - already done the reservation for the page - with fsp_reserve_free_extents, then there - is no need to do the check for this individual - page */ - mtr_t* mtr) /* in: mtr handle */ -{ - fseg_inode_t* inode; - ulint space; - rw_lock_t* latch; - ibool success; - ulint page_no; - ulint n_reserved; - - space = buf_frame_get_space_id(seg_header); - - ut_ad(!mutex_own(&kernel_mutex) - || mtr_memo_contains(mtr, fil_space_get_latch(space), - MTR_MEMO_X_LOCK)); - latch = fil_space_get_latch(space); - - mtr_x_lock(latch, mtr); - - if (rw_lock_get_x_lock_count(latch) == 1) { - /* This thread did not own the latch before this call: free - excess pages from the insert buffer free list */ - - if (space == 0) { - ibuf_free_excess_pages(space); - } - } - - inode = fseg_inode_get(seg_header, mtr); - - if (!has_done_reservation) { - success = fsp_reserve_free_extents(&n_reserved, space, 2, - FSP_NORMAL, mtr); - if 
(!success) { - return(FIL_NULL); - } - } - - page_no = fseg_alloc_free_page_low(buf_frame_get_space_id(inode), - inode, hint, direction, mtr); - if (!has_done_reservation) { - fil_space_release_free_extents(space, n_reserved); - } - - return(page_no); -} - -/************************************************************************** -Allocates a single free page from a segment. This function implements -the intelligent allocation strategy which tries to minimize file space -fragmentation. */ - -ulint -fseg_alloc_free_page( -/*=================*/ - /* out: allocated page offset, FIL_NULL if no - page could be allocated */ - fseg_header_t* seg_header,/* in: segment header */ - ulint hint, /* in: hint of which page would be desirable */ - byte direction,/* in: if the new page is needed because - of an index page split, and records are - inserted there in order, into which - direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR */ - mtr_t* mtr) /* in: mtr handle */ -{ - return(fseg_alloc_free_page_general(seg_header, hint, direction, - FALSE, mtr)); -} - -/************************************************************************** -Checks that we have at least 2 frag pages free in the first extent of a -single-table tablespace, and they are also physically initialized to the data -file. That is we have already extended the data file so that those pages are -inside the data file. If not, this function extends the tablespace with -pages. 
*/ -static -ibool -fsp_reserve_free_pages( -/*===================*/ - /* out: TRUE if there were >= 3 free - pages, or we were able to extend */ - ulint space, /* in: space id, must be != 0 */ - fsp_header_t* space_header, /* in: header of that space, - x-latched */ - ulint size, /* in: size of the tablespace in pages, - must be < FSP_EXTENT_SIZE / 2 */ - mtr_t* mtr) /* in: mtr */ -{ - xdes_t* descr; - ulint n_used; - - ut_a(space != 0); - ut_a(size < FSP_EXTENT_SIZE / 2); - - descr = xdes_get_descriptor_with_space_hdr(space_header, space, 0, - mtr); - n_used = xdes_get_n_used(descr, mtr); - - ut_a(n_used <= size); - - if (size >= n_used + 2) { - - return(TRUE); - } - - return(fsp_try_extend_data_file_with_pages(space, n_used + 1, - space_header, mtr)); -} - -/************************************************************************** -Reserves free pages from a tablespace. All mini-transactions which may -use several pages from the tablespace should call this function beforehand -and reserve enough free extents so that they certainly will be able -to do their operation, like a B-tree page split, fully. Reservations -must be released with function fil_space_release_free_extents! - -The alloc_type below has the following meaning: FSP_NORMAL means an -operation which will probably result in more space usage, like an -insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are -deleting rows, then this allocation will in the long run result in -less space usage (after a purge); FSP_CLEANING means allocation done -in a physical record delete (like in a purge) or other cleaning operation -which will result in less space usage in the long run. We prefer the latter -two types of allocation: when space is scarce, FSP_NORMAL allocations -will not succeed, but the latter two allocations will succeed, if possible. 
-The purpose is to avoid dead end where the database is full but the -user cannot free any space because these freeing operations temporarily -reserve some space. - -Single-table tablespaces whose size is < 32 pages are a special case. In this -function we would liberally reserve several 64 page extents for every page -split or merge in a B-tree. But we do not want to waste disk space if the table -only occupies < 32 pages. That is why we apply different rules in that special -case, just ensuring that there are 3 free pages available. */ - -ibool -fsp_reserve_free_extents( -/*=====================*/ - /* out: TRUE if we were able to make the reservation */ - ulint* n_reserved,/* out: number of extents actually reserved; if we - return TRUE and the tablespace size is < 64 pages, - then this can be 0, otherwise it is n_ext */ - ulint space, /* in: space id */ - ulint n_ext, /* in: number of extents to reserve */ - ulint alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */ - mtr_t* mtr) /* in: mtr */ -{ - fsp_header_t* space_header; - rw_lock_t* latch; - ulint n_free_list_ext; - ulint free_limit; - ulint size; - ulint n_free; - ulint n_free_up; - ulint reserve; - ibool success; - ulint n_pages_added; - - ut_ad(mtr); - ut_ad(!mutex_own(&kernel_mutex) - || mtr_memo_contains(mtr, fil_space_get_latch(space), - MTR_MEMO_X_LOCK)); - *n_reserved = n_ext; - - latch = fil_space_get_latch(space); - - mtr_x_lock(latch, mtr); - - space_header = fsp_get_space_header(space, mtr); -try_again: - size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, mtr); - - if (size < FSP_EXTENT_SIZE / 2) { - /* Use different rules for small single-table tablespaces */ - *n_reserved = 0; - return(fsp_reserve_free_pages(space, space_header, size, mtr)); - } - - n_free_list_ext = flst_get_len(space_header + FSP_FREE, mtr); - - free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT, - MLOG_4BYTES, mtr); - - /* Below we play safe when counting free extents above the free limit: - some of 
them will contain extent descriptor pages, and therefore - will not be free extents */ - - n_free_up = (size - free_limit) / FSP_EXTENT_SIZE; - - if (n_free_up > 0) { - n_free_up--; - n_free_up = n_free_up - n_free_up - / (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE); - } - - n_free = n_free_list_ext + n_free_up; - - if (alloc_type == FSP_NORMAL) { - /* We reserve 1 extent + 0.5 % of the space size to undo logs - and 1 extent + 0.5 % to cleaning operations; NOTE: this source - code is duplicated in the function below! */ - - reserve = 2 + ((size / FSP_EXTENT_SIZE) * 2) / 200; - - if (n_free <= reserve + n_ext) { - - goto try_to_extend; - } - } else if (alloc_type == FSP_UNDO) { - /* We reserve 0.5 % of the space size to cleaning operations */ - - reserve = 1 + ((size / FSP_EXTENT_SIZE) * 1) / 200; - - if (n_free <= reserve + n_ext) { - - goto try_to_extend; - } - } else { - ut_a(alloc_type == FSP_CLEANING); - } - - success = fil_space_reserve_free_extents(space, n_free, n_ext); - - if (success) { - return(TRUE); - } -try_to_extend: - success = fsp_try_extend_data_file(&n_pages_added, space, - space_header, mtr); - if (success && n_pages_added > 0) { - - goto try_again; - } - - return(FALSE); -} - -/************************************************************************** -This function should be used to get information on how much we still -will be able to insert new data to the database without running out the -tablespace. Only free extents are taken into account and we also subtract -the safety margin required by the above function fsp_reserve_free_extents. 
*/ - -ullint -fsp_get_available_space_in_free_extents( -/*====================================*/ - /* out: available space in kB */ - ulint space) /* in: space id */ -{ - fsp_header_t* space_header; - ulint n_free_list_ext; - ulint free_limit; - ulint size; - ulint n_free; - ulint n_free_up; - ulint reserve; - rw_lock_t* latch; - mtr_t mtr; - - ut_ad(!mutex_own(&kernel_mutex)); - - mtr_start(&mtr); - - latch = fil_space_get_latch(space); - - mtr_x_lock(latch, &mtr); - - space_header = fsp_get_space_header(space, &mtr); - - size = mtr_read_ulint(space_header + FSP_SIZE, MLOG_4BYTES, &mtr); - - n_free_list_ext = flst_get_len(space_header + FSP_FREE, &mtr); - - free_limit = mtr_read_ulint(space_header + FSP_FREE_LIMIT, - MLOG_4BYTES, &mtr); - mtr_commit(&mtr); - - if (size < FSP_EXTENT_SIZE) { - ut_a(space != 0); /* This must be a single-table - tablespace */ - - return(0); /* TODO: count free frag pages and - return a value based on that */ - } - - /* Below we play safe when counting free extents above the free limit: - some of them will contain extent descriptor pages, and therefore - will not be free extents */ - - n_free_up = (size - free_limit) / FSP_EXTENT_SIZE; - - if (n_free_up > 0) { - n_free_up--; - n_free_up = n_free_up - n_free_up - / (XDES_DESCRIBED_PER_PAGE / FSP_EXTENT_SIZE); - } - - n_free = n_free_list_ext + n_free_up; - - /* We reserve 1 extent + 0.5 % of the space size to undo logs - and 1 extent + 0.5 % to cleaning operations; NOTE: this source - code is duplicated in the function above! */ - - reserve = 2 + ((size / FSP_EXTENT_SIZE) * 2) / 200; - - if (reserve > n_free) { - return(0); - } - - return((ullint)(n_free - reserve) - * FSP_EXTENT_SIZE - * (UNIV_PAGE_SIZE / 1024)); -} - -/************************************************************************ -Marks a page used. The page must reside within the extents of the given -segment. 
*/ -static -void -fseg_mark_page_used( -/*================*/ - fseg_inode_t* seg_inode,/* in: segment inode */ - ulint space, /* in: space id */ - ulint page, /* in: page offset */ - mtr_t* mtr) /* in: mtr */ -{ - xdes_t* descr; - ulint not_full_n_used; - - ut_ad(seg_inode && mtr); - - descr = xdes_get_descriptor(space, page, mtr); - - ut_ad(mtr_read_ulint(seg_inode + FSEG_ID, MLOG_4BYTES, mtr) - == mtr_read_ulint(descr + XDES_ID, MLOG_4BYTES, mtr)); - - if (xdes_is_free(descr, mtr)) { - /* We move the extent from the free list to the - NOT_FULL list */ - flst_remove(seg_inode + FSEG_FREE, descr + XDES_FLST_NODE, - mtr); - flst_add_last(seg_inode + FSEG_NOT_FULL, - descr + XDES_FLST_NODE, mtr); - } - - ut_ad(xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr) - == TRUE); - /* We mark the page as used */ - xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, FALSE, mtr); - - not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED, - MLOG_4BYTES, mtr); - not_full_n_used++; - mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, not_full_n_used, - MLOG_4BYTES, mtr); - if (xdes_is_full(descr, mtr)) { - /* We move the extent from the NOT_FULL list to the - FULL list */ - flst_remove(seg_inode + FSEG_NOT_FULL, - descr + XDES_FLST_NODE, mtr); - flst_add_last(seg_inode + FSEG_FULL, - descr + XDES_FLST_NODE, mtr); - - mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, - not_full_n_used - FSP_EXTENT_SIZE, - MLOG_4BYTES, mtr); - } -} - -/************************************************************************** -Frees a single page of a segment. 
*/ -static -void -fseg_free_page_low( -/*===============*/ - fseg_inode_t* seg_inode, /* in: segment inode */ - ulint space, /* in: space id */ - ulint page, /* in: page offset */ - mtr_t* mtr) /* in: mtr handle */ -{ - xdes_t* descr; - ulint not_full_n_used; - ulint state; - dulint descr_id; - dulint seg_id; - ulint i; - - ut_ad(seg_inode && mtr); - ut_ad(mach_read_from_4(seg_inode + FSEG_MAGIC_N) - == FSEG_MAGIC_N_VALUE); - - /* Drop search system page hash index if the page is found in - the pool and is hashed */ - - btr_search_drop_page_hash_when_freed(space, page); - - descr = xdes_get_descriptor(space, page, mtr); - - ut_a(descr); - if (xdes_get_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, mtr)) { - fputs("InnoDB: Dump of the tablespace extent descriptor: ", - stderr); - ut_print_buf(stderr, descr, 40); - - fprintf(stderr, "\n" - "InnoDB: Serious error! InnoDB is trying to" - " free page %lu\n" - "InnoDB: though it is already marked as free" - " in the tablespace!\n" - "InnoDB: The tablespace free space info is corrupt.\n" - "InnoDB: You may need to dump your" - " InnoDB tables and recreate the whole\n" - "InnoDB: database!\n", (ulong) page); -crash: - fputs("InnoDB: Please refer to\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "forcing-recovery.html\n" - "InnoDB: about forcing recovery.\n", stderr); - ut_error; - } - - state = xdes_get_state(descr, mtr); - - if (state != XDES_FSEG) { - /* The page is in the fragment pages of the segment */ - - for (i = 0;; i++) { - if (fseg_get_nth_frag_page_no(seg_inode, i, mtr) - == page) { - - fseg_set_nth_frag_page_no(seg_inode, i, - FIL_NULL, mtr); - break; - } - } - - fsp_free_page(space, page, mtr); - - return; - } - - /* If we get here, the page is in some extent of the segment */ - - descr_id = mtr_read_dulint(descr + XDES_ID, mtr); - seg_id = mtr_read_dulint(seg_inode + FSEG_ID, mtr); -#if 0 - fprintf(stderr, - "InnoDB: InnoDB is freeing space %lu page %lu,\n" - "InnoDB: which belongs to descr seg 
%lu %lu\n" - "InnoDB: segment %lu %lu.\n", - (ulong) space, (ulong) page, - (ulong) ut_dulint_get_high(descr_id), - (ulong) ut_dulint_get_low(descr_id), - (ulong) ut_dulint_get_high(seg_id), - (ulong) ut_dulint_get_low(seg_id)); -#endif /* 0 */ - if (0 != ut_dulint_cmp(descr_id, seg_id)) { - fputs("InnoDB: Dump of the tablespace extent descriptor: ", - stderr); - ut_print_buf(stderr, descr, 40); - fputs("\nInnoDB: Dump of the segment inode: ", stderr); - ut_print_buf(stderr, seg_inode, 40); - putc('\n', stderr); - - fprintf(stderr, - "InnoDB: Serious error: InnoDB is trying to" - " free space %lu page %lu,\n" - "InnoDB: which does not belong to" - " segment %lu %lu but belongs\n" - "InnoDB: to segment %lu %lu.\n", - (ulong) space, (ulong) page, - (ulong) ut_dulint_get_high(descr_id), - (ulong) ut_dulint_get_low(descr_id), - (ulong) ut_dulint_get_high(seg_id), - (ulong) ut_dulint_get_low(seg_id)); - goto crash; - } - - not_full_n_used = mtr_read_ulint(seg_inode + FSEG_NOT_FULL_N_USED, - MLOG_4BYTES, mtr); - if (xdes_is_full(descr, mtr)) { - /* The fragment is full: move it to another list */ - flst_remove(seg_inode + FSEG_FULL, - descr + XDES_FLST_NODE, mtr); - flst_add_last(seg_inode + FSEG_NOT_FULL, - descr + XDES_FLST_NODE, mtr); - mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, - not_full_n_used + FSP_EXTENT_SIZE - 1, - MLOG_4BYTES, mtr); - } else { - ut_a(not_full_n_used > 0); - mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, - not_full_n_used - 1, MLOG_4BYTES, mtr); - } - - xdes_set_bit(descr, XDES_FREE_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr); - xdes_set_bit(descr, XDES_CLEAN_BIT, page % FSP_EXTENT_SIZE, TRUE, mtr); - - if (xdes_is_free(descr, mtr)) { - /* The extent has become free: free it to space */ - flst_remove(seg_inode + FSEG_NOT_FULL, - descr + XDES_FLST_NODE, mtr); - fsp_free_extent(space, page, mtr); - } -} - -/************************************************************************** -Frees a single page of a segment. 
*/ - -void -fseg_free_page( -/*===========*/ - fseg_header_t* seg_header, /* in: segment header */ - ulint space, /* in: space id */ - ulint page, /* in: page offset */ - mtr_t* mtr) /* in: mtr handle */ -{ - fseg_inode_t* seg_inode; - - ut_ad(!mutex_own(&kernel_mutex) - || mtr_memo_contains(mtr, fil_space_get_latch(space), - MTR_MEMO_X_LOCK)); - - mtr_x_lock(fil_space_get_latch(space), mtr); - - seg_inode = fseg_inode_get(seg_header, mtr); - - fseg_free_page_low(seg_inode, space, page, mtr); - -#ifdef UNIV_DEBUG_FILE_ACCESSES - buf_page_set_file_page_was_freed(space, page); -#endif -} - -/************************************************************************** -Frees an extent of a segment to the space free list. */ -static -void -fseg_free_extent( -/*=============*/ - fseg_inode_t* seg_inode, /* in: segment inode */ - ulint space, /* in: space id */ - ulint page, /* in: a page in the extent */ - mtr_t* mtr) /* in: mtr handle */ -{ - ulint first_page_in_extent; - xdes_t* descr; - ulint not_full_n_used; - ulint descr_n_used; - ulint i; - - ut_ad(seg_inode && mtr); - - descr = xdes_get_descriptor(space, page, mtr); - - ut_a(xdes_get_state(descr, mtr) == XDES_FSEG); - ut_a(0 == ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID, mtr), - mtr_read_dulint(seg_inode + FSEG_ID, mtr))); - - first_page_in_extent = page - (page % FSP_EXTENT_SIZE); - - for (i = 0; i < FSP_EXTENT_SIZE; i++) { - if (FALSE == xdes_get_bit(descr, XDES_FREE_BIT, i, mtr)) { - - /* Drop search system page hash index if the page is - found in the pool and is hashed */ - - btr_search_drop_page_hash_when_freed( - space, first_page_in_extent + i); - } - } - - if (xdes_is_full(descr, mtr)) { - flst_remove(seg_inode + FSEG_FULL, - descr + XDES_FLST_NODE, mtr); - } else if (xdes_is_free(descr, mtr)) { - flst_remove(seg_inode + FSEG_FREE, - descr + XDES_FLST_NODE, mtr); - } else { - flst_remove(seg_inode + FSEG_NOT_FULL, - descr + XDES_FLST_NODE, mtr); - - not_full_n_used = mtr_read_ulint( - seg_inode + 
FSEG_NOT_FULL_N_USED, MLOG_4BYTES, mtr); - - descr_n_used = xdes_get_n_used(descr, mtr); - ut_a(not_full_n_used >= descr_n_used); - mlog_write_ulint(seg_inode + FSEG_NOT_FULL_N_USED, - not_full_n_used - descr_n_used, - MLOG_4BYTES, mtr); - } - - fsp_free_extent(space, page, mtr); - -#ifdef UNIV_DEBUG_FILE_ACCESSES - for (i = 0; i < FSP_EXTENT_SIZE; i++) { - - buf_page_set_file_page_was_freed(space, - first_page_in_extent + i); - } -#endif -} - -/************************************************************************** -Frees part of a segment. This function can be used to free a segment by -repeatedly calling this function in different mini-transactions. Doing -the freeing in a single mini-transaction might result in too big a -mini-transaction. */ - -ibool -fseg_free_step( -/*===========*/ - /* out: TRUE if freeing completed */ - fseg_header_t* header, /* in, own: segment header; NOTE: if the header - resides on the first page of the frag list - of the segment, this pointer becomes obsolete - after the last freeing step */ - mtr_t* mtr) /* in: mtr */ -{ - ulint n; - ulint page; - xdes_t* descr; - fseg_inode_t* inode; - ulint space; - - space = buf_frame_get_space_id(header); - - ut_ad(!mutex_own(&kernel_mutex) - || mtr_memo_contains(mtr, fil_space_get_latch(space), - MTR_MEMO_X_LOCK)); - - mtr_x_lock(fil_space_get_latch(space), mtr); - - descr = xdes_get_descriptor(space, buf_frame_get_page_no(header), mtr); - - /* Check that the header resides on a page which has not been - freed yet */ - - ut_a(descr); - ut_a(xdes_get_bit(descr, XDES_FREE_BIT, buf_frame_get_page_no(header) - % FSP_EXTENT_SIZE, mtr) == FALSE); - inode = fseg_inode_get(header, mtr); - - descr = fseg_get_first_extent(inode, mtr); - - if (descr != NULL) { - /* Free the extent held by the segment */ - page = xdes_get_offset(descr); - - fseg_free_extent(inode, space, page, mtr); - - return(FALSE); - } - - /* Free a frag page */ - n = fseg_find_last_used_frag_page_slot(inode, mtr); - - if (n == 
ULINT_UNDEFINED) { - /* Freeing completed: free the segment inode */ - fsp_free_seg_inode(space, inode, mtr); - - return(TRUE); - } - - fseg_free_page_low(inode, space, - fseg_get_nth_frag_page_no(inode, n, mtr), mtr); - - n = fseg_find_last_used_frag_page_slot(inode, mtr); - - if (n == ULINT_UNDEFINED) { - /* Freeing completed: free the segment inode */ - fsp_free_seg_inode(space, inode, mtr); - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************** -Frees part of a segment. Differs from fseg_free_step because this function -leaves the header page unfreed. */ - -ibool -fseg_free_step_not_header( -/*======================*/ - /* out: TRUE if freeing completed, except the - header page */ - fseg_header_t* header, /* in: segment header which must reside on - the first fragment page of the segment */ - mtr_t* mtr) /* in: mtr */ -{ - ulint n; - ulint page; - xdes_t* descr; - fseg_inode_t* inode; - ulint space; - ulint page_no; - - space = buf_frame_get_space_id(header); - - ut_ad(!mutex_own(&kernel_mutex) - || mtr_memo_contains(mtr, fil_space_get_latch(space), - MTR_MEMO_X_LOCK)); - - mtr_x_lock(fil_space_get_latch(space), mtr); - - inode = fseg_inode_get(header, mtr); - - descr = fseg_get_first_extent(inode, mtr); - - if (descr != NULL) { - /* Free the extent held by the segment */ - page = xdes_get_offset(descr); - - fseg_free_extent(inode, space, page, mtr); - - return(FALSE); - } - - /* Free a frag page */ - - n = fseg_find_last_used_frag_page_slot(inode, mtr); - - if (n == ULINT_UNDEFINED) { - ut_error; - } - - page_no = fseg_get_nth_frag_page_no(inode, n, mtr); - - if (page_no == buf_frame_get_page_no(header)) { - - return(TRUE); - } - - fseg_free_page_low(inode, space, page_no, mtr); - - return(FALSE); -} - -/*********************************************************************** -Frees a segment. 
The freeing is performed in several mini-transactions, -so that there is no danger of bufferfixing too many buffer pages. */ - -void -fseg_free( -/*======*/ - ulint space, /* in: space id */ - ulint page_no,/* in: page number where the segment header is - placed */ - ulint offset) /* in: byte offset of the segment header on that - page */ -{ - mtr_t mtr; - ibool finished; - fseg_header_t* header; - fil_addr_t addr; - - addr.page = page_no; - addr.boffset = offset; - - for (;;) { - mtr_start(&mtr); - - header = fut_get_ptr(space, addr, RW_X_LATCH, &mtr); - - finished = fseg_free_step(header, &mtr); - - mtr_commit(&mtr); - - if (finished) { - - return; - } - } -} - -/************************************************************************** -Returns the first extent descriptor for a segment. We think of the extent -lists of the segment catenated in the order FSEG_FULL -> FSEG_NOT_FULL --> FSEG_FREE. */ -static -xdes_t* -fseg_get_first_extent( -/*==================*/ - /* out: the first extent descriptor, or NULL if - none */ - fseg_inode_t* inode, /* in: segment inode */ - mtr_t* mtr) /* in: mtr */ -{ - fil_addr_t first; - ulint space; - xdes_t* descr; - - ut_ad(inode && mtr); - - space = buf_frame_get_space_id(inode); - - first = fil_addr_null; - - if (flst_get_len(inode + FSEG_FULL, mtr) > 0) { - - first = flst_get_first(inode + FSEG_FULL, mtr); - - } else if (flst_get_len(inode + FSEG_NOT_FULL, mtr) > 0) { - - first = flst_get_first(inode + FSEG_NOT_FULL, mtr); - - } else if (flst_get_len(inode + FSEG_FREE, mtr) > 0) { - - first = flst_get_first(inode + FSEG_FREE, mtr); - } - - if (first.page == FIL_NULL) { - - return(NULL); - } - descr = xdes_lst_get_descriptor(space, first, mtr); - - return(descr); -} - -/*********************************************************************** -Validates a segment. 
*/ -static -ibool -fseg_validate_low( -/*==============*/ - /* out: TRUE if ok */ - fseg_inode_t* inode, /* in: segment inode */ - mtr_t* mtr2) /* in: mtr */ -{ - ulint space; - dulint seg_id; - mtr_t mtr; - xdes_t* descr; - fil_addr_t node_addr; - ulint n_used = 0; - ulint n_used2 = 0; - - ut_ad(mtr_memo_contains(mtr2, buf_block_align(inode), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(mach_read_from_4(inode + FSEG_MAGIC_N) == FSEG_MAGIC_N_VALUE); - - space = buf_frame_get_space_id(inode); - - seg_id = mtr_read_dulint(inode + FSEG_ID, mtr2); - n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, - MLOG_4BYTES, mtr2); - flst_validate(inode + FSEG_FREE, mtr2); - flst_validate(inode + FSEG_NOT_FULL, mtr2); - flst_validate(inode + FSEG_FULL, mtr2); - - /* Validate FSEG_FREE list */ - node_addr = flst_get_first(inode + FSEG_FREE, mtr2); - - while (!fil_addr_is_null(node_addr)) { - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - descr = xdes_lst_get_descriptor(space, node_addr, &mtr); - - ut_a(xdes_get_n_used(descr, &mtr) == 0); - ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG); - ut_a(!ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID, &mtr), - seg_id)); - - node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); - mtr_commit(&mtr); - } - - /* Validate FSEG_NOT_FULL list */ - - node_addr = flst_get_first(inode + FSEG_NOT_FULL, mtr2); - - while (!fil_addr_is_null(node_addr)) { - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - descr = xdes_lst_get_descriptor(space, node_addr, &mtr); - - ut_a(xdes_get_n_used(descr, &mtr) > 0); - ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE); - ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG); - ut_a(!ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID, &mtr), - seg_id)); - - n_used2 += xdes_get_n_used(descr, &mtr); - - node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); - mtr_commit(&mtr); - } - - /* Validate FSEG_FULL list */ - - node_addr = flst_get_first(inode + FSEG_FULL, mtr2); - - while 
(!fil_addr_is_null(node_addr)) { - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - descr = xdes_lst_get_descriptor(space, node_addr, &mtr); - - ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE); - ut_a(xdes_get_state(descr, &mtr) == XDES_FSEG); - ut_a(!ut_dulint_cmp(mtr_read_dulint(descr + XDES_ID, &mtr), - seg_id)); - - node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); - mtr_commit(&mtr); - } - - ut_a(n_used == n_used2); - - return(TRUE); -} - -/*********************************************************************** -Validates a segment. */ - -ibool -fseg_validate( -/*==========*/ - /* out: TRUE if ok */ - fseg_header_t* header, /* in: segment header */ - mtr_t* mtr2) /* in: mtr */ -{ - fseg_inode_t* inode; - ibool ret; - ulint space; - - space = buf_frame_get_space_id(header); - - mtr_x_lock(fil_space_get_latch(space), mtr2); - - inode = fseg_inode_get(header, mtr2); - - ret = fseg_validate_low(inode, mtr2); - - return(ret); -} - -/*********************************************************************** -Writes info of a segment. 
*/ -static -void -fseg_print_low( -/*===========*/ - fseg_inode_t* inode, /* in: segment inode */ - mtr_t* mtr) /* in: mtr */ -{ - ulint space; - ulint seg_id_low; - ulint seg_id_high; - ulint n_used; - ulint n_frag; - ulint n_free; - ulint n_not_full; - ulint n_full; - ulint reserved; - ulint used; - ulint page_no; - dulint d_var; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(inode), - MTR_MEMO_PAGE_X_FIX)); - space = buf_frame_get_space_id(inode); - page_no = buf_frame_get_page_no(inode); - - reserved = fseg_n_reserved_pages_low(inode, &used, mtr); - - d_var = mtr_read_dulint(inode + FSEG_ID, mtr); - - seg_id_low = ut_dulint_get_low(d_var); - seg_id_high = ut_dulint_get_high(d_var); - - n_used = mtr_read_ulint(inode + FSEG_NOT_FULL_N_USED, - MLOG_4BYTES, mtr); - n_frag = fseg_get_n_frag_pages(inode, mtr); - n_free = flst_get_len(inode + FSEG_FREE, mtr); - n_not_full = flst_get_len(inode + FSEG_NOT_FULL, mtr); - n_full = flst_get_len(inode + FSEG_FULL, mtr); - - fprintf(stderr, - "SEGMENT id %lu %lu space %lu; page %lu;" - " res %lu used %lu; full ext %lu\n" - "fragm pages %lu; free extents %lu;" - " not full extents %lu: pages %lu\n", - (ulong) seg_id_high, (ulong) seg_id_low, - (ulong) space, (ulong) page_no, - (ulong) reserved, (ulong) used, (ulong) n_full, - (ulong) n_frag, (ulong) n_free, (ulong) n_not_full, - (ulong) n_used); -} - -/*********************************************************************** -Writes info of a segment. */ - -void -fseg_print( -/*=======*/ - fseg_header_t* header, /* in: segment header */ - mtr_t* mtr) /* in: mtr */ -{ - fseg_inode_t* inode; - ulint space; - - space = buf_frame_get_space_id(header); - - mtr_x_lock(fil_space_get_latch(space), mtr); - - inode = fseg_inode_get(header, mtr); - - fseg_print_low(inode, mtr); -} - -/*********************************************************************** -Validates the file space system and its segments. 
*/ - -ibool -fsp_validate( -/*=========*/ - /* out: TRUE if ok */ - ulint space) /* in: space id */ -{ - fsp_header_t* header; - fseg_inode_t* seg_inode; - page_t* seg_inode_page; - ulint size; - ulint free_limit; - ulint frag_n_used; - mtr_t mtr; - mtr_t mtr2; - xdes_t* descr; - fil_addr_t node_addr; - fil_addr_t next_node_addr; - ulint descr_count = 0; - ulint n_used = 0; - ulint n_used2 = 0; - ulint n_full_frag_pages; - ulint n; - ulint seg_inode_len_free; - ulint seg_inode_len_full; - - /* Start first a mini-transaction mtr2 to lock out all other threads - from the fsp system */ - mtr_start(&mtr2); - mtr_x_lock(fil_space_get_latch(space), &mtr2); - - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - header = fsp_get_space_header(space, &mtr); - - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr); - free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT, - MLOG_4BYTES, &mtr); - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, - MLOG_4BYTES, &mtr); - - n_full_frag_pages = FSP_EXTENT_SIZE - * flst_get_len(header + FSP_FULL_FRAG, &mtr); - - if (UNIV_UNLIKELY(free_limit > size)) { - - ut_a(space != 0); - ut_a(size < FSP_EXTENT_SIZE); - } - - flst_validate(header + FSP_FREE, &mtr); - flst_validate(header + FSP_FREE_FRAG, &mtr); - flst_validate(header + FSP_FULL_FRAG, &mtr); - - mtr_commit(&mtr); - - /* Validate FSP_FREE list */ - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - header = fsp_get_space_header(space, &mtr); - node_addr = flst_get_first(header + FSP_FREE, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - descr_count++; - descr = xdes_lst_get_descriptor(space, node_addr, &mtr); - - ut_a(xdes_get_n_used(descr, &mtr) == 0); - ut_a(xdes_get_state(descr, &mtr) == XDES_FREE); - - node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); - mtr_commit(&mtr); - } - - /* Validate FSP_FREE_FRAG list */ - 
mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - header = fsp_get_space_header(space, &mtr); - node_addr = flst_get_first(header + FSP_FREE_FRAG, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - descr_count++; - descr = xdes_lst_get_descriptor(space, node_addr, &mtr); - - ut_a(xdes_get_n_used(descr, &mtr) > 0); - ut_a(xdes_get_n_used(descr, &mtr) < FSP_EXTENT_SIZE); - ut_a(xdes_get_state(descr, &mtr) == XDES_FREE_FRAG); - - n_used += xdes_get_n_used(descr, &mtr); - node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); - - mtr_commit(&mtr); - } - - /* Validate FSP_FULL_FRAG list */ - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - header = fsp_get_space_header(space, &mtr); - node_addr = flst_get_first(header + FSP_FULL_FRAG, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - descr_count++; - descr = xdes_lst_get_descriptor(space, node_addr, &mtr); - - ut_a(xdes_get_n_used(descr, &mtr) == FSP_EXTENT_SIZE); - ut_a(xdes_get_state(descr, &mtr) == XDES_FULL_FRAG); - - node_addr = flst_get_next_addr(descr + XDES_FLST_NODE, &mtr); - mtr_commit(&mtr); - } - - /* Validate segments */ - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - header = fsp_get_space_header(space, &mtr); - - node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr); - - seg_inode_len_full = flst_get_len(header + FSP_SEG_INODES_FULL, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - - for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) { - - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - seg_inode_page = fut_get_ptr( - space, node_addr, RW_X_LATCH, &mtr) - - FSEG_INODE_PAGE_NODE; - - seg_inode = fsp_seg_inode_page_get_nth_inode( - seg_inode_page, n, &mtr); - ut_a(ut_dulint_cmp( - mach_read_from_8(seg_inode + 
FSEG_ID), - ut_dulint_zero) != 0); - fseg_validate_low(seg_inode, &mtr); - - descr_count += flst_get_len(seg_inode + FSEG_FREE, - &mtr); - descr_count += flst_get_len(seg_inode + FSEG_FULL, - &mtr); - descr_count += flst_get_len(seg_inode + FSEG_NOT_FULL, - &mtr); - - n_used2 += fseg_get_n_frag_pages(seg_inode, &mtr); - - next_node_addr = flst_get_next_addr( - seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr); - mtr_commit(&mtr); - } - - node_addr = next_node_addr; - } - - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - header = fsp_get_space_header(space, &mtr); - - node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr); - - seg_inode_len_free = flst_get_len(header + FSP_SEG_INODES_FREE, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - - for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) { - - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - seg_inode_page = fut_get_ptr( - space, node_addr, RW_X_LATCH, &mtr) - - FSEG_INODE_PAGE_NODE; - - seg_inode = fsp_seg_inode_page_get_nth_inode( - seg_inode_page, n, &mtr); - if (ut_dulint_cmp( - mach_read_from_8(seg_inode + FSEG_ID), - ut_dulint_zero) != 0) { - fseg_validate_low(seg_inode, &mtr); - - descr_count += flst_get_len( - seg_inode + FSEG_FREE, &mtr); - descr_count += flst_get_len( - seg_inode + FSEG_FULL, &mtr); - descr_count += flst_get_len( - seg_inode + FSEG_NOT_FULL, &mtr); - n_used2 += fseg_get_n_frag_pages( - seg_inode, &mtr); - } - - next_node_addr = flst_get_next_addr( - seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr); - mtr_commit(&mtr); - } - - node_addr = next_node_addr; - } - - ut_a(descr_count * FSP_EXTENT_SIZE == free_limit); - ut_a(n_used + n_full_frag_pages - == n_used2 + 2* ((free_limit + XDES_DESCRIBED_PER_PAGE - 1) - / XDES_DESCRIBED_PER_PAGE) - + seg_inode_len_full + seg_inode_len_free); - ut_a(frag_n_used == n_used); - - mtr_commit(&mtr2); - - return(TRUE); -} - -/*********************************************************************** 
-Prints info of a file space. */ - -void -fsp_print( -/*======*/ - ulint space) /* in: space id */ -{ - fsp_header_t* header; - fseg_inode_t* seg_inode; - page_t* seg_inode_page; - ulint size; - ulint free_limit; - ulint frag_n_used; - fil_addr_t node_addr; - fil_addr_t next_node_addr; - ulint n_free; - ulint n_free_frag; - ulint n_full_frag; - ulint seg_id_low; - ulint seg_id_high; - ulint n; - ulint n_segs = 0; - dulint d_var; - mtr_t mtr; - mtr_t mtr2; - - /* Start first a mini-transaction mtr2 to lock out all other threads - from the fsp system */ - - mtr_start(&mtr2); - - mtr_x_lock(fil_space_get_latch(space), &mtr2); - - mtr_start(&mtr); - - mtr_x_lock(fil_space_get_latch(space), &mtr); - - header = fsp_get_space_header(space, &mtr); - - size = mtr_read_ulint(header + FSP_SIZE, MLOG_4BYTES, &mtr); - - free_limit = mtr_read_ulint(header + FSP_FREE_LIMIT, MLOG_4BYTES, - &mtr); - frag_n_used = mtr_read_ulint(header + FSP_FRAG_N_USED, MLOG_4BYTES, - &mtr); - n_free = flst_get_len(header + FSP_FREE, &mtr); - n_free_frag = flst_get_len(header + FSP_FREE_FRAG, &mtr); - n_full_frag = flst_get_len(header + FSP_FULL_FRAG, &mtr); - - d_var = mtr_read_dulint(header + FSP_SEG_ID, &mtr); - - seg_id_low = ut_dulint_get_low(d_var); - seg_id_high = ut_dulint_get_high(d_var); - - fprintf(stderr, - "FILE SPACE INFO: id %lu\n" - "size %lu, free limit %lu, free extents %lu\n" - "not full frag extents %lu: used pages %lu," - " full frag extents %lu\n" - "first seg id not used %lu %lu\n", - (long) space, - (ulong) size, (ulong) free_limit, (ulong) n_free, - (ulong) n_free_frag, (ulong) frag_n_used, (ulong) n_full_frag, - (ulong) seg_id_high, (ulong) seg_id_low); - - mtr_commit(&mtr); - - /* Print segments */ - - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - header = fsp_get_space_header(space, &mtr); - - node_addr = flst_get_first(header + FSP_SEG_INODES_FULL, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - - for (n = 0; n < 
FSP_SEG_INODES_PER_PAGE; n++) { - - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - seg_inode_page = fut_get_ptr( - space, node_addr, RW_X_LATCH, &mtr) - - FSEG_INODE_PAGE_NODE; - - seg_inode = fsp_seg_inode_page_get_nth_inode( - seg_inode_page, n, &mtr); - ut_a(ut_dulint_cmp( - mach_read_from_8(seg_inode + FSEG_ID), - ut_dulint_zero) != 0); - fseg_print_low(seg_inode, &mtr); - - n_segs++; - - next_node_addr = flst_get_next_addr( - seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr); - mtr_commit(&mtr); - } - - node_addr = next_node_addr; - } - - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - header = fsp_get_space_header(space, &mtr); - - node_addr = flst_get_first(header + FSP_SEG_INODES_FREE, &mtr); - - mtr_commit(&mtr); - - while (!fil_addr_is_null(node_addr)) { - - for (n = 0; n < FSP_SEG_INODES_PER_PAGE; n++) { - - mtr_start(&mtr); - mtr_x_lock(fil_space_get_latch(space), &mtr); - - seg_inode_page = fut_get_ptr( - space, node_addr, RW_X_LATCH, &mtr) - - FSEG_INODE_PAGE_NODE; - - seg_inode = fsp_seg_inode_page_get_nth_inode( - seg_inode_page, n, &mtr); - if (ut_dulint_cmp( - mach_read_from_8(seg_inode + FSEG_ID), - ut_dulint_zero) != 0) { - - fseg_print_low(seg_inode, &mtr); - n_segs++; - } - - next_node_addr = flst_get_next_addr( - seg_inode_page + FSEG_INODE_PAGE_NODE, &mtr); - mtr_commit(&mtr); - } - - node_addr = next_node_addr; - } - - mtr_commit(&mtr2); - - fprintf(stderr, "NUMBER of file segments: %lu\n", (ulong) n_segs); -} diff --git a/storage/innobase/fut/fut0fut.c b/storage/innobase/fut/fut0fut.c deleted file mode 100644 index 7f7a8fa39e7..00000000000 --- a/storage/innobase/fut/fut0fut.c +++ /dev/null @@ -1,14 +0,0 @@ -/********************************************************************** -File-based utilities - -(c) 1995 Innobase Oy - -Created 12/13/1995 Heikki Tuuri -***********************************************************************/ - -#include "fut0fut.h" - -#ifdef UNIV_NONINL -#include "fut0fut.ic" 
-#endif - diff --git a/storage/innobase/fut/fut0lst.c b/storage/innobase/fut/fut0lst.c deleted file mode 100644 index 75fa8bf5552..00000000000 --- a/storage/innobase/fut/fut0lst.c +++ /dev/null @@ -1,518 +0,0 @@ -/********************************************************************** -File-based list utilities - -(c) 1995 Innobase Oy - -Created 11/28/1995 Heikki Tuuri -***********************************************************************/ - -#include "fut0lst.h" - -#ifdef UNIV_NONINL -#include "fut0lst.ic" -#endif - -#include "buf0buf.h" - - -/************************************************************************ -Adds a node to an empty list. */ -static -void -flst_add_to_empty( -/*==============*/ - flst_base_node_t* base, /* in: pointer to base node of - empty list */ - flst_node_t* node, /* in: node to add */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ulint space; - fil_addr_t node_addr; - ulint len; - - ut_ad(mtr && base && node); - ut_ad(base != node); - ut_ad(mtr_memo_contains(mtr, buf_block_align(base), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(node), - MTR_MEMO_PAGE_X_FIX)); - len = flst_get_len(base, mtr); - ut_a(len == 0); - - buf_ptr_get_fsp_addr(node, &space, &node_addr); - - /* Update first and last fields of base node */ - flst_write_addr(base + FLST_FIRST, node_addr, mtr); - flst_write_addr(base + FLST_LAST, node_addr, mtr); - - /* Set prev and next fields of node to add */ - flst_write_addr(node + FLST_PREV, fil_addr_null, mtr); - flst_write_addr(node + FLST_NEXT, fil_addr_null, mtr); - - /* Update len of base node */ - mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr); -} - -/************************************************************************ -Adds a node as the last node in a list. 
*/ - -void -flst_add_last( -/*==========*/ - flst_base_node_t* base, /* in: pointer to base node of list */ - flst_node_t* node, /* in: node to add */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ulint space; - fil_addr_t node_addr; - ulint len; - fil_addr_t last_addr; - flst_node_t* last_node; - - ut_ad(mtr && base && node); - ut_ad(base != node); - ut_ad(mtr_memo_contains(mtr, buf_block_align(base), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(node), - MTR_MEMO_PAGE_X_FIX)); - len = flst_get_len(base, mtr); - last_addr = flst_get_last(base, mtr); - - buf_ptr_get_fsp_addr(node, &space, &node_addr); - - /* If the list is not empty, call flst_insert_after */ - if (len != 0) { - if (last_addr.page == node_addr.page) { - last_node = buf_frame_align(node) + last_addr.boffset; - } else { - last_node = fut_get_ptr(space, last_addr, RW_X_LATCH, - mtr); - } - - flst_insert_after(base, last_node, node, mtr); - } else { - /* else call flst_add_to_empty */ - flst_add_to_empty(base, node, mtr); - } -} - -/************************************************************************ -Adds a node as the first node in a list. 
*/ - -void -flst_add_first( -/*===========*/ - flst_base_node_t* base, /* in: pointer to base node of list */ - flst_node_t* node, /* in: node to add */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ulint space; - fil_addr_t node_addr; - ulint len; - fil_addr_t first_addr; - flst_node_t* first_node; - - ut_ad(mtr && base && node); - ut_ad(base != node); - ut_ad(mtr_memo_contains(mtr, buf_block_align(base), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(node), - MTR_MEMO_PAGE_X_FIX)); - len = flst_get_len(base, mtr); - first_addr = flst_get_first(base, mtr); - - buf_ptr_get_fsp_addr(node, &space, &node_addr); - - /* If the list is not empty, call flst_insert_before */ - if (len != 0) { - if (first_addr.page == node_addr.page) { - first_node = buf_frame_align(node) - + first_addr.boffset; - } else { - first_node = fut_get_ptr(space, first_addr, - RW_X_LATCH, mtr); - } - - flst_insert_before(base, node, first_node, mtr); - } else { - /* else call flst_add_to_empty */ - flst_add_to_empty(base, node, mtr); - } -} - -/************************************************************************ -Inserts a node after another in a list. 
*/ - -void -flst_insert_after( -/*==============*/ - flst_base_node_t* base, /* in: pointer to base node of list */ - flst_node_t* node1, /* in: node to insert after */ - flst_node_t* node2, /* in: node to add */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ulint space; - fil_addr_t node1_addr; - fil_addr_t node2_addr; - flst_node_t* node3; - fil_addr_t node3_addr; - ulint len; - - ut_ad(mtr && node1 && node2 && base); - ut_ad(base != node1); - ut_ad(base != node2); - ut_ad(node2 != node1); - ut_ad(mtr_memo_contains(mtr, buf_block_align(base), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(node1), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(node2), - MTR_MEMO_PAGE_X_FIX)); - - buf_ptr_get_fsp_addr(node1, &space, &node1_addr); - buf_ptr_get_fsp_addr(node2, &space, &node2_addr); - - node3_addr = flst_get_next_addr(node1, mtr); - - /* Set prev and next fields of node2 */ - flst_write_addr(node2 + FLST_PREV, node1_addr, mtr); - flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr); - - if (!fil_addr_is_null(node3_addr)) { - /* Update prev field of node3 */ - node3 = fut_get_ptr(space, node3_addr, RW_X_LATCH, mtr); - flst_write_addr(node3 + FLST_PREV, node2_addr, mtr); - } else { - /* node1 was last in list: update last field in base */ - flst_write_addr(base + FLST_LAST, node2_addr, mtr); - } - - /* Set next field of node1 */ - flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr); - - /* Update len of base node */ - len = flst_get_len(base, mtr); - mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr); -} - -/************************************************************************ -Inserts a node before another in a list. 
*/ - -void -flst_insert_before( -/*===============*/ - flst_base_node_t* base, /* in: pointer to base node of list */ - flst_node_t* node2, /* in: node to insert */ - flst_node_t* node3, /* in: node to insert before */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ulint space; - flst_node_t* node1; - fil_addr_t node1_addr; - fil_addr_t node2_addr; - fil_addr_t node3_addr; - ulint len; - - ut_ad(mtr && node2 && node3 && base); - ut_ad(base != node2); - ut_ad(base != node3); - ut_ad(node2 != node3); - ut_ad(mtr_memo_contains(mtr, buf_block_align(base), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(node2), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(node3), - MTR_MEMO_PAGE_X_FIX)); - - buf_ptr_get_fsp_addr(node2, &space, &node2_addr); - buf_ptr_get_fsp_addr(node3, &space, &node3_addr); - - node1_addr = flst_get_prev_addr(node3, mtr); - - /* Set prev and next fields of node2 */ - flst_write_addr(node2 + FLST_PREV, node1_addr, mtr); - flst_write_addr(node2 + FLST_NEXT, node3_addr, mtr); - - if (!fil_addr_is_null(node1_addr)) { - /* Update next field of node1 */ - node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH, mtr); - flst_write_addr(node1 + FLST_NEXT, node2_addr, mtr); - } else { - /* node3 was first in list: update first field in base */ - flst_write_addr(base + FLST_FIRST, node2_addr, mtr); - } - - /* Set prev field of node3 */ - flst_write_addr(node3 + FLST_PREV, node2_addr, mtr); - - /* Update len of base node */ - len = flst_get_len(base, mtr); - mlog_write_ulint(base + FLST_LEN, len + 1, MLOG_4BYTES, mtr); -} - -/************************************************************************ -Removes a node. 
*/ - -void -flst_remove( -/*========*/ - flst_base_node_t* base, /* in: pointer to base node of list */ - flst_node_t* node2, /* in: node to remove */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ulint space; - flst_node_t* node1; - fil_addr_t node1_addr; - fil_addr_t node2_addr; - flst_node_t* node3; - fil_addr_t node3_addr; - ulint len; - - ut_ad(mtr && node2 && base); - ut_ad(mtr_memo_contains(mtr, buf_block_align(base), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(node2), - MTR_MEMO_PAGE_X_FIX)); - - buf_ptr_get_fsp_addr(node2, &space, &node2_addr); - - node1_addr = flst_get_prev_addr(node2, mtr); - node3_addr = flst_get_next_addr(node2, mtr); - - if (!fil_addr_is_null(node1_addr)) { - - /* Update next field of node1 */ - - if (node1_addr.page == node2_addr.page) { - - node1 = buf_frame_align(node2) + node1_addr.boffset; - } else { - node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH, - mtr); - } - - ut_ad(node1 != node2); - - flst_write_addr(node1 + FLST_NEXT, node3_addr, mtr); - } else { - /* node2 was first in list: update first field in base */ - flst_write_addr(base + FLST_FIRST, node3_addr, mtr); - } - - if (!fil_addr_is_null(node3_addr)) { - /* Update prev field of node3 */ - - if (node3_addr.page == node2_addr.page) { - - node3 = buf_frame_align(node2) + node3_addr.boffset; - } else { - node3 = fut_get_ptr(space, node3_addr, RW_X_LATCH, - mtr); - } - - ut_ad(node2 != node3); - - flst_write_addr(node3 + FLST_PREV, node1_addr, mtr); - } else { - /* node2 was last in list: update last field in base */ - flst_write_addr(base + FLST_LAST, node1_addr, mtr); - } - - /* Update len of base node */ - len = flst_get_len(base, mtr); - ut_ad(len > 0); - - mlog_write_ulint(base + FLST_LEN, len - 1, MLOG_4BYTES, mtr); -} - -/************************************************************************ -Cuts off the tail of the list, including the node given. 
The number of -nodes which will be removed must be provided by the caller, as this function -does not measure the length of the tail. */ - -void -flst_cut_end( -/*=========*/ - flst_base_node_t* base, /* in: pointer to base node of list */ - flst_node_t* node2, /* in: first node to remove */ - ulint n_nodes,/* in: number of nodes to remove, - must be >= 1 */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ulint space; - flst_node_t* node1; - fil_addr_t node1_addr; - fil_addr_t node2_addr; - ulint len; - - ut_ad(mtr && node2 && base); - ut_ad(mtr_memo_contains(mtr, buf_block_align(base), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(node2), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(n_nodes > 0); - - buf_ptr_get_fsp_addr(node2, &space, &node2_addr); - - node1_addr = flst_get_prev_addr(node2, mtr); - - if (!fil_addr_is_null(node1_addr)) { - - /* Update next field of node1 */ - - if (node1_addr.page == node2_addr.page) { - - node1 = buf_frame_align(node2) + node1_addr.boffset; - } else { - node1 = fut_get_ptr(space, node1_addr, RW_X_LATCH, - mtr); - } - - flst_write_addr(node1 + FLST_NEXT, fil_addr_null, mtr); - } else { - /* node2 was first in list: update the field in base */ - flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr); - } - - flst_write_addr(base + FLST_LAST, node1_addr, mtr); - - /* Update len of base node */ - len = flst_get_len(base, mtr); - ut_ad(len >= n_nodes); - - mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr); -} - -/************************************************************************ -Cuts off the tail of the list, not including the given node. The number of -nodes which will be removed must be provided by the caller, as this function -does not measure the length of the tail. 
*/ - -void -flst_truncate_end( -/*==============*/ - flst_base_node_t* base, /* in: pointer to base node of list */ - flst_node_t* node2, /* in: first node not to remove */ - ulint n_nodes,/* in: number of nodes to remove */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - fil_addr_t node2_addr; - ulint len; - ulint space; - - ut_ad(mtr && node2 && base); - ut_ad(mtr_memo_contains(mtr, buf_block_align(base), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(node2), - MTR_MEMO_PAGE_X_FIX)); - if (n_nodes == 0) { - - ut_ad(fil_addr_is_null(flst_get_next_addr(node2, mtr))); - - return; - } - - buf_ptr_get_fsp_addr(node2, &space, &node2_addr); - - /* Update next field of node2 */ - flst_write_addr(node2 + FLST_NEXT, fil_addr_null, mtr); - - flst_write_addr(base + FLST_LAST, node2_addr, mtr); - - /* Update len of base node */ - len = flst_get_len(base, mtr); - ut_ad(len >= n_nodes); - - mlog_write_ulint(base + FLST_LEN, len - n_nodes, MLOG_4BYTES, mtr); -} - -/************************************************************************ -Validates a file-based list. */ - -ibool -flst_validate( -/*==========*/ - /* out: TRUE if ok */ - flst_base_node_t* base, /* in: pointer to base node of list */ - mtr_t* mtr1) /* in: mtr */ -{ - ulint space; - flst_node_t* node; - fil_addr_t node_addr; - fil_addr_t base_addr; - ulint len; - ulint i; - mtr_t mtr2; - - ut_ad(base); - ut_ad(mtr_memo_contains(mtr1, buf_block_align(base), - MTR_MEMO_PAGE_X_FIX)); - - /* We use two mini-transaction handles: the first is used to - lock the base node, and prevent other threads from modifying the - list. The second is used to traverse the list. We cannot run the - second mtr without committing it at times, because if the list - is long, then the x-locked pages could fill the buffer resulting - in a deadlock. 
*/ - - /* Find out the space id */ - buf_ptr_get_fsp_addr(base, &space, &base_addr); - - len = flst_get_len(base, mtr1); - node_addr = flst_get_first(base, mtr1); - - for (i = 0; i < len; i++) { - mtr_start(&mtr2); - - node = fut_get_ptr(space, node_addr, RW_X_LATCH, &mtr2); - node_addr = flst_get_next_addr(node, &mtr2); - - mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer - becoming full */ - } - - ut_a(fil_addr_is_null(node_addr)); - - node_addr = flst_get_last(base, mtr1); - - for (i = 0; i < len; i++) { - mtr_start(&mtr2); - - node = fut_get_ptr(space, node_addr, RW_X_LATCH, &mtr2); - node_addr = flst_get_prev_addr(node, &mtr2); - - mtr_commit(&mtr2); /* Commit mtr2 each round to prevent buffer - becoming full */ - } - - ut_a(fil_addr_is_null(node_addr)); - - return(TRUE); -} - -/************************************************************************ -Prints info of a file-based list. */ - -void -flst_print( -/*=======*/ - flst_base_node_t* base, /* in: pointer to base node of list */ - mtr_t* mtr) /* in: mtr */ -{ - buf_frame_t* frame; - ulint len; - - ut_ad(base && mtr); - ut_ad(mtr_memo_contains(mtr, buf_block_align(base), - MTR_MEMO_PAGE_X_FIX)); - frame = buf_frame_align(base); - - len = flst_get_len(base, mtr); - - fprintf(stderr, - "FILE-BASED LIST:\n" - "Base node in space %lu page %lu byte offset %lu; len %lu\n", - (ulong) buf_frame_get_space_id(frame), - (ulong) buf_frame_get_page_no(frame), - (ulong) (base - frame), (ulong) len); -} diff --git a/storage/innobase/ha/ha0ha.c b/storage/innobase/ha/ha0ha.c deleted file mode 100644 index 077497493b4..00000000000 --- a/storage/innobase/ha/ha0ha.c +++ /dev/null @@ -1,380 +0,0 @@ -/************************************************************************ -The hash table with external chains - -(c) 1994-1997 Innobase Oy - -Created 8/22/1994 Heikki Tuuri -*************************************************************************/ - -#include "ha0ha.h" -#ifdef UNIV_NONINL -#include "ha0ha.ic" 
-#endif - -#include "buf0buf.h" - -/***************************************************************** -Creates a hash table with >= n array cells. The actual number of cells is -chosen to be a prime number slightly bigger than n. */ - -hash_table_t* -ha_create_func( -/*===========*/ - /* out, own: created table */ - ibool in_btr_search, /* in: TRUE if the hash table is used in - the btr_search module */ - ulint n, /* in: number of array cells */ -#ifdef UNIV_SYNC_DEBUG - ulint mutex_level, /* in: level of the mutexes in the latching - order: this is used in the debug version */ -#endif /* UNIV_SYNC_DEBUG */ - ulint n_mutexes) /* in: number of mutexes to protect the - hash table: must be a power of 2, or 0 */ -{ - hash_table_t* table; - ulint i; - - table = hash_create(n); - - if (in_btr_search) { - table->adaptive = TRUE; - } else { - table->adaptive = FALSE; - } - - /* Creating MEM_HEAP_BTR_SEARCH type heaps can potentially fail, - but in practise it never should in this case, hence the asserts. */ - - if (n_mutexes == 0) { - if (in_btr_search) { - table->heap = mem_heap_create_in_btr_search(4096); - ut_a(table->heap); - } else { - table->heap = mem_heap_create_in_buffer(4096); - } - - return(table); - } - - hash_create_mutexes(table, n_mutexes, mutex_level); - - table->heaps = mem_alloc(n_mutexes * sizeof(void*)); - - for (i = 0; i < n_mutexes; i++) { - if (in_btr_search) { - table->heaps[i] = mem_heap_create_in_btr_search(4096); - ut_a(table->heaps[i]); - } else { - table->heaps[i] = mem_heap_create_in_buffer(4096); - } - } - - return(table); -} - -/***************************************************************** -Inserts an entry into a hash table. If an entry with the same fold number -is found, its node is updated to point to the new data, and no new node -is inserted. 
*/ - -ibool -ha_insert_for_fold( -/*===============*/ - /* out: TRUE if succeed, FALSE if no more - memory could be allocated */ - hash_table_t* table, /* in: hash table */ - ulint fold, /* in: folded value of data; if a node with - the same fold value already exists, it is - updated to point to the same data, and no new - node is created! */ - void* data) /* in: data, must not be NULL */ -{ - hash_cell_t* cell; - ha_node_t* node; - ha_node_t* prev_node; - buf_block_t* prev_block; - ulint hash; - - ut_ad(table && data); - ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold))); - - hash = hash_calc_hash(fold, table); - - cell = hash_get_nth_cell(table, hash); - - prev_node = cell->node; - - while (prev_node != NULL) { - if (prev_node->fold == fold) { - if (table->adaptive) { - prev_block = buf_block_align(prev_node->data); - ut_a(prev_block->n_pointers > 0); - prev_block->n_pointers--; - buf_block_align(data)->n_pointers++; - } - - prev_node->data = data; - - return(TRUE); - } - - prev_node = prev_node->next; - } - - /* We have to allocate a new chain node */ - - node = mem_heap_alloc(hash_get_heap(table, fold), sizeof(ha_node_t)); - - if (node == NULL) { - /* It was a btr search type memory heap and at the moment - no more memory could be allocated: return */ - - ut_ad(hash_get_heap(table, fold)->type & MEM_HEAP_BTR_SEARCH); - - return(FALSE); - } - - ha_node_set_data(node, data); - - if (table->adaptive) { - buf_block_align(data)->n_pointers++; - } - - node->fold = fold; - - node->next = NULL; - - prev_node = cell->node; - - if (prev_node == NULL) { - - cell->node = node; - - return(TRUE); - } - - while (prev_node->next != NULL) { - - prev_node = prev_node->next; - } - - prev_node->next = node; - - return(TRUE); -} - -/*************************************************************** -Deletes a hash node. 
*/ - -void -ha_delete_hash_node( -/*================*/ - hash_table_t* table, /* in: hash table */ - ha_node_t* del_node) /* in: node to be deleted */ -{ - if (table->adaptive) { - ut_a(buf_block_align(del_node->data)->n_pointers > 0); - buf_block_align(del_node->data)->n_pointers--; - } - - HASH_DELETE_AND_COMPACT(ha_node_t, next, table, del_node); -} - -/***************************************************************** -Deletes an entry from a hash table. */ - -void -ha_delete( -/*======*/ - hash_table_t* table, /* in: hash table */ - ulint fold, /* in: folded value of data */ - void* data) /* in: data, must not be NULL and must exist - in the hash table */ -{ - ha_node_t* node; - - ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold))); - - node = ha_search_with_data(table, fold, data); - - ut_a(node); - - ha_delete_hash_node(table, node); -} - -/************************************************************* -Looks for an element when we know the pointer to the data, and updates -the pointer to data, if found. */ - -void -ha_search_and_update_if_found( -/*==========================*/ - hash_table_t* table, /* in: hash table */ - ulint fold, /* in: folded value of the searched data */ - void* data, /* in: pointer to the data */ - void* new_data)/* in: new pointer to the data */ -{ - ha_node_t* node; - - ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold))); - - node = ha_search_with_data(table, fold, data); - - if (node) { - if (table->adaptive) { - ut_a(buf_block_align(node->data)->n_pointers > 0); - buf_block_align(node->data)->n_pointers--; - buf_block_align(new_data)->n_pointers++; - } - - node->data = new_data; - } -} - -/********************************************************************* -Removes from the chain determined by fold all nodes whose data pointer -points to the page given. 
*/ - -void -ha_remove_all_nodes_to_page( -/*========================*/ - hash_table_t* table, /* in: hash table */ - ulint fold, /* in: fold value */ - page_t* page) /* in: buffer page */ -{ - ha_node_t* node; - - ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold))); - - node = ha_chain_get_first(table, fold); - - while (node) { - if (buf_frame_align(ha_node_get_data(node)) == page) { - - /* Remove the hash node */ - - ha_delete_hash_node(table, node); - - /* Start again from the first node in the chain - because the deletion may compact the heap of - nodes and move other nodes! */ - - node = ha_chain_get_first(table, fold); - } else { - node = ha_chain_get_next(node); - } - } -#ifdef UNIV_DEBUG - /* Check that all nodes really got deleted */ - - node = ha_chain_get_first(table, fold); - - while (node) { - ut_a(buf_frame_align(ha_node_get_data(node)) != page); - - node = ha_chain_get_next(node); - } -#endif -} - -/***************************************************************** -Validates a given range of the cells in hash table. */ - -ibool -ha_validate( -/*========*/ - /* out: TRUE if ok */ - hash_table_t* table, /* in: hash table */ - ulint start_index, /* in: start index */ - ulint end_index) /* in: end index */ -{ - hash_cell_t* cell; - ha_node_t* node; - ibool ok = TRUE; - ulint i; - - ut_a(start_index <= end_index); - ut_a(start_index < hash_get_n_cells(table)); - ut_a(end_index < hash_get_n_cells(table)); - - for (i = start_index; i <= end_index; i++) { - - cell = hash_get_nth_cell(table, i); - - node = cell->node; - - while (node) { - if (hash_calc_hash(node->fold, table) != i) { - ut_print_timestamp(stderr); - fprintf(stderr, - "InnoDB: Error: hash table node" - " fold value %lu does not\n" - "InnoDB: match the cell number %lu.\n", - (ulong) node->fold, (ulong) i); - - ok = FALSE; - } - - node = node->next; - } - } - - return(ok); -} - -/***************************************************************** -Prints info of a hash table. 
*/ - -void -ha_print_info( -/*==========*/ - FILE* file, /* in: file where to print */ - hash_table_t* table) /* in: hash table */ -{ -#ifdef UNIV_DEBUG -/* Some of the code here is disabled for performance reasons in production -builds, see http://bugs.mysql.com/36941 */ -#define PRINT_USED_CELLS -#endif /* UNIV_DEBUG */ - -#ifdef PRINT_USED_CELLS - hash_cell_t* cell; - ulint cells = 0; - ulint i; -#endif /* PRINT_USED_CELLS */ - ulint n_bufs; - -#ifdef PRINT_USED_CELLS - for (i = 0; i < hash_get_n_cells(table); i++) { - - cell = hash_get_nth_cell(table, i); - - if (cell->node) { - - cells++; - } - } -#endif /* PRINT_USED_CELLS */ - - fprintf(file, "Hash table size %lu", - (ulong) hash_get_n_cells(table)); - -#ifdef PRINT_USED_CELLS - fprintf(file, ", used cells %lu", (ulong) cells); -#endif /* PRINT_USED_CELLS */ - - if (table->heaps == NULL && table->heap != NULL) { - - /* This calculation is intended for the adaptive hash - index: how many buffer frames we have reserved? */ - - n_bufs = UT_LIST_GET_LEN(table->heap->base) - 1; - - if (table->heap->free_block) { - n_bufs++; - } - - fprintf(file, ", node heap has %lu buffer(s)\n", - (ulong) n_bufs); - } -} diff --git a/storage/innobase/ha/hash0hash.c b/storage/innobase/ha/hash0hash.c deleted file mode 100644 index 4807015eee5..00000000000 --- a/storage/innobase/ha/hash0hash.c +++ /dev/null @@ -1,153 +0,0 @@ -/****************************************************** -The simple hash table utility - -(c) 1997 Innobase Oy - -Created 5/20/1997 Heikki Tuuri -*******************************************************/ - -#include "hash0hash.h" -#ifdef UNIV_NONINL -#include "hash0hash.ic" -#endif - -#include "mem0mem.h" - -/**************************************************************** -Reserves the mutex for a fold value in a hash table. 
*/ - -void -hash_mutex_enter( -/*=============*/ - hash_table_t* table, /* in: hash table */ - ulint fold) /* in: fold */ -{ - mutex_enter(hash_get_mutex(table, fold)); -} - -/**************************************************************** -Releases the mutex for a fold value in a hash table. */ - -void -hash_mutex_exit( -/*============*/ - hash_table_t* table, /* in: hash table */ - ulint fold) /* in: fold */ -{ - mutex_exit(hash_get_mutex(table, fold)); -} - -/**************************************************************** -Reserves all the mutexes of a hash table, in an ascending order. */ - -void -hash_mutex_enter_all( -/*=================*/ - hash_table_t* table) /* in: hash table */ -{ - ulint i; - - for (i = 0; i < table->n_mutexes; i++) { - - mutex_enter(table->mutexes + i); - } -} - -/**************************************************************** -Releases all the mutexes of a hash table. */ - -void -hash_mutex_exit_all( -/*================*/ - hash_table_t* table) /* in: hash table */ -{ - ulint i; - - for (i = 0; i < table->n_mutexes; i++) { - - mutex_exit(table->mutexes + i); - } -} - -/***************************************************************** -Creates a hash table with >= n array cells. The actual number of cells is -chosen to be a prime number slightly bigger than n. 
*/ - -hash_table_t* -hash_create( -/*========*/ - /* out, own: created table */ - ulint n) /* in: number of array cells */ -{ - hash_cell_t* array; - ulint prime; - hash_table_t* table; - ulint i; - hash_cell_t* cell; - - prime = ut_find_prime(n); - - table = mem_alloc(sizeof(hash_table_t)); - - array = ut_malloc(sizeof(hash_cell_t) * prime); - - table->adaptive = FALSE; - table->array = array; - table->n_cells = prime; - table->n_mutexes = 0; - table->mutexes = NULL; - table->heaps = NULL; - table->heap = NULL; - table->magic_n = HASH_TABLE_MAGIC_N; - - /* Initialize the cell array */ - - for (i = 0; i < prime; i++) { - - cell = hash_get_nth_cell(table, i); - cell->node = NULL; - } - - return(table); -} - -/***************************************************************** -Frees a hash table. */ - -void -hash_table_free( -/*============*/ - hash_table_t* table) /* in, own: hash table */ -{ - ut_a(table->mutexes == NULL); - - ut_free(table->array); - mem_free(table); -} - -/***************************************************************** -Creates a mutex array to protect a hash table. 
*/ - -void -hash_create_mutexes_func( -/*=====================*/ - hash_table_t* table, /* in: hash table */ -#ifdef UNIV_SYNC_DEBUG - ulint sync_level, /* in: latching order level of the - mutexes: used in the debug version */ -#endif /* UNIV_SYNC_DEBUG */ - ulint n_mutexes) /* in: number of mutexes, must be a - power of 2 */ -{ - ulint i; - - ut_a(n_mutexes == ut_2_power_up(n_mutexes)); - - table->mutexes = mem_alloc(n_mutexes * sizeof(mutex_t)); - - for (i = 0; i < n_mutexes; i++) { - mutex_create(table->mutexes + i, sync_level); - } - - table->n_mutexes = n_mutexes; -} diff --git a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc deleted file mode 100644 index 828dcdb843d..00000000000 --- a/storage/innobase/handler/ha_innodb.cc +++ /dev/null @@ -1,8534 +0,0 @@ -/* Copyright (C) 2000-2005 MySQL AB & Innobase Oy - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* This file defines the InnoDB handler: the interface between MySQL and InnoDB -NOTE: You can only use noninlined InnoDB functions in this file, because we -have disabled the InnoDB inlining in this file. 
*/ - -/* TODO list for the InnoDB handler in 5.0: - - Remove the flag trx->active_trans and look at trx->conc_state - - fix savepoint functions to use savepoint storage area - - Find out what kind of problems the OS X case-insensitivity causes to - table and database names; should we 'normalize' the names like we do - in Windows? -*/ - -#ifdef USE_PRAGMA_IMPLEMENTATION -#pragma implementation // gcc: Class implementation -#endif - -#include <mysql_priv.h> -#include <mysqld_error.h> - -#include <m_ctype.h> -#include <hash.h> -#include <myisampack.h> -#include <mysys_err.h> -#include <my_sys.h> -#include "ha_innodb.h" -#include <mysql/plugin.h> - -#ifndef MYSQL_SERVER -/* This is needed because of Bug #3596. Let us hope that pthread_mutex_t -is defined the same in both builds: the MySQL server and the InnoDB plugin. */ -extern pthread_mutex_t LOCK_thread_count; -#endif /* MYSQL_SERVER */ - -/** to protect innobase_open_files */ -static pthread_mutex_t innobase_share_mutex; -/** to force correct commit order in binlog */ -static pthread_mutex_t prepare_commit_mutex; -static ulong commit_threads = 0; -static pthread_mutex_t commit_threads_m; -static pthread_cond_t commit_cond; -static pthread_mutex_t commit_cond_m; -static bool innodb_inited = 0; - -/* - This needs to exist until the query cache callback is removed - or learns to pass hton. 
-*/ -static handlerton *innodb_hton_ptr; - -#define INSIDE_HA_INNOBASE_CC - -/* Include necessary InnoDB headers */ -extern "C" { -#include "../storage/innobase/include/univ.i" -#include "../storage/innobase/include/os0file.h" -#include "../storage/innobase/include/os0thread.h" -#include "../storage/innobase/include/srv0start.h" -#include "../storage/innobase/include/srv0srv.h" -#include "../storage/innobase/include/trx0roll.h" -#include "../storage/innobase/include/trx0trx.h" -#include "../storage/innobase/include/trx0sys.h" -#include "../storage/innobase/include/mtr0mtr.h" -#include "../storage/innobase/include/row0ins.h" -#include "../storage/innobase/include/row0mysql.h" -#include "../storage/innobase/include/row0sel.h" -#include "../storage/innobase/include/row0upd.h" -#include "../storage/innobase/include/log0log.h" -#include "../storage/innobase/include/lock0lock.h" -#include "../storage/innobase/include/dict0crea.h" -#include "../storage/innobase/include/btr0cur.h" -#include "../storage/innobase/include/btr0btr.h" -#include "../storage/innobase/include/fsp0fsp.h" -#include "../storage/innobase/include/sync0sync.h" -#include "../storage/innobase/include/fil0fil.h" -#include "../storage/innobase/include/trx0xa.h" -#include "../storage/innobase/include/thr0loc.h" -#include "../storage/innobase/include/ha_prototypes.h" -} - -static const long AUTOINC_OLD_STYLE_LOCKING = 0; -static const long AUTOINC_NEW_STYLE_LOCKING = 1; -static const long AUTOINC_NO_LOCKING = 2; - -static long innobase_mirrored_log_groups, innobase_log_files_in_group, - innobase_log_buffer_size, innobase_buffer_pool_awe_mem_mb, - innobase_additional_mem_pool_size, - innobase_lock_wait_timeout, innobase_force_recovery, - innobase_open_files, innobase_autoinc_lock_mode; - -static long long innobase_buffer_pool_size, innobase_log_file_size; - -/* The default values for the following char* start-up parameters -are determined in innobase_init below: */ - -static char* innobase_data_home_dir = 
NULL; -static char* innobase_data_file_path = NULL; -static char* innobase_log_group_home_dir = NULL; -/* The following has a misleading name: starting from 4.0.5, this also -affects Windows: */ -static char* innobase_unix_file_flush_method = NULL; - -/* Below we have boolean-valued start-up parameters, and their default -values */ - -static ulong innobase_fast_shutdown = 1; -#ifdef UNIV_LOG_ARCHIVE -static my_bool innobase_log_archive = FALSE; -static char* innobase_log_arch_dir = NULL; -#endif /* UNIV_LOG_ARCHIVE */ -static my_bool innobase_use_doublewrite = TRUE; -static my_bool innobase_use_checksums = TRUE; -static my_bool innobase_file_per_table = FALSE; -static my_bool innobase_locks_unsafe_for_binlog = FALSE; -static my_bool innobase_rollback_on_timeout = FALSE; -static my_bool innobase_create_status_file = FALSE; -static my_bool innobase_stats_on_metadata = TRUE; -static my_bool innobase_adaptive_hash_index = TRUE; - -static char* internal_innobase_data_file_path = NULL; - -/* Default number of IO per second supported by server. Tunes background - IO rate. */ -static long innobase_io_capacity = 100; - -/* Write dirty pages when pct dirty is less than max pct dirty */ -static my_bool innobase_extra_dirty_writes = TRUE; - -/* Max number of IO requests merged to perform large IO in background - IO threads. -*/ -long innobase_max_merged_io = 64; - -/* Number of background IO threads for read and write. */ -long innobase_read_io_threads, innobase_write_io_threads; - -/* Use timer based InnoDB concurrency throttling flag */ -static my_bool innobase_thread_concurrency_timer_based; - -/* The following counter is used to convey information to InnoDB -about server activity: in selects it is not sensible to call -srv_active_wake_master_thread after each fetch or search, we only do -it every INNOBASE_WAKE_INTERVAL'th step. 
*/ - -#define INNOBASE_WAKE_INTERVAL 32 -static ulong innobase_active_counter = 0; - -static HASH innobase_open_tables; - -#ifdef __NETWARE__ /* some special cleanup for NetWare */ -bool nw_panic = FALSE; -#endif - -static uchar* innobase_get_key(INNOBASE_SHARE *share, size_t *length, - my_bool not_used __attribute__((unused))); -static INNOBASE_SHARE *get_share(const char *table_name); -static void free_share(INNOBASE_SHARE *share); -static int innobase_close_connection(handlerton *hton, THD* thd); -static int innobase_commit(handlerton *hton, THD* thd, bool all); -static int innobase_rollback(handlerton *hton, THD* thd, bool all); -static int innobase_rollback_to_savepoint(handlerton *hton, THD* thd, - void *savepoint); -static int innobase_savepoint(handlerton *hton, THD* thd, void *savepoint); -static int innobase_release_savepoint(handlerton *hton, THD* thd, - void *savepoint); -static handler *innobase_create_handler(handlerton *hton, - TABLE_SHARE *table, - MEM_ROOT *mem_root); - -static const char innobase_hton_name[]= "InnoDB"; - - -static MYSQL_THDVAR_BOOL(support_xa, PLUGIN_VAR_OPCMDARG, - "Enable InnoDB support for the XA two-phase commit", - /* check_func */ NULL, /* update_func */ NULL, - /* default */ TRUE); - -static MYSQL_THDVAR_BOOL(table_locks, PLUGIN_VAR_OPCMDARG, - "Enable InnoDB locking in LOCK TABLES", - /* check_func */ NULL, /* update_func */ NULL, - /* default */ TRUE); - -static handler *innobase_create_handler(handlerton *hton, - TABLE_SHARE *table, - MEM_ROOT *mem_root) -{ - return new (mem_root) ha_innobase(hton, table); -} - -/*********************************************************************** -This function is used to prepare X/Open XA distributed transaction */ -static -int -innobase_xa_prepare( -/*================*/ - /* out: 0 or error number */ - handlerton* hton, - THD* thd, /* in: handle to the MySQL thread of the user - whose XA transaction should be prepared */ - bool all); /* in: TRUE - commit transaction - FALSE - the 
current SQL statement ended */ -/*********************************************************************** -This function is used to recover X/Open XA distributed transactions */ -static -int -innobase_xa_recover( -/*================*/ - /* out: number of prepared transactions - stored in xid_list */ - handlerton* hton, - XID* xid_list, /* in/out: prepared transactions */ - uint len); /* in: number of slots in xid_list */ -/*********************************************************************** -This function is used to commit one X/Open XA distributed transaction -which is in the prepared state */ -static -int -innobase_commit_by_xid( -/*===================*/ - /* out: 0 or error number */ - handlerton* hton, - XID* xid); /* in: X/Open XA transaction identification */ -/*********************************************************************** -This function is used to rollback one X/Open XA distributed transaction -which is in the prepared state */ -static -int -innobase_rollback_by_xid( -/*=====================*/ - /* out: 0 or error number */ - handlerton* hton, - XID *xid); /* in: X/Open XA transaction identification */ -/*********************************************************************** -Create a consistent view for a cursor based on current transaction -which is created if the corresponding MySQL thread still lacks one. -This consistent view is then used inside of MySQL when accessing records -using a cursor. */ -static -void* -innobase_create_cursor_view( -/*========================*/ - /* out: pointer to cursor view or NULL */ - handlerton* hton, /* in: innobase hton */ - THD* thd); /* in: user thread handle */ -/*********************************************************************** -Set the given consistent cursor view to a transaction which is created -if the corresponding MySQL thread still lacks one. If the given -consistent cursor view is NULL global read view of a transaction is -restored to a transaction read view. 
*/ -static -void -innobase_set_cursor_view( -/*=====================*/ - handlerton* hton, - THD* thd, /* in: user thread handle */ - void* curview);/* in: Consistent cursor view to be set */ -/*********************************************************************** -Close the given consistent cursor view of a transaction and restore -global read view to a transaction read view. Transaction is created if the -corresponding MySQL thread still lacks one. */ -static -void -innobase_close_cursor_view( -/*=======================*/ - handlerton* hton, - THD* thd, /* in: user thread handle */ - void* curview);/* in: Consistent read view to be closed */ -/********************************************************************* -Removes all tables in the named database inside InnoDB. */ -static -void -innobase_drop_database( -/*===================*/ - /* out: error number */ - handlerton* hton, /* in: handlerton of Innodb */ - char* path); /* in: database path; inside InnoDB the name - of the last directory in the path is used as - the database name: for example, in 'mysql/data/test' - the database name is 'test' */ -/*********************************************************************** -Closes an InnoDB database. */ -static -int -innobase_end(handlerton *hton, ha_panic_function type); - -/********************************************************************* -Creates an InnoDB transaction struct for the thd if it does not yet have one. -Starts a new InnoDB transaction if a transaction is not yet started. And -assigns a new snapshot for a consistent read if the transaction does not yet -have one. 
*/ -static -int -innobase_start_trx_and_assign_read_view( -/*====================================*/ - /* out: 0 */ - handlerton* hton, /* in: Innodb handlerton */ - THD* thd); /* in: MySQL thread handle of the user for whom - the transaction should be committed */ -/******************************************************************** -Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes -the logs, and the name of this function should be innobase_checkpoint. */ -static -bool -innobase_flush_logs( -/*================*/ - /* out: TRUE if error */ - handlerton* hton); /* in: InnoDB handlerton */ - -/**************************************************************************** -Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB -Monitor to the client. */ -static -bool -innodb_show_status( -/*===============*/ - handlerton* hton, /* in: the innodb handlerton */ - THD* thd, /* in: the MySQL query thread of the caller */ - stat_print_fn *stat_print); -static -bool innobase_show_status(handlerton *hton, THD* thd, - stat_print_fn* stat_print, - enum ha_stat_type stat_type); - -/********************************************************************* -Commits a transaction in an InnoDB database. 
*/ -static -void -innobase_commit_low( -/*================*/ - trx_t* trx); /* in: transaction handle */ - -static SHOW_VAR innodb_status_variables[]= { - {"buffer_pool_pages_data", - (char*) &export_vars.innodb_buffer_pool_pages_data, SHOW_LONG}, - {"buffer_pool_pages_dirty", - (char*) &export_vars.innodb_buffer_pool_pages_dirty, SHOW_LONG}, - {"buffer_pool_pages_flushed", - (char*) &export_vars.innodb_buffer_pool_pages_flushed, SHOW_LONG}, - {"buffer_pool_pages_free", - (char*) &export_vars.innodb_buffer_pool_pages_free, SHOW_LONG}, -#ifdef UNIV_DEBUG - {"buffer_pool_pages_latched", - (char*) &export_vars.innodb_buffer_pool_pages_latched, SHOW_LONG}, -#endif /* UNIV_DEBUG */ - {"buffer_pool_pages_misc", - (char*) &export_vars.innodb_buffer_pool_pages_misc, SHOW_LONG}, - {"buffer_pool_pages_total", - (char*) &export_vars.innodb_buffer_pool_pages_total, SHOW_LONG}, - {"buffer_pool_read_ahead_rnd", - (char*) &export_vars.innodb_buffer_pool_read_ahead_rnd, SHOW_LONG}, - {"buffer_pool_read_ahead_seq", - (char*) &export_vars.innodb_buffer_pool_read_ahead_seq, SHOW_LONG}, - {"buffer_pool_read_requests", - (char*) &export_vars.innodb_buffer_pool_read_requests, SHOW_LONG}, - {"buffer_pool_reads", - (char*) &export_vars.innodb_buffer_pool_reads, SHOW_LONG}, - {"buffer_pool_wait_free", - (char*) &export_vars.innodb_buffer_pool_wait_free, SHOW_LONG}, - {"buffer_pool_write_requests", - (char*) &export_vars.innodb_buffer_pool_write_requests, SHOW_LONG}, - {"data_fsyncs", - (char*) &export_vars.innodb_data_fsyncs, SHOW_LONG}, - {"data_pending_fsyncs", - (char*) &export_vars.innodb_data_pending_fsyncs, SHOW_LONG}, - {"data_pending_reads", - (char*) &export_vars.innodb_data_pending_reads, SHOW_LONG}, - {"data_pending_writes", - (char*) &export_vars.innodb_data_pending_writes, SHOW_LONG}, - {"data_read", - (char*) &export_vars.innodb_data_read, SHOW_LONG}, - {"data_reads", - (char*) &export_vars.innodb_data_reads, SHOW_LONG}, - {"data_writes", - (char*) 
&export_vars.innodb_data_writes, SHOW_LONG}, - {"data_written", - (char*) &export_vars.innodb_data_written, SHOW_LONG}, - {"dblwr_pages_written", - (char*) &export_vars.innodb_dblwr_pages_written, SHOW_LONG}, - {"dblwr_writes", - (char*) &export_vars.innodb_dblwr_writes, SHOW_LONG}, - {"have_sync_atomic", - (char*) &export_vars.innodb_have_sync_atomic, SHOW_BOOL}, - {"heap_enabled", - (char*) &export_vars.innodb_heap_enabled, SHOW_BOOL}, - {"log_waits", - (char*) &export_vars.innodb_log_waits, SHOW_LONG}, - {"log_write_requests", - (char*) &export_vars.innodb_log_write_requests, SHOW_LONG}, - {"log_writes", - (char*) &export_vars.innodb_log_writes, SHOW_LONG}, - {"os_log_fsyncs", - (char*) &export_vars.innodb_os_log_fsyncs, SHOW_LONG}, - {"os_log_pending_fsyncs", - (char*) &export_vars.innodb_os_log_pending_fsyncs, SHOW_LONG}, - {"os_log_pending_writes", - (char*) &export_vars.innodb_os_log_pending_writes, SHOW_LONG}, - {"os_log_written", - (char*) &export_vars.innodb_os_log_written, SHOW_LONG}, - {"page_size", - (char*) &export_vars.innodb_page_size, SHOW_LONG}, - {"pages_created", - (char*) &export_vars.innodb_pages_created, SHOW_LONG}, - {"pages_read", - (char*) &export_vars.innodb_pages_read, SHOW_LONG}, - {"pages_written", - (char*) &export_vars.innodb_pages_written, SHOW_LONG}, - {"row_lock_current_waits", - (char*) &export_vars.innodb_row_lock_current_waits, SHOW_LONG}, - {"row_lock_time", - (char*) &export_vars.innodb_row_lock_time, SHOW_LONGLONG}, - {"row_lock_time_avg", - (char*) &export_vars.innodb_row_lock_time_avg, SHOW_LONG}, - {"row_lock_time_max", - (char*) &export_vars.innodb_row_lock_time_max, SHOW_LONG}, - {"row_lock_waits", - (char*) &export_vars.innodb_row_lock_waits, SHOW_LONG}, - {"rows_deleted", - (char*) &export_vars.innodb_rows_deleted, SHOW_LONG}, - {"rows_inserted", - (char*) &export_vars.innodb_rows_inserted, SHOW_LONG}, - {"rows_read", - (char*) &export_vars.innodb_rows_read, SHOW_LONG}, - {"rows_updated", - (char*) 
&export_vars.innodb_rows_updated, SHOW_LONG}, - {"wake_ups", - (char*) &export_vars.innodb_wake_ups, SHOW_LONG}, - {NullS, NullS, SHOW_LONG} -}; - -/* General functions */ - -/********************************************************************** -Returns true if the thread is the replication thread on the slave -server. Used in srv_conc_enter_innodb() to determine if the thread -should be allowed to enter InnoDB - the replication thread is treated -differently than other threads. Also used in -srv_conc_force_exit_innodb(). */ -extern "C" -ibool -thd_is_replication_slave_thread( -/*============================*/ - /* out: true if thd is the replication thread */ - void* thd) /* in: thread handle (THD*) */ -{ - return((ibool) thd_slave_thread((THD*) thd)); -} - -/********************************************************************** -Save some CPU by testing the value of srv_thread_concurrency in inline -functions. */ -inline -void -innodb_srv_conc_enter_innodb( -/*=========================*/ - trx_t* trx) /* in: transaction handle */ -{ - if (UNIV_LIKELY(!srv_thread_concurrency)) { - - return; - } - - srv_conc_enter_innodb(trx); -} - -/********************************************************************** -Save some CPU by testing the value of srv_thread_concurrency in inline -functions. */ -inline -void -innodb_srv_conc_exit_innodb( -/*========================*/ - trx_t* trx) /* in: transaction handle */ -{ - if (UNIV_LIKELY(!trx->declared_to_be_inside_innodb)) { - - return; - } - - srv_conc_exit_innodb(trx); -} - -/********************************************************************** -Releases possible search latch and InnoDB thread FIFO ticket. These should -be released at each SQL statement end, and also when mysqld passes the -control to the client. It does no harm to release these also in the middle -of an SQL statement. 
*/ -inline -void -innobase_release_stat_resources( -/*============================*/ - trx_t* trx) /* in: transaction object */ -{ - if (trx->has_search_latch) { - trx_search_latch_release_if_reserved(trx); - } - - if (trx->declared_to_be_inside_innodb) { - /* Release our possible ticket in the FIFO */ - - srv_conc_force_exit_innodb(trx); - } -} - -/********************************************************************** -Returns true if the transaction this thread is processing has edited -non-transactional tables. Used by the deadlock detector when deciding -which transaction to rollback in case of a deadlock - we try to avoid -rolling back transactions that have edited non-transactional tables. */ -extern "C" -ibool -thd_has_edited_nontrans_tables( -/*===========================*/ - /* out: true if non-transactional tables have - been edited */ - void* thd) /* in: thread handle (THD*) */ -{ - return((ibool) thd_non_transactional_update((THD*) thd)); -} - -/********************************************************************** -Returns true if the thread is executing a SELECT statement. */ -extern "C" -ibool -thd_is_select( -/*==========*/ - /* out: true if thd is executing SELECT */ - const void* thd) /* in: thread handle (THD*) */ -{ - return(thd_sql_command((const THD*) thd) == SQLCOM_SELECT); -} - -/************************************************************************ -Obtain the InnoDB transaction of a MySQL thread. */ -inline -trx_t*& -thd_to_trx( -/*=======*/ - /* out: reference to transaction pointer */ - THD* thd) /* in: MySQL thread */ -{ - return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr)); -} - -/************************************************************************ -Call this function when mysqld passes control to the client. That is to -avoid deadlocks on the adaptive hash S-latch possibly held by thd. For more -documentation, see handler.cc. 
*/ -static -int -innobase_release_temporary_latches( -/*===============================*/ - /* out: 0 */ - handlerton* hton, /* in: handlerton */ - THD* thd) /* in: MySQL thread */ -{ - trx_t* trx; - - DBUG_ASSERT(hton == innodb_hton_ptr); - - if (!innodb_inited) { - - return 0; - } - - trx = thd_to_trx(thd); - - if (trx) { - innobase_release_stat_resources(trx); - } - return 0; -} - -/************************************************************************ -Increments innobase_active_counter and every INNOBASE_WAKE_INTERVALth -time calls srv_active_wake_master_thread. This function should be used -when a single database operation may introduce a small need for -server utility activity, like checkpointing. */ -inline -void -innobase_active_small(void) -/*=======================*/ -{ - innobase_active_counter++; - - if ((innobase_active_counter % INNOBASE_WAKE_INTERVAL) == 0) { - srv_active_wake_master_thread(); - } -} - -/************************************************************************ -Converts an InnoDB error code to a MySQL error code and also tells to MySQL -about a possible transaction rollback inside InnoDB caused by a lock wait -timeout or a deadlock. 
*/ -static -int -convert_error_code_to_mysql( -/*========================*/ - /* out: MySQL error code */ - int error, /* in: InnoDB error code */ - THD* thd) /* in: user thread handle or NULL */ -{ - if (error == DB_SUCCESS) { - - return(0); - - } else if (error == (int) DB_DUPLICATE_KEY) { - - return(HA_ERR_FOUND_DUPP_KEY); - - } else if (error == (int) DB_FOREIGN_DUPLICATE_KEY) { - - return(HA_ERR_FOREIGN_DUPLICATE_KEY); - - } else if (error == (int) DB_RECORD_NOT_FOUND) { - - return(HA_ERR_NO_ACTIVE_RECORD); - - } else if (error == (int) DB_ERROR) { - - return(-1); /* unspecified error */ - - } else if (error == (int) DB_DEADLOCK) { - /* Since we rolled back the whole transaction, we must - tell it also to MySQL so that MySQL knows to empty the - cached binlog for this transaction */ - - if (thd) { - thd_mark_transaction_to_rollback(thd, TRUE); - } - - return(HA_ERR_LOCK_DEADLOCK); - } else if (error == (int) DB_LOCK_WAIT_TIMEOUT) { - - /* Starting from 5.0.13, we let MySQL just roll back the - latest SQL statement in a lock wait timeout. Previously, we - rolled back the whole transaction. 
*/ - - if (thd) { - thd_mark_transaction_to_rollback( - thd, (bool)row_rollback_on_timeout); - } - - return(HA_ERR_LOCK_WAIT_TIMEOUT); - - } else if (error == (int) DB_NO_REFERENCED_ROW) { - - return(HA_ERR_NO_REFERENCED_ROW); - - } else if (error == (int) DB_ROW_IS_REFERENCED) { - - return(HA_ERR_ROW_IS_REFERENCED); - - } else if (error == (int) DB_CANNOT_ADD_CONSTRAINT) { - - return(HA_ERR_CANNOT_ADD_FOREIGN); - - } else if (error == (int) DB_CANNOT_DROP_CONSTRAINT) { - - return(HA_ERR_ROW_IS_REFERENCED); /* TODO: This is a bit - misleading, a new MySQL error - code should be introduced */ - } else if (error == (int) DB_COL_APPEARS_TWICE_IN_INDEX) { - - return(HA_ERR_CRASHED); - - } else if (error == (int) DB_OUT_OF_FILE_SPACE) { - - return(HA_ERR_RECORD_FILE_FULL); - - } else if (error == (int) DB_TABLE_IS_BEING_USED) { - - return(HA_ERR_WRONG_COMMAND); - - } else if (error == (int) DB_TABLE_NOT_FOUND) { - - return(HA_ERR_NO_SUCH_TABLE); - - } else if (error == (int) DB_TOO_BIG_RECORD) { - - return(HA_ERR_TO_BIG_ROW); - - } else if (error == (int) DB_CORRUPTION) { - - return(HA_ERR_CRASHED); - } else if (error == (int) DB_NO_SAVEPOINT) { - - return(HA_ERR_NO_SAVEPOINT); - } else if (error == (int) DB_LOCK_TABLE_FULL) { - /* Since we rolled back the whole transaction, we must - tell it also to MySQL so that MySQL knows to empty the - cached binlog for this transaction */ - - if (thd) { - thd_mark_transaction_to_rollback(thd, TRUE); - } - - return(HA_ERR_LOCK_TABLE_FULL); - } else if (error == DB_TOO_MANY_CONCURRENT_TRXS) { - - /* Once MySQL add the appropriate code to errmsg.txt then - we can get rid of this #ifdef. NOTE: The code checked by - the #ifdef is the suggested name for the error condition - and the actual error code name could very well be different. - This will require some monitoring, ie. 
the status - of this request on our part.*/ -#ifdef ER_TOO_MANY_CONCURRENT_TRXS - return(ER_TOO_MANY_CONCURRENT_TRXS); -#else - return(HA_ERR_RECORD_FILE_FULL); -#endif - - } else if (error == DB_UNSUPPORTED) { - - return(HA_ERR_UNSUPPORTED); - } else { - return(-1); // Unknown error - } -} - -/***************************************************************** -If you want to print a thd that is not associated with the current thread, -you must call this function before reserving the InnoDB kernel_mutex, to -protect MySQL from setting thd->query NULL. If you print a thd of the current -thread, we know that MySQL cannot modify thd->query, and it is not necessary -to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release -the kernel_mutex. -NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this -function! */ -extern "C" -void -innobase_mysql_prepare_print_arbitrary_thd(void) -/*============================================*/ -{ - VOID(pthread_mutex_lock(&LOCK_thread_count)); -} - -/***************************************************************** -Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd(). -NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this -function! */ -extern "C" -void -innobase_mysql_end_print_arbitrary_thd(void) -/*========================================*/ -{ - VOID(pthread_mutex_unlock(&LOCK_thread_count)); -} - -/***************************************************************** -Prints info of a THD object (== user session thread) to the given file. -NOTE that /mysql/innobase/trx/trx0trx.c must contain the prototype for -this function! 
*/
-extern "C"
-void
-innobase_mysql_print_thd(
-/*=====================*/
-	FILE*	f,		/* in: output stream */
-	void*	input_thd,	/* in: pointer to a MySQL THD object */
-	uint	max_query_len)	/* in: max query length to print, or 0 to
-				   use the default max length */
-{
-	char		text[1024];
-	const char*	summary;
-
-	/* thd_security_context() formats the description using the
-	supplied buffer; the returned string is what gets printed. */
-	summary = thd_security_context((THD*) input_thd, text, sizeof(text),
-				       max_query_len);
-	fputs(summary, f);
-	putc('\n', f);
-}
-
-/**********************************************************************
-Looks up the minimum and maximum character length, in bytes, of a
-character set.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/include/data0type.ic! */
-extern "C"
-void
-innobase_get_cset_width(
-/*====================*/
-	ulint	cset,		/* in: MySQL charset-collation code */
-	ulint*	mbminlen,	/* out: minimum length of a char (in bytes) */
-	ulint*	mbmaxlen)	/* out: maximum length of a char (in bytes) */
-{
-	ut_ad(cset < 256);
-	ut_ad(mbminlen);
-	ut_ad(mbmaxlen);
-
-	CHARSET_INFO*	cs = all_charsets[cset];
-
-	if (cs == NULL) {
-		/* Only code 0 may lack a charset entry; both widths
-		are then reported as 0. */
-		ut_a(cset == 0);
-		*mbmaxlen = 0;
-		*mbminlen = 0;
-
-		return;
-	}
-
-	*mbminlen = cs->mbminlen;
-	*mbmaxlen = cs->mbmaxlen;
-}
-
-/**********************************************************************
-Converts an identifier from the charset of the current thd to the
-filename charset (my_charset_filename), i.e. to a table name.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
-void
-innobase_convert_from_table_id(
-/*===========================*/
-	char*		to,	/* out: converted identifier */
-	const char*	from,	/* in: identifier to convert */
-	ulint		len)	/* in: length of 'to', in bytes */
-{
-	uint		n_errors;	/* filled in by strconvert(), not used */
-	CHARSET_INFO*	from_cs = thd_charset(current_thd);
-
-	strconvert(from_cs, from, &my_charset_filename, to, (uint) len,
-		   &n_errors);
-}
-
-/**********************************************************************
-Converts an identifier to UTF-8.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c!
*/
-extern "C"
-void
-innobase_convert_from_id(
-/*=====================*/
-	char*		to,	/* out: converted identifier */
-	const char*	from,	/* in: identifier to convert */
-	ulint		len)	/* in: length of 'to', in bytes */
-{
-	uint		n_errors;	/* filled in by strconvert(), not used */
-	CHARSET_INFO*	from_cs = thd_charset(current_thd);
-
-	/* The target is system_charset_info. */
-	strconvert(from_cs, from, system_charset_info, to, (uint) len,
-		   &n_errors);
-}
-
-/**********************************************************************
-Case-insensitive comparison of two NUL-terminated UTF-8 strings, done
-with my_strcasecmp() on system_charset_info.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
-int
-innobase_strcasecmp(
-/*================*/
-				/* out: 0 if a=b, <0 if a<b, >0 if a>b */
-	const char*	a,	/* in: first string to compare */
-	const char*	b)	/* in: second string to compare */
-{
-	int	cmp = my_strcasecmp(system_charset_info, a, b);
-
-	return(cmp);
-}
-
-/**********************************************************************
-Lower-cases, in place, every character of a NUL-terminated UTF-8 string.
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
-void
-innobase_casedn_str(
-/*================*/
-	char*	a)	/* in/out: string to put in lower case */
-{
-	my_casedn_str(system_charset_info, a);
-}
-
-/**************************************************************************
-Returns the character set of the given MySQL thread, as reported by
-thd_charset().
-
-NOTE that the exact prototype of this function has to be in
-/innobase/dict/dict0dict.c! */
-extern "C"
-struct charset_info_st*
-innobase_get_charset(
-/*=================*/
-				/* out: connection character set */
-	void*	mysql_thd)	/* in: MySQL thread handle */
-{
-	THD*	session = (THD*) mysql_thd;
-
-	return(thd_charset(session));
-}
-
-/*************************************************************************
-Creates a temporary file.
*/
-extern "C"
-int
-innobase_mysql_tmpfile(void)
-/*========================*/
-			/* out: temporary file descriptor, or < 0 on error */
-{
-	int	fd2 = -1;
-	File	fd = mysql_tmpfile("ib");
-
-	if (fd < 0) {
-		/* mysql_tmpfile() failed; report failure to the caller. */
-
-		return(fd2);
-	}
-
-	/* Copy the file descriptor, so that the additional resources
-	allocated by create_temp_file() can be freed by invoking
-	my_close().
-
-	Because the file descriptor returned by this function
-	will be passed to fdopen(), it will be closed by invoking
-	fclose(), which in turn will invoke close() instead of
-	my_close(). */
-	fd2 = dup(fd);
-
-	if (fd2 < 0) {
-		/* Capture errno before calling anything else, so that
-		a later call cannot overwrite it, and log the error
-		code itself rather than the (always negative) return
-		value of dup(). */
-		my_errno = errno;
-		DBUG_PRINT("error", ("Got error %d on dup", my_errno));
-		my_error(EE_OUT_OF_FILERESOURCES,
-			 MYF(ME_BELL+ME_WAITTANG),
-			 "ib*", my_errno);
-	}
-
-	/* The original descriptor is closed whether or not dup()
-	succeeded. */
-	my_close(fd, MYF(MY_WME));
-
-	return(fd2);
-}
-
-/*************************************************************************
-Wrapper around MySQL's copy_and_convert function, see it for
-documentation. The lengths are narrowed to uint32 before the call. */
-extern "C"
-ulint
-innobase_convert_string(
-/*====================*/
-					/* out: value returned by
-					copy_and_convert() */
-	void*		to,		/* out: destination buffer */
-	ulint		to_length,	/* in: length passed for 'to' */
-	CHARSET_INFO*	to_cs,		/* in: charset passed for 'to' */
-	const void*	from,		/* in: source buffer */
-	ulint		from_length,	/* in: length passed for 'from' */
-	CHARSET_INFO*	from_cs,	/* in: charset passed for 'from' */
-	uint*		errors)		/* out: passed through to
-					copy_and_convert() */
-{
-	return(copy_and_convert((char*)to, (uint32) to_length, to_cs,
-				(const char*)from, (uint32) from_length, from_cs,
-				errors));
-}
-
-/*************************************************************************
-Compute the next autoinc value.
-
-For MySQL replication the autoincrement values can be partitioned among
-the nodes. The offset is the start or origin of the autoincrement value
-for a particular node. For n nodes the increment will be n and the offset
-will be in the interval [1, n]. The formula tries to allocate the next
-value for a particular node.
-
-Note: This function is also called with increment set to the number of
-values we want to reserve for multi-value inserts e.g.,
-
-	INSERT INTO T VALUES(), (), ();
-
-innobase_next_autoinc() will be called with increment set to
-n * 3 where autoinc_lock_mode != TRADITIONAL because we want
-to reserve 3 values for the multi-value INSERT above. */
-static
-ulonglong
-innobase_next_autoinc(
-/*==================*/
-					/* out: the next value */
-	ulonglong	current,	/* in: Current value */
-	ulonglong	increment,	/* in: increment current by */
-	ulonglong	offset,		/* in: AUTOINC offset */
-	ulonglong	max_value)	/* in: max value for type */
-{
-	ulonglong	next_value;
-
-	/* Should never be 0. */
-	ut_a(increment > 0);
-
-	/* According to MySQL documentation, if the offset is greater than
-	the increment then the offset is ignored. */
-	if (offset > increment) {
-		offset = 0;
-	}
-
-	if (max_value <= current) {
-		/* Already at or past the maximum: saturate. */
-		next_value = max_value;
-	} else if (offset <= 1) {
-		/* Offset 0 and 1 are the same, because there must be at
-		least one node in the system. Saturate at max_value when
-		the step would reach or pass it. */
-		next_value = (max_value - current <= increment)
-			? max_value
-			: current + increment;
-	} else {
-		/* Number of whole increments between current and offset,
-		plus one. */
-		ulonglong	distance = (current > offset)
-			? current - offset
-			: offset - current;
-
-		next_value = (distance / increment) + 1;
-
-		ut_a(increment > 0);
-		ut_a(next_value > 0);
-
-		if (increment > (max_value / next_value)) {
-			/* The multiplication would overflow. */
-			next_value = max_value;
-		} else {
-			next_value *= increment;
-
-			ut_a(max_value >= next_value);
-
-			if (max_value - next_value <= offset) {
-				/* Adding the offset would overflow. */
-				next_value = max_value;
-			} else {
-				next_value += offset;
-			}
-		}
-	}
-
-	ut_a(next_value <= max_value);
-
-	return(next_value);
-}
-
-/*************************************************************************
-Returns the InnoDB transaction handle of a MySQL thread, allocating an
-InnoDB transaction struct first if the thread does not have one yet. The
-foreign key and unique check settings of the trx are refreshed from the
-thd options on every call. */
-static
-trx_t*
-check_trx_exists(
-/*=============*/
-			/* out: InnoDB transaction handle */
-	THD*	thd)	/* in: user thread handle */
-{
-	trx_t*&	trx = thd_to_trx(thd);
-
-	ut_ad(thd == current_thd);
-
-	if (trx != NULL) {
-		/* An existing trx must carry the magic number. */
-		if (trx->magic_n != TRX_MAGIC_N) {
-			mem_analyze_corruption(trx);
-
-			ut_error;
-		}
-	} else {
-		DBUG_ASSERT(thd != NULL);
-		trx = trx_allocate_for_mysql();
-
-		trx->mysql_thd = thd;
-		trx->mysql_query_str = thd_query(thd);
-
-		/* Update the info whether we should skip XA steps that eat
-		CPU time */
-		trx->support_xa = THDVAR(thd, support_xa);
-	}
-
-	trx->check_foreigns =
-		thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)
-		? FALSE : TRUE;
-
-	trx->check_unique_secondary =
-		thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)
-		? FALSE : TRUE;
-
-	return(trx);
-}
-
-
-/*************************************************************************
-Construct ha_innobase handler.
*/
-
-ha_innobase::ha_innobase(handlerton *hton, TABLE_SHARE *table_arg)
-	:handler(hton, table_arg),
-	int_table_flags(HA_REC_NOT_IN_SEQ
-			| HA_NULL_IN_KEY
-			| HA_CAN_INDEX_BLOBS
-			| HA_CAN_SQL_HANDLER
-			| HA_PRIMARY_KEY_REQUIRED_FOR_POSITION
-			| HA_PRIMARY_KEY_IN_READ_INDEX
-			| HA_BINLOG_ROW_CAPABLE
-			| HA_CAN_GEOMETRY
-			| HA_PARTIAL_COLUMN_READ
-			| HA_TABLE_SCAN_ON_INDEX),
-	start_of_scan(0),
-	num_write_row(0)
-{
-	/* All state is set up by the initializer list. */
-}
-
-/*************************************************************************
-Makes the handle use the given thd: fetches (allocating if needed) the
-InnoDB transaction of the thd, points the prebuilt struct at that
-transaction if it refers to another one, and stores thd in user_thd. */
-inline
-int
-ha_innobase::update_thd(
-/*====================*/
-			/* out: 0 or error code */
-	THD*	thd)	/* in: thd to use the handle */
-{
-	trx_t* const	thd_trx = check_trx_exists(thd);
-
-	if (thd_trx != prebuilt->trx) {
-
-		row_update_prebuilt_trx(prebuilt, thd_trx);
-	}
-
-	user_thd = thd;
-
-	return(0);
-}
-
-/*************************************************************************
-Registers that InnoDB takes part in an SQL statement, so that MySQL knows to
-roll back the statement if the statement results in an error. This MUST be
-called for every SQL statement that may be rolled back by MySQL. Registering
-the same statement several times is allowed, too. */
-inline
-void
-innobase_register_stmt(
-/*===================*/
-	handlerton*	hton,	/* in: Innobase hton */
-	THD*		thd)	/* in: MySQL thd (connection) object */
-{
-	/* FALSE as the second argument: statement-level registration */
-	trans_register_ha(thd, FALSE, hton);
-}
-
-/*************************************************************************
-Registers an InnoDB transaction in MySQL, so that the MySQL XA code knows
-to call the InnoDB prepare and commit, or rollback for the transaction. This
-MUST be called for every transaction for which the user may call commit or
-rollback. Calling this several times to register the same transaction is
-allowed, too.
-This function also registers the current SQL statement. */
-inline
-void
-innobase_register_trx_and_stmt(
-/*===========================*/
-	handlerton *hton, /* in: Innobase handlerton */
-	THD*	thd)	/* in: MySQL thd (connection) object */
-{
-	/* Statement-level registration. NOTE that this actually
-	registers also the transaction in the AUTOCOMMIT=1 mode. */
-	trans_register_ha(thd, FALSE, hton);
-
-	if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
-
-		return;
-	}
-
-	/* No autocommit mode, register for a transaction */
-	trans_register_ha(thd, TRUE, hton);
-}
-
-/*   BACKGROUND INFO: HOW THE MYSQL QUERY CACHE WORKS WITH INNODB
-     ------------------------------------------------------------
-
-1) The use of the query cache for TBL is disabled when there is an
-uncommitted change to TBL.
-
-2) When a change to TBL commits, InnoDB stores the current value of
-its global trx id counter, let us denote it by INV_TRX_ID, to the table object
-in the InnoDB data dictionary, and does only allow such transactions whose
-id <= INV_TRX_ID to use the query cache.
-
-3) When InnoDB does an INSERT/DELETE/UPDATE to a table TBL, or an implicit
-modification because an ON DELETE CASCADE, we invalidate the MySQL query cache
-of TBL immediately.
-
-How this is implemented inside InnoDB:
-
-1) Since every modification always sets an IX type table lock on the InnoDB
-table, it is easy to check if there can be uncommitted modifications for a
-table: just check if there are locks in the lock list of the table.
-
-2) When a transaction inside InnoDB commits, it reads the global trx id
-counter and stores the value INV_TRX_ID to the tables on which it had a lock.
-
-3) If there is an implicit table change from ON DELETE CASCADE or SET NULL,
-InnoDB calls an invalidate method for the MySQL query cache for that table.
-
-How this is implemented inside sql_cache.cc:
-
-1) The query cache for an InnoDB table TBL is invalidated immediately at an
-INSERT/UPDATE/DELETE, just like in the case of MyISAM. No need to delay
-invalidation to the transaction commit.
-
-2) To store or retrieve a value from the query cache of an InnoDB table TBL,
-any query must first ask InnoDB's permission. We must pass the thd as a
-parameter because InnoDB will look at the trx id, if any, associated with
-that thd.
-
-3) Use of the query cache for InnoDB tables is now allowed also when
-AUTOCOMMIT==0 or we are inside BEGIN ... COMMIT. Thus transactions no longer
-put restrictions on the use of the query cache.
-*/
-
-/**********************************************************************
-The MySQL query cache uses this to check from InnoDB if the query cache at
-the moment is allowed to operate on an InnoDB table. The SQL query must
-be a non-locking SELECT.
-
-The query cache is allowed to operate on a certain query only if this function
-returns TRUE for all tables in the query.
-
-If thd is not in the autocommit state, this function also starts a new
-transaction for thd if there is no active trx yet, and assigns a consistent
-read view to it if there is no read view yet.
-
-Why a deadlock of threads is not possible: the query cache calls this function
-at the start of SELECT processing. Then the calling thread cannot be
-holding any InnoDB semaphores. The calling thread is holding the
-query cache mutex, and this function will reserve the InnoDB kernel mutex.
-Thus, the 'rank' in sync0sync.h of the MySQL query cache mutex is above
-the InnoDB kernel mutex.
*/ -static -my_bool -innobase_query_caching_of_table_permitted( -/*======================================*/ - /* out: TRUE if permitted, FALSE if not; - note that the value FALSE does not mean - we should invalidate the query cache: - invalidation is called explicitly */ - THD* thd, /* in: thd of the user who is trying to - store a result to the query cache or - retrieve it */ - char* full_name, /* in: concatenation of database name, - the null character '\0', and the table - name */ - uint full_name_len, /* in: length of the full name, i.e. - len(dbname) + len(tablename) + 1 */ - ulonglong *unused) /* unused for this engine */ -{ - ibool is_autocommit; - trx_t* trx; - char norm_name[1000]; - - ut_a(full_name_len < 999); - - trx = check_trx_exists(thd); - - if (trx->isolation_level == TRX_ISO_SERIALIZABLE) { - /* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every - plain SELECT if AUTOCOMMIT is not on. */ - - return((my_bool)FALSE); - } - - if (trx->has_search_latch) { - sql_print_error("The calling thread is holding the adaptive " - "search, latch though calling " - "innobase_query_caching_of_table_permitted."); - - mutex_enter_noninline(&kernel_mutex); - trx_print(stderr, trx, 1024); - mutex_exit_noninline(&kernel_mutex); - } - - innobase_release_stat_resources(trx); - - if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { - - is_autocommit = TRUE; - } else { - is_autocommit = FALSE; - - } - - if (is_autocommit && trx->n_mysql_tables_in_use == 0) { - /* We are going to retrieve the query result from the query - cache. This cannot be a store operation to the query cache - because then MySQL would have locks on tables already. - - TODO: if the user has used LOCK TABLES to lock the table, - then we open a transaction in the call of row_.. below. - That trx can stay open until UNLOCK TABLES. The same problem - exists even if we do not use the query cache. 
MySQL should be - modified so that it ALWAYS calls some cleanup function when - the processing of a query ends! - - We can imagine we instantaneously serialize this consistent - read trx to the current trx id counter. If trx2 would have - changed the tables of a query result stored in the cache, and - trx2 would have already committed, making the result obsolete, - then trx2 would have already invalidated the cache. Thus we - can trust the result in the cache is ok for this query. */ - - return((my_bool)TRUE); - } - - /* Normalize the table name to InnoDB format */ - - memcpy(norm_name, full_name, full_name_len); - - norm_name[strlen(norm_name)] = '/'; /* InnoDB uses '/' as the - separator between db and table */ - norm_name[full_name_len] = '\0'; -#ifdef __WIN__ - innobase_casedn_str(norm_name); -#endif - /* The call of row_search_.. will start a new transaction if it is - not yet started */ - - if (trx->active_trans == 0) { - - innobase_register_trx_and_stmt(innodb_hton_ptr, thd); - trx->active_trans = 1; - } - - if (row_search_check_if_query_cache_permitted(trx, norm_name)) { - - /* printf("Query cache for %s permitted\n", norm_name); */ - - return((my_bool)TRUE); - } - - /* printf("Query cache for %s NOT permitted\n", norm_name); */ - - return((my_bool)FALSE); -} - -/********************************************************************* -Invalidates the MySQL query cache for the table. -NOTE that the exact prototype of this function has to be in -/innobase/row/row0ins.c! */ -extern "C" -void -innobase_invalidate_query_cache( -/*============================*/ - trx_t* trx, /* in: transaction which modifies the table */ - char* full_name, /* in: concatenation of database name, null - char '\0', table name, null char'\0'; - NOTE that in Windows this is always - in LOWER CASE! */ - ulint full_name_len) /* in: full name length where also the null - chars count */ -{ - /* Note that the sync0sync.h rank of the query cache mutex is just - above the InnoDB kernel mutex. 
The caller of this function must not - have latches of a lower rank. */ - - /* Argument TRUE below means we are using transactions */ -#ifdef HAVE_QUERY_CACHE - mysql_query_cache_invalidate4((THD*) trx->mysql_thd, - (const char*) full_name, - (uint32) full_name_len, - TRUE); -#endif -} - -/********************************************************************* -Display an SQL identifier. */ -extern "C" -void -innobase_print_identifier( -/*======================*/ - FILE* f, /* in: output stream */ - trx_t* trx, /* in: transaction */ - ibool table_id,/* in: TRUE=print a table name, - FALSE=print other identifier */ - const char* name, /* in: name to print */ - ulint namelen)/* in: length of name */ -{ - const char* s = name; - char* qname = NULL; - int q; - - if (table_id) { - /* Decode the table name. The filename_to_tablename() - function expects a NUL-terminated string. The input and - output strings buffers must not be shared. The function - only produces more output when the name contains other - characters than [0-9A-Z_a-z]. 
*/ - char* temp_name = (char*) my_malloc((uint) namelen + 1, MYF(MY_WME)); - uint qnamelen = (uint) (namelen - + (1 + sizeof srv_mysql50_table_name_prefix)); - - if (temp_name) { - qname = (char*) my_malloc(qnamelen, MYF(MY_WME)); - if (qname) { - memcpy(temp_name, name, namelen); - temp_name[namelen] = 0; - s = qname; - namelen = filename_to_tablename(temp_name, - qname, qnamelen); - } - my_free(temp_name, MYF(0)); - } - } - - if (!trx || !trx->mysql_thd) { - - q = '"'; - } else { - q = get_quote_char_for_identifier((THD*) trx->mysql_thd, - s, (int) namelen); - } - - if (q == EOF) { - fwrite(s, 1, namelen, f); - } else { - const char* e = s + namelen; - putc(q, f); - while (s < e) { - int c = *s++; - if (c == q) { - putc(c, f); - } - putc(c, f); - } - putc(q, f); - } - - my_free(qname, MYF(MY_ALLOW_ZERO_PTR)); -} - -/************************************************************************** -Determines if the currently running transaction has been interrupted. */ -extern "C" -ibool -trx_is_interrupted( -/*===============*/ - /* out: TRUE if interrupted */ - trx_t* trx) /* in: transaction */ -{ - return(trx && trx->mysql_thd && thd_killed((THD*) trx->mysql_thd)); -} - -/****************************************************************** -Resets some fields of a prebuilt struct. The template is used in fast -retrieval of just those column values MySQL needs in its processing. */ -static -void -reset_template( -/*===========*/ - row_prebuilt_t* prebuilt) /* in/out: prebuilt struct */ -{ - prebuilt->keep_other_fields_on_keyread = 0; - prebuilt->read_just_key = 0; -} - -/********************************************************************* -Call this when you have opened a new table handle in HANDLER, before you -call index_read_idx() etc. Actually, we can let the cursor stay open even -over a transaction commit! Then you should call this before every operation, -fetch next etc. This function inits the necessary things even after a -transaction commit. 
*/
-
-void
-ha_innobase::init_table_handle_for_HANDLER(void)
-/*============================================*/
-{
-	trx_t*	trx;
-
-	/* Make sure the current thd has a trx struct and that the
-	prebuilt struct points to it. Normally this operation is done
-	in external_lock. */
-
-	update_thd(ha_thd());
-
-	trx = prebuilt->trx;
-
-	/* From here on the prebuilt struct is set up much like
-	external_lock would set it up */
-
-	innobase_release_stat_resources(trx);
-
-	/* Start the transaction unless it is already started, and give
-	it a read view if it does not have one yet */
-
-	trx_start_if_not_started_noninline(trx);
-
-	trx_assign_read_view(trx);
-
-	/* Set the MySQL flag to mark that there is an active transaction */
-
-	if (trx->active_trans == 0) {
-
-		innobase_register_trx_and_stmt(ht, user_thd);
-
-		trx->active_trans = 1;
-	}
-
-	/* The necessary inits were done above, so row_search_for_mysql
-	need not repeat them */
-
-	prebuilt->sql_stat_start = FALSE;
-
-	/* HANDLER always reads with consistent reads, even if the trx
-	isolation level would have been specified as SERIALIZABLE */
-
-	prebuilt->select_lock_type = LOCK_NONE;
-	prebuilt->stored_select_lock_type = LOCK_NONE;
-
-	/* Always fetch all columns in the index record */
-
-	prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
-
-	/* We want always to fetch all columns in the whole row? Or do
-	we???? */
-
-	prebuilt->used_in_HANDLER = TRUE;
-	reset_template(prebuilt);
-}
-
-/*************************************************************************
-Opens an InnoDB database.
*/
-static
-int
-innobase_init(
-/*==========*/
-			/* out: 0 on success, error code on failure */
-	void	*p)	/* in: InnoDB handlerton */
-{
-	static char	current_dir[3];		/* Set if using current lib */
-	int		err;
-	bool		ret;
-	char		*default_path;
-
-	DBUG_ENTER("innobase_init");
-	handlerton *innobase_hton= (handlerton *)p;
-	/* Remember the handlerton in the file-level pointer. */
-	innodb_hton_ptr = innobase_hton;
-
-	/* Fill in the handlerton: engine state and type, then the
-	callbacks through which the server calls into InnoDB. */
-	innobase_hton->state = SHOW_OPTION_YES;
-	innobase_hton->db_type= DB_TYPE_INNODB;
-	innobase_hton->savepoint_offset=sizeof(trx_named_savept_t);
-	innobase_hton->close_connection=innobase_close_connection;
-	innobase_hton->savepoint_set=innobase_savepoint;
-	innobase_hton->savepoint_rollback=innobase_rollback_to_savepoint;
-	innobase_hton->savepoint_release=innobase_release_savepoint;
-	innobase_hton->commit=innobase_commit;
-	innobase_hton->rollback=innobase_rollback;
-	innobase_hton->prepare=innobase_xa_prepare;
-	innobase_hton->recover=innobase_xa_recover;
-	innobase_hton->commit_by_xid=innobase_commit_by_xid;
-	innobase_hton->rollback_by_xid=innobase_rollback_by_xid;
-	innobase_hton->create_cursor_read_view=innobase_create_cursor_view;
-	innobase_hton->set_cursor_read_view=innobase_set_cursor_view;
-	innobase_hton->close_cursor_read_view=innobase_close_cursor_view;
-	innobase_hton->create=innobase_create_handler;
-	innobase_hton->drop_database=innobase_drop_database;
-	innobase_hton->panic=innobase_end;
-	innobase_hton->start_consistent_snapshot=innobase_start_trx_and_assign_read_view;
-	innobase_hton->flush_logs=innobase_flush_logs;
-	innobase_hton->show_status=innobase_show_status;
-	innobase_hton->flags=HTON_NO_FLAGS;
-	innobase_hton->release_temporary_latches=innobase_release_temporary_latches;
-
-	/* The InnoDB constant must equal the MySQL type code. */
-	ut_a(DATA_MYSQL_TRUE_VARCHAR == (ulint)MYSQL_TYPE_VARCHAR);
-
-#ifdef UNIV_DEBUG
-	/* Debug-only self test: converting the file name "-@" must give
-	srv_mysql50_table_name_prefix followed by the unchanged name. */
-	static const char	test_filename[] = "-@";
-	char			test_tablename[sizeof test_filename
-				+ sizeof srv_mysql50_table_name_prefix];
-	if ((sizeof test_tablename) - 1
-			!= filename_to_tablename(test_filename, test_tablename,
-			sizeof test_tablename)
-			|| strncmp(test_tablename,
-			srv_mysql50_table_name_prefix,
-			sizeof srv_mysql50_table_name_prefix)
-			|| strcmp(test_tablename
-			+ sizeof srv_mysql50_table_name_prefix,
-			test_filename)) {
-		sql_print_error("tablename encoding has been changed");
-		goto error;
-	}
-#endif /* UNIV_DEBUG */
-
-	/* Check that values don't overflow on 32-bit systems. */
-	if (sizeof(ulint) == 4) {
-		if (innobase_buffer_pool_size > UINT_MAX32) {
-			sql_print_error(
-				"innobase_buffer_pool_size can't be over 4GB"
-				" on 32-bit systems");
-
-			goto error;
-		}
-
-		if (innobase_log_file_size > UINT_MAX32) {
-			sql_print_error(
-				"innobase_log_file_size can't be over 4GB"
-				" on 32-bit systems");
-
-			goto error;
-		}
-	}
-
-	os_innodb_umask = (ulint)my_umask;
-
-	/* First calculate the default path for innodb_data_home_dir etc.,
-	in case the user has not given any value.
-
-	Note that when using the embedded server, the datadirectory is not
-	necessarily the current directory of this program. */
-
-	if (mysqld_embedded) {
-		default_path = mysql_real_data_home;
-		fil_path_to_mysql_datadir = mysql_real_data_home;
-	} else {
-		/* It's better to use current lib, to keep paths short */
-		current_dir[0] = FN_CURLIB;
-		current_dir[1] = FN_LIBCHAR;
-		current_dir[2] = 0;
-		default_path = current_dir;
-	}
-
-	ut_a(default_path);
-
-	/* Thread priorities are set unless SPECIAL_NO_PRIOR is given. */
-	if (specialflag & SPECIAL_NO_PRIOR) {
-		srv_set_thread_priorities = FALSE;
-	} else {
-		srv_set_thread_priorities = TRUE;
-		srv_query_thread_priority = QUERY_PRIOR;
-	}
-
-	/* Set InnoDB initialization parameters according to the values
-	read from MySQL .cnf file */
-
-	/*--------------- Data files -------------------------*/
-
-	/* The default dir for data files is the datadir of MySQL */
-
-	srv_data_home = (innobase_data_home_dir ? innobase_data_home_dir :
-			 default_path);
-
-	/* Set default InnoDB data file size to 10 MB and let it be
-	auto-extending. Thus users can use InnoDB in >= 4.0 without having
-	to specify any startup options. */
-
-	if (!innobase_data_file_path) {
-		innobase_data_file_path = (char*) "ibdata1:10M:autoextend";
-	}
-
-	/* Since InnoDB edits the argument in the next call, we make another
-	copy of it: */
-
-	internal_innobase_data_file_path = my_strdup(innobase_data_file_path,
-						   MYF(MY_FAE));
-
-	ret = (bool) srv_parse_data_file_paths_and_sizes(
-			internal_innobase_data_file_path,
-			&srv_data_file_names,
-			&srv_data_file_sizes,
-			&srv_data_file_is_raw_partition,
-			&srv_n_data_files,
-			&srv_auto_extend_last_data_file,
-			&srv_last_file_size_max);
-	if (ret == FALSE) {
-		sql_print_error(
-			"InnoDB: syntax error in innodb_data_file_path");
-		/* From here on every error exit frees the copy made above. */
-		my_free(internal_innobase_data_file_path,
-						MYF(MY_ALLOW_ZERO_PTR));
-		goto error;
-	}
-
-	/* -------------- Log files ---------------------------*/
-
-	/* The default dir for log files is the datadir of MySQL */
-
-	if (!innobase_log_group_home_dir) {
-		innobase_log_group_home_dir = default_path;
-	}
-
-#ifdef UNIV_LOG_ARCHIVE
-	/* Since innodb_log_arch_dir has no relevance under MySQL,
-	starting from 4.0.6 we always set it the same as
-	innodb_log_group_home_dir: */
-
-	innobase_log_arch_dir = innobase_log_group_home_dir;
-
-	srv_arch_dir = innobase_log_arch_dir;
-#endif /* UNIV_LOG_ARCHIVE */
-
-	ret = (bool)
-		srv_parse_log_group_home_dirs(innobase_log_group_home_dir,
-						&srv_log_group_home_dirs);
-
-	/* Exactly one mirrored log group is accepted. */
-	if (ret == FALSE || innobase_mirrored_log_groups != 1) {
-	  sql_print_error("syntax error in innodb_log_group_home_dir, or a "
-			  "wrong number of mirrored log groups");
-
-		my_free(internal_innobase_data_file_path,
-						MYF(MY_ALLOW_ZERO_PTR));
-		goto error;
-	}
-
-	/* --------------------------------------------------*/
-
-	/* Copy the remaining innobase_* settings into the srv_*
-	variables. */
-
-	srv_file_flush_method_str = innobase_unix_file_flush_method;
-
-	srv_n_log_groups = (ulint) innobase_mirrored_log_groups;
-	srv_n_log_files = (ulint) innobase_log_files_in_group;
-	srv_log_file_size = (ulint) innobase_log_file_size;
-
-	srv_thread_concurrency_timer_based =
-		(ibool) innobase_thread_concurrency_timer_based;
-
-#ifdef UNIV_LOG_ARCHIVE
-	srv_log_archive_on = (ulint) innobase_log_archive;
-#endif /* UNIV_LOG_ARCHIVE */
-	srv_log_buffer_size = (ulint) innobase_log_buffer_size;
-
-	srv_io_capacity = (ulint) innobase_io_capacity;
-	srv_extra_dirty_writes = (ulint) innobase_extra_dirty_writes;
-
-	/* We set srv_pool_size here in units of 1 kB. InnoDB internally
-	changes the value so that it becomes the number of database pages. */
-
-	if (innobase_buffer_pool_awe_mem_mb == 0) {
-		srv_pool_size = (ulint)(innobase_buffer_pool_size / 1024);
-	} else {
-		srv_use_awe = TRUE;
-		srv_pool_size = (ulint)
-				(1024 * innobase_buffer_pool_awe_mem_mb);
-		srv_awe_window_size = (ulint) innobase_buffer_pool_size;
-
-		/* Note that what the user specified as
-		innodb_buffer_pool_size is actually the AWE memory window
-		size in this case, and the real buffer pool size is
-		determined by .._awe_mem_mb. */
-	}
-
-	srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
-
-	srv_n_read_io_threads = (ulint) innobase_read_io_threads;
-	srv_n_write_io_threads = (ulint) innobase_write_io_threads;
-	srv_max_merged_io = (ulint) innobase_max_merged_io;
-
-	srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
-	srv_force_recovery = (ulint) innobase_force_recovery;
-
-	srv_use_doublewrite_buf = (ibool) innobase_use_doublewrite;
-	srv_use_checksums = (ibool) innobase_use_checksums;
-
-#ifdef HAVE_LARGE_PAGES
-	/* The page size is copied only when large pages are in use. */
-	if ((os_use_large_pages = (ibool) my_use_large_pages))
-		os_large_page_size = (ulint) opt_large_page_size;
-#endif
-
-	row_rollback_on_timeout = (ibool) innobase_rollback_on_timeout;
-
-	srv_file_per_table = (ibool) innobase_file_per_table;
-	srv_locks_unsafe_for_binlog = (ibool) innobase_locks_unsafe_for_binlog;
-
-	srv_max_n_open_files = (ulint) innobase_open_files;
-	srv_innodb_status = (ibool) innobase_create_status_file;
-
-	srv_use_adaptive_hash_indexes =
-		(ibool) innobase_adaptive_hash_index;
-
-	/* Verbose logging is switched off for the embedded server. */
-	srv_print_verbose_log = mysqld_embedded ? 0 : 1;
-
-	/* Store the default charset-collation number of this MySQL
-	installation */
-
-	data_mysql_default_charset_coll = (ulint)default_charset_info->number;
-
-	ut_a(DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL ==
-					my_charset_latin1.number);
-	ut_a(DATA_MYSQL_BINARY_CHARSET_COLL == my_charset_bin.number);
-
-	/* Store the latin1_swedish_ci character ordering table to InnoDB. For
-	non-latin1_swedish_ci charsets we use the MySQL comparison functions,
-	and consequently we do not need to know the ordering internally in
-	InnoDB. */
-
-	ut_a(0 == strcmp((char*)my_charset_latin1.name,
-						(char*)"latin1_swedish_ci"));
-	memcpy(srv_latin1_ordering, my_charset_latin1.sort_order, 256);
-
-	/* Since we in this module access directly the fields of a trx
-	struct, and due to different headers and flags it might happen that
-	mutex_t has a different size in this module and in InnoDB
-	modules, we check at run time that the size is the same in
-	these compilation modules. */
-
-	srv_sizeof_trx_t_in_ha_innodb_cc = sizeof(trx_t);
-
-	/* Start InnoDB itself with the parameters set above. */
-	err = innobase_start_or_create_for_mysql();
-
-	if (err != DB_SUCCESS) {
-		my_free(internal_innobase_data_file_path,
-						MYF(MY_ALLOW_ZERO_PTR));
-		goto error;
-	}
-
-	/* Startup succeeded: set up the open-tables hash and the mutexes
-	and condition variable of this module, then mark InnoDB as
-	initialized. */
-	(void) hash_init(&innobase_open_tables,system_charset_info, 32, 0, 0,
-					(hash_get_key) innobase_get_key, 0, 0);
-	pthread_mutex_init(&innobase_share_mutex, MY_MUTEX_INIT_FAST);
-	pthread_mutex_init(&prepare_commit_mutex, MY_MUTEX_INIT_FAST);
-	pthread_mutex_init(&commit_threads_m, MY_MUTEX_INIT_FAST);
-	pthread_mutex_init(&commit_cond_m, MY_MUTEX_INIT_FAST);
-	pthread_cond_init(&commit_cond, NULL);
-	innodb_inited= 1;
-
-	DBUG_RETURN(FALSE);
-error:
-	DBUG_RETURN(TRUE);
-}
-
-/***********************************************************************
-Closes an InnoDB database.
*/ -static -int -innobase_end(handlerton *hton, ha_panic_function type) -/*==============*/ - /* out: TRUE if error */ -{ - int err= 0; - - DBUG_ENTER("innobase_end"); - -#ifdef __NETWARE__ /* some special cleanup for NetWare */ - if (nw_panic) { - set_panic_flag_for_netware(); - } -#endif - if (innodb_inited) { - - srv_fast_shutdown = (ulint) innobase_fast_shutdown; - innodb_inited = 0; - if (innobase_shutdown_for_mysql() != DB_SUCCESS) { - err = 1; - } - hash_free(&innobase_open_tables); - my_free(internal_innobase_data_file_path, - MYF(MY_ALLOW_ZERO_PTR)); - pthread_mutex_destroy(&innobase_share_mutex); - pthread_mutex_destroy(&prepare_commit_mutex); - pthread_mutex_destroy(&commit_threads_m); - pthread_mutex_destroy(&commit_cond_m); - pthread_cond_destroy(&commit_cond); - } - - DBUG_RETURN(err); -} - -/******************************************************************** -Flushes InnoDB logs to disk and makes a checkpoint. Really, a commit flushes -the logs, and the name of this function should be innobase_checkpoint. */ -static -bool -innobase_flush_logs(handlerton *hton) -/*=====================*/ - /* out: TRUE if error */ -{ - bool result = 0; - - DBUG_ENTER("innobase_flush_logs"); - - log_buffer_flush_to_disk(); - - DBUG_RETURN(result); -} - -/********************************************************************* -Commits a transaction in an InnoDB database. */ -static -void -innobase_commit_low( -/*================*/ - trx_t* trx) /* in: transaction handle */ -{ - if (trx->conc_state == TRX_NOT_STARTED) { - - return; - } - - trx_commit_for_mysql(trx); -} - -/********************************************************************* -Creates an InnoDB transaction struct for the thd if it does not yet have one. -Starts a new InnoDB transaction if a transaction is not yet started. And -assigns a new snapshot for a consistent read if the transaction does not yet -have one. 
*/ -static -int -innobase_start_trx_and_assign_read_view( -/*====================================*/ - /* out: 0 */ - handlerton *hton, /* in: Innodb handlerton */ - THD* thd) /* in: MySQL thread handle of the user for whom - the transaction should be committed */ -{ - trx_t* trx; - - DBUG_ENTER("innobase_start_trx_and_assign_read_view"); - - /* Create a new trx struct for thd, if it does not yet have one */ - - trx = check_trx_exists(thd); - - /* This is just to play safe: release a possible FIFO ticket and - search latch. Since we will reserve the kernel mutex, we have to - release the search system latch first to obey the latching order. */ - - innobase_release_stat_resources(trx); - - /* If the transaction is not started yet, start it */ - - trx_start_if_not_started_noninline(trx); - - /* Assign a read view if the transaction does not have it yet */ - - trx_assign_read_view(trx); - - /* Set the MySQL flag to mark that there is an active transaction */ - - if (trx->active_trans == 0) { - innobase_register_trx_and_stmt(hton, current_thd); - trx->active_trans = 1; - } - - DBUG_RETURN(0); -} - -/********************************************************************* -Commits a transaction in an InnoDB database or marks an SQL statement -ended. */ -static -int -innobase_commit( -/*============*/ - /* out: 0 */ - handlerton *hton, /* in: Innodb handlerton */ - THD* thd, /* in: MySQL thread handle of the user for whom - the transaction should be committed */ - bool all) /* in: TRUE - commit transaction - FALSE - the current SQL statement ended */ -{ - trx_t* trx; - - DBUG_ENTER("innobase_commit"); - DBUG_PRINT("trans", ("ending transaction")); - - trx = check_trx_exists(thd); - - /* Update the info whether we should skip XA steps that eat CPU time */ - trx->support_xa = THDVAR(thd, support_xa); - - /* Since we will reserve the kernel mutex, we have to release - the search system latch first to obey the latching order. 
*/ - - if (trx->has_search_latch) { - trx_search_latch_release_if_reserved(trx); - } - - /* The flag trx->active_trans is set to 1 in - - 1. ::external_lock(), - 2. ::start_stmt(), - 3. innobase_query_caching_of_table_permitted(), - 4. innobase_savepoint(), - 5. ::init_table_handle_for_HANDLER(), - 6. innobase_start_trx_and_assign_read_view(), - 7. ::transactional_table_lock() - - and it is only set to 0 in a commit or a rollback. If it is 0 we know - there cannot be resources to be freed and we could return immediately. - For the time being, we play safe and do the cleanup though there should - be nothing to clean up. */ - - if (trx->active_trans == 0 - && trx->conc_state != TRX_NOT_STARTED) { - - sql_print_error("trx->active_trans == 0, but" - " trx->conc_state != TRX_NOT_STARTED"); - } - if (all - || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { - - /* We were instructed to commit the whole transaction, or - this is an SQL statement end and autocommit is on */ - - /* We need current binlog position for ibbackup to work. 
- Note, the position is current because of - prepare_commit_mutex */ -retry: - if (srv_commit_concurrency > 0) { - pthread_mutex_lock(&commit_cond_m); - commit_threads++; - - if (commit_threads > srv_commit_concurrency) { - commit_threads--; - pthread_cond_wait(&commit_cond, - &commit_cond_m); - pthread_mutex_unlock(&commit_cond_m); - goto retry; - } - else { - pthread_mutex_unlock(&commit_cond_m); - } - } - - trx->mysql_log_file_name = mysql_bin_log_file_name(); - trx->mysql_log_offset = (ib_longlong) mysql_bin_log_file_pos(); - - innobase_commit_low(trx); - - if (srv_commit_concurrency > 0) { - pthread_mutex_lock(&commit_cond_m); - commit_threads--; - pthread_cond_signal(&commit_cond); - pthread_mutex_unlock(&commit_cond_m); - } - - if (trx->active_trans == 2) { - - pthread_mutex_unlock(&prepare_commit_mutex); - } - - trx->active_trans = 0; - - } else { - /* We just mark the SQL statement ended and do not do a - transaction commit */ - - /* If we had reserved the auto-inc lock for some - table in this SQL statement we release it now */ - - row_unlock_table_autoinc_for_mysql(trx); - - /* Store the current undo_no of the transaction so that we - know where to roll back if we have to roll back the next - SQL statement */ - - trx_mark_sql_stat_end(trx); - } - - trx->n_autoinc_rows = 0; /* Reset the number AUTO-INC rows required */ - - if (trx->declared_to_be_inside_innodb) { - /* Release our possible ticket in the FIFO */ - - srv_conc_force_exit_innodb(trx); - } - - /* Tell the InnoDB server that there might be work for utility - threads: */ - srv_active_wake_master_thread(); - - DBUG_RETURN(0); -} - -/********************************************************************* -Rolls back a transaction or the latest SQL statement. 
*/ -static -int -innobase_rollback( -/*==============*/ - /* out: 0 or error number */ - handlerton *hton, /* in: Innodb handlerton */ - THD* thd, /* in: handle to the MySQL thread of the user - whose transaction should be rolled back */ - bool all) /* in: TRUE - commit transaction - FALSE - the current SQL statement ended */ -{ - int error = 0; - trx_t* trx; - - DBUG_ENTER("innobase_rollback"); - DBUG_PRINT("trans", ("aborting transaction")); - - trx = check_trx_exists(thd); - - /* Update the info whether we should skip XA steps that eat CPU time */ - trx->support_xa = THDVAR(thd, support_xa); - - /* Release a possible FIFO ticket and search latch. Since we will - reserve the kernel mutex, we have to release the search system latch - first to obey the latching order. */ - - innobase_release_stat_resources(trx); - - /* If we had reserved the auto-inc lock for some table (if - we come here to roll back the latest SQL statement) we - release it now before a possibly lengthy rollback */ - - row_unlock_table_autoinc_for_mysql(trx); - - if (all - || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { - - error = trx_rollback_for_mysql(trx); - trx->active_trans = 0; - } else { - error = trx_rollback_last_sql_stat_for_mysql(trx); - } - - DBUG_RETURN(convert_error_code_to_mysql(error, NULL)); -} - -/********************************************************************* -Rolls back a transaction */ -static -int -innobase_rollback_trx( -/*==================*/ - /* out: 0 or error number */ - trx_t* trx) /* in: transaction */ -{ - int error = 0; - - DBUG_ENTER("innobase_rollback_trx"); - DBUG_PRINT("trans", ("aborting transaction")); - - /* Release a possible FIFO ticket and search latch. Since we will - reserve the kernel mutex, we have to release the search system latch - first to obey the latching order. 
*/ - - innobase_release_stat_resources(trx); - - /* If we had reserved the auto-inc lock for some table (if - we come here to roll back the latest SQL statement) we - release it now before a possibly lengthy rollback */ - - row_unlock_table_autoinc_for_mysql(trx); - - error = trx_rollback_for_mysql(trx); - - DBUG_RETURN(convert_error_code_to_mysql(error, NULL)); -} - -/********************************************************************* -Rolls back a transaction to a savepoint. */ -static -int -innobase_rollback_to_savepoint( -/*===========================*/ - /* out: 0 if success, HA_ERR_NO_SAVEPOINT if - no savepoint with the given name */ - handlerton *hton, /* in: Innodb handlerton */ - THD* thd, /* in: handle to the MySQL thread of the user - whose transaction should be rolled back */ - void* savepoint) /* in: savepoint data */ -{ - ib_longlong mysql_binlog_cache_pos; - int error = 0; - trx_t* trx; - char name[64]; - - DBUG_ENTER("innobase_rollback_to_savepoint"); - - trx = check_trx_exists(thd); - - /* Release a possible FIFO ticket and search latch. Since we will - reserve the kernel mutex, we have to release the search system latch - first to obey the latching order. */ - - innobase_release_stat_resources(trx); - - /* TODO: use provided savepoint data area to store savepoint data */ - - longlong2str((ulint)savepoint, name, 36); - - error = (int) trx_rollback_to_savepoint_for_mysql(trx, name, - &mysql_binlog_cache_pos); - DBUG_RETURN(convert_error_code_to_mysql(error, NULL)); -} - -/********************************************************************* -Release transaction savepoint name. 
*/ -static -int -innobase_release_savepoint( -/*=======================*/ - /* out: 0 if success, HA_ERR_NO_SAVEPOINT if - no savepoint with the given name */ - handlerton* hton, /* in: handlerton for Innodb */ - THD* thd, /* in: handle to the MySQL thread of the user - whose transaction should be rolled back */ - void* savepoint) /* in: savepoint data */ -{ - int error = 0; - trx_t* trx; - char name[64]; - - DBUG_ENTER("innobase_release_savepoint"); - - trx = check_trx_exists(thd); - - /* TODO: use provided savepoint data area to store savepoint data */ - - longlong2str((ulint)savepoint, name, 36); - - error = (int) trx_release_savepoint_for_mysql(trx, name); - - DBUG_RETURN(convert_error_code_to_mysql(error, NULL)); -} - -/********************************************************************* -Sets a transaction savepoint. */ -static -int -innobase_savepoint( -/*===============*/ - /* out: always 0, that is, always succeeds */ - handlerton* hton, /* in: handle to the Innodb handlerton */ - THD* thd, /* in: handle to the MySQL thread */ - void* savepoint) /* in: savepoint data */ -{ - int error = 0; - trx_t* trx; - - DBUG_ENTER("innobase_savepoint"); - - /* - In the autocommit mode there is no sense to set a savepoint - (unless we are in sub-statement), so SQL layer ensures that - this method is never called in such situation. - */ -#ifdef MYSQL_SERVER /* plugins cannot access thd->in_sub_stmt */ - DBUG_ASSERT(thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN) || - thd->in_sub_stmt); -#endif /* MYSQL_SERVER */ - - trx = check_trx_exists(thd); - - /* Release a possible FIFO ticket and search latch. Since we will - reserve the kernel mutex, we have to release the search system latch - first to obey the latching order. 
*/ - - innobase_release_stat_resources(trx); - - /* cannot happen outside of transaction */ - DBUG_ASSERT(trx->active_trans); - - /* TODO: use provided savepoint data area to store savepoint data */ - char name[64]; - longlong2str((ulint)savepoint,name,36); - - error = (int) trx_savepoint_for_mysql(trx, name, (ib_longlong)0); - - DBUG_RETURN(convert_error_code_to_mysql(error, NULL)); -} - -/********************************************************************* -Frees a possible InnoDB trx object associated with the current THD. */ -static -int -innobase_close_connection( -/*======================*/ - /* out: 0 or error number */ - handlerton* hton, /* in: innobase handlerton */ - THD* thd) /* in: handle to the MySQL thread of the user - whose resources should be free'd */ -{ - trx_t* trx; - - DBUG_ENTER("innobase_close_connection"); - DBUG_ASSERT(hton == innodb_hton_ptr); - trx = thd_to_trx(thd); - - ut_a(trx); - - if (trx->active_trans == 0 - && trx->conc_state != TRX_NOT_STARTED) { - - sql_print_error("trx->active_trans == 0, but" - " trx->conc_state != TRX_NOT_STARTED"); - } - - - if (trx->conc_state != TRX_NOT_STARTED && - global_system_variables.log_warnings) { - sql_print_warning( - "MySQL is closing a connection that has an active " - "InnoDB transaction. %lu row modifications will " - "roll back.", - (ulong) trx->undo_no.low); - } - - innobase_rollback_trx(trx); - - thr_local_free(trx->mysql_thread_id); - trx_free_for_mysql(trx); - - DBUG_RETURN(0); -} - - -/***************************************************************************** -** InnoDB database tables -*****************************************************************************/ - -/******************************************************************** -Get the record format from the data dictionary. 
*/ -enum row_type -ha_innobase::get_row_type() const -/*=============================*/ - /* out: ROW_TYPE_REDUNDANT or ROW_TYPE_COMPACT */ -{ - if (prebuilt && prebuilt->table) { - if (dict_table_is_comp_noninline(prebuilt->table)) { - return(ROW_TYPE_COMPACT); - } else { - return(ROW_TYPE_REDUNDANT); - } - } - ut_ad(0); - return(ROW_TYPE_NOT_USED); -} - - - -/******************************************************************** -Get the table flags to use for the statement. */ -handler::Table_flags -ha_innobase::table_flags() const -{ - /* Need to use tx_isolation here since table flags is (also) - called before prebuilt is inited. */ - ulong const tx_isolation = thd_tx_isolation(current_thd); - if (tx_isolation <= ISO_READ_COMMITTED) - return int_table_flags; - return int_table_flags | HA_BINLOG_STMT_CAPABLE; -} - -/******************************************************************** -Gives the file extension of an InnoDB single-table tablespace. */ -static const char* ha_innobase_exts[] = { - ".ibd", - NullS -}; - -const char** -ha_innobase::bas_ext() const -/*========================*/ - /* out: file extension string */ -{ - return ha_innobase_exts; -} - - -/********************************************************************* -Normalizes a table name string. A normalized name consists of the -database name catenated to '/' and table name. An example: -test/mytable. On Windows normalization puts both the database name and the -table name always to lower case. 
*/ -static -void -normalize_table_name( -/*=================*/ - char* norm_name, /* out: normalized name as a - null-terminated string */ - const char* name) /* in: table name string */ -{ - char* name_ptr; - char* db_ptr; - char* ptr; - - /* Scan name from the end */ - - ptr = strend(name)-1; - - while (ptr >= name && *ptr != '\\' && *ptr != '/') { - ptr--; - } - - name_ptr = ptr + 1; - - DBUG_ASSERT(ptr > name); - - ptr--; - - while (ptr >= name && *ptr != '\\' && *ptr != '/') { - ptr--; - } - - db_ptr = ptr + 1; - - memcpy(norm_name, db_ptr, strlen(name) + 1 - (db_ptr - name)); - - norm_name[name_ptr - db_ptr - 1] = '/'; - -#ifdef __WIN__ - innobase_casedn_str(norm_name); -#endif -} - -/************************************************************************ -Set the autoinc column max value. This should only be called once from -ha_innobase::open(). Therefore there's no need for a covering lock. */ - -ulong -ha_innobase::innobase_initialize_autoinc() -/*======================================*/ -{ - dict_index_t* index; - ulonglong auto_inc; - const char* col_name; - ulint error = DB_SUCCESS; - dict_table_t* innodb_table = prebuilt->table; - - col_name = table->found_next_number_field->field_name; - index = innobase_get_index(table->s->next_number_index); - - /* Execute SELECT MAX(col_name) FROM TABLE; */ - error = row_search_max_autoinc(index, col_name, &auto_inc); - - if (error == DB_SUCCESS) { - - /* At the this stage we dont' know the increment - or the offset, so use default inrement of 1. */ - ++auto_inc; - - dict_table_autoinc_initialize(innodb_table, auto_inc); - - } else { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error: (%lu) Couldn't read " - "the MAX(%s) autoinc value from the " - "index (%s).\n", error, col_name, index->name); - } - - return(ulong(error)); -} - -/********************************************************************* -Creates and opens a handle to a table which already exists in an InnoDB -database. 
*/ - -int -ha_innobase::open( -/*==============*/ - /* out: 1 if error, 0 if success */ - const char* name, /* in: table name */ - int mode, /* in: not used */ - uint test_if_locked) /* in: not used */ -{ - dict_table_t* ib_table; - char norm_name[1000]; - THD* thd; - ulint retries = 0; - char* is_part = NULL; - - DBUG_ENTER("ha_innobase::open"); - - UT_NOT_USED(mode); - UT_NOT_USED(test_if_locked); - - thd = ha_thd(); - - /* Under some cases MySQL seems to call this function while - holding btr_search_latch. This breaks the latching order as - we acquire dict_sys->mutex below and leads to a deadlock. */ - if (thd != NULL) { - innobase_release_temporary_latches(ht, thd); - } - - normalize_table_name(norm_name, name); - - user_thd = NULL; - - if (!(share=get_share(name))) { - - DBUG_RETURN(1); - } - - /* Create buffers for packing the fields of a record. Why - table->reclength did not work here? Obviously, because char - fields when packed actually became 1 byte longer, when we also - stored the string length as the first byte. */ - - upd_and_key_val_buff_len = - table->s->reclength + table->s->max_key_length - + MAX_REF_PARTS * 3; - if (!(uchar*) my_multi_malloc(MYF(MY_WME), - &upd_buff, upd_and_key_val_buff_len, - &key_val_buff, upd_and_key_val_buff_len, - NullS)) { - free_share(share); - - DBUG_RETURN(1); - } - - /* We look for pattern #P# to see if the table is partitioned - MySQL table. The retry logic for partitioned tables is a - workaround for http://bugs.mysql.com/bug.php?id=33349. Look - at support issue https://support.mysql.com/view.php?id=21080 - for more details. 
*/ - is_part = strstr(norm_name, "#P#"); -retry: - /* Get pointer to a table object in InnoDB dictionary cache */ - ib_table = dict_table_get(norm_name, TRUE); - - if (NULL == ib_table) { - if (is_part && retries < 10) { - ++retries; - os_thread_sleep(100000); - goto retry; - } - - if (is_part) { - sql_print_error("Failed to open table %s after " - "%lu attemtps.\n", norm_name, - retries); - } - - sql_print_error("Cannot find or open table %s from\n" - "the internal data dictionary of InnoDB " - "though the .frm file for the\n" - "table exists. Maybe you have deleted and " - "recreated InnoDB data\n" - "files but have forgotten to delete the " - "corresponding .frm files\n" - "of InnoDB tables, or you have moved .frm " - "files to another database?\n" - "or, the table contains indexes that this " - "version of the engine\n" - "doesn't support.\n" - "See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n" - "how you can resolve the problem.\n", - norm_name); - free_share(share); - my_free(upd_buff, MYF(0)); - my_errno = ENOENT; - - DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); - } - - if (ib_table->ibd_file_missing && !thd_tablespace_op(thd)) { - sql_print_error("MySQL is trying to open a table handle but " - "the .ibd file for\ntable %s does not exist.\n" - "Have you deleted the .ibd file from the " - "database directory under\nthe MySQL datadir, " - "or have you used DISCARD TABLESPACE?\n" - "See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n" - "how you can resolve the problem.\n", - norm_name); - free_share(share); - my_free(upd_buff, MYF(0)); - my_errno = ENOENT; - - dict_table_decrement_handle_count(ib_table); - DBUG_RETURN(HA_ERR_NO_SUCH_TABLE); - } - - prebuilt = row_create_prebuilt(ib_table); - - prebuilt->mysql_row_len = table->s->reclength; - prebuilt->default_rec = table->s->default_values; - ut_ad(prebuilt->default_rec); - - /* Looks like MySQL-3.23 sometimes has primary key number != 0 */ - - primary_key = 
table->s->primary_key; - key_used_on_scan = primary_key; - - /* Allocate a buffer for a 'row reference'. A row reference is - a string of bytes of length ref_length which uniquely specifies - a row in our table. Note that MySQL may also compare two row - references for equality by doing a simple memcmp on the strings - of length ref_length! */ - - if (!row_table_got_default_clust_index(ib_table)) { - if (primary_key >= MAX_KEY) { - sql_print_error("Table %s has a primary key in InnoDB data " - "dictionary, but not in MySQL!", name); - } - - prebuilt->clust_index_was_generated = FALSE; - - /* MySQL allocates the buffer for ref. key_info->key_length - includes space for all key columns + one byte for each column - that may be NULL. ref_length must be as exact as possible to - save space, because all row reference buffers are allocated - based on ref_length. */ - - ref_length = table->key_info[primary_key].key_length; - } else { - if (primary_key != MAX_KEY) { - sql_print_error("Table %s has no primary key in InnoDB data " - "dictionary, but has one in MySQL! If you " - "created the table with a MySQL version < " - "3.23.54 and did not define a primary key, " - "but defined a unique key with all non-NULL " - "columns, then MySQL internally treats that " - "key as the primary key. You can fix this " - "error by dump + DROP + CREATE + reimport " - "of the table.", name); - } - - prebuilt->clust_index_was_generated = TRUE; - - ref_length = DATA_ROW_ID_LEN; - - /* If we automatically created the clustered index, then - MySQL does not know about it, and MySQL must NOT be aware - of the index used on scan, to make it avoid checking if we - update the column of the index. That is why we assert below - that key_used_on_scan is the undefined value MAX_KEY. - The column is the row id in the automatical generation case, - and it will never be updated anyway. 
*/ - - if (key_used_on_scan != MAX_KEY) { - sql_print_warning( - "Table %s key_used_on_scan is %lu even " - "though there is no primary key inside " - "InnoDB.", name, (ulong) key_used_on_scan); - } - } - - stats.block_size = 16 * 1024; /* Index block size in InnoDB: used by MySQL - in query optimization */ - - /* Init table lock structure */ - thr_lock_data_init(&share->lock,&lock,(void*) 0); - - info(HA_STATUS_NO_LOCK | HA_STATUS_VARIABLE | HA_STATUS_CONST); - - /* Only if the table has an AUTOINC column. */ - if (prebuilt->table != NULL && table->found_next_number_field != NULL) { - ulint error; - - dict_table_autoinc_lock(prebuilt->table); - - /* Since a table can already be "open" in InnoDB's internal - data dictionary, we only init the autoinc counter once, the - first time the table is loaded. We can safely reuse the - autoinc value from a previous MySQL open. */ - if (dict_table_autoinc_read(prebuilt->table) == 0) { - - error = innobase_initialize_autoinc(); - /* Should always succeed! */ - ut_a(error == DB_SUCCESS); - } - - dict_table_autoinc_unlock(prebuilt->table); - } - - DBUG_RETURN(0); -} - -uint -ha_innobase::max_supported_key_part_length() const -{ - return(DICT_MAX_INDEX_COL_LEN - 1); -} - -/********************************************************************** -Closes a handle to an InnoDB table. */ - -int -ha_innobase::close(void) -/*====================*/ - /* out: 0 */ -{ - THD* thd; - - DBUG_ENTER("ha_innobase::close"); - - thd = current_thd; // avoid calling current_thd twice, it may be slow - if (thd != NULL) { - innobase_release_temporary_latches(ht, thd); - } - - row_prebuilt_free(prebuilt); - - my_free(upd_buff, MYF(0)); - free_share(share); - - /* Tell InnoDB server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - DBUG_RETURN(0); -} - -/* The following accessor functions should really be inside MySQL code! 
*/ - -/****************************************************************** -Gets field offset for a field in a table. */ -inline -uint -get_field_offset( -/*=============*/ - /* out: offset */ - TABLE* table, /* in: MySQL table object */ - Field* field) /* in: MySQL field object */ -{ - return((uint) (field->ptr - table->record[0])); -} - -/****************************************************************** -Checks if a field in a record is SQL NULL. Uses the record format -information in table to track the null bit in record. */ -static inline -uint -field_in_record_is_null( -/*====================*/ - /* out: 1 if NULL, 0 otherwise */ - TABLE* table, /* in: MySQL table object */ - Field* field, /* in: MySQL field object */ - char* record) /* in: a row in MySQL format */ -{ - int null_offset; - - if (!field->null_ptr) { - - return(0); - } - - null_offset = (uint) ((char*) field->null_ptr - - (char*) table->record[0]); - - if (record[null_offset] & field->null_bit) { - - return(1); - } - - return(0); -} - -/****************************************************************** -Sets a field in a record to SQL NULL. Uses the record format -information in table to track the null bit in record. */ -inline -void -set_field_in_record_to_null( -/*========================*/ - TABLE* table, /* in: MySQL table object */ - Field* field, /* in: MySQL field object */ - char* record) /* in: a row in MySQL format */ -{ - int null_offset; - - null_offset = (uint) ((char*) field->null_ptr - - (char*) table->record[0]); - - record[null_offset] = record[null_offset] | field->null_bit; -} - -extern "C" { -/***************************************************************** -InnoDB uses this function to compare two data fields for which the data type -is such that we must use MySQL code to compare them. NOTE that the prototype -of this function is in rem0cmp.c in InnoDB source code! If you change this -function, remember to update the prototype there! 
*/ - -int -innobase_mysql_cmp( -/*===============*/ - /* out: 1, 0, -1, if a is greater, - equal, less than b, respectively */ - int mysql_type, /* in: MySQL type */ - uint charset_number, /* in: number of the charset */ - unsigned char* a, /* in: data field */ - unsigned int a_length, /* in: data field length, - not UNIV_SQL_NULL */ - unsigned char* b, /* in: data field */ - unsigned int b_length) /* in: data field length, - not UNIV_SQL_NULL */ -{ - CHARSET_INFO* charset; - enum_field_types mysql_tp; - int ret; - - DBUG_ASSERT(a_length != UNIV_SQL_NULL); - DBUG_ASSERT(b_length != UNIV_SQL_NULL); - - mysql_tp = (enum_field_types) mysql_type; - - switch (mysql_tp) { - - case MYSQL_TYPE_BIT: - case MYSQL_TYPE_STRING: - case MYSQL_TYPE_VAR_STRING: - case MYSQL_TYPE_TINY_BLOB: - case MYSQL_TYPE_MEDIUM_BLOB: - case MYSQL_TYPE_BLOB: - case MYSQL_TYPE_LONG_BLOB: - case MYSQL_TYPE_VARCHAR: - /* Use the charset number to pick the right charset struct for - the comparison. Since the MySQL function get_charset may be - slow before Bar removes the mutex operation there, we first - look at 2 common charsets directly. */ - - if (charset_number == default_charset_info->number) { - charset = default_charset_info; - } else if (charset_number == my_charset_latin1.number) { - charset = &my_charset_latin1; - } else { - charset = get_charset(charset_number, MYF(MY_WME)); - - if (charset == NULL) { - sql_print_error("InnoDB needs charset %lu for doing " - "a comparison, but MySQL cannot " - "find that charset.", - (ulong) charset_number); - ut_a(0); - } - } - - /* Starting from 4.1.3, we use strnncollsp() in comparisons of - non-latin1_swedish_ci strings. NOTE that the collation order - changes then: 'b\0\0...' is ordered BEFORE 'b ...'. Users - having indexes on such data need to rebuild their tables! 
*/ - - ret = charset->coll->strnncollsp(charset, - a, a_length, - b, b_length, 0); - if (ret < 0) { - return(-1); - } else if (ret > 0) { - return(1); - } else { - return(0); - } - default: - assert(0); - } - - return(0); -} -} - -/****************************************************************** -Converts a MySQL type to an InnoDB type. Note that this function returns -the 'mtype' of InnoDB. InnoDB differentiates between MySQL's old <= 4.1 -VARCHAR and the new true VARCHAR in >= 5.0.3 by the 'prtype'. */ -inline -ulint -get_innobase_type_from_mysql_type( -/*==============================*/ - /* out: DATA_BINARY, DATA_VARCHAR, ... */ - ulint* unsigned_flag, /* out: DATA_UNSIGNED if an 'unsigned type'; - at least ENUM and SET, and unsigned integer - types are 'unsigned types' */ - Field* field) /* in: MySQL field */ -{ - /* The following asserts try to check that the MySQL type code fits in - 8 bits: this is used in ibuf and also when DATA_NOT_NULL is ORed to - the type */ - - DBUG_ASSERT((ulint)MYSQL_TYPE_STRING < 256); - DBUG_ASSERT((ulint)MYSQL_TYPE_VAR_STRING < 256); - DBUG_ASSERT((ulint)MYSQL_TYPE_DOUBLE < 256); - DBUG_ASSERT((ulint)MYSQL_TYPE_FLOAT < 256); - DBUG_ASSERT((ulint)MYSQL_TYPE_DECIMAL < 256); - - if (field->flags & UNSIGNED_FLAG) { - - *unsigned_flag = DATA_UNSIGNED; - } else { - *unsigned_flag = 0; - } - - if (field->real_type() == MYSQL_TYPE_ENUM - || field->real_type() == MYSQL_TYPE_SET) { - - /* MySQL has field->type() a string type for these, but the - data is actually internally stored as an unsigned integer - code! 
*/ - - *unsigned_flag = DATA_UNSIGNED; /* MySQL has its own unsigned - flag set to zero, even though - internally this is an unsigned - integer type */ - return(DATA_INT); - } - - switch (field->type()) { - /* NOTE that we only allow string types in DATA_MYSQL and - DATA_VARMYSQL */ - case MYSQL_TYPE_VAR_STRING: /* old <= 4.1 VARCHAR */ - case MYSQL_TYPE_VARCHAR: /* new >= 5.0.3 true VARCHAR */ - if (field->binary()) { - return(DATA_BINARY); - } else if (strcmp( - field->charset()->name, - "latin1_swedish_ci") == 0) { - return(DATA_VARCHAR); - } else { - return(DATA_VARMYSQL); - } - case MYSQL_TYPE_BIT: - case MYSQL_TYPE_STRING: if (field->binary()) { - - return(DATA_FIXBINARY); - } else if (strcmp( - field->charset()->name, - "latin1_swedish_ci") == 0) { - return(DATA_CHAR); - } else { - return(DATA_MYSQL); - } - case MYSQL_TYPE_NEWDECIMAL: - return(DATA_FIXBINARY); - case MYSQL_TYPE_LONG: - case MYSQL_TYPE_LONGLONG: - case MYSQL_TYPE_TINY: - case MYSQL_TYPE_SHORT: - case MYSQL_TYPE_INT24: - case MYSQL_TYPE_DATE: - case MYSQL_TYPE_DATETIME: - case MYSQL_TYPE_YEAR: - case MYSQL_TYPE_NEWDATE: - case MYSQL_TYPE_TIME: - case MYSQL_TYPE_TIMESTAMP: - return(DATA_INT); - case MYSQL_TYPE_FLOAT: - return(DATA_FLOAT); - case MYSQL_TYPE_DOUBLE: - return(DATA_DOUBLE); - case MYSQL_TYPE_DECIMAL: - return(DATA_DECIMAL); - case MYSQL_TYPE_GEOMETRY: - case MYSQL_TYPE_TINY_BLOB: - case MYSQL_TYPE_MEDIUM_BLOB: - case MYSQL_TYPE_BLOB: - case MYSQL_TYPE_LONG_BLOB: - return(DATA_BLOB); - default: - assert(0); - } - - return(0); -} - -/*********************************************************************** -Writes an unsigned integer value < 64k to 2 bytes, in the little-endian -storage format. 
*/ -inline -void -innobase_write_to_2_little_endian( -/*==============================*/ - byte* buf, /* in: where to store */ - ulint val) /* in: value to write, must be < 64k */ -{ - ut_a(val < 256 * 256); - - buf[0] = (byte)(val & 0xFF); - buf[1] = (byte)(val / 256); -} - -/*********************************************************************** -Reads an unsigned integer value < 64k from 2 bytes, in the little-endian -storage format. */ -inline -uint -innobase_read_from_2_little_endian( -/*===============================*/ - /* out: value */ - const uchar* buf) /* in: from where to read */ -{ - return (uint) ((ulint)(buf[0]) + 256 * ((ulint)(buf[1]))); -} - -/*********************************************************************** -Stores a key value for a row to a buffer. */ - -uint -ha_innobase::store_key_val_for_row( -/*===============================*/ - /* out: key value length as stored in buff */ - uint keynr, /* in: key number */ - char* buff, /* in/out: buffer for the key value (in MySQL - format) */ - uint buff_len,/* in: buffer length */ - const uchar* record)/* in: row in MySQL format */ -{ - KEY* key_info = table->key_info + keynr; - KEY_PART_INFO* key_part = key_info->key_part; - KEY_PART_INFO* end = key_part + key_info->key_parts; - char* buff_start = buff; - enum_field_types mysql_type; - Field* field; - ibool is_null; - - DBUG_ENTER("store_key_val_for_row"); - - /* The format for storing a key field in MySQL is the following: - - 1. If the column can be NULL, then in the first byte we put 1 if the - field value is NULL, 0 otherwise. - - 2. If the column is of a BLOB type (it must be a column prefix field - in this case), then we put the length of the data in the field to the - next 2 bytes, in the little-endian format. If the field is SQL NULL, - then these 2 bytes are set to 0. Note that the length of data in the - field is <= column prefix length. - - 3. In a column prefix field, prefix_len next bytes are reserved for - data. 
In a normal field the max field length next bytes are reserved - for data. For a VARCHAR(n) the max field length is n. If the stored - value is the SQL NULL then these data bytes are set to 0. - - 4. We always use a 2 byte length for a true >= 5.0.3 VARCHAR. Note that - in the MySQL row format, the length is stored in 1 or 2 bytes, - depending on the maximum allowed length. But in the MySQL key value - format, the length always takes 2 bytes. - - We have to zero-fill the buffer so that MySQL is able to use a - simple memcmp to compare two key values to determine if they are - equal. MySQL does this to compare contents of two 'ref' values. */ - - bzero(buff, buff_len); - - for (; key_part != end; key_part++) { - is_null = FALSE; - - if (key_part->null_bit) { - if (record[key_part->null_offset] - & key_part->null_bit) { - *buff = 1; - is_null = TRUE; - } else { - *buff = 0; - } - buff++; - } - - field = key_part->field; - mysql_type = field->type(); - - if (mysql_type == MYSQL_TYPE_VARCHAR) { - /* >= 5.0.3 true VARCHAR */ - ulint lenlen; - ulint len; - byte* data; - ulint key_len; - ulint true_len; - CHARSET_INFO* cs; - int error=0; - - key_len = key_part->length; - - if (is_null) { - buff += key_len + 2; - - continue; - } - cs = field->charset(); - - lenlen = (ulint) - (((Field_varstring*)field)->length_bytes); - - data = row_mysql_read_true_varchar(&len, - (byte*) (record - + (ulint)get_field_offset(table, field)), - lenlen); - - true_len = len; - - /* For multi byte character sets we need to calculate - the true length of the key */ - - if (len > 0 && cs->mbmaxlen > 1) { - true_len = (ulint) cs->cset->well_formed_len(cs, - (const char *) data, - (const char *) data + len, - (uint) (key_len / - cs->mbmaxlen), - &error); - } - - /* In a column prefix index, we may need to truncate - the stored value: */ - - if (true_len > key_len) { - true_len = key_len; - } - - /* The length in a key value is always stored in 2 - bytes */ - - 
row_mysql_store_true_var_len((byte*)buff, true_len, 2); - buff += 2; - - memcpy(buff, data, true_len); - - /* Note that we always reserve the maximum possible - length of the true VARCHAR in the key value, though - only len first bytes after the 2 length bytes contain - actual data. The rest of the space was reset to zero - in the bzero() call above. */ - - buff += key_len; - - } else if (mysql_type == MYSQL_TYPE_TINY_BLOB - || mysql_type == MYSQL_TYPE_MEDIUM_BLOB - || mysql_type == MYSQL_TYPE_BLOB - || mysql_type == MYSQL_TYPE_LONG_BLOB) { - - CHARSET_INFO* cs; - ulint key_len; - ulint true_len; - int error=0; - ulint blob_len; - byte* blob_data; - - ut_a(key_part->key_part_flag & HA_PART_KEY_SEG); - - key_len = key_part->length; - - if (is_null) { - buff += key_len + 2; - - continue; - } - - cs = field->charset(); - - blob_data = row_mysql_read_blob_ref(&blob_len, - (byte*) (record - + (ulint)get_field_offset(table, field)), - (ulint) field->pack_length()); - - true_len = blob_len; - - ut_a(get_field_offset(table, field) - == key_part->offset); - - /* For multi byte character sets we need to calculate - the true length of the key */ - - if (blob_len > 0 && cs->mbmaxlen > 1) { - true_len = (ulint) cs->cset->well_formed_len(cs, - (const char *) blob_data, - (const char *) blob_data - + blob_len, - (uint) (key_len / - cs->mbmaxlen), - &error); - } - - /* All indexes on BLOB and TEXT are column prefix - indexes, and we may need to truncate the data to be - stored in the key value: */ - - if (true_len > key_len) { - true_len = key_len; - } - - /* MySQL reserves 2 bytes for the length and the - storage of the number is little-endian */ - - innobase_write_to_2_little_endian( - (byte*)buff, true_len); - buff += 2; - - memcpy(buff, blob_data, true_len); - - /* Note that we always reserve the maximum possible - length of the BLOB prefix in the key value. */ - - buff += key_len; - } else { - /* Here we handle all other data types except the - true VARCHAR, BLOB and TEXT. 
Note that the column - value we store may be also in a column prefix - index. */ - - CHARSET_INFO* cs; - ulint true_len; - ulint key_len; - const uchar* src_start; - int error=0; - enum_field_types real_type; - - key_len = key_part->length; - - if (is_null) { - buff += key_len; - - continue; - } - - src_start = record + key_part->offset; - real_type = field->real_type(); - true_len = key_len; - - /* Character set for the field is defined only - to fields whose type is string and real field - type is not enum or set. For these fields check - if character set is multi byte. */ - - if (real_type != MYSQL_TYPE_ENUM - && real_type != MYSQL_TYPE_SET - && ( mysql_type == MYSQL_TYPE_VAR_STRING - || mysql_type == MYSQL_TYPE_STRING)) { - - cs = field->charset(); - - /* For multi byte character sets we need to - calculate the true length of the key */ - - if (key_len > 0 && cs->mbmaxlen > 1) { - - true_len = (ulint) - cs->cset->well_formed_len(cs, - (const char *)src_start, - (const char *)src_start - + key_len, - (uint) (key_len / - cs->mbmaxlen), - &error); - } - } - - memcpy(buff, src_start, true_len); - buff += true_len; - - /* Pad the unused space with spaces. Note that no - padding is ever needed for UCS-2 because in MySQL, - all UCS2 characters are 2 bytes, as MySQL does not - support surrogate pairs, which are needed to represent - characters in the range U+10000 to U+10FFFF. */ - - if (true_len < key_len) { - ulint pad_len = key_len - true_len; - memset(buff, ' ', pad_len); - buff += pad_len; - } - } - } - - ut_a(buff <= buff_start + buff_len); - - DBUG_RETURN((uint)(buff - buff_start)); -} - -/****************************************************************** -Builds a 'template' to the prebuilt struct. The template is used in fast -retrieval of just those column values MySQL needs in its processing. 
*/ -static -void -build_template( -/*===========*/ - row_prebuilt_t* prebuilt, /* in/out: prebuilt struct */ - THD* thd, /* in: current user thread, used - only if templ_type is - ROW_MYSQL_REC_FIELDS */ - TABLE* table, /* in: MySQL table */ - uint templ_type) /* in: ROW_MYSQL_WHOLE_ROW or - ROW_MYSQL_REC_FIELDS */ -{ - dict_index_t* index; - dict_index_t* clust_index; - mysql_row_templ_t* templ; - Field* field; - ulint n_fields; - ulint n_requested_fields = 0; - ibool fetch_all_in_key = FALSE; - ibool fetch_primary_key_cols = FALSE; - ulint i; - /* byte offset of the end of last requested column */ - ulint mysql_prefix_len = 0; - - if (prebuilt->select_lock_type == LOCK_X) { - /* We always retrieve the whole clustered index record if we - use exclusive row level locks, for example, if the read is - done in an UPDATE statement. */ - - templ_type = ROW_MYSQL_WHOLE_ROW; - } - - if (templ_type == ROW_MYSQL_REC_FIELDS) { - if (prebuilt->hint_need_to_fetch_extra_cols - == ROW_RETRIEVE_ALL_COLS) { - - /* We know we must at least fetch all columns in the - key, or all columns in the table */ - - if (prebuilt->read_just_key) { - /* MySQL has instructed us that it is enough - to fetch the columns in the key; looks like - MySQL can set this flag also when there is - only a prefix of the column in the key: in - that case we retrieve the whole column from - the clustered index */ - - fetch_all_in_key = TRUE; - } else { - templ_type = ROW_MYSQL_WHOLE_ROW; - } - } else if (prebuilt->hint_need_to_fetch_extra_cols - == ROW_RETRIEVE_PRIMARY_KEY) { - /* We must at least fetch all primary key cols. Note - that if the clustered index was internally generated - by InnoDB on the row id (no primary key was - defined), then row_search_for_mysql() will always - retrieve the row id to a special buffer in the - prebuilt struct. 
*/ - - fetch_primary_key_cols = TRUE; - } - } - - clust_index = dict_table_get_first_index_noninline(prebuilt->table); - - if (templ_type == ROW_MYSQL_REC_FIELDS) { - index = prebuilt->index; - } else { - index = clust_index; - } - - if (index == clust_index) { - prebuilt->need_to_access_clustered = TRUE; - } else { - prebuilt->need_to_access_clustered = FALSE; - /* Below we check column by column if we need to access - the clustered index */ - } - - n_fields = (ulint)table->s->fields; /* number of columns */ - - if (!prebuilt->mysql_template) { - prebuilt->mysql_template = (mysql_row_templ_t*) - mem_alloc_noninline( - n_fields * sizeof(mysql_row_templ_t)); - } - - prebuilt->template_type = templ_type; - prebuilt->null_bitmap_len = table->s->null_bytes; - - prebuilt->templ_contains_blob = FALSE; - - /* Note that in InnoDB, i is the column number. MySQL calls columns - 'fields'. */ - for (i = 0; i < n_fields; i++) { - templ = prebuilt->mysql_template + n_requested_fields; - field = table->field[i]; - - if (UNIV_LIKELY(templ_type == ROW_MYSQL_REC_FIELDS)) { - /* Decide which columns we should fetch - and which we can skip. 
*/ - register const ibool index_contains_field = - dict_index_contains_col_or_prefix(index, i); - - if (!index_contains_field && prebuilt->read_just_key) { - /* If this is a 'key read', we do not need - columns that are not in the key */ - - goto skip_field; - } - - if (index_contains_field && fetch_all_in_key) { - /* This field is needed in the query */ - - goto include_field; - } - - if (bitmap_is_set(table->read_set, i) || - bitmap_is_set(table->write_set, i)) { - /* This field is needed in the query */ - - goto include_field; - } - - if (fetch_primary_key_cols - && dict_table_col_in_clustered_key( - index->table, i)) { - /* This field is needed in the query */ - - goto include_field; - } - - /* This field is not needed in the query, skip it */ - - goto skip_field; - } -include_field: - n_requested_fields++; - - templ->col_no = i; - - if (index == clust_index) { - templ->rec_field_no = dict_col_get_clust_pos_noninline( - &index->table->cols[i], index); - } else { - templ->rec_field_no = dict_index_get_nth_col_pos( - index, i); - } - - if (templ->rec_field_no == ULINT_UNDEFINED) { - prebuilt->need_to_access_clustered = TRUE; - } - - if (field->null_ptr) { - templ->mysql_null_byte_offset = - (ulint) ((char*) field->null_ptr - - (char*) table->record[0]); - - templ->mysql_null_bit_mask = (ulint) field->null_bit; - } else { - templ->mysql_null_bit_mask = 0; - } - - templ->mysql_col_offset = (ulint) - get_field_offset(table, field); - - templ->mysql_col_len = (ulint) field->pack_length(); - if (mysql_prefix_len < templ->mysql_col_offset - + templ->mysql_col_len) { - mysql_prefix_len = templ->mysql_col_offset - + templ->mysql_col_len; - } - templ->type = index->table->cols[i].mtype; - templ->mysql_type = (ulint)field->type(); - - if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) { - templ->mysql_length_bytes = (ulint) - (((Field_varstring*)field)->length_bytes); - } - - templ->charset = dtype_get_charset_coll_noninline( - index->table->cols[i].prtype); - 
templ->mbminlen = index->table->cols[i].mbminlen; - templ->mbmaxlen = index->table->cols[i].mbmaxlen; - templ->is_unsigned = index->table->cols[i].prtype - & DATA_UNSIGNED; - if (templ->type == DATA_BLOB) { - prebuilt->templ_contains_blob = TRUE; - } -skip_field: - ; - } - - prebuilt->n_template = n_requested_fields; - prebuilt->mysql_prefix_len = mysql_prefix_len; - - if (index != clust_index && prebuilt->need_to_access_clustered) { - /* Change rec_field_no's to correspond to the clustered index - record */ - for (i = 0; i < n_requested_fields; i++) { - templ = prebuilt->mysql_template + i; - - templ->rec_field_no = dict_col_get_clust_pos_noninline( - &index->table->cols[templ->col_no], - clust_index); - } - } -} - -/************************************************************************ -Get the upper limit of the MySQL integral and floating-point type. */ - -ulonglong -ha_innobase::innobase_get_int_col_max_value( -/*========================================*/ - const Field* field) -{ - ulonglong max_value = 0; - - switch(field->key_type()) { - /* TINY */ - case HA_KEYTYPE_BINARY: - max_value = 0xFFULL; - break; - case HA_KEYTYPE_INT8: - max_value = 0x7FULL; - break; - /* SHORT */ - case HA_KEYTYPE_USHORT_INT: - max_value = 0xFFFFULL; - break; - case HA_KEYTYPE_SHORT_INT: - max_value = 0x7FFFULL; - break; - /* MEDIUM */ - case HA_KEYTYPE_UINT24: - max_value = 0xFFFFFFULL; - break; - case HA_KEYTYPE_INT24: - max_value = 0x7FFFFFULL; - break; - /* LONG */ - case HA_KEYTYPE_ULONG_INT: - max_value = 0xFFFFFFFFULL; - break; - case HA_KEYTYPE_LONG_INT: - max_value = 0x7FFFFFFFULL; - break; - /* BIG */ - case HA_KEYTYPE_ULONGLONG: - max_value = 0xFFFFFFFFFFFFFFFFULL; - break; - case HA_KEYTYPE_LONGLONG: - max_value = 0x7FFFFFFFFFFFFFFFULL; - break; - case HA_KEYTYPE_FLOAT: - /* We use the maximum as per IEEE754-2008 standard, 2^24 */ - max_value = 0x1000000ULL; - break; - case HA_KEYTYPE_DOUBLE: - /* We use the maximum as per IEEE754-2008 standard, 2^53 */ - max_value = 
0x20000000000000ULL; - break; - default: - ut_error; - } - - return(max_value); -} - -/************************************************************************ -This special handling is really to overcome the limitations of MySQL's -binlogging. We need to eliminate the non-determinism that will arise in -INSERT ... SELECT type of statements, since MySQL binlog only stores the -min value of the autoinc interval. Once that is fixed we can get rid of -the special lock handling.*/ - -ulong -ha_innobase::innobase_lock_autoinc(void) -/*====================================*/ - /* out: DB_SUCCESS if all OK else - error code */ -{ - ulint error = DB_SUCCESS; - - switch (innobase_autoinc_lock_mode) { - case AUTOINC_NO_LOCKING: - /* Acquire only the AUTOINC mutex. */ - dict_table_autoinc_lock(prebuilt->table); - break; - - case AUTOINC_NEW_STYLE_LOCKING: - /* For simple (single/multi) row INSERTs, we fallback to the - old style only if another transaction has already acquired - the AUTOINC lock on behalf of a LOAD FILE or INSERT ... SELECT - etc. type of statement. */ - if (thd_sql_command(user_thd) == SQLCOM_INSERT - || thd_sql_command(user_thd) == SQLCOM_REPLACE) { - dict_table_t* table = prebuilt->table; - - /* Acquire the AUTOINC mutex. */ - dict_table_autoinc_lock(table); - - /* We need to check that another transaction isn't - already holding the AUTOINC lock on the table. */ - if (table->n_waiting_or_granted_auto_inc_locks) { - /* Release the mutex to avoid deadlocks. */ - dict_table_autoinc_unlock(table); - } else { - break; - } - } - /* Fall through to old style locking. */ - - case AUTOINC_OLD_STYLE_LOCKING: - error = row_lock_table_autoinc_for_mysql(prebuilt); - - if (error == DB_SUCCESS) { - - /* Acquire the AUTOINC mutex. 
*/ - dict_table_autoinc_lock(prebuilt->table); - } - break; - - default: - ut_error; - } - - return(ulong(error)); -} - -/************************************************************************ -Reset the autoinc value in the table.*/ - -ulong -ha_innobase::innobase_reset_autoinc( -/*================================*/ - /* out: DB_SUCCESS if all went well - else error code */ - ulonglong autoinc) /* in: value to store */ -{ - ulint error; - - error = innobase_lock_autoinc(); - - if (error == DB_SUCCESS) { - - dict_table_autoinc_initialize(prebuilt->table, autoinc); - - dict_table_autoinc_unlock(prebuilt->table); - } - - return(ulong(error)); -} - -/************************************************************************ -Store the autoinc value in the table. The autoinc value is only set if -it's greater than the existing autoinc value in the table.*/ - -ulong -ha_innobase::innobase_set_max_autoinc( -/*==================================*/ - /* out: DB_SUCCES if all went well - else error code */ - ulonglong auto_inc) /* in: value to store */ -{ - ulint error; - - error = innobase_lock_autoinc(); - - if (error == DB_SUCCESS) { - - dict_table_autoinc_update_if_greater(prebuilt->table, auto_inc); - - dict_table_autoinc_unlock(prebuilt->table); - } - - return(ulong(error)); -} - -/************************************************************************ -Stores a row in an InnoDB database, to the table specified in this -handle. 
*/ - -int -ha_innobase::write_row( -/*===================*/ - /* out: error code */ - uchar* record) /* in: a row in MySQL format */ -{ - ulint error = 0; - int error_result= 0; - ibool auto_inc_used= FALSE; - ulint sql_command; - trx_t* trx = thd_to_trx(user_thd); - - DBUG_ENTER("ha_innobase::write_row"); - - if (prebuilt->trx != trx) { - sql_print_error("The transaction object for the table handle is at " - "%p, but for the current thread it is at %p", - prebuilt->trx, trx); - - fputs("InnoDB: Dump of 200 bytes around prebuilt: ", stderr); - ut_print_buf(stderr, ((const byte*)prebuilt) - 100, 200); - fputs("\n" - "InnoDB: Dump of 200 bytes around ha_data: ", - stderr); - ut_print_buf(stderr, ((const byte*) trx) - 100, 200); - putc('\n', stderr); - ut_error; - } - - ha_statistic_increment(&SSV::ha_write_count); - - if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT) - table->timestamp_field->set_time(); - - sql_command = thd_sql_command(user_thd); - - if ((sql_command == SQLCOM_ALTER_TABLE - || sql_command == SQLCOM_OPTIMIZE - || sql_command == SQLCOM_CREATE_INDEX - || sql_command == SQLCOM_DROP_INDEX) - && num_write_row >= 10000) { - /* ALTER TABLE is COMMITted at every 10000 copied rows. - The IX table lock for the original table has to be re-issued. - As this method will be called on a temporary table where the - contents of the original table is being copied to, it is - a bit tricky to determine the source table. The cursor - position in the source table need not be adjusted after the - intermediate COMMIT, since writes by other transactions are - being blocked by a MySQL table lock TL_WRITE_ALLOW_READ. */ - - dict_table_t* src_table; - ulint mode; - - num_write_row = 0; - - /* Commit the transaction. This will release the table - locks, so they have to be acquired again. */ - - /* Altering an InnoDB table */ - /* Get the source table. 
*/ - src_table = lock_get_src_table( - prebuilt->trx, prebuilt->table, &mode); - if (!src_table) { -no_commit: - /* Unknown situation: do not commit */ - /* - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: ALTER TABLE is holding lock" - " on %lu tables!\n", - prebuilt->trx->mysql_n_tables_locked); - */ - ; - } else if (src_table == prebuilt->table) { - /* Source table is not in InnoDB format: - no need to re-acquire locks on it. */ - - /* Altering to InnoDB format */ - innobase_commit(ht, user_thd, 1); - /* Note that this transaction is still active. */ - prebuilt->trx->active_trans = 1; - /* We will need an IX lock on the destination table. */ - prebuilt->sql_stat_start = TRUE; - } else { - /* Ensure that there are no other table locks than - LOCK_IX and LOCK_AUTO_INC on the destination table. */ - - if (!lock_is_table_exclusive(prebuilt->table, - prebuilt->trx)) { - goto no_commit; - } - - /* Commit the transaction. This will release the table - locks, so they have to be acquired again. */ - innobase_commit(ht, user_thd, 1); - /* Note that this transaction is still active. */ - prebuilt->trx->active_trans = 1; - /* Re-acquire the table lock on the source table. */ - row_lock_table_for_mysql(prebuilt, src_table, mode); - /* We will need an IX lock on the destination table. */ - prebuilt->sql_stat_start = TRUE; - } - } - - num_write_row++; - - /* This is the case where the table has an auto-increment column */ - if (table->next_number_field && record == table->record[0]) { - - /* Reset the error code before calling - innobase_get_auto_increment(). */ - prebuilt->autoinc_error = DB_SUCCESS; - - if ((error = update_auto_increment())) { - - /* We don't want to mask autoinc overflow errors. */ - if (prebuilt->autoinc_error != DB_SUCCESS) { - error = (int) prebuilt->autoinc_error; - - goto report_error; - } - - /* MySQL errors are passed straight back. 
*/ - error_result = (int) error; - goto func_exit; - } - - auto_inc_used = TRUE; - } - - if (prebuilt->mysql_template == NULL - || prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) { - - /* Build the template used in converting quickly between - the two database formats */ - - build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW); - } - - innodb_srv_conc_enter_innodb(prebuilt->trx); - - error = row_insert_for_mysql((byte*) record, prebuilt); - - /* Handle duplicate key errors */ - if (auto_inc_used) { - ulint err; - ulonglong auto_inc; - ulonglong col_max_value; - - /* Note the number of rows processed for this statement, used - by get_auto_increment() to determine the number of AUTO-INC - values to reserve. This is only useful for a mult-value INSERT - and is a statement level counter.*/ - if (trx->n_autoinc_rows > 0) { - --trx->n_autoinc_rows; - } - - /* We need the upper limit of the col type to check for - whether we update the table autoinc counter or not. */ - col_max_value = innobase_get_int_col_max_value( - table->next_number_field); - - /* Get the value that MySQL attempted to store in the table.*/ - auto_inc = table->next_number_field->val_int(); - - switch (error) { - case DB_DUPLICATE_KEY: - - /* A REPLACE command and LOAD DATA INFILE REPLACE - handle a duplicate key error themselves, but we - must update the autoinc counter if we are performing - those statements. */ - - switch (sql_command) { - case SQLCOM_LOAD: - if ((trx->duplicates - & (TRX_DUP_IGNORE | TRX_DUP_REPLACE))) { - - goto set_max_autoinc; - } - break; - - case SQLCOM_REPLACE: - case SQLCOM_INSERT_SELECT: - case SQLCOM_REPLACE_SELECT: - goto set_max_autoinc; - break; - - default: - break; - } - - break; - - case DB_SUCCESS: - /* If the actual value inserted is greater than - the upper limit of the interval, then we try and - update the table upper limit. 
Note: last_value - will be 0 if get_auto_increment() was not called.*/ - - if (auto_inc <= col_max_value - && auto_inc >= prebuilt->autoinc_last_value) { -set_max_autoinc: - ut_a(prebuilt->autoinc_increment > 0); - - ulonglong need; - ulonglong offset; - - offset = prebuilt->autoinc_offset; - need = prebuilt->autoinc_increment; - - auto_inc = innobase_next_autoinc( - auto_inc, need, offset, col_max_value); - - err = innobase_set_max_autoinc(auto_inc); - - if (err != DB_SUCCESS) { - error = err; - } - } - break; - } - } - - innodb_srv_conc_exit_innodb(prebuilt->trx); - -report_error: - error_result = convert_error_code_to_mysql((int) error, user_thd); - -func_exit: - innobase_active_small(); - - DBUG_RETURN(error_result); -} - -/************************************************************************** -Checks which fields have changed in a row and stores information -of them to an update vector. */ -static -int -calc_row_difference( -/*================*/ - /* out: error number or 0 */ - upd_t* uvect, /* in/out: update vector */ - uchar* old_row, /* in: old row in MySQL format */ - uchar* new_row, /* in: new row in MySQL format */ - struct st_table* table, /* in: table in MySQL data - dictionary */ - uchar* upd_buff, /* in: buffer to use */ - ulint buff_len, /* in: buffer length */ - row_prebuilt_t* prebuilt, /* in: InnoDB prebuilt struct */ - THD* thd) /* in: user thread */ -{ - uchar* original_upd_buff = upd_buff; - Field* field; - enum_field_types field_mysql_type; - uint n_fields; - ulint o_len; - ulint n_len; - ulint col_pack_len; - byte* new_mysql_row_col; - byte* o_ptr; - byte* n_ptr; - byte* buf; - upd_field_t* ufield; - ulint col_type; - ulint n_changed = 0; - dfield_t dfield; - dict_index_t* clust_index; - uint i; - - n_fields = table->s->fields; - clust_index = dict_table_get_first_index_noninline(prebuilt->table); - - /* We use upd_buff to convert changed fields */ - buf = (byte*) upd_buff; - - for (i = 0; i < n_fields; i++) { - field = table->field[i]; 
- - o_ptr = (byte*) old_row + get_field_offset(table, field); - n_ptr = (byte*) new_row + get_field_offset(table, field); - - /* Use new_mysql_row_col and col_pack_len save the values */ - - new_mysql_row_col = n_ptr; - col_pack_len = field->pack_length(); - - o_len = col_pack_len; - n_len = col_pack_len; - - /* We use o_ptr and n_ptr to dig up the actual data for - comparison. */ - - field_mysql_type = field->type(); - - col_type = prebuilt->table->cols[i].mtype; - - switch (col_type) { - - case DATA_BLOB: - o_ptr = row_mysql_read_blob_ref(&o_len, o_ptr, o_len); - n_ptr = row_mysql_read_blob_ref(&n_len, n_ptr, n_len); - - break; - - case DATA_VARCHAR: - case DATA_BINARY: - case DATA_VARMYSQL: - if (field_mysql_type == MYSQL_TYPE_VARCHAR) { - /* This is a >= 5.0.3 type true VARCHAR where - the real payload data length is stored in - 1 or 2 bytes */ - - o_ptr = row_mysql_read_true_varchar( - &o_len, o_ptr, - (ulint) - (((Field_varstring*)field)->length_bytes)); - - n_ptr = row_mysql_read_true_varchar( - &n_len, n_ptr, - (ulint) - (((Field_varstring*)field)->length_bytes)); - } - - break; - default: - ; - } - - if (field->null_ptr) { - if (field_in_record_is_null(table, field, - (char*) old_row)) { - o_len = UNIV_SQL_NULL; - } - - if (field_in_record_is_null(table, field, - (char*) new_row)) { - n_len = UNIV_SQL_NULL; - } - } - - if (o_len != n_len || (o_len != UNIV_SQL_NULL && - 0 != memcmp(o_ptr, n_ptr, o_len))) { - /* The field has changed */ - - ufield = uvect->fields + n_changed; - - /* Let us use a dummy dfield to make the conversion - from the MySQL column format to the InnoDB format */ - - dict_col_copy_type_noninline(prebuilt->table->cols + i, - &dfield.type); - - if (n_len != UNIV_SQL_NULL) { - buf = row_mysql_store_col_in_innobase_format( - &dfield, - (byte*)buf, - TRUE, - new_mysql_row_col, - col_pack_len, - dict_table_is_comp_noninline( - prebuilt->table)); - ufield->new_val.data = dfield.data; - ufield->new_val.len = dfield.len; - } else { - 
ufield->new_val.data = NULL; - ufield->new_val.len = UNIV_SQL_NULL; - } - - ufield->exp = NULL; - ufield->field_no = dict_col_get_clust_pos_noninline( - &prebuilt->table->cols[i], clust_index); - n_changed++; - } - } - - uvect->n_fields = n_changed; - uvect->info_bits = 0; - - ut_a(buf <= (byte*)original_upd_buff + buff_len); - - return(0); -} - -/************************************************************************** -Updates a row given as a parameter to a new value. Note that we are given -whole rows, not just the fields which are updated: this incurs some -overhead for CPU when we check which fields are actually updated. -TODO: currently InnoDB does not prevent the 'Halloween problem': -in a searched update a single row can get updated several times -if its index columns are updated! */ - -int -ha_innobase::update_row( -/*====================*/ - /* out: error number or 0 */ - const uchar* old_row, /* in: old row in MySQL format */ - uchar* new_row) /* in: new row in MySQL format */ -{ - upd_t* uvect; - int error = 0; - trx_t* trx = thd_to_trx(user_thd); - - DBUG_ENTER("ha_innobase::update_row"); - - ut_a(prebuilt->trx == trx); - - ha_statistic_increment(&SSV::ha_update_count); - - if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_UPDATE) - table->timestamp_field->set_time(); - - if (prebuilt->upd_node) { - uvect = prebuilt->upd_node->update; - } else { - uvect = row_get_prebuilt_update_vector(prebuilt); - } - - /* Build an update vector from the modified fields in the rows - (uses upd_buff of the handle) */ - - calc_row_difference(uvect, (uchar*) old_row, new_row, table, - upd_buff, (ulint)upd_and_key_val_buff_len, - prebuilt, user_thd); - - /* This is not a delete */ - prebuilt->upd_node->is_delete = FALSE; - - assert(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW); - - innodb_srv_conc_enter_innodb(trx); - - error = row_update_for_mysql((byte*) old_row, prebuilt); - - /* We need to do some special AUTOINC handling for the following case: - - INSERT 
INTO t (c1,c2) VALUES(x,y) ON DUPLICATE KEY UPDATE ... - - We need to use the AUTOINC counter that was actually used by - MySQL in the UPDATE statement, which can be different from the - value used in the INSERT statement.*/ - - if (error == DB_SUCCESS - && table->next_number_field - && new_row == table->record[0] - && thd_sql_command(user_thd) == SQLCOM_INSERT - && (trx->duplicates & (TRX_DUP_IGNORE | TRX_DUP_REPLACE)) - == TRX_DUP_IGNORE) { - - ulonglong auto_inc; - ulonglong col_max_value; - - auto_inc = table->next_number_field->val_int(); - - /* We need the upper limit of the col type to check for - whether we update the table autoinc counter or not. */ - col_max_value = innobase_get_int_col_max_value( - table->next_number_field); - - if (auto_inc <= col_max_value && auto_inc != 0) { - - ulonglong need; - ulonglong offset; - - offset = prebuilt->autoinc_offset; - need = prebuilt->autoinc_increment; - - auto_inc = innobase_next_autoinc( - auto_inc, need, offset, col_max_value); - - error = innobase_set_max_autoinc(auto_inc); - } - } - - innodb_srv_conc_exit_innodb(trx); - - error = convert_error_code_to_mysql(error, user_thd); - - if (error == 0 /* success */ - && uvect->n_fields == 0 /* no columns were updated */) { - - /* This is the same as success, but instructs - MySQL that the row is not really updated and it - should not increase the count of updated rows. - This is fix for http://bugs.mysql.com/29157 */ - error = HA_ERR_RECORD_IS_THE_SAME; - } - - /* Tell InnoDB server that there might be work for - utility threads: */ - - innobase_active_small(); - - DBUG_RETURN(error); -} - -/************************************************************************** -Deletes a row given as the parameter. 
*/ - -int -ha_innobase::delete_row( -/*====================*/ - /* out: error number or 0 */ - const uchar* record) /* in: a row in MySQL format */ -{ - int error = 0; - trx_t* trx = thd_to_trx(user_thd); - - DBUG_ENTER("ha_innobase::delete_row"); - - ut_a(prebuilt->trx == trx); - - ha_statistic_increment(&SSV::ha_delete_count); - - if (!prebuilt->upd_node) { - row_get_prebuilt_update_vector(prebuilt); - } - - /* This is a delete */ - - prebuilt->upd_node->is_delete = TRUE; - - innodb_srv_conc_enter_innodb(trx); - - error = row_update_for_mysql((byte*) record, prebuilt); - - innodb_srv_conc_exit_innodb(trx); - - error = convert_error_code_to_mysql(error, user_thd); - - /* Tell the InnoDB server that there might be work for - utility threads: */ - - innobase_active_small(); - - DBUG_RETURN(error); -} - -/************************************************************************** -Removes a new lock set on a row, if it was not read optimistically. This can -be called after a row has been read in the processing of an UPDATE or a DELETE -query, if the option innodb_locks_unsafe_for_binlog is set. */ - -void -ha_innobase::unlock_row(void) -/*=========================*/ -{ - DBUG_ENTER("ha_innobase::unlock_row"); - - /* Consistent read does not take any locks, thus there is - nothing to unlock. */ - - if (prebuilt->select_lock_type == LOCK_NONE) { - DBUG_VOID_RETURN; - } - - switch (prebuilt->row_read_type) { - case ROW_READ_WITH_LOCKS: - if (!srv_locks_unsafe_for_binlog - && prebuilt->trx->isolation_level - != TRX_ISO_READ_COMMITTED) { - break; - } - /* fall through */ - case ROW_READ_TRY_SEMI_CONSISTENT: - row_unlock_for_mysql(prebuilt, FALSE); - break; - case ROW_READ_DID_SEMI_CONSISTENT: - prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; - break; - } - - DBUG_VOID_RETURN; -} - -/* See handler.h and row0mysql.h for docs on this function. 
*/ -bool -ha_innobase::was_semi_consistent_read(void) -/*=======================================*/ -{ - return(prebuilt->row_read_type == ROW_READ_DID_SEMI_CONSISTENT); -} - -/* See handler.h and row0mysql.h for docs on this function. */ -void -ha_innobase::try_semi_consistent_read(bool yes) -/*===========================================*/ -{ - ut_a(prebuilt->trx == thd_to_trx(ha_thd())); - - /* Row read type is set to semi consistent read if this was - requested by the MySQL and either innodb_locks_unsafe_for_binlog - option is used or this session is using READ COMMITTED isolation - level. */ - - if (yes - && (srv_locks_unsafe_for_binlog - || prebuilt->trx->isolation_level == TRX_ISO_READ_COMMITTED)) { - prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; - } else { - prebuilt->row_read_type = ROW_READ_WITH_LOCKS; - } -} - -/********************************************************************** -Initializes a handle to use an index. */ - -int -ha_innobase::index_init( -/*====================*/ - /* out: 0 or error number */ - uint keynr, /* in: key (index) number */ - bool sorted) /* in: 1 if result MUST be sorted according to index */ -{ - int error = 0; - DBUG_ENTER("index_init"); - - error = change_active_index(keynr); - - DBUG_RETURN(error); -} - -/********************************************************************** -Currently does nothing. */ - -int -ha_innobase::index_end(void) -/*========================*/ -{ - int error = 0; - DBUG_ENTER("index_end"); - active_index=MAX_KEY; - DBUG_RETURN(error); -} - -/************************************************************************* -Converts a search mode flag understood by MySQL to a flag understood -by InnoDB. 
*/ -inline -ulint -convert_search_mode_to_innobase( -/*============================*/ - enum ha_rkey_function find_flag) -{ - switch (find_flag) { - case HA_READ_KEY_EXACT: - /* this does not require the index to be UNIQUE */ - return(PAGE_CUR_GE); - case HA_READ_KEY_OR_NEXT: - return(PAGE_CUR_GE); - case HA_READ_KEY_OR_PREV: - return(PAGE_CUR_LE); - case HA_READ_AFTER_KEY: - return(PAGE_CUR_G); - case HA_READ_BEFORE_KEY: - return(PAGE_CUR_L); - case HA_READ_PREFIX: - return(PAGE_CUR_GE); - case HA_READ_PREFIX_LAST: - return(PAGE_CUR_LE); - case HA_READ_PREFIX_LAST_OR_PREV: - return(PAGE_CUR_LE); - /* In MySQL-4.0 HA_READ_PREFIX and HA_READ_PREFIX_LAST always - pass a complete-field prefix of a key value as the search - tuple. I.e., it is not allowed that the last field would - just contain n first bytes of the full field value. - MySQL uses a 'padding' trick to convert LIKE 'abc%' - type queries so that it can use as a search tuple - a complete-field-prefix of a key value. Thus, the InnoDB - search mode PAGE_CUR_LE_OR_EXTENDS is never used. - TODO: when/if MySQL starts to use also partial-field - prefixes, we have to deal with stripping of spaces - and comparison of non-latin1 char type fields in - innobase_mysql_cmp() to get PAGE_CUR_LE_OR_EXTENDS to - work correctly. */ - case HA_READ_MBR_CONTAIN: - case HA_READ_MBR_INTERSECT: - case HA_READ_MBR_WITHIN: - case HA_READ_MBR_DISJOINT: - case HA_READ_MBR_EQUAL: - return(PAGE_CUR_UNSUPP); - /* do not use "default:" in order to produce a gcc warning: - enumeration value '...' not handled in switch - (if -Wswitch or -Wall is used) */ - } - - my_error(ER_CHECK_NOT_IMPLEMENTED, MYF(0), "this functionality"); - - return(PAGE_CUR_UNSUPP); -} - -/* - BACKGROUND INFO: HOW A SELECT SQL QUERY IS EXECUTED - --------------------------------------------------- -The following does not cover all the details, but explains how we determine -the start of a new SQL statement, and what is associated with it. 
- -For each table in the database the MySQL interpreter may have several -table handle instances in use, also in a single SQL query. For each table -handle instance there is an InnoDB 'prebuilt' struct which contains most -of the InnoDB data associated with this table handle instance. - - A) if the user has not explicitly set any MySQL table level locks: - - 1) MySQL calls ::external_lock to set an 'intention' table level lock on -the table of the handle instance. There we set -prebuilt->sql_stat_start = TRUE. The flag sql_stat_start should be set -true if we are taking this table handle instance to use in a new SQL -statement issued by the user. We also increment trx->n_mysql_tables_in_use. - - 2) If prebuilt->sql_stat_start == TRUE we 'pre-compile' the MySQL search -instructions to prebuilt->template of the table handle instance in -::index_read. The template is used to save CPU time in large joins. - - 3) In row_search_for_mysql, if prebuilt->sql_stat_start is true, we -allocate a new consistent read view for the trx if it does not yet have one, -or in the case of a locking read, set an InnoDB 'intention' table level -lock on the table. - - 4) We do the SELECT. MySQL may repeatedly call ::index_read for the -same table handle instance, if it is a join. - - 5) When the SELECT ends, MySQL removes its intention table level locks -in ::external_lock. When trx->n_mysql_tables_in_use drops to zero, - (a) we execute a COMMIT there if the autocommit is on, - (b) we also release possible 'SQL statement level resources' InnoDB may -have for this SQL statement. The MySQL interpreter does NOT execute -autocommit for pure read transactions, though it should. That is why the -table handler in that case has to execute the COMMIT in ::external_lock. - - B) If the user has explicitly set MySQL table level locks, then MySQL -does NOT call ::external_lock at the start of the statement. 
To determine -when we are at the start of a new SQL statement we at the start of -::index_read also compare the query id to the latest query id where the -table handle instance was used. If it has changed, we know we are at the -start of a new SQL statement. Since the query id can theoretically -overwrap, we use this test only as a secondary way of determining the -start of a new SQL statement. */ - - -/************************************************************************** -Positions an index cursor to the index specified in the handle. Fetches the -row if any. */ - -int -ha_innobase::index_read( -/*====================*/ - /* out: 0, HA_ERR_KEY_NOT_FOUND, - or error number */ - uchar* buf, /* in/out: buffer for the returned - row */ - const uchar* key_ptr, /* in: key value; if this is NULL - we position the cursor at the - start or end of index; this can - also contain an InnoDB row id, in - which case key_len is the InnoDB - row id length; the key value can - also be a prefix of a full key value, - and the last column can be a prefix - of a full column */ - uint key_len,/* in: key value length */ - enum ha_rkey_function find_flag)/* in: search flags from my_base.h */ -{ - ulint mode; - dict_index_t* index; - ulint match_mode = 0; - int error; - ulint ret; - - DBUG_ENTER("index_read"); - - ut_a(prebuilt->trx == thd_to_trx(user_thd)); - - ha_statistic_increment(&SSV::ha_read_key_count); - - index = prebuilt->index; - - /* Note that if the index for which the search template is built is not - necessarily prebuilt->index, but can also be the clustered index */ - - if (prebuilt->sql_stat_start) { - build_template(prebuilt, user_thd, table, - ROW_MYSQL_REC_FIELDS); - } - - if (key_ptr) { - /* Convert the search key value to InnoDB format into - prebuilt->search_tuple */ - - row_sel_convert_mysql_key_to_innobase(prebuilt->search_tuple, - (byte*) key_val_buff, - (ulint)upd_and_key_val_buff_len, - index, - (byte*) key_ptr, - (ulint) key_len, prebuilt->trx); - } else { 
- /* We position the cursor to the last or the first entry - in the index */ - - dtuple_set_n_fields(prebuilt->search_tuple, 0); - } - - mode = convert_search_mode_to_innobase(find_flag); - - match_mode = 0; - - if (find_flag == HA_READ_KEY_EXACT) { - match_mode = ROW_SEL_EXACT; - - } else if (find_flag == HA_READ_PREFIX - || find_flag == HA_READ_PREFIX_LAST) { - match_mode = ROW_SEL_EXACT_PREFIX; - } - - last_match_mode = (uint) match_mode; - - if (mode != PAGE_CUR_UNSUPP) { - - innodb_srv_conc_enter_innodb(prebuilt->trx); - - ret = row_search_for_mysql((byte*) buf, mode, prebuilt, - match_mode, 0); - - innodb_srv_conc_exit_innodb(prebuilt->trx); - } else { - - ret = DB_UNSUPPORTED; - } - - if (ret == DB_SUCCESS) { - error = 0; - table->status = 0; - - } else if (ret == DB_RECORD_NOT_FOUND) { - error = HA_ERR_KEY_NOT_FOUND; - table->status = STATUS_NOT_FOUND; - - } else if (ret == DB_END_OF_INDEX) { - error = HA_ERR_KEY_NOT_FOUND; - table->status = STATUS_NOT_FOUND; - } else { - error = convert_error_code_to_mysql((int) ret, user_thd); - table->status = STATUS_NOT_FOUND; - } - - DBUG_RETURN(error); -} - -/*********************************************************************** -The following functions works like index_read, but it find the last -row with the current key value or prefix. */ - -int -ha_innobase::index_read_last( -/*=========================*/ - /* out: 0, HA_ERR_KEY_NOT_FOUND, or an - error code */ - uchar* buf, /* out: fetched row */ - const uchar* key_ptr,/* in: key value, or a prefix of a full - key value */ - uint key_len)/* in: length of the key val or prefix - in bytes */ -{ - return(index_read(buf, key_ptr, key_len, HA_READ_PREFIX_LAST)); -} - -/************************************************************************ -Get the index for a handle. Does not change active index.*/ - -dict_index_t* -ha_innobase::innobase_get_index( -/*============================*/ - /* out: NULL or index instance. 
*/ - uint keynr) /* in: use this index; MAX_KEY means always - clustered index, even if it was internally - generated by InnoDB */ -{ - KEY* key = 0; - dict_index_t* index = 0; - - DBUG_ENTER("innobase_get_index"); - ha_statistic_increment(&SSV::ha_read_key_count); - - ut_ad(user_thd == ha_thd()); - ut_a(prebuilt->trx == thd_to_trx(user_thd)); - - if (keynr != MAX_KEY && table->s->keys > 0) { - key = table->key_info + keynr; - - index = dict_table_get_index_noninline( - prebuilt->table, key->name); - } else { - index = dict_table_get_first_index_noninline(prebuilt->table); - } - - if (!index) { - sql_print_error( - "Innodb could not find key n:o %u with name %s " - "from dict cache for table %s", - keynr, key ? key->name : "NULL", - prebuilt->table->name); - } - - DBUG_RETURN(index); -} - -/************************************************************************ -Changes the active index of a handle. */ - -int -ha_innobase::change_active_index( -/*=============================*/ - /* out: 0 or error code */ - uint keynr) /* in: use this index; MAX_KEY means always clustered - index, even if it was internally generated by - InnoDB */ -{ - DBUG_ENTER("change_active_index"); - - ut_ad(user_thd == ha_thd()); - ut_a(prebuilt->trx == thd_to_trx(user_thd)); - - active_index = keynr; - - prebuilt->index = innobase_get_index(keynr); - - if (!prebuilt->index) { - DBUG_RETURN(1); - } - - assert(prebuilt->search_tuple != 0); - - dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields); - - dict_index_copy_types(prebuilt->search_tuple, prebuilt->index, - prebuilt->index->n_fields); - - /* MySQL changes the active index for a handle also during some - queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX() - and then calculates the sum. Previously we played safe and used - the flag ROW_MYSQL_WHOLE_ROW below, but that caused unnecessary - copying. Starting from MySQL-4.1 we use a more efficient flag here. 
*/ - - build_template(prebuilt, user_thd, table, ROW_MYSQL_REC_FIELDS); - - DBUG_RETURN(0); -} - -/************************************************************************** -Positions an index cursor to the index specified in keynr. Fetches the -row if any. */ -/* ??? This is only used to read whole keys ??? */ - -int -ha_innobase::index_read_idx( -/*========================*/ - /* out: error number or 0 */ - uchar* buf, /* in/out: buffer for the returned - row */ - uint keynr, /* in: use this index */ - const uchar* key, /* in: key value; if this is NULL - we position the cursor at the - start or end of index */ - uint key_len, /* in: key value length */ - enum ha_rkey_function find_flag)/* in: search flags from my_base.h */ -{ - if (change_active_index(keynr)) { - - return(1); - } - - return(index_read(buf, key, key_len, find_flag)); -} - -/*************************************************************************** -Reads the next or previous row from a cursor, which must have previously been -positioned using index_read. 
*/ - -int -ha_innobase::general_fetch( -/*=======================*/ - /* out: 0, HA_ERR_END_OF_FILE, or error - number */ - uchar* buf, /* in/out: buffer for next row in MySQL - format */ - uint direction, /* in: ROW_SEL_NEXT or ROW_SEL_PREV */ - uint match_mode) /* in: 0, ROW_SEL_EXACT, or - ROW_SEL_EXACT_PREFIX */ -{ - ulint ret; - int error = 0; - - DBUG_ENTER("general_fetch"); - - ut_a(prebuilt->trx == thd_to_trx(user_thd)); - - innodb_srv_conc_enter_innodb(prebuilt->trx); - - ret = row_search_for_mysql((byte*)buf, 0, prebuilt, match_mode, - direction); - innodb_srv_conc_exit_innodb(prebuilt->trx); - - if (ret == DB_SUCCESS) { - error = 0; - table->status = 0; - - } else if (ret == DB_RECORD_NOT_FOUND) { - error = HA_ERR_END_OF_FILE; - table->status = STATUS_NOT_FOUND; - - } else if (ret == DB_END_OF_INDEX) { - error = HA_ERR_END_OF_FILE; - table->status = STATUS_NOT_FOUND; - } else { - error = convert_error_code_to_mysql((int) ret, user_thd); - table->status = STATUS_NOT_FOUND; - } - - DBUG_RETURN(error); -} - -/*************************************************************************** -Reads the next row from a cursor, which must have previously been -positioned using index_read. */ - -int -ha_innobase::index_next( -/*====================*/ - /* out: 0, HA_ERR_END_OF_FILE, or error - number */ - uchar* buf) /* in/out: buffer for next row in MySQL - format */ -{ - ha_statistic_increment(&SSV::ha_read_next_count); - - return(general_fetch(buf, ROW_SEL_NEXT, 0)); -} - -/*********************************************************************** -Reads the next row matching to the key value given as the parameter. 
*/ - -int -ha_innobase::index_next_same( -/*=========================*/ - /* out: 0, HA_ERR_END_OF_FILE, or error - number */ - uchar* buf, /* in/out: buffer for the row */ - const uchar* key, /* in: key value */ - uint keylen) /* in: key value length */ -{ - ha_statistic_increment(&SSV::ha_read_next_count); - - return(general_fetch(buf, ROW_SEL_NEXT, last_match_mode)); -} - -/*************************************************************************** -Reads the previous row from a cursor, which must have previously been -positioned using index_read. */ - -int -ha_innobase::index_prev( -/*====================*/ - /* out: 0, HA_ERR_END_OF_FILE, or error number */ - uchar* buf) /* in/out: buffer for previous row in MySQL format */ -{ - ha_statistic_increment(&SSV::ha_read_prev_count); - - return(general_fetch(buf, ROW_SEL_PREV, 0)); -} - -/************************************************************************ -Positions a cursor on the first record in an index and reads the -corresponding row to buf. */ - -int -ha_innobase::index_first( -/*=====================*/ - /* out: 0, HA_ERR_END_OF_FILE, or error code */ - uchar* buf) /* in/out: buffer for the row */ -{ - int error; - - DBUG_ENTER("index_first"); - ha_statistic_increment(&SSV::ha_read_first_count); - - error = index_read(buf, NULL, 0, HA_READ_AFTER_KEY); - - /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */ - - if (error == HA_ERR_KEY_NOT_FOUND) { - error = HA_ERR_END_OF_FILE; - } - - DBUG_RETURN(error); -} - -/************************************************************************ -Positions a cursor on the last record in an index and reads the -corresponding row to buf. 
*/ - -int -ha_innobase::index_last( -/*====================*/ - /* out: 0, HA_ERR_END_OF_FILE, or error code */ - uchar* buf) /* in/out: buffer for the row */ -{ - int error; - - DBUG_ENTER("index_last"); - ha_statistic_increment(&SSV::ha_read_last_count); - - error = index_read(buf, NULL, 0, HA_READ_BEFORE_KEY); - - /* MySQL does not seem to allow this to return HA_ERR_KEY_NOT_FOUND */ - - if (error == HA_ERR_KEY_NOT_FOUND) { - error = HA_ERR_END_OF_FILE; - } - - DBUG_RETURN(error); -} - -/******************************************************************** -Initialize a table scan. */ - -int -ha_innobase::rnd_init( -/*==================*/ - /* out: 0 or error number */ - bool scan) /* in: ???????? */ -{ - int err; - - /* Store the active index value so that we can restore the original - value after a scan */ - - if (prebuilt->clust_index_was_generated) { - err = change_active_index(MAX_KEY); - } else { - err = change_active_index(primary_key); - } - - /* Don't use semi-consistent read in random row reads (by position). - This means we must disable semi_consistent_read if scan is false */ - - if (!scan) { - try_semi_consistent_read(0); - } - - start_of_scan = 1; - - return(err); -} - -/********************************************************************* -Ends a table scan. */ - -int -ha_innobase::rnd_end(void) -/*======================*/ - /* out: 0 or error number */ -{ - return(index_end()); -} - -/********************************************************************* -Reads the next row in a table scan (also used to read the FIRST row -in a table scan). 
*/ - -int -ha_innobase::rnd_next( -/*==================*/ - /* out: 0, HA_ERR_END_OF_FILE, or error number */ - uchar* buf) /* in/out: returns the row in this buffer, - in MySQL format */ -{ - int error; - - DBUG_ENTER("rnd_next"); - ha_statistic_increment(&SSV::ha_read_rnd_next_count); - - if (start_of_scan) { - error = index_first(buf); - if (error == HA_ERR_KEY_NOT_FOUND) { - error = HA_ERR_END_OF_FILE; - } - start_of_scan = 0; - } else { - error = general_fetch(buf, ROW_SEL_NEXT, 0); - } - - DBUG_RETURN(error); -} - -/************************************************************************** -Fetches a row from the table based on a row reference. */ - -int -ha_innobase::rnd_pos( -/*=================*/ - /* out: 0, HA_ERR_KEY_NOT_FOUND, or error code */ - uchar* buf, /* in/out: buffer for the row */ - uchar* pos) /* in: primary key value of the row in the - MySQL format, or the row id if the clustered - index was internally generated by InnoDB; the - length of data in pos has to be ref_length */ -{ - int error; - uint keynr = active_index; - DBUG_ENTER("rnd_pos"); - DBUG_DUMP("key", pos, ref_length); - - ha_statistic_increment(&SSV::ha_read_rnd_count); - - ut_a(prebuilt->trx == thd_to_trx(ha_thd())); - - if (prebuilt->clust_index_was_generated) { - /* No primary key was defined for the table and we - generated the clustered index from the row id: the - row reference is the row id, not any key value - that MySQL knows of */ - - error = change_active_index(MAX_KEY); - } else { - error = change_active_index(primary_key); - } - - if (error) { - DBUG_PRINT("error", ("Got error: %d", error)); - DBUG_RETURN(error); - } - - /* Note that we assume the length of the row reference is fixed - for the table, and it is == ref_length */ - - error = index_read(buf, pos, ref_length, HA_READ_KEY_EXACT); - - if (error) { - DBUG_PRINT("error", ("Got error: %d", error)); - } - - change_active_index(keynr); - - DBUG_RETURN(error); -} - 
-/************************************************************************* -Stores a reference to the current row to 'ref' field of the handle. Note -that in the case where we have generated the clustered index for the -table, the function parameter is illogical: we MUST ASSUME that 'record' -is the current 'position' of the handle, because if row ref is actually -the row id internally generated in InnoDB, then 'record' does not contain -it. We just guess that the row id must be for the record where the handle -was positioned the last time. */ - -void -ha_innobase::position( -/*==================*/ - const uchar* record) /* in: row in MySQL format */ -{ - uint len; - - ut_a(prebuilt->trx == thd_to_trx(ha_thd())); - - if (prebuilt->clust_index_was_generated) { - /* No primary key was defined for the table and we - generated the clustered index from row id: the - row reference will be the row id, not any key value - that MySQL knows of */ - - len = DATA_ROW_ID_LEN; - - memcpy(ref, prebuilt->row_id, len); - } else { - len = store_key_val_for_row(primary_key, (char*)ref, - ref_length, record); - } - - /* We assume that the 'ref' value len is always fixed for the same - table. */ - - if (len != ref_length) { - sql_print_error("Stored ref len is %lu, but table ref len is %lu", - (ulong) len, (ulong) ref_length); - } -} - -/********************************************************************* -If it's a DB_TOO_BIG_RECORD error then set a suitable message to -return to the client.*/ -inline -void -innodb_check_for_record_too_big_error( -/*==================================*/ - ulint comp, /* in: ROW_FORMAT: nonzero=COMPACT, 0=REDUNDANT */ - int error) /* in: error code to check */ -{ - if (error == (int)DB_TOO_BIG_RECORD) { - ulint max_row_size - = page_get_free_space_of_empty_noninline(comp) / 2; - - my_error(ER_TOO_BIG_ROWSIZE, MYF(0), max_row_size); - } -} - -/* limit innodb monitor access to users with PROCESS privilege. -See http://bugs.mysql.com/32710 for expl. 
why we choose PROCESS. */ -#define IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name, thd) \ - (row_is_magic_monitor_table(table_name) \ - && check_global_access(thd, PROCESS_ACL)) - -/********************************************************************* -Creates a table definition to an InnoDB database. */ -static -int -create_table_def( -/*=============*/ - trx_t* trx, /* in: InnoDB transaction handle */ - TABLE* form, /* in: information on table - columns and indexes */ - const char* table_name, /* in: table name */ - const char* path_of_temp_table,/* in: if this is a table explicitly - created by the user with the - TEMPORARY keyword, then this - parameter is the dir path where the - table should be placed if we create - an .ibd file for it (no .ibd extension - in the path, though); otherwise this - is NULL */ - ulint flags) /* in: table flags */ -{ - Field* field; - dict_table_t* table; - ulint n_cols; - int error; - ulint col_type; - ulint col_len; - ulint nulls_allowed; - ulint unsigned_type; - ulint binary_type; - ulint long_true_varchar; - ulint charset_no; - ulint i; - - DBUG_ENTER("create_table_def"); - DBUG_PRINT("enter", ("table_name: %s", table_name)); - - ut_a(trx->mysql_thd != NULL); - if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(table_name, - (THD*) trx->mysql_thd)) { - DBUG_RETURN(HA_ERR_GENERIC); - } - - n_cols = form->s->fields; - - /* We pass 0 as the space id, and determine at a lower level the space - id where to store the table */ - - table = dict_mem_table_create(table_name, 0, n_cols, flags); - - if (path_of_temp_table) { - table->dir_path_of_temp_table = - mem_heap_strdup(table->heap, path_of_temp_table); - } - - for (i = 0; i < n_cols; i++) { - field = form->field[i]; - - col_type = get_innobase_type_from_mysql_type(&unsigned_type, - field); - if (field->null_ptr) { - nulls_allowed = 0; - } else { - nulls_allowed = DATA_NOT_NULL; - } - - if (field->binary()) { - binary_type = DATA_BINARY_TYPE; - } else { - binary_type = 0; - } - - charset_no 
= 0; - - if (dtype_is_string_type(col_type)) { - - charset_no = (ulint)field->charset()->number; - - ut_a(charset_no < 256); /* in data0type.h we assume - that the number fits in one - byte */ - } - - ut_a(field->type() < 256); /* we assume in dtype_form_prtype() - that this fits in one byte */ - col_len = field->pack_length(); - - /* The MySQL pack length contains 1 or 2 bytes length field - for a true VARCHAR. Let us subtract that, so that the InnoDB - column length in the InnoDB data dictionary is the real - maximum byte length of the actual data. */ - - long_true_varchar = 0; - - if (field->type() == MYSQL_TYPE_VARCHAR) { - col_len -= ((Field_varstring*)field)->length_bytes; - - if (((Field_varstring*)field)->length_bytes == 2) { - long_true_varchar = DATA_LONG_TRUE_VARCHAR; - } - } - - dict_mem_table_add_col(table, table->heap, - (char*) field->field_name, - col_type, - dtype_form_prtype( - (ulint)field->type() - | nulls_allowed | unsigned_type - | binary_type | long_true_varchar, - charset_no), - col_len); - } - - error = row_create_table_for_mysql(table, trx); - - innodb_check_for_record_too_big_error(flags & DICT_TF_COMPACT, error); - - error = convert_error_code_to_mysql(error, NULL); - - DBUG_RETURN(error); -} - -/********************************************************************* -Creates an index in an InnoDB database. 
*/
static
int
create_index(
/*=========*/
					/* out: 0 or a MySQL error number */
	trx_t*		trx,		/* in: InnoDB transaction handle */
	TABLE*		form,		/* in: information on table
					columns and indexes */
	const char*	table_name,	/* in: table name */
	uint		key_num)	/* in: index number */
{
	dict_index_t*	new_index;
	KEY*		mysql_key;
	ulint		part_count;
	ulint		index_type;
	ulint*		part_lengths;
	ulint		part_no;
	int		rc;

	DBUG_ENTER("create_index");

	mysql_key = form->key_info + key_num;

	part_count = mysql_key->key_parts;

	index_type = 0;

	/* The primary key of the table becomes the clustered index */
	if (key_num == form->s->primary_key) {
		index_type |= DICT_CLUSTERED;
	}

	if (mysql_key->flags & HA_NOSAME ) {
		index_type |= DICT_UNIQUE;
	}

	/* We pass 0 as the space id, and determine at a lower level the space
	id where to store the table */

	new_index = dict_mem_index_create((char*) table_name,
					  mysql_key->name, 0,
					  index_type, part_count);

	part_lengths = (ulint*) my_malloc(sizeof(ulint) * part_count,
					  MYF(MY_FAE));

	for (part_no = 0; part_no < part_count; part_no++) {
		KEY_PART_INFO*	part = mysql_key->key_part + part_no;
		Field*		column = NULL;
		ulint		col_idx;
		ulint		innodb_type;
		ulint		signedness_flag;
		ulint		prefix_len;
		ibool		is_prefix;

		/* (The flag HA_PART_KEY_SEG denotes in MySQL a column prefix
		field in an index: we only store a specified number of first
		bytes of the column to the index field.) The flag does not
		seem to be properly set by MySQL. Let us fall back on testing
		the length of the key part versus the column. */

		/* Find the table column that has the same name
		(compared case-insensitively) as the key part. */
		for (col_idx = 0; col_idx < form->s->fields; col_idx++) {

			column = form->field[col_idx];

			if (0 == innobase_strcasecmp(
					column->field_name,
					part->field->field_name)) {

				break;
			}
		}

		/* The column must have been found */
		ut_a(col_idx < form->s->fields);

		innodb_type = get_innobase_type_from_mysql_type(
			&signedness_flag, part->field);

		/* The key part is a column prefix if the column is a
		BLOB, or if the key part is shorter than the column
		data (for a true VARCHAR the length bytes are excluded
		from the column length). */
		is_prefix = (DATA_BLOB == innodb_type
			     || (part->length < column->pack_length()
				 && column->type() != MYSQL_TYPE_VARCHAR)
			     || (column->type() == MYSQL_TYPE_VARCHAR
				 && part->length < column->pack_length()
				 - ((Field_varstring*)column)->length_bytes));

		prefix_len = 0;

		if (is_prefix) {
			if (innodb_type == DATA_INT
			    || innodb_type == DATA_FLOAT
			    || innodb_type == DATA_DOUBLE
			    || innodb_type == DATA_DECIMAL) {
				/* A prefix makes no sense for a numeric
				type: report it and index the whole
				column instead. */
				sql_print_error(
					"MySQL is trying to create a column "
					"prefix index field, on an "
					"inappropriate data type. Table "
					"name %s, column name %s.",
					table_name,
					part->field->field_name);
			} else {
				prefix_len = part->length;
			}
		}

		part_lengths[part_no] = part->length;

		dict_mem_index_add_field(new_index,
			(char*) part->field->field_name, prefix_len);
	}

	/* Even though we've defined max_supported_key_part_length, we
	still do our own checking using field_lengths to be absolutely
	sure we don't create too long indexes. */
	rc = row_create_index_for_mysql(new_index, trx, part_lengths);

	innodb_check_for_record_too_big_error(form->s->row_type
					      != ROW_TYPE_REDUNDANT, rc);

	rc = convert_error_code_to_mysql(rc, NULL);

	my_free(part_lengths, MYF(0));

	DBUG_RETURN(rc);
}

/*********************************************************************
Creates an index to an InnoDB table when the user has defined no
primary index.
*/ -static -int -create_clustered_index_when_no_primary( -/*===================================*/ - trx_t* trx, /* in: InnoDB transaction handle */ - ulint comp, /* in: ROW_FORMAT: - nonzero=COMPACT, 0=REDUNDANT */ - const char* table_name) /* in: table name */ -{ - dict_index_t* index; - int error; - - /* We pass 0 as the space id, and determine at a lower level the space - id where to store the table */ - - index = dict_mem_index_create(table_name, "GEN_CLUST_INDEX", - 0, DICT_CLUSTERED, 0); - error = row_create_index_for_mysql(index, trx, NULL); - - innodb_check_for_record_too_big_error(comp, error); - - error = convert_error_code_to_mysql(error, NULL); - - return(error); -} - -/********************************************************************* -Update create_info. Used in SHOW CREATE TABLE et al. */ - -void -ha_innobase::update_create_info( -/*============================*/ - HA_CREATE_INFO* create_info) /* in/out: create info */ -{ - if (!(create_info->used_fields & HA_CREATE_USED_AUTO)) { - ha_innobase::info(HA_STATUS_AUTO); - create_info->auto_increment_value = stats.auto_increment_value; - } -} - -/********************************************************************* -Creates a new table to an InnoDB database. */ - -int -ha_innobase::create( -/*================*/ - /* out: error number */ - const char* name, /* in: table name */ - TABLE* form, /* in: information on table - columns and indexes */ - HA_CREATE_INFO* create_info) /* in: more information of the - created table, contains also the - create statement string */ -{ - int error; - dict_table_t* innobase_table; - trx_t* parent_trx; - trx_t* trx; - int primary_key_no; - uint i; - char name2[FN_REFLEN]; - char norm_name[FN_REFLEN]; - THD* thd = ha_thd(); - ib_longlong auto_inc_value; - ulint flags; - - DBUG_ENTER("ha_innobase::create"); - - DBUG_ASSERT(thd != NULL); - DBUG_ASSERT(create_info != NULL); - -#ifdef __WIN__ - /* Names passed in from server are in two formats: - 1. 
<database_name>/<table_name>: for normal table creation - 2. full path: for temp table creation, or sym link - - When srv_file_per_table is on, check for full path pattern, i.e. - X:\dir\..., X is a driver letter, or - \\dir1\dir2\..., UNC path - returns error if it is in full path format, but not creating a temp. - table. Currently InnoDB does not support symbolic link on Windows. */ - - if (srv_file_per_table - && (!create_info->options & HA_LEX_CREATE_TMP_TABLE)) { - - if ((name[1] == ':') - || (name[0] == '\\' && name[1] == '\\')) { - sql_print_error("Cannot create table %s\n", name); - DBUG_RETURN(HA_ERR_GENERIC); - } - } -#endif - - if (form->s->fields > 1000) { - /* The limit probably should be REC_MAX_N_FIELDS - 3 = 1020, - but we play safe here */ - - DBUG_RETURN(HA_ERR_TO_BIG_ROW); - } - - /* Get the transaction associated with the current thd, or create one - if not yet created */ - - parent_trx = check_trx_exists(thd); - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - trx_search_latch_release_if_reserved(parent_trx); - - trx = trx_allocate_for_mysql(); - - trx->mysql_thd = thd; - trx->mysql_query_str = thd_query(thd); - - if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) { - trx->check_foreigns = FALSE; - } - - if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) { - trx->check_unique_secondary = FALSE; - } - - if (lower_case_table_names) { - srv_lower_case_table_names = TRUE; - } else { - srv_lower_case_table_names = FALSE; - } - - strcpy(name2, name); - - normalize_table_name(norm_name, name2); - - /* Latch the InnoDB data dictionary exclusively so that no deadlocks - or lock waits can happen in it during a table create operation. - Drop table etc. do this latching in row0mysql.c. 
*/ - - row_mysql_lock_data_dictionary(trx); - - /* Create the table definition in InnoDB */ - - flags = 0; - - if (form->s->row_type != ROW_TYPE_REDUNDANT) { - flags |= DICT_TF_COMPACT; - } - - error = create_table_def(trx, form, norm_name, - create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL, - flags); - - if (error) { - goto cleanup; - } - - /* Look for a primary key */ - - primary_key_no= (form->s->primary_key != MAX_KEY ? - (int) form->s->primary_key : - -1); - - /* Our function row_get_mysql_key_number_for_index assumes - the primary key is always number 0, if it exists */ - - DBUG_ASSERT(primary_key_no == -1 || primary_key_no == 0); - - /* Create the keys */ - - if (form->s->keys == 0 || primary_key_no == -1) { - /* Create an index which is used as the clustered index; - order the rows by their row id which is internally generated - by InnoDB */ - - error = create_clustered_index_when_no_primary( - trx, form->s->row_type != ROW_TYPE_REDUNDANT, - norm_name); - if (error) { - goto cleanup; - } - } - - if (primary_key_no != -1) { - /* In InnoDB the clustered index must always be created - first */ - if ((error = create_index(trx, form, norm_name, - (uint) primary_key_no))) { - goto cleanup; - } - } - - for (i = 0; i < form->s->keys; i++) { - - if (i != (uint) primary_key_no) { - - if ((error = create_index(trx, form, norm_name, i))) { - goto cleanup; - } - } - } - - if (*trx->mysql_query_str) { - error = row_table_add_foreign_constraints(trx, - *trx->mysql_query_str, norm_name, - create_info->options & HA_LEX_CREATE_TMP_TABLE); - - error = convert_error_code_to_mysql(error, NULL); - - if (error) { - goto cleanup; - } - } - - innobase_commit_low(trx); - - row_mysql_unlock_data_dictionary(trx); - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ - - log_buffer_flush_to_disk(); - - innobase_table = dict_table_get(norm_name, FALSE); - 
- DBUG_ASSERT(innobase_table != 0); - - /* Note: We can't call update_thd() as prebuilt will not be - setup at this stage and so we use thd. */ - - /* We need to copy the AUTOINC value from the old table if - this is an ALTER TABLE. */ - - if (((create_info->used_fields & HA_CREATE_USED_AUTO) - || thd_sql_command(thd) == SQLCOM_ALTER_TABLE) - && create_info->auto_increment_value != 0) { - - /* Query was ALTER TABLE...AUTO_INCREMENT = x; or - CREATE TABLE ...AUTO_INCREMENT = x; Find out a table - definition from the dictionary and get the current value - of the auto increment field. Set a new value to the - auto increment field if the value is greater than the - maximum value in the column. */ - - auto_inc_value = create_info->auto_increment_value; - - dict_table_autoinc_lock(innobase_table); - dict_table_autoinc_initialize(innobase_table, auto_inc_value); - dict_table_autoinc_unlock(innobase_table); - } - - /* Tell the InnoDB server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - trx_free_for_mysql(trx); - - DBUG_RETURN(0); - -cleanup: - innobase_commit_low(trx); - - row_mysql_unlock_data_dictionary(trx); - - trx_free_for_mysql(trx); - - DBUG_RETURN(error); -} - -/********************************************************************* -Discards or imports an InnoDB tablespace. 
*/ - -int -ha_innobase::discard_or_import_tablespace( -/*======================================*/ - /* out: 0 == success, -1 == error */ - my_bool discard) /* in: TRUE if discard, else import */ -{ - dict_table_t* dict_table; - trx_t* trx; - int err; - - DBUG_ENTER("ha_innobase::discard_or_import_tablespace"); - - ut_a(prebuilt->trx); - ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N); - ut_a(prebuilt->trx == thd_to_trx(ha_thd())); - - dict_table = prebuilt->table; - trx = prebuilt->trx; - - if (discard) { - err = row_discard_tablespace_for_mysql(dict_table->name, trx); - } else { - err = row_import_tablespace_for_mysql(dict_table->name, trx); - } - - err = convert_error_code_to_mysql(err, NULL); - - DBUG_RETURN(err); -} - -/********************************************************************* -Deletes all rows of an InnoDB table. */ - -int -ha_innobase::delete_all_rows(void) -/*==============================*/ - /* out: error number */ -{ - int error; - - DBUG_ENTER("ha_innobase::delete_all_rows"); - - /* Get the transaction associated with the current thd, or create one - if not yet created, and update prebuilt->trx */ - - update_thd(ha_thd()); - - if (thd_sql_command(user_thd) != SQLCOM_TRUNCATE) { - fallback: - /* We only handle TRUNCATE TABLE t as a special case. - DELETE FROM t will have to use ha_innobase::delete_row(), - because DELETE is transactional while TRUNCATE is not. */ - DBUG_RETURN(my_errno=HA_ERR_WRONG_COMMAND); - } - - /* Truncate the table in InnoDB */ - - error = row_truncate_table_for_mysql(prebuilt->table, prebuilt->trx); - if (error == DB_ERROR) { - /* Cannot truncate; resort to ha_innobase::delete_row() */ - goto fallback; - } - - error = convert_error_code_to_mysql(error, NULL); - - DBUG_RETURN(error); -} - -/********************************************************************* -Drops a table from an InnoDB database. Before calling this function, -MySQL calls innobase_commit to commit the transaction of the current user. 
-Then the current user cannot have locks set on the table. Drop table -operation inside InnoDB will remove all locks any user has on the table -inside InnoDB. */ - -int -ha_innobase::delete_table( -/*======================*/ - /* out: error number */ - const char* name) /* in: table name */ -{ - ulint name_len; - int error; - trx_t* parent_trx; - trx_t* trx; - THD *thd = ha_thd(); - char norm_name[1000]; - - DBUG_ENTER("ha_innobase::delete_table"); - - /* Strangely, MySQL passes the table name without the '.frm' - extension, in contrast to ::create */ - normalize_table_name(norm_name, name); - - if (IS_MAGIC_TABLE_AND_USER_DENIED_ACCESS(norm_name, thd)) { - DBUG_RETURN(HA_ERR_GENERIC); - } - - /* Get the transaction associated with the current thd, or create one - if not yet created */ - - parent_trx = check_trx_exists(thd); - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - trx_search_latch_release_if_reserved(parent_trx); - - if (lower_case_table_names) { - srv_lower_case_table_names = TRUE; - } else { - srv_lower_case_table_names = FALSE; - } - - trx = trx_allocate_for_mysql(); - - trx->mysql_thd = thd; - trx->mysql_query_str = thd_query(thd); - - if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) { - trx->check_foreigns = FALSE; - } - - if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) { - trx->check_unique_secondary = FALSE; - } - - name_len = strlen(name); - - assert(name_len < 1000); - - /* Drop the table in InnoDB */ - - error = row_drop_table_for_mysql(norm_name, trx, - thd_sql_command(thd) - == SQLCOM_DROP_DB); - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ - - log_buffer_flush_to_disk(); - - /* Tell the InnoDB server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - 
innobase_commit_low(trx); - - trx_free_for_mysql(trx); - - error = convert_error_code_to_mysql(error, NULL); - - DBUG_RETURN(error); -} - -/********************************************************************* -Removes all tables in the named database inside InnoDB. */ -static -void -innobase_drop_database( -/*===================*/ - /* out: error number */ - handlerton *hton, /* in: handlerton of Innodb */ - char* path) /* in: database path; inside InnoDB the name - of the last directory in the path is used as - the database name: for example, in 'mysql/data/test' - the database name is 'test' */ -{ - ulint len = 0; - trx_t* parent_trx; - trx_t* trx; - char* ptr; - int error; - char* namebuf; - THD* thd = current_thd; - - /* Get the transaction associated with the current thd, or create one - if not yet created */ - - parent_trx = check_trx_exists(thd); - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - trx_search_latch_release_if_reserved(parent_trx); - - ptr = strend(path) - 2; - - while (ptr >= path && *ptr != '\\' && *ptr != '/') { - ptr--; - len++; - } - - ptr++; - namebuf = (char*) my_malloc((uint) len + 2, MYF(0)); - - memcpy(namebuf, ptr, len); - namebuf[len] = '/'; - namebuf[len + 1] = '\0'; -#ifdef __WIN__ - innobase_casedn_str(namebuf); -#endif - trx = trx_allocate_for_mysql(); - trx->mysql_thd = thd; - trx->mysql_query_str = thd_query(thd); - - if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) { - trx->check_foreigns = FALSE; - } - - error = row_drop_database_for_mysql(namebuf, trx); - my_free(namebuf, MYF(0)); - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ - - log_buffer_flush_to_disk(); - - /* Tell the InnoDB server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - 
innobase_commit_low(trx); - trx_free_for_mysql(trx); -#ifdef NO_LONGER_INTERESTED_IN_DROP_DB_ERROR - error = convert_error_code_to_mysql(error, NULL); - - return(error); -#else - return; -#endif -} - -/************************************************************************* -Renames an InnoDB table. */ - -int -ha_innobase::rename_table( -/*======================*/ - /* out: 0 or error code */ - const char* from, /* in: old name of the table */ - const char* to) /* in: new name of the table */ -{ - ulint name_len1; - ulint name_len2; - int error; - trx_t* parent_trx; - trx_t* trx; - char norm_from[1000]; - char norm_to[1000]; - THD* thd = ha_thd(); - - DBUG_ENTER("ha_innobase::rename_table"); - - /* Get the transaction associated with the current thd, or create one - if not yet created */ - - parent_trx = check_trx_exists(thd); - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - trx_search_latch_release_if_reserved(parent_trx); - - if (lower_case_table_names) { - srv_lower_case_table_names = TRUE; - } else { - srv_lower_case_table_names = FALSE; - } - - trx = trx_allocate_for_mysql(); - trx->mysql_thd = thd; - trx->mysql_query_str = thd_query(thd); - - if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) { - trx->check_foreigns = FALSE; - } - - name_len1 = strlen(from); - name_len2 = strlen(to); - - assert(name_len1 < 1000); - assert(name_len2 < 1000); - - normalize_table_name(norm_from, from); - normalize_table_name(norm_to, to); - - /* Rename the table in InnoDB */ - - error = row_rename_table_for_mysql(norm_from, norm_to, trx); - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ - - log_buffer_flush_to_disk(); - - /* Tell the InnoDB server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - 
innobase_commit_low(trx); - trx_free_for_mysql(trx); - - error = convert_error_code_to_mysql(error, NULL); - - DBUG_RETURN(error); -} - -/************************************************************************* -Estimates the number of index records in a range. */ - -ha_rows -ha_innobase::records_in_range( -/*==========================*/ - /* out: estimated number of - rows */ - uint keynr, /* in: index number */ - key_range *min_key, /* in: start key value of the - range, may also be 0 */ - key_range *max_key) /* in: range end key val, may - also be 0 */ -{ - KEY* key; - dict_index_t* index; - uchar* key_val_buff2 = (uchar*) my_malloc( - table->s->reclength - + table->s->max_key_length + 100, - MYF(MY_FAE)); - ulint buff2_len = table->s->reclength - + table->s->max_key_length + 100; - dtuple_t* range_start; - dtuple_t* range_end; - ib_longlong n_rows; - ulint mode1; - ulint mode2; - void* heap1; - void* heap2; - - DBUG_ENTER("records_in_range"); - - ut_a(prebuilt->trx == thd_to_trx(ha_thd())); - - prebuilt->trx->op_info = (char*)"estimating records in index range"; - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - trx_search_latch_release_if_reserved(prebuilt->trx); - - active_index = keynr; - - key = table->key_info + active_index; - - index = dict_table_get_index_noninline(prebuilt->table, key->name); - - range_start = dtuple_create_for_mysql(&heap1, key->key_parts); - dict_index_copy_types(range_start, index, key->key_parts); - - range_end = dtuple_create_for_mysql(&heap2, key->key_parts); - dict_index_copy_types(range_end, index, key->key_parts); - - row_sel_convert_mysql_key_to_innobase( - range_start, (byte*) key_val_buff, - (ulint)upd_and_key_val_buff_len, - index, - (byte*) (min_key ? min_key->key : - (const uchar*) 0), - (ulint) (min_key ? 
min_key->length : 0), - prebuilt->trx); - - row_sel_convert_mysql_key_to_innobase( - range_end, (byte*) key_val_buff2, - buff2_len, index, - (byte*) (max_key ? max_key->key : - (const uchar*) 0), - (ulint) (max_key ? max_key->length : 0), - prebuilt->trx); - - mode1 = convert_search_mode_to_innobase(min_key ? min_key->flag : - HA_READ_KEY_EXACT); - mode2 = convert_search_mode_to_innobase(max_key ? max_key->flag : - HA_READ_KEY_EXACT); - - if (mode1 != PAGE_CUR_UNSUPP && mode2 != PAGE_CUR_UNSUPP) { - - n_rows = btr_estimate_n_rows_in_range(index, range_start, - mode1, range_end, - mode2); - } else { - - n_rows = HA_POS_ERROR; - } - - dtuple_free_for_mysql(heap1); - dtuple_free_for_mysql(heap2); - - my_free(key_val_buff2, MYF(0)); - - prebuilt->trx->op_info = (char*)""; - - /* The MySQL optimizer seems to believe an estimate of 0 rows is - always accurate and may return the result 'Empty set' based on that. - The accuracy is not guaranteed, and even if it were, for a locking - read we should anyway perform the search to set the next-key lock. - Add 1 to the value to make sure MySQL does not make the assumption! */ - - if (n_rows == 0) { - n_rows = 1; - } - - DBUG_RETURN((ha_rows) n_rows); -} - -/************************************************************************* -Gives an UPPER BOUND to the number of rows in a table. This is used in -filesort.cc. */ - -ha_rows -ha_innobase::estimate_rows_upper_bound(void) -/*======================================*/ - /* out: upper bound of rows */ -{ - dict_index_t* index; - ulonglong estimate; - ulonglong local_data_file_length; - - DBUG_ENTER("estimate_rows_upper_bound"); - - /* We do not know if MySQL can call this function before calling - external_lock(). To be safe, update the thd of the current table - handle. 
*/ - - update_thd(ha_thd()); - - prebuilt->trx->op_info = (char*) - "calculating upper bound for table rows"; - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - trx_search_latch_release_if_reserved(prebuilt->trx); - - index = dict_table_get_first_index_noninline(prebuilt->table); - - local_data_file_length = ((ulonglong) index->stat_n_leaf_pages) - * UNIV_PAGE_SIZE; - - /* Calculate a minimum length for a clustered index record and from - that an upper bound for the number of rows. Since we only calculate - new statistics in row0mysql.c when a table has grown by a threshold - factor, we must add a safety factor 2 in front of the formula below. */ - - estimate = 2 * local_data_file_length / - dict_index_calc_min_rec_len(index); - - prebuilt->trx->op_info = (char*)""; - - DBUG_RETURN((ha_rows) estimate); -} - -/************************************************************************* -How many seeks it will take to read through the table. This is to be -comparable to the number returned by records_in_range so that we can -decide if we should scan the table or use keys. */ - -double -ha_innobase::scan_time() -/*====================*/ - /* out: estimated time measured in disk seeks */ -{ - /* Since MySQL seems to favor table scans too much over index - searches, we pretend that a sequential read takes the same time - as a random disk read, that is, we do not divide the following - by 10, which would be physically realistic. */ - - return((double) (prebuilt->table->stat_clustered_index_size)); -} - -/********************************************************************** -Calculate the time it takes to read a set of ranges through an index -This enables us to optimise reads for clustered indexes. 
*/ - -double -ha_innobase::read_time( -/*===================*/ - /* out: estimated time measured in disk seeks */ - uint index, /* in: key number */ - uint ranges, /* in: how many ranges */ - ha_rows rows) /* in: estimated number of rows in the ranges */ -{ - ha_rows total_rows; - double time_for_scan; - - if (index != table->s->primary_key) { - /* Not clustered */ - return(handler::read_time(index, ranges, rows)); - } - - if (rows <= 2) { - - return((double) rows); - } - - /* Assume that the read time is proportional to the scan time for all - rows + at most one seek per range. */ - - time_for_scan = scan_time(); - - if ((total_rows = estimate_rows_upper_bound()) < rows) { - - return(time_for_scan); - } - - return(ranges + (double) rows / (double) total_rows * time_for_scan); -} - -/************************************************************************* -Returns statistics information of the table to the MySQL interpreter, -in various fields of the handle object. */ - -int -ha_innobase::info( -/*==============*/ - uint flag) /* in: what information MySQL requests */ -{ - dict_table_t* ib_table; - dict_index_t* index; - ha_rows rec_per_key; - ib_longlong n_rows; - ulong j; - ulong i; - char path[FN_REFLEN]; - os_file_stat_t stat_info; - - DBUG_ENTER("info"); - - /* If we are forcing recovery at a high level, we will suppress - statistics calculation on tables, because that may crash the - server if an index is badly corrupted. */ - - if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { - - /* We return success (0) instead of HA_ERR_CRASHED, - because we want MySQL to process this query and not - stop, like it would do if it received the error code - HA_ERR_CRASHED. */ - - DBUG_RETURN(0); - } - - /* We do not know if MySQL can call this function before calling - external_lock(). To be safe, update the thd of the current table - handle. 
*/ - - update_thd(ha_thd()); - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - prebuilt->trx->op_info = (char*)"returning various info to MySQL"; - - trx_search_latch_release_if_reserved(prebuilt->trx); - - ib_table = prebuilt->table; - - if (flag & HA_STATUS_TIME) { - if (innobase_stats_on_metadata) { - /* In sql_show we call with this flag: update - then statistics so that they are up-to-date */ - - prebuilt->trx->op_info = "updating table statistics"; - - dict_update_statistics(ib_table); - - prebuilt->trx->op_info = "returning various info to MySQL"; - } - - my_snprintf(path, sizeof(path), "%s/%s%s", - mysql_data_home, ib_table->name, reg_ext); - - unpack_filename(path,path); - - /* Note that we do not know the access time of the table, - nor the CHECK TABLE time, nor the UPDATE or INSERT time. */ - - if (os_file_get_status(path,&stat_info)) { - stats.create_time = stat_info.ctime; - } - } - - if (flag & HA_STATUS_VARIABLE) { - n_rows = ib_table->stat_n_rows; - - /* Because we do not protect stat_n_rows by any mutex in a - delete, it is theoretically possible that the value can be - smaller than zero! TODO: fix this race. - - The MySQL optimizer seems to assume in a left join that n_rows - is an accurate estimate if it is zero. Of course, it is not, - since we do not have any locks on the rows yet at this phase. - Since SHOW TABLE STATUS seems to call this function with the - HA_STATUS_TIME flag set, while the left join optimizer does not - set that flag, we add one to a zero value if the flag is not - set. That way SHOW TABLE STATUS will show the best estimate, - while the optimizer never sees the table empty. */ - - if (n_rows < 0) { - n_rows = 0; - } - - if (n_rows == 0 && !(flag & HA_STATUS_TIME)) { - n_rows++; - } - - /* Fix bug#40386: Not flushing query cache after truncate. - n_rows can not be 0 unless the table is empty, set to 1 - instead. 
The original problem of bug#29507 is actually - fixed in the server code. */ - if (thd_sql_command(user_thd) == SQLCOM_TRUNCATE) { - - n_rows = 1; - - /* We need to reset the prebuilt value too, otherwise - checks for values greater than the last value written - to the table will fail and the autoinc counter will - not be updated. This will force write_row() into - attempting an update of the table's AUTOINC counter. */ - - prebuilt->autoinc_last_value = 0; - } - - stats.records = (ha_rows)n_rows; - stats.deleted = 0; - stats.data_file_length = ((ulonglong) - ib_table->stat_clustered_index_size) - * UNIV_PAGE_SIZE; - stats.index_file_length = ((ulonglong) - ib_table->stat_sum_of_other_index_sizes) - * UNIV_PAGE_SIZE; - - /* Since fsp_get_available_space_in_free_extents() is - acquiring latches inside InnoDB, we do not call it if we - are asked by MySQL to avoid locking. Another reason to - avoid the call is that it uses quite a lot of CPU. - See Bug#38185. - We do not update delete_length if no locking is requested - so the "old" value can remain. delete_length is initialized - to 0 in the ha_statistics' constructor. */ - if (!(flag & HA_STATUS_NO_LOCK)) { - - /* lock the data dictionary to avoid races with - ibd_file_missing and tablespace_discarded */ - row_mysql_lock_data_dictionary(prebuilt->trx); - - /* ib_table->space must be an existent tablespace */ - if (!ib_table->ibd_file_missing - && !ib_table->tablespace_discarded) { - - stats.delete_length = - fsp_get_available_space_in_free_extents( - ib_table->space) * 1024; - } else { - - THD* thd; - - thd = ha_thd(); - - push_warning_printf( - thd, - MYSQL_ERROR::WARN_LEVEL_WARN, - ER_CANT_GET_STAT, - "InnoDB: Trying to get the free " - "space for table %s but its " - "tablespace has been discarded or " - "the .ibd file is missing. 
Setting " - "the free space to zero.", - ib_table->name); - - stats.delete_length = 0; - } - - row_mysql_unlock_data_dictionary(prebuilt->trx); - } - - stats.check_time = 0; - - if (stats.records == 0) { - stats.mean_rec_length = 0; - } else { - stats.mean_rec_length = (ulong) (stats.data_file_length / stats.records); - } - } - - if (flag & HA_STATUS_CONST) { - index = dict_table_get_first_index_noninline(ib_table); - - if (prebuilt->clust_index_was_generated) { - index = dict_table_get_next_index_noninline(index); - } - - for (i = 0; i < table->s->keys; i++) { - if (index == NULL) { - sql_print_error("Table %s contains fewer " - "indexes inside InnoDB than " - "are defined in the MySQL " - ".frm file. Have you mixed up " - ".frm files from different " - "installations? See " -"http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n", - - ib_table->name); - break; - } - - for (j = 0; j < table->key_info[i].key_parts; j++) { - - if (j + 1 > index->n_uniq) { - sql_print_error( -"Index %s of %s has %lu columns unique inside InnoDB, but MySQL is asking " -"statistics for %lu columns. Have you mixed up .frm files from different " -"installations? " -"See http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n", - index->name, - ib_table->name, - (unsigned long) - index->n_uniq, j + 1); - break; - } - - if (index->stat_n_diff_key_vals[j + 1] == 0) { - - rec_per_key = stats.records; - } else { - rec_per_key = (ha_rows)(stats.records / - index->stat_n_diff_key_vals[j + 1]); - } - - /* Since MySQL seems to favor table scans - too much over index searches, we pretend - index selectivity is 2 times better than - our estimate: */ - - rec_per_key = rec_per_key / 2; - - if (rec_per_key == 0) { - rec_per_key = 1; - } - - table->key_info[i].rec_per_key[j]= - rec_per_key >= ~(ulong) 0 ? 
~(ulong) 0 : - (ulong) rec_per_key; - } - - index = dict_table_get_next_index_noninline(index); - } - } - - if (flag & HA_STATUS_ERRKEY) { - ut_a(prebuilt->trx); - ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N); - - errkey = (unsigned int) row_get_mysql_key_number_for_index( - (dict_index_t*) trx_get_error_info(prebuilt->trx)); - } - - if (flag & HA_STATUS_AUTO && table->found_next_number_field) { - stats.auto_increment_value = innobase_peek_autoinc(); - } - - prebuilt->trx->op_info = (char*)""; - - DBUG_RETURN(0); -} - -/************************************************************************** -Updates index cardinalities of the table, based on 8 random dives into -each index tree. This does NOT calculate exact statistics on the table. */ - -int -ha_innobase::analyze( -/*=================*/ - /* out: returns always 0 (success) */ - THD* thd, /* in: connection thread handle */ - HA_CHECK_OPT* check_opt) /* in: currently ignored */ -{ - /* Simply call ::info() with all the flags */ - info(HA_STATUS_TIME | HA_STATUS_CONST | HA_STATUS_VARIABLE); - - return(0); -} - -/************************************************************************** -This is mapped to "ALTER TABLE tablename ENGINE=InnoDB", which rebuilds -the table in MySQL. */ - -int -ha_innobase::optimize( -/*==================*/ - THD* thd, /* in: connection thread handle */ - HA_CHECK_OPT* check_opt) /* in: currently ignored */ -{ - return(HA_ADMIN_TRY_ALTER); -} - -/*********************************************************************** -Tries to check that an InnoDB table is not corrupted. If corruption is -noticed, prints to stderr information about it. In case of corruption -may also assert a failure and crash the server. 
*/ - -int -ha_innobase::check( -/*===============*/ - /* out: HA_ADMIN_CORRUPT or - HA_ADMIN_OK */ - THD* thd, /* in: user thread handle */ - HA_CHECK_OPT* check_opt) /* in: check options, currently - ignored */ -{ - ulint ret; - - DBUG_ASSERT(thd == ha_thd()); - ut_a(prebuilt->trx); - ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N); - ut_a(prebuilt->trx == thd_to_trx(thd)); - - if (prebuilt->mysql_template == NULL) { - /* Build the template; we will use a dummy template - in index scans done in checking */ - - build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW); - } - - ret = row_check_table_for_mysql(prebuilt); - - if (ret == DB_SUCCESS) { - return(HA_ADMIN_OK); - } - - return(HA_ADMIN_CORRUPT); -} - -/***************************************************************** -Adds information about free space in the InnoDB tablespace to a table comment -which is printed out when a user calls SHOW TABLE STATUS. Adds also info on -foreign keys. */ - -char* -ha_innobase::update_table_comment( -/*==============================*/ - /* out: table comment + InnoDB free space + - info on foreign keys */ - const char* comment)/* in: table comment defined by user */ -{ - uint length = (uint) strlen(comment); - char* str; - long flen; - - /* We do not know if MySQL can call this function before calling - external_lock(). To be safe, update the thd of the current table - handle. 
*/ - - if (length > 64000 - 3) { - return((char*)comment); /* string too long */ - } - - update_thd(ha_thd()); - - prebuilt->trx->op_info = (char*)"returning table comment"; - - /* In case MySQL calls this in the middle of a SELECT query, release - possible adaptive hash latch to avoid deadlocks of threads */ - - trx_search_latch_release_if_reserved(prebuilt->trx); - str = NULL; - - /* output the data to a temporary file */ - - mutex_enter_noninline(&srv_dict_tmpfile_mutex); - rewind(srv_dict_tmpfile); - - fprintf(srv_dict_tmpfile, "InnoDB free: %llu kB", - fsp_get_available_space_in_free_extents( - prebuilt->table->space)); - - dict_print_info_on_foreign_keys(FALSE, srv_dict_tmpfile, - prebuilt->trx, prebuilt->table); - flen = ftell(srv_dict_tmpfile); - if (flen < 0) { - flen = 0; - } else if (length + flen + 3 > 64000) { - flen = 64000 - 3 - length; - } - - /* allocate buffer for the full string, and - read the contents of the temporary file */ - - str = (char*) my_malloc(length + flen + 3, MYF(0)); - - if (str) { - char* pos = str + length; - if (length) { - memcpy(str, comment, length); - *pos++ = ';'; - *pos++ = ' '; - } - rewind(srv_dict_tmpfile); - flen = (uint) fread(pos, 1, flen, srv_dict_tmpfile); - pos[flen] = 0; - } - - mutex_exit_noninline(&srv_dict_tmpfile_mutex); - - prebuilt->trx->op_info = (char*)""; - - return(str ? str : (char*) comment); -} - -/*********************************************************************** -Gets the foreign key create info for a table stored in InnoDB. */ - -char* -ha_innobase::get_foreign_key_create_info(void) -/*==========================================*/ - /* out, own: character string in the form which - can be inserted to the CREATE TABLE statement, - MUST be freed with ::free_foreign_key_create_info */ -{ - char* str = 0; - long flen; - - ut_a(prebuilt != NULL); - - /* We do not know if MySQL can call this function before calling - external_lock(). To be safe, update the thd of the current table - handle. 
*/ - - update_thd(ha_thd()); - - prebuilt->trx->op_info = (char*)"getting info on foreign keys"; - - /* In case MySQL calls this in the middle of a SELECT query, - release possible adaptive hash latch to avoid - deadlocks of threads */ - - trx_search_latch_release_if_reserved(prebuilt->trx); - - mutex_enter_noninline(&srv_dict_tmpfile_mutex); - rewind(srv_dict_tmpfile); - - /* output the data to a temporary file */ - dict_print_info_on_foreign_keys(TRUE, srv_dict_tmpfile, - prebuilt->trx, prebuilt->table); - prebuilt->trx->op_info = (char*)""; - - flen = ftell(srv_dict_tmpfile); - if (flen < 0) { - flen = 0; - } else if (flen > 64000 - 1) { - flen = 64000 - 1; - } - - /* allocate buffer for the string, and - read the contents of the temporary file */ - - str = (char*) my_malloc(flen + 1, MYF(0)); - - if (str) { - rewind(srv_dict_tmpfile); - flen = (uint) fread(str, 1, flen, srv_dict_tmpfile); - str[flen] = 0; - } - - mutex_exit_noninline(&srv_dict_tmpfile_mutex); - - return(str); -} - - -int -ha_innobase::get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list) -{ - dict_foreign_t* foreign; - - DBUG_ENTER("get_foreign_key_list"); - ut_a(prebuilt != NULL); - update_thd(ha_thd()); - prebuilt->trx->op_info = (char*)"getting list of foreign keys"; - trx_search_latch_release_if_reserved(prebuilt->trx); - mutex_enter_noninline(&(dict_sys->mutex)); - foreign = UT_LIST_GET_FIRST(prebuilt->table->foreign_list); - - while (foreign != NULL) { - uint i; - FOREIGN_KEY_INFO f_key_info; - LEX_STRING *name= 0; - uint ulen; - char uname[NAME_LEN+1]; /* Unencoded name */ - char db_name[NAME_LEN+1]; - const char *tmp_buff; - - tmp_buff= foreign->id; - i= 0; - while (tmp_buff[i] != '/') - i++; - tmp_buff+= i + 1; - f_key_info.forein_id = thd_make_lex_string(thd, 0, - tmp_buff, (uint) strlen(tmp_buff), 1); - tmp_buff= foreign->referenced_table_name; - - /* Database name */ - i= 0; - while (tmp_buff[i] != '/') - { - db_name[i]= tmp_buff[i]; - i++; - } - db_name[i]= 0; - ulen= 
filename_to_tablename(db_name, uname, sizeof(uname)); - f_key_info.referenced_db = thd_make_lex_string(thd, 0, - uname, ulen, 1); - - /* Table name */ - tmp_buff+= i + 1; - ulen= filename_to_tablename(tmp_buff, uname, sizeof(uname)); - f_key_info.referenced_table = thd_make_lex_string(thd, 0, - uname, ulen, 1); - - for (i= 0;;) { - tmp_buff= foreign->foreign_col_names[i]; - name = thd_make_lex_string(thd, name, - tmp_buff, (uint) strlen(tmp_buff), 1); - f_key_info.foreign_fields.push_back(name); - tmp_buff= foreign->referenced_col_names[i]; - name = thd_make_lex_string(thd, name, - tmp_buff, (uint) strlen(tmp_buff), 1); - f_key_info.referenced_fields.push_back(name); - if (++i >= foreign->n_fields) - break; - } - - ulong length; - if (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE) - { - length=7; - tmp_buff= "CASCADE"; - } - else if (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL) - { - length=8; - tmp_buff= "SET NULL"; - } - else if (foreign->type & DICT_FOREIGN_ON_DELETE_NO_ACTION) - { - length=9; - tmp_buff= "NO ACTION"; - } - else - { - length=8; - tmp_buff= "RESTRICT"; - } - f_key_info.delete_method = thd_make_lex_string( - thd, f_key_info.delete_method, tmp_buff, length, 1); - - - if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE) - { - length=7; - tmp_buff= "CASCADE"; - } - else if (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL) - { - length=8; - tmp_buff= "SET NULL"; - } - else if (foreign->type & DICT_FOREIGN_ON_UPDATE_NO_ACTION) - { - length=9; - tmp_buff= "NO ACTION"; - } - else - { - length=8; - tmp_buff= "RESTRICT"; - } - f_key_info.update_method = thd_make_lex_string( - thd, f_key_info.update_method, tmp_buff, length, 1); - if (foreign->referenced_index && - foreign->referenced_index->name) - { - f_key_info.referenced_key_name = thd_make_lex_string( - thd, f_key_info.referenced_key_name, - foreign->referenced_index->name, - (uint) strlen(foreign->referenced_index->name), 1); - } - else - f_key_info.referenced_key_name= 0; - - FOREIGN_KEY_INFO 
*pf_key_info = (FOREIGN_KEY_INFO *) - thd_memdup(thd, &f_key_info, sizeof(FOREIGN_KEY_INFO)); - f_key_list->push_back(pf_key_info); - foreign = UT_LIST_GET_NEXT(foreign_list, foreign); - } - mutex_exit_noninline(&(dict_sys->mutex)); - prebuilt->trx->op_info = (char*)""; - - DBUG_RETURN(0); -} - -/********************************************************************* -Checks if ALTER TABLE may change the storage engine of the table. -Changing storage engines is not allowed for tables for which there -are foreign key constraints (parent or child tables). */ - -bool -ha_innobase::can_switch_engines(void) -/*=================================*/ -{ - bool can_switch; - - DBUG_ENTER("ha_innobase::can_switch_engines"); - - ut_a(prebuilt->trx == thd_to_trx(ha_thd())); - - prebuilt->trx->op_info = - "determining if there are foreign key constraints"; - row_mysql_lock_data_dictionary(prebuilt->trx); - - can_switch = !UT_LIST_GET_FIRST(prebuilt->table->referenced_list) - && !UT_LIST_GET_FIRST(prebuilt->table->foreign_list); - - row_mysql_unlock_data_dictionary(prebuilt->trx); - prebuilt->trx->op_info = ""; - - DBUG_RETURN(can_switch); -} - -/*********************************************************************** -Checks if a table is referenced by a foreign key. The MySQL manual states that -a REPLACE is either equivalent to an INSERT, or DELETE(s) + INSERT. Only a -delete is then allowed internally to resolve a duplicate key conflict in -REPLACE, not an update. */ - -uint -ha_innobase::referenced_by_foreign_key(void) -/*========================================*/ - /* out: > 0 if referenced by a FOREIGN KEY */ -{ - if (dict_table_referenced_by_foreign_key(prebuilt->table)) { - - return(1); - } - - return(0); -} - -/*********************************************************************** -Frees the foreign key create info for a table stored in InnoDB, if it is -non-NULL. 
*/ - -void -ha_innobase::free_foreign_key_create_info( -/*======================================*/ - char* str) /* in, own: create info string to free */ -{ - if (str) { - my_free(str, MYF(0)); - } -} - -/*********************************************************************** -Tells something additional to the handler about how to do things. */ - -int -ha_innobase::extra( -/*===============*/ - /* out: 0 or error number */ - enum ha_extra_function operation) - /* in: HA_EXTRA_FLUSH or some other flag */ -{ - /* Warning: since it is not sure that MySQL calls external_lock - before calling this function, the trx field in prebuilt can be - obsolete! */ - - switch (operation) { - case HA_EXTRA_FLUSH: - if (prebuilt->blob_heap) { - row_mysql_prebuilt_free_blob_heap(prebuilt); - } - break; - case HA_EXTRA_RESET_STATE: - reset_template(prebuilt); - break; - case HA_EXTRA_NO_KEYREAD: - prebuilt->read_just_key = 0; - break; - case HA_EXTRA_KEYREAD: - prebuilt->read_just_key = 1; - break; - case HA_EXTRA_KEYREAD_PRESERVE_FIELDS: - prebuilt->keep_other_fields_on_keyread = 1; - break; - - /* IMPORTANT: prebuilt->trx can be obsolete in - this method, because it is not sure that MySQL - calls external_lock before this method with the - parameters below. We must not invoke update_thd() - either, because the calling threads may change. - CAREFUL HERE, OR MEMORY CORRUPTION MAY OCCUR! */ - case HA_EXTRA_IGNORE_DUP_KEY: - thd_to_trx(ha_thd())->duplicates |= TRX_DUP_IGNORE; - break; - case HA_EXTRA_WRITE_CAN_REPLACE: - thd_to_trx(ha_thd())->duplicates |= TRX_DUP_REPLACE; - break; - case HA_EXTRA_WRITE_CANNOT_REPLACE: - thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_REPLACE; - break; - case HA_EXTRA_NO_IGNORE_DUP_KEY: - thd_to_trx(ha_thd())->duplicates &= - ~(TRX_DUP_IGNORE | TRX_DUP_REPLACE); - break; - default:/* Do nothing */ - ; - } - - return(0); -} - -/********************************************************************** -Reset state of file to after 'open'. 
-This function is called after every statement for all tables used -by that statement. */ -int ha_innobase::reset() -{ - if (prebuilt->blob_heap) { - row_mysql_prebuilt_free_blob_heap(prebuilt); - } - - reset_template(prebuilt); - - /* TODO: This should really be reset in reset_template() but for now - it's safer to do it explicitly here. */ - - /* This is a statement level counter. */ - prebuilt->autoinc_last_value = 0; - - return(0); -} - -/********************************************************************** -MySQL calls this function at the start of each SQL statement inside LOCK -TABLES. Inside LOCK TABLES the ::external_lock method does not work to -mark SQL statement borders. Note also a special case: if a temporary table -is created inside LOCK TABLES, MySQL has not called external_lock() at all -on that table. -MySQL-5.0 also calls this before each statement in an execution of a stored -procedure. To make the execution more deterministic for binlogging, MySQL-5.0 -locks all tables involved in a stored procedure with full explicit table -locks (thd_in_lock_tables(thd) holds in store_lock()) before executing the -procedure. */ - -int -ha_innobase::start_stmt( -/*====================*/ - /* out: 0 or error code */ - THD* thd, /* in: handle to the user thread */ - thr_lock_type lock_type) -{ - trx_t* trx; - - update_thd(thd); - - trx = prebuilt->trx; - - /* Here we release the search latch and the InnoDB thread FIFO ticket - if they were reserved. They should have been released already at the - end of the previous statement, but because inside LOCK TABLES the - lock count method does not work to mark the end of a SELECT statement, - that may not be the case. We MUST release the search latch before an - INSERT, for example. */ - - innobase_release_stat_resources(trx); - - /* Reset the AUTOINC statement level counter for multi-row INSERTs. 
*/ - trx->n_autoinc_rows = 0; - - prebuilt->sql_stat_start = TRUE; - prebuilt->hint_need_to_fetch_extra_cols = 0; - reset_template(prebuilt); - - if (!prebuilt->mysql_has_locked) { - /* This handle is for a temporary table created inside - this same LOCK TABLES; since MySQL does NOT call external_lock - in this case, we must use x-row locks inside InnoDB to be - prepared for an update of a row */ - - prebuilt->select_lock_type = LOCK_X; - } else { - if (trx->isolation_level != TRX_ISO_SERIALIZABLE - && thd_sql_command(thd) == SQLCOM_SELECT - && lock_type == TL_READ) { - - /* For other than temporary tables, we obtain - no lock for consistent read (plain SELECT). */ - - prebuilt->select_lock_type = LOCK_NONE; - } else { - /* Not a consistent read: restore the - select_lock_type value. The value of - stored_select_lock_type was decided in: - 1) ::store_lock(), - 2) ::external_lock(), - 3) ::init_table_handle_for_HANDLER(), and - 4) ::transactional_table_lock(). */ - - prebuilt->select_lock_type = - prebuilt->stored_select_lock_type; - } - } - - trx->detailed_error[0] = '\0'; - - /* Set the MySQL flag to mark that there is an active transaction */ - if (trx->active_trans == 0) { - - innobase_register_trx_and_stmt(ht, thd); - trx->active_trans = 1; - } else { - innobase_register_stmt(ht, thd); - } - - return(0); -} - -/********************************************************************** -Maps a MySQL trx isolation level code to the InnoDB isolation level code */ -inline -ulint -innobase_map_isolation_level( -/*=========================*/ - /* out: InnoDB isolation level */ - enum_tx_isolation iso) /* in: MySQL isolation level code */ -{ - switch(iso) { - case ISO_REPEATABLE_READ: return(TRX_ISO_REPEATABLE_READ); - case ISO_READ_COMMITTED: return(TRX_ISO_READ_COMMITTED); - case ISO_SERIALIZABLE: return(TRX_ISO_SERIALIZABLE); - case ISO_READ_UNCOMMITTED: return(TRX_ISO_READ_UNCOMMITTED); - default: ut_a(0); return(0); - } -} - 
-/********************************************************************** -As MySQL will execute an external lock for every new table it uses when it -starts to process an SQL statement (an exception is when MySQL calls -start_stmt for the handle) we can use this function to store the pointer to -the THD in the handle. We will also use this function to communicate -to InnoDB that a new SQL statement has started and that we must store a -savepoint to our transaction handle, so that we are able to roll back -the SQL statement in case of an error. */ - -int -ha_innobase::external_lock( -/*=======================*/ - /* out: 0 */ - THD* thd, /* in: handle to the user thread */ - int lock_type) /* in: lock type */ -{ - trx_t* trx; - - DBUG_ENTER("ha_innobase::external_lock"); - DBUG_PRINT("enter",("lock_type: %d", lock_type)); - - update_thd(thd); - - /* Statement based binlogging does not work in isolation level - READ UNCOMMITTED and READ COMMITTED since the necessary - locks cannot be taken. In this case, we print an - informative error message and return with an error. */ - if (lock_type == F_WRLCK) - { - ulong const binlog_format= thd_binlog_format(thd); - ulong const tx_isolation = thd_tx_isolation(current_thd); - if (tx_isolation <= ISO_READ_COMMITTED && - binlog_format == BINLOG_FORMAT_STMT) - { - char buf[256]; - my_snprintf(buf, sizeof(buf), - "Transaction level '%s' in" - " InnoDB is not safe for binlog mode '%s'", - tx_isolation_names[tx_isolation], - binlog_format_names[binlog_format]); - my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(0), buf); - DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE); - } - } - - - trx = prebuilt->trx; - - prebuilt->sql_stat_start = TRUE; - prebuilt->hint_need_to_fetch_extra_cols = 0; - - reset_template(prebuilt); - - if (lock_type == F_WRLCK) { - - /* If this is a SELECT, then it is in UPDATE TABLE ... - or SELECT ... 
FOR UPDATE */ - prebuilt->select_lock_type = LOCK_X; - prebuilt->stored_select_lock_type = LOCK_X; - } - - if (lock_type != F_UNLCK) { - /* MySQL is setting a new table lock */ - - trx->detailed_error[0] = '\0'; - - /* Set the MySQL flag to mark that there is an active - transaction */ - if (trx->active_trans == 0) { - - innobase_register_trx_and_stmt(ht, thd); - trx->active_trans = 1; - } else if (trx->n_mysql_tables_in_use == 0) { - innobase_register_stmt(ht, thd); - } - - if (trx->isolation_level == TRX_ISO_SERIALIZABLE - && prebuilt->select_lock_type == LOCK_NONE - && thd_test_options(thd, - OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { - - /* To get serializable execution, we let InnoDB - conceptually add 'LOCK IN SHARE MODE' to all SELECTs - which otherwise would have been consistent reads. An - exception is consistent reads in the AUTOCOMMIT=1 mode: - we know that they are read-only transactions, and they - can be serialized also if performed as consistent - reads. */ - - prebuilt->select_lock_type = LOCK_S; - prebuilt->stored_select_lock_type = LOCK_S; - } - - /* Starting from 4.1.9, no InnoDB table lock is taken in LOCK - TABLES if AUTOCOMMIT=1. It does not make much sense to acquire - an InnoDB table lock if it is released immediately at the end - of LOCK TABLES, and InnoDB's table locks in that case cause - VERY easily deadlocks. - - We do not set InnoDB table locks if user has not explicitly - requested a table lock. Note that thd_in_lock_tables(thd) - can hold in some cases, e.g., at the start of a stored - procedure call (SQLCOM_CALL). 
*/ - - if (prebuilt->select_lock_type != LOCK_NONE) { - - if (thd_sql_command(thd) == SQLCOM_LOCK_TABLES - && THDVAR(thd, table_locks) - && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT) - && thd_in_lock_tables(thd)) { - - ulint error = row_lock_table_for_mysql( - prebuilt, NULL, 0); - - if (error != DB_SUCCESS) { - error = convert_error_code_to_mysql( - (int) error, thd); - DBUG_RETURN((int) error); - } - } - - trx->mysql_n_tables_locked++; - } - - trx->n_mysql_tables_in_use++; - prebuilt->mysql_has_locked = TRUE; - - DBUG_RETURN(0); - } - - /* MySQL is releasing a table lock */ - - trx->n_mysql_tables_in_use--; - prebuilt->mysql_has_locked = FALSE; - - /* Release a possible FIFO ticket and search latch. Since we - may reserve the kernel mutex, we have to release the search - system latch first to obey the latching order. */ - - innobase_release_stat_resources(trx); - - /* If the MySQL lock count drops to zero we know that the current SQL - statement has ended */ - - if (trx->n_mysql_tables_in_use == 0) { - - trx->mysql_n_tables_locked = 0; - prebuilt->used_in_HANDLER = FALSE; - - if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { - if (trx->active_trans != 0) { - innobase_commit(ht, thd, TRUE); - } - } else { - if (trx->isolation_level <= TRX_ISO_READ_COMMITTED - && trx->global_read_view) { - - /* At low transaction isolation levels we let - each consistent read set its own snapshot */ - - read_view_close_for_mysql(trx); - } - } - } - - DBUG_RETURN(0); -} - -/********************************************************************** -With this function MySQL request a transactional lock to a table when -user issued query LOCK TABLES..WHERE ENGINE = InnoDB. 
*/ - -int -ha_innobase::transactional_table_lock( -/*==================================*/ - /* out: error code */ - THD* thd, /* in: handle to the user thread */ - int lock_type) /* in: lock type */ -{ - trx_t* trx; - - DBUG_ENTER("ha_innobase::transactional_table_lock"); - DBUG_PRINT("enter",("lock_type: %d", lock_type)); - - /* We do not know if MySQL can call this function before calling - external_lock(). To be safe, update the thd of the current table - handle. */ - - update_thd(thd); - - if (prebuilt->table->ibd_file_missing && !thd_tablespace_op(thd)) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: MySQL is trying to use a table handle" - " but the .ibd file for\n" - "InnoDB: table %s does not exist.\n" - "InnoDB: Have you deleted the .ibd file" - " from the database directory under\n" - "InnoDB: the MySQL datadir?" - "InnoDB: See" - " http://dev.mysql.com/doc/refman/5.1/en/innodb-troubleshooting.html\n" - "InnoDB: how you can resolve the problem.\n", - prebuilt->table->name); - DBUG_RETURN(HA_ERR_CRASHED); - } - - trx = prebuilt->trx; - - prebuilt->sql_stat_start = TRUE; - prebuilt->hint_need_to_fetch_extra_cols = 0; - - reset_template(prebuilt); - - if (lock_type == F_WRLCK) { - prebuilt->select_lock_type = LOCK_X; - prebuilt->stored_select_lock_type = LOCK_X; - } else if (lock_type == F_RDLCK) { - prebuilt->select_lock_type = LOCK_S; - prebuilt->stored_select_lock_type = LOCK_S; - } else { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB error:\n" -"MySQL is trying to set transactional table lock with corrupted lock type\n" -"to table %s, lock type %d does not exist.\n", - prebuilt->table->name, lock_type); - DBUG_RETURN(HA_ERR_CRASHED); - } - - /* MySQL is setting a new transactional table lock */ - - /* Set the MySQL flag to mark that there is an active transaction */ - if (trx->active_trans == 0) { - - innobase_register_trx_and_stmt(ht, thd); - trx->active_trans = 1; - } - - if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) 
{ - ulint error = DB_SUCCESS; - - error = row_lock_table_for_mysql(prebuilt, NULL, 0); - - if (error != DB_SUCCESS) { - error = convert_error_code_to_mysql((int) error, thd); - DBUG_RETURN((int) error); - } - - if (thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) { - - /* Store the current undo_no of the transaction - so that we know where to roll back if we have - to roll back the next SQL statement */ - - trx_mark_sql_stat_end(trx); - } - } - - DBUG_RETURN(0); -} - -/**************************************************************************** -Here we export InnoDB status variables to MySQL. */ -static -int -innodb_export_status() -/*==================*/ -{ - if (innodb_inited) { - srv_export_innodb_status(); - } - - return 0; -} - -/**************************************************************************** -Implements the SHOW INNODB STATUS command. Sends the output of the InnoDB -Monitor to the client. */ -static -bool -innodb_show_status( -/*===============*/ - handlerton* hton, /* in: the innodb handlerton */ - THD* thd, /* in: the MySQL query thread of the caller */ - stat_print_fn *stat_print) -{ - trx_t* trx; - static const char truncated_msg[] = "... truncated...\n"; - const long MAX_STATUS_SIZE = 64000; - ulint trx_list_start = ULINT_UNDEFINED; - ulint trx_list_end = ULINT_UNDEFINED; - - DBUG_ENTER("innodb_show_status"); - - trx = check_trx_exists(thd); - - innobase_release_stat_resources(trx); - - /* We let the InnoDB Monitor to output at most MAX_STATUS_SIZE - bytes of text. 
*/ - - long flen, usable_len; - char* str; - - mutex_enter_noninline(&srv_monitor_file_mutex); - rewind(srv_monitor_file); - srv_printf_innodb_monitor(srv_monitor_file); - flen = ftell(srv_monitor_file); - os_file_set_eof(srv_monitor_file); - - if (flen < 0) { - flen = 0; - } - - if (flen > MAX_STATUS_SIZE) { - usable_len = MAX_STATUS_SIZE; - } else { - usable_len = flen; - } - - /* allocate buffer for the string, and - read the contents of the temporary file */ - - if (!(str = (char*) my_malloc(usable_len + 1, MYF(0)))) { - mutex_exit_noninline(&srv_monitor_file_mutex); - DBUG_RETURN(TRUE); - } - - rewind(srv_monitor_file); - if (flen < MAX_STATUS_SIZE) { - /* Display the entire output. */ - flen = (long) fread(str, 1, flen, srv_monitor_file); - } else if (trx_list_end < (ulint) flen - && trx_list_start < trx_list_end - && trx_list_start + (flen - trx_list_end) - < MAX_STATUS_SIZE - sizeof truncated_msg - 1) { - /* Omit the beginning of the list of active transactions. */ - long len = (long) fread(str, 1, trx_list_start, srv_monitor_file); - memcpy(str + len, truncated_msg, sizeof truncated_msg - 1); - len += sizeof truncated_msg - 1; - usable_len = (MAX_STATUS_SIZE - 1) - len; - fseek(srv_monitor_file, flen - usable_len, SEEK_SET); - len += (long) fread(str + len, 1, usable_len, srv_monitor_file); - flen = len; - } else { - /* Omit the end of the output. */ - flen = (long) fread(str, 1, MAX_STATUS_SIZE - 1, srv_monitor_file); - } - - mutex_exit_noninline(&srv_monitor_file_mutex); - - bool result = FALSE; - - if (stat_print(thd, innobase_hton_name, (uint) strlen(innobase_hton_name), - STRING_WITH_LEN(""), str, flen)) { - result= TRUE; - } - my_free(str, MYF(0)); - - DBUG_RETURN(FALSE); -} - -/**************************************************************************** -Implements the SHOW MUTEX STATUS command. . 
*/ -static -bool -innodb_mutex_show_status( -/*=====================*/ - handlerton* hton, /* in: the innodb handlerton */ - THD* thd, /* in: the MySQL query thread of the - caller */ - stat_print_fn* stat_print) -{ - char buf1[IO_SIZE], buf2[IO_SIZE]; - mutex_t* mutex; - rw_lock_t* lock; -#ifdef UNIV_DEBUG - ulint rw_lock_count= 0; - ulint rw_lock_count_spin_loop= 0; - ulint rw_lock_count_spin_rounds= 0; - ulint rw_lock_count_os_wait= 0; - ulint rw_lock_count_os_yield= 0; - ulonglong rw_lock_wait_time= 0; -#endif /* UNIV_DEBUG */ - uint hton_name_len= (uint) strlen(innobase_hton_name), buf1len, buf2len; - DBUG_ENTER("innodb_mutex_show_status"); - - mutex_enter_noninline(&mutex_list_mutex); - - mutex = UT_LIST_GET_FIRST(mutex_list); - - while (mutex != NULL) { -#ifdef UNIV_DEBUG - if (mutex->mutex_type != 1) { - if (mutex->count_using > 0) { - buf1len= my_snprintf(buf1, sizeof(buf1), - "%s:%s", - mutex->cmutex_name, mutex->cfile_name); - buf2len= my_snprintf(buf2, sizeof(buf2), - "count=%lu, spin_waits=%lu," - " spin_rounds=%lu, " - "os_waits=%lu, os_yields=%lu," - " os_wait_times=%lu", - mutex->count_using, - mutex->count_spin_loop, - mutex->count_spin_rounds, - mutex->count_os_wait, - mutex->count_os_yield, - (ulong) (mutex->lspent_time/1000)); - - if (stat_print(thd, innobase_hton_name, - hton_name_len, buf1, buf1len, - buf2, buf2len)) { - mutex_exit_noninline( - &mutex_list_mutex); - DBUG_RETURN(1); - } - } - } - else { - rw_lock_count += mutex->count_using; - rw_lock_count_spin_loop += mutex->count_spin_loop; - rw_lock_count_spin_rounds += mutex->count_spin_rounds; - rw_lock_count_os_wait += mutex->count_os_wait; - rw_lock_count_os_yield += mutex->count_os_yield; - rw_lock_wait_time += mutex->lspent_time; - } -#else /* UNIV_DEBUG */ - buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s:%lu", - mutex->cfile_name, (ulong) mutex->cline); - buf2len= (uint) my_snprintf(buf2, sizeof(buf2), "os_waits=%lu", - mutex->count_os_wait); - - if (stat_print(thd, 
innobase_hton_name, - hton_name_len, buf1, buf1len, - buf2, buf2len)) { - mutex_exit_noninline(&mutex_list_mutex); - DBUG_RETURN(1); - } -#endif /* UNIV_DEBUG */ - - mutex = UT_LIST_GET_NEXT(list, mutex); - } - - mutex_exit_noninline(&mutex_list_mutex); - - mutex_enter_noninline(&rw_lock_list_mutex); - - lock = UT_LIST_GET_FIRST(rw_lock_list); - - while (lock != NULL) - { - if (lock->count_os_wait) - { - buf1len= my_snprintf(buf1, sizeof(buf1), "%s:%lu", - lock->cfile_name, (ulong) lock->cline); - buf2len= my_snprintf(buf2, sizeof(buf2), - "os_waits=%lu", lock->count_os_wait); - - if (stat_print(thd, innobase_hton_name, - hton_name_len, buf1, buf1len, - buf2, buf2len)) { - mutex_exit_noninline(&rw_lock_list_mutex); - DBUG_RETURN(1); - } - } - lock = UT_LIST_GET_NEXT(list, lock); - } - - mutex_exit_noninline(&rw_lock_list_mutex); - -#ifdef UNIV_DEBUG - buf2len= my_snprintf(buf2, sizeof(buf2), - "count=%lu, spin_waits=%lu, spin_rounds=%lu, " - "os_waits=%lu, os_yields=%lu, os_wait_times=%lu", - rw_lock_count, rw_lock_count_spin_loop, - rw_lock_count_spin_rounds, - rw_lock_count_os_wait, rw_lock_count_os_yield, - (ulong) (rw_lock_wait_time/1000)); - - if (stat_print(thd, innobase_hton_name, hton_name_len, - STRING_WITH_LEN("rw_lock_mutexes"), buf2, buf2len)) { - DBUG_RETURN(1); - } -#endif /* UNIV_DEBUG */ - - DBUG_RETURN(FALSE); -} - -static -bool innobase_show_status(handlerton *hton, THD* thd, - stat_print_fn* stat_print, - enum ha_stat_type stat_type) -{ - switch (stat_type) { - case HA_ENGINE_STATUS: - return innodb_show_status(hton, thd, stat_print); - case HA_ENGINE_MUTEX: - return innodb_mutex_show_status(hton, thd, stat_print); - default: - return FALSE; - } -} - rw_lock_t* lock; - - -/**************************************************************************** - Handling the shared INNOBASE_SHARE structure that is needed to provide table - locking. 
-****************************************************************************/ - -static uchar* innobase_get_key(INNOBASE_SHARE* share, size_t *length, - my_bool not_used __attribute__((unused))) -{ - *length=share->table_name_length; - - return (uchar*) share->table_name; -} - -static INNOBASE_SHARE* get_share(const char* table_name) -{ - INNOBASE_SHARE *share; - pthread_mutex_lock(&innobase_share_mutex); - uint length=(uint) strlen(table_name); - - if (!(share=(INNOBASE_SHARE*) hash_search(&innobase_open_tables, - (uchar*) table_name, - length))) { - - share = (INNOBASE_SHARE *) my_malloc(sizeof(*share)+length+1, - MYF(MY_FAE | MY_ZEROFILL)); - - share->table_name_length=length; - share->table_name=(char*) (share+1); - strmov(share->table_name,table_name); - - if (my_hash_insert(&innobase_open_tables, - (uchar*) share)) { - pthread_mutex_unlock(&innobase_share_mutex); - my_free(share,0); - - return 0; - } - - thr_lock_init(&share->lock); - pthread_mutex_init(&share->mutex,MY_MUTEX_INIT_FAST); - } - - share->use_count++; - pthread_mutex_unlock(&innobase_share_mutex); - - return share; -} - -static void free_share(INNOBASE_SHARE* share) -{ - pthread_mutex_lock(&innobase_share_mutex); - - if (!--share->use_count) { - hash_delete(&innobase_open_tables, (uchar*) share); - thr_lock_delete(&share->lock); - pthread_mutex_destroy(&share->mutex); - my_free(share, MYF(0)); - } - - pthread_mutex_unlock(&innobase_share_mutex); -} - -/********************************************************************* -Converts a MySQL table lock stored in the 'lock' field of the handle to -a proper type before storing pointer to the lock into an array of pointers. -MySQL also calls this if it wants to reset some table locks to a not-locked -state during the processing of an SQL query. An example is that during a -SELECT the read lock is released early on the 'const' tables where we only -fetch one row. MySQL does not call this when it releases all locks at the -end of an SQL statement. 
*/ - -THR_LOCK_DATA** -ha_innobase::store_lock( -/*====================*/ - /* out: pointer to the next - element in the 'to' array */ - THD* thd, /* in: user thread handle */ - THR_LOCK_DATA** to, /* in: pointer to an array - of pointers to lock structs; - pointer to the 'lock' field - of current handle is stored - next to this array */ - enum thr_lock_type lock_type) /* in: lock type to store in - 'lock'; this may also be - TL_IGNORE */ -{ - trx_t* trx; - - /* Note that trx in this function is NOT necessarily prebuilt->trx - because we call update_thd() later, in ::external_lock()! Failure to - understand this caused a serious memory corruption bug in 5.1.11. */ - - trx = check_trx_exists(thd); - - /* NOTE: MySQL can call this function with lock 'type' TL_IGNORE! - Be careful to ignore TL_IGNORE if we are going to do something with - only 'real' locks! */ - - /* If no MySQL table is in use, we need to set the isolation level - of the transaction. */ - - if (lock_type != TL_IGNORE - && trx->n_mysql_tables_in_use == 0) { - trx->isolation_level = innobase_map_isolation_level( - (enum_tx_isolation) thd_tx_isolation(thd)); - - if (trx->isolation_level <= TRX_ISO_READ_COMMITTED - && trx->global_read_view) { - - /* At low transaction isolation levels we let - each consistent read set its own snapshot */ - - read_view_close_for_mysql(trx); - } - } - - DBUG_ASSERT(thd == current_thd); - const bool in_lock_tables = thd_in_lock_tables(thd); - const uint sql_command = thd_sql_command(thd); - - if (sql_command == SQLCOM_DROP_TABLE) { - - /* MySQL calls this function in DROP TABLE though this table - handle may belong to another thd that is running a query. Let - us in that case skip any changes to the prebuilt struct. 
*/ - - } else if ((lock_type == TL_READ && in_lock_tables) - || (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables) - || lock_type == TL_READ_WITH_SHARED_LOCKS - || lock_type == TL_READ_NO_INSERT - || (lock_type != TL_IGNORE - && sql_command != SQLCOM_SELECT)) { - - /* The OR cases above are in this order: - 1) MySQL is doing LOCK TABLES ... READ LOCAL, or we - are processing a stored procedure or function, or - 2) (we do not know when TL_READ_HIGH_PRIORITY is used), or - 3) this is a SELECT ... IN SHARE MODE, or - 4) we are doing a complex SQL statement like - INSERT INTO ... SELECT ... and the logical logging (MySQL - binlog) requires the use of a locking read, or - MySQL is doing LOCK TABLES ... READ. - 5) we let InnoDB do locking reads for all SQL statements that - are not simple SELECTs; note that select_lock_type in this - case may get strengthened in ::external_lock() to LOCK_X. - Note that we MUST use a locking read in all data modifying - SQL statements, because otherwise the execution would not be - serializable, and also the results from the update could be - unexpected if an obsolete consistent read view would be - used. */ - - ulint isolation_level; - - isolation_level = trx->isolation_level; - - if ((srv_locks_unsafe_for_binlog - || isolation_level == TRX_ISO_READ_COMMITTED) - && isolation_level != TRX_ISO_SERIALIZABLE - && (lock_type == TL_READ || lock_type == TL_READ_NO_INSERT) - && (sql_command == SQLCOM_INSERT_SELECT - || sql_command == SQLCOM_UPDATE - || sql_command == SQLCOM_CREATE_TABLE)) { - - /* If we either have innobase_locks_unsafe_for_binlog - option set or this session is using READ COMMITTED - isolation level and isolation level of the transaction - is not set to serializable and MySQL is doing - INSERT INTO...SELECT or UPDATE ... = (SELECT ...) or - CREATE ... SELECT... without FOR UPDATE or - IN SHARE MODE in select, then we use consistent - read for select. 
*/ - - prebuilt->select_lock_type = LOCK_NONE; - prebuilt->stored_select_lock_type = LOCK_NONE; - } else if (sql_command == SQLCOM_CHECKSUM) { - /* Use consistent read for checksum table */ - - prebuilt->select_lock_type = LOCK_NONE; - prebuilt->stored_select_lock_type = LOCK_NONE; - } else { - prebuilt->select_lock_type = LOCK_S; - prebuilt->stored_select_lock_type = LOCK_S; - } - - } else if (lock_type != TL_IGNORE) { - - /* We set possible LOCK_X value in external_lock, not yet - here even if this would be SELECT ... FOR UPDATE */ - - prebuilt->select_lock_type = LOCK_NONE; - prebuilt->stored_select_lock_type = LOCK_NONE; - } - - if (lock_type != TL_IGNORE && lock.type == TL_UNLOCK) { - - /* Starting from 5.0.7, we weaken also the table locks - set at the start of a MySQL stored procedure call, just like - we weaken the locks set at the start of an SQL statement. - MySQL does set in_lock_tables TRUE there, but in reality - we do not need table locks to make the execution of a - single transaction stored procedure call deterministic - (if it does not use a consistent read). */ - - if (lock_type == TL_READ - && sql_command == SQLCOM_LOCK_TABLES) { - /* We come here if MySQL is processing LOCK TABLES - ... READ LOCAL. MyISAM under that table lock type - reads the table as it was at the time the lock was - granted (new inserts are allowed, but not seen by the - reader). To get a similar effect on an InnoDB table, - we must use LOCK TABLES ... READ. We convert the lock - type here, so that for InnoDB, READ LOCAL is - equivalent to READ. This will change the InnoDB - behavior in mysqldump, so that dumps of InnoDB tables - are consistent with dumps of MyISAM tables. */ - - lock_type = TL_READ_NO_INSERT; - } - - /* If we are not doing a LOCK TABLE, DISCARD/IMPORT - TABLESPACE or TRUNCATE TABLE then allow multiple - writers. Note that ALTER TABLE uses a TL_WRITE_ALLOW_READ - < TL_WRITE_CONCURRENT_INSERT. 
- - We especially allow multiple writers if MySQL is at the - start of a stored procedure call (SQLCOM_CALL) or a - stored function call (MySQL does have in_lock_tables - TRUE there). */ - - if ((lock_type >= TL_WRITE_CONCURRENT_INSERT - && lock_type <= TL_WRITE) - && !(in_lock_tables - && sql_command == SQLCOM_LOCK_TABLES) - && !thd_tablespace_op(thd) - && sql_command != SQLCOM_TRUNCATE - && sql_command != SQLCOM_OPTIMIZE - && sql_command != SQLCOM_CREATE_TABLE) { - - lock_type = TL_WRITE_ALLOW_WRITE; - } - - /* In queries of type INSERT INTO t1 SELECT ... FROM t2 ... - MySQL would use the lock TL_READ_NO_INSERT on t2, and that - would conflict with TL_WRITE_ALLOW_WRITE, blocking all inserts - to t2. Convert the lock to a normal read lock to allow - concurrent inserts to t2. - - We especially allow concurrent inserts if MySQL is at the - start of a stored procedure call (SQLCOM_CALL) - (MySQL does have thd_in_lock_tables() TRUE there). */ - - if (lock_type == TL_READ_NO_INSERT - && sql_command != SQLCOM_LOCK_TABLES) { - - lock_type = TL_READ; - } - - lock.type = lock_type; - } - - *to++= &lock; - - return(to); -} - -/******************************************************************************* -Read the next autoinc value. Acquire the relevant locks before reading -the AUTOINC value. If SUCCESS then the table AUTOINC mutex will be locked -on return and all relevant locks acquired. */ - -ulong -ha_innobase::innobase_get_autoinc( -/*==============================*/ - /* out: DB_SUCCESS or error code */ - ulonglong* value) /* out: autoinc value */ -{ - *value = 0; - - prebuilt->autoinc_error = innobase_lock_autoinc(); - - if (prebuilt->autoinc_error == DB_SUCCESS) { - - /* Determine the first value of the interval */ - *value = dict_table_autoinc_read(prebuilt->table); - - /* It should have been initialized during open. 
*/ - ut_a(*value != 0); - } - - return(ulong(prebuilt->autoinc_error)); -} - -/*********************************************************************** -This function reads the global auto-inc counter. It doesn't use the -AUTOINC lock even if the lock mode is set to TRADITIONAL. */ - -ulonglong -ha_innobase::innobase_peek_autoinc() -/*================================*/ - /* out: the autoinc value */ -{ - ulonglong auto_inc; - dict_table_t* innodb_table; - - ut_a(prebuilt != NULL); - ut_a(prebuilt->table != NULL); - - innodb_table = prebuilt->table; - - dict_table_autoinc_lock(innodb_table); - - auto_inc = dict_table_autoinc_read(innodb_table); - - ut_a(auto_inc > 0); - - dict_table_autoinc_unlock(innodb_table); - - return(auto_inc); -} - -/******************************************************************************* -This function initializes the auto-inc counter if it has not been -initialized yet. This function does not change the value of the auto-inc -counter if it already has been initialized. Returns the value of the -auto-inc counter in *first_value, and ULONGLONG_MAX in *nb_reserved_values (as -we have a table-level lock). offset, increment, nb_desired_values are ignored. 
-*first_value is set to -1 if error (deadlock or lock wait timeout) */ - -void -ha_innobase::get_auto_increment( -/*============================*/ - ulonglong offset, /* in: */ - ulonglong increment, /* in: table autoinc increment */ - ulonglong nb_desired_values, /* in: number of values reqd */ - ulonglong *first_value, /* out: the autoinc value */ - ulonglong *nb_reserved_values) /* out: count of reserved values */ -{ - trx_t* trx; - ulint error; - ulonglong autoinc = 0; - - /* Prepare prebuilt->trx in the table handle */ - update_thd(ha_thd()); - - error = innobase_get_autoinc(&autoinc); - - if (error != DB_SUCCESS) { - *first_value = (~(ulonglong) 0); - return; - } - - /* This is a hack, since nb_desired_values seems to be accurate only - for the first call to get_auto_increment() for multi-row INSERT and - meaningless for other statements e.g, LOAD etc. Subsequent calls to - this method for the same statement results in different values which - don't make sense. Therefore we store the value the first time we are - called and count down from that as rows are written (see write_row()). - */ - - trx = prebuilt->trx; - - /* Note: We can't rely on *first_value since some MySQL engines, - in particular the partition engine, don't initialize it to 0 when - invoking this method. So we are not sure if it's guaranteed to - be 0 or not. */ - - /* Called for the first time ? */ - if (trx->n_autoinc_rows == 0) { - - trx->n_autoinc_rows = (ulint) nb_desired_values; - - /* It's possible for nb_desired_values to be 0: - e.g., INSERT INTO T1(C) SELECT C FROM T2; */ - if (nb_desired_values == 0) { - - trx->n_autoinc_rows = 1; - } - - set_if_bigger(*first_value, autoinc); - /* Not in the middle of a mult-row INSERT. */ - } else if (prebuilt->autoinc_last_value == 0) { - set_if_bigger(*first_value, autoinc); - } - - *nb_reserved_values = trx->n_autoinc_rows; - - /* With old style AUTOINC locking we only update the table's - AUTOINC counter after attempting to insert the row. 
*/ - if (innobase_autoinc_lock_mode != AUTOINC_OLD_STYLE_LOCKING) { - ulonglong need; - ulonglong next_value; - ulonglong col_max_value; - - /* We need the upper limit of the col type to check for - whether we update the table autoinc counter or not. */ - col_max_value = innobase_get_int_col_max_value( - table->next_number_field); - - need = *nb_reserved_values * increment; - - /* Compute the last value in the interval */ - next_value = innobase_next_autoinc( - *first_value, need, offset, col_max_value); - - prebuilt->autoinc_last_value = next_value; - - if (prebuilt->autoinc_last_value < *first_value) { - *first_value = (~(ulonglong) 0); - } else { - /* Update the table autoinc variable */ - dict_table_autoinc_update_if_greater( - prebuilt->table, prebuilt->autoinc_last_value); - } - } else { - /* This will force write_row() into attempting an update - of the table's AUTOINC counter. */ - prebuilt->autoinc_last_value = 0; - } - - /* The increment to be used to increase the AUTOINC value, we use - this in write_row() and update_row() to increase the autoinc counter - for columns that are filled by the user. We need the offset and - the increment. */ - prebuilt->autoinc_offset = offset; - prebuilt->autoinc_increment = increment; - - dict_table_autoinc_unlock(prebuilt->table); -} - -/* See comment in handler.h */ -int -ha_innobase::reset_auto_increment( -/*==============================*/ - ulonglong value) /* in: new value for table autoinc */ -{ - DBUG_ENTER("ha_innobase::reset_auto_increment"); - - int error; - - update_thd(ha_thd()); - - error = row_lock_table_autoinc_for_mysql(prebuilt); - - if (error != DB_SUCCESS) { - error = convert_error_code_to_mysql(error, user_thd); - - DBUG_RETURN(error); - } - - /* The next value can never be 0. 
*/ - if (value == 0) { - value = 1; - } - - innobase_reset_autoinc(value); - - DBUG_RETURN(0); -} - -/* See comment in handler.cc */ -bool -ha_innobase::get_error_message(int error, String *buf) -{ - trx_t* trx = check_trx_exists(ha_thd()); - - buf->copy(trx->detailed_error, (uint) strlen(trx->detailed_error), - system_charset_info); - - return FALSE; -} - -/*********************************************************************** -Compares two 'refs'. A 'ref' is the (internal) primary key value of the row. -If there is no explicitly declared non-null unique key or a primary key, then -InnoDB internally uses the row id as the primary key. */ - -int -ha_innobase::cmp_ref( -/*=================*/ - /* out: < 0 if ref1 < ref2, 0 if equal, else - > 0 */ - const uchar* ref1, /* in: an (internal) primary key value in the - MySQL key value format */ - const uchar* ref2) /* in: an (internal) primary key value in the - MySQL key value format */ -{ - enum_field_types mysql_type; - Field* field; - KEY_PART_INFO* key_part; - KEY_PART_INFO* key_part_end; - uint len1; - uint len2; - int result; - - if (prebuilt->clust_index_was_generated) { - /* The 'ref' is an InnoDB row id */ - - return(memcmp(ref1, ref2, DATA_ROW_ID_LEN)); - } - - /* Do a type-aware comparison of primary key fields. PK fields - are always NOT NULL, so no checks for NULL are performed. 
*/ - - key_part = table->key_info[table->s->primary_key].key_part; - - key_part_end = key_part - + table->key_info[table->s->primary_key].key_parts; - - for (; key_part != key_part_end; ++key_part) { - field = key_part->field; - mysql_type = field->type(); - - if (mysql_type == MYSQL_TYPE_TINY_BLOB - || mysql_type == MYSQL_TYPE_MEDIUM_BLOB - || mysql_type == MYSQL_TYPE_BLOB - || mysql_type == MYSQL_TYPE_LONG_BLOB) { - - /* In the MySQL key value format, a column prefix of - a BLOB is preceded by a 2-byte length field */ - - len1 = innobase_read_from_2_little_endian(ref1); - len2 = innobase_read_from_2_little_endian(ref2); - - ref1 += 2; - ref2 += 2; - result = ((Field_blob*)field)->cmp( ref1, len1, - ref2, len2); - } else { - result = field->key_cmp(ref1, ref2); - } - - if (result) { - - return(result); - } - - ref1 += key_part->store_length; - ref2 += key_part->store_length; - } - - return(0); -} - -/*********************************************************************** -Ask InnoDB if a query to a table can be cached. */ - -my_bool -ha_innobase::register_query_cache_table( -/*====================================*/ - /* out: TRUE if query caching - of the table is permitted */ - THD* thd, /* in: user thread handle */ - char* table_key, /* in: concatenation of database name, - the null character '\0', - and the table name */ - uint key_length, /* in: length of the full name, i.e. - len(dbname) + len(tablename) + 1 */ - qc_engine_callback* - call_back, /* out: pointer to function for - checking if query caching - is permitted */ - ulonglong *engine_data) /* in/out: data to call_back */ -{ - *call_back = innobase_query_caching_of_table_permitted; - *engine_data = 0; - return(innobase_query_caching_of_table_permitted(thd, table_key, - key_length, - engine_data)); -} - -char* -ha_innobase::get_mysql_bin_log_name() -{ - return(trx_sys_mysql_bin_log_name); -} - -ulonglong -ha_innobase::get_mysql_bin_log_pos() -{ - /* trx... 
is ib_longlong, which is a typedef for a 64-bit integer - (__int64 or longlong) so it's ok to cast it to ulonglong. */ - - return(trx_sys_mysql_bin_log_pos); -} - -/********************************************************************** -This function is used to find the storage length in bytes of the first n -characters for prefix indexes using a multibyte character set. The function -finds charset information and returns length of prefix_len characters in the -index field in bytes. - -NOTE: the prototype of this function is copied to data0type.c! If you change -this function, you MUST change also data0type.c! */ -extern "C" -ulint -innobase_get_at_most_n_mbchars( -/*===========================*/ - /* out: number of bytes occupied by the first - n characters */ - ulint charset_id, /* in: character set id */ - ulint prefix_len, /* in: prefix length in bytes of the index - (this has to be divided by mbmaxlen to get the - number of CHARACTERS n in the prefix) */ - ulint data_len, /* in: length of the string in bytes */ - const char* str) /* in: character string */ -{ - ulint char_length; /* character length in bytes */ - ulint n_chars; /* number of characters in prefix */ - CHARSET_INFO* charset; /* charset used in the field */ - - charset = get_charset((uint) charset_id, MYF(MY_WME)); - - ut_ad(charset); - ut_ad(charset->mbmaxlen); - - /* Calculate how many characters at most the prefix index contains */ - - n_chars = prefix_len / charset->mbmaxlen; - - /* If the charset is multi-byte, then we must find the length of the - first at most n chars in the string. If the string contains less - characters than n, then we return the length to the end of the last - character. */ - - if (charset->mbmaxlen > 1) { - /* my_charpos() returns the byte length of the first n_chars - characters, or a value bigger than the length of str, if - there were not enough full characters in str. - - Why does the code below work: - Suppose that we are looking for n UTF-8 characters. 
- - 1) If the string is long enough, then the prefix contains at - least n complete UTF-8 characters + maybe some extra - characters + an incomplete UTF-8 character. No problem in - this case. The function returns the pointer to the - end of the nth character. - - 2) If the string is not long enough, then the string contains - the complete value of a column, that is, only complete UTF-8 - characters, and we can store in the column prefix index the - whole string. */ - - char_length = my_charpos(charset, str, - str + data_len, (int) n_chars); - if (char_length > data_len) { - char_length = data_len; - } - } else { - if (data_len < prefix_len) { - char_length = data_len; - } else { - char_length = prefix_len; - } - } - - return(char_length); -} - -/*********************************************************************** -This function is used to prepare X/Open XA distributed transaction */ -static -int -innobase_xa_prepare( -/*================*/ - /* out: 0 or error number */ - handlerton *hton, - THD* thd, /* in: handle to the MySQL thread of the user - whose XA transaction should be prepared */ - bool all) /* in: TRUE - commit transaction - FALSE - the current SQL statement ended */ -{ - int error = 0; - trx_t* trx = check_trx_exists(thd); - - if (thd_sql_command(thd) != SQLCOM_XA_PREPARE && - (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) - { - - /* For ibbackup to work the order of transactions in binlog - and InnoDB must be the same. Consider the situation - - thread1> prepare; write to binlog; ... - <context switch> - thread2> prepare; write to binlog; commit - thread1> ... commit - - To ensure this will not happen we're taking the mutex on - prepare, and releasing it on commit. - - Note: only do it for normal commits, done via ha_commit_trans. - If 2pc protocol is executed by external transaction - coordinator, it will be just a regular MySQL client - executing XA PREPARE and XA COMMIT commands. 
- In this case we cannot know how many minutes or hours - will be between XA PREPARE and XA COMMIT, and we don't want - to block for undefined period of time. - */ - pthread_mutex_lock(&prepare_commit_mutex); - trx->active_trans = 2; - } - - if (!THDVAR(thd, support_xa)) { - - return(0); - } - - thd_get_xid(thd, (MYSQL_XID*) &trx->xid); - - /* Release a possible FIFO ticket and search latch. Since we will - reserve the kernel mutex, we have to release the search system latch - first to obey the latching order. */ - - innobase_release_stat_resources(trx); - - if (trx->active_trans == 0 && trx->conc_state != TRX_NOT_STARTED) { - - sql_print_error("trx->active_trans == 0, but trx->conc_state != " - "TRX_NOT_STARTED"); - } - - if (all - || (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))) { - - /* We were instructed to prepare the whole transaction, or - this is an SQL statement end and autocommit is on */ - - ut_ad(trx->active_trans); - - error = (int) trx_prepare_for_mysql(trx); - } else { - /* We just mark the SQL statement ended and do not do a - transaction prepare */ - - /* If we had reserved the auto-inc lock for some - table in this SQL statement we release it now */ - - row_unlock_table_autoinc_for_mysql(trx); - - /* Store the current undo_no of the transaction so that we - know where to roll back if we have to roll back the next - SQL statement */ - - trx_mark_sql_stat_end(trx); - } - - /* Tell the InnoDB server that there might be work for utility - threads: */ - - srv_active_wake_master_thread(); - - return error; -} - -/*********************************************************************** -This function is used to recover X/Open XA distributed transactions */ -static -int -innobase_xa_recover( -/*================*/ - /* out: number of prepared transactions - stored in xid_list */ - handlerton *hton, - XID* xid_list, /* in/out: prepared transactions */ - uint len) /* in: number of slots in xid_list */ -{ - if (len == 0 || xid_list == NULL) { 
- - return(0); - } - - return(trx_recover_for_mysql(xid_list, len)); -} - -/*********************************************************************** -This function is used to commit one X/Open XA distributed transaction -which is in the prepared state */ -static -int -innobase_commit_by_xid( -/*===================*/ - /* out: 0 or error number */ - handlerton *hton, - XID* xid) /* in: X/Open XA transaction identification */ -{ - trx_t* trx; - - trx = trx_get_trx_by_xid(xid); - - if (trx) { - innobase_commit_low(trx); - - return(XA_OK); - } else { - return(XAER_NOTA); - } -} - -/*********************************************************************** -This function is used to rollback one X/Open XA distributed transaction -which is in the prepared state */ -static -int -innobase_rollback_by_xid( -/*=====================*/ - /* out: 0 or error number */ - handlerton *hton, - XID *xid) /* in: X/Open XA transaction identification */ -{ - trx_t* trx; - - trx = trx_get_trx_by_xid(xid); - - if (trx) { - return(innobase_rollback_trx(trx)); - } else { - return(XAER_NOTA); - } -} - -/*********************************************************************** -Create a consistent view for a cursor based on current transaction -which is created if the corresponding MySQL thread still lacks one. -This consistent view is then used inside of MySQL when accessing records -using a cursor. */ -static -void* -innobase_create_cursor_view( -/*========================*/ - /* out: pointer to cursor view or NULL */ - handlerton *hton, /* in: innobase hton */ - THD* thd) /* in: user thread handle */ -{ - return(read_cursor_view_create_for_mysql(check_trx_exists(thd))); -} - -/*********************************************************************** -Close the given consistent cursor view of a transaction and restore -global read view to a transaction read view. Transaction is created if the -corresponding MySQL thread still lacks one. 
*/ -static -void -innobase_close_cursor_view( -/*=======================*/ - handlerton *hton, - THD* thd, /* in: user thread handle */ - void* curview)/* in: Consistent read view to be closed */ -{ - read_cursor_view_close_for_mysql(check_trx_exists(thd), - (cursor_view_t*) curview); -} - -/*********************************************************************** -Set the given consistent cursor view to a transaction which is created -if the corresponding MySQL thread still lacks one. If the given -consistent cursor view is NULL global read view of a transaction is -restored to a transaction read view. */ -static -void -innobase_set_cursor_view( -/*=====================*/ - handlerton *hton, - THD* thd, /* in: user thread handle */ - void* curview)/* in: Consistent cursor view to be set */ -{ - read_cursor_set_for_mysql(check_trx_exists(thd), - (cursor_view_t*) curview); -} - - -bool ha_innobase::check_if_incompatible_data( - HA_CREATE_INFO* info, - uint table_changes) -{ - if (table_changes != IS_EQUAL_YES) { - - return COMPATIBLE_DATA_NO; - } - - /* Check that auto_increment value was not changed */ - if ((info->used_fields & HA_CREATE_USED_AUTO) && - info->auto_increment_value != 0) { - - return COMPATIBLE_DATA_NO; - } - - /* Check that row format didn't change */ - if ((info->used_fields & HA_CREATE_USED_ROW_FORMAT) && - get_row_type() != info->row_type) { - - return COMPATIBLE_DATA_NO; - } - - return COMPATIBLE_DATA_YES; -} - -static int show_innodb_vars(THD *thd, SHOW_VAR *var, char *buff) -{ - innodb_export_status(); - var->type= SHOW_ARRAY; - var->value= (char *) &innodb_status_variables; - return 0; -} - -static SHOW_VAR innodb_status_variables_export[]= { - {"Innodb", (char*) &show_innodb_vars, SHOW_FUNC}, - {NullS, NullS, SHOW_LONG} -}; - -static struct st_mysql_storage_engine innobase_storage_engine= -{ MYSQL_HANDLERTON_INTERFACE_VERSION }; - -/* plugin options */ -static MYSQL_SYSVAR_BOOL(checksums, innobase_use_checksums, - PLUGIN_VAR_NOCMDARG | 
PLUGIN_VAR_READONLY, - "Enable InnoDB checksums validation (enabled by default). " - "Disable with --skip-innodb-checksums.", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_STR(data_home_dir, innobase_data_home_dir, - PLUGIN_VAR_READONLY, - "The common part for InnoDB table spaces.", - NULL, NULL, NULL); - -static MYSQL_SYSVAR_BOOL(doublewrite, innobase_use_doublewrite, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Enable InnoDB doublewrite buffer (enabled by default). " - "Disable with --skip-innodb-doublewrite.", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_BOOL(extra_dirty_writes, innobase_extra_dirty_writes, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Flush dirty buffer pages when dirty max pct is not exceeded", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_LONG(io_capacity, innobase_io_capacity, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Number of IOPs the server can do. Tunes the background IO rate", - NULL, NULL, (long)200, (long)100, LONG_MAX, (long)0); - -static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown, - PLUGIN_VAR_OPCMDARG, - "Speeds up the shutdown process of the InnoDB storage engine. Possible " - "values are 0, 1 (faster)" - /* - NetWare can't close unclosed files, can't automatically kill remaining - threads, etc, so on this OS we disable the crash-like InnoDB shutdown. 
- */ - IF_NETWARE("", " or 2 (fastest - crash-like)") - ".", - NULL, NULL, (unsigned long)1, (unsigned long)0, - (unsigned long)IF_NETWARE(1,2), (unsigned long)0); - -static MYSQL_SYSVAR_BOOL(file_per_table, innobase_file_per_table, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Stores each InnoDB table to an .ibd file in the database dir.", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit, - PLUGIN_VAR_OPCMDARG, - "Set to 0 (write and flush once per second)," - " 1 (write and flush at each commit)" - " or 2 (write at commit, flush once per second).", - NULL, NULL, (unsigned long)1, (unsigned long)0, (unsigned long)2, - (unsigned long)0); - -static MYSQL_SYSVAR_STR(flush_method, innobase_unix_file_flush_method, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "With which method to flush data.", NULL, NULL, NULL); - -static MYSQL_SYSVAR_BOOL(locks_unsafe_for_binlog, innobase_locks_unsafe_for_binlog, - PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY, - "Force InnoDB to not use next-key locking, to use only row-level locking.", - NULL, NULL, FALSE); - -#ifdef UNIV_LOG_ARCHIVE -static MYSQL_SYSVAR_STR(log_arch_dir, innobase_log_arch_dir, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Where full logs should be archived.", NULL, NULL, NULL); - -static MYSQL_SYSVAR_BOOL(log_archive, innobase_log_archive, - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, - "Set to 1 if you want to have logs archived.", NULL, NULL, FALSE); -#endif /* UNIV_LOG_ARCHIVE */ - -static MYSQL_SYSVAR_STR(log_group_home_dir, innobase_log_group_home_dir, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Path to InnoDB log files.", NULL, NULL, NULL); - -static MYSQL_SYSVAR_ULONG(max_dirty_pages_pct, srv_max_buf_pool_modified_pct, - PLUGIN_VAR_RQCMDARG, - "Percentage of dirty pages allowed in bufferpool.", - NULL, NULL, (unsigned long)75, (unsigned long)0, (unsigned long)99, - (unsigned long)0); - -static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag, - 
PLUGIN_VAR_RQCMDARG, - "Desired maximum length of the purge queue (0 = no limit)", - NULL, NULL, (unsigned long)0, (unsigned long)0, (unsigned long)~0L, - (unsigned long)0); - -static MYSQL_SYSVAR_BOOL(rollback_on_timeout, innobase_rollback_on_timeout, - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, - "Roll back the complete transaction on lock wait timeout, for 4.x compatibility (disabled by default)", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(status_file, innobase_create_status_file, - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_NOSYSVAR, - "Enable SHOW INNODB STATUS output in the innodb_status.<pid> file", - NULL, NULL, FALSE); - -static MYSQL_SYSVAR_BOOL(stats_on_metadata, innobase_stats_on_metadata, - PLUGIN_VAR_OPCMDARG, - "Enable statistics gathering for metadata commands such as SHOW TABLE STATUS (on by default)", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_BOOL(use_legacy_cardinality_algorithm, - srv_use_legacy_cardinality_algorithm, - PLUGIN_VAR_OPCMDARG, - "Use legacy algorithm for picking random pages during index cardinality " - "estimation. Disable this to use a better algorithm, but note that your " - "query plans may change (enabled by default).", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_BOOL(adaptive_hash_index, innobase_adaptive_hash_index, - PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, - "Enable InnoDB adaptive hash index (enabled by default). 
" - "Disable with --skip-innodb-adaptive-hash-index.", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.", - NULL, NULL, (long)8*1024*1024L, (long)2*1024*1024L, LONG_MAX, (long)1024); - -static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment, - PLUGIN_VAR_RQCMDARG, - "Data file autoextend increment in megabytes", - NULL, NULL, (unsigned long)64L, (unsigned long)1L, (unsigned long)1000L, - (unsigned long)0); - -static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.", - NULL, NULL, (long long)1024*1024*1024L, (long long)64*1024*1024L, - LONGLONG_MAX, (long long)1024*1024L); - -static MYSQL_SYSVAR_ULONG(commit_concurrency, srv_commit_concurrency, - PLUGIN_VAR_RQCMDARG, - "Helps in performance tuning in heavily concurrent environments.", - NULL, NULL, (unsigned long)0, (unsigned long)0, (unsigned long)1000, - (unsigned long)0); - -static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter, - PLUGIN_VAR_RQCMDARG, - "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket", - NULL, NULL, (unsigned long)500L, (unsigned long)1L, (unsigned long)~0L, - (unsigned long)0); - -static MYSQL_SYSVAR_LONG(write_io_threads, innobase_write_io_threads, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Number of write I/O threads in InnoDB.", - NULL, NULL, (long)8, (long)1, (long)64, (long)0); - -static MYSQL_SYSVAR_LONG(read_io_threads, innobase_read_io_threads, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Number of read I/O threads in InnoDB.", - NULL, NULL, (long)8, (long)1, (long)64, (long)0); - -static 
MYSQL_SYSVAR_LONG(max_merged_io, innobase_max_merged_io, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Max number of adjacent IO requests to merge in InnoDB.", - NULL, NULL, (long)64, (long)1, (long)64, (long)0); - -static MYSQL_SYSVAR_LONG(force_recovery, innobase_force_recovery, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Helps to save your data in case the disk image of the database becomes corrupt.", - NULL, NULL, (long)0, (long)0, (long)6, (long)0); - -static MYSQL_SYSVAR_LONG(lock_wait_timeout, innobase_lock_wait_timeout, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back.", - NULL, NULL, (long)50, (long)1, (long)(1024*1024*1024), (long)0); - -static MYSQL_SYSVAR_LONG(log_buffer_size, innobase_log_buffer_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "The size of the buffer which InnoDB uses to write log to the log files on disk.", - NULL, NULL, (long)16*1024*1024L, (long)2*1024*1024L, LONG_MAX, (long)1024); - -static MYSQL_SYSVAR_LONGLONG(log_file_size, innobase_log_file_size, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Size of each log file in a log group.", - NULL, NULL, (long long)128*1024*1024L, (long long)32*1024*1024L, - LONGLONG_MAX, (long long)1024*1024L); - -static MYSQL_SYSVAR_LONG(log_files_in_group, innobase_log_files_in_group, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Number of log files in the log group. InnoDB writes to the files in a circular fashion. Value 3 is recommended here.", - NULL, NULL, (long)3, (long)2, (long)100, (long)0); - -static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Number of identical copies of log groups we keep for the database. 
Currently this should be set to 1.", - NULL, NULL, (long)1, (long)1, (long)10, (long)0); - -static MYSQL_SYSVAR_LONG(open_files, innobase_open_files, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "How many files at the maximum InnoDB keeps open at the same time.", - NULL, NULL, (long)300L, (long)10L, LONG_MAX, (long)0L); - -static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds, - PLUGIN_VAR_RQCMDARG, - "Count of spin-loop rounds in InnoDB mutexes", - NULL, NULL, (unsigned long)20L, (unsigned long)0L, (unsigned long)~0L, - (unsigned long)0L); - -static MYSQL_SYSVAR_BOOL(thread_concurrency_timer_based, - innobase_thread_concurrency_timer_based, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Use InnoDB timer based concurrency throttling. ", - NULL, NULL, TRUE); - -static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency, - PLUGIN_VAR_RQCMDARG, - "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.", - NULL, NULL, (unsigned long)0, (unsigned long)0, (unsigned long)1000, - (unsigned long)0); - -static MYSQL_SYSVAR_ULONG(thread_sleep_delay, srv_thread_sleep_delay, - PLUGIN_VAR_RQCMDARG, - "Time of innodb thread sleeping before joining InnoDB queue (usec). 
Value 0 disable a sleep", - NULL, NULL, (unsigned long)10000L, (unsigned long)0L, (unsigned long)~0L, - (unsigned long)0); - -static MYSQL_SYSVAR_STR(data_file_path, innobase_data_file_path, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "Path to individual files and their sizes.", - NULL, NULL, NULL); - -static MYSQL_SYSVAR_LONG(autoinc_lock_mode, innobase_autoinc_lock_mode, - PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY, - "The AUTOINC lock modes supported by InnoDB: " - "0 => Old style AUTOINC locking (for backward" - " compatibility) " - "1 => New style AUTOINC locking " - "2 => No AUTOINC locking (unsafe for SBR)", - NULL, NULL, - AUTOINC_NEW_STYLE_LOCKING, /* Default setting */ - AUTOINC_OLD_STYLE_LOCKING, /* Minimum value */ - AUTOINC_NO_LOCKING, (long)0); /* Maximum value */ - -static struct st_mysql_sys_var* innobase_system_variables[]= { - MYSQL_SYSVAR(additional_mem_pool_size), - MYSQL_SYSVAR(autoextend_increment), - MYSQL_SYSVAR(buffer_pool_size), - MYSQL_SYSVAR(checksums), - MYSQL_SYSVAR(commit_concurrency), - MYSQL_SYSVAR(concurrency_tickets), - MYSQL_SYSVAR(data_file_path), - MYSQL_SYSVAR(data_home_dir), - MYSQL_SYSVAR(doublewrite), - MYSQL_SYSVAR(fast_shutdown), - MYSQL_SYSVAR(read_io_threads), - MYSQL_SYSVAR(write_io_threads), - MYSQL_SYSVAR(max_merged_io), - MYSQL_SYSVAR(thread_concurrency_timer_based), - MYSQL_SYSVAR(file_per_table), - MYSQL_SYSVAR(flush_log_at_trx_commit), - MYSQL_SYSVAR(flush_method), - MYSQL_SYSVAR(force_recovery), - MYSQL_SYSVAR(locks_unsafe_for_binlog), - MYSQL_SYSVAR(lock_wait_timeout), -#ifdef UNIV_LOG_ARCHIVE - MYSQL_SYSVAR(log_arch_dir), - MYSQL_SYSVAR(log_archive), -#endif /* UNIV_LOG_ARCHIVE */ - MYSQL_SYSVAR(log_buffer_size), - MYSQL_SYSVAR(log_file_size), - MYSQL_SYSVAR(log_files_in_group), - MYSQL_SYSVAR(log_group_home_dir), - MYSQL_SYSVAR(max_dirty_pages_pct), - MYSQL_SYSVAR(max_purge_lag), - MYSQL_SYSVAR(mirrored_log_groups), - MYSQL_SYSVAR(open_files), - MYSQL_SYSVAR(rollback_on_timeout), - 
MYSQL_SYSVAR(stats_on_metadata), - MYSQL_SYSVAR(use_legacy_cardinality_algorithm), - MYSQL_SYSVAR(adaptive_hash_index), - MYSQL_SYSVAR(status_file), - MYSQL_SYSVAR(support_xa), - MYSQL_SYSVAR(sync_spin_loops), - MYSQL_SYSVAR(table_locks), - MYSQL_SYSVAR(thread_concurrency), - MYSQL_SYSVAR(thread_sleep_delay), - MYSQL_SYSVAR(autoinc_lock_mode), - MYSQL_SYSVAR(extra_dirty_writes), - MYSQL_SYSVAR(io_capacity), - NULL -}; - -mysql_declare_plugin(innobase) -{ - MYSQL_STORAGE_ENGINE_PLUGIN, - &innobase_storage_engine, - innobase_hton_name, - "Innobase OY", - "Supports transactions, row-level locking, and foreign keys", - PLUGIN_LICENSE_GPL, - innobase_init, /* Plugin Init */ - NULL, /* Plugin Deinit */ - 0x0100 /* 1.0 */, - innodb_status_variables_export,/* status variables */ - innobase_system_variables, /* system variables */ - NULL /* reserved */ -} -mysql_declare_plugin_end; diff --git a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h deleted file mode 100644 index 8ca72ee1a60..00000000000 --- a/storage/innobase/handler/ha_innodb.h +++ /dev/null @@ -1,255 +0,0 @@ -/* Copyright (C) 2000-2005 MySQL AB && Innobase Oy - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. 
- - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ - -/* - This file is based on ha_berkeley.h of MySQL distribution - - This file defines the Innodb handler: the interface between MySQL and - Innodb -*/ - -#ifdef USE_PRAGMA_INTERFACE -#pragma interface /* gcc class implementation */ -#endif - -typedef struct st_innobase_share { - THR_LOCK lock; - pthread_mutex_t mutex; - char *table_name; - uint table_name_length,use_count; -} INNOBASE_SHARE; - - -struct dict_index_struct; -struct row_prebuilt_struct; - -typedef struct dict_index_struct dict_index_t; -typedef struct row_prebuilt_struct row_prebuilt_t; - -/* The class defining a handle to an Innodb table */ -class ha_innobase: public handler -{ - row_prebuilt_t* prebuilt; /* prebuilt struct in InnoDB, used - to save CPU time with prebuilt data - structures*/ - THD* user_thd; /* the thread handle of the user - currently using the handle; this is - set in external_lock function */ - THR_LOCK_DATA lock; - INNOBASE_SHARE *share; - - uchar* upd_buff; /* buffer used in updates */ - uchar* key_val_buff; /* buffer used in converting - search key values from MySQL format - to Innodb format */ - ulong upd_and_key_val_buff_len; - /* the length of each of the previous - two buffers */ - Table_flags int_table_flags; - uint primary_key; - ulong start_of_scan; /* this is set to 1 when we are - starting a table scan but have not - yet fetched any row, else 0 */ - uint last_match_mode;/* match mode of the latest search: - ROW_SEL_EXACT, ROW_SEL_EXACT_PREFIX, - or undefined */ - uint num_write_row; /* number of write_row() calls */ - - uint store_key_val_for_row(uint keynr, char* buff, uint buff_len, - const uchar* record); - int update_thd(THD* thd); - int change_active_index(uint keynr); - int general_fetch(uchar* buf, uint direction, uint match_mode); - ulong 
innobase_lock_autoinc(); - ulonglong innobase_peek_autoinc(); - ulong innobase_set_max_autoinc(ulonglong auto_inc); - ulong innobase_reset_autoinc(ulonglong auto_inc); - ulong innobase_get_autoinc(ulonglong* value); - ulong innobase_update_autoinc(ulonglong auto_inc); - ulong innobase_initialize_autoinc(); - dict_index_t* innobase_get_index(uint keynr); - ulonglong innobase_get_int_col_max_value(const Field* field); - - /* Init values for the class: */ - public: - ha_innobase(handlerton *hton, TABLE_SHARE *table_arg); - ~ha_innobase() {} - /* - Get the row type from the storage engine. If this method returns - ROW_TYPE_NOT_USED, the information in HA_CREATE_INFO should be used. - */ - enum row_type get_row_type() const; - - const char* table_type() const { return("InnoDB");} - const char *index_type(uint key_number) { return "BTREE"; } - const char** bas_ext() const; - Table_flags table_flags() const; - ulong index_flags(uint idx, uint part, bool all_parts) const - { - return (HA_READ_NEXT | - HA_READ_PREV | - HA_READ_ORDER | - HA_READ_RANGE | - HA_KEYREAD_ONLY); - } - uint max_supported_keys() const { return MAX_KEY; } - /* An InnoDB page must store >= 2 keys; - a secondary key record must also contain the - primary key value: - max key length is therefore set to slightly - less than 1 / 4 of page size which is 16 kB; - but currently MySQL does not work with keys - whose size is > MAX_KEY_LENGTH */ - uint max_supported_key_length() const { return 3500; } - uint max_supported_key_part_length() const; - const key_map *keys_to_use_for_scanning() { return &key_map_full; } - - int open(const char *name, int mode, uint test_if_locked); - int close(void); - double scan_time(); - double read_time(uint index, uint ranges, ha_rows rows); - - int write_row(uchar * buf); - int update_row(const uchar * old_data, uchar * new_data); - int delete_row(const uchar * buf); - bool was_semi_consistent_read(); - void try_semi_consistent_read(bool yes); - void unlock_row(); - - int 
index_init(uint index, bool sorted); - int index_end(); - int index_read(uchar * buf, const uchar * key, - uint key_len, enum ha_rkey_function find_flag); - int index_read_idx(uchar * buf, uint index, const uchar * key, - uint key_len, enum ha_rkey_function find_flag); - int index_read_last(uchar * buf, const uchar * key, uint key_len); - int index_next(uchar * buf); - int index_next_same(uchar * buf, const uchar *key, uint keylen); - int index_prev(uchar * buf); - int index_first(uchar * buf); - int index_last(uchar * buf); - - int rnd_init(bool scan); - int rnd_end(); - int rnd_next(uchar *buf); - int rnd_pos(uchar * buf, uchar *pos); - - void position(const uchar *record); - int info(uint); - int analyze(THD* thd,HA_CHECK_OPT* check_opt); - int optimize(THD* thd,HA_CHECK_OPT* check_opt); - int discard_or_import_tablespace(my_bool discard); - int extra(enum ha_extra_function operation); - int reset(); - int external_lock(THD *thd, int lock_type); - int transactional_table_lock(THD *thd, int lock_type); - int start_stmt(THD *thd, thr_lock_type lock_type); - void position(uchar *record); - ha_rows records_in_range(uint inx, key_range *min_key, key_range - *max_key); - ha_rows estimate_rows_upper_bound(); - - void update_create_info(HA_CREATE_INFO* create_info); - int create(const char *name, register TABLE *form, - HA_CREATE_INFO *create_info); - int delete_all_rows(); - int delete_table(const char *name); - int rename_table(const char* from, const char* to); - int check(THD* thd, HA_CHECK_OPT* check_opt); - char* update_table_comment(const char* comment); - char* get_foreign_key_create_info(); - int get_foreign_key_list(THD *thd, List<FOREIGN_KEY_INFO> *f_key_list); - bool can_switch_engines(); - uint referenced_by_foreign_key(); - void free_foreign_key_create_info(char* str); - THR_LOCK_DATA **store_lock(THD *thd, THR_LOCK_DATA **to, - enum thr_lock_type lock_type); - void init_table_handle_for_HANDLER(); - virtual void get_auto_increment(ulonglong offset, 
ulonglong increment, - ulonglong nb_desired_values, - ulonglong *first_value, - ulonglong *nb_reserved_values); - int reset_auto_increment(ulonglong value); - - virtual bool get_error_message(int error, String *buf); - - uint8 table_cache_type() { return HA_CACHE_TBL_ASKTRANSACT; } - /* - ask handler about permission to cache table during query registration - */ - my_bool register_query_cache_table(THD *thd, char *table_key, - uint key_length, - qc_engine_callback *call_back, - ulonglong *engine_data); - static char *get_mysql_bin_log_name(); - static ulonglong get_mysql_bin_log_pos(); - bool primary_key_is_clustered() { return true; } - int cmp_ref(const uchar *ref1, const uchar *ref2); - bool check_if_incompatible_data(HA_CREATE_INFO *info, - uint table_changes); -}; - -/* Some accessor functions which the InnoDB plugin needs, but which -can not be added to mysql/plugin.h as part of the public interface; -the definitions are bracketed with #ifdef INNODB_COMPATIBILITY_HOOKS */ - -#ifndef INNODB_COMPATIBILITY_HOOKS -#error InnoDB needs MySQL to be built with #define INNODB_COMPATIBILITY_HOOKS -#endif - -extern "C" { -struct charset_info_st *thd_charset(MYSQL_THD thd); -char **thd_query(MYSQL_THD thd); - -/** Get the file name of the MySQL binlog. - * @return the name of the binlog file - */ -const char* mysql_bin_log_file_name(void); - -/** Get the current position of the MySQL binlog. 
- * @return byte offset from the beginning of the binlog - */ -ulonglong mysql_bin_log_file_pos(void); - -/** - Check if a user thread is a replication slave thread - @param thd user thread - @retval 0 the user thread is not a replication slave thread - @retval 1 the user thread is a replication slave thread -*/ -int thd_slave_thread(const MYSQL_THD thd); - -/** - Check if a user thread is running a non-transactional update - @param thd user thread - @retval 0 the user thread is not running a non-transactional update - @retval 1 the user thread is running a non-transactional update -*/ -int thd_non_transactional_update(const MYSQL_THD thd); - -/** - Get the user thread's binary logging format - @param thd user thread - @return Value to be used as index into the binlog_format_names array -*/ -int thd_binlog_format(const MYSQL_THD thd); - -/** - Mark transaction to rollback and mark error as fatal to a sub-statement. - @param thd Thread handle - @param all TRUE <=> rollback main transaction. -*/ -void thd_mark_transaction_to_rollback(MYSQL_THD thd, bool all); -} diff --git a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c deleted file mode 100644 index d54a3378993..00000000000 --- a/storage/innobase/ibuf/ibuf0ibuf.c +++ /dev/null @@ -1,3580 +0,0 @@ -/****************************************************** -Insert buffer - -(c) 1997 Innobase Oy - -Created 7/19/1997 Heikki Tuuri -*******************************************************/ - -#include "ibuf0ibuf.h" - -#ifdef UNIV_NONINL -#include "ibuf0ibuf.ic" -#endif - -#include "buf0buf.h" -#include "buf0rea.h" -#include "fsp0fsp.h" -#include "trx0sys.h" -#include "fil0fil.h" -#include "thr0loc.h" -#include "rem0rec.h" -#include "btr0cur.h" -#include "btr0pcur.h" -#include "btr0btr.h" -#include "sync0sync.h" -#include "dict0boot.h" -#include "fut0lst.h" -#include "lock0lock.h" -#include "log0recv.h" -#include "que0que.h" - -/* STRUCTURE OF AN INSERT BUFFER RECORD - -In versions < 4.1.x: - -1. 
The first field is the page number. -2. The second field is an array which stores type info for each subsequent - field. We store the information which affects the ordering of records, and - also the physical storage size of an SQL NULL value. E.g., for CHAR(10) it - is 10 bytes. -3. Next we have the fields of the actual index record. - -In versions >= 4.1.x: - -Note that contrary to what we planned in the 1990's, there will only be one -insert buffer tree, and that is in the system tablespace of InnoDB. - -1. The first field is the space id. -2. The second field is a one-byte marker (0) which differentiates records from - the < 4.1.x storage format. -3. The third field is the page number. -4. The fourth field contains the type info, where we have also added 2 bytes to - store the charset. In the compressed table format of 5.0.x we must add more - information here so that we can build a dummy 'index' struct which 5.0.x - can use in the binary search on the index page in the ibuf merge phase. -5. The rest of the fields contain the fields of the actual index record. - -In versions >= 5.0.3: - -The first byte of the fourth field is an additional marker (0) if the record -is in the compact format. The presence of this marker can be detected by -looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE. - -The high-order bit of the character set field in the type info is the -"nullable" flag for the field. */ - - -/* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM - -If an OS thread performs any operation that brings in disk pages from -non-system tablespaces into the buffer pool, or creates such a page there, -then the operation may have as a side effect an insert buffer index tree -compression. Thus, the tree latch of the insert buffer tree may be acquired -in the x-mode, and also the file space latch of the system tablespace may -be acquired in the x-mode. - -Also, an insert to an index in a non-system tablespace can have the same -effect.
How do we know this cannot lead to a deadlock of OS threads? There -is a problem with the i/o-handler threads: they break the latching order -because they own x-latches to pages which are on a lower level than the -insert buffer tree latch, its page latches, and the tablespace latch an -insert buffer operation can reserve. - -The solution is the following: Let all the tree and page latches connected -with the insert buffer be later in the latching order than the fsp latch and -fsp page latches. - -Insert buffer pages must be such that the insert buffer is never invoked -when these pages are accessed as this would result in a recursion violating -the latching order. We let a special i/o-handler thread take care of i/o to -the insert buffer pages and the ibuf bitmap pages, as well as the fsp bitmap -pages and the first inode page, which contains the inode of the ibuf tree: let -us call all these ibuf pages. To prevent deadlocks, we do not let a read-ahead -access both non-ibuf and ibuf pages. - -Then an i/o-handler for the insert buffer never needs to access recursively the -insert buffer tree and thus obeys the latching order. On the other hand, other -i/o-handlers for other tablespaces may require access to the insert buffer, -but because all kinds of latches they need to access there are later in the -latching order, no violation of the latching order occurs in this case, -either. - -A problem is how to grow and contract an insert buffer tree. As it is later -in the latching order than the fsp management, we have to reserve the fsp -latch first, before adding or removing pages from the insert buffer tree. -We let the insert buffer tree have its own file space management: a free -list of pages linked to the tree root. To prevent recursive use of the -insert buffer when adding pages to the tree, we must first load these pages -to memory, obtaining a latch on them, and only after that add them to the -free list of the insert buffer tree.
More difficult is removing of pages -from the free list. If there is an excess of pages in the free list of the -ibuf tree, they might be needed if some thread reserves the fsp latch, -intending to allocate more file space. So we do the following: if a thread -reserves the fsp latch, we check the writer count field of the latch. If -this field has value 1, it means that the thread did not own the latch -before entering the fsp system, and the mtr of the thread contains no -modifications to the fsp pages. Now we are free to reserve the ibuf latch, -and check if there is an excess of pages in the free list. We can then, in a -separate mini-transaction, take them out of the free list and free them to -the fsp system. - -To avoid deadlocks in the ibuf system, we divide file pages into three levels: - -(1) non-ibuf pages, -(2) ibuf tree pages and the pages in the ibuf tree free list, and -(3) ibuf bitmap pages. - -No OS thread is allowed to access higher level pages if it has latches to -lower level pages; even if the thread owns a B-tree latch it must not access -the B-tree non-leaf pages if it has latches on lower level pages. Read-ahead -is only allowed for level 1 and 2 pages. Dedicated i/o-handler threads handle -exclusively level 1 i/o. A dedicated i/o handler thread handles exclusively -level 2 i/o. However, if an OS thread does the i/o handling for itself, i.e., -it uses synchronous aio, it can access any pages, as long as it obeys the -access order rules. 
*/ - -/* Buffer pool size per the maximum insert buffer size */ -#define IBUF_POOL_SIZE_PER_MAX_SIZE 2 - -/* The insert buffer control structure */ -ibuf_t* ibuf = NULL; - -static ulint ibuf_rnd = 986058871; - -ulint ibuf_flush_count = 0; - -#ifdef UNIV_IBUF_DEBUG -/* Dimensions for the ibuf_count array */ -#define IBUF_COUNT_N_SPACES 500 -#define IBUF_COUNT_N_PAGES 2000 - -/* Buffered entry counts for file pages, used in debugging */ -static ulint ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES]; - -/********************************************************************** -Checks that the indexes to ibuf_counts[][] are within limits. */ -UNIV_INLINE -void -ibuf_count_check( -/*=============*/ - ulint space_id, /* in: space identifier */ - ulint page_no) /* in: page number */ -{ - if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) { - return; - } - - fprintf(stderr, - "InnoDB: UNIV_IBUF_DEBUG limits space_id and page_no\n" - "InnoDB: and breaks crash recovery.\n" - "InnoDB: space_id=%lu, should be 0<=space_id<%lu\n" - "InnoDB: page_no=%lu, should be 0<=page_no<%lu\n", - (ulint) space_id, (ulint) IBUF_COUNT_N_SPACES, - (ulint) page_no, (ulint) IBUF_COUNT_N_PAGES); - ut_error; -} -#endif - -/* The start address for an insert buffer bitmap page bitmap */ -#define IBUF_BITMAP PAGE_DATA - -/* Offsets in bits for the bits describing a single page in the bitmap */ -#define IBUF_BITMAP_FREE 0 -#define IBUF_BITMAP_BUFFERED 2 -#define IBUF_BITMAP_IBUF 3 /* TRUE if page is a part of the ibuf - tree, excluding the root page, or is - in the free list of the ibuf */ - -/* Number of bits describing a single page */ -#define IBUF_BITS_PER_PAGE 4 -#if IBUF_BITS_PER_PAGE % 2 -# error "IBUF_BITS_PER_PAGE must be an even number!" 
-#endif - -/* The mutex used to block pessimistic inserts to ibuf trees */ -static mutex_t ibuf_pessimistic_insert_mutex; - -/* The mutex protecting the insert buffer structs */ -static mutex_t ibuf_mutex; - -/* The mutex protecting the insert buffer bitmaps */ -static mutex_t ibuf_bitmap_mutex; - -/* The area in pages from which contract looks for page numbers for merge */ -#define IBUF_MERGE_AREA 8 - -/* Inside the merge area, pages which have at most 1 per this number less -buffered entries compared to maximum volume that can buffered for a single -page are merged along with the page whose buffer became full */ -#define IBUF_MERGE_THRESHOLD 4 - -/* In ibuf_contract at most this number of pages is read to memory in one -batch, in order to merge the entries for them in the insert buffer */ -#define IBUF_MAX_N_PAGES_MERGED IBUF_MERGE_AREA - -/* If the combined size of the ibuf trees exceeds ibuf->max_size by this -many pages, we start to contract it in connection to inserts there, using -non-synchronous contract */ -#define IBUF_CONTRACT_ON_INSERT_NON_SYNC 0 - -/* Same as above, but use synchronous contract */ -#define IBUF_CONTRACT_ON_INSERT_SYNC 5 - -/* Same as above, but no insert is done, only contract is called */ -#define IBUF_CONTRACT_DO_NOT_INSERT 10 - -/* TODO: how to cope with drop table if there are records in the insert -buffer for the indexes of the table? Is there actually any problem, -because ibuf merge is done to a page when it is read in, and it is -still physically like the index page even if the index would have been -dropped! So, there seems to be no problem. */ - -/********************************************************************** -Validates the ibuf data structures when the caller owns ibuf_mutex. 
*/ - -ibool -ibuf_validate_low(void); -/*===================*/ - /* out: TRUE if ok */ - -/********************************************************************** -Sets the flag in the current OS thread local storage denoting that it is -inside an insert buffer routine. */ -UNIV_INLINE -void -ibuf_enter(void) -/*============*/ -{ - ibool* ptr; - - ptr = thr_local_get_in_ibuf_field(); - - ut_ad(*ptr == FALSE); - - *ptr = TRUE; -} - -/********************************************************************** -Sets the flag in the current OS thread local storage denoting that it is -exiting an insert buffer routine. */ -UNIV_INLINE -void -ibuf_exit(void) -/*===========*/ -{ - ibool* ptr; - - ptr = thr_local_get_in_ibuf_field(); - - ut_ad(*ptr == TRUE); - - *ptr = FALSE; -} - -/********************************************************************** -Returns TRUE if the current OS thread is performing an insert buffer -routine. */ - -ibool -ibuf_inside(void) -/*=============*/ - /* out: TRUE if inside an insert buffer routine: for instance, - a read-ahead of non-ibuf pages is then forbidden */ -{ - return(*thr_local_get_in_ibuf_field()); -} - -/********************************************************************** -Gets the ibuf header page and x-latches it. */ -static -page_t* -ibuf_header_page_get( -/*=================*/ - /* out: insert buffer header page */ - ulint space, /* in: space id */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* page; - - ut_a(space == 0); - - ut_ad(!ibuf_inside()); - - page = buf_page_get(space, FSP_IBUF_HEADER_PAGE_NO, RW_X_LATCH, mtr); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_IBUF_HEADER); -#endif /* UNIV_SYNC_DEBUG */ - - return(page); -} - -/********************************************************************** -Gets the root page and x-latches it. 
*/ -static -page_t* -ibuf_tree_root_get( -/*===============*/ - /* out: insert buffer tree root page */ - ibuf_data_t* data, /* in: ibuf data */ - ulint space, /* in: space id */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* page; - - ut_a(space == 0); - ut_ad(ibuf_inside()); - - mtr_x_lock(dict_index_get_lock(data->index), mtr); - - page = buf_page_get(space, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, - mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_TREE_NODE); -#endif /* UNIV_SYNC_DEBUG */ - - return(page); -} - -#ifdef UNIV_IBUF_DEBUG -/********************************************************************** -Gets the ibuf count for a given page. */ - -ulint -ibuf_count_get( -/*===========*/ - /* out: number of entries in the insert buffer - currently buffered for this page */ - ulint space, /* in: space id */ - ulint page_no)/* in: page number */ -{ - ibuf_count_check(space, page_no); - - return(ibuf_counts[space][page_no]); -} - -/********************************************************************** -Sets the ibuf count for a given page. */ -static -void -ibuf_count_set( -/*===========*/ - ulint space, /* in: space id */ - ulint page_no,/* in: page number */ - ulint val) /* in: value to set */ -{ - ibuf_count_check(space, page_no); - ut_a(val < UNIV_PAGE_SIZE); - - ibuf_counts[space][page_no] = val; -} -#endif - -/********************************************************************** -Creates the insert buffer data structure at a database startup and initializes -the data structures for the insert buffer. 
*/ - -void -ibuf_init_at_db_start(void) -/*=======================*/ -{ - ibuf = mem_alloc(sizeof(ibuf_t)); - - /* Note that also a pessimistic delete can sometimes make a B-tree - grow in size, as the references on the upper levels of the tree can - change */ - - ibuf->max_size = buf_pool_get_curr_size() / UNIV_PAGE_SIZE - / IBUF_POOL_SIZE_PER_MAX_SIZE; - - UT_LIST_INIT(ibuf->data_list); - - ibuf->size = 0; - - mutex_create(&ibuf_pessimistic_insert_mutex, - SYNC_IBUF_PESS_INSERT_MUTEX); - - mutex_create(&ibuf_mutex, SYNC_IBUF_MUTEX); - - mutex_create(&ibuf_bitmap_mutex, SYNC_IBUF_BITMAP_MUTEX); - - fil_ibuf_init_at_db_start(); -} - -/********************************************************************** -Updates the size information in an ibuf data, assuming the segment size has -not changed. */ -static -void -ibuf_data_sizes_update( -/*===================*/ - ibuf_data_t* data, /* in: ibuf data struct */ - page_t* root, /* in: ibuf tree root */ - mtr_t* mtr) /* in: mtr */ -{ - ulint old_size; - - ut_ad(mutex_own(&ibuf_mutex)); - - old_size = data->size; - - data->free_list_len = flst_get_len(root + PAGE_HEADER - + PAGE_BTR_IBUF_FREE_LIST, mtr); - - data->height = 1 + btr_page_get_level(root, mtr); - - data->size = data->seg_size - (1 + data->free_list_len); - /* the '1 +' is the ibuf header page */ - ut_ad(data->size < data->seg_size); - - if (page_get_n_recs(root) == 0) { - - data->empty = TRUE; - } else { - data->empty = FALSE; - } - - ut_ad(ibuf->size + data->size >= old_size); - - ibuf->size = ibuf->size + data->size - old_size; - -#if 0 - fprintf(stderr, "ibuf size %lu, space ibuf size %lu\n", - ibuf->size, data->size); -#endif -} - -/********************************************************************** -Creates the insert buffer data struct for a single tablespace. Reads the -root page of the insert buffer tree in the tablespace. 
This function can -be called only after the dictionary system has been initialized, as this -creates also the insert buffer table and index into this tablespace. */ - -ibuf_data_t* -ibuf_data_init_for_space( -/*=====================*/ - /* out, own: ibuf data struct, linked to the list - in ibuf control structure */ - ulint space) /* in: space id */ -{ - ibuf_data_t* data; - page_t* root; - page_t* header_page; - mtr_t mtr; - char* buf; - mem_heap_t* heap; - dict_table_t* table; - dict_index_t* index; - ulint n_used; - - ut_a(space == 0); - - data = mem_alloc(sizeof(ibuf_data_t)); - - data->space = space; - - mtr_start(&mtr); - - mutex_enter(&ibuf_mutex); - - mtr_x_lock(fil_space_get_latch(space), &mtr); - - header_page = ibuf_header_page_get(space, &mtr); - - fseg_n_reserved_pages(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, - &n_used, &mtr); - ibuf_enter(); - - ut_ad(n_used >= 2); - - data->seg_size = n_used; - - root = buf_page_get(space, FSP_IBUF_TREE_ROOT_PAGE_NO, RW_X_LATCH, - &mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(root, SYNC_TREE_NODE); -#endif /* UNIV_SYNC_DEBUG */ - - data->size = 0; - data->n_inserts = 0; - data->n_merges = 0; - data->n_merged_recs = 0; - - ibuf_data_sizes_update(data, root, &mtr); - /* - if (!data->empty) { - fprintf(stderr, - "InnoDB: index entries found in the insert buffer\n"); - } else { - fprintf(stderr, - "InnoDB: insert buffer empty\n"); - } - */ - mutex_exit(&ibuf_mutex); - - mtr_commit(&mtr); - - ibuf_exit(); - - heap = mem_heap_create(450); - buf = mem_heap_alloc(heap, 50); - - sprintf(buf, "SYS_IBUF_TABLE_%lu", (ulong) space); - /* use old-style record format for the insert buffer */ - table = dict_mem_table_create(buf, space, 2, 0); - - dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_BINARY, 0, 0); - dict_mem_table_add_col(table, heap, "TYPES", DATA_BINARY, 0, 0); - - table->id = ut_dulint_add(DICT_IBUF_ID_MIN, space); - - dict_table_add_to_cache(table, heap); - mem_heap_free(heap); - - index = 
dict_mem_index_create( - buf, "CLUST_IND", space, - DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, 2); - - dict_mem_index_add_field(index, "PAGE_NO", 0); - dict_mem_index_add_field(index, "TYPES", 0); - - index->id = ut_dulint_add(DICT_IBUF_ID_MIN, space); - - dict_index_add_to_cache(table, index, FSP_IBUF_TREE_ROOT_PAGE_NO); - - data->index = dict_table_get_first_index(table); - - mutex_enter(&ibuf_mutex); - - UT_LIST_ADD_LAST(data_list, ibuf->data_list, data); - - mutex_exit(&ibuf_mutex); - - return(data); -} - -/************************************************************************* -Initializes an ibuf bitmap page. */ - -void -ibuf_bitmap_page_init( -/*==================*/ - page_t* page, /* in: bitmap page */ - mtr_t* mtr) /* in: mtr */ -{ - ulint bit_offset; - ulint byte_offset; - - /* Write all zeros to the bitmap */ - - bit_offset = XDES_DESCRIBED_PER_PAGE * IBUF_BITS_PER_PAGE; - - byte_offset = bit_offset / 8 + 1; - /* better: byte_offset = UT_BITS_IN_BYTES(bit_offset); */ - - fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP); - - memset(page + IBUF_BITMAP, 0, byte_offset); - - /* The remaining area (up to the page trailer) is uninitialized. */ - - mlog_write_initial_log_record(page, MLOG_IBUF_BITMAP_INIT, mtr); -} - -/************************************************************************* -Parses a redo log record of an ibuf bitmap page init. */ - -byte* -ibuf_parse_bitmap_init( -/*===================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr __attribute__((unused)), /* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ -{ - ut_ad(ptr && end_ptr); - - if (page) { - ibuf_bitmap_page_init(page, mtr); - } - - return(ptr); -} - -/************************************************************************ -Gets the desired bits for a given page from a bitmap page. 
*/ -UNIV_INLINE -ulint -ibuf_bitmap_page_get_bits( -/*======================*/ - /* out: value of bits */ - page_t* page, /* in: bitmap page */ - ulint page_no,/* in: page whose bits to get */ - ulint bit, /* in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... */ - mtr_t* mtr __attribute__((unused))) /* in: mtr containing an - x-latch to the bitmap - page */ -{ - ulint byte_offset; - ulint bit_offset; - ulint map_byte; - ulint value; - - ut_ad(bit < IBUF_BITS_PER_PAGE); -#if IBUF_BITS_PER_PAGE % 2 -# error "IBUF_BITS_PER_PAGE % 2 != 0" -#endif - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); - - bit_offset = (page_no % XDES_DESCRIBED_PER_PAGE) * IBUF_BITS_PER_PAGE - + bit; - - byte_offset = bit_offset / 8; - bit_offset = bit_offset % 8; - - ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE); - - map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset); - - value = ut_bit_get_nth(map_byte, bit_offset); - - if (bit == IBUF_BITMAP_FREE) { - ut_ad(bit_offset + 1 < 8); - - value = value * 2 + ut_bit_get_nth(map_byte, bit_offset + 1); - } - - return(value); -} - -/************************************************************************ -Sets the desired bit for a given page in a bitmap page. */ -static -void -ibuf_bitmap_page_set_bits( -/*======================*/ - page_t* page, /* in: bitmap page */ - ulint page_no,/* in: page whose bits to set */ - ulint bit, /* in: IBUF_BITMAP_FREE, IBUF_BITMAP_BUFFERED, ... 
*/ - ulint val, /* in: value to set */ - mtr_t* mtr) /* in: mtr containing an x-latch to the bitmap page */ -{ - ulint byte_offset; - ulint bit_offset; - ulint map_byte; - - ut_ad(bit < IBUF_BITS_PER_PAGE); -#if IBUF_BITS_PER_PAGE % 2 -# error "IBUF_BITS_PER_PAGE % 2 != 0" -#endif - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX)); -#ifdef UNIV_IBUF_DEBUG - ut_a((bit != IBUF_BITMAP_BUFFERED) || (val != FALSE) - || (0 == ibuf_count_get(buf_frame_get_space_id(page), - page_no))); -#endif - bit_offset = (page_no % XDES_DESCRIBED_PER_PAGE) * IBUF_BITS_PER_PAGE - + bit; - - byte_offset = bit_offset / 8; - bit_offset = bit_offset % 8; - - ut_ad(byte_offset + IBUF_BITMAP < UNIV_PAGE_SIZE); - - map_byte = mach_read_from_1(page + IBUF_BITMAP + byte_offset); - - if (bit == IBUF_BITMAP_FREE) { - ut_ad(bit_offset + 1 < 8); - ut_ad(val <= 3); - - map_byte = ut_bit_set_nth(map_byte, bit_offset, val / 2); - map_byte = ut_bit_set_nth(map_byte, bit_offset + 1, val % 2); - } else { - ut_ad(val <= 1); - map_byte = ut_bit_set_nth(map_byte, bit_offset, val); - } - - mlog_write_ulint(page + IBUF_BITMAP + byte_offset, map_byte, - MLOG_1BYTE, mtr); -} - -/************************************************************************ -Calculates the bitmap page number for a given page number. */ -UNIV_INLINE -ulint -ibuf_bitmap_page_no_calc( -/*=====================*/ - /* out: the bitmap page number where - the file page is mapped */ - ulint page_no) /* in: tablespace page number */ -{ - return(FSP_IBUF_BITMAP_OFFSET - + XDES_DESCRIBED_PER_PAGE - * (page_no / XDES_DESCRIBED_PER_PAGE)); -} - -/************************************************************************ -Gets the ibuf bitmap page where the bits describing a given file page are -stored. 
*/ -static -page_t* -ibuf_bitmap_get_map_page( -/*=====================*/ - /* out: bitmap page where the file page is mapped, - that is, the bitmap page containing the descriptor - bits for the file page; the bitmap page is - x-latched */ - ulint space, /* in: space id of the file page */ - ulint page_no,/* in: page number of the file page */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* page; - - page = buf_page_get(space, ibuf_bitmap_page_no_calc(page_no), - RW_X_LATCH, mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_IBUF_BITMAP); -#endif /* UNIV_SYNC_DEBUG */ - - return(page); -} - -/**************************************************************************** -Sets the free bits of the page in the ibuf bitmap. This is done in a separate -mini-transaction, hence this operation does not restrict further work to only -ibuf bitmap operations, which would result if the latch to the bitmap page -were kept. */ -UNIV_INLINE -void -ibuf_set_free_bits_low( -/*===================*/ - ulint type, /* in: index type */ - page_t* page, /* in: index page; free bit is set if the index is - non-clustered and page level is 0 */ - ulint val, /* in: value to set: < 4 */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* bitmap_page; - - if (type & DICT_CLUSTERED) { - - return; - } - - if (btr_page_get_level_low(page) != 0) { - - return; - } - - bitmap_page = ibuf_bitmap_get_map_page( - buf_frame_get_space_id(page), - buf_frame_get_page_no(page), mtr); -#ifdef UNIV_IBUF_DEBUG -# if 0 - fprintf(stderr, - "Setting page no %lu free bits to %lu should be %lu\n", - buf_frame_get_page_no(page), val, - ibuf_index_page_calc_free(page)); -# endif - - ut_a(val <= ibuf_index_page_calc_free(page)); -#endif /* UNIV_IBUF_DEBUG */ - ibuf_bitmap_page_set_bits(bitmap_page, buf_frame_get_page_no(page), - IBUF_BITMAP_FREE, val, mtr); - -} - -/**************************************************************************** -Sets the free bit of the page in the ibuf bitmap. 
This is done in a separate -mini-transaction, hence this operation does not restrict further work to only -ibuf bitmap operations, which would result if the latch to the bitmap page -were kept. */ - -void -ibuf_set_free_bits( -/*===============*/ - ulint type, /* in: index type */ - page_t* page, /* in: index page; free bit is set if the index is - non-clustered and page level is 0 */ - ulint val, /* in: value to set: < 4 */ - ulint max_val)/* in: ULINT_UNDEFINED or a maximum value which - the bits must have before setting; this is for - debugging */ -{ - mtr_t mtr; - page_t* bitmap_page; - - if (type & DICT_CLUSTERED) { - - return; - } - - if (btr_page_get_level_low(page) != 0) { - - return; - } - - mtr_start(&mtr); - - bitmap_page = ibuf_bitmap_get_map_page( - buf_frame_get_space_id(page), buf_frame_get_page_no(page), - &mtr); - - if (max_val != ULINT_UNDEFINED) { -#ifdef UNIV_IBUF_DEBUG - ulint old_val; - - old_val = ibuf_bitmap_page_get_bits( - bitmap_page, buf_frame_get_page_no(page), - IBUF_BITMAP_FREE, &mtr); -# if 0 - if (old_val != max_val) { - fprintf(stderr, - "Ibuf: page %lu old val %lu max val %lu\n", - buf_frame_get_page_no(page), - old_val, max_val); - } -# endif - - ut_a(old_val <= max_val); -#endif - } -#ifdef UNIV_IBUF_DEBUG -# if 0 - fprintf(stderr, "Setting page no %lu free bits to %lu should be %lu\n", - buf_frame_get_page_no(page), val, - ibuf_index_page_calc_free(page)); -# endif - - ut_a(val <= ibuf_index_page_calc_free(page)); -#endif - ibuf_bitmap_page_set_bits(bitmap_page, buf_frame_get_page_no(page), - IBUF_BITMAP_FREE, val, &mtr); - mtr_commit(&mtr); -} - -/**************************************************************************** -Resets the free bits of the page in the ibuf bitmap. This is done in a -separate mini-transaction, hence this operation does not restrict further -work to only ibuf bitmap operations, which would result if the latch to the -bitmap page were kept. 
*/ - -void -ibuf_reset_free_bits_with_type( -/*===========================*/ - ulint type, /* in: index type */ - page_t* page) /* in: index page; free bits are set to 0 if the index - is non-clustered and non-unique and the page level is - 0 */ -{ - ibuf_set_free_bits(type, page, 0, ULINT_UNDEFINED); -} - -/**************************************************************************** -Resets the free bits of the page in the ibuf bitmap. This is done in a -separate mini-transaction, hence this operation does not restrict further -work to solely ibuf bitmap operations, which would result if the latch to -the bitmap page were kept. */ - -void -ibuf_reset_free_bits( -/*=================*/ - dict_index_t* index, /* in: index */ - page_t* page) /* in: index page; free bits are set to 0 if - the index is non-clustered and non-unique and - the page level is 0 */ -{ - ibuf_set_free_bits(index->type, page, 0, ULINT_UNDEFINED); -} - -/************************************************************************** -Updates the free bits for a page to reflect the present state. Does this -in the mtr given, which means that the latching order rules virtually prevent -any further operations for this OS thread until mtr is committed. */ - -void -ibuf_update_free_bits_low( -/*======================*/ - dict_index_t* index, /* in: index */ - page_t* page, /* in: index page */ - ulint max_ins_size, /* in: value of maximum insert size - with reorganize before the latest - operation performed to the page */ - mtr_t* mtr) /* in: mtr */ -{ - ulint before; - ulint after; - - before = ibuf_index_page_calc_free_bits(max_ins_size); - - after = ibuf_index_page_calc_free(page); - - if (before != after) { - ibuf_set_free_bits_low(index->type, page, after, mtr); - } -} - -/************************************************************************** -Updates the free bits for the two pages to reflect the present state. 
Does -this in the mtr given, which means that the latching order rules virtually -prevent any further operations until mtr is committed. */ - -void -ibuf_update_free_bits_for_two_pages_low( -/*====================================*/ - dict_index_t* index, /* in: index */ - page_t* page1, /* in: index page */ - page_t* page2, /* in: index page */ - mtr_t* mtr) /* in: mtr */ -{ - ulint state; - - /* As we have to x-latch two random bitmap pages, we have to acquire - the bitmap mutex to prevent a deadlock with a similar operation - performed by another OS thread. */ - - mutex_enter(&ibuf_bitmap_mutex); - - state = ibuf_index_page_calc_free(page1); - - ibuf_set_free_bits_low(index->type, page1, state, mtr); - - state = ibuf_index_page_calc_free(page2); - - ibuf_set_free_bits_low(index->type, page2, state, mtr); - - mutex_exit(&ibuf_bitmap_mutex); -} - -/************************************************************************** -Returns TRUE if the page is one of the fixed address ibuf pages. */ -UNIV_INLINE -ibool -ibuf_fixed_addr_page( -/*=================*/ - /* out: TRUE if a fixed address ibuf i/o page */ - ulint space, /* in: space id */ - ulint page_no)/* in: page number */ -{ - return((space == 0 && page_no == IBUF_TREE_ROOT_PAGE_NO) - || ibuf_bitmap_page(page_no)); -} - -/*************************************************************************** -Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. 
*/ - -ibool -ibuf_page( -/*======*/ - /* out: TRUE if level 2 or level 3 page */ - ulint space, /* in: space id */ - ulint page_no)/* in: page number */ -{ - page_t* bitmap_page; - mtr_t mtr; - ibool ret; - - if (recv_no_ibuf_operations) { - /* Recovery is running: no ibuf operations should be - performed */ - - return(FALSE); - } - - if (ibuf_fixed_addr_page(space, page_no)) { - - return(TRUE); - } - - if (space != 0) { - /* Currently we only have an ibuf tree in space 0 */ - - return(FALSE); - } - - ut_ad(fil_space_get_type(space) == FIL_TABLESPACE); - - mtr_start(&mtr); - - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr); - - ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF, - &mtr); - mtr_commit(&mtr); - - return(ret); -} - -/*************************************************************************** -Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */ - -ibool -ibuf_page_low( -/*==========*/ - /* out: TRUE if level 2 or level 3 page */ - ulint space, /* in: space id */ - ulint page_no,/* in: page number */ - mtr_t* mtr) /* in: mtr which will contain an x-latch to the - bitmap page if the page is not one of the fixed - address ibuf pages */ -{ - page_t* bitmap_page; - ibool ret; - - if (ibuf_fixed_addr_page(space, page_no)) { - - return(TRUE); - } - - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, mtr); - - ret = ibuf_bitmap_page_get_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF, - mtr); - return(ret); -} - -/************************************************************************ -Returns the page number field of an ibuf record. 
*/ -static -ulint -ibuf_rec_get_page_no( -/*=================*/ - /* out: page number */ - rec_t* rec) /* in: ibuf record */ -{ - byte* field; - ulint len; - - ut_ad(ibuf_inside()); - ut_ad(rec_get_n_fields_old(rec) > 2); - - field = rec_get_nth_field_old(rec, 1, &len); - - if (len == 1) { - /* This is of the >= 4.1.x record format */ - ut_a(trx_sys_multiple_tablespace_format); - - field = rec_get_nth_field_old(rec, 2, &len); - } else { - ut_a(trx_doublewrite_must_reset_space_ids); - ut_a(!trx_sys_multiple_tablespace_format); - - field = rec_get_nth_field_old(rec, 0, &len); - } - - ut_a(len == 4); - - return(mach_read_from_4(field)); -} - -/************************************************************************ -Returns the space id field of an ibuf record. For < 4.1.x format records -returns 0. */ -static -ulint -ibuf_rec_get_space( -/*===============*/ - /* out: space id */ - rec_t* rec) /* in: ibuf record */ -{ - byte* field; - ulint len; - - ut_ad(ibuf_inside()); - ut_ad(rec_get_n_fields_old(rec) > 2); - - field = rec_get_nth_field_old(rec, 1, &len); - - if (len == 1) { - /* This is of the >= 4.1.x record format */ - - ut_a(trx_sys_multiple_tablespace_format); - field = rec_get_nth_field_old(rec, 0, &len); - ut_a(len == 4); - - return(mach_read_from_4(field)); - } - - ut_a(trx_doublewrite_must_reset_space_ids); - ut_a(!trx_sys_multiple_tablespace_format); - - return(0); -} - -/************************************************************************ -Creates a dummy index for inserting a record to a non-clustered index. -*/ -static -dict_index_t* -ibuf_dummy_index_create( -/*====================*/ - /* out: dummy index */ - ulint n, /* in: number of fields */ - ibool comp) /* in: TRUE=use compact record format */ -{ - dict_table_t* table; - dict_index_t* index; - - table = dict_mem_table_create("IBUF_DUMMY", - DICT_HDR_SPACE, n, - comp ? 
DICT_TF_COMPACT : 0); - - index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY", - DICT_HDR_SPACE, 0, n); - - index->table = table; - - /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ - index->cached = TRUE; - - return(index); -} -/************************************************************************ -Add a column to the dummy index */ -static -void -ibuf_dummy_index_add_col( -/*=====================*/ - dict_index_t* index, /* in: dummy index */ - dtype_t* type, /* in: the data type of the column */ - ulint len) /* in: length of the column */ -{ - ulint i = index->table->n_def; - dict_mem_table_add_col(index->table, NULL, NULL, - dtype_get_mtype(type), - dtype_get_prtype(type), - dtype_get_len(type)); - dict_index_add_col(index, index->table, (dict_col_t*) - dict_table_get_nth_col(index->table, i), len); -} -/************************************************************************ -Deallocates a dummy index for inserting a record to a non-clustered index. -*/ -static -void -ibuf_dummy_index_free( -/*==================*/ - dict_index_t* index) /* in: dummy index */ -{ - dict_table_t* table = index->table; - - dict_mem_index_free(index); - dict_mem_table_free(table); -} - -/************************************************************************* -Builds the entry to insert into a non-clustered index when we have the -corresponding record in an ibuf index. */ -static -dtuple_t* -ibuf_build_entry_from_ibuf_rec( -/*===========================*/ - /* out, own: entry to insert to - a non-clustered index; NOTE that - as we copy pointers to fields in - ibuf_rec, the caller must hold a - latch to the ibuf_rec page as long - as the entry is used! 
*/ - rec_t* ibuf_rec, /* in: record in an insert buffer */ - mem_heap_t* heap, /* in: heap where built */ - dict_index_t** pindex) /* out, own: dummy index that - describes the entry */ -{ - dtuple_t* tuple; - dfield_t* field; - ulint n_fields; - byte* types; - const byte* data; - ulint len; - ulint i; - dict_index_t* index; - - data = rec_get_nth_field_old(ibuf_rec, 1, &len); - - if (len > 1) { - /* This a < 4.1.x format record */ - - ut_a(trx_doublewrite_must_reset_space_ids); - ut_a(!trx_sys_multiple_tablespace_format); - - n_fields = rec_get_n_fields_old(ibuf_rec) - 2; - tuple = dtuple_create(heap, n_fields); - types = rec_get_nth_field_old(ibuf_rec, 1, &len); - - ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE); - - for (i = 0; i < n_fields; i++) { - field = dtuple_get_nth_field(tuple, i); - - data = rec_get_nth_field_old(ibuf_rec, i + 2, &len); - - dfield_set_data(field, data, len); - - dtype_read_for_order_and_null_size( - dfield_get_type(field), - types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE); - } - - *pindex = ibuf_dummy_index_create(n_fields, FALSE); - return(tuple); - } - - /* This a >= 4.1.x format record */ - - ut_a(trx_sys_multiple_tablespace_format); - ut_a(*data == 0); - ut_a(rec_get_n_fields_old(ibuf_rec) > 4); - - n_fields = rec_get_n_fields_old(ibuf_rec) - 4; - - tuple = dtuple_create(heap, n_fields); - - types = rec_get_nth_field_old(ibuf_rec, 3, &len); - - ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1); - index = ibuf_dummy_index_create( - n_fields, len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); - - if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) { - /* compact record format */ - len--; - ut_a(*types == 0); - types++; - } - - ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); - - for (i = 0; i < n_fields; i++) { - field = dtuple_get_nth_field(tuple, i); - - data = rec_get_nth_field_old(ibuf_rec, i + 4, &len); - - dfield_set_data(field, data, len); - - dtype_new_read_for_order_and_null_size( - dfield_get_type(field), - types + i * 
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); - - ibuf_dummy_index_add_col(index, dfield_get_type(field), len); - } - - *pindex = index; - return(tuple); -} - -/************************************************************************ -Returns the space taken by a stored non-clustered index entry if converted to -an index record. */ -static -ulint -ibuf_rec_get_volume( -/*================*/ - /* out: size of index record in bytes + an upper - limit of the space taken in the page directory */ - rec_t* ibuf_rec)/* in: ibuf record */ -{ - dtype_t dtype; - ibool new_format = FALSE; - ulint data_size = 0; - ulint n_fields; - byte* types; - byte* data; - ulint len; - ulint i; - - ut_ad(ibuf_inside()); - ut_ad(rec_get_n_fields_old(ibuf_rec) > 2); - - data = rec_get_nth_field_old(ibuf_rec, 1, &len); - - if (len > 1) { - /* < 4.1.x format record */ - - ut_a(trx_doublewrite_must_reset_space_ids); - ut_a(!trx_sys_multiple_tablespace_format); - - n_fields = rec_get_n_fields_old(ibuf_rec) - 2; - - types = rec_get_nth_field_old(ibuf_rec, 1, &len); - - ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE); - } else { - /* >= 4.1.x format record */ - - ut_a(trx_sys_multiple_tablespace_format); - ut_a(*data == 0); - - types = rec_get_nth_field_old(ibuf_rec, 3, &len); - - ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1); - if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) { - /* compact record format */ - ulint volume; - dict_index_t* dummy_index; - mem_heap_t* heap = mem_heap_create(500); - dtuple_t* entry = ibuf_build_entry_from_ibuf_rec( - ibuf_rec, heap, &dummy_index); - volume = rec_get_converted_size(dummy_index, entry); - ibuf_dummy_index_free(dummy_index); - mem_heap_free(heap); - return(volume + page_dir_calc_reserved_space(1)); - } - - n_fields = rec_get_n_fields_old(ibuf_rec) - 4; - - new_format = TRUE; - } - - for (i = 0; i < n_fields; i++) { - if (new_format) { - data = rec_get_nth_field_old(ibuf_rec, i + 4, &len); - - dtype_new_read_for_order_and_null_size( - &dtype, types + i - * 
DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); - } else { - data = rec_get_nth_field_old(ibuf_rec, i + 2, &len); - - dtype_read_for_order_and_null_size( - &dtype, types + i - * DATA_ORDER_NULL_TYPE_BUF_SIZE); - } - - if (len == UNIV_SQL_NULL) { - data_size += dtype_get_sql_null_size(&dtype); - } else { - data_size += len; - } - } - - return(data_size + rec_get_converted_extra_size(data_size, n_fields) - + page_dir_calc_reserved_space(1)); -} - -/************************************************************************* -Builds the tuple to insert to an ibuf tree when we have an entry for a -non-clustered index. */ -static -dtuple_t* -ibuf_entry_build( -/*=============*/ - /* out, own: entry to insert into an ibuf - index tree; NOTE that the original entry - must be kept because we copy pointers to its - fields */ - dict_index_t* index, /* in: non-clustered index */ - dtuple_t* entry, /* in: entry for a non-clustered index */ - ulint space, /* in: space id */ - ulint page_no,/* in: index page number where entry should - be inserted */ - mem_heap_t* heap) /* in: heap into which to build */ -{ - dtuple_t* tuple; - dfield_t* field; - dfield_t* entry_field; - ulint n_fields; - byte* buf; - byte* buf2; - ulint i; - - /* Starting from 4.1.x, we have to build a tuple whose - (1) first field is the space id, - (2) the second field a single marker byte (0) to tell that this - is a new format record, - (3) the third contains the page number, and - (4) the fourth contains the relevent type information of each data - field; the length of this field % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE is - (a) 0 for b-trees in the old format, and - (b) 1 for b-trees in the compact format, the first byte of the field - being the marker (0); - (5) and the rest of the fields are copied from entry. All fields - in the tuple are ordered like the type binary in our insert buffer - tree. 
*/ - - n_fields = dtuple_get_n_fields(entry); - - tuple = dtuple_create(heap, n_fields + 4); - - /* Store the space id in tuple */ - - field = dtuple_get_nth_field(tuple, 0); - - buf = mem_heap_alloc(heap, 4); - - mach_write_to_4(buf, space); - - dfield_set_data(field, buf, 4); - - /* Store the marker byte field in tuple */ - - field = dtuple_get_nth_field(tuple, 1); - - buf = mem_heap_alloc(heap, 1); - - /* We set the marker byte zero */ - - mach_write_to_1(buf, 0); - - dfield_set_data(field, buf, 1); - - /* Store the page number in tuple */ - - field = dtuple_get_nth_field(tuple, 2); - - buf = mem_heap_alloc(heap, 4); - - mach_write_to_4(buf, page_no); - - dfield_set_data(field, buf, 4); - - /* Store the type info in buf2, and add the fields from entry to - tuple */ - buf2 = mem_heap_alloc(heap, n_fields - * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE - + dict_table_is_comp(index->table)); - if (dict_table_is_comp(index->table)) { - *buf2++ = 0; /* write the compact format indicator */ - } - for (i = 0; i < n_fields; i++) { - ulint fixed_len; - const dict_field_t* ifield; - - /* We add 4 below because we have the 4 extra fields at the - start of an ibuf record */ - - field = dtuple_get_nth_field(tuple, i + 4); - entry_field = dtuple_get_nth_field(entry, i); - dfield_copy(field, entry_field); - - ifield = dict_index_get_nth_field(index, i); - /* Prefix index columns of fixed-length columns are of - fixed length. However, in the function call below, - dfield_get_type(entry_field) contains the fixed length - of the column in the clustered index. Replace it with - the fixed length of the secondary index column. 
*/ - fixed_len = ifield->fixed_len; - -#ifdef UNIV_DEBUG - if (fixed_len) { - /* dict_index_add_col() should guarantee these */ - ut_ad(fixed_len <= (ulint) entry_field->type.len); - if (ifield->prefix_len) { - ut_ad(ifield->prefix_len == fixed_len); - } else { - ut_ad(fixed_len - == (ulint) entry_field->type.len); - } - } -#endif /* UNIV_DEBUG */ - - dtype_new_store_for_order_and_null_size( - buf2 + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE, - dfield_get_type(entry_field), fixed_len); - } - - /* Store the type info in buf2 to field 3 of tuple */ - - field = dtuple_get_nth_field(tuple, 3); - - if (dict_table_is_comp(index->table)) { - buf2--; - } - - dfield_set_data(field, buf2, n_fields - * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE - + dict_table_is_comp(index->table)); - /* Set all the types in the new tuple binary */ - - dtuple_set_types_binary(tuple, n_fields + 4); - - return(tuple); -} - -/************************************************************************* -Builds a search tuple used to search buffered inserts for an index page. -This is for < 4.1.x format records */ -static -dtuple_t* -ibuf_search_tuple_build( -/*====================*/ - /* out, own: search tuple */ - ulint space, /* in: space id */ - ulint page_no,/* in: index page number */ - mem_heap_t* heap) /* in: heap into which to build */ -{ - dtuple_t* tuple; - dfield_t* field; - byte* buf; - - ut_a(space == 0); - ut_a(trx_doublewrite_must_reset_space_ids); - ut_a(!trx_sys_multiple_tablespace_format); - - tuple = dtuple_create(heap, 1); - - /* Store the page number in tuple */ - - field = dtuple_get_nth_field(tuple, 0); - - buf = mem_heap_alloc(heap, 4); - - mach_write_to_4(buf, page_no); - - dfield_set_data(field, buf, 4); - - dtuple_set_types_binary(tuple, 1); - - return(tuple); -} - -/************************************************************************* -Builds a search tuple used to search buffered inserts for an index page. -This is for >= 4.1.x format records. 
*/ -static -dtuple_t* -ibuf_new_search_tuple_build( -/*========================*/ - /* out, own: search tuple */ - ulint space, /* in: space id */ - ulint page_no,/* in: index page number */ - mem_heap_t* heap) /* in: heap into which to build */ -{ - dtuple_t* tuple; - dfield_t* field; - byte* buf; - - ut_a(trx_sys_multiple_tablespace_format); - - tuple = dtuple_create(heap, 3); - - /* Store the space id in tuple */ - - field = dtuple_get_nth_field(tuple, 0); - - buf = mem_heap_alloc(heap, 4); - - mach_write_to_4(buf, space); - - dfield_set_data(field, buf, 4); - - /* Store the new format record marker byte */ - - field = dtuple_get_nth_field(tuple, 1); - - buf = mem_heap_alloc(heap, 1); - - mach_write_to_1(buf, 0); - - dfield_set_data(field, buf, 1); - - /* Store the page number in tuple */ - - field = dtuple_get_nth_field(tuple, 2); - - buf = mem_heap_alloc(heap, 4); - - mach_write_to_4(buf, page_no); - - dfield_set_data(field, buf, 4); - - dtuple_set_types_binary(tuple, 3); - - return(tuple); -} - -/************************************************************************* -Checks if there are enough pages in the free list of the ibuf tree that we -dare to start a pessimistic insert to the insert buffer. */ -UNIV_INLINE -ibool -ibuf_data_enough_free_for_insert( -/*=============================*/ - /* out: TRUE if enough free pages in list */ - ibuf_data_t* data) /* in: ibuf data for the space */ -{ - ut_ad(mutex_own(&ibuf_mutex)); - - /* We want a big margin of free pages, because a B-tree can sometimes - grow in size also if records are deleted from it, as the node pointers - can change, and we must make sure that we are able to delete the - inserts buffered for pages that we read to the buffer pool, without - any risk of running out of free space in the insert buffer. 
*/ - - if (data->free_list_len >= data->size / 2 + 3 * data->height) { - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************* -Checks if there are enough pages in the free list of the ibuf tree that we -should remove them and free to the file space management. */ -UNIV_INLINE -ibool -ibuf_data_too_much_free( -/*====================*/ - /* out: TRUE if enough free pages in list */ - ibuf_data_t* data) /* in: ibuf data for the space */ -{ - ut_ad(mutex_own(&ibuf_mutex)); - - return(data->free_list_len >= 3 + data->size / 2 + 3 * data->height); -} - -/************************************************************************* -Allocates a new page from the ibuf file segment and adds it to the free -list. */ -static -ulint -ibuf_add_free_page( -/*===============*/ - /* out: DB_SUCCESS, or DB_STRONG_FAIL - if no space left */ - ulint space, /* in: space id */ - ibuf_data_t* ibuf_data) /* in: ibuf data for the space */ -{ - mtr_t mtr; - page_t* header_page; - ulint page_no; - page_t* page; - page_t* root; - page_t* bitmap_page; - - ut_a(space == 0); - - mtr_start(&mtr); - - /* Acquire the fsp latch before the ibuf header, obeying the latching - order */ - mtr_x_lock(fil_space_get_latch(space), &mtr); - - header_page = ibuf_header_page_get(space, &mtr); - - /* Allocate a new page: NOTE that if the page has been a part of a - non-clustered index which has subsequently been dropped, then the - page may have buffered inserts in the insert buffer, and these - should be deleted from there. These get deleted when the page - allocation creates the page in buffer. Thus the call below may end - up calling the insert buffer routines and, as we yet have no latches - to insert buffer tree pages, these routines can run without a risk - of a deadlock. This is the reason why we created a special ibuf - header page apart from the ibuf tree. 
*/ - - page_no = fseg_alloc_free_page(header_page + IBUF_HEADER - + IBUF_TREE_SEG_HEADER, 0, FSP_UP, - &mtr); - if (page_no == FIL_NULL) { - mtr_commit(&mtr); - - return(DB_STRONG_FAIL); - } - - page = buf_page_get(space, page_no, RW_X_LATCH, &mtr); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_TREE_NODE_NEW); -#endif /* UNIV_SYNC_DEBUG */ - - ibuf_enter(); - - mutex_enter(&ibuf_mutex); - - root = ibuf_tree_root_get(ibuf_data, space, &mtr); - - /* Add the page to the free list and update the ibuf size data */ - - flst_add_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr); - - mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_IBUF_FREE_LIST, - MLOG_2BYTES, &mtr); - - ibuf_data->seg_size++; - ibuf_data->free_list_len++; - - /* Set the bit indicating that this page is now an ibuf tree page - (level 2 page) */ - - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr); - - ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF, - TRUE, &mtr); - mtr_commit(&mtr); - - mutex_exit(&ibuf_mutex); - - ibuf_exit(); - - return(DB_SUCCESS); -} - -/************************************************************************* -Removes a page from the free list and frees it to the fsp system. 
*/ -static -void -ibuf_remove_free_page( -/*==================*/ - ulint space, /* in: space id */ - ibuf_data_t* ibuf_data) /* in: ibuf data for the space */ -{ - mtr_t mtr; - mtr_t mtr2; - page_t* header_page; - ulint page_no; - page_t* page; - page_t* root; - page_t* bitmap_page; - - ut_a(space == 0); - - mtr_start(&mtr); - - /* Acquire the fsp latch before the ibuf header, obeying the latching - order */ - mtr_x_lock(fil_space_get_latch(space), &mtr); - - header_page = ibuf_header_page_get(space, &mtr); - - /* Prevent pessimistic inserts to insert buffer trees for a while */ - mutex_enter(&ibuf_pessimistic_insert_mutex); - - ibuf_enter(); - - mutex_enter(&ibuf_mutex); - - if (!ibuf_data_too_much_free(ibuf_data)) { - - mutex_exit(&ibuf_mutex); - - ibuf_exit(); - - mutex_exit(&ibuf_pessimistic_insert_mutex); - - mtr_commit(&mtr); - - return; - } - - mtr_start(&mtr2); - - root = ibuf_tree_root_get(ibuf_data, space, &mtr2); - - page_no = flst_get_last(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - &mtr2) - .page; - - /* NOTE that we must release the latch on the ibuf tree root - because in fseg_free_page we access level 1 pages, and the root - is a level 2 page. */ - - mtr_commit(&mtr2); - mutex_exit(&ibuf_mutex); - - ibuf_exit(); - - /* Since pessimistic inserts were prevented, we know that the - page is still in the free list. NOTE that also deletes may take - pages from the free list, but they take them from the start, and - the free list was so long that they cannot have taken the last - page from it. 
*/ - - fseg_free_page(header_page + IBUF_HEADER + IBUF_TREE_SEG_HEADER, - space, page_no, &mtr); -#ifdef UNIV_DEBUG_FILE_ACCESSES - buf_page_reset_file_page_was_freed(space, page_no); -#endif - ibuf_enter(); - - mutex_enter(&ibuf_mutex); - - root = ibuf_tree_root_get(ibuf_data, space, &mtr); - - ut_ad(page_no == flst_get_last(root + PAGE_HEADER - + PAGE_BTR_IBUF_FREE_LIST, &mtr) - .page); - - page = buf_page_get(space, page_no, RW_X_LATCH, &mtr); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_TREE_NODE); -#endif /* UNIV_SYNC_DEBUG */ - - /* Remove the page from the free list and update the ibuf size data */ - - flst_remove(root + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST, - page + PAGE_HEADER + PAGE_BTR_IBUF_FREE_LIST_NODE, &mtr); - - ibuf_data->seg_size--; - ibuf_data->free_list_len--; - - mutex_exit(&ibuf_pessimistic_insert_mutex); - - /* Set the bit indicating that this page is no more an ibuf tree page - (level 2 page) */ - - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr); - - ibuf_bitmap_page_set_bits(bitmap_page, page_no, IBUF_BITMAP_IBUF, - FALSE, &mtr); -#ifdef UNIV_DEBUG_FILE_ACCESSES - buf_page_set_file_page_was_freed(space, page_no); -#endif - mtr_commit(&mtr); - - mutex_exit(&ibuf_mutex); - - ibuf_exit(); -} - -/*************************************************************************** -Frees excess pages from the ibuf free list. This function is called when an OS -thread calls fsp services to allocate a new file segment, or a new page to a -file segment, and the thread did not own the fsp latch before this call. 
*/ - -void -ibuf_free_excess_pages( -/*===================*/ - ulint space) /* in: space id */ -{ - ibuf_data_t* ibuf_data; - ulint i; - - if (space != 0) { - fprintf(stderr, - "InnoDB: Error: calling ibuf_free_excess_pages" - " for space %lu\n", (ulong) space); - return; - } - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(fil_space_get_latch(space), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(rw_lock_get_x_lock_count(fil_space_get_latch(space)) == 1); - ut_ad(!ibuf_inside()); - - /* NOTE: We require that the thread did not own the latch before, - because then we know that we can obey the correct latching order - for ibuf latches */ - - ibuf_data = fil_space_get_ibuf_data(space); - - if (ibuf_data == NULL) { - /* Not yet initialized */ - -#if 0 /* defined UNIV_DEBUG */ - fprintf(stderr, - "Ibuf for space %lu not yet initialized\n", space); -#endif - - return; - } - - /* Free at most a few pages at a time, so that we do not delay the - requested service too much */ - - for (i = 0; i < 4; i++) { - - mutex_enter(&ibuf_mutex); - - if (!ibuf_data_too_much_free(ibuf_data)) { - - mutex_exit(&ibuf_mutex); - - return; - } - - mutex_exit(&ibuf_mutex); - - ibuf_remove_free_page(space, ibuf_data); - } -} - -/************************************************************************* -Reads page numbers from a leaf in an ibuf tree. 
*/ -static -ulint -ibuf_get_merge_page_nos( -/*====================*/ - /* out: a lower limit for the combined volume - of records which will be merged */ - ibool contract,/* in: TRUE if this function is called to - contract the tree, FALSE if this is called - when a single page becomes full and we look - if it pays to read also nearby pages */ - rec_t* rec, /* in: record from which we read up and down - in the chain of records */ - ulint* space_ids,/* in/out: space id's of the pages */ - ib_longlong* space_versions,/* in/out: tablespace version - timestamps; used to prevent reading in old - pages after DISCARD + IMPORT tablespace */ - ulint* page_nos,/* in/out: buffer for at least - IBUF_MAX_N_PAGES_MERGED many page numbers; - the page numbers are in an ascending order */ - ulint* n_stored)/* out: number of page numbers stored to - page_nos in this function */ -{ - ulint prev_page_no; - ulint prev_space_id; - ulint first_page_no; - ulint first_space_id; - ulint rec_page_no; - ulint rec_space_id; - ulint sum_volumes; - ulint volume_for_page; - ulint rec_volume; - ulint limit; - ulint n_pages; - - *n_stored = 0; - - limit = ut_min(IBUF_MAX_N_PAGES_MERGED, buf_pool->curr_size / 4); - - if (page_rec_is_supremum(rec)) { - - rec = page_rec_get_prev(rec); - } - - if (page_rec_is_infimum(rec)) { - - rec = page_rec_get_next(rec); - } - - if (page_rec_is_supremum(rec)) { - - return(0); - } - - first_page_no = ibuf_rec_get_page_no(rec); - first_space_id = ibuf_rec_get_space(rec); - n_pages = 0; - prev_page_no = 0; - prev_space_id = 0; - - /* Go backwards from the first rec until we reach the border of the - 'merge area', or the page start or the limit of storeable pages is - reached */ - - while (!page_rec_is_infimum(rec) && UNIV_LIKELY(n_pages < limit)) { - - rec_page_no = ibuf_rec_get_page_no(rec); - rec_space_id = ibuf_rec_get_space(rec); - - if (rec_space_id != first_space_id - || rec_page_no / IBUF_MERGE_AREA - != first_page_no / IBUF_MERGE_AREA) { - - break; - } - - if 
(rec_page_no != prev_page_no - || rec_space_id != prev_space_id) { - n_pages++; - } - - prev_page_no = rec_page_no; - prev_space_id = rec_space_id; - - rec = page_rec_get_prev(rec); - } - - rec = page_rec_get_next(rec); - - /* At the loop start there is no prev page; we mark this with a pair - of space id, page no (0, 0) for which there can never be entries in - the insert buffer */ - - prev_page_no = 0; - prev_space_id = 0; - sum_volumes = 0; - volume_for_page = 0; - - while (*n_stored < limit) { - if (page_rec_is_supremum(rec)) { - /* When no more records available, mark this with - another 'impossible' pair of space id, page no */ - rec_page_no = 1; - rec_space_id = 0; - } else { - rec_page_no = ibuf_rec_get_page_no(rec); - rec_space_id = ibuf_rec_get_space(rec); - ut_ad(rec_page_no > IBUF_TREE_ROOT_PAGE_NO); - } - -#ifdef UNIV_IBUF_DEBUG - ut_a(*n_stored < IBUF_MAX_N_PAGES_MERGED); -#endif - if ((rec_space_id != prev_space_id - || rec_page_no != prev_page_no) - && (prev_space_id != 0 || prev_page_no != 0)) { - - if ((prev_page_no == first_page_no - && prev_space_id == first_space_id) - || contract - || (volume_for_page - > ((IBUF_MERGE_THRESHOLD - 1) - * 4 * UNIV_PAGE_SIZE - / IBUF_PAGE_SIZE_PER_FREE_SPACE) - / IBUF_MERGE_THRESHOLD)) { - - space_ids[*n_stored] = prev_space_id; - space_versions[*n_stored] - = fil_space_get_version(prev_space_id); - page_nos[*n_stored] = prev_page_no; - - (*n_stored)++; - - sum_volumes += volume_for_page; - } - - if (rec_space_id != first_space_id - || rec_page_no / IBUF_MERGE_AREA - != first_page_no / IBUF_MERGE_AREA) { - - break; - } - - volume_for_page = 0; - } - - if (rec_page_no == 1 && rec_space_id == 0) { - /* Supremum record */ - - break; - } - - rec_volume = ibuf_rec_get_volume(rec); - - volume_for_page += rec_volume; - - prev_page_no = rec_page_no; - prev_space_id = rec_space_id; - - rec = page_rec_get_next(rec); - } - -#ifdef UNIV_IBUF_DEBUG - ut_a(*n_stored <= IBUF_MAX_N_PAGES_MERGED); -#endif -#if 0 - fprintf(stderr, 
"Ibuf merge batch %lu pages %lu volume\n", - *n_stored, sum_volumes); -#endif - return(sum_volumes); -} - -/************************************************************************* -Contracts insert buffer trees by reading pages to the buffer pool. */ -static -ulint -ibuf_contract_ext( -/*==============*/ - /* out: a lower limit for the combined size in bytes - of entries which will be merged from ibuf trees to the - pages read, 0 if ibuf is empty */ - ulint* n_pages,/* out: number of pages to which merged */ - ibool sync) /* in: TRUE if the caller wants to wait for the - issued read with the highest tablespace address - to complete */ -{ - ulint rnd_pos; - ibuf_data_t* data; - btr_pcur_t pcur; - ulint space; - ibool all_trees_empty; - ulint page_nos[IBUF_MAX_N_PAGES_MERGED]; - ulint space_ids[IBUF_MAX_N_PAGES_MERGED]; - ib_longlong space_versions[IBUF_MAX_N_PAGES_MERGED]; - ulint n_stored; - ulint sum_sizes; - mtr_t mtr; - - *n_pages = 0; -loop: - ut_ad(!ibuf_inside()); - - mutex_enter(&ibuf_mutex); - - ut_ad(ibuf_validate_low()); - - /* Choose an ibuf tree at random (though there really is only one tree - in the current implementation) */ - ibuf_rnd += 865558671; - - rnd_pos = ibuf_rnd % ibuf->size; - - all_trees_empty = TRUE; - - data = UT_LIST_GET_FIRST(ibuf->data_list); - - for (;;) { - if (!data->empty) { - all_trees_empty = FALSE; - - if (rnd_pos < data->size) { - - break; - } - - rnd_pos -= data->size; - } - - data = UT_LIST_GET_NEXT(data_list, data); - - if (data == NULL) { - if (all_trees_empty) { - mutex_exit(&ibuf_mutex); - - return(0); - } - - data = UT_LIST_GET_FIRST(ibuf->data_list); - } - } - - ut_ad(data); - - space = data->index->space; - - ut_a(space == 0); /* We currently only have an ibuf tree in - space 0 */ - mtr_start(&mtr); - - ibuf_enter(); - - /* Open a cursor to a randomly chosen leaf of the tree, at a random - position within the leaf */ - - btr_pcur_open_at_rnd_pos(data->index, BTR_SEARCH_LEAF, &pcur, &mtr); - - if (0 == 
page_get_n_recs(btr_pcur_get_page(&pcur))) { - - /* This tree is empty */ - - data->empty = TRUE; - - ibuf_exit(); - - mtr_commit(&mtr); - btr_pcur_close(&pcur); - - mutex_exit(&ibuf_mutex); - - goto loop; - } - - mutex_exit(&ibuf_mutex); - - sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur), - space_ids, space_versions, - page_nos, &n_stored); -#if 0 /* defined UNIV_IBUF_DEBUG */ - fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n", - sync, n_stored, sum_sizes); -#endif - ibuf_exit(); - - mtr_commit(&mtr); - btr_pcur_close(&pcur); - - buf_read_ibuf_merge_pages(sync, space_ids, space_versions, page_nos, - n_stored); - *n_pages = n_stored; - - return(sum_sizes + 1); -} - -/************************************************************************* -Contracts insert buffer trees by reading pages to the buffer pool. */ - -ulint -ibuf_contract( -/*==========*/ - /* out: a lower limit for the combined size in bytes - of entries which will be merged from ibuf trees to the - pages read, 0 if ibuf is empty */ - ibool sync) /* in: TRUE if the caller wants to wait for the - issued read with the highest tablespace address - to complete */ -{ - ulint n_pages; - - return(ibuf_contract_ext(&n_pages, sync)); -} - -/************************************************************************* -Contracts insert buffer trees by reading pages to the buffer pool. 
*/ - -ulint -ibuf_contract_for_n_pages( -/*======================*/ - /* out: a lower limit for the combined size in bytes - of entries which will be merged from ibuf trees to the - pages read, 0 if ibuf is empty */ - ibool sync, /* in: TRUE if the caller wants to wait for the - issued read with the highest tablespace address - to complete */ - ulint n_pages)/* in: try to read at least this many pages to - the buffer pool and merge the ibuf contents to - them */ -{ - ulint sum_bytes = 0; - ulint sum_pages = 0; - ulint n_bytes; - ulint n_pag2; - - while (sum_pages < n_pages) { - n_bytes = ibuf_contract_ext(&n_pag2, sync); - - if (n_bytes == 0) { - return(sum_bytes); - } - - sum_bytes += n_bytes; - sum_pages += n_pag2; - } - - return(sum_bytes); -} - -/************************************************************************* -Contract insert buffer trees after insert if they are too big. */ -UNIV_INLINE -void -ibuf_contract_after_insert( -/*=======================*/ - ulint entry_size) /* in: size of a record which was inserted - into an ibuf tree */ -{ - ibool sync; - ulint sum_sizes; - ulint size; - - mutex_enter(&ibuf_mutex); - - if (ibuf->size < ibuf->max_size + IBUF_CONTRACT_ON_INSERT_NON_SYNC) { - mutex_exit(&ibuf_mutex); - - return; - } - - sync = FALSE; - - if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_ON_INSERT_SYNC) { - - sync = TRUE; - } - - mutex_exit(&ibuf_mutex); - - /* Contract at least entry_size many bytes */ - sum_sizes = 0; - size = 1; - - while ((size > 0) && (sum_sizes < entry_size)) { - - size = ibuf_contract(sync); - sum_sizes += size; - } -} - -/************************************************************************* -Gets an upper limit for the combined size of entries buffered in the insert -buffer for a given page. 
*/ - -ulint -ibuf_get_volume_buffered( -/*=====================*/ - /* out: upper limit for the volume of - buffered inserts for the index page, in bytes; - we may also return UNIV_PAGE_SIZE, if the - entries for the index page span on several - pages in the insert buffer */ - btr_pcur_t* pcur, /* in: pcur positioned at a place in an - insert buffer tree where we would insert an - entry for the index page whose number is - page_no, latch mode has to be BTR_MODIFY_PREV - or BTR_MODIFY_TREE */ - ulint space, /* in: space id */ - ulint page_no,/* in: page number of an index page */ - mtr_t* mtr) /* in: mtr */ -{ - ulint volume; - rec_t* rec; - page_t* page; - ulint prev_page_no; - page_t* prev_page; - ulint next_page_no; - page_t* next_page; - - ut_a(trx_sys_multiple_tablespace_format); - - ut_ad((pcur->latch_mode == BTR_MODIFY_PREV) - || (pcur->latch_mode == BTR_MODIFY_TREE)); - - /* Count the volume of records earlier in the alphabetical order than - pcur */ - - volume = 0; - - rec = btr_pcur_get_rec(pcur); - - page = buf_frame_align(rec); - - if (page_rec_is_supremum(rec)) { - rec = page_rec_get_prev(rec); - } - - for (;;) { - if (page_rec_is_infimum(rec)) { - - break; - } - - if (page_no != ibuf_rec_get_page_no(rec) - || space != ibuf_rec_get_space(rec)) { - - goto count_later; - } - - volume += ibuf_rec_get_volume(rec); - - rec = page_rec_get_prev(rec); - } - - /* Look at the previous page */ - - prev_page_no = btr_page_get_prev(page, mtr); - - if (prev_page_no == FIL_NULL) { - - goto count_later; - } - - prev_page = buf_page_get(0, prev_page_no, RW_X_LATCH, mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_next(prev_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(prev_page, SYNC_TREE_NODE); -#endif /* UNIV_SYNC_DEBUG */ - - rec = page_get_supremum_rec(prev_page); - rec = page_rec_get_prev(rec); - - for (;;) { - if (page_rec_is_infimum(rec)) { - - /* We cannot go to yet a previous page, 
because we - do not have the x-latch on it, and cannot acquire one - because of the latching order: we have to give up */ - - return(UNIV_PAGE_SIZE); - } - - if (page_no != ibuf_rec_get_page_no(rec) - || space != ibuf_rec_get_space(rec)) { - - goto count_later; - } - - volume += ibuf_rec_get_volume(rec); - - rec = page_rec_get_prev(rec); - } - -count_later: - rec = btr_pcur_get_rec(pcur); - - if (!page_rec_is_supremum(rec)) { - rec = page_rec_get_next(rec); - } - - for (;;) { - if (page_rec_is_supremum(rec)) { - - break; - } - - if (page_no != ibuf_rec_get_page_no(rec) - || space != ibuf_rec_get_space(rec)) { - - return(volume); - } - - volume += ibuf_rec_get_volume(rec); - - rec = page_rec_get_next(rec); - } - - /* Look at the next page */ - - next_page_no = btr_page_get_next(page, mtr); - - if (next_page_no == FIL_NULL) { - - return(volume); - } - - next_page = buf_page_get(0, next_page_no, RW_X_LATCH, mtr); -#ifdef UNIV_BTR_DEBUG - ut_a(btr_page_get_prev(next_page, mtr) - == buf_frame_get_page_no(page)); -#endif /* UNIV_BTR_DEBUG */ - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(next_page, SYNC_TREE_NODE); -#endif /* UNIV_SYNC_DEBUG */ - - rec = page_get_infimum_rec(next_page); - rec = page_rec_get_next(rec); - - for (;;) { - if (page_rec_is_supremum(rec)) { - - /* We give up */ - - return(UNIV_PAGE_SIZE); - } - - if (page_no != ibuf_rec_get_page_no(rec) - || space != ibuf_rec_get_space(rec)) { - - return(volume); - } - - volume += ibuf_rec_get_volume(rec); - - rec = page_rec_get_next(rec); - } -} - -/************************************************************************* -Reads the biggest tablespace id from the high end of the insert buffer -tree and updates the counter in fil_system. 
*/ - -void -ibuf_update_max_tablespace_id(void) -/*===============================*/ -{ - ulint max_space_id; - rec_t* rec; - byte* field; - ulint len; - ibuf_data_t* ibuf_data; - dict_index_t* ibuf_index; - btr_pcur_t pcur; - mtr_t mtr; - - ibuf_data = fil_space_get_ibuf_data(0); - - ibuf_index = ibuf_data->index; - ut_a(!dict_table_is_comp(ibuf_index->table)); - - ibuf_enter(); - - mtr_start(&mtr); - - btr_pcur_open_at_index_side(FALSE, ibuf_index, BTR_SEARCH_LEAF, - &pcur, TRUE, &mtr); - btr_pcur_move_to_prev(&pcur, &mtr); - - if (btr_pcur_is_before_first_on_page(&pcur, &mtr)) { - /* The tree is empty */ - - max_space_id = 0; - } else { - rec = btr_pcur_get_rec(&pcur); - - field = rec_get_nth_field_old(rec, 0, &len); - - ut_a(len == 4); - - max_space_id = mach_read_from_4(field); - } - - mtr_commit(&mtr); - ibuf_exit(); - - /* printf("Maximum space id in insert buffer %lu\n", max_space_id); */ - - fil_set_max_space_id_if_bigger(max_space_id); -} - -/************************************************************************* -Makes an index insert to the insert buffer, instead of directly to the disk -page, if this is possible. 
*/ -static -ulint -ibuf_insert_low( -/*============*/ - /* out: DB_SUCCESS, DB_FAIL, DB_STRONG_FAIL */ - ulint mode, /* in: BTR_MODIFY_PREV or BTR_MODIFY_TREE */ - dtuple_t* entry, /* in: index entry to insert */ - dict_index_t* index, /* in: index where to insert; must not be - unique or clustered */ - ulint space, /* in: space id where to insert */ - ulint page_no,/* in: page number where to insert */ - que_thr_t* thr) /* in: query thread */ -{ - big_rec_t* dummy_big_rec; - ulint entry_size; - btr_pcur_t pcur; - btr_cur_t* cursor; - dtuple_t* ibuf_entry; - mem_heap_t* heap; - ulint buffered; - rec_t* ins_rec; - ibool old_bit_value; - page_t* bitmap_page; - ibuf_data_t* ibuf_data; - dict_index_t* ibuf_index; - page_t* root; - ulint err; - ibool do_merge; - ulint space_ids[IBUF_MAX_N_PAGES_MERGED]; - ib_longlong space_versions[IBUF_MAX_N_PAGES_MERGED]; - ulint page_nos[IBUF_MAX_N_PAGES_MERGED]; - ulint n_stored; - ulint bits; - mtr_t mtr; - mtr_t bitmap_mtr; - - ut_a(!(index->type & DICT_CLUSTERED)); - ut_ad(dtuple_check_typed(entry)); - - ut_a(trx_sys_multiple_tablespace_format); - - do_merge = FALSE; - - /* Currently the insert buffer of space 0 takes care of inserts to all - tablespaces */ - - ibuf_data = fil_space_get_ibuf_data(0); - - ibuf_index = ibuf_data->index; - - mutex_enter(&ibuf_mutex); - - if (ibuf->size >= ibuf->max_size + IBUF_CONTRACT_DO_NOT_INSERT) { - /* Insert buffer is now too big, contract it but do not try - to insert */ - - mutex_exit(&ibuf_mutex); - -#ifdef UNIV_IBUF_DEBUG - fputs("Ibuf too big\n", stderr); -#endif - /* Use synchronous contract (== TRUE) */ - ibuf_contract(TRUE); - - return(DB_STRONG_FAIL); - } - - mutex_exit(&ibuf_mutex); - - if (mode == BTR_MODIFY_TREE) { - mutex_enter(&ibuf_pessimistic_insert_mutex); - - ibuf_enter(); - - mutex_enter(&ibuf_mutex); - - while (!ibuf_data_enough_free_for_insert(ibuf_data)) { - - mutex_exit(&ibuf_mutex); - - ibuf_exit(); - - mutex_exit(&ibuf_pessimistic_insert_mutex); - - err = 
ibuf_add_free_page(0, ibuf_data); - - if (err == DB_STRONG_FAIL) { - - return(err); - } - - mutex_enter(&ibuf_pessimistic_insert_mutex); - - ibuf_enter(); - - mutex_enter(&ibuf_mutex); - } - } else { - ibuf_enter(); - } - - entry_size = rec_get_converted_size(index, entry); - - heap = mem_heap_create(512); - - /* Build the entry which contains the space id and the page number as - the first fields and the type information for other fields, and which - will be inserted to the insert buffer. */ - - ibuf_entry = ibuf_entry_build(index, entry, space, page_no, heap); - - /* Open a cursor to the insert buffer tree to calculate if we can add - the new entry to it without exceeding the free space limit for the - page. */ - - mtr_start(&mtr); - - btr_pcur_open(ibuf_index, ibuf_entry, PAGE_CUR_LE, mode, &pcur, &mtr); - - /* Find out the volume of already buffered inserts for the same index - page */ - buffered = ibuf_get_volume_buffered(&pcur, space, page_no, &mtr); - -#ifdef UNIV_IBUF_DEBUG - ut_a((buffered == 0) || ibuf_count_get(space, page_no)); -#endif - mtr_start(&bitmap_mtr); - - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &bitmap_mtr); - - /* We check if the index page is suitable for buffered entries */ - - if (buf_page_peek(space, page_no) - || lock_rec_expl_exist_on_page(space, page_no)) { - err = DB_STRONG_FAIL; - - mtr_commit(&bitmap_mtr); - - goto function_exit; - } - - bits = ibuf_bitmap_page_get_bits(bitmap_page, page_no, - IBUF_BITMAP_FREE, &bitmap_mtr); - - if (buffered + entry_size + page_dir_calc_reserved_space(1) - > ibuf_index_page_calc_free_from_bits(bits)) { - mtr_commit(&bitmap_mtr); - - /* It may not fit */ - err = DB_STRONG_FAIL; - - do_merge = TRUE; - - ibuf_get_merge_page_nos(FALSE, btr_pcur_get_rec(&pcur), - space_ids, space_versions, - page_nos, &n_stored); - goto function_exit; - } - - /* Set the bitmap bit denoting that the insert buffer contains - buffered entries for this index page, if the bit is not set yet */ - - old_bit_value 
= ibuf_bitmap_page_get_bits(bitmap_page, page_no, - IBUF_BITMAP_BUFFERED, - &bitmap_mtr); - if (!old_bit_value) { - ibuf_bitmap_page_set_bits(bitmap_page, page_no, - IBUF_BITMAP_BUFFERED, TRUE, - &bitmap_mtr); - } - - mtr_commit(&bitmap_mtr); - - cursor = btr_pcur_get_btr_cur(&pcur); - - if (mode == BTR_MODIFY_PREV) { - err = btr_cur_optimistic_insert(BTR_NO_LOCKING_FLAG, cursor, - ibuf_entry, &ins_rec, - &dummy_big_rec, thr, - &mtr); - if (err == DB_SUCCESS) { - /* Update the page max trx id field */ - page_update_max_trx_id(buf_frame_align(ins_rec), - thr_get_trx(thr)->id); - } - } else { - ut_ad(mode == BTR_MODIFY_TREE); - - /* We acquire an x-latch to the root page before the insert, - because a pessimistic insert releases the tree x-latch, - which would cause the x-latching of the root after that to - break the latching order. */ - - root = ibuf_tree_root_get(ibuf_data, 0, &mtr); - - err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG - | BTR_NO_UNDO_LOG_FLAG, - cursor, - ibuf_entry, &ins_rec, - &dummy_big_rec, thr, - &mtr); - if (err == DB_SUCCESS) { - /* Update the page max trx id field */ - page_update_max_trx_id(buf_frame_align(ins_rec), - thr_get_trx(thr)->id); - } - - ibuf_data_sizes_update(ibuf_data, root, &mtr); - } - -function_exit: -#ifdef UNIV_IBUF_DEBUG - if (err == DB_SUCCESS) { - fprintf(stderr, - "Incrementing ibuf count of space %lu page %lu\n" - "from %lu by 1\n", space, page_no, - ibuf_count_get(space, page_no)); - - ibuf_count_set(space, page_no, - ibuf_count_get(space, page_no) + 1); - } -#endif - if (mode == BTR_MODIFY_TREE) { - ut_ad(ibuf_validate_low()); - - mutex_exit(&ibuf_mutex); - mutex_exit(&ibuf_pessimistic_insert_mutex); - } - - mtr_commit(&mtr); - btr_pcur_close(&pcur); - ibuf_exit(); - - mem_heap_free(heap); - - mutex_enter(&ibuf_mutex); - - if (err == DB_SUCCESS) { - ibuf_data->empty = FALSE; - ibuf_data->n_inserts++; - } - - mutex_exit(&ibuf_mutex); - - if ((mode == BTR_MODIFY_TREE) && (err == DB_SUCCESS)) { - 
ibuf_contract_after_insert(entry_size); - } - - if (do_merge) { -#ifdef UNIV_IBUF_DEBUG - ut_a(n_stored <= IBUF_MAX_N_PAGES_MERGED); -#endif - buf_read_ibuf_merge_pages(FALSE, space_ids, space_versions, - page_nos, n_stored); - } - - return(err); -} - -/************************************************************************* -Makes an index insert to the insert buffer, instead of directly to the disk -page, if this is possible. Does not do insert if the index is clustered -or unique. */ - -ibool -ibuf_insert( -/*========*/ - /* out: TRUE if success */ - dtuple_t* entry, /* in: index entry to insert */ - dict_index_t* index, /* in: index where to insert */ - ulint space, /* in: space id where to insert */ - ulint page_no,/* in: page number where to insert */ - que_thr_t* thr) /* in: query thread */ -{ - ulint err; - - ut_a(trx_sys_multiple_tablespace_format); - ut_ad(dtuple_check_typed(entry)); - - ut_a(!(index->type & DICT_CLUSTERED)); - - if (rec_get_converted_size(index, entry) - >= (page_get_free_space_of_empty(dict_table_is_comp(index->table)) - / 2)) { - return(FALSE); - } - - err = ibuf_insert_low(BTR_MODIFY_PREV, entry, index, space, page_no, - thr); - if (err == DB_FAIL) { - err = ibuf_insert_low(BTR_MODIFY_TREE, entry, index, space, - page_no, thr); - } - - if (err == DB_SUCCESS) { -#ifdef UNIV_IBUF_DEBUG - /* fprintf(stderr, "Ibuf insert for page no %lu of index %s\n", - page_no, index->name); */ -#endif - return(TRUE); - - } else { - ut_a(err == DB_STRONG_FAIL); - - return(FALSE); - } -} - -/************************************************************************ -During merge, inserts to an index page a secondary index entry extracted -from the insert buffer. 
*/ -static -void -ibuf_insert_to_index_page( -/*======================*/ - dtuple_t* entry, /* in: buffered entry to insert */ - page_t* page, /* in: index page where the buffered entry - should be placed */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr) /* in: mtr */ -{ - page_cur_t page_cur; - ulint low_match; - rec_t* rec; - page_t* bitmap_page; - ulint old_bits; - - ut_ad(ibuf_inside()); - ut_ad(dtuple_check_typed(entry)); - - if (UNIV_UNLIKELY(dict_table_is_comp(index->table) - != (ibool)!!page_is_comp(page))) { - fputs("InnoDB: Trying to insert a record from" - " the insert buffer to an index page\n" - "InnoDB: but the 'compact' flag does not match!\n", - stderr); - goto dump; - } - - rec = page_rec_get_next(page_get_infimum_rec(page)); - - if (UNIV_UNLIKELY(rec_get_n_fields(rec, index) - != dtuple_get_n_fields(entry))) { - fputs("InnoDB: Trying to insert a record from" - " the insert buffer to an index page\n" - "InnoDB: but the number of fields does not match!\n", - stderr); -dump: - buf_page_print(page); - - dtuple_print(stderr, entry); - - fputs("InnoDB: The table where where" - " this index record belongs\n" - "InnoDB: is now probably corrupt." 
- " Please run CHECK TABLE on\n" - "InnoDB: your tables.\n" - "InnoDB: Submit a detailed bug report to" - " http://bugs.mysql.com!\n", stderr); - - return; - } - - low_match = page_cur_search(page, index, entry, - PAGE_CUR_LE, &page_cur); - - if (low_match == dtuple_get_n_fields(entry)) { - rec = page_cur_get_rec(&page_cur); - - btr_cur_del_unmark_for_ibuf(rec, mtr); - } else { - rec = page_cur_tuple_insert(&page_cur, entry, index, mtr); - - if (rec == NULL) { - /* If the record did not fit, reorganize */ - - btr_page_reorganize(page, index, mtr); - - page_cur_search(page, index, entry, - PAGE_CUR_LE, &page_cur); - - /* This time the record must fit */ - if (UNIV_UNLIKELY(!page_cur_tuple_insert( - &page_cur, entry, index, - mtr))) { - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: Insert buffer insert" - " fails; page free %lu," - " dtuple size %lu\n", - (ulong) page_get_max_insert_size( - page, 1), - (ulong) rec_get_converted_size( - index, entry)); - fputs("InnoDB: Cannot insert index record ", - stderr); - dtuple_print(stderr, entry); - fputs("\nInnoDB: The table where" - " this index record belongs\n" - "InnoDB: is now probably corrupt." - " Please run CHECK TABLE on\n" - "InnoDB: that table.\n", stderr); - - bitmap_page = ibuf_bitmap_get_map_page( - buf_frame_get_space_id(page), - buf_frame_get_page_no(page), - mtr); - old_bits = ibuf_bitmap_page_get_bits( - bitmap_page, - buf_frame_get_page_no(page), - IBUF_BITMAP_FREE, mtr); - - fprintf(stderr, "InnoDB: Bitmap bits %lu\n", - (ulong) old_bits); - - fputs("InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", stderr); - } - } - } -} - -/************************************************************************* -Deletes from ibuf the record on which pcur is positioned. If we have to -resort to a pessimistic delete, this function commits mtr and closes -the cursor. 
*/ -static -ibool -ibuf_delete_rec( -/*============*/ - /* out: TRUE if mtr was committed and pcur - closed in this operation */ - ulint space, /* in: space id */ - ulint page_no,/* in: index page number where the record - should belong */ - btr_pcur_t* pcur, /* in: pcur positioned on the record to - delete, having latch mode BTR_MODIFY_LEAF */ - dtuple_t* search_tuple, - /* in: search tuple for entries of page_no */ - mtr_t* mtr) /* in: mtr */ -{ - ibool success; - ibuf_data_t* ibuf_data; - page_t* root; - ulint err; - - ut_ad(ibuf_inside()); - - success = btr_cur_optimistic_delete(btr_pcur_get_btr_cur(pcur), mtr); - - if (success) { -#ifdef UNIV_IBUF_DEBUG - fprintf(stderr, - "Decrementing ibuf count of space %lu page %lu\n" - "from %lu by 1\n", space, page_no, - ibuf_count_get(space, page_no)); - ibuf_count_set(space, page_no, - ibuf_count_get(space, page_no) - 1); -#endif - return(FALSE); - } - - /* We have to resort to a pessimistic delete from ibuf */ - btr_pcur_store_position(pcur, mtr); - - btr_pcur_commit_specify_mtr(pcur, mtr); - - /* Currently the insert buffer of space 0 takes care of inserts to all - tablespaces */ - - ibuf_data = fil_space_get_ibuf_data(0); - - mutex_enter(&ibuf_mutex); - - mtr_start(mtr); - - success = btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr); - - if (!success) { - if (fil_space_get_version(space) == -1) { - /* The tablespace has been dropped. It is possible - that another thread has deleted the insert buffer - entry. Do not complain. 
*/ - goto commit_and_exit; - } - - fprintf(stderr, - "InnoDB: ERROR: Submit the output to" - " http://bugs.mysql.com\n" - "InnoDB: ibuf cursor restoration fails!\n" - "InnoDB: ibuf record inserted to page %lu\n", - (ulong) page_no); - fflush(stderr); - - rec_print_old(stderr, btr_pcur_get_rec(pcur)); - rec_print_old(stderr, pcur->old_rec); - dtuple_print(stderr, search_tuple); - - rec_print_old(stderr, - page_rec_get_next(btr_pcur_get_rec(pcur))); - fflush(stderr); - - btr_pcur_commit_specify_mtr(pcur, mtr); - - fputs("InnoDB: Validating insert buffer tree:\n", stderr); - if (!btr_validate_index(ibuf_data->index, NULL)) { - ut_error; - } - - fprintf(stderr, "InnoDB: ibuf tree ok\n"); - fflush(stderr); - - goto func_exit; - } - - root = ibuf_tree_root_get(ibuf_data, 0, mtr); - - btr_cur_pessimistic_delete(&err, TRUE, btr_pcur_get_btr_cur(pcur), - FALSE, mtr); - ut_a(err == DB_SUCCESS); - -#ifdef UNIV_IBUF_DEBUG - ibuf_count_set(space, page_no, ibuf_count_get(space, page_no) - 1); -#endif - ibuf_data_sizes_update(ibuf_data, root, mtr); - - ut_ad(ibuf_validate_low()); - -commit_and_exit: - btr_pcur_commit_specify_mtr(pcur, mtr); - -func_exit: - btr_pcur_close(pcur); - - mutex_exit(&ibuf_mutex); - - return(TRUE); -} - -/************************************************************************* -When an index page is read from a disk to the buffer pool, this function -inserts to the page the possible index entries buffered in the insert buffer. -The entries are deleted from the insert buffer. If the page is not read, but -created in the buffer pool, this function deletes its buffered entries from -the insert buffer; there can exist entries for such a page if the page -belonged to an index which subsequently was dropped. 
*/ - -void -ibuf_merge_or_delete_for_page( -/*==========================*/ - page_t* page, /* in: if page has been read from disk, pointer to - the page x-latched, else NULL */ - ulint space, /* in: space id of the index page */ - ulint page_no,/* in: page number of the index page */ - ibool update_ibuf_bitmap)/* in: normally this is set to TRUE, but if - we have deleted or are deleting the tablespace, then we - naturally do not want to update a non-existent bitmap - page */ -{ - mem_heap_t* heap; - btr_pcur_t pcur; - dtuple_t* entry; - dtuple_t* search_tuple; - rec_t* ibuf_rec; - buf_block_t* block; - page_t* bitmap_page; - ibuf_data_t* ibuf_data; - ulint n_inserts; -#ifdef UNIV_IBUF_DEBUG - ulint volume; -#endif - ibool tablespace_being_deleted = FALSE; - ibool corruption_noticed = FALSE; - mtr_t mtr; - - if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) { - - return; - } - - if (ibuf_fixed_addr_page(space, page_no) || fsp_descr_page(page_no) - || trx_sys_hdr_page(space, page_no)) { - return; - } - - if (update_ibuf_bitmap) { - /* If the following returns FALSE, we get the counter - incremented, and must decrement it when we leave this - function. When the counter is > 0, that prevents tablespace - from being dropped. 
*/ - - tablespace_being_deleted = fil_inc_pending_ibuf_merges(space); - - if (tablespace_being_deleted) { - /* Do not try to read the bitmap page from space; - just delete the ibuf records for the page */ - - page = NULL; - update_ibuf_bitmap = FALSE; - } - } - - if (update_ibuf_bitmap) { - mtr_start(&mtr); - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr); - - if (!ibuf_bitmap_page_get_bits(bitmap_page, page_no, - IBUF_BITMAP_BUFFERED, &mtr)) { - /* No inserts buffered for this page */ - mtr_commit(&mtr); - - if (!tablespace_being_deleted) { - fil_decr_pending_ibuf_merges(space); - } - - return; - } - mtr_commit(&mtr); - } - - /* Currently the insert buffer of space 0 takes care of inserts to all - tablespaces */ - - ibuf_data = fil_space_get_ibuf_data(0); - - ibuf_enter(); - - heap = mem_heap_create(512); - - if (!trx_sys_multiple_tablespace_format) { - ut_a(trx_doublewrite_must_reset_space_ids); - search_tuple = ibuf_search_tuple_build(space, page_no, heap); - } else { - search_tuple = ibuf_new_search_tuple_build(space, page_no, - heap); - } - - if (page) { - /* Move the ownership of the x-latch on the page to this OS - thread, so that we can acquire a second x-latch on it. This - is needed for the insert operations to the index page to pass - the debug checks. */ - - block = buf_block_align(page); - rw_lock_x_lock_move_ownership(&(block->lock)); - - if (fil_page_get_type(page) != FIL_PAGE_INDEX) { - - corruption_noticed = TRUE; - - ut_print_timestamp(stderr); - - mtr_start(&mtr); - - fputs(" InnoDB: Dump of the ibuf bitmap page:\n", - stderr); - - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, - &mtr); - buf_page_print(bitmap_page); - - mtr_commit(&mtr); - - fputs("\nInnoDB: Dump of the page:\n", stderr); - - buf_page_print(page); - - fprintf(stderr, - "InnoDB: Error: corruption in the tablespace." 
- " Bitmap shows insert\n" - "InnoDB: buffer records to page n:o %lu" - " though the page\n" - "InnoDB: type is %lu, which is" - " not an index page!\n" - "InnoDB: We try to resolve the problem" - " by skipping the insert buffer\n" - "InnoDB: merge for this page." - " Please run CHECK TABLE on your tables\n" - "InnoDB: to determine if they are corrupt" - " after this.\n\n" - "InnoDB: Please submit a detailed bug report" - " to http://bugs.mysql.com\n\n", - (ulong) page_no, - (ulong) fil_page_get_type(page)); - } - } - - n_inserts = 0; -#ifdef UNIV_IBUF_DEBUG - volume = 0; -#endif -loop: - mtr_start(&mtr); - - if (page) { - ibool success = buf_page_get_known_nowait(RW_X_LATCH, page, - BUF_KEEP_OLD, - __FILE__, __LINE__, - &mtr); - ut_a(success); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_TREE_NODE); -#endif /* UNIV_SYNC_DEBUG */ - } - - /* Position pcur in the insert buffer at the first entry for this - index page */ - btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE, - BTR_MODIFY_LEAF, &pcur, &mtr); - if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { - ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr)); - - goto reset_bit; - } - - for (;;) { - ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr)); - - ibuf_rec = btr_pcur_get_rec(&pcur); - - /* Check if the entry is for this index page */ - if (ibuf_rec_get_page_no(ibuf_rec) != page_no - || ibuf_rec_get_space(ibuf_rec) != space) { - if (page) { - page_header_reset_last_insert(page, &mtr); - } - goto reset_bit; - } - - if (corruption_noticed) { - fputs("InnoDB: Discarding record\n ", stderr); - rec_print_old(stderr, ibuf_rec); - fputs("\n from the insert buffer!\n\n", stderr); - } else if (page) { - /* Now we have at pcur a record which should be - inserted to the index page; NOTE that the call below - copies pointers to fields in ibuf_rec, and we must - keep the latch to the ibuf_rec page until the - insertion is finished! 
*/ - dict_index_t* dummy_index; - dulint max_trx_id = page_get_max_trx_id( - buf_frame_align(ibuf_rec)); - page_update_max_trx_id(page, max_trx_id); - - entry = ibuf_build_entry_from_ibuf_rec( - ibuf_rec, heap, &dummy_index); -#ifdef UNIV_IBUF_DEBUG - volume += rec_get_converted_size(dummy_index, entry) - + page_dir_calc_reserved_space(1); - ut_a(volume <= 4 * UNIV_PAGE_SIZE - / IBUF_PAGE_SIZE_PER_FREE_SPACE); -#endif - ibuf_insert_to_index_page(entry, page, - dummy_index, &mtr); - ibuf_dummy_index_free(dummy_index); - } - - n_inserts++; - - /* Delete the record from ibuf */ - if (ibuf_delete_rec(space, page_no, &pcur, search_tuple, - &mtr)) { - /* Deletion was pessimistic and mtr was committed: - we start from the beginning again */ - - goto loop; - } - - if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) { - mtr_commit(&mtr); - btr_pcur_close(&pcur); - - goto loop; - } - } - -reset_bit: -#ifdef UNIV_IBUF_DEBUG - if (ibuf_count_get(space, page_no) > 0) { - /* btr_print_tree(ibuf_data->index->tree, 100); - ibuf_print(); */ - } -#endif - if (update_ibuf_bitmap) { - bitmap_page = ibuf_bitmap_get_map_page(space, page_no, &mtr); - ibuf_bitmap_page_set_bits(bitmap_page, page_no, - IBUF_BITMAP_BUFFERED, FALSE, &mtr); - if (page) { - ulint old_bits = ibuf_bitmap_page_get_bits( - bitmap_page, page_no, IBUF_BITMAP_FREE, &mtr); - ulint new_bits = ibuf_index_page_calc_free(page); -#if 0 /* defined UNIV_IBUF_DEBUG */ - fprintf(stderr, "Old bits %lu new bits %lu" - " max size %lu\n", - old_bits, new_bits, - page_get_max_insert_size_after_reorganize( - page, 1)); -#endif - if (old_bits != new_bits) { - ibuf_bitmap_page_set_bits(bitmap_page, page_no, - IBUF_BITMAP_FREE, - new_bits, &mtr); - } - } - } -#if 0 /* defined UNIV_IBUF_DEBUG */ - fprintf(stderr, - "Ibuf merge %lu records volume %lu to page no %lu\n", - n_inserts, volume, page_no); -#endif - mtr_commit(&mtr); - btr_pcur_close(&pcur); - mem_heap_free(heap); - - /* Protect our statistics keeping from race conditions */ - 
mutex_enter(&ibuf_mutex); - - ibuf_data->n_merges++; - ibuf_data->n_merged_recs += n_inserts; - - mutex_exit(&ibuf_mutex); - - if (update_ibuf_bitmap && !tablespace_being_deleted) { - - fil_decr_pending_ibuf_merges(space); - } - - ibuf_exit(); -#ifdef UNIV_IBUF_DEBUG - ut_a(ibuf_count_get(space, page_no) == 0); -#endif -} - -/************************************************************************* -Deletes all entries in the insert buffer for a given space id. This is used -in DISCARD TABLESPACE and IMPORT TABLESPACE. -NOTE: this does not update the page free bitmaps in the space. The space will -become CORRUPT when you call this function! */ - -void -ibuf_delete_for_discarded_space( -/*============================*/ - ulint space) /* in: space id */ -{ - mem_heap_t* heap; - btr_pcur_t pcur; - dtuple_t* search_tuple; - rec_t* ibuf_rec; - ulint page_no; - ibool closed; - ibuf_data_t* ibuf_data; - ulint n_inserts; - mtr_t mtr; - - /* Currently the insert buffer of space 0 takes care of inserts to all - tablespaces */ - - ibuf_data = fil_space_get_ibuf_data(0); - - heap = mem_heap_create(512); - - /* Use page number 0 to build the search tuple so that we get the - cursor positioned at the first entry for this space id */ - - search_tuple = ibuf_new_search_tuple_build(space, 0, heap); - - n_inserts = 0; -loop: - ibuf_enter(); - - mtr_start(&mtr); - - /* Position pcur in the insert buffer at the first entry for the - space */ - btr_pcur_open_on_user_rec(ibuf_data->index, search_tuple, PAGE_CUR_GE, - BTR_MODIFY_LEAF, &pcur, &mtr); - if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { - ut_ad(btr_pcur_is_after_last_in_tree(&pcur, &mtr)); - - goto leave_loop; - } - - for (;;) { - ut_ad(btr_pcur_is_on_user_rec(&pcur, &mtr)); - - ibuf_rec = btr_pcur_get_rec(&pcur); - - /* Check if the entry is for this space */ - if (ibuf_rec_get_space(ibuf_rec) != space) { - - goto leave_loop; - } - - page_no = ibuf_rec_get_page_no(ibuf_rec); - - n_inserts++; - - /* Delete the record from ibuf */ 
- closed = ibuf_delete_rec(space, page_no, &pcur, search_tuple, - &mtr); - if (closed) { - /* Deletion was pessimistic and mtr was committed: - we start from the beginning again */ - - ibuf_exit(); - - goto loop; - } - - if (btr_pcur_is_after_last_on_page(&pcur, &mtr)) { - mtr_commit(&mtr); - btr_pcur_close(&pcur); - - ibuf_exit(); - - goto loop; - } - } - -leave_loop: - mtr_commit(&mtr); - btr_pcur_close(&pcur); - - /* Protect our statistics keeping from race conditions */ - mutex_enter(&ibuf_mutex); - - ibuf_data->n_merges++; - ibuf_data->n_merged_recs += n_inserts; - - mutex_exit(&ibuf_mutex); - /* - fprintf(stderr, - "InnoDB: Discarded %lu ibuf entries for space %lu\n", - (ulong) n_inserts, (ulong) space); - */ - ibuf_exit(); - - mem_heap_free(heap); -} - - -/********************************************************************** -Validates the ibuf data structures when the caller owns ibuf_mutex. */ - -ibool -ibuf_validate_low(void) -/*===================*/ - /* out: TRUE if ok */ -{ - ibuf_data_t* data; - ulint sum_sizes; - - ut_ad(mutex_own(&ibuf_mutex)); - - sum_sizes = 0; - - data = UT_LIST_GET_FIRST(ibuf->data_list); - - while (data) { - sum_sizes += data->size; - - data = UT_LIST_GET_NEXT(data_list, data); - } - - ut_a(sum_sizes == ibuf->size); - - return(TRUE); -} - -/********************************************************************** -Looks if the insert buffer is empty. */ - -ibool -ibuf_is_empty(void) -/*===============*/ - /* out: TRUE if empty */ -{ - ibuf_data_t* data; - ibool is_empty; - page_t* root; - mtr_t mtr; - - ibuf_enter(); - - mutex_enter(&ibuf_mutex); - - data = UT_LIST_GET_FIRST(ibuf->data_list); - - mtr_start(&mtr); - - root = ibuf_tree_root_get(data, 0, &mtr); - - if (page_get_n_recs(root) == 0) { - - is_empty = TRUE; - - if (data->empty == FALSE) { - fprintf(stderr, - "InnoDB: Warning: insert buffer tree is empty" - " but the data struct does not\n" - "InnoDB: know it. 
This condition is legal" - " if the master thread has not yet\n" - "InnoDB: run to completion.\n"); - } - } else { - ut_a(data->empty == FALSE); - - is_empty = FALSE; - } - - mtr_commit(&mtr); - - ut_a(data->space == 0); - - mutex_exit(&ibuf_mutex); - - ibuf_exit(); - - return(is_empty); -} - -/********************************************************************** -Prints info of ibuf. */ - -void -ibuf_print( -/*=======*/ - FILE* file) /* in: file where to print */ -{ - ibuf_data_t* data; -#ifdef UNIV_IBUF_DEBUG - ulint i; -#endif - - mutex_enter(&ibuf_mutex); - - data = UT_LIST_GET_FIRST(ibuf->data_list); - - while (data) { - fprintf(file, - "Ibuf: size %lu, free list len %lu, seg size %lu,\n" - "%lu inserts, %lu merged recs, %lu merges\n", - (ulong) data->size, - (ulong) data->free_list_len, - (ulong) data->seg_size, - (ulong) data->n_inserts, - (ulong) data->n_merged_recs, - (ulong) data->n_merges); -#ifdef UNIV_IBUF_DEBUG - for (i = 0; i < IBUF_COUNT_N_PAGES; i++) { - if (ibuf_count_get(data->space, i) > 0) { - - fprintf(stderr, - "Ibuf count for page %lu is %lu\n", - (ulong) i, - (ulong) - ibuf_count_get(data->space, i)); - } - } -#endif - data = UT_LIST_GET_NEXT(data_list, data); - } - - mutex_exit(&ibuf_mutex); -} diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h deleted file mode 100644 index 1573de7e818..00000000000 --- a/storage/innobase/include/btr0btr.h +++ /dev/null @@ -1,451 +0,0 @@ -/****************************************************** -The B-tree - -(c) 1994-1996 Innobase Oy - -Created 6/2/1994 Heikki Tuuri -*******************************************************/ - -#ifndef btr0btr_h -#define btr0btr_h - -#include "univ.i" - -#include "dict0dict.h" -#include "data0data.h" -#include "page0cur.h" -#include "rem0rec.h" -#include "mtr0mtr.h" -#include "btr0types.h" - -/* Maximum record size which can be stored on a page, without using the -special big record storage structure */ - -#define BTR_PAGE_MAX_REC_SIZE 
(UNIV_PAGE_SIZE / 2 - 200) - -/* Maximum depth of a B-tree in InnoDB. Note that this isn't a maximum as -such; none of the tree operations avoid producing trees bigger than this. It -is instead a "max depth that other code must work with", useful for e.g. -fixed-size arrays that must store some information about each level in a -tree. In other words: if a B-tree with bigger depth than this is -encountered, it is not acceptable for it to lead to mysterious memory -corruption, but it is acceptable for the program to die with a clear assert -failure. */ -#define BTR_MAX_LEVELS 100 - -/* Latching modes for btr_cur_search_to_nth_level(). */ -#define BTR_SEARCH_LEAF RW_S_LATCH -#define BTR_MODIFY_LEAF RW_X_LATCH -#define BTR_NO_LATCHES RW_NO_LATCH -#define BTR_MODIFY_TREE 33 -#define BTR_CONT_MODIFY_TREE 34 -#define BTR_SEARCH_PREV 35 -#define BTR_MODIFY_PREV 36 - -/* If this is ORed to the latch mode, it means that the search tuple will be -inserted to the index, at the searched position */ -#define BTR_INSERT 512 - -/* This flag ORed to latch mode says that we do the search in query -optimization */ -#define BTR_ESTIMATE 1024 - -/* This flag ORed to latch mode says that we can ignore possible -UNIQUE definition on secondary indexes when we decide if we can use the -insert buffer to speed up inserts */ -#define BTR_IGNORE_SEC_UNIQUE 2048 - -/****************************************************************** -Gets the root node of a tree and x-latches it. */ - -page_t* -btr_root_get( -/*=========*/ - /* out: root page, x-latched */ - dict_index_t* index, /* in: index tree */ - mtr_t* mtr); /* in: mtr */ -/****************************************************************** -Gets a buffer page and declares its latching order level. 
*/ -UNIV_INLINE -page_t* -btr_page_get( -/*=========*/ - ulint space, /* in: space id */ - ulint page_no, /* in: page number */ - ulint mode, /* in: latch mode */ - mtr_t* mtr); /* in: mtr */ -/****************************************************************** -Gets the index id field of a page. */ -UNIV_INLINE -dulint -btr_page_get_index_id( -/*==================*/ - /* out: index id */ - page_t* page); /* in: index page */ -/************************************************************ -Gets the node level field in an index page. */ -UNIV_INLINE -ulint -btr_page_get_level_low( -/*===================*/ - /* out: level, leaf level == 0 */ - page_t* page); /* in: index page */ -/************************************************************ -Gets the node level field in an index page. */ -UNIV_INLINE -ulint -btr_page_get_level( -/*===============*/ - /* out: level, leaf level == 0 */ - page_t* page, /* in: index page */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************ -Gets the next index page number. */ -UNIV_INLINE -ulint -btr_page_get_next( -/*==============*/ - /* out: next page number */ - page_t* page, /* in: index page */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************ -Gets the previous index page number. */ -UNIV_INLINE -ulint -btr_page_get_prev( -/*==============*/ - /* out: prev page number */ - page_t* page, /* in: index page */ - mtr_t* mtr); /* in: mini-transaction handle */ -/***************************************************************** -Gets pointer to the previous user record in the tree. It is assumed -that the caller has appropriate latches on the page and its neighbor. 
*/ - -rec_t* -btr_get_prev_user_rec( -/*==================*/ - /* out: previous user record, NULL if there is none */ - rec_t* rec, /* in: record on leaf level */ - mtr_t* mtr); /* in: mtr holding a latch on the page, and if - needed, also to the previous page */ -/***************************************************************** -Gets pointer to the next user record in the tree. It is assumed -that the caller has appropriate latches on the page and its neighbor. */ - -rec_t* -btr_get_next_user_rec( -/*==================*/ - /* out: next user record, NULL if there is none */ - rec_t* rec, /* in: record on leaf level */ - mtr_t* mtr); /* in: mtr holding a latch on the page, and if - needed, also to the next page */ -/****************************************************************** -Releases the latch on a leaf page and bufferunfixes it. */ -UNIV_INLINE -void -btr_leaf_page_release( -/*==================*/ - page_t* page, /* in: page */ - ulint latch_mode, /* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */ - mtr_t* mtr); /* in: mtr */ -/****************************************************************** -Gets the child node file address in a node pointer. */ -UNIV_INLINE -ulint -btr_node_ptr_get_child_page_no( -/*===========================*/ - /* out: child node address */ - rec_t* rec, /* in: node pointer record */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/**************************************************************** -Creates the root node for a new index tree. 
*/ - -ulint -btr_create( -/*=======*/ - /* out: page number of the created root, FIL_NULL if - did not succeed */ - ulint type, /* in: type of the index */ - ulint space, /* in: space where created */ - dulint index_id,/* in: index id */ - ulint comp, /* in: nonzero=compact page format */ - mtr_t* mtr); /* in: mini-transaction handle */ -/**************************************************************** -Frees a B-tree except the root page, which MUST be freed after this -by calling btr_free_root. */ - -void -btr_free_but_not_root( -/*==================*/ - ulint space, /* in: space where created */ - ulint root_page_no); /* in: root page number */ -/**************************************************************** -Frees the B-tree root page. Other tree MUST already have been freed. */ - -void -btr_free_root( -/*==========*/ - ulint space, /* in: space where created */ - ulint root_page_no, /* in: root page number */ - mtr_t* mtr); /* in: a mini-transaction which has already - been started */ -/***************************************************************** -Makes tree one level higher by splitting the root, and inserts -the tuple. It is assumed that mtr contains an x-latch on the tree. -NOTE that the operation of this function must always succeed, -we cannot reverse it: therefore enough free disk space must be -guaranteed to be available before this function is called. */ - -rec_t* -btr_root_raise_and_insert( -/*======================*/ - /* out: inserted record */ - btr_cur_t* cursor, /* in: cursor at which to insert: must be - on the root page; when the function returns, - the cursor is positioned on the predecessor - of the inserted record */ - dtuple_t* tuple, /* in: tuple to insert */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Reorganizes an index page. 
*/ - -void -btr_page_reorganize( -/*================*/ - page_t* page, /* in: page to be reorganized */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Decides if the page should be split at the convergence point of -inserts converging to left. */ - -ibool -btr_page_get_split_rec_to_left( -/*===========================*/ - /* out: TRUE if split recommended */ - btr_cur_t* cursor, /* in: cursor at which to insert */ - rec_t** split_rec);/* out: if split recommended, - the first record on upper half page, - or NULL if tuple should be first */ -/***************************************************************** -Decides if the page should be split at the convergence point of -inserts converging to right. */ - -ibool -btr_page_get_split_rec_to_right( -/*============================*/ - /* out: TRUE if split recommended */ - btr_cur_t* cursor, /* in: cursor at which to insert */ - rec_t** split_rec);/* out: if split recommended, - the first record on upper half page, - or NULL if tuple should be first */ -/***************************************************************** -Splits an index page to halves and inserts the tuple. It is assumed -that mtr holds an x-latch to the index tree. NOTE: the tree x-latch -is released within this function! NOTE that the operation of this -function must always succeed, we cannot reverse it: therefore -enough free disk space must be guaranteed to be available before -this function is called. */ - -rec_t* -btr_page_split_and_insert( -/*======================*/ - /* out: inserted record; NOTE: the tree - x-latch is released! NOTE: 2 free disk - pages must be available! 
*/ - btr_cur_t* cursor, /* in: cursor at which to insert; when the - function returns, the cursor is positioned - on the predecessor of the inserted record */ - dtuple_t* tuple, /* in: tuple to insert */ - mtr_t* mtr); /* in: mtr */ -/*********************************************************** -Inserts a data tuple to a tree on a non-leaf level. It is assumed -that mtr holds an x-latch on the tree. */ - -void -btr_insert_on_non_leaf_level( -/*=========================*/ - dict_index_t* index, /* in: index */ - ulint level, /* in: level, must be > 0 */ - dtuple_t* tuple, /* in: the record to be inserted */ - mtr_t* mtr); /* in: mtr */ -/******************************************************************** -Sets a record as the predefined minimum record. */ - -void -btr_set_min_rec_mark( -/*=================*/ - rec_t* rec, /* in: record */ - ulint comp, /* in: nonzero=compact page format */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Deletes on the upper level the node pointer to a page. */ - -void -btr_node_ptr_delete( -/*================*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page whose node pointer is deleted */ - mtr_t* mtr); /* in: mtr */ -#ifdef UNIV_DEBUG -/**************************************************************** -Checks that the node pointer to a page is appropriate. */ - -ibool -btr_check_node_ptr( -/*===============*/ - /* out: TRUE */ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: index page */ - mtr_t* mtr); /* in: mtr */ -#endif /* UNIV_DEBUG */ -/***************************************************************** -Tries to merge the page first to the left immediate brother if such a -brother exists, and the node pointers to the current page and to the -brother reside on the same page. If the left brother does not satisfy these -conditions, looks at the right brother. 
If the page is the only one on that -level lifts the records of the page to the father page, thus reducing the -tree height. It is assumed that mtr holds an x-latch on the tree and on the -page. If cursor is on the leaf level, mtr must also hold x-latches to -the brothers, if they exist. NOTE: it is assumed that the caller has reserved -enough free extents so that the compression will always succeed if done! */ -void -btr_compress( -/*=========*/ - btr_cur_t* cursor, /* in: cursor on the page to merge or lift; - the page must not be empty: in record delete - use btr_discard_page if the page would become - empty */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Discards a page from a B-tree. This is used to remove the last record from -a B-tree page: the whole page must be removed at the same time. This cannot -be used for the root page, which is allowed to be empty. */ - -void -btr_discard_page( -/*=============*/ - btr_cur_t* cursor, /* in: cursor on the page to discard: not on - the root page */ - mtr_t* mtr); /* in: mtr */ -/******************************************************************** -Parses the redo log record for setting an index record as the predefined -minimum record. */ - -byte* -btr_parse_set_min_rec_mark( -/*=======================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - ulint comp, /* in: nonzero=compact page format */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ -/*************************************************************** -Parses a redo log record of reorganizing a page. 
*/ - -byte* -btr_parse_page_reorganize( -/*======================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - dict_index_t* index, /* in: record descriptor */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ -/****************************************************************** -Gets the number of pages in a B-tree. */ - -ulint -btr_get_size( -/*=========*/ - /* out: number of pages */ - dict_index_t* index, /* in: index */ - ulint flag); /* in: BTR_N_LEAF_PAGES or BTR_TOTAL_SIZE */ -/****************************************************************** -Allocates a new file page to be used in an index tree. NOTE: we assume -that the caller has made the reservation for free extents! */ - -page_t* -btr_page_alloc( -/*===========*/ - /* out: new allocated page, x-latched; - NULL if out of space */ - dict_index_t* index, /* in: index tree */ - ulint hint_page_no, /* in: hint of a good page */ - byte file_direction, /* in: direction where a possible - page split is made */ - ulint level, /* in: level where the page is placed - in the tree */ - mtr_t* mtr); /* in: mtr */ -/****************************************************************** -Frees a file page used in an index tree. NOTE: cannot free field external -storage pages because the page must contain info on its level. */ - -void -btr_page_free( -/*==========*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page to be freed, x-latched */ - mtr_t* mtr); /* in: mtr */ -/****************************************************************** -Frees a file page used in an index tree. Can be used also to BLOB -external storage pages, because the page level 0 can be given as an -argument. 
*/ - -void -btr_page_free_low( -/*==============*/ - dict_index_t* index, /* in: index tree */ - page_t* page, /* in: page to be freed, x-latched */ - ulint level, /* in: page level */ - mtr_t* mtr); /* in: mtr */ -#ifdef UNIV_BTR_PRINT -/***************************************************************** -Prints size info of a B-tree. */ - -void -btr_print_size( -/*===========*/ - dict_index_t* index); /* in: index tree */ -/****************************************************************** -Prints directories and other info of all nodes in the index. */ - -void -btr_print_index( -/*============*/ - dict_index_t* index, /* in: index */ - ulint width); /* in: print this many entries from start - and end */ -#endif /* UNIV_BTR_PRINT */ -/**************************************************************** -Checks the size and number of fields in a record based on the definition of -the index. */ - -ibool -btr_index_rec_validate( -/*===================*/ - /* out: TRUE if ok */ - rec_t* rec, /* in: index record */ - dict_index_t* index, /* in: index */ - ibool dump_on_error); /* in: TRUE if the function - should print hex dump of record - and page on error */ -/****************************************************************** -Checks the consistency of an index tree. 
*/ - -ibool -btr_validate_index( -/*===============*/ - /* out: TRUE if ok */ - dict_index_t* index, /* in: index */ - trx_t* trx); /* in: transaction or NULL */ - -#define BTR_N_LEAF_PAGES 1 -#define BTR_TOTAL_SIZE 2 - -#ifndef UNIV_NONINL -#include "btr0btr.ic" -#endif - -#endif diff --git a/storage/innobase/include/btr0btr.ic b/storage/innobase/include/btr0btr.ic deleted file mode 100644 index 4a88f58b318..00000000000 --- a/storage/innobase/include/btr0btr.ic +++ /dev/null @@ -1,234 +0,0 @@ -/****************************************************** -The B-tree - -(c) 1994-1996 Innobase Oy - -Created 6/2/1994 Heikki Tuuri -*******************************************************/ - -#include "mach0data.h" -#include "mtr0mtr.h" -#include "mtr0log.h" - -#define BTR_MAX_NODE_LEVEL 50 /* used in debug checking */ - -/****************************************************************** -Gets a buffer page and declares its latching order level. */ -UNIV_INLINE -page_t* -btr_page_get( -/*=========*/ - ulint space, /* in: space id */ - ulint page_no, /* in: page number */ - ulint mode, /* in: latch mode */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* page; - - page = buf_page_get(space, page_no, mode, mtr); -#ifdef UNIV_SYNC_DEBUG - if (mode != RW_NO_LATCH) { - - buf_page_dbg_add_level(page, SYNC_TREE_NODE); - } -#endif - return(page); -} - -/****************************************************************** -Sets the index id field of a page. */ -UNIV_INLINE -void -btr_page_set_index_id( -/*==================*/ - page_t* page, /* in: page to be created */ - dulint id, /* in: index id */ - mtr_t* mtr) /* in: mtr */ -{ - mlog_write_dulint(page + PAGE_HEADER + PAGE_INDEX_ID, id, mtr); -} - -/****************************************************************** -Gets the index id field of a page. 
*/ -UNIV_INLINE -dulint -btr_page_get_index_id( -/*==================*/ - /* out: index id */ - page_t* page) /* in: index page */ -{ - return(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID)); -} - -/************************************************************ -Gets the node level field in an index page. */ -UNIV_INLINE -ulint -btr_page_get_level_low( -/*===================*/ - /* out: level, leaf level == 0 */ - page_t* page) /* in: index page */ -{ - ulint level; - - ut_ad(page); - - level = mach_read_from_2(page + PAGE_HEADER + PAGE_LEVEL); - - ut_ad(level <= BTR_MAX_NODE_LEVEL); - - return(level); -} - -/************************************************************ -Gets the node level field in an index page. */ -UNIV_INLINE -ulint -btr_page_get_level( -/*===============*/ - /* out: level, leaf level == 0 */ - page_t* page, /* in: index page */ - mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */ -{ - ut_ad(page && mtr); - - return(btr_page_get_level_low(page)); -} - -/************************************************************ -Sets the node level field in an index page. */ -UNIV_INLINE -void -btr_page_set_level( -/*===============*/ - page_t* page, /* in: index page */ - ulint level, /* in: level, leaf level == 0 */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ut_ad(page && mtr); - ut_ad(level <= BTR_MAX_NODE_LEVEL); - - mlog_write_ulint(page + PAGE_HEADER + PAGE_LEVEL, level, - MLOG_2BYTES, mtr); -} - -/************************************************************ -Gets the next index page number. 
*/ -UNIV_INLINE -ulint -btr_page_get_next( -/*==============*/ - /* out: next page number */ - page_t* page, /* in: index page */ - mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */ -{ - ut_ad(page && mtr); - ut_ad(mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_PAGE_S_FIX)); - - return(mach_read_from_4(page + FIL_PAGE_NEXT)); -} - -/************************************************************ -Sets the next index page field. */ -UNIV_INLINE -void -btr_page_set_next( -/*==============*/ - page_t* page, /* in: index page */ - ulint next, /* in: next page number */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ut_ad(page && mtr); - - mlog_write_ulint(page + FIL_PAGE_NEXT, next, MLOG_4BYTES, mtr); -} - -/************************************************************ -Gets the previous index page number. */ -UNIV_INLINE -ulint -btr_page_get_prev( -/*==============*/ - /* out: prev page number */ - page_t* page, /* in: index page */ - mtr_t* mtr __attribute__((unused))) /* in: mini-transaction handle */ -{ - ut_ad(page && mtr); - - return(mach_read_from_4(page + FIL_PAGE_PREV)); -} - -/************************************************************ -Sets the previous index page field. */ -UNIV_INLINE -void -btr_page_set_prev( -/*==============*/ - page_t* page, /* in: index page */ - ulint prev, /* in: previous page number */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ut_ad(page && mtr); - - mlog_write_ulint(page + FIL_PAGE_PREV, prev, MLOG_4BYTES, mtr); -} - -/****************************************************************** -Gets the child node file address in a node pointer. 
*/ -UNIV_INLINE -ulint -btr_node_ptr_get_child_page_no( -/*===========================*/ - /* out: child node address */ - rec_t* rec, /* in: node pointer record */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - byte* field; - ulint len; - ulint page_no; - - ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec)); - - /* The child address is in the last field */ - field = rec_get_nth_field(rec, offsets, - rec_offs_n_fields(offsets) - 1, &len); - - ut_ad(len == 4); - - page_no = mach_read_from_4(field); - - if (UNIV_UNLIKELY(page_no == 0)) { - fprintf(stderr, - "InnoDB: a nonsensical page number 0" - " in a node ptr record at offset %lu\n", - (ulong) page_offset(rec)); - buf_page_print(buf_frame_align(rec)); - } - - return(page_no); -} - -/****************************************************************** -Releases the latches on a leaf page and bufferunfixes it. */ -UNIV_INLINE -void -btr_leaf_page_release( -/*==================*/ - page_t* page, /* in: page */ - ulint latch_mode, /* in: BTR_SEARCH_LEAF or BTR_MODIFY_LEAF */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(!mtr_memo_contains(mtr, buf_block_align(page), - MTR_MEMO_MODIFY)); - if (latch_mode == BTR_SEARCH_LEAF) { - mtr_memo_release(mtr, buf_block_align(page), - MTR_MEMO_PAGE_S_FIX); - } else { - ut_ad(latch_mode == BTR_MODIFY_LEAF); - mtr_memo_release(mtr, buf_block_align(page), - MTR_MEMO_PAGE_X_FIX); - } -} diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h deleted file mode 100644 index 213dcb7f568..00000000000 --- a/storage/innobase/include/btr0cur.h +++ /dev/null @@ -1,706 +0,0 @@ -/****************************************************** -The index tree cursor - -(c) 1994-1996 Innobase Oy - -Created 10/16/1994 Heikki Tuuri -*******************************************************/ - -#ifndef btr0cur_h -#define btr0cur_h - -#include "univ.i" -#include "dict0dict.h" -#include "data0data.h" -#include "page0cur.h" -#include "btr0types.h" 
-#include "que0types.h" -#include "row0types.h" -#include "ha0ha.h" - -/* Mode flags for btr_cur operations; these can be ORed */ -#define BTR_NO_UNDO_LOG_FLAG 1 /* do no undo logging */ -#define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */ -#define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the - update vector or inserted entry */ - -#define BTR_CUR_ADAPT -#define BTR_CUR_HASH_ADAPT - -/************************************************************* -Returns the page cursor component of a tree cursor. */ -UNIV_INLINE -page_cur_t* -btr_cur_get_page_cur( -/*=================*/ - /* out: pointer to page cursor component */ - btr_cur_t* cursor);/* in: tree cursor */ -/************************************************************* -Returns the record pointer of a tree cursor. */ -UNIV_INLINE -rec_t* -btr_cur_get_rec( -/*============*/ - /* out: pointer to record */ - btr_cur_t* cursor);/* in: tree cursor */ -/************************************************************* -Invalidates a tree cursor by setting record pointer to NULL. */ -UNIV_INLINE -void -btr_cur_invalidate( -/*===============*/ - btr_cur_t* cursor);/* in: tree cursor */ -/************************************************************* -Returns the page of a tree cursor. */ -UNIV_INLINE -page_t* -btr_cur_get_page( -/*=============*/ - /* out: pointer to page */ - btr_cur_t* cursor);/* in: tree cursor */ -/************************************************************* -Returns the index of a cursor. */ -UNIV_INLINE -dict_index_t* -btr_cur_get_index( -/*==============*/ - /* out: index */ - btr_cur_t* cursor);/* in: B-tree cursor */ -/************************************************************* -Positions a tree cursor at a given record. 
*/ -UNIV_INLINE -void -btr_cur_position( -/*=============*/ - dict_index_t* index, /* in: index */ - rec_t* rec, /* in: record in tree */ - btr_cur_t* cursor);/* in: cursor */ -/************************************************************************ -Searches an index tree and positions a tree cursor on a given level. -NOTE: n_fields_cmp in tuple must be set so that it cannot be compared -to node pointer page number fields on the upper levels of the tree! -Note that if mode is PAGE_CUR_LE, which is used in inserts, then -cursor->up_match and cursor->low_match both will have sensible values. -If mode is PAGE_CUR_GE, then up_match will a have a sensible value. */ - -void -btr_cur_search_to_nth_level( -/*========================*/ - dict_index_t* index, /* in: index */ - ulint level, /* in: the tree level of search */ - dtuple_t* tuple, /* in: data tuple; NOTE: n_fields_cmp in - tuple must be set so that it cannot get - compared to the node ptr page number field! */ - ulint mode, /* in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be PAGE_CUR_LE, - not PAGE_CUR_GE, as the latter may end up on - the previous page of the record! Inserts - should always be made using PAGE_CUR_LE to - search the position! */ - ulint latch_mode, /* in: BTR_SEARCH_LEAF, ..., ORed with - BTR_INSERT and BTR_ESTIMATE; - cursor->left_page is used to store a pointer - to the left neighbor page, in the cases - BTR_SEARCH_PREV and BTR_MODIFY_PREV; - NOTE that if has_search_latch - is != 0, we maybe do not have a latch set - on the cursor page, we assume - the caller uses his search latch - to protect the record! */ - btr_cur_t* cursor, /* in/out: tree cursor; the cursor page is - s- or x-latched, but see also above! 
*/ - ulint has_search_latch,/* in: latch mode the caller - currently has on btr_search_latch: - RW_S_LATCH, or 0 */ - mtr_t* mtr); /* in: mtr */ -/********************************************************************* -Opens a cursor at either end of an index. */ - -void -btr_cur_open_at_index_side( -/*=======================*/ - ibool from_left, /* in: TRUE if open to the low end, - FALSE if to the high end */ - dict_index_t* index, /* in: index */ - ulint latch_mode, /* in: latch mode */ - btr_cur_t* cursor, /* in: cursor */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************** -Positions a cursor at a randomly chosen position within a B-tree. */ - -void -btr_cur_open_at_rnd_pos( -/*====================*/ - dict_index_t* index, /* in: index */ - ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */ - btr_cur_t* cursor, /* in/out: B-tree cursor */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Tries to perform an insert to a page in an index tree, next to cursor. -It is assumed that mtr holds an x-latch on the page. The operation does -not succeed if there is too little space on the page. If there is just -one record on the page, the insert will always succeed; this is to -prevent trying to split a page with just one record. 
*/ - -ulint -btr_cur_optimistic_insert( -/*======================*/ - /* out: DB_SUCCESS, DB_WAIT_LOCK, - DB_FAIL, or error number */ - ulint flags, /* in: undo logging and locking flags: if not - zero, the parameters index and thr should be - specified */ - btr_cur_t* cursor, /* in: cursor on page after which to insert; - cursor stays valid */ - dtuple_t* entry, /* in: entry to insert */ - rec_t** rec, /* out: pointer to inserted record if - succeed */ - big_rec_t** big_rec,/* out: big rec vector whose fields have to - be stored externally by the caller, or - NULL */ - que_thr_t* thr, /* in: query thread or NULL */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Performs an insert on a page of an index tree. It is assumed that mtr -holds an x-latch on the tree and on the cursor page. If the insert is -made on the leaf level, to avoid deadlocks, mtr must also own x-latches -to brothers of page, if those brothers exist. */ - -ulint -btr_cur_pessimistic_insert( -/*=======================*/ - /* out: DB_SUCCESS or error number */ - ulint flags, /* in: undo logging and locking flags: if not - zero, the parameter thr should be - specified; if no undo logging is specified, - then the caller must have reserved enough - free extents in the file space so that the - insertion will certainly succeed */ - btr_cur_t* cursor, /* in: cursor after which to insert; - cursor stays valid */ - dtuple_t* entry, /* in: entry to insert */ - rec_t** rec, /* out: pointer to inserted record if - succeed */ - big_rec_t** big_rec,/* out: big rec vector whose fields have to - be stored externally by the caller, or - NULL */ - que_thr_t* thr, /* in: query thread or NULL */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Updates a record when the update causes no size changes in its fields. 
*/ - -ulint -btr_cur_update_in_place( -/*====================*/ - /* out: DB_SUCCESS or error number */ - ulint flags, /* in: undo logging and locking flags */ - btr_cur_t* cursor, /* in: cursor on the record to update; - cursor stays valid and positioned on the - same record */ - upd_t* update, /* in: update vector */ - ulint cmpl_info,/* in: compiler info on secondary index - updates */ - que_thr_t* thr, /* in: query thread */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Tries to update a record on a page in an index tree. It is assumed that mtr -holds an x-latch on the page. The operation does not succeed if there is too -little space on the page or if the update would result in too empty a page, -so that tree compression is recommended. */ - -ulint -btr_cur_optimistic_update( -/*======================*/ - /* out: DB_SUCCESS, or DB_OVERFLOW if the - updated record does not fit, DB_UNDERFLOW - if the page would become too empty */ - ulint flags, /* in: undo logging and locking flags */ - btr_cur_t* cursor, /* in: cursor on the record to update; - cursor stays valid and positioned on the - same record */ - upd_t* update, /* in: update vector; this must also - contain trx id and roll ptr fields */ - ulint cmpl_info,/* in: compiler info on secondary index - updates */ - que_thr_t* thr, /* in: query thread */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Performs an update of a record on a page of a tree. It is assumed -that mtr holds an x-latch on the tree and on the cursor page. If the -update is made on the leaf level, to avoid deadlocks, mtr must also -own x-latches to brothers of page, if those brothers exist. 
*/ - -ulint -btr_cur_pessimistic_update( -/*=======================*/ - /* out: DB_SUCCESS or error code */ - ulint flags, /* in: undo logging, locking, and rollback - flags */ - btr_cur_t* cursor, /* in: cursor on the record to update */ - big_rec_t** big_rec,/* out: big rec vector whose fields have to - be stored externally by the caller, or NULL */ - upd_t* update, /* in: update vector; this is allowed also - contain trx id and roll ptr fields, but - the values in update vector have no effect */ - ulint cmpl_info,/* in: compiler info on secondary index - updates */ - que_thr_t* thr, /* in: query thread */ - mtr_t* mtr); /* in: mtr */ -/*************************************************************** -Marks a clustered index record deleted. Writes an undo log record to -undo log on this delete marking. Writes in the trx id field the id -of the deleting transaction, and in the roll ptr field pointer to the -undo log record created. */ - -ulint -btr_cur_del_mark_set_clust_rec( -/*===========================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, or error - number */ - ulint flags, /* in: undo logging and locking flags */ - btr_cur_t* cursor, /* in: cursor */ - ibool val, /* in: value to set */ - que_thr_t* thr, /* in: query thread */ - mtr_t* mtr); /* in: mtr */ -/*************************************************************** -Sets a secondary index record delete mark to TRUE or FALSE. */ - -ulint -btr_cur_del_mark_set_sec_rec( -/*=========================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, or error - number */ - ulint flags, /* in: locking flag */ - btr_cur_t* cursor, /* in: cursor */ - ibool val, /* in: value to set */ - que_thr_t* thr, /* in: query thread */ - mtr_t* mtr); /* in: mtr */ -/*************************************************************** -Sets a secondary index record delete mark to FALSE. This function is -only used by the insert buffer insert merge mechanism. 
*/ - -void -btr_cur_del_unmark_for_ibuf( -/*========================*/ - rec_t* rec, /* in: record to delete unmark */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Tries to compress a page of the tree on the leaf level. It is assumed -that mtr holds an x-latch on the tree and on the cursor page. To avoid -deadlocks, mtr must also own x-latches to brothers of page, if those -brothers exist. NOTE: it is assumed that the caller has reserved enough -free extents so that the compression will always succeed if done! */ - -void -btr_cur_compress( -/*=============*/ - btr_cur_t* cursor, /* in: cursor on the page to compress; - cursor does not stay valid */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Tries to compress a page of the tree if it seems useful. It is assumed -that mtr holds an x-latch on the tree and on the cursor page. To avoid -deadlocks, mtr must also own x-latches to brothers of page, if those -brothers exist. NOTE: it is assumed that the caller has reserved enough -free extents so that the compression will always succeed if done! */ - -ibool -btr_cur_compress_if_useful( -/*=======================*/ - /* out: TRUE if compression occurred */ - btr_cur_t* cursor, /* in: cursor on the page to compress; - cursor does not stay valid if compression - occurs */ - mtr_t* mtr); /* in: mtr */ -/*********************************************************** -Removes the record on which the tree cursor is positioned. It is assumed -that the mtr has an x-latch on the page where the cursor is positioned, -but no latch on the whole tree. 
*/ - -ibool -btr_cur_optimistic_delete( -/*======================*/ - /* out: TRUE if success, i.e., the page - did not become too empty */ - btr_cur_t* cursor, /* in: cursor on the record to delete; - cursor stays valid: if deletion succeeds, - on function exit it points to the successor - of the deleted record */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Removes the record on which the tree cursor is positioned. Tries -to compress the page if its fillfactor drops below a threshold -or if it is the only page on the level. It is assumed that mtr holds -an x-latch on the tree and on the cursor page. To avoid deadlocks, -mtr must also own x-latches to brothers of page, if those brothers -exist. */ - -ibool -btr_cur_pessimistic_delete( -/*=======================*/ - /* out: TRUE if compression occurred */ - ulint* err, /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE; - the latter may occur because we may have - to update node pointers on upper levels, - and in the case of variable length keys - these may actually grow in size */ - ibool has_reserved_extents, /* in: TRUE if the - caller has already reserved enough free - extents so that he knows that the operation - will succeed */ - btr_cur_t* cursor, /* in: cursor on the record to delete; - if compression does not occur, the cursor - stays valid: it points to successor of - deleted record on function exit */ - ibool in_rollback,/* in: TRUE if called in rollback */ - mtr_t* mtr); /* in: mtr */ -/*************************************************************** -Parses a redo log record of updating a record in-place. 
*/ - -byte* -btr_cur_parse_update_in_place( -/*==========================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - dict_index_t* index); /* in: index corresponding to page */ -/******************************************************************** -Parses the redo log record for delete marking or unmarking of a clustered -index record. */ - -byte* -btr_cur_parse_del_mark_set_clust_rec( -/*=================================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - dict_index_t* index, /* in: index corresponding to page */ - page_t* page); /* in: page or NULL */ -/******************************************************************** -Parses the redo log record for delete marking or unmarking of a secondary -index record. */ - -byte* -btr_cur_parse_del_mark_set_sec_rec( -/*===============================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page); /* in: page or NULL */ -/*********************************************************************** -Estimates the number of rows in a given index range. */ - -ib_longlong -btr_estimate_n_rows_in_range( -/*=========================*/ - /* out: estimated number of rows */ - dict_index_t* index, /* in: index */ - dtuple_t* tuple1, /* in: range start, may also be empty tuple */ - ulint mode1, /* in: search mode for range start */ - dtuple_t* tuple2, /* in: range end, may also be empty tuple */ - ulint mode2); /* in: search mode for range end */ -/*********************************************************************** -Estimates the number of different key values in a given index, for -each n-column prefix of the index where n <= dict_index_get_n_unique(index). -The estimates are stored in the array index->stat_n_diff_key_vals. 
*/ - -void -btr_estimate_number_of_different_key_vals( -/*======================================*/ - dict_index_t* index); /* in: index */ -/*********************************************************************** -Marks not updated extern fields as not-owned by this record. The ownership -is transferred to the updated record which is inserted elsewhere in the -index tree. In purge only the owner of externally stored field is allowed -to free the field. */ - -void -btr_cur_mark_extern_inherited_fields( -/*=================================*/ - rec_t* rec, /* in: record in a clustered index */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - upd_t* update, /* in: update vector */ - mtr_t* mtr); /* in: mtr */ -/*********************************************************************** -The complement of the previous function: in an update entry may inherit -some externally stored fields from a record. We must mark them as inherited -in entry, so that they are not freed in a rollback. */ - -void -btr_cur_mark_dtuple_inherited_extern( -/*=================================*/ - dtuple_t* entry, /* in: updated entry to be inserted to - clustered index */ - ulint* ext_vec, /* in: array of extern fields in the - original record */ - ulint n_ext_vec, /* in: number of elements in ext_vec */ - upd_t* update); /* in: update vector */ -/*********************************************************************** -Marks all extern fields in a dtuple as owned by the record. */ - -void -btr_cur_unmark_dtuple_extern_fields( -/*================================*/ - dtuple_t* entry, /* in: clustered index entry */ - ulint* ext_vec, /* in: array of numbers of fields - which have been stored externally */ - ulint n_ext_vec); /* in: number of elements in ext_vec */ -/*********************************************************************** -Stores the fields in big_rec_vec to the tablespace and puts pointers to -them in rec. 
The fields are stored on pages allocated from leaf node -file segment of the index tree. */ - -ulint -btr_store_big_rec_extern_fields( -/*============================*/ - /* out: DB_SUCCESS or error */ - dict_index_t* index, /* in: index of rec; the index tree - MUST be X-latched */ - rec_t* rec, /* in: record */ - const ulint* offsets, /* in: rec_get_offsets(rec, index); - the "external storage" flags in offsets - will not correspond to rec when - this function returns */ - big_rec_t* big_rec_vec, /* in: vector containing fields - to be stored externally */ - mtr_t* local_mtr); /* in: mtr containing the latch to - rec and to the tree */ -/*********************************************************************** -Frees the space in an externally stored field to the file space -management if the field in data is owned the externally stored field, -in a rollback we may have the additional condition that the field must -not be inherited. */ - -void -btr_free_externally_stored_field( -/*=============================*/ - dict_index_t* index, /* in: index of the data, the index - tree MUST be X-latched; if the tree - height is 1, then also the root page - must be X-latched! (this is relevant - in the case this function is called - from purge where 'data' is located on - an undo log page, not an index - page) */ - byte* data, /* in: internally stored data - + reference to the externally - stored part */ - ulint local_len, /* in: length of data */ - ibool do_not_free_inherited,/* in: TRUE if called in a - rollback and we do not want to free - inherited fields */ - mtr_t* local_mtr); /* in: mtr containing the latch to - data an an X-latch to the index - tree */ -/*************************************************************** -Frees the externally stored fields for a record. 
*/ - -void -btr_rec_free_externally_stored_fields( -/*==================================*/ - dict_index_t* index, /* in: index of the data, the index - tree MUST be X-latched */ - rec_t* rec, /* in: record */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - ibool do_not_free_inherited,/* in: TRUE if called in a - rollback and we do not want to free - inherited fields */ - mtr_t* mtr); /* in: mini-transaction handle which contains - an X-latch to record page and to the index - tree */ -/*********************************************************************** -Copies an externally stored field of a record to mem heap. */ - -byte* -btr_rec_copy_externally_stored_field( -/*=================================*/ - /* out: the field copied to heap */ - rec_t* rec, /* in: record */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint no, /* in: field number */ - ulint* len, /* out: length of the field */ - mem_heap_t* heap); /* in: mem heap */ -/*********************************************************************** -Copies an externally stored field of a record to mem heap. Parameter -data contains a pointer to 'internally' stored part of the field: -possibly some data, and the reference to the externally stored part in -the last 20 bytes of data. */ - -byte* -btr_copy_externally_stored_field( -/*=============================*/ - /* out: the whole field copied to heap */ - ulint* len, /* out: length of the whole field */ - byte* data, /* in: 'internally' stored part of the - field containing also the reference to - the external part */ - ulint local_len,/* in: length of data */ - mem_heap_t* heap); /* in: mem heap */ -/*********************************************************************** -Stores the positions of the fields marked as extern storage in the update -vector, and also those fields who are marked as extern storage in rec -and not mentioned in updated fields. 
We use this function to remember -which fields we must mark as extern storage in a record inserted for an -update. */ - -ulint -btr_push_update_extern_fields( -/*==========================*/ - /* out: number of values stored in ext_vect */ - ulint* ext_vect,/* in: array of ulints, must be preallocated - to have space for all fields in rec */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - upd_t* update);/* in: update vector or NULL */ - - -/*######################################################################*/ - -/* In the pessimistic delete, if the page data size drops below this -limit, merging it to a neighbor is tried */ - -#define BTR_CUR_PAGE_COMPRESS_LIMIT (UNIV_PAGE_SIZE / 2) - -/* A slot in the path array. We store here info on a search path down the -tree. Each slot contains data on a single level of the tree. */ - -typedef struct btr_path_struct btr_path_t; -struct btr_path_struct{ - ulint nth_rec; /* index of the record - where the page cursor stopped on - this level (index in alphabetical - order); value ULINT_UNDEFINED - denotes array end */ - ulint n_recs; /* number of records on the page */ -}; - -#define BTR_PATH_ARRAY_N_SLOTS 250 /* size of path array (in slots) */ - -/* The tree cursor: the definition appears here only for the compiler -to know struct size! */ - -struct btr_cur_struct { - dict_index_t* index; /* index where positioned */ - page_cur_t page_cur; /* page cursor */ - page_t* left_page; /* this field is used to store - a pointer to the left neighbor - page, in the cases - BTR_SEARCH_PREV and - BTR_MODIFY_PREV */ - /*------------------------------*/ - que_thr_t* thr; /* this field is only used when - btr_cur_search_... is called for an - index entry insertion: the calling - query thread is passed here to be - used in the insert buffer */ - /*------------------------------*/ - /* The following fields are used in btr_cur_search... 
to pass - information: */ - ulint flag; /* BTR_CUR_HASH, BTR_CUR_HASH_FAIL, - BTR_CUR_BINARY, or - BTR_CUR_INSERT_TO_IBUF */ - ulint tree_height; /* Tree height if the search is done - for a pessimistic insert or update - operation */ - ulint up_match; /* If the search mode was PAGE_CUR_LE, - the number of matched fields to the - the first user record to the right of - the cursor record after - btr_cur_search_...; - for the mode PAGE_CUR_GE, the matched - fields to the first user record AT THE - CURSOR or to the right of it; - NOTE that the up_match and low_match - values may exceed the correct values - for comparison to the adjacent user - record if that record is on a - different leaf page! (See the note in - row_ins_duplicate_key.) */ - ulint up_bytes; /* number of matched bytes to the - right at the time cursor positioned; - only used internally in searches: not - defined after the search */ - ulint low_match; /* if search mode was PAGE_CUR_LE, - the number of matched fields to the - first user record AT THE CURSOR or - to the left of it after - btr_cur_search_...; - NOT defined for PAGE_CUR_GE or any - other search modes; see also the NOTE - in up_match! 
*/ - ulint low_bytes; /* number of matched bytes to the - right at the time cursor positioned; - only used internally in searches: not - defined after the search */ - ulint n_fields; /* prefix length used in a hash - search if hash_node != NULL */ - ulint n_bytes; /* hash prefix bytes if hash_node != - NULL */ - ulint fold; /* fold value used in the search if - flag is BTR_CUR_HASH */ - /*------------------------------*/ - btr_path_t* path_arr; /* in estimating the number of - rows in range, we store in this array - information of the path through - the tree */ -}; - -/* Values for the flag documenting the used search method */ -#define BTR_CUR_HASH 1 /* successful shortcut using the hash - index */ -#define BTR_CUR_HASH_FAIL 2 /* failure using hash, success using - binary search: the misleading hash - reference is stored in the field - hash_node, and might be necessary to - update */ -#define BTR_CUR_BINARY 3 /* success using the binary search */ -#define BTR_CUR_INSERT_TO_IBUF 4 /* performed the intended insert to - the insert buffer */ - -/* If pessimistic delete fails because of lack of file space, -there is still a good change of success a little later: try this many times, -and sleep this many microseconds in between */ -#define BTR_CUR_RETRY_DELETE_N_TIMES 100 -#define BTR_CUR_RETRY_SLEEP_TIME 50000 - -/* The reference in a field for which data is stored on a different page. -The reference is at the end of the 'locally' stored part of the field. -'Locally' means storage in the index record. -We store locally a long enough prefix of each column so that we can determine -the ordering parts of each index record without looking into the externally -stored part. 
*/ - -/*--------------------------------------*/ -#define BTR_EXTERN_SPACE_ID 0 /* space id where stored */ -#define BTR_EXTERN_PAGE_NO 4 /* page no where stored */ -#define BTR_EXTERN_OFFSET 8 /* offset of BLOB header - on that page */ -#define BTR_EXTERN_LEN 12 /* 8 bytes containing the - length of the externally - stored part of the BLOB. - The 2 highest bits are - reserved to the flags below. */ -/*--------------------------------------*/ -#define BTR_EXTERN_FIELD_REF_SIZE 20 - -/* The highest bit of BTR_EXTERN_LEN (i.e., the highest bit of the byte -at lowest address) is set to 1 if this field does not 'own' the externally -stored field; only the owner field is allowed to free the field in purge! -If the 2nd highest bit is 1 then it means that the externally stored field -was inherited from an earlier version of the row. In rollback we are not -allowed to free an inherited external field. */ - -#define BTR_EXTERN_OWNER_FLAG 128 -#define BTR_EXTERN_INHERITED_FLAG 64 - -extern ulint btr_cur_n_non_sea; -extern ulint btr_cur_n_sea; -extern ulint btr_cur_n_non_sea_old; -extern ulint btr_cur_n_sea_old; - -#ifndef UNIV_NONINL -#include "btr0cur.ic" -#endif - -#endif diff --git a/storage/innobase/include/btr0cur.ic b/storage/innobase/include/btr0cur.ic deleted file mode 100644 index bd2c46eb734..00000000000 --- a/storage/innobase/include/btr0cur.ic +++ /dev/null @@ -1,154 +0,0 @@ -/****************************************************** -The index tree cursor - -(c) 1994-1996 Innobase Oy - -Created 10/16/1994 Heikki Tuuri -*******************************************************/ - -#include "btr0btr.h" - -/************************************************************* -Returns the page cursor component of a tree cursor. 
*/ -UNIV_INLINE -page_cur_t* -btr_cur_get_page_cur( -/*=================*/ - /* out: pointer to page cursor component */ - btr_cur_t* cursor) /* in: tree cursor */ -{ - return(&(cursor->page_cur)); -} - -/************************************************************* -Returns the record pointer of a tree cursor. */ -UNIV_INLINE -rec_t* -btr_cur_get_rec( -/*============*/ - /* out: pointer to record */ - btr_cur_t* cursor) /* in: tree cursor */ -{ - return(page_cur_get_rec(&(cursor->page_cur))); -} - -/************************************************************* -Invalidates a tree cursor by setting record pointer to NULL. */ -UNIV_INLINE -void -btr_cur_invalidate( -/*===============*/ - btr_cur_t* cursor) /* in: tree cursor */ -{ - page_cur_invalidate(&(cursor->page_cur)); -} - -/************************************************************* -Returns the page of a tree cursor. */ -UNIV_INLINE -page_t* -btr_cur_get_page( -/*=============*/ - /* out: pointer to page */ - btr_cur_t* cursor) /* in: tree cursor */ -{ - return(buf_frame_align(page_cur_get_rec(&(cursor->page_cur)))); -} - -/************************************************************* -Returns the index of a cursor. */ -UNIV_INLINE -dict_index_t* -btr_cur_get_index( -/*==============*/ - /* out: index */ - btr_cur_t* cursor) /* in: B-tree cursor */ -{ - return(cursor->index); -} - -/************************************************************* -Positions a tree cursor at a given record. */ -UNIV_INLINE -void -btr_cur_position( -/*=============*/ - dict_index_t* index, /* in: index */ - rec_t* rec, /* in: record in tree */ - btr_cur_t* cursor) /* in: cursor */ -{ - page_cur_position(rec, btr_cur_get_page_cur(cursor)); - - cursor->index = index; -} - -/************************************************************************* -Checks if compressing an index page where a btr cursor is placed makes -sense. 
*/ -UNIV_INLINE -ibool -btr_cur_compress_recommendation( -/*============================*/ - /* out: TRUE if compression is recommended */ - btr_cur_t* cursor, /* in: btr cursor */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* page; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)), - MTR_MEMO_PAGE_X_FIX)); - - page = btr_cur_get_page(cursor); - - if ((page_get_data_size(page) < BTR_CUR_PAGE_COMPRESS_LIMIT) - || ((btr_page_get_next(page, mtr) == FIL_NULL) - && (btr_page_get_prev(page, mtr) == FIL_NULL))) { - - /* The page fillfactor has dropped below a predefined - minimum value OR the level in the B-tree contains just - one page: we recommend compression if this is not the - root page. */ - - return(dict_index_get_page(cursor->index) - != buf_frame_get_page_no(page)); - } - - return(FALSE); -} - -/************************************************************************* -Checks if the record on which the cursor is placed can be deleted without -making tree compression necessary (or, recommended). */ -UNIV_INLINE -ibool -btr_cur_can_delete_without_compress( -/*================================*/ - /* out: TRUE if can be deleted without - recommended compression */ - btr_cur_t* cursor, /* in: btr cursor */ - ulint rec_size,/* in: rec_get_size(btr_cur_get_rec(cursor))*/ - mtr_t* mtr) /* in: mtr */ -{ - page_t* page; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)), - MTR_MEMO_PAGE_X_FIX)); - - page = btr_cur_get_page(cursor); - - if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT) - || ((btr_page_get_next(page, mtr) == FIL_NULL) - && (btr_page_get_prev(page, mtr) == FIL_NULL)) - || (page_get_n_recs(page) < 2)) { - - /* The page fillfactor will drop below a predefined - minimum value, OR the level in the B-tree contains just - one page, OR the page will become empty: we recommend - compression if this is not the root page. 
*/ - - return(dict_index_get_page(cursor->index) - == buf_frame_get_page_no(page)); - } - - return(TRUE); -} diff --git a/storage/innobase/include/btr0pcur.h b/storage/innobase/include/btr0pcur.h deleted file mode 100644 index ee40e905544..00000000000 --- a/storage/innobase/include/btr0pcur.h +++ /dev/null @@ -1,520 +0,0 @@ -/****************************************************** -The index tree persistent cursor - -(c) 1996 Innobase Oy - -Created 2/23/1996 Heikki Tuuri -*******************************************************/ - -#ifndef btr0pcur_h -#define btr0pcur_h - -#include "univ.i" -#include "dict0dict.h" -#include "data0data.h" -#include "mtr0mtr.h" -#include "page0cur.h" -#include "btr0cur.h" -#include "btr0btr.h" -#include "btr0types.h" - -/* Relative positions for a stored cursor position */ -#define BTR_PCUR_ON 1 -#define BTR_PCUR_BEFORE 2 -#define BTR_PCUR_AFTER 3 -/* Note that if the tree is not empty, btr_pcur_store_position does not -use the following, but only uses the above three alternatives, where the -position is stored relative to a specific record: this makes implementation -of a scroll cursor easier */ -#define BTR_PCUR_BEFORE_FIRST_IN_TREE 4 /* in an empty tree */ -#define BTR_PCUR_AFTER_LAST_IN_TREE 5 /* in an empty tree */ - -/****************************************************************** -Allocates memory for a persistent cursor object and initializes the cursor. */ - -btr_pcur_t* -btr_pcur_create_for_mysql(void); -/*============================*/ - /* out, own: persistent cursor */ -/****************************************************************** -Frees the memory for a persistent cursor object. */ - -void -btr_pcur_free_for_mysql( -/*====================*/ - btr_pcur_t* cursor); /* in, own: persistent cursor */ -/****************************************************************** -Copies the stored position of a pcur to another pcur. 
*/ - -void -btr_pcur_copy_stored_position( -/*==========================*/ - btr_pcur_t* pcur_receive, /* in: pcur which will receive the - position info */ - btr_pcur_t* pcur_donate); /* in: pcur from which the info is - copied */ -/****************************************************************** -Sets the old_rec_buf field to NULL. */ -UNIV_INLINE -void -btr_pcur_init( -/*==========*/ - btr_pcur_t* pcur); /* in: persistent cursor */ -/****************************************************************** -Initializes and opens a persistent cursor to an index tree. It should be -closed with btr_pcur_close. */ -UNIV_INLINE -void -btr_pcur_open( -/*==========*/ - dict_index_t* index, /* in: index */ - dtuple_t* tuple, /* in: tuple on which search done */ - ulint mode, /* in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be - PAGE_CUR_LE, not PAGE_CUR_GE, as the latter - may end up on the previous page from the - record! */ - ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */ - mtr_t* mtr); /* in: mtr */ -/****************************************************************** -Opens an persistent cursor to an index tree without initializing the -cursor. */ -UNIV_INLINE -void -btr_pcur_open_with_no_init( -/*=======================*/ - dict_index_t* index, /* in: index */ - dtuple_t* tuple, /* in: tuple on which search done */ - ulint mode, /* in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be - PAGE_CUR_LE, not PAGE_CUR_GE, as the latter - may end up on the previous page of the - record! */ - ulint latch_mode,/* in: BTR_SEARCH_LEAF, ...; - NOTE that if has_search_latch != 0 then - we maybe do not acquire a latch on the cursor - page, but assume that the caller uses his - btr search latch to protect the record! 
*/ - btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */ - ulint has_search_latch,/* in: latch mode the caller - currently has on btr_search_latch: - RW_S_LATCH, or 0 */ - mtr_t* mtr); /* in: mtr */ -/********************************************************************* -Opens a persistent cursor at either end of an index. */ -UNIV_INLINE -void -btr_pcur_open_at_index_side( -/*========================*/ - ibool from_left, /* in: TRUE if open to the low end, - FALSE if to the high end */ - dict_index_t* index, /* in: index */ - ulint latch_mode, /* in: latch mode */ - btr_pcur_t* pcur, /* in: cursor */ - ibool do_init, /* in: TRUE if should be initialized */ - mtr_t* mtr); /* in: mtr */ -/****************************************************************** -Gets the up_match value for a pcur after a search. */ -UNIV_INLINE -ulint -btr_pcur_get_up_match( -/*==================*/ - /* out: number of matched fields at the cursor - or to the right if search mode was PAGE_CUR_GE, - otherwise undefined */ - btr_pcur_t* cursor); /* in: memory buffer for persistent cursor */ -/****************************************************************** -Gets the low_match value for a pcur after a search. */ -UNIV_INLINE -ulint -btr_pcur_get_low_match( -/*===================*/ - /* out: number of matched fields at the cursor - or to the right if search mode was PAGE_CUR_LE, - otherwise undefined */ - btr_pcur_t* cursor); /* in: memory buffer for persistent cursor */ -/****************************************************************** -If mode is PAGE_CUR_G or PAGE_CUR_GE, opens a persistent cursor on the first -user record satisfying the search condition, in the case PAGE_CUR_L or -PAGE_CUR_LE, on the last user record. If no such user record exists, then -in the first case sets the cursor after last in tree, and in the latter case -before first in tree. The latching mode must be BTR_SEARCH_LEAF or -BTR_MODIFY_LEAF. 
*/ - -void -btr_pcur_open_on_user_rec( -/*======================*/ - dict_index_t* index, /* in: index */ - dtuple_t* tuple, /* in: tuple on which search done */ - ulint mode, /* in: PAGE_CUR_L, ... */ - ulint latch_mode, /* in: BTR_SEARCH_LEAF or - BTR_MODIFY_LEAF */ - btr_pcur_t* cursor, /* in: memory buffer for persistent - cursor */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************** -Positions a cursor at a randomly chosen position within a B-tree. */ -UNIV_INLINE -void -btr_pcur_open_at_rnd_pos( -/*=====================*/ - dict_index_t* index, /* in: index */ - ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /* in/out: B-tree pcur */ - mtr_t* mtr); /* in: mtr */ -/****************************************************************** -Frees the possible old_rec_buf buffer of a persistent cursor and sets the -latch mode of the persistent cursor to BTR_NO_LATCHES. */ -UNIV_INLINE -void -btr_pcur_close( -/*===========*/ - btr_pcur_t* cursor); /* in: persistent cursor */ -/****************************************************************** -The position of the cursor is stored by taking an initial segment of the -record the cursor is positioned on, before, or after, and copying it to the -cursor data structure, or just setting a flag if the cursor id before the -first in an EMPTY tree, or after the last in an EMPTY tree. NOTE that the -page where the cursor is positioned must not be empty if the index tree is -not totally empty! */ - -void -btr_pcur_store_position( -/*====================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr); /* in: mtr */ -/****************************************************************** -Restores the stored position of a persistent cursor bufferfixing the page and -obtaining the specified latches. 
If the cursor position was saved when the -(1) cursor was positioned on a user record: this function restores the position -to the last record LESS OR EQUAL to the stored record; -(2) cursor was positioned on a page infimum record: restores the position to -the last record LESS than the user record which was the successor of the page -infimum; -(3) cursor was positioned on the page supremum: restores to the first record -GREATER than the user record which was the predecessor of the supremum. -(4) cursor was positioned before the first or after the last in an empty tree: -restores to before first or after the last in the tree. */ - -ibool -btr_pcur_restore_position( -/*======================*/ - /* out: TRUE if the cursor position - was stored when it was on a user record - and it can be restored on a user record - whose ordering fields are identical to - the ones of the original user record */ - ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /* in: detached persistent cursor */ - mtr_t* mtr); /* in: mtr */ -/****************************************************************** -If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY, -releases the page latch and bufferfix reserved by the cursor. -NOTE! In the case of BTR_LEAF_MODIFY, there should not exist changes -made by the current mini-transaction to the data protected by the -cursor latch, as then the latch must not be released until mtr_commit. */ - -void -btr_pcur_release_leaf( -/*==================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr); /* in: mtr */ -/************************************************************* -Gets the rel_pos field for a cursor whose position has been stored. */ -UNIV_INLINE -ulint -btr_pcur_get_rel_pos( -/*=================*/ - /* out: BTR_PCUR_ON, ... */ - btr_pcur_t* cursor);/* in: persistent cursor */ -/************************************************************* -Sets the mtr field for a pcur. 
*/ -UNIV_INLINE -void -btr_pcur_set_mtr( -/*=============*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr); /* in, own: mtr */ -/************************************************************* -Gets the mtr field for a pcur. */ -UNIV_INLINE -mtr_t* -btr_pcur_get_mtr( -/*=============*/ - /* out: mtr */ - btr_pcur_t* cursor); /* in: persistent cursor */ -/****************************************************************** -Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES, -that is, the cursor becomes detached. If there have been modifications -to the page where pcur is positioned, this can be used instead of -btr_pcur_release_leaf. Function btr_pcur_store_position should be used -before calling this, if restoration of cursor is wanted later. */ -UNIV_INLINE -void -btr_pcur_commit( -/*============*/ - btr_pcur_t* pcur); /* in: persistent cursor */ -/****************************************************************** -Differs from btr_pcur_commit in that we can specify the mtr to commit. */ -UNIV_INLINE -void -btr_pcur_commit_specify_mtr( -/*========================*/ - btr_pcur_t* pcur, /* in: persistent cursor */ - mtr_t* mtr); /* in: mtr to commit */ -/****************************************************************** -Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */ -UNIV_INLINE -ibool -btr_pcur_is_detached( -/*=================*/ - /* out: TRUE if detached */ - btr_pcur_t* pcur); /* in: persistent cursor */ -/************************************************************* -Moves the persistent cursor to the next record in the tree. If no records are -left, the cursor stays 'after last in tree'. 
*/ -UNIV_INLINE -ibool -btr_pcur_move_to_next( -/*==================*/ - /* out: TRUE if the cursor was not after last - in tree */ - btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the - function may release the page latch */ - mtr_t* mtr); /* in: mtr */ -/************************************************************* -Moves the persistent cursor to the previous record in the tree. If no records -are left, the cursor stays 'before first in tree'. */ - -ibool -btr_pcur_move_to_prev( -/*==================*/ - /* out: TRUE if the cursor was not before first - in tree */ - btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the - function may release the page latch */ - mtr_t* mtr); /* in: mtr */ -/************************************************************* -Moves the persistent cursor to the last record on the same page. */ -UNIV_INLINE -void -btr_pcur_move_to_last_on_page( -/*==========================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr); /* in: mtr */ -/************************************************************* -Moves the persistent cursor to the next user record in the tree. If no user -records are left, the cursor ends up 'after last in tree'. */ -UNIV_INLINE -ibool -btr_pcur_move_to_next_user_rec( -/*===========================*/ - /* out: TRUE if the cursor moved forward, - ending on a user record */ - btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the - function may release the page latch */ - mtr_t* mtr); /* in: mtr */ -/************************************************************* -Moves the persistent cursor to the first record on the next page. -Releases the latch on the current page, and bufferunfixes it. -Note that there must not be modifications on the current page, -as then the x-latch can be released only in mtr_commit. 
*/ - -void -btr_pcur_move_to_next_page( -/*=======================*/ - btr_pcur_t* cursor, /* in: persistent cursor; must be on the - last record of the current page */ - mtr_t* mtr); /* in: mtr */ -/************************************************************* -Moves the persistent cursor backward if it is on the first record -of the page. Releases the latch on the current page, and bufferunfixes -it. Note that to prevent a possible deadlock, the operation first -stores the position of the cursor, releases the leaf latch, acquires -necessary latches and restores the cursor position again before returning. -The alphabetical position of the cursor is guaranteed to be sensible -on return, but it may happen that the cursor is not positioned on the -last record of any page, because the structure of the tree may have -changed while the cursor had no latches. */ - -void -btr_pcur_move_backward_from_page( -/*=============================*/ - btr_pcur_t* cursor, /* in: persistent cursor, must be on the - first record of the current page */ - mtr_t* mtr); /* in: mtr */ -/************************************************************* -Returns the btr cursor component of a persistent cursor. */ -UNIV_INLINE -btr_cur_t* -btr_pcur_get_btr_cur( -/*=================*/ - /* out: pointer to btr cursor component */ - btr_pcur_t* cursor); /* in: persistent cursor */ -/************************************************************* -Returns the page cursor component of a persistent cursor. */ -UNIV_INLINE -page_cur_t* -btr_pcur_get_page_cur( -/*==================*/ - /* out: pointer to page cursor component */ - btr_pcur_t* cursor); /* in: persistent cursor */ -/************************************************************* -Returns the page of a persistent cursor. 
*/ -UNIV_INLINE -page_t* -btr_pcur_get_page( -/*==============*/ - /* out: pointer to the page */ - btr_pcur_t* cursor);/* in: persistent cursor */ -/************************************************************* -Returns the record of a persistent cursor. */ -UNIV_INLINE -rec_t* -btr_pcur_get_rec( -/*=============*/ - /* out: pointer to the record */ - btr_pcur_t* cursor);/* in: persistent cursor */ -/************************************************************* -Checks if the persistent cursor is on a user record. */ -UNIV_INLINE -ibool -btr_pcur_is_on_user_rec( -/*====================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr); /* in: mtr */ -/************************************************************* -Checks if the persistent cursor is after the last user record on -a page. */ -UNIV_INLINE -ibool -btr_pcur_is_after_last_on_page( -/*===========================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr); /* in: mtr */ -/************************************************************* -Checks if the persistent cursor is before the first user record on -a page. */ -UNIV_INLINE -ibool -btr_pcur_is_before_first_on_page( -/*=============================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr); /* in: mtr */ -/************************************************************* -Checks if the persistent cursor is before the first user record in -the index tree. */ -UNIV_INLINE -ibool -btr_pcur_is_before_first_in_tree( -/*=============================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr); /* in: mtr */ -/************************************************************* -Checks if the persistent cursor is after the last user record in -the index tree. 
*/ -UNIV_INLINE -ibool -btr_pcur_is_after_last_in_tree( -/*===========================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr); /* in: mtr */ -/************************************************************* -Moves the persistent cursor to the next record on the same page. */ -UNIV_INLINE -void -btr_pcur_move_to_next_on_page( -/*==========================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr); /* in: mtr */ -/************************************************************* -Moves the persistent cursor to the previous record on the same page. */ -UNIV_INLINE -void -btr_pcur_move_to_prev_on_page( -/*==========================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr); /* in: mtr */ - - -/* The persistent B-tree cursor structure. This is used mainly for SQL -selects, updates, and deletes. */ - -struct btr_pcur_struct{ - btr_cur_t btr_cur; /* a B-tree cursor */ - ulint latch_mode; /* see TODO note below! - BTR_SEARCH_LEAF, BTR_MODIFY_LEAF, - BTR_MODIFY_TREE, or BTR_NO_LATCHES, - depending on the latching state of - the page and tree where the cursor is - positioned; the last value means that - the cursor is not currently positioned: - we say then that the cursor is - detached; it can be restored to - attached if the old position was - stored in old_rec */ - ulint old_stored; /* BTR_PCUR_OLD_STORED - or BTR_PCUR_OLD_NOT_STORED */ - rec_t* old_rec; /* if cursor position is stored, - contains an initial segment of the - latest record cursor was positioned - either on, before, or after */ - ulint old_n_fields; /* number of fields in old_rec */ - ulint rel_pos; /* BTR_PCUR_ON, BTR_PCUR_BEFORE, or - BTR_PCUR_AFTER, depending on whether - cursor was on, before, or after the - old_rec record */ - buf_block_t* block_when_stored;/* buffer block when the position was - stored; note that if AWE is on, frames - may move */ - dulint modify_clock; /* the modify clock value of the - buffer block when the cursor 
position - was stored */ - ulint pos_state; /* see TODO note below! - BTR_PCUR_IS_POSITIONED, - BTR_PCUR_WAS_POSITIONED, - BTR_PCUR_NOT_POSITIONED */ - ulint search_mode; /* PAGE_CUR_G, ... */ - trx_t* trx_if_known; /* the transaction, if we know it; - otherwise this field is not defined; - can ONLY BE USED in error prints in - fatal assertion failures! */ - /*-----------------------------*/ - /* NOTE that the following fields may possess dynamically allocated - memory which should be freed if not needed anymore! */ - - mtr_t* mtr; /* NULL, or this field may contain - a mini-transaction which holds the - latch on the cursor page */ - byte* old_rec_buf; /* NULL, or a dynamically allocated - buffer for old_rec */ - ulint buf_size; /* old_rec_buf size if old_rec_buf - is not NULL */ -}; - -#define BTR_PCUR_IS_POSITIONED 1997660512 /* TODO: currently, the state - can be BTR_PCUR_IS_POSITIONED, - though it really should be - BTR_PCUR_WAS_POSITIONED, - because we have no obligation - to commit the cursor with - mtr; similarly latch_mode may - be out of date. This can - lead to problems if btr_pcur - is not used the right way; - all current code should be - ok. */ -#define BTR_PCUR_WAS_POSITIONED 1187549791 -#define BTR_PCUR_NOT_POSITIONED 1328997689 - -#define BTR_PCUR_OLD_STORED 908467085 -#define BTR_PCUR_OLD_NOT_STORED 122766467 - -#ifndef UNIV_NONINL -#include "btr0pcur.ic" -#endif - -#endif diff --git a/storage/innobase/include/btr0pcur.ic b/storage/innobase/include/btr0pcur.ic deleted file mode 100644 index 66462530716..00000000000 --- a/storage/innobase/include/btr0pcur.ic +++ /dev/null @@ -1,630 +0,0 @@ -/****************************************************** -The index tree persistent cursor - -(c) 1996 Innobase Oy - -Created 2/23/1996 Heikki Tuuri -*******************************************************/ - - -/************************************************************* -Gets the rel_pos field for a cursor whose position has been stored. 
*/ -UNIV_INLINE -ulint -btr_pcur_get_rel_pos( -/*=================*/ - /* out: BTR_PCUR_ON, ... */ - btr_pcur_t* cursor) /* in: persistent cursor */ -{ - ut_ad(cursor); - ut_ad(cursor->old_rec); - ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED); - ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED - || cursor->pos_state == BTR_PCUR_IS_POSITIONED); - - return(cursor->rel_pos); -} - -/************************************************************* -Sets the mtr field for a pcur. */ -UNIV_INLINE -void -btr_pcur_set_mtr( -/*=============*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr) /* in, own: mtr */ -{ - ut_ad(cursor); - - cursor->mtr = mtr; -} - -/************************************************************* -Gets the mtr field for a pcur. */ -UNIV_INLINE -mtr_t* -btr_pcur_get_mtr( -/*=============*/ - /* out: mtr */ - btr_pcur_t* cursor) /* in: persistent cursor */ -{ - ut_ad(cursor); - - return(cursor->mtr); -} - -/************************************************************* -Returns the btr cursor component of a persistent cursor. */ -UNIV_INLINE -btr_cur_t* -btr_pcur_get_btr_cur( -/*=================*/ - /* out: pointer to btr cursor component */ - btr_pcur_t* cursor) /* in: persistent cursor */ -{ - return(&(cursor->btr_cur)); -} - -/************************************************************* -Returns the page cursor component of a persistent cursor. */ -UNIV_INLINE -page_cur_t* -btr_pcur_get_page_cur( -/*==================*/ - /* out: pointer to page cursor component */ - btr_pcur_t* cursor) /* in: persistent cursor */ -{ - return(btr_cur_get_page_cur(&(cursor->btr_cur))); -} - -/************************************************************* -Returns the page of a persistent cursor. 
*/ -UNIV_INLINE -page_t* -btr_pcur_get_page( -/*==============*/ - /* out: pointer to the page */ - btr_pcur_t* cursor) /* in: persistent cursor */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - - return(page_cur_get_page(btr_pcur_get_page_cur(cursor))); -} - -/************************************************************* -Returns the record of a persistent cursor. */ -UNIV_INLINE -rec_t* -btr_pcur_get_rec( -/*=============*/ - /* out: pointer to the record */ - btr_pcur_t* cursor) /* in: persistent cursor */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - return(page_cur_get_rec(btr_pcur_get_page_cur(cursor))); -} - -/****************************************************************** -Gets the up_match value for a pcur after a search. */ -UNIV_INLINE -ulint -btr_pcur_get_up_match( -/*==================*/ - /* out: number of matched fields at the cursor - or to the right if search mode was PAGE_CUR_GE, - otherwise undefined */ - btr_pcur_t* cursor) /* in: memory buffer for persistent cursor */ -{ - btr_cur_t* btr_cursor; - - ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED) - || (cursor->pos_state == BTR_PCUR_IS_POSITIONED)); - - btr_cursor = btr_pcur_get_btr_cur(cursor); - - ut_ad(btr_cursor->up_match != ULINT_UNDEFINED); - - return(btr_cursor->up_match); -} - -/****************************************************************** -Gets the low_match value for a pcur after a search. 
*/ -UNIV_INLINE -ulint -btr_pcur_get_low_match( -/*===================*/ - /* out: number of matched fields at the cursor - or to the right if search mode was PAGE_CUR_LE, - otherwise undefined */ - btr_pcur_t* cursor) /* in: memory buffer for persistent cursor */ -{ - btr_cur_t* btr_cursor; - - ut_ad((cursor->pos_state == BTR_PCUR_WAS_POSITIONED) - || (cursor->pos_state == BTR_PCUR_IS_POSITIONED)); - - btr_cursor = btr_pcur_get_btr_cur(cursor); - ut_ad(btr_cursor->low_match != ULINT_UNDEFINED); - - return(btr_cursor->low_match); -} - -/************************************************************* -Checks if the persistent cursor is after the last user record on -a page. */ -UNIV_INLINE -ibool -btr_pcur_is_after_last_on_page( -/*===========================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr) /* in: mtr */ -{ - UT_NOT_USED(mtr); - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor))); -} - -/************************************************************* -Checks if the persistent cursor is before the first user record on -a page. */ -UNIV_INLINE -ibool -btr_pcur_is_before_first_on_page( -/*=============================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr) /* in: mtr */ -{ - UT_NOT_USED(mtr); - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor))); -} - -/************************************************************* -Checks if the persistent cursor is on a user record. 
*/ -UNIV_INLINE -ibool -btr_pcur_is_on_user_rec( -/*====================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - if ((btr_pcur_is_before_first_on_page(cursor, mtr)) - || (btr_pcur_is_after_last_on_page(cursor, mtr))) { - - return(FALSE); - } - - return(TRUE); -} - -/************************************************************* -Checks if the persistent cursor is before the first user record in -the index tree. */ -UNIV_INLINE -ibool -btr_pcur_is_before_first_in_tree( -/*=============================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - if (btr_page_get_prev(btr_pcur_get_page(cursor), mtr) != FIL_NULL) { - - return(FALSE); - } - - return(page_cur_is_before_first(btr_pcur_get_page_cur(cursor))); -} - -/************************************************************* -Checks if the persistent cursor is after the last user record in -the index tree. */ -UNIV_INLINE -ibool -btr_pcur_is_after_last_in_tree( -/*===========================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - if (btr_page_get_next(btr_pcur_get_page(cursor), mtr) != FIL_NULL) { - - return(FALSE); - } - - return(page_cur_is_after_last(btr_pcur_get_page_cur(cursor))); -} - -/************************************************************* -Moves the persistent cursor to the next record on the same page. 
*/ -UNIV_INLINE -void -btr_pcur_move_to_next_on_page( -/*==========================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr) /* in: mtr */ -{ - UT_NOT_USED(mtr); - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - page_cur_move_to_next(btr_pcur_get_page_cur(cursor)); - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; -} - -/************************************************************* -Moves the persistent cursor to the previous record on the same page. */ -UNIV_INLINE -void -btr_pcur_move_to_prev_on_page( -/*==========================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr) /* in: mtr */ -{ - UT_NOT_USED(mtr); - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - page_cur_move_to_prev(btr_pcur_get_page_cur(cursor)); - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; -} - -/************************************************************* -Moves the persistent cursor to the last record on the same page. */ -UNIV_INLINE -void -btr_pcur_move_to_last_on_page( -/*==========================*/ - btr_pcur_t* cursor, /* in: persistent cursor */ - mtr_t* mtr) /* in: mtr */ -{ - UT_NOT_USED(mtr); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - page_cur_set_after_last(buf_frame_align(btr_pcur_get_rec(cursor)), - btr_pcur_get_page_cur(cursor)); - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; -} - -/************************************************************* -Moves the persistent cursor to the next user record in the tree. If no user -records are left, the cursor ends up 'after last in tree'. 
*/ -UNIV_INLINE -ibool -btr_pcur_move_to_next_user_rec( -/*===========================*/ - /* out: TRUE if the cursor moved forward, - ending on a user record */ - btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the - function may release the page latch */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; -loop: - if (btr_pcur_is_after_last_on_page(cursor, mtr)) { - - if (btr_pcur_is_after_last_in_tree(cursor, mtr)) { - - return(FALSE); - } - - btr_pcur_move_to_next_page(cursor, mtr); - } else { - btr_pcur_move_to_next_on_page(cursor, mtr); - } - - if (btr_pcur_is_on_user_rec(cursor, mtr)) { - - return(TRUE); - } - - goto loop; -} - -/************************************************************* -Moves the persistent cursor to the next record in the tree. If no records are -left, the cursor stays 'after last in tree'. */ -UNIV_INLINE -ibool -btr_pcur_move_to_next( -/*==================*/ - /* out: TRUE if the cursor was not after last - in tree */ - btr_pcur_t* cursor, /* in: persistent cursor; NOTE that the - function may release the page latch */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); - ut_ad(cursor->latch_mode != BTR_NO_LATCHES); - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; - - if (btr_pcur_is_after_last_on_page(cursor, mtr)) { - - if (btr_pcur_is_after_last_in_tree(cursor, mtr)) { - - return(FALSE); - } - - btr_pcur_move_to_next_page(cursor, mtr); - - return(TRUE); - } - - btr_pcur_move_to_next_on_page(cursor, mtr); - - return(TRUE); -} - -/****************************************************************** -Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES, -that is, the cursor becomes detached. If there have been modifications -to the page where pcur is positioned, this can be used instead of -btr_pcur_release_leaf. 
Function btr_pcur_store_position should be used -before calling this, if restoration of cursor is wanted later. */ -UNIV_INLINE -void -btr_pcur_commit( -/*============*/ - btr_pcur_t* pcur) /* in: persistent cursor */ -{ - ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED); - - pcur->latch_mode = BTR_NO_LATCHES; - - mtr_commit(pcur->mtr); - - pcur->pos_state = BTR_PCUR_WAS_POSITIONED; -} - -/****************************************************************** -Differs from btr_pcur_commit in that we can specify the mtr to commit. */ -UNIV_INLINE -void -btr_pcur_commit_specify_mtr( -/*========================*/ - btr_pcur_t* pcur, /* in: persistent cursor */ - mtr_t* mtr) /* in: mtr to commit */ -{ - ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED); - - pcur->latch_mode = BTR_NO_LATCHES; - - mtr_commit(mtr); - - pcur->pos_state = BTR_PCUR_WAS_POSITIONED; -} - -/****************************************************************** -Sets the pcur latch mode to BTR_NO_LATCHES. */ -UNIV_INLINE -void -btr_pcur_detach( -/*============*/ - btr_pcur_t* pcur) /* in: persistent cursor */ -{ - ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED); - - pcur->latch_mode = BTR_NO_LATCHES; - - pcur->pos_state = BTR_PCUR_WAS_POSITIONED; -} - -/****************************************************************** -Tests if a cursor is detached: that is the latch mode is BTR_NO_LATCHES. */ -UNIV_INLINE -ibool -btr_pcur_is_detached( -/*=================*/ - /* out: TRUE if detached */ - btr_pcur_t* pcur) /* in: persistent cursor */ -{ - if (pcur->latch_mode == BTR_NO_LATCHES) { - - return(TRUE); - } - - return(FALSE); -} - -/****************************************************************** -Sets the old_rec_buf field to NULL. 
*/ -UNIV_INLINE -void -btr_pcur_init( -/*==========*/ - btr_pcur_t* pcur) /* in: persistent cursor */ -{ - pcur->old_stored = BTR_PCUR_OLD_NOT_STORED; - pcur->old_rec_buf = NULL; - pcur->old_rec = NULL; -} - -/****************************************************************** -Initializes and opens a persistent cursor to an index tree. It should be -closed with btr_pcur_close. */ -UNIV_INLINE -void -btr_pcur_open( -/*==========*/ - dict_index_t* index, /* in: index */ - dtuple_t* tuple, /* in: tuple on which search done */ - ulint mode, /* in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be - PAGE_CUR_LE, not PAGE_CUR_GE, as the latter - may end up on the previous page from the - record! */ - ulint latch_mode,/* in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */ - mtr_t* mtr) /* in: mtr */ -{ - btr_cur_t* btr_cursor; - - /* Initialize the cursor */ - - btr_pcur_init(cursor); - - cursor->latch_mode = latch_mode; - cursor->search_mode = mode; - - /* Search with the tree cursor */ - - btr_cursor = btr_pcur_get_btr_cur(cursor); - - btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode, - btr_cursor, 0, mtr); - cursor->pos_state = BTR_PCUR_IS_POSITIONED; - - cursor->trx_if_known = NULL; -} - -/****************************************************************** -Opens an persistent cursor to an index tree without initializing the -cursor. */ -UNIV_INLINE -void -btr_pcur_open_with_no_init( -/*=======================*/ - dict_index_t* index, /* in: index */ - dtuple_t* tuple, /* in: tuple on which search done */ - ulint mode, /* in: PAGE_CUR_L, ...; - NOTE that if the search is made using a unique - prefix of a record, mode should be - PAGE_CUR_LE, not PAGE_CUR_GE, as the latter - may end up on the previous page of the - record! 
*/ - ulint latch_mode,/* in: BTR_SEARCH_LEAF, ...; - NOTE that if has_search_latch != 0 then - we maybe do not acquire a latch on the cursor - page, but assume that the caller uses his - btr search latch to protect the record! */ - btr_pcur_t* cursor, /* in: memory buffer for persistent cursor */ - ulint has_search_latch,/* in: latch mode the caller - currently has on btr_search_latch: - RW_S_LATCH, or 0 */ - mtr_t* mtr) /* in: mtr */ -{ - btr_cur_t* btr_cursor; - - cursor->latch_mode = latch_mode; - cursor->search_mode = mode; - - /* Search with the tree cursor */ - - btr_cursor = btr_pcur_get_btr_cur(cursor); - - btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode, - btr_cursor, has_search_latch, mtr); - cursor->pos_state = BTR_PCUR_IS_POSITIONED; - - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; - - cursor->trx_if_known = NULL; -} - -/********************************************************************* -Opens a persistent cursor at either end of an index. */ -UNIV_INLINE -void -btr_pcur_open_at_index_side( -/*========================*/ - ibool from_left, /* in: TRUE if open to the low end, - FALSE if to the high end */ - dict_index_t* index, /* in: index */ - ulint latch_mode, /* in: latch mode */ - btr_pcur_t* pcur, /* in: cursor */ - ibool do_init, /* in: TRUE if should be initialized */ - mtr_t* mtr) /* in: mtr */ -{ - pcur->latch_mode = latch_mode; - - if (from_left) { - pcur->search_mode = PAGE_CUR_G; - } else { - pcur->search_mode = PAGE_CUR_L; - } - - if (do_init) { - btr_pcur_init(pcur); - } - - btr_cur_open_at_index_side(from_left, index, latch_mode, - btr_pcur_get_btr_cur(pcur), mtr); - pcur->pos_state = BTR_PCUR_IS_POSITIONED; - - pcur->old_stored = BTR_PCUR_OLD_NOT_STORED; - - pcur->trx_if_known = NULL; -} - -/************************************************************************** -Positions a cursor at a randomly chosen position within a B-tree. 
*/ -UNIV_INLINE -void -btr_pcur_open_at_rnd_pos( -/*=====================*/ - dict_index_t* index, /* in: index */ - ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... */ - btr_pcur_t* cursor, /* in/out: B-tree pcur */ - mtr_t* mtr) /* in: mtr */ -{ - /* Initialize the cursor */ - - cursor->latch_mode = latch_mode; - cursor->search_mode = PAGE_CUR_G; - - btr_pcur_init(cursor); - - btr_cur_open_at_rnd_pos(index, latch_mode, - btr_pcur_get_btr_cur(cursor), mtr); - cursor->pos_state = BTR_PCUR_IS_POSITIONED; - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; - - cursor->trx_if_known = NULL; -} - -/****************************************************************** -Frees the possible memory heap of a persistent cursor and sets the latch -mode of the persistent cursor to BTR_NO_LATCHES. */ -UNIV_INLINE -void -btr_pcur_close( -/*===========*/ - btr_pcur_t* cursor) /* in: persistent cursor */ -{ - if (cursor->old_rec_buf != NULL) { - - mem_free(cursor->old_rec_buf); - - cursor->old_rec = NULL; - cursor->old_rec_buf = NULL; - } - - cursor->btr_cur.page_cur.rec = NULL; - cursor->old_rec = NULL; - cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; - - cursor->latch_mode = BTR_NO_LATCHES; - cursor->pos_state = BTR_PCUR_NOT_POSITIONED; - - cursor->trx_if_known = NULL; -} diff --git a/storage/innobase/include/btr0sea.h b/storage/innobase/include/btr0sea.h deleted file mode 100644 index 6d1c2bb86d3..00000000000 --- a/storage/innobase/include/btr0sea.h +++ /dev/null @@ -1,255 +0,0 @@ -/************************************************************************ -The index tree adaptive search - -(c) 1996 Innobase Oy - -Created 2/17/1996 Heikki Tuuri -*************************************************************************/ - -#ifndef btr0sea_h -#define btr0sea_h - -#include "univ.i" - -#include "rem0rec.h" -#include "dict0dict.h" -#include "btr0types.h" -#include "mtr0mtr.h" -#include "ha0ha.h" - -/********************************************************************* -Creates and initializes the 
adaptive search system at a database start. */ - -void -btr_search_sys_create( -/*==================*/ - ulint hash_size); /* in: hash index hash table size */ -/************************************************************************ -Returns search info for an index. */ -UNIV_INLINE -btr_search_t* -btr_search_get_info( -/*================*/ - /* out: search info; search mutex reserved */ - dict_index_t* index); /* in: index */ -/********************************************************************* -Creates and initializes a search info struct. */ - -btr_search_t* -btr_search_info_create( -/*===================*/ - /* out, own: search info struct */ - mem_heap_t* heap); /* in: heap where created */ -/********************************************************************* -Returns the value of ref_count. The value is protected by -btr_search_latch. */ -ulint -btr_search_info_get_ref_count( -/*==========================*/ - /* out: ref_count value. */ - btr_search_t* info); /* in: search info. */ -/************************************************************************* -Updates the search info. */ -UNIV_INLINE -void -btr_search_info_update( -/*===================*/ - dict_index_t* index, /* in: index of the cursor */ - btr_cur_t* cursor);/* in: cursor which was just positioned */ -/********************************************************************** -Tries to guess the right search position based on the hash search info -of the index. Note that if mode is PAGE_CUR_LE, which is used in inserts, -and the function returns TRUE, then cursor->up_match and cursor->low_match -both have sensible values. */ - -ibool -btr_search_guess_on_hash( -/*=====================*/ - /* out: TRUE if succeeded */ - dict_index_t* index, /* in: index */ - btr_search_t* info, /* in: index search info */ - dtuple_t* tuple, /* in: logical record */ - ulint mode, /* in: PAGE_CUR_L, ... */ - ulint latch_mode, /* in: BTR_SEARCH_LEAF, ... 
*/ - btr_cur_t* cursor, /* out: tree cursor */ - ulint has_search_latch,/* in: latch mode the caller - currently has on btr_search_latch: - RW_S_LATCH, RW_X_LATCH, or 0 */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************ -Moves or deletes hash entries for moved records. If new_page is already hashed, -then the hash index for page, if any, is dropped. If new_page is not hashed, -and page is hashed, then a new hash index is built to new_page with the same -parameters as page (this often happens when a page is split). */ - -void -btr_search_move_or_delete_hash_entries( -/*===================================*/ - page_t* new_page, /* in: records are copied - to this page */ - page_t* page, /* in: index page */ - dict_index_t* index); /* in: record descriptor */ -/************************************************************************ -Drops a page hash index. */ - -void -btr_search_drop_page_hash_index( -/*============================*/ - page_t* page); /* in: index page, s- or x-latched */ -/************************************************************************ -Drops a page hash index when a page is freed from a fseg to the file system. -Drops possible hash index if the page happens to be in the buffer pool. */ - -void -btr_search_drop_page_hash_when_freed( -/*=================================*/ - ulint space, /* in: space id */ - ulint page_no); /* in: page number */ -/************************************************************************ -Updates the page hash index when a single record is inserted on a page. 
*/ - -void -btr_search_update_hash_node_on_insert( -/*==================================*/ - btr_cur_t* cursor);/* in: cursor which was positioned to the - place to insert using btr_cur_search_..., - and the new record has been inserted next - to the cursor */ -/************************************************************************ -Updates the page hash index when a single record is inserted on a page. */ - -void -btr_search_update_hash_on_insert( -/*=============================*/ - btr_cur_t* cursor);/* in: cursor which was positioned to the - place to insert using btr_cur_search_..., - and the new record has been inserted next - to the cursor */ -/************************************************************************ -Updates the page hash index when a single record is deleted from a page. */ - -void -btr_search_update_hash_on_delete( -/*=============================*/ - btr_cur_t* cursor);/* in: cursor which was positioned on the - record to delete using btr_cur_search_..., - the record is not yet deleted */ -/************************************************************************ -Validates the search system. */ - -ibool -btr_search_validate(void); -/*======================*/ - /* out: TRUE if ok */ - -/* The search info struct in an index */ - -struct btr_search_struct{ - ulint ref_count; /* Number of blocks in this index tree - that have search index built - i.e. block->index points to this index. - Protected by btr_search_latch except - when during initialization in - btr_search_info_create(). */ - - /* The following fields are not protected by any latch. - Unfortunately, this means that they must be aligned to - the machine word, i.e., they cannot be turned into bit-fields. 
*/ - page_t* root_guess; /* the root page frame when it was last time - fetched, or NULL */ - ulint hash_analysis; /* when this exceeds BTR_SEARCH_HASH_ANALYSIS, - the hash analysis starts; this is reset if no - success noticed */ - ibool last_hash_succ; /* TRUE if the last search would have - succeeded, or did succeed, using the hash - index; NOTE that the value here is not exact: - it is not calculated for every search, and the - calculation itself is not always accurate! */ - ulint n_hash_potential; - /* number of consecutive searches - which would have succeeded, or did succeed, - using the hash index; - the range is 0 .. BTR_SEARCH_BUILD_LIMIT + 5 */ - /*----------------------*/ - ulint n_fields; /* recommended prefix length for hash search: - number of full fields */ - ulint n_bytes; /* recommended prefix: number of bytes in - an incomplete field; - see also BTR_PAGE_MAX_REC_SIZE */ - ibool left_side; /* TRUE or FALSE, depending on whether - the leftmost record of several records with - the same prefix should be indexed in the - hash index */ - /*----------------------*/ -#ifdef UNIV_SEARCH_PERF_STAT - ulint n_hash_succ; /* number of successful hash searches thus - far */ - ulint n_hash_fail; /* number of failed hash searches */ - ulint n_patt_succ; /* number of successful pattern searches thus - far */ - ulint n_searches; /* number of searches */ -#endif /* UNIV_SEARCH_PERF_STAT */ -#ifdef UNIV_DEBUG - ulint magic_n; /* magic number */ -# define BTR_SEARCH_MAGIC_N 1112765 -#endif /* UNIV_DEBUG */ -}; - -/* The hash index system */ - -typedef struct btr_search_sys_struct btr_search_sys_t; - -struct btr_search_sys_struct{ - hash_table_t* hash_index; -}; - -extern btr_search_sys_t* btr_search_sys; - -/* The latch protecting the adaptive search system: this latch protects the -(1) hash index; -(2) columns of a record to which we have a pointer in the hash index; - -but does NOT protect: - -(3) next record offset field in a record; -(4) next or previous records 
on the same page. - -Bear in mind (3) and (4) when using the hash index. -*/ - -extern rw_lock_t* btr_search_latch_temp; - -#define btr_search_latch (*btr_search_latch_temp) - -#ifdef UNIV_SEARCH_PERF_STAT -extern ulint btr_search_n_succ; -extern ulint btr_search_n_hash_fail; -#endif /* UNIV_SEARCH_PERF_STAT */ - -/* After change in n_fields or n_bytes in info, this many rounds are waited -before starting the hash analysis again: this is to save CPU time when there -is no hope in building a hash index. */ - -#define BTR_SEARCH_HASH_ANALYSIS 17 - -/* Limit of consecutive searches for trying a search shortcut on the search -pattern */ - -#define BTR_SEARCH_ON_PATTERN_LIMIT 3 - -/* Limit of consecutive searches for trying a search shortcut using the hash -index */ - -#define BTR_SEARCH_ON_HASH_LIMIT 3 - -/* We do this many searches before trying to keep the search latch over calls -from MySQL. If we notice someone waiting for the latch, we again set this -much timeout. This is to reduce contention. */ - -#define BTR_SEA_TIMEOUT 10000 - -#ifndef UNIV_NONINL -#include "btr0sea.ic" -#endif - -#endif diff --git a/storage/innobase/include/btr0sea.ic b/storage/innobase/include/btr0sea.ic deleted file mode 100644 index f4e33027c25..00000000000 --- a/storage/innobase/include/btr0sea.ic +++ /dev/null @@ -1,67 +0,0 @@ -/************************************************************************ -The index tree adaptive search - -(c) 1996 Innobase Oy - -Created 2/17/1996 Heikki Tuuri -*************************************************************************/ - -#include "dict0mem.h" -#include "btr0cur.h" -#include "buf0buf.h" - -/************************************************************************* -Updates the search info. 
*/ - -void -btr_search_info_update_slow( -/*========================*/ - btr_search_t* info, /* in/out: search info */ - btr_cur_t* cursor);/* in: cursor which was just positioned */ - -/************************************************************************ -Returns search info for an index. */ -UNIV_INLINE -btr_search_t* -btr_search_get_info( -/*================*/ - /* out: search info; search mutex reserved */ - dict_index_t* index) /* in: index */ -{ - ut_ad(index); - - return(index->search_info); -} - -/************************************************************************* -Updates the search info. */ -UNIV_INLINE -void -btr_search_info_update( -/*===================*/ - dict_index_t* index, /* in: index of the cursor */ - btr_cur_t* cursor) /* in: cursor which was just positioned */ -{ - btr_search_t* info; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_SHARED)); - ut_ad(!rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - info = btr_search_get_info(index); - - info->hash_analysis++; - - if (info->hash_analysis < BTR_SEARCH_HASH_ANALYSIS) { - - /* Do nothing */ - - return; - - } - - ut_ad(cursor->flag != BTR_CUR_HASH); - - btr_search_info_update_slow(info, cursor); -} diff --git a/storage/innobase/include/btr0types.h b/storage/innobase/include/btr0types.h deleted file mode 100644 index 8fa0bf0602d..00000000000 --- a/storage/innobase/include/btr0types.h +++ /dev/null @@ -1,21 +0,0 @@ -/************************************************************************ -The index tree general types - -(c) 1996 Innobase Oy - -Created 2/17/1996 Heikki Tuuri -*************************************************************************/ - -#ifndef btr0types_h -#define btr0types_h - -#include "univ.i" - -#include "rem0types.h" -#include "page0types.h" - -typedef struct btr_pcur_struct btr_pcur_t; -typedef struct btr_cur_struct btr_cur_t; -typedef struct btr_search_struct btr_search_t; - -#endif diff --git 
a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h deleted file mode 100644 index 3e8972d9182..00000000000 --- a/storage/innobase/include/buf0buf.h +++ /dev/null @@ -1,1074 +0,0 @@ -/* Innobase relational database engine; Copyright (C) 2001 Innobase Oy - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License 2 - as published by the Free Software Foundation in June 1991. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License 2 - along with this program (in file COPYING); if not, write to the Free - Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -/****************************************************** -The database buffer pool high-level routines - -(c) 1995 Innobase Oy - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#ifndef buf0buf_h -#define buf0buf_h - -#include "univ.i" -#include "fil0fil.h" -#include "mtr0types.h" -#include "buf0types.h" -#include "sync0rw.h" -#include "hash0hash.h" -#include "ut0byte.h" -#include "os0proc.h" - -/* Flags for flush types */ -#define BUF_FLUSH_LRU 1 -#define BUF_FLUSH_SINGLE_PAGE 2 -#define BUF_FLUSH_LIST 3 /* An array in the pool struct - has size BUF_FLUSH_LIST + 1: if you - add more flush types, put them in - the middle! 
*/ -/* Modes for buf_page_get_gen */ -#define BUF_GET 10 /* get always */ -#define BUF_GET_IF_IN_POOL 11 /* get if in pool */ -#define BUF_GET_NOWAIT 12 /* get if can set the latch without - waiting */ -#define BUF_GET_NO_LATCH 14 /* get and bufferfix, but set no latch; - we have separated this case, because - it is error-prone programming not to - set a latch, and it should be used - with care */ -/* Modes for buf_page_get_known_nowait */ -#define BUF_MAKE_YOUNG 51 -#define BUF_KEEP_OLD 52 -/* Magic value to use instead of checksums when they are disabled */ -#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL - -extern buf_pool_t* buf_pool; /* The buffer pool of the database */ -#ifdef UNIV_DEBUG -extern ibool buf_debug_prints;/* If this is set TRUE, the program - prints info whenever read or flush - occurs */ -#endif /* UNIV_DEBUG */ -extern ulint srv_buf_pool_write_requests; /* variable to count write request - issued */ - -/************************************************************************ -Creates the buffer pool. */ - -buf_pool_t* -buf_pool_init( -/*==========*/ - /* out, own: buf_pool object, NULL if not - enough memory or error */ - ulint max_size, /* in: maximum size of the buf_pool in - blocks */ - ulint curr_size, /* in: current size to use, must be <= - max_size, currently must be equal to - max_size */ - ulint n_frames); /* in: number of frames; if AWE is used, - this is the size of the address space window - where physical memory pages are mapped; if - AWE is not used then this must be the same - as max_size */ -/************************************************************************* -Gets the current size of buffer buf_pool in bytes. In the case of AWE, the -size of AWE window (= the frames). */ -UNIV_INLINE -ulint -buf_pool_get_curr_size(void); -/*========================*/ - /* out: size in bytes */ -/************************************************************************* -Gets the maximum size of buffer pool in bytes. 
In the case of AWE, the -size of AWE window (= the frames). */ -UNIV_INLINE -ulint -buf_pool_get_max_size(void); -/*=======================*/ - /* out: size in bytes */ -/************************************************************************ -Gets the smallest oldest_modification lsn for any page in the pool. Returns -ut_dulint_zero if all modified pages have been flushed to disk. */ -UNIV_INLINE -dulint -buf_pool_get_oldest_modification(void); -/*==================================*/ - /* out: oldest modification in pool, - ut_dulint_zero if none */ -/************************************************************************* -Allocates a buffer frame. */ - -buf_frame_t* -buf_frame_alloc(void); -/*==================*/ - /* out: buffer frame */ -/************************************************************************* -Frees a buffer frame which does not contain a file page. */ - -void -buf_frame_free( -/*===========*/ - buf_frame_t* frame); /* in: buffer frame */ -/************************************************************************* -Copies contents of a buffer frame to a given buffer. */ -UNIV_INLINE -byte* -buf_frame_copy( -/*===========*/ - /* out: buf */ - byte* buf, /* in: buffer to copy to */ - buf_frame_t* frame); /* in: buffer frame */ -/****************************************************************** -NOTE! The following macros should be used instead of buf_page_get_gen, -to improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed -in LA! */ -#define buf_page_get(SP, OF, LA, MTR) buf_page_get_gen(\ - SP, OF, LA, NULL,\ - BUF_GET, __FILE__, __LINE__, MTR) -/****************************************************************** -Use these macros to bufferfix a page with no latching. Remember not to -read the contents of the page unless you know it is safe. Do not modify -the contents of the page! We have separated this case, because it is -error-prone programming not to set a latch, and it should be used -with care. 
*/ -#define buf_page_get_with_no_latch(SP, OF, MTR) buf_page_get_gen(\ - SP, OF, RW_NO_LATCH, NULL,\ - BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR) -/****************************************************************** -NOTE! The following macros should be used instead of buf_page_get_gen, to -improve debugging. Only values RW_S_LATCH and RW_X_LATCH are allowed as LA! */ -#define buf_page_get_nowait(SP, OF, LA, MTR) buf_page_get_gen(\ - SP, OF, LA, NULL,\ - BUF_GET_NOWAIT, __FILE__, __LINE__, MTR) -/****************************************************************** -NOTE! The following macros should be used instead of -buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and -RW_X_LATCH are allowed as LA! */ -#define buf_page_optimistic_get(LA, BL, G, MC, MTR) \ - buf_page_optimistic_get_func(LA, BL, G, MC, __FILE__, __LINE__, MTR) -/************************************************************************ -This is the general function used to get optimistic access to a database -page. */ - -ibool -buf_page_optimistic_get_func( -/*=========================*/ - /* out: TRUE if success */ - ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ - buf_block_t* block, /* in: guessed block */ - buf_frame_t* guess, /* in: guessed frame; note that AWE may move - frames */ - dulint modify_clock,/* in: modify clock value if mode is - ..._GUESS_ON_CLOCK */ - const char* file, /* in: file name */ - ulint line, /* in: line where called */ - mtr_t* mtr); /* in: mini-transaction */ -/************************************************************************ -Tries to get the page, but if file io is required, releases all latches -in mtr down to the given savepoint. If io is required, this function -retrieves the page to buffer buf_pool, but does not bufferfix it or latch -it. 
*/ -UNIV_INLINE -buf_frame_t* -buf_page_get_release_on_io( -/*=======================*/ - /* out: pointer to the frame, or NULL - if not in buffer buf_pool */ - ulint space, /* in: space id */ - ulint offset, /* in: offset of the page within space - in units of a page */ - buf_frame_t* guess, /* in: guessed frame or NULL */ - ulint rw_latch, /* in: RW_X_LATCH, RW_S_LATCH, - or RW_NO_LATCH */ - ulint savepoint, /* in: mtr savepoint */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************ -This is used to get access to a known database page, when no waiting can be -done. */ - -ibool -buf_page_get_known_nowait( -/*======================*/ - /* out: TRUE if success */ - ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH */ - buf_frame_t* guess, /* in: the known page frame */ - ulint mode, /* in: BUF_MAKE_YOUNG or BUF_KEEP_OLD */ - const char* file, /* in: file name */ - ulint line, /* in: line where called */ - mtr_t* mtr); /* in: mini-transaction */ -/************************************************************************ -This is the general function used to get access to a database page. */ - -buf_frame_t* -buf_page_get_gen( -/*=============*/ - /* out: pointer to the frame or NULL */ - ulint space, /* in: space id */ - ulint offset, /* in: page number */ - ulint rw_latch,/* in: RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ - buf_frame_t* guess, /* in: guessed frame or NULL */ - ulint mode, /* in: BUF_GET, BUF_GET_IF_IN_POOL, - BUF_GET_NO_LATCH */ - const char* file, /* in: file name */ - ulint line, /* in: line where called */ - mtr_t* mtr); /* in: mini-transaction */ -/************************************************************************ -Initializes a page to the buffer buf_pool. The page is usually not read -from a file even if it cannot be found in the buffer buf_pool. This is one -of the functions which perform to a block a state transition NOT_USED => -FILE_PAGE (the other is buf_page_init_for_read above). 
*/ - -buf_frame_t* -buf_page_create( -/*============*/ - /* out: pointer to the frame, page bufferfixed */ - ulint space, /* in: space id */ - ulint offset, /* in: offset of the page within space in units of - a page */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************ -Inits a page to the buffer buf_pool, for use in ibbackup --restore. */ - -void -buf_page_init_for_backup_restore( -/*=============================*/ - ulint space, /* in: space id */ - ulint offset, /* in: offset of the page within space - in units of a page */ - buf_block_t* block); /* in: block to init */ -/************************************************************************ -Decrements the bufferfix count of a buffer control block and releases -a latch, if specified. */ -UNIV_INLINE -void -buf_page_release( -/*=============*/ - buf_block_t* block, /* in: buffer block */ - ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH, - RW_NO_LATCH */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************ -Moves a page to the start of the buffer pool LRU list. This high-level -function can be used to prevent an important page from from slipping out of -the buffer pool. */ - -void -buf_page_make_young( -/*================*/ - buf_frame_t* frame); /* in: buffer frame of a file page */ -/************************************************************************ -Returns TRUE if the page can be found in the buffer pool hash table. NOTE -that it is possible that the page is not yet read from disk, though. */ - -ibool -buf_page_peek( -/*==========*/ - /* out: TRUE if found from page hash table, - NOTE that the page is not necessarily yet read - from disk! */ - ulint space, /* in: space id */ - ulint offset);/* in: page number */ -/************************************************************************ -Returns the buffer control block if the page can be found in the buffer -pool. 
NOTE that it is possible that the page is not yet read -from disk, though. This is a very low-level function: use with care! */ - -buf_block_t* -buf_page_peek_block( -/*================*/ - /* out: control block if found from page hash table, - otherwise NULL; NOTE that the page is not necessarily - yet read from disk! */ - ulint space, /* in: space id */ - ulint offset);/* in: page number */ -/************************************************************************ -Resets the check_index_page_at_flush field of a page if found in the buffer -pool. */ - -void -buf_reset_check_index_page_at_flush( -/*================================*/ - ulint space, /* in: space id */ - ulint offset);/* in: page number */ -/************************************************************************ -Sets file_page_was_freed TRUE if the page is found in the buffer pool. -This function should be called when we free a file page and want the -debug version to check that it is not accessed any more unless -reallocated. */ - -buf_block_t* -buf_page_set_file_page_was_freed( -/*=============================*/ - /* out: control block if found from page hash table, - otherwise NULL */ - ulint space, /* in: space id */ - ulint offset); /* in: page number */ -/************************************************************************ -Sets file_page_was_freed FALSE if the page is found in the buffer pool. -This function should be called when we free a file page and want the -debug version to check that it is not accessed any more unless -reallocated. */ - -buf_block_t* -buf_page_reset_file_page_was_freed( -/*===============================*/ - /* out: control block if found from page hash table, - otherwise NULL */ - ulint space, /* in: space id */ - ulint offset); /* in: page number */ -/************************************************************************ -Recommends a move of a block to the start of the LRU list if there is danger -of dropping from the buffer pool. 
NOTE: does not reserve the buffer pool -mutex. */ -UNIV_INLINE -ibool -buf_block_peek_if_too_old( -/*======================*/ - /* out: TRUE if should be made younger */ - buf_block_t* block); /* in: block to make younger */ -/************************************************************************ -Returns the current state of is_hashed of a page. FALSE if the page is -not in the pool. NOTE that this operation does not fix the page in the -pool if it is found there. */ - -ibool -buf_page_peek_if_search_hashed( -/*===========================*/ - /* out: TRUE if page hash index is built in search - system */ - ulint space, /* in: space id */ - ulint offset);/* in: page number */ -/************************************************************************ -Gets the youngest modification log sequence number for a frame. -Returns zero if not file page or no modification occurred yet. */ -UNIV_INLINE -dulint -buf_frame_get_newest_modification( -/*==============================*/ - /* out: newest modification to page */ - buf_frame_t* frame); /* in: pointer to a frame */ -/************************************************************************ -Increments the modify clock of a frame by 1. The caller must (1) own the -pool mutex and block bufferfix count has to be zero, (2) or own an x-lock -on the block. */ -UNIV_INLINE -dulint -buf_frame_modify_clock_inc( -/*=======================*/ - /* out: new value */ - buf_frame_t* frame); /* in: pointer to a frame */ -/************************************************************************ -Increments the modify clock of a frame by 1. The caller must (1) own the -buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock -on the block. */ -UNIV_INLINE -dulint -buf_block_modify_clock_inc( -/*=======================*/ - /* out: new value */ - buf_block_t* block); /* in: block */ -/************************************************************************ -Returns the value of the modify clock. 
The caller must have an s-lock -or x-lock on the block. */ -UNIV_INLINE -dulint -buf_block_get_modify_clock( -/*=======================*/ - /* out: value */ - buf_block_t* block); /* in: block */ -/************************************************************************ -Calculates a page checksum which is stored to the page when it is written -to a file. Note that we must be careful to calculate the same value -on 32-bit and 64-bit architectures. */ - -ulint -buf_calc_page_new_checksum( -/*=======================*/ - /* out: checksum */ - byte* page); /* in: buffer page */ -/************************************************************************ -In versions < 4.0.14 and < 4.1.1 there was a bug that the checksum only -looked at the first few bytes of the page. This calculates that old -checksum. -NOTE: we must first store the new formula checksum to -FIL_PAGE_SPACE_OR_CHKSUM before calculating and storing this old checksum -because this takes that field as an input! */ - -ulint -buf_calc_page_old_checksum( -/*=======================*/ - /* out: checksum */ - byte* page); /* in: buffer page */ -/************************************************************************ -Checks if a page is corrupt. */ - -ibool -buf_page_is_corrupted( -/*==================*/ - /* out: TRUE if corrupted */ - byte* read_buf); /* in: a database page */ -/************************************************************************** -Gets the page number of a pointer pointing within a buffer frame containing -a file page. */ -UNIV_INLINE -ulint -buf_frame_get_page_no( -/*==================*/ - /* out: page number */ - byte* ptr); /* in: pointer to within a buffer frame */ -/************************************************************************** -Gets the space id of a pointer pointing within a buffer frame containing a -file page. 
*/ -UNIV_INLINE -ulint -buf_frame_get_space_id( -/*===================*/ - /* out: space id */ - byte* ptr); /* in: pointer to within a buffer frame */ -/************************************************************************** -Gets the space id, page offset, and byte offset within page of a -pointer pointing to a buffer frame containing a file page. */ -UNIV_INLINE -void -buf_ptr_get_fsp_addr( -/*=================*/ - byte* ptr, /* in: pointer to a buffer frame */ - ulint* space, /* out: space id */ - fil_addr_t* addr); /* out: page offset and byte offset */ -/************************************************************************** -Gets the hash value of the page the pointer is pointing to. This can be used -in searches in the lock hash table. */ -UNIV_INLINE -ulint -buf_frame_get_lock_hash_val( -/*========================*/ - /* out: lock hash value */ - byte* ptr); /* in: pointer to within a buffer frame */ -/************************************************************************** -Gets the mutex number protecting the page record lock hash chain in the lock -table. */ -UNIV_INLINE -mutex_t* -buf_frame_get_mutex( -/*================*/ - /* out: mutex */ - byte* ptr); /* in: pointer to within a buffer frame */ -/*********************************************************************** -Gets the frame the pointer is pointing to. */ -UNIV_INLINE -buf_frame_t* -buf_frame_align( -/*============*/ - /* out: pointer to frame */ - byte* ptr); /* in: pointer to a frame */ -/*********************************************************************** -Checks if a pointer points to the block array of the buffer pool (blocks, not -the frames). */ -UNIV_INLINE -ibool -buf_pool_is_block( -/*==============*/ - /* out: TRUE if pointer to block */ - void* ptr); /* in: pointer to memory */ -#ifdef UNIV_DEBUG -/************************************************************************* -Validates the buffer pool data structure. 
*/ - -ibool -buf_validate(void); -/*==============*/ -/************************************************************************* -Prints info of the buffer pool data structure. */ - -void -buf_print(void); -/*============*/ - -/************************************************************************* -Returns the number of latched pages in the buffer pool. */ - -ulint -buf_get_latched_pages_number(void); -/*==============================*/ -#endif /* UNIV_DEBUG */ - -/************************************************************************ -Prints a page to stderr. */ - -void -buf_page_print( -/*===========*/ - byte* read_buf); /* in: a database page */ - -/************************************************************************* -Returns the number of pending buf pool ios. */ - -ulint -buf_get_n_pending_ios(void); -/*=======================*/ -/************************************************************************* -Prints info of the buffer i/o. */ - -void -buf_print_io( -/*=========*/ - FILE* file); /* in: file where to print */ -/************************************************************************* -Returns the ratio in percents of modified pages in the buffer pool / -database pages in the buffer pool. */ - -ulint -buf_get_modified_ratio_pct(void); -/*============================*/ -/************************************************************************** -Refreshes the statistics used to print per-second averages. */ - -void -buf_refresh_io_stats(void); -/*======================*/ -/************************************************************************* -Checks that all file pages in the buffer are in a replaceable state. */ - -ibool -buf_all_freed(void); -/*===============*/ -/************************************************************************* -Checks that there currently are no pending i/o-operations for the buffer -pool. 
*/ - -ibool -buf_pool_check_no_pending_io(void); -/*==============================*/ - /* out: TRUE if there is no pending i/o */ -/************************************************************************* -Invalidates the file pages in the buffer pool when an archive recovery is -completed. All the file pages buffered must be in a replaceable state when -this function is called: not latched and not modified. */ - -void -buf_pool_invalidate(void); -/*=====================*/ - -/*======================================================================== ---------------------------- LOWER LEVEL ROUTINES ------------------------- -=========================================================================*/ - -/************************************************************************ -Maps the page of block to a frame, if not mapped yet. Unmaps some page -from the end of the awe_LRU_free_mapped. */ - -void -buf_awe_map_page_to_frame( -/*======================*/ - buf_block_t* block, /* in: block whose page should be - mapped to a frame */ - ibool add_to_mapped_list);/* in: TRUE if we in the case - we need to map the page should also - add the block to the - awe_LRU_free_mapped list */ -#ifdef UNIV_SYNC_DEBUG -/************************************************************************* -Adds latch level info for the rw-lock protecting the buffer frame. This -should be called in the debug version after a successful latching of a -page if we know the latching order level of the acquired latch. */ -UNIV_INLINE -void -buf_page_dbg_add_level( -/*===================*/ - buf_frame_t* frame, /* in: buffer page where we have acquired - a latch */ - ulint level); /* in: latching order level */ -#endif /* UNIV_SYNC_DEBUG */ -/************************************************************************* -Gets a pointer to the memory frame of a block. 
*/ -UNIV_INLINE -buf_frame_t* -buf_block_get_frame( -/*================*/ - /* out: pointer to the frame */ - buf_block_t* block); /* in: pointer to the control block */ -/************************************************************************* -Gets the space id of a block. */ -UNIV_INLINE -ulint -buf_block_get_space( -/*================*/ - /* out: space id */ - buf_block_t* block); /* in: pointer to the control block */ -/************************************************************************* -Gets the page number of a block. */ -UNIV_INLINE -ulint -buf_block_get_page_no( -/*==================*/ - /* out: page number */ - buf_block_t* block); /* in: pointer to the control block */ -/*********************************************************************** -Gets the block to whose frame the pointer is pointing to. */ -UNIV_INLINE -buf_block_t* -buf_block_align( -/*============*/ - /* out: pointer to block */ - byte* ptr); /* in: pointer to a frame */ -/************************************************************************ -This function is used to get info if there is an io operation -going on on a buffer page. */ -UNIV_INLINE -ibool -buf_page_io_query( -/*==============*/ - /* out: TRUE if io going on */ - buf_block_t* block); /* in: pool block, must be bufferfixed */ -/*********************************************************************** -Accessor function for block array. */ -UNIV_INLINE -buf_block_t* -buf_pool_get_nth_block( -/*===================*/ - /* out: pointer to block */ - buf_pool_t* pool, /* in: pool */ - ulint i); /* in: index of the block */ -/************************************************************************ -Function which inits a page for read to the buffer buf_pool. If the page is -(1) already in buf_pool, or -(2) if we specify to read only ibuf pages and the page is not an ibuf page, or -(3) if the space is deleted or being deleted, -then this function does nothing. 
-Sets the io_fix flag to BUF_IO_READ and sets a non-recursive exclusive lock -on the buffer frame. The io-handler must take care that the flag is cleared -and the lock released later. This is one of the functions which perform the -state transition NOT_USED => FILE_PAGE to a block (the other is -buf_page_create). */ - -buf_block_t* -buf_page_init_for_read( -/*===================*/ - /* out: pointer to the block or NULL */ - ulint* err, /* out: DB_SUCCESS or DB_TABLESPACE_DELETED */ - ulint mode, /* in: BUF_READ_IBUF_PAGES_ONLY, ... */ - ulint space, /* in: space id */ - ib_longlong tablespace_version,/* in: prevents reading from a wrong - version of the tablespace in case we have done - DISCARD + IMPORT */ - ulint offset);/* in: page number */ -/************************************************************************ -Completes an asynchronous read or write request of a file page to or from -the buffer pool. */ - -void -buf_page_io_complete( -/*=================*/ - buf_block_t* block); /* in: pointer to the block in question */ -/************************************************************************ -Calculates a folded value of a file page address to use in the page hash -table. */ -UNIV_INLINE -ulint -buf_page_address_fold( -/*==================*/ - /* out: the folded value */ - ulint space, /* in: space id */ - ulint offset);/* in: offset of the page within space */ -/********************************************************************** -Returns the control block of a file page, NULL if not found. */ -UNIV_INLINE -buf_block_t* -buf_page_hash_get( -/*==============*/ - /* out: block, NULL if not found */ - ulint space, /* in: space id */ - ulint offset);/* in: offset of the page within space */ -/*********************************************************************** -Increments the pool clock by one and returns its new value. Remember that -in the 32 bit version the clock wraps around at 4 billion! 
*/ -UNIV_INLINE -ulint -buf_pool_clock_tic(void); -/*====================*/ - /* out: new clock value */ -/************************************************************************* -Gets the current length of the free list of buffer blocks. */ - -ulint -buf_get_free_list_len(void); -/*=======================*/ - - - -/* The buffer control block structure */ - -struct buf_block_struct{ - - /* 1. General fields */ - - ulint magic_n; /* magic number to check */ - ulint state; /* state of the control block: - BUF_BLOCK_NOT_USED, ...; changing - this is only allowed when a thread - has BOTH the buffer pool mutex AND - block->mutex locked */ - byte* frame; /* pointer to buffer frame which - is of size UNIV_PAGE_SIZE, and - aligned to an address divisible by - UNIV_PAGE_SIZE; if AWE is used, this - will be NULL for the pages which are - currently not mapped into the virtual - address space window of the buffer - pool */ - os_awe_t* awe_info; /* if AWE is used, then an array of - awe page infos for - UNIV_PAGE_SIZE / OS_AWE_X86_PAGE_SIZE - (normally = 4) physical memory - pages; otherwise NULL */ - ulint space; /* space id of the page */ - ulint offset; /* page number within the space */ - ulint lock_hash_val; /* hashed value of the page address - in the record lock hash table */ - mutex_t mutex; /* mutex protecting this block: - state (also protected by the buffer - pool mutex), io_fix, buf_fix_count, - and accessed; we introduce this new - mutex in InnoDB-5.1 to relieve - contention on the buffer pool mutex */ - rw_lock_t lock; /* read-write lock of the buffer - frame */ - buf_block_t* hash; /* node used in chaining to the page - hash table */ - ibool check_index_page_at_flush; - /* TRUE if we know that this is - an index page, and want the database - to check its consistency before flush; - note that there may be pages in the - buffer pool which are index pages, - but this flag is not set because - we do not keep track of all pages */ - /* 2. 
Page flushing fields */ - - UT_LIST_NODE_T(buf_block_t) flush_list; - /* node of the modified, not yet - flushed blocks list */ - dulint newest_modification; - /* log sequence number of the youngest - modification to this block, zero if - not modified */ - dulint oldest_modification; - /* log sequence number of the START of - the log entry written of the oldest - modification to this block which has - not yet been flushed on disk; zero if - all modifications are on disk */ - ulint flush_type; /* if this block is currently being - flushed to disk, this tells the - flush_type: BUF_FLUSH_LRU or - BUF_FLUSH_LIST */ - - /* 3. LRU replacement algorithm fields */ - - UT_LIST_NODE_T(buf_block_t) free; - /* node of the free block list */ - ibool in_free_list; /* TRUE if in the free list; used in - debugging */ - UT_LIST_NODE_T(buf_block_t) LRU; - /* node of the LRU list */ - UT_LIST_NODE_T(buf_block_t) awe_LRU_free_mapped; - /* in the AWE version node in the - list of free and LRU blocks which are - mapped to a frame */ - ibool in_LRU_list; /* TRUE of the page is in the LRU list; - used in debugging */ - ulint LRU_position; /* value which monotonically - decreases (or may stay constant if - the block is in the old blocks) toward - the end of the LRU list, if the pool - ulint_clock has not wrapped around: - NOTE that this value can only be used - in heuristic algorithms, because of - the possibility of a wrap-around! 
*/ - ulint freed_page_clock;/* the value of freed_page_clock - of the buffer pool when this block was - the last time put to the head of the - LRU list; a thread is allowed to - read this for heuristic purposes - without holding any mutex or latch */ - ibool old; /* TRUE if the block is in the old - blocks in the LRU list */ - ibool accessed; /* TRUE if the page has been accessed - while in the buffer pool: read-ahead - may read in pages which have not been - accessed yet; this is protected by - block->mutex; a thread is allowed to - read this for heuristic purposes - without holding any mutex or latch */ - ulint buf_fix_count; /* count of how manyfold this block - is currently bufferfixed; this is - protected by block->mutex */ - ulint io_fix; /* if a read is pending to the frame, - io_fix is BUF_IO_READ, in the case - of a write BUF_IO_WRITE, otherwise 0; - this is protected by block->mutex */ - /* 4. Optimistic search field */ - - dulint modify_clock; /* this clock is incremented every - time a pointer to a record on the - page may become obsolete; this is - used in the optimistic cursor - positioning: if the modify clock has - not changed, we know that the pointer - is still valid; this field may be - changed if the thread (1) owns the - pool mutex and the page is not - bufferfixed, or (2) the thread has an - x-latch on the block */ - - /* 5. Hash search fields: NOTE that the first 4 fields are NOT - protected by any semaphore! 
*/ - - ulint n_hash_helps; /* counter which controls building - of a new hash index for the page */ - ulint n_fields; /* recommended prefix length for hash - search: number of full fields */ - ulint n_bytes; /* recommended prefix: number of bytes - in an incomplete field */ - ibool left_side; /* TRUE or FALSE, depending on - whether the leftmost record of several - records with the same prefix should be - indexed in the hash index */ - - /* These 6 fields may only be modified when we have - an x-latch on btr_search_latch AND - a) we are holding an s-latch or x-latch on block->lock or - b) we know that block->buf_fix_count == 0. - - An exception to this is when we init or create a page - in the buffer pool in buf0buf.c. */ - - ibool is_hashed; /* TRUE if hash index has already been - built on this page; note that it does - not guarantee that the index is - complete, though: there may have been - hash collisions, record deletions, - etc. */ - ulint n_pointers; /* used in debugging: the number of - pointers in the adaptive hash index - pointing to this frame */ - ulint curr_n_fields; /* prefix length for hash indexing: - number of full fields */ - ulint curr_n_bytes; /* number of bytes in hash indexing */ - ibool curr_left_side; /* TRUE or FALSE in hash indexing */ - dict_index_t* index; /* Index for which the adaptive - hash index has been created. */ - /* 6. Debug fields */ -#ifdef UNIV_SYNC_DEBUG - rw_lock_t debug_latch; /* in the debug version, each thread - which bufferfixes the block acquires - an s-latch here; so we can use the - debug utilities in sync0rw */ -#endif - ibool file_page_was_freed; - /* this is set to TRUE when fsp - frees a page in buffer pool */ -}; - -#define BUF_BLOCK_MAGIC_N 41526563 - -/* The buffer pool structure. NOTE! The definition appears here only for -other modules of this directory (buf) to see it. Do not use from outside! */ - -struct buf_pool_struct{ - - /* 1. 
General fields */ - - mutex_t mutex; /* mutex protecting the buffer pool - struct and control blocks, except the - read-write lock in them */ - byte* frame_mem; /* pointer to the memory area which - was allocated for the frames; in AWE - this is the virtual address space - window where we map pages stored - in physical memory */ - byte* frame_zero; /* pointer to the first buffer frame: - this may differ from frame_mem, because - this is aligned by the frame size */ - byte* high_end; /* pointer to the end of the buffer - frames */ - ulint n_frames; /* number of frames */ - buf_block_t* blocks; /* array of buffer control blocks */ - buf_block_t** blocks_of_frames;/* inverse mapping which can be used - to retrieve the buffer control block - of a frame; this is an array which - lists the blocks of frames in the - order frame_zero, - frame_zero + UNIV_PAGE_SIZE, ... - a control block is always assigned - for each frame, even if the frame does - not contain any data; note that in AWE - there are more control blocks than - buffer frames */ - os_awe_t* awe_info; /* if AWE is used, AWE info for the - physical 4 kB memory pages associated - with buffer frames */ - ulint max_size; /* number of control blocks == - maximum pool size in pages */ - ulint curr_size; /* current pool size in pages; - currently always the same as - max_size */ - hash_table_t* page_hash; /* hash table of the file pages */ - - ulint n_pend_reads; /* number of pending read operations */ - - time_t last_printout_time; /* when buf_print was last time - called */ - ulint n_pages_read; /* number read operations */ - ulint n_pages_written;/* number write operations */ - ulint n_pages_created;/* number of pages created in the pool - with no read */ - ulint n_page_gets; /* number of page gets performed; - also successful searches through - the adaptive hash index are - counted as page gets; this field - is NOT protected by the buffer - pool mutex */ - ulint n_pages_awe_remapped; /* if AWE is enabled, the - 
number of remaps of blocks to - buffer frames */ - ulint n_page_gets_old;/* n_page_gets when buf_print was - last time called: used to calculate - hit rate */ - ulint n_pages_read_old;/* n_pages_read when buf_print was - last time called */ - ulint n_pages_written_old;/* number write operations */ - ulint n_pages_created_old;/* number of pages created in - the pool with no read */ - ulint n_pages_awe_remapped_old; - /* 2. Page flushing algorithm fields */ - - UT_LIST_BASE_NODE_T(buf_block_t) flush_list; - /* base node of the modified block - list */ - ibool init_flush[BUF_FLUSH_LIST + 1]; - /* this is TRUE when a flush of the - given type is being initialized */ - ulint n_flush[BUF_FLUSH_LIST + 1]; - /* this is the number of pending - writes in the given flush type */ - os_event_t no_flush[BUF_FLUSH_LIST + 1]; - /* this is in the set state when there - is no flush batch of the given type - running */ - ulint ulint_clock; /* a sequence number used to count - time. NOTE! This counter wraps - around at 4 billion (if ulint == - 32 bits)! */ - ulint freed_page_clock;/* a sequence number used to count the - number of buffer blocks removed from - the end of the LRU list; NOTE that - this counter may wrap around at 4 - billion! A thread is allowed to - read this for heuristic purposes - without holding any mutex or latch */ - ulint LRU_flush_ended;/* when an LRU flush ends for a page, - this is incremented by one; this is - set to zero when a buffer block is - allocated */ - - /* 3. 
LRU replacement algorithm fields */ - - UT_LIST_BASE_NODE_T(buf_block_t) free; - /* base node of the free block list; - in the case of AWE, at the start are - always free blocks for which the - physical memory is mapped to a frame */ - UT_LIST_BASE_NODE_T(buf_block_t) LRU; - /* base node of the LRU list */ - buf_block_t* LRU_old; /* pointer to the about 3/8 oldest - blocks in the LRU list; NULL if LRU - length less than BUF_LRU_OLD_MIN_LEN */ - ulint LRU_old_len; /* length of the LRU list from - the block to which LRU_old points - onward, including that block; - see buf0lru.c for the restrictions - on this value; not defined if - LRU_old == NULL */ - UT_LIST_BASE_NODE_T(buf_block_t) awe_LRU_free_mapped; - /* list of those blocks which are - in the LRU list or the free list, and - where the page is mapped to a frame; - thus, frames allocated, e.g., to the - locki table, are not in this list */ -}; - -/* States of a control block */ -#define BUF_BLOCK_NOT_USED 211 /* is in the free list */ -#define BUF_BLOCK_READY_FOR_USE 212 /* when buf_get_free_block returns - a block, it is in this state */ -#define BUF_BLOCK_FILE_PAGE 213 /* contains a buffered file page */ -#define BUF_BLOCK_MEMORY 214 /* contains some main memory object */ -#define BUF_BLOCK_REMOVE_HASH 215 /* hash index should be removed - before putting to the free list */ - -/* Io_fix states of a control block; these must be != 0 */ -#define BUF_IO_READ 561 -#define BUF_IO_WRITE 562 - -/************************************************************************ -Let us list the consistency conditions for different control block states. 
- -NOT_USED: is in free list, not in LRU list, not in flush list, nor - page hash table -READY_FOR_USE: is not in free list, LRU list, or flush list, nor page - hash table -MEMORY: is not in free list, LRU list, or flush list, nor page - hash table -FILE_PAGE: space and offset are defined, is in page hash table - if io_fix == BUF_IO_WRITE, - pool: no_flush[block->flush_type] is in reset state, - pool: n_flush[block->flush_type] > 0 - - (1) if buf_fix_count == 0, then - is in LRU list, not in free list - is in flush list, - if and only if oldest_modification > 0 - is x-locked, - if and only if io_fix == BUF_IO_READ - is s-locked, - if and only if io_fix == BUF_IO_WRITE - - (2) if buf_fix_count > 0, then - is not in LRU list, not in free list - is in flush list, - if and only if oldest_modification > 0 - if io_fix == BUF_IO_READ, - is x-locked - if io_fix == BUF_IO_WRITE, - is s-locked - -State transitions: - -NOT_USED => READY_FOR_USE -READY_FOR_USE => MEMORY -READY_FOR_USE => FILE_PAGE -MEMORY => NOT_USED -FILE_PAGE => NOT_USED NOTE: This transition is allowed if and only if - (1) buf_fix_count == 0, - (2) oldest_modification == 0, and - (3) io_fix == 0. 
-*/ - -#ifndef UNIV_NONINL -#include "buf0buf.ic" -#endif - -#endif diff --git a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic deleted file mode 100644 index 4e96e13b8dc..00000000000 --- a/storage/innobase/include/buf0buf.ic +++ /dev/null @@ -1,665 +0,0 @@ -/****************************************************** -The database buffer buf_pool - -(c) 1995 Innobase Oy - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#include "buf0flu.h" -#include "buf0lru.h" -#include "buf0rea.h" -#include "mtr0mtr.h" - -#ifdef UNIV_DEBUG -extern ulint buf_dbg_counter; /* This is used to insert validation - operations in execution in the - debug version */ -#endif /* UNIV_DEBUG */ -/************************************************************************ -Recommends a move of a block to the start of the LRU list if there is danger -of dropping from the buffer pool. NOTE: does not reserve the buffer pool -mutex. */ -UNIV_INLINE -ibool -buf_block_peek_if_too_old( -/*======================*/ - /* out: TRUE if should be made younger */ - buf_block_t* block) /* in: block to make younger */ -{ - return(buf_pool->freed_page_clock >= block->freed_page_clock - + 1 + (buf_pool->curr_size / 4)); -} - -/************************************************************************* -Gets the current size of buffer buf_pool in bytes. In the case of AWE, the -size of AWE window (= the frames). */ -UNIV_INLINE -ulint -buf_pool_get_curr_size(void) -/*========================*/ - /* out: size in bytes */ -{ - return((buf_pool->n_frames) * UNIV_PAGE_SIZE); -} - -/************************************************************************* -Gets the maximum size of buffer buf_pool in bytes. In the case of AWE, the -size of AWE window (= the frames). 
*/ -UNIV_INLINE -ulint -buf_pool_get_max_size(void) -/*=======================*/ - /* out: size in bytes */ -{ - return((buf_pool->n_frames) * UNIV_PAGE_SIZE); -} - -/*********************************************************************** -Accessor function for block array. */ -UNIV_INLINE -buf_block_t* -buf_pool_get_nth_block( -/*===================*/ - /* out: pointer to block */ - buf_pool_t* buf_pool,/* in: buf_pool */ - ulint i) /* in: index of the block */ -{ - ut_ad(buf_pool); - ut_ad(i < buf_pool->max_size); - - return(i + buf_pool->blocks); -} - -/*********************************************************************** -Checks if a pointer points to the block array of the buffer pool (blocks, not -the frames). */ -UNIV_INLINE -ibool -buf_pool_is_block( -/*==============*/ - /* out: TRUE if pointer to block */ - void* ptr) /* in: pointer to memory */ -{ - if ((buf_pool->blocks <= (buf_block_t*)ptr) - && ((buf_block_t*)ptr < buf_pool->blocks - + buf_pool->max_size)) { - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************ -Gets the smallest oldest_modification lsn for any page in the pool. Returns -ut_dulint_zero if all modified pages have been flushed to disk. */ -UNIV_INLINE -dulint -buf_pool_get_oldest_modification(void) -/*==================================*/ - /* out: oldest modification in pool, - ut_dulint_zero if none */ -{ - buf_block_t* block; - dulint lsn; - - mutex_enter(&(buf_pool->mutex)); - - block = UT_LIST_GET_LAST(buf_pool->flush_list); - - if (block == NULL) { - lsn = ut_dulint_zero; - } else { - lsn = block->oldest_modification; - } - - mutex_exit(&(buf_pool->mutex)); - - return(lsn); -} - -/*********************************************************************** -Increments the buf_pool clock by one and returns its new value. Remember -that in the 32 bit version the clock wraps around at 4 billion! 
*/ -UNIV_INLINE -ulint -buf_pool_clock_tic(void) -/*====================*/ - /* out: new clock value */ -{ - ut_ad(mutex_own(&(buf_pool->mutex))); - - buf_pool->ulint_clock++; - - return(buf_pool->ulint_clock); -} - -/************************************************************************* -Gets a pointer to the memory frame of a block. */ -UNIV_INLINE -buf_frame_t* -buf_block_get_frame( -/*================*/ - /* out: pointer to the frame */ - buf_block_t* block) /* in: pointer to the control block */ -{ - ut_ad(block); - ut_ad(block >= buf_pool->blocks); - ut_ad(block < buf_pool->blocks + buf_pool->max_size); - ut_ad(block->state != BUF_BLOCK_NOT_USED); - ut_ad((block->state != BUF_BLOCK_FILE_PAGE) - || (block->buf_fix_count > 0)); - - return(block->frame); -} - -/************************************************************************* -Gets the space id of a block. */ -UNIV_INLINE -ulint -buf_block_get_space( -/*================*/ - /* out: space id */ - buf_block_t* block) /* in: pointer to the control block */ -{ - ut_ad(block); - ut_ad(block >= buf_pool->blocks); - ut_ad(block < buf_pool->blocks + buf_pool->max_size); - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - ut_ad(block->buf_fix_count > 0); - - return(block->space); -} - -/************************************************************************* -Gets the page number of a block. */ -UNIV_INLINE -ulint -buf_block_get_page_no( -/*==================*/ - /* out: page number */ - buf_block_t* block) /* in: pointer to the control block */ -{ - ut_ad(block); - ut_ad(block >= buf_pool->blocks); - ut_ad(block < buf_pool->blocks + buf_pool->max_size); - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - ut_ad(block->buf_fix_count > 0); - - return(block->offset); -} - -/*********************************************************************** -Gets the block to whose frame the pointer is pointing to. 
*/ -UNIV_INLINE -buf_block_t* -buf_block_align( -/*============*/ - /* out: pointer to block */ - byte* ptr) /* in: pointer to a frame */ -{ - buf_block_t* block; - buf_frame_t* frame_zero; - - ut_ad(ptr); - - frame_zero = buf_pool->frame_zero; - - if (UNIV_UNLIKELY((ulint)ptr < (ulint)frame_zero) - || UNIV_UNLIKELY((ulint)ptr > (ulint)(buf_pool->high_end))) { - - ut_print_timestamp(stderr); - fprintf(stderr, - "InnoDB: Error: trying to access a stray pointer %p\n" - "InnoDB: buf pool start is at %p, end at %p\n" - "InnoDB: Probable reason is database corruption" - " or memory\n" - "InnoDB: corruption. If this happens in an" - " InnoDB database recovery, see\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "forcing-recovery.html\n" - "InnoDB: how to force recovery.\n", - ptr, frame_zero, - buf_pool->high_end); - ut_error; - } - - block = *(buf_pool->blocks_of_frames + (((ulint)(ptr - frame_zero)) - >> UNIV_PAGE_SIZE_SHIFT)); - return(block); -} - -/*********************************************************************** -Gets the frame the pointer is pointing to. */ -UNIV_INLINE -buf_frame_t* -buf_frame_align( -/*============*/ - /* out: pointer to frame */ - byte* ptr) /* in: pointer to a frame */ -{ - buf_frame_t* frame; - - ut_ad(ptr); - - frame = ut_align_down(ptr, UNIV_PAGE_SIZE); - - if (UNIV_UNLIKELY((ulint)frame < (ulint)(buf_pool->frame_zero)) - || UNIV_UNLIKELY((ulint)frame >= (ulint)(buf_pool->high_end))) { - - ut_print_timestamp(stderr); - fprintf(stderr, - "InnoDB: Error: trying to access a stray pointer %p\n" - "InnoDB: buf pool start is at %p, end at %p\n" - "InnoDB: Probable reason is database corruption" - " or memory\n" - "InnoDB: corruption. 
If this happens in an" - " InnoDB database recovery, see\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "forcing-recovery.html\n" - "InnoDB: how to force recovery.\n", - ptr, buf_pool->frame_zero, - buf_pool->high_end); - ut_error; - } - - return(frame); -} - -/************************************************************************** -Gets the page number of a pointer pointing within a buffer frame containing -a file page. */ -UNIV_INLINE -ulint -buf_frame_get_page_no( -/*==================*/ - /* out: page number */ - byte* ptr) /* in: pointer to within a buffer frame */ -{ - return(buf_block_get_page_no(buf_block_align(ptr))); -} - -/************************************************************************** -Gets the space id of a pointer pointing within a buffer frame containing a -file page. */ -UNIV_INLINE -ulint -buf_frame_get_space_id( -/*===================*/ - /* out: space id */ - byte* ptr) /* in: pointer to within a buffer frame */ -{ - return(buf_block_get_space(buf_block_align(ptr))); -} - -/************************************************************************** -Gets the space id, page offset, and byte offset within page of a -pointer pointing to a buffer frame containing a file page. */ -UNIV_INLINE -void -buf_ptr_get_fsp_addr( -/*=================*/ - byte* ptr, /* in: pointer to a buffer frame */ - ulint* space, /* out: space id */ - fil_addr_t* addr) /* out: page offset and byte offset */ -{ - buf_block_t* block; - - block = buf_block_align(ptr); - - *space = buf_block_get_space(block); - addr->page = buf_block_get_page_no(block); - addr->boffset = ptr - buf_frame_align(ptr); -} - -/************************************************************************** -Gets the hash value of the page the pointer is pointing to. This can be used -in searches in the lock hash table. 
*/ -UNIV_INLINE -ulint -buf_frame_get_lock_hash_val( -/*========================*/ - /* out: lock hash value */ - byte* ptr) /* in: pointer to within a buffer frame */ -{ - buf_block_t* block; - - block = buf_block_align(ptr); - - return(block->lock_hash_val); -} - -/************************************************************************** -Gets the mutex number protecting the page record lock hash chain in the lock -table. */ -UNIV_INLINE -mutex_t* -buf_frame_get_mutex( -/*================*/ - /* out: mutex */ - byte* ptr) /* in: pointer to within a buffer frame */ -{ - buf_block_t* block; - - block = buf_block_align(ptr); - - return(&block->mutex); -} - -/************************************************************************* -Copies contents of a buffer frame to a given buffer. */ -UNIV_INLINE -byte* -buf_frame_copy( -/*===========*/ - /* out: buf */ - byte* buf, /* in: buffer to copy to */ - buf_frame_t* frame) /* in: buffer frame */ -{ - ut_ad(buf && frame); - - ut_memcpy(buf, frame, UNIV_PAGE_SIZE); - - return(buf); -} - -/************************************************************************ -Calculates a folded value of a file page address to use in the page hash -table. */ -UNIV_INLINE -ulint -buf_page_address_fold( -/*==================*/ - /* out: the folded value */ - ulint space, /* in: space id */ - ulint offset) /* in: offset of the page within space */ -{ - return((space << 20) + space + offset); -} - -/************************************************************************ -This function is used to get info if there is an io operation -going on on a buffer page. 
*/ -UNIV_INLINE -ibool -buf_page_io_query( -/*==============*/ - /* out: TRUE if io going on */ - buf_block_t* block) /* in: buf_pool block, must be bufferfixed */ -{ - mutex_enter(&(buf_pool->mutex)); - - ut_ad(block->state == BUF_BLOCK_FILE_PAGE); - ut_ad(block->buf_fix_count > 0); - - if (block->io_fix != 0) { - mutex_exit(&(buf_pool->mutex)); - - return(TRUE); - } - - mutex_exit(&(buf_pool->mutex)); - - return(FALSE); -} - -/************************************************************************ -Gets the youngest modification log sequence number for a frame. Returns zero -if not a file page or no modification occurred yet. */ -UNIV_INLINE -dulint -buf_frame_get_newest_modification( -/*==============================*/ - /* out: newest modification to the page */ - buf_frame_t* frame) /* in: pointer to a frame */ -{ - buf_block_t* block; - dulint lsn; - - ut_ad(frame); - - block = buf_block_align(frame); - - mutex_enter(&(buf_pool->mutex)); - - if (block->state == BUF_BLOCK_FILE_PAGE) { - lsn = block->newest_modification; - } else { - lsn = ut_dulint_zero; - } - - mutex_exit(&(buf_pool->mutex)); - - return(lsn); -} - -/************************************************************************ -Increments the modify clock of a frame by 1. The caller must (1) own the -buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock -on the block. */ -UNIV_INLINE -dulint -buf_frame_modify_clock_inc( -/*=======================*/ - /* out: new value */ - buf_frame_t* frame) /* in: pointer to a frame */ -{ - buf_block_t* block; - - ut_ad(frame); - - block = buf_block_align(frame); - -#ifdef UNIV_SYNC_DEBUG - ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0)) - || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE)); -#endif /* UNIV_SYNC_DEBUG */ - - UT_DULINT_INC(block->modify_clock); - - return(block->modify_clock); -} - -/************************************************************************ -Increments the modify clock of a frame by 1. 
The caller must (1) own the -buf_pool mutex and block bufferfix count has to be zero, (2) or own an x-lock -on the block. */ -UNIV_INLINE -dulint -buf_block_modify_clock_inc( -/*=======================*/ - /* out: new value */ - buf_block_t* block) /* in: block */ -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad((mutex_own(&(buf_pool->mutex)) && (block->buf_fix_count == 0)) - || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE)); -#endif /* UNIV_SYNC_DEBUG */ - - UT_DULINT_INC(block->modify_clock); - - return(block->modify_clock); -} - -/************************************************************************ -Returns the value of the modify clock. The caller must have an s-lock -or x-lock on the block. */ -UNIV_INLINE -dulint -buf_block_get_modify_clock( -/*=======================*/ - /* out: value */ - buf_block_t* block) /* in: block */ -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_SHARED) - || rw_lock_own(&(block->lock), RW_LOCK_EXCLUSIVE)); -#endif /* UNIV_SYNC_DEBUG */ - - return(block->modify_clock); -} - -#ifdef UNIV_SYNC_DEBUG -/*********************************************************************** -Increments the bufferfix count. */ -UNIV_INLINE -void -buf_block_buf_fix_inc_debug( -/*========================*/ - buf_block_t* block, /* in: block to bufferfix */ - const char* file __attribute__ ((unused)), /* in: file name */ - ulint line __attribute__ ((unused))) /* in: line */ -{ - ibool ret; - - ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line); - - ut_ad(ret == TRUE); - ut_ad(mutex_own(&block->mutex)); - block->buf_fix_count++; -} -#else /* UNIV_SYNC_DEBUG */ -/*********************************************************************** -Increments the bufferfix count. 
*/ -UNIV_INLINE -void -buf_block_buf_fix_inc( -/*==================*/ - buf_block_t* block) /* in: block to bufferfix */ -{ - ut_ad(mutex_own(&block->mutex)); - - block->buf_fix_count++; -} -#endif /* UNIV_SYNC_DEBUG */ -/********************************************************************** -Returns the control block of a file page, NULL if not found. */ -UNIV_INLINE -buf_block_t* -buf_page_hash_get( -/*==============*/ - /* out: block, NULL if not found */ - ulint space, /* in: space id */ - ulint offset) /* in: offset of the page within space */ -{ - buf_block_t* block; - ulint fold; - - ut_ad(buf_pool); - ut_ad(mutex_own(&(buf_pool->mutex))); - - /* Look for the page in the hash table */ - - fold = buf_page_address_fold(space, offset); - - HASH_SEARCH(hash, buf_pool->page_hash, fold, block, - (block->space == space) && (block->offset == offset)); - ut_a(block == NULL || block->state == BUF_BLOCK_FILE_PAGE); - - return(block); -} - -/************************************************************************ -Tries to get the page, but if file io is required, releases all latches -in mtr down to the given savepoint. If io is required, this function -retrieves the page to buffer buf_pool, but does not bufferfix it or latch -it. 
*/ -UNIV_INLINE -buf_frame_t* -buf_page_get_release_on_io( -/*=======================*/ - /* out: pointer to the frame, or NULL - if not in buffer buf_pool */ - ulint space, /* in: space id */ - ulint offset, /* in: offset of the page within space - in units of a page */ - buf_frame_t* guess, /* in: guessed frame or NULL */ - ulint rw_latch, /* in: RW_X_LATCH, RW_S_LATCH, - or RW_NO_LATCH */ - ulint savepoint, /* in: mtr savepoint */ - mtr_t* mtr) /* in: mtr */ -{ - buf_frame_t* frame; - - frame = buf_page_get_gen(space, offset, rw_latch, guess, - BUF_GET_IF_IN_POOL, - __FILE__, __LINE__, - mtr); - if (frame != NULL) { - - return(frame); - } - - /* The page was not in the buffer buf_pool: release the latches - down to the savepoint */ - - mtr_rollback_to_savepoint(mtr, savepoint); - - buf_page_get(space, offset, RW_S_LATCH, mtr); - - /* When we get here, the page is in buffer, but we release - the latches again down to the savepoint, before returning */ - - mtr_rollback_to_savepoint(mtr, savepoint); - - return(NULL); -} - -/************************************************************************ -Decrements the bufferfix count of a buffer control block and releases -a latch, if specified. 
*/ -UNIV_INLINE -void -buf_page_release( -/*=============*/ - buf_block_t* block, /* in: buffer block */ - ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH, - RW_NO_LATCH */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(block); - - ut_a(block->state == BUF_BLOCK_FILE_PAGE); - ut_a(block->buf_fix_count > 0); - - if (rw_latch == RW_X_LATCH && mtr->modifications) { - mutex_enter(&buf_pool->mutex); - buf_flush_note_modification(block, mtr); - mutex_exit(&buf_pool->mutex); - } - - mutex_enter(&block->mutex); - -#ifdef UNIV_SYNC_DEBUG - rw_lock_s_unlock(&(block->debug_latch)); -#endif - block->buf_fix_count--; - - mutex_exit(&block->mutex); - - if (rw_latch == RW_S_LATCH) { - rw_lock_s_unlock(&(block->lock)); - } else if (rw_latch == RW_X_LATCH) { - rw_lock_x_unlock(&(block->lock)); - } -} - -#ifdef UNIV_SYNC_DEBUG -/************************************************************************* -Adds latch level info for the rw-lock protecting the buffer frame. This -should be called in the debug version after a successful latching of a -page if we know the latching order level of the acquired latch. If -UNIV_SYNC_DEBUG is not defined, compiles to an empty function. 
*/ -UNIV_INLINE -void -buf_page_dbg_add_level( -/*===================*/ - buf_frame_t* frame __attribute__((unused)), /* in: buffer page - where we have acquired latch */ - ulint level __attribute__((unused))) /* in: latching order - level */ -{ - sync_thread_add_level(&(buf_block_align(frame)->lock), level); -} -#endif /* UNIV_SYNC_DEBUG */ diff --git a/storage/innobase/include/buf0flu.h b/storage/innobase/include/buf0flu.h deleted file mode 100644 index 322848509f4..00000000000 --- a/storage/innobase/include/buf0flu.h +++ /dev/null @@ -1,120 +0,0 @@ -/****************************************************** -The database buffer pool flush algorithm - -(c) 1995 Innobase Oy - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#ifndef buf0flu_h -#define buf0flu_h - -#include "univ.i" -#include "buf0types.h" -#include "ut0byte.h" -#include "mtr0types.h" - -/************************************************************************ -Updates the flush system data structures when a write is completed. */ - -void -buf_flush_write_complete( -/*=====================*/ - buf_block_t* block); /* in: pointer to the block in question */ -/************************************************************************* -Flushes pages from the end of the LRU list if there is too small -a margin of replaceable pages there. */ - -void -buf_flush_free_margin(void); -/*=======================*/ -/************************************************************************ -Initializes a page for writing to the tablespace. */ - -void -buf_flush_init_for_writing( -/*=======================*/ - byte* page, /* in: page */ - dulint newest_lsn, /* in: newest modification lsn to the page */ - ulint space, /* in: space id */ - ulint page_no); /* in: page number */ -/*********************************************************************** -This utility flushes dirty blocks from the end of the LRU list or flush_list. 
-NOTE 1: in the case of an LRU flush the calling thread may own latches to -pages: to avoid deadlocks, this function must be written so that it cannot -end up waiting for these latches! NOTE 2: in the case of a flush list flush, -the calling thread is not allowed to own any latches on pages! */ - -ulint -buf_flush_batch( -/*============*/ - /* out: number of blocks for which the write - request was queued */ - ulint flush_type, /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST; if - BUF_FLUSH_LIST, then the caller must not own - any latches on pages */ - ulint min_n, /* in: wished minimum number of blocks flushed - (it is not guaranteed that the actual number - is that big, though) */ - dulint lsn_limit); /* in the case BUF_FLUSH_LIST all blocks whose - oldest_modification is smaller than this - should be flushed (if their number does not - exceed min_n), otherwise ignored */ -/********************************************************************** -Waits until a flush batch of the given type ends */ - -void -buf_flush_wait_batch_end( -/*=====================*/ - ulint type); /* in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ -/************************************************************************ -This function should be called at a mini-transaction commit, if a page was -modified in it. Puts the block to the list of modified blocks, if it is not -already in it. */ -UNIV_INLINE -void -buf_flush_note_modification( -/*========================*/ - buf_block_t* block, /* in: block which is modified */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************ -This function should be called when recovery has modified a buffer page.
*/ -UNIV_INLINE -void -buf_flush_recv_note_modification( -/*=============================*/ - buf_block_t* block, /* in: block which is modified */ - dulint start_lsn, /* in: start lsn of the first mtr in a - set of mtr's */ - dulint end_lsn); /* in: end lsn of the last mtr in the - set of mtr's */ -/************************************************************************ -Returns TRUE if the file page block is immediately suitable for replacement, -i.e., transition FILE_PAGE => NOT_USED allowed. */ -ibool -buf_flush_ready_for_replace( -/*========================*/ - /* out: TRUE if can replace immediately */ - buf_block_t* block); /* in: buffer control block, must be in state - BUF_BLOCK_FILE_PAGE and in the LRU list */ -/********************************************************************** -Validates the flush list. */ - -ibool -buf_flush_validate(void); -/*====================*/ - /* out: TRUE if ok */ - -/* When buf_flush_free_margin is called, it tries to make this many blocks -available to replacement in the free list and at the end of the LRU list (to -make sure that a read-ahead batch can be read efficiently in a single -sweep). */ - -#define BUF_FLUSH_FREE_BLOCK_MARGIN (5 + BUF_READ_AHEAD_AREA) -#define BUF_FLUSH_EXTRA_MARGIN (BUF_FLUSH_FREE_BLOCK_MARGIN / 4 + 100) - -#ifndef UNIV_NONINL -#include "buf0flu.ic" -#endif - -#endif diff --git a/storage/innobase/include/buf0flu.ic b/storage/innobase/include/buf0flu.ic deleted file mode 100644 index ae873c42088..00000000000 --- a/storage/innobase/include/buf0flu.ic +++ /dev/null @@ -1,106 +0,0 @@ -/****************************************************** -The database buffer pool flush algorithm - -(c) 1995 Innobase Oy - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#include "buf0buf.h" -#include "mtr0mtr.h" - -/************************************************************************ -Inserts a modified block into the flush list. 
*/ - -void -buf_flush_insert_into_flush_list( -/*=============================*/ - buf_block_t* block); /* in: block which is modified */ -/************************************************************************ -Inserts a modified block into the flush list in the right sorted position. -This function is used by recovery, because there the modifications do not -necessarily come in the order of lsn's. */ - -void -buf_flush_insert_sorted_into_flush_list( -/*====================================*/ - buf_block_t* block); /* in: block which is modified */ - -/************************************************************************ -This function should be called at a mini-transaction commit, if a page was -modified in it. Puts the block to the list of modified blocks, if it is not -already in it. */ -UNIV_INLINE -void -buf_flush_note_modification( -/*========================*/ - buf_block_t* block, /* in: block which is modified */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(block); - ut_ad(block->state == BUF_BLOCK_FILE_PAGE); - ut_ad(block->buf_fix_count > 0); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mutex_own(&(buf_pool->mutex))); - - ut_ad(ut_dulint_cmp(mtr->start_lsn, ut_dulint_zero) != 0); - ut_ad(mtr->modifications); - ut_ad(ut_dulint_cmp(block->newest_modification, mtr->end_lsn) <= 0); - - block->newest_modification = mtr->end_lsn; - - if (ut_dulint_is_zero(block->oldest_modification)) { - - block->oldest_modification = mtr->start_lsn; - ut_ad(!ut_dulint_is_zero(block->oldest_modification)); - - buf_flush_insert_into_flush_list(block); - } else { - ut_ad(ut_dulint_cmp(block->oldest_modification, - mtr->start_lsn) <= 0); - } - - ++srv_buf_pool_write_requests; -} - -/************************************************************************ -This function should be called when recovery has modified a buffer page. 
*/ -UNIV_INLINE -void -buf_flush_recv_note_modification( -/*=============================*/ - buf_block_t* block, /* in: block which is modified */ - dulint start_lsn, /* in: start lsn of the first mtr in a - set of mtr's */ - dulint end_lsn) /* in: end lsn of the last mtr in the - set of mtr's */ -{ - ut_ad(block); - ut_ad(block->state == BUF_BLOCK_FILE_PAGE); - ut_ad(block->buf_fix_count > 0); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - mutex_enter(&(buf_pool->mutex)); - - ut_ad(ut_dulint_cmp(block->newest_modification, end_lsn) <= 0); - - block->newest_modification = end_lsn; - - if (ut_dulint_is_zero(block->oldest_modification)) { - - block->oldest_modification = start_lsn; - - ut_ad(!ut_dulint_is_zero(block->oldest_modification)); - - buf_flush_insert_sorted_into_flush_list(block); - } else { - ut_ad(ut_dulint_cmp(block->oldest_modification, - start_lsn) <= 0); - } - - mutex_exit(&(buf_pool->mutex)); -} diff --git a/storage/innobase/include/buf0lru.h b/storage/innobase/include/buf0lru.h deleted file mode 100644 index 6d26fd4d3b2..00000000000 --- a/storage/innobase/include/buf0lru.h +++ /dev/null @@ -1,144 +0,0 @@ -/****************************************************** -The database buffer pool LRU replacement algorithm - -(c) 1995 Innobase Oy - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#ifndef buf0lru_h -#define buf0lru_h - -#include "univ.i" -#include "ut0byte.h" -#include "buf0types.h" - -/********************************************************************** -Tries to remove LRU flushed blocks from the end of the LRU list and put them -to the free list. This is beneficial for the efficiency of the insert buffer -operation, as flushed pages from non-unique non-clustered indexes are here -taken out of the buffer pool, and their inserts redirected to the insert -buffer. 
Otherwise, the flushed blocks could get modified again before read -operations need new buffer blocks, and the i/o work done in flushing would be -wasted. */ - -void -buf_LRU_try_free_flushed_blocks(void); -/*==================================*/ -/********************************************************************** -Returns TRUE if less than 25 % of the buffer pool is available. This can be -used in heuristics to prevent huge transactions eating up the whole buffer -pool for their locks. */ - -ibool -buf_LRU_buf_pool_running_out(void); -/*==============================*/ - /* out: TRUE if less than 25 % of buffer pool - left */ - -/*####################################################################### -These are low-level functions -#########################################################################*/ - -/* Minimum LRU list length for which the LRU_old pointer is defined */ - -#define BUF_LRU_OLD_MIN_LEN 80 - -#define BUF_LRU_FREE_SEARCH_LEN (5 + 2 * BUF_READ_AHEAD_AREA) - -/********************************************************************** -Invalidates all pages belonging to a given tablespace when we are deleting -the data file(s) of that tablespace. A PROBLEM: if readahead is being started, -what guarantees that it will not try to read in pages after this operation has -completed? */ - -void -buf_LRU_invalidate_tablespace( -/*==========================*/ - ulint id); /* in: space id */ -/********************************************************************** -Gets the minimum LRU_position field for the blocks in an initial segment -(determined by BUF_LRU_INITIAL_RATIO) of the LRU list. The limit is not -guaranteed to be precise, because the ulint_clock may wrap around. 
*/ - -ulint -buf_LRU_get_recent_limit(void); -/*==========================*/ - /* out: the limit; zero if could not determine it */ -/********************************************************************** -Look for a replaceable block from the end of the LRU list and put it to -the free list if found. */ - -ibool -buf_LRU_search_and_free_block( -/*==========================*/ - /* out: TRUE if freed */ - ulint n_iterations); /* in: how many times this has been called - repeatedly without result: a high value means - that we should search farther; if value is - k < 10, then we only search k/10 * number - of pages in the buffer pool from the end - of the LRU list */ -/********************************************************************** -Returns a free block from the buf_pool. The block is taken off the -free list. If it is empty, blocks are moved from the end of the -LRU list to the free list. */ - -buf_block_t* -buf_LRU_get_free_block(void); -/*=========================*/ - /* out: the free control block; also if AWE is - used, it is guaranteed that the block has its - page mapped to a frame when we return */ -/********************************************************************** -Puts a block back to the free list. */ - -void -buf_LRU_block_free_non_file_page( -/*=============================*/ - buf_block_t* block); /* in: block, must not contain a file page */ -/********************************************************************** -Adds a block to the LRU list. */ - -void -buf_LRU_add_block( -/*==============*/ - buf_block_t* block, /* in: control block */ - ibool old); /* in: TRUE if should be put to the old - blocks in the LRU list, else put to the - start; if the LRU list is very short, added to - the start regardless of this parameter */ -/********************************************************************** -Moves a block to the start of the LRU list. 
*/ - -void -buf_LRU_make_block_young( -/*=====================*/ - buf_block_t* block); /* in: control block */ -/********************************************************************** -Moves a block to the end of the LRU list. */ - -void -buf_LRU_make_block_old( -/*===================*/ - buf_block_t* block); /* in: control block */ -#ifdef UNIV_DEBUG -/************************************************************************** -Validates the LRU list. */ - -ibool -buf_LRU_validate(void); -/*==================*/ -/************************************************************************** -Prints the LRU list. */ - -void -buf_LRU_print(void); -/*===============*/ -#endif /* UNIV_DEBUG */ - -#ifndef UNIV_NONINL -#include "buf0lru.ic" -#endif - -#endif diff --git a/storage/innobase/include/buf0lru.ic b/storage/innobase/include/buf0lru.ic deleted file mode 100644 index 7b8ee457b0b..00000000000 --- a/storage/innobase/include/buf0lru.ic +++ /dev/null @@ -1,8 +0,0 @@ -/****************************************************** -The database buffer replacement algorithm - -(c) 1995 Innobase Oy - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - diff --git a/storage/innobase/include/buf0rea.h b/storage/innobase/include/buf0rea.h deleted file mode 100644 index e4620172860..00000000000 --- a/storage/innobase/include/buf0rea.h +++ /dev/null @@ -1,104 +0,0 @@ -/****************************************************** -The database buffer read - -(c) 1995 Innobase Oy - -Created 11/5/1995 Heikki Tuuri -*******************************************************/ - -#ifndef buf0rea_h -#define buf0rea_h - -#include "univ.i" -#include "buf0types.h" - -/************************************************************************ -High-level function which reads a page asynchronously from a file to the -buffer buf_pool if it is not already there. Sets the io_fix flag and sets -an exclusive lock on the buffer frame. 
The flag is cleared and the x-lock -released by the i/o-handler thread. Does a random read-ahead if it seems -sensible. */ - -ulint -buf_read_page( -/*==========*/ - /* out: number of page read requests issued: this can - be > 1 if read-ahead occurred */ - ulint space, /* in: space id */ - ulint offset);/* in: page number */ -/************************************************************************ -Applies linear read-ahead if in the buf_pool the page is a border page of -a linear read-ahead area and all the pages in the area have been accessed. -Does not read any page if the read-ahead mechanism is not activated. Note -that the algorithm looks at the 'natural' adjacent successor and -predecessor of the page, which on the leaf level of a B-tree are the next -and previous page in the chain of leaves. To know these, the page specified -in (space, offset) must already be present in the buf_pool. Thus, the -natural way to use this function is to call it when a page in the buf_pool -is accessed the first time, calling this function just after it has been -bufferfixed. -NOTE 1: as this function looks at the natural predecessor and successor -fields on the page, what happens, if these are not initialized to any -sensible value? No problem, before applying read-ahead we check that the -area to read is within the span of the space, if not, read-ahead is not -applied. An uninitialized value may result in a useless read operation, but -only very improbably. -NOTE 2: the calling thread may own latches on pages: to avoid deadlocks this -function must be written such that it cannot end up waiting for these -latches! -NOTE 3: the calling thread must want access to the page given: this rule is -set to prevent unintended read-aheads performed by ibuf routines, a situation -which could result in a deadlock if the OS does not support asynchronous io.
*/ - -ulint -buf_read_ahead_linear( -/*==================*/ - /* out: number of page read requests issued */ - ulint space, /* in: space id */ - ulint offset);/* in: page number of a page; NOTE: the current thread - must want access to this page (see NOTE 3 above) */ -/************************************************************************ -Issues read requests for pages which the ibuf module wants to read in, in -order to contract the insert buffer tree. Technically, this function is like -a read-ahead function. */ - -void -buf_read_ibuf_merge_pages( -/*======================*/ - ibool sync, /* in: TRUE if the caller wants this function - to wait for the highest address page to get - read in, before this function returns */ - ulint* space_ids, /* in: array of space ids */ - ib_longlong* space_versions,/* in: the spaces must have this version - number (timestamp), otherwise we discard the - read; we use this to cancel reads if - DISCARD + IMPORT may have changed the - tablespace size */ - ulint* page_nos, /* in: array of page numbers to read, with the - highest page number the last in the array */ - ulint n_stored); /* in: number of page numbers in the array */ -/************************************************************************ -Issues read requests for pages which recovery wants to read in. 
*/ - -void -buf_read_recv_pages( -/*================*/ - ibool sync, /* in: TRUE if the caller wants this function - to wait for the highest address page to get - read in, before this function returns */ - ulint space, /* in: space id */ - ulint* page_nos, /* in: array of page numbers to read, with the - highest page number the last in the array */ - ulint n_stored); /* in: number of page numbers in the array */ - -/* The size in pages of the area which the read-ahead algorithms read if -invoked */ - -#define BUF_READ_AHEAD_AREA \ - ut_min(64, ut_2_power_up(buf_pool->curr_size / 32)) - -/* Modes used in read-ahead */ -#define BUF_READ_IBUF_PAGES_ONLY 131 -#define BUF_READ_ANY_PAGE 132 - -#endif diff --git a/storage/innobase/include/buf0types.h b/storage/innobase/include/buf0types.h deleted file mode 100644 index 44fdfa80e73..00000000000 --- a/storage/innobase/include/buf0types.h +++ /dev/null @@ -1,20 +0,0 @@ -/****************************************************** -The database buffer pool global types for the directory - -(c) 1995 Innobase Oy - -Created 11/17/1995 Heikki Tuuri -*******************************************************/ - -#ifndef buf0types_h -#define buf0types_h - -typedef struct buf_block_struct buf_block_t; -typedef struct buf_pool_struct buf_pool_t; - -/* The 'type' used of a buffer frame */ -typedef byte buf_frame_t; - - -#endif - diff --git a/storage/innobase/include/data0data.h b/storage/innobase/include/data0data.h deleted file mode 100644 index 40592c3c0ce..00000000000 --- a/storage/innobase/include/data0data.h +++ /dev/null @@ -1,424 +0,0 @@ -/************************************************************************ -SQL data field and tuple - -(c) 1994-1996 Innobase Oy - -Created 5/30/1994 Heikki Tuuri -*************************************************************************/ - -#ifndef data0data_h -#define data0data_h - -#include "univ.i" - -#include "data0types.h" -#include "data0type.h" -#include "mem0mem.h" -#include "dict0types.h" - 
-typedef struct big_rec_struct big_rec_t; - -/* Some non-inlined functions used in the MySQL interface: */ -void -dfield_set_data_noninline( - dfield_t* field, /* in: field */ - void* data, /* in: data */ - ulint len); /* in: length or UNIV_SQL_NULL */ -void* -dfield_get_data_noninline( - dfield_t* field); /* in: field */ -ulint -dfield_get_len_noninline( - dfield_t* field); /* in: field */ -ulint -dtuple_get_n_fields_noninline( - dtuple_t* tuple); /* in: tuple */ -dfield_t* -dtuple_get_nth_field_noninline( - dtuple_t* tuple, /* in: tuple */ - ulint n); /* in: index of field */ - -/************************************************************************* -Gets pointer to the type struct of SQL data field. */ -UNIV_INLINE -dtype_t* -dfield_get_type( -/*============*/ - /* out: pointer to the type struct */ - dfield_t* field); /* in: SQL data field */ -/************************************************************************* -Sets the type struct of SQL data field. */ -UNIV_INLINE -void -dfield_set_type( -/*============*/ - dfield_t* field, /* in: SQL data field */ - dtype_t* type); /* in: pointer to data type struct */ -/************************************************************************* -Gets pointer to the data in a field. */ -UNIV_INLINE -void* -dfield_get_data( -/*============*/ - /* out: pointer to data */ - dfield_t* field); /* in: field */ -/************************************************************************* -Gets length of field data. */ -UNIV_INLINE -ulint -dfield_get_len( -/*===========*/ - /* out: length of data; UNIV_SQL_NULL if - SQL null data */ - dfield_t* field); /* in: field */ -/************************************************************************* -Sets length in a field. */ -UNIV_INLINE -void -dfield_set_len( -/*===========*/ - dfield_t* field, /* in: field */ - ulint len); /* in: length or UNIV_SQL_NULL */ -/************************************************************************* -Sets pointer to the data and length in a field. 
*/ -UNIV_INLINE -void -dfield_set_data( -/*============*/ - dfield_t* field, /* in: field */ - const void* data, /* in: data */ - ulint len); /* in: length or UNIV_SQL_NULL */ -/************************************************************************** -Writes an SQL null field full of zeros. */ -UNIV_INLINE -void -data_write_sql_null( -/*================*/ - byte* data, /* in: pointer to a buffer of size len */ - ulint len); /* in: SQL null size in bytes */ -/************************************************************************* -Copies the data and len fields. */ -UNIV_INLINE -void -dfield_copy_data( -/*=============*/ - dfield_t* field1, /* in: field to copy to */ - dfield_t* field2);/* in: field to copy from */ -/************************************************************************* -Copies a data field to another. */ -UNIV_INLINE -void -dfield_copy( -/*========*/ - dfield_t* field1, /* in: field to copy to */ - dfield_t* field2);/* in: field to copy from */ -/************************************************************************* -Tests if data length and content is equal for two dfields. */ -UNIV_INLINE -ibool -dfield_datas_are_binary_equal( -/*==========================*/ - /* out: TRUE if equal */ - dfield_t* field1, /* in: field */ - dfield_t* field2);/* in: field */ -/************************************************************************* -Tests if dfield data length and content is equal to the given. */ - -ibool -dfield_data_is_binary_equal( -/*========================*/ - /* out: TRUE if equal */ - dfield_t* field, /* in: field */ - ulint len, /* in: data length or UNIV_SQL_NULL */ - byte* data); /* in: data */ -/************************************************************************* -Gets number of fields in a data tuple. 
*/ -UNIV_INLINE -ulint -dtuple_get_n_fields( -/*================*/ - /* out: number of fields */ - dtuple_t* tuple); /* in: tuple */ -/************************************************************************* -Gets nth field of a tuple. */ -UNIV_INLINE -dfield_t* -dtuple_get_nth_field( -/*=================*/ - /* out: nth field */ - dtuple_t* tuple, /* in: tuple */ - ulint n); /* in: index of field */ -/************************************************************************* -Gets info bits in a data tuple. */ -UNIV_INLINE -ulint -dtuple_get_info_bits( -/*=================*/ - /* out: info bits */ - dtuple_t* tuple); /* in: tuple */ -/************************************************************************* -Sets info bits in a data tuple. */ -UNIV_INLINE -void -dtuple_set_info_bits( -/*=================*/ - dtuple_t* tuple, /* in: tuple */ - ulint info_bits); /* in: info bits */ -/************************************************************************* -Gets number of fields used in record comparisons. */ -UNIV_INLINE -ulint -dtuple_get_n_fields_cmp( -/*====================*/ - /* out: number of fields used in comparisons - in rem0cmp.* */ - dtuple_t* tuple); /* in: tuple */ -/************************************************************************* -Sets number of fields used in record comparisons. */ -UNIV_INLINE -void -dtuple_set_n_fields_cmp( -/*====================*/ - dtuple_t* tuple, /* in: tuple */ - ulint n_fields_cmp); /* in: number of fields used in - comparisons in rem0cmp.* */ -/************************************************************** -Creates a data tuple to a memory heap. The default value for number -of fields used in record comparisons for this tuple is n_fields.
*/ -UNIV_INLINE -dtuple_t* -dtuple_create( -/*==========*/ - /* out, own: created tuple */ - mem_heap_t* heap, /* in: memory heap where the tuple - is created */ - ulint n_fields); /* in: number of fields */ - -/************************************************************************* -Creates a dtuple for use in MySQL. */ - -dtuple_t* -dtuple_create_for_mysql( -/*====================*/ - /* out, own: created dtuple */ - void** heap, /* out: created memory heap */ - ulint n_fields); /* in: number of fields */ -/************************************************************************* -Frees a dtuple used in MySQL. */ - -void -dtuple_free_for_mysql( -/*==================*/ - void* heap); -/************************************************************************* -Sets number of fields used in a tuple. Normally this is set in -dtuple_create, but if you want later to set it smaller, you can use this. */ - -void -dtuple_set_n_fields( -/*================*/ - dtuple_t* tuple, /* in: tuple */ - ulint n_fields); /* in: number of fields */ -/************************************************************** -The following function returns the sum of data lengths of a tuple. The space -occupied by the field structs or the tuple struct is not counted. */ -UNIV_INLINE -ulint -dtuple_get_data_size( -/*=================*/ - /* out: sum of data lens */ - dtuple_t* tuple); /* in: typed data tuple */ -/**************************************************************** -Returns TRUE if lengths of two dtuples are equal and respective data fields -in them are equal when compared with collation in char fields (not as binary -strings). */ - -ibool -dtuple_datas_are_ordering_equal( -/*============================*/ - /* out: TRUE if length and fields are equal - when compared with cmp_data_data: - NOTE: in character type fields some letters - are identified with others!
(collation) */ - dtuple_t* tuple1, /* in: tuple 1 */ - dtuple_t* tuple2);/* in: tuple 2 */ -/**************************************************************** -Folds a prefix given as the number of fields of a tuple. */ -UNIV_INLINE -ulint -dtuple_fold( -/*========*/ - /* out: the folded value */ - dtuple_t* tuple, /* in: the tuple */ - ulint n_fields,/* in: number of complete fields to fold */ - ulint n_bytes,/* in: number of bytes to fold in an - incomplete last field */ - dulint tree_id);/* in: index tree id */ -/*********************************************************************** -Sets types of fields binary in a tuple. */ -UNIV_INLINE -void -dtuple_set_types_binary( -/*====================*/ - dtuple_t* tuple, /* in: data tuple */ - ulint n); /* in: number of fields to set */ -/************************************************************************** -Checks if a dtuple contains an SQL null value. */ -UNIV_INLINE -ibool -dtuple_contains_null( -/*=================*/ - /* out: TRUE if some field is SQL null */ - dtuple_t* tuple); /* in: dtuple */ -/************************************************************** -Checks that a data field is typed. Asserts an error if not. */ - -ibool -dfield_check_typed( -/*===============*/ - /* out: TRUE if ok */ - dfield_t* field); /* in: data field */ -/************************************************************** -Checks that a data tuple is typed. Asserts an error if not. */ - -ibool -dtuple_check_typed( -/*===============*/ - /* out: TRUE if ok */ - dtuple_t* tuple); /* in: tuple */ -/************************************************************** -Checks that a data tuple is typed. */ - -ibool -dtuple_check_typed_no_assert( -/*=========================*/ - /* out: TRUE if ok */ - dtuple_t* tuple); /* in: tuple */ -#ifdef UNIV_DEBUG -/************************************************************** -Validates the consistency of a tuple which must be complete, i.e, -all fields must have been set. 
*/ - -ibool -dtuple_validate( -/*============*/ - /* out: TRUE if ok */ - dtuple_t* tuple); /* in: tuple */ -#endif /* UNIV_DEBUG */ -/***************************************************************** -Pretty prints a dfield value according to its data type. */ - -void -dfield_print( -/*=========*/ - dfield_t* dfield);/* in: dfield */ -/***************************************************************** -Pretty prints a dfield value according to its data type. Also the hex string -is printed if a string contains non-printable characters. */ - -void -dfield_print_also_hex( -/*==================*/ - dfield_t* dfield); /* in: dfield */ -/************************************************************** -The following function prints the contents of a tuple. */ - -void -dtuple_print( -/*=========*/ - FILE* f, /* in: output stream */ - dtuple_t* tuple); /* in: tuple */ -/****************************************************************** -Moves parts of long fields in entry to the big record vector so that -the size of tuple drops below the maximum record size allowed in the -database. Moves data only from those fields which are not necessary -to determine uniquely the insertion place of the tuple in the index. */ - -big_rec_t* -dtuple_convert_big_rec( -/*===================*/ - /* out, own: created big record vector, - NULL if we are not able to shorten - the entry enough, i.e., if there are - too many short fields in entry */ - dict_index_t* index, /* in: index */ - dtuple_t* entry, /* in: index entry */ - ulint* ext_vec,/* in: array of externally stored fields, - or NULL: if a field already is externally - stored, then we cannot move it to the vector - this function returns */ - ulint n_ext_vec);/* in: number of elements in ext_vec */ -/****************************************************************** -Puts back to entry the data stored in vector.
Note that to ensure the -fields in entry can accommodate the data, vector must have been created -from entry with dtuple_convert_big_rec. */ - -void -dtuple_convert_back_big_rec( -/*========================*/ - dict_index_t* index, /* in: index */ - dtuple_t* entry, /* in: entry whose data was put to vector */ - big_rec_t* vector);/* in, own: big rec vector; it is - freed in this function */ -/****************************************************************** -Frees the memory in a big rec vector. */ - -void -dtuple_big_rec_free( -/*================*/ - big_rec_t* vector); /* in, own: big rec vector; it is - freed in this function */ - -/*######################################################################*/ - -/* Structure for an SQL data field */ -struct dfield_struct{ - void* data; /* pointer to data */ - ulint len; /* data length; UNIV_SQL_NULL if SQL null; */ - dtype_t type; /* type of data */ -}; - -struct dtuple_struct { - ulint info_bits; /* info bits of an index record: - the default is 0; this field is used - if an index record is built from - a data tuple */ - ulint n_fields; /* number of fields in dtuple */ - ulint n_fields_cmp; /* number of fields which should - be used in comparison services - of rem0cmp.*; the index search - is performed by comparing only these - fields, others are ignored; the - default value in dtuple creation is - the same value as n_fields */ - dfield_t* fields; /* fields */ - UT_LIST_NODE_T(dtuple_t) tuple_list; - /* data tuples can be linked into a - list using this field */ - ulint magic_n; -}; -#define DATA_TUPLE_MAGIC_N 65478679 - -/* A slot for a field in a big rec vector */ - -typedef struct big_rec_field_struct big_rec_field_t; -struct big_rec_field_struct { - ulint field_no; /* field number in record */ - ulint len; /* stored data len */ - byte* data; /* stored data */ -}; - -/* Storage format for overflow data in a big record, that is, a record -which needs external storage of data fields */ - -struct big_rec_struct { 
- mem_heap_t* heap; /* memory heap from which allocated */ - ulint n_fields; /* number of stored fields */ - big_rec_field_t* fields; /* stored fields */ -}; - -#ifndef UNIV_NONINL -#include "data0data.ic" -#endif - -#endif diff --git a/storage/innobase/include/data0data.ic b/storage/innobase/include/data0data.ic deleted file mode 100644 index 753fa9ba45f..00000000000 --- a/storage/innobase/include/data0data.ic +++ /dev/null @@ -1,436 +0,0 @@ -/************************************************************************ -SQL data field and tuple - -(c) 1994-1996 Innobase Oy - -Created 5/30/1994 Heikki Tuuri -*************************************************************************/ - -#include "mem0mem.h" -#include "ut0rnd.h" - -#ifdef UNIV_DEBUG -extern byte data_error; -#endif /* UNIV_DEBUG */ - -/************************************************************************* -Gets pointer to the type struct of SQL data field. */ -UNIV_INLINE -dtype_t* -dfield_get_type( -/*============*/ - /* out: pointer to the type struct */ - dfield_t* field) /* in: SQL data field */ -{ - ut_ad(field); - - return(&(field->type)); -} - -/************************************************************************* -Sets the type struct of SQL data field. */ -UNIV_INLINE -void -dfield_set_type( -/*============*/ - dfield_t* field, /* in: SQL data field */ - dtype_t* type) /* in: pointer to data type struct */ -{ - ut_ad(field && type); - - field->type = *type; -} - -/************************************************************************* -Gets pointer to the data in a field. */ -UNIV_INLINE -void* -dfield_get_data( -/*============*/ - /* out: pointer to data */ - dfield_t* field) /* in: field */ -{ - ut_ad(field); - ut_ad((field->len == UNIV_SQL_NULL) - || (field->data != &data_error)); - - return(field->data); -} - -/************************************************************************* -Gets length of field data. 
*/ -UNIV_INLINE -ulint -dfield_get_len( -/*===========*/ - /* out: length of data; UNIV_SQL_NULL if - SQL null data */ - dfield_t* field) /* in: field */ -{ - ut_ad(field); - ut_ad((field->len == UNIV_SQL_NULL) - || (field->data != &data_error)); - - return(field->len); -} - -/************************************************************************* -Sets length in a field. */ -UNIV_INLINE -void -dfield_set_len( -/*===========*/ - dfield_t* field, /* in: field */ - ulint len) /* in: length or UNIV_SQL_NULL */ -{ - ut_ad(field); - - field->len = len; -} - -/************************************************************************* -Sets pointer to the data and length in a field. */ -UNIV_INLINE -void -dfield_set_data( -/*============*/ - dfield_t* field, /* in: field */ - const void* data, /* in: data */ - ulint len) /* in: length or UNIV_SQL_NULL */ -{ - ut_ad(field); - - field->data = (void*) data; - field->len = len; -} - -/************************************************************************* -Copies the data and len fields. */ -UNIV_INLINE -void -dfield_copy_data( -/*=============*/ - dfield_t* field1, /* in: field to copy to */ - dfield_t* field2) /* in: field to copy from */ -{ - ut_ad(field1 && field2); - - field1->data = field2->data; - field1->len = field2->len; -} - -/************************************************************************* -Copies a data field to another. */ -UNIV_INLINE -void -dfield_copy( -/*========*/ - dfield_t* field1, /* in: field to copy to */ - dfield_t* field2) /* in: field to copy from */ -{ - *field1 = *field2; -} - -/************************************************************************* -Tests if data length and content is equal for two dfields. 
*/ -UNIV_INLINE -ibool -dfield_datas_are_binary_equal( -/*==========================*/ - /* out: TRUE if equal */ - dfield_t* field1, /* in: field */ - dfield_t* field2) /* in: field */ -{ - ulint len; - - len = field1->len; - - if ((len != field2->len) - || ((len != UNIV_SQL_NULL) - && (0 != ut_memcmp(field1->data, field2->data, - len)))) { - - return(FALSE); - } - - return(TRUE); -} - -/************************************************************************* -Gets info bits in a data tuple. */ -UNIV_INLINE -ulint -dtuple_get_info_bits( -/*=================*/ - /* out: info bits */ - dtuple_t* tuple) /* in: tuple */ -{ - ut_ad(tuple); - - return(tuple->info_bits); -} - -/************************************************************************* -Sets info bits in a data tuple. */ -UNIV_INLINE -void -dtuple_set_info_bits( -/*=================*/ - dtuple_t* tuple, /* in: tuple */ - ulint info_bits) /* in: info bits */ -{ - ut_ad(tuple); - - tuple->info_bits = info_bits; -} - -/************************************************************************* -Gets number of fields used in record comparisons. */ -UNIV_INLINE -ulint -dtuple_get_n_fields_cmp( -/*====================*/ - /* out: number of fields used in comparisons - in rem0cmp.* */ - dtuple_t* tuple) /* in: tuple */ -{ - ut_ad(tuple); - - return(tuple->n_fields_cmp); -} - -/************************************************************************* -Sets number of fields used in record comparisons. */ -UNIV_INLINE -void -dtuple_set_n_fields_cmp( -/*====================*/ - dtuple_t* tuple, /* in: tuple */ - ulint n_fields_cmp) /* in: number of fields used in - comparisons in rem0cmp.* */ -{ - ut_ad(tuple); - ut_ad(n_fields_cmp <= tuple->n_fields); - - tuple->n_fields_cmp = n_fields_cmp; -} - -/************************************************************************* -Gets number of fields in a data tuple. 
*/ -UNIV_INLINE -ulint -dtuple_get_n_fields( -/*================*/ - /* out: number of fields */ - dtuple_t* tuple) /* in: tuple */ -{ - ut_ad(tuple); - - return(tuple->n_fields); -} - -/************************************************************************* -Gets nth field of a tuple. */ -UNIV_INLINE -dfield_t* -dtuple_get_nth_field( -/*=================*/ - /* out: nth field */ - dtuple_t* tuple, /* in: tuple */ - ulint n) /* in: index of field */ -{ - ut_ad(tuple); - ut_ad(n < tuple->n_fields); - - return(tuple->fields + n); -} - -/************************************************************** -Creates a data tuple to a memory heap. The default value for number -of fields used in record comparisons for this tuple is n_fields. */ -UNIV_INLINE -dtuple_t* -dtuple_create( -/*==========*/ - /* out, own: created tuple */ - mem_heap_t* heap, /* in: memory heap where the tuple - is created */ - ulint n_fields) /* in: number of fields */ -{ - dtuple_t* tuple; - - ut_ad(heap); - - tuple = (dtuple_t*) mem_heap_alloc(heap, sizeof(dtuple_t) - + n_fields * sizeof(dfield_t)); - tuple->info_bits = 0; - tuple->n_fields = n_fields; - tuple->n_fields_cmp = n_fields; - tuple->fields = (dfield_t*)(((byte*)tuple) + sizeof(dtuple_t)); - -#ifdef UNIV_DEBUG - tuple->magic_n = DATA_TUPLE_MAGIC_N; - - { /* In the debug version, initialize fields to an error value */ - ulint i; - - for (i = 0; i < n_fields; i++) { - (tuple->fields + i)->data = &data_error; - dfield_get_type(tuple->fields + i)->mtype = DATA_ERROR; - } - } -#endif - return(tuple); -} - -/************************************************************** -The following function returns the sum of data lengths of a tuple. The space -occupied by the field structs or the tuple struct is not counted. Neither -is possible space in externally stored parts of the field. 
*/ -UNIV_INLINE -ulint -dtuple_get_data_size( -/*=================*/ - /* out: sum of data lengths */ - dtuple_t* tuple) /* in: typed data tuple */ -{ - dfield_t* field; - ulint n_fields; - ulint len; - ulint i; - ulint sum = 0; - - ut_ad(tuple); - ut_ad(dtuple_check_typed(tuple)); - ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N); - - n_fields = tuple->n_fields; - - for (i = 0; i < n_fields; i++) { - field = dtuple_get_nth_field(tuple, i); - len = dfield_get_len(field); - - if (len == UNIV_SQL_NULL) { - len = dtype_get_sql_null_size(dfield_get_type(field)); - } - - sum += len; - } - - return(sum); -} - -/*********************************************************************** -Sets types of fields binary in a tuple. */ -UNIV_INLINE -void -dtuple_set_types_binary( -/*====================*/ - dtuple_t* tuple, /* in: data tuple */ - ulint n) /* in: number of fields to set */ -{ - dtype_t* dfield_type; - ulint i; - - for (i = 0; i < n; i++) { - dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i)); - dtype_set(dfield_type, DATA_BINARY, 0, 0); - } -} - -/**************************************************************** -Folds a prefix given as the number of fields of a tuple. 
*/ -UNIV_INLINE -ulint -dtuple_fold( -/*========*/ - /* out: the folded value */ - dtuple_t* tuple, /* in: the tuple */ - ulint n_fields,/* in: number of complete fields to fold */ - ulint n_bytes,/* in: number of bytes to fold in an - incomplete last field */ - dulint tree_id)/* in: index tree id */ -{ - dfield_t* field; - ulint i; - byte* data; - ulint len; - ulint fold; - - ut_ad(tuple); - ut_ad(tuple->magic_n == DATA_TUPLE_MAGIC_N); - ut_ad(dtuple_check_typed(tuple)); - - fold = ut_fold_dulint(tree_id); - - for (i = 0; i < n_fields; i++) { - field = dtuple_get_nth_field(tuple, i); - - data = (byte*) dfield_get_data(field); - len = dfield_get_len(field); - - if (len != UNIV_SQL_NULL) { - fold = ut_fold_ulint_pair(fold, - ut_fold_binary(data, len)); - } - } - - if (n_bytes > 0) { - field = dtuple_get_nth_field(tuple, i); - - data = (byte*) dfield_get_data(field); - len = dfield_get_len(field); - - if (len != UNIV_SQL_NULL) { - if (len > n_bytes) { - len = n_bytes; - } - - fold = ut_fold_ulint_pair(fold, - ut_fold_binary(data, len)); - } - } - - return(fold); -} - -/************************************************************************** -Writes an SQL null field full of zeros. */ -UNIV_INLINE -void -data_write_sql_null( -/*================*/ - byte* data, /* in: pointer to a buffer of size len */ - ulint len) /* in: SQL null size in bytes */ -{ - ulint j; - - for (j = 0; j < len; j++) { - data[j] = '\0'; - } -} - -/************************************************************************** -Checks if a dtuple contains an SQL null value. 
*/ -UNIV_INLINE -ibool -dtuple_contains_null( -/*=================*/ - /* out: TRUE if some field is SQL null */ - dtuple_t* tuple) /* in: dtuple */ -{ - ulint n; - ulint i; - - n = dtuple_get_n_fields(tuple); - - for (i = 0; i < n; i++) { - if (dfield_get_len(dtuple_get_nth_field(tuple, i)) - == UNIV_SQL_NULL) { - - return(TRUE); - } - } - - return(FALSE); -} diff --git a/storage/innobase/include/data0type.h b/storage/innobase/include/data0type.h deleted file mode 100644 index e5e9c5076be..00000000000 --- a/storage/innobase/include/data0type.h +++ /dev/null @@ -1,450 +0,0 @@ -/****************************************************** -Data types - -(c) 1996 Innobase Oy - -Created 1/16/1996 Heikki Tuuri -*******************************************************/ - -#ifndef data0type_h -#define data0type_h - -#include "univ.i" - -extern ulint data_mysql_default_charset_coll; -#define DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL 8 -#define DATA_MYSQL_BINARY_CHARSET_COLL 63 - -/* SQL data type struct */ -typedef struct dtype_struct dtype_t; - -/*-------------------------------------------*/ -/* The 'MAIN TYPE' of a column */ -#define DATA_VARCHAR 1 /* character varying of the - latin1_swedish_ci charset-collation; note - that the MySQL format for this, DATA_BINARY, - DATA_VARMYSQL, is also affected by whether the - 'precise type' contains - DATA_MYSQL_TRUE_VARCHAR */ -#define DATA_CHAR 2 /* fixed length character of the - latin1_swedish_ci charset-collation */ -#define DATA_FIXBINARY 3 /* binary string of fixed length */ -#define DATA_BINARY 4 /* binary string */ -#define DATA_BLOB 5 /* binary large object, or a TEXT type; - if prtype & DATA_BINARY_TYPE == 0, then this is - actually a TEXT column (or a BLOB created - with < 4.0.14; since column prefix indexes - came only in 4.0.14, the missing flag in BLOBs - created before that does not cause any harm) */ -#define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */ -#define DATA_SYS_CHILD 7 /* address of the child page in 
node pointer */ -#define DATA_SYS 8 /* system column */ - -/* Data types >= DATA_FLOAT must be compared using the whole field, not as -binary strings */ - -#define DATA_FLOAT 9 -#define DATA_DOUBLE 10 -#define DATA_DECIMAL 11 /* decimal number stored as an ASCII string */ -#define DATA_VARMYSQL 12 /* any charset varying length char */ -#define DATA_MYSQL 13 /* any charset fixed length char */ - /* NOTE that 4.1.1 used DATA_MYSQL and - DATA_VARMYSQL for all character sets, and the - charset-collation for tables created with it - can also be latin1_swedish_ci */ -#define DATA_MTYPE_MAX 63 /* dtype_store_for_order_and_null_size() - requires the values are <= 63 */ -/*-------------------------------------------*/ -/* The 'PRECISE TYPE' of a column */ -/* -Tables created by a MySQL user have the following convention: - -- In the least significant byte in the precise type we store the MySQL type -code (not applicable for system columns). - -- In the second least significant byte we OR flags DATA_NOT_NULL, -DATA_UNSIGNED, DATA_BINARY_TYPE. - -- In the third least significant byte of the precise type of string types we -store the MySQL charset-collation code. In DATA_BLOB columns created with -< 4.0.14 we do not actually know if it is a BLOB or a TEXT column. Since there -are no indexes on prefixes of BLOB or TEXT columns in < 4.0.14, this is no -problem, though. - -Note that versions < 4.1.2 or < 5.0.1 did not store the charset code to the -precise type, since the charset was always the default charset of the MySQL -installation. If the stored charset code is 0 in the system table SYS_COLUMNS -of InnoDB, that means that the default charset of this MySQL installation -should be used. 
- -When loading a table definition from the system tables to the InnoDB data -dictionary cache in main memory, InnoDB versions >= 4.1.2 and >= 5.0.1 check -if the stored charset-collation is 0, and if that is the case and the type is -a non-binary string, replace that 0 by the default charset-collation code of -this MySQL installation. In short, in old tables, the charset-collation code -in the system tables on disk can be 0, but in in-memory data structures -(dtype_t), the charset-collation code is always != 0 for non-binary string -types. - -In new tables, in binary string types, the charset-collation code is the -MySQL code for the 'binary charset', that is, != 0. - -For binary string types and for DATA_CHAR, DATA_VARCHAR, and for those -DATA_BLOB which are binary or have the charset-collation latin1_swedish_ci, -InnoDB performs all comparisons internally, without resorting to the MySQL -comparison functions. This is to save CPU time. - -InnoDB's own internal system tables have different precise types for their -columns, and for them the precise type is usually not used at all. -*/ - -#define DATA_ENGLISH 4 /* English language character string: this - is a relic from pre-MySQL time and only used - for InnoDB's own system tables */ -#define DATA_ERROR 111 /* another relic from pre-MySQL time */ - -#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL - type from the precise type */ -#define DATA_MYSQL_TRUE_VARCHAR 15 /* MySQL type code for the >= 5.0.3 - format true VARCHAR */ - -/* Precise data types for system columns and the length of those columns; -NOTE: the values must run from 0 up in the order given! 
All codes must -be less than 256 */ -#define DATA_ROW_ID 0 /* row id: a dulint */ -#define DATA_ROW_ID_LEN 6 /* stored length for row id */ - -#define DATA_TRX_ID 1 /* transaction id: 6 bytes */ -#define DATA_TRX_ID_LEN 6 - -#define DATA_ROLL_PTR 2 /* rollback data pointer: 7 bytes */ -#define DATA_ROLL_PTR_LEN 7 - -#define DATA_N_SYS_COLS 3 /* number of system columns defined above */ - -/* Flags ORed to the precise data type */ -#define DATA_NOT_NULL 256 /* this is ORed to the precise type when - the column is declared as NOT NULL */ -#define DATA_UNSIGNED 512 /* this id ORed to the precise type when - we have an unsigned integer type */ -#define DATA_BINARY_TYPE 1024 /* if the data type is a binary character - string, this is ORed to the precise type: - this only holds for tables created with - >= MySQL-4.0.14 */ -/* #define DATA_NONLATIN1 2048 This is a relic from < 4.1.2 and < 5.0.1. - In earlier versions this was set for some - BLOB columns. -*/ -#define DATA_LONG_TRUE_VARCHAR 4096 /* this is ORed to the precise data - type when the column is true VARCHAR where - MySQL uses 2 bytes to store the data len; - for shorter VARCHARs MySQL uses only 1 byte */ -/*-------------------------------------------*/ - -/* This many bytes we need to store the type information affecting the -alphabetical order for a single field and decide the storage size of an -SQL null*/ -#define DATA_ORDER_NULL_TYPE_BUF_SIZE 4 -/* In the >= 4.1.x storage format we add 2 bytes more so that we can also -store the charset-collation number; one byte is left unused, though */ -#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE 6 - -/************************************************************************* -Gets the MySQL type code from a dtype. */ -UNIV_INLINE -ulint -dtype_get_mysql_type( -/*=================*/ - /* out: MySQL type code; this is NOT an InnoDB - type code! 
*/ - dtype_t* type); /* in: type struct */ -/************************************************************************* -Determine how many bytes the first n characters of the given string occupy. -If the string is shorter than n characters, returns the number of bytes -the characters in the string occupy. */ - -ulint -dtype_get_at_most_n_mbchars( -/*========================*/ - /* out: length of the prefix, - in bytes */ - ulint prtype, /* in: precise type */ - ulint mbminlen, /* in: minimum length of a - multi-byte character */ - ulint mbmaxlen, /* in: maximum length of a - multi-byte character */ - ulint prefix_len, /* in: length of the requested - prefix, in characters, multiplied by - dtype_get_mbmaxlen(dtype) */ - ulint data_len, /* in: length of str (in bytes) */ - const char* str); /* in: the string whose prefix - length is being determined */ -/************************************************************************* -Checks if a data main type is a string type. Also a BLOB is considered a -string type. */ - -ibool -dtype_is_string_type( -/*=================*/ - /* out: TRUE if string type */ - ulint mtype); /* in: InnoDB main data type code: DATA_CHAR, ... */ -/************************************************************************* -Checks if a type is a binary string type. Note that for tables created with -< 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. For -those DATA_BLOB columns this function currently returns FALSE. */ - -ibool -dtype_is_binary_string_type( -/*========================*/ - /* out: TRUE if binary string type */ - ulint mtype, /* in: main data type */ - ulint prtype);/* in: precise type */ -/************************************************************************* -Checks if a type is a non-binary string type. That is, dtype_is_string_type is -TRUE and dtype_is_binary_string_type is FALSE. Note that for tables created -with < 4.0.14, we do not know if a DATA_BLOB column is a BLOB or a TEXT column. 
-For those DATA_BLOB columns this function currently returns TRUE. */ - -ibool -dtype_is_non_binary_string_type( -/*============================*/ - /* out: TRUE if non-binary string type */ - ulint mtype, /* in: main data type */ - ulint prtype);/* in: precise type */ -/************************************************************************* -Sets a data type structure. */ -UNIV_INLINE -void -dtype_set( -/*======*/ - dtype_t* type, /* in: type struct to init */ - ulint mtype, /* in: main data type */ - ulint prtype, /* in: precise type */ - ulint len); /* in: precision of type */ -/************************************************************************* -Copies a data type structure. */ -UNIV_INLINE -void -dtype_copy( -/*=======*/ - dtype_t* type1, /* in: type struct to copy to */ - const dtype_t* type2); /* in: type struct to copy from */ -/************************************************************************* -Gets the SQL main data type. */ -UNIV_INLINE -ulint -dtype_get_mtype( -/*============*/ - dtype_t* type); -/************************************************************************* -Gets the precise data type. */ -UNIV_INLINE -ulint -dtype_get_prtype( -/*=============*/ - dtype_t* type); -/************************************************************************* -Compute the mbminlen and mbmaxlen members of a data type structure. */ -UNIV_INLINE -void -dtype_get_mblen( -/*============*/ - ulint mtype, /* in: main type */ - ulint prtype, /* in: precise type (and collation) */ - ulint* mbminlen, /* out: minimum length of a - multi-byte character */ - ulint* mbmaxlen); /* out: maximum length of a - multi-byte character */ -/************************************************************************* -Gets the MySQL charset-collation code for MySQL string types. 
*/ - -ulint -dtype_get_charset_coll_noninline( -/*=============================*/ - ulint prtype);/* in: precise data type */ -/************************************************************************* -Gets the MySQL charset-collation code for MySQL string types. */ -UNIV_INLINE -ulint -dtype_get_charset_coll( -/*===================*/ - ulint prtype);/* in: precise data type */ -/************************************************************************* -Forms a precise type from the < 4.1.2 format precise type plus the -charset-collation code. */ - -ulint -dtype_form_prtype( -/*==============*/ - ulint old_prtype, /* in: the MySQL type code and the flags - DATA_BINARY_TYPE etc. */ - ulint charset_coll); /* in: MySQL charset-collation code */ -/************************************************************************* -Gets the type length. */ -UNIV_INLINE -ulint -dtype_get_len( -/*==========*/ - dtype_t* type); -/************************************************************************* -Gets the minimum length of a character, in bytes. */ -UNIV_INLINE -ulint -dtype_get_mbminlen( -/*===============*/ - /* out: minimum length of a char, in bytes, - or 0 if this is not a character type */ - const dtype_t* type); /* in: type */ -/************************************************************************* -Gets the maximum length of a character, in bytes. */ -UNIV_INLINE -ulint -dtype_get_mbmaxlen( -/*===============*/ - /* out: maximum length of a char, in bytes, - or 0 if this is not a character type */ - const dtype_t* type); /* in: type */ -/************************************************************************* -Gets the padding character code for the type. 
*/ -UNIV_INLINE -ulint -dtype_get_pad_char( -/*===============*/ - /* out: padding character code, or - ULINT_UNDEFINED if no padding specified */ - ulint mtype, /* in: main type */ - ulint prtype); /* in: precise type */ -/*************************************************************************** -Returns the size of a fixed size data type, 0 if not a fixed size type. */ -UNIV_INLINE -ulint -dtype_get_fixed_size_low( -/*=====================*/ - /* out: fixed size, or 0 */ - ulint mtype, /* in: main type */ - ulint prtype, /* in: precise type */ - ulint len, /* in: length */ - ulint mbminlen, /* in: minimum length of a multibyte char */ - ulint mbmaxlen); /* in: maximum length of a multibyte char */ -/*************************************************************************** -Returns the minimum size of a data type. */ -UNIV_INLINE -ulint -dtype_get_min_size_low( -/*===================*/ - /* out: minimum size */ - ulint mtype, /* in: main type */ - ulint prtype, /* in: precise type */ - ulint len, /* in: length */ - ulint mbminlen, /* in: minimum length of a multibyte char */ - ulint mbmaxlen); /* in: maximum length of a multibyte char */ -/*************************************************************************** -Returns the maximum size of a data type. Note: types in system tables may be -incomplete and return incorrect information. */ -UNIV_INLINE -ulint -dtype_get_max_size_low( -/*===================*/ - /* out: maximum size */ - ulint mtype, /* in: main type */ - ulint len); /* in: length */ -/*************************************************************************** -Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type. -For fixed length types it is the fixed length of the type, otherwise 0. 
*/ -UNIV_INLINE -ulint -dtype_get_sql_null_size( -/*====================*/ - /* out: SQL null storage size - in ROW_FORMAT=REDUNDANT */ - const dtype_t* type); /* in: type */ -/************************************************************************** -Reads to a type the stored information which determines its alphabetical -ordering and the storage size of an SQL NULL value. */ -UNIV_INLINE -void -dtype_read_for_order_and_null_size( -/*===============================*/ - dtype_t* type, /* in: type struct */ - byte* buf); /* in: buffer for the stored order info */ -/************************************************************************** -Stores for a type the information which determines its alphabetical ordering -and the storage size of an SQL NULL value. This is the >= 4.1.x storage -format. */ -UNIV_INLINE -void -dtype_new_store_for_order_and_null_size( -/*====================================*/ - byte* buf, /* in: buffer for - DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE - bytes where we store the info */ - dtype_t* type, /* in: type struct */ - ulint prefix_len);/* in: prefix length to - replace type->len, or 0 */ -/************************************************************************** -Reads to a type the stored information which determines its alphabetical -ordering and the storage size of an SQL NULL value. This is the 4.1.x storage -format. */ -UNIV_INLINE -void -dtype_new_read_for_order_and_null_size( -/*===================================*/ - dtype_t* type, /* in: type struct */ - byte* buf); /* in: buffer for stored type order info */ - -/************************************************************************* -Validates a data type structure. */ - -ibool -dtype_validate( -/*===========*/ - /* out: TRUE if ok */ - dtype_t* type); /* in: type struct to validate */ -/************************************************************************* -Prints a data type structure. 
*/ - -void -dtype_print( -/*========*/ - dtype_t* type); /* in: type */ - -/* Structure for an SQL data type. -If you add fields to this structure, be sure to initialize them everywhere. -This structure is initialized in the following functions: -dtype_set() -dtype_read_for_order_and_null_size() -dtype_new_read_for_order_and_null_size() -sym_tab_add_null_lit() */ - -struct dtype_struct{ - unsigned mtype:8; /* main data type */ - unsigned prtype:24; /* precise type; MySQL data - type, charset code, flags to - indicate nullability, - signedness, whether this is a - binary string, whether this is - a true VARCHAR where MySQL - uses 2 bytes to store the length */ - - /* the remaining fields do not affect alphabetical ordering: */ - - unsigned len:16; /* length; for MySQL data this - is field->pack_length(), - except that for a >= 5.0.3 - type true VARCHAR this is the - maximum byte length of the - string data (in addition to - the string, MySQL uses 1 or 2 - bytes to store the string length) */ - - unsigned mbminlen:2; /* minimum length of a - character, in bytes */ - unsigned mbmaxlen:3; /* maximum length of a - character, in bytes */ -}; - -#ifndef UNIV_NONINL -#include "data0type.ic" -#endif - -#endif diff --git a/storage/innobase/include/data0type.ic b/storage/innobase/include/data0type.ic deleted file mode 100644 index ad0f95755d2..00000000000 --- a/storage/innobase/include/data0type.ic +++ /dev/null @@ -1,562 +0,0 @@ -/****************************************************** -Data types - -(c) 1996 Innobase Oy - -Created 1/16/1996 Heikki Tuuri -*******************************************************/ - -#include "mach0data.h" - -#ifndef UNIV_HOTBACKUP -/********************************************************************** -Get the variable length bounds of the given character set. - -NOTE: the prototype of this function is copied from ha_innodb.cc! If you change -this function, you MUST change also the prototype here! 
*/ -extern -void -innobase_get_cset_width( -/*====================*/ - ulint cset, /* in: MySQL charset-collation code */ - ulint* mbminlen, /* out: minimum length of a char (in bytes) */ - ulint* mbmaxlen); /* out: maximum length of a char (in bytes) */ -#endif /* !UNIV_HOTBACKUP */ - -/************************************************************************* -Gets the MySQL charset-collation code for MySQL string types. */ -UNIV_INLINE -ulint -dtype_get_charset_coll( -/*===================*/ - ulint prtype) /* in: precise data type */ -{ - return((prtype >> 16) & 0xFFUL); -} - -/************************************************************************* -Gets the MySQL type code from a dtype. */ -UNIV_INLINE -ulint -dtype_get_mysql_type( -/*=================*/ - /* out: MySQL type code; this is NOT an InnoDB - type code! */ - dtype_t* type) /* in: type struct */ -{ - return(type->prtype & 0xFFUL); -} - -/************************************************************************* -Compute the mbminlen and mbmaxlen members of a data type structure. */ -UNIV_INLINE -void -dtype_get_mblen( -/*============*/ - ulint mtype, /* in: main type */ - ulint prtype, /* in: precise type (and collation) */ - ulint* mbminlen, /* out: minimum length of a - multi-byte character */ - ulint* mbmaxlen) /* out: maximum length of a - multi-byte character */ -{ - if (dtype_is_string_type(mtype)) { -#ifndef UNIV_HOTBACKUP - innobase_get_cset_width(dtype_get_charset_coll(prtype), - mbminlen, mbmaxlen); - ut_ad(*mbminlen <= *mbmaxlen); - ut_ad(*mbminlen <= 2); /* mbminlen in dtype_t is 0..3 */ - ut_ad(*mbmaxlen < 1 << 3); /* mbmaxlen in dtype_t is 0..7 */ -#else /* !UNIV_HOTBACKUP */ - ut_a(mtype <= DATA_BINARY); - *mbminlen = *mbmaxlen = 1; -#endif /* !UNIV_HOTBACKUP */ - } else { - *mbminlen = *mbmaxlen = 0; - } -} - -/************************************************************************* -Compute the mbminlen and mbmaxlen members of a data type structure. 
*/ -UNIV_INLINE -void -dtype_set_mblen( -/*============*/ - dtype_t* type) /* in/out: type */ -{ - ulint mbminlen; - ulint mbmaxlen; - - dtype_get_mblen(type->mtype, type->prtype, &mbminlen, &mbmaxlen); - type->mbminlen = mbminlen; - type->mbmaxlen = mbmaxlen; - - ut_ad(dtype_validate(type)); -} - -/************************************************************************* -Sets a data type structure. */ -UNIV_INLINE -void -dtype_set( -/*======*/ - dtype_t* type, /* in: type struct to init */ - ulint mtype, /* in: main data type */ - ulint prtype, /* in: precise type */ - ulint len) /* in: precision of type */ -{ - ut_ad(type); - ut_ad(mtype <= DATA_MTYPE_MAX); - - type->mtype = mtype; - type->prtype = prtype; - type->len = len; - - dtype_set_mblen(type); -} - -/************************************************************************* -Copies a data type structure. */ -UNIV_INLINE -void -dtype_copy( -/*=======*/ - dtype_t* type1, /* in: type struct to copy to */ - const dtype_t* type2) /* in: type struct to copy from */ -{ - *type1 = *type2; - - ut_ad(dtype_validate(type1)); -} - -/************************************************************************* -Gets the SQL main data type. */ -UNIV_INLINE -ulint -dtype_get_mtype( -/*============*/ - dtype_t* type) -{ - ut_ad(type); - - return(type->mtype); -} - -/************************************************************************* -Gets the precise data type. */ -UNIV_INLINE -ulint -dtype_get_prtype( -/*=============*/ - dtype_t* type) -{ - ut_ad(type); - - return(type->prtype); -} - -/************************************************************************* -Gets the type length. */ -UNIV_INLINE -ulint -dtype_get_len( -/*==========*/ - dtype_t* type) -{ - ut_ad(type); - - return(type->len); -} - -/************************************************************************* -Gets the minimum length of a character, in bytes. 
*/ -UNIV_INLINE -ulint -dtype_get_mbminlen( -/*===============*/ - /* out: minimum length of a char, in bytes, - or 0 if this is not a character type */ - const dtype_t* type) /* in: type */ -{ - ut_ad(type); - return(type->mbminlen); -} -/************************************************************************* -Gets the maximum length of a character, in bytes. */ -UNIV_INLINE -ulint -dtype_get_mbmaxlen( -/*===============*/ - /* out: maximum length of a char, in bytes, - or 0 if this is not a character type */ - const dtype_t* type) /* in: type */ -{ - ut_ad(type); - return(type->mbmaxlen); -} - -/************************************************************************* -Gets the padding character code for a type. */ -UNIV_INLINE -ulint -dtype_get_pad_char( -/*===============*/ - /* out: padding character code, or - ULINT_UNDEFINED if no padding specified */ - ulint mtype, /* in: main type */ - ulint prtype) /* in: precise type */ -{ - switch (mtype) { - case DATA_FIXBINARY: - case DATA_BINARY: - if (UNIV_UNLIKELY(dtype_get_charset_coll(prtype) - == DATA_MYSQL_BINARY_CHARSET_COLL)) { - /* Starting from 5.0.18, do not pad - VARBINARY or BINARY columns. */ - return(ULINT_UNDEFINED); - } - /* Fall through */ - case DATA_CHAR: - case DATA_VARCHAR: - case DATA_MYSQL: - case DATA_VARMYSQL: - /* Space is the padding character for all char and binary - strings, and starting from 5.0.3, also for TEXT strings. */ - - return(0x20); - case DATA_BLOB: - if (!(prtype & DATA_BINARY_TYPE)) { - return(0x20); - } - /* Fall through */ - default: - /* No padding specified */ - return(ULINT_UNDEFINED); - } -} - -/************************************************************************** -Stores for a type the information which determines its alphabetical ordering -and the storage size of an SQL NULL value. This is the >= 4.1.x storage -format. 
*/ -UNIV_INLINE -void -dtype_new_store_for_order_and_null_size( -/*====================================*/ - byte* buf, /* in: buffer for - DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE - bytes where we store the info */ - dtype_t* type, /* in: type struct */ - ulint prefix_len)/* in: prefix length to - replace type->len, or 0 */ -{ -#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE -#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE" -#endif - ulint len; - - buf[0] = (byte)(type->mtype & 0xFFUL); - - if (type->prtype & DATA_BINARY_TYPE) { - buf[0] = buf[0] | 128; - } - - /* In versions < 4.1.2 we had: if (type->prtype & DATA_NONLATIN1) { - buf[0] = buf[0] | 64; - } - */ - - buf[1] = (byte)(type->prtype & 0xFFUL); - - len = prefix_len ? prefix_len : type->len; - - mach_write_to_2(buf + 2, len & 0xFFFFUL); - - ut_ad(dtype_get_charset_coll(type->prtype) < 256); - mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype)); - - if (type->prtype & DATA_NOT_NULL) { - buf[4] |= 128; - } -} - -/************************************************************************** -Reads to a type the stored information which determines its alphabetical -ordering and the storage size of an SQL NULL value. This is the < 4.1.x -storage format. 
*/ -UNIV_INLINE -void -dtype_read_for_order_and_null_size( -/*===============================*/ - dtype_t* type, /* in: type struct */ - byte* buf) /* in: buffer for stored type order info */ -{ -#if 4 != DATA_ORDER_NULL_TYPE_BUF_SIZE -# error "4 != DATA_ORDER_NULL_TYPE_BUF_SIZE" -#endif - - type->mtype = buf[0] & 63; - type->prtype = buf[1]; - - if (buf[0] & 128) { - type->prtype = type->prtype | DATA_BINARY_TYPE; - } - - type->len = mach_read_from_2(buf + 2); - - type->prtype = dtype_form_prtype(type->prtype, - data_mysql_default_charset_coll); - dtype_set_mblen(type); -} - -/************************************************************************** -Reads to a type the stored information which determines its alphabetical -ordering and the storage size of an SQL NULL value. This is the >= 4.1.x -storage format. */ -UNIV_INLINE -void -dtype_new_read_for_order_and_null_size( -/*===================================*/ - dtype_t* type, /* in: type struct */ - byte* buf) /* in: buffer for stored type order info */ -{ - ulint charset_coll; - -#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE -#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE" -#endif - - type->mtype = buf[0] & 63; - type->prtype = buf[1]; - - if (buf[0] & 128) { - type->prtype |= DATA_BINARY_TYPE; - } - - if (buf[4] & 128) { - type->prtype |= DATA_NOT_NULL; - } - - type->len = mach_read_from_2(buf + 2); - - mach_read_from_2(buf + 4); - - charset_coll = mach_read_from_2(buf + 4) & 0x7fff; - - if (dtype_is_string_type(type->mtype)) { - ut_a(charset_coll < 256); - - if (charset_coll == 0) { - /* This insert buffer record was inserted with MySQL - version < 4.1.2, and the charset-collation code was not - explicitly stored to dtype->prtype at that time. It - must be the default charset-collation of this MySQL - installation. 
*/ - - charset_coll = data_mysql_default_charset_coll; - } - - type->prtype = dtype_form_prtype(type->prtype, charset_coll); - } - dtype_set_mblen(type); -} - -/*************************************************************************** -Returns the size of a fixed size data type, 0 if not a fixed size type. */ -UNIV_INLINE -ulint -dtype_get_fixed_size_low( -/*=====================*/ - /* out: fixed size, or 0 */ - ulint mtype, /* in: main type */ - ulint prtype, /* in: precise type */ - ulint len, /* in: length */ - ulint mbminlen, /* in: minimum length of a multibyte char */ - ulint mbmaxlen) /* in: maximum length of a multibyte char */ -{ - switch (mtype) { - case DATA_SYS: -#ifdef UNIV_DEBUG - switch (prtype & DATA_MYSQL_TYPE_MASK) { - case DATA_ROW_ID: - ut_ad(len == DATA_ROW_ID_LEN); - break; - case DATA_TRX_ID: - ut_ad(len == DATA_TRX_ID_LEN); - break; - case DATA_ROLL_PTR: - ut_ad(len == DATA_ROLL_PTR_LEN); - break; - default: - ut_ad(0); - return(0); - } -#endif /* UNIV_DEBUG */ - case DATA_CHAR: - case DATA_FIXBINARY: - case DATA_INT: - case DATA_FLOAT: - case DATA_DOUBLE: - return(len); - case DATA_MYSQL: - if (prtype & DATA_BINARY_TYPE) { - return(len); - } else { -#ifdef UNIV_HOTBACKUP - if (mbminlen == mbmaxlen) { - return(len); - } -#else /* UNIV_HOTBACKUP */ - /* We play it safe here and ask MySQL for - mbminlen and mbmaxlen. Although - mbminlen and mbmaxlen are - initialized if and only if prtype - is (in one of the 3 functions in this file), - it could be that none of these functions - has been called. 
*/ - - ulint i_mbminlen, i_mbmaxlen; - - innobase_get_cset_width( - dtype_get_charset_coll(prtype), - &i_mbminlen, &i_mbmaxlen); - - if (UNIV_UNLIKELY(mbminlen != i_mbminlen) - || UNIV_UNLIKELY(mbmaxlen != i_mbmaxlen)) { - - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: " - "mbminlen=%lu, " - "mbmaxlen=%lu, " - "type->mbminlen=%lu, " - "type->mbmaxlen=%lu\n", - (ulong) i_mbminlen, - (ulong) i_mbmaxlen, - (ulong) mbminlen, - (ulong) mbmaxlen); - } - if (mbminlen == mbmaxlen) { - return(len); - } -#endif /* !UNIV_HOTBACKUP */ - } - /* fall through for variable-length charsets */ - case DATA_VARCHAR: - case DATA_BINARY: - case DATA_DECIMAL: - case DATA_VARMYSQL: - case DATA_BLOB: - return(0); - default: - ut_error; - } - - return(0); -} - -/*************************************************************************** -Returns the minimum size of a data type. */ -UNIV_INLINE -ulint -dtype_get_min_size_low( -/*===================*/ - /* out: minimum size */ - ulint mtype, /* in: main type */ - ulint prtype, /* in: precise type */ - ulint len, /* in: length */ - ulint mbminlen, /* in: minimum length of a multibyte char */ - ulint mbmaxlen) /* in: maximum length of a multibyte char */ -{ - switch (mtype) { - case DATA_SYS: -#ifdef UNIV_DEBUG - switch (prtype & DATA_MYSQL_TYPE_MASK) { - case DATA_ROW_ID: - ut_ad(len == DATA_ROW_ID_LEN); - break; - case DATA_TRX_ID: - ut_ad(len == DATA_TRX_ID_LEN); - break; - case DATA_ROLL_PTR: - ut_ad(len == DATA_ROLL_PTR_LEN); - break; - default: - ut_ad(0); - return(0); - } -#endif /* UNIV_DEBUG */ - case DATA_CHAR: - case DATA_FIXBINARY: - case DATA_INT: - case DATA_FLOAT: - case DATA_DOUBLE: - return(len); - case DATA_MYSQL: - if ((prtype & DATA_BINARY_TYPE) || mbminlen == mbmaxlen) { - return(len); - } - /* this is a variable-length character set */ - ut_a(mbminlen > 0); - ut_a(mbmaxlen > mbminlen); - ut_a(len % mbmaxlen == 0); - return(len * mbminlen / mbmaxlen); - case DATA_VARCHAR: - case DATA_BINARY: - case DATA_DECIMAL: 
- case DATA_VARMYSQL: - case DATA_BLOB: - return(0); - default: - ut_error; - } - - return(0); -} - -/*************************************************************************** -Returns the maximum size of a data type. Note: types in system tables may be -incomplete and return incorrect information. */ -UNIV_INLINE -ulint -dtype_get_max_size_low( -/*===================*/ - /* out: maximum size */ - ulint mtype, /* in: main type */ - ulint len) /* in: length */ -{ - switch (mtype) { - case DATA_SYS: - case DATA_CHAR: - case DATA_FIXBINARY: - case DATA_INT: - case DATA_FLOAT: - case DATA_DOUBLE: - case DATA_MYSQL: - case DATA_VARCHAR: - case DATA_BINARY: - case DATA_DECIMAL: - case DATA_VARMYSQL: - return(len); - case DATA_BLOB: - break; - default: - ut_error; - } - - return(ULINT_MAX); -} - -/*************************************************************************** -Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a type. -For fixed length types it is the fixed length of the type, otherwise 0. 
*/ -UNIV_INLINE -ulint -dtype_get_sql_null_size( -/*====================*/ - /* out: SQL null storage size - in ROW_FORMAT=REDUNDANT */ - const dtype_t* type) /* in: type */ -{ - return(dtype_get_fixed_size_low(type->mtype, type->prtype, type->len, - type->mbminlen, type->mbmaxlen)); -} diff --git a/storage/innobase/include/data0types.h b/storage/innobase/include/data0types.h deleted file mode 100644 index ab314f8f471..00000000000 --- a/storage/innobase/include/data0types.h +++ /dev/null @@ -1,19 +0,0 @@ -/************************************************************************ -Some type definitions - -(c) 1994-2000 Innobase Oy - -Created 9/21/2000 Heikki Tuuri -*************************************************************************/ - -#ifndef data0types_h -#define data0types_h - -/* SQL data field struct */ -typedef struct dfield_struct dfield_t; - -/* SQL data tuple struct */ -typedef struct dtuple_struct dtuple_t; - -#endif - diff --git a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h deleted file mode 100644 index ed7ce151718..00000000000 --- a/storage/innobase/include/db0err.h +++ /dev/null @@ -1,80 +0,0 @@ -/****************************************************** -Global error codes for the database - -(c) 1996 Innobase Oy - -Created 5/24/1996 Heikki Tuuri -*******************************************************/ - -#ifndef db0err_h -#define db0err_h - - -#define DB_SUCCESS 10 - -/* The following are error codes */ -#define DB_ERROR 11 -#define DB_OUT_OF_MEMORY 12 -#define DB_OUT_OF_FILE_SPACE 13 -#define DB_LOCK_WAIT 14 -#define DB_DEADLOCK 15 -#define DB_ROLLBACK 16 -#define DB_DUPLICATE_KEY 17 -#define DB_QUE_THR_SUSPENDED 18 -#define DB_MISSING_HISTORY 19 /* required history data has been - deleted due to lack of space in - rollback segment */ -#define DB_CLUSTER_NOT_FOUND 30 -#define DB_TABLE_NOT_FOUND 31 -#define DB_MUST_GET_MORE_FILE_SPACE 32 /* the database has to be stopped - and restarted with more file space */ -#define 
DB_TABLE_IS_BEING_USED 33 -#define DB_TOO_BIG_RECORD 34 /* a record in an index would become - bigger than 1/2 free space in a page - frame */ -#define DB_LOCK_WAIT_TIMEOUT 35 /* lock wait lasted too long */ -#define DB_NO_REFERENCED_ROW 36 /* referenced key value not found - for a foreign key in an insert or - update of a row */ -#define DB_ROW_IS_REFERENCED 37 /* cannot delete or update a row - because it contains a key value - which is referenced */ -#define DB_CANNOT_ADD_CONSTRAINT 38 /* adding a foreign key constraint - to a table failed */ -#define DB_CORRUPTION 39 /* data structure corruption noticed */ -#define DB_COL_APPEARS_TWICE_IN_INDEX 40/* InnoDB cannot handle an index - where same column appears twice */ -#define DB_CANNOT_DROP_CONSTRAINT 41 /* dropping a foreign key constraint - from a table failed */ -#define DB_NO_SAVEPOINT 42 /* no savepoint exists with the given - name */ -#define DB_TABLESPACE_ALREADY_EXISTS 43 /* we cannot create a new single-table - tablespace because a file of the same - name already exists */ -#define DB_TABLESPACE_DELETED 44 /* tablespace does not exist or is - being dropped right now */ -#define DB_LOCK_TABLE_FULL 45 /* lock structs have exhausted the - buffer pool (for big transactions, - InnoDB stores the lock structs in the - buffer pool) */ -#define DB_FOREIGN_DUPLICATE_KEY 46 /* foreign key constraints - activated by the operation would - lead to a duplicate key in some - table */ -#define DB_TOO_MANY_CONCURRENT_TRXS 47 /* when InnoDB runs out of the - preconfigured undo slots, this can - only happen when there are too many - concurrent transactions */ -#define DB_UNSUPPORTED 48 /* when InnoDB sees any artefact or - a feature that it can't recognize or - work with e.g., FT indexes created by - a later version of the engine.
*/ -/* The following are partial failure codes */ -#define DB_FAIL 1000 -#define DB_OVERFLOW 1001 -#define DB_UNDERFLOW 1002 -#define DB_STRONG_FAIL 1003 -#define DB_RECORD_NOT_FOUND 1500 -#define DB_END_OF_INDEX 1501 - -#endif diff --git a/storage/innobase/include/dict0boot.h b/storage/innobase/include/dict0boot.h deleted file mode 100644 index cac79410b24..00000000000 --- a/storage/innobase/include/dict0boot.h +++ /dev/null @@ -1,134 +0,0 @@ -/****************************************************** -Data dictionary creation and booting - -(c) 1996 Innobase Oy - -Created 4/18/1996 Heikki Tuuri -*******************************************************/ - -#ifndef dict0boot_h -#define dict0boot_h - -#include "univ.i" - -#include "mtr0mtr.h" -#include "mtr0log.h" -#include "ut0byte.h" -#include "buf0buf.h" -#include "fsp0fsp.h" -#include "dict0dict.h" - -typedef byte dict_hdr_t; - -/************************************************************************** -Gets a pointer to the dictionary header and x-latches its page. */ - -dict_hdr_t* -dict_hdr_get( -/*=========*/ - /* out: pointer to the dictionary header, - page x-latched */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************** -Returns a new row, table, index, or tree id. */ - -dulint -dict_hdr_get_new_id( -/*================*/ - /* out: the new id */ - ulint type); /* in: DICT_HDR_ROW_ID, ... */ -/************************************************************************** -Returns a new row id. */ -UNIV_INLINE -dulint -dict_sys_get_new_row_id(void); -/*=========================*/ - /* out: the new id */ -/************************************************************************** -Reads a row id from a record or other 6-byte stored form. 
*/ -UNIV_INLINE -dulint -dict_sys_read_row_id( -/*=================*/ - /* out: row id */ - byte* field); /* in: record field */ -/************************************************************************** -Writes a row id to a record or other 6-byte stored form. */ -UNIV_INLINE -void -dict_sys_write_row_id( -/*==================*/ - byte* field, /* in: record field */ - dulint row_id);/* in: row id */ -/********************************************************************* -Initializes the data dictionary memory structures when the database is -started. This function is also called when the data dictionary is created. */ - -void -dict_boot(void); -/*===========*/ -/********************************************************************* -Creates and initializes the data dictionary at the database creation. */ - -void -dict_create(void); -/*=============*/ - - -/* Space id and page no where the dictionary header resides */ -#define DICT_HDR_SPACE 0 /* the SYSTEM tablespace */ -#define DICT_HDR_PAGE_NO FSP_DICT_HDR_PAGE_NO - -/* The ids for the basic system tables and their indexes */ -#define DICT_TABLES_ID ut_dulint_create(0, 1) -#define DICT_COLUMNS_ID ut_dulint_create(0, 2) -#define DICT_INDEXES_ID ut_dulint_create(0, 3) -#define DICT_FIELDS_ID ut_dulint_create(0, 4) -/* The following is a secondary index on SYS_TABLES */ -#define DICT_TABLE_IDS_ID ut_dulint_create(0, 5) - -#define DICT_HDR_FIRST_ID 10 /* the ids for tables etc. 
start - from this number, except for basic - system tables and their above defined - indexes; ibuf tables and indexes are - assigned as the id the number - DICT_IBUF_ID_MIN plus the space id */ -#define DICT_IBUF_ID_MIN ut_dulint_create(0xFFFFFFFFUL, 0) - -/* The offset of the dictionary header on the page */ -#define DICT_HDR FSEG_PAGE_DATA - -/*-------------------------------------------------------------*/ -/* Dictionary header offsets */ -#define DICT_HDR_ROW_ID 0 /* The latest assigned row id */ -#define DICT_HDR_TABLE_ID 8 /* The latest assigned table id */ -#define DICT_HDR_INDEX_ID 16 /* The latest assigned index id */ -#define DICT_HDR_MIX_ID 24 /* Obsolete, always 0. */ -#define DICT_HDR_TABLES 32 /* Root of the table index tree */ -#define DICT_HDR_TABLE_IDS 36 /* Root of the table id index tree */ -#define DICT_HDR_COLUMNS 40 /* Root of the column index tree */ -#define DICT_HDR_INDEXES 44 /* Root of the index index tree */ -#define DICT_HDR_FIELDS 48 /* Root of the index field - index tree */ - -#define DICT_HDR_FSEG_HEADER 56 /* Segment header for the tablespace - segment into which the dictionary - header is created */ -/*-------------------------------------------------------------*/ - -/* The field number of the page number field in the sys_indexes table -clustered index */ -#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8 -#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7 -#define DICT_SYS_INDEXES_TYPE_FIELD 6 - -/* When a row id which is zero modulo this number (which must be a power of -two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is -updated */ -#define DICT_HDR_ROW_ID_WRITE_MARGIN 256 - -#ifndef UNIV_NONINL -#include "dict0boot.ic" -#endif - -#endif diff --git a/storage/innobase/include/dict0boot.ic b/storage/innobase/include/dict0boot.ic deleted file mode 100644 index fe2a9e36653..00000000000 --- a/storage/innobase/include/dict0boot.ic +++ /dev/null @@ -1,76 +0,0 @@ -/****************************************************** -Data
dictionary creation and booting - -(c) 1996 Innobase Oy - -Created 4/18/1996 Heikki Tuuri -*******************************************************/ - -/************************************************************************** -Writes the current value of the row id counter to the dictionary header file -page. */ - -void -dict_hdr_flush_row_id(void); -/*=======================*/ - - -/************************************************************************** -Returns a new row id. */ -UNIV_INLINE -dulint -dict_sys_get_new_row_id(void) -/*=========================*/ - /* out: the new id */ -{ - dulint id; - - mutex_enter(&(dict_sys->mutex)); - - id = dict_sys->row_id; - - if (0 == (ut_dulint_get_low(id) % DICT_HDR_ROW_ID_WRITE_MARGIN)) { - - dict_hdr_flush_row_id(); - } - - UT_DULINT_INC(dict_sys->row_id); - - mutex_exit(&(dict_sys->mutex)); - - return(id); -} - -/************************************************************************** -Reads a row id from a record or other 6-byte stored form. */ -UNIV_INLINE -dulint -dict_sys_read_row_id( -/*=================*/ - /* out: row id */ - byte* field) /* in: record field */ -{ -#if DATA_ROW_ID_LEN != 6 -# error "DATA_ROW_ID_LEN != 6" -#endif - - return(mach_read_from_6(field)); -} - -/************************************************************************** -Writes a row id to a record or other 6-byte stored form. 
*/ -UNIV_INLINE -void -dict_sys_write_row_id( -/*==================*/ - byte* field, /* in: record field */ - dulint row_id) /* in: row id */ -{ -#if DATA_ROW_ID_LEN != 6 -# error "DATA_ROW_ID_LEN != 6" -#endif - - mach_write_to_6(field, row_id); -} - - diff --git a/storage/innobase/include/dict0crea.h b/storage/innobase/include/dict0crea.h deleted file mode 100644 index f0f30481abe..00000000000 --- a/storage/innobase/include/dict0crea.h +++ /dev/null @@ -1,179 +0,0 @@ -/****************************************************** -Database object creation - -(c) 1996 Innobase Oy - -Created 1/8/1996 Heikki Tuuri -*******************************************************/ - -#ifndef dict0crea_h -#define dict0crea_h - -#include "univ.i" -#include "dict0types.h" -#include "dict0dict.h" -#include "que0types.h" -#include "row0types.h" -#include "mtr0mtr.h" - -/************************************************************************* -Creates a table create graph. */ - -tab_node_t* -tab_create_graph_create( -/*====================*/ - /* out, own: table create node */ - dict_table_t* table, /* in: table to create, built as a memory data - structure */ - mem_heap_t* heap); /* in: heap where created */ -/************************************************************************* -Creates an index create graph. */ - -ind_node_t* -ind_create_graph_create( -/*====================*/ - /* out, own: index create node */ - dict_index_t* index, /* in: index to create, built as a memory data - structure */ - mem_heap_t* heap); /* in: heap where created */ -/*************************************************************** -Creates a table. This is a high-level function used in SQL execution graphs. */ - -que_thr_t* -dict_create_table_step( -/*===================*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ -/*************************************************************** -Creates an index. This is a high-level function used in SQL execution -graphs. 
*/ - -que_thr_t* -dict_create_index_step( -/*===================*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ -/*********************************************************************** -Truncates the index tree associated with a row in SYS_INDEXES table. */ - -ulint -dict_truncate_index_tree( -/*=====================*/ - /* out: new root page number, or - FIL_NULL on failure */ - dict_table_t* table, /* in: the table the index belongs to */ - btr_pcur_t* pcur, /* in/out: persistent cursor pointing to - record in the clustered index of - SYS_INDEXES table. The cursor may be - repositioned in this call. */ - mtr_t* mtr); /* in: mtr having the latch - on the record page. The mtr may be - committed and restarted in this call. */ -/*********************************************************************** -Drops the index tree associated with a row in SYS_INDEXES table. */ - -void -dict_drop_index_tree( -/*=================*/ - rec_t* rec, /* in: record in the clustered index of SYS_INDEXES - table */ - mtr_t* mtr); /* in: mtr having the latch on the record page */ -/******************************************************************** -Creates the foreign key constraints system tables inside InnoDB -at database creation or database start if they are not found or are -not of the right form. */ - -ulint -dict_create_or_check_foreign_constraint_tables(void); -/*================================================*/ - /* out: DB_SUCCESS or error code */ -/************************************************************************ -Adds foreign key definitions to data dictionary tables in the database. We -look at table->foreign_list, and also generate names to constraints that were -not named by the user. 
A generated constraint has a name of the format -databasename/tablename_ibfk_<number>, where the numbers start from 1, and are -given locally for this table, that is, the number is not global, as in the -old format constraints < 4.0.18 it used to be. */ - -ulint -dict_create_add_foreigns_to_dictionary( -/*===================================*/ - /* out: error code or DB_SUCCESS */ - ulint start_id,/* in: if we are actually doing ALTER TABLE - ADD CONSTRAINT, we want to generate constraint - numbers which are bigger than in the table so - far; we number the constraints from - start_id + 1 up; start_id should be set to 0 if - we are creating a new table, or if the table - so far has no constraints for which the name - was generated here */ - dict_table_t* table, /* in: table */ - trx_t* trx); /* in: transaction */ - - -/* Table create node structure */ - -struct tab_node_struct{ - que_common_t common; /* node type: QUE_NODE_TABLE_CREATE */ - dict_table_t* table; /* table to create, built as a memory data - structure with dict_mem_... 
functions */ - ins_node_t* tab_def; /* child node which does the insert of - the table definition; the row to be inserted - is built by the parent node */ - ins_node_t* col_def; /* child node which does the inserts of - the column definitions; the row to be inserted - is built by the parent node */ - commit_node_t* commit_node; - /* child node which performs a commit after - a successful table creation */ - /*----------------------*/ - /* Local storage for this graph node */ - ulint state; /* node execution state */ - ulint col_no; /* next column definition to insert */ - mem_heap_t* heap; /* memory heap used as auxiliary storage */ -}; - -/* Table create node states */ -#define TABLE_BUILD_TABLE_DEF 1 -#define TABLE_BUILD_COL_DEF 2 -#define TABLE_COMMIT_WORK 3 -#define TABLE_ADD_TO_CACHE 4 -#define TABLE_COMPLETED 5 - -/* Index create node struct */ - -struct ind_node_struct{ - que_common_t common; /* node type: QUE_NODE_INDEX_CREATE */ - dict_index_t* index; /* index to create, built as a memory data - structure with dict_mem_... 
functions */ - ins_node_t* ind_def; /* child node which does the insert of - the index definition; the row to be inserted - is built by the parent node */ - ins_node_t* field_def; /* child node which does the inserts of - the field definitions; the row to be inserted - is built by the parent node */ - commit_node_t* commit_node; - /* child node which performs a commit after - a successful index creation */ - /*----------------------*/ - /* Local storage for this graph node */ - ulint state; /* node execution state */ - ulint page_no;/* root page number of the index */ - dict_table_t* table; /* table which owns the index */ - dtuple_t* ind_row;/* index definition row built */ - ulint field_no;/* next field definition to insert */ - mem_heap_t* heap; /* memory heap used as auxiliary storage */ -}; - -/* Index create node states */ -#define INDEX_BUILD_INDEX_DEF 1 -#define INDEX_BUILD_FIELD_DEF 2 -#define INDEX_CREATE_INDEX_TREE 3 -#define INDEX_COMMIT_WORK 4 -#define INDEX_ADD_TO_CACHE 5 - -#ifndef UNIV_NONINL -#include "dict0crea.ic" -#endif - -#endif diff --git a/storage/innobase/include/dict0crea.ic b/storage/innobase/include/dict0crea.ic deleted file mode 100644 index b4da2d7e03f..00000000000 --- a/storage/innobase/include/dict0crea.ic +++ /dev/null @@ -1,8 +0,0 @@ -/****************************************************** -Database object creation - -(c) 1996 Innobase Oy - -Created 1/8/1996 Heikki Tuuri -*******************************************************/ - diff --git a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h deleted file mode 100644 index 7d5ff09c7a6..00000000000 --- a/storage/innobase/include/dict0dict.h +++ /dev/null @@ -1,1002 +0,0 @@ -/****************************************************** -Data dictionary system - -(c) 1996 Innobase Oy - -Created 1/8/1996 Heikki Tuuri -*******************************************************/ - -#ifndef dict0dict_h -#define dict0dict_h - -#include "univ.i" -#include "dict0types.h" 
-#include "dict0mem.h" -#include "data0type.h" -#include "data0data.h" -#include "sync0sync.h" -#include "sync0rw.h" -#include "mem0mem.h" -#include "rem0types.h" -#include "btr0types.h" -#include "ut0mem.h" -#include "ut0lst.h" -#include "hash0hash.h" -#include "ut0rnd.h" -#include "ut0byte.h" -#include "trx0types.h" - -#ifndef UNIV_HOTBACKUP -/********************************************************************** -Makes all characters in a NUL-terminated UTF-8 string lower case. */ - -void -dict_casedn_str( -/*============*/ - char* a); /* in/out: string to put in lower case */ -#endif /* !UNIV_HOTBACKUP */ -/************************************************************************ -Get the database name length in a table name. */ - -ulint -dict_get_db_name_len( -/*=================*/ - /* out: database name length */ - const char* name); /* in: table name in the form - dbname '/' tablename */ -/************************************************************************ -Return the end of table name where we have removed dbname and '/'. */ - -const char* -dict_remove_db_name( -/*================*/ - /* out: table name */ - const char* name); /* in: table name in the form - dbname '/' tablename */ -/************************************************************************ -Decrements the count of open MySQL handles to a table. */ - -void -dict_table_decrement_handle_count( -/*==============================*/ - dict_table_t* table); /* in: table */ -/************************************************************************** -Inits the data dictionary module. */ - -void -dict_init(void); -/*===========*/ -/************************************************************************ -Gets the space id of every table of the data dictionary and makes a linear -list and a hash table of them to the data dictionary cache. This function -can be called at database startup if we did not need to do a crash recovery. 
-In crash recovery we must scan the space id's from the .ibd files in MySQL -database directories. */ - -void -dict_load_space_id_list(void); -/*=========================*/ -/************************************************************************* -Gets the column data type. */ -UNIV_INLINE -void -dict_col_copy_type( -/*===============*/ - const dict_col_t* col, /* in: column */ - dtype_t* type); /* out: data type */ -/************************************************************************* -Gets the column data type. */ - -void -dict_col_copy_type_noninline( -/*=========================*/ - const dict_col_t* col, /* in: column */ - dtype_t* type); /* out: data type */ -#ifdef UNIV_DEBUG -/************************************************************************* -Assert that a column and a data type match. */ -UNIV_INLINE -ibool -dict_col_type_assert_equal( -/*=======================*/ - /* out: TRUE */ - const dict_col_t* col, /* in: column */ - const dtype_t* type); /* in: data type */ -#endif /* UNIV_DEBUG */ -/*************************************************************************** -Returns the minimum size of the column. */ -UNIV_INLINE -ulint -dict_col_get_min_size( -/*==================*/ - /* out: minimum size */ - const dict_col_t* col); /* in: column */ -/*************************************************************************** -Returns the maximum size of the column. */ -UNIV_INLINE -ulint -dict_col_get_max_size( -/*==================*/ - /* out: maximum size */ - const dict_col_t* col); /* in: column */ -/*************************************************************************** -Returns the size of a fixed size column, 0 if not a fixed size column. */ -UNIV_INLINE -ulint -dict_col_get_fixed_size( -/*====================*/ - /* out: fixed size, or 0 */ - const dict_col_t* col); /* in: column */ -/*************************************************************************** -Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column. 
-For fixed length types it is the fixed length of the type, otherwise 0. */ -UNIV_INLINE -ulint -dict_col_get_sql_null_size( -/*=======================*/ - /* out: SQL null storage size - in ROW_FORMAT=REDUNDANT */ - const dict_col_t* col); /* in: column */ - -/************************************************************************* -Gets the column number. */ -UNIV_INLINE -ulint -dict_col_get_no( -/*============*/ - const dict_col_t* col); -/************************************************************************* -Gets the column position in the clustered index. */ -UNIV_INLINE -ulint -dict_col_get_clust_pos( -/*===================*/ - const dict_col_t* col, /* in: table column */ - const dict_index_t* clust_index); /* in: clustered index */ -/************************************************************************* -Gets the column position in the clustered index. */ - -ulint -dict_col_get_clust_pos_noninline( -/*=============================*/ - const dict_col_t* col, /* in: table column */ - const dict_index_t* clust_index); /* in: clustered index */ -/******************************************************************** -If the given column name is reserved for InnoDB system columns, return -TRUE. */ - -ibool -dict_col_name_is_reserved( -/*======================*/ - /* out: TRUE if name is reserved */ - const char* name); /* in: column name */ -/************************************************************************ -Acquire the autoinc lock.*/ - -void -dict_table_autoinc_lock( -/*====================*/ - dict_table_t* table); /* in: table */ -/************************************************************************ -Unconditionally set the autoinc counter. 
*/ - -void -dict_table_autoinc_initialize( -/*==========================*/ - dict_table_t* table, /* in: table */ - ib_ulonglong value); /* in: next value to assign to a row */ -/************************************************************************ -Reads the next autoinc value (== autoinc counter value), 0 if not yet -initialized. */ - -ib_ulonglong -dict_table_autoinc_read( -/*====================*/ - /* out: value for a new row, or 0 */ - dict_table_t* table); /* in: table */ -/************************************************************************ -Updates the autoinc counter if the value supplied is greater than the -current value. */ - -void -dict_table_autoinc_update_if_greater( -/*=================================*/ - - dict_table_t* table, /* in: table */ - ib_ulonglong value); /* in: value which was assigned to a row */ -/************************************************************************ -Release the autoinc lock.*/ - -void -dict_table_autoinc_unlock( -/*======================*/ - dict_table_t* table); /* in: table */ -/************************************************************************** -Adds system columns to a table object. */ - -void -dict_table_add_system_columns( -/*==========================*/ - dict_table_t* table, /* in/out: table */ - mem_heap_t* heap); /* in: temporary heap */ -/************************************************************************** -Adds a table object to the dictionary cache. */ - -void -dict_table_add_to_cache( -/*====================*/ - dict_table_t* table, /* in: table */ - mem_heap_t* heap); /* in: temporary heap */ -/************************************************************************** -Removes a table object from the dictionary cache. */ - -void -dict_table_remove_from_cache( -/*=========================*/ - dict_table_t* table); /* in, own: table */ -/************************************************************************** -Renames a table object. 
*/ - -ibool -dict_table_rename_in_cache( -/*=======================*/ - /* out: TRUE if success */ - dict_table_t* table, /* in: table */ - const char* new_name, /* in: new name */ - ibool rename_also_foreigns);/* in: in ALTER TABLE we want - to preserve the original table name - in constraints which reference it */ -/************************************************************************** -Change the id of a table object in the dictionary cache. This is used in -DISCARD TABLESPACE. */ - -void -dict_table_change_id_in_cache( -/*==========================*/ - dict_table_t* table, /* in: table object already in cache */ - dulint new_id);/* in: new id to set */ -/************************************************************************** -Adds a foreign key constraint object to the dictionary cache. May free -the object if there already is an object with the same identifier in. -At least one of foreign table or referenced table must already be in -the dictionary cache! */ - -ulint -dict_foreign_add_to_cache( -/*======================*/ - /* out: DB_SUCCESS or error code */ - dict_foreign_t* foreign, /* in, own: foreign key constraint */ - ibool check_charsets);/* in: TRUE=check charset - compatibility */ -/************************************************************************* -Checks if a table is referenced by foreign keys. */ - -ibool -dict_table_referenced_by_foreign_key( -/*=================================*/ - /* out: TRUE if table is referenced by a - foreign key */ - dict_table_t* table); /* in: InnoDB table */ -/************************************************************************** -Determines whether a string starts with the specified keyword. 
*/ - -ibool -dict_str_starts_with_keyword( -/*=========================*/ - /* out: TRUE if str starts - with keyword */ - void* mysql_thd, /* in: MySQL thread handle */ - const char* str, /* in: string to scan for keyword */ - const char* keyword); /* in: keyword to look for */ -/************************************************************************* -Scans a table create SQL string and adds to the data dictionary -the foreign key constraints declared in the string. This function -should be called after the indexes for a table have been created. -Each foreign key constraint must be accompanied with indexes in -bot participating tables. The indexes are allowed to contain more -fields than mentioned in the constraint. */ - -ulint -dict_create_foreign_constraints( -/*============================*/ - /* out: error code or DB_SUCCESS */ - trx_t* trx, /* in: transaction */ - const char* sql_string, /* in: table create statement where - foreign keys are declared like: - FOREIGN KEY (a, b) REFERENCES - table2(c, d), table2 can be written - also with the database - name before it: test.table2; the - default database id the database of - parameter name */ - const char* name, /* in: table full name in the - normalized form - database_name/table_name */ - ibool reject_fks); /* in: if TRUE, fail with error - code DB_CANNOT_ADD_CONSTRAINT if - any foreign keys are found. */ -/************************************************************************** -Parses the CONSTRAINT id's to be dropped in an ALTER TABLE statement. 
*/ - -ulint -dict_foreign_parse_drop_constraints( -/*================================*/ - /* out: DB_SUCCESS or - DB_CANNOT_DROP_CONSTRAINT if - syntax error or the constraint - id does not match */ - mem_heap_t* heap, /* in: heap from which we can - allocate memory */ - trx_t* trx, /* in: transaction */ - dict_table_t* table, /* in: table */ - ulint* n, /* out: number of constraints - to drop */ - const char*** constraints_to_drop); /* out: id's of the - constraints to drop */ -/************************************************************************** -Returns a table object and optionally increment its MySQL open handle count. -NOTE! This is a high-level function to be used mainly from outside the -'dict' directory. Inside this directory dict_table_get_low is usually the -appropriate function. */ - -dict_table_t* -dict_table_get( -/*===========*/ - /* out: table, NULL if - does not exist */ - const char* table_name, /* in: table name */ - ibool inc_mysql_count); - /* in: whether to increment the open - handle count on the table */ -/************************************************************************** -Returns a table object based on table id. */ - -dict_table_t* -dict_table_get_on_id( -/*=================*/ - /* out: table, NULL if does not exist */ - dulint table_id, /* in: table id */ - trx_t* trx); /* in: transaction handle */ -/************************************************************************** -Returns a table object based on table id. */ -UNIV_INLINE -dict_table_t* -dict_table_get_on_id_low( -/*=====================*/ - /* out: table, NULL if does not exist */ - dulint table_id); /* in: table id */ -/************************************************************************** -Checks if a table is in the dictionary cache. 
*/ -UNIV_INLINE -dict_table_t* -dict_table_check_if_in_cache_low( -/*=============================*/ - /* out: table, NULL if not found */ - const char* table_name); /* in: table name */ -/************************************************************************** -Gets a table; loads it to the dictionary cache if necessary. A low-level -function. */ -UNIV_INLINE -dict_table_t* -dict_table_get_low( -/*===============*/ - /* out: table, NULL if not found */ - const char* table_name); /* in: table name */ -/************************************************************************** -A noninlined version of dict_table_get_low. */ - -dict_table_t* -dict_table_get_low_noninlined( -/*==========================*/ - /* out: table, NULL if not found */ - const char* table_name); /* in: table name */ -/************************************************************************** -Returns an index object. */ -UNIV_INLINE -dict_index_t* -dict_table_get_index( -/*=================*/ - /* out: index, NULL if does not exist */ - dict_table_t* table, /* in: table */ - const char* name); /* in: index name */ -/************************************************************************** -Returns an index object. */ - -dict_index_t* -dict_table_get_index_noninline( -/*===========================*/ - /* out: index, NULL if does not exist */ - dict_table_t* table, /* in: table */ - const char* name); /* in: index name */ -/************************************************************************** -Returns a column's name. */ - -const char* -dict_table_get_col_name( -/*====================*/ - /* out: column name. NOTE: not - guaranteed to stay valid if table is - modified in any way (columns added, - etc.). */ - const dict_table_t* table, /* in: table */ - ulint col_nr);/* in: column number */ - -/************************************************************************** -Prints a table definition. 
*/ - -void -dict_table_print( -/*=============*/ - dict_table_t* table); /* in: table */ -/************************************************************************** -Prints a table data. */ - -void -dict_table_print_low( -/*=================*/ - dict_table_t* table); /* in: table */ -/************************************************************************** -Prints a table data when we know the table name. */ - -void -dict_table_print_by_name( -/*=====================*/ - const char* name); -/************************************************************************** -Outputs info on foreign keys of a table. */ - -void -dict_print_info_on_foreign_keys( -/*============================*/ - ibool create_table_format, /* in: if TRUE then print in - a format suitable to be inserted into - a CREATE TABLE, otherwise in the format - of SHOW TABLE STATUS */ - FILE* file, /* in: file where to print */ - trx_t* trx, /* in: transaction */ - dict_table_t* table); /* in: table */ -/************************************************************************** -Outputs info on a foreign key of a table in a format suitable for -CREATE TABLE. */ -void -dict_print_info_on_foreign_key_in_create_format( -/*============================================*/ - FILE* file, /* in: file where to print */ - trx_t* trx, /* in: transaction */ - dict_foreign_t* foreign, /* in: foreign key constraint */ - ibool add_newline); /* in: whether to add a newline */ -/************************************************************************ -Displays the names of the index and the table. */ -void -dict_index_name_print( -/*==================*/ - FILE* file, /* in: output stream */ - trx_t* trx, /* in: transaction */ - const dict_index_t* index); /* in: index to print */ -/************************************************************************ -Gets the first index on the table (the clustered index). 
*/ -UNIV_INLINE -dict_index_t* -dict_table_get_first_index( -/*=======================*/ - /* out: index, NULL if none exists */ - dict_table_t* table); /* in: table */ -/************************************************************************ -Gets the first index on the table (the clustered index). */ - -dict_index_t* -dict_table_get_first_index_noninline( -/*=================================*/ - /* out: index, NULL if none exists */ - dict_table_t* table); /* in: table */ -/************************************************************************ -Gets the next index on the table. */ -UNIV_INLINE -dict_index_t* -dict_table_get_next_index( -/*======================*/ - /* out: index, NULL if none left */ - dict_index_t* index); /* in: index */ -/************************************************************************ -Gets the next index on the table. */ - -dict_index_t* -dict_table_get_next_index_noninline( -/*================================*/ - /* out: index, NULL if none left */ - dict_index_t* index); /* in: index */ -/************************************************************************ -Gets the number of user-defined columns in a table in the dictionary -cache. */ -UNIV_INLINE -ulint -dict_table_get_n_user_cols( -/*=======================*/ - /* out: number of user-defined (e.g., not - ROW_ID) columns of a table */ - dict_table_t* table); /* in: table */ -/************************************************************************ -Gets the number of system columns in a table in the dictionary cache. */ -UNIV_INLINE -ulint -dict_table_get_n_sys_cols( -/*======================*/ - /* out: number of system (e.g., - ROW_ID) columns of a table */ - dict_table_t* table); /* in: table */ -/************************************************************************ -Gets the number of all columns (also system) in a table in the dictionary -cache. 
*/ -UNIV_INLINE -ulint -dict_table_get_n_cols( -/*==================*/ - /* out: number of columns of a table */ - dict_table_t* table); /* in: table */ -/************************************************************************ -Gets the nth column of a table. */ -UNIV_INLINE -const dict_col_t* -dict_table_get_nth_col( -/*===================*/ - /* out: pointer to column object */ - const dict_table_t* table, /* in: table */ - ulint pos); /* in: position of column */ -/************************************************************************ -Gets the nth column of a table. */ - -const dict_col_t* -dict_table_get_nth_col_noninline( -/*=============================*/ - /* out: pointer to column object */ - const dict_table_t* table, /* in: table */ - ulint pos); /* in: position of column */ -/************************************************************************ -Gets the given system column of a table. */ -UNIV_INLINE -const dict_col_t* -dict_table_get_sys_col( -/*===================*/ - /* out: pointer to column object */ - const dict_table_t* table, /* in: table */ - ulint sys); /* in: DATA_ROW_ID, ... */ -/************************************************************************ -Gets the given system column number of a table. */ -UNIV_INLINE -ulint -dict_table_get_sys_col_no( -/*======================*/ - /* out: column number */ - dict_table_t* table, /* in: table */ - ulint sys); /* in: DATA_ROW_ID, ... */ -/************************************************************************ -Check whether the table uses the compact page format. */ -UNIV_INLINE -ibool -dict_table_is_comp( -/*===============*/ - /* out: TRUE if table uses the - compact page format */ - const dict_table_t* table); /* in: table */ -/************************************************************************ -Check whether the table uses the compact page format. 
*/ - -ibool -dict_table_is_comp_noninline( -/*=========================*/ - /* out: TRUE if table uses the - compact page format */ - const dict_table_t* table); /* in: table */ -/************************************************************************ -Checks if a column is in the ordering columns of the clustered index of a -table. Column prefixes are treated like whole columns. */ - -ibool -dict_table_col_in_clustered_key( -/*============================*/ - /* out: TRUE if the column, or its prefix, is - in the clustered key */ - dict_table_t* table, /* in: table */ - ulint n); /* in: column number */ -/*********************************************************************** -Copies types of columns contained in table to tuple. */ - -void -dict_table_copy_types( -/*==================*/ - dtuple_t* tuple, /* in: data tuple */ - dict_table_t* table); /* in: index */ -/************************************************************************** -Looks for an index with the given id. NOTE that we do not reserve -the dictionary mutex: this function is for emergency purposes like -printing info of a corrupt database page! */ - -dict_index_t* -dict_index_find_on_id_low( -/*======================*/ - /* out: index or NULL if not found from cache */ - dulint id); /* in: index id */ -/************************************************************************** -Adds an index to the dictionary cache. */ - -void -dict_index_add_to_cache( -/*====================*/ - dict_table_t* table, /* in: table on which the index is */ - dict_index_t* index, /* in, own: index; NOTE! The index memory - object is freed in this function! */ - ulint page_no);/* in: root page number of the index */ -/************************************************************************ -Gets the number of fields in the internal representation of an index, -including fields added by the dictionary system. 
*/ -UNIV_INLINE -ulint -dict_index_get_n_fields( -/*====================*/ - /* out: number of fields */ - dict_index_t* index); /* in: an internal representation of index - (in the dictionary cache) */ -/************************************************************************ -Gets the number of fields in the internal representation of an index -that uniquely determine the position of an index entry in the index, if -we do not take multiversioning into account: in the B-tree use the value -returned by dict_index_get_n_unique_in_tree. */ -UNIV_INLINE -ulint -dict_index_get_n_unique( -/*====================*/ - /* out: number of fields */ - dict_index_t* index); /* in: an internal representation of index - (in the dictionary cache) */ -/************************************************************************ -Gets the number of fields in the internal representation of an index -which uniquely determine the position of an index entry in the index, if -we also take multiversioning into account. */ -UNIV_INLINE -ulint -dict_index_get_n_unique_in_tree( -/*============================*/ - /* out: number of fields */ - dict_index_t* index); /* in: an internal representation of index - (in the dictionary cache) */ -/************************************************************************ -Gets the number of user-defined ordering fields in the index. In the internal -representation we add the row id to the ordering fields to make all indexes -unique, but this function returns the number of fields the user defined -in the index as ordering fields. */ -UNIV_INLINE -ulint -dict_index_get_n_ordering_defined_by_user( -/*======================================*/ - /* out: number of fields */ - dict_index_t* index); /* in: an internal representation of index - (in the dictionary cache) */ -/************************************************************************ -Gets the nth field of an index. 
*/ -UNIV_INLINE -dict_field_t* -dict_index_get_nth_field( -/*=====================*/ - /* out: pointer to field object */ - dict_index_t* index, /* in: index */ - ulint pos); /* in: position of field */ -/************************************************************************ -Gets pointer to the nth column in an index. */ -UNIV_INLINE -const dict_col_t* -dict_index_get_nth_col( -/*===================*/ - /* out: column */ - const dict_index_t* index, /* in: index */ - ulint pos); /* in: position of the field */ -/************************************************************************ -Gets the column number of the nth field in an index. */ -UNIV_INLINE -ulint -dict_index_get_nth_col_no( -/*======================*/ - /* out: column number */ - const dict_index_t* index, /* in: index */ - ulint pos); /* in: position of the field */ -/************************************************************************ -Looks for column n in an index. */ - -ulint -dict_index_get_nth_col_pos( -/*=======================*/ - /* out: position in internal representation - of the index; if not contained, returns - ULINT_UNDEFINED */ - dict_index_t* index, /* in: index */ - ulint n); /* in: column number */ -/************************************************************************ -Returns TRUE if the index contains a column or a prefix of that column. */ - -ibool -dict_index_contains_col_or_prefix( -/*==============================*/ - /* out: TRUE if contains the column or its - prefix */ - dict_index_t* index, /* in: index */ - ulint n); /* in: column number */ -/************************************************************************ -Looks for a matching field in an index. The column has to be the same. The -column in index must be complete, or must contain a prefix longer than the -column in index2. That is, we must be able to construct the prefix in index2 -from the prefix in index. 
*/ - -ulint -dict_index_get_nth_field_pos( -/*=========================*/ - /* out: position in internal representation - of the index; if not contained, returns - ULINT_UNDEFINED */ - dict_index_t* index, /* in: index from which to search */ - dict_index_t* index2, /* in: index */ - ulint n); /* in: field number in index2 */ -/************************************************************************ -Looks for column n position in the clustered index. */ - -ulint -dict_table_get_nth_col_pos( -/*=======================*/ - /* out: position in internal representation - of the clustered index */ - dict_table_t* table, /* in: table */ - ulint n); /* in: column number */ -/************************************************************************ -Returns the position of a system column in an index. */ -UNIV_INLINE -ulint -dict_index_get_sys_col_pos( -/*=======================*/ - /* out: position, ULINT_UNDEFINED if not - contained */ - dict_index_t* index, /* in: index */ - ulint type); /* in: DATA_ROW_ID, ... */ -/*********************************************************************** -Adds a column to index. */ - -void -dict_index_add_col( -/*===============*/ - dict_index_t* index, /* in: index */ - dict_table_t* table, /* in: table */ - dict_col_t* col, /* in: column */ - ulint prefix_len); /* in: column prefix length */ -/*********************************************************************** -Copies types of fields contained in index to tuple. */ - -void -dict_index_copy_types( -/*==================*/ - dtuple_t* tuple, /* in: data tuple */ - dict_index_t* index, /* in: index */ - ulint n_fields); /* in: number of field types to copy */ -/************************************************************************* -Gets the field column. 
*/ -UNIV_INLINE -const dict_col_t* -dict_field_get_col( -/*===============*/ - const dict_field_t* field); - -#ifdef UNIV_DEBUG -/************************************************************************** -Returns an index object if it is found in the dictionary cache. */ - -dict_index_t* -dict_index_get_if_in_cache( -/*=======================*/ - /* out: index, NULL if not found */ - dulint index_id); /* in: index id */ -/************************************************************************** -Checks that a tuple has n_fields_cmp value in a sensible range, so that -no comparison can occur with the page number field in a node pointer. */ - -ibool -dict_index_check_search_tuple( -/*==========================*/ - /* out: TRUE if ok */ - dict_index_t* index, /* in: index */ - dtuple_t* tuple); /* in: tuple used in a search */ -#endif /* UNIV_DEBUG */ -/************************************************************************** -Builds a node pointer out of a physical record and a page number. */ - -dtuple_t* -dict_index_build_node_ptr( -/*======================*/ - /* out, own: node pointer */ - dict_index_t* index, /* in: index */ - rec_t* rec, /* in: record for which to build node - pointer */ - ulint page_no,/* in: page number to put in node pointer */ - mem_heap_t* heap, /* in: memory heap where pointer created */ - ulint level); /* in: level of rec in tree: 0 means leaf - level */ -/************************************************************************** -Copies an initial segment of a physical record, long enough to specify an -index entry uniquely. 
*/ - -rec_t* -dict_index_copy_rec_order_prefix( -/*=============================*/ - /* out: pointer to the prefix record */ - dict_index_t* index, /* in: index */ - rec_t* rec, /* in: record for which to copy prefix */ - ulint* n_fields,/* out: number of fields copied */ - byte** buf, /* in/out: memory buffer for the copied prefix, - or NULL */ - ulint* buf_size);/* in/out: buffer size */ -/************************************************************************** -Builds a typed data tuple out of a physical record. */ - -dtuple_t* -dict_index_build_data_tuple( -/*========================*/ - /* out, own: data tuple */ - dict_index_t* index, /* in: index */ - rec_t* rec, /* in: record for which to build data tuple */ - ulint n_fields,/* in: number of data fields */ - mem_heap_t* heap); /* in: memory heap where tuple created */ -/************************************************************************* -Gets the space id of the root of the index tree. */ -UNIV_INLINE -ulint -dict_index_get_space( -/*=================*/ - /* out: space id */ - dict_index_t* index); /* in: index */ -/************************************************************************* -Sets the space id of the root of the index tree. */ -UNIV_INLINE -void -dict_index_set_space( -/*=================*/ - dict_index_t* index, /* in: index */ - ulint space); /* in: space id */ -/************************************************************************* -Gets the page number of the root of the index tree. */ -UNIV_INLINE -ulint -dict_index_get_page( -/*================*/ - /* out: page number */ - dict_index_t* tree); /* in: index */ -/************************************************************************* -Sets the page number of the root of index tree. 
*/ -UNIV_INLINE -void -dict_index_set_page( -/*================*/ - dict_index_t* index, /* in: index */ - ulint page); /* in: page number */ -/************************************************************************* -Gets the type of the index tree. */ -UNIV_INLINE -ulint -dict_index_get_type( -/*================*/ - /* out: type */ - dict_index_t* index); /* in: index */ -/************************************************************************* -Gets the read-write lock of the index tree. */ -UNIV_INLINE -rw_lock_t* -dict_index_get_lock( -/*================*/ - /* out: read-write lock */ - dict_index_t* index); /* in: index */ -/************************************************************************ -Returns free space reserved for future updates of records. This is -relevant only in the case of many consecutive inserts, as updates -which make the records bigger might fragment the index. */ -UNIV_INLINE -ulint -dict_index_get_space_reserve(void); -/*==============================*/ - /* out: number of free bytes on page, - reserved for updates */ -/************************************************************************* -Calculates the minimum record length in an index. */ - -ulint -dict_index_calc_min_rec_len( -/*========================*/ - dict_index_t* index); /* in: index */ -/************************************************************************* -Calculates new estimates for table and index statistics. The statistics -are used in query optimization. */ - -void -dict_update_statistics_low( -/*=======================*/ - dict_table_t* table, /* in: table */ - ibool has_dict_mutex);/* in: TRUE if the caller has the - dictionary mutex */ -/************************************************************************* -Calculates new estimates for table and index statistics. The statistics -are used in query optimization. 
*/ - -void -dict_update_statistics( -/*===================*/ - dict_table_t* table); /* in: table */ -/************************************************************************ -Reserves the dictionary system mutex for MySQL. */ - -void -dict_mutex_enter_for_mysql(void); -/*============================*/ -/************************************************************************ -Releases the dictionary system mutex for MySQL. */ - -void -dict_mutex_exit_for_mysql(void); -/*===========================*/ -/************************************************************************ -Checks if the database name in two table names is the same. */ - -ibool -dict_tables_have_same_db( -/*=====================*/ - /* out: TRUE if same db name */ - const char* name1, /* in: table name in the form - dbname '/' tablename */ - const char* name2); /* in: table name in the form - dbname '/' tablename */ -/************************************************************************* -Scans from pointer onwards. Stops if is at the start of a copy of -'string' where characters are compared without case sensitivity. Stops -also at '\0'. 
*/ - -const char* -dict_scan_to( -/*=========*/ - /* out: scanned up to this */ - const char* ptr, /* in: scan from */ - const char* string);/* in: look for this */ -/* Buffers for storing detailed information about the latest foreign key -and unique key errors */ -extern FILE* dict_foreign_err_file; -extern mutex_t dict_foreign_err_mutex; /* mutex protecting the buffers */ - -extern dict_sys_t* dict_sys; /* the dictionary system */ -extern rw_lock_t dict_operation_lock; - -/* Dictionary system struct */ -struct dict_sys_struct{ - mutex_t mutex; /* mutex protecting the data - dictionary; protects also the - disk-based dictionary system tables; - this mutex serializes CREATE TABLE - and DROP TABLE, as well as reading - the dictionary data for a table from - system tables */ - dulint row_id; /* the next row id to assign; - NOTE that at a checkpoint this - must be written to the dict system - header and flushed to a file; in - recovery this must be derived from - the log records */ - hash_table_t* table_hash; /* hash table of the tables, based - on name */ - hash_table_t* table_id_hash; /* hash table of the tables, based - on id */ - UT_LIST_BASE_NODE_T(dict_table_t) - table_LRU; /* LRU list of tables */ - ulint size; /* varying space in bytes occupied - by the data dictionary table and - index objects */ - dict_table_t* sys_tables; /* SYS_TABLES table */ - dict_table_t* sys_columns; /* SYS_COLUMNS table */ - dict_table_t* sys_indexes; /* SYS_INDEXES table */ - dict_table_t* sys_fields; /* SYS_FIELDS table */ -}; - -#ifndef UNIV_NONINL -#include "dict0dict.ic" -#endif - -#endif diff --git a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic deleted file mode 100644 index 7d38cbcd1fa..00000000000 --- a/storage/innobase/include/dict0dict.ic +++ /dev/null @@ -1,664 +0,0 @@ -/********************************************************************** -Data dictionary system - -(c) 1996 Innobase Oy - -Created 1/8/1996 Heikki Tuuri 
-***********************************************************************/ - -#include "dict0load.h" -#include "trx0undo.h" -#include "trx0sys.h" -#include "rem0types.h" -#include "data0type.h" - -/************************************************************************* -Gets the column data type. */ -UNIV_INLINE -void -dict_col_copy_type( -/*===============*/ - const dict_col_t* col, /* in: column */ - dtype_t* type) /* out: data type */ -{ - ut_ad(col && type); - - type->mtype = col->mtype; - type->prtype = col->prtype; - type->len = col->len; - type->mbminlen = col->mbminlen; - type->mbmaxlen = col->mbmaxlen; -} - -#ifdef UNIV_DEBUG -/************************************************************************* -Assert that a column and a data type match. */ -UNIV_INLINE -ibool -dict_col_type_assert_equal( -/*=======================*/ - /* out: TRUE */ - const dict_col_t* col, /* in: column */ - const dtype_t* type) /* in: data type */ -{ - ut_ad(col); - ut_ad(type); - - ut_ad(col->mtype == type->mtype); - ut_ad(col->prtype == type->prtype); - ut_ad(col->len == type->len); - ut_ad(col->mbminlen == type->mbminlen); - ut_ad(col->mbmaxlen == type->mbmaxlen); - - return(TRUE); -} -#endif /* UNIV_DEBUG */ - -/*************************************************************************** -Returns the minimum size of the column. */ -UNIV_INLINE -ulint -dict_col_get_min_size( -/*==================*/ - /* out: minimum size */ - const dict_col_t* col) /* in: column */ -{ - return(dtype_get_min_size_low(col->mtype, col->prtype, col->len, - col->mbminlen, col->mbmaxlen)); -} -/*************************************************************************** -Returns the maximum size of the column. 
*/ -UNIV_INLINE -ulint -dict_col_get_max_size( -/*==================*/ - /* out: maximum size */ - const dict_col_t* col) /* in: column */ -{ - return(dtype_get_max_size_low(col->mtype, col->len)); -} -/*************************************************************************** -Returns the size of a fixed size column, 0 if not a fixed size column. */ -UNIV_INLINE -ulint -dict_col_get_fixed_size( -/*====================*/ - /* out: fixed size, or 0 */ - const dict_col_t* col) /* in: column */ -{ - return(dtype_get_fixed_size_low(col->mtype, col->prtype, col->len, - col->mbminlen, col->mbmaxlen)); -} -/*************************************************************************** -Returns the ROW_FORMAT=REDUNDANT stored SQL NULL size of a column. -For fixed length types it is the fixed length of the type, otherwise 0. */ -UNIV_INLINE -ulint -dict_col_get_sql_null_size( -/*=======================*/ - /* out: SQL null storage size - in ROW_FORMAT=REDUNDANT */ - const dict_col_t* col) /* in: column */ -{ - return(dict_col_get_fixed_size(col)); -} - -/************************************************************************* -Gets the column number. */ -UNIV_INLINE -ulint -dict_col_get_no( -/*============*/ - const dict_col_t* col) -{ - ut_ad(col); - - return(col->ind); -} - -/************************************************************************* -Gets the column position in the clustered index. 
*/ -UNIV_INLINE -ulint -dict_col_get_clust_pos( -/*===================*/ - const dict_col_t* col, /* in: table column */ - const dict_index_t* clust_index) /* in: clustered index */ -{ - ulint i; - - ut_ad(col); - ut_ad(clust_index && clust_index->type & DICT_CLUSTERED); - - for (i = 0; i < clust_index->n_def; i++) { - const dict_field_t* field = &clust_index->fields[i]; - - if (!field->prefix_len && field->col == col) { - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/************************************************************************ -Gets the first index on the table (the clustered index). */ -UNIV_INLINE -dict_index_t* -dict_table_get_first_index( -/*=======================*/ - /* out: index, NULL if none exists */ - dict_table_t* table) /* in: table */ -{ - ut_ad(table); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - return(UT_LIST_GET_FIRST(table->indexes)); -} - -/************************************************************************ -Gets the next index on the table. */ -UNIV_INLINE -dict_index_t* -dict_table_get_next_index( -/*======================*/ - /* out: index, NULL if none left */ - dict_index_t* index) /* in: index */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(UT_LIST_GET_NEXT(indexes, index)); -} - -/************************************************************************ -Gets the number of user-defined columns in a table in the dictionary -cache. */ -UNIV_INLINE -ulint -dict_table_get_n_user_cols( -/*=======================*/ - /* out: number of user-defined (e.g., not - ROW_ID) columns of a table */ - dict_table_t* table) /* in: table */ -{ - ut_ad(table); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - return(table->n_cols - DATA_N_SYS_COLS); -} - -/************************************************************************ -Gets the number of system columns in a table in the dictionary cache. 
*/ -UNIV_INLINE -ulint -dict_table_get_n_sys_cols( -/*======================*/ - /* out: number of system (e.g., - ROW_ID) columns of a table */ - dict_table_t* table __attribute__((unused))) /* in: table */ -{ - ut_ad(table); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - ut_ad(table->cached); - - return(DATA_N_SYS_COLS); -} - -/************************************************************************ -Gets the number of all columns (also system) in a table in the dictionary -cache. */ -UNIV_INLINE -ulint -dict_table_get_n_cols( -/*==================*/ - /* out: number of columns of a table */ - dict_table_t* table) /* in: table */ -{ - ut_ad(table); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - return(table->n_cols); -} - -/************************************************************************ -Gets the nth column of a table. */ -UNIV_INLINE -const dict_col_t* -dict_table_get_nth_col( -/*===================*/ - /* out: pointer to column object */ - const dict_table_t* table, /* in: table */ - ulint pos) /* in: position of column */ -{ - ut_ad(table); - ut_ad(pos < table->n_def); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - return((table->cols) + pos); -} - -/************************************************************************ -Gets the given system column of a table. */ -UNIV_INLINE -const dict_col_t* -dict_table_get_sys_col( -/*===================*/ - /* out: pointer to column object */ - const dict_table_t* table, /* in: table */ - ulint sys) /* in: DATA_ROW_ID, ... */ -{ - const dict_col_t* col; - - ut_ad(table); - ut_ad(sys < DATA_N_SYS_COLS); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - col = dict_table_get_nth_col(table, table->n_cols - - DATA_N_SYS_COLS + sys); - ut_ad(col->mtype == DATA_SYS); - ut_ad(col->prtype == (sys | DATA_NOT_NULL)); - - return(col); -} - -/************************************************************************ -Gets the given system column number of a table. 
*/ -UNIV_INLINE -ulint -dict_table_get_sys_col_no( -/*======================*/ - /* out: column number */ - dict_table_t* table, /* in: table */ - ulint sys) /* in: DATA_ROW_ID, ... */ -{ - ut_ad(table); - ut_ad(sys < DATA_N_SYS_COLS); - ut_ad(table->magic_n == DICT_TABLE_MAGIC_N); - - return(table->n_cols - DATA_N_SYS_COLS + sys); -} - -/************************************************************************ -Check whether the table uses the compact page format. */ -UNIV_INLINE -ibool -dict_table_is_comp( -/*===============*/ - /* out: TRUE if table uses the - compact page format */ - const dict_table_t* table) /* in: table */ -{ - ut_ad(table); - -#if DICT_TF_COMPACT != TRUE -#error -#endif - - return(UNIV_LIKELY(table->flags & DICT_TF_COMPACT)); -} - -/************************************************************************ -Gets the number of fields in the internal representation of an index, -including fields added by the dictionary system. */ -UNIV_INLINE -ulint -dict_index_get_n_fields( -/*====================*/ - /* out: number of fields */ - dict_index_t* index) /* in: an internal representation of index - (in the dictionary cache) */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(index->n_fields); -} - -/************************************************************************ -Gets the number of fields in the internal representation of an index -that uniquely determine the position of an index entry in the index, if -we do not take multiversioning into account: in the B-tree use the value -returned by dict_index_get_n_unique_in_tree. 
*/ -UNIV_INLINE -ulint -dict_index_get_n_unique( -/*====================*/ - /* out: number of fields */ - dict_index_t* index) /* in: an internal representation of index - (in the dictionary cache) */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(index->cached); - - return(index->n_uniq); -} - -/************************************************************************ -Gets the number of fields in the internal representation of an index -which uniquely determine the position of an index entry in the index, if -we also take multiversioning into account. */ -UNIV_INLINE -ulint -dict_index_get_n_unique_in_tree( -/*============================*/ - /* out: number of fields */ - dict_index_t* index) /* in: an internal representation of index - (in the dictionary cache) */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(index->cached); - - if (index->type & DICT_CLUSTERED) { - - return(dict_index_get_n_unique(index)); - } - - return(dict_index_get_n_fields(index)); -} - -/************************************************************************ -Gets the number of user-defined ordering fields in the index. In the internal -representation of clustered indexes we add the row id to the ordering fields -to make a clustered index unique, but this function returns the number of -fields the user defined in the index as ordering fields. */ -UNIV_INLINE -ulint -dict_index_get_n_ordering_defined_by_user( -/*======================================*/ - /* out: number of fields */ - dict_index_t* index) /* in: an internal representation of index - (in the dictionary cache) */ -{ - return(index->n_user_defined_cols); -} - -/************************************************************************ -Gets the nth field of an index. 
*/ -UNIV_INLINE -dict_field_t* -dict_index_get_nth_field( -/*=====================*/ - /* out: pointer to field object */ - dict_index_t* index, /* in: index */ - ulint pos) /* in: position of field */ -{ - ut_ad(index); - ut_ad(pos < index->n_def); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return((index->fields) + pos); -} - -/************************************************************************ -Returns the position of a system column in an index. */ -UNIV_INLINE -ulint -dict_index_get_sys_col_pos( -/*=======================*/ - /* out: position, ULINT_UNDEFINED if not - contained */ - dict_index_t* index, /* in: index */ - ulint type) /* in: DATA_ROW_ID, ... */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - ut_ad(!(index->type & DICT_UNIVERSAL)); - - if (index->type & DICT_CLUSTERED) { - - return(dict_col_get_clust_pos( - dict_table_get_sys_col(index->table, type), - index)); - } - - return(dict_index_get_nth_col_pos( - index, dict_table_get_sys_col_no(index->table, type))); -} - -/************************************************************************* -Gets the field column. */ -UNIV_INLINE -const dict_col_t* -dict_field_get_col( -/*===============*/ - const dict_field_t* field) -{ - ut_ad(field); - - return(field->col); -} - -/************************************************************************ -Gets pointer to the nth column in an index. */ -UNIV_INLINE -const dict_col_t* -dict_index_get_nth_col( -/*===================*/ - /* out: column */ - const dict_index_t* index, /* in: index */ - ulint pos) /* in: position of the field */ -{ - return(dict_field_get_col(dict_index_get_nth_field((dict_index_t*) - index, pos))); -} - -/************************************************************************ -Gets the column number the nth field in an index. 
*/ -UNIV_INLINE -ulint -dict_index_get_nth_col_no( -/*======================*/ - /* out: column number */ - const dict_index_t* index, /* in: index */ - ulint pos) /* in: position of the field */ -{ - return(dict_col_get_no(dict_index_get_nth_col(index, pos))); -} - -/************************************************************************* -Gets the space id of the root of the index tree. */ -UNIV_INLINE -ulint -dict_index_get_space( -/*=================*/ - /* out: space id */ - dict_index_t* index) /* in: index */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(index->space); -} - -/************************************************************************* -Sets the space id of the root of the index tree. */ -UNIV_INLINE -void -dict_index_set_space( -/*=================*/ - dict_index_t* index, /* in: index */ - ulint space) /* in: space id */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - index->space = space; -} - -/************************************************************************* -Gets the page number of the root of the index tree. */ -UNIV_INLINE -ulint -dict_index_get_page( -/*================*/ - /* out: page number */ - dict_index_t* index) /* in: index */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(index->page); -} - -/************************************************************************* -Sets the page number of the root of index tree. */ -UNIV_INLINE -void -dict_index_set_page( -/*================*/ - dict_index_t* index, /* in: index */ - ulint page) /* in: page number */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - index->page = page; -} - -/************************************************************************* -Gets the type of the index tree. 
*/ -UNIV_INLINE -ulint -dict_index_get_type( -/*================*/ - /* out: type */ - dict_index_t* index) /* in: index */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(index->type); -} - -/************************************************************************* -Gets the read-write lock of the index tree. */ -UNIV_INLINE -rw_lock_t* -dict_index_get_lock( -/*================*/ - /* out: read-write lock */ - dict_index_t* index) /* in: index */ -{ - ut_ad(index); - ut_ad(index->magic_n == DICT_INDEX_MAGIC_N); - - return(&(index->lock)); -} - -/************************************************************************ -Returns free space reserved for future updates of records. This is -relevant only in the case of many consecutive inserts, as updates -which make the records bigger might fragment the index. */ -UNIV_INLINE -ulint -dict_index_get_space_reserve(void) -/*==============================*/ - /* out: number of free bytes on page, - reserved for updates */ -{ - return(UNIV_PAGE_SIZE / 16); -} - -/************************************************************************** -Checks if a table is in the dictionary cache. */ -UNIV_INLINE -dict_table_t* -dict_table_check_if_in_cache_low( -/*=============================*/ - /* out: table, NULL if not found */ - const char* table_name) /* in: table name */ -{ - dict_table_t* table; - ulint table_fold; - - ut_ad(table_name); - ut_ad(mutex_own(&(dict_sys->mutex))); - - /* Look for the table name in the hash table */ - table_fold = ut_fold_string(table_name); - - HASH_SEARCH(name_hash, dict_sys->table_hash, table_fold, table, - ut_strcmp(table->name, table_name) == 0); - return(table); -} - -/************************************************************************** -Gets a table; loads it to the dictionary cache if necessary. A low-level -function. 
*/ -UNIV_INLINE -dict_table_t* -dict_table_get_low( -/*===============*/ - /* out: table, NULL if not found */ - const char* table_name) /* in: table name */ -{ - dict_table_t* table; - - ut_ad(table_name); - ut_ad(mutex_own(&(dict_sys->mutex))); - - table = dict_table_check_if_in_cache_low(table_name); - - if (table == NULL) { - table = dict_load_table(table_name); - } - - return(table); -} - -/************************************************************************** -Returns a table object based on table id. */ -UNIV_INLINE -dict_table_t* -dict_table_get_on_id_low( -/*=====================*/ - /* out: table, NULL if does not exist */ - dulint table_id) /* in: table id */ -{ - dict_table_t* table; - ulint fold; - - ut_ad(mutex_own(&(dict_sys->mutex))); - - /* Look for the table name in the hash table */ - fold = ut_fold_dulint(table_id); - - HASH_SEARCH(id_hash, dict_sys->table_id_hash, fold, table, - ut_dulint_cmp(table->id, table_id) == 0); - if (table == NULL) { - table = dict_load_table_on_id(table_id); - } - - /* TODO: should get the type information from MySQL */ - - return(table); -} - -/************************************************************************** -Returns an index object. 
*/ -UNIV_INLINE -dict_index_t* -dict_table_get_index( -/*=================*/ - /* out: index, NULL if does not exist */ - dict_table_t* table, /* in: table */ - const char* name) /* in: index name */ -{ - dict_index_t* index = NULL; - - index = dict_table_get_first_index(table); - - while (index != NULL) { - if (ut_strcmp(name, index->name) == 0) { - - break; - } - - index = dict_table_get_next_index(index); - } - - return(index); -} diff --git a/storage/innobase/include/dict0load.h b/storage/innobase/include/dict0load.h deleted file mode 100644 index 7e19c2eb3c0..00000000000 --- a/storage/innobase/include/dict0load.h +++ /dev/null @@ -1,100 +0,0 @@ -/****************************************************** -Loads to the memory cache database object definitions -from dictionary tables - -(c) 1996 Innobase Oy - -Created 4/24/1996 Heikki Tuuri -*******************************************************/ - -#ifndef dict0load_h -#define dict0load_h - -#include "univ.i" -#include "dict0types.h" -#include "ut0byte.h" - -/************************************************************************ -In a crash recovery we already have all the tablespace objects created. -This function compares the space id information in the InnoDB data dictionary -to what we already read with fil_load_single_table_tablespaces(). - -In a normal startup, we create the tablespace objects for every table in -InnoDB's data dictionary, if the corresponding .ibd file exists. -We also scan the biggest space id, and store it to fil_system. */ - -void -dict_check_tablespaces_and_store_max_id( -/*====================================*/ - ibool in_crash_recovery); /* in: are we doing a crash recovery */ -/************************************************************************ -Finds the first table name in the given database. */ - -char* -dict_get_first_table_name_in_db( -/*============================*/ - /* out, own: table name, NULL if - does not exist; the caller must free - the memory in the string! 
*/ - const char* name); /* in: database name which ends to '/' */ -/************************************************************************ -Loads a table definition and also all its index definitions, and also -the cluster definition if the table is a member in a cluster. Also loads -all foreign key constraints where the foreign key is in the table or where -a foreign key references columns in this table. */ - -dict_table_t* -dict_load_table( -/*============*/ - /* out: table, NULL if does not exist; - if the table is stored in an .ibd file, - but the file does not exist, - then we set the ibd_file_missing flag TRUE - in the table object we return */ - const char* name); /* in: table name in the - databasename/tablename format */ -/*************************************************************************** -Loads a table object based on the table id. */ - -dict_table_t* -dict_load_table_on_id( -/*==================*/ - /* out: table; NULL if table does not exist */ - dulint table_id); /* in: table id */ -/************************************************************************ -This function is called when the database is booted. -Loads system table index definitions except for the clustered index which -is added to the dictionary cache at booting before calling this function. */ - -void -dict_load_sys_table( -/*================*/ - dict_table_t* table); /* in: system table */ -/*************************************************************************** -Loads foreign key constraints where the table is either the foreign key -holder or where the table is referenced by a foreign key. Adds these -constraints to the data dictionary. Note that we know that the dictionary -cache already contains all constraints where the other relevant table is -already in the dictionary cache. 
*/ - -ulint -dict_load_foreigns( -/*===============*/ - /* out: DB_SUCCESS or error code */ - const char* table_name, /* in: table name */ - ibool check_charsets);/* in: TRUE=check charsets - compatibility */ -/************************************************************************ -Prints to the standard output information on all tables found in the data -dictionary system table. */ - -void -dict_print(void); -/*============*/ - - -#ifndef UNIV_NONINL -#include "dict0load.ic" -#endif - -#endif diff --git a/storage/innobase/include/dict0load.ic b/storage/innobase/include/dict0load.ic deleted file mode 100644 index 1a207fbf0fd..00000000000 --- a/storage/innobase/include/dict0load.ic +++ /dev/null @@ -1,9 +0,0 @@ -/****************************************************** -Loads to the memory cache database object definitions -from dictionary tables - -(c) 1996 Innobase Oy - -Created 4/24/1996 Heikki Tuuri -*******************************************************/ - diff --git a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h deleted file mode 100644 index ac28fdb1bae..00000000000 --- a/storage/innobase/include/dict0mem.h +++ /dev/null @@ -1,431 +0,0 @@ -/****************************************************** -Data dictionary memory object creation - -(c) 1996 Innobase Oy - -Created 1/8/1996 Heikki Tuuri -*******************************************************/ - -#ifndef dict0mem_h -#define dict0mem_h - -#include "univ.i" -#include "dict0types.h" -#include "data0type.h" -#include "data0data.h" -#include "mem0mem.h" -#include "rem0types.h" -#include "btr0types.h" -#include "ut0mem.h" -#include "ut0lst.h" -#include "ut0rnd.h" -#include "ut0byte.h" -#include "sync0rw.h" -#include "lock0types.h" -#include "hash0hash.h" -#include "que0types.h" - -/* Type flags of an index: OR'ing of the flags is allowed to define a -combination of types */ -#define DICT_CLUSTERED 1 /* clustered index */ -#define DICT_UNIQUE 2 /* unique index */ -#define 
DICT_UNIVERSAL 4 /* index which can contain records from any - other index */ -#define DICT_IBUF 8 /* insert buffer tree */ - -/* Types for a table object */ -#define DICT_TABLE_ORDINARY 1 -#if 0 /* not implemented */ -#define DICT_TABLE_CLUSTER_MEMBER 2 -#define DICT_TABLE_CLUSTER 3 /* this means that the table is - really a cluster definition */ -#endif - -/* Table flags */ -#define DICT_TF_COMPACT 1 /* compact page format */ - -/************************************************************************** -Creates a table memory object. */ - -dict_table_t* -dict_mem_table_create( -/*==================*/ - /* out, own: table object */ - const char* name, /* in: table name */ - ulint space, /* in: space where the clustered index - of the table is placed; this parameter - is ignored if the table is made - a member of a cluster */ - ulint n_cols, /* in: number of columns */ - ulint flags); /* in: table flags */ -/******************************************************************** -Free a table memory object. */ - -void -dict_mem_table_free( -/*================*/ - dict_table_t* table); /* in: table */ -/************************************************************************** -Adds a column definition to a table. */ - -void -dict_mem_table_add_col( -/*===================*/ - dict_table_t* table, /* in: table */ - mem_heap_t* heap, /* in: temporary memory heap, or NULL */ - const char* name, /* in: column name, or NULL */ - ulint mtype, /* in: main datatype */ - ulint prtype, /* in: precise type */ - ulint len); /* in: precision */ -/************************************************************************** -Creates an index memory object. 
*/ - -dict_index_t* -dict_mem_index_create( -/*==================*/ - /* out, own: index object */ - const char* table_name, /* in: table name */ - const char* index_name, /* in: index name */ - ulint space, /* in: space where the index tree is - placed, ignored if the index is of - the clustered type */ - ulint type, /* in: DICT_UNIQUE, - DICT_CLUSTERED, ... ORed */ - ulint n_fields); /* in: number of fields */ -/************************************************************************** -Adds a field definition to an index. NOTE: does not take a copy -of the column name if the field is a column. The memory occupied -by the column name may be released only after publishing the index. */ - -void -dict_mem_index_add_field( -/*=====================*/ - dict_index_t* index, /* in: index */ - const char* name, /* in: column name */ - ulint prefix_len); /* in: 0 or the column prefix length - in a MySQL index like - INDEX (textcol(25)) */ -/************************************************************************** -Frees an index memory object. */ - -void -dict_mem_index_free( -/*================*/ - dict_index_t* index); /* in: index */ -/************************************************************************** -Creates and initializes a foreign constraint memory object. */ - -dict_foreign_t* -dict_mem_foreign_create(void); -/*=========================*/ - /* out, own: foreign constraint struct */ - -/* Data structure for a column in a table */ -struct dict_col_struct{ - /*----------------------*/ - /* The following are copied from dtype_t, - so that all bit-fields can be packed tightly. 
*/ - unsigned mtype:8; /* main data type */ - unsigned prtype:24; /* precise type; MySQL data - type, charset code, flags to - indicate nullability, - signedness, whether this is a - binary string, whether this is - a true VARCHAR where MySQL - uses 2 bytes to store the length */ - - /* the remaining fields do not affect alphabetical ordering: */ - - unsigned len:16; /* length; for MySQL data this - is field->pack_length(), - except that for a >= 5.0.3 - type true VARCHAR this is the - maximum byte length of the - string data (in addition to - the string, MySQL uses 1 or 2 - bytes to store the string length) */ - - unsigned mbminlen:2; /* minimum length of a - character, in bytes */ - unsigned mbmaxlen:3; /* maximum length of a - character, in bytes */ - /*----------------------*/ - /* End of definitions copied from dtype_t */ - - unsigned ind:10; /* table column position - (starting from 0) */ - unsigned ord_part:1; /* nonzero if this column - appears in the ordering fields - of an index */ -}; - -/* DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum -indexed column length (or indexed prefix length). It is set to 3*256, -so that one can create a column prefix index on 256 characters of a -TEXT or VARCHAR column also in the UTF-8 charset. In that charset, -a character may take at most 3 bytes. -This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data -files would be at risk! 
*/ - -#define DICT_MAX_INDEX_COL_LEN 768 - -/* Data structure for a field in an index */ -struct dict_field_struct{ - dict_col_t* col; /* pointer to the table column */ - const char* name; /* name of the column */ - unsigned prefix_len:10; /* 0 or the length of the column - prefix in bytes in a MySQL index of - type, e.g., INDEX (textcol(25)); - must be smaller than - DICT_MAX_INDEX_COL_LEN; NOTE that - in the UTF-8 charset, MySQL sets this - to 3 * the prefix len in UTF-8 chars */ - unsigned fixed_len:10; /* 0 or the fixed length of the - column if smaller than - DICT_MAX_INDEX_COL_LEN */ -}; - -/* Data structure for an index */ -struct dict_index_struct{ - dulint id; /* id of the index */ - mem_heap_t* heap; /* memory heap */ - ulint type; /* index type */ - const char* name; /* index name */ - const char* table_name; /* table name */ - dict_table_t* table; /* back pointer to table */ - unsigned space:32; - /* space where the index tree is placed */ - unsigned page:32;/* index tree root page number */ - unsigned trx_id_offset:10;/* position of the the trx id column - in a clustered index record, if the fields - before it are known to be of a fixed size, - 0 otherwise */ - unsigned n_user_defined_cols:10; - /* number of columns the user defined to - be in the index: in the internal - representation we add more columns */ - unsigned n_uniq:10;/* number of fields from the beginning - which are enough to determine an index - entry uniquely */ - unsigned n_def:10;/* number of fields defined so far */ - unsigned n_fields:10;/* number of fields in the index */ - unsigned n_nullable:10;/* number of nullable fields */ - unsigned cached:1;/* TRUE if the index object is in the - dictionary cache */ - dict_field_t* fields; /* array of field descriptions */ - UT_LIST_NODE_T(dict_index_t) - indexes;/* list of indexes of the table */ - btr_search_t* search_info; /* info used in optimistic searches */ - /*----------------------*/ - ib_longlong* stat_n_diff_key_vals; - /* 
approximate number of different key values - for this index, for each n-column prefix - where n <= dict_get_n_unique(index); we - periodically calculate new estimates */ - ulint stat_index_size; - /* approximate index size in database pages */ - ulint stat_n_leaf_pages; - /* approximate number of leaf pages in the - index tree */ - rw_lock_t lock; /* read-write lock protecting the upper levels - of the index tree */ -#ifdef UNIV_DEBUG - ulint magic_n;/* magic number */ -# define DICT_INDEX_MAGIC_N 76789786 -#endif -}; - -/* Data structure for a foreign key constraint; an example: -FOREIGN KEY (A, B) REFERENCES TABLE2 (C, D) */ - -struct dict_foreign_struct{ - mem_heap_t* heap; /* this object is allocated from - this memory heap */ - char* id; /* id of the constraint as a - null-terminated string */ - unsigned n_fields:10; /* number of indexes' first fields - for which the the foreign key - constraint is defined: we allow the - indexes to contain more fields than - mentioned in the constraint, as long - as the first fields are as mentioned */ - unsigned type:6; /* 0 or DICT_FOREIGN_ON_DELETE_CASCADE - or DICT_FOREIGN_ON_DELETE_SET_NULL */ - char* foreign_table_name;/* foreign table name */ - dict_table_t* foreign_table; /* table where the foreign key is */ - const char** foreign_col_names;/* names of the columns in the - foreign key */ - char* referenced_table_name;/* referenced table name */ - dict_table_t* referenced_table;/* table where the referenced key - is */ - const char** referenced_col_names;/* names of the referenced - columns in the referenced table */ - dict_index_t* foreign_index; /* foreign index; we require that - both tables contain explicitly defined - indexes for the constraint: InnoDB - does not generate new indexes - implicitly */ - dict_index_t* referenced_index;/* referenced index */ - UT_LIST_NODE_T(dict_foreign_t) - foreign_list; /* list node for foreign keys of the - table */ - UT_LIST_NODE_T(dict_foreign_t) - referenced_list;/* list node 
for referenced keys of the - table */ -}; - -/* The flags for ON_UPDATE and ON_DELETE can be ORed; the default is that -a foreign key constraint is enforced, therefore RESTRICT just means no flag */ -#define DICT_FOREIGN_ON_DELETE_CASCADE 1 -#define DICT_FOREIGN_ON_DELETE_SET_NULL 2 -#define DICT_FOREIGN_ON_UPDATE_CASCADE 4 -#define DICT_FOREIGN_ON_UPDATE_SET_NULL 8 -#define DICT_FOREIGN_ON_DELETE_NO_ACTION 16 -#define DICT_FOREIGN_ON_UPDATE_NO_ACTION 32 - - -/* Data structure for a database table */ -struct dict_table_struct{ - dulint id; /* id of the table */ - mem_heap_t* heap; /* memory heap */ - const char* name; /* table name */ - const char* dir_path_of_temp_table;/* NULL or the directory path - where a TEMPORARY table that was explicitly - created by a user should be placed if - innodb_file_per_table is defined in my.cnf; - in Unix this is usually /tmp/..., in Windows - \temp\... */ - unsigned space:32; - /* space where the clustered index of the - table is placed */ - unsigned ibd_file_missing:1; - /* TRUE if this is in a single-table - tablespace and the .ibd file is missing; then - we must return in ha_innodb.cc an error if the - user tries to query such an orphaned table */ - unsigned tablespace_discarded:1; - /* this flag is set TRUE when the user - calls DISCARD TABLESPACE on this - table, and reset to FALSE in IMPORT - TABLESPACE */ - unsigned cached:1;/* TRUE if the table object has been added - to the dictionary cache */ - unsigned flags:8;/* DICT_TF_COMPACT, ... */ - unsigned n_def:10;/* number of columns defined so far */ - unsigned n_cols:10;/* number of columns */ - dict_col_t* cols; /* array of column descriptions */ - const char* col_names; - /* Column names packed in a character string - "name1\0name2\0...nameN\0". Until - the string contains n_cols, it will be - allocated from a temporary heap. The final - string will be allocated from table->heap. 
*/ - hash_node_t name_hash; /* hash chain node */ - hash_node_t id_hash; /* hash chain node */ - UT_LIST_BASE_NODE_T(dict_index_t) - indexes; /* list of indexes of the table */ - UT_LIST_BASE_NODE_T(dict_foreign_t) - foreign_list;/* list of foreign key constraints - in the table; these refer to columns - in other tables */ - UT_LIST_BASE_NODE_T(dict_foreign_t) - referenced_list;/* list of foreign key constraints - which refer to this table */ - UT_LIST_NODE_T(dict_table_t) - table_LRU; /* node of the LRU list of tables */ - ulint n_mysql_handles_opened; - /* count of how many handles MySQL has opened - to this table; dropping of the table is - NOT allowed until this count gets to zero; - MySQL does NOT itself check the number of - open handles at drop */ - ulint n_foreign_key_checks_running; - /* count of how many foreign key check - operations are currently being performed - on the table: we cannot drop the table while - there are foreign key checks running on - it! */ - lock_t* auto_inc_lock;/* a buffer for an auto-inc lock - for this table: we allocate the memory here - so that individual transactions can get it - and release it without a need to allocate - space from the lock heap of the trx: - otherwise the lock heap would grow rapidly - if we do a large insert from a select */ - dulint query_cache_inv_trx_id; - /* transactions whose trx id < than this - number are not allowed to store to the MySQL - query cache or retrieve from it; when a trx - with undo logs commits, it sets this to the - value of the trx id counter for the tables it - had an IX lock on */ - UT_LIST_BASE_NODE_T(lock_t) - locks; /* list of locks on the table */ -#ifdef UNIV_DEBUG - /*----------------------*/ - ibool does_not_fit_in_memory; - /* this field is used to specify in simulations - tables which are so big that disk should be - accessed: disk access is simulated by - putting the thread to sleep for a while; - NOTE that this flag is not stored to the data - dictionary on disk, and the 
database will - forget about value TRUE if it has to reload - the table definition from disk */ -#endif /* UNIV_DEBUG */ - /*----------------------*/ - unsigned big_rows:1; - /* flag: TRUE if the maximum length of - a single row exceeds BIG_ROW_SIZE; - initialized in dict_table_add_to_cache() */ - unsigned stat_initialized:1; /* TRUE if statistics have - been calculated the first time - after database startup or table creation */ - ib_longlong stat_n_rows; - /* approximate number of rows in the table; - we periodically calculate new estimates */ - ulint stat_clustered_index_size; - /* approximate clustered index size in - database pages */ - ulint stat_sum_of_other_index_sizes; - /* other indexes in database pages */ - ulint stat_modified_counter; - /* when a row is inserted, updated, or deleted, - we add 1 to this number; we calculate new - estimates for the stat_... values for the - table and the indexes at an interval of 2 GB - or when about 1 / 16 of table has been - modified; also when the estimate operation is - called for MySQL SHOW TABLE STATUS; the - counter is reset to zero at statistics - calculation; this counter is not protected by - any latch, because this is only used for - heuristics */ - /*----------------------*/ - mutex_t autoinc_mutex; - /* mutex protecting the autoincrement - counter */ - ib_ulonglong autoinc;/* autoinc counter value to give to the - next inserted row */ - ulong n_waiting_or_granted_auto_inc_locks; - /* This counter is used to track the number - of granted and pending autoinc locks on this - table. This value is set after acquiring the - kernel mutex but we peek the contents to - determine whether other transactions have - acquired the AUTOINC lock or not. Of course - only one transaction can be granted the - lock but there can be multiple waiters. 
*/ - /*----------------------*/ - -#ifdef UNIV_DEBUG - ulint magic_n;/* magic number */ -# define DICT_TABLE_MAGIC_N 76333786 -#endif /* UNIV_DEBUG */ -}; - -#ifndef UNIV_NONINL -#include "dict0mem.ic" -#endif - -#endif diff --git a/storage/innobase/include/dict0mem.ic b/storage/innobase/include/dict0mem.ic deleted file mode 100644 index 9bcefc2a51f..00000000000 --- a/storage/innobase/include/dict0mem.ic +++ /dev/null @@ -1,9 +0,0 @@ -/********************************************************************** -Data dictionary memory object creation - -(c) 1996 Innobase Oy - -Created 1/8/1996 Heikki Tuuri -***********************************************************************/ - - diff --git a/storage/innobase/include/dict0types.h b/storage/innobase/include/dict0types.h deleted file mode 100644 index b90545f2105..00000000000 --- a/storage/innobase/include/dict0types.h +++ /dev/null @@ -1,27 +0,0 @@ -/****************************************************** -Data dictionary global types - -(c) 1996 Innobase Oy - -Created 1/8/1996 Heikki Tuuri -*******************************************************/ - -#ifndef dict0types_h -#define dict0types_h - -typedef struct dict_sys_struct dict_sys_t; -typedef struct dict_col_struct dict_col_t; -typedef struct dict_field_struct dict_field_t; -typedef struct dict_index_struct dict_index_t; -typedef struct dict_table_struct dict_table_t; -typedef struct dict_foreign_struct dict_foreign_t; - -/* A cluster object is a table object with the type field set to -DICT_CLUSTERED */ - -typedef dict_table_t dict_cluster_t; - -typedef struct ind_node_struct ind_node_t; -typedef struct tab_node_struct tab_node_t; - -#endif diff --git a/storage/innobase/include/dyn0dyn.h b/storage/innobase/include/dyn0dyn.h deleted file mode 100644 index 7affccbf67e..00000000000 --- a/storage/innobase/include/dyn0dyn.h +++ /dev/null @@ -1,166 +0,0 @@ -/****************************************************** -The dynamically allocated array - -(c) 1996 Innobase Oy - 
-Created 2/5/1996 Heikki Tuuri -*******************************************************/ - -#ifndef dyn0dyn_h -#define dyn0dyn_h - -#include "univ.i" -#include "ut0lst.h" -#include "mem0mem.h" - -typedef struct dyn_block_struct dyn_block_t; -typedef dyn_block_t dyn_array_t; - - -/* This is the initial 'payload' size of a dynamic array; -this must be > MLOG_BUF_MARGIN + 30! */ -#define DYN_ARRAY_DATA_SIZE 512 - -/************************************************************************* -Initializes a dynamic array. */ -UNIV_INLINE -dyn_array_t* -dyn_array_create( -/*=============*/ - /* out: initialized dyn array */ - dyn_array_t* arr); /* in: pointer to a memory buffer of - size sizeof(dyn_array_t) */ -/**************************************************************** -Frees a dynamic array. */ -UNIV_INLINE -void -dyn_array_free( -/*===========*/ - dyn_array_t* arr); /* in: dyn array */ -/************************************************************************* -Makes room on top of a dyn array and returns a pointer to a buffer in it. -After copying the elements, the caller must close the buffer using -dyn_array_close. */ -UNIV_INLINE -byte* -dyn_array_open( -/*===========*/ - /* out: pointer to the buffer */ - dyn_array_t* arr, /* in: dynamic array */ - ulint size); /* in: size in bytes of the buffer; MUST be - smaller than DYN_ARRAY_DATA_SIZE! */ -/************************************************************************* -Closes the buffer returned by dyn_array_open. */ -UNIV_INLINE -void -dyn_array_close( -/*============*/ - dyn_array_t* arr, /* in: dynamic array */ - byte* ptr); /* in: buffer space from ptr up was not used */ -/************************************************************************* -Makes room on top of a dyn array and returns a pointer to -the added element. The caller must copy the element to -the pointer returned. 
*/ -UNIV_INLINE -void* -dyn_array_push( -/*===========*/ - /* out: pointer to the element */ - dyn_array_t* arr, /* in: dynamic array */ - ulint size); /* in: size in bytes of the element */ -/**************************************************************** -Returns pointer to an element in dyn array. */ -UNIV_INLINE -void* -dyn_array_get_element( -/*==================*/ - /* out: pointer to element */ - dyn_array_t* arr, /* in: dyn array */ - ulint pos); /* in: position of element as bytes - from array start */ -/**************************************************************** -Returns the size of stored data in a dyn array. */ -UNIV_INLINE -ulint -dyn_array_get_data_size( -/*====================*/ - /* out: data size in bytes */ - dyn_array_t* arr); /* in: dyn array */ -/**************************************************************** -Gets the first block in a dyn array. */ -UNIV_INLINE -dyn_block_t* -dyn_array_get_first_block( -/*======================*/ - dyn_array_t* arr); /* in: dyn array */ -/**************************************************************** -Gets the last block in a dyn array. */ -UNIV_INLINE -dyn_block_t* -dyn_array_get_last_block( -/*=====================*/ - dyn_array_t* arr); /* in: dyn array */ -/************************************************************************ -Gets the next block in a dyn array. */ -UNIV_INLINE -dyn_block_t* -dyn_array_get_next_block( -/*=====================*/ - /* out: pointer to next, NULL if end of list */ - dyn_array_t* arr, /* in: dyn array */ - dyn_block_t* block); /* in: dyn array block */ -/************************************************************************ -Gets the number of used bytes in a dyn array block. */ -UNIV_INLINE -ulint -dyn_block_get_used( -/*===============*/ - /* out: number of bytes used */ - dyn_block_t* block); /* in: dyn array block */ -/************************************************************************ -Gets pointer to the start of data in a dyn array block. 
*/ -UNIV_INLINE -byte* -dyn_block_get_data( -/*===============*/ - /* out: pointer to data */ - dyn_block_t* block); /* in: dyn array block */ -/************************************************************ -Pushes len bytes, read from str, to a dyn array. */ -UNIV_INLINE -void -dyn_push_string( -/*============*/ - dyn_array_t* arr, /* in: dyn array */ - const byte* str, /* in: string to write */ - ulint len); /* in: string length in bytes */ - -/*#################################################################*/ - -/* NOTE! Do not use the fields of the struct directly: the definition -appears here only for the compiler to know its size! */ -struct dyn_block_struct{ - mem_heap_t* heap; /* in the first block this is != NULL - if dynamic allocation has been needed */ - ulint used; /* number of data bytes used in this block; - read it through dyn_block_get_used(), which - masks off DYN_BLOCK_FULL_FLAG */ - byte data[DYN_ARRAY_DATA_SIZE]; - /* storage for array elements */ - UT_LIST_BASE_NODE_T(dyn_block_t) base; - /* linear list of dyn blocks: this node is - used only in the first block */ - UT_LIST_NODE_T(dyn_block_t) list; - /* linear list node: used in all blocks */ -#ifdef UNIV_DEBUG - ulint buf_end;/* only in the debug version: if dyn array is - opened, this is the buffer end offset, else - this is 0 */ - ulint magic_n;/* only in the debug version: set to - DYN_BLOCK_MAGIC_N by dyn_array_create() and - reset to 0 by dyn_array_free() */ -#endif -}; - - -#ifndef UNIV_NONINL -#include "dyn0dyn.ic" -#endif - -#endif diff --git a/storage/innobase/include/dyn0dyn.ic b/storage/innobase/include/dyn0dyn.ic deleted file mode 100644 index fcb3c17287a..00000000000 --- a/storage/innobase/include/dyn0dyn.ic +++ /dev/null @@ -1,346 +0,0 @@ -/****************************************************** -The dynamically allocated array - -(c) 1996 Innobase Oy - -Created 2/5/1996 Heikki Tuuri -*******************************************************/ - -#define DYN_BLOCK_MAGIC_N 375767 -#define DYN_BLOCK_FULL_FLAG 0x1000000UL /* bit kept in the 'used' field of a block; dyn_block_get_used() masks it off */ - -/**************************************************************** -Adds a new block to a dyn array. 
*/ - -dyn_block_t* -dyn_array_add_block( -/*================*/ - /* out: created block */ - dyn_array_t* arr); /* in: dyn array */ - - -/**************************************************************** -Gets the first block in a dyn array. */ -UNIV_INLINE -dyn_block_t* -dyn_array_get_first_block( -/*======================*/ - dyn_array_t* arr) /* in: dyn array */ -{ - return(arr); -} - -/**************************************************************** -Gets the last block in a dyn array. */ -UNIV_INLINE -dyn_block_t* -dyn_array_get_last_block( -/*=====================*/ - dyn_array_t* arr) /* in: dyn array */ -{ - if (arr->heap == NULL) { - - return(arr); - } - - return(UT_LIST_GET_LAST(arr->base)); -} - -/************************************************************************ -Gets the next block in a dyn array. */ -UNIV_INLINE -dyn_block_t* -dyn_array_get_next_block( -/*=====================*/ - /* out: pointer to next, NULL if end of list */ - dyn_array_t* arr, /* in: dyn array */ - dyn_block_t* block) /* in: dyn array block */ -{ - ut_ad(arr && block); - - if (arr->heap == NULL) { - ut_ad(arr == block); - - return(NULL); - } - - return(UT_LIST_GET_NEXT(list, block)); -} - -/************************************************************************ -Gets the number of used bytes in a dyn array block. */ -UNIV_INLINE -ulint -dyn_block_get_used( -/*===============*/ - /* out: number of bytes used */ - dyn_block_t* block) /* in: dyn array block */ -{ - ut_ad(block); - - return((block->used) & ~DYN_BLOCK_FULL_FLAG); -} - -/************************************************************************ -Gets pointer to the start of data in a dyn array block. */ -UNIV_INLINE -byte* -dyn_block_get_data( -/*===============*/ - /* out: pointer to data */ - dyn_block_t* block) /* in: dyn array block */ -{ - ut_ad(block); - - return(block->data); -} - -/************************************************************************* -Initializes a dynamic array. 
*/ -UNIV_INLINE -dyn_array_t* -dyn_array_create( -/*=============*/ - /* out: initialized dyn array */ - dyn_array_t* arr) /* in: pointer to a memory buffer of - size sizeof(dyn_array_t) */ -{ - ut_ad(arr); -#if DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG -# error "DYN_ARRAY_DATA_SIZE >= DYN_BLOCK_FULL_FLAG" -#endif - - arr->heap = NULL; - arr->used = 0; - -#ifdef UNIV_DEBUG - arr->buf_end = 0; - arr->magic_n = DYN_BLOCK_MAGIC_N; -#endif - return(arr); -} - -/**************************************************************** -Frees a dynamic array. */ -UNIV_INLINE -void -dyn_array_free( -/*===========*/ - dyn_array_t* arr) /* in: dyn array */ -{ - if (arr->heap != NULL) { - mem_heap_free(arr->heap); - } - -#ifdef UNIV_DEBUG - arr->magic_n = 0; -#endif -} - -/************************************************************************* -Makes room on top of a dyn array and returns a pointer to the added element. -The caller must copy the element to the pointer returned. */ -UNIV_INLINE -void* -dyn_array_push( -/*===========*/ - /* out: pointer to the element */ - dyn_array_t* arr, /* in: dynamic array */ - ulint size) /* in: size in bytes of the element */ -{ - dyn_block_t* block; - ulint used; - - ut_ad(arr); - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - ut_ad(size <= DYN_ARRAY_DATA_SIZE); - ut_ad(size); - - block = arr; - used = block->used; - - if (used + size > DYN_ARRAY_DATA_SIZE) { - /* Get the last array block */ - - block = dyn_array_get_last_block(arr); - used = block->used; - - if (used + size > DYN_ARRAY_DATA_SIZE) { - block = dyn_array_add_block(arr); - used = block->used; - } - } - - block->used = used + size; - ut_ad(block->used <= DYN_ARRAY_DATA_SIZE); - - return((block->data) + used); -} - -/************************************************************************* -Makes room on top of a dyn array and returns a pointer to a buffer in it. -After copying the elements, the caller must close the buffer using -dyn_array_close. 
*/ -UNIV_INLINE -byte* -dyn_array_open( -/*===========*/ - /* out: pointer to the buffer */ - dyn_array_t* arr, /* in: dynamic array */ - ulint size) /* in: size in bytes of the buffer; MUST be - smaller than DYN_ARRAY_DATA_SIZE! */ -{ - dyn_block_t* block; - ulint used; - - ut_ad(arr); - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - ut_ad(size <= DYN_ARRAY_DATA_SIZE); - ut_ad(size); - - block = arr; - used = block->used; - - if (used + size > DYN_ARRAY_DATA_SIZE) { - /* Get the last array block */ - - block = dyn_array_get_last_block(arr); - used = block->used; - - if (used + size > DYN_ARRAY_DATA_SIZE) { - block = dyn_array_add_block(arr); - used = block->used; - ut_a(size <= DYN_ARRAY_DATA_SIZE); - } - } - - ut_ad(block->used <= DYN_ARRAY_DATA_SIZE); -#ifdef UNIV_DEBUG - ut_ad(arr->buf_end == 0); - - arr->buf_end = used + size; -#endif - return((block->data) + used); -} - -/************************************************************************* -Closes the buffer returned by dyn_array_open. */ -UNIV_INLINE -void -dyn_array_close( -/*============*/ - dyn_array_t* arr, /* in: dynamic array */ - byte* ptr) /* in: buffer space from ptr up was not used */ -{ - dyn_block_t* block; - - ut_ad(arr); - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - - block = dyn_array_get_last_block(arr); - - ut_ad(arr->buf_end + block->data >= ptr); - - block->used = ptr - block->data; - - ut_ad(block->used <= DYN_ARRAY_DATA_SIZE); - -#ifdef UNIV_DEBUG - arr->buf_end = 0; -#endif -} - -/**************************************************************** -Returns pointer to an element in dyn array. 
*/ -UNIV_INLINE -void* -dyn_array_get_element( -/*==================*/ - /* out: pointer to element */ - dyn_array_t* arr, /* in: dyn array */ - ulint pos) /* in: position of element as bytes - from array start */ -{ - dyn_block_t* block; - ulint used; - - ut_ad(arr); - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - - /* Get the first array block */ - block = dyn_array_get_first_block(arr); - - if (arr->heap != NULL) { - used = dyn_block_get_used(block); - - while (pos >= used) { - pos -= used; - block = UT_LIST_GET_NEXT(list, block); - ut_ad(block); - - used = dyn_block_get_used(block); - } - } - - ut_ad(block); - ut_ad(dyn_block_get_used(block) >= pos); - - return(block->data + pos); -} - -/**************************************************************** -Returns the size of stored data in a dyn array. */ -UNIV_INLINE -ulint -dyn_array_get_data_size( -/*====================*/ - /* out: data size in bytes */ - dyn_array_t* arr) /* in: dyn array */ -{ - dyn_block_t* block; - ulint sum = 0; - - ut_ad(arr); - ut_ad(arr->magic_n == DYN_BLOCK_MAGIC_N); - - if (arr->heap == NULL) { - - return(arr->used); - } - - /* Get the first array block */ - block = dyn_array_get_first_block(arr); - - while (block != NULL) { - sum += dyn_block_get_used(block); - block = dyn_array_get_next_block(arr, block); - } - - return(sum); -} - -/************************************************************ -Pushes n bytes to a dyn array. 
*/ -UNIV_INLINE -void -dyn_push_string( -/*============*/ - dyn_array_t* arr, /* in: dyn array */ - const byte* str, /* in: string to write */ - ulint len) /* in: string length in bytes */ -{ - ulint n_copied; - - /* Copy in pieces: dyn_array_push() asserts that a single request - is at most DYN_ARRAY_DATA_SIZE bytes */ - while (len > 0) { - if (len > DYN_ARRAY_DATA_SIZE) { - n_copied = DYN_ARRAY_DATA_SIZE; - } else { - n_copied = len; - } - - memcpy(dyn_array_push(arr, n_copied), str, n_copied); - - /* advance past the piece just copied */ - str += n_copied; - len -= n_copied; - } -} diff --git a/storage/innobase/include/eval0eval.h b/storage/innobase/include/eval0eval.h deleted file mode 100644 index f950512adfd..00000000000 --- a/storage/innobase/include/eval0eval.h +++ /dev/null @@ -1,97 +0,0 @@ -/****************************************************** -SQL evaluator: evaluates simple data structures, like expressions, in -a query graph - -(c) 1997 Innobase Oy - -Created 12/29/1997 Heikki Tuuri -*******************************************************/ - -#ifndef eval0eval_h -#define eval0eval_h - -#include "univ.i" -#include "que0types.h" -#include "pars0sym.h" -#include "pars0pars.h" - -/********************************************************************* -Free the buffer from global dynamic memory for a value of a que_node, -if it has been allocated in the above function. The freeing for pushed -column values is done in sel_col_prefetch_buf_free. */ - -void -eval_node_free_val_buf( -/*===================*/ - que_node_t* node); /* in: query graph node */ -/********************************************************************* -Evaluates a symbol table symbol. */ -UNIV_INLINE -void -eval_sym( -/*=====*/ - sym_node_t* sym_node); /* in: symbol table node */ -/********************************************************************* -Evaluates an expression. */ -UNIV_INLINE -void -eval_exp( -/*=====*/ - que_node_t* exp_node); /* in: expression */ -/********************************************************************* -Sets an integer value as the value of an expression node. 
*/ -UNIV_INLINE -void -eval_node_set_int_val( -/*==================*/ - que_node_t* node, /* in: expression node */ - lint val); /* in: value to set */ -/********************************************************************* -Gets an integer non-SQL null value from an expression node. The value -is asserted to be 4 bytes long. */ -UNIV_INLINE -lint -eval_node_get_int_val( -/*==================*/ - /* out: integer value */ - que_node_t* node); /* in: expression node */ -/********************************************************************* -Copies a binary string value as the value of a query graph node. Allocates a -new buffer if necessary. If len is UNIV_SQL_NULL, only the length of the -value is set and str is not read. */ -UNIV_INLINE -void -eval_node_copy_and_alloc_val( -/*=========================*/ - que_node_t* node, /* in: query graph node */ - byte* str, /* in: binary string */ - ulint len); /* in: string length or UNIV_SQL_NULL */ -/********************************************************************* -Copies a query node value to another node. */ -UNIV_INLINE -void -eval_node_copy_val( -/*===============*/ - que_node_t* node1, /* in: node to copy to */ - que_node_t* node2); /* in: node to copy from */ -/********************************************************************* -Gets an ibool value from a query node. */ -UNIV_INLINE -ibool -eval_node_get_ibool_val( -/*====================*/ - /* out: ibool value */ - que_node_t* node); /* in: query graph node */ -/********************************************************************* -Evaluates a comparison node. 
*/ - -ibool -eval_cmp( -/*=====*/ - /* out: the result of the comparison */ - func_node_t* cmp_node); /* in: comparison node */ - - -#ifndef UNIV_NONINL -#include "eval0eval.ic" -#endif - -#endif diff --git a/storage/innobase/include/eval0eval.ic b/storage/innobase/include/eval0eval.ic deleted file mode 100644 index caffa2e0bfd..00000000000 --- a/storage/innobase/include/eval0eval.ic +++ /dev/null @@ -1,234 +0,0 @@ -/****************************************************** -SQL evaluator: evaluates simple data structures, like expressions, in -a query graph - -(c) 1997 Innobase Oy - -Created 12/29/1997 Heikki Tuuri -*******************************************************/ - -#include "que0que.h" -#include "rem0cmp.h" -#include "pars0grm.h" - -/********************************************************************* -Evaluates a function node. */ - -void -eval_func( -/*======*/ - func_node_t* func_node); /* in: function node */ -/********************************************************************* -Allocate a buffer from global dynamic memory for a value of a que_node. -NOTE that this memory must be explicitly freed when the query graph is -freed. If the node already has allocated buffer, that buffer is freed -here. NOTE that this is the only function where dynamic memory should be -allocated for a query node val field. */ - -byte* -eval_node_alloc_val_buf( -/*====================*/ - /* out: pointer to allocated buffer */ - que_node_t* node, /* in: query graph node; sets the val field - data field to point to the new buffer, and - len field equal to size */ - ulint size); /* in: buffer size */ - - -/********************************************************************* -Allocates a new buffer if needed. 
*/ -UNIV_INLINE -byte* -eval_node_ensure_val_buf( -/*=====================*/ - /* out: pointer to buffer */ - que_node_t* node, /* in: query graph node; sets the val field - data field to point to the new buffer, and - len field equal to size */ - ulint size) /* in: buffer size */ -{ - dfield_t* dfield; - byte* data; - - dfield = que_node_get_val(node); - dfield_set_len(dfield, size); - - data = dfield_get_data(dfield); - - if (!data || que_node_get_val_buf_size(node) < size) { - - data = eval_node_alloc_val_buf(node, size); - } - - return(data); -} - -/********************************************************************* -Evaluates a symbol table symbol. */ -UNIV_INLINE -void -eval_sym( -/*=====*/ - sym_node_t* sym_node) /* in: symbol table node */ -{ - - ut_ad(que_node_get_type(sym_node) == QUE_NODE_SYMBOL); - - if (sym_node->indirection) { - /* The symbol table node is an alias for a variable or a - column */ - - dfield_copy_data(que_node_get_val(sym_node), - que_node_get_val(sym_node->indirection)); - } -} - -/********************************************************************* -Evaluates an expression. */ -UNIV_INLINE -void -eval_exp( -/*=====*/ - que_node_t* exp_node) /* in: expression */ -{ - if (que_node_get_type(exp_node) == QUE_NODE_SYMBOL) { - - eval_sym((sym_node_t*)exp_node); - - return; - } - - eval_func(exp_node); -} - -/********************************************************************* -Sets an integer value as the value of an expression node. 
*/ -UNIV_INLINE -void -eval_node_set_int_val( -/*==================*/ - que_node_t* node, /* in: expression node */ - lint val) /* in: value to set */ -{ - dfield_t* dfield; - byte* data; - - dfield = que_node_get_val(node); - - data = dfield_get_data(dfield); - - if (data == NULL) { - data = eval_node_alloc_val_buf(node, 4); - } - - ut_ad(dfield_get_len(dfield) == 4); - - mach_write_to_4(data, (ulint)val); -} - -/********************************************************************* -Gets an integer non-SQL null value from an expression node. */ -UNIV_INLINE -lint -eval_node_get_int_val( -/*==================*/ - /* out: integer value */ - que_node_t* node) /* in: expression node */ -{ - dfield_t* dfield; - - dfield = que_node_get_val(node); - - ut_ad(dfield_get_len(dfield) == 4); - - return((int)mach_read_from_4(dfield_get_data(dfield))); -} - -/********************************************************************* -Gets a iboolean value from a query node. */ -UNIV_INLINE -ibool -eval_node_get_ibool_val( -/*====================*/ - /* out: iboolean value */ - que_node_t* node) /* in: query graph node */ -{ - dfield_t* dfield; - byte* data; - - dfield = que_node_get_val(node); - - data = dfield_get_data(dfield); - - ut_ad(data != NULL); - - return(mach_read_from_1(data)); -} - -/********************************************************************* -Sets a iboolean value as the value of a function node. 
*/ -UNIV_INLINE -void -eval_node_set_ibool_val( -/*====================*/ - func_node_t* func_node, /* in: function node */ - ibool val) /* in: value to set */ -{ - dfield_t* dfield; - byte* data; - - dfield = que_node_get_val(func_node); - - data = dfield_get_data(dfield); - - if (data == NULL) { - /* Allocate 1 byte to hold the value */ - - data = eval_node_alloc_val_buf(func_node, 1); - } - - ut_ad(dfield_get_len(dfield) == 1); - - mach_write_to_1(data, val); -} - -/********************************************************************* -Copies a binary string value as the value of a query graph node. Allocates a -new buffer if necessary. */ -UNIV_INLINE -void -eval_node_copy_and_alloc_val( -/*=========================*/ - que_node_t* node, /* in: query graph node */ - byte* str, /* in: binary string */ - ulint len) /* in: string length or UNIV_SQL_NULL */ -{ - byte* data; - - if (len == UNIV_SQL_NULL) { - dfield_set_len(que_node_get_val(node), len); - - return; - } - - data = eval_node_ensure_val_buf(node, len); - - ut_memcpy(data, str, len); -} - -/********************************************************************* -Copies a query node value to another node. 
*/ -UNIV_INLINE -void -eval_node_copy_val( -/*===============*/ - que_node_t* node1, /* in: node to copy to */ - que_node_t* node2) /* in: node to copy from */ -{ - dfield_t* dfield2; - - dfield2 = que_node_get_val(node2); - - /* Pass on both the data and the length of node2's value: if the - length is UNIV_SQL_NULL, the callee sets only the length of node1's - value and copies no data */ - eval_node_copy_and_alloc_val(node1, dfield_get_data(dfield2), - dfield_get_len(dfield2)); -} diff --git a/storage/innobase/include/eval0proc.h b/storage/innobase/include/eval0proc.h deleted file mode 100644 index 8416551d0ba..00000000000 --- a/storage/innobase/include/eval0proc.h +++ /dev/null @@ -1,87 +0,0 @@ -/****************************************************** -Executes SQL stored procedures and their control structures - -(c) 1998 Innobase Oy - -Created 1/20/1998 Heikki Tuuri -*******************************************************/ - -#ifndef eval0proc_h -#define eval0proc_h - -#include "univ.i" -#include "que0types.h" -#include "pars0sym.h" -#include "pars0pars.h" - -/************************************************************************** -Performs an execution step of a procedure node. */ -UNIV_INLINE -que_thr_t* -proc_step( -/*======*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************** -Performs an execution step of an if-statement node. */ - -que_thr_t* -if_step( -/*====*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************** -Performs an execution step of a while-statement node. */ - -que_thr_t* -while_step( -/*=======*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************** -Performs an execution step of a for-loop node. 
*/ - -que_thr_t* -for_step( -/*=====*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************** -Performs an execution step of an assignment statement node. */ - -que_thr_t* -assign_step( -/*========*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************** -Performs an execution step of a procedure call node. */ -UNIV_INLINE -que_thr_t* -proc_eval_step( -/*===========*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************** -Performs an execution step of an exit statement node. */ - -que_thr_t* -exit_step( -/*======*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************** -Performs an execution step of a return-statement node. */ - -que_thr_t* -return_step( -/*========*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ - - -#ifndef UNIV_NONINL -#include "eval0proc.ic" -#endif - -#endif diff --git a/storage/innobase/include/eval0proc.ic b/storage/innobase/include/eval0proc.ic deleted file mode 100644 index cf738056576..00000000000 --- a/storage/innobase/include/eval0proc.ic +++ /dev/null @@ -1,71 +0,0 @@ -/****************************************************** -Executes SQL stored procedures and their control structures - -(c) 1998 Innobase Oy - -Created 1/20/1998 Heikki Tuuri -*******************************************************/ - -#include "pars0pars.h" -#include "que0que.h" -#include "eval0eval.h" - -/************************************************************************** -Performs an execution step of a procedure node. 
*/ -UNIV_INLINE -que_thr_t* -proc_step( -/*======*/ - /* out: query thread to run next; this - implementation always returns thr */ - que_thr_t* thr) /* in: query thread */ -{ - proc_node_t* node; - - ut_ad(thr); - - node = thr->run_node; - ut_ad(que_node_get_type(node) == QUE_NODE_PROC); - - if (thr->prev_node == que_node_get_parent(node)) { - /* Control came from the parent node: start execution from the - first statement in the statement list */ - - thr->run_node = node->stat_list; - } else { - /* Control came back from the statement list: the previous node - is asserted to have no successor, so nothing is left to run */ - ut_ad(que_node_get_next(thr->prev_node) == NULL); - - thr->run_node = NULL; - } - - /* No statement to run (finished, or stat_list was NULL): hand - control back to the parent node */ - if (thr->run_node == NULL) { - thr->run_node = que_node_get_parent(node); - } - - return(thr); -} - -/************************************************************************** -Performs an execution step of a procedure call node. */ -UNIV_INLINE -que_thr_t* -proc_eval_step( -/*===========*/ - /* out: query thread to run next; this - implementation always returns thr */ - que_thr_t* thr) /* in: query thread */ -{ - func_node_t* node; - - ut_ad(thr); - - node = thr->run_node; - ut_ad(que_node_get_type(node) == QUE_NODE_FUNC); - - /* Evaluate the procedure */ - - eval_exp(node); - - /* Continue execution from the parent of the call node */ - thr->run_node = que_node_get_parent(node); - - return(thr); -} diff --git a/storage/innobase/include/fil0fil.h b/storage/innobase/include/fil0fil.h deleted file mode 100644 index 6b8fd4b03d5..00000000000 --- a/storage/innobase/include/fil0fil.h +++ /dev/null @@ -1,716 +0,0 @@ -/****************************************************** -The low-level file system - -(c) 1995 Innobase Oy - -Created 10/25/1995 Heikki Tuuri -*******************************************************/ - -#ifndef fil0fil_h -#define fil0fil_h - -#include "univ.i" -#include "sync0rw.h" -#include "dict0types.h" -#include "ibuf0types.h" -#include "ut0byte.h" -#include "os0file.h" - -/* When mysqld is run, the default directory "." 
is the mysqld datadir, but in -ibbackup we must set it explicitly; the path must NOT contain the trailing -'/' or '\' */ -extern const char* fil_path_to_mysql_datadir; - -/* Initial size of a single-table tablespace in pages */ -#define FIL_IBD_FILE_INITIAL_SIZE 4 - -/* 'null' (undefined) page offset in the context of file spaces */ -#define FIL_NULL ULINT32_UNDEFINED - -/* Space address data type; this is intended to be used when -addresses accurate to a byte are stored in file pages. If the page part -of the address is FIL_NULL, the address is considered undefined. */ - -typedef byte fil_faddr_t; /* 'type' definition in C: an address - stored in a file page is a string of bytes */ -#define FIL_ADDR_PAGE 0 /* first in address is the page offset */ -#define FIL_ADDR_BYTE 4 /* then comes 2-byte byte offset within page*/ - -#define FIL_ADDR_SIZE 6 /* address size is 6 bytes */ - -/* A struct for storing a space address FIL_ADDR, when it is used -in C program data structures. */ - -typedef struct fil_addr_struct fil_addr_t; -struct fil_addr_struct{ - ulint page; /* page number within a space */ - ulint boffset; /* byte offset within the page */ -}; - -/* Null file address */ -extern fil_addr_t fil_addr_null; - -/* The byte offsets on a file page for various variables */ -#define FIL_PAGE_SPACE_OR_CHKSUM 0 /* in < MySQL-4.0.14 space id the - page belongs to (== 0) but in later - versions the 'new' checksum of the - page */ -#define FIL_PAGE_OFFSET 4 /* page offset inside space */ -#define FIL_PAGE_PREV 8 /* if there is a 'natural' predecessor - of the page, its offset. - Otherwise FIL_NULL. - This field is not set on BLOB pages, - which are stored as a singly-linked - list. See also FIL_PAGE_NEXT. */ -#define FIL_PAGE_NEXT 12 /* if there is a 'natural' successor - of the page, its offset. - Otherwise FIL_NULL. 
- B-tree index pages - (FIL_PAGE_TYPE contains FIL_PAGE_INDEX) - on the same PAGE_LEVEL are maintained - as a doubly linked list via - FIL_PAGE_PREV and FIL_PAGE_NEXT - in the collation order of the - smallest user record on each page. */ -#define FIL_PAGE_LSN 16 /* lsn of the end of the newest - modification log record to the page */ -#define FIL_PAGE_TYPE 24 /* file page type: FIL_PAGE_INDEX,..., - 2 bytes. - - The contents of this field can only - be trusted in the following case: - if the page is an uncompressed - B-tree index page, then it is - guaranteed that the value is - FIL_PAGE_INDEX. - The opposite does not hold. - - In tablespaces created by - MySQL/InnoDB 5.1.7 or later, the - contents of this field is valid - for all uncompressed pages. */ -#define FIL_PAGE_FILE_FLUSH_LSN 26 /* this is only defined for the - first page in a data file: the file - has been flushed to disk at least up - to this lsn */ -#define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID 34 /* starting from 4.1.x this - contains the space id of the page */ -#define FIL_PAGE_DATA 38 /* start of the data on the page */ - -/* File page trailer */ -#define FIL_PAGE_END_LSN_OLD_CHKSUM 8 /* the low 4 bytes of this are used - to store the page checksum, the - last 4 bytes should be identical - to the last 4 bytes of FIL_PAGE_LSN */ -#define FIL_PAGE_DATA_END 8 - -/* File page types (values of FIL_PAGE_TYPE) */ -#define FIL_PAGE_INDEX 17855 /* B-tree node */ -#define FIL_PAGE_UNDO_LOG 2 /* Undo log page */ -#define FIL_PAGE_INODE 3 /* Index node */ -#define FIL_PAGE_IBUF_FREE_LIST 4 /* Insert buffer free list */ -/* File page types introduced in MySQL/InnoDB 5.1.7 */ -#define FIL_PAGE_TYPE_ALLOCATED 0 /* Freshly allocated page */ -#define FIL_PAGE_IBUF_BITMAP 5 /* Insert buffer bitmap */ -#define FIL_PAGE_TYPE_SYS 6 /* System page */ -#define FIL_PAGE_TYPE_TRX_SYS 7 /* Transaction system data */ -#define FIL_PAGE_TYPE_FSP_HDR 8 /* File space header */ -#define FIL_PAGE_TYPE_XDES 9 /* Extent descriptor page 
*/ -#define FIL_PAGE_TYPE_BLOB 10 /* Uncompressed BLOB page */ - -/* Space types */ -#define FIL_TABLESPACE 501 -#define FIL_LOG 502 - -extern ulint fil_n_log_flushes; - -extern ulint fil_n_pending_log_flushes; -extern ulint fil_n_pending_tablespace_flushes; - - -/*********************************************************************** -Returns the version number of a tablespace, -1 if not found. */ - -ib_longlong -fil_space_get_version( -/*==================*/ - /* out: version number, -1 if the tablespace does not - exist in the memory cache */ - ulint id); /* in: space id */ -/*********************************************************************** -Returns the latch of a file space. */ - -rw_lock_t* -fil_space_get_latch( -/*================*/ - /* out: latch protecting storage allocation */ - ulint id); /* in: space id */ -/*********************************************************************** -Returns the type of a file space. */ - -ulint -fil_space_get_type( -/*===============*/ - /* out: FIL_TABLESPACE or FIL_LOG */ - ulint id); /* in: space id */ -/*********************************************************************** -Returns the ibuf data of a file space. */ - -ibuf_data_t* -fil_space_get_ibuf_data( -/*====================*/ - /* out: ibuf data for this space */ - ulint id); /* in: space id */ -/*********************************************************************** -Appends a new file to the chain of files of a space. File must be closed. */ - -void -fil_node_create( -/*============*/ - const char* name, /* in: file name (file must be closed) */ - ulint size, /* in: file size in database blocks, rounded - downwards to an integer */ - ulint id, /* in: space id where to append */ - ibool is_raw);/* in: TRUE if a raw device or - a raw disk partition */ -/******************************************************************** -Drops files from the start of a file space, so that its size is cut by -the amount given. 
*/ - -void -fil_space_truncate_start( -/*=====================*/ - ulint id, /* in: space id */ - ulint trunc_len); /* in: truncate by this much; it is an error - if this does not equal to the combined size of - some initial files in the space */ -/*********************************************************************** -Creates a space memory object and puts it to the 'fil system' hash table. If -there is an error, prints an error message to the .err log. */ - -ibool -fil_space_create( -/*=============*/ - /* out: TRUE if success */ - const char* name, /* in: space name */ - ulint id, /* in: space id */ - ulint purpose);/* in: FIL_TABLESPACE, or FIL_LOG if log */ -/*********************************************************************** -Frees a space object from a the tablespace memory cache. Closes the files in -the chain but does not delete them. */ - -ibool -fil_space_free( -/*===========*/ - /* out: TRUE if success */ - ulint id); /* in: space id */ -/*********************************************************************** -Returns the size of the space in pages. The tablespace must be cached in the -memory cache. */ - -ulint -fil_space_get_size( -/*===============*/ - /* out: space size, 0 if space not found */ - ulint id); /* in: space id */ -/*********************************************************************** -Checks if the pair space, page_no refers to an existing page in a tablespace -file space. The tablespace must be cached in the memory cache. */ - -ibool -fil_check_adress_in_tablespace( -/*===========================*/ - /* out: TRUE if the address is meaningful */ - ulint id, /* in: space id */ - ulint page_no);/* in: page number */ -/******************************************************************** -Initializes the tablespace memory cache. 
*/ - -void -fil_init( -/*=====*/ - ulint max_n_open); /* in: max number of open files */ -/*********************************************************************** -Opens all log files and system tablespace data files. They stay open until the -database server shutdown. This should be called at a server startup after the -space objects for the log and the system tablespace have been created. The -purpose of this operation is to make sure we never run out of file descriptors -if we need to read from the insert buffer or to write to the log. */ - -void -fil_open_log_and_system_tablespace_files(void); -/*==========================================*/ -/*********************************************************************** -Closes all open files. There must not be any pending i/o's or not flushed -modifications in the files. */ - -void -fil_close_all_files(void); -/*=====================*/ -/*********************************************************************** -Sets the max tablespace id counter if the given number is bigger than the -previous value. */ - -void -fil_set_max_space_id_if_bigger( -/*===========================*/ - ulint max_id);/* in: maximum known id */ -/******************************************************************** -Initializes the ibuf data structure for space 0 == the system tablespace. -This can be called after the file space headers have been created and the -dictionary system has been initialized. */ - -void -fil_ibuf_init_at_db_start(void); -/*===========================*/ -/******************************************************************** -Writes the flushed lsn and the latest archived log number to the page -header of the first page of each data file in the system tablespace. 
*/ - -ulint -fil_write_flushed_lsn_to_data_files( -/*================================*/ - /* out: DB_SUCCESS or error number */ - dulint lsn, /* in: lsn to write */ - ulint arch_log_no); /* in: latest archived log file number */ -/*********************************************************************** -Reads the flushed lsn and arch no fields from a data file at database -startup. */ - -void -fil_read_flushed_lsn_and_arch_log_no( -/*=================================*/ - os_file_t data_file, /* in: open data file */ - ibool one_read_already, /* in: TRUE if min and max parameters - below already contain sensible data */ -#ifdef UNIV_LOG_ARCHIVE - ulint* min_arch_log_no, /* in/out: */ - ulint* max_arch_log_no, /* in/out: */ -#endif /* UNIV_LOG_ARCHIVE */ - dulint* min_flushed_lsn, /* in/out: */ - dulint* max_flushed_lsn); /* in/out: */ -/*********************************************************************** -Increments the count of pending insert buffer page merges, if space is not -being deleted. */ - -ibool -fil_inc_pending_ibuf_merges( -/*========================*/ - /* out: TRUE if being deleted, and ibuf merges should - be skipped */ - ulint id); /* in: space id */ -/*********************************************************************** -Decrements the count of pending insert buffer page merges. */ - -void -fil_decr_pending_ibuf_merges( -/*=========================*/ - ulint id); /* in: space id */ -/*********************************************************************** -Parses the body of a log record written about an .ibd file operation. That is, -the log record part after the standard (type, space id, page no) header of the -log record. - -If desired, also replays the delete or rename operation if the .ibd file -exists and the space id in it matches. Replays the create operation if a file -at that path does not exist yet. If the database directory for the file to be -created does not exist, then we create the directory, too. 
- -Note that ibbackup --apply-log sets fil_path_to_mysql_datadir to point to the -datadir that we should use in replaying the file operations. */ - -byte* -fil_op_log_parse_or_replay( -/*=======================*/ - /* out: end of log record, or NULL if the - record was not completely contained between - ptr and end_ptr */ - byte* ptr, /* in: buffer containing the log record body, - or an initial segment of it, if the record does - not fir completely between ptr and end_ptr */ - byte* end_ptr, /* in: buffer end */ - ulint type, /* in: the type of this log record */ - ibool do_replay, /* in: TRUE if we want to replay the - operation, and not just parse the log record */ - ulint space_id); /* in: if do_replay is TRUE, the space id of - the tablespace in question; otherwise - ignored */ -/*********************************************************************** -Deletes a single-table tablespace. The tablespace must be cached in the -memory cache. */ - -ibool -fil_delete_tablespace( -/*==================*/ - /* out: TRUE if success */ - ulint id); /* in: space id */ -/*********************************************************************** -Discards a single-table tablespace. The tablespace must be cached in the -memory cache. Discarding is like deleting a tablespace, but -1) we do not drop the table from the data dictionary; -2) we remove all insert buffer entries for the tablespace immediately; in DROP -TABLE they are only removed gradually in the background; -3) when the user does IMPORT TABLESPACE, the tablespace will have the same id -as it originally had. */ - -ibool -fil_discard_tablespace( -/*===================*/ - /* out: TRUE if success */ - ulint id); /* in: space id */ -/*********************************************************************** -Renames a single-table tablespace. The tablespace must be cached in the -tablespace memory cache. 
*/ - -ibool -fil_rename_tablespace( -/*==================*/ - /* out: TRUE if success */ - const char* old_name, /* in: old table name in the standard - databasename/tablename format of - InnoDB, or NULL if we do the rename - based on the space id only */ - ulint id, /* in: space id */ - const char* new_name); /* in: new table name in the standard - databasename/tablename format - of InnoDB */ - -/*********************************************************************** -Creates a new single-table tablespace to a database directory of MySQL. -Database directories are under the 'datadir' of MySQL. The datadir is the -directory of a running mysqld program. We can refer to it by simply the -path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp -dir of the mysqld server. */ - -ulint -fil_create_new_single_table_tablespace( -/*===================================*/ - /* out: DB_SUCCESS or error code */ - ulint* space_id, /* in/out: space id; if this is != 0, - then this is an input parameter, - otherwise output */ - const char* tablename, /* in: the table name in the usual - databasename/tablename format - of InnoDB, or a dir path to a temp - table */ - ibool is_temp, /* in: TRUE if a table created with - CREATE TEMPORARY TABLE */ - ulint size); /* in: the initial size of the - tablespace file in pages, - must be >= FIL_IBD_FILE_INITIAL_SIZE */ -/************************************************************************ -Tries to open a single-table tablespace and optionally checks the space id is -right in it. If does not succeed, prints an error message to the .err log. This -function is used to open a tablespace when we start up mysqld, and also in -IMPORT TABLESPACE. -NOTE that we assume this operation is used either at the database startup -or under the protection of the dictionary mutex, so that two users cannot -race here. 
This operation does not leave the file associated with the -tablespace open, but closes it after we have looked at the space id in it. */ - -ibool -fil_open_single_table_tablespace( -/*=============================*/ - /* out: TRUE if success */ - ibool check_space_id, /* in: should we check that the space - id in the file is right; we assume - that this function runs much faster - if no check is made, since accessing - the file inode probably is much - faster (the OS caches them) than - accessing the first page of the file */ - ulint id, /* in: space id */ - const char* name); /* in: table name in the - databasename/tablename format */ -/************************************************************************ -It is possible, though very improbable, that the lsn's in the tablespace to be -imported have risen above the current system lsn, if a lengthy purge, ibuf -merge, or rollback was performed on a backup taken with ibbackup. If that is -the case, reset page lsn's in the file. We assume that mysqld was shut down -after it performed these cleanup operations on the .ibd file, so that it at -the shutdown stamped the latest lsn to the FIL_PAGE_FILE_FLUSH_LSN in the -first page of the .ibd file, and we can determine whether we need to reset the -lsn's just by looking at that flush lsn. */ - -ibool -fil_reset_too_high_lsns( -/*====================*/ - /* out: TRUE if success */ - const char* name, /* in: table name in the - databasename/tablename format */ - dulint current_lsn); /* in: reset lsn's if the lsn stamped - to FIL_PAGE_FILE_FLUSH_LSN in the - first page is too high */ -/************************************************************************ -At the server startup, if we need crash recovery, scans the database -directories under the MySQL datadir, looking for .ibd files. Those files are -single-table tablespaces. 
We need to know the space id in each of them so that -we know into which file we should look to check the contents of a page stored -in the doublewrite buffer, also to know where to apply log records where the -space id is != 0. */ - -ulint -fil_load_single_table_tablespaces(void); -/*===================================*/ - /* out: DB_SUCCESS or error number */ -/************************************************************************ -If we need crash recovery, and we have called -fil_load_single_table_tablespaces() and dict_load_single_table_tablespaces(), -we can call this function to print an error message of orphaned .ibd files -for which there is not a data dictionary entry with a matching table name -and space id. */ - -void -fil_print_orphaned_tablespaces(void); -/*================================*/ -/*********************************************************************** -Returns TRUE if a single-table tablespace does not exist in the memory cache, -or is being deleted there. */ - -ibool -fil_tablespace_deleted_or_being_deleted_in_mem( -/*===========================================*/ - /* out: TRUE if does not exist or is being\ - deleted */ - ulint id, /* in: space id */ - ib_longlong version);/* in: tablespace_version should be this; if - you pass -1 as the value of this, then this - parameter is ignored */ -/*********************************************************************** -Returns TRUE if a single-table tablespace exists in the memory cache. */ - -ibool -fil_tablespace_exists_in_mem( -/*=========================*/ - /* out: TRUE if exists */ - ulint id); /* in: space id */ -/*********************************************************************** -Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory -cache. Note that if we have not done a crash recovery at the database startup, -there may be many tablespaces which are not yet in the memory cache. 
*/ - -ibool -fil_space_for_table_exists_in_mem( -/*==============================*/ - /* out: TRUE if a matching tablespace - exists in the memory cache */ - ulint id, /* in: space id */ - const char* name, /* in: table name in the standard - 'databasename/tablename' format or - the dir path to a temp table */ - ibool is_temp, /* in: TRUE if created with CREATE - TEMPORARY TABLE */ - ibool mark_space, /* in: in crash recovery, at database - startup we mark all spaces which have - an associated table in the InnoDB - data dictionary, so that - we can print a warning about orphaned - tablespaces */ - ibool print_error_if_does_not_exist); - /* in: print detailed error - information to the .err log if a - matching tablespace is not found from - memory */ -/************************************************************************** -Tries to extend a data file so that it would accommodate the number of pages -given. The tablespace must be cached in the memory cache. If the space is big -enough already, does nothing. */ - -ibool -fil_extend_space_to_desired_size( -/*=============================*/ - /* out: TRUE if success */ - ulint* actual_size, /* out: size of the space after extension; - if we ran out of disk space this may be lower - than the desired size */ - ulint space_id, /* in: space id */ - ulint size_after_extend);/* in: desired size in pages after the - extension; if the current space size is bigger - than this already, the function does nothing */ -#ifdef UNIV_HOTBACKUP -/************************************************************************ -Extends all tablespaces to the size stored in the space header. During the -ibbackup --apply-log phase we extended the spaces on-demand so that log records -could be appllied, but that may have left spaces still too small compared to -the size stored in the space header. 
*/ - -void -fil_extend_tablespaces_to_stored_len(void); -/*======================================*/ -#endif -/*********************************************************************** -Tries to reserve free extents in a file space. */ - -ibool -fil_space_reserve_free_extents( -/*===========================*/ - /* out: TRUE if succeed */ - ulint id, /* in: space id */ - ulint n_free_now, /* in: number of free extents now */ - ulint n_to_reserve); /* in: how many one wants to reserve */ -/*********************************************************************** -Releases free extents in a file space. */ - -void -fil_space_release_free_extents( -/*===========================*/ - ulint id, /* in: space id */ - ulint n_reserved); /* in: how many one reserved */ -/*********************************************************************** -Gets the number of reserved extents. If the database is silent, this number -should be zero. */ - -ulint -fil_space_get_n_reserved_extents( -/*=============================*/ - ulint id); /* in: space id */ -/************************************************************************ -Reads or writes data. This operation is asynchronous (aio). */ - -ulint -fil_io( -/*===*/ - /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED - if we are trying to do i/o on a tablespace - which does not exist */ - ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE, - ORed to OS_FILE_LOG, if a log i/o - and ORed to OS_AIO_SIMULATED_WAKE_LATER - if simulated aio and we want to post a - batch of i/os; NOTE that a simulated batch - may introduce hidden chances of deadlocks, - because i/os are not actually handled until - all have been posted: use with great - caution! 
*/ - ibool sync, /* in: TRUE if synchronous aio is desired */ - ulint space_id, /* in: space id */ - ulint block_offset, /* in: offset in number of blocks */ - ulint byte_offset, /* in: remainder of offset in bytes; in - aio this must be divisible by the OS block - size */ - ulint len, /* in: how many bytes to read or write; this - must not cross a file boundary; in aio this - must be a block size multiple */ - void* buf, /* in/out: buffer where to store read data - or from where to write; in aio this must be - appropriately aligned */ - void* message); /* in: message for aio handler if non-sync - aio used, else ignored */ -/************************************************************************ -Reads data from a space to a buffer. Remember that the possible incomplete -blocks at the end of file are ignored: they are not taken into account when -calculating the byte offset within a space. */ - -ulint -fil_read( -/*=====*/ - /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED - if we are trying to do i/o on a tablespace - which does not exist */ - ibool sync, /* in: TRUE if synchronous aio is desired */ - ulint space_id, /* in: space id */ - ulint block_offset, /* in: offset in number of blocks */ - ulint byte_offset, /* in: remainder of offset in bytes; in aio - this must be divisible by the OS block size */ - ulint len, /* in: how many bytes to read; this must not - cross a file boundary; in aio this must be a - block size multiple */ - void* buf, /* in/out: buffer where to store data read; - in aio this must be appropriately aligned */ - void* message); /* in: message for aio handler if non-sync - aio used, else ignored */ -/************************************************************************ -Writes data to a space from a buffer. Remember that the possible incomplete -blocks at the end of file are ignored: they are not taken into account when -calculating the byte offset within a space. 
*/ - -ulint -fil_write( -/*======*/ - /* out: DB_SUCCESS, or DB_TABLESPACE_DELETED - if we are trying to do i/o on a tablespace - which does not exist */ - ibool sync, /* in: TRUE if synchronous aio is desired */ - ulint space_id, /* in: space id */ - ulint block_offset, /* in: offset in number of blocks */ - ulint byte_offset, /* in: remainder of offset in bytes; in aio - this must be divisible by the OS block size */ - ulint len, /* in: how many bytes to write; this must - not cross a file boundary; in aio this must - be a block size multiple */ - void* buf, /* in: buffer from which to write; in aio - this must be appropriately aligned */ - void* message); /* in: message for aio handler if non-sync - aio used, else ignored */ -/************************************************************************** -Waits for an aio operation to complete. This function is used to write the -handler for completed requests. The aio array of pending requests is divided -into segments (see os0file.c for more info). The thread specifies which -segment it wants to wait for. */ - -void -fil_aio_wait( -/*=========*/ - ulint segment); /* in: the number of the segment in the aio - array to wait for */ -/************************************************************************** -Flushes to disk possible writes cached by the OS. If the space does not exist -or is being dropped, does not do anything. */ - -void -fil_flush( -/*======*/ - ulint space_id); /* in: file space id (this can be a group of - log files or a tablespace of the database) */ -/************************************************************************** -Flushes to disk writes in file spaces of the given type possibly cached by -the OS. */ - -void -fil_flush_file_spaces( -/*==================*/ - ulint purpose); /* in: FIL_TABLESPACE, FIL_LOG */ -/********************************************************************** -Checks the consistency of the tablespace cache. 
*/ - -ibool -fil_validate(void); -/*==============*/ - /* out: TRUE if ok */ -/************************************************************************ -Returns TRUE if file address is undefined. */ - -ibool -fil_addr_is_null( -/*=============*/ - /* out: TRUE if undefined */ - fil_addr_t addr); /* in: address */ -/************************************************************************ -Accessor functions for a file page */ - -ulint -fil_page_get_prev(byte* page); -ulint -fil_page_get_next(byte* page); -/************************************************************************* -Sets the file page type. */ - -void -fil_page_set_type( -/*==============*/ - byte* page, /* in: file page */ - ulint type); /* in: type */ -/************************************************************************* -Gets the file page type. */ - -ulint -fil_page_get_type( -/*==============*/ - /* out: type; NOTE that if the type has not been - written to page, the return value not defined */ - byte* page); /* in: file page */ - - -typedef struct fil_space_struct fil_space_t; - -#endif diff --git a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h deleted file mode 100644 index 82e95a2e920..00000000000 --- a/storage/innobase/include/fsp0fsp.h +++ /dev/null @@ -1,391 +0,0 @@ -/****************************************************** -File space management - -(c) 1995 Innobase Oy - -Created 12/18/1995 Heikki Tuuri -*******************************************************/ - -#ifndef fsp0fsp_h -#define fsp0fsp_h - -#include "univ.i" - -#include "mtr0mtr.h" -#include "fut0lst.h" -#include "ut0byte.h" -#include "page0types.h" - -/* If records are inserted in order, there are the following -flags to tell this (their type is made byte for the compiler -to warn if direction and hint parameters are switched in -fseg_alloc_free_page): */ -#define FSP_UP ((byte)111) /* alphabetically upwards */ -#define FSP_DOWN ((byte)112) /* alphabetically downwards */ -#define FSP_NO_DIR 
((byte)113) /* no order */ - -/* File space extent size in pages */ -#define FSP_EXTENT_SIZE 64 - -/* On a page of any file segment, data may be put starting from this offset: */ -#define FSEG_PAGE_DATA FIL_PAGE_DATA - -/* File segment header which points to the inode describing the file segment */ -typedef byte fseg_header_t; - -#define FSEG_HDR_SPACE 0 /* space id of the inode */ -#define FSEG_HDR_PAGE_NO 4 /* page number of the inode */ -#define FSEG_HDR_OFFSET 8 /* byte offset of the inode */ - -#define FSEG_HEADER_SIZE 10 - -/************************************************************************** -Initializes the file space system. */ - -void -fsp_init(void); -/*==========*/ -/************************************************************************** -Gets the current free limit of a tablespace. The free limit means the -place of the first page which has never been put to the the free list -for allocation. The space above that address is initialized to zero. -Sets also the global variable log_fsp_current_free_limit. */ - -ulint -fsp_header_get_free_limit( -/*======================*/ - /* out: free limit in megabytes */ - ulint space); /* in: space id, must be 0 */ -/************************************************************************** -Gets the size of the tablespace from the tablespace header. If we do not -have an auto-extending data file, this should be equal to the size of the -data files. If there is an auto-extending data file, this can be smaller. */ - -ulint -fsp_header_get_tablespace_size( -/*===========================*/ - /* out: size in pages */ - ulint space); /* in: space id, must be 0 */ -/************************************************************************** -Reads the file space size stored in the header page. 
*/ - -ulint -fsp_get_size_low( -/*=============*/ - /* out: tablespace size stored in the space header */ - page_t* page); /* in: header page (page 0 in the tablespace) */ -/************************************************************************** -Reads the space id from the first page of a tablespace. */ - -ulint -fsp_header_get_space_id( -/*====================*/ - /* out: space id, ULINT UNDEFINED if error */ - page_t* page); /* in: first page of a tablespace */ -/************************************************************************** -Writes the space id to a tablespace header. This function is used past the -buffer pool when we in fil0fil.c create a new single-table tablespace. */ - -void -fsp_header_write_space_id( -/*======================*/ - page_t* page, /* in: first page in the space */ - ulint space_id); /* in: space id */ -/************************************************************************** -Initializes the space header of a new created space and creates also the -insert buffer tree root if space == 0. */ - -void -fsp_header_init( -/*============*/ - ulint space, /* in: space id */ - ulint size, /* in: current size in blocks */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************** -Increases the space size field of a space. */ - -void -fsp_header_inc_size( -/*================*/ - ulint space, /* in: space id */ - ulint size_inc,/* in: size increment in pages */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************** -Creates a new segment. 
*/ - -page_t* -fseg_create( -/*========*/ - /* out: the page where the segment header is placed, - x-latched, NULL if could not create segment - because of lack of space */ - ulint space, /* in: space id */ - ulint page, /* in: page where the segment header is placed: if - this is != 0, the page must belong to another segment, - if this is 0, a new page will be allocated and it - will belong to the created segment */ - ulint byte_offset, /* in: byte offset of the created segment header - on the page */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************** -Creates a new segment. */ - -page_t* -fseg_create_general( -/*================*/ - /* out: the page where the segment header is placed, - x-latched, NULL if could not create segment - because of lack of space */ - ulint space, /* in: space id */ - ulint page, /* in: page where the segment header is placed: if - this is != 0, the page must belong to another segment, - if this is 0, a new page will be allocated and it - will belong to the created segment */ - ulint byte_offset, /* in: byte offset of the created segment header - on the page */ - ibool has_done_reservation, /* in: TRUE if the caller has already - done the reservation for the pages with - fsp_reserve_free_extents (at least 2 extents: one for - the inode and the other for the segment) then there is - no need to do the check for this individual - operation */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************** -Calculates the number of pages reserved by a segment, and how many pages are -currently used. 
*/ - -ulint -fseg_n_reserved_pages( -/*==================*/ - /* out: number of reserved pages */ - fseg_header_t* header, /* in: segment header */ - ulint* used, /* out: number of pages used (<= reserved) */ - mtr_t* mtr); /* in: mtr handle */ -/************************************************************************** -Allocates a single free page from a segment. This function implements -the intelligent allocation strategy which tries to minimize -file space fragmentation. */ - -ulint -fseg_alloc_free_page( -/*=================*/ - /* out: the allocated page offset - FIL_NULL if no page could be allocated */ - fseg_header_t* seg_header, /* in: segment header */ - ulint hint, /* in: hint of which page would be desirable */ - byte direction, /* in: if the new page is needed because - of an index page split, and records are - inserted there in order, into which - direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR */ - mtr_t* mtr); /* in: mtr handle */ -/************************************************************************** -Allocates a single free page from a segment. This function implements -the intelligent allocation strategy which tries to minimize file space -fragmentation. 
*/ - -ulint -fseg_alloc_free_page_general( -/*=========================*/ - /* out: allocated page offset, FIL_NULL if no - page could be allocated */ - fseg_header_t* seg_header,/* in: segment header */ - ulint hint, /* in: hint of which page would be desirable */ - byte direction,/* in: if the new page is needed because - of an index page split, and records are - inserted there in order, into which - direction they go alphabetically: FSP_DOWN, - FSP_UP, FSP_NO_DIR */ - ibool has_done_reservation, /* in: TRUE if the caller has - already done the reservation for the page - with fsp_reserve_free_extents, then there - is no need to do the check for this individual - page */ - mtr_t* mtr); /* in: mtr handle */ -/************************************************************************** -Reserves free pages from a tablespace. All mini-transactions which may -use several pages from the tablespace should call this function beforehand -and reserve enough free extents so that they certainly will be able -to do their operation, like a B-tree page split, fully. Reservations -must be released with function fil_space_release_free_extents! - -The alloc_type below has the following meaning: FSP_NORMAL means an -operation which will probably result in more space usage, like an -insert in a B-tree; FSP_UNDO means allocation to undo logs: if we are -deleting rows, then this allocation will in the long run result in -less space usage (after a purge); FSP_CLEANING means allocation done -in a physical record delete (like in a purge) or other cleaning operation -which will result in less space usage in the long run. We prefer the latter -two types of allocation: when space is scarce, FSP_NORMAL allocations -will not succeed, but the latter two allocations will succeed, if possible. -The purpose is to avoid dead end where the database is full but the -user cannot free any space because these freeing operations temporarily -reserve some space. 
- -Single-table tablespaces whose size is < 32 pages are a special case. In this -function we would liberally reserve several 64 page extents for every page -split or merge in a B-tree. But we do not want to waste disk space if the table -only occupies < 32 pages. That is why we apply different rules in that special -case, just ensuring that there are 3 free pages available. */ - -ibool -fsp_reserve_free_extents( -/*=====================*/ - /* out: TRUE if we were able to make the reservation */ - ulint* n_reserved,/* out: number of extents actually reserved; if we - return TRUE and the tablespace size is < 64 pages, - then this can be 0, otherwise it is n_ext */ - ulint space, /* in: space id */ - ulint n_ext, /* in: number of extents to reserve */ - ulint alloc_type,/* in: FSP_NORMAL, FSP_UNDO, or FSP_CLEANING */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************** -This function should be used to get information on how much we still -will be able to insert new data to the database without running out the -tablespace. Only free extents are taken into account and we also subtract -the safety margin required by the above function fsp_reserve_free_extents. */ - -ullint -fsp_get_available_space_in_free_extents( -/*====================================*/ - /* out: available space in kB */ - ulint space); /* in: space id */ -/************************************************************************** -Frees a single page of a segment. */ - -void -fseg_free_page( -/*===========*/ - fseg_header_t* seg_header, /* in: segment header */ - ulint space, /* in: space id */ - ulint page, /* in: page offset */ - mtr_t* mtr); /* in: mtr handle */ -/*********************************************************************** -Frees a segment. The freeing is performed in several mini-transactions, -so that there is no danger of bufferfixing too many buffer pages. 
*/ - -void -fseg_free( -/*======*/ - ulint space, /* in: space id */ - ulint page_no,/* in: page number where the segment header is - placed */ - ulint offset);/* in: byte offset of the segment header on that - page */ -/************************************************************************** -Frees part of a segment. This function can be used to free a segment -by repeatedly calling this function in different mini-transactions. -Doing the freeing in a single mini-transaction might result in -too big a mini-transaction. */ - -ibool -fseg_free_step( -/*===========*/ - /* out: TRUE if freeing completed */ - fseg_header_t* header, /* in, own: segment header; NOTE: if the header - resides on the first page of the frag list - of the segment, this pointer becomes obsolete - after the last freeing step */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************** -Frees part of a segment. Differs from fseg_free_step because this function -leaves the header page unfreed. */ - -ibool -fseg_free_step_not_header( -/*======================*/ - /* out: TRUE if freeing completed, except the - header page */ - fseg_header_t* header, /* in: segment header which must reside on - the first fragment page of the segment */ - mtr_t* mtr); /* in: mtr */ -/*************************************************************************** -Checks if a page address is an extent descriptor page address. */ -UNIV_INLINE -ibool -fsp_descr_page( -/*===========*/ - /* out: TRUE if a descriptor page */ - ulint page_no);/* in: page number */ -/*************************************************************** -Parses a redo log record of a file page init. 
*/ - -byte* -fsp_parse_init_file_page( -/*=====================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page); /* in: page or NULL */ -/*********************************************************************** -Validates the file space system and its segments. */ - -ibool -fsp_validate( -/*=========*/ - /* out: TRUE if ok */ - ulint space); /* in: space id */ -/*********************************************************************** -Prints info of a file space. */ - -void -fsp_print( -/*======*/ - ulint space); /* in: space id */ -/*********************************************************************** -Validates a segment. */ - -ibool -fseg_validate( -/*==========*/ - /* out: TRUE if ok */ - fseg_header_t* header, /* in: segment header */ - mtr_t* mtr2); /* in: mtr */ -/*********************************************************************** -Writes info of a segment. */ - -void -fseg_print( -/*=======*/ - fseg_header_t* header, /* in: segment header */ - mtr_t* mtr); /* in: mtr */ - -/* Flags for fsp_reserve_free_extents */ -#define FSP_NORMAL 1000000 -#define FSP_UNDO 2000000 -#define FSP_CLEANING 3000000 - -/* Number of pages described in a single descriptor page: currently each page -description takes less than 1 byte; a descriptor page is repeated every -this many file pages */ -#define XDES_DESCRIBED_PER_PAGE UNIV_PAGE_SIZE - -/* The space low address page map */ -/*--------------------------------------*/ - /* The following two pages are repeated - every XDES_DESCRIBED_PER_PAGE pages in - every tablespace. */ -#define FSP_XDES_OFFSET 0 /* extent descriptor */ -#define FSP_IBUF_BITMAP_OFFSET 1 /* insert buffer bitmap */ - /* The ibuf bitmap pages are the ones whose - page number is the number above plus a - multiple of XDES_DESCRIBED_PER_PAGE */ - -#define FSP_FIRST_INODE_PAGE_NO 2 /* in every tablespace */ - /* The following pages exist - in the system tablespace (space 0). 
*/ -#define FSP_IBUF_HEADER_PAGE_NO 3 /* in tablespace 0 */ -#define FSP_IBUF_TREE_ROOT_PAGE_NO 4 /* in tablespace 0 */ - /* The ibuf tree root page number in - tablespace 0; its fseg inode is on the page - number FSP_FIRST_INODE_PAGE_NO */ -#define FSP_TRX_SYS_PAGE_NO 5 /* in tablespace 0 */ -#define FSP_FIRST_RSEG_PAGE_NO 6 /* in tablespace 0 */ -#define FSP_DICT_HDR_PAGE_NO 7 /* in tablespace 0 */ -/*--------------------------------------*/ - -#ifndef UNIV_NONINL -#include "fsp0fsp.ic" -#endif - -#endif diff --git a/storage/innobase/include/fsp0fsp.ic b/storage/innobase/include/fsp0fsp.ic deleted file mode 100644 index 89cd9263bd6..00000000000 --- a/storage/innobase/include/fsp0fsp.ic +++ /dev/null @@ -1,24 +0,0 @@ -/****************************************************** -File space management - -(c) 1995 Innobase Oy - -Created 12/18/1995 Heikki Tuuri -*******************************************************/ - -/*************************************************************************** -Checks if a page address is an extent descriptor page address. */ -UNIV_INLINE -ibool -fsp_descr_page( -/*===========*/ - /* out: TRUE if a descriptor page */ - ulint page_no)/* in: page number */ -{ - if (page_no % XDES_DESCRIBED_PER_PAGE == FSP_XDES_OFFSET) { - - return(TRUE); - } - - return(FALSE); -} diff --git a/storage/innobase/include/fut0fut.h b/storage/innobase/include/fut0fut.h deleted file mode 100644 index b9546b4e1a0..00000000000 --- a/storage/innobase/include/fut0fut.h +++ /dev/null @@ -1,36 +0,0 @@ -/********************************************************************** -File-based utilities - -(c) 1995 Innobase Oy - -Created 12/13/1995 Heikki Tuuri -***********************************************************************/ - - -#ifndef fut0fut_h -#define fut0fut_h - -#include "univ.i" - -#include "fil0fil.h" -#include "mtr0mtr.h" - -/************************************************************************ -Gets a pointer to a file address and latches the page. 
*/ -UNIV_INLINE -byte* -fut_get_ptr( -/*========*/ - /* out: pointer to a byte in a frame; the file - page in the frame is bufferfixed and latched */ - ulint space, /* in: space id */ - fil_addr_t addr, /* in: file address */ - ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */ - mtr_t* mtr); /* in: mtr handle */ - -#ifndef UNIV_NONINL -#include "fut0fut.ic" -#endif - -#endif - diff --git a/storage/innobase/include/fut0fut.ic b/storage/innobase/include/fut0fut.ic deleted file mode 100644 index 6a107786376..00000000000 --- a/storage/innobase/include/fut0fut.ic +++ /dev/null @@ -1,38 +0,0 @@ -/********************************************************************** -File-based utilities - -(c) 1995 Innobase Oy - -Created 12/13/1995 Heikki Tuuri -***********************************************************************/ - -#include "sync0rw.h" -#include "buf0buf.h" - -/************************************************************************ -Gets a pointer to a file address and latches the page. 
*/ -UNIV_INLINE -byte* -fut_get_ptr( -/*========*/ - /* out: pointer to a byte in a frame; the file - page in the frame is bufferfixed and latched */ - ulint space, /* in: space id */ - fil_addr_t addr, /* in: file address */ - ulint rw_latch, /* in: RW_S_LATCH, RW_X_LATCH */ - mtr_t* mtr) /* in: mtr handle */ -{ - byte* ptr; - - ut_ad(mtr); - ut_ad(addr.boffset < UNIV_PAGE_SIZE); - ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH)); - - ptr = buf_page_get(space, addr.page, rw_latch, mtr) + addr.boffset; - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(ptr, SYNC_NO_ORDER_CHECK); -#endif /* UNIV_SYNC_DEBUG */ - - return(ptr); -} diff --git a/storage/innobase/include/fut0lst.h b/storage/innobase/include/fut0lst.h deleted file mode 100644 index 5427e2248da..00000000000 --- a/storage/innobase/include/fut0lst.h +++ /dev/null @@ -1,198 +0,0 @@ -/********************************************************************** -File-based list utilities - -(c) 1995 Innobase Oy - -Created 11/28/1995 Heikki Tuuri -***********************************************************************/ - -#ifndef fut0lst_h -#define fut0lst_h - -#include "univ.i" - -#include "fil0fil.h" -#include "mtr0mtr.h" - - -/* The C 'types' of base node and list node: these should be used to -write self-documenting code. Of course, the sizeof macro cannot be -applied to these types! */ - -typedef byte flst_base_node_t; -typedef byte flst_node_t; - -/* The physical size of a list base node in bytes */ -#define FLST_BASE_NODE_SIZE (4 + 2 * FIL_ADDR_SIZE) - -/* The physical size of a list node in bytes */ -#define FLST_NODE_SIZE (2 * FIL_ADDR_SIZE) - - -/************************************************************************ -Initializes a list base node. 
*/ -UNIV_INLINE -void -flst_init( -/*======*/ - flst_base_node_t* base, /* in: pointer to base node */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************ -Adds a node as the last node in a list. */ - -void -flst_add_last( -/*==========*/ - flst_base_node_t* base, /* in: pointer to base node of list */ - flst_node_t* node, /* in: node to add */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************ -Adds a node as the first node in a list. */ - -void -flst_add_first( -/*===========*/ - flst_base_node_t* base, /* in: pointer to base node of list */ - flst_node_t* node, /* in: node to add */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************ -Inserts a node after another in a list. */ - -void -flst_insert_after( -/*==============*/ - flst_base_node_t* base, /* in: pointer to base node of list */ - flst_node_t* node1, /* in: node to insert after */ - flst_node_t* node2, /* in: node to add */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************ -Inserts a node before another in a list. */ - -void -flst_insert_before( -/*===============*/ - flst_base_node_t* base, /* in: pointer to base node of list */ - flst_node_t* node2, /* in: node to insert */ - flst_node_t* node3, /* in: node to insert before */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************ -Removes a node. */ - -void -flst_remove( -/*========*/ - flst_base_node_t* base, /* in: pointer to base node of list */ - flst_node_t* node2, /* in: node to remove */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************ -Cuts off the tail of the list, including the node given. 
The number of -nodes which will be removed must be provided by the caller, as this function -does not measure the length of the tail. */ - -void -flst_cut_end( -/*=========*/ - flst_base_node_t* base, /* in: pointer to base node of list */ - flst_node_t* node2, /* in: first node to remove */ - ulint n_nodes,/* in: number of nodes to remove, - must be >= 1 */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************ -Cuts off the tail of the list, not including the given node. The number of -nodes which will be removed must be provided by the caller, as this function -does not measure the length of the tail. */ - -void -flst_truncate_end( -/*==============*/ - flst_base_node_t* base, /* in: pointer to base node of list */ - flst_node_t* node2, /* in: first node not to remove */ - ulint n_nodes,/* in: number of nodes to remove */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************ -Gets list length. */ -UNIV_INLINE -ulint -flst_get_len( -/*=========*/ - /* out: length */ - flst_base_node_t* base, /* in: pointer to base node */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************ -Gets list first node address. */ -UNIV_INLINE -fil_addr_t -flst_get_first( -/*===========*/ - /* out: file address */ - flst_base_node_t* base, /* in: pointer to base node */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************ -Gets list last node address. */ -UNIV_INLINE -fil_addr_t -flst_get_last( -/*==========*/ - /* out: file address */ - flst_base_node_t* base, /* in: pointer to base node */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************ -Gets list next node address. 
*/ -UNIV_INLINE -fil_addr_t -flst_get_next_addr( -/*===============*/ - /* out: file address */ - flst_node_t* node, /* in: pointer to node */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************ -Gets list prev node address. */ -UNIV_INLINE -fil_addr_t -flst_get_prev_addr( -/*===============*/ - /* out: file address */ - flst_node_t* node, /* in: pointer to node */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************ -Writes a file address. */ -UNIV_INLINE -void -flst_write_addr( -/*============*/ - fil_faddr_t* faddr, /* in: pointer to file faddress */ - fil_addr_t addr, /* in: file address */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************ -Reads a file address. */ -UNIV_INLINE -fil_addr_t -flst_read_addr( -/*===========*/ - /* out: file address */ - fil_faddr_t* faddr, /* in: pointer to file faddress */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************ -Validates a file-based list. */ - -ibool -flst_validate( -/*==========*/ - /* out: TRUE if ok */ - flst_base_node_t* base, /* in: pointer to base node of list */ - mtr_t* mtr1); /* in: mtr */ -/************************************************************************ -Prints info of a file-based list. 
*/ - -void -flst_print( -/*=======*/ - flst_base_node_t* base, /* in: pointer to base node of list */ - mtr_t* mtr); /* in: mtr */ - - -#ifndef UNIV_NONINL -#include "fut0lst.ic" -#endif - -#endif diff --git a/storage/innobase/include/fut0lst.ic b/storage/innobase/include/fut0lst.ic deleted file mode 100644 index 6c7e863b078..00000000000 --- a/storage/innobase/include/fut0lst.ic +++ /dev/null @@ -1,147 +0,0 @@ -/********************************************************************** -File-based list utilities - -(c) 1995 Innobase Oy - -Created 11/28/1995 Heikki Tuuri -***********************************************************************/ - -#include "fut0fut.h" -#include "mtr0log.h" -#include "buf0buf.h" - -/* We define the field offsets of a node for the list */ -#define FLST_PREV 0 /* 6-byte address of the previous list element; - the page part of address is FIL_NULL, if no - previous element */ -#define FLST_NEXT FIL_ADDR_SIZE /* 6-byte address of the next - list element; the page part of address - is FIL_NULL, if no next element */ - -/* We define the field offsets of a base node for the list */ -#define FLST_LEN 0 /* 32-bit list length field */ -#define FLST_FIRST 4 /* 6-byte address of the first element - of the list; undefined if empty list */ -#define FLST_LAST (4 + FIL_ADDR_SIZE) /* 6-byte address of the - last element of the list; undefined - if empty list */ - -/************************************************************************ -Writes a file address. 
*/ -UNIV_INLINE -void -flst_write_addr( -/*============*/ - fil_faddr_t* faddr, /* in: pointer to file faddress */ - fil_addr_t addr, /* in: file address */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ut_ad(faddr && mtr); - ut_ad(mtr_memo_contains(mtr, buf_block_align(faddr), - MTR_MEMO_PAGE_X_FIX)); - - mlog_write_ulint(faddr + FIL_ADDR_PAGE, addr.page, MLOG_4BYTES, mtr); - mlog_write_ulint(faddr + FIL_ADDR_BYTE, addr.boffset, - MLOG_2BYTES, mtr); -} - -/************************************************************************ -Reads a file address. */ -UNIV_INLINE -fil_addr_t -flst_read_addr( -/*===========*/ - /* out: file address */ - fil_faddr_t* faddr, /* in: pointer to file faddress */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - fil_addr_t addr; - - ut_ad(faddr && mtr); - - addr.page = mtr_read_ulint(faddr + FIL_ADDR_PAGE, MLOG_4BYTES, mtr); - addr.boffset = mtr_read_ulint(faddr + FIL_ADDR_BYTE, MLOG_2BYTES, - mtr); - return(addr); -} - -/************************************************************************ -Initializes a list base node. */ -UNIV_INLINE -void -flst_init( -/*======*/ - flst_base_node_t* base, /* in: pointer to base node */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ut_ad(mtr_memo_contains(mtr, buf_block_align(base), - MTR_MEMO_PAGE_X_FIX)); - mlog_write_ulint(base + FLST_LEN, 0, MLOG_4BYTES, mtr); - flst_write_addr(base + FLST_FIRST, fil_addr_null, mtr); - flst_write_addr(base + FLST_LAST, fil_addr_null, mtr); -} - -/************************************************************************ -Gets list length. */ -UNIV_INLINE -ulint -flst_get_len( -/*=========*/ - /* out: length */ - flst_base_node_t* base, /* in: pointer to base node */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - return(mtr_read_ulint(base + FLST_LEN, MLOG_4BYTES, mtr)); -} - -/************************************************************************ -Gets list first node address. 
*/ -UNIV_INLINE -fil_addr_t -flst_get_first( -/*===========*/ - /* out: file address */ - flst_base_node_t* base, /* in: pointer to base node */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - return(flst_read_addr(base + FLST_FIRST, mtr)); -} - -/************************************************************************ -Gets list last node address. */ -UNIV_INLINE -fil_addr_t -flst_get_last( -/*==========*/ - /* out: file address */ - flst_base_node_t* base, /* in: pointer to base node */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - return(flst_read_addr(base + FLST_LAST, mtr)); -} - -/************************************************************************ -Gets list next node address. */ -UNIV_INLINE -fil_addr_t -flst_get_next_addr( -/*===============*/ - /* out: file address */ - flst_node_t* node, /* in: pointer to node */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - return(flst_read_addr(node + FLST_NEXT, mtr)); -} - -/************************************************************************ -Gets list prev node address. */ -UNIV_INLINE -fil_addr_t -flst_get_prev_addr( -/*===============*/ - /* out: file address */ - flst_node_t* node, /* in: pointer to node */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - return(flst_read_addr(node + FLST_PREV, mtr)); -} diff --git a/storage/innobase/include/ha0ha.h b/storage/innobase/include/ha0ha.h deleted file mode 100644 index beaa06ae755..00000000000 --- a/storage/innobase/include/ha0ha.h +++ /dev/null @@ -1,140 +0,0 @@ -/****************************************************** -The hash table with external chains - -(c) 1994-1997 Innobase Oy - -Created 8/18/1994 Heikki Tuuri -*******************************************************/ - -#ifndef ha0ha_h -#define ha0ha_h - -#include "univ.i" - -#include "hash0hash.h" -#include "page0types.h" - -/***************************************************************** -Looks for an element in a hash table. 
*/ -UNIV_INLINE -void* -ha_search_and_get_data( -/*===================*/ - /* out: pointer to the data of the first hash - table node in chain having the fold number, - NULL if not found */ - hash_table_t* table, /* in: hash table */ - ulint fold); /* in: folded value of the searched data */ -/************************************************************* -Looks for an element when we know the pointer to the data and updates -the pointer to data if found. */ - -void -ha_search_and_update_if_found( -/*==========================*/ - hash_table_t* table, /* in: hash table */ - ulint fold, /* in: folded value of the searched data */ - void* data, /* in: pointer to the data */ - void* new_data);/* in: new pointer to the data */ -/***************************************************************** -Creates a hash table with >= n array cells. The actual number of cells is -chosen to be a prime number slightly bigger than n. */ - -hash_table_t* -ha_create_func( -/*===========*/ - /* out, own: created table */ - ibool in_btr_search, /* in: TRUE if the hash table is used in - the btr_search module */ - ulint n, /* in: number of array cells */ -#ifdef UNIV_SYNC_DEBUG - ulint mutex_level, /* in: level of the mutexes in the latching - order: this is used in the debug version */ -#endif /* UNIV_SYNC_DEBUG */ - ulint n_mutexes); /* in: number of mutexes to protect the - hash table: must be a power of 2 */ -#ifdef UNIV_SYNC_DEBUG -# define ha_create(b,n_c,n_m,level) ha_create_func(b,n_c,level,n_m) -#else /* UNIV_SYNC_DEBUG */ -# define ha_create(b,n_c,n_m,level) ha_create_func(b,n_c,n_m) -#endif /* UNIV_SYNC_DEBUG */ -/***************************************************************** -Inserts an entry into a hash table. If an entry with the same fold number -is found, its node is updated to point to the new data, and no new node -is inserted. 
*/ - -ibool -ha_insert_for_fold( -/*===============*/ - /* out: TRUE if succeed, FALSE if no more - memory could be allocated */ - hash_table_t* table, /* in: hash table */ - ulint fold, /* in: folded value of data; if a node with - the same fold value already exists, it is - updated to point to the same data, and no new - node is created! */ - void* data); /* in: data, must not be NULL */ -/***************************************************************** -Deletes an entry from a hash table. */ - -void -ha_delete( -/*======*/ - hash_table_t* table, /* in: hash table */ - ulint fold, /* in: folded value of data */ - void* data); /* in: data, must not be NULL and must exist - in the hash table */ -/************************************************************* -Looks for an element when we know the pointer to the data and deletes -it from the hash table if found. */ -UNIV_INLINE -ibool -ha_search_and_delete_if_found( -/*==========================*/ - /* out: TRUE if found */ - hash_table_t* table, /* in: hash table */ - ulint fold, /* in: folded value of the searched data */ - void* data); /* in: pointer to the data */ -/********************************************************************* -Removes from the chain determined by fold all nodes whose data pointer -points to the page given. */ - -void -ha_remove_all_nodes_to_page( -/*========================*/ - hash_table_t* table, /* in: hash table */ - ulint fold, /* in: fold value */ - page_t* page); /* in: buffer page */ -/***************************************************************** -Validates a given range of the cells in hash table. */ - -ibool -ha_validate( -/*========*/ - /* out: TRUE if ok */ - hash_table_t* table, /* in: hash table */ - ulint start_index, /* in: start index */ - ulint end_index); /* in: end index */ -/***************************************************************** -Prints info of a hash table. 
*/ - -void -ha_print_info( -/*==========*/ - FILE* file, /* in: file where to print */ - hash_table_t* table); /* in: hash table */ - -/* The hash table external chain node */ - -typedef struct ha_node_struct ha_node_t; -struct ha_node_struct { - ha_node_t* next; /* next chain node or NULL if none */ - void* data; /* pointer to the data */ - ulint fold; /* fold value for the data */ -}; - -#ifndef UNIV_NONINL -#include "ha0ha.ic" -#endif - -#endif diff --git a/storage/innobase/include/ha0ha.ic b/storage/innobase/include/ha0ha.ic deleted file mode 100644 index fb264377f28..00000000000 --- a/storage/innobase/include/ha0ha.ic +++ /dev/null @@ -1,185 +0,0 @@ -/************************************************************************ -The hash table with external chains - -(c) 1994-1997 Innobase Oy - -Created 8/18/1994 Heikki Tuuri -*************************************************************************/ - -#include "ut0rnd.h" -#include "mem0mem.h" - -/*************************************************************** -Deletes a hash node. */ - -void -ha_delete_hash_node( -/*================*/ - hash_table_t* table, /* in: hash table */ - ha_node_t* del_node); /* in: node to be deleted */ - -/********************************************************************** -Gets a hash node data. */ -UNIV_INLINE -void* -ha_node_get_data( -/*=============*/ - /* out: pointer to the data */ - ha_node_t* node) /* in: hash chain node */ -{ - return(node->data); -} - -/********************************************************************** -Sets hash node data. */ -UNIV_INLINE -void -ha_node_set_data( -/*=============*/ - ha_node_t* node, /* in: hash chain node */ - void* data) /* in: pointer to the data */ -{ - node->data = data; -} - -/********************************************************************** -Gets the next node in a hash chain. 
*/ -UNIV_INLINE -ha_node_t* -ha_chain_get_next( -/*==============*/ - /* out: next node, NULL if none */ - ha_node_t* node) /* in: hash chain node */ -{ - return(node->next); -} - -/********************************************************************** -Gets the first node in a hash chain. */ -UNIV_INLINE -ha_node_t* -ha_chain_get_first( -/*===============*/ - /* out: first node, NULL if none */ - hash_table_t* table, /* in: hash table */ - ulint fold) /* in: fold value determining the chain */ -{ - return(hash_get_nth_cell(table, hash_calc_hash(fold, table))->node); -} - -/***************************************************************** -Looks for an element in a hash table. */ -UNIV_INLINE -ha_node_t* -ha_search( -/*======*/ - /* out: pointer to the first hash table node - in chain having the fold number, NULL if not - found */ - hash_table_t* table, /* in: hash table */ - ulint fold) /* in: folded value of the searched data */ -{ - ha_node_t* node; - - ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold))); - - node = ha_chain_get_first(table, fold); - - while (node) { - if (node->fold == fold) { - - return(node); - } - - node = ha_chain_get_next(node); - } - - return(NULL); -} - -/***************************************************************** -Looks for an element in a hash table. 
*/ -UNIV_INLINE -void* -ha_search_and_get_data( -/*===================*/ - /* out: pointer to the data of the first hash - table node in chain having the fold number, - NULL if not found */ - hash_table_t* table, /* in: hash table */ - ulint fold) /* in: folded value of the searched data */ -{ - ha_node_t* node; - - ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold))); - - node = ha_chain_get_first(table, fold); - - while (node) { - if (node->fold == fold) { - - return(node->data); - } - - node = ha_chain_get_next(node); - } - - return(NULL); -} - -/************************************************************* -Looks for an element when we know the pointer to the data. */ -UNIV_INLINE -ha_node_t* -ha_search_with_data( -/*================*/ - /* out: pointer to the hash table node, NULL - if not found in the table */ - hash_table_t* table, /* in: hash table */ - ulint fold, /* in: folded value of the searched data */ - void* data) /* in: pointer to the data */ -{ - ha_node_t* node; - - ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold))); - - node = ha_chain_get_first(table, fold); - - while (node) { - if (node->data == data) { - - return(node); - } - - node = ha_chain_get_next(node); - } - - return(NULL); -} - -/************************************************************* -Looks for an element when we know the pointer to the data, and deletes -it from the hash table, if found. 
*/ -UNIV_INLINE -ibool -ha_search_and_delete_if_found( -/*==========================*/ - /* out: TRUE if found */ - hash_table_t* table, /* in: hash table */ - ulint fold, /* in: folded value of the searched data */ - void* data) /* in: pointer to the data */ -{ - ha_node_t* node; - - ut_ad(!table->mutexes || mutex_own(hash_get_mutex(table, fold))); - - node = ha_search_with_data(table, fold, data); - - if (node) { - ha_delete_hash_node(table, node); - - return(TRUE); - } - - return(FALSE); -} diff --git a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h deleted file mode 100644 index 6bfc43579b3..00000000000 --- a/storage/innobase/include/ha_prototypes.h +++ /dev/null @@ -1,76 +0,0 @@ -#ifndef HA_INNODB_PROTOTYPES_H -#define HA_INNODB_PROTOTYPES_H - -#ifndef UNIV_HOTBACKUP - -#include "univ.i" /* ulint, uint */ -#include "m_ctype.h" /* CHARSET_INFO */ - -/* Prototypes for global functions in ha_innodb.cc that are called by -InnoDB's C-code. */ - -/************************************************************************* -Wrapper around MySQL's copy_and_convert function, see it for -documentation. */ - -ulint -innobase_convert_string( -/*====================*/ - void* to, - ulint to_length, - CHARSET_INFO* to_cs, - const void* from, - ulint from_length, - CHARSET_INFO* from_cs, - uint* errors); - -/********************************************************************* -Display an SQL identifier. */ - -void -innobase_print_identifier( -/*======================*/ - FILE* f, /* in: output stream */ - trx_t* trx, /* in: transaction */ - ibool table_id,/* in: TRUE=print a table name, - FALSE=print other identifier */ - const char* name, /* in: name to print */ - ulint namelen);/* in: length of name */ - -/********************************************************************** -Returns true if the thread is the replication thread on the slave -server. 
Used in srv_conc_enter_innodb() to determine if the thread -should be allowed to enter InnoDB - the replication thread is treated -differently than other threads. Also used in -srv_conc_force_exit_innodb(). */ - -ibool -thd_is_replication_slave_thread( -/*============================*/ - /* out: true if thd is the replication thread */ - void* thd); /* in: thread handle (THD*) */ - -/********************************************************************** -Returns true if the transaction this thread is processing has edited -non-transactional tables. Used by the deadlock detector when deciding -which transaction to rollback in case of a deadlock - we try to avoid -rolling back transactions that have edited non-transactional tables. */ - -ibool -thd_has_edited_nontrans_tables( -/*===========================*/ - /* out: true if non-transactional tables have - been edited */ - void* thd); /* in: thread handle (THD*) */ - -/********************************************************************** -Returns true if the thread is executing a SELECT statement. 
*/ - -ibool -thd_is_select( -/*==========*/ - /* out: true if thd is executing SELECT */ - const void* thd); /* in: thread handle (THD*) */ - -#endif -#endif diff --git a/storage/innobase/include/hash0hash.h b/storage/innobase/include/hash0hash.h deleted file mode 100644 index e119a117c94..00000000000 --- a/storage/innobase/include/hash0hash.h +++ /dev/null @@ -1,367 +0,0 @@ -/****************************************************** -The simple hash table utility - -(c) 1997 Innobase Oy - -Created 5/20/1997 Heikki Tuuri -*******************************************************/ - -#ifndef hash0hash_h -#define hash0hash_h - -#include "univ.i" -#include "mem0mem.h" -#include "sync0sync.h" - -typedef struct hash_table_struct hash_table_t; -typedef struct hash_cell_struct hash_cell_t; - -typedef void* hash_node_t; - -/* Fix Bug #13859: symbol collision between imap/mysql */ -#define hash_create hash0_create - -/***************************************************************** -Creates a hash table with >= n array cells. The actual number -of cells is chosen to be a prime number slightly bigger than n. */ - -hash_table_t* -hash_create( -/*========*/ - /* out, own: created table */ - ulint n); /* in: number of array cells */ -/***************************************************************** -Creates a mutex array to protect a hash table. */ - -void -hash_create_mutexes_func( -/*=====================*/ - hash_table_t* table, /* in: hash table */ -#ifdef UNIV_SYNC_DEBUG - ulint sync_level, /* in: latching order level of the - mutexes: used in the debug version */ -#endif /* UNIV_SYNC_DEBUG */ - ulint n_mutexes); /* in: number of mutexes */ -#ifdef UNIV_SYNC_DEBUG -# define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,level,n) -#else /* UNIV_SYNC_DEBUG */ -# define hash_create_mutexes(t,n,level) hash_create_mutexes_func(t,n) -#endif /* UNIV_SYNC_DEBUG */ - -/***************************************************************** -Frees a hash table. 
*/ - -void -hash_table_free( -/*============*/ - hash_table_t* table); /* in, own: hash table */ -/****************************************************************** -Calculates the hash value from a folded value. */ -UNIV_INLINE -ulint -hash_calc_hash( -/*===========*/ - /* out: hashed value */ - ulint fold, /* in: folded value */ - hash_table_t* table); /* in: hash table */ -/************************************************************************ -Assert that the mutex for the table in a hash operation is owned. */ -#ifdef UNIV_SYNC_DEBUG -# define HASH_ASSERT_OWNED(TABLE, FOLD) \ -ut_ad(!(TABLE)->mutexes || mutex_own(hash_get_mutex(TABLE, FOLD))); -#else -# define HASH_ASSERT_OWNED(TABLE, FOLD) -#endif - -/*********************************************************************** -Inserts a struct to a hash table. */ - -#define HASH_INSERT(TYPE, NAME, TABLE, FOLD, DATA)\ -do {\ - hash_cell_t* cell3333;\ - TYPE* struct3333;\ -\ - HASH_ASSERT_OWNED(TABLE, FOLD)\ -\ - (DATA)->NAME = NULL;\ -\ - cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\ -\ - if (cell3333->node == NULL) {\ - cell3333->node = DATA;\ - } else {\ - struct3333 = cell3333->node;\ -\ - while (struct3333->NAME != NULL) {\ -\ - struct3333 = struct3333->NAME;\ - }\ -\ - struct3333->NAME = DATA;\ - }\ -} while (0) - -/*********************************************************************** -Deletes a struct from a hash table. 
*/ - -#define HASH_DELETE(TYPE, NAME, TABLE, FOLD, DATA)\ -do {\ - hash_cell_t* cell3333;\ - TYPE* struct3333;\ -\ - HASH_ASSERT_OWNED(TABLE, FOLD)\ -\ - cell3333 = hash_get_nth_cell(TABLE, hash_calc_hash(FOLD, TABLE));\ -\ - if (cell3333->node == DATA) {\ - cell3333->node = DATA->NAME;\ - } else {\ - struct3333 = cell3333->node;\ -\ - while (struct3333->NAME != DATA) {\ -\ - struct3333 = struct3333->NAME;\ - ut_a(struct3333);\ - }\ -\ - struct3333->NAME = DATA->NAME;\ - }\ -} while (0) - -/*********************************************************************** -Gets the first struct in a hash chain, NULL if none. */ - -#define HASH_GET_FIRST(TABLE, HASH_VAL)\ - (hash_get_nth_cell(TABLE, HASH_VAL)->node) - -/*********************************************************************** -Gets the next struct in a hash chain, NULL if none. */ - -#define HASH_GET_NEXT(NAME, DATA) ((DATA)->NAME) - -/************************************************************************ -Looks for a struct in a hash table. */ -#define HASH_SEARCH(NAME, TABLE, FOLD, DATA, TEST)\ -{\ -\ - HASH_ASSERT_OWNED(TABLE, FOLD)\ -\ - (DATA) = HASH_GET_FIRST(TABLE, hash_calc_hash(FOLD, TABLE));\ -\ - while ((DATA) != NULL) {\ - if (TEST) {\ - break;\ - } else {\ - (DATA) = HASH_GET_NEXT(NAME, DATA);\ - }\ - }\ -} - -/**************************************************************** -Gets the nth cell in a hash table. */ -UNIV_INLINE -hash_cell_t* -hash_get_nth_cell( -/*==============*/ - /* out: pointer to cell */ - hash_table_t* table, /* in: hash table */ - ulint n); /* in: cell index */ -/***************************************************************** -Returns the number of cells in a hash table. */ -UNIV_INLINE -ulint -hash_get_n_cells( -/*=============*/ - /* out: number of cells */ - hash_table_t* table); /* in: table */ -/*********************************************************************** -Deletes a struct which is stored in the heap of the hash table, and compacts -the heap. 
The fold value must be stored in the struct NODE in a field named -'fold'. */ - -#define HASH_DELETE_AND_COMPACT(TYPE, NAME, TABLE, NODE)\ -do {\ - TYPE* node111;\ - TYPE* top_node111;\ - hash_cell_t* cell111;\ - ulint fold111;\ -\ - fold111 = (NODE)->fold;\ -\ - HASH_DELETE(TYPE, NAME, TABLE, fold111, NODE);\ -\ - top_node111 = (TYPE*)mem_heap_get_top(\ - hash_get_heap(TABLE, fold111),\ - sizeof(TYPE));\ -\ - /* If the node to remove is not the top node in the heap, compact the\ - heap of nodes by moving the top node in the place of NODE. */\ -\ - if (NODE != top_node111) {\ -\ - /* Copy the top node in place of NODE */\ -\ - *(NODE) = *top_node111;\ -\ - cell111 = hash_get_nth_cell(TABLE,\ - hash_calc_hash(top_node111->fold, TABLE));\ -\ - /* Look for the pointer to the top node, to update it */\ -\ - if (cell111->node == top_node111) {\ - /* The top node is the first in the chain */\ -\ - cell111->node = NODE;\ - } else {\ - /* We have to look for the predecessor of the top\ - node */\ - node111 = cell111->node;\ -\ - while (top_node111 != HASH_GET_NEXT(NAME, node111)) {\ -\ - node111 = HASH_GET_NEXT(NAME, node111);\ - }\ -\ - /* Now we have the predecessor node */\ -\ - node111->NAME = NODE;\ - }\ - }\ -\ - /* Free the space occupied by the top node */\ -\ - mem_heap_free_top(hash_get_heap(TABLE, fold111), sizeof(TYPE));\ -} while (0) - -/******************************************************************** -Move all hash table entries from OLD_TABLE to NEW_TABLE.*/ - -#define HASH_MIGRATE(OLD_TABLE, NEW_TABLE, NODE_TYPE, PTR_NAME, FOLD_FUNC) \ -do {\ - ulint i2222;\ - ulint cell_count2222;\ -\ - cell_count2222 = hash_get_n_cells(OLD_TABLE);\ -\ - for (i2222 = 0; i2222 < cell_count2222; i2222++) {\ - NODE_TYPE* node2222 = HASH_GET_FIRST((OLD_TABLE), i2222);\ -\ - while (node2222) {\ - NODE_TYPE* next2222 = node2222->PTR_NAME;\ - ulint fold2222 = FOLD_FUNC(node2222);\ -\ - HASH_INSERT(NODE_TYPE, PTR_NAME, (NEW_TABLE),\ - fold2222, node2222);\ -\ - node2222 = 
next2222;\ - }\ - }\ -} while (0) - - -/**************************************************************** -Gets the mutex index for a fold value in a hash table. */ -UNIV_INLINE -ulint -hash_get_mutex_no( -/*==============*/ - /* out: mutex number */ - hash_table_t* table, /* in: hash table */ - ulint fold); /* in: fold */ -/**************************************************************** -Gets the nth heap in a hash table. */ -UNIV_INLINE -mem_heap_t* -hash_get_nth_heap( -/*==============*/ - /* out: mem heap */ - hash_table_t* table, /* in: hash table */ - ulint i); /* in: index of the heap */ -/**************************************************************** -Gets the heap for a fold value in a hash table. */ -UNIV_INLINE -mem_heap_t* -hash_get_heap( -/*==========*/ - /* out: mem heap */ - hash_table_t* table, /* in: hash table */ - ulint fold); /* in: fold */ -/**************************************************************** -Gets the nth mutex in a hash table. */ -UNIV_INLINE -mutex_t* -hash_get_nth_mutex( -/*===============*/ - /* out: mutex */ - hash_table_t* table, /* in: hash table */ - ulint i); /* in: index of the mutex */ -/**************************************************************** -Gets the mutex for a fold value in a hash table. */ -UNIV_INLINE -mutex_t* -hash_get_mutex( -/*===========*/ - /* out: mutex */ - hash_table_t* table, /* in: hash table */ - ulint fold); /* in: fold */ -/**************************************************************** -Reserves the mutex for a fold value in a hash table. */ - -void -hash_mutex_enter( -/*=============*/ - hash_table_t* table, /* in: hash table */ - ulint fold); /* in: fold */ -/**************************************************************** -Releases the mutex for a fold value in a hash table. 
*/ - -void -hash_mutex_exit( -/*============*/ - hash_table_t* table, /* in: hash table */ - ulint fold); /* in: fold */ -/**************************************************************** -Reserves all the mutexes of a hash table, in an ascending order. */ - -void -hash_mutex_enter_all( -/*=================*/ - hash_table_t* table); /* in: hash table */ -/**************************************************************** -Releases all the mutexes of a hash table. */ - -void -hash_mutex_exit_all( -/*================*/ - hash_table_t* table); /* in: hash table */ - - -struct hash_cell_struct{ - void* node; /* hash chain node, NULL if none */ -}; - -/* The hash table structure */ -struct hash_table_struct { - ibool adaptive;/* TRUE if this is the hash table of the - adaptive hash index */ - ulint n_cells;/* number of cells in the hash table */ - hash_cell_t* array; /* pointer to cell array */ - ulint n_mutexes;/* if mutexes != NULL, then the number of - mutexes, must be a power of 2 */ - mutex_t* mutexes;/* NULL, or an array of mutexes used to - protect segments of the hash table */ - mem_heap_t** heaps; /* if this is non-NULL, hash chain nodes for - external chaining can be allocated from these - memory heaps; there are then n_mutexes many of - these heaps */ - mem_heap_t* heap; - ulint magic_n; -}; - -#define HASH_TABLE_MAGIC_N 76561114 - -#ifndef UNIV_NONINL -#include "hash0hash.ic" -#endif - -#endif diff --git a/storage/innobase/include/hash0hash.ic b/storage/innobase/include/hash0hash.ic deleted file mode 100644 index d246d8ee831..00000000000 --- a/storage/innobase/include/hash0hash.ic +++ /dev/null @@ -1,131 +0,0 @@ -/****************************************************** -The simple hash table utility - -(c) 1997 Innobase Oy - -Created 5/20/1997 Heikki Tuuri -*******************************************************/ - -#include "ut0rnd.h" - -/**************************************************************** -Gets the nth cell in a hash table. 
*/ -UNIV_INLINE -hash_cell_t* -hash_get_nth_cell( -/*==============*/ - /* out: pointer to cell */ - hash_table_t* table, /* in: hash table */ - ulint n) /* in: cell index */ -{ - ut_ad(n < table->n_cells); - - return(table->array + n); -} - -/***************************************************************** -Returns the number of cells in a hash table. */ -UNIV_INLINE -ulint -hash_get_n_cells( -/*=============*/ - /* out: number of cells */ - hash_table_t* table) /* in: table */ -{ - return(table->n_cells); -} - -/****************************************************************** -Calculates the hash value from a folded value. */ -UNIV_INLINE -ulint -hash_calc_hash( -/*===========*/ - /* out: hashed value */ - ulint fold, /* in: folded value */ - hash_table_t* table) /* in: hash table */ -{ - return(ut_hash_ulint(fold, table->n_cells)); -} - -/**************************************************************** -Gets the mutex index for a fold value in a hash table. */ -UNIV_INLINE -ulint -hash_get_mutex_no( -/*==============*/ - /* out: mutex number */ - hash_table_t* table, /* in: hash table */ - ulint fold) /* in: fold */ -{ - return(ut_2pow_remainder(hash_calc_hash(fold, table), - table->n_mutexes)); -} - -/**************************************************************** -Gets the nth heap in a hash table. */ -UNIV_INLINE -mem_heap_t* -hash_get_nth_heap( -/*==============*/ - /* out: mem heap */ - hash_table_t* table, /* in: hash table */ - ulint i) /* in: index of the heap */ -{ - ut_ad(i < table->n_mutexes); - - return(table->heaps[i]); -} - -/**************************************************************** -Gets the heap for a fold value in a hash table. 
*/ -UNIV_INLINE -mem_heap_t* -hash_get_heap( -/*==========*/ - /* out: mem heap */ - hash_table_t* table, /* in: hash table */ - ulint fold) /* in: fold */ -{ - ulint i; - - if (table->heap) { - return(table->heap); - } - - i = hash_get_mutex_no(table, fold); - - return(hash_get_nth_heap(table, i)); -} - -/**************************************************************** -Gets the nth mutex in a hash table. */ -UNIV_INLINE -mutex_t* -hash_get_nth_mutex( -/*===============*/ - /* out: mutex */ - hash_table_t* table, /* in: hash table */ - ulint i) /* in: index of the mutex */ -{ - ut_ad(i < table->n_mutexes); - - return(table->mutexes + i); -} - -/**************************************************************** -Gets the mutex for a fold value in a hash table. */ -UNIV_INLINE -mutex_t* -hash_get_mutex( -/*===========*/ - /* out: mutex */ - hash_table_t* table, /* in: hash table */ - ulint fold) /* in: fold */ -{ - ulint i; - - i = hash_get_mutex_no(table, fold); - - return(hash_get_nth_mutex(table, i)); -} diff --git a/storage/innobase/include/ibuf0ibuf.h b/storage/innobase/include/ibuf0ibuf.h deleted file mode 100644 index 77fefe2020b..00000000000 --- a/storage/innobase/include/ibuf0ibuf.h +++ /dev/null @@ -1,309 +0,0 @@ -/****************************************************** -Insert buffer - -(c) 1997 Innobase Oy - -Created 7/19/1997 Heikki Tuuri -*******************************************************/ - -#ifndef ibuf0ibuf_h -#define ibuf0ibuf_h - -#include "univ.i" - -#include "dict0mem.h" -#include "dict0dict.h" -#include "mtr0mtr.h" -#include "que0types.h" -#include "ibuf0types.h" -#include "fsp0fsp.h" - -extern ibuf_t* ibuf; - -/********************************************************************** -Creates the insert buffer data struct for a single tablespace. Reads the -root page of the insert buffer tree in the tablespace. 
This function can -be called only after the dictionary system has been initialized, as this -creates also the insert buffer table and index for this tablespace. */ - -ibuf_data_t* -ibuf_data_init_for_space( -/*=====================*/ - /* out, own: ibuf data struct, linked to the list - in ibuf control structure. */ - ulint space); /* in: space id */ -/********************************************************************** -Creates the insert buffer data structure at a database startup and -initializes the data structures for the insert buffer of each tablespace. */ - -void -ibuf_init_at_db_start(void); -/*=======================*/ -/************************************************************************* -Reads the biggest tablespace id from the high end of the insert buffer -tree and updates the counter in fil_system. */ - -void -ibuf_update_max_tablespace_id(void); -/*===============================*/ -/************************************************************************* -Initializes an ibuf bitmap page. */ - -void -ibuf_bitmap_page_init( -/*==================*/ - page_t* page, /* in: bitmap page */ - mtr_t* mtr); /* in: mtr */ -/**************************************************************************** -Resets the free bits of the page in the ibuf bitmap. This is done in a -separate mini-transaction, hence this operation does not restrict further -work to only ibuf bitmap operations, which would result if the latch to the -bitmap page were kept. */ - -void -ibuf_reset_free_bits_with_type( -/*===========================*/ - ulint type, /* in: index type */ - page_t* page); /* in: index page; free bits are set to 0 if the index - is non-clustered and non-unique and the page level is - 0 */ -/**************************************************************************** -Resets the free bits of the page in the ibuf bitmap. 
This is done in a -separate mini-transaction, hence this operation does not restrict further -work to solely ibuf bitmap operations, which would result if the latch to -the bitmap page were kept. */ - -void -ibuf_reset_free_bits( -/*=================*/ - dict_index_t* index, /* in: index */ - page_t* page); /* in: index page; free bits are set to 0 if - the index is non-clustered and non-unique and - the page level is 0 */ -/**************************************************************************** -Updates the free bits of the page in the ibuf bitmap if there is not enough -free on the page any more. This is done in a separate mini-transaction, hence -this operation does not restrict further work to only ibuf bitmap operations, -which would result if the latch to the bitmap page were kept. */ -UNIV_INLINE -void -ibuf_update_free_bits_if_full( -/*==========================*/ - dict_index_t* index, /* in: index */ - page_t* page, /* in: index page to which we have added new - records; the free bits are updated if the - index is non-clustered and non-unique and - the page level is 0, and the page becomes - fuller */ - ulint max_ins_size,/* in: value of maximum insert size with - reorganize before the latest operation - performed to the page */ - ulint increase);/* in: upper limit for the additional space - used in the latest operation, if known, or - ULINT_UNDEFINED */ -/************************************************************************** -Updates the free bits for the page to reflect the present state. Does this -in the mtr given, which means that the latching order rules virtually -prevent any further operations for this OS thread until mtr is committed. 
*/ - -void -ibuf_update_free_bits_low( -/*======================*/ - dict_index_t* index, /* in: index */ - page_t* page, /* in: index page */ - ulint max_ins_size, /* in: value of maximum insert size - with reorganize before the latest - operation performed to the page */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************** -Updates the free bits for the two pages to reflect the present state. Does -this in the mtr given, which means that the latching order rules virtually -prevent any further operations until mtr is committed. */ - -void -ibuf_update_free_bits_for_two_pages_low( -/*====================================*/ - dict_index_t* index, /* in: index */ - page_t* page1, /* in: index page */ - page_t* page2, /* in: index page */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************** -A basic partial test if an insert to the insert buffer could be possible and -recommended. */ -UNIV_INLINE -ibool -ibuf_should_try( -/*============*/ - dict_index_t* index, /* in: index where to insert */ - ulint ignore_sec_unique); /* in: if != 0, we should - ignore UNIQUE constraint on - a secondary index when we - decide */ -/********************************************************************** -Returns TRUE if the current OS thread is performing an insert buffer -routine. */ - -ibool -ibuf_inside(void); -/*=============*/ - /* out: TRUE if inside an insert buffer routine: for instance, - a read-ahead of non-ibuf pages is then forbidden */ -/*************************************************************************** -Checks if a page address is an ibuf bitmap page (level 3 page) address. */ -UNIV_INLINE -ibool -ibuf_bitmap_page( -/*=============*/ - /* out: TRUE if a bitmap page */ - ulint page_no);/* in: page number */ -/*************************************************************************** -Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. 
*/ - -ibool -ibuf_page( -/*======*/ - /* out: TRUE if level 2 or level 3 page */ - ulint space, /* in: space id */ - ulint page_no);/* in: page number */ -/*************************************************************************** -Checks if a page is a level 2 or 3 page in the ibuf hierarchy of pages. */ - -ibool -ibuf_page_low( -/*==========*/ - /* out: TRUE if level 2 or level 3 page */ - ulint space, /* in: space id */ - ulint page_no,/* in: page number */ - mtr_t* mtr); /* in: mtr which will contain an x-latch to the - bitmap page if the page is not one of the fixed - address ibuf pages */ -/*************************************************************************** -Frees excess pages from the ibuf free list. This function is called when an OS -thread calls fsp services to allocate a new file segment, or a new page to a -file segment, and the thread did not own the fsp latch before this call. */ - -void -ibuf_free_excess_pages( -/*===================*/ - ulint space); /* in: space id */ -/************************************************************************* -Makes an index insert to the insert buffer, instead of directly to the disk -page, if this is possible. Does not do insert if the index is clustered -or unique. */ - -ibool -ibuf_insert( -/*========*/ - /* out: TRUE if success */ - dtuple_t* entry, /* in: index entry to insert */ - dict_index_t* index, /* in: index where to insert */ - ulint space, /* in: space id where to insert */ - ulint page_no,/* in: page number where to insert */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************* -When an index page is read from a disk to the buffer pool, this function -inserts to the page the possible index entries buffered in the insert buffer. -The entries are deleted from the insert buffer. 
If the page is not read, but -created in the buffer pool, this function deletes its buffered entries from -the insert buffer; there can exist entries for such a page if the page -belonged to an index which subsequently was dropped. */ - -void -ibuf_merge_or_delete_for_page( -/*==========================*/ - page_t* page, /* in: if page has been read from disk, pointer to - the page x-latched, else NULL */ - ulint space, /* in: space id of the index page */ - ulint page_no,/* in: page number of the index page */ - ibool update_ibuf_bitmap);/* in: normally this is set to TRUE, but if - we have deleted or are deleting the tablespace, then we - naturally do not want to update a non-existent bitmap - page */ -/************************************************************************* -Deletes all entries in the insert buffer for a given space id. This is used -in DISCARD TABLESPACE and IMPORT TABLESPACE. -NOTE: this does not update the page free bitmaps in the space. The space will -become CORRUPT when you call this function! */ - -void -ibuf_delete_for_discarded_space( -/*============================*/ - ulint space); /* in: space id */ -/************************************************************************* -Contracts insert buffer trees by reading pages to the buffer pool. */ - -ulint -ibuf_contract( -/*==========*/ - /* out: a lower limit for the combined size in bytes - of entries which will be merged from ibuf trees to the - pages read, 0 if ibuf is empty */ - ibool sync); /* in: TRUE if the caller wants to wait for the - issued read with the highest tablespace address - to complete */ -/************************************************************************* -Contracts insert buffer trees by reading pages to the buffer pool. 
*/ - -ulint -ibuf_contract_for_n_pages( -/*======================*/ - /* out: a lower limit for the combined size in bytes - of entries which will be merged from ibuf trees to the - pages read, 0 if ibuf is empty */ - ibool sync, /* in: TRUE if the caller wants to wait for the - issued read with the highest tablespace address - to complete */ - ulint n_pages);/* in: try to read at least this many pages to - the buffer pool and merge the ibuf contents to - them */ -/************************************************************************* -Parses a redo log record of an ibuf bitmap page init. */ - -byte* -ibuf_parse_bitmap_init( -/*===================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ -#ifdef UNIV_IBUF_DEBUG -/********************************************************************** -Gets the ibuf count for a given page. */ - -ulint -ibuf_count_get( -/*===========*/ - /* out: number of entries in the insert buffer - currently buffered for this page */ - ulint space, /* in: space id */ - ulint page_no);/* in: page number */ -#endif -/********************************************************************** -Looks if the insert buffer is empty. */ - -ibool -ibuf_is_empty(void); -/*===============*/ - /* out: TRUE if empty */ -/********************************************************************** -Prints info of ibuf. 
*/ - -void -ibuf_print( -/*=======*/ - FILE* file); /* in: file where to print */ - -#define IBUF_HEADER_PAGE_NO FSP_IBUF_HEADER_PAGE_NO -#define IBUF_TREE_ROOT_PAGE_NO FSP_IBUF_TREE_ROOT_PAGE_NO - -/* The ibuf header page currently contains only the file segment header -for the file segment from which the pages for the ibuf tree are allocated */ -#define IBUF_HEADER PAGE_DATA -#define IBUF_TREE_SEG_HEADER 0 /* fseg header for ibuf tree */ - -#ifndef UNIV_NONINL -#include "ibuf0ibuf.ic" -#endif - -#endif diff --git a/storage/innobase/include/ibuf0ibuf.ic b/storage/innobase/include/ibuf0ibuf.ic deleted file mode 100644 index 4d65a7f5250..00000000000 --- a/storage/innobase/include/ibuf0ibuf.ic +++ /dev/null @@ -1,224 +0,0 @@ -/****************************************************** -Insert buffer - -(c) 1997 Innobase Oy - -Created 7/19/1997 Heikki Tuuri -*******************************************************/ - -#include "buf0lru.h" -#include "page0page.h" - -extern ulint ibuf_flush_count; - -/* If this number is n, an index page must contain at least the page size -per n bytes of free space for ibuf to try to buffer inserts to this page. -If there is this much of free space, the corresponding bits are set in the -ibuf bitmap. 
*/ -#define IBUF_PAGE_SIZE_PER_FREE_SPACE 32 - -/* Insert buffer data struct for a single tablespace */ -struct ibuf_data_struct{ - ulint space; /* space id */ - ulint seg_size;/* allocated pages if the file segment - containing ibuf header and tree */ - ulint size; /* size of the insert buffer tree in pages */ - ibool empty; /* after an insert to the ibuf tree is - performed, this is set to FALSE, and if a - contract operation finds the tree empty, this - is set to TRUE */ - ulint free_list_len; - /* length of the free list */ - ulint height; /* tree height */ - dict_index_t* index; /* insert buffer index */ - UT_LIST_NODE_T(ibuf_data_t) data_list; - /* list of ibuf data structs */ - ulint n_inserts;/* number of inserts made to the insert - buffer */ - ulint n_merges;/* number of pages merged */ - ulint n_merged_recs;/* number of records merged */ -}; - -struct ibuf_struct{ - ulint size; /* current size of the ibuf index - trees in pages */ - ulint max_size; /* recommended maximum size in pages - for the ibuf index tree */ - UT_LIST_BASE_NODE_T(ibuf_data_t) data_list; - /* list of ibuf data structs for - each tablespace */ -}; - -/**************************************************************************** -Sets the free bit of the page in the ibuf bitmap. This is done in a separate -mini-transaction, hence this operation does not restrict further work to only -ibuf bitmap operations, which would result if the latch to the bitmap page -were kept. 
*/ - -void -ibuf_set_free_bits( -/*===============*/ - ulint type, /* in: index type */ - page_t* page, /* in: index page; free bit is reset if the index is - a non-clustered non-unique, and page level is 0 */ - ulint val, /* in: value to set: < 4 */ - ulint max_val);/* in: ULINT_UNDEFINED or a maximum value which - the bits must have before setting; this is for - debugging */ - -/************************************************************************** -A basic partial test if an insert to the insert buffer could be possible and -recommended. */ -UNIV_INLINE -ibool -ibuf_should_try( -/*============*/ - dict_index_t* index, /* in: index where to insert */ - ulint ignore_sec_unique) /* in: if != 0, we should - ignore UNIQUE constraint on - a secondary index when we - decide */ -{ - if (!(index->type & DICT_CLUSTERED) - && (ignore_sec_unique || !(index->type & DICT_UNIQUE))) { - - ibuf_flush_count++; - - if (ibuf_flush_count % 8 == 0) { - - buf_LRU_try_free_flushed_blocks(); - } - - return(TRUE); - } - - return(FALSE); -} - -/*************************************************************************** -Checks if a page address is an ibuf bitmap page address. 
*/ -UNIV_INLINE -ibool -ibuf_bitmap_page( -/*=============*/ - /* out: TRUE if a bitmap page */ - ulint page_no)/* in: page number */ -{ - if (page_no % XDES_DESCRIBED_PER_PAGE == FSP_IBUF_BITMAP_OFFSET) { - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************* -Translates the free space on a page to a value in the ibuf bitmap.*/ -UNIV_INLINE -ulint -ibuf_index_page_calc_free_bits( -/*===========================*/ - /* out: value for ibuf bitmap bits */ - ulint max_ins_size) /* in: maximum insert size after reorganize - for the page */ -{ - ulint n; - - n = max_ins_size / (UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE); - - if (n == 3) { - n = 2; - } - - if (n > 3) { - n = 3; - } - - return(n); -} - -/************************************************************************* -Translates the ibuf free bits to the free space on a page in bytes. */ -UNIV_INLINE -ulint -ibuf_index_page_calc_free_from_bits( -/*================================*/ - /* out: maximum insert size after reorganize for the - page */ - ulint bits) /* in: value for ibuf bitmap bits */ -{ - ut_ad(bits < 4); - - if (bits == 3) { - return(4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE); - } - - return(bits * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE); -} - -/************************************************************************* -Translates the free space on a page to a value in the ibuf bitmap.*/ -UNIV_INLINE -ulint -ibuf_index_page_calc_free( -/*======================*/ - /* out: value for ibuf bitmap bits */ - page_t* page) /* in: non-unique secondary index page */ -{ - return(ibuf_index_page_calc_free_bits( - page_get_max_insert_size_after_reorganize(page, 1))); -} - -/**************************************************************************** -Updates the free bits of the page in the ibuf bitmap if there is not enough -free on the page any more. 
This is done in a separate mini-transaction, hence -this operation does not restrict further work to only ibuf bitmap operations, -which would result if the latch to the bitmap page were kept. */ -UNIV_INLINE -void -ibuf_update_free_bits_if_full( -/*==========================*/ - dict_index_t* index, /* in: index */ - page_t* page, /* in: index page to which we have added new - records; the free bits are updated if the - index is non-clustered and non-unique and - the page level is 0, and the page becomes - fuller */ - ulint max_ins_size,/* in: value of maximum insert size with - reorganize before the latest operation - performed to the page */ - ulint increase)/* in: upper limit for the additional space - used in the latest operation, if known, or - ULINT_UNDEFINED */ -{ - ulint before; - ulint after; - - before = ibuf_index_page_calc_free_bits(max_ins_size); - - if (max_ins_size >= increase) { -#if ULINT32_UNDEFINED <= UNIV_PAGE_SIZE -# error "ULINT32_UNDEFINED <= UNIV_PAGE_SIZE" -#endif - after = ibuf_index_page_calc_free_bits(max_ins_size - - increase); -#ifdef UNIV_IBUF_DEBUG - ut_a(after <= ibuf_index_page_calc_free(page)); -#endif - } else { - after = ibuf_index_page_calc_free(page); - } - - if (after == 0) { - /* We move the page to the front of the buffer pool LRU list: - the purpose of this is to prevent those pages to which we - cannot make inserts using the insert buffer from slipping - out of the buffer pool */ - - buf_page_make_young(page); - } - - if (before > after) { - ibuf_set_free_bits(index->type, page, after, before); - } -} diff --git a/storage/innobase/include/ibuf0types.h b/storage/innobase/include/ibuf0types.h deleted file mode 100644 index fb202ac44b0..00000000000 --- a/storage/innobase/include/ibuf0types.h +++ /dev/null @@ -1,15 +0,0 @@ -/****************************************************** -Insert buffer global types - -(c) 1997 Innobase Oy - -Created 7/29/1997 Heikki Tuuri -*******************************************************/ - 
-#ifndef ibuf0types_h -#define ibuf0types_h - -typedef struct ibuf_data_struct ibuf_data_t; -typedef struct ibuf_struct ibuf_t; - -#endif diff --git a/storage/innobase/include/lock0iter.h b/storage/innobase/include/lock0iter.h deleted file mode 100644 index d063a360c1f..00000000000 --- a/storage/innobase/include/lock0iter.h +++ /dev/null @@ -1,52 +0,0 @@ -/****************************************************** -Lock queue iterator type and function prototypes. - -(c) 2007 Innobase Oy - -Created July 16, 2007 Vasil Dimov -*******************************************************/ - -#ifndef lock0iter_h -#define lock0iter_h - -#include "univ.i" -#include "lock0types.h" - -typedef struct lock_queue_iterator_struct { - lock_t* current_lock; - /* In case this is a record lock queue (not table lock queue) - then bit_no is the record number within the heap in which the - record is stored. */ - ulint bit_no; -} lock_queue_iterator_t; - -/*********************************************************************** -Initialize lock queue iterator so that it starts to iterate from -"lock". bit_no specifies the record number within the heap where the -record is stored. It can be undefined (ULINT_UNDEFINED) in two cases: -1. If the lock is a table lock, thus we have a table lock queue; -2. If the lock is a record lock and it is a wait lock. In this case - bit_no is calculated in this function by using - lock_rec_find_set_bit(). There is exactly one bit set in the bitmap - of a wait lock. */ - -void -lock_queue_iterator_reset( -/*======================*/ - lock_queue_iterator_t* iter, /* out: iterator */ - lock_t* lock, /* in: lock to start from */ - ulint bit_no);/* in: record number in the - heap */ - -/*********************************************************************** -Gets the previous lock in the lock queue, returns NULL if there are no -more locks (i.e. the current lock is the first one). The iterator is -receded (if not-NULL is returned). 
*/ - -lock_t* -lock_queue_iterator_get_prev( -/*=========================*/ - /* out: previous lock or NULL */ - lock_queue_iterator_t* iter); /* in/out: iterator */ - -#endif /* lock0iter_h */ diff --git a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h deleted file mode 100644 index 635724bf5a1..00000000000 --- a/storage/innobase/include/lock0lock.h +++ /dev/null @@ -1,709 +0,0 @@ -/****************************************************** -The transaction lock system - -(c) 1996 Innobase Oy - -Created 5/7/1996 Heikki Tuuri -*******************************************************/ - -#ifndef lock0lock_h -#define lock0lock_h - -#include "univ.i" -#include "trx0types.h" -#include "rem0types.h" -#include "dict0types.h" -#include "que0types.h" -#include "page0types.h" -#include "lock0types.h" -#include "read0types.h" -#include "hash0hash.h" - -#ifdef UNIV_DEBUG -extern ibool lock_print_waits; -#endif /* UNIV_DEBUG */ -/* Buffer for storing information about the most recent deadlock error */ -extern FILE* lock_latest_err_file; - -/************************************************************************* -Gets the size of a lock struct. */ - -ulint -lock_get_size(void); -/*===============*/ - /* out: size in bytes */ -/************************************************************************* -Creates the lock system at database start. */ - -void -lock_sys_create( -/*============*/ - ulint n_cells); /* in: number of slots in lock hash table */ -/************************************************************************* -Checks if some transaction has an implicit x-lock on a record in a secondary -index. 
*/ - -trx_t* -lock_sec_rec_some_has_impl_off_kernel( -/*==================================*/ - /* out: transaction which has the x-lock, or - NULL */ - rec_t* rec, /* in: user record */ - dict_index_t* index, /* in: secondary index */ - const ulint* offsets);/* in: rec_get_offsets(rec, index) */ -/************************************************************************* -Checks if some transaction has an implicit x-lock on a record in a clustered -index. */ -UNIV_INLINE -trx_t* -lock_clust_rec_some_has_impl( -/*=========================*/ - /* out: transaction which has the x-lock, or - NULL */ - rec_t* rec, /* in: user record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets);/* in: rec_get_offsets(rec, index) */ -/***************************************************************** -Makes a record to inherit the locks of another record as gap type -locks, but does not reset the lock bits of the other record. Also -waiting lock requests on rec are inherited as GRANTED gap locks. */ - -void -lock_rec_inherit_to_gap( -/*====================*/ - rec_t* heir, /* in: record which inherits */ - rec_t* rec); /* in: record from which inherited; does NOT reset - the locks on this record */ -/***************************************************************** -Updates the lock table when we have reorganized a page. NOTE: we copy -also the locks set on the infimum of the page; the infimum may carry -locks if an update of a record is occurring on the page, and its locks -were temporarily stored on the infimum. */ - -void -lock_move_reorganize_page( -/*======================*/ - page_t* page, /* in: old index page */ - page_t* new_page); /* in: reorganized page */ -/***************************************************************** -Moves the explicit locks on user records to another page if a record -list end is moved to another page. 
*/ - -void -lock_move_rec_list_end( -/*===================*/ - page_t* new_page, /* in: index page to move to */ - page_t* page, /* in: index page */ - rec_t* rec); /* in: record on page: this is the - first record moved */ -/***************************************************************** -Moves the explicit locks on user records to another page if a record -list start is moved to another page. */ - -void -lock_move_rec_list_start( -/*=====================*/ - page_t* new_page, /* in: index page to move to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page: this is the - first record NOT copied */ - rec_t* old_end); /* in: old previous-to-last record on - new_page before the records were copied */ -/***************************************************************** -Updates the lock table when a page is split to the right. */ - -void -lock_update_split_right( -/*====================*/ - page_t* right_page, /* in: right page */ - page_t* left_page); /* in: left page */ -/***************************************************************** -Updates the lock table when a page is merged to the right. */ - -void -lock_update_merge_right( -/*====================*/ - rec_t* orig_succ, /* in: original successor of infimum - on the right page before merge */ - page_t* left_page); /* in: merged index page which will be - discarded */ -/***************************************************************** -Updates the lock table when the root page is copied to another in -btr_root_raise_and_insert. Note that we leave lock structs on the -root page, even though they do not make sense on other than leaf -pages: the reason is that in a pessimistic update the infimum record -of the root page will act as a dummy carrier of the locks of the record -to be updated. 
*/ - -void -lock_update_root_raise( -/*===================*/ - page_t* new_page, /* in: index page to which copied */ - page_t* root); /* in: root page */ -/***************************************************************** -Updates the lock table when a page is copied to another and the original page -is removed from the chain of leaf pages, except if page is the root! */ - -void -lock_update_copy_and_discard( -/*=========================*/ - page_t* new_page, /* in: index page to which copied */ - page_t* page); /* in: index page; NOT the root! */ -/***************************************************************** -Updates the lock table when a page is split to the left. */ - -void -lock_update_split_left( -/*===================*/ - page_t* right_page, /* in: right page */ - page_t* left_page); /* in: left page */ -/***************************************************************** -Updates the lock table when a page is merged to the left. */ - -void -lock_update_merge_left( -/*===================*/ - page_t* left_page, /* in: left page to which merged */ - rec_t* orig_pred, /* in: original predecessor of supremum - on the left page before merge */ - page_t* right_page); /* in: merged index page which will be - discarded */ -/***************************************************************** -Resets the original locks on heir and replaces them with gap type locks -inherited from rec. */ - -void -lock_rec_reset_and_inherit_gap_locks( -/*=================================*/ - rec_t* heir, /* in: heir record */ - rec_t* rec); /* in: record */ -/***************************************************************** -Updates the lock table when a page is discarded. */ - -void -lock_update_discard( -/*================*/ - rec_t* heir, /* in: record which will inherit the locks */ - page_t* page); /* in: index page which will be discarded */ -/***************************************************************** -Updates the lock table when a new user record is inserted. 
*/ - -void -lock_update_insert( -/*===============*/ - rec_t* rec); /* in: the inserted record */ -/***************************************************************** -Updates the lock table when a record is removed. */ - -void -lock_update_delete( -/*===============*/ - rec_t* rec); /* in: the record to be removed */ -/************************************************************************* -Stores on the page infimum record the explicit locks of another record. -This function is used to store the lock state of a record when it is -updated and the size of the record changes in the update. The record -is in such an update moved, perhaps to another page. The infimum record -acts as a dummy carrier record, taking care of lock releases while the -actual record is being moved. */ - -void -lock_rec_store_on_page_infimum( -/*===========================*/ - page_t* page, /* in: page containing the record */ - rec_t* rec); /* in: record whose lock state is stored - on the infimum record of the same page; lock - bits are reset on the record */ -/************************************************************************* -Restores the state of explicit lock requests on a single record, where the -state was stored on the infimum of the page. */ - -void -lock_rec_restore_from_page_infimum( -/*===============================*/ - rec_t* rec, /* in: record whose lock state is restored */ - page_t* page); /* in: page (rec is not necessarily on this page) - whose infimum stored the lock state; lock bits are - reset on the infimum */ -/************************************************************************* -Returns TRUE if there are explicit record locks on a page. 
*/ - -ibool -lock_rec_expl_exist_on_page( -/*========================*/ - /* out: TRUE if there are explicit record locks on - the page */ - ulint space, /* in: space id */ - ulint page_no);/* in: page number */ -/************************************************************************* -Checks if locks of other transactions prevent an immediate insert of -a record. If they do, first tests if the query thread should anyway -be suspended for some reason; if not, then puts the transaction and -the query thread to the lock wait state and inserts a waiting request -for a gap x-lock to the lock queue. */ - -ulint -lock_rec_insert_check_and_lock( -/*===========================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ - ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, - does nothing */ - rec_t* rec, /* in: record after which to insert */ - dict_index_t* index, /* in: index */ - que_thr_t* thr, /* in: query thread */ - ibool* inherit);/* out: set to TRUE if the new inserted - record maybe should inherit LOCK_GAP type - locks from the successor record */ -/************************************************************************* -Checks if locks of other transactions prevent an immediate modify (update, -delete mark, or delete unmark) of a clustered index record. If they do, -first tests if the query thread should anyway be suspended for some -reason; if not, then puts the transaction and the query thread to the -lock wait state and inserts a waiting request for a record x-lock to the -lock queue. 
*/ - -ulint -lock_clust_rec_modify_check_and_lock( -/*=================================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ - ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, - does nothing */ - rec_t* rec, /* in: record which should be modified */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************* -Checks if locks of other transactions prevent an immediate modify -(delete mark or delete unmark) of a secondary index record. */ - -ulint -lock_sec_rec_modify_check_and_lock( -/*===============================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ - ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, - does nothing */ - rec_t* rec, /* in: record which should be modified; - NOTE: as this is a secondary index, we - always have to modify the clustered index - record first: see the comment below */ - dict_index_t* index, /* in: secondary index */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************* -Like the counterpart for a clustered index below, but now we read a -secondary index record. 
*/ - -ulint -lock_sec_rec_read_check_and_lock( -/*=============================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ - ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, - does nothing */ - rec_t* rec, /* in: user record or page supremum record - which should be read or passed over by a read - cursor */ - dict_index_t* index, /* in: secondary index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - ulint mode, /* in: mode of the lock which the read cursor - should set on records: LOCK_S or LOCK_X; the - latter is possible in SELECT FOR UPDATE */ - ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or - LOCK_REC_NOT_GAP */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************* -Checks if locks of other transactions prevent an immediate read, or passing -over by a read cursor, of a clustered index record. If they do, first tests -if the query thread should anyway be suspended for some reason; if not, then -puts the transaction and the query thread to the lock wait state and inserts a -waiting request for a record lock to the lock queue. Sets the requested mode -lock on the record. 
*/ - -ulint -lock_clust_rec_read_check_and_lock( -/*===============================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ - ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, - does nothing */ - rec_t* rec, /* in: user record or page supremum record - which should be read or passed over by a read - cursor */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - ulint mode, /* in: mode of the lock which the read cursor - should set on records: LOCK_S or LOCK_X; the - latter is possible in SELECT FOR UPDATE */ - ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or - LOCK_REC_NOT_GAP */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************* -Checks if locks of other transactions prevent an immediate read, or passing -over by a read cursor, of a clustered index record. If they do, first tests -if the query thread should anyway be suspended for some reason; if not, then -puts the transaction and the query thread to the lock wait state and inserts a -waiting request for a record lock to the lock queue. Sets the requested mode -lock on the record. This is an alternative version of -lock_clust_rec_read_check_and_lock() that does not require the parameter -"offsets". 
*/ - -ulint -lock_clust_rec_read_check_and_lock_alt( -/*===================================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ - ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, - does nothing */ - rec_t* rec, /* in: user record or page supremum record - which should be read or passed over by a read - cursor */ - dict_index_t* index, /* in: clustered index */ - ulint mode, /* in: mode of the lock which the read cursor - should set on records: LOCK_S or LOCK_X; the - latter is possible in SELECT FOR UPDATE */ - ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or - LOCK_REC_NOT_GAP */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************* -Checks that a record is seen in a consistent read. */ - -ibool -lock_clust_rec_cons_read_sees( -/*==========================*/ - /* out: TRUE if sees, or FALSE if an earlier - version of the record should be retrieved */ - rec_t* rec, /* in: user record which should be read or - passed over by a read cursor */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - read_view_t* view); /* in: consistent read view */ -/************************************************************************* -Checks that a non-clustered index record is seen in a consistent read. 
*/ - -ulint -lock_sec_rec_cons_read_sees( -/*========================*/ - /* out: TRUE if certainly sees, or FALSE if an - earlier version of the clustered index record - might be needed: NOTE that a non-clustered - index page contains so little information on - its modifications that also in the case FALSE, - the present version of rec may be the right, - but we must check this from the clustered - index record */ - rec_t* rec, /* in: user record which should be read or - passed over by a read cursor */ - dict_index_t* index, /* in: non-clustered index */ - read_view_t* view); /* in: consistent read view */ -/************************************************************************* -Locks the specified database table in the mode given. If the lock cannot -be granted immediately, the query thread is put to wait. */ - -ulint -lock_table( -/*=======*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ - ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, - does nothing */ - dict_table_t* table, /* in: database table in dictionary cache */ - ulint mode, /* in: lock mode */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************* -Checks if there are any locks set on the table. */ - -ibool -lock_is_on_table( -/*=============*/ - /* out: TRUE if there are lock(s) */ - dict_table_t* table); /* in: database table in dictionary cache */ -/***************************************************************** -Removes a granted record lock of a transaction from the queue and grants -locks to other transactions waiting in the queue if they now are entitled -to a lock. */ - -void -lock_rec_unlock( -/*============*/ - trx_t* trx, /* in: transaction that has set a record - lock */ - rec_t* rec, /* in: record */ - ulint lock_mode); /* in: LOCK_S or LOCK_X */ -/************************************************************************* -Releases a table lock. 
-Releases possible other transactions waiting for this lock. */ - -void -lock_table_unlock( -/*==============*/ - lock_t* lock); /* in: lock */ -/************************************************************************* -Releases an auto-inc lock a transaction possibly has on a table. -Releases possible other transactions waiting for this lock. */ - -void -lock_table_unlock_auto_inc( -/*=======================*/ - trx_t* trx); /* in: transaction */ -/************************************************************************* -Releases transaction locks, and releases possible other transactions waiting -because of these locks. */ - -void -lock_release_off_kernel( -/*====================*/ - trx_t* trx); /* in: transaction */ -/************************************************************************* -Cancels a waiting lock request and releases possible other transactions -waiting behind it. */ - -void -lock_cancel_waiting_and_release( -/*============================*/ - lock_t* lock); /* in: waiting lock request */ - -/************************************************************************* -Removes locks on a table to be dropped or truncated. -If remove_also_table_sx_locks is TRUE then table-level S and X locks are -also removed in addition to other table-level and record-level locks. -No lock, that is going to be removed, is allowed to be a wait lock. */ - -void -lock_remove_all_on_table( -/*=====================*/ - dict_table_t* table, /* in: table to be dropped - or truncated */ - ibool remove_also_table_sx_locks);/* in: also removes - table S and X locks */ - -/************************************************************************* -Calculates the fold value of a page file address: used in inserting or -searching for a lock in the hash table. 
*/ -UNIV_INLINE -ulint -lock_rec_fold( -/*==========*/ - /* out: folded value */ - ulint space, /* in: space */ - ulint page_no);/* in: page number */ -/************************************************************************* -Calculates the hash value of a page file address: used in inserting or -searching for a lock in the hash table. */ -UNIV_INLINE -ulint -lock_rec_hash( -/*==========*/ - /* out: hashed value */ - ulint space, /* in: space */ - ulint page_no);/* in: page number */ -/************************************************************************* -Gets the source table of an ALTER TABLE transaction. The table must be -covered by an IX or IS table lock. */ - -dict_table_t* -lock_get_src_table( -/*===============*/ - /* out: the source table of transaction, - if it is covered by an IX or IS table lock; - dest if there is no source table, and - NULL if the transaction is locking more than - two tables or an inconsistency is found */ - trx_t* trx, /* in: transaction */ - dict_table_t* dest, /* in: destination of ALTER TABLE */ - ulint* mode); /* out: lock mode of the source table */ -/************************************************************************* -Determine if the given table is exclusively "owned" by the given -transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC -on the table. */ - -ibool -lock_is_table_exclusive( -/*====================*/ - /* out: TRUE if table is only locked by trx, - with LOCK_IX, and possibly LOCK_AUTO_INC */ - dict_table_t* table, /* in: table */ - trx_t* trx); /* in: transaction */ -/************************************************************************* -Checks if a lock request lock1 has to wait for request lock2. 
*/ - -ibool -lock_has_to_wait( -/*=============*/ - /* out: TRUE if lock1 has to wait for lock2 to be - removed */ - lock_t* lock1, /* in: waiting lock */ - lock_t* lock2); /* in: another lock; NOTE that it is assumed that this - has a lock bit set on the same record as in lock1 if - the locks are record locks */ -/************************************************************************* -Checks that a transaction id is sensible, i.e., not in the future. */ - -ibool -lock_check_trx_id_sanity( -/*=====================*/ - /* out: TRUE if ok */ - dulint trx_id, /* in: trx id */ - rec_t* rec, /* in: user record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets, /* in: rec_get_offsets(rec, index) */ - ibool has_kernel_mutex);/* in: TRUE if the caller owns the - kernel mutex */ -/************************************************************************* -Validates the lock queue on a single record. */ - -ibool -lock_rec_queue_validate( -/*====================*/ - /* out: TRUE if ok */ - rec_t* rec, /* in: record to look at */ - dict_index_t* index, /* in: index, or NULL if not known */ - const ulint* offsets);/* in: rec_get_offsets(rec, index) */ -/************************************************************************* -Prints info of a table lock. */ - -void -lock_table_print( -/*=============*/ - FILE* file, /* in: file where to print */ - lock_t* lock); /* in: table type lock */ -/************************************************************************* -Prints info of a record lock. */ - -void -lock_rec_print( -/*===========*/ - FILE* file, /* in: file where to print */ - lock_t* lock); /* in: record type lock */ -/************************************************************************* -Prints info of locks for all transactions. 
*/ - -void -lock_print_info_summary( -/*====================*/ - FILE* file); /* in: file where to print */ -/************************************************************************* -Prints info of locks for each transaction. */ - -void -lock_print_info_all_transactions( -/*=============================*/ - FILE* file); /* in: file where to print */ -/************************************************************************* -Validates the lock queue on a table. */ - -ibool -lock_table_queue_validate( -/*======================*/ - /* out: TRUE if ok */ - dict_table_t* table); /* in: table */ -/************************************************************************* -Validates the record lock queues on a page. */ - -ibool -lock_rec_validate_page( -/*===================*/ - /* out: TRUE if ok */ - ulint space, /* in: space id */ - ulint page_no);/* in: page number */ -/************************************************************************* -Validates the lock system. */ - -ibool -lock_validate(void); -/*===============*/ - /* out: TRUE if ok */ -/************************************************************************* -Return approximate number or record locks (bits set in the bitmap) for -this transaction. Since delete-marked records may be removed, the -record count will not be precise. 
*/ - -ulint -lock_number_of_rows_locked( -/*=======================*/ - trx_t* trx); /* in: transaction */ - -/* The lock system */ -extern lock_sys_t* lock_sys; - -/* Lock modes and types */ -/* Basic modes */ -#define LOCK_NONE 0 /* this flag is used elsewhere to note - consistent read */ -#define LOCK_IS 2 /* intention shared */ -#define LOCK_IX 3 /* intention exclusive */ -#define LOCK_S 4 /* shared */ -#define LOCK_X 5 /* exclusive */ -#define LOCK_AUTO_INC 6 /* locks the auto-inc counter of a table - in an exclusive mode */ -#define LOCK_MODE_MASK 0xFUL /* mask used to extract mode from the - type_mode field in a lock */ -/* Lock types */ -#define LOCK_TABLE 16 /* these type values should be so high that */ -#define LOCK_REC 32 /* they can be ORed to the lock mode */ -#define LOCK_TYPE_MASK 0xF0UL /* mask used to extract lock type from the - type_mode field in a lock */ -/* Waiting lock flag */ -#define LOCK_WAIT 256 /* this wait bit should be so high that - it can be ORed to the lock mode and type; - when this bit is set, it means that the - lock has not yet been granted, it is just - waiting for its turn in the wait queue */ -/* Precise modes */ -#define LOCK_ORDINARY 0 /* this flag denotes an ordinary next-key lock - in contrast to LOCK_GAP or LOCK_REC_NOT_GAP */ -#define LOCK_GAP 512 /* this gap bit should be so high that - it can be ORed to the other flags; - when this bit is set, it means that the - lock holds only on the gap before the record; - for instance, an x-lock on the gap does not - give permission to modify the record on which - the bit is set; locks of this type are created - when records are removed from the index chain - of records */ -#define LOCK_REC_NOT_GAP 1024 /* this bit means that the lock is only on - the index record and does NOT block inserts - to the gap before the index record; this is - used in the case when we retrieve a record - with a unique key, and is also used in - locking plain SELECTs (not part of UPDATE - or DELETE) 
when the user has set the READ - COMMITTED isolation level */ -#define LOCK_INSERT_INTENTION 2048 /* this bit is set when we place a waiting - gap type record lock request in order to let - an insert of an index record to wait until - there are no conflicting locks by other - transactions on the gap; note that this flag - remains set when the waiting lock is granted, - or if the lock is inherited to a neighboring - record */ - -/* When lock bits are reset, the following flags are available: */ -#define LOCK_RELEASE_WAIT 1 -#define LOCK_NOT_RELEASE_WAIT 2 - -/* Lock operation struct */ -typedef struct lock_op_struct lock_op_t; -struct lock_op_struct{ - dict_table_t* table; /* table to be locked */ - ulint mode; /* lock mode */ -}; - -#define LOCK_OP_START 1 -#define LOCK_OP_COMPLETE 2 - -/* The lock system struct */ -struct lock_sys_struct{ - hash_table_t* rec_hash; /* hash table of the record locks */ -}; - -/* The lock system */ -extern lock_sys_t* lock_sys; - - -#ifndef UNIV_NONINL -#include "lock0lock.ic" -#endif - -#endif diff --git a/storage/innobase/include/lock0lock.ic b/storage/innobase/include/lock0lock.ic deleted file mode 100644 index 311623b190b..00000000000 --- a/storage/innobase/include/lock0lock.ic +++ /dev/null @@ -1,81 +0,0 @@ -/****************************************************** -The transaction lock system - -(c) 1996 Innobase Oy - -Created 5/7/1996 Heikki Tuuri -*******************************************************/ - -#include "sync0sync.h" -#include "srv0srv.h" -#include "dict0dict.h" -#include "row0row.h" -#include "trx0sys.h" -#include "trx0trx.h" -#include "buf0buf.h" -#include "page0page.h" -#include "page0cur.h" -#include "row0vers.h" -#include "que0que.h" -#include "btr0cur.h" -#include "read0read.h" -#include "log0recv.h" - -/************************************************************************* -Calculates the fold value of a page file address: used in inserting or -searching for a lock in the hash table. 
*/ -UNIV_INLINE -ulint -lock_rec_fold( -/*==========*/ - /* out: folded value */ - ulint space, /* in: space */ - ulint page_no)/* in: page number */ -{ - return(ut_fold_ulint_pair(space, page_no)); -} - -/************************************************************************* -Calculates the hash value of a page file address: used in inserting or -searching for a lock in the hash table. */ -UNIV_INLINE -ulint -lock_rec_hash( -/*==========*/ - /* out: hashed value */ - ulint space, /* in: space */ - ulint page_no)/* in: page number */ -{ - return(hash_calc_hash(lock_rec_fold(space, page_no), - lock_sys->rec_hash)); -} - -/************************************************************************* -Checks if some transaction has an implicit x-lock on a record in a clustered -index. */ -UNIV_INLINE -trx_t* -lock_clust_rec_some_has_impl( -/*=========================*/ - /* out: transaction which has the x-lock, or - NULL */ - rec_t* rec, /* in: user record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets)/* in: rec_get_offsets(rec, index) */ -{ - dulint trx_id; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(page_rec_is_user_rec(rec)); - - trx_id = row_get_rec_trx_id(rec, index, offsets); - - if (trx_is_active(trx_id)) { - /* The modifying or inserting transaction is active */ - - return(trx_get_on_id(trx_id)); - } - - return(NULL); -} diff --git a/storage/innobase/include/lock0priv.h b/storage/innobase/include/lock0priv.h deleted file mode 100644 index 7703a2b7def..00000000000 --- a/storage/innobase/include/lock0priv.h +++ /dev/null @@ -1,101 +0,0 @@ -/****************************************************** -Lock module internal structures and methods. 
- -(c) 2007 Innobase Oy - -Created July 12, 2007 Vasil Dimov -*******************************************************/ - -#ifndef lock0priv_h -#define lock0priv_h - -#ifndef LOCK_MODULE_IMPLEMENTATION -/* If you need to access members of the structures defined in this -file, please write appropriate functions that retrieve them and put -those functions in lock/ */ -#error Do not include lock0priv.h outside of the lock/ module -#endif - -#include "univ.i" -#include "dict0types.h" -#include "hash0hash.h" -#include "trx0types.h" -#include "ut0lst.h" - -/* A table lock */ -typedef struct lock_table_struct lock_table_t; -struct lock_table_struct { - dict_table_t* table; /* database table in dictionary - cache */ - UT_LIST_NODE_T(lock_t) - locks; /* list of locks on the same - table */ -}; - -/* Record lock for a page */ -typedef struct lock_rec_struct lock_rec_t; -struct lock_rec_struct { - ulint space; /* space id */ - ulint page_no; /* page number */ - ulint n_bits; /* number of bits in the lock - bitmap; NOTE: the lock bitmap is - placed immediately after the - lock struct */ -}; - -/* Lock struct */ -struct lock_struct { - trx_t* trx; /* transaction owning the - lock */ - UT_LIST_NODE_T(lock_t) - trx_locks; /* list of the locks of the - transaction */ - ulint type_mode; /* lock type, mode, LOCK_GAP or - LOCK_REC_NOT_GAP, - LOCK_INSERT_INTENTION, - wait flag, ORed */ - hash_node_t hash; /* hash chain node for a record - lock */ - dict_index_t* index; /* index for a record lock */ - union { - lock_table_t tab_lock;/* table lock */ - lock_rec_t rec_lock;/* record lock */ - } un_member; -}; - -/************************************************************************* -Gets the type of a lock. */ -UNIV_INLINE -ulint -lock_get_type( -/*==========*/ - /* out: LOCK_TABLE or LOCK_REC */ - const lock_t* lock); /* in: lock */ - -/************************************************************************** -Looks for a set bit in a record lock bitmap. 
Returns ULINT_UNDEFINED, -if none found. */ - -ulint -lock_rec_find_set_bit( -/*==================*/ - /* out: bit index == heap number of the record, or - ULINT_UNDEFINED if none found */ - lock_t* lock); /* in: record lock with at least one bit set */ - -/************************************************************************* -Gets the previous record lock set on a record. */ - -lock_t* -lock_rec_get_prev( -/*==============*/ - /* out: previous lock on the same record, NULL if - none exists */ - lock_t* in_lock,/* in: record lock */ - ulint heap_no);/* in: heap number of the record */ - -#ifndef UNIV_NONINL -#include "lock0priv.ic" -#endif - -#endif /* lock0priv_h */ diff --git a/storage/innobase/include/lock0priv.ic b/storage/innobase/include/lock0priv.ic deleted file mode 100644 index 4bc8397509d..00000000000 --- a/storage/innobase/include/lock0priv.ic +++ /dev/null @@ -1,32 +0,0 @@ -/****************************************************** -Lock module internal inline methods. - -(c) 2007 Innobase Oy - -Created July 16, 2007 Vasil Dimov -*******************************************************/ - -/* This file contains only methods which are used in -lock/lock0* files, other than lock/lock0lock.c. -I.e. lock/lock0lock.c contains more internal inline -methods but they are used only in that file. */ - -#ifndef LOCK_MODULE_IMPLEMENTATION -#error Do not include lock0priv.ic outside of the lock/ module -#endif - -/************************************************************************* -Gets the type of a lock. 
*/ -UNIV_INLINE -ulint -lock_get_type( -/*==========*/ - /* out: LOCK_TABLE or LOCK_REC */ - const lock_t* lock) /* in: lock */ -{ - ut_ad(lock); - - return(lock->type_mode & LOCK_TYPE_MASK); -} - -/* vim: set filetype=c: */ diff --git a/storage/innobase/include/lock0types.h b/storage/innobase/include/lock0types.h deleted file mode 100644 index 43fd2d60da5..00000000000 --- a/storage/innobase/include/lock0types.h +++ /dev/null @@ -1,16 +0,0 @@ -/****************************************************** -The transaction lock system global types - -(c) 1996 Innobase Oy - -Created 5/7/1996 Heikki Tuuri -*******************************************************/ - -#ifndef lock0types_h -#define lock0types_h - -#define lock_t ib_lock_t -typedef struct lock_struct lock_t; -typedef struct lock_sys_struct lock_sys_t; - -#endif diff --git a/storage/innobase/include/log0log.h b/storage/innobase/include/log0log.h deleted file mode 100644 index 337b9f1e783..00000000000 --- a/storage/innobase/include/log0log.h +++ /dev/null @@ -1,872 +0,0 @@ -/****************************************************** -Database log - -(c) 1995 Innobase Oy - -Created 12/9/1995 Heikki Tuuri -*******************************************************/ - -#ifndef log0log_h -#define log0log_h - -#include "univ.i" -#include "ut0byte.h" -#include "sync0sync.h" -#include "sync0rw.h" - -typedef struct log_struct log_t; -typedef struct log_group_struct log_group_t; - -#ifdef UNIV_DEBUG -extern ibool log_do_write; -extern ibool log_debug_writes; -#else /* UNIV_DEBUG */ -# define log_do_write TRUE -#endif /* UNIV_DEBUG */ - -/* Wait modes for log_write_up_to */ -#define LOG_NO_WAIT 91 -#define LOG_WAIT_ONE_GROUP 92 -#define LOG_WAIT_ALL_GROUPS 93 -#define LOG_MAX_N_GROUPS 32 - -/******************************************************************** -Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint, -so that we know that the limit has been written to a log checkpoint field -on disk. 
*/ - -void -log_fsp_current_free_limit_set_and_checkpoint( -/*==========================================*/ - ulint limit); /* in: limit to set */ -/*********************************************************************** -Calculates where in log files we find a specified lsn. */ - -ulint -log_calc_where_lsn_is( -/*==================*/ - /* out: log file number */ - ib_longlong* log_file_offset, /* out: offset in that file - (including the header) */ - dulint first_header_lsn, /* in: first log file start - lsn */ - dulint lsn, /* in: lsn whose position to - determine */ - ulint n_log_files, /* in: total number of log - files */ - ib_longlong log_file_size); /* in: log file size - (including the header) */ -/**************************************************************** -Writes to the log the string given. The log must be released with -log_release. */ -UNIV_INLINE -dulint -log_reserve_and_write_fast( -/*=======================*/ - /* out: end lsn of the log record, ut_dulint_zero if - did not succeed */ - byte* str, /* in: string */ - ulint len, /* in: string length */ - dulint* start_lsn,/* out: start lsn of the log record */ - ibool* success);/* out: TRUE if success */ -/*************************************************************************** -Releases the log mutex. */ -UNIV_INLINE -void -log_release(void); -/*=============*/ -/*************************************************************************** -Checks if there is need for a log buffer flush or a new checkpoint, and does -this if yes. Any database operation should call this when it has modified -more than about 4 pages. NOTE that this function may only be called when the -OS thread owns no synchronization objects except the dictionary mutex. */ -UNIV_INLINE -void -log_free_check(void); -/*================*/ -/**************************************************************** -Opens the log for log_write_low. The log must be closed with log_close and -released with log_release. 
*/ - -dulint -log_reserve_and_open( -/*=================*/ - /* out: start lsn of the log record */ - ulint len); /* in: length of data to be catenated */ -/**************************************************************** -Writes to the log the string given. It is assumed that the caller holds the -log mutex. */ - -void -log_write_low( -/*==========*/ - byte* str, /* in: string */ - ulint str_len); /* in: string length */ -/**************************************************************** -Closes the log. */ - -dulint -log_close(void); -/*===========*/ - /* out: lsn */ -/**************************************************************** -Gets the current lsn. */ -UNIV_INLINE -dulint -log_get_lsn(void); -/*=============*/ - /* out: current lsn */ -/********************************************************** -Initializes the log. */ - -void -log_init(void); -/*==========*/ -/********************************************************************** -Inits a log group to the log system. */ - -void -log_group_init( -/*===========*/ - ulint id, /* in: group id */ - ulint n_files, /* in: number of log files */ - ulint file_size, /* in: log file size in bytes */ - ulint space_id, /* in: space id of the file space - which contains the log files of this - group */ - ulint archive_space_id); /* in: space id of the file space - which contains some archived log - files for this group; currently, only - for the first log group this is - used */ -/********************************************************** -Completes an i/o to a log file. */ - -void -log_io_complete( -/*============*/ - log_group_t* group); /* in: log group */ -/********************************************************** -This function is called, e.g., when a transaction wants to commit. It checks -that the log has been written to the log file up to the last log entry written -by the transaction. If there is a flush running, it waits and checks if the -flush flushed enough. If not, starts a new flush. 
*/ - -void -log_write_up_to( -/*============*/ - dulint lsn, /* in: log sequence number up to which the log should - be written, ut_dulint_max if not specified */ - ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, - or LOG_WAIT_ALL_GROUPS */ - ibool flush_to_disk); - /* in: TRUE if we want the written log also to be - flushed to disk */ -/******************************************************************** -Does a syncronous flush of the log buffer to disk. */ - -void -log_buffer_flush_to_disk(void); -/*==========================*/ -/******************************************************************** -Flushes the log buffer. Forces it to disk depending on the value of -the configuration parameter innodb_flush_log_at_trx_commit. */ - -void -log_buffer_flush_maybe_sync(void); -/*==========================*/ -/******************************************************************** -Advances the smallest lsn for which there are unflushed dirty blocks in the -buffer pool and also may make a new checkpoint. NOTE: this function may only -be called if the calling thread owns no synchronization objects! */ - -ibool -log_preflush_pool_modified_pages( -/*=============================*/ - /* out: FALSE if there was a flush batch of - the same type running, which means that we - could not start this flush batch */ - dulint new_oldest, /* in: try to advance oldest_modified_lsn - at least to this lsn */ - ibool sync); /* in: TRUE if synchronous operation is - desired */ -/********************************************************** -Makes a checkpoint. Note that this function does not flush dirty -blocks from the buffer pool: it only checks what is lsn of the oldest -modification in the pool, and writes information about the lsn in -log files. Use log_make_checkpoint_at to flush also the pool. 
*/ - -ibool -log_checkpoint( -/*===========*/ - /* out: TRUE if success, FALSE if a checkpoint - write was already running */ - ibool sync, /* in: TRUE if synchronous operation is - desired */ - ibool write_always); /* in: the function normally checks if the - the new checkpoint would have a greater - lsn than the previous one: if not, then no - physical write is done; by setting this - parameter TRUE, a physical write will always be - made to log files */ -/******************************************************************** -Makes a checkpoint at a given lsn or later. */ - -void -log_make_checkpoint_at( -/*===================*/ - dulint lsn, /* in: make a checkpoint at this or a later - lsn, if ut_dulint_max, makes a checkpoint at - the latest lsn */ - ibool write_always); /* in: the function normally checks if the - the new checkpoint would have a greater - lsn than the previous one: if not, then no - physical write is done; by setting this - parameter TRUE, a physical write will always be - made to log files */ -/******************************************************************** -Makes a checkpoint at the latest lsn and writes it to first page of each -data file in the database, so that we know that the file spaces contain -all modifications up to that lsn. This can only be called at database -shutdown. This function also writes all log in log files to the log archive. */ - -void -logs_empty_and_mark_files_at_shutdown(void); -/*=======================================*/ -/********************************************************** -Reads a checkpoint info from a log group header to log_sys->checkpoint_buf. */ - -void -log_group_read_checkpoint_info( -/*===========================*/ - log_group_t* group, /* in: log group */ - ulint field); /* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */ -/*********************************************************************** -Gets info from a checkpoint about a log group. 
*/ - -void -log_checkpoint_get_nth_group_info( -/*==============================*/ - byte* buf, /* in: buffer containing checkpoint info */ - ulint n, /* in: nth slot */ - ulint* file_no,/* out: archived file number */ - ulint* offset);/* out: archived file offset */ -/********************************************************** -Writes checkpoint info to groups. */ - -void -log_groups_write_checkpoint_info(void); -/*==================================*/ -/********************************************************** -Writes info to a buffer of a log group when log files are created in -backup restoration. */ - -void -log_reset_first_header_and_checkpoint( -/*==================================*/ - byte* hdr_buf,/* in: buffer which will be written to the start - of the first log file */ - dulint start); /* in: lsn of the start of the first log file; - we pretend that there is a checkpoint at - start + LOG_BLOCK_HDR_SIZE */ -/************************************************************************ -Starts an archiving operation. */ - -ibool -log_archive_do( -/*===========*/ - /* out: TRUE if succeed, FALSE if an archiving - operation was already running */ - ibool sync, /* in: TRUE if synchronous operation is desired */ - ulint* n_bytes);/* out: archive log buffer size, 0 if nothing to - archive */ -/******************************************************************** -Writes the log contents to the archive up to the lsn when this function was -called, and stops the archiving. When archiving is started again, the archived -log file numbers start from a number one higher, so that the archiving will -not write again to the archived log files which exist when this function -returns. */ - -ulint -log_archive_stop(void); -/*==================*/ - /* out: DB_SUCCESS or DB_ERROR */ -/******************************************************************** -Starts again archiving which has been stopped. 
*/ - -ulint -log_archive_start(void); -/*===================*/ - /* out: DB_SUCCESS or DB_ERROR */ -/******************************************************************** -Stop archiving the log so that a gap may occur in the archived log files. */ - -ulint -log_archive_noarchivelog(void); -/*==========================*/ - /* out: DB_SUCCESS or DB_ERROR */ -/******************************************************************** -Start archiving the log so that a gap may occur in the archived log files. */ - -ulint -log_archive_archivelog(void); -/*========================*/ - /* out: DB_SUCCESS or DB_ERROR */ -/********************************************************** -Generates an archived log file name. */ - -void -log_archived_file_name_gen( -/*=======================*/ - char* buf, /* in: buffer where to write */ - ulint id, /* in: group id */ - ulint file_no);/* in: file number */ -/************************************************************************ -Checks that there is enough free space in the log to start a new query step. -Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this -function may only be called if the calling thread owns no synchronization -objects! */ - -void -log_check_margins(void); -/*===================*/ -/********************************************************** -Reads a specified log segment to a buffer. */ - -void -log_group_read_log_seg( -/*===================*/ - ulint type, /* in: LOG_ARCHIVE or LOG_RECOVER */ - byte* buf, /* in: buffer where to read */ - log_group_t* group, /* in: log group */ - dulint start_lsn, /* in: read area start */ - dulint end_lsn); /* in: read area end */ -/********************************************************** -Writes a buffer to a log file group. 
*/ - -void -log_group_write_buf( -/*================*/ - log_group_t* group, /* in: log group */ - byte* buf, /* in: buffer */ - ulint len, /* in: buffer len; must be divisible - by OS_FILE_LOG_BLOCK_SIZE */ - dulint start_lsn, /* in: start lsn of the buffer; must - be divisible by - OS_FILE_LOG_BLOCK_SIZE */ - ulint new_data_offset);/* in: start offset of new data in - buf: this parameter is used to decide - if we have to write a new log file - header */ -/************************************************************ -Sets the field values in group to correspond to a given lsn. For this function -to work, the values must already be correctly initialized to correspond to -some lsn, for instance, a checkpoint lsn. */ - -void -log_group_set_fields( -/*=================*/ - log_group_t* group, /* in: group */ - dulint lsn); /* in: lsn for which the values should be - set */ -/********************************************************** -Calculates the data capacity of a log group, when the log file headers are not -included. */ - -ulint -log_group_get_capacity( -/*===================*/ - /* out: capacity in bytes */ - log_group_t* group); /* in: log group */ -/**************************************************************** -Gets a log block flush bit. */ -UNIV_INLINE -ibool -log_block_get_flush_bit( -/*====================*/ - /* out: TRUE if this block was the first - to be written in a log flush */ - byte* log_block); /* in: log block */ -/**************************************************************** -Gets a log block number stored in the header. */ -UNIV_INLINE -ulint -log_block_get_hdr_no( -/*=================*/ - /* out: log block number stored in the block - header */ - byte* log_block); /* in: log block */ -/**************************************************************** -Gets a log block data length. 
*/ -UNIV_INLINE -ulint -log_block_get_data_len( -/*===================*/ - /* out: log block data length measured as a - byte offset from the block start */ - byte* log_block); /* in: log block */ -/**************************************************************** -Sets the log block data length. */ -UNIV_INLINE -void -log_block_set_data_len( -/*===================*/ - byte* log_block, /* in: log block */ - ulint len); /* in: data length */ -/**************************************************************** -Calculates the checksum for a log block. */ -UNIV_INLINE -ulint -log_block_calc_checksum( -/*====================*/ - /* out: checksum */ - byte* block); /* in: log block */ -/**************************************************************** -Gets a log block checksum field value. */ -UNIV_INLINE -ulint -log_block_get_checksum( -/*===================*/ - /* out: checksum */ - byte* log_block); /* in: log block */ -/**************************************************************** -Sets a log block checksum field value. */ -UNIV_INLINE -void -log_block_set_checksum( -/*===================*/ - byte* log_block, /* in: log block */ - ulint checksum); /* in: checksum */ -/**************************************************************** -Gets a log block first mtr log record group offset. */ -UNIV_INLINE -ulint -log_block_get_first_rec_group( -/*==========================*/ - /* out: first mtr log record group byte offset - from the block start, 0 if none */ - byte* log_block); /* in: log block */ -/**************************************************************** -Sets the log block first mtr log record group offset. */ -UNIV_INLINE -void -log_block_set_first_rec_group( -/*==========================*/ - byte* log_block, /* in: log block */ - ulint offset); /* in: offset, 0 if none */ -/**************************************************************** -Gets a log block checkpoint number field (4 lowest bytes). 
*/ -UNIV_INLINE -ulint -log_block_get_checkpoint_no( -/*========================*/ - /* out: checkpoint no (4 lowest bytes) */ - byte* log_block); /* in: log block */ -/**************************************************************** -Initializes a log block in the log buffer. */ -UNIV_INLINE -void -log_block_init( -/*===========*/ - byte* log_block, /* in: pointer to the log buffer */ - dulint lsn); /* in: lsn within the log block */ -/**************************************************************** -Initializes a log block in the log buffer in the old, < 3.23.52 format, where -there was no checksum yet. */ -UNIV_INLINE -void -log_block_init_in_old_format( -/*=========================*/ - byte* log_block, /* in: pointer to the log buffer */ - dulint lsn); /* in: lsn within the log block */ -/**************************************************************** -Converts a lsn to a log block number. */ -UNIV_INLINE -ulint -log_block_convert_lsn_to_no( -/*========================*/ - /* out: log block number, it is > 0 and <= 1G */ - dulint lsn); /* in: lsn of a byte within the block */ -/********************************************************** -Prints info of the log. */ - -void -log_print( -/*======*/ - FILE* file); /* in: file where to print */ -/********************************************************** -Peeks the current lsn. */ - -ibool -log_peek_lsn( -/*=========*/ - /* out: TRUE if success, FALSE if could not get the - log system mutex */ - dulint* lsn); /* out: if returns TRUE, current lsn is here */ -/************************************************************************** -Refreshes the statistics used to print per-second averages. 
*/ - -void -log_refresh_stats(void); -/*===================*/ - -extern log_t* log_sys; - -/* Values used as flags */ -#define LOG_FLUSH 7652559 -#define LOG_CHECKPOINT 78656949 -#define LOG_ARCHIVE 11122331 -#define LOG_RECOVER 98887331 - -/* The counting of lsn's starts from this value: this must be non-zero */ -#define LOG_START_LSN ut_dulint_create(0, 16 * OS_FILE_LOG_BLOCK_SIZE) - -#define LOG_BUFFER_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE) -#define LOG_ARCHIVE_BUF_SIZE (srv_log_buffer_size * UNIV_PAGE_SIZE / 4) - -/* Offsets of a log block header */ -#define LOG_BLOCK_HDR_NO 0 /* block number which must be > 0 and - is allowed to wrap around at 2G; the - highest bit is set to 1 if this is the - first log block in a log flush write - segment */ -#define LOG_BLOCK_FLUSH_BIT_MASK 0x80000000UL - /* mask used to get the highest bit in - the preceding field */ -#define LOG_BLOCK_HDR_DATA_LEN 4 /* number of bytes of log written to - this block */ -#define LOG_BLOCK_FIRST_REC_GROUP 6 /* offset of the first start of an - mtr log record group in this log block, - 0 if none; if the value is the same - as LOG_BLOCK_HDR_DATA_LEN, it means - that the first rec group has not yet - been catenated to this log block, but - if it will, it will start at this - offset; an archive recovery can - start parsing the log records starting - from this offset in this log block, - if value not 0 */ -#define LOG_BLOCK_CHECKPOINT_NO 8 /* 4 lower bytes of the value of - log_sys->next_checkpoint_no when the - log block was last written to: if the - block has not yet been written full, - this value is only updated before a - log buffer flush */ -#define LOG_BLOCK_HDR_SIZE 12 /* size of the log block header in - bytes */ - -/* Offsets of a log block trailer from the end of the block */ -#define LOG_BLOCK_CHECKSUM 4 /* 4 byte checksum of the log block - contents; in InnoDB versions - < 3.23.52 this did not contain the - checksum but the same value as - .._HDR_NO */ -#define LOG_BLOCK_TRL_SIZE 
4 /* trailer size in bytes */ - -/* Offsets for a checkpoint field */ -#define LOG_CHECKPOINT_NO 0 -#define LOG_CHECKPOINT_LSN 8 -#define LOG_CHECKPOINT_OFFSET 16 -#define LOG_CHECKPOINT_LOG_BUF_SIZE 20 -#define LOG_CHECKPOINT_ARCHIVED_LSN 24 -#define LOG_CHECKPOINT_GROUP_ARRAY 32 - -/* For each value < LOG_MAX_N_GROUPS the following 8 bytes: */ - -#define LOG_CHECKPOINT_ARCHIVED_FILE_NO 0 -#define LOG_CHECKPOINT_ARCHIVED_OFFSET 4 - -#define LOG_CHECKPOINT_ARRAY_END (LOG_CHECKPOINT_GROUP_ARRAY\ - + LOG_MAX_N_GROUPS * 8) -#define LOG_CHECKPOINT_CHECKSUM_1 LOG_CHECKPOINT_ARRAY_END -#define LOG_CHECKPOINT_CHECKSUM_2 (4 + LOG_CHECKPOINT_ARRAY_END) -#define LOG_CHECKPOINT_FSP_FREE_LIMIT (8 + LOG_CHECKPOINT_ARRAY_END) - /* current fsp free limit in - tablespace 0, in units of one - megabyte; this information is only used - by ibbackup to decide if it can - truncate unused ends of - non-auto-extending data files in space - 0 */ -#define LOG_CHECKPOINT_FSP_MAGIC_N (12 + LOG_CHECKPOINT_ARRAY_END) - /* this magic number tells if the - checkpoint contains the above field: - the field was added to - InnoDB-3.23.50 */ -#define LOG_CHECKPOINT_SIZE (16 + LOG_CHECKPOINT_ARRAY_END) - -#define LOG_CHECKPOINT_FSP_MAGIC_N_VAL 1441231243 - -/* Offsets of a log file header */ -#define LOG_GROUP_ID 0 /* log group number */ -#define LOG_FILE_START_LSN 4 /* lsn of the start of data in this - log file */ -#define LOG_FILE_NO 12 /* 4-byte archived log file number; - this field is only defined in an - archived log file */ -#define LOG_FILE_WAS_CREATED_BY_HOT_BACKUP 16 - /* a 32-byte field which contains - the string 'ibbackup' and the - creation time if the log file was - created by ibbackup --restore; - when mysqld is first time started - on the restored database, it can - print helpful info for the user */ -#define LOG_FILE_ARCH_COMPLETED OS_FILE_LOG_BLOCK_SIZE - /* this 4-byte field is TRUE when - the writing of an archived log file - has been completed; this field is - only defined in an 
archived log file */ -#define LOG_FILE_END_LSN (OS_FILE_LOG_BLOCK_SIZE + 4) - /* lsn where the archived log file - at least extends: actually the - archived log file may extend to a - later lsn, as long as it is within the - same log block as this lsn; this field - is defined only when an archived log - file has been completely written */ -#define LOG_CHECKPOINT_1 OS_FILE_LOG_BLOCK_SIZE - /* first checkpoint field in the log - header; we write alternately to the - checkpoint fields when we make new - checkpoints; this field is only defined - in the first log file of a log group */ -#define LOG_CHECKPOINT_2 (3 * OS_FILE_LOG_BLOCK_SIZE) - /* second checkpoint field in the log - header */ -#define LOG_FILE_HDR_SIZE (4 * OS_FILE_LOG_BLOCK_SIZE) - -#define LOG_GROUP_OK 301 -#define LOG_GROUP_CORRUPTED 302 - -/* Log group consists of a number of log files, each of the same size; a log -group is implemented as a space in the sense of the module fil0fil. */ - -struct log_group_struct{ - /* The following fields are protected by log_sys->mutex */ - ulint id; /* log group id */ - ulint n_files; /* number of files in the group */ - ulint file_size; /* individual log file size in bytes, - including the log file header */ - ulint space_id; /* file space which implements the log - group */ - ulint state; /* LOG_GROUP_OK or - LOG_GROUP_CORRUPTED */ - dulint lsn; /* lsn used to fix coordinates within - the log group */ - ulint lsn_offset; /* the offset of the above lsn */ - ulint n_pending_writes;/* number of currently pending flush - writes for this log group */ - byte** file_header_bufs;/* buffers for each file header in the - group */ - /*-----------------------------*/ - byte** archive_file_header_bufs;/* buffers for each file - header in the group */ - ulint archive_space_id;/* file space which implements the log - group archive */ - ulint archived_file_no;/* file number corresponding to - log_sys->archived_lsn */ - ulint archived_offset;/* file offset corresponding to - 
log_sys->archived_lsn, 0 if we have - not yet written to the archive file - number archived_file_no */ - ulint next_archived_file_no;/* during an archive write, - until the write is completed, we - store the next value for - archived_file_no here: the write - completion function then sets the new - value to ..._file_no */ - ulint next_archived_offset; /* like the preceding field */ - /*-----------------------------*/ - dulint scanned_lsn; /* used only in recovery: recovery scan - succeeded up to this lsn in this log - group */ - byte* checkpoint_buf; /* checkpoint header is written from - this buffer to the group */ - UT_LIST_NODE_T(log_group_t) - log_groups; /* list of log groups */ -}; - -struct log_struct{ - byte pad[64]; /* padding to prevent other memory - update hotspots from residing on the - same memory cache line */ - dulint lsn; /* log sequence number */ - ulint buf_free; /* first free offset within the log - buffer */ - mutex_t mutex; /* mutex protecting the log */ - byte* buf; /* log buffer */ - ulint buf_size; /* log buffer size in bytes */ - ulint max_buf_free; /* recommended maximum value of - buf_free, after which the buffer is - flushed */ - ulint old_buf_free; /* value of buf free when log was - last time opened; only in the debug - version */ - dulint old_lsn; /* value of lsn when log was last time - opened; only in the debug version */ - ibool check_flush_or_checkpoint; - /* this is set to TRUE when there may - be need to flush the log buffer, or - preflush buffer pool pages, or make - a checkpoint; this MUST be TRUE when - lsn - last_checkpoint_lsn > - max_checkpoint_age; this flag is - peeked at by log_free_check(), which - does not reserve the log mutex */ - UT_LIST_BASE_NODE_T(log_group_t) - log_groups; /* log groups */ - - /* The fields involved in the log buffer flush */ - - ulint buf_next_to_write;/* first offset in the log buffer - where the byte content may not exist - written to file, e.g., the start - offset of a log record catenated 
- later; this is advanced when a flush - operation is completed to all the log - groups */ - dulint written_to_some_lsn; - /* first log sequence number not yet - written to any log group; for this to - be advanced, it is enough that the - write i/o has been completed for any - one log group */ - dulint written_to_all_lsn; - /* first log sequence number not yet - written to some log group; for this to - be advanced, it is enough that the - write i/o has been completed for all - log groups */ - dulint write_lsn; /* end lsn for the current running - write */ - ulint write_end_offset;/* the data in buffer has been written - up to this offset when the current - write ends: this field will then - be copied to buf_next_to_write */ - dulint current_flush_lsn;/* end lsn for the current running - write + flush operation */ - dulint flushed_to_disk_lsn; - /* how far we have written the log - AND flushed to disk */ - ulint n_pending_writes;/* number of currently pending flushes - or writes */ - /* NOTE on the 'flush' in names of the fields below: starting from - 4.0.14, we separate the write of the log file and the actual fsync() - or other method to flush it to disk. The names below should really - be 'flush_or_write'! */ - os_event_t no_flush_event; /* this event is in the reset state - when a flush or a write is running; - a thread should wait for this without - owning the log mutex, but NOTE that - to set or reset this event, the - thread MUST own the log mutex! */ - ibool one_flushed; /* during a flush, this is first FALSE - and becomes TRUE when one log group - has been written or flushed */ - os_event_t one_flushed_event;/* this event is reset when the - flush or write has not yet completed - for any log group; e.g., this means - that a transaction has been committed - when this is set; a thread should wait - for this without owning the log mutex, - but NOTE that to set or reset this - event, the thread MUST own the log - mutex! 
*/ - ulint n_log_ios; /* number of log i/os initiated thus - far */ - ulint n_log_ios_old; /* number of log i/o's at the - previous printout */ - time_t last_printout_time;/* when log_print was last time - called */ - - /* Fields involved in checkpoints */ - ulint log_group_capacity; /* capacity of the log group; if - the checkpoint age exceeds this, it is - a serious error because it is possible - we will then overwrite log and spoil - crash recovery */ - ulint max_modified_age_async; - /* when this recommended value for lsn - - buf_pool_get_oldest_modification() - is exceeded, we start an asynchronous - preflush of pool pages */ - ulint max_modified_age_sync; - /* when this recommended value for lsn - - buf_pool_get_oldest_modification() - is exceeded, we start a synchronous - preflush of pool pages */ - ulint adm_checkpoint_interval; - /* administrator-specified checkpoint - interval in terms of log growth in - bytes; the interval actually used by - the database can be smaller */ - ulint max_checkpoint_age_async; - /* when this checkpoint age is exceeded - we start an asynchronous writing of a - new checkpoint */ - ulint max_checkpoint_age; - /* this is the maximum allowed value - for lsn - last_checkpoint_lsn when a - new query step is started */ - dulint next_checkpoint_no; - /* next checkpoint number */ - dulint last_checkpoint_lsn; - /* latest checkpoint lsn */ - dulint next_checkpoint_lsn; - /* next checkpoint lsn */ - ulint n_pending_checkpoint_writes; - /* number of currently pending - checkpoint writes */ - rw_lock_t checkpoint_lock;/* this latch is x-locked when a - checkpoint write is running; a thread - should wait for this without owning - the log mutex */ - byte* checkpoint_buf; /* checkpoint header is read to this - buffer */ - /* Fields involved in archiving */ - ulint archiving_state;/* LOG_ARCH_ON, LOG_ARCH_STOPPING - LOG_ARCH_STOPPED, LOG_ARCH_OFF */ - dulint archived_lsn; /* archiving has advanced to this - lsn */ - ulint 
max_archived_lsn_age_async; - /* recommended maximum age of - archived_lsn, before we start - asynchronous copying to the archive */ - ulint max_archived_lsn_age; - /* maximum allowed age for - archived_lsn */ - dulint next_archived_lsn;/* during an archive write, - until the write is completed, we - store the next value for - archived_lsn here: the write - completion function then sets the new - value to archived_lsn */ - ulint archiving_phase;/* LOG_ARCHIVE_READ or - LOG_ARCHIVE_WRITE */ - ulint n_pending_archive_ios; - /* number of currently pending reads - or writes in archiving */ - rw_lock_t archive_lock; /* this latch is x-locked when an - archive write is running; a thread - should wait for this without owning - the log mutex */ - ulint archive_buf_size;/* size of archive_buf */ - byte* archive_buf; /* log segment is written to the - archive from this buffer */ - os_event_t archiving_on; /* if archiving has been stopped, - a thread can wait for this event to - become signaled */ -}; - -#define LOG_ARCH_ON 71 -#define LOG_ARCH_STOPPING 72 -#define LOG_ARCH_STOPPING2 73 -#define LOG_ARCH_STOPPED 74 -#define LOG_ARCH_OFF 75 - -#ifndef UNIV_NONINL -#include "log0log.ic" -#endif - -#endif diff --git a/storage/innobase/include/log0log.ic b/storage/innobase/include/log0log.ic deleted file mode 100644 index df0a8baf2d5..00000000000 --- a/storage/innobase/include/log0log.ic +++ /dev/null @@ -1,398 +0,0 @@ -/****************************************************** -Database log - -(c) 1995 Innobase Oy - -Created 12/9/1995 Heikki Tuuri -*******************************************************/ - -#include "os0file.h" -#include "mach0data.h" -#include "mtr0mtr.h" - -/********************************************************** -Checks by parsing that the catenated log segment for a single mtr is -consistent. 
*/ - -ibool -log_check_log_recs( -/*===============*/ - byte* buf, /* in: pointer to the start of the log segment - in the log_sys->buf log buffer */ - ulint len, /* in: segment length in bytes */ - dulint buf_start_lsn); /* in: buffer start lsn */ - -/**************************************************************** -Gets a log block flush bit. */ -UNIV_INLINE -ibool -log_block_get_flush_bit( -/*====================*/ - /* out: TRUE if this block was the first - to be written in a log flush */ - byte* log_block) /* in: log block */ -{ - if (LOG_BLOCK_FLUSH_BIT_MASK - & mach_read_from_4(log_block + LOG_BLOCK_HDR_NO)) { - - return(TRUE); - } - - return(FALSE); -} - -/**************************************************************** -Sets the log block flush bit. */ -UNIV_INLINE -void -log_block_set_flush_bit( -/*====================*/ - byte* log_block, /* in: log block */ - ibool val) /* in: value to set */ -{ - ulint field; - - field = mach_read_from_4(log_block + LOG_BLOCK_HDR_NO); - - if (val) { - field = field | LOG_BLOCK_FLUSH_BIT_MASK; - } else { - field = field & ~LOG_BLOCK_FLUSH_BIT_MASK; - } - - mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, field); -} - -/**************************************************************** -Gets a log block number stored in the header. */ -UNIV_INLINE -ulint -log_block_get_hdr_no( -/*=================*/ - /* out: log block number stored in the block - header */ - byte* log_block) /* in: log block */ -{ - return(~LOG_BLOCK_FLUSH_BIT_MASK - & mach_read_from_4(log_block + LOG_BLOCK_HDR_NO)); -} - -/**************************************************************** -Sets the log block number stored in the header; NOTE that this must be set -before the flush bit! 
*/ -UNIV_INLINE -void -log_block_set_hdr_no( -/*=================*/ - byte* log_block, /* in: log block */ - ulint n) /* in: log block number: must be > 0 and - < LOG_BLOCK_FLUSH_BIT_MASK */ -{ - ut_ad(n > 0); - ut_ad(n < LOG_BLOCK_FLUSH_BIT_MASK); - - mach_write_to_4(log_block + LOG_BLOCK_HDR_NO, n); -} - -/**************************************************************** -Gets a log block data length. */ -UNIV_INLINE -ulint -log_block_get_data_len( -/*===================*/ - /* out: log block data length measured as a - byte offset from the block start */ - byte* log_block) /* in: log block */ -{ - return(mach_read_from_2(log_block + LOG_BLOCK_HDR_DATA_LEN)); -} - -/**************************************************************** -Sets the log block data length. */ -UNIV_INLINE -void -log_block_set_data_len( -/*===================*/ - byte* log_block, /* in: log block */ - ulint len) /* in: data length */ -{ - mach_write_to_2(log_block + LOG_BLOCK_HDR_DATA_LEN, len); -} - -/**************************************************************** -Gets a log block first mtr log record group offset. */ -UNIV_INLINE -ulint -log_block_get_first_rec_group( -/*==========================*/ - /* out: first mtr log record group byte offset - from the block start, 0 if none */ - byte* log_block) /* in: log block */ -{ - return(mach_read_from_2(log_block + LOG_BLOCK_FIRST_REC_GROUP)); -} - -/**************************************************************** -Sets the log block first mtr log record group offset. */ -UNIV_INLINE -void -log_block_set_first_rec_group( -/*==========================*/ - byte* log_block, /* in: log block */ - ulint offset) /* in: offset, 0 if none */ -{ - mach_write_to_2(log_block + LOG_BLOCK_FIRST_REC_GROUP, offset); -} - -/**************************************************************** -Gets a log block checkpoint number field (4 lowest bytes). 
*/ -UNIV_INLINE -ulint -log_block_get_checkpoint_no( -/*========================*/ - /* out: checkpoint no (4 lowest bytes) */ - byte* log_block) /* in: log block */ -{ - return(mach_read_from_4(log_block + LOG_BLOCK_CHECKPOINT_NO)); -} - -/**************************************************************** -Sets a log block checkpoint number field (4 lowest bytes). */ -UNIV_INLINE -void -log_block_set_checkpoint_no( -/*========================*/ - byte* log_block, /* in: log block */ - dulint no) /* in: checkpoint no */ -{ - mach_write_to_4(log_block + LOG_BLOCK_CHECKPOINT_NO, - ut_dulint_get_low(no)); -} - -/**************************************************************** -Converts a lsn to a log block number. */ -UNIV_INLINE -ulint -log_block_convert_lsn_to_no( -/*========================*/ - /* out: log block number, it is > 0 and <= 1G */ - dulint lsn) /* in: lsn of a byte within the block */ -{ - ulint no; - - no = ut_dulint_get_low(lsn) / OS_FILE_LOG_BLOCK_SIZE; - no += (ut_dulint_get_high(lsn) % OS_FILE_LOG_BLOCK_SIZE) - * 2 * (0x80000000UL / OS_FILE_LOG_BLOCK_SIZE); - - no = no & 0x3FFFFFFFUL; - - return(no + 1); -} - -/**************************************************************** -Calculates the checksum for a log block. */ -UNIV_INLINE -ulint -log_block_calc_checksum( -/*====================*/ - /* out: checksum */ - byte* block) /* in: log block */ -{ - ulint sum; - ulint sh; - ulint i; - - sum = 1; - sh = 0; - - for (i = 0; i < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; i++) { - sum = sum & 0x7FFFFFFFUL; - sum += (((ulint)(*(block + i))) << sh) + (ulint)(*(block + i)); - sh++; - if (sh > 24) { - sh = 0; - } - } - - return(sum); -} - -/**************************************************************** -Gets a log block checksum field value. 
*/ -UNIV_INLINE -ulint -log_block_get_checksum( -/*===================*/ - /* out: checksum */ - byte* log_block) /* in: log block */ -{ - return(mach_read_from_4(log_block + OS_FILE_LOG_BLOCK_SIZE - - LOG_BLOCK_CHECKSUM)); -} - -/**************************************************************** -Sets a log block checksum field value. */ -UNIV_INLINE -void -log_block_set_checksum( -/*===================*/ - byte* log_block, /* in: log block */ - ulint checksum) /* in: checksum */ -{ - mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE - - LOG_BLOCK_CHECKSUM, - checksum); -} - -/**************************************************************** -Initializes a log block in the log buffer. */ -UNIV_INLINE -void -log_block_init( -/*===========*/ - byte* log_block, /* in: pointer to the log buffer */ - dulint lsn) /* in: lsn within the log block */ -{ - ulint no; - - ut_ad(mutex_own(&(log_sys->mutex))); - - no = log_block_convert_lsn_to_no(lsn); - - log_block_set_hdr_no(log_block, no); - - log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE); - log_block_set_first_rec_group(log_block, 0); -} - -/**************************************************************** -Initializes a log block in the log buffer in the old format, where there -was no checksum yet. */ -UNIV_INLINE -void -log_block_init_in_old_format( -/*=========================*/ - byte* log_block, /* in: pointer to the log buffer */ - dulint lsn) /* in: lsn within the log block */ -{ - ulint no; - - ut_ad(mutex_own(&(log_sys->mutex))); - - no = log_block_convert_lsn_to_no(lsn); - - log_block_set_hdr_no(log_block, no); - mach_write_to_4(log_block + OS_FILE_LOG_BLOCK_SIZE - - LOG_BLOCK_CHECKSUM, no); - log_block_set_data_len(log_block, LOG_BLOCK_HDR_SIZE); - log_block_set_first_rec_group(log_block, 0); -} - -/**************************************************************** -Writes to the log the string given. The log must be released with -log_release. 
*/ -UNIV_INLINE -dulint -log_reserve_and_write_fast( -/*=======================*/ - /* out: end lsn of the log record, ut_dulint_zero if - did not succeed */ - byte* str, /* in: string */ - ulint len, /* in: string length */ - dulint* start_lsn,/* out: start lsn of the log record */ - ibool* success)/* out: TRUE if success */ -{ - log_t* log = log_sys; - ulint data_len; - dulint lsn; - - *success = TRUE; - - mutex_enter(&(log->mutex)); - - data_len = len + log->buf_free % OS_FILE_LOG_BLOCK_SIZE; - - if (data_len >= OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { - - /* The string does not fit within the current log block - or the log block would become full */ - - *success = FALSE; - - mutex_exit(&(log->mutex)); - - return(ut_dulint_zero); - } - - *start_lsn = log->lsn; - - ut_memcpy(log->buf + log->buf_free, str, len); - - log_block_set_data_len(ut_align_down(log->buf + log->buf_free, - OS_FILE_LOG_BLOCK_SIZE), - data_len); -#ifdef UNIV_LOG_DEBUG - log->old_buf_free = log->buf_free; - log->old_lsn = log->lsn; -#endif - log->buf_free += len; - - ut_ad(log->buf_free <= log->buf_size); - - lsn = ut_dulint_add(log->lsn, len); - - log->lsn = lsn; - -#ifdef UNIV_LOG_DEBUG - log_check_log_recs(log->buf + log->old_buf_free, - log->buf_free - log->old_buf_free, log->old_lsn); -#endif - return(lsn); -} - -/*************************************************************************** -Releases the log mutex. */ -UNIV_INLINE -void -log_release(void) -/*=============*/ -{ - mutex_exit(&(log_sys->mutex)); -} - -/**************************************************************** -Gets the current lsn. 
*/ -UNIV_INLINE -dulint -log_get_lsn(void) -/*=============*/ - /* out: current lsn */ -{ - dulint lsn; - - mutex_enter(&(log_sys->mutex)); - - lsn = log_sys->lsn; - - mutex_exit(&(log_sys->mutex)); - - return(lsn); -} - -/*************************************************************************** -Checks if there is need for a log buffer flush or a new checkpoint, and does -this if yes. Any database operation should call this when it has modified -more than about 4 pages. NOTE that this function may only be called when the -OS thread owns no synchronization objects except the dictionary mutex. */ -UNIV_INLINE -void -log_free_check(void) -/*================*/ -{ - /* ut_ad(sync_thread_levels_empty()); */ - - if (log_sys->check_flush_or_checkpoint) { - - log_check_margins(); - } -} diff --git a/storage/innobase/include/log0recv.h b/storage/innobase/include/log0recv.h deleted file mode 100644 index 091bbe34562..00000000000 --- a/storage/innobase/include/log0recv.h +++ /dev/null @@ -1,349 +0,0 @@ -/****************************************************** -Recovery - -(c) 1997 Innobase Oy - -Created 9/20/1997 Heikki Tuuri -*******************************************************/ - -#ifndef log0recv_h -#define log0recv_h - -#include "univ.i" -#include "ut0byte.h" -#include "page0types.h" -#include "hash0hash.h" -#include "log0log.h" - -#ifdef UNIV_HOTBACKUP -extern ibool recv_replay_file_ops; -#endif /* UNIV_HOTBACKUP */ - -/*********************************************************************** -Reads the checkpoint info needed in hot backup. 
*/ - -ibool -recv_read_cp_info_for_backup( -/*=========================*/ - /* out: TRUE if success */ - byte* hdr, /* in: buffer containing the log group header */ - dulint* lsn, /* out: checkpoint lsn */ - ulint* offset, /* out: checkpoint offset in the log group */ - ulint* fsp_limit,/* out: fsp limit of space 0, 1000000000 if the - database is running with < version 3.23.50 of InnoDB */ - dulint* cp_no, /* out: checkpoint number */ - dulint* first_header_lsn); - /* out: lsn of the start of the first log file */ -/*********************************************************************** -Scans the log segment and n_bytes_scanned is set to the length of valid -log scanned. */ - -void -recv_scan_log_seg_for_backup( -/*=========================*/ - byte* buf, /* in: buffer containing log data */ - ulint buf_len, /* in: data length in that buffer */ - dulint* scanned_lsn, /* in/out: lsn of buffer start, - we return scanned lsn */ - ulint* scanned_checkpoint_no, - /* in/out: 4 lowest bytes of the - highest scanned checkpoint number so - far */ - ulint* n_bytes_scanned);/* out: how much we were able to - scan, smaller than buf_len if log - data ended here */ -/*********************************************************************** -Returns TRUE if recovery is currently running. */ -UNIV_INLINE -ibool -recv_recovery_is_on(void); -/*=====================*/ -/*********************************************************************** -Returns TRUE if recovery from backup is currently running. */ -UNIV_INLINE -ibool -recv_recovery_from_backup_is_on(void); -/*=================================*/ -/**************************************************************************** -Applies the hashed log records to the page, if the page lsn is less than the -lsn of a log record. This can be called when a buffer page has just been -read in, or also for a page already in the buffer pool. 
*/ - -void -recv_recover_page( -/*==============*/ - ibool recover_backup, /* in: TRUE if we are recovering a backup - page: then we do not acquire any latches - since the page was read in outside the - buffer pool */ - ibool just_read_in, /* in: TRUE if the i/o-handler calls this for - a freshly read page */ - page_t* page, /* in: buffer page */ - ulint space, /* in: space id */ - ulint page_no); /* in: page number */ -/************************************************************ -Recovers from a checkpoint. When this function returns, the database is able -to start processing of new user transactions, but the function -recv_recovery_from_checkpoint_finish should be called later to complete -the recovery and free the resources used in it. */ - -ulint -recv_recovery_from_checkpoint_start( -/*================================*/ - /* out: error code or DB_SUCCESS */ - ulint type, /* in: LOG_CHECKPOINT or LOG_ARCHIVE */ - dulint limit_lsn, /* in: recover up to this lsn if possible */ - dulint min_flushed_lsn,/* in: min flushed lsn from data files */ - dulint max_flushed_lsn);/* in: max flushed lsn from data files */ -/************************************************************ -Completes recovery from a checkpoint. */ - -void -recv_recovery_from_checkpoint_finish(void); -/*======================================*/ -/*********************************************************** -Scans log from a buffer and stores new log data to the parsing buffer. Parses -and hashes the log records if new data found. 
*/ - -ibool -recv_scan_log_recs( -/*===============*/ - /* out: TRUE if limit_lsn has been reached, or - not able to scan any more in this log group */ - ibool apply_automatically,/* in: TRUE if we want this function to - apply log records automatically when the - hash table becomes full; in the hot backup tool - the tool does the applying, not this - function */ - ulint available_memory,/* in: we let the hash table of recs to grow - to this size, at the maximum */ - ibool store_to_hash, /* in: TRUE if the records should be stored - to the hash table; this is set to FALSE if just - debug checking is needed */ - byte* buf, /* in: buffer containing a log segment or - garbage */ - ulint len, /* in: buffer length */ - dulint start_lsn, /* in: buffer start lsn */ - dulint* contiguous_lsn, /* in/out: it is known that all log groups - contain contiguous log data up to this lsn */ - dulint* group_scanned_lsn);/* out: scanning succeeded up to this lsn */ -/********************************************************** -Resets the logs. The contents of log files will be lost! */ - -void -recv_reset_logs( -/*============*/ - dulint lsn, /* in: reset to this lsn rounded up to - be divisible by OS_FILE_LOG_BLOCK_SIZE, - after which we add LOG_BLOCK_HDR_SIZE */ -#ifdef UNIV_LOG_ARCHIVE - ulint arch_log_no, /* in: next archived log file number */ -#endif /* UNIV_LOG_ARCHIVE */ - ibool new_logs_created);/* in: TRUE if resetting logs is done - at the log creation; FALSE if it is done - after archive recovery */ -#ifdef UNIV_HOTBACKUP -/********************************************************** -Creates new log files after a backup has been restored. 
*/ - -void -recv_reset_log_files_for_backup( -/*============================*/ - const char* log_dir, /* in: log file directory path */ - ulint n_log_files, /* in: number of log files */ - ulint log_file_size, /* in: log file size */ - dulint lsn); /* in: new start lsn, must be - divisible by OS_FILE_LOG_BLOCK_SIZE */ -#endif /* UNIV_HOTBACKUP */ -/************************************************************ -Creates the recovery system. */ - -void -recv_sys_create(void); -/*=================*/ -/************************************************************ -Inits the recovery system for a recovery operation. */ - -void -recv_sys_init( -/*==========*/ - ibool recover_from_backup, /* in: TRUE if this is called - to recover from a hot backup */ - ulint available_memory); /* in: available memory in bytes */ -/*********************************************************************** -Empties the hash table of stored log records, applying them to appropriate -pages. */ - -void -recv_apply_hashed_log_recs( -/*=======================*/ - ibool allow_ibuf); /* in: if TRUE, also ibuf operations are - allowed during the application; if FALSE, - no ibuf operations are allowed, and after - the application all file pages are flushed to - disk and invalidated in buffer pool: this - alternative means that no new log records - can be generated during the application */ -#ifdef UNIV_HOTBACKUP -/*********************************************************************** -Applies log records in the hash table to a backup. */ - -void -recv_apply_log_recs_for_backup(void); -/*================================*/ -#endif -#ifdef UNIV_LOG_ARCHIVE -/************************************************************ -Recovers from archived log files, and also from log files, if they exist. 
*/ - -ulint -recv_recovery_from_archive_start( -/*=============================*/ - /* out: error code or DB_SUCCESS */ - dulint min_flushed_lsn,/* in: min flushed lsn field from the - data files */ - dulint limit_lsn, /* in: recover up to this lsn if possible */ - ulint first_log_no); /* in: number of the first archived log file - to use in the recovery; the file will be - searched from INNOBASE_LOG_ARCH_DIR specified - in server config file */ -/************************************************************ -Completes recovery from archive. */ - -void -recv_recovery_from_archive_finish(void); -/*===================================*/ -#endif /* UNIV_LOG_ARCHIVE */ - -/* Block of log record data */ -typedef struct recv_data_struct recv_data_t; -struct recv_data_struct{ - recv_data_t* next; /* pointer to the next block or NULL */ - /* the log record data is stored physically - immediately after this struct, max amount - RECV_DATA_BLOCK_SIZE bytes of it */ -}; - -/* Stored log record struct */ -typedef struct recv_struct recv_t; -struct recv_struct{ - byte type; /* log record type */ - ulint len; /* log record body length in bytes */ - recv_data_t* data; /* chain of blocks containing the log record - body */ - dulint start_lsn;/* start lsn of the log segment written by - the mtr which generated this log record: NOTE - that this is not necessarily the start lsn of - this log record */ - dulint end_lsn;/* end lsn of the log segment written by - the mtr which generated this log record: NOTE - that this is not necessarily the end lsn of - this log record */ - UT_LIST_NODE_T(recv_t) - rec_list;/* list of log records for this page */ -}; - -/* Hashed page file address struct */ -typedef struct recv_addr_struct recv_addr_t; -struct recv_addr_struct{ - ulint state; /* RECV_NOT_PROCESSED, RECV_BEING_PROCESSED, - or RECV_PROCESSED */ - ulint space; /* space id */ - ulint page_no;/* page number */ - UT_LIST_BASE_NODE_T(recv_t) - rec_list;/* list of log records for this page */ - 
hash_node_t addr_hash; -}; - -/* Recovery system data structure */ -typedef struct recv_sys_struct recv_sys_t; -struct recv_sys_struct{ - mutex_t mutex; /* mutex protecting the fields apply_log_recs, - n_addrs, and the state field in each recv_addr - struct */ - ibool apply_log_recs; - /* this is TRUE when log rec application to - pages is allowed; this flag tells the - i/o-handler if it should do log record - application */ - ibool apply_batch_on; - /* this is TRUE when a log rec application - batch is running */ - dulint lsn; /* log sequence number */ - ulint last_log_buf_size; - /* size of the log buffer when the database - last time wrote to the log */ - byte* last_block; - /* possible incomplete last recovered log - block */ - byte* last_block_buf_start; - /* the nonaligned start address of the - preceding buffer */ - byte* buf; /* buffer for parsing log records */ - ulint len; /* amount of data in buf */ - dulint parse_start_lsn; - /* this is the lsn from which we were able to - start parsing log records and adding them to - the hash table; ut_dulint_zero if a suitable - start point not found yet */ - dulint scanned_lsn; - /* the log data has been scanned up to this - lsn */ - ulint scanned_checkpoint_no; - /* the log data has been scanned up to this - checkpoint number (lowest 4 bytes) */ - ulint recovered_offset; - /* start offset of non-parsed log records in - buf */ - dulint recovered_lsn; - /* the log records have been parsed up to - this lsn */ - dulint limit_lsn;/* recovery should be made at most up to this - lsn */ - ibool found_corrupt_log; - /* this is set to TRUE if we during log - scan find a corrupt log block, or a corrupt - log record, or there is a log parsing - buffer overflow */ - log_group_t* archive_group; - /* in archive recovery: the log group whose - archive is read */ - mem_heap_t* heap; /* memory heap of log records and file - addresses*/ - hash_table_t* addr_hash;/* hash table of file addresses of pages */ - ulint n_addrs;/* number of 
not processed hashed file - addresses in the hash table */ -}; - -extern recv_sys_t* recv_sys; -extern ibool recv_recovery_on; -extern ibool recv_no_ibuf_operations; -extern ibool recv_needed_recovery; - -extern ibool recv_lsn_checks_on; -#ifdef UNIV_HOTBACKUP -extern ibool recv_is_making_a_backup; -#endif /* UNIV_HOTBACKUP */ -extern ulint recv_max_parsed_page_no; - -/* Size of the parsing buffer; it must accommodate RECV_SCAN_SIZE many -times! */ -#define RECV_PARSING_BUF_SIZE (2 * 1024 * 1024) - -/* Size of block reads when the log groups are scanned forward to do a -roll-forward */ -#define RECV_SCAN_SIZE (4 * UNIV_PAGE_SIZE) - -/* States of recv_addr_struct */ -#define RECV_NOT_PROCESSED 71 -#define RECV_BEING_READ 72 -#define RECV_BEING_PROCESSED 73 -#define RECV_PROCESSED 74 - -extern ulint recv_n_pool_free_frames; - -#ifndef UNIV_NONINL -#include "log0recv.ic" -#endif - -#endif diff --git a/storage/innobase/include/log0recv.ic b/storage/innobase/include/log0recv.ic deleted file mode 100644 index 489641bade2..00000000000 --- a/storage/innobase/include/log0recv.ic +++ /dev/null @@ -1,35 +0,0 @@ -/****************************************************** -Recovery - -(c) 1997 Innobase Oy - -Created 9/20/1997 Heikki Tuuri -*******************************************************/ - -#include "sync0sync.h" -#include "mem0mem.h" -#include "log0log.h" -#include "os0file.h" - -extern ibool recv_recovery_from_backup_on; - -/*********************************************************************** -Returns TRUE if recovery is currently running. */ -UNIV_INLINE -ibool -recv_recovery_is_on(void) -/*=====================*/ -{ - return(recv_recovery_on); -} - -/*********************************************************************** -Returns TRUE if recovery from backup is currently running. 
*/ -UNIV_INLINE -ibool -recv_recovery_from_backup_is_on(void) -/*=================================*/ -{ - return(recv_recovery_from_backup_on); -} - diff --git a/storage/innobase/include/mach0data.h b/storage/innobase/include/mach0data.h deleted file mode 100644 index 25b619b3f12..00000000000 --- a/storage/innobase/include/mach0data.h +++ /dev/null @@ -1,345 +0,0 @@ -/********************************************************************** -Utilities for converting data from the database file -to the machine format. - -(c) 1995 Innobase Oy - -Created 11/28/1995 Heikki Tuuri -***********************************************************************/ - -#ifndef mach0data_h -#define mach0data_h - -#include "univ.i" -#include "ut0byte.h" - -/* The data and all fields are always stored in a database file -in the same format: ascii, big-endian, ... . -All data in the files MUST be accessed using the functions in this -module. */ - -/*********************************************************** -The following function is used to store data in one byte. */ -UNIV_INLINE -void -mach_write_to_1( -/*============*/ - byte* b, /* in: pointer to byte where to store */ - ulint n); /* in: ulint integer to be stored, >= 0, < 256 */ -/************************************************************ -The following function is used to fetch data from one byte. */ -UNIV_INLINE -ulint -mach_read_from_1( -/*=============*/ - /* out: ulint integer, >= 0, < 256 */ - byte* b); /* in: pointer to byte */ -/*********************************************************** -The following function is used to store data in two consecutive -bytes. We store the most significant byte to the lower address. */ -UNIV_INLINE -void -mach_write_to_2( -/*============*/ - byte* b, /* in: pointer to two bytes where to store */ - ulint n); /* in: ulint integer to be stored, >= 0, < 64k */ -/************************************************************ -The following function is used to fetch data from two consecutive -bytes. 
The most significant byte is at the lowest address. */ -UNIV_INLINE -ulint -mach_read_from_2( -/*=============*/ - /* out: ulint integer, >= 0, < 64k */ - byte* b); /* in: pointer to two bytes */ - -/************************************************************ -The following function is used to convert a 16-bit data item -to the canonical format, for fast bytewise equality test -against memory. */ -UNIV_INLINE -uint16 -mach_encode_2( -/*==========*/ - /* out: 16-bit integer in canonical format */ - ulint n); /* in: integer in machine-dependent format */ -/************************************************************ -The following function is used to convert a 16-bit data item -from the canonical format, for fast bytewise equality test -against memory. */ -UNIV_INLINE -ulint -mach_decode_2( -/*==========*/ - /* out: integer in machine-dependent format */ - uint16 n); /* in: 16-bit integer in canonical format */ -/*********************************************************** -The following function is used to store data in 3 consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_3( -/*============*/ - byte* b, /* in: pointer to 3 bytes where to store */ - ulint n); /* in: ulint integer to be stored */ -/************************************************************ -The following function is used to fetch data from 3 consecutive -bytes. The most significant byte is at the lowest address. */ -UNIV_INLINE -ulint -mach_read_from_3( -/*=============*/ - /* out: ulint integer */ - byte* b); /* in: pointer to 3 bytes */ -/*********************************************************** -The following function is used to store data in four consecutive -bytes. We store the most significant byte to the lowest address. 
*/ -UNIV_INLINE -void -mach_write_to_4( -/*============*/ - byte* b, /* in: pointer to four bytes where to store */ - ulint n); /* in: ulint integer to be stored */ -/************************************************************ -The following function is used to fetch data from 4 consecutive -bytes. The most significant byte is at the lowest address. */ -UNIV_INLINE -ulint -mach_read_from_4( -/*=============*/ - /* out: ulint integer */ - byte* b); /* in: pointer to four bytes */ -/************************************************************* -Writes a ulint in a compressed form (1..5 bytes). */ -UNIV_INLINE -ulint -mach_write_compressed( -/*==================*/ - /* out: stored size in bytes */ - byte* b, /* in: pointer to memory where to store */ - ulint n); /* in: ulint integer to be stored */ -/************************************************************* -Returns the size of an ulint when written in the compressed form. */ -UNIV_INLINE -ulint -mach_get_compressed_size( -/*=====================*/ - /* out: compressed size in bytes */ - ulint n); /* in: ulint integer to be stored */ -/************************************************************* -Reads a ulint in a compressed form. */ -UNIV_INLINE -ulint -mach_read_compressed( -/*=================*/ - /* out: read integer */ - byte* b); /* in: pointer to memory from where to read */ -/*********************************************************** -The following function is used to store data in 6 consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_6( -/*============*/ - byte* b, /* in: pointer to 6 bytes where to store */ - dulint n); /* in: dulint integer to be stored */ -/************************************************************ -The following function is used to fetch data from 6 consecutive -bytes. The most significant byte is at the lowest address. 
*/ -UNIV_INLINE -dulint -mach_read_from_6( -/*=============*/ - /* out: dulint integer */ - byte* b); /* in: pointer to 6 bytes */ -/*********************************************************** -The following function is used to store data in 7 consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_7( -/*============*/ - byte* b, /* in: pointer to 7 bytes where to store */ - dulint n); /* in: dulint integer to be stored */ -/************************************************************ -The following function is used to fetch data from 7 consecutive -bytes. The most significant byte is at the lowest address. */ -UNIV_INLINE -dulint -mach_read_from_7( -/*=============*/ - /* out: dulint integer */ - byte* b); /* in: pointer to 7 bytes */ -/*********************************************************** -The following function is used to store data in 8 consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_8( -/*============*/ - byte* b, /* in: pointer to 8 bytes where to store */ - dulint n); /* in: dulint integer to be stored */ -/************************************************************ -The following function is used to fetch data from 8 consecutive -bytes. The most significant byte is at the lowest address. */ -UNIV_INLINE -dulint -mach_read_from_8( -/*=============*/ - /* out: dulint integer */ - byte* b); /* in: pointer to 8 bytes */ -/************************************************************* -Writes a dulint in a compressed form (5..9 bytes). */ -UNIV_INLINE -ulint -mach_dulint_write_compressed( -/*=========================*/ - /* out: size in bytes */ - byte* b, /* in: pointer to memory where to store */ - dulint n); /* in: dulint integer to be stored */ -/************************************************************* -Returns the size of a dulint when written in the compressed form. 
*/ -UNIV_INLINE -ulint -mach_dulint_get_compressed_size( -/*============================*/ - /* out: compressed size in bytes */ - dulint n); /* in: dulint integer to be stored */ -/************************************************************* -Reads a dulint in a compressed form. */ -UNIV_INLINE -dulint -mach_dulint_read_compressed( -/*========================*/ - /* out: read dulint */ - byte* b); /* in: pointer to memory from where to read */ -/************************************************************* -Writes a dulint in a compressed form (1..11 bytes). */ -UNIV_INLINE -ulint -mach_dulint_write_much_compressed( -/*==============================*/ - /* out: size in bytes */ - byte* b, /* in: pointer to memory where to store */ - dulint n); /* in: dulint integer to be stored */ -/************************************************************* -Returns the size of a dulint when written in the compressed form. */ -UNIV_INLINE -ulint -mach_dulint_get_much_compressed_size( -/*=================================*/ - /* out: compressed size in bytes */ - dulint n); /* in: dulint integer to be stored */ -/************************************************************* -Reads a dulint in a compressed form. */ -UNIV_INLINE -dulint -mach_dulint_read_much_compressed( -/*=============================*/ - /* out: read dulint */ - byte* b); /* in: pointer to memory from where to read */ -/************************************************************* -Reads a ulint in a compressed form if the log record fully contains it. */ - -byte* -mach_parse_compressed( -/*==================*/ - /* out: pointer to end of the stored field, NULL if - not complete */ - byte* ptr, /* in: pointer to buffer from where to read */ - byte* end_ptr,/* in: pointer to end of the buffer */ - ulint* val); /* out: read value */ -/************************************************************* -Reads a dulint in a compressed form if the log record fully contains it. 
*/ - -byte* -mach_dulint_parse_compressed( -/*=========================*/ - /* out: pointer to end of the stored field, NULL if - not complete */ - byte* ptr, /* in: pointer to buffer from where to read */ - byte* end_ptr,/* in: pointer to end of the buffer */ - dulint* val); /* out: read value */ -/************************************************************* -Reads a double. It is stored in a little-endian format. */ -UNIV_INLINE -double -mach_double_read( -/*=============*/ - /* out: double read */ - byte* b); /* in: pointer to memory from where to read */ -/************************************************************* -Writes a double. It is stored in a little-endian format. */ -UNIV_INLINE -void -mach_double_write( -/*==============*/ - byte* b, /* in: pointer to memory where to write */ - double d); /* in: double */ -/************************************************************* -Reads a float. It is stored in a little-endian format. */ -UNIV_INLINE -float -mach_float_read( -/*============*/ - /* out: float read */ - byte* b); /* in: pointer to memory from where to read */ -/************************************************************* -Writes a float. It is stored in a little-endian format. */ -UNIV_INLINE -void -mach_float_write( -/*=============*/ - byte* b, /* in: pointer to memory where to write */ - float d); /* in: float */ -/************************************************************* -Reads a ulint stored in the little-endian format. */ -UNIV_INLINE -ulint -mach_read_from_n_little_endian( -/*===========================*/ - /* out: unsigned long int */ - byte* buf, /* in: from where to read */ - ulint buf_size); /* in: from how many bytes to read */ -/************************************************************* -Writes a ulint in the little-endian format. 
*/ -UNIV_INLINE -void -mach_write_to_n_little_endian( -/*==========================*/ - byte* dest, /* in: where to write */ - ulint dest_size, /* in: into how many bytes to write */ - ulint n); /* in: unsigned long int to write */ -/************************************************************* -Reads a ulint stored in the little-endian format. */ -UNIV_INLINE -ulint -mach_read_from_2_little_endian( -/*===========================*/ - /* out: unsigned long int */ - byte* buf); /* in: from where to read */ -/************************************************************* -Writes a ulint in the little-endian format. */ -UNIV_INLINE -void -mach_write_to_2_little_endian( -/*==========================*/ - byte* dest, /* in: where to write */ - ulint n); /* in: unsigned long int to write */ - -/************************************************************* -Convert integral type from storage byte order (big endian) to -host byte order. */ -UNIV_INLINE -ullint -mach_read_int_type( -/*===============*/ - /* out: integer value */ - const byte* src, /* in: where to read from */ - ulint len, /* in: length of src */ - ibool unsigned_type); /* in: signed or unsigned flag */ -#ifndef UNIV_NONINL -#include "mach0data.ic" -#endif - -#endif diff --git a/storage/innobase/include/mach0data.ic b/storage/innobase/include/mach0data.ic deleted file mode 100644 index ec15c10c661..00000000000 --- a/storage/innobase/include/mach0data.ic +++ /dev/null @@ -1,734 +0,0 @@ -/********************************************************************** -Utilities for converting data from the database file -to the machine format. - -(c) 1995 Innobase Oy - -Created 11/28/1995 Heikki Tuuri -***********************************************************************/ - -#include "ut0mem.h" - -/*********************************************************** -The following function is used to store data in one byte. 
*/ -UNIV_INLINE -void -mach_write_to_1( -/*============*/ - byte* b, /* in: pointer to byte where to store */ - ulint n) /* in: ulint integer to be stored, >= 0, < 256 */ -{ - ut_ad(b); - ut_ad(n <= 0xFFUL); - - b[0] = (byte)n; -} - -/************************************************************ -The following function is used to fetch data from one byte. */ -UNIV_INLINE -ulint -mach_read_from_1( -/*=============*/ - /* out: ulint integer, >= 0, < 256 */ - byte* b) /* in: pointer to byte */ -{ - ut_ad(b); - return((ulint)(b[0])); -} - -/*********************************************************** -The following function is used to store data in two consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_2( -/*============*/ - byte* b, /* in: pointer to two bytes where to store */ - ulint n) /* in: ulint integer to be stored */ -{ - ut_ad(b); - ut_ad(n <= 0xFFFFUL); - - b[0] = (byte)(n >> 8); - b[1] = (byte)(n); -} - -/************************************************************ -The following function is used to fetch data from 2 consecutive -bytes. The most significant byte is at the lowest address. */ -UNIV_INLINE -ulint -mach_read_from_2( -/*=============*/ - /* out: ulint integer */ - byte* b) /* in: pointer to 2 bytes */ -{ - ut_ad(b); - return( ((ulint)(b[0]) << 8) - + (ulint)(b[1]) - ); -} - -/************************************************************ -The following function is used to convert a 16-bit data item -to the canonical format, for fast bytewise equality test -against memory. 
*/ -UNIV_INLINE -uint16 -mach_encode_2( -/*==========*/ - /* out: 16-bit integer in canonical format */ - ulint n) /* in: integer in machine-dependent format */ -{ - uint16 ret; - ut_ad(2 == sizeof ret); - mach_write_to_2((byte*) &ret, n); - return(ret); -} -/************************************************************ -The following function is used to convert a 16-bit data item -from the canonical format, for fast bytewise equality test -against memory. */ -UNIV_INLINE -ulint -mach_decode_2( -/*==========*/ - /* out: integer in machine-dependent format */ - uint16 n) /* in: 16-bit integer in canonical format */ -{ - ut_ad(2 == sizeof n); - return(mach_read_from_2((byte*) &n)); -} - -/*********************************************************** -The following function is used to store data in 3 consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_3( -/*============*/ - byte* b, /* in: pointer to 3 bytes where to store */ - ulint n) /* in: ulint integer to be stored */ -{ - ut_ad(b); - ut_ad(n <= 0xFFFFFFUL); - - b[0] = (byte)(n >> 16); - b[1] = (byte)(n >> 8); - b[2] = (byte)(n); -} - -/************************************************************ -The following function is used to fetch data from 3 consecutive -bytes. The most significant byte is at the lowest address. */ -UNIV_INLINE -ulint -mach_read_from_3( -/*=============*/ - /* out: ulint integer */ - byte* b) /* in: pointer to 3 bytes */ -{ - ut_ad(b); - return( ((ulint)(b[0]) << 16) - + ((ulint)(b[1]) << 8) - + (ulint)(b[2]) - ); -} - -/*********************************************************** -The following function is used to store data in four consecutive -bytes. We store the most significant byte to the lowest address. 
*/ -UNIV_INLINE -void -mach_write_to_4( -/*============*/ - byte* b, /* in: pointer to four bytes where to store */ - ulint n) /* in: ulint integer to be stored */ -{ - ut_ad(b); - - b[0] = (byte)(n >> 24); - b[1] = (byte)(n >> 16); - b[2] = (byte)(n >> 8); - b[3] = (byte)n; -} - -/************************************************************ -The following function is used to fetch data from 4 consecutive -bytes. The most significant byte is at the lowest address. */ -UNIV_INLINE -ulint -mach_read_from_4( -/*=============*/ - /* out: ulint integer */ - byte* b) /* in: pointer to four bytes */ -{ - ut_ad(b); - return( ((ulint)(b[0]) << 24) - + ((ulint)(b[1]) << 16) - + ((ulint)(b[2]) << 8) - + (ulint)(b[3]) - ); -} - -/************************************************************* -Writes a ulint in a compressed form where the first byte codes the -length of the stored ulint. We look at the most significant bits of -the byte. If the most significant bit is zero, it means 1-byte storage, -else if the 2nd bit is 0, it means 2-byte storage, else if 3rd is 0, -it means 3-byte storage, else if 4th is 0, it means 4-byte storage, -else the storage is 5-byte. */ -UNIV_INLINE -ulint -mach_write_compressed( -/*==================*/ - /* out: compressed size in bytes */ - byte* b, /* in: pointer to memory where to store */ - ulint n) /* in: ulint integer (< 2^32) to be stored */ -{ - ut_ad(b); - - if (n < 0x80UL) { - mach_write_to_1(b, n); - return(1); - } else if (n < 0x4000UL) { - mach_write_to_2(b, n | 0x8000UL); - return(2); - } else if (n < 0x200000UL) { - mach_write_to_3(b, n | 0xC00000UL); - return(3); - } else if (n < 0x10000000UL) { - mach_write_to_4(b, n | 0xE0000000UL); - return(4); - } else { - mach_write_to_1(b, 0xF0UL); - mach_write_to_4(b + 1, n); - return(5); - } -} - -/************************************************************* -Returns the size of a ulint when written in the compressed form. 
*/ -UNIV_INLINE -ulint -mach_get_compressed_size( -/*=====================*/ - /* out: compressed size in bytes */ - ulint n) /* in: ulint integer (< 2^32) to be stored */ -{ - if (n < 0x80UL) { - return(1); - } else if (n < 0x4000UL) { - return(2); - } else if (n < 0x200000UL) { - return(3); - } else if (n < 0x10000000UL) { - return(4); - } else { - return(5); - } -} - -/************************************************************* -Reads a ulint in a compressed form. */ -UNIV_INLINE -ulint -mach_read_compressed( -/*=================*/ - /* out: read integer (< 2^32) */ - byte* b) /* in: pointer to memory from where to read */ -{ - ulint flag; - - ut_ad(b); - - flag = mach_read_from_1(b); - - if (flag < 0x80UL) { - return(flag); - } else if (flag < 0xC0UL) { - return(mach_read_from_2(b) & 0x7FFFUL); - } else if (flag < 0xE0UL) { - return(mach_read_from_3(b) & 0x3FFFFFUL); - } else if (flag < 0xF0UL) { - return(mach_read_from_4(b) & 0x1FFFFFFFUL); - } else { - ut_ad(flag == 0xF0UL); - return(mach_read_from_4(b + 1)); - } -} - -/*********************************************************** -The following function is used to store data in 8 consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_8( -/*============*/ - byte* b, /* in: pointer to 8 bytes where to store */ - dulint n) /* in: dulint integer to be stored */ -{ - ut_ad(b); - - mach_write_to_4(b, ut_dulint_get_high(n)); - mach_write_to_4(b + 4, ut_dulint_get_low(n)); -} - -/************************************************************ -The following function is used to fetch data from 8 consecutive -bytes. The most significant byte is at the lowest address. 
*/ -UNIV_INLINE -dulint -mach_read_from_8( -/*=============*/ - /* out: dulint integer */ - byte* b) /* in: pointer to 8 bytes */ -{ - ulint high; - ulint low; - - ut_ad(b); - - high = mach_read_from_4(b); - low = mach_read_from_4(b + 4); - - return(ut_dulint_create(high, low)); -} - -/*********************************************************** -The following function is used to store data in 7 consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_7( -/*============*/ - byte* b, /* in: pointer to 7 bytes where to store */ - dulint n) /* in: dulint integer to be stored */ -{ - ut_ad(b); - - mach_write_to_3(b, ut_dulint_get_high(n)); - mach_write_to_4(b + 3, ut_dulint_get_low(n)); -} - -/************************************************************ -The following function is used to fetch data from 7 consecutive -bytes. The most significant byte is at the lowest address. */ -UNIV_INLINE -dulint -mach_read_from_7( -/*=============*/ - /* out: dulint integer */ - byte* b) /* in: pointer to 7 bytes */ -{ - ulint high; - ulint low; - - ut_ad(b); - - high = mach_read_from_3(b); - low = mach_read_from_4(b + 3); - - return(ut_dulint_create(high, low)); -} - -/*********************************************************** -The following function is used to store data in 6 consecutive -bytes. We store the most significant byte to the lowest address. */ -UNIV_INLINE -void -mach_write_to_6( -/*============*/ - byte* b, /* in: pointer to 6 bytes where to store */ - dulint n) /* in: dulint integer to be stored */ -{ - ut_ad(b); - - mach_write_to_2(b, ut_dulint_get_high(n)); - mach_write_to_4(b + 2, ut_dulint_get_low(n)); -} - -/************************************************************ -The following function is used to fetch data from 6 consecutive -bytes. The most significant byte is at the lowest address. 
*/ -UNIV_INLINE -dulint -mach_read_from_6( -/*=============*/ - /* out: dulint integer */ - byte* b) /* in: pointer to 7 bytes */ -{ - ulint high; - ulint low; - - ut_ad(b); - - high = mach_read_from_2(b); - low = mach_read_from_4(b + 2); - - return(ut_dulint_create(high, low)); -} - -/************************************************************* -Writes a dulint in a compressed form (5..9 bytes). */ -UNIV_INLINE -ulint -mach_dulint_write_compressed( -/*=========================*/ - /* out: size in bytes */ - byte* b, /* in: pointer to memory where to store */ - dulint n) /* in: dulint integer to be stored */ -{ - ulint size; - - ut_ad(b); - - size = mach_write_compressed(b, ut_dulint_get_high(n)); - mach_write_to_4(b + size, ut_dulint_get_low(n)); - - return(size + 4); -} - -/************************************************************* -Returns the size of a dulint when written in the compressed form. */ -UNIV_INLINE -ulint -mach_dulint_get_compressed_size( -/*============================*/ - /* out: compressed size in bytes */ - dulint n) /* in: dulint integer to be stored */ -{ - return(4 + mach_get_compressed_size(ut_dulint_get_high(n))); -} - -/************************************************************* -Reads a dulint in a compressed form. */ -UNIV_INLINE -dulint -mach_dulint_read_compressed( -/*========================*/ - /* out: read dulint */ - byte* b) /* in: pointer to memory from where to read */ -{ - ulint high; - ulint low; - ulint size; - - ut_ad(b); - - high = mach_read_compressed(b); - - size = mach_get_compressed_size(high); - - low = mach_read_from_4(b + size); - - return(ut_dulint_create(high, low)); -} - -/************************************************************* -Writes a dulint in a compressed form (1..11 bytes). 
*/ -UNIV_INLINE -ulint -mach_dulint_write_much_compressed( -/*==============================*/ - /* out: size in bytes */ - byte* b, /* in: pointer to memory where to store */ - dulint n) /* in: dulint integer to be stored */ -{ - ulint size; - - ut_ad(b); - - if (ut_dulint_get_high(n) == 0) { - return(mach_write_compressed(b, ut_dulint_get_low(n))); - } - - *b = (byte)0xFF; - size = 1 + mach_write_compressed(b + 1, ut_dulint_get_high(n)); - - size += mach_write_compressed(b + size, ut_dulint_get_low(n)); - - return(size); -} - -/************************************************************* -Returns the size of a dulint when written in the compressed form. */ -UNIV_INLINE -ulint -mach_dulint_get_much_compressed_size( -/*=================================*/ - /* out: compressed size in bytes */ - dulint n) /* in: dulint integer to be stored */ -{ - if (0 == ut_dulint_get_high(n)) { - return(mach_get_compressed_size(ut_dulint_get_low(n))); - } - - return(1 + mach_get_compressed_size(ut_dulint_get_high(n)) - + mach_get_compressed_size(ut_dulint_get_low(n))); -} - -/************************************************************* -Reads a dulint in a compressed form. */ -UNIV_INLINE -dulint -mach_dulint_read_much_compressed( -/*=============================*/ - /* out: read dulint */ - byte* b) /* in: pointer to memory from where to read */ -{ - ulint high; - ulint low; - ulint size; - - ut_ad(b); - - if (*b != (byte)0xFF) { - high = 0; - size = 0; - } else { - high = mach_read_compressed(b + 1); - - size = 1 + mach_get_compressed_size(high); - } - - low = mach_read_compressed(b + size); - - return(ut_dulint_create(high, low)); -} - -/************************************************************* -Reads a double. It is stored in a little-endian format. 
*/ -UNIV_INLINE -double -mach_double_read( -/*=============*/ - /* out: double read */ - byte* b) /* in: pointer to memory from where to read */ -{ - double d; - ulint i; - byte* ptr; - - ptr = (byte*)&d; - - for (i = 0; i < sizeof(double); i++) { -#ifdef WORDS_BIGENDIAN - ptr[sizeof(double) - i - 1] = b[i]; -#else - ptr[i] = b[i]; -#endif - } - - return(d); -} - -/************************************************************* -Writes a double. It is stored in a little-endian format. */ -UNIV_INLINE -void -mach_double_write( -/*==============*/ - byte* b, /* in: pointer to memory where to write */ - double d) /* in: double */ -{ - ulint i; - byte* ptr; - - ptr = (byte*)&d; - - for (i = 0; i < sizeof(double); i++) { -#ifdef WORDS_BIGENDIAN - b[i] = ptr[sizeof(double) - i - 1]; -#else - b[i] = ptr[i]; -#endif - } -} - -/************************************************************* -Reads a float. It is stored in a little-endian format. */ -UNIV_INLINE -float -mach_float_read( -/*============*/ - /* out: float read */ - byte* b) /* in: pointer to memory from where to read */ -{ - float d; - ulint i; - byte* ptr; - - ptr = (byte*)&d; - - for (i = 0; i < sizeof(float); i++) { -#ifdef WORDS_BIGENDIAN - ptr[sizeof(float) - i - 1] = b[i]; -#else - ptr[i] = b[i]; -#endif - } - - return(d); -} - -/************************************************************* -Writes a float. It is stored in a little-endian format. */ -UNIV_INLINE -void -mach_float_write( -/*=============*/ - byte* b, /* in: pointer to memory where to write */ - float d) /* in: float */ -{ - ulint i; - byte* ptr; - - ptr = (byte*)&d; - - for (i = 0; i < sizeof(float); i++) { -#ifdef WORDS_BIGENDIAN - b[i] = ptr[sizeof(float) - i - 1]; -#else - b[i] = ptr[i]; -#endif - } -} - -/************************************************************* -Reads a ulint stored in the little-endian format. 
*/ -UNIV_INLINE -ulint -mach_read_from_n_little_endian( -/*===========================*/ - /* out: unsigned long int */ - byte* buf, /* in: from where to read */ - ulint buf_size) /* in: from how many bytes to read */ -{ - ulint n = 0; - byte* ptr; - - ut_ad(buf_size <= sizeof(ulint)); - ut_ad(buf_size > 0); - - ptr = buf + buf_size; - - for (;;) { - ptr--; - - n = n << 8; - - n += (ulint)(*ptr); - - if (ptr == buf) { - break; - } - } - - return(n); -} - -/************************************************************* -Writes a ulint in the little-endian format. */ -UNIV_INLINE -void -mach_write_to_n_little_endian( -/*==========================*/ - byte* dest, /* in: where to write */ - ulint dest_size, /* in: into how many bytes to write */ - ulint n) /* in: unsigned long int to write */ -{ - byte* end; - - ut_ad(dest_size <= sizeof(ulint)); - ut_ad(dest_size > 0); - - end = dest + dest_size; - - for (;;) { - *dest = (byte)(n & 0xFF); - - n = n >> 8; - - dest++; - - if (dest == end) { - break; - } - } - - ut_ad(n == 0); -} - -/************************************************************* -Reads a ulint stored in the little-endian format. */ -UNIV_INLINE -ulint -mach_read_from_2_little_endian( -/*===========================*/ - /* out: unsigned long int */ - byte* buf) /* in: from where to read */ -{ - return((ulint)(*buf) + ((ulint)(*(buf + 1))) * 256); -} - -/************************************************************* -Writes a ulint in the little-endian format. */ -UNIV_INLINE -void -mach_write_to_2_little_endian( -/*==========================*/ - byte* dest, /* in: where to write */ - ulint n) /* in: unsigned long int to write */ -{ - ut_ad(n < 256 * 256); - - *dest = (byte)(n & 0xFFUL); - - n = n >> 8; - dest++; - - *dest = (byte)(n & 0xFFUL); -} - -/************************************************************* -Convert integral type from storage byte order (big endian) to -host byte order. 
*/ -UNIV_INLINE -ullint -mach_read_int_type( -/*===============*/ - /* out: integer value */ - const byte* src, /* in: where to read from */ - ulint len, /* in: length of src */ - ibool unsigned_type) /* in: signed or unsigned flag */ -{ - /* XXX this can be optimized on big-endian machines */ - - ullint ret; - uint i; - - if (unsigned_type || (src[0] & 0x80)) { - - ret = 0x0000000000000000ULL; - } else { - - ret = 0xFFFFFFFFFFFFFF00ULL; - } - - if (unsigned_type) { - - ret |= src[0]; - } else { - - ret |= src[0] ^ 0x80; - } - - for (i = 1; i < len; i++) { - ret <<= 8; - ret |= src[i]; - } - - return(ret); -} diff --git a/storage/innobase/include/mem0dbg.h b/storage/innobase/include/mem0dbg.h deleted file mode 100644 index 2393e4edb54..00000000000 --- a/storage/innobase/include/mem0dbg.h +++ /dev/null @@ -1,126 +0,0 @@ -/****************************************************** -The memory management: the debug code. This is not a compilation module, -but is included in mem0mem.* ! - -(c) 1994, 1995 Innobase Oy - -Created 6/9/1994 Heikki Tuuri -*******************************************************/ - -/* In the debug version each allocated field is surrounded with -check fields whose sizes are given below */ - -#ifdef UNIV_MEM_DEBUG -#define MEM_FIELD_HEADER_SIZE ut_calc_align(2 * sizeof(ulint),\ - UNIV_MEM_ALIGNMENT) -#define MEM_FIELD_TRAILER_SIZE sizeof(ulint) -#else -#define MEM_FIELD_HEADER_SIZE 0 -#endif - - -/* Space needed when allocating for a user a field of -length N. The space is allocated only in multiples of -UNIV_MEM_ALIGNMENT. In the debug version there are also -check fields at the both ends of the field. 
*/ -#ifdef UNIV_MEM_DEBUG -#define MEM_SPACE_NEEDED(N) ut_calc_align((N) + MEM_FIELD_HEADER_SIZE\ - + MEM_FIELD_TRAILER_SIZE, UNIV_MEM_ALIGNMENT) -#else -#define MEM_SPACE_NEEDED(N) ut_calc_align((N), UNIV_MEM_ALIGNMENT) -#endif - -#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG -/******************************************************************* -Checks a memory heap for consistency and prints the contents if requested. -Outputs the sum of sizes of buffers given to the user (only in -the debug version), the physical size of the heap and the number of -blocks in the heap. In case of error returns 0 as sizes and number -of blocks. */ - -void -mem_heap_validate_or_print( -/*=======================*/ - mem_heap_t* heap, /* in: memory heap */ - byte* top, /* in: calculate and validate only until - this top pointer in the heap is reached, - if this pointer is NULL, ignored */ - ibool print, /* in: if TRUE, prints the contents - of the heap; works only in - the debug version */ - ibool* error, /* out: TRUE if error */ - ulint* us_size,/* out: allocated memory - (for the user) in the heap, - if a NULL pointer is passed as this - argument, it is ignored; in the - non-debug version this is always -1 */ - ulint* ph_size,/* out: physical size of the heap, - if a NULL pointer is passed as this - argument, it is ignored */ - ulint* n_blocks); /* out: number of blocks in the heap, - if a NULL pointer is passed as this - argument, it is ignored */ -/****************************************************************** -Validates the contents of a memory heap. 
*/ - -ibool -mem_heap_validate( -/*==============*/ - /* out: TRUE if ok */ - mem_heap_t* heap); /* in: memory heap */ -#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */ -#ifdef UNIV_DEBUG -/****************************************************************** -Checks that an object is a memory heap (or a block of it) */ - -ibool -mem_heap_check( -/*===========*/ - /* out: TRUE if ok */ - mem_heap_t* heap); /* in: memory heap */ -#endif /* UNIV_DEBUG */ -#ifdef UNIV_MEM_DEBUG -/********************************************************************* -TRUE if no memory is currently allocated. */ - -ibool -mem_all_freed(void); -/*===============*/ - /* out: TRUE if no heaps exist */ -/********************************************************************* -Validates the dynamic memory */ - -ibool -mem_validate_no_assert(void); -/*=========================*/ - /* out: TRUE if error */ -/**************************************************************** -Validates the dynamic memory */ - -ibool -mem_validate(void); -/*===============*/ - /* out: TRUE if ok */ -#endif /* UNIV_MEM_DEBUG */ -/**************************************************************** -Tries to find neigboring memory allocation blocks and dumps to stderr -the neighborhood of a given pointer. */ - -void -mem_analyze_corruption( -/*===================*/ - void* ptr); /* in: pointer to place of possible corruption */ -/********************************************************************* -Prints information of dynamic memory usage and currently allocated memory -heaps or buffers. Can only be used in the debug version. */ - -void -mem_print_info(void); -/*================*/ -/********************************************************************* -Prints information of dynamic memory usage and currently allocated memory -heaps or buffers since the last ..._print_info or..._print_new_info. 
*/ - -void -mem_print_new_info(void); -/*====================*/ diff --git a/storage/innobase/include/mem0dbg.ic b/storage/innobase/include/mem0dbg.ic deleted file mode 100644 index e8a34adb3fa..00000000000 --- a/storage/innobase/include/mem0dbg.ic +++ /dev/null @@ -1,93 +0,0 @@ -/************************************************************************ -The memory management: the debug code. This is not an independent -compilation module but is included in mem0mem.*. - -(c) 1994, 1995 Innobase Oy - -Created 6/8/1994 Heikki Tuuri -*************************************************************************/ - -#ifdef UNIV_MEM_DEBUG -extern mutex_t mem_hash_mutex; -extern ulint mem_current_allocated_memory; - -/********************************************************************** -Initializes an allocated memory field in the debug version. */ - -void -mem_field_init( -/*===========*/ - byte* buf, /* in: memory field */ - ulint n); /* in: how many bytes the user requested */ -/********************************************************************** -Erases an allocated memory field in the debug version. */ - -void -mem_field_erase( -/*============*/ - byte* buf, /* in: memory field */ - ulint n); /* in: how many bytes the user requested */ -/******************************************************************* -Initializes a buffer to a random combination of hex BA and BE. -Used to initialize allocated memory. */ - -void -mem_init_buf( -/*=========*/ - byte* buf, /* in: pointer to buffer */ - ulint n); /* in: length of buffer */ -/******************************************************************* -Initializes a buffer to a random combination of hex DE and AD. -Used to erase freed memory.*/ - -void -mem_erase_buf( -/*==========*/ - byte* buf, /* in: pointer to buffer */ - ulint n); /* in: length of buffer */ -/******************************************************************* -Inserts a created memory heap to the hash table of -current allocated memory heaps. 
-Initializes the hash table when first called. */ - -void -mem_hash_insert( -/*============*/ - mem_heap_t* heap, /* in: the created heap */ - const char* file_name, /* in: file name of creation */ - ulint line); /* in: line where created */ -/******************************************************************* -Removes a memory heap (which is going to be freed by the caller) -from the list of live memory heaps. Returns the size of the heap -in terms of how much memory in bytes was allocated for the user of -the heap (not the total space occupied by the heap). -Also validates the heap. -NOTE: This function does not free the storage occupied by the -heap itself, only the node in the list of heaps. */ - -void -mem_hash_remove( -/*============*/ - mem_heap_t* heap, /* in: the heap to be freed */ - const char* file_name, /* in: file name of freeing */ - ulint line); /* in: line where freed */ - - -void -mem_field_header_set_len(byte* field, ulint len); - -ulint -mem_field_header_get_len(byte* field); - -void -mem_field_header_set_check(byte* field, ulint check); - -ulint -mem_field_header_get_check(byte* field); - -void -mem_field_trailer_set_check(byte* field, ulint check); - -ulint -mem_field_trailer_get_check(byte* field); -#endif /* UNIV_MEM_DEBUG */ diff --git a/storage/innobase/include/mem0mem.h b/storage/innobase/include/mem0mem.h deleted file mode 100644 index 2d5fd1db6c3..00000000000 --- a/storage/innobase/include/mem0mem.h +++ /dev/null @@ -1,412 +0,0 @@ -/****************************************************** -The memory management - -(c) 1994, 1995 Innobase Oy - -Created 6/9/1994 Heikki Tuuri -*******************************************************/ - -#ifndef mem0mem_h -#define mem0mem_h - -#include "univ.i" -#include "ut0mem.h" -#include "ut0byte.h" -#include "ut0ut.h" -#include "ut0rnd.h" -#include "sync0sync.h" -#include "ut0lst.h" -#include "mach0data.h" - -/* -------------------- MEMORY HEAPS ----------------------------- */ - -/* The info structure 
stored at the beginning of a heap block */ -typedef struct mem_block_info_struct mem_block_info_t; - -/* A block of a memory heap consists of the info structure -followed by an area of memory */ -typedef mem_block_info_t mem_block_t; - -/* A memory heap is a nonempty linear list of memory blocks */ -typedef mem_block_t mem_heap_t; - -/* Types of allocation for memory heaps: DYNAMIC means allocation from the -dynamic memory pool of the C compiler, BUFFER means allocation from the -buffer pool; the latter method is used for very big heaps */ - -#define MEM_HEAP_DYNAMIC 0 /* the most common type */ -#define MEM_HEAP_BUFFER 1 -#define MEM_HEAP_BTR_SEARCH 2 /* this flag can optionally be - ORed to MEM_HEAP_BUFFER, in which - case heap->free_block is used in - some cases for memory allocations, - and if it's NULL, the memory - allocation functions can return - NULL. */ - -/* The following start size is used for the first block in the memory heap if -the size is not specified, i.e., 0 is given as the parameter in the call of -create. The standard size is the maximum (payload) size of the blocks used for -allocations of small buffers. */ - -#define MEM_BLOCK_START_SIZE 64 -#define MEM_BLOCK_STANDARD_SIZE 8000 - -/* If a memory heap is allowed to grow into the buffer pool, the following -is the maximum size for a single allocated buffer: */ -#define MEM_MAX_ALLOC_IN_BUF (UNIV_PAGE_SIZE - 200) - -/********************************************************************** -Initializes the memory system. */ - -void -mem_init( -/*=====*/ - ulint size); /* in: common pool size in bytes */ -/****************************************************************** -Use this macro instead of the corresponding function! Macro for memory -heap creation. */ - -#define mem_heap_create(N) mem_heap_create_func(\ - (N), NULL, MEM_HEAP_DYNAMIC, __FILE__, __LINE__) -/****************************************************************** -Use this macro instead of the corresponding function! 
Macro for memory -heap creation. */ - -#define mem_heap_create_in_buffer(N) mem_heap_create_func(\ - (N), NULL, MEM_HEAP_BUFFER, __FILE__, __LINE__) -/****************************************************************** -Use this macro instead of the corresponding function! Macro for memory -heap creation. */ - -#define mem_heap_create_in_btr_search(N) mem_heap_create_func(\ - (N), NULL, MEM_HEAP_BTR_SEARCH | MEM_HEAP_BUFFER,\ - __FILE__, __LINE__) -/****************************************************************** -Use this macro instead of the corresponding function! Macro for fast -memory heap creation. An initial block of memory B is given by the -caller, N is its size, and this memory block is not freed by -mem_heap_free. See the parameter comment in mem_heap_create_func below. */ - -#define mem_heap_fast_create(N, B) mem_heap_create_func(\ - (N), (B), MEM_HEAP_DYNAMIC, __FILE__, __LINE__) - -/****************************************************************** -Use this macro instead of the corresponding function! Macro for memory -heap freeing. */ - -#define mem_heap_free(heap) mem_heap_free_func(\ - (heap), __FILE__, __LINE__) -/********************************************************************* -NOTE: Use the corresponding macros instead of this function. Creates a -memory heap. For debugging purposes, takes also the file name and line as -arguments. 
*/ -UNIV_INLINE -mem_heap_t* -mem_heap_create_func( -/*=================*/ - /* out, own: memory heap, NULL if - did not succeed (only possible for - MEM_HEAP_BTR_SEARCH type heaps)*/ - ulint n, /* in: desired start block size, - this means that a single user buffer - of size n will fit in the block, - 0 creates a default size block; - if init_block is not NULL, n tells - its size in bytes */ - void* init_block, /* in: if very fast creation is - wanted, the caller can reserve some - memory from its stack, for example, - and pass it as the the initial block - to the heap: then no OS call of malloc - is needed at the creation. CAUTION: - the caller must make sure the initial - block is not unintentionally erased - (if allocated in the stack), before - the memory heap is explicitly freed. */ - ulint type, /* in: heap type */ - const char* file_name, /* in: file name where created */ - ulint line); /* in: line where created */ -/********************************************************************* -NOTE: Use the corresponding macro instead of this function. Frees the space -occupied by a memory heap. In the debug version erases the heap memory -blocks. */ -UNIV_INLINE -void -mem_heap_free_func( -/*===============*/ - mem_heap_t* heap, /* in, own: heap to be freed */ - const char* file_name, /* in: file name where freed */ - ulint line); /* in: line where freed */ -/******************************************************************* -Allocates n bytes of memory from a memory heap. */ -UNIV_INLINE -void* -mem_heap_alloc( -/*===========*/ - /* out: allocated storage, NULL if did not - succeed (only possible for - MEM_HEAP_BTR_SEARCH type heaps) */ - mem_heap_t* heap, /* in: memory heap */ - ulint n); /* in: number of bytes; if the heap is allowed - to grow into the buffer pool, this must be - <= MEM_MAX_ALLOC_IN_BUF */ -/********************************************************************* -Returns a pointer to the heap top. 
*/ -UNIV_INLINE -byte* -mem_heap_get_heap_top( -/*==================*/ - /* out: pointer to the heap top */ - mem_heap_t* heap); /* in: memory heap */ -/********************************************************************* -Frees the space in a memory heap exceeding the pointer given. The -pointer must have been acquired from mem_heap_get_heap_top. The first -memory block of the heap is not freed. */ -UNIV_INLINE -void -mem_heap_free_heap_top( -/*===================*/ - mem_heap_t* heap, /* in: heap from which to free */ - byte* old_top);/* in: pointer to old top of heap */ -/********************************************************************* -Empties a memory heap. The first memory block of the heap is not freed. */ -UNIV_INLINE -void -mem_heap_empty( -/*===========*/ - mem_heap_t* heap); /* in: heap to empty */ -/********************************************************************* -Returns a pointer to the topmost element in a memory heap. -The size of the element must be given. */ -UNIV_INLINE -void* -mem_heap_get_top( -/*=============*/ - /* out: pointer to the topmost element */ - mem_heap_t* heap, /* in: memory heap */ - ulint n); /* in: size of the topmost element */ -/********************************************************************* -Frees the topmost element in a memory heap. -The size of the element must be given. */ -UNIV_INLINE -void -mem_heap_free_top( -/*==============*/ - mem_heap_t* heap, /* in: memory heap */ - ulint n); /* in: size of the topmost element */ -/********************************************************************* -Returns the space in bytes occupied by a memory heap. */ -UNIV_INLINE -ulint -mem_heap_get_size( -/*==============*/ - mem_heap_t* heap); /* in: heap */ -/****************************************************************** -Use this macro instead of the corresponding function! 
-Macro for memory buffer allocation */ - -#define mem_alloc(N) mem_alloc_func((N), __FILE__, __LINE__) -/****************************************************************** -Use this macro instead of the corresponding function! -Macro for memory buffer allocation */ - -#define mem_alloc_noninline(N) mem_alloc_func_noninline(\ - (N), __FILE__, __LINE__) -/******************************************************************* -NOTE: Use the corresponding macro instead of this function. -Allocates a single buffer of memory from the dynamic memory of -the C compiler. Is like malloc of C. The buffer must be freed -with mem_free. */ -UNIV_INLINE -void* -mem_alloc_func( -/*===========*/ - /* out, own: free storage */ - ulint n, /* in: desired number of bytes */ - const char* file_name, /* in: file name where created */ - ulint line /* in: line where created */ -); -/******************************************************************* -NOTE: Use the corresponding macro instead of this function. -Allocates a single buffer of memory from the dynamic memory of -the C compiler. Is like malloc of C. The buffer must be freed -with mem_free. */ - -void* -mem_alloc_func_noninline( -/*=====================*/ - /* out, own: free storage */ - ulint n, /* in: desired number of bytes */ - const char* file_name, /* in: file name where created */ - ulint line /* in: line where created */ - ); -/****************************************************************** -Use this macro instead of the corresponding function! -Macro for memory buffer freeing */ - -#define mem_free(PTR) mem_free_func((PTR), __FILE__, __LINE__) -/******************************************************************* -NOTE: Use the corresponding macro instead of this function. -Frees a single buffer of storage from -the dynamic memory of C compiler. Similar to free of C. 
*/ -UNIV_INLINE -void -mem_free_func( -/*==========*/ - void* ptr, /* in, own: buffer to be freed */ - const char* file_name, /* in: file name where created */ - ulint line /* in: line where created */ -); - -/************************************************************************** -Duplicates a NUL-terminated string. */ -UNIV_INLINE -char* -mem_strdup( -/*=======*/ - /* out, own: a copy of the string, - must be deallocated with mem_free */ - const char* str); /* in: string to be copied */ -/************************************************************************** -Makes a NUL-terminated copy of a nonterminated string. */ -UNIV_INLINE -char* -mem_strdupl( -/*========*/ - /* out, own: a copy of the string, - must be deallocated with mem_free */ - const char* str, /* in: string to be copied */ - ulint len); /* in: length of str, in bytes */ - -/************************************************************************** -Duplicates a NUL-terminated string, allocated from a memory heap. */ - -char* -mem_heap_strdup( -/*============*/ - /* out, own: a copy of the string */ - mem_heap_t* heap, /* in: memory heap where string is allocated */ - const char* str); /* in: string to be copied */ -/************************************************************************** -Makes a NUL-terminated copy of a nonterminated string, -allocated from a memory heap. */ -UNIV_INLINE -char* -mem_heap_strdupl( -/*=============*/ - /* out, own: a copy of the string */ - mem_heap_t* heap, /* in: memory heap where string is allocated */ - const char* str, /* in: string to be copied */ - ulint len); /* in: length of str, in bytes */ - -/************************************************************************** -Concatenate two strings and return the result, using a memory heap. 
*/ - -char* -mem_heap_strcat( -/*============*/ - /* out, own: the result */ - mem_heap_t* heap, /* in: memory heap where string is allocated */ - const char* s1, /* in: string 1 */ - const char* s2); /* in: string 2 */ - -/************************************************************************** -Duplicate a block of data, allocated from a memory heap. */ - -void* -mem_heap_dup( -/*=========*/ - /* out, own: a copy of the data */ - mem_heap_t* heap, /* in: memory heap where copy is allocated */ - const void* data, /* in: data to be copied */ - ulint len); /* in: length of data, in bytes */ - -/************************************************************************** -Concatenate two memory blocks and return the result, using a memory heap. */ - -void* -mem_heap_cat( -/*=========*/ - /* out, own: the result */ - mem_heap_t* heap, /* in: memory heap where result is allocated */ - const void* b1, /* in: block 1 */ - ulint len1, /* in: length of b1, in bytes */ - const void* b2, /* in: block 2 */ - ulint len2); /* in: length of b2, in bytes */ - -/******************************************************************** -A simple (s)printf replacement that dynamically allocates the space for the -formatted string from the given heap. This supports a very limited set of -the printf syntax: types 's' and 'u' and length modifier 'l' (which is -required for the 'u' type). */ - -char* -mem_heap_printf( -/*============*/ - /* out: heap-allocated formatted string */ - mem_heap_t* heap, /* in: memory heap */ - const char* format, /* in: format string */ - ...) __attribute__ ((format (printf, 2, 3))); - -#ifdef MEM_PERIODIC_CHECK -/********************************************************************** -Goes through the list of all allocated mem blocks, checks their magic -numbers, and reports possible corruption. 
*/ - -void -mem_validate_all_blocks(void); -/*=========================*/ -#endif - -/*#######################################################################*/ - -/* The info header of a block in a memory heap */ - -struct mem_block_info_struct { - ulint magic_n;/* magic number for debugging */ - char file_name[8];/* file name where the mem heap was created */ - ulint line; /* line number where the mem heap was created */ - UT_LIST_BASE_NODE_T(mem_block_t) base; /* In the first block in the - the list this is the base node of the list of blocks; - in subsequent blocks this is undefined */ - UT_LIST_NODE_T(mem_block_t) list; /* This contains pointers to next - and prev in the list. The first block allocated - to the heap is also the first block in this list, - though it also contains the base node of the list. */ - ulint len; /* physical length of this block in bytes */ - ulint type; /* type of heap: MEM_HEAP_DYNAMIC, or - MEM_HEAP_BUF possibly ORed to MEM_HEAP_BTR_SEARCH */ - ibool init_block; /* TRUE if this is the first block used in fast - creation of a heap: the memory will be freed - by the creator, not by mem_heap_free */ - ulint free; /* offset in bytes of the first free position for - user data in the block */ - ulint start; /* the value of the struct field 'free' at the - creation of the block */ - byte* free_block; - /* if the MEM_HEAP_BTR_SEARCH bit is set in type, - and this is the heap root, this can contain an - allocated buffer frame, which can be appended as a - free block to the heap, if we need more space; - otherwise, this is NULL */ -#ifdef MEM_PERIODIC_CHECK - UT_LIST_NODE_T(mem_block_t) mem_block_list; - /* List of all mem blocks allocated; protected - by the mem_comm_pool mutex */ -#endif -}; - -#define MEM_BLOCK_MAGIC_N 764741555 -#define MEM_FREED_BLOCK_MAGIC_N 547711122 - -/* Header size for a memory heap block */ -#define MEM_BLOCK_HEADER_SIZE ut_calc_align(sizeof(mem_block_info_t),\ - UNIV_MEM_ALIGNMENT) -#include "mem0dbg.h" - -#ifndef 
UNIV_NONINL -#include "mem0mem.ic" -#endif - -#endif diff --git a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic deleted file mode 100644 index 6227a27f277..00000000000 --- a/storage/innobase/include/mem0mem.ic +++ /dev/null @@ -1,619 +0,0 @@ -/************************************************************************ -The memory management - -(c) 1994, 1995 Innobase Oy - -Created 6/8/1994 Heikki Tuuri -*************************************************************************/ - -#include "mem0dbg.ic" - -#include "mem0pool.h" - -/******************************************************************* -Creates a memory heap block where data can be allocated. */ - -mem_block_t* -mem_heap_create_block( -/*==================*/ - /* out, own: memory heap block, NULL if - did not succeed (only possible for - MEM_HEAP_BTR_SEARCH type heaps) */ - mem_heap_t* heap, /* in: memory heap or NULL if first block - should be created */ - ulint n, /* in: number of bytes needed for user data, or - if init_block is not NULL, its size in bytes */ - void* init_block, /* in: init block in fast create, - type must be MEM_HEAP_DYNAMIC */ - ulint type, /* in: type of heap: MEM_HEAP_DYNAMIC or - MEM_HEAP_BUFFER */ - const char* file_name,/* in: file name where created */ - ulint line); /* in: line where created */ -/********************************************************************** -Frees a block from a memory heap. */ - -void -mem_heap_block_free( -/*================*/ - mem_heap_t* heap, /* in: heap */ - mem_block_t* block); /* in: block to free */ -/********************************************************************** -Frees the free_block field from a memory heap. */ - -void -mem_heap_free_block_free( -/*=====================*/ - mem_heap_t* heap); /* in: heap */ -/******************************************************************* -Adds a new block to a memory heap. 
*/ - -mem_block_t* -mem_heap_add_block( -/*===============*/ - /* out: created block, NULL if did not - succeed (only possible for - MEM_HEAP_BTR_SEARCH type heaps)*/ - mem_heap_t* heap, /* in: memory heap */ - ulint n); /* in: number of bytes user needs */ - -UNIV_INLINE -void -mem_block_set_len(mem_block_t* block, ulint len) -{ - ut_ad(len > 0); - - block->len = len; -} - -UNIV_INLINE -ulint -mem_block_get_len(mem_block_t* block) -{ - return(block->len); -} - -UNIV_INLINE -void -mem_block_set_type(mem_block_t* block, ulint type) -{ - ut_ad((type == MEM_HEAP_DYNAMIC) || (type == MEM_HEAP_BUFFER) - || (type == MEM_HEAP_BUFFER + MEM_HEAP_BTR_SEARCH)); - - block->type = type; -} - -UNIV_INLINE -ulint -mem_block_get_type(mem_block_t* block) -{ - return(block->type); -} - -UNIV_INLINE -void -mem_block_set_free(mem_block_t* block, ulint free) -{ - ut_ad(free > 0); - ut_ad(free <= mem_block_get_len(block)); - - block->free = free; -} - -UNIV_INLINE -ulint -mem_block_get_free(mem_block_t* block) -{ - return(block->free); -} - -UNIV_INLINE -void -mem_block_set_start(mem_block_t* block, ulint start) -{ - ut_ad(start > 0); - - block->start = start; -} - -UNIV_INLINE -ulint -mem_block_get_start(mem_block_t* block) -{ - return(block->start); -} - -/******************************************************************* -Allocates n bytes of memory from a memory heap. */ -UNIV_INLINE -void* -mem_heap_alloc( -/*===========*/ - /* out: allocated storage, NULL if did not - succeed (only possible for - MEM_HEAP_BTR_SEARCH type heaps) */ - mem_heap_t* heap, /* in: memory heap */ - ulint n) /* in: number of bytes; if the heap is allowed - to grow into the buffer pool, this must be - <= MEM_MAX_ALLOC_IN_BUF */ -{ - mem_block_t* block; - void* buf; - ulint free; - - ut_ad(mem_heap_check(heap)); - - block = UT_LIST_GET_LAST(heap->base); - - ut_ad(!(block->type & MEM_HEAP_BUFFER) || (n <= MEM_MAX_ALLOC_IN_BUF)); - - /* Check if there is enough space in block. 
If not, create a new - block to the heap */ - - if (mem_block_get_len(block) - < mem_block_get_free(block) + MEM_SPACE_NEEDED(n)) { - - block = mem_heap_add_block(heap, n); - - if (block == NULL) { - - return(NULL); - } - } - - free = mem_block_get_free(block); - - buf = (byte*)block + free; - - mem_block_set_free(block, free + MEM_SPACE_NEEDED(n)); - -#ifdef UNIV_MEM_DEBUG - UNIV_MEM_ALLOC(buf, - n + MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE); - - /* In the debug version write debugging info to the field */ - mem_field_init((byte*)buf, n); - - /* Advance buf to point at the storage which will be given to the - caller */ - buf = (byte*)buf + MEM_FIELD_HEADER_SIZE; - -#endif -#ifdef UNIV_SET_MEM_TO_ZERO - UNIV_MEM_ALLOC(buf, n); - memset(buf, '\0', n); -#endif - UNIV_MEM_ALLOC(buf, n); - return(buf); -} - -/********************************************************************* -Returns a pointer to the heap top. */ -UNIV_INLINE -byte* -mem_heap_get_heap_top( -/*==================*/ - /* out: pointer to the heap top */ - mem_heap_t* heap) /* in: memory heap */ -{ - mem_block_t* block; - byte* buf; - - ut_ad(mem_heap_check(heap)); - - block = UT_LIST_GET_LAST(heap->base); - - buf = (byte*)block + mem_block_get_free(block); - - return(buf); -} - -/********************************************************************* -Frees the space in a memory heap exceeding the pointer given. The -pointer must have been acquired from mem_heap_get_heap_top. The first -memory block of the heap is not freed. 
*/ -UNIV_INLINE -void -mem_heap_free_heap_top( -/*===================*/ - mem_heap_t* heap, /* in: heap from which to free */ - byte* old_top)/* in: pointer to old top of heap */ -{ - mem_block_t* block; - mem_block_t* prev_block; -#ifdef UNIV_MEM_DEBUG - ibool error; - ulint total_size; - ulint size; -#endif - - ut_ad(mem_heap_check(heap)); - -#ifdef UNIV_MEM_DEBUG - - /* Validate the heap and get its total allocated size */ - mem_heap_validate_or_print(heap, NULL, FALSE, &error, &total_size, - NULL, NULL); - ut_a(!error); - - /* Get the size below top pointer */ - mem_heap_validate_or_print(heap, old_top, FALSE, &error, &size, NULL, - NULL); - ut_a(!error); - -#endif - - block = UT_LIST_GET_LAST(heap->base); - - while (block != NULL) { - if (((byte*)block + mem_block_get_free(block) >= old_top) - && ((byte*)block <= old_top)) { - /* Found the right block */ - - break; - } - - /* Store prev_block value before freeing the current block - (the current block will be erased in freeing) */ - - prev_block = UT_LIST_GET_PREV(list, block); - - mem_heap_block_free(heap, block); - - block = prev_block; - } - - ut_ad(block); - - /* Set the free field of block */ - mem_block_set_free(block, old_top - (byte*)block); - -#ifdef UNIV_MEM_DEBUG - ut_ad(mem_block_get_start(block) <= mem_block_get_free(block)); - - /* In the debug version erase block from top up */ - mem_erase_buf(old_top, (byte*)block + block->len - old_top); - - /* Update allocated memory count */ - mutex_enter(&mem_hash_mutex); - mem_current_allocated_memory -= (total_size - size); - mutex_exit(&mem_hash_mutex); -#else /* UNIV_MEM_DEBUG */ - UNIV_MEM_ASSERT_W(old_top, (byte*)block + block->len - old_top); -#endif /* UNIV_MEM_DEBUG */ - UNIV_MEM_ALLOC(old_top, (byte*)block + block->len - old_top); - - /* If free == start, we may free the block if it is not the first - one */ - - if ((heap != block) && (mem_block_get_free(block) - == mem_block_get_start(block))) { - mem_heap_block_free(heap, block); - } -} - 
-/********************************************************************* -Empties a memory heap. The first memory block of the heap is not freed. */ -UNIV_INLINE -void -mem_heap_empty( -/*===========*/ - mem_heap_t* heap) /* in: heap to empty */ -{ - mem_heap_free_heap_top(heap, (byte*)heap + mem_block_get_start(heap)); - - if (heap->free_block) { - mem_heap_free_block_free(heap); - } -} - -/********************************************************************* -Returns a pointer to the topmost element in a memory heap. The size of the -element must be given. */ -UNIV_INLINE -void* -mem_heap_get_top( -/*=============*/ - /* out: pointer to the topmost element */ - mem_heap_t* heap, /* in: memory heap */ - ulint n) /* in: size of the topmost element */ -{ - mem_block_t* block; - void* buf; - - ut_ad(mem_heap_check(heap)); - - block = UT_LIST_GET_LAST(heap->base); - - buf = (byte*)block + mem_block_get_free(block) - MEM_SPACE_NEEDED(n); - -#ifdef UNIV_MEM_DEBUG - ut_ad(mem_block_get_start(block) <=(ulint)((byte*)buf - (byte*)block)); - - /* In the debug version, advance buf to point at the storage which - was given to the caller in the allocation*/ - - buf = (byte*)buf + MEM_FIELD_HEADER_SIZE; - - /* Check that the field lengths agree */ - ut_ad(n == (ulint)mem_field_header_get_len(buf)); -#endif - - return(buf); -} - -/********************************************************************* -Frees the topmost element in a memory heap. The size of the element must be -given. 
*/ -UNIV_INLINE -void -mem_heap_free_top( -/*==============*/ - mem_heap_t* heap, /* in: memory heap */ - ulint n) /* in: size of the topmost element */ -{ - mem_block_t* block; - - ut_ad(mem_heap_check(heap)); - - block = UT_LIST_GET_LAST(heap->base); - - /* Subtract the free field of block */ - mem_block_set_free(block, mem_block_get_free(block) - - MEM_SPACE_NEEDED(n)); - UNIV_MEM_ASSERT_W((byte*) block + mem_block_get_free(block), n); -#ifdef UNIV_MEM_DEBUG - - ut_ad(mem_block_get_start(block) <= mem_block_get_free(block)); - - /* In the debug version check the consistency, and erase field */ - mem_field_erase((byte*)block + mem_block_get_free(block), n); -#endif - - /* If free == start, we may free the block if it is not the first - one */ - - if ((heap != block) && (mem_block_get_free(block) - == mem_block_get_start(block))) { - mem_heap_block_free(heap, block); - } else { - /* Avoid a bogus UNIV_MEM_ASSERT_W() warning in a - subsequent invocation of mem_heap_free_top(). - Originally, this was UNIV_MEM_FREE(), to catch writes - to freed memory. */ - UNIV_MEM_ALLOC((byte*) block + mem_block_get_free(block), n); - } -} - -/********************************************************************* -NOTE: Use the corresponding macros instead of this function. Creates a -memory heap. For debugging purposes, takes also the file name and line as -argument. 
*/ -UNIV_INLINE -mem_heap_t* -mem_heap_create_func( -/*=================*/ - /* out, own: memory heap, NULL if - did not succeed (only possible for - MEM_HEAP_BTR_SEARCH type heaps)*/ - ulint n, /* in: desired start block size, - this means that a single user buffer - of size n will fit in the block, - 0 creates a default size block; - if init_block is not NULL, n tells - its size in bytes */ - void* init_block, /* in: if very fast creation is - wanted, the caller can reserve some - memory from its stack, for example, - and pass it as the the initial block - to the heap: then no OS call of malloc - is needed at the creation. CAUTION: - the caller must make sure the initial - block is not unintentionally erased - (if allocated in the stack), before - the memory heap is explicitly freed. */ - ulint type, /* in: heap type */ - const char* file_name, /* in: file name where created */ - ulint line) /* in: line where created */ -{ - mem_block_t* block; - - if (n > 0) { - block = mem_heap_create_block(NULL, n, init_block, type, - file_name, line); - } else { - block = mem_heap_create_block(NULL, MEM_BLOCK_START_SIZE, - init_block, type, - file_name, line); - } - - if (block == NULL) { - - return(NULL); - } - - UT_LIST_INIT(block->base); - - /* Add the created block itself as the first block in the list */ - UT_LIST_ADD_FIRST(list, block->base, block); - -#ifdef UNIV_MEM_DEBUG - - mem_hash_insert(block, file_name, line); - -#endif - - return(block); -} - -/********************************************************************* -NOTE: Use the corresponding macro instead of this function. Frees the space -occupied by a memory heap. In the debug version erases the heap memory -blocks. 
*/ -UNIV_INLINE -void -mem_heap_free_func( -/*===============*/ - mem_heap_t* heap, /* in, own: heap to be freed */ - const char* file_name __attribute__((unused)), - /* in: file name where freed */ - ulint line __attribute__((unused))) -{ - mem_block_t* block; - mem_block_t* prev_block; - - ut_ad(mem_heap_check(heap)); - - block = UT_LIST_GET_LAST(heap->base); - -#ifdef UNIV_MEM_DEBUG - - /* In the debug version remove the heap from the hash table of heaps - and check its consistency */ - - mem_hash_remove(heap, file_name, line); - -#endif - - if (heap->free_block) { - mem_heap_free_block_free(heap); - } - - while (block != NULL) { - /* Store the contents of info before freeing current block - (it is erased in freeing) */ - - prev_block = UT_LIST_GET_PREV(list, block); - - mem_heap_block_free(heap, block); - - block = prev_block; - } -} - -/******************************************************************* -NOTE: Use the corresponding macro instead of this function. -Allocates a single buffer of memory from the dynamic memory of -the C compiler. Is like malloc of C. The buffer must be freed -with mem_free. */ -UNIV_INLINE -void* -mem_alloc_func( -/*===========*/ - /* out, own: free storage */ - ulint n, /* in: desired number of bytes */ - const char* file_name, /* in: file name where created */ - ulint line /* in: line where created */ - ) -{ - mem_heap_t* heap; - void* buf; - - heap = mem_heap_create_func(n, NULL, MEM_HEAP_DYNAMIC, file_name, - line); - - /* Note that as we created the first block in the heap big enough - for the buffer requested by the caller, the buffer will be in the - first block and thus we can calculate the pointer to the heap from - the pointer to the buffer when we free the memory buffer. 
*/ - - buf = mem_heap_alloc(heap, n); - - ut_a((byte*)heap == (byte*)buf - MEM_BLOCK_HEADER_SIZE - - MEM_FIELD_HEADER_SIZE); - return(buf); -} - -/******************************************************************* -NOTE: Use the corresponding macro instead of this function. Frees a single -buffer of storage from the dynamic memory of the C compiler. Similar to the -free of C. */ -UNIV_INLINE -void -mem_free_func( -/*==========*/ - void* ptr, /* in, own: buffer to be freed */ - const char* file_name, /* in: file name where created */ - ulint line /* in: line where created */ - ) -{ - mem_heap_t* heap; - - heap = (mem_heap_t*)((byte*)ptr - MEM_BLOCK_HEADER_SIZE - - MEM_FIELD_HEADER_SIZE); - mem_heap_free_func(heap, file_name, line); -} - -/********************************************************************* -Returns the space in bytes occupied by a memory heap. */ -UNIV_INLINE -ulint -mem_heap_get_size( -/*==============*/ - mem_heap_t* heap) /* in: heap */ -{ - mem_block_t* block; - ulint size = 0; - - ut_ad(mem_heap_check(heap)); - - block = heap; - - while (block != NULL) { - - size += mem_block_get_len(block); - block = UT_LIST_GET_NEXT(list, block); - } - - if (heap->free_block) { - size += UNIV_PAGE_SIZE; - } - - return(size); -} - -/************************************************************************** -Duplicates a NUL-terminated string. */ -UNIV_INLINE -char* -mem_strdup( -/*=======*/ - /* out, own: a copy of the string, - must be deallocated with mem_free */ - const char* str) /* in: string to be copied */ -{ - ulint len = strlen(str) + 1; - return(memcpy(mem_alloc(len), str, len)); -} - -/************************************************************************** -Makes a NUL-terminated copy of a nonterminated string. 
*/ -UNIV_INLINE -char* -mem_strdupl( -/*========*/ - /* out, own: a copy of the string, - must be deallocated with mem_free */ - const char* str, /* in: string to be copied */ - ulint len) /* in: length of str, in bytes */ -{ - char* s = mem_alloc(len + 1); - s[len] = 0; - return(memcpy(s, str, len)); -} - -/************************************************************************** -Makes a NUL-terminated copy of a nonterminated string, -allocated from a memory heap. */ -UNIV_INLINE -char* -mem_heap_strdupl( -/*=============*/ - /* out, own: a copy of the string */ - mem_heap_t* heap, /* in: memory heap where string is allocated */ - const char* str, /* in: string to be copied */ - ulint len) /* in: length of str, in bytes */ -{ - char* s = mem_heap_alloc(heap, len + 1); - s[len] = 0; - return(memcpy(s, str, len)); -} diff --git a/storage/innobase/include/mem0pool.h b/storage/innobase/include/mem0pool.h deleted file mode 100644 index bf659ca9a72..00000000000 --- a/storage/innobase/include/mem0pool.h +++ /dev/null @@ -1,108 +0,0 @@ -/****************************************************** -The lowest-level memory management - -(c) 1994, 1995 Innobase Oy - -Created 6/9/1994 Heikki Tuuri -*******************************************************/ - -#ifndef mem0pool_h -#define mem0pool_h - -#include "univ.i" -#include "os0file.h" -#include "ut0lst.h" - -typedef struct mem_area_struct mem_area_t; -typedef struct mem_pool_struct mem_pool_t; - -/* The common memory pool */ -extern mem_pool_t* mem_comm_pool; - -/* Memory area header */ - -struct mem_area_struct{ - ulint size_and_free; /* memory area size is obtained by - anding with ~MEM_AREA_FREE; area in - a free list if ANDing with - MEM_AREA_FREE results in nonzero */ - UT_LIST_NODE_T(mem_area_t) - free_list; /* free list node */ -}; - -/* Each memory area takes this many extra bytes for control information */ -#define MEM_AREA_EXTRA_SIZE (ut_calc_align(sizeof(struct mem_area_struct),\ - UNIV_MEM_ALIGNMENT)) - 
-/************************************************************************ -Creates a memory pool. */ - -mem_pool_t* -mem_pool_create( -/*============*/ - /* out: memory pool */ - ulint size); /* in: pool size in bytes */ -/************************************************************************ -Allocates memory from a pool. NOTE: This low-level function should only be -used in mem0mem.*! */ - -void* -mem_area_alloc( -/*===========*/ - /* out, own: allocated memory buffer */ - ulint size, /* in: allocated size in bytes; for optimum - space usage, the size should be a power of 2 - minus MEM_AREA_EXTRA_SIZE */ - mem_pool_t* pool); /* in: memory pool */ -/************************************************************************ -Frees memory to a pool. */ - -void -mem_area_free( -/*==========*/ - void* ptr, /* in, own: pointer to allocated memory - buffer */ - mem_pool_t* pool); /* in: memory pool */ -/************************************************************************ -Returns the amount of reserved memory. */ - -ulint -mem_pool_get_reserved( -/*==================*/ - /* out: reserved mmeory in bytes */ - mem_pool_t* pool); /* in: memory pool */ -/************************************************************************ -Reserves the mem pool mutex. */ - -void -mem_pool_mutex_enter(void); -/*======================*/ -/************************************************************************ -Releases the mem pool mutex. */ - -void -mem_pool_mutex_exit(void); -/*=====================*/ -/************************************************************************ -Validates a memory pool. */ - -ibool -mem_pool_validate( -/*==============*/ - /* out: TRUE if ok */ - mem_pool_t* pool); /* in: memory pool */ -/************************************************************************ -Prints info of a memory pool. 
*/ - -void -mem_pool_print_info( -/*================*/ - FILE* outfile,/* in: output file to write to */ - mem_pool_t* pool); /* in: memory pool */ - - -#ifndef UNIV_NONINL -#include "mem0pool.ic" -#endif - -#endif diff --git a/storage/innobase/include/mem0pool.ic b/storage/innobase/include/mem0pool.ic deleted file mode 100644 index 4e8c08733ed..00000000000 --- a/storage/innobase/include/mem0pool.ic +++ /dev/null @@ -1,7 +0,0 @@ -/************************************************************************ -The lowest-level memory management - -(c) 1994, 1995 Innobase Oy - -Created 6/8/1994 Heikki Tuuri -*************************************************************************/ diff --git a/storage/innobase/include/mtr0log.h b/storage/innobase/include/mtr0log.h deleted file mode 100644 index 6a3920aa8a1..00000000000 --- a/storage/innobase/include/mtr0log.h +++ /dev/null @@ -1,217 +0,0 @@ -/****************************************************** -Mini-transaction logging routines - -(c) 1995 Innobase Oy - -Created 12/7/1995 Heikki Tuuri -*******************************************************/ - -#ifndef mtr0log_h -#define mtr0log_h - -#include "univ.i" -#include "mtr0mtr.h" -#include "dict0types.h" - -/************************************************************ -Writes 1 - 4 bytes to a file page buffered in the buffer pool. -Writes the corresponding log record to the mini-transaction log. */ - -void -mlog_write_ulint( -/*=============*/ - byte* ptr, /* in: pointer where to write */ - ulint val, /* in: value to write */ - byte type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************ -Writes 8 bytes to a file page buffered in the buffer pool. -Writes the corresponding log record to the mini-transaction log. 
*/ - -void -mlog_write_dulint( -/*==============*/ - byte* ptr, /* in: pointer where to write */ - dulint val, /* in: value to write */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************ -Writes a string to a file page buffered in the buffer pool. Writes the -corresponding log record to the mini-transaction log. */ - -void -mlog_write_string( -/*==============*/ - byte* ptr, /* in: pointer where to write */ - const byte* str, /* in: string to write */ - ulint len, /* in: string length */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************ -Writes initial part of a log record consisting of one-byte item -type and four-byte space and page numbers. */ - -void -mlog_write_initial_log_record( -/*==========================*/ - byte* ptr, /* in: pointer to (inside) a buffer frame - holding the file page where modification - is made */ - byte type, /* in: log item type: MLOG_1BYTE, ... */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************ -Writes a log record about an .ibd file create/delete/rename. */ -UNIV_INLINE -byte* -mlog_write_initial_log_record_for_file_op( -/*======================================*/ - /* out: new value of log_ptr */ - ulint type, /* in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or - MLOG_FILE_RENAME */ - ulint space_id,/* in: space id, if applicable */ - ulint page_no,/* in: page number (not relevant currently) */ - byte* log_ptr,/* in: pointer to mtr log which has been opened */ - mtr_t* mtr); /* in: mtr */ -/************************************************************ -Catenates 1 - 4 bytes to the mtr log. 
*/ -UNIV_INLINE -void -mlog_catenate_ulint( -/*================*/ - mtr_t* mtr, /* in: mtr */ - ulint val, /* in: value to write */ - ulint type); /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ -/************************************************************ -Catenates n bytes to the mtr log. */ - -void -mlog_catenate_string( -/*=================*/ - mtr_t* mtr, /* in: mtr */ - const byte* str, /* in: string to write */ - ulint len); /* in: string length */ -/************************************************************ -Catenates a compressed ulint to mlog. */ -UNIV_INLINE -void -mlog_catenate_ulint_compressed( -/*===========================*/ - mtr_t* mtr, /* in: mtr */ - ulint val); /* in: value to write */ -/************************************************************ -Catenates a compressed dulint to mlog. */ -UNIV_INLINE -void -mlog_catenate_dulint_compressed( -/*============================*/ - mtr_t* mtr, /* in: mtr */ - dulint val); /* in: value to write */ -/************************************************************ -Opens a buffer to mlog. It must be closed with mlog_close. */ -UNIV_INLINE -byte* -mlog_open( -/*======*/ - /* out: buffer, NULL if log mode MTR_LOG_NONE */ - mtr_t* mtr, /* in: mtr */ - ulint size); /* in: buffer size in bytes; MUST be - smaller than DYN_ARRAY_DATA_SIZE! */ -/************************************************************ -Closes a buffer opened to mlog. */ -UNIV_INLINE -void -mlog_close( -/*=======*/ - mtr_t* mtr, /* in: mtr */ - byte* ptr); /* in: buffer space from ptr up was not used */ -/************************************************************ -Writes the initial part of a log record (3..11 bytes). -If the implementation of this function is changed, all -size parameters to mlog_open() should be adjusted accordingly! 
*/ -UNIV_INLINE -byte* -mlog_write_initial_log_record_fast( -/*===============================*/ - /* out: new value of log_ptr */ - byte* ptr, /* in: pointer to (inside) a buffer frame holding the - file page where modification is made */ - byte type, /* in: log item type: MLOG_1BYTE, ... */ - byte* log_ptr,/* in: pointer to mtr log which has been opened */ - mtr_t* mtr); /* in: mtr */ -/************************************************************ -Parses an initial log record written by mlog_write_initial_log_record. */ - -byte* -mlog_parse_initial_log_record( -/*==========================*/ - /* out: parsed record end, NULL if not a complete - record */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - byte* type, /* out: log record type: MLOG_1BYTE, ... */ - ulint* space, /* out: space id */ - ulint* page_no);/* out: page number */ -/************************************************************ -Parses a log record written by mlog_write_ulint or mlog_write_dulint. */ - -byte* -mlog_parse_nbytes( -/*==============*/ - /* out: parsed record end, NULL if not a complete - record */ - ulint type, /* in: log record type: MLOG_1BYTE, ... */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - byte* page); /* in: page where to apply the log record, or NULL */ -/************************************************************ -Parses a log record written by mlog_write_string. */ - -byte* -mlog_parse_string( -/*==============*/ - /* out: parsed record end, NULL if not a complete - record */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - byte* page); /* in: page where to apply the log record, or NULL */ - - -/************************************************************ -Opens a buffer for mlog, writes the initial log record and, -if needed, the field lengths of an index. Reserves space -for further log entries. The log entry must be closed with -mtr_close(). 
*/ - -byte* -mlog_open_and_write_index( -/*======================*/ - /* out: buffer, NULL if log mode - MTR_LOG_NONE */ - mtr_t* mtr, /* in: mtr */ - byte* rec, /* in: index record or page */ - dict_index_t* index, /* in: record descriptor */ - byte type, /* in: log item type */ - ulint size); /* in: requested buffer size in bytes - (if 0, calls mlog_close() and returns NULL) */ - -/************************************************************ -Parses a log record written by mlog_open_and_write_index. */ - -byte* -mlog_parse_index( -/*=============*/ - /* out: parsed record end, - NULL if not a complete record */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - /* out: new value of log_ptr */ - ibool comp, /* in: TRUE=compact record format */ - dict_index_t** index); /* out, own: dummy index */ - -/* Insert, update, and maybe other functions may use this value to define an -extra mlog buffer size for variable size data */ -#define MLOG_BUF_MARGIN 256 - -#ifndef UNIV_NONINL -#include "mtr0log.ic" -#endif - -#endif diff --git a/storage/innobase/include/mtr0log.ic b/storage/innobase/include/mtr0log.ic deleted file mode 100644 index 5b1d1ed34d9..00000000000 --- a/storage/innobase/include/mtr0log.ic +++ /dev/null @@ -1,227 +0,0 @@ -/****************************************************** -Mini-transaction logging routines - -(c) 1995 Innobase Oy - -Created 12/7/1995 Heikki Tuuri -*******************************************************/ - -#include "mach0data.h" -#include "ut0lst.h" -#include "buf0buf.h" - -/************************************************************ -Opens a buffer to mlog. It must be closed with mlog_close. */ -UNIV_INLINE -byte* -mlog_open( -/*======*/ - /* out: buffer, NULL if log mode MTR_LOG_NONE */ - mtr_t* mtr, /* in: mtr */ - ulint size) /* in: buffer size in bytes; MUST be - smaller than DYN_ARRAY_DATA_SIZE! 
*/ -{ - dyn_array_t* mlog; - - mtr->modifications = TRUE; - - if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) { - - return(NULL); - } - - mlog = &(mtr->log); - - return(dyn_array_open(mlog, size)); -} - -/************************************************************ -Closes a buffer opened to mlog. */ -UNIV_INLINE -void -mlog_close( -/*=======*/ - mtr_t* mtr, /* in: mtr */ - byte* ptr) /* in: buffer space from ptr up was not used */ -{ - dyn_array_t* mlog; - - ut_ad(mtr_get_log_mode(mtr) != MTR_LOG_NONE); - - mlog = &(mtr->log); - - dyn_array_close(mlog, ptr); -} - -/************************************************************ -Catenates 1 - 4 bytes to the mtr log. The value is not compressed. */ -UNIV_INLINE -void -mlog_catenate_ulint( -/*================*/ - mtr_t* mtr, /* in: mtr */ - ulint val, /* in: value to write */ - ulint type) /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ -{ - dyn_array_t* mlog; - byte* ptr; - - if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) { - - return; - } - - mlog = &(mtr->log); - -#if MLOG_1BYTE != 1 -# error "MLOG_1BYTE != 1" -#endif -#if MLOG_2BYTES != 2 -# error "MLOG_2BYTES != 2" -#endif -#if MLOG_4BYTES != 4 -# error "MLOG_4BYTES != 4" -#endif -#if MLOG_8BYTES != 8 -# error "MLOG_8BYTES != 8" -#endif - ptr = dyn_array_push(mlog, type); - - if (type == MLOG_4BYTES) { - mach_write_to_4(ptr, val); - } else if (type == MLOG_2BYTES) { - mach_write_to_2(ptr, val); - } else { - ut_ad(type == MLOG_1BYTE); - mach_write_to_1(ptr, val); - } -} - -/************************************************************ -Catenates a compressed ulint to mlog. 
*/ -UNIV_INLINE -void -mlog_catenate_ulint_compressed( -/*===========================*/ - mtr_t* mtr, /* in: mtr */ - ulint val) /* in: value to write */ -{ - byte* log_ptr; - - log_ptr = mlog_open(mtr, 10); - - /* If no logging is requested, we may return now */ - if (log_ptr == NULL) { - - return; - } - - log_ptr += mach_write_compressed(log_ptr, val); - - mlog_close(mtr, log_ptr); -} - -/************************************************************ -Catenates a compressed dulint to mlog. */ -UNIV_INLINE -void -mlog_catenate_dulint_compressed( -/*============================*/ - mtr_t* mtr, /* in: mtr */ - dulint val) /* in: value to write */ -{ - byte* log_ptr; - - log_ptr = mlog_open(mtr, 15); - - /* If no logging is requested, we may return now */ - if (log_ptr == NULL) { - - return; - } - - log_ptr += mach_dulint_write_compressed(log_ptr, val); - - mlog_close(mtr, log_ptr); -} - -/************************************************************ -Writes the initial part of a log record (3..11 bytes). -If the implementation of this function is changed, all -size parameters to mlog_open() should be adjusted accordingly! */ -UNIV_INLINE -byte* -mlog_write_initial_log_record_fast( -/*===============================*/ - /* out: new value of log_ptr */ - byte* ptr, /* in: pointer to (inside) a buffer frame holding the - file page where modification is made */ - byte type, /* in: log item type: MLOG_1BYTE, ... 
*/ - byte* log_ptr,/* in: pointer to mtr log which has been opened */ - mtr_t* mtr) /* in: mtr */ -{ - buf_block_t* block; - ulint space; - ulint offset; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(ptr), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(type <= MLOG_BIGGEST_TYPE); - ut_ad(ptr && log_ptr); - - block = buf_block_align(ptr); - - space = buf_block_get_space(block); - offset = buf_block_get_page_no(block); - - mach_write_to_1(log_ptr, type); - log_ptr++; - log_ptr += mach_write_compressed(log_ptr, space); - log_ptr += mach_write_compressed(log_ptr, offset); - - mtr->n_log_recs++; - -#ifdef UNIV_LOG_DEBUG - /* fprintf(stderr, - "Adding to mtr log record type %lu space %lu page no %lu\n", - type, space, offset); */ -#endif - -#ifdef UNIV_DEBUG - /* We now assume that all x-latched pages have been modified! */ - - if (!mtr_memo_contains(mtr, block, MTR_MEMO_MODIFY)) { - - mtr_memo_push(mtr, block, MTR_MEMO_MODIFY); - } -#endif - return(log_ptr); -} - -/************************************************************ -Writes a log record about an .ibd file create/delete/rename. 
*/ -UNIV_INLINE -byte* -mlog_write_initial_log_record_for_file_op( -/*======================================*/ - /* out: new value of log_ptr */ - ulint type, /* in: MLOG_FILE_CREATE, MLOG_FILE_DELETE, or - MLOG_FILE_RENAME */ - ulint space_id,/* in: space id, if applicable */ - ulint page_no,/* in: page number (not relevant currently) */ - byte* log_ptr,/* in: pointer to mtr log which has been opened */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(log_ptr); - - mach_write_to_1(log_ptr, type); - log_ptr++; - - /* We write dummy space id and page number */ - log_ptr += mach_write_compressed(log_ptr, space_id); - log_ptr += mach_write_compressed(log_ptr, page_no); - - mtr->n_log_recs++; - - return(log_ptr); -} diff --git a/storage/innobase/include/mtr0mtr.h b/storage/innobase/include/mtr0mtr.h deleted file mode 100644 index 2a160d27e0c..00000000000 --- a/storage/innobase/include/mtr0mtr.h +++ /dev/null @@ -1,347 +0,0 @@ -/****************************************************** -Mini-transaction buffer - -(c) 1995 Innobase Oy - -Created 11/26/1995 Heikki Tuuri -*******************************************************/ - -#ifndef mtr0mtr_h -#define mtr0mtr_h - -#include "univ.i" -#include "mem0mem.h" -#include "dyn0dyn.h" -#include "buf0types.h" -#include "sync0rw.h" -#include "ut0byte.h" -#include "mtr0types.h" -#include "page0types.h" - -/* Logging modes for a mini-transaction */ -#define MTR_LOG_ALL 21 /* default mode: log all operations - modifying disk-based data */ -#define MTR_LOG_NONE 22 /* log no operations */ -/*#define MTR_LOG_SPACE 23 */ /* log only operations modifying - file space page allocation data - (operations in fsp0fsp.* ) */ -#define MTR_LOG_SHORT_INSERTS 24 /* inserts are logged in a shorter - form */ - -/* Types for the mlock objects to store in the mtr memo; NOTE that the -first 3 values must be RW_S_LATCH, RW_X_LATCH, RW_NO_LATCH */ -#define MTR_MEMO_PAGE_S_FIX RW_S_LATCH -#define MTR_MEMO_PAGE_X_FIX RW_X_LATCH -#define MTR_MEMO_BUF_FIX RW_NO_LATCH 
-#define MTR_MEMO_MODIFY 54 -#define MTR_MEMO_S_LOCK 55 -#define MTR_MEMO_X_LOCK 56 - -/* Log item types: we have made them to be of the type 'byte' -for the compiler to warn if val and type parameters are switched -in a call to mlog_write_ulint. NOTE! For 1 - 8 bytes, the -flag value must give the length also! */ -#define MLOG_SINGLE_REC_FLAG 128 /* if the mtr contains only - one log record for one page, - i.e., write_initial_log_record - has been called only once, - this flag is ORed to the type - of that first log record */ -#define MLOG_1BYTE (1) /* one byte is written */ -#define MLOG_2BYTES (2) /* 2 bytes ... */ -#define MLOG_4BYTES (4) /* 4 bytes ... */ -#define MLOG_8BYTES (8) /* 8 bytes ... */ -#define MLOG_REC_INSERT ((byte)9) /* record insert */ -#define MLOG_REC_CLUST_DELETE_MARK ((byte)10) /* mark clustered index record - deleted */ -#define MLOG_REC_SEC_DELETE_MARK ((byte)11) /* mark secondary index record - deleted */ -#define MLOG_REC_UPDATE_IN_PLACE ((byte)13) /* update of a record, - preserves record field sizes */ -#define MLOG_REC_DELETE ((byte)14) /* delete a record from a - page */ -#define MLOG_LIST_END_DELETE ((byte)15) /* delete record list end on - index page */ -#define MLOG_LIST_START_DELETE ((byte)16) /* delete record list start on - index page */ -#define MLOG_LIST_END_COPY_CREATED ((byte)17) /* copy record list end to a - new created index page */ -#define MLOG_PAGE_REORGANIZE ((byte)18) /* reorganize an index page */ -#define MLOG_PAGE_CREATE ((byte)19) /* create an index page */ -#define MLOG_UNDO_INSERT ((byte)20) /* insert entry in an undo - log */ -#define MLOG_UNDO_ERASE_END ((byte)21) /* erase an undo log - page end */ -#define MLOG_UNDO_INIT ((byte)22) /* initialize a page in an - undo log */ -#define MLOG_UNDO_HDR_DISCARD ((byte)23) /* discard an update undo log - header */ -#define MLOG_UNDO_HDR_REUSE ((byte)24) /* reuse an insert undo log - header */ -#define MLOG_UNDO_HDR_CREATE ((byte)25) /* create an undo log header */ 
-#define MLOG_REC_MIN_MARK ((byte)26) /* mark an index record as the - predefined minimum record */ -#define MLOG_IBUF_BITMAP_INIT ((byte)27) /* initialize an ibuf bitmap - page */ -/*#define MLOG_FULL_PAGE ((byte)28) full contents of a page */ -#define MLOG_INIT_FILE_PAGE ((byte)29) /* this means that a file page - is taken into use and the prior - contents of the page should be - ignored: in recovery we must - not trust the lsn values stored - to the file page */ -#define MLOG_WRITE_STRING ((byte)30) /* write a string to a page */ -#define MLOG_MULTI_REC_END ((byte)31) /* if a single mtr writes - log records for several pages, - this log record ends the - sequence of these records */ -#define MLOG_DUMMY_RECORD ((byte)32) /* dummy log record used to - pad a log block full */ -#define MLOG_FILE_CREATE ((byte)33) /* log record about an .ibd - file creation */ -#define MLOG_FILE_RENAME ((byte)34) /* log record about an .ibd - file rename */ -#define MLOG_FILE_DELETE ((byte)35) /* log record about an .ibd - file deletion */ -#define MLOG_COMP_REC_MIN_MARK ((byte)36) /* mark a compact index record - as the predefined minimum - record */ -#define MLOG_COMP_PAGE_CREATE ((byte)37) /* create a compact - index page */ -#define MLOG_COMP_REC_INSERT ((byte)38) /* compact record insert */ -#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39) - /* mark compact clustered index - record deleted */ -#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/* mark compact secondary index - record deleted; this log - record type is redundant, as - MLOG_REC_SEC_DELETE_MARK is - independent of the record - format. 
*/ -#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/* update of a compact record, - preserves record field sizes */ -#define MLOG_COMP_REC_DELETE ((byte)42) /* delete a compact record - from a page */ -#define MLOG_COMP_LIST_END_DELETE ((byte)43) /* delete compact record list - end on index page */ -#define MLOG_COMP_LIST_START_DELETE ((byte)44) /* delete compact record list - start on index page */ -#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45) - /* copy compact record list end - to a new created index page */ -#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /* reorganize an index page */ - -#define MLOG_BIGGEST_TYPE ((byte)46) /* biggest value (used in - asserts) */ - -/******************************************************************* -Starts a mini-transaction and creates a mini-transaction handle -and buffer in the memory buffer given by the caller. */ -UNIV_INLINE -mtr_t* -mtr_start( -/*======*/ - /* out: mtr buffer which also acts as - the mtr handle */ - mtr_t* mtr); /* in: memory buffer for the mtr buffer */ -/******************************************************************* -Starts a mini-transaction and creates a mini-transaction handle -and buffer in the memory buffer given by the caller. */ - -mtr_t* -mtr_start_noninline( -/*================*/ - /* out: mtr buffer which also acts as - the mtr handle */ - mtr_t* mtr); /* in: memory buffer for the mtr buffer */ -/******************************************************************* -Commits a mini-transaction. */ - -void -mtr_commit( -/*=======*/ - mtr_t* mtr); /* in: mini-transaction */ -/************************************************************** -Sets and returns a savepoint in mtr. */ -UNIV_INLINE -ulint -mtr_set_savepoint( -/*==============*/ - /* out: savepoint */ - mtr_t* mtr); /* in: mtr */ -/************************************************************** -Releases the latches stored in an mtr memo down to a savepoint. -NOTE! 
The mtr must not have made changes to buffer pages after the -savepoint, as these can be handled only by mtr_commit. */ - -void -mtr_rollback_to_savepoint( -/*======================*/ - mtr_t* mtr, /* in: mtr */ - ulint savepoint); /* in: savepoint */ -/************************************************************** -Releases the (index tree) s-latch stored in an mtr memo after a -savepoint. */ -UNIV_INLINE -void -mtr_release_s_latch_at_savepoint( -/*=============================*/ - mtr_t* mtr, /* in: mtr */ - ulint savepoint, /* in: savepoint */ - rw_lock_t* lock); /* in: latch to release */ -/******************************************************************* -Gets the logging mode of a mini-transaction. */ -UNIV_INLINE -ulint -mtr_get_log_mode( -/*=============*/ - /* out: logging mode: MTR_LOG_NONE, ... */ - mtr_t* mtr); /* in: mtr */ -/******************************************************************* -Changes the logging mode of a mini-transaction. */ -UNIV_INLINE -ulint -mtr_set_log_mode( -/*=============*/ - /* out: old mode */ - mtr_t* mtr, /* in: mtr */ - ulint mode); /* in: logging mode: MTR_LOG_NONE, ... */ -/************************************************************ -Reads 1 - 4 bytes from a file page buffered in the buffer pool. */ - -ulint -mtr_read_ulint( -/*===========*/ - /* out: value read */ - byte* ptr, /* in: pointer from where to read */ - ulint type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************ -Reads 8 bytes from a file page buffered in the buffer pool. */ - -dulint -mtr_read_dulint( -/*============*/ - /* out: value read */ - byte* ptr, /* in: pointer from where to read */ - mtr_t* mtr); /* in: mini-transaction handle */ -/************************************************************************* -This macro locks an rw-lock in s-mode. 
*/ -#define mtr_s_lock(B, MTR) mtr_s_lock_func((B), __FILE__, __LINE__,\ - (MTR)) -/************************************************************************* -This macro locks an rw-lock in x-mode. */ -#define mtr_x_lock(B, MTR) mtr_x_lock_func((B), __FILE__, __LINE__,\ - (MTR)) -/************************************************************************* -NOTE! Use the macro above! -Locks a lock in s-mode. */ -UNIV_INLINE -void -mtr_s_lock_func( -/*============*/ - rw_lock_t* lock, /* in: rw-lock */ - const char* file, /* in: file name */ - ulint line, /* in: line number */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************* -NOTE! Use the macro above! -Locks a lock in x-mode. */ -UNIV_INLINE -void -mtr_x_lock_func( -/*============*/ - rw_lock_t* lock, /* in: rw-lock */ - const char* file, /* in: file name */ - ulint line, /* in: line number */ - mtr_t* mtr); /* in: mtr */ - -/******************************************************* -Releases an object in the memo stack. */ - -void -mtr_memo_release( -/*=============*/ - mtr_t* mtr, /* in: mtr */ - void* object, /* in: object */ - ulint type); /* in: object type: MTR_MEMO_S_LOCK, ... */ -#ifdef UNIV_DEBUG -/************************************************************** -Checks if memo contains the given item. */ -UNIV_INLINE -ibool -mtr_memo_contains( -/*==============*/ - /* out: TRUE if contains */ - mtr_t* mtr, /* in: mtr */ - void* object, /* in: object to search */ - ulint type); /* in: type of object */ -/************************************************************* -Prints info of an mtr handle. */ - -void -mtr_print( -/*======*/ - mtr_t* mtr); /* in: mtr */ -#endif /* UNIV_DEBUG */ -/*######################################################################*/ - -#define MTR_BUF_MEMO_SIZE 200 /* number of slots in memo */ - -/******************************************************************* -Returns the log object of a mini-transaction buffer. 
*/ -UNIV_INLINE -dyn_array_t* -mtr_get_log( -/*========*/ - /* out: log */ - mtr_t* mtr); /* in: mini-transaction */ -/******************************************************* -Pushes an object to an mtr memo stack. */ -UNIV_INLINE -void -mtr_memo_push( -/*==========*/ - mtr_t* mtr, /* in: mtr */ - void* object, /* in: object */ - ulint type); /* in: object type: MTR_MEMO_S_LOCK, ... */ - - -/* Type definition of a mini-transaction memo stack slot. */ -typedef struct mtr_memo_slot_struct mtr_memo_slot_t; -struct mtr_memo_slot_struct{ - ulint type; /* type of the stored object (MTR_MEMO_S_LOCK, ...) */ - void* object; /* pointer to the object */ -}; - -/* Mini-transaction handle and buffer */ -struct mtr_struct{ - ulint state; /* MTR_ACTIVE, MTR_COMMITTING, MTR_COMMITTED */ - dyn_array_t memo; /* memo stack for locks etc. */ - dyn_array_t log; /* mini-transaction log */ - ibool modifications; - /* TRUE if the mtr made modifications to - buffer pool pages */ - ulint n_log_recs; - /* count of how many page initial log records - have been written to the mtr log */ - ulint log_mode; /* specifies which operations should be - logged; default value MTR_LOG_ALL */ - dulint start_lsn;/* start lsn of the possible log entry for - this mtr */ - dulint end_lsn;/* end lsn of the possible log entry for - this mtr */ - ulint magic_n; -}; - -#define MTR_MAGIC_N 54551 - -#define MTR_ACTIVE 12231 -#define MTR_COMMITTING 56456 -#define MTR_COMMITTED 34676 - -#ifndef UNIV_NONINL -#include "mtr0mtr.ic" -#endif - -#endif diff --git a/storage/innobase/include/mtr0mtr.ic b/storage/innobase/include/mtr0mtr.ic deleted file mode 100644 index 81eec3bfc92..00000000000 --- a/storage/innobase/include/mtr0mtr.ic +++ /dev/null @@ -1,251 +0,0 @@ -/****************************************************** -Mini-transaction buffer - -(c) 1995 Innobase Oy - -Created 11/26/1995 Heikki Tuuri -*******************************************************/ - -#include "sync0sync.h" -#include "sync0rw.h" -#include 
"mach0data.h" - -/******************************************************************* -Starts a mini-transaction and creates a mini-transaction handle -and a buffer in the memory buffer given by the caller. */ -UNIV_INLINE -mtr_t* -mtr_start( -/*======*/ - /* out: mtr buffer which also acts as - the mtr handle */ - mtr_t* mtr) /* in: memory buffer for the mtr buffer */ -{ - dyn_array_create(&(mtr->memo)); - dyn_array_create(&(mtr->log)); - - mtr->log_mode = MTR_LOG_ALL; - mtr->modifications = FALSE; - mtr->n_log_recs = 0; - -#ifdef UNIV_DEBUG - mtr->state = MTR_ACTIVE; - mtr->magic_n = MTR_MAGIC_N; -#endif - return(mtr); -} - -/******************************************************* -Pushes an object to an mtr memo stack. */ -UNIV_INLINE -void -mtr_memo_push( -/*==========*/ - mtr_t* mtr, /* in: mtr */ - void* object, /* in: object */ - ulint type) /* in: object type: MTR_MEMO_S_LOCK, ... */ -{ - dyn_array_t* memo; - mtr_memo_slot_t* slot; - - ut_ad(object); - ut_ad(type >= MTR_MEMO_PAGE_S_FIX); - ut_ad(type <= MTR_MEMO_X_LOCK); - ut_ad(mtr); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - - memo = &(mtr->memo); - - slot = dyn_array_push(memo, sizeof(mtr_memo_slot_t)); - - slot->object = object; - slot->type = type; -} - -/************************************************************** -Sets and returns a savepoint in mtr. */ -UNIV_INLINE -ulint -mtr_set_savepoint( -/*==============*/ - /* out: savepoint */ - mtr_t* mtr) /* in: mtr */ -{ - dyn_array_t* memo; - - ut_ad(mtr); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - - memo = &(mtr->memo); - - return(dyn_array_get_data_size(memo)); -} - -/************************************************************** -Releases the (index tree) s-latch stored in an mtr memo after a -savepoint. 
*/ -UNIV_INLINE -void -mtr_release_s_latch_at_savepoint( -/*=============================*/ - mtr_t* mtr, /* in: mtr */ - ulint savepoint, /* in: savepoint */ - rw_lock_t* lock) /* in: latch to release */ -{ - mtr_memo_slot_t* slot; - dyn_array_t* memo; - - ut_ad(mtr); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_ACTIVE); - - memo = &(mtr->memo); - - ut_ad(dyn_array_get_data_size(memo) > savepoint); - - slot = dyn_array_get_element(memo, savepoint); - - ut_ad(slot->object == lock); - ut_ad(slot->type == MTR_MEMO_S_LOCK); - - rw_lock_s_unlock(lock); - - slot->object = NULL; -} - -#ifdef UNIV_DEBUG -/************************************************************** -Checks if memo contains the given item. */ -UNIV_INLINE -ibool -mtr_memo_contains( -/*==============*/ - /* out: TRUE if contains */ - mtr_t* mtr, /* in: mtr */ - void* object, /* in: object to search */ - ulint type) /* in: type of object */ -{ - mtr_memo_slot_t* slot; - dyn_array_t* memo; - ulint offset; - - ut_ad(mtr); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - - memo = &(mtr->memo); - - offset = dyn_array_get_data_size(memo); - - while (offset > 0) { - offset -= sizeof(mtr_memo_slot_t); - - slot = dyn_array_get_element(memo, offset); - - if ((object == slot->object) && (type == slot->type)) { - - return(TRUE); - } - } - - return(FALSE); -} -#endif /* UNIV_DEBUG */ - -/******************************************************************* -Returns the log object of a mini-transaction buffer. */ -UNIV_INLINE -dyn_array_t* -mtr_get_log( -/*========*/ - /* out: log */ - mtr_t* mtr) /* in: mini-transaction */ -{ - ut_ad(mtr); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - - return(&(mtr->log)); -} - -/******************************************************************* -Gets the logging mode of a mini-transaction. */ -UNIV_INLINE -ulint -mtr_get_log_mode( -/*=============*/ - /* out: logging mode: MTR_LOG_NONE, ... 
*/ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(mtr); - ut_ad(mtr->log_mode >= MTR_LOG_ALL); - ut_ad(mtr->log_mode <= MTR_LOG_SHORT_INSERTS); - - return(mtr->log_mode); -} - -/******************************************************************* -Changes the logging mode of a mini-transaction. */ -UNIV_INLINE -ulint -mtr_set_log_mode( -/*=============*/ - /* out: old mode */ - mtr_t* mtr, /* in: mtr */ - ulint mode) /* in: logging mode: MTR_LOG_NONE, ... */ -{ - ulint old_mode; - - ut_ad(mtr); - ut_ad(mode >= MTR_LOG_ALL); - ut_ad(mode <= MTR_LOG_SHORT_INSERTS); - - old_mode = mtr->log_mode; - - if ((mode == MTR_LOG_SHORT_INSERTS) && (old_mode == MTR_LOG_NONE)) { - /* Do nothing */ - } else { - mtr->log_mode = mode; - } - - ut_ad(old_mode >= MTR_LOG_ALL); - ut_ad(old_mode <= MTR_LOG_SHORT_INSERTS); - - return(old_mode); -} - -/************************************************************************* -Locks a lock in s-mode. */ -UNIV_INLINE -void -mtr_s_lock_func( -/*============*/ - rw_lock_t* lock, /* in: rw-lock */ - const char* file, /* in: file name */ - ulint line, /* in: line number */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(mtr); - ut_ad(lock); - - rw_lock_s_lock_func(lock, 0, file, line); - - mtr_memo_push(mtr, lock, MTR_MEMO_S_LOCK); -} - -/************************************************************************* -Locks a lock in x-mode. 
*/ -UNIV_INLINE -void -mtr_x_lock_func( -/*============*/ - rw_lock_t* lock, /* in: rw-lock */ - const char* file, /* in: file name */ - ulint line, /* in: line number */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(mtr); - ut_ad(lock); - - rw_lock_x_lock_func(lock, 0, file, line); - - mtr_memo_push(mtr, lock, MTR_MEMO_X_LOCK); -} diff --git a/storage/innobase/include/mtr0types.h b/storage/innobase/include/mtr0types.h deleted file mode 100644 index e3b6ec9a84f..00000000000 --- a/storage/innobase/include/mtr0types.h +++ /dev/null @@ -1,14 +0,0 @@ -/****************************************************** -Mini-transaction buffer global types - -(c) 1995 Innobase Oy - -Created 11/26/1995 Heikki Tuuri -*******************************************************/ - -#ifndef mtr0types_h -#define mtr0types_h - -typedef struct mtr_struct mtr_t; - -#endif diff --git a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h deleted file mode 100644 index 70c07ea6d1a..00000000000 --- a/storage/innobase/include/os0file.h +++ /dev/null @@ -1,731 +0,0 @@ -/****************************************************** -The interface to the operating system file io - -(c) 1995 Innobase Oy - -Created 10/21/1995 Heikki Tuuri -*******************************************************/ - -#ifndef os0file_h -#define os0file_h - -#include "univ.i" - -#ifndef __WIN__ -#include <dirent.h> -#include <sys/stat.h> -#include <time.h> -#endif - -typedef struct fil_node_struct fil_node_t; - -#ifdef UNIV_DO_FLUSH -extern ibool os_do_not_call_flush_at_each_write; -#endif /* UNIV_DO_FLUSH */ -extern ibool os_has_said_disk_full; -extern ibool os_aio_print_debug; - -extern ulint os_file_n_pending_preads; -extern ulint os_file_n_pending_pwrites; - -extern ulint os_n_pending_reads; -extern ulint os_n_pending_writes; - -#ifdef __WIN__ - -/* We define always WIN_ASYNC_IO, and check at run-time whether - the OS actually supports it: Win 95 does not, NT does. 
*/ -#define WIN_ASYNC_IO - -#define UNIV_NON_BUFFERED_IO - -#endif - -#ifdef __WIN__ -#define os_file_t HANDLE -#else -typedef int os_file_t; -#endif - -extern ulint os_innodb_umask; - -/* If this flag is TRUE, then we will use the native aio of the -OS (provided we compiled Innobase with it in), otherwise we will -use simulated aio we build below with threads */ - -extern ibool os_aio_use_native_aio; - -#define OS_FILE_SECTOR_SIZE 512 - -/* The next value should be smaller or equal to the smallest sector size used -on any disk. A log block is required to be a portion of disk which is written -so that if the start and the end of a block get written to disk, then the -whole block gets written. This should be true even in most cases of a crash: -if this fails for a log block, then it is equivalent to a media failure in the -log. */ - -#define OS_FILE_LOG_BLOCK_SIZE 512 - -/* Options for file_create */ -#define OS_FILE_OPEN 51 -#define OS_FILE_CREATE 52 -#define OS_FILE_OVERWRITE 53 -#define OS_FILE_OPEN_RAW 54 -#define OS_FILE_CREATE_PATH 55 -#define OS_FILE_OPEN_RETRY 56 /* for os_file_create() on - the first ibdata file */ - -#define OS_FILE_READ_ONLY 333 -#define OS_FILE_READ_WRITE 444 -#define OS_FILE_READ_ALLOW_DELETE 555 /* for ibbackup */ - -/* Options for file_create */ -#define OS_FILE_AIO 61 -#define OS_FILE_NORMAL 62 - -/* Types for file create */ -#define OS_DATA_FILE 100 -#define OS_LOG_FILE 101 - -/* Error codes from os_file_get_last_error */ -#define OS_FILE_NOT_FOUND 71 -#define OS_FILE_DISK_FULL 72 -#define OS_FILE_ALREADY_EXISTS 73 -#define OS_FILE_PATH_ERROR 74 -#define OS_FILE_AIO_RESOURCES_RESERVED 75 /* wait for OS aio resources - to become available again */ -#define OS_FILE_SHARING_VIOLATION 76 -#define OS_FILE_ERROR_NOT_SPECIFIED 77 - -/* Types for aio operations */ -#define OS_FILE_READ 10 -#define OS_FILE_WRITE 11 - -#define OS_FILE_LOG 256 /* This can be ORed to type */ - -#define OS_AIO_N_PENDING_IOS_PER_THREAD 32 /* Win NT does not allow 
more - than 64 */ - -/* Modes for aio operations */ -#define OS_AIO_NORMAL 21 /* Normal asynchronous i/o not for ibuf - pages or ibuf bitmap pages */ -#define OS_AIO_IBUF 22 /* Asynchronous i/o for ibuf pages or ibuf - bitmap pages */ -#define OS_AIO_LOG 23 /* Asynchronous i/o for the log */ -#define OS_AIO_SYNC 24 /* Asynchronous i/o where the calling thread - will itself wait for the i/o to complete, - doing also the job of the i/o-handler thread; - can be used for any pages, ibuf or non-ibuf. - This is used to save CPU time, as we can do - with fewer thread switches. Plain synchronous - i/o is not as good, because it must serialize - the file seek and read or write, causing a - bottleneck for parallelism. */ - -#define OS_AIO_SIMULATED_WAKE_LATER 512 /* This can be ORed to mode - in the call of os_aio(...), - if the caller wants to post several i/o - requests in a batch, and only after that - wake the i/o-handler thread; this has - effect only in simulated aio */ -#define OS_WIN31 1 -#define OS_WIN95 2 -#define OS_WINNT 3 -#define OS_WIN2000 4 - -extern ulint os_n_file_reads; -extern ulint os_n_file_writes; -extern ulint os_n_fsyncs; - -/* File types for directory entry data type */ - -enum os_file_type_enum{ - OS_FILE_TYPE_UNKNOWN = 0, - OS_FILE_TYPE_FILE, /* regular file */ - OS_FILE_TYPE_DIR, /* directory */ - OS_FILE_TYPE_LINK /* symbolic link */ -}; -typedef enum os_file_type_enum os_file_type_t; - -/* Maximum path string length in bytes when referring to tables with in the -'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers -of this size from the thread stack; that is why this should not be made much -bigger than 4000 bytes */ -#define OS_FILE_MAX_PATH 4000 - -/* Struct used in fetching information of a file in a directory */ -struct os_file_stat_struct{ - char name[OS_FILE_MAX_PATH]; /* path to a file */ - os_file_type_t type; /* file type */ - ib_longlong size; /* file size */ - time_t ctime; /* creation time */ - time_t 
mtime; /* modification time */ - time_t atime; /* access time */ -}; -typedef struct os_file_stat_struct os_file_stat_t; - -#ifdef __WIN__ -typedef HANDLE os_file_dir_t; /* directory stream */ -#else -typedef DIR* os_file_dir_t; /* directory stream */ -#endif - -/*************************************************************************** -Gets the operating system version. Currently works only on Windows. */ - -ulint -os_get_os_version(void); -/*===================*/ - /* out: OS_WIN95, OS_WIN31, OS_WINNT, or OS_WIN2000 */ -/******************************************************************** -Creates the seek mutexes used in positioned reads and writes. */ - -void -os_io_init_simple(void); -/*===================*/ -/*************************************************************************** -Creates a temporary file. This function is like tmpfile(3), but -the temporary file is created in the MySQL temporary directory. -On Netware, this function is like tmpfile(3), because the C run-time -library of Netware does not expose the delete-on-close flag. */ - -FILE* -os_file_create_tmpfile(void); -/*========================*/ - /* out: temporary file handle, or NULL on error */ -/*************************************************************************** -The os_file_opendir() function opens a directory stream corresponding to the -directory named by the dirname argument. The directory stream is positioned -at the first entry. In both Unix and Windows we automatically skip the '.' -and '..' items at the start of the directory listing. 
*/ - -os_file_dir_t -os_file_opendir( -/*============*/ - /* out: directory stream, NULL if - error */ - const char* dirname, /* in: directory name; it must not - contain a trailing '\' or '/' */ - ibool error_is_fatal);/* in: TRUE if we should treat an - error as a fatal error; if we try to - open symlinks then we do not wish a - fatal error if it happens not to be - a directory */ -/*************************************************************************** -Closes a directory stream. */ - -int -os_file_closedir( -/*=============*/ - /* out: 0 if success, -1 if failure */ - os_file_dir_t dir); /* in: directory stream */ -/*************************************************************************** -This function returns information of the next file in the directory. We jump -over the '.' and '..' entries in the directory. */ - -int -os_file_readdir_next_file( -/*======================*/ - /* out: 0 if ok, -1 if error, 1 if at the end - of the directory */ - const char* dirname,/* in: directory name or path */ - os_file_dir_t dir, /* in: directory stream */ - os_file_stat_t* info); /* in/out: buffer where the info is returned */ -/********************************************************************* -This function attempts to create a directory named pathname. The new directory -gets default permissions. On Unix, the permissions are (0770 & ~umask). If the -directory exists already, nothing is done and the call succeeds, unless the -fail_if_exists arguments is true. */ - -ibool -os_file_create_directory( -/*=====================*/ - /* out: TRUE if call succeeds, - FALSE on error */ - const char* pathname, /* in: directory name as - null-terminated string */ - ibool fail_if_exists);/* in: if TRUE, pre-existing directory - is treated as an error. */ -/******************************************************************** -A simple function to open or create a file. 
*/ - -os_file_t -os_file_create_simple( -/*==================*/ - /* out, own: handle to the file, not defined - if error, error number can be retrieved with - os_file_get_last_error */ - const char* name, /* in: name of the file or path as a - null-terminated string */ - ulint create_mode,/* in: OS_FILE_OPEN if an existing file is - opened (if does not exist, error), or - OS_FILE_CREATE if a new file is created - (if exists, error), or - OS_FILE_CREATE_PATH if new file - (if exists, error) and subdirectories along - its path are created (if needed)*/ - ulint access_type,/* in: OS_FILE_READ_ONLY or - OS_FILE_READ_WRITE */ - ibool* success);/* out: TRUE if succeed, FALSE if error */ -/******************************************************************** -A simple function to open or create a file. */ - -os_file_t -os_file_create_simple_no_error_handling( -/*====================================*/ - /* out, own: handle to the file, not defined - if error, error number can be retrieved with - os_file_get_last_error */ - const char* name, /* in: name of the file or path as a - null-terminated string */ - ulint create_mode,/* in: OS_FILE_OPEN if an existing file - is opened (if does not exist, error), or - OS_FILE_CREATE if a new file is created - (if exists, error) */ - ulint access_type,/* in: OS_FILE_READ_ONLY, - OS_FILE_READ_WRITE, or - OS_FILE_READ_ALLOW_DELETE; the last option is - used by a backup program reading the file */ - ibool* success);/* out: TRUE if succeed, FALSE if error */ -/******************************************************************** -Opens an existing file or creates a new. 
*/ - -os_file_t -os_file_create( -/*===========*/ - /* out, own: handle to the file, not defined - if error, error number can be retrieved with - os_file_get_last_error */ - const char* name, /* in: name of the file or path as a - null-terminated string */ - ulint create_mode,/* in: OS_FILE_OPEN if an existing file - is opened (if does not exist, error), or - OS_FILE_CREATE if a new file is created - (if exists, error), - OS_FILE_OVERWRITE if a new file is created - or an old overwritten; - OS_FILE_OPEN_RAW, if a raw device or disk - partition should be opened */ - ulint purpose,/* in: OS_FILE_AIO, if asynchronous, - non-buffered i/o is desired, - OS_FILE_NORMAL, if any normal file; - NOTE that it also depends on type, os_aio_.. - and srv_.. variables whether we really use - async i/o or unbuffered i/o: look in the - function source code for the exact rules */ - ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */ - ibool* success);/* out: TRUE if succeed, FALSE if error */ -/*************************************************************************** -Deletes a file. The file has to be closed before calling this. */ - -ibool -os_file_delete( -/*===========*/ - /* out: TRUE if success */ - const char* name); /* in: file path as a null-terminated string */ - -/*************************************************************************** -Deletes a file if it exists. The file has to be closed before calling this. */ - -ibool -os_file_delete_if_exists( -/*=====================*/ - /* out: TRUE if success */ - const char* name); /* in: file path as a null-terminated string */ -/*************************************************************************** -Renames a file (can also move it to another directory). It is safest that the -file is closed before calling this function. 
*/ - -ibool -os_file_rename( -/*===========*/ - /* out: TRUE if success */ - const char* oldpath, /* in: old file path as a - null-terminated string */ - const char* newpath); /* in: new file path */ -/*************************************************************************** -Closes a file handle. In case of error, error number can be retrieved with -os_file_get_last_error. */ - -ibool -os_file_close( -/*==========*/ - /* out: TRUE if success */ - os_file_t file); /* in, own: handle to a file */ -/*************************************************************************** -Closes a file handle. */ - -ibool -os_file_close_no_error_handling( -/*============================*/ - /* out: TRUE if success */ - os_file_t file); /* in, own: handle to a file */ -/*************************************************************************** -Gets a file size. */ - -ibool -os_file_get_size( -/*=============*/ - /* out: TRUE if success */ - os_file_t file, /* in: handle to a file */ - ulint* size, /* out: least significant 32 bits of file - size */ - ulint* size_high);/* out: most significant 32 bits of size */ -/*************************************************************************** -Gets file size as a 64-bit integer ib_longlong. */ - -ib_longlong -os_file_get_size_as_iblonglong( -/*===========================*/ - /* out: size in bytes, -1 if error */ - os_file_t file); /* in: handle to a file */ -/*************************************************************************** -Write the specified number of zeros to a newly created file. 
*/ - -ibool -os_file_set_size( -/*=============*/ - /* out: TRUE if success */ - const char* name, /* in: name of the file or path as a - null-terminated string */ - os_file_t file, /* in: handle to a file */ - ulint size, /* in: least significant 32 bits of file - size */ - ulint size_high);/* in: most significant 32 bits of size */ -/*************************************************************************** -Truncates a file at its current position. */ - -ibool -os_file_set_eof( -/*============*/ - /* out: TRUE if success */ - FILE* file); /* in: file to be truncated */ -/*************************************************************************** -Flushes the write buffers of a given file to the disk. */ - -ibool -os_file_flush( -/*==========*/ - /* out: TRUE if success */ - os_file_t file); /* in, own: handle to a file */ -/*************************************************************************** -Retrieves the last error number if an error occurs in a file io function. -The number should be retrieved before any other OS calls (because they may -overwrite the error number). If the number is not known to this program, -the OS error number + 100 is returned. */ - -ulint -os_file_get_last_error( -/*===================*/ - /* out: error number, or OS error - number + 100 */ - ibool report_all_errors); /* in: TRUE if we want an error message - printed of all errors */ -/*********************************************************************** -Requests a synchronous read operation. 
*/ - -ibool -os_file_read( -/*=========*/ - /* out: TRUE if request was - successful, FALSE if fail */ - os_file_t file, /* in: handle to a file */ - void* buf, /* in: buffer where to read */ - ulint offset, /* in: least significant 32 bits of file - offset where to read */ - ulint offset_high,/* in: most significant 32 bits of - offset */ - ulint n); /* in: number of bytes to read */ -/*********************************************************************** -Rewind file to its start, read at most size - 1 bytes from it to str, and -NUL-terminate str. All errors are silently ignored. This function is -mostly meant to be used with temporary files. */ - -void -os_file_read_string( -/*================*/ - FILE* file, /* in: file to read from */ - char* str, /* in: buffer where to read */ - ulint size); /* in: size of buffer */ -/*********************************************************************** -Requests a synchronous positioned read operation. This function does not do -any error handling. In case of error it returns FALSE. */ - -ibool -os_file_read_no_error_handling( -/*===========================*/ - /* out: TRUE if request was - successful, FALSE if fail */ - os_file_t file, /* in: handle to a file */ - void* buf, /* in: buffer where to read */ - ulint offset, /* in: least significant 32 bits of file - offset where to read */ - ulint offset_high,/* in: most significant 32 bits of - offset */ - ulint n); /* in: number of bytes to read */ - -/*********************************************************************** -Requests a synchronous write operation. 
*/ - -ibool -os_file_write( -/*==========*/ - /* out: TRUE if request was - successful, FALSE if fail */ - const char* name, /* in: name of the file or path as a - null-terminated string */ - os_file_t file, /* in: handle to a file */ - const void* buf, /* in: buffer from which to write */ - ulint offset, /* in: least significant 32 bits of file - offset where to write */ - ulint offset_high,/* in: most significant 32 bits of - offset */ - ulint n); /* in: number of bytes to write */ -/*********************************************************************** -Check the existence and type of the given file. */ - -ibool -os_file_status( -/*===========*/ - /* out: TRUE if call succeeded */ - const char* path, /* in: pathname of the file */ - ibool* exists, /* out: TRUE if file exists */ - os_file_type_t* type); /* out: type of the file (if it exists) */ -/******************************************************************** -The function os_file_dirname returns a directory component of a -null-terminated pathname string. In the usual case, dirname returns -the string up to, but not including, the final '/', and basename -is the component following the final '/'. Trailing '/' charac -ters are not counted as part of the pathname. - -If path does not contain a slash, dirname returns the string ".". - -Concatenating the string returned by dirname, a "/", and the basename -yields a complete pathname. - -The return value is a copy of the directory component of the pathname. -The copy is allocated from heap. It is the caller responsibility -to free it after it is no longer needed. - -The following list of examples (taken from SUSv2) shows the strings -returned by dirname and basename for different paths: - - path dirname basename - "/usr/lib" "/usr" "lib" - "/usr/" "/" "usr" - "usr" "." "usr" - "/" "/" "/" - "." "." "." - ".." "." ".." 
-*/ - -char* -os_file_dirname( -/*============*/ - /* out, own: directory component of the - pathname */ - const char* path); /* in: pathname */ -/******************************************************************** -Creates all missing subdirectories along the given path. */ - -ibool -os_file_create_subdirs_if_needed( -/*=============================*/ - /* out: TRUE if call succeeded - FALSE otherwise */ - const char* path); /* in: path name */ -/**************************************************************************** -Initializes the asynchronous io system. Creates n_read_threads segments for -read, n_write_threads segments for writes, one segment for the ibuf i/o, and -one segment for log IO. Returns the number of segments created. When async -IO is not used, and 4 threads should be created to process requests put -in the segments. */ - -ulint -os_aio_init( -/*========*/ - ulint ios_per_array, /* in: maximum number of pending aio operations - allowed per array */ - ulint n_read_threads, /* in: number of read threads */ - ulint n_write_threads, /* in: number of write threads */ - ulint n_slots_sync); /* in: number of slots in the sync aio array */ -/*********************************************************************** -Requests an asynchronous i/o operation. */ - -ibool -os_aio( -/*===*/ - /* out: TRUE if request was queued - successfully, FALSE if fail */ - ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */ - ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed - to OS_AIO_SIMULATED_WAKE_LATER: the - last flag advises this function not to wake - i/o-handler threads, but the caller will - do the waking explicitly later, in this - way the caller can post several requests in - a batch; NOTE that the batch must not be - so big that it exhausts the slots in aio - arrays! NOTE that a simulated batch - may introduce hidden chances of deadlocks, - because i/os are not actually handled until - all have been posted: use with great - caution! 
*/ - const char* name, /* in: name of the file or path as a - null-terminated string */ - os_file_t file, /* in: handle to a file */ - void* buf, /* in: buffer where to read or from which - to write */ - ulint offset, /* in: least significant 32 bits of file - offset where to read or write */ - ulint offset_high, /* in: most significant 32 bits of - offset */ - ulint n, /* in: number of bytes to read or write */ - fil_node_t* message1,/* in: messages for the aio handler (these - can be used to identify a completed aio - operation); if mode is OS_AIO_SYNC, these - are ignored */ - void* message2); -/**************************************************************************** -Wakes up all async i/o threads so that they know to exit themselves in -shutdown. */ - -void -os_aio_wake_all_threads_at_shutdown(void); -/*=====================================*/ -/**************************************************************************** -Waits until there are no pending writes in os_aio_write_array. There can -be other, synchronous, pending writes. */ - -void -os_aio_wait_until_no_pending_writes(void); -/*=====================================*/ -/************************************************************************** -Wakes up simulated aio i/o-handler threads if they have something to do. */ - -void -os_aio_simulated_wake_handler_threads(void); -/*=======================================*/ -/************************************************************************** -This function can be called if one wants to post a batch of reads and -prefers an i/o-handler thread to handle them all at once later. You must -call os_aio_simulated_wake_handler_threads later to ensure the threads -are not left sleeping! */ - -void -os_aio_simulated_put_read_threads_to_sleep(void); -/*============================================*/ - -#ifdef WIN_ASYNC_IO -/************************************************************************** -This function is only used in Windows asynchronous i/o. 
-Waits for an aio operation to complete. This function is used to wait the -for completed requests. The aio array of pending requests is divided -into segments. The thread specifies which segment or slot it wants to wait -for. NOTE: this function will also take care of freeing the aio slot, -therefore no other thread is allowed to do the freeing! */ - -ibool -os_aio_windows_handle( -/*==================*/ - /* out: TRUE if the aio operation succeeded */ - ulint segment, /* in: the number of the segment in the aio - arrays to wait for; segment 0 is the ibuf - i/o thread, segment 1 the log i/o thread, - then follow the non-ibuf read threads, and as - the last are the non-ibuf write threads; if - this is ULINT_UNDEFINED, then it means that - sync aio is used, and this parameter is - ignored */ - ulint pos, /* this parameter is used only in sync aio: - wait for the aio slot at this position */ - fil_node_t**message1, /* out: the messages passed with the aio - request; note that also in the case where - the aio operation failed, these output - parameters are valid and can be used to - restart the operation, for example */ - void** message2, - ulint* type); /* out: OS_FILE_WRITE or ..._READ */ -#endif - -/* Currently we do not use Posix async i/o */ -#ifdef POSIX_ASYNC_IO -/************************************************************************** -This function is only used in Posix asynchronous i/o. Waits for an aio -operation to complete. */ - -ibool -os_aio_posix_handle( -/*================*/ - /* out: TRUE if the aio operation succeeded */ - ulint array_no, /* in: array number 0 - 3 */ - fil_node_t**message1, /* out: the messages passed with the aio - request; note that also in the case where - the aio operation failed, these output - parameters are valid and can be used to - restart the operation, for example */ - void** message2); -#endif -/************************************************************************** -Does simulated aio. 
This function should be called by an i/o-handler -thread. */ - -ibool -os_aio_simulated_handle( -/*====================*/ - /* out: TRUE if the aio operation succeeded */ - ulint segment, /* in: the number of the segment in the aio - arrays to wait for; segment 0 is the ibuf - i/o thread, segment 1 the log i/o thread, - then follow the non-ibuf read threads, and as - the last are the non-ibuf write threads */ - fil_node_t**message1, /* out: the messages passed with the aio - request; note that also in the case where - the aio operation failed, these output - parameters are valid and can be used to - restart the operation, for example */ - void** message2, - ulint* type); /* out: OS_FILE_WRITE or ..._READ */ -/************************************************************************** -Validates the consistency of the aio system. */ - -ibool -os_aio_validate(void); -/*=================*/ - /* out: TRUE if ok */ -/************************************************************************** -Prints info of the aio arrays. */ - -void -os_aio_print( -/*=========*/ - FILE* file); /* in: file where to print */ -/************************************************************************** -Refreshes the statistics used to print per-second averages. */ - -void -os_aio_refresh_stats(void); -/*======================*/ - -#ifdef UNIV_DEBUG -/************************************************************************** -Checks that all slots in the system have been freed, that is, there are -no pending io operations. 
*/ - -ibool -os_aio_all_slots_free(void); -/*=======================*/ -#endif /* UNIV_DEBUG */ - -/*********************************************************************** -This function returns information about the specified file */ -ibool -os_file_get_status( -/*===============*/ - /* out: TRUE if stat - information found */ - const char* path, /* in: pathname of the file */ - os_file_stat_t* stat_info); /* information of a file in a - directory */ - -#endif diff --git a/storage/innobase/include/os0proc.h b/storage/innobase/include/os0proc.h deleted file mode 100644 index f54e08de7ee..00000000000 --- a/storage/innobase/include/os0proc.h +++ /dev/null @@ -1,148 +0,0 @@ -/****************************************************** -The interface to the operating system -process control primitives - -(c) 1995 Innobase Oy - -Created 9/30/1995 Heikki Tuuri -*******************************************************/ - -#ifndef os0proc_h -#define os0proc_h - -#include "univ.i" - -#ifdef UNIV_LINUX -#include <sys/ipc.h> -#include <sys/shm.h> -#endif - -typedef void* os_process_t; -typedef unsigned long int os_process_id_t; - -/* The cell type in os_awe_allocate_mem page info */ -#if defined(__WIN2000__) && defined(ULONG_PTR) -typedef ULONG_PTR os_awe_t; -#else -typedef ulint os_awe_t; -#endif - -/* Physical page size when Windows AWE is used. This is the normal -page size of an Intel x86 processor. We cannot use AWE with 2 MB or 4 MB -pages. */ -#define OS_AWE_X86_PAGE_SIZE 4096 - -extern ibool os_use_large_pages; -/* Large page size. This may be a boot-time option on some platforms */ -extern ulint os_large_page_size; - -/******************************************************************** -Windows AWE support. Tries to enable the "lock pages in memory" privilege for -the current process so that the current process can allocate memory-locked -virtual address space to act as the window where AWE maps physical memory. 
*/ - -ibool -os_awe_enable_lock_pages_in_mem(void); -/*=================================*/ - /* out: TRUE if success, FALSE if error; - prints error info to stderr if no success */ -/******************************************************************** -Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86 -processor. */ - -ibool -os_awe_allocate_physical_mem( -/*=========================*/ - /* out: TRUE if success */ - os_awe_t** page_info, /* out, own: array of opaque data containing - the info for allocated physical memory pages; - each allocated 4 kB physical memory page has - one slot of type os_awe_t in the array */ - ulint n_megabytes); /* in: number of megabytes to allocate */ -/******************************************************************** -Allocates a window in the virtual address space where we can map then -pages of physical memory. */ - -byte* -os_awe_allocate_virtual_mem_window( -/*===============================*/ - /* out, own: allocated memory, or NULL if did not - succeed */ - ulint size); /* in: virtual memory allocation size in bytes, must - be < 2 GB */ -/******************************************************************** -With this function you can map parts of physical memory allocated with -the ..._allocate_physical_mem to the virtual address space allocated with -the previous function. Intel implements this so that the process page -tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP -showed that this takes < 1 microsecond, much better than the estimated 80 us -for copying a 16 kB page memory to memory. But, the operation will at least -partially invalidate the translation lookaside buffer (TLB) of all -processors. Under a real-world load the performance hit may be bigger. 
*/ - -ibool -os_awe_map_physical_mem_to_window( -/*==============================*/ - /* out: TRUE if success; the function - calls exit(1) in case of an error */ - byte* ptr, /* in: a page-aligned pointer to - somewhere in the virtual address - space window; we map the physical mem - pages here */ - ulint n_mem_pages, /* in: number of 4 kB mem pages to - map */ - os_awe_t* page_info); /* in: array of page infos for those - pages; each page has one slot in the - array */ -/******************************************************************** -Converts the current process id to a number. It is not guaranteed that the -number is unique. In Linux returns the 'process number' of the current -thread. That number is the same as one sees in 'top', for example. In Linux -the thread id is not the same as one sees in 'top'. */ - -ulint -os_proc_get_number(void); -/*====================*/ -/******************************************************************** -Allocates non-cacheable memory. */ - -void* -os_mem_alloc_nocache( -/*=================*/ - /* out: allocated memory */ - ulint n); /* in: number of bytes */ -/******************************************************************** -Allocates large pages memory. */ - -void* -os_mem_alloc_large( -/*===============*/ - /* out: allocated memory */ - ulint n, /* in: number of bytes */ - ibool set_to_zero, /* in: TRUE if allocated memory - should be set to zero if - UNIV_SET_MEM_TO_ZERO is defined */ - ibool assert_on_error);/* in: if TRUE, we crash mysqld if - the memory cannot be allocated */ -/******************************************************************** -Frees large pages memory. */ - -void -os_mem_free_large( -/*==============*/ -void *ptr); /* in: pointer to the large pages memory to free */ -/******************************************************************** -Sets the priority boost for threads released from waiting within the current -process. 
*/ - -void -os_process_set_priority_boost( -/*==========================*/ - ibool do_boost); /* in: TRUE if priority boost should be done, - FALSE if not */ - -#ifndef UNIV_NONINL -#include "os0proc.ic" -#endif - -#endif diff --git a/storage/innobase/include/os0proc.ic b/storage/innobase/include/os0proc.ic deleted file mode 100644 index 651ba1f17e3..00000000000 --- a/storage/innobase/include/os0proc.ic +++ /dev/null @@ -1,10 +0,0 @@ -/****************************************************** -The interface to the operating system -process control primitives - -(c) 1995 Innobase Oy - -Created 9/30/1995 Heikki Tuuri -*******************************************************/ - - diff --git a/storage/innobase/include/os0sync.h b/storage/innobase/include/os0sync.h deleted file mode 100644 index 26d2786e33b..00000000000 --- a/storage/innobase/include/os0sync.h +++ /dev/null @@ -1,311 +0,0 @@ -/****************************************************** -The interface to the operating system -synchronization primitives. 
- -(c) 1995 Innobase Oy - -Created 9/6/1995 Heikki Tuuri -*******************************************************/ -#ifndef os0sync_h -#define os0sync_h - -#include "univ.i" -#include "ut0lst.h" - -#ifdef HAVE_SOLARIS_ATOMIC -#include <atomic.h> -#endif - -#ifdef __WIN__ - -#define os_fast_mutex_t CRITICAL_SECTION - -typedef HANDLE os_native_event_t; - -typedef struct os_event_struct os_event_struct_t; -typedef os_event_struct_t* os_event_t; - -struct os_event_struct { - os_native_event_t handle; - /* Windows event */ - UT_LIST_NODE_T(os_event_struct_t) os_event_list; - /* list of all created events */ -}; -#else -typedef pthread_mutex_t os_fast_mutex_t; - -typedef struct os_event_struct os_event_struct_t; -typedef os_event_struct_t* os_event_t; - -struct os_event_struct { - os_fast_mutex_t os_mutex; /* this mutex protects the next - fields */ - ibool is_set; /* this is TRUE when the event is - in the signaled state, i.e., a thread - does not stop if it tries to wait for - this event */ - ib_longlong signal_count; /* this is incremented each time - the event becomes signaled */ - pthread_cond_t cond_var; /* condition variable is used in - waiting for the event */ - UT_LIST_NODE_T(os_event_struct_t) os_event_list; - /* list of all created events */ -}; -#endif - -typedef struct os_mutex_struct os_mutex_str_t; -typedef os_mutex_str_t* os_mutex_t; - -#define OS_SYNC_INFINITE_TIME ((ulint)(-1)) - -#define OS_SYNC_TIME_EXCEEDED 1 - -/* Mutex protecting counts and the event and OS 'slow' mutex lists */ -extern os_mutex_t os_sync_mutex; - -/* This is incremented by 1 in os_thread_create and decremented by 1 in -os_thread_exit */ -extern ulint os_thread_count; - -extern ulint os_event_count; -extern ulint os_mutex_count; -extern ulint os_fast_mutex_count; - -/************************************************************* -Initializes global event and OS 'slow' mutex lists. 
*/ - -void -os_sync_init(void); -/*==============*/ -/************************************************************* -Frees created events and OS 'slow' mutexes. */ - -void -os_sync_free(void); -/*==============*/ -/************************************************************* -Creates an event semaphore, i.e., a semaphore which may just have two states: -signaled and nonsignaled. The created event is manual reset: it must be reset -explicitly by calling os_event_reset(). */ - -os_event_t -os_event_create( -/*============*/ - /* out: the event handle */ - const char* name); /* in: the name of the event, if NULL - the event is created without a name */ -#ifdef __WIN__ -/************************************************************* -Creates an auto-reset event semaphore, i.e., an event which is automatically -reset when a single thread is released. Works only in Windows. */ - -os_event_t -os_event_create_auto( -/*=================*/ - /* out: the event handle */ - const char* name); /* in: the name of the event, if NULL - the event is created without a name */ -#endif -/************************************************************** -Sets an event semaphore to the signaled state: lets waiting threads -proceed. */ - -void -os_event_set( -/*=========*/ - os_event_t event); /* in: event to set */ -/************************************************************** -Resets an event semaphore to the nonsignaled state. Waiting threads will -stop to wait for the event. -The return value should be passed to os_event_wait_low() if it is desired -that this thread should not wait in case of an intervening call to -os_event_set() between this os_event_reset() and the -os_event_wait_low() call. See comments for os_event_wait_low(). */ - -ib_longlong -os_event_reset( -/*===========*/ - os_event_t event); /* in: event to reset */ -/************************************************************** -Frees an event object. 
*/ - -void -os_event_free( -/*==========*/ - os_event_t event); /* in: event to free */ - -/************************************************************** -Waits for an event object until it is in the signaled state. If -srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the -waiting thread when the event becomes signaled (or immediately if the -event is already in the signaled state). - -Typically, if the event has been signalled after the os_event_reset() -we'll return immediately because event->is_set == TRUE. -There are, however, situations (e.g.: sync_array code) where we may -lose this information. For example: - -thread A calls os_event_reset() -thread B calls os_event_set() [event->is_set == TRUE] -thread C calls os_event_reset() [event->is_set == FALSE] -thread A calls os_event_wait() [infinite wait!] -thread C calls os_event_wait() [infinite wait!] - -Where such a scenario is possible, to avoid infinite wait, the -value returned by os_event_reset() should be passed in as -reset_sig_count. */ - -#define os_event_wait(event) os_event_wait_low((event), 0) - -void -os_event_wait_low( -/*==============*/ - os_event_t event, /* in: event to wait */ - ib_longlong reset_sig_count);/* in: zero or the value - returned by previous call of - os_event_reset(). */ - -/************************************************************** -Waits for an event object until it is in the signaled state or -a timeout is exceeded. In Unix the timeout is always infinite. */ - -ulint -os_event_wait_time( -/*===============*/ - /* out: 0 if success, - OS_SYNC_TIME_EXCEEDED if timeout - was exceeded */ - os_event_t event, /* in: event to wait */ - ulint time); /* in: timeout in microseconds, or - OS_SYNC_INFINITE_TIME */ -#ifdef __WIN__ -/************************************************************** -Waits for any event in an OS native event array. Returns if even a single -one is signaled or becomes signaled. 
*/ - -ulint -os_event_wait_multiple( -/*===================*/ - /* out: index of the event - which was signaled */ - ulint n, /* in: number of events in the - array */ - os_native_event_t* native_event_array); - /* in: pointer to an array of event - handles */ -#endif -/************************************************************* -Creates an operating system mutex semaphore. Because these are slow, the -mutex semaphore of InnoDB itself (mutex_t) should be used where possible. */ - -os_mutex_t -os_mutex_create( -/*============*/ - /* out: the mutex handle */ - const char* name); /* in: the name of the mutex, if NULL - the mutex is created without a name */ -/************************************************************** -Acquires ownership of a mutex semaphore. */ - -void -os_mutex_enter( -/*===========*/ - os_mutex_t mutex); /* in: mutex to acquire */ -/************************************************************** -Releases ownership of a mutex. */ - -void -os_mutex_exit( -/*==========*/ - os_mutex_t mutex); /* in: mutex to release */ -/************************************************************** -Frees a mutex object. */ - -void -os_mutex_free( -/*==========*/ - os_mutex_t mutex); /* in: mutex to free */ -/************************************************************** -Acquires ownership of a fast mutex. Currently in Windows this is the same -as os_fast_mutex_lock! */ -UNIV_INLINE -ulint -os_fast_mutex_trylock( -/*==================*/ - /* out: 0 if success, != 0 if - was reserved by another - thread */ - os_fast_mutex_t* fast_mutex); /* in: mutex to acquire */ -/************************************************************** -Releases ownership of a fast mutex. */ - -void -os_fast_mutex_unlock( -/*=================*/ - os_fast_mutex_t* fast_mutex); /* in: mutex to release */ -/************************************************************* -Initializes an operating system fast mutex semaphore. 
*/ - -void -os_fast_mutex_init( -/*===============*/ - os_fast_mutex_t* fast_mutex); /* in: fast mutex */ -/************************************************************** -Acquires ownership of a fast mutex. */ - -void -os_fast_mutex_lock( -/*===============*/ - os_fast_mutex_t* fast_mutex); /* in: mutex to acquire */ -/************************************************************** -Frees a fast mutex object. */ - -void -os_fast_mutex_free( -/*===============*/ - os_fast_mutex_t* fast_mutex); /* in: mutex to free */ - -#ifdef UNIV_SYNC_ATOMIC -/************************************************************** -Atomic compare-and-swap for InnoDB. Currently requires GCC atomic builtins, -Solaris atomic_* functions or Windows Interlocked* functions (see os0sync.ic). */ -UNIV_INLINE -ibool -os_compare_and_swap( -/*================*/ - /* out: true if swapped */ - volatile lint* ptr, /* in: pointer to target */ - lint oldVal, /* in: value to compare to */ - lint newVal); /* in: value to swap in */ - -/************************************************************** -Atomic increment for InnoDB. Currently requires GCC atomic builtins, -Solaris atomic_* functions or Windows Interlocked* functions (see os0sync.ic). */ -UNIV_INLINE -lint -os_atomic_increment( -/*================*/ - /* out: resulting value */ - volatile lint* ptr, /* in: pointer to target */ - lint amount); /* in: amount of increment */ - -/************************************************************** -Memory barrier operations for InnoDB. -Currently requires GCC atomic builtins, Solaris membar_* functions or the -Windows MemoryBarrier() macro (see os0sync.ic). */ -UNIV_INLINE -void -os_memory_barrier_load(); - -UNIV_INLINE -void -os_memory_barrier_store(); - -UNIV_INLINE -void -os_memory_barrier(); - -#endif /* UNIV_SYNC_ATOMIC */ - -#ifndef UNIV_NONINL -#include "os0sync.ic" -#endif - -#endif diff --git a/storage/innobase/include/os0sync.ic b/storage/innobase/include/os0sync.ic deleted file mode 100644 index d1307134172..00000000000 --- a/storage/innobase/include/os0sync.ic +++ /dev/null @@ -1,152 +0,0 @@ -/****************************************************** -The interface to the operating system synchronization primitives. 
- -(c) 1995 Innobase Oy - -Created 9/6/1995 Heikki Tuuri -*******************************************************/ - -#ifdef __WIN__ -#include <winbase.h> -#endif - -/************************************************************** -Acquires ownership of a fast mutex. Currently in Windows this is the same -as os_fast_mutex_lock! */ -UNIV_INLINE -ulint -os_fast_mutex_trylock( -/*==================*/ - /* out: 0 if success, != 0 if - was reserved by another - thread */ - os_fast_mutex_t* fast_mutex) /* in: mutex to acquire */ -{ -#ifdef __WIN__ - EnterCriticalSection(fast_mutex); - - return(0); -#else -#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10) - /* Since the hot backup version is standalone, MySQL does not redefine - pthread_mutex_trylock for HP-UX-10.20, and consequently we must invert - the return value here */ - - return((ulint) (1 - pthread_mutex_trylock(fast_mutex))); -#else - /* NOTE that the MySQL my_pthread.h redefines pthread_mutex_trylock - so that it returns 0 on success. In the operating system - libraries, HP-UX-10.20 follows the old Posix 1003.4a Draft 4 and - returns 1 on success (but MySQL remaps that to 0), while Linux, - FreeBSD, Solaris, AIX, Tru64 Unix, HP-UX-11.0 return 0 on success. */ - - return((ulint) pthread_mutex_trylock(fast_mutex)); -#endif -#endif -} - -#ifdef UNIV_SYNC_ATOMIC -/************************************************************** -Atomic compare-and-swap for InnoDB. Currently requires GCC atomic builtins -or Solaris atomic_* functions. 
*/ -UNIV_INLINE -ibool -os_compare_and_swap( -/*================*/ - /* out: true if swapped */ - volatile lint* ptr, /* in: pointer to target */ - lint oldVal, /* in: value to compare to */ - lint newVal) /* in: value to swap in */ -{ -#ifdef HAVE_GCC_ATOMIC_BUILTINS - return (__sync_bool_compare_and_swap(ptr, oldVal, newVal)); -#elif HAVE_SOLARIS_ATOMIC - lint retVal = (lint)atomic_cas_ulong((volatile ulong_t *)ptr, - oldVal, newVal); - return (retVal == oldVal); -#elif WIN_ATOMICS32 - lint retVal = (lint)InterlockedCompareExchange(ptr, newVal, oldVal); - return (retVal == oldVal); -#elif WIN_ATOMICS64 - lint retVal = (lint)InterlockedCompareExchange64(ptr, newVal, oldVal); - return (retVal == oldVal); -#else -#error "Need support for atomic ops" -#endif -} - -/************************************************************** -Memory barrier for load */ -UNIV_INLINE -void -os_memory_barrier_load() -{ -#ifdef HAVE_GCC_ATOMIC_BUILTINS - __sync_synchronize(); -#elif HAVE_SOLARIS_ATOMIC - membar_consumer(); -#elif WIN_ATOMICS32 - MemoryBarrier(); -#elif WIN_ATOMICS64 - MemoryBarrier(); -#endif -} - -/************************************************************** -Memory barrier for store */ -UNIV_INLINE -void -os_memory_barrier_store() -{ -#ifdef HAVE_GCC_ATOMIC_BUILTINS - __sync_synchronize(); -#elif HAVE_SOLARIS_ATOMIC - membar_producer(); -#elif WIN_ATOMICS32 - MemoryBarrier(); -#elif WIN_ATOMICS64 - MemoryBarrier(); -#endif -} - -/************************************************************** -Memory barrier */ -UNIV_INLINE -void -os_memory_barrier() -{ -#ifdef HAVE_GCC_ATOMIC_BUILTINS - __sync_synchronize(); -#elif HAVE_SOLARIS_ATOMIC - membar_enter(); -#elif WIN_ATOMICS32 - MemoryBarrier(); -#elif WIN_ATOMICS64 - MemoryBarrier(); -#endif -} - - -/************************************************************** -Atomic increment for InnoDB. Currently requires GCC atomic builtins, -Solaris atomic_* functions or Windows Interlocked* functions. 
*/ -UNIV_INLINE -lint -os_atomic_increment( -/*================*/ - /* out: resulting value */ - volatile lint* ptr, /* in: pointer to target */ - lint amount) /* in: amount of increment */ -{ -#ifdef HAVE_GCC_ATOMIC_BUILTINS - return (__sync_add_and_fetch(ptr, amount)); -#elif HAVE_SOLARIS_ATOMIC - return ((lint)atomic_add_long_nv((volatile ulong_t *)ptr, amount)); -#elif WIN_ATOMICS32 - return ((lint)InterlockedExchangeAdd(ptr, amount) + amount); -#elif WIN_ATOMICS64 - return ((lint)InterlockedExchangeAdd64(ptr, amount) + amount); -#else -#error "Need support for atomic ops" -#endif -} -#endif /* UNIV_SYNC_ATOMIC */ diff --git a/storage/innobase/include/os0thread.h b/storage/innobase/include/os0thread.h deleted file mode 100644 index 3cf05feb3a9..00000000000 --- a/storage/innobase/include/os0thread.h +++ /dev/null @@ -1,145 +0,0 @@ -/****************************************************** -The interface to the operating system -process and thread control primitives - -(c) 1995 Innobase Oy - -Created 9/8/1995 Heikki Tuuri -*******************************************************/ - -#ifndef os0thread_h -#define os0thread_h - -#include "univ.i" - -/* Maximum number of threads which can be created in the program; -this is also the size of the wait slot array for MySQL threads which -can wait inside InnoDB */ - -#define OS_THREAD_MAX_N srv_max_n_threads - - -/* Possible fixed priorities for threads */ -#define OS_THREAD_PRIORITY_NONE 100 -#define OS_THREAD_PRIORITY_BACKGROUND 1 -#define OS_THREAD_PRIORITY_NORMAL 2 -#define OS_THREAD_PRIORITY_ABOVE_NORMAL 3 - -#ifdef __WIN__ -typedef void* os_thread_t; -typedef ulint os_thread_id_t; /* In Windows the thread id - is an unsigned long int */ -#else -typedef pthread_t os_thread_t; -typedef os_thread_t os_thread_id_t; /* In Unix we use the thread - handle itself as the id of - the thread */ -#endif - -/* Define a function pointer type to use in a typecast */ -typedef void* (*os_posix_f_t) (void*); - 
-/******************************************************************* -Compares two thread ids for equality. */ - -ibool -os_thread_eq( -/*=========*/ - /* out: TRUE if equal */ - os_thread_id_t a, /* in: OS thread or thread id */ - os_thread_id_t b); /* in: OS thread or thread id */ -/******************************************************************** -Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is -unique for the thread though! */ - -ulint -os_thread_pf( -/*=========*/ - /* out: unsigned long int */ - os_thread_id_t a); /* in: thread or thread id */ -/******************************************************************** -Creates a new thread of execution. The execution starts from -the function given. The start function takes a void* parameter -and returns a ulint. -NOTE: We count the number of threads in os_thread_exit(). A created -thread should always use that to exit and not use return() to exit. */ - -os_thread_t -os_thread_create( -/*=============*/ - /* out: handle to the thread */ -#ifndef __WIN__ - os_posix_f_t start_f, -#else - ulint (*start_f)(void*), /* in: pointer to function - from which to start */ -#endif - void* arg, /* in: argument to start - function */ - os_thread_id_t* thread_id); /* out: id of the created - thread, or NULL */ -int -os_thread_join( -/*===========*/ - os_thread_id_t thread_id); /* in: id of the thread to join */ -/********************************************************************* -Exits the current thread. */ - -void -os_thread_exit( -/*===========*/ - void* exit_value); /* in: exit value; in Windows this void* - is cast as a DWORD */ -/********************************************************************* -Returns the thread identifier of current thread. */ - -os_thread_id_t -os_thread_get_curr_id(void); -/*========================*/ -/********************************************************************* -Returns handle to the current thread. 
*/ - -os_thread_t -os_thread_get_curr(void); -/*====================*/ -/********************************************************************* -Advises the os to give up remainder of the thread's time slice. */ - -void -os_thread_yield(void); -/*=================*/ -/********************************************************************* -The thread sleeps at least the time given in microseconds. */ - -void -os_thread_sleep( -/*============*/ - ulint tm); /* in: time in microseconds */ -/********************************************************************** -Gets a thread priority. */ - -ulint -os_thread_get_priority( -/*===================*/ - /* out: priority */ - os_thread_t handle);/* in: OS handle to the thread */ -/********************************************************************** -Sets a thread priority. */ - -void -os_thread_set_priority( -/*===================*/ - os_thread_t handle, /* in: OS handle to the thread */ - ulint pri); /* in: priority: one of OS_THREAD_PRIORITY_... */ -/********************************************************************** -Gets the last operating system error code for the calling thread. 
*/ - -ulint -os_thread_get_last_error(void); -/*==========================*/ - -#ifndef UNIV_NONINL -#include "os0thread.ic" -#endif - -#endif diff --git a/storage/innobase/include/os0thread.ic b/storage/innobase/include/os0thread.ic deleted file mode 100644 index a75aa3abb34..00000000000 --- a/storage/innobase/include/os0thread.ic +++ /dev/null @@ -1,8 +0,0 @@ -/****************************************************** -The interface to the operating system -process and thread control primitives - -(c) 1995 Innobase Oy - -Created 9/8/1995 Heikki Tuuri -*******************************************************/ diff --git a/storage/innobase/include/page0cur.h b/storage/innobase/include/page0cur.h deleted file mode 100644 index 04f731414a3..00000000000 --- a/storage/innobase/include/page0cur.h +++ /dev/null @@ -1,286 +0,0 @@ -/************************************************************************ -The page cursor - -(c) 1994-1996 Innobase Oy - -Created 10/4/1994 Heikki Tuuri -*************************************************************************/ - -#ifndef page0cur_h -#define page0cur_h - -#include "univ.i" - -#include "page0types.h" -#include "page0page.h" -#include "rem0rec.h" -#include "data0data.h" -#include "mtr0mtr.h" - - -#define PAGE_CUR_ADAPT - -/* Page cursor search modes; the values must be in this order! 
*/ - -#define PAGE_CUR_UNSUPP 0 -#define PAGE_CUR_G 1 -#define PAGE_CUR_GE 2 -#define PAGE_CUR_L 3 -#define PAGE_CUR_LE 4 -/*#define PAGE_CUR_LE_OR_EXTENDS 5*/ /* This is a search mode used in - "column LIKE 'abc%' ORDER BY column DESC"; - we have to find strings which are <= 'abc' or - which extend it */ -#ifdef UNIV_SEARCH_DEBUG -# define PAGE_CUR_DBG 6 /* As PAGE_CUR_LE, but skips search shortcut */ -#endif /* UNIV_SEARCH_DEBUG */ - -#ifdef PAGE_CUR_ADAPT -# ifdef UNIV_SEARCH_PERF_STAT -extern ulint page_cur_short_succ; -# endif /* UNIV_SEARCH_PERF_STAT */ -#endif /* PAGE_CUR_ADAPT */ - -/************************************************************* -Gets pointer to the page frame where the cursor is positioned. */ -UNIV_INLINE -page_t* -page_cur_get_page( -/*==============*/ - /* out: page */ - page_cur_t* cur); /* in: page cursor */ -/************************************************************* -Gets the record where the cursor is positioned. */ -UNIV_INLINE -rec_t* -page_cur_get_rec( -/*=============*/ - /* out: record */ - page_cur_t* cur); /* in: page cursor */ -/************************************************************* -Sets the cursor object to point before the first user record -on the page. */ -UNIV_INLINE -void -page_cur_set_before_first( -/*======================*/ - page_t* page, /* in: index page */ - page_cur_t* cur); /* in: cursor */ -/************************************************************* -Sets the cursor object to point after the last user record on -the page. */ -UNIV_INLINE -void -page_cur_set_after_last( -/*====================*/ - page_t* page, /* in: index page */ - page_cur_t* cur); /* in: cursor */ -/************************************************************* -Returns TRUE if the cursor is before first user record on page. 
*/ -UNIV_INLINE -ibool -page_cur_is_before_first( -/*=====================*/ - /* out: TRUE if at start */ - const page_cur_t* cur); /* in: cursor */ -/************************************************************* -Returns TRUE if the cursor is after last user record. */ -UNIV_INLINE -ibool -page_cur_is_after_last( -/*===================*/ - /* out: TRUE if at end */ - const page_cur_t* cur); /* in: cursor */ -/************************************************************** -Positions the cursor on the given record. */ -UNIV_INLINE -void -page_cur_position( -/*==============*/ - rec_t* rec, /* in: record on a page */ - page_cur_t* cur); /* in: page cursor */ -/************************************************************** -Invalidates a page cursor by setting the record pointer NULL. */ -UNIV_INLINE -void -page_cur_invalidate( -/*================*/ - page_cur_t* cur); /* in: page cursor */ -/************************************************************** -Moves the cursor to the next record on page. */ -UNIV_INLINE -void -page_cur_move_to_next( -/*==================*/ - page_cur_t* cur); /* in: cursor; must not be after last */ -/************************************************************** -Moves the cursor to the previous record on page. */ -UNIV_INLINE -void -page_cur_move_to_prev( -/*==================*/ - page_cur_t* cur); /* in: cursor; must not be before first */ -/*************************************************************** -Inserts a record next to page cursor. Returns pointer to inserted record if -succeed, i.e., enough space available, NULL otherwise. The cursor stays at -the same position. 
*/ -UNIV_INLINE -rec_t* -page_cur_tuple_insert( -/*==================*/ - /* out: pointer to record if succeed, NULL - otherwise */ - page_cur_t* cursor, /* in: a page cursor */ - dtuple_t* tuple, /* in: pointer to a data tuple */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr); /* in: mini-transaction handle */ -/*************************************************************** -Inserts a record next to page cursor. Returns pointer to inserted record if -succeed, i.e., enough space available, NULL otherwise. The cursor stays at -the same position. */ -UNIV_INLINE -rec_t* -page_cur_rec_insert( -/*================*/ - /* out: pointer to record if succeed, NULL - otherwise */ - page_cur_t* cursor, /* in: a page cursor */ - rec_t* rec, /* in: record to insert */ - dict_index_t* index, /* in: record descriptor */ - ulint* offsets,/* in: rec_get_offsets(rec, index) */ - mtr_t* mtr); /* in: mini-transaction handle */ -/*************************************************************** -Inserts a record next to page cursor. Returns pointer to inserted record if -succeed, i.e., enough space available, NULL otherwise. The record to be -inserted can be in a data tuple or as a physical record. The other parameter -must then be NULL. The cursor stays at the same position. */ - -rec_t* -page_cur_insert_rec_low( -/*====================*/ - /* out: pointer to record if succeed, NULL - otherwise */ - page_cur_t* cursor, /* in: a page cursor */ - dtuple_t* tuple, /* in: pointer to a data tuple or NULL */ - dict_index_t* index, /* in: record descriptor */ - rec_t* rec, /* in: pointer to a physical record or NULL */ - ulint* offsets,/* in: rec_get_offsets(rec, index) or NULL */ - mtr_t* mtr); /* in: mini-transaction handle */ -/***************************************************************** -Copies records from page to a newly created page, from a given record onward, -including that record. Infimum and supremum records are not copied. 
*/ - -void -page_copy_rec_list_end_to_created_page( -/*===================================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: first record to copy */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr); /* in: mtr */ -/*************************************************************** -Deletes a record at the page cursor. The cursor is moved to the -next record after the deleted one. */ - -void -page_cur_delete_rec( -/*================*/ - page_cur_t* cursor, /* in: a page cursor */ - dict_index_t* index, /* in: record descriptor */ - const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */ - mtr_t* mtr); /* in: mini-transaction handle */ -/******************************************************************** -Searches the right position for a page cursor. */ -UNIV_INLINE -ulint -page_cur_search( -/*============*/ - /* out: number of matched fields on the left */ - page_t* page, /* in: index page */ - dict_index_t* index, /* in: record descriptor */ - dtuple_t* tuple, /* in: data tuple */ - ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, - or PAGE_CUR_GE */ - page_cur_t* cursor);/* out: page cursor */ -/******************************************************************** -Searches the right position for a page cursor. 
*/ - -void -page_cur_search_with_match( -/*=======================*/ - page_t* page, /* in: index page */ - dict_index_t* index, /* in: record descriptor */ - dtuple_t* tuple, /* in: data tuple */ - ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, - or PAGE_CUR_GE */ - ulint* iup_matched_fields, - /* in/out: already matched fields in upper - limit record */ - ulint* iup_matched_bytes, - /* in/out: already matched bytes in a field - not yet completely matched */ - ulint* ilow_matched_fields, - /* in/out: already matched fields in lower - limit record */ - ulint* ilow_matched_bytes, - /* in/out: already matched bytes in a field - not yet completely matched */ - page_cur_t* cursor); /* out: page cursor */ -/*************************************************************** -Positions a page cursor on a randomly chosen user record on a page. If there -are no user records, sets the cursor on the infimum record. */ - -void -page_cur_open_on_rnd_user_rec( -/*==========================*/ - page_t* page, /* in: page */ - page_cur_t* cursor);/* in/out: page cursor */ -/*************************************************************** -Parses a log record of a record insert on a page. */ - -byte* -page_cur_parse_insert_rec( -/*======================*/ - /* out: end of log record or NULL */ - ibool is_short,/* in: TRUE if short inserts */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - dict_index_t* index, /* in: record descriptor */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ -/************************************************************** -Parses a log record of copying a record list end to a new created page. 
*/ - -byte* -page_parse_copy_rec_list_to_created_page( -/*=====================================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - dict_index_t* index, /* in: record descriptor */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ -/*************************************************************** -Parses log record of a record delete on a page. */ - -byte* -page_cur_parse_delete_rec( -/*======================*/ - /* out: pointer to record end or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - dict_index_t* index, /* in: record descriptor */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ - -/* Index page cursor */ - -struct page_cur_struct{ - byte* rec; /* pointer to a record on page */ -}; - -#ifndef UNIV_NONINL -#include "page0cur.ic" -#endif - -#endif diff --git a/storage/innobase/include/page0cur.ic b/storage/innobase/include/page0cur.ic deleted file mode 100644 index b747874abc2..00000000000 --- a/storage/innobase/include/page0cur.ic +++ /dev/null @@ -1,210 +0,0 @@ -/************************************************************************ -The page cursor - -(c) 1994-1996 Innobase Oy - -Created 10/4/1994 Heikki Tuuri -*************************************************************************/ - -#include "page0page.h" - - -/************************************************************* -Gets pointer to the page frame where the cursor is positioned. */ -UNIV_INLINE -page_t* -page_cur_get_page( -/*==============*/ - /* out: page */ - page_cur_t* cur) /* in: page cursor */ -{ - ut_ad(cur); - - return(buf_frame_align(cur->rec)); -} - -/************************************************************* -Gets the record where the cursor is positioned. 
*/ -UNIV_INLINE -rec_t* -page_cur_get_rec( -/*=============*/ - /* out: record */ - page_cur_t* cur) /* in: page cursor */ -{ - ut_ad(cur); - - return(cur->rec); -} - -/************************************************************* -Sets the cursor object to point before the first user record -on the page. */ -UNIV_INLINE -void -page_cur_set_before_first( -/*======================*/ - page_t* page, /* in: index page */ - page_cur_t* cur) /* in: cursor */ -{ - cur->rec = page_get_infimum_rec(page); -} - -/************************************************************* -Sets the cursor object to point after the last user record on -the page. */ -UNIV_INLINE -void -page_cur_set_after_last( -/*====================*/ - page_t* page, /* in: index page */ - page_cur_t* cur) /* in: cursor */ -{ - cur->rec = page_get_supremum_rec(page); -} - -/************************************************************* -Returns TRUE if the cursor is before first user record on page. */ -UNIV_INLINE -ibool -page_cur_is_before_first( -/*=====================*/ - /* out: TRUE if at start */ - const page_cur_t* cur) /* in: cursor */ -{ - return(page_rec_is_infimum(cur->rec)); -} - -/************************************************************* -Returns TRUE if the cursor is after last user record. */ -UNIV_INLINE -ibool -page_cur_is_after_last( -/*===================*/ - /* out: TRUE if at end */ - const page_cur_t* cur) /* in: cursor */ -{ - return(page_rec_is_supremum(cur->rec)); -} - -/************************************************************** -Positions the cursor on the given record. */ -UNIV_INLINE -void -page_cur_position( -/*==============*/ - rec_t* rec, /* in: record on a page */ - page_cur_t* cur) /* in: page cursor */ -{ - ut_ad(rec && cur); - - cur->rec = rec; -} - -/************************************************************** -Invalidates a page cursor by setting the record pointer NULL. 
*/ -UNIV_INLINE -void -page_cur_invalidate( -/*================*/ - page_cur_t* cur) /* in: page cursor */ -{ - ut_ad(cur); - - cur->rec = NULL; -} - -/************************************************************** -Moves the cursor to the next record on page. */ -UNIV_INLINE -void -page_cur_move_to_next( -/*==================*/ - page_cur_t* cur) /* in: cursor; must not be after last */ -{ - ut_ad(!page_cur_is_after_last(cur)); - - cur->rec = page_rec_get_next(cur->rec); -} - -/************************************************************** -Moves the cursor to the previous record on page. */ -UNIV_INLINE -void -page_cur_move_to_prev( -/*==================*/ - page_cur_t* cur) /* in: page cursor, not before first */ -{ - ut_ad(!page_cur_is_before_first(cur)); - - cur->rec = page_rec_get_prev(cur->rec); -} - -/******************************************************************** -Searches the right position for a page cursor. */ -UNIV_INLINE -ulint -page_cur_search( -/*============*/ - /* out: number of matched fields on the left */ - page_t* page, /* in: index page */ - dict_index_t* index, /* in: record descriptor */ - dtuple_t* tuple, /* in: data tuple */ - ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, - or PAGE_CUR_GE */ - page_cur_t* cursor) /* out: page cursor */ -{ - ulint low_matched_fields = 0; - ulint low_matched_bytes = 0; - ulint up_matched_fields = 0; - ulint up_matched_bytes = 0; - - ut_ad(dtuple_check_typed(tuple)); - - page_cur_search_with_match(page, index, tuple, mode, - &up_matched_fields, - &up_matched_bytes, - &low_matched_fields, - &low_matched_bytes, - cursor); - return(low_matched_fields); -} - -/*************************************************************** -Inserts a record next to page cursor. Returns pointer to inserted record if -succeed, i.e., enough space available, NULL otherwise. The cursor stays at -the same position. 
*/ -UNIV_INLINE -rec_t* -page_cur_tuple_insert( -/*==================*/ - /* out: pointer to record if succeed, NULL - otherwise */ - page_cur_t* cursor, /* in: a page cursor */ - dtuple_t* tuple, /* in: pointer to a data tuple */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - return(page_cur_insert_rec_low(cursor, tuple, index, NULL, NULL, mtr)); -} - -/*************************************************************** -Inserts a record next to page cursor. Returns pointer to inserted record if -succeed, i.e., enough space available, NULL otherwise. The cursor stays at -the same position. */ -UNIV_INLINE -rec_t* -page_cur_rec_insert( -/*================*/ - /* out: pointer to record if succeed, NULL - otherwise */ - page_cur_t* cursor, /* in: a page cursor */ - rec_t* rec, /* in: record to insert */ - dict_index_t* index, /* in: record descriptor */ - ulint* offsets,/* in: rec_get_offsets(rec, index) */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - return(page_cur_insert_rec_low(cursor, NULL, index, rec, - offsets, mtr)); -} - diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h deleted file mode 100644 index 273007c2778..00000000000 --- a/storage/innobase/include/page0page.h +++ /dev/null @@ -1,829 +0,0 @@ -/****************************************************** -Index page routines - -(c) 1994-1996 Innobase Oy - -Created 2/2/1994 Heikki Tuuri -*******************************************************/ - -#ifndef page0page_h -#define page0page_h - -#include "univ.i" - -#include "page0types.h" -#include "fil0fil.h" -#include "buf0buf.h" -#include "data0data.h" -#include "dict0dict.h" -#include "rem0rec.h" -#include "fsp0fsp.h" -#include "mtr0mtr.h" - -#ifdef UNIV_MATERIALIZE -#undef UNIV_INLINE -#define UNIV_INLINE -#endif - -/* PAGE HEADER - =========== - -Index page header starts at the first offset left free by the FIL-module */ - -typedef byte page_header_t; - 
-#define PAGE_HEADER FSEG_PAGE_DATA /* index page header starts at this - offset */ -/*-----------------------------*/ -#define PAGE_N_DIR_SLOTS 0 /* number of slots in page directory */ -#define PAGE_HEAP_TOP 2 /* pointer to record heap top */ -#define PAGE_N_HEAP 4 /* number of records in the heap, - bit 15=flag: new-style compact page format */ -#define PAGE_FREE 6 /* pointer to start of page free record list */ -#define PAGE_GARBAGE 8 /* number of bytes in deleted records */ -#define PAGE_LAST_INSERT 10 /* pointer to the last inserted record, or - NULL if this info has been reset by a delete, - for example */ -#define PAGE_DIRECTION 12 /* last insert direction: PAGE_LEFT, ... */ -#define PAGE_N_DIRECTION 14 /* number of consecutive inserts to the same - direction */ -#define PAGE_N_RECS 16 /* number of user records on the page */ -#define PAGE_MAX_TRX_ID 18 /* highest id of a trx which may have modified - a record on the page; a dulint; defined only - in secondary indexes; specifically, not in an - ibuf tree; NOTE: this may be modified only - when the thread has an x-latch to the page, - and ALSO an x-latch to btr_search_latch - if there is a hash index to the page! 
*/ -#define PAGE_HEADER_PRIV_END 26 /* end of private data structure of the page - header which are set in a page create */ -/*----*/ -#define PAGE_LEVEL 26 /* level of the node in an index tree; the - leaf level is the level 0 */ -#define PAGE_INDEX_ID 28 /* index id where the page belongs */ -#define PAGE_BTR_SEG_LEAF 36 /* file segment header for the leaf pages in - a B-tree: defined only on the root page of a - B-tree, but not in the root of an ibuf tree */ -#define PAGE_BTR_IBUF_FREE_LIST PAGE_BTR_SEG_LEAF -#define PAGE_BTR_IBUF_FREE_LIST_NODE PAGE_BTR_SEG_LEAF - /* in the place of PAGE_BTR_SEG_LEAF and _TOP - there is a free list base node if the page is - the root page of an ibuf tree, and at the same - place is the free list node if the page is in - a free list */ -#define PAGE_BTR_SEG_TOP (36 + FSEG_HEADER_SIZE) - /* file segment header for the non-leaf pages - in a B-tree: defined only on the root page of - a B-tree, but not in the root of an ibuf - tree */ -/*----*/ -#define PAGE_DATA (PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE) - /* start of data on the page */ - -#define PAGE_OLD_INFIMUM (PAGE_DATA + 1 + REC_N_OLD_EXTRA_BYTES) - /* offset of the page infimum record on an - old-style page */ -#define PAGE_OLD_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_OLD_EXTRA_BYTES + 8) - /* offset of the page supremum record on an - old-style page */ -#define PAGE_OLD_SUPREMUM_END (PAGE_OLD_SUPREMUM + 9) - /* offset of the page supremum record end on - an old-style page */ -#define PAGE_NEW_INFIMUM (PAGE_DATA + REC_N_NEW_EXTRA_BYTES) - /* offset of the page infimum record on a - new-style compact page */ -#define PAGE_NEW_SUPREMUM (PAGE_DATA + 2 * REC_N_NEW_EXTRA_BYTES + 8) - /* offset of the page supremum record on a - new-style compact page */ -#define PAGE_NEW_SUPREMUM_END (PAGE_NEW_SUPREMUM + 8) - /* offset of the page supremum record end on - a new-style compact page */ -/*-----------------------------*/ - -/* Directions of cursor movement */ -#define PAGE_LEFT 1 -#define 
PAGE_RIGHT 2 -#define PAGE_SAME_REC 3 -#define PAGE_SAME_PAGE 4 -#define PAGE_NO_DIRECTION 5 - -/* PAGE DIRECTORY - ============== -*/ - -typedef byte page_dir_slot_t; -typedef page_dir_slot_t page_dir_t; - -/* Offset of the directory start down from the page end. We call the -slot with the highest file address directory start, as it points to -the first record in the list of records. */ -#define PAGE_DIR FIL_PAGE_DATA_END - -/* We define a slot in the page directory as two bytes */ -#define PAGE_DIR_SLOT_SIZE 2 - -/* The offset of the physically lower end of the directory, counted from -page end, when the page is empty */ -#define PAGE_EMPTY_DIR_START (PAGE_DIR + 2 * PAGE_DIR_SLOT_SIZE) - -/* The maximum and minimum number of records owned by a directory slot. The -number may drop below the minimum in the first and the last slot in the -directory. */ -#define PAGE_DIR_SLOT_MAX_N_OWNED 8 -#define PAGE_DIR_SLOT_MIN_N_OWNED 4 - -/**************************************************************** -Gets the start of a page. */ -UNIV_INLINE -page_t* -page_align( -/*=======*/ - /* out: start of the page */ - void* ptr) /* in: pointer to page frame */ - __attribute__((const)); -/**************************************************************** -Gets the offset within a page. */ -UNIV_INLINE -ulint -page_offset( -/*========*/ - /* out: offset from the start of the page */ - const void* ptr) /* in: pointer to page frame */ - __attribute__((const)); -/***************************************************************** -Returns the max trx id field value. */ -UNIV_INLINE -dulint -page_get_max_trx_id( -/*================*/ - page_t* page); /* in: page */ -/***************************************************************** -Sets the max trx id field value. 
*/ - -void -page_set_max_trx_id( -/*================*/ - page_t* page, /* in: page */ - dulint trx_id);/* in: transaction id */ -/***************************************************************** -Sets the max trx id field value if trx_id is bigger than the previous -value. */ -UNIV_INLINE -void -page_update_max_trx_id( -/*===================*/ - page_t* page, /* in: page */ - dulint trx_id); /* in: transaction id */ -/***************************************************************** -Reads the given header field. */ -UNIV_INLINE -ulint -page_header_get_field( -/*==================*/ - page_t* page, /* in: page */ - ulint field); /* in: PAGE_N_DIR_SLOTS, ... */ -/***************************************************************** -Sets the given header field. */ -UNIV_INLINE -void -page_header_set_field( -/*==================*/ - page_t* page, /* in: page */ - ulint field, /* in: PAGE_N_DIR_SLOTS, ... */ - ulint val); /* in: value */ -/***************************************************************** -Returns the pointer stored in the given header field. */ -UNIV_INLINE -byte* -page_header_get_ptr( -/*================*/ - /* out: pointer or NULL */ - page_t* page, /* in: page */ - ulint field); /* in: PAGE_FREE, ... */ -/***************************************************************** -Sets the pointer stored in the given header field. */ -UNIV_INLINE -void -page_header_set_ptr( -/*================*/ - page_t* page, /* in: page */ - ulint field, /* in: PAGE_FREE, ... */ - byte* ptr); /* in: pointer or NULL*/ -/***************************************************************** -Resets the last insert info field in the page header. Writes to mlog -about this operation. */ -UNIV_INLINE -void -page_header_reset_last_insert( -/*==========================*/ - page_t* page, /* in: page */ - mtr_t* mtr); /* in: mtr */ -/**************************************************************** -Gets the first record on the page. 
*/ -UNIV_INLINE -rec_t* -page_get_infimum_rec( -/*=================*/ - /* out: the first record in record list */ - page_t* page); /* in: page which must have record(s) */ -/**************************************************************** -Gets the last record on the page. */ -UNIV_INLINE -rec_t* -page_get_supremum_rec( -/*==================*/ - /* out: the last record in record list */ - page_t* page); /* in: page which must have record(s) */ -/**************************************************************** -Returns the middle record of record list. If there are an even number -of records in the list, returns the first record of upper half-list. */ - -rec_t* -page_get_middle_rec( -/*================*/ - /* out: middle record */ - page_t* page); /* in: page */ -/***************************************************************** -Compares a data tuple to a physical record. Differs from the function -cmp_dtuple_rec_with_match in the way that the record must reside on an -index page, and also page infimum and supremum records can be given in -the parameter rec. These are considered as the negative infinity and -the positive infinity in the alphabetical order. 
*/ -UNIV_INLINE -int -page_cmp_dtuple_rec_with_match( -/*===========================*/ - /* out: 1, 0, -1, if dtuple is greater, equal, - less than rec, respectively, when only the - common first fields are compared */ - dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec, /* in: physical record on a page; may also - be page infimum or supremum, in which case - matched-parameter values below are not - affected */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint* matched_fields, /* in/out: number of already completely - matched fields; when function returns - contains the value for current comparison */ - ulint* matched_bytes); /* in/out: number of already matched - bytes within the first field not completely - matched; when function returns contains the - value for current comparison */ -/***************************************************************** -Gets the number of user records on page (the infimum and supremum records -are not user records). */ -UNIV_INLINE -ulint -page_get_n_recs( -/*============*/ - /* out: number of user records */ - page_t* page); /* in: index page */ -/******************************************************************* -Returns the number of records before the given record in chain. -The number includes infimum and supremum records. */ - -ulint -page_rec_get_n_recs_before( -/*=======================*/ - /* out: number of records */ - rec_t* rec); /* in: the physical record */ -/***************************************************************** -Gets the number of records in the heap. */ -UNIV_INLINE -ulint -page_dir_get_n_heap( -/*================*/ - /* out: number of records in the heap */ - page_t* page); /* in: index page */ -/***************************************************************** -Sets the number of records in the heap.
*/ -UNIV_INLINE -void -page_dir_set_n_heap( -/*================*/ - page_t* page, /* in: index page */ - ulint n_heap);/* in: number of records in the heap */ -/***************************************************************** -Gets the number of dir slots in directory. */ -UNIV_INLINE -ulint -page_dir_get_n_slots( -/*=================*/ - /* out: number of slots */ - page_t* page); /* in: index page */ -/***************************************************************** -Sets the number of dir slots in directory. */ -UNIV_INLINE -void -page_dir_set_n_slots( -/*=================*/ - /* no return value */ - page_t* page, /* in: index page */ - ulint n_slots);/* in: number of slots */ -/***************************************************************** -Gets pointer to nth directory slot. */ -UNIV_INLINE -page_dir_slot_t* -page_dir_get_nth_slot( -/*==================*/ - /* out: pointer to dir slot */ - page_t* page, /* in: index page */ - ulint n); /* in: position */ -/****************************************************************** -Used to check the consistency of a record on a page. */ -UNIV_INLINE -ibool -page_rec_check( -/*===========*/ - /* out: TRUE if the check succeeds */ - rec_t* rec); /* in: record */ -/******************************************************************* -Gets the record pointed to by a directory slot. */ -UNIV_INLINE -rec_t* -page_dir_slot_get_rec( -/*==================*/ - /* out: pointer to record */ - page_dir_slot_t* slot); /* in: directory slot */ -/******************************************************************* -This is used to set the record offset in a directory slot. */ -UNIV_INLINE -void -page_dir_slot_set_rec( -/*==================*/ - page_dir_slot_t* slot, /* in: directory slot */ - rec_t* rec); /* in: record on the page */ -/******************************************************************* -Gets the number of records owned by a directory slot.
*/ -UNIV_INLINE -ulint -page_dir_slot_get_n_owned( -/*======================*/ - /* out: number of records */ - page_dir_slot_t* slot); /* in: page directory slot */ -/******************************************************************* -This is used to set the owned records field of a directory slot. */ -UNIV_INLINE -void -page_dir_slot_set_n_owned( -/*======================*/ - page_dir_slot_t* slot, /* in: directory slot */ - ulint n); /* in: number of records owned - by the slot */ -/**************************************************************** -Calculates the space reserved for directory slots of a given -number of records. The exact value is a fraction number -n * PAGE_DIR_SLOT_SIZE / PAGE_DIR_SLOT_MIN_N_OWNED, and it is -rounded upwards to an integer. */ -UNIV_INLINE -ulint -page_dir_calc_reserved_space( -/*=========================*/ - ulint n_recs); /* in: number of records */ -/******************************************************************* -Looks for the directory slot which owns the given record. */ - -ulint -page_dir_find_owner_slot( -/*=====================*/ - /* out: the directory slot number */ - rec_t* rec); /* in: the physical record */ -/**************************************************************** -Determine whether the page is in new-style compact format. */ -UNIV_INLINE -ulint -page_is_comp( -/*=========*/ - /* out: nonzero if the page is in compact - format, zero if it is in old-style format */ - page_t* page); /* in: index page */ -/**************************************************************** -TRUE if the record is on a page in compact format. */ -UNIV_INLINE -ulint -page_rec_is_comp( -/*=============*/ - /* out: nonzero if in compact format */ - const rec_t* rec); /* in: record */ -/**************************************************************** -Gets the pointer to the next record on the page. 
*/ -UNIV_INLINE -rec_t* -page_rec_get_next( -/*==============*/ - /* out: pointer to next record */ - rec_t* rec); /* in: pointer to record, must not be page - supremum */ -/**************************************************************** -Sets the pointer to the next record on the page. */ -UNIV_INLINE -void -page_rec_set_next( -/*==============*/ - rec_t* rec, /* in: pointer to record, must not be - page supremum */ - rec_t* next); /* in: pointer to next record, must not - be page infimum */ -/**************************************************************** -Gets the pointer to the previous record. */ -UNIV_INLINE -rec_t* -page_rec_get_prev( -/*==============*/ - /* out: pointer to previous record */ - rec_t* rec); /* in: pointer to record, - must not be page infimum */ -/**************************************************************** -TRUE if the record is a user record on the page. */ -UNIV_INLINE -ibool -page_rec_is_user_rec_low( -/*=====================*/ - /* out: TRUE if a user record */ - ulint offset);/* in: record offset on page */ -/**************************************************************** -TRUE if the record is the supremum record on a page. */ -UNIV_INLINE -ibool -page_rec_is_supremum_low( -/*=====================*/ - /* out: TRUE if the supremum record */ - ulint offset);/* in: record offset on page */ -/**************************************************************** -TRUE if the record is the infimum record on a page. */ -UNIV_INLINE -ibool -page_rec_is_infimum_low( -/*====================*/ - /* out: TRUE if the infimum record */ - ulint offset);/* in: record offset on page */ - -/**************************************************************** -TRUE if the record is a user record on the page. 
*/ -UNIV_INLINE -ibool -page_rec_is_user_rec( -/*=================*/ - /* out: TRUE if a user record */ - const rec_t* rec); /* in: record */ -/**************************************************************** -TRUE if the record is the supremum record on a page. */ -UNIV_INLINE -ibool -page_rec_is_supremum( -/*=================*/ - /* out: TRUE if the supremum record */ - const rec_t* rec); /* in: record */ -/**************************************************************** -TRUE if the record is the infimum record on a page. */ -UNIV_INLINE -ibool -page_rec_is_infimum( -/*================*/ - /* out: TRUE if the infimum record */ - const rec_t* rec); /* in: record */ -/******************************************************************* -Looks for the record which owns the given record. */ -UNIV_INLINE -rec_t* -page_rec_find_owner_rec( -/*====================*/ - /* out: the owner record */ - rec_t* rec); /* in: the physical record */ -/*************************************************************************** -This is a low-level operation which is used in a database index creation -to update the page number of a created B-tree to a data dictionary -record. */ - -void -page_rec_write_index_page_no( -/*=========================*/ - rec_t* rec, /* in: record to update */ - ulint i, /* in: index of the field to update */ - ulint page_no,/* in: value to write */ - mtr_t* mtr); /* in: mtr */ -/**************************************************************** -Returns the maximum combined size of records which can be inserted on top -of record heap. */ -UNIV_INLINE -ulint -page_get_max_insert_size( -/*=====================*/ - /* out: maximum combined size for inserted records */ - page_t* page, /* in: index page */ - ulint n_recs); /* in: number of records */ -/**************************************************************** -Returns the maximum combined size of records which can be inserted on top -of record heap if page is first reorganized. 
*/ -UNIV_INLINE -ulint -page_get_max_insert_size_after_reorganize( -/*======================================*/ - /* out: maximum combined size for inserted records */ - page_t* page, /* in: index page */ - ulint n_recs);/* in: number of records */ -/***************************************************************** -Calculates free space if a page is emptied. */ -UNIV_INLINE -ulint -page_get_free_space_of_empty( -/*=========================*/ - /* out: free space */ - ulint comp) /* in: nonzero=compact page format */ - __attribute__((const)); -/***************************************************************** -Calculates free space if a page is emptied. */ - -ulint -page_get_free_space_of_empty_noninline( -/*===================================*/ - /* out: free space */ - ulint comp) /* in: nonzero=compact page format */ - __attribute__((const)); -/**************************************************************** -Returns the sum of the sizes of the records in the record list -excluding the infimum and supremum records. */ -UNIV_INLINE -ulint -page_get_data_size( -/*===============*/ - /* out: data in bytes */ - page_t* page); /* in: index page */ -/**************************************************************** -Allocates a block of memory from an index page. */ - -byte* -page_mem_alloc( -/*===========*/ - /* out: pointer to start of allocated - buffer, or NULL if allocation fails */ - page_t* page, /* in: index page */ - ulint need, /* in: number of bytes needed */ - dict_index_t* index, /* in: record descriptor */ - ulint* heap_no);/* out: this contains the heap number - of the allocated record - if allocation succeeds */ -/**************************************************************** -Puts a record to free list. 
*/ -UNIV_INLINE -void -page_mem_free( -/*==========*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: pointer to the (origin of) record */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/************************************************************** -The index page creation function. */ - -page_t* -page_create( -/*========*/ - /* out: pointer to the page */ - buf_frame_t* frame, /* in: a buffer frame where the page is - created */ - mtr_t* mtr, /* in: mini-transaction handle */ - ulint comp); /* in: nonzero=compact page format */ -/***************************************************************** -Differs from page_copy_rec_list_end, because this function does not -touch the lock table and max trx id on page. */ - -void -page_copy_rec_list_end_no_locks( -/*============================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Copies records from page to new_page, from the given record onward, -including that record. Infimum and supremum records are not copied. -The records are copied to the start of the record list on new_page. */ - -void -page_copy_rec_list_end( -/*===================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Copies records from page to new_page, up to the given record, NOT -including that record. Infimum and supremum records are not copied. -The records are copied to the end of the record list on new_page. 
*/ - -void -page_copy_rec_list_start( -/*=====================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Deletes records from a page from a given record onward, including that record. -The infimum and supremum records are not deleted. */ - -void -page_delete_rec_list_end( -/*=====================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - dict_index_t* index, /* in: record descriptor */ - ulint n_recs, /* in: number of records to delete, - or ULINT_UNDEFINED if not known */ - ulint size, /* in: the sum of the sizes of the - records in the end of the chain to - delete, or ULINT_UNDEFINED if not known */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Deletes records from page, up to the given record, NOT including -that record. Infimum and supremum records are not deleted. */ - -void -page_delete_rec_list_start( -/*=======================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Moves record list end to another page. Moved records include -split_rec. */ - -void -page_move_rec_list_end( -/*===================*/ - page_t* new_page, /* in: index page where to move */ - page_t* page, /* in: index page */ - rec_t* split_rec, /* in: first record to move */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Moves record list start to another page. Moved records do not include -split_rec. 
*/ - -void -page_move_rec_list_start( -/*=====================*/ - page_t* new_page, /* in: index page where to move */ - page_t* page, /* in: index page */ - rec_t* split_rec, /* in: first record not to move */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr); /* in: mtr */ -/******************************************************************** -Splits a directory slot which owns too many records. */ - -void -page_dir_split_slot( -/*================*/ - page_t* page, /* in: the index page in question */ - ulint slot_no); /* in: the directory slot */ -/***************************************************************** -Tries to balance the given directory slot with too few records -with the upper neighbor, so that there are at least the minimum number -of records owned by the slot; this may result in the merging of -two slots. */ - -void -page_dir_balance_slot( -/*==================*/ - page_t* page, /* in: index page */ - ulint slot_no); /* in: the directory slot */ -/************************************************************** -Parses a log record of a record list end or start deletion. */ - -byte* -page_parse_delete_rec_list( -/*=======================*/ - /* out: end of log record or NULL */ - byte type, /* in: MLOG_LIST_END_DELETE, - MLOG_LIST_START_DELETE, - MLOG_COMP_LIST_END_DELETE or - MLOG_COMP_LIST_START_DELETE */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - dict_index_t* index, /* in: record descriptor */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ -/*************************************************************** -Parses a redo log record of creating a page. 
*/ - -byte* -page_parse_create( -/*==============*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - ulint comp, /* in: nonzero=compact page format */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ -/**************************************************************** -Prints record contents including the data relevant only in -the index page context. */ - -void -page_rec_print( -/*===========*/ - rec_t* rec, /* in: physical record */ - const ulint* offsets);/* in: array of offsets, presumably as returned - by rec_get_offsets() -- TODO confirm */ -/******************************************************************* -This is used to print the contents of the directory for -debugging purposes. */ - -void -page_dir_print( -/*===========*/ - page_t* page, /* in: index page */ - ulint pr_n); /* in: print n first and n last entries */ -/******************************************************************* -This is used to print the contents of the page record list for -debugging purposes. */ - -void -page_print_list( -/*============*/ - page_t* page, /* in: index page */ - dict_index_t* index, /* in: dictionary index of the page */ - ulint pr_n); /* in: print n first and n last entries */ -/******************************************************************* -Prints the info in a page header. */ - -void -page_header_print( -/*==============*/ - page_t* page); /* in: page whose header is printed */ -/******************************************************************* -This is used to print the contents of the page for -debugging purposes. */ - -void -page_print( -/*=======*/ - page_t* page, /* in: index page */ - dict_index_t* index, /* in: dictionary index of the page */ - ulint dn, /* in: print dn first and last entries - in directory */ - ulint rn); /* in: print rn first and last records; - NOTE(review): presumably of the record list, - not the directory -- confirm */ -/******************************************************************* -The following is used to validate a record on a page.
This function -differs from rec_validate as it can also check the n_owned field and -the heap_no field. */ - -ibool -page_rec_validate( -/*==============*/ - /* out: TRUE if ok */ - rec_t* rec, /* in: physical record */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/******************************************************************* -Checks that the first directory slot points to the infimum record and -the last to the supremum. This function is intended to track if the -bug fixed in 4.0.14 has caused corruption to users' databases. */ - -void -page_check_dir( -/*===========*/ - page_t* page); /* in: index page */ -/******************************************************************* -This function checks the consistency of an index page when we do not -know the index. It is written defensively, so it should never crash -even if the page is total garbage. */ - -ibool -page_simple_validate( -/*=================*/ - /* out: TRUE if ok */ - page_t* page); /* in: index page */ -/******************************************************************* -This function checks the consistency of an index page. */ - -ibool -page_validate( -/*==========*/ - /* out: TRUE if ok */ - page_t* page, /* in: index page */ - dict_index_t* index); /* in: data dictionary index containing - the page record type definition */ -/******************************************************************* -Looks in the page record list for a record with the given heap number.
*/ - -rec_t* -page_find_rec_with_heap_no( -/*=======================*/ - /* out: record, NULL if not found */ - page_t* page, /* in: index page */ - ulint heap_no);/* in: heap number */ - -#ifdef UNIV_MATERIALIZE -#undef UNIV_INLINE -#define UNIV_INLINE UNIV_INLINE_ORIGINAL -#endif - -#ifndef UNIV_NONINL -#include "page0page.ic" -#endif - -#endif diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic deleted file mode 100644 index d9e67f3eeeb..00000000000 --- a/storage/innobase/include/page0page.ic +++ /dev/null @@ -1,851 +0,0 @@ -/****************************************************** -Index page routines - -(c) 1994-1996 Innobase Oy - -Created 2/2/1994 Heikki Tuuri -*******************************************************/ - -#include "mach0data.h" -#include "rem0cmp.h" -#include "mtr0log.h" - -#ifdef UNIV_MATERIALIZE -#undef UNIV_INLINE -#define UNIV_INLINE -#endif - -/**************************************************************** -Gets the start of a page. */ -UNIV_INLINE -page_t* -page_align( -/*=======*/ - /* out: start of the page */ - void* ptr) /* in: pointer to page frame */ -{ - return((page_t*) ut_align_down(ptr, UNIV_PAGE_SIZE)); -} -/**************************************************************** -Gets the offset within a page. */ -UNIV_INLINE -ulint -page_offset( -/*========*/ - /* out: offset from the start of the page */ - const void* ptr) /* in: pointer to page frame */ -{ - return(ut_align_offset(ptr, UNIV_PAGE_SIZE)); -} -/***************************************************************** -Returns the max trx id field value. */ -UNIV_INLINE -dulint -page_get_max_trx_id( -/*================*/ - page_t* page) /* in: page */ -{ - ut_ad(page); - - return(mach_read_from_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID)); -} - -/***************************************************************** -Sets the max trx id field value if trx_id is bigger than the previous -value. 
*/ -UNIV_INLINE -void -page_update_max_trx_id( -/*===================*/ - page_t* page, /* in: page */ - dulint trx_id) /* in: transaction id; stored only if it compares greater than the current page_get_max_trx_id(page) */ -{ - ut_ad(page); - - if (ut_dulint_cmp(page_get_max_trx_id(page), trx_id) < 0) { - - page_set_max_trx_id(page, trx_id); - } -} - -/***************************************************************** -Reads the given header field. */ -UNIV_INLINE -ulint -page_header_get_field( -/*==================*/ - page_t* page, /* in: page */ - ulint field) /* in: PAGE_LEVEL, ...; asserted to be at most PAGE_INDEX_ID */ -{ - ut_ad(page); - ut_ad(field <= PAGE_INDEX_ID); - - return(mach_read_from_2(page + PAGE_HEADER + field)); -} - -/***************************************************************** -Sets the given header field. */ -UNIV_INLINE -void -page_header_set_field( -/*==================*/ - page_t* page, /* in: page */ - ulint field, /* in: PAGE_LEVEL, ...; asserted to be at most PAGE_N_RECS */ - ulint val) /* in: value, written with mach_write_to_2(); for PAGE_N_HEAP only the low 15 bits are range-checked against UNIV_PAGE_SIZE */ -{ - ut_ad(page); - ut_ad(field <= PAGE_N_RECS); - ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE); - ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE); - - mach_write_to_2(page + PAGE_HEADER + field, val); -} - -/***************************************************************** -Returns the pointer stored in the given header field. */ -UNIV_INLINE -byte* -page_header_get_ptr( -/*================*/ - /* out: page + stored offset, or NULL if the stored offset is 0 */ - page_t* page, /* in: page */ - ulint field) /* in: PAGE_FREE, PAGE_LAST_INSERT or PAGE_HEAP_TOP */ -{ - ulint offs; - - ut_ad(page); - ut_ad((field == PAGE_FREE) - || (field == PAGE_LAST_INSERT) - || (field == PAGE_HEAP_TOP)); - - offs = page_header_get_field(page, field); - - ut_ad((field != PAGE_HEAP_TOP) || offs); - - if (offs == 0) { - - return(NULL); - } - - return(page + offs); -} - -/***************************************************************** -Sets the pointer stored in the given header field. */ -UNIV_INLINE -void -page_header_set_ptr( -/*================*/ - page_t* page, /* in: page */ - ulint field, /* in: PAGE_FREE, ... 
*/ - byte* ptr) /* in: pointer into page, or NULL to store offset 0 */ -{ - ulint offs; - - ut_ad(page); - ut_ad((field == PAGE_FREE) - || (field == PAGE_LAST_INSERT) - || (field == PAGE_HEAP_TOP)); - - if (ptr == NULL) { - offs = 0; - } else { - offs = ptr - page; - } - - ut_ad((field != PAGE_HEAP_TOP) || offs); - - page_header_set_field(page, field, offs); -} - -/***************************************************************** -Resets the last insert info field in the page header. Writes to mlog -about this operation. */ -UNIV_INLINE -void -page_header_reset_last_insert( -/*==========================*/ - page_t* page, /* in: page */ - mtr_t* mtr) /* in: mtr passed to mlog_write_ulint() */ -{ - ut_ad(page && mtr); - - mlog_write_ulint(page + PAGE_HEADER + PAGE_LAST_INSERT, 0, - MLOG_2BYTES, mtr); -} - -/**************************************************************** -Determine whether the page is in new-style compact format. */ -UNIV_INLINE -ulint -page_is_comp( -/*=========*/ - /* out: nonzero (bit 0x8000 of the PAGE_N_HEAP - field) if the page is in compact - format, zero if it is in old-style format */ - page_t* page) /* in: index page */ -{ - return(UNIV_EXPECT(page_header_get_field(page, PAGE_N_HEAP) & 0x8000, - 0x8000)); -} - -/**************************************************************** -Determines whether the record is on a page in compact format. */ -UNIV_INLINE -ulint -page_rec_is_comp( -/*=============*/ - /* out: nonzero if in compact format */ - const rec_t* rec) /* in: record; its page is located with page_align() */ -{ - return(page_is_comp(page_align((rec_t*) rec))); -} - -/**************************************************************** -Gets the first record on the page. */ -UNIV_INLINE -rec_t* -page_get_infimum_rec( -/*=================*/ - /* out: the first record in record list: page + PAGE_NEW_INFIMUM or page + PAGE_OLD_INFIMUM, depending on page_is_comp() */ - page_t* page) /* in: page which must have record(s) */ -{ - ut_ad(page); - - if (page_is_comp(page)) { - return(page + PAGE_NEW_INFIMUM); - } else { - return(page + PAGE_OLD_INFIMUM); - } -} - -/**************************************************************** -Gets the last record on the page.
*/ -UNIV_INLINE -rec_t* -page_get_supremum_rec( -/*==================*/ - /* out: the last record in record list: page + PAGE_NEW_SUPREMUM or page + PAGE_OLD_SUPREMUM, depending on page_is_comp() */ - page_t* page) /* in: page which must have record(s) */ -{ - ut_ad(page); - - if (page_is_comp(page)) { - return(page + PAGE_NEW_SUPREMUM); - } else { - return(page + PAGE_OLD_SUPREMUM); - } -} - -/**************************************************************** -TRUE if the record is a user record on the page. */ -UNIV_INLINE -ibool -page_rec_is_user_rec_low( -/*=====================*/ - /* out: TRUE if a user record, i.e. offset is none of the infimum or supremum offsets of either page format */ - ulint offset) /* in: record offset on page */ -{ - ut_ad(offset >= PAGE_NEW_INFIMUM); -#if PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM -# error "PAGE_OLD_INFIMUM < PAGE_NEW_INFIMUM" -#endif -#if PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM -# error "PAGE_OLD_SUPREMUM < PAGE_NEW_SUPREMUM" -#endif -#if PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM -# error "PAGE_NEW_INFIMUM > PAGE_OLD_SUPREMUM" -#endif -#if PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM -# error "PAGE_OLD_INFIMUM > PAGE_NEW_SUPREMUM" -#endif -#if PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END -# error "PAGE_NEW_SUPREMUM > PAGE_OLD_SUPREMUM_END" -#endif -#if PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END -# error "PAGE_OLD_SUPREMUM > PAGE_NEW_SUPREMUM_END" -#endif - ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START); - - return(UNIV_LIKELY(offset != PAGE_NEW_SUPREMUM) - && UNIV_LIKELY(offset != PAGE_NEW_INFIMUM) - && UNIV_LIKELY(offset != PAGE_OLD_INFIMUM) - && UNIV_LIKELY(offset != PAGE_OLD_SUPREMUM)); -} - -/**************************************************************** -TRUE if the record is the supremum record on a page.
*/ -UNIV_INLINE -ibool -page_rec_is_supremum_low( -/*=====================*/ - /* out: TRUE if the supremum record, i.e. offset is PAGE_NEW_SUPREMUM or PAGE_OLD_SUPREMUM */ - ulint offset) /* in: record offset on page */ -{ - ut_ad(offset >= PAGE_NEW_INFIMUM); - ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START); - - return(UNIV_UNLIKELY(offset == PAGE_NEW_SUPREMUM) - || UNIV_UNLIKELY(offset == PAGE_OLD_SUPREMUM)); -} - -/**************************************************************** -TRUE if the record is the infimum record on a page. */ -UNIV_INLINE -ibool -page_rec_is_infimum_low( -/*====================*/ - /* out: TRUE if the infimum record, i.e. offset is PAGE_NEW_INFIMUM or PAGE_OLD_INFIMUM */ - ulint offset) /* in: record offset on page */ -{ - ut_ad(offset >= PAGE_NEW_INFIMUM); - ut_ad(offset <= UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START); - - return(UNIV_UNLIKELY(offset == PAGE_NEW_INFIMUM) - || UNIV_UNLIKELY(offset == PAGE_OLD_INFIMUM)); -} - -/**************************************************************** -TRUE if the record is a user record on the page. */ -UNIV_INLINE -ibool -page_rec_is_user_rec( -/*=================*/ - /* out: TRUE if a user record */ - const rec_t* rec) /* in: record; only its page_offset() is examined */ -{ - return(page_rec_is_user_rec_low(page_offset(rec))); -} - -/**************************************************************** -TRUE if the record is the supremum record on a page. */ -UNIV_INLINE -ibool -page_rec_is_supremum( -/*=================*/ - /* out: TRUE if the supremum record */ - const rec_t* rec) /* in: record; only its page_offset() is examined */ -{ - return(page_rec_is_supremum_low(page_offset(rec))); -} - -/**************************************************************** -TRUE if the record is the infimum record on a page. */ -UNIV_INLINE -ibool -page_rec_is_infimum( -/*================*/ - /* out: TRUE if the infimum record */ - const rec_t* rec) /* in: record; only its page_offset() is examined */ -{ - return(page_rec_is_infimum_low(page_offset(rec))); -} - -/***************************************************************** -Compares a data tuple to a physical record.
Differs from the function -cmp_dtuple_rec_with_match in the way that the record must reside on an -index page, and also page infimum and supremum records can be given in -the parameter rec. These are considered as the negative infinity and -the positive infinity in the alphabetical order. */ -UNIV_INLINE -int -page_cmp_dtuple_rec_with_match( -/*===========================*/ - /* out: 1, 0, -1, if dtuple is greater, equal, - less than rec, respectively, when only the - common first fields are compared; always 1 if - rec is the page infimum and always -1 if rec is - the page supremum */ - dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec, /* in: physical record on a page; may also - be page infimum or supremum, in which case - matched-parameter values below are not - affected */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint* matched_fields, /* in/out: number of already completely - matched fields; when function returns - contains the value for current comparison */ - ulint* matched_bytes) /* in/out: number of already matched - bytes within the first field not completely - matched; when function returns contains the - value for current comparison */ -{ - ulint rec_offset; - - ut_ad(dtuple_check_typed(dtuple)); - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec)); - - rec_offset = page_offset(rec); - - if (UNIV_UNLIKELY(rec_offset == PAGE_NEW_INFIMUM) - || UNIV_UNLIKELY(rec_offset == PAGE_OLD_INFIMUM)) { - return(1); - } - if (UNIV_UNLIKELY(rec_offset == PAGE_NEW_SUPREMUM) - || UNIV_UNLIKELY(rec_offset == PAGE_OLD_SUPREMUM)) { - return(-1); - } - - return(cmp_dtuple_rec_with_match(dtuple, rec, offsets, - matched_fields, - matched_bytes)); -} - -/***************************************************************** -Gets the number of user records on page (infimum and supremum records -are not user records).
*/ -UNIV_INLINE -ulint -page_get_n_recs( -/*============*/ - /* out: number of user records (the PAGE_N_RECS header field) */ - page_t* page) /* in: index page */ -{ - return(page_header_get_field(page, PAGE_N_RECS)); -} - -/***************************************************************** -Gets the number of dir slots in directory. */ -UNIV_INLINE -ulint -page_dir_get_n_slots( -/*=================*/ - /* out: number of slots (the PAGE_N_DIR_SLOTS header field) */ - page_t* page) /* in: index page */ -{ - return(page_header_get_field(page, PAGE_N_DIR_SLOTS)); -} -/***************************************************************** -Sets the number of dir slots in directory. */ -UNIV_INLINE -void -page_dir_set_n_slots( -/*=================*/ - /* no return value; n_slots is stored in the PAGE_N_DIR_SLOTS header field */ - page_t* page, /* in: index page */ - ulint n_slots)/* in: number of slots */ -{ - page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots); -} - -/***************************************************************** -Gets the number of records in the heap. */ -UNIV_INLINE -ulint -page_dir_get_n_heap( -/*================*/ - /* out: number of records in the heap: the PAGE_N_HEAP header field masked with 0x7fff */ - page_t* page) /* in: index page */ -{ - return(page_header_get_field(page, PAGE_N_HEAP) & 0x7fff); -} - -/***************************************************************** -Sets the number of records in the heap. */ -UNIV_INLINE -void -page_dir_set_n_heap( -/*================*/ - page_t* page, /* in: index page */ - ulint n_heap) /* in: number of records; asserted to be below 0x8000 because bit 0x8000 already stored in PAGE_N_HEAP is carried over unchanged */ -{ - ut_ad(n_heap < 0x8000); - - page_header_set_field(page, PAGE_N_HEAP, n_heap - | (0x8000 - & page_header_get_field(page, PAGE_N_HEAP))); -} - -/***************************************************************** -Gets pointer to nth directory slot.
*/ -UNIV_INLINE -page_dir_slot_t* -page_dir_get_nth_slot( -/*==================*/ - /* out: pointer to dir slot */ - page_t* page, /* in: index page */ - ulint n) /* in: position; asserted to be less than page_dir_get_n_slots(page) */ -{ - ut_ad(page_dir_get_n_slots(page) > n); - - return(page + UNIV_PAGE_SIZE - PAGE_DIR - - (n + 1) * PAGE_DIR_SLOT_SIZE); -} - -/****************************************************************** -Used to check the consistency of a record on a page. */ -UNIV_INLINE -ibool -page_rec_check( -/*===========*/ - /* out: always TRUE; the checks themselves are made with ut_a() assertions, not reported through the return value */ - rec_t* rec) /* in: record; must be non-NULL and lie between page + PAGE_DATA and the PAGE_HEAP_TOP pointer */ -{ - page_t* page; - - ut_a(rec); - - page = buf_frame_align(rec); - - ut_a(rec <= page_header_get_ptr(page, PAGE_HEAP_TOP)); - ut_a(rec >= page + PAGE_DATA); - - return(TRUE); -} - -/******************************************************************* -Gets the record pointed to by a directory slot. */ -UNIV_INLINE -rec_t* -page_dir_slot_get_rec( -/*==================*/ - /* out: pointer to record: the frame of the slot plus the 2-byte offset stored in the slot */ - page_dir_slot_t* slot) /* in: directory slot */ -{ - return(buf_frame_align(slot) + mach_read_from_2(slot)); -} - -/******************************************************************* -This is used to set the record offset in a directory slot. */ -UNIV_INLINE -void -page_dir_slot_set_rec( -/*==================*/ - page_dir_slot_t* slot, /* in: directory slot */ - rec_t* rec) /* in: record on the page; its page_offset() is what gets stored */ -{ - ut_ad(page_rec_check(rec)); - - mach_write_to_2(slot, page_offset(rec)); -} - -/******************************************************************* -Gets the number of records owned by a directory slot. */ -UNIV_INLINE -ulint -page_dir_slot_get_n_owned( -/*======================*/ - /* out: number of records, read from the n_owned field of the record the slot points to */ - page_dir_slot_t* slot) /* in: page directory slot */ -{ - rec_t* rec = page_dir_slot_get_rec(slot); - return(rec_get_n_owned(rec, page_rec_is_comp(rec))); -} - -/******************************************************************* -This is used to set the owned records field of a directory slot.
*/ -UNIV_INLINE -void -page_dir_slot_set_n_owned( -/*======================*/ - page_dir_slot_t* slot, /* in: directory slot */ - ulint n) /* in: number of records owned - by the slot */ -{ - rec_t* rec = page_dir_slot_get_rec(slot); - rec_set_n_owned(rec, page_rec_is_comp(rec), n); -} - -/**************************************************************** -Calculates the space reserved for directory slots of a given number of -records. The exact value is the fractional number n_recs * PAGE_DIR_SLOT_SIZE / -PAGE_DIR_SLOT_MIN_N_OWNED, and it is rounded upwards to an integer. */ -UNIV_INLINE -ulint -page_dir_calc_reserved_space( -/*=========================*/ - ulint n_recs) /* in: number of records */ -{ - return((PAGE_DIR_SLOT_SIZE * n_recs + PAGE_DIR_SLOT_MIN_N_OWNED - 1) - / PAGE_DIR_SLOT_MIN_N_OWNED); -} - -/**************************************************************** -Gets the pointer to the next record on the page. */ -UNIV_INLINE -rec_t* -page_rec_get_next( -/*==============*/ - /* out: pointer to next record, or NULL if the stored next-record offset is 0; an offset of UNIV_PAGE_SIZE or more is reported to stderr and ends in ut_error */ - rec_t* rec) /* in: pointer to record */ -{ - ulint offs; - page_t* page; - - ut_ad(page_rec_check(rec)); - - page = page_align(rec); - - offs = rec_get_next_offs(rec, page_is_comp(page)); - - if (UNIV_UNLIKELY(offs >= UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Next record offset is nonsensical %lu" - " in record at offset %lu\n" - "InnoDB: rec address %p, first buffer frame %p\n" - "InnoDB: buffer pool high end %p, buf fix count %lu\n", - (ulong)offs, (ulong)(rec - page), - (void*) rec, (void*) buf_pool->frame_zero, - (void*) buf_pool->high_end, - (ulong) buf_block_align(rec)->buf_fix_count); - buf_page_print(page); - - ut_error; - } - - if (UNIV_UNLIKELY(offs == 0)) { - - return(NULL); - } - - return(page + offs); -} - -/**************************************************************** -Sets the pointer to the next record on the page.
*/ -UNIV_INLINE -void -page_rec_set_next( -/*==============*/ - rec_t* rec, /* in: pointer to record, must not be page supremum */ - rec_t* next) /* in: pointer to next record on the same page, must not be page - infimum; NULL stores a next offset of 0 */ -{ - page_t* page; - ulint offs; - - ut_ad(page_rec_check(rec)); - ut_ad(!page_rec_is_supremum(rec)); - page = page_align(rec); - - if (next) { - ut_ad(!page_rec_is_infimum(next)); - ut_ad(page == page_align(next)); - offs = (ulint) (next - page); - } else { - offs = 0; - } - - rec_set_next_offs(rec, page_is_comp(page), offs); -} - -/**************************************************************** -Gets the pointer to the previous record. */ -UNIV_INLINE -rec_t* -page_rec_get_prev( -/*==============*/ - /* out: pointer to previous record, found by walking forward with page_rec_get_next() from the record of the directory slot preceding the owner slot of rec */ - rec_t* rec) /* in: pointer to record, must not be page - infimum */ -{ - page_dir_slot_t* slot; - ulint slot_no; - rec_t* rec2; - rec_t* prev_rec = NULL; - page_t* page; - - ut_ad(page_rec_check(rec)); - - page = page_align(rec); - - ut_ad(!page_rec_is_infimum(rec)); - - slot_no = page_dir_find_owner_slot(rec); - - ut_a(slot_no != 0); - - slot = page_dir_get_nth_slot(page, slot_no - 1); - - rec2 = page_dir_slot_get_rec(slot); - - while (rec != rec2) { - prev_rec = rec2; - rec2 = page_rec_get_next(rec2); - } - - ut_a(prev_rec); - - return(prev_rec); -} - -/******************************************************************* -Looks for the record which owns the given record.
*/ -UNIV_INLINE -rec_t* -page_rec_find_owner_rec( -/*====================*/ - /* out: the owner record, i.e. the first record at or after rec whose n_owned field is nonzero */ - rec_t* rec) /* in: the physical record */ -{ - ut_ad(page_rec_check(rec)); - - if (page_rec_is_comp(rec)) { - while (rec_get_n_owned(rec, TRUE) == 0) { - rec = page_rec_get_next(rec); - } - } else { - while (rec_get_n_owned(rec, FALSE) == 0) { - rec = page_rec_get_next(rec); - } - } - - return(rec); -} - -/**************************************************************** -Returns the sum of the sizes of the records in the record list, excluding -the infimum and supremum records. */ -UNIV_INLINE -ulint -page_get_data_size( -/*===============*/ - /* out: data in bytes: PAGE_HEAP_TOP minus the supremum end offset of the page format minus PAGE_GARBAGE */ - page_t* page) /* in: index page */ -{ - ulint ret; - - ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP) - - (page_is_comp(page) - ? PAGE_NEW_SUPREMUM_END - : PAGE_OLD_SUPREMUM_END) - - page_header_get_field(page, PAGE_GARBAGE)); - - ut_ad(ret < UNIV_PAGE_SIZE); - - return(ret); -} - -/***************************************************************** -Calculates free space if a page is emptied. */ -UNIV_INLINE -ulint -page_get_free_space_of_empty( -/*=========================*/ - /* out: free space: UNIV_PAGE_SIZE minus the supremum end offset, PAGE_DIR and two directory slots */ - ulint comp) /* in: nonzero=compact page layout */ -{ - if (UNIV_LIKELY(comp)) { - return((ulint)(UNIV_PAGE_SIZE - - PAGE_NEW_SUPREMUM_END - - PAGE_DIR - - 2 * PAGE_DIR_SLOT_SIZE)); - } - - return((ulint)(UNIV_PAGE_SIZE - - PAGE_OLD_SUPREMUM_END - - PAGE_DIR - - 2 * PAGE_DIR_SLOT_SIZE)); -} - -/**************************************************************** -Each user record on a page, and also the deleted user records in the heap -takes its size plus the fraction of the dir cell size / -PAGE_DIR_SLOT_MIN_N_OWNED bytes for it.
*/ -UNIV_INLINE -ulint -page_get_max_insert_size( -/*=====================*/ - /* out: maximum combined size for inserted records */ - page_t* page, /* in: index page */ - ulint n_recs) /* in: number of records */ -{ - ulint occupied; - ulint free_space; - - if (page_is_comp(page)) { - occupied = page_header_get_field(page, PAGE_HEAP_TOP) - - PAGE_NEW_SUPREMUM_END - + page_dir_calc_reserved_space( - n_recs + page_dir_get_n_heap(page) - 2); - - free_space = page_get_free_space_of_empty(TRUE); - } else { - occupied = page_header_get_field(page, PAGE_HEAP_TOP) - - PAGE_OLD_SUPREMUM_END - + page_dir_calc_reserved_space( - n_recs + page_dir_get_n_heap(page) - 2); - - free_space = page_get_free_space_of_empty(FALSE); - } - - /* Above the 'n_recs +' part reserves directory space for the new - inserted records; the '- 2' excludes page infimum and supremum - records */ - - if (occupied > free_space) { - - return(0); - } - - return(free_space - occupied); -} - -/**************************************************************** -Returns the maximum combined size of records which can be inserted on top -of the record heap if a page is first reorganized. */ -UNIV_INLINE -ulint -page_get_max_insert_size_after_reorganize( -/*======================================*/ - /* out: maximum combined size for inserted records */ - page_t* page, /* in: index page */ - ulint n_recs) /* in: number of records */ -{ - ulint occupied; - ulint free_space; - - occupied = page_get_data_size(page) - + page_dir_calc_reserved_space(n_recs + page_get_n_recs(page)); - - free_space = page_get_free_space_of_empty(page_is_comp(page)); - - if (occupied > free_space) { - - return(0); - } - - return(free_space - occupied); -} - -/**************************************************************** -Puts a record to free list. 
*/ -UNIV_INLINE -void -page_mem_free( -/*==========*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: pointer to the (origin of) record */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - rec_t* free; - ulint garbage; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(!rec_offs_comp(offsets) == !page_rec_is_comp(rec)); - free = page_header_get_ptr(page, PAGE_FREE); - - page_rec_set_next(rec, free); - page_header_set_ptr(page, PAGE_FREE, rec); - -#if 0 /* It's better not to destroy the user's data. */ - - /* Clear the data bytes of the deleted record in order to improve - the compression ratio of the page and to make it easier to read - page dumps in corruption reports. The extra bytes of the record - cannot be cleared, because page_mem_alloc() needs them in order - to determine the size of the deleted record. */ - memset(rec, 0, rec_offs_data_size(offsets)); -#endif - - garbage = page_header_get_field(page, PAGE_GARBAGE); - - page_header_set_field(page, PAGE_GARBAGE, - garbage + rec_offs_size(offsets)); -} - -#ifdef UNIV_MATERIALIZE -#undef UNIV_INLINE -#define UNIV_INLINE UNIV_INLINE_ORIGINAL -#endif diff --git a/storage/innobase/include/page0types.h b/storage/innobase/include/page0types.h deleted file mode 100644 index 1fbeeb0f60f..00000000000 --- a/storage/innobase/include/page0types.h +++ /dev/null @@ -1,22 +0,0 @@ -/****************************************************** -Index page routines - -(c) 1994-1996 Innobase Oy - -Created 2/2/1994 Heikki Tuuri -*******************************************************/ - -#ifndef page0types_h -#define page0types_h - -#include "univ.i" - -/* Type of the index page */ -/* The following define eliminates a name collision on HP-UX */ -#define page_t ib_page_t -typedef byte page_t; -typedef struct page_search_struct page_search_t; -typedef struct page_cur_struct page_cur_t; - - -#endif diff --git a/storage/innobase/include/pars0grm.h b/storage/innobase/include/pars0grm.h deleted file 
mode 100644 index 0062b8314ee..00000000000 --- a/storage/innobase/include/pars0grm.h +++ /dev/null @@ -1,234 +0,0 @@ -/* A Bison parser, made by GNU Bison 1.875d. */ - -/* Skeleton parser for Yacc-like parsing with Bison, - Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -/* As a special exception, when this file is copied by Bison into a - Bison output file, you may use that output file without restriction. - This special exception was added by the Free Software Foundation - in version 1.24 of Bison. */ - -/* Tokens. */ -#ifndef YYTOKENTYPE -# define YYTOKENTYPE - /* Put the tokens into the symbol table, so that GDB and other debuggers - know about them. 
*/ - enum yytokentype { - PARS_INT_LIT = 258, - PARS_FLOAT_LIT = 259, - PARS_STR_LIT = 260, - PARS_FIXBINARY_LIT = 261, - PARS_BLOB_LIT = 262, - PARS_NULL_LIT = 263, - PARS_ID_TOKEN = 264, - PARS_AND_TOKEN = 265, - PARS_OR_TOKEN = 266, - PARS_NOT_TOKEN = 267, - PARS_GE_TOKEN = 268, - PARS_LE_TOKEN = 269, - PARS_NE_TOKEN = 270, - PARS_PROCEDURE_TOKEN = 271, - PARS_IN_TOKEN = 272, - PARS_OUT_TOKEN = 273, - PARS_BINARY_TOKEN = 274, - PARS_BLOB_TOKEN = 275, - PARS_INT_TOKEN = 276, - PARS_INTEGER_TOKEN = 277, - PARS_FLOAT_TOKEN = 278, - PARS_CHAR_TOKEN = 279, - PARS_IS_TOKEN = 280, - PARS_BEGIN_TOKEN = 281, - PARS_END_TOKEN = 282, - PARS_IF_TOKEN = 283, - PARS_THEN_TOKEN = 284, - PARS_ELSE_TOKEN = 285, - PARS_ELSIF_TOKEN = 286, - PARS_LOOP_TOKEN = 287, - PARS_WHILE_TOKEN = 288, - PARS_RETURN_TOKEN = 289, - PARS_SELECT_TOKEN = 290, - PARS_SUM_TOKEN = 291, - PARS_COUNT_TOKEN = 292, - PARS_DISTINCT_TOKEN = 293, - PARS_FROM_TOKEN = 294, - PARS_WHERE_TOKEN = 295, - PARS_FOR_TOKEN = 296, - PARS_DDOT_TOKEN = 297, - PARS_READ_TOKEN = 298, - PARS_ORDER_TOKEN = 299, - PARS_BY_TOKEN = 300, - PARS_ASC_TOKEN = 301, - PARS_DESC_TOKEN = 302, - PARS_INSERT_TOKEN = 303, - PARS_INTO_TOKEN = 304, - PARS_VALUES_TOKEN = 305, - PARS_UPDATE_TOKEN = 306, - PARS_SET_TOKEN = 307, - PARS_DELETE_TOKEN = 308, - PARS_CURRENT_TOKEN = 309, - PARS_OF_TOKEN = 310, - PARS_CREATE_TOKEN = 311, - PARS_TABLE_TOKEN = 312, - PARS_INDEX_TOKEN = 313, - PARS_UNIQUE_TOKEN = 314, - PARS_CLUSTERED_TOKEN = 315, - PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316, - PARS_ON_TOKEN = 317, - PARS_ASSIGN_TOKEN = 318, - PARS_DECLARE_TOKEN = 319, - PARS_CURSOR_TOKEN = 320, - PARS_SQL_TOKEN = 321, - PARS_OPEN_TOKEN = 322, - PARS_FETCH_TOKEN = 323, - PARS_CLOSE_TOKEN = 324, - PARS_NOTFOUND_TOKEN = 325, - PARS_TO_CHAR_TOKEN = 326, - PARS_TO_NUMBER_TOKEN = 327, - PARS_TO_BINARY_TOKEN = 328, - PARS_BINARY_TO_NUMBER_TOKEN = 329, - PARS_SUBSTR_TOKEN = 330, - PARS_REPLSTR_TOKEN = 331, - PARS_CONCAT_TOKEN = 332, - PARS_INSTR_TOKEN = 333, - 
PARS_LENGTH_TOKEN = 334, - PARS_SYSDATE_TOKEN = 335, - PARS_PRINTF_TOKEN = 336, - PARS_ASSERT_TOKEN = 337, - PARS_RND_TOKEN = 338, - PARS_RND_STR_TOKEN = 339, - PARS_ROW_PRINTF_TOKEN = 340, - PARS_COMMIT_TOKEN = 341, - PARS_ROLLBACK_TOKEN = 342, - PARS_WORK_TOKEN = 343, - PARS_UNSIGNED_TOKEN = 344, - PARS_EXIT_TOKEN = 345, - PARS_FUNCTION_TOKEN = 346, - PARS_LOCK_TOKEN = 347, - PARS_SHARE_TOKEN = 348, - PARS_MODE_TOKEN = 349, - NEG = 350 - }; -#endif -#define PARS_INT_LIT 258 -#define PARS_FLOAT_LIT 259 -#define PARS_STR_LIT 260 -#define PARS_FIXBINARY_LIT 261 -#define PARS_BLOB_LIT 262 -#define PARS_NULL_LIT 263 -#define PARS_ID_TOKEN 264 -#define PARS_AND_TOKEN 265 -#define PARS_OR_TOKEN 266 -#define PARS_NOT_TOKEN 267 -#define PARS_GE_TOKEN 268 -#define PARS_LE_TOKEN 269 -#define PARS_NE_TOKEN 270 -#define PARS_PROCEDURE_TOKEN 271 -#define PARS_IN_TOKEN 272 -#define PARS_OUT_TOKEN 273 -#define PARS_BINARY_TOKEN 274 -#define PARS_BLOB_TOKEN 275 -#define PARS_INT_TOKEN 276 -#define PARS_INTEGER_TOKEN 277 -#define PARS_FLOAT_TOKEN 278 -#define PARS_CHAR_TOKEN 279 -#define PARS_IS_TOKEN 280 -#define PARS_BEGIN_TOKEN 281 -#define PARS_END_TOKEN 282 -#define PARS_IF_TOKEN 283 -#define PARS_THEN_TOKEN 284 -#define PARS_ELSE_TOKEN 285 -#define PARS_ELSIF_TOKEN 286 -#define PARS_LOOP_TOKEN 287 -#define PARS_WHILE_TOKEN 288 -#define PARS_RETURN_TOKEN 289 -#define PARS_SELECT_TOKEN 290 -#define PARS_SUM_TOKEN 291 -#define PARS_COUNT_TOKEN 292 -#define PARS_DISTINCT_TOKEN 293 -#define PARS_FROM_TOKEN 294 -#define PARS_WHERE_TOKEN 295 -#define PARS_FOR_TOKEN 296 -#define PARS_DDOT_TOKEN 297 -#define PARS_READ_TOKEN 298 -#define PARS_ORDER_TOKEN 299 -#define PARS_BY_TOKEN 300 -#define PARS_ASC_TOKEN 301 -#define PARS_DESC_TOKEN 302 -#define PARS_INSERT_TOKEN 303 -#define PARS_INTO_TOKEN 304 -#define PARS_VALUES_TOKEN 305 -#define PARS_UPDATE_TOKEN 306 -#define PARS_SET_TOKEN 307 -#define PARS_DELETE_TOKEN 308 -#define PARS_CURRENT_TOKEN 309 -#define PARS_OF_TOKEN 310 -#define 
PARS_CREATE_TOKEN 311 -#define PARS_TABLE_TOKEN 312 -#define PARS_INDEX_TOKEN 313 -#define PARS_UNIQUE_TOKEN 314 -#define PARS_CLUSTERED_TOKEN 315 -#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316 -#define PARS_ON_TOKEN 317 -#define PARS_ASSIGN_TOKEN 318 -#define PARS_DECLARE_TOKEN 319 -#define PARS_CURSOR_TOKEN 320 -#define PARS_SQL_TOKEN 321 -#define PARS_OPEN_TOKEN 322 -#define PARS_FETCH_TOKEN 323 -#define PARS_CLOSE_TOKEN 324 -#define PARS_NOTFOUND_TOKEN 325 -#define PARS_TO_CHAR_TOKEN 326 -#define PARS_TO_NUMBER_TOKEN 327 -#define PARS_TO_BINARY_TOKEN 328 -#define PARS_BINARY_TO_NUMBER_TOKEN 329 -#define PARS_SUBSTR_TOKEN 330 -#define PARS_REPLSTR_TOKEN 331 -#define PARS_CONCAT_TOKEN 332 -#define PARS_INSTR_TOKEN 333 -#define PARS_LENGTH_TOKEN 334 -#define PARS_SYSDATE_TOKEN 335 -#define PARS_PRINTF_TOKEN 336 -#define PARS_ASSERT_TOKEN 337 -#define PARS_RND_TOKEN 338 -#define PARS_RND_STR_TOKEN 339 -#define PARS_ROW_PRINTF_TOKEN 340 -#define PARS_COMMIT_TOKEN 341 -#define PARS_ROLLBACK_TOKEN 342 -#define PARS_WORK_TOKEN 343 -#define PARS_UNSIGNED_TOKEN 344 -#define PARS_EXIT_TOKEN 345 -#define PARS_FUNCTION_TOKEN 346 -#define PARS_LOCK_TOKEN 347 -#define PARS_SHARE_TOKEN 348 -#define PARS_MODE_TOKEN 349 -#define NEG 350 - - - - -#if ! defined (YYSTYPE) && ! 
defined (YYSTYPE_IS_DECLARED) -typedef int YYSTYPE; -# define yystype YYSTYPE /* obsolescent; will be withdrawn */ -# define YYSTYPE_IS_DECLARED 1 -# define YYSTYPE_IS_TRIVIAL 1 -#endif - -extern YYSTYPE yylval; - - - diff --git a/storage/innobase/include/pars0opt.h b/storage/innobase/include/pars0opt.h deleted file mode 100644 index ff92cc062d9..00000000000 --- a/storage/innobase/include/pars0opt.h +++ /dev/null @@ -1,58 +0,0 @@ -/****************************************************** -Simple SQL optimizer - -(c) 1997 Innobase Oy - -Created 12/21/1997 Heikki Tuuri -*******************************************************/ - -#ifndef pars0opt_h -#define pars0opt_h - -#include "univ.i" -#include "que0types.h" -#include "usr0types.h" -#include "pars0sym.h" -#include "dict0types.h" -#include "row0sel.h" - -/*********************************************************************** -Optimizes a select. Decides which indexes to tables to use. The tables -are accessed in the order that they were written to the FROM part in the -select statement. */ - -void -opt_search_plan( -/*============*/ - sel_node_t* sel_node); /* in: parsed select node */ -/*********************************************************************** -Looks for occurrences of the columns of the table in the query subgraph and -adds them to the list of columns if an occurrence of the same column does not -already exist in the list. If the column is already in the list, puts a value -indirection to point to the occurrence in the column list, except if the -column occurrence we are looking at is in the column list, in which case -nothing is done. 
*/ - -void -opt_find_all_cols( -/*==============*/ - ibool copy_val, /* in: if TRUE, new found columns are - added as columns to copy */ - dict_index_t* index, /* in: index to use */ - sym_node_list_t* col_list, /* in: base node of a list where - to add new found columns */ - plan_t* plan, /* in: plan or NULL */ - que_node_t* exp); /* in: expression or condition */ -/************************************************************************ -Prints info of a query plan. */ - -void -opt_print_query_plan( -/*=================*/ - sel_node_t* sel_node); /* in: select node */ - -#ifndef UNIV_NONINL -#include "pars0opt.ic" -#endif - -#endif diff --git a/storage/innobase/include/pars0opt.ic b/storage/innobase/include/pars0opt.ic deleted file mode 100644 index 0bfa8526bee..00000000000 --- a/storage/innobase/include/pars0opt.ic +++ /dev/null @@ -1,7 +0,0 @@ -/****************************************************** -Simple SQL optimizer - -(c) 1997 Innobase Oy - -Created 12/21/1997 Heikki Tuuri -*******************************************************/ diff --git a/storage/innobase/include/pars0pars.h b/storage/innobase/include/pars0pars.h deleted file mode 100644 index 1c6c550d313..00000000000 --- a/storage/innobase/include/pars0pars.h +++ /dev/null @@ -1,731 +0,0 @@ -/****************************************************** -SQL parser - -(c) 1996 Innobase Oy - -Created 11/19/1996 Heikki Tuuri -*******************************************************/ - -#ifndef pars0pars_h -#define pars0pars_h - -#include "univ.i" -#include "que0types.h" -#include "usr0types.h" -#include "pars0types.h" -#include "row0types.h" -#include "trx0types.h" -#include "ut0vec.h" - -/* Type of the user functions. The first argument is always InnoDB-supplied -and varies in type, while 'user_arg' is a user-supplied argument. The -meaning of the return type also varies. See the individual use cases, e.g. -the FETCH statement, for details on them. 
*/ -typedef void* (*pars_user_func_cb_t)(void* arg, void* user_arg); - -extern int yydebug; - -/* If the following is set TRUE, the lexer will print the SQL string -as it tokenizes it */ - -#ifdef UNIV_SQL_DEBUG -extern ibool pars_print_lexed; -#endif /* UNIV_SQL_DEBUG */ - -/* Global variable used while parsing a single procedure or query : the code is -NOT re-entrant */ -extern sym_tab_t* pars_sym_tab_global; - -extern pars_res_word_t pars_to_char_token; -extern pars_res_word_t pars_to_number_token; -extern pars_res_word_t pars_to_binary_token; -extern pars_res_word_t pars_binary_to_number_token; -extern pars_res_word_t pars_substr_token; -extern pars_res_word_t pars_replstr_token; -extern pars_res_word_t pars_concat_token; -extern pars_res_word_t pars_length_token; -extern pars_res_word_t pars_instr_token; -extern pars_res_word_t pars_sysdate_token; -extern pars_res_word_t pars_printf_token; -extern pars_res_word_t pars_assert_token; -extern pars_res_word_t pars_rnd_token; -extern pars_res_word_t pars_rnd_str_token; -extern pars_res_word_t pars_count_token; -extern pars_res_word_t pars_sum_token; -extern pars_res_word_t pars_distinct_token; -extern pars_res_word_t pars_binary_token; -extern pars_res_word_t pars_blob_token; -extern pars_res_word_t pars_int_token; -extern pars_res_word_t pars_char_token; -extern pars_res_word_t pars_float_token; -extern pars_res_word_t pars_update_token; -extern pars_res_word_t pars_asc_token; -extern pars_res_word_t pars_desc_token; -extern pars_res_word_t pars_open_token; -extern pars_res_word_t pars_close_token; -extern pars_res_word_t pars_share_token; -extern pars_res_word_t pars_unique_token; -extern pars_res_word_t pars_clustered_token; - -extern ulint pars_star_denoter; - -/* Procedure parameter types */ -#define PARS_INPUT 0 -#define PARS_OUTPUT 1 -#define PARS_NOT_PARAM 2 - -int -yyparse(void); - -/***************************************************************** -Parses an SQL string returning the query graph. 
*/ - -que_t* -pars_sql( -/*=====*/ - /* out, own: the query graph */ - pars_info_t* info, /* in: extra information, or NULL */ - const char* str); /* in: SQL string */ -/***************************************************************** -Retrieves characters to the lexical analyzer. */ - -void -pars_get_lex_chars( -/*===============*/ - char* buf, /* in/out: buffer where to copy */ - int* result, /* out: number of characters copied or EOF */ - int max_size); /* in: maximum number of characters which fit - in the buffer */ -/***************************************************************** -Called by yyparse on error. */ - -void -yyerror( -/*====*/ - const char* s); /* in: error message string */ -/************************************************************************* -Parses a variable declaration. */ - -sym_node_t* -pars_variable_declaration( -/*======================*/ - /* out, own: symbol table node of type - SYM_VAR */ - sym_node_t* node, /* in: symbol table node allocated for the - id of the variable */ - pars_res_word_t* type); /* in: pointer to a type token */ -/************************************************************************* -Parses a function expression. */ - -func_node_t* -pars_func( -/*======*/ - /* out, own: function node in a query tree */ - que_node_t* res_word,/* in: function name reserved word */ - que_node_t* arg); /* in: first argument in the argument list */ -/************************************************************************* -Parses an operator expression. */ - -func_node_t* -pars_op( -/*====*/ - /* out, own: function node in a query tree */ - int func, /* in: operator token code */ - que_node_t* arg1, /* in: first argument */ - que_node_t* arg2); /* in: second argument or NULL for an unary - operator */ -/************************************************************************* -Parses an ORDER BY clause. Order by a single column only is supported. 
*/ - -order_node_t* -pars_order_by( -/*==========*/ - /* out, own: order-by node in a query tree */ - sym_node_t* column, /* in: column name */ - pars_res_word_t* asc); /* in: &pars_asc_token or pars_desc_token */ -/************************************************************************* -Parses a select list; creates a query graph node for the whole SELECT -statement. */ - -sel_node_t* -pars_select_list( -/*=============*/ - /* out, own: select node in a query - tree */ - que_node_t* select_list, /* in: select list */ - sym_node_t* into_list); /* in: variables list or NULL */ -/************************************************************************* -Parses a cursor declaration. */ - -que_node_t* -pars_cursor_declaration( -/*====================*/ - /* out: sym_node */ - sym_node_t* sym_node, /* in: cursor id node in the symbol - table */ - sel_node_t* select_node); /* in: select node */ -/************************************************************************* -Parses a function declaration. */ - -que_node_t* -pars_function_declaration( -/*======================*/ - /* out: sym_node */ - sym_node_t* sym_node); /* in: function id node in the symbol - table */ -/************************************************************************* -Parses a select statement. */ - -sel_node_t* -pars_select_statement( -/*==================*/ - /* out, own: select node in a query - tree */ - sel_node_t* select_node, /* in: select node already containing - the select list */ - sym_node_t* table_list, /* in: table list */ - que_node_t* search_cond, /* in: search condition or NULL */ - pars_res_word_t* for_update, /* in: NULL or &pars_update_token */ - pars_res_word_t* consistent_read,/* in: NULL or - &pars_consistent_token */ - order_node_t* order_by); /* in: NULL or an order-by node */ -/************************************************************************* -Parses a column assignment in an update. 
*/ - -col_assign_node_t* -pars_column_assignment( -/*===================*/ - /* out: column assignment node */ - sym_node_t* column, /* in: column to assign */ - que_node_t* exp); /* in: value to assign */ -/************************************************************************* -Parses a delete or update statement start. */ - -upd_node_t* -pars_update_statement_start( -/*========================*/ - /* out, own: update node in a query - tree */ - ibool is_delete, /* in: TRUE if delete */ - sym_node_t* table_sym, /* in: table name node */ - col_assign_node_t* col_assign_list);/* in: column assignment list, NULL - if delete */ -/************************************************************************* -Parses an update or delete statement. */ - -upd_node_t* -pars_update_statement( -/*==================*/ - /* out, own: update node in a query - tree */ - upd_node_t* node, /* in: update node */ - sym_node_t* cursor_sym, /* in: pointer to a cursor entry in - the symbol table or NULL */ - que_node_t* search_cond); /* in: search condition or NULL */ -/************************************************************************* -Parses an insert statement. */ - -ins_node_t* -pars_insert_statement( -/*==================*/ - /* out, own: update node in a query - tree */ - sym_node_t* table_sym, /* in: table name node */ - que_node_t* values_list, /* in: value expression list or NULL */ - sel_node_t* select); /* in: select condition or NULL */ -/************************************************************************* -Parses a procedure parameter declaration. 
*/ - -sym_node_t* -pars_parameter_declaration( -/*=======================*/ - /* out, own: symbol table node of type - SYM_VAR */ - sym_node_t* node, /* in: symbol table node allocated for the - id of the parameter */ - ulint param_type, - /* in: PARS_INPUT or PARS_OUTPUT */ - pars_res_word_t* type); /* in: pointer to a type token */ -/************************************************************************* -Parses an elsif element. */ - -elsif_node_t* -pars_elsif_element( -/*===============*/ - /* out: elsif node */ - que_node_t* cond, /* in: if-condition */ - que_node_t* stat_list); /* in: statement list */ -/************************************************************************* -Parses an if-statement. */ - -if_node_t* -pars_if_statement( -/*==============*/ - /* out: if-statement node */ - que_node_t* cond, /* in: if-condition */ - que_node_t* stat_list, /* in: statement list */ - que_node_t* else_part); /* in: else-part statement list */ -/************************************************************************* -Parses a for-loop-statement. */ - -for_node_t* -pars_for_statement( -/*===============*/ - /* out: for-statement node */ - sym_node_t* loop_var, /* in: loop variable */ - que_node_t* loop_start_limit,/* in: loop start expression */ - que_node_t* loop_end_limit, /* in: loop end expression */ - que_node_t* stat_list); /* in: statement list */ -/************************************************************************* -Parses a while-statement. */ - -while_node_t* -pars_while_statement( -/*=================*/ - /* out: while-statement node */ - que_node_t* cond, /* in: while-condition */ - que_node_t* stat_list); /* in: statement list */ -/************************************************************************* -Parses an exit statement. */ - -exit_node_t* -pars_exit_statement(void); -/*=====================*/ - /* out: exit statement node */ -/************************************************************************* -Parses a return-statement. 
*/ - -return_node_t* -pars_return_statement(void); -/*=======================*/ - /* out: return-statement node */ -/************************************************************************* -Parses a procedure call. */ - -func_node_t* -pars_procedure_call( -/*================*/ - /* out: function node */ - que_node_t* res_word,/* in: procedure name reserved word */ - que_node_t* args); /* in: argument list */ -/************************************************************************* -Parses an assignment statement. */ - -assign_node_t* -pars_assignment_statement( -/*======================*/ - /* out: assignment statement node */ - sym_node_t* var, /* in: variable to assign */ - que_node_t* val); /* in: value to assign */ -/************************************************************************* -Parses a fetch statement. into_list or user_func (but not both) must be -non-NULL. */ - -fetch_node_t* -pars_fetch_statement( -/*=================*/ - /* out: fetch statement node */ - sym_node_t* cursor, /* in: cursor node */ - sym_node_t* into_list, /* in: variables to set, or NULL */ - sym_node_t* user_func); /* in: user function name, or NULL */ -/************************************************************************* -Parses an open or close cursor statement. */ - -open_node_t* -pars_open_statement( -/*================*/ - /* out: fetch statement node */ - ulint type, /* in: ROW_SEL_OPEN_CURSOR - or ROW_SEL_CLOSE_CURSOR */ - sym_node_t* cursor); /* in: cursor node */ -/************************************************************************* -Parses a row_printf-statement. */ - -row_printf_node_t* -pars_row_printf_statement( -/*======================*/ - /* out: row_printf-statement node */ - sel_node_t* sel_node); /* in: select node */ -/************************************************************************* -Parses a commit statement. 
*/ - -commit_node_t* -pars_commit_statement(void); -/*=======================*/ -/************************************************************************* -Parses a rollback statement. */ - -roll_node_t* -pars_rollback_statement(void); -/*=========================*/ -/************************************************************************* -Parses a column definition at a table creation. */ - -sym_node_t* -pars_column_def( -/*============*/ - /* out: column sym table - node */ - sym_node_t* sym_node, /* in: column node in the - symbol table */ - pars_res_word_t* type, /* in: data type */ - sym_node_t* len, /* in: length of column, or - NULL */ - void* is_unsigned, /* in: if not NULL, column - is of type UNSIGNED. */ - void* is_not_null); /* in: if not NULL, column - is of type NOT NULL. */ -/************************************************************************* -Parses a table creation operation. */ - -tab_node_t* -pars_create_table( -/*==============*/ - /* out: table create subgraph */ - sym_node_t* table_sym, /* in: table name node in the symbol - table */ - sym_node_t* column_defs, /* in: list of column names */ - void* not_fit_in_memory);/* in: a non-NULL pointer means that - this is a table which in simulations - should be simulated as not fitting - in memory; thread is put to sleep - to simulate disk accesses; NOTE that - this flag is not stored to the data - dictionary on disk, and the database - will forget about non-NULL value if - it has to reload the table definition - from disk */ -/************************************************************************* -Parses an index creation operation. 
*/ - -ind_node_t* -pars_create_index( -/*==============*/ - /* out: index create subgraph */ - pars_res_word_t* unique_def, /* in: not NULL if a unique index */ - pars_res_word_t* clustered_def, /* in: not NULL if a clustered index */ - sym_node_t* index_sym, /* in: index name node in the symbol - table */ - sym_node_t* table_sym, /* in: table name node in the symbol - table */ - sym_node_t* column_list); /* in: list of column names */ -/************************************************************************* -Parses a procedure definition. */ - -que_fork_t* -pars_procedure_definition( -/*======================*/ - /* out: query fork node */ - sym_node_t* sym_node, /* in: procedure id node in the symbol - table */ - sym_node_t* param_list, /* in: parameter declaration list */ - que_node_t* stat_list); /* in: statement list */ - -/***************************************************************** -Parses a stored procedure call, when this is not within another stored -procedure, that is, the client issues a procedure call directly. -In MySQL/InnoDB, stored InnoDB procedures are invoked via the -parsed procedure tree, not via InnoDB SQL, so this function is not used. */ - -que_fork_t* -pars_stored_procedure_call( -/*=======================*/ - /* out: query graph */ - sym_node_t* sym_node); /* in: stored procedure name */ -/********************************************************************** -Completes a query graph by adding query thread and fork nodes -above it and prepares the graph for running. The fork created is of -type QUE_FORK_MYSQL_INTERFACE. 
*/ - -que_thr_t* -pars_complete_graph_for_exec( -/*=========================*/ - /* out: query thread node to run */ - que_node_t* node, /* in: root node for an incomplete - query graph */ - trx_t* trx, /* in: transaction handle */ - mem_heap_t* heap); /* in: memory heap from which allocated */ - -/******************************************************************** -Create parser info struct.*/ - -pars_info_t* -pars_info_create(void); -/*==================*/ - /* out, own: info struct */ - -/******************************************************************** -Free info struct and everything it contains.*/ - -void -pars_info_free( -/*===========*/ - pars_info_t* info); /* in: info struct */ - -/******************************************************************** -Add bound literal. */ - -void -pars_info_add_literal( -/*==================*/ - pars_info_t* info, /* in: info struct */ - const char* name, /* in: name */ - const void* address, /* in: address */ - ulint length, /* in: length of data */ - ulint type, /* in: type, e.g. DATA_FIXBINARY */ - ulint prtype); /* in: precise type, e.g. - DATA_UNSIGNED */ - -/******************************************************************** -Equivalent to pars_info_add_literal(info, name, str, strlen(str), -DATA_VARCHAR, DATA_ENGLISH). */ - -void -pars_info_add_str_literal( -/*======================*/ - pars_info_t* info, /* in: info struct */ - const char* name, /* in: name */ - const char* str); /* in: string */ - -/******************************************************************** -Equivalent to: - -char buf[4]; -mach_write_to_4(buf, val); -pars_info_add_literal(info, name, buf, 4, DATA_INT, 0); - -except that the buffer is dynamically allocated from the info struct's -heap. 
*/ - -void -pars_info_add_int4_literal( -/*=======================*/ - pars_info_t* info, /* in: info struct */ - const char* name, /* in: name */ - lint val); /* in: value */ - -/******************************************************************** -Equivalent to: - -char buf[8]; -mach_write_to_8(buf, val); -pars_info_add_literal(info, name, buf, 8, DATA_BINARY, 0); - -except that the buffer is dynamically allocated from the info struct's -heap. */ - -void -pars_info_add_dulint_literal( -/*=========================*/ - pars_info_t* info, /* in: info struct */ - const char* name, /* in: name */ - dulint val); /* in: value */ -/******************************************************************** -Add user function. */ - -void -pars_info_add_function( -/*===================*/ - pars_info_t* info, /* in: info struct */ - const char* name, /* in: function name */ - pars_user_func_cb_t func, /* in: function address */ - void* arg); /* in: user-supplied argument */ - -/******************************************************************** -Add bound id. 
*/ - -void -pars_info_add_id( -/*=============*/ - pars_info_t* info, /* in: info struct */ - const char* name, /* in: name */ - const char* id); /* in: id */ - -/******************************************************************** -Get user function with the given name.*/ - -pars_user_func_t* -pars_info_get_user_func( -/*====================*/ - /* out: user func, or NULL if not - found */ - pars_info_t* info, /* in: info struct */ - const char* name); /* in: function name to find*/ - -/******************************************************************** -Get bound literal with the given name.*/ - -pars_bound_lit_t* -pars_info_get_bound_lit( -/*====================*/ - /* out: bound literal, or NULL if - not found */ - pars_info_t* info, /* in: info struct */ - const char* name); /* in: bound literal name to find */ - -/******************************************************************** -Get bound id with the given name.*/ - -pars_bound_id_t* -pars_info_get_bound_id( -/*===================*/ - /* out: bound id, or NULL if not - found */ - pars_info_t* info, /* in: info struct */ - const char* name); /* in: bound id name to find */ - - -/* Extra information supplied for pars_sql(). */ -struct pars_info_struct { - mem_heap_t* heap; /* our own memory heap */ - - ib_vector_t* funcs; /* user functions, or NUll - (pars_user_func_t*) */ - ib_vector_t* bound_lits; /* bound literals, or NULL - (pars_bound_lit_t*) */ - ib_vector_t* bound_ids; /* bound ids, or NULL - (pars_bound_id_t*) */ - - ibool graph_owns_us; /* if TRUE (which is the default), - que_graph_free() will free us */ -}; - -/* User-supplied function and argument. */ -struct pars_user_func_struct { - const char* name; /* function name */ - pars_user_func_cb_t func; /* function address */ - void* arg; /* user-supplied argument */ -}; - -/* Bound literal. 
*/ -struct pars_bound_lit_struct { - const char* name; /* name */ - const void* address; /* address */ - ulint length; /* length of data */ - ulint type; /* type, e.g. DATA_FIXBINARY */ - ulint prtype; /* precise type, e.g. DATA_UNSIGNED */ -}; - -/* Bound id. */ -struct pars_bound_id_struct { - const char* name; /* name */ - const char* id; /* id */ -}; - -/* Struct used to denote a reserved word in a parsing tree */ -struct pars_res_word_struct{ - int code; /* the token code for the reserved word from - pars0grm.h */ -}; - -/* A predefined function or operator node in a parsing tree; this construct -is also used for some non-functions like the assignment ':=' */ -struct func_node_struct{ - que_common_t common; /* type: QUE_NODE_FUNC */ - int func; /* token code of the function name */ - ulint class; /* class of the function */ - que_node_t* args; /* argument(s) of the function */ - UT_LIST_NODE_T(func_node_t) cond_list; - /* list of comparison conditions; defined - only for comparison operator nodes except, - presently, for OPT_SCROLL_TYPE ones */ - UT_LIST_NODE_T(func_node_t) func_node_list; - /* list of function nodes in a parsed - query graph */ -}; - -/* An order-by node in a select */ -struct order_node_struct{ - que_common_t common; /* type: QUE_NODE_ORDER */ - sym_node_t* column; /* order-by column */ - ibool asc; /* TRUE if ascending, FALSE if descending */ -}; - -/* Procedure definition node */ -struct proc_node_struct{ - que_common_t common; /* type: QUE_NODE_PROC */ - sym_node_t* proc_id; /* procedure name symbol in the symbol - table of this same procedure */ - sym_node_t* param_list; /* input and output parameters */ - que_node_t* stat_list; /* statement list */ - sym_tab_t* sym_tab; /* symbol table of this procedure */ -}; - -/* elsif-element node */ -struct elsif_node_struct{ - que_common_t common; /* type: QUE_NODE_ELSIF */ - que_node_t* cond; /* if condition */ - que_node_t* stat_list; /* statement list */ -}; - -/* if-statement node */ -struct 
if_node_struct{ - que_common_t common; /* type: QUE_NODE_IF */ - que_node_t* cond; /* if condition */ - que_node_t* stat_list; /* statement list */ - que_node_t* else_part; /* else-part statement list */ - elsif_node_t* elsif_list; /* elsif element list */ -}; - -/* while-statement node */ -struct while_node_struct{ - que_common_t common; /* type: QUE_NODE_WHILE */ - que_node_t* cond; /* while condition */ - que_node_t* stat_list; /* statement list */ -}; - -/* for-loop-statement node */ -struct for_node_struct{ - que_common_t common; /* type: QUE_NODE_FOR */ - sym_node_t* loop_var; /* loop variable: this is the - dereferenced symbol from the - variable declarations, not the - symbol occurrence in the for loop - definition */ - que_node_t* loop_start_limit;/* initial value of loop variable */ - que_node_t* loop_end_limit; /* end value of loop variable */ - lint loop_end_value; /* evaluated value for the end value: - it is calculated only when the loop - is entered, and will not change within - the loop */ - que_node_t* stat_list; /* statement list */ -}; - -/* exit statement node */ -struct exit_node_struct{ - que_common_t common; /* type: QUE_NODE_EXIT */ -}; - -/* return-statement node */ -struct return_node_struct{ - que_common_t common; /* type: QUE_NODE_RETURN */ -}; - -/* Assignment statement node */ -struct assign_node_struct{ - que_common_t common; /* type: QUE_NODE_ASSIGNMENT */ - sym_node_t* var; /* variable to set */ - que_node_t* val; /* value to assign */ -}; - -/* Column assignment node */ -struct col_assign_node_struct{ - que_common_t common; /* type: QUE_NODE_COL_ASSIGN */ - sym_node_t* col; /* column to set */ - que_node_t* val; /* value to assign */ -}; - -/* Classes of functions */ -#define PARS_FUNC_ARITH 1 /* +, -, *, / */ -#define PARS_FUNC_LOGICAL 2 -#define PARS_FUNC_CMP 3 -#define PARS_FUNC_PREDEFINED 4 /* TO_NUMBER, SUBSTR, ... 
*/ -#define PARS_FUNC_AGGREGATE 5 /* COUNT, DISTINCT, SUM */ -#define PARS_FUNC_OTHER 6 /* these are not real functions, - e.g., := */ - -#ifndef UNIV_NONINL -#include "pars0pars.ic" -#endif - -#endif diff --git a/storage/innobase/include/pars0pars.ic b/storage/innobase/include/pars0pars.ic deleted file mode 100644 index 155b6659ace..00000000000 --- a/storage/innobase/include/pars0pars.ic +++ /dev/null @@ -1,7 +0,0 @@ -/****************************************************** -SQL parser - -(c) 1996 Innobase Oy - -Created 11/19/1996 Heikki Tuuri -*******************************************************/ diff --git a/storage/innobase/include/pars0sym.h b/storage/innobase/include/pars0sym.h deleted file mode 100644 index fc7df92ff60..00000000000 --- a/storage/innobase/include/pars0sym.h +++ /dev/null @@ -1,223 +0,0 @@ -/****************************************************** -SQL parser symbol table - -(c) 1997 Innobase Oy - -Created 12/15/1997 Heikki Tuuri -*******************************************************/ - -#ifndef pars0sym_h -#define pars0sym_h - -#include "univ.i" -#include "que0types.h" -#include "usr0types.h" -#include "dict0types.h" -#include "pars0types.h" -#include "row0types.h" - -/********************************************************************** -Creates a symbol table for a single stored procedure or query. */ - -sym_tab_t* -sym_tab_create( -/*===========*/ - /* out, own: symbol table */ - mem_heap_t* heap); /* in: memory heap where to create */ -/********************************************************************** -Frees the memory allocated dynamically AFTER parsing phase for variables -etc. in the symbol table. Does not free the mem heap where the table was -originally created. Frees also SQL explicit cursor definitions. */ - -void -sym_tab_free_private( -/*=================*/ - sym_tab_t* sym_tab); /* in, own: symbol table */ -/********************************************************************** -Adds an integer literal to a symbol table. 
*/ - -sym_node_t* -sym_tab_add_int_lit( -/*================*/ - /* out: symbol table node */ - sym_tab_t* sym_tab, /* in: symbol table */ - ulint val); /* in: integer value */ -/********************************************************************** -Adds an string literal to a symbol table. */ - -sym_node_t* -sym_tab_add_str_lit( -/*================*/ - /* out: symbol table node */ - sym_tab_t* sym_tab, /* in: symbol table */ - byte* str, /* in: string with no quotes around - it */ - ulint len); /* in: string length */ -/********************************************************************** -Add a bound literal to a symbol table. */ - -sym_node_t* -sym_tab_add_bound_lit( -/*==================*/ - /* out: symbol table node */ - sym_tab_t* sym_tab, /* in: symbol table */ - const char* name, /* in: name of bound literal */ - ulint* lit_type); /* out: type of literal (PARS_*_LIT) */ -/********************************************************************** -Adds an SQL null literal to a symbol table. */ - -sym_node_t* -sym_tab_add_null_lit( -/*=================*/ - /* out: symbol table node */ - sym_tab_t* sym_tab); /* in: symbol table */ -/********************************************************************** -Adds an identifier to a symbol table. */ - -sym_node_t* -sym_tab_add_id( -/*===========*/ - /* out: symbol table node */ - sym_tab_t* sym_tab, /* in: symbol table */ - byte* name, /* in: identifier name */ - ulint len); /* in: identifier length */ - -/********************************************************************** -Add a bound identifier to a symbol table. 
*/ - -sym_node_t* -sym_tab_add_bound_id( -/*===========*/ - /* out: symbol table node */ - sym_tab_t* sym_tab, /* in: symbol table */ - const char* name); /* in: name of bound id */ - -#define SYM_CLUST_FIELD_NO 0 -#define SYM_SEC_FIELD_NO 1 - -struct sym_node_struct{ - que_common_t common; /* node type: - QUE_NODE_SYMBOL */ - /* NOTE: if the data field in 'common.val' is not NULL and the symbol - table node is not for a temporary column, the memory for the value has - been allocated from dynamic memory and it should be freed when the - symbol table is discarded */ - - /* 'alias' and 'indirection' are almost the same, but not quite. - 'alias' always points to the primary instance of the variable, while - 'indirection' does the same only if we should use the primary - instance's values for the node's data. This is usually the case, but - when initializing a cursor (e.g., "DECLARE CURSOR c IS SELECT * FROM - t WHERE id = x;"), we copy the values from the primary instance to - the cursor's instance so that they are fixed for the duration of the - cursor, and set 'indirection' to NULL. If we did not, the value of - 'x' could change between fetches and things would break horribly. - - TODO: It would be cleaner to make 'indirection' a boolean field and - always use 'alias' to refer to the primary node. 
*/ - - sym_node_t* indirection; /* pointer to - another symbol table - node which contains - the value for this - node, NULL otherwise */ - sym_node_t* alias; /* pointer to - another symbol table - node for which this - node is an alias, - NULL otherwise */ - UT_LIST_NODE_T(sym_node_t) col_var_list; /* list of table - columns or a list of - input variables for an - explicit cursor */ - ibool copy_val; /* TRUE if a column - and its value should - be copied to dynamic - memory when fetched */ - ulint field_nos[2]; /* if a column, in - the position - SYM_CLUST_FIELD_NO is - the field number in the - clustered index; in - the position - SYM_SEC_FIELD_NO - the field number in the - non-clustered index to - use first; if not found - from the index, then - ULINT_UNDEFINED */ - ibool resolved; /* TRUE if the - meaning of a variable - or a column has been - resolved; for literals - this is always TRUE */ - ulint token_type; /* SYM_VAR, SYM_COLUMN, - SYM_IMPLICIT_VAR, - SYM_LIT, SYM_TABLE, - SYM_CURSOR, ... 
*/ - const char* name; /* name of an id */ - ulint name_len; /* id name length */ - dict_table_t* table; /* table definition - if a table id or a - column id */ - ulint col_no; /* column number if a - column */ - sel_buf_t* prefetch_buf; /* NULL, or a buffer - for cached column - values for prefetched - rows */ - sel_node_t* cursor_def; /* cursor definition - select node if a - named cursor */ - ulint param_type; /* PARS_INPUT, - PARS_OUTPUT, or - PARS_NOT_PARAM if not a - procedure parameter */ - sym_tab_t* sym_table; /* back pointer to - the symbol table */ - UT_LIST_NODE_T(sym_node_t) sym_list; /* list of symbol - nodes */ -}; - -struct sym_tab_struct{ - que_t* query_graph; - /* query graph generated by the - parser */ - const char* sql_string; - /* SQL string to parse */ - size_t string_len; - /* SQL string length */ - int next_char_pos; - /* position of the next character in - sql_string to give to the lexical - analyzer */ - pars_info_t* info; /* extra information, or NULL */ - sym_node_list_t sym_list; - /* list of symbol nodes in the symbol - table */ - UT_LIST_BASE_NODE_T(func_node_t) - func_node_list; - /* list of function nodes in the - parsed query graph */ - mem_heap_t* heap; /* memory heap from which we can - allocate space */ -}; - -/* Types of a symbol table entry */ -#define SYM_VAR 91 /* declared parameter or local - variable of a procedure */ -#define SYM_IMPLICIT_VAR 92 /* storage for an intermediate result - of a calculation */ -#define SYM_LIT 93 /* literal */ -#define SYM_TABLE 94 /* database table name */ -#define SYM_COLUMN 95 /* database table column name */ -#define SYM_CURSOR 96 /* named cursor */ -#define SYM_PROCEDURE_NAME 97 /* stored procedure name */ -#define SYM_INDEX 98 /* database index name */ -#define SYM_FUNCTION 99 /* user function name */ - -#ifndef UNIV_NONINL -#include "pars0sym.ic" -#endif - -#endif diff --git a/storage/innobase/include/pars0sym.ic b/storage/innobase/include/pars0sym.ic deleted file mode 100644 index 
9508d423769..00000000000 --- a/storage/innobase/include/pars0sym.ic +++ /dev/null @@ -1,7 +0,0 @@ -/****************************************************** -SQL parser symbol table - -(c) 1997 Innobase Oy - -Created 12/15/1997 Heikki Tuuri -*******************************************************/ diff --git a/storage/innobase/include/pars0types.h b/storage/innobase/include/pars0types.h deleted file mode 100644 index bf7df89a883..00000000000 --- a/storage/innobase/include/pars0types.h +++ /dev/null @@ -1,33 +0,0 @@ -/****************************************************** -SQL parser global types - -(c) 1997 Innobase Oy - -Created 1/11/1998 Heikki Tuuri -*******************************************************/ - -#ifndef pars0types_h -#define pars0types_h - -typedef struct pars_info_struct pars_info_t; -typedef struct pars_user_func_struct pars_user_func_t; -typedef struct pars_bound_lit_struct pars_bound_lit_t; -typedef struct pars_bound_id_struct pars_bound_id_t; -typedef struct sym_node_struct sym_node_t; -typedef struct sym_tab_struct sym_tab_t; -typedef struct pars_res_word_struct pars_res_word_t; -typedef struct func_node_struct func_node_t; -typedef struct order_node_struct order_node_t; -typedef struct proc_node_struct proc_node_t; -typedef struct elsif_node_struct elsif_node_t; -typedef struct if_node_struct if_node_t; -typedef struct while_node_struct while_node_t; -typedef struct for_node_struct for_node_t; -typedef struct exit_node_struct exit_node_t; -typedef struct return_node_struct return_node_t; -typedef struct assign_node_struct assign_node_t; -typedef struct col_assign_node_struct col_assign_node_t; - -typedef UT_LIST_BASE_NODE_T(sym_node_t) sym_node_list_t; - -#endif diff --git a/storage/innobase/include/que0que.h b/storage/innobase/include/que0que.h deleted file mode 100644 index 8fbf5330c89..00000000000 --- a/storage/innobase/include/que0que.h +++ /dev/null @@ -1,510 +0,0 @@ -/****************************************************** -Query graph - 
-(c) 1996 Innobase Oy - -Created 5/27/1996 Heikki Tuuri -*******************************************************/ - -#ifndef que0que_h -#define que0que_h - -#include "univ.i" -#include "data0data.h" -#include "dict0types.h" -#include "trx0trx.h" -#include "srv0srv.h" -#include "usr0types.h" -#include "que0types.h" -#include "row0types.h" -#include "pars0types.h" - -/* If the following flag is set TRUE, the module will print trace info -of SQL execution in the UNIV_SQL_DEBUG version */ -extern ibool que_trace_on; - -/*************************************************************************** -Adds a query graph to the session's list of graphs. */ - -void -que_graph_publish( -/*==============*/ - que_t* graph, /* in: graph */ - sess_t* sess); /* in: session */ -/*************************************************************************** -Creates a query graph fork node. */ - -que_fork_t* -que_fork_create( -/*============*/ - /* out, own: fork node */ - que_t* graph, /* in: graph, if NULL then this - fork node is assumed to be the - graph root */ - que_node_t* parent, /* in: parent node */ - ulint fork_type, /* in: fork type */ - mem_heap_t* heap); /* in: memory heap where created */ -/*************************************************************************** -Gets the first thr in a fork. */ -UNIV_INLINE -que_thr_t* -que_fork_get_first_thr( -/*===================*/ - que_fork_t* fork); /* in: query fork */ -/*************************************************************************** -Gets the child node of the first thr in a fork. */ -UNIV_INLINE -que_node_t* -que_fork_get_child( -/*===============*/ - que_fork_t* fork); /* in: query fork */ -/*************************************************************************** -Sets the parent of a graph node. 
*/ -UNIV_INLINE -void -que_node_set_parent( -/*================*/ - que_node_t* node, /* in: graph node */ - que_node_t* parent);/* in: parent */ -/*************************************************************************** -Creates a query graph thread node. */ - -que_thr_t* -que_thr_create( -/*===========*/ - /* out, own: query thread node */ - que_fork_t* parent, /* in: parent node, i.e., a fork node */ - mem_heap_t* heap); /* in: memory heap where created */ -/************************************************************************** -Checks if the query graph is in a state where it should be freed, and -frees it in that case. If the session is in a state where it should be -closed, also this is done. */ - -ibool -que_graph_try_free( -/*===============*/ - /* out: TRUE if freed */ - que_t* graph); /* in: query graph */ -/************************************************************************** -Frees a query graph, but not the heap where it was created. Does not free -explicit cursor declarations, they are freed in que_graph_free. */ - -void -que_graph_free_recursive( -/*=====================*/ - que_node_t* node); /* in: query graph node */ -/************************************************************************** -Frees a query graph. */ - -void -que_graph_free( -/*===========*/ - que_t* graph); /* in: query graph; we assume that the memory - heap where this graph was created is private - to this graph: if not, then use - que_graph_free_recursive and free the heap - afterwards! */ -/************************************************************************** -Stops a query thread if graph or trx is in a state requiring it. The -conditions are tested in the order (1) graph, (2) trx. The kernel mutex has -to be reserved. 
*/ - -ibool -que_thr_stop( -/*=========*/ - /* out: TRUE if stopped */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************** -Moves a thread from another state to the QUE_THR_RUNNING state. Increments -the n_active_thrs counters of the query graph and transaction. */ - -void -que_thr_move_to_run_state_for_mysql( -/*================================*/ - que_thr_t* thr, /* in: a query thread */ - trx_t* trx); /* in: transaction */ -/************************************************************************** -A patch for MySQL used to 'stop' a dummy query thread used in MySQL -select, when there is no error or lock wait. */ - -void -que_thr_stop_for_mysql_no_error( -/*============================*/ - que_thr_t* thr, /* in: query thread */ - trx_t* trx); /* in: transaction */ -/************************************************************************** -A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The -query thread is stopped and made inactive, except in the case where -it was put to the lock wait state in lock0lock.c, but the lock has already -been granted or the transaction chosen as a victim in deadlock resolution. */ - -void -que_thr_stop_for_mysql( -/*===================*/ - que_thr_t* thr); /* in: query thread */ -/************************************************************************** -Run a query thread. Handles lock waits. */ - -void -que_run_threads( -/*============*/ - que_thr_t* thr); /* in: query thread */ -/************************************************************************** -After signal handling is finished, returns control to a query graph error -handling routine. (Currently, just returns the control to the root of the -graph so that the graph can communicate an error message to the client.) 
*/ - -void -que_fork_error_handle( -/*==================*/ - trx_t* trx, /* in: trx */ - que_t* fork); /* in: query graph which was run before signal - handling started, NULL not allowed */ -/************************************************************************** -Moves a suspended query thread to the QUE_THR_RUNNING state and releases -a single worker thread to execute it. This function should be used to end -the wait state of a query thread waiting for a lock or a stored procedure -completion. */ - -void -que_thr_end_wait( -/*=============*/ - que_thr_t* thr, /* in: query thread in the - QUE_THR_LOCK_WAIT, - or QUE_THR_PROCEDURE_WAIT, or - QUE_THR_SIG_REPLY_WAIT state */ - que_thr_t** next_thr); /* in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread */ -/************************************************************************** -Same as que_thr_end_wait, but no parameter next_thr available. */ - -void -que_thr_end_wait_no_next_thr( -/*=========================*/ - que_thr_t* thr); /* in: query thread in the - QUE_THR_LOCK_WAIT, - or QUE_THR_PROCEDURE_WAIT, or - QUE_THR_SIG_REPLY_WAIT state */ -/************************************************************************** -Starts execution of a command in a query fork. Picks a query thread which -is not in the QUE_THR_RUNNING state and moves it to that state. If none -can be chosen, a situation which may arise in parallelized fetches, NULL -is returned. */ - -que_thr_t* -que_fork_start_command( -/*===================*/ - /* out: a query thread of the graph moved to - QUE_THR_RUNNING state, or NULL; the query - thread should be executed by que_run_threads - by the caller */ - que_fork_t* fork); /* in: a query fork */ -/*************************************************************************** -Gets the trx of a query thread. 
*/ -UNIV_INLINE -trx_t* -thr_get_trx( -/*========*/ - que_thr_t* thr); /* in: query thread */ -/*************************************************************************** -Gets the type of a graph node. */ -UNIV_INLINE -ulint -que_node_get_type( -/*==============*/ - que_node_t* node); /* in: graph node */ -/*************************************************************************** -Gets pointer to the value data type field of a graph node. */ -UNIV_INLINE -dtype_t* -que_node_get_data_type( -/*===================*/ - que_node_t* node); /* in: graph node */ -/*************************************************************************** -Gets pointer to the value dfield of a graph node. */ -UNIV_INLINE -dfield_t* -que_node_get_val( -/*=============*/ - que_node_t* node); /* in: graph node */ -/*************************************************************************** -Gets the value buffer size of a graph node. */ -UNIV_INLINE -ulint -que_node_get_val_buf_size( -/*======================*/ - /* out: val buffer size, not defined if - val.data == NULL in node */ - que_node_t* node); /* in: graph node */ -/*************************************************************************** -Sets the value buffer size of a graph node. */ -UNIV_INLINE -void -que_node_set_val_buf_size( -/*======================*/ - que_node_t* node, /* in: graph node */ - ulint size); /* in: size */ -/************************************************************************* -Gets the next list node in a list of query graph nodes. */ -UNIV_INLINE -que_node_t* -que_node_get_next( -/*==============*/ - que_node_t* node); /* in: node in a list */ -/************************************************************************* -Gets the parent node of a query graph node. 
*/ -UNIV_INLINE -que_node_t* -que_node_get_parent( -/*================*/ - /* out: parent node or NULL */ - que_node_t* node); /* in: node */ -/******************************************************************** -Get the first containing loop node (e.g. while_node_t or for_node_t) for the -given node, or NULL if the node is not within a loop. */ - -que_node_t* -que_node_get_containing_loop_node( -/*==============================*/ - /* out: containing loop node, or NULL. */ - que_node_t* node); /* in: node */ -/************************************************************************* -Catenates a query graph node to a list of them, possibly empty list. */ -UNIV_INLINE -que_node_t* -que_node_list_add_last( -/*===================*/ - /* out: one-way list of nodes */ - que_node_t* node_list, /* in: node list, or NULL */ - que_node_t* node); /* in: node */ -/************************************************************************* -Gets a query graph node list length. */ -UNIV_INLINE -ulint -que_node_list_get_len( -/*==================*/ - /* out: length, for NULL list 0 */ - que_node_t* node_list); /* in: node list, or NULL */ -/************************************************************************** -Checks if graph, trx, or session is in a state where the query thread should -be stopped. */ -UNIV_INLINE -ibool -que_thr_peek_stop( -/*==============*/ - /* out: TRUE if should be stopped; NOTE that - if the peek is made without reserving the - kernel mutex, then another peek with the - mutex reserved is necessary before deciding - the actual stopping */ - que_thr_t* thr); /* in: query thread */ -/*************************************************************************** -Returns TRUE if the query graph is for a SELECT statement. 
*/ -UNIV_INLINE -ibool -que_graph_is_select( -/*================*/ - /* out: TRUE if a select */ - que_t* graph); /* in: graph */ -/************************************************************************** -Prints info of an SQL query graph node. */ - -void -que_node_print_info( -/*================*/ - que_node_t* node); /* in: query graph node */ -/************************************************************************* -Evaluate the given SQL */ - -ulint -que_eval_sql( -/*=========*/ - /* out: error code or DB_SUCCESS */ - pars_info_t* info, /* in: info struct, or NULL */ - const char* sql, /* in: SQL string */ - ibool reserve_dict_mutex, - /* in: if TRUE, acquire/release - dict_sys->mutex around call to pars_sql. */ - trx_t* trx); /* in: trx */ - -/* Query graph query thread node: the fields are protected by the kernel -mutex with the exceptions named below */ - -struct que_thr_struct{ - que_common_t common; /* type: QUE_NODE_THR */ - ulint magic_n; /* magic number to catch memory - corruption */ - que_node_t* child; /* graph child node */ - que_t* graph; /* graph where this node belongs */ - ibool is_active; /* TRUE if the thread has been set - to the run state in - que_thr_move_to_run_state, but not - deactivated in - que_thr_dec_reference_count */ - ulint state; /* state of the query thread */ - UT_LIST_NODE_T(que_thr_t) - thrs; /* list of thread nodes of the fork - node */ - UT_LIST_NODE_T(que_thr_t) - trx_thrs; /* lists of threads in wait list of - the trx */ - UT_LIST_NODE_T(que_thr_t) - queue; /* list of runnable thread nodes in - the server task queue */ - /*------------------------------*/ - /* The following fields are private to the OS thread executing the - query thread, and are not protected by the kernel mutex: */ - - que_node_t* run_node; /* pointer to the node where the - subgraph down from this node is - currently executed */ - que_node_t* prev_node; /* pointer to the node from which - the control came */ - ulint resource; /* resource usage of 
the query thread - thus far */ - ulint lock_state; /* lock state of thread (table or - row) */ -}; - -#define QUE_THR_MAGIC_N 8476583 -#define QUE_THR_MAGIC_FREED 123461526 - -/* Query graph fork node: its fields are protected by the kernel mutex */ -struct que_fork_struct{ - que_common_t common; /* type: QUE_NODE_FORK */ - que_t* graph; /* query graph of this node */ - ulint fork_type; /* fork type */ - ulint n_active_thrs; /* if this is the root of a graph, the - number of query threads that have been - started in que_thr_move_to_run_state - but for which que_thr_dec_refer_count - has not yet been called */ - trx_t* trx; /* transaction: this is set only in - the root node */ - ulint state; /* state of the fork node */ - que_thr_t* caller; /* pointer to a possible calling query - thread */ - UT_LIST_BASE_NODE_T(que_thr_t) - thrs; /* list of query threads */ - /*------------------------------*/ - /* The fields in this section are defined only in the root node */ - sym_tab_t* sym_tab; /* symbol table of the query, - generated by the parser, or NULL - if the graph was created 'by hand' */ - pars_info_t* info; /* info struct, or NULL */ - /* The following cur_... 
fields are relevant only in a select graph */ - - ulint cur_end; /* QUE_CUR_NOT_DEFINED, QUE_CUR_START, - QUE_CUR_END */ - ulint cur_pos; /* if there are n rows in the result - set, values 0 and n + 1 mean before - first row, or after last row, depending - on cur_end; values 1...n mean a row - index */ - ibool cur_on_row; /* TRUE if cursor is on a row, i.e., - it is not before the first row or - after the last row */ - dulint n_inserts; /* number of rows inserted */ - dulint n_updates; /* number of rows updated */ - dulint n_deletes; /* number of rows deleted */ - sel_node_t* last_sel_node; /* last executed select node, or NULL - if none */ - UT_LIST_NODE_T(que_fork_t) - graphs; /* list of query graphs of a session - or a stored procedure */ - /*------------------------------*/ - mem_heap_t* heap; /* memory heap where the fork was - created */ - -}; - -/* Query fork (or graph) types */ -#define QUE_FORK_SELECT_NON_SCROLL 1 /* forward-only cursor */ -#define QUE_FORK_SELECT_SCROLL 2 /* scrollable cursor */ -#define QUE_FORK_INSERT 3 -#define QUE_FORK_UPDATE 4 -#define QUE_FORK_ROLLBACK 5 - /* This is really the undo graph used in rollback, - no signal-sending roll_node in this graph */ -#define QUE_FORK_PURGE 6 -#define QUE_FORK_EXECUTE 7 -#define QUE_FORK_PROCEDURE 8 -#define QUE_FORK_PROCEDURE_CALL 9 -#define QUE_FORK_MYSQL_INTERFACE 10 -#define QUE_FORK_RECOVERY 11 - -/* Query fork (or graph) states */ -#define QUE_FORK_ACTIVE 1 -#define QUE_FORK_COMMAND_WAIT 2 -#define QUE_FORK_INVALID 3 -#define QUE_FORK_BEING_FREED 4 - -/* Flag which is ORed to control structure statement node types */ -#define QUE_NODE_CONTROL_STAT 1024 - -/* Query graph node types */ -#define QUE_NODE_LOCK 1 -#define QUE_NODE_INSERT 2 -#define QUE_NODE_UPDATE 4 -#define QUE_NODE_CURSOR 5 -#define QUE_NODE_SELECT 6 -#define QUE_NODE_AGGREGATE 7 -#define QUE_NODE_FORK 8 -#define QUE_NODE_THR 9 -#define QUE_NODE_UNDO 10 -#define QUE_NODE_COMMIT 11 -#define QUE_NODE_ROLLBACK 12 -#define 
QUE_NODE_PURGE 13 -#define QUE_NODE_CREATE_TABLE 14 -#define QUE_NODE_CREATE_INDEX 15 -#define QUE_NODE_SYMBOL 16 -#define QUE_NODE_RES_WORD 17 -#define QUE_NODE_FUNC 18 -#define QUE_NODE_ORDER 19 -#define QUE_NODE_PROC (20 + QUE_NODE_CONTROL_STAT) -#define QUE_NODE_IF (21 + QUE_NODE_CONTROL_STAT) -#define QUE_NODE_WHILE (22 + QUE_NODE_CONTROL_STAT) -#define QUE_NODE_ASSIGNMENT 23 -#define QUE_NODE_FETCH 24 -#define QUE_NODE_OPEN 25 -#define QUE_NODE_COL_ASSIGNMENT 26 -#define QUE_NODE_FOR (27 + QUE_NODE_CONTROL_STAT) -#define QUE_NODE_RETURN 28 -#define QUE_NODE_ROW_PRINTF 29 -#define QUE_NODE_ELSIF 30 -#define QUE_NODE_CALL 31 -#define QUE_NODE_EXIT 32 - -/* Query thread states */ -#define QUE_THR_RUNNING 1 -#define QUE_THR_PROCEDURE_WAIT 2 -#define QUE_THR_COMPLETED 3 /* in selects this means that the - thread is at the end of its result set - (or start, in case of a scroll cursor); - in other statements, this means the - thread has done its task */ -#define QUE_THR_COMMAND_WAIT 4 -#define QUE_THR_LOCK_WAIT 5 -#define QUE_THR_SIG_REPLY_WAIT 6 -#define QUE_THR_SUSPENDED 7 -#define QUE_THR_ERROR 8 - -/* Query thread lock states */ -#define QUE_THR_LOCK_NOLOCK 0 -#define QUE_THR_LOCK_ROW 1 -#define QUE_THR_LOCK_TABLE 2 - -/* From where the cursor position is counted */ -#define QUE_CUR_NOT_DEFINED 1 -#define QUE_CUR_START 2 -#define QUE_CUR_END 3 - - -#ifndef UNIV_NONINL -#include "que0que.ic" -#endif - -#endif diff --git a/storage/innobase/include/que0que.ic b/storage/innobase/include/que0que.ic deleted file mode 100644 index a20108a7820..00000000000 --- a/storage/innobase/include/que0que.ic +++ /dev/null @@ -1,259 +0,0 @@ -/****************************************************** -Query graph - -(c) 1996 Innobase Oy - -Created 5/27/1996 Heikki Tuuri -*******************************************************/ - -#include "usr0sess.h" - -/*************************************************************************** -Gets the trx of a query thread. 
*/ -UNIV_INLINE -trx_t* -thr_get_trx( -/*========*/ - que_thr_t* thr) /* in: query thread */ -{ - ut_ad(thr); - - return(thr->graph->trx); -} - -/*************************************************************************** -Gets the first thr in a fork. */ -UNIV_INLINE -que_thr_t* -que_fork_get_first_thr( -/*===================*/ - que_fork_t* fork) /* in: query fork */ -{ - return(UT_LIST_GET_FIRST(fork->thrs)); -} - -/*************************************************************************** -Gets the child node of the first thr in a fork. */ -UNIV_INLINE -que_node_t* -que_fork_get_child( -/*===============*/ - que_fork_t* fork) /* in: query fork */ -{ - que_thr_t* thr; - - thr = UT_LIST_GET_FIRST(fork->thrs); - - return(thr->child); -} - -/*************************************************************************** -Gets the type of a graph node. */ -UNIV_INLINE -ulint -que_node_get_type( -/*==============*/ - que_node_t* node) /* in: graph node */ -{ - ut_ad(node); - - return(((que_common_t*)node)->type); -} - -/*************************************************************************** -Gets pointer to the value dfield of a graph node. */ -UNIV_INLINE -dfield_t* -que_node_get_val( -/*=============*/ - que_node_t* node) /* in: graph node */ -{ - ut_ad(node); - - return(&(((que_common_t*)node)->val)); -} - -/*************************************************************************** -Gets the value buffer size of a graph node. */ -UNIV_INLINE -ulint -que_node_get_val_buf_size( -/*======================*/ - /* out: val buffer size, not defined if - val.data == NULL in node */ - que_node_t* node) /* in: graph node */ -{ - ut_ad(node); - - return(((que_common_t*)node)->val_buf_size); -} - -/*************************************************************************** -Sets the value buffer size of a graph node. 
*/ -UNIV_INLINE -void -que_node_set_val_buf_size( -/*======================*/ - que_node_t* node, /* in: graph node */ - ulint size) /* in: size */ -{ - ut_ad(node); - - ((que_common_t*)node)->val_buf_size = size; -} - -/*************************************************************************** -Sets the parent of a graph node. */ -UNIV_INLINE -void -que_node_set_parent( -/*================*/ - que_node_t* node, /* in: graph node */ - que_node_t* parent) /* in: parent */ -{ - ut_ad(node); - - ((que_common_t*)node)->parent = parent; -} - -/*************************************************************************** -Gets pointer to the value data type field of a graph node. */ -UNIV_INLINE -dtype_t* -que_node_get_data_type( -/*===================*/ - que_node_t* node) /* in: graph node */ -{ - ut_ad(node); - - return(&(((que_common_t*)node)->val.type)); -} - -/************************************************************************* -Catenates a query graph node to a list of them, possibly empty list. The -node's 'brother' pointer is reset to NULL and the node is linked after the -last node of the list; for a NULL list the node itself is returned. */ -UNIV_INLINE -que_node_t* -que_node_list_add_last( -/*===================*/ - /* out: one-way list of nodes */ - que_node_t* node_list, /* in: node list, or NULL */ - que_node_t* node) /* in: node */ -{ - que_common_t* cnode; - que_common_t* cnode2; - - cnode = node; - - cnode->brother = NULL; - - if (node_list == NULL) { - - return(node); - } - - cnode2 = node_list; - - while (cnode2->brother != NULL) { - cnode2 = cnode2->brother; - } - - cnode2->brother = node; - - return(node_list); -} - -/************************************************************************* -Gets the next list node in a list of query graph nodes. */ -UNIV_INLINE -que_node_t* -que_node_get_next( -/*==============*/ - /* out: next node in a list of nodes */ - que_node_t* node) /* in: node in a list */ -{ - return(((que_common_t*)node)->brother); -} - -/************************************************************************* -Gets a query graph node list length. 
*/ -UNIV_INLINE -ulint -que_node_list_get_len( -/*==================*/ - /* out: length, for NULL list 0 */ - que_node_t* node_list) /* in: node list, or NULL */ -{ - que_common_t* cnode; - ulint len; - - cnode = node_list; - len = 0; - - while (cnode != NULL) { - len++; - cnode = cnode->brother; - } - - return(len); -} - -/************************************************************************* -Gets the parent node of a query graph node. */ -UNIV_INLINE -que_node_t* -que_node_get_parent( -/*================*/ - /* out: parent node or NULL */ - que_node_t* node) /* in: node */ -{ - return(((que_common_t*)node)->parent); -} - -/************************************************************************** -Checks if graph, trx, or session is in a state where the query thread should -be stopped. */ -UNIV_INLINE -ibool -que_thr_peek_stop( -/*==============*/ - /* out: TRUE if should be stopped; NOTE that - if the peek is made without reserving the - kernel mutex, then another peek with the - mutex reserved is necessary before deciding - the actual stopping */ - que_thr_t* thr) /* in: query thread */ -{ - trx_t* trx; - que_t* graph; - - graph = thr->graph; - trx = graph->trx; - - if (graph->state != QUE_FORK_ACTIVE - || trx->que_state == TRX_QUE_LOCK_WAIT - || (UT_LIST_GET_LEN(trx->signals) > 0 - && trx->que_state == TRX_QUE_RUNNING)) { - - return(TRUE); - } - - return(FALSE); -} - -/*************************************************************************** -Returns TRUE if the query graph is for a SELECT statement. 
*/ -UNIV_INLINE -ibool -que_graph_is_select( -/*================*/ - /* out: TRUE if a select */ - que_t* graph) /* in: graph */ -{ - if (graph->fork_type == QUE_FORK_SELECT_SCROLL - || graph->fork_type == QUE_FORK_SELECT_NON_SCROLL) { - - return(TRUE); - } - - return(FALSE); -} diff --git a/storage/innobase/include/que0types.h b/storage/innobase/include/que0types.h deleted file mode 100644 index 30e3f0a172b..00000000000 --- a/storage/innobase/include/que0types.h +++ /dev/null @@ -1,43 +0,0 @@ -/****************************************************** -Query graph global types - -(c) 1996 Innobase Oy - -Created 5/27/1996 Heikki Tuuri -*******************************************************/ - -#ifndef que0types_h -#define que0types_h - -#include "data0data.h" -#include "dict0types.h" - -/* Pseudotype for all graph nodes */ -typedef void que_node_t; - -typedef struct que_fork_struct que_fork_t; - -/* Query graph root is a fork node */ -typedef que_fork_t que_t; - -typedef struct que_thr_struct que_thr_t; -typedef struct que_common_struct que_common_t; - -/* Common struct at the beginning of each query graph node; the name of this -substruct must be 'common' */ - -struct que_common_struct{ - ulint type; /* query node type */ - que_node_t* parent; /* back pointer to parent node, or NULL */ - que_node_t* brother;/* pointer to a possible brother node */ - dfield_t val; /* evaluated value for an expression */ - ulint val_buf_size; - /* buffer size for the evaluated value data, - if the buffer has been allocated dynamically: - if this field is != 0, and the node is a - symbol node or a function node, then we - have to free the data field in val - explicitly */ -}; - -#endif diff --git a/storage/innobase/include/read0read.h b/storage/innobase/include/read0read.h deleted file mode 100644 index 97b6d7e9dd9..00000000000 --- a/storage/innobase/include/read0read.h +++ /dev/null @@ -1,165 +0,0 @@ -/****************************************************** -Cursor read - -(c) 1997 
Innobase Oy - -Created 2/16/1997 Heikki Tuuri -*******************************************************/ - -#ifndef read0read_h -#define read0read_h - -#include "univ.i" - - -#include "ut0byte.h" -#include "ut0lst.h" -#include "trx0trx.h" -#include "read0types.h" - -/************************************************************************* -Opens a read view where exactly the transactions serialized before this -point in time are seen in the view. */ - -read_view_t* -read_view_open_now( -/*===============*/ - /* out, own: read view struct */ - dulint cr_trx_id, /* in: trx_id of creating - transaction, or (0, 0) used in - purge */ - mem_heap_t* heap); /* in: memory heap from which - allocated */ -/************************************************************************* -Makes a copy of the oldest existing read view, or opens a new. The view -must be closed with ..._close. */ - -read_view_t* -read_view_oldest_copy_or_open_new( -/*==============================*/ - /* out, own: read view struct */ - dulint cr_trx_id, /* in: trx_id of creating - transaction, or (0, 0) used in - purge */ - mem_heap_t* heap); /* in: memory heap from which - allocated */ -/************************************************************************* -Closes a read view. */ - -void -read_view_close( -/*============*/ - read_view_t* view); /* in: read view */ -/************************************************************************* -Closes a consistent read view for MySQL. This function is called at an SQL -statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */ - -void -read_view_close_for_mysql( -/*======================*/ - trx_t* trx); /* in: trx which has a read view */ -/************************************************************************* -Checks if a read view sees the specified transaction. 
*/ -UNIV_INLINE -ibool -read_view_sees_trx_id( -/*==================*/ - /* out: TRUE if sees */ - read_view_t* view, /* in: read view */ - dulint trx_id);/* in: trx id */ -/************************************************************************* -Prints a read view to stderr. */ - -void -read_view_print( -/*============*/ - read_view_t* view); /* in: read view */ -/************************************************************************* -Create a consistent cursor view for mysql to be used in cursors. In this -consistent read view modifications done by the creating transaction or future -transactions are not visible. */ - -cursor_view_t* -read_cursor_view_create_for_mysql( -/*==============================*/ - trx_t* cr_trx);/* in: trx where cursor view is created */ -/************************************************************************* -Close a given consistent cursor view for mysql and restore global read view -back to a transaction read view. */ - -void -read_cursor_view_close_for_mysql( -/*=============================*/ - trx_t* trx, /* in: trx */ - cursor_view_t* curview); /* in: cursor view to be closed */ -/************************************************************************* -This function sets a given consistent cursor view to a transaction -read view if given consistent cursor view is not NULL. Otherwise, function -restores a global read view to a transaction read view. */ - -void -read_cursor_set_for_mysql( -/*======================*/ - trx_t* trx, /* in: transaction where cursor is set */ - cursor_view_t* curview);/* in: consistent cursor view to be set */ - -/* Read view lists the trx ids of those transactions for which a consistent -read should not see the modifications to the database. 
*/ - -struct read_view_struct{ - ulint type; /* VIEW_NORMAL, VIEW_HIGH_GRANULARITY */ - dulint undo_no; /* (0, 0) or if type is VIEW_HIGH_GRANULARITY - transaction undo_no when this high-granularity - consistent read view was created */ - dulint low_limit_no; /* The view does not need to see the undo - logs for transactions whose transaction number - is strictly smaller (<) than this value: they - can be removed in purge if not needed by other - views */ - dulint low_limit_id; /* The read should not see any transaction - with trx id >= this value */ - dulint up_limit_id; /* The read should see all trx ids which - are strictly smaller (<) than this value */ - ulint n_trx_ids; /* Number of cells in the trx_ids array */ - dulint* trx_ids; /* Additional trx ids which the read should - not see: typically, these are the active - transactions at the time when the read is - serialized, except the reading transaction - itself; the trx ids in this array are in a - descending order */ - dulint creator_trx_id; /* trx id of creating transaction, or - (0, 0) used in purge */ - UT_LIST_NODE_T(read_view_t) view_list; - /* List of read views in trx_sys */ -}; - -/* Read view types */ -#define VIEW_NORMAL 1 /* Normal consistent read view - where transaction does not see changes - made by active transactions except - creating transaction. */ -#define VIEW_HIGH_GRANULARITY 2 /* High-granularity read view where - transaction does not see changes - made by active transactions and own - changes after a point in time when this - read view was created. */ - -/* Implement InnoDB framework to support consistent read views in -cursors. This struct holds both heap where consistent read view -is allocated and pointer to a read view. 
*/ - -struct cursor_view_struct{ - mem_heap_t* heap; - /* Memory heap for the cursor view */ - read_view_t* read_view; - /* Consistent read view of the cursor*/ - ulint n_mysql_tables_in_use; - /* number of Innobase tables used in the - processing of this cursor */ -}; - -#ifndef UNIV_NONINL -#include "read0read.ic" -#endif - -#endif diff --git a/storage/innobase/include/read0read.ic b/storage/innobase/include/read0read.ic deleted file mode 100644 index 3aded1ca07c..00000000000 --- a/storage/innobase/include/read0read.ic +++ /dev/null @@ -1,81 +0,0 @@ -/****************************************************** -Cursor read - -(c) 1997 Innobase Oy - -Created 2/16/1997 Heikki Tuuri -*******************************************************/ - -/************************************************************************* -Gets the nth trx id in a read view. */ -UNIV_INLINE -dulint -read_view_get_nth_trx_id( -/*=====================*/ - /* out: trx id */ - read_view_t* view, /* in: read view */ - ulint n) /* in: position */ -{ - ut_ad(n < view->n_trx_ids); - - return(*(view->trx_ids + n)); -} - -/************************************************************************* -Sets the nth trx id in a read view. */ -UNIV_INLINE -void -read_view_set_nth_trx_id( -/*=====================*/ - read_view_t* view, /* in: read view */ - ulint n, /* in: position */ - dulint trx_id) /* in: trx id to set */ -{ - ut_ad(n < view->n_trx_ids); - - *(view->trx_ids + n) = trx_id; -} - -/************************************************************************* -Checks if a read view sees the specified transaction. 
*/ -UNIV_INLINE -ibool -read_view_sees_trx_id( -/*==================*/ - /* out: TRUE if sees */ - read_view_t* view, /* in: read view */ - dulint trx_id) /* in: trx id */ -{ - ulint n_ids; - int cmp; - ulint i; - - if (ut_dulint_cmp(trx_id, view->up_limit_id) < 0) { - - return(TRUE); - } - - if (ut_dulint_cmp(trx_id, view->low_limit_id) >= 0) { - - return(FALSE); - } - - /* We go through the trx ids in the array smallest first: this order - may save CPU time, because if there was a very long running - transaction in the trx id array, its trx id is looked at first, and - the first two comparisons may well decide the visibility of trx_id. */ - - n_ids = view->n_trx_ids; - - for (i = 0; i < n_ids; i++) { - - cmp = ut_dulint_cmp( - trx_id, - read_view_get_nth_trx_id(view, n_ids - i - 1)); - if (cmp <= 0) { - return(cmp < 0); - } - } - - return(TRUE); -} diff --git a/storage/innobase/include/read0types.h b/storage/innobase/include/read0types.h deleted file mode 100644 index 7d42728523e..00000000000 --- a/storage/innobase/include/read0types.h +++ /dev/null @@ -1,15 +0,0 @@ -/****************************************************** -Cursor read - -(c) 1997 Innobase Oy - -Created 2/16/1997 Heikki Tuuri -*******************************************************/ - -#ifndef read0types_h -#define read0types_h - -typedef struct read_view_struct read_view_t; -typedef struct cursor_view_struct cursor_view_t; - -#endif diff --git a/storage/innobase/include/rem0cmp.h b/storage/innobase/include/rem0cmp.h deleted file mode 100644 index c6a6e5de4db..00000000000 --- a/storage/innobase/include/rem0cmp.h +++ /dev/null @@ -1,173 +0,0 @@ -/*********************************************************************** -Comparison services for records - -(c) 1994-2001 Innobase Oy - -Created 7/1/1994 Heikki Tuuri -************************************************************************/ - -#ifndef rem0cmp_h -#define rem0cmp_h - -#include "univ.i" -#include "data0data.h" -#include "data0type.h" 
-#include "dict0dict.h" -#include "rem0rec.h" - -/***************************************************************** -Returns TRUE if two columns are equal for comparison purposes. */ - -ibool -cmp_cols_are_equal( -/*===============*/ - /* out: TRUE if the columns are - considered equal in comparisons */ - const dict_col_t* col1, /* in: column 1 */ - const dict_col_t* col2, /* in: column 2 */ - ibool check_charsets); - /* in: whether to check charsets */ -/***************************************************************** -This function is used to compare two data fields for which we know the -data type. */ -UNIV_INLINE -int -cmp_data_data( -/*==========*/ - /* out: 1, 0, -1, if data1 is greater, equal, - less than data2, respectively */ - ulint mtype, /* in: main type */ - ulint prtype, /* in: precise type */ - byte* data1, /* in: data field (== a pointer to a memory - buffer) */ - ulint len1, /* in: data field length or UNIV_SQL_NULL */ - byte* data2, /* in: data field (== a pointer to a memory - buffer) */ - ulint len2); /* in: data field length or UNIV_SQL_NULL */ -/***************************************************************** -This function is used to compare two data fields for which we know the -data type. */ - -int -cmp_data_data_slow( -/*===============*/ - /* out: 1, 0, -1, if data1 is greater, equal, - less than data2, respectively */ - ulint mtype, /* in: main type */ - ulint prtype, /* in: precise type */ - byte* data1, /* in: data field (== a pointer to a memory - buffer) */ - ulint len1, /* in: data field length or UNIV_SQL_NULL */ - byte* data2, /* in: data field (== a pointer to a memory - buffer) */ - ulint len2); /* in: data field length or UNIV_SQL_NULL */ -/***************************************************************** -This function is used to compare two dfields where at least the first -has its data type field set. 
*/ -UNIV_INLINE -int -cmp_dfield_dfield( -/*==============*/ - /* out: 1, 0, -1, if dfield1 is greater, equal, - less than dfield2, respectively */ - dfield_t* dfield1,/* in: data field; must have type field set */ - dfield_t* dfield2);/* in: data field */ -/***************************************************************** -This function is used to compare a data tuple to a physical record. -Only dtuple->n_fields_cmp first fields are taken into account for -the the data tuple! If we denote by n = n_fields_cmp, then rec must -have either m >= n fields, or it must differ from dtuple in some of -the m fields rec has. If rec has an externally stored field we do not -compare it but return with value 0 if such a comparison should be -made. */ - -int -cmp_dtuple_rec_with_match( -/*======================*/ - /* out: 1, 0, -1, if dtuple is greater, equal, - less than rec, respectively, when only the - common first fields are compared, or - until the first externally stored field in - rec */ - dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec, /* in: physical record which differs from - dtuple in some of the common fields, or which - has an equal number or more fields than - dtuple */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint* matched_fields, /* in/out: number of already completely - matched fields; when function returns, - contains the value for current comparison */ - ulint* matched_bytes); /* in/out: number of already matched - bytes within the first field not completely - matched; when function returns, contains the - value for current comparison */ -/****************************************************************** -Compares a data tuple to a physical record. 
*/ - -int -cmp_dtuple_rec( -/*===========*/ - /* out: 1, 0, -1, if dtuple is greater, equal, - less than rec, respectively; see the comments - for cmp_dtuple_rec_with_match */ - dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec, /* in: physical record */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/****************************************************************** -Checks if a dtuple is a prefix of a record. The last field in dtuple -is allowed to be a prefix of the corresponding field in the record. */ - -ibool -cmp_dtuple_is_prefix_of_rec( -/*========================*/ - /* out: TRUE if prefix */ - dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec, /* in: physical record */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/***************************************************************** -This function is used to compare two physical records. Only the common -first fields are compared, and if an externally stored field is -encountered, then 0 is returned. */ - -int -cmp_rec_rec_with_match( -/*===================*/ - /* out: 1, 0 , -1 if rec1 is greater, equal, - less, respectively, than rec2; only the common - first fields are compared */ - rec_t* rec1, /* in: physical record */ - rec_t* rec2, /* in: physical record */ - const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ - const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ - dict_index_t* index, /* in: data dictionary index */ - ulint* matched_fields, /* in/out: number of already completely - matched fields; when the function returns, - contains the value the for current - comparison */ - ulint* matched_bytes);/* in/out: number of already matched - bytes within the first field not completely - matched; when the function returns, contains - the value for the current comparison */ -/***************************************************************** -This function is used to compare two physical records. 
Only the common -first fields are compared. */ -UNIV_INLINE -int -cmp_rec_rec( -/*========*/ - /* out: 1, 0 , -1 if rec1 is greater, equal, - less, respectively, than rec2; only the common - first fields are compared */ - rec_t* rec1, /* in: physical record */ - rec_t* rec2, /* in: physical record */ - const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ - const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ - dict_index_t* index); /* in: data dictionary index */ - - -#ifndef UNIV_NONINL -#include "rem0cmp.ic" -#endif - -#endif diff --git a/storage/innobase/include/rem0cmp.ic b/storage/innobase/include/rem0cmp.ic deleted file mode 100644 index 52dc7ff5dc9..00000000000 --- a/storage/innobase/include/rem0cmp.ic +++ /dev/null @@ -1,76 +0,0 @@ -/*********************************************************************** -Comparison services for records - -(c) 1994-1996 Innobase Oy - -Created 7/1/1994 Heikki Tuuri -************************************************************************/ - -/***************************************************************** -This function is used to compare two data fields for which we know the -data type. */ -UNIV_INLINE -int -cmp_data_data( -/*==========*/ - /* out: 1, 0, -1, if data1 is greater, equal, - less than data2, respectively */ - ulint mtype, /* in: main type */ - ulint prtype, /* in: precise type */ - byte* data1, /* in: data field (== a pointer to a memory - buffer) */ - ulint len1, /* in: data field length or UNIV_SQL_NULL */ - byte* data2, /* in: data field (== a pointer to a memory - buffer) */ - ulint len2) /* in: data field length or UNIV_SQL_NULL */ -{ - return(cmp_data_data_slow(mtype, prtype, data1, len1, data2, len2)); -} - -/***************************************************************** -This function is used to compare two dfields where at least the first -has its data type field set. 
*/ -UNIV_INLINE -int -cmp_dfield_dfield( -/*==============*/ - /* out: 1, 0, -1, if dfield1 is greater, equal, - less than dfield2, respectively */ - dfield_t* dfield1,/* in: data field; must have type field set */ - dfield_t* dfield2)/* in: data field */ -{ - const dtype_t* type; - - ut_ad(dfield_check_typed(dfield1)); - - type = dfield_get_type(dfield1); - - return(cmp_data_data(type->mtype, type->prtype, - dfield_get_data(dfield1), - dfield_get_len(dfield1), - dfield_get_data(dfield2), - dfield_get_len(dfield2))); -} - -/***************************************************************** -This function is used to compare two physical records. Only the common -first fields are compared. */ -UNIV_INLINE -int -cmp_rec_rec( -/*========*/ - /* out: 1, 0 , -1 if rec1 is greater, equal, - less, respectively, than rec2; only the common - first fields are compared */ - rec_t* rec1, /* in: physical record */ - rec_t* rec2, /* in: physical record */ - const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ - const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ - dict_index_t* index) /* in: data dictionary index */ -{ - ulint match_f = 0; - ulint match_b = 0; - - return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index, - &match_f, &match_b)); -} diff --git a/storage/innobase/include/rem0rec.h b/storage/innobase/include/rem0rec.h deleted file mode 100644 index abc204bb583..00000000000 --- a/storage/innobase/include/rem0rec.h +++ /dev/null @@ -1,582 +0,0 @@ -/************************************************************************ -Record manager - -(c) 1994-1996 Innobase Oy - -Created 5/30/1994 Heikki Tuuri -*************************************************************************/ - -#ifndef rem0rec_h -#define rem0rec_h - -#include "univ.i" -#include "data0data.h" -#include "rem0types.h" -#include "mtr0types.h" - -/* Info bit denoting the predefined minimum record: this bit is set -if and only if the record is the first user record on a non-leaf 
-B-tree page that is the leftmost page on its level -(PAGE_LEVEL is nonzero and FIL_PAGE_PREV is FIL_NULL). */ -#define REC_INFO_MIN_REC_FLAG 0x10UL - -/* Number of extra bytes in an old-style record, -in addition to the data and the offsets */ -#define REC_N_OLD_EXTRA_BYTES 6 -/* Number of extra bytes in a new-style record, -in addition to the data and the offsets */ -#define REC_N_NEW_EXTRA_BYTES 5 - -/* Record status values */ -#define REC_STATUS_ORDINARY 0 -#define REC_STATUS_NODE_PTR 1 -#define REC_STATUS_INFIMUM 2 -#define REC_STATUS_SUPREMUM 3 - -/* Number of elements that should be initially allocated for the -offsets[] array, first passed to rec_get_offsets() */ -#define REC_OFFS_NORMAL_SIZE 100 -#define REC_OFFS_SMALL_SIZE 10 - -/********************************************************** -The following function is used to get the offset of the -next chained record on the same page. */ -UNIV_INLINE -ulint -rec_get_next_offs( -/*==============*/ - /* out: the page offset of the next - chained record */ - rec_t* rec, /* in: physical record */ - ulint comp); /* in: nonzero=compact page format */ -/********************************************************** -The following function is used to set the next record offset field -of the record. */ -UNIV_INLINE -void -rec_set_next_offs( -/*==============*/ - rec_t* rec, /* in: physical record */ - ulint comp, /* in: nonzero=compact page format */ - ulint next); /* in: offset of the next record */ -/********************************************************** -The following function is used to get the number of fields -in an old-style record. */ -UNIV_INLINE -ulint -rec_get_n_fields_old( -/*=================*/ - /* out: number of data fields */ - rec_t* rec); /* in: physical record */ -/********************************************************** -The following function is used to get the number of fields -in a record. 
*/ -UNIV_INLINE -ulint -rec_get_n_fields( -/*=============*/ - /* out: number of data fields */ - rec_t* rec, /* in: physical record */ - dict_index_t* index); /* in: record descriptor */ -/********************************************************** -The following function is used to get the number of records -owned by the previous directory record. */ -UNIV_INLINE -ulint -rec_get_n_owned( -/*============*/ - /* out: number of owned records */ - rec_t* rec, /* in: physical record */ - ulint comp); /* in: nonzero=compact page format */ -/********************************************************** -The following function is used to set the number of owned -records. */ -UNIV_INLINE -void -rec_set_n_owned( -/*============*/ - rec_t* rec, /* in: physical record */ - ulint comp, /* in: nonzero=compact page format */ - ulint n_owned); /* in: the number of owned */ -/********************************************************** -The following function is used to retrieve the info bits of -a record. */ -UNIV_INLINE -ulint -rec_get_info_bits( -/*==============*/ - /* out: info bits */ - rec_t* rec, /* in: physical record */ - ulint comp); /* in: nonzero=compact page format */ -/********************************************************** -The following function is used to set the info bits of a record. */ -UNIV_INLINE -void -rec_set_info_bits( -/*==============*/ - rec_t* rec, /* in: physical record */ - ulint comp, /* in: nonzero=compact page format */ - ulint bits); /* in: info bits */ -/********************************************************** -The following function retrieves the status bits of a new-style record. */ -UNIV_INLINE -ulint -rec_get_status( -/*===========*/ - /* out: status bits */ - rec_t* rec); /* in: physical record */ - -/********************************************************** -The following function is used to set the status bits of a new-style record. 
*/ -UNIV_INLINE -void -rec_set_status( -/*===========*/ - rec_t* rec, /* in: physical record */ - ulint bits); /* in: info bits */ - -/********************************************************** -The following function is used to retrieve the info and status -bits of a record. (Only compact records have status bits.) */ -UNIV_INLINE -ulint -rec_get_info_and_status_bits( -/*=========================*/ - /* out: info bits */ - rec_t* rec, /* in: physical record */ - ulint comp); /* in: nonzero=compact page format */ -/********************************************************** -The following function is used to set the info and status -bits of a record. (Only compact records have status bits.) */ -UNIV_INLINE -void -rec_set_info_and_status_bits( -/*=========================*/ - rec_t* rec, /* in: physical record */ - ulint comp, /* in: nonzero=compact page format */ - ulint bits); /* in: info bits */ - -/********************************************************** -The following function tells if record is delete marked. */ -UNIV_INLINE -ulint -rec_get_deleted_flag( -/*=================*/ - /* out: nonzero if delete marked */ - rec_t* rec, /* in: physical record */ - ulint comp); /* in: nonzero=compact page format */ -/********************************************************** -The following function is used to set the deleted bit. */ -UNIV_INLINE -void -rec_set_deleted_flag( -/*=================*/ - rec_t* rec, /* in: physical record */ - ulint comp, /* in: nonzero=compact page format */ - ulint flag); /* in: nonzero if delete marked */ -/********************************************************** -The following function tells if a new-style record is a node pointer. */ -UNIV_INLINE -ibool -rec_get_node_ptr_flag( -/*==================*/ - /* out: TRUE if node pointer */ - rec_t* rec); /* in: physical record */ -/********************************************************** -The following function is used to get the order number -of the record in the heap of the index page. 
*/ -UNIV_INLINE -ulint -rec_get_heap_no( -/*============*/ - /* out: heap order number */ - rec_t* rec, /* in: physical record */ - ulint comp); /* in: nonzero=compact page format */ -/********************************************************** -The following function is used to set the heap number -field in the record. */ -UNIV_INLINE -void -rec_set_heap_no( -/*============*/ - rec_t* rec, /* in: physical record */ - ulint comp, /* in: nonzero=compact page format */ - ulint heap_no);/* in: the heap number */ -/********************************************************** -The following function is used to test whether the data offsets -in the record are stored in one-byte or two-byte format. */ -UNIV_INLINE -ibool -rec_get_1byte_offs_flag( -/*====================*/ - /* out: TRUE if 1-byte form */ - rec_t* rec); /* in: physical record */ -/********************************************************** -The following function determines the offsets to each field -in the record. It can reuse a previously allocated array. */ - -ulint* -rec_get_offsets_func( -/*=================*/ - /* out: the new offsets */ - rec_t* rec, /* in: physical record */ - dict_index_t* index, /* in: record descriptor */ - ulint* offsets,/* in: array consisting of offsets[0] - allocated elements, or an array from - rec_get_offsets(), or NULL */ - ulint n_fields,/* in: maximum number of initialized fields - (ULINT_UNDEFINED if all fields) */ - mem_heap_t** heap, /* in/out: memory heap */ - const char* file, /* in: file name where called */ - ulint line); /* in: line number where called */ - -#define rec_get_offsets(rec,index,offsets,n,heap) \ - rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__) - -/**************************************************************** -Validates offsets returned by rec_get_offsets(). 
*/ -UNIV_INLINE -ibool -rec_offs_validate( -/*==============*/ - /* out: TRUE if valid */ - rec_t* rec, /* in: record or NULL */ - dict_index_t* index, /* in: record descriptor or NULL */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/**************************************************************** -Updates debug data in offsets, in order to avoid bogus -rec_offs_validate() failures. */ -UNIV_INLINE -void -rec_offs_make_valid( -/*================*/ - rec_t* rec, /* in: record */ - dict_index_t* index,/* in: record descriptor */ - ulint* offsets);/* in: array returned by rec_get_offsets() */ - -/**************************************************************** -The following function is used to get a pointer to the nth -data field in an old-style record. */ - -byte* -rec_get_nth_field_old( -/*==================*/ - /* out: pointer to the field */ - rec_t* rec, /* in: record */ - ulint n, /* in: index of the field */ - ulint* len); /* out: length of the field; UNIV_SQL_NULL - if SQL null */ -/**************************************************************** -Gets the physical size of an old-style field. -Also an SQL null may have a field of size > 0, -if the data type is of a fixed size. */ -UNIV_INLINE -ulint -rec_get_nth_field_size( -/*===================*/ - /* out: field size in bytes */ - rec_t* rec, /* in: record */ - ulint n); /* in: index of the field */ -/**************************************************************** -The following function is used to get a pointer to the nth -data field in a record. 
*/ -UNIV_INLINE -byte* -rec_get_nth_field( -/*==============*/ - /* out: pointer to the field */ - rec_t* rec, /* in: record */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint n, /* in: index of the field */ - ulint* len); /* out: length of the field; UNIV_SQL_NULL - if SQL null */ -/********************************************************** -Determine if the offsets are for a record in the new -compact format. */ -UNIV_INLINE -ulint -rec_offs_comp( -/*==========*/ - /* out: nonzero if compact format */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/********************************************************** -Returns nonzero if the extern bit is set in nth field of rec. */ -UNIV_INLINE -ulint -rec_offs_nth_extern( -/*================*/ - /* out: nonzero if externally stored */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint n); /* in: nth field */ -/********************************************************** -Returns nonzero if the SQL NULL bit is set in nth field of rec. */ -UNIV_INLINE -ulint -rec_offs_nth_sql_null( -/*==================*/ - /* out: nonzero if SQL NULL */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint n); /* in: nth field */ -/********************************************************** -Gets the physical size of a field. */ -UNIV_INLINE -ulint -rec_offs_nth_size( -/*==============*/ - /* out: length of field */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint n); /* in: nth field */ - -/********************************************************** -Returns TRUE if the extern bit is set in any of the fields -of rec. 
*/ -UNIV_INLINE -ibool -rec_offs_any_extern( -/*================*/ - /* out: TRUE if a field is stored externally */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/*************************************************************** -Sets the value of the ith field extern storage bit. */ -UNIV_INLINE -void -rec_set_nth_field_extern_bit( -/*=========================*/ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: record descriptor */ - ulint i, /* in: ith field */ - ibool val, /* in: value to set */ - mtr_t* mtr); /* in: mtr holding an X-latch to the page - where rec is, or NULL; in the NULL case - we do not write to log about the change */ -/*************************************************************** -Sets TRUE the extern storage bits of fields mentioned in an array. */ - -void -rec_set_field_extern_bits( -/*======================*/ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: record descriptor */ - const ulint* vec, /* in: array of field numbers */ - ulint n_fields,/* in: number of fields numbers */ - mtr_t* mtr); /* in: mtr holding an X-latch to the page - where rec is, or NULL; in the NULL case - we do not write to log about the change */ -/*************************************************************** -This is used to modify the value of an already existing field in a record. -The previous value must have exactly the same size as the new value. If len -is UNIV_SQL_NULL then the field is treated as an SQL null. -For records in ROW_FORMAT=COMPACT (new-style records), len must not be -UNIV_SQL_NULL unless the field already is SQL null. 
*/ -UNIV_INLINE -void -rec_set_nth_field( -/*==============*/ - rec_t* rec, /* in: record */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint n, /* in: index number of the field */ - const void* data, /* in: pointer to the data if not SQL null */ - ulint len); /* in: length of the data or UNIV_SQL_NULL */ -/************************************************************** -The following function returns the data size of an old-style physical -record, that is the sum of field lengths. SQL null fields -are counted as length 0 fields. The value returned by the function -is the distance from record origin to record end in bytes. */ -UNIV_INLINE -ulint -rec_get_data_size_old( -/*==================*/ - /* out: size */ - rec_t* rec); /* in: physical record */ -/************************************************************** -The following function returns the number of fields in a record. */ -UNIV_INLINE -ulint -rec_offs_n_fields( -/*==============*/ - /* out: number of fields */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/************************************************************** -The following function returns the data size of a physical -record, that is the sum of field lengths. SQL null fields -are counted as length 0 fields. The value returned by the function -is the distance from record origin to record end in bytes. */ -UNIV_INLINE -ulint -rec_offs_data_size( -/*===============*/ - /* out: size */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/************************************************************** -Returns the total size of record minus data size of record. -The value returned by the function is the distance from record -start to record origin in bytes. 
*/ -UNIV_INLINE -ulint -rec_offs_extra_size( -/*================*/ - /* out: size */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/************************************************************** -Returns the total size of a physical record. */ -UNIV_INLINE -ulint -rec_offs_size( -/*==========*/ - /* out: size */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/************************************************************** -Returns a pointer to the start of the record. */ -UNIV_INLINE -byte* -rec_get_start( -/*==========*/ - /* out: pointer to start */ - rec_t* rec, /* in: pointer to record */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/************************************************************** -Returns a pointer to the end of the record. */ -UNIV_INLINE -byte* -rec_get_end( -/*========*/ - /* out: pointer to end */ - rec_t* rec, /* in: pointer to record */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/******************************************************************* -Copies a physical record to a buffer. */ -UNIV_INLINE -rec_t* -rec_copy( -/*=====*/ - /* out: pointer to the origin of the copy */ - void* buf, /* in: buffer */ - const rec_t* rec, /* in: physical record */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/****************************************************************** -Copies the first n fields of a physical record to a new physical record in -a buffer. */ - -rec_t* -rec_copy_prefix_to_buf( -/*===================*/ - /* out, own: copied record */ - rec_t* rec, /* in: physical record */ - dict_index_t* index, /* in: record descriptor */ - ulint n_fields, /* in: number of fields to copy */ - byte** buf, /* in/out: memory buffer - for the copied prefix, or NULL */ - ulint* buf_size); /* in/out: buffer size */ -/**************************************************************** -Folds a prefix of a physical record to a ulint. 
*/ -UNIV_INLINE -ulint -rec_fold( -/*=====*/ - /* out: the folded value */ - rec_t* rec, /* in: the physical record */ - const ulint* offsets, /* in: array returned by - rec_get_offsets() */ - ulint n_fields, /* in: number of complete - fields to fold */ - ulint n_bytes, /* in: number of bytes to fold - in an incomplete last field */ - dulint tree_id); /* in: index tree id */ -/************************************************************* -Builds a physical record out of a data tuple and stores it beginning from -address destination. */ - -rec_t* -rec_convert_dtuple_to_rec( -/*======================*/ - /* out: pointer to the origin - of physical record */ - byte* buf, /* in: start address of the - physical record */ - dict_index_t* index, /* in: record descriptor */ - dtuple_t* dtuple);/* in: data tuple */ -/************************************************************** -Returns the extra size of an old-style physical record if we know its -data size and number of fields. */ -UNIV_INLINE -ulint -rec_get_converted_extra_size( -/*=========================*/ - /* out: extra size */ - ulint data_size, /* in: data size */ - ulint n_fields) /* in: number of fields */ - __attribute__((const)); -/************************************************************** -The following function returns the size of a data tuple when converted to -a physical record. */ -UNIV_INLINE -ulint -rec_get_converted_size( -/*===================*/ - /* out: size */ - dict_index_t* index, /* in: record descriptor */ - dtuple_t* dtuple);/* in: data tuple */ -/****************************************************************** -Copies the first n fields of a physical record to a data tuple. -The fields are copied to the memory heap. 
*/ - -void -rec_copy_prefix_to_dtuple( -/*======================*/ - dtuple_t* tuple, /* in: data tuple */ - rec_t* rec, /* in: physical record */ - dict_index_t* index, /* in: record descriptor */ - ulint n_fields, /* in: number of fields to copy */ - mem_heap_t* heap); /* in: memory heap */ -/******************************************************************* -Validates the consistency of a physical record. */ - -ibool -rec_validate( -/*=========*/ - /* out: TRUE if ok */ - rec_t* rec, /* in: physical record */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/******************************************************************* -Prints an old-style physical record. */ - -void -rec_print_old( -/*==========*/ - FILE* file, /* in: file where to print */ - rec_t* rec); /* in: physical record */ -/******************************************************************* -Prints a physical record. */ - -void -rec_print_new( -/*==========*/ - FILE* file, /* in: file where to print */ - rec_t* rec, /* in: physical record */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/******************************************************************* -Prints a physical record. */ - -void -rec_print( -/*======*/ - FILE* file, /* in: file where to print */ - rec_t* rec, /* in: physical record */ - dict_index_t* index); /* in: record descriptor */ - -#define REC_INFO_BITS 6 /* This is single byte bit-field */ - -/* Maximum lengths for the data in a physical record if the offsets -are given in one byte (resp. two byte) format. 
*/ -#define REC_1BYTE_OFFS_LIMIT 0x7FUL -#define REC_2BYTE_OFFS_LIMIT 0x7FFFUL - -/* The data size of record must be smaller than this because we reserve -two upmost bits in a two byte offset for special purposes */ -#define REC_MAX_DATA_SIZE (16 * 1024) - -#ifndef UNIV_NONINL -#include "rem0rec.ic" -#endif - -#endif diff --git a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic deleted file mode 100644 index d91fb4c4391..00000000000 --- a/storage/innobase/include/rem0rec.ic +++ /dev/null @@ -1,1531 +0,0 @@ -/************************************************************************ -Record manager - -(c) 1994-1996 Innobase Oy - -Created 5/30/1994 Heikki Tuuri -*************************************************************************/ - -#include "mach0data.h" -#include "ut0byte.h" -#include "dict0dict.h" - -/* Compact flag ORed to the extra size returned by rec_get_offsets() */ -#define REC_OFFS_COMPACT ((ulint) 1 << 31) -/* SQL NULL flag in offsets returned by rec_get_offsets() */ -#define REC_OFFS_SQL_NULL ((ulint) 1 << 31) -/* External flag in offsets returned by rec_get_offsets() */ -#define REC_OFFS_EXTERNAL ((ulint) 1 << 30) -/* Mask for offsets returned by rec_get_offsets() */ -#define REC_OFFS_MASK (REC_OFFS_EXTERNAL - 1) - -/* Offsets of the bit-fields in an old-style record. NOTE! In the table the -most significant bytes and bits are written below less significant. - - (1) byte offset (2) bit usage within byte - downward from - origin -> 1 8 bits pointer to next record - 2 8 bits pointer to next record - 3 1 bit short flag - 7 bits number of fields - 4 3 bits number of fields - 5 bits heap number - 5 8 bits heap number - 6 4 bits n_owned - 4 bits info bits -*/ - -/* Offsets of the bit-fields in a new-style record. NOTE! In the table the -most significant bytes and bits are written below less significant. 
- - (1) byte offset (2) bit usage within byte - downward from - origin -> 1 8 bits relative offset of next record - 2 8 bits relative offset of next record - the relative offset is an unsigned 16-bit - integer: - (offset_of_next_record - - offset_of_this_record) mod 64Ki, - where mod is the modulo as a non-negative - number; - we can calculate the the offset of the next - record with the formula: - relative_offset + offset_of_this_record - mod UNIV_PAGE_SIZE - 3 3 bits status: - 000=conventional record - 001=node pointer record (inside B-tree) - 010=infimum record - 011=supremum record - 1xx=reserved - 5 bits heap number - 4 8 bits heap number - 5 4 bits n_owned - 4 bits info bits -*/ - -/* We list the byte offsets from the origin of the record, the mask, -and the shift needed to obtain each bit-field of the record. */ - -#define REC_NEXT 2 -#define REC_NEXT_MASK 0xFFFFUL -#define REC_NEXT_SHIFT 0 - -#define REC_OLD_SHORT 3 /* This is single byte bit-field */ -#define REC_OLD_SHORT_MASK 0x1UL -#define REC_OLD_SHORT_SHIFT 0 - -#define REC_OLD_N_FIELDS 4 -#define REC_OLD_N_FIELDS_MASK 0x7FEUL -#define REC_OLD_N_FIELDS_SHIFT 1 - -#define REC_NEW_STATUS 3 /* This is single byte bit-field */ -#define REC_NEW_STATUS_MASK 0x7UL -#define REC_NEW_STATUS_SHIFT 0 - -#define REC_OLD_HEAP_NO 5 -#define REC_NEW_HEAP_NO 4 -#define REC_HEAP_NO_MASK 0xFFF8UL -#define REC_HEAP_NO_SHIFT 3 - -#define REC_OLD_N_OWNED 6 /* This is single byte bit-field */ -#define REC_NEW_N_OWNED 5 /* This is single byte bit-field */ -#define REC_N_OWNED_MASK 0xFUL -#define REC_N_OWNED_SHIFT 0 - -#define REC_OLD_INFO_BITS 6 /* This is single byte bit-field */ -#define REC_NEW_INFO_BITS 5 /* This is single byte bit-field */ -#define REC_INFO_BITS_MASK 0xF0UL -#define REC_INFO_BITS_SHIFT 0 - -/* The deleted flag in info bits */ -#define REC_INFO_DELETED_FLAG 0x20UL /* when bit is set to 1, it means the - record has been delete marked */ -/* The following masks are used to filter the SQL null bit from 
-one-byte and two-byte offsets */ - -#define REC_1BYTE_SQL_NULL_MASK 0x80UL -#define REC_2BYTE_SQL_NULL_MASK 0x8000UL - -/* In a 2-byte offset the second most significant bit denotes -a field stored to another page: */ - -#define REC_2BYTE_EXTERN_MASK 0x4000UL - -#if REC_OLD_SHORT_MASK << (8 * (REC_OLD_SHORT - 3)) \ - ^ REC_OLD_N_FIELDS_MASK << (8 * (REC_OLD_N_FIELDS - 4)) \ - ^ REC_HEAP_NO_MASK << (8 * (REC_OLD_HEAP_NO - 4)) \ - ^ REC_N_OWNED_MASK << (8 * (REC_OLD_N_OWNED - 3)) \ - ^ REC_INFO_BITS_MASK << (8 * (REC_OLD_INFO_BITS - 3)) \ - ^ 0xFFFFFFFFUL -# error "sum of old-style masks != 0xFFFFFFFFUL" -#endif -#if REC_NEW_STATUS_MASK << (8 * (REC_NEW_STATUS - 3)) \ - ^ REC_HEAP_NO_MASK << (8 * (REC_NEW_HEAP_NO - 4)) \ - ^ REC_N_OWNED_MASK << (8 * (REC_NEW_N_OWNED - 3)) \ - ^ REC_INFO_BITS_MASK << (8 * (REC_NEW_INFO_BITS - 3)) \ - ^ 0xFFFFFFUL -# error "sum of new-style masks != 0xFFFFFFUL" -#endif - -/*************************************************************** -Sets the value of the ith field SQL null bit of an old-style record. */ - -void -rec_set_nth_field_null_bit( -/*=======================*/ - rec_t* rec, /* in: record */ - ulint i, /* in: ith field */ - ibool val); /* in: value to set */ -/*************************************************************** -Sets an old-style record field to SQL null. -The physical size of the field is not changed. */ - -void -rec_set_nth_field_sql_null( -/*=======================*/ - rec_t* rec, /* in: record */ - ulint n); /* in: index of the field */ - -/*************************************************************** -Sets the value of the ith field extern storage bit of an old-style record. 
*/ - -void -rec_set_nth_field_extern_bit_old( -/*=============================*/ - rec_t* rec, /* in: old-style record */ - ulint i, /* in: ith field */ - ibool val, /* in: value to set */ - mtr_t* mtr); /* in: mtr holding an X-latch to the page where - rec is, or NULL; in the NULL case we do not - write to log about the change */ -/*************************************************************** -Sets the value of the ith field extern storage bit of a new-style record. */ - -void -rec_set_nth_field_extern_bit_new( -/*=============================*/ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: record descriptor */ - ulint ith, /* in: ith field */ - ibool val, /* in: value to set */ - mtr_t* mtr); /* in: mtr holding an X-latch to the page - where rec is, or NULL; in the NULL case - we do not write to log about the change */ - -/********************************************************** -Gets a bit field from within 1 byte. */ -UNIV_INLINE -ulint -rec_get_bit_field_1( -/*================*/ - rec_t* rec, /* in: pointer to record origin */ - ulint offs, /* in: offset from the origin down */ - ulint mask, /* in: mask used to filter bits */ - ulint shift) /* in: shift right applied after masking */ -{ - ut_ad(rec); - - return((mach_read_from_1(rec - offs) & mask) >> shift); -} - -/********************************************************** -Sets a bit field within 1 byte. 
*/ -UNIV_INLINE -void -rec_set_bit_field_1( -/*================*/ - rec_t* rec, /* in: pointer to record origin */ - ulint val, /* in: value to set */ - ulint offs, /* in: offset from the origin down */ - ulint mask, /* in: mask used to filter bits */ - ulint shift) /* in: shift right applied after masking */ -{ - ut_ad(rec); - ut_ad(offs <= REC_N_OLD_EXTRA_BYTES); - ut_ad(mask); - ut_ad(mask <= 0xFFUL); - ut_ad(((mask >> shift) << shift) == mask); - ut_ad(((val << shift) & mask) == (val << shift)); - - mach_write_to_1(rec - offs, - (mach_read_from_1(rec - offs) & ~mask) - | (val << shift)); -} - -/********************************************************** -Gets a bit field from within 2 bytes. */ -UNIV_INLINE -ulint -rec_get_bit_field_2( -/*================*/ - rec_t* rec, /* in: pointer to record origin */ - ulint offs, /* in: offset from the origin down */ - ulint mask, /* in: mask used to filter bits */ - ulint shift) /* in: shift right applied after masking */ -{ - ut_ad(rec); - - return((mach_read_from_2(rec - offs) & mask) >> shift); -} - -/********************************************************** -Sets a bit field within 2 bytes. 
*/ -UNIV_INLINE -void -rec_set_bit_field_2( -/*================*/ - rec_t* rec, /* in: pointer to record origin */ - ulint val, /* in: value to set */ - ulint offs, /* in: offset from the origin down */ - ulint mask, /* in: mask used to filter bits */ - ulint shift) /* in: shift right applied after masking */ -{ - ut_ad(rec); - ut_ad(offs <= REC_N_OLD_EXTRA_BYTES); - ut_ad(mask > 0xFFUL); - ut_ad(mask <= 0xFFFFUL); - ut_ad((mask >> shift) & 1); - ut_ad(0 == ((mask >> shift) & ((mask >> shift) + 1))); - ut_ad(((mask >> shift) << shift) == mask); - ut_ad(((val << shift) & mask) == (val << shift)); - - mach_write_to_2(rec - offs, - (mach_read_from_2(rec - offs) & ~mask) - | (val << shift)); -} - -/********************************************************** -The following function is used to get the offset of the next chained record -on the same page. */ -UNIV_INLINE -ulint -rec_get_next_offs( -/*==============*/ - /* out: the page offset of the next chained record, or - 0 if none */ - rec_t* rec, /* in: physical record */ - ulint comp) /* in: nonzero=compact page format */ -{ - ulint field_value; -#if REC_NEXT_MASK != 0xFFFFUL -# error "REC_NEXT_MASK != 0xFFFFUL" -#endif -#if REC_NEXT_SHIFT -# error "REC_NEXT_SHIFT != 0" -#endif - - field_value = mach_read_from_2(rec - REC_NEXT); - - if (comp) { -#if UNIV_PAGE_SIZE <= 32768 - /* Note that for 64 KiB pages, field_value can 'wrap around' - and the debug assertion is not valid */ - - /* In the following assertion, field_value is interpreted - as signed 16-bit integer in 2's complement arithmetics. - If all platforms defined int16_t in the standard headers, - the expression could be written simpler as - (int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE - */ - ut_ad((field_value >= 32768 - ? 
field_value - 65536 - : field_value) - + ut_align_offset(rec, UNIV_PAGE_SIZE) - < UNIV_PAGE_SIZE); -#endif - if (field_value == 0) { - - return(0); - } - - return(ut_align_offset(rec + field_value, UNIV_PAGE_SIZE)); - } else { - ut_ad(field_value < UNIV_PAGE_SIZE); - - return(field_value); - } -} - -/********************************************************** -The following function is used to set the next record offset field of the -record. */ -UNIV_INLINE -void -rec_set_next_offs( -/*==============*/ - rec_t* rec, /* in: physical record */ - ulint comp, /* in: nonzero=compact page format */ - ulint next) /* in: offset of the next record, or 0 if none */ -{ - ut_ad(rec); - ut_ad(UNIV_PAGE_SIZE > next); -#if REC_NEXT_MASK != 0xFFFFUL -# error "REC_NEXT_MASK != 0xFFFFUL" -#endif -#if REC_NEXT_SHIFT -# error "REC_NEXT_SHIFT != 0" -#endif - - if (comp) { - ulint field_value; - - if (next) { - /* The following two statements calculate - next - offset_of_rec mod 64Ki, where mod is the modulo - as a non-negative number */ - - field_value = (ulint)((lint)next - - (lint)ut_align_offset( - rec, UNIV_PAGE_SIZE)); - field_value &= REC_NEXT_MASK; - } else { - field_value = 0; - } - - mach_write_to_2(rec - REC_NEXT, field_value); - } else { - mach_write_to_2(rec - REC_NEXT, next); - } -} - -/********************************************************** -The following function is used to get the number of fields -in an old-style record. */ -UNIV_INLINE -ulint -rec_get_n_fields_old( -/*=================*/ - /* out: number of data fields */ - rec_t* rec) /* in: physical record */ -{ - ulint ret; - - ut_ad(rec); - - ret = rec_get_bit_field_2(rec, REC_OLD_N_FIELDS, - REC_OLD_N_FIELDS_MASK, - REC_OLD_N_FIELDS_SHIFT); - ut_ad(ret <= REC_MAX_N_FIELDS); - ut_ad(ret > 0); - - return(ret); -} - -/********************************************************** -The following function is used to set the number of fields -in an old-style record. 
*/ -UNIV_INLINE -void -rec_set_n_fields_old( -/*=================*/ - rec_t* rec, /* in: physical record */ - ulint n_fields) /* in: the number of fields */ -{ - ut_ad(rec); - ut_ad(n_fields <= REC_MAX_N_FIELDS); - ut_ad(n_fields > 0); - - rec_set_bit_field_2(rec, n_fields, REC_OLD_N_FIELDS, - REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT); -} - -/********************************************************** -The following function retrieves the status bits of a new-style record. */ -UNIV_INLINE -ulint -rec_get_status( -/*===========*/ - /* out: status bits */ - rec_t* rec) /* in: physical record */ -{ - ulint ret; - - ut_ad(rec); - - ret = rec_get_bit_field_1(rec, REC_NEW_STATUS, - REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT); - ut_ad((ret & ~REC_NEW_STATUS_MASK) == 0); - - return(ret); -} - -/********************************************************** -The following function is used to get the number of fields -in a record. */ -UNIV_INLINE -ulint -rec_get_n_fields( -/*=============*/ - /* out: number of data fields */ - rec_t* rec, /* in: physical record */ - dict_index_t* index) /* in: record descriptor */ -{ - ut_ad(rec); - ut_ad(index); - - if (!dict_table_is_comp(index->table)) { - return(rec_get_n_fields_old(rec)); - } - - switch (rec_get_status(rec)) { - case REC_STATUS_ORDINARY: - return(dict_index_get_n_fields(index)); - case REC_STATUS_NODE_PTR: - return(dict_index_get_n_unique_in_tree(index) + 1); - case REC_STATUS_INFIMUM: - case REC_STATUS_SUPREMUM: - return(1); - default: - ut_error; - return(ULINT_UNDEFINED); - } -} - -/********************************************************** -The following function is used to get the number of records owned by the -previous directory record. */ -UNIV_INLINE -ulint -rec_get_n_owned( -/*============*/ - /* out: number of owned records */ - rec_t* rec, /* in: physical record */ - ulint comp) /* in: nonzero=compact page format */ -{ - ulint ret; - - ut_ad(rec); - - ret = rec_get_bit_field_1(rec, - comp ? 
REC_NEW_N_OWNED : REC_OLD_N_OWNED, - REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); - ut_ad(ret <= REC_MAX_N_OWNED); - - return(ret); -} - -/********************************************************** -The following function is used to set the number of owned records. */ -UNIV_INLINE -void -rec_set_n_owned( -/*============*/ - rec_t* rec, /* in: physical record */ - ulint comp, /* in: nonzero=compact page format */ - ulint n_owned) /* in: the number of owned */ -{ - ut_ad(rec); - ut_ad(n_owned <= REC_MAX_N_OWNED); - - rec_set_bit_field_1(rec, n_owned, - comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED, - REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); -} - -/********************************************************** -The following function is used to retrieve the info bits of a record. */ -UNIV_INLINE -ulint -rec_get_info_bits( -/*==============*/ - /* out: info bits */ - rec_t* rec, /* in: physical record */ - ulint comp) /* in: nonzero=compact page format */ -{ - ulint ret; - - ut_ad(rec); - - ret = rec_get_bit_field_1(rec, - comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS, - REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); - ut_ad((ret & ~REC_INFO_BITS_MASK) == 0); - - return(ret); -} - -/********************************************************** -The following function is used to set the info bits of a record. */ -UNIV_INLINE -void -rec_set_info_bits( -/*==============*/ - rec_t* rec, /* in: physical record */ - ulint comp, /* in: nonzero=compact page format */ - ulint bits) /* in: info bits */ -{ - ut_ad(rec); - ut_ad((bits & ~REC_INFO_BITS_MASK) == 0); - - rec_set_bit_field_1(rec, bits, - comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS, - REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); -} - -/********************************************************** -The following function is used to set the status bits of a new-style record. 
*/ -UNIV_INLINE -void -rec_set_status( -/*===========*/ - rec_t* rec, /* in: physical record */ - ulint bits) /* in: info bits */ -{ - ut_ad(rec); - ut_ad((bits & ~REC_NEW_STATUS_MASK) == 0); - - rec_set_bit_field_1(rec, bits, REC_NEW_STATUS, - REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT); -} - -/********************************************************** -The following function is used to retrieve the info and status -bits of a record. (Only compact records have status bits.) */ -UNIV_INLINE -ulint -rec_get_info_and_status_bits( -/*=========================*/ - /* out: info bits */ - rec_t* rec, /* in: physical record */ - ulint comp) /* in: nonzero=compact page format */ -{ - ulint bits; -#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \ -& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT) -# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap" -#endif - if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) { - bits = rec_get_info_bits(rec, TRUE) | rec_get_status(rec); - } else { - bits = rec_get_info_bits(rec, FALSE); - ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT))); - } - return(bits); -} -/********************************************************** -The following function is used to set the info and status -bits of a record. (Only compact records have status bits.) 
*/ -UNIV_INLINE -void -rec_set_info_and_status_bits( -/*=========================*/ - rec_t* rec, /* in: physical record */ - ulint comp, /* in: nonzero=compact page format */ - ulint bits) /* in: info bits */ -{ -#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \ -& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT) -# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap" -#endif - if (comp) { - rec_set_status(rec, bits & REC_NEW_STATUS_MASK); - } else { - ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT))); - } - rec_set_info_bits(rec, comp, bits & ~REC_NEW_STATUS_MASK); -} - -/********************************************************** -The following function tells if record is delete marked. */ -UNIV_INLINE -ulint -rec_get_deleted_flag( -/*=================*/ - /* out: nonzero if delete marked */ - rec_t* rec, /* in: physical record */ - ulint comp) /* in: nonzero=compact page format */ -{ - if (UNIV_EXPECT(comp, REC_OFFS_COMPACT)) { - return(UNIV_UNLIKELY( - rec_get_bit_field_1(rec, REC_NEW_INFO_BITS, - REC_INFO_DELETED_FLAG, - REC_INFO_BITS_SHIFT))); - } else { - return(UNIV_UNLIKELY( - rec_get_bit_field_1(rec, REC_OLD_INFO_BITS, - REC_INFO_DELETED_FLAG, - REC_INFO_BITS_SHIFT))); - } -} - -/********************************************************** -The following function is used to set the deleted bit. */ -UNIV_INLINE -void -rec_set_deleted_flag( -/*=================*/ - rec_t* rec, /* in: physical record */ - ulint comp, /* in: nonzero=compact page format */ - ulint flag) /* in: nonzero if delete marked */ -{ - ulint val; - - val = rec_get_info_bits(rec, comp); - - if (flag) { - val |= REC_INFO_DELETED_FLAG; - } else { - val &= ~REC_INFO_DELETED_FLAG; - } - - rec_set_info_bits(rec, comp, val); -} - -/********************************************************** -The following function tells if a new-style record is a node pointer. 
*/ -UNIV_INLINE -ibool -rec_get_node_ptr_flag( -/*==================*/ - /* out: TRUE if node pointer */ - rec_t* rec) /* in: physical record */ -{ - return(REC_STATUS_NODE_PTR == rec_get_status(rec)); -} - -/********************************************************** -The following function is used to get the order number of the record in the -heap of the index page. */ -UNIV_INLINE -ulint -rec_get_heap_no( -/*============*/ - /* out: heap order number */ - rec_t* rec, /* in: physical record */ - ulint comp) /* in: nonzero=compact page format */ -{ - ulint ret; - - ut_ad(rec); - - ret = rec_get_bit_field_2(rec, - comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO, - REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT); - ut_ad(ret <= REC_MAX_HEAP_NO); - - return(ret); -} - -/********************************************************** -The following function is used to set the heap number field in the record. */ -UNIV_INLINE -void -rec_set_heap_no( -/*============*/ - rec_t* rec, /* in: physical record */ - ulint comp, /* in: nonzero=compact page format */ - ulint heap_no)/* in: the heap number */ -{ - ut_ad(heap_no <= REC_MAX_HEAP_NO); - - rec_set_bit_field_2(rec, heap_no, - comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO, - REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT); -} - -/********************************************************** -The following function is used to test whether the data offsets in the record -are stored in one-byte or two-byte format. */ -UNIV_INLINE -ibool -rec_get_1byte_offs_flag( -/*====================*/ - /* out: TRUE if 1-byte form */ - rec_t* rec) /* in: physical record */ -{ -#if TRUE != 1 -#error "TRUE != 1" -#endif - - return(rec_get_bit_field_1(rec, REC_OLD_SHORT, REC_OLD_SHORT_MASK, - REC_OLD_SHORT_SHIFT)); -} - -/********************************************************** -The following function is used to set the 1-byte offsets flag. 
*/ -UNIV_INLINE -void -rec_set_1byte_offs_flag( -/*====================*/ - rec_t* rec, /* in: physical record */ - ibool flag) /* in: TRUE if 1byte form */ -{ -#if TRUE != 1 -#error "TRUE != 1" -#endif - ut_ad(flag <= TRUE); - - rec_set_bit_field_1(rec, flag, REC_OLD_SHORT, REC_OLD_SHORT_MASK, - REC_OLD_SHORT_SHIFT); -} - -/********************************************************** -Returns the offset of nth field end if the record is stored in the 1-byte -offsets form. If the field is SQL null, the flag is ORed in the returned -value. */ -UNIV_INLINE -ulint -rec_1_get_field_end_info( -/*=====================*/ - /* out: offset of the start of the field, SQL null - flag ORed */ - rec_t* rec, /* in: record */ - ulint n) /* in: field index */ -{ - ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields_old(rec)); - - return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1))); -} - -/********************************************************** -Returns the offset of nth field end if the record is stored in the 2-byte -offsets form. If the field is SQL null, the flag is ORed in the returned -value. */ -UNIV_INLINE -ulint -rec_2_get_field_end_info( -/*=====================*/ - /* out: offset of the start of the field, SQL null - flag and extern storage flag ORed */ - rec_t* rec, /* in: record */ - ulint n) /* in: field index */ -{ - ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields_old(rec)); - - return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2))); -} - -#ifdef UNIV_DEBUG -/* Length of the rec_get_offsets() header */ -# define REC_OFFS_HEADER_SIZE 4 -#else /* UNIV_DEBUG */ -/* Length of the rec_get_offsets() header */ -# define REC_OFFS_HEADER_SIZE 2 -#endif /* UNIV_DEBUG */ - -/* Get the base address of offsets. The extra_size is stored at -this position, and following positions hold the end offsets of -the fields. 
*/ -#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE) - -/************************************************************** -The following function returns the number of allocated elements -for an array of offsets. */ -UNIV_INLINE -ulint -rec_offs_get_n_alloc( -/*=================*/ - /* out: number of elements */ - const ulint* offsets)/* in: array for rec_get_offsets() */ -{ - ulint n_alloc; - ut_ad(offsets); - n_alloc = offsets[0]; - ut_ad(n_alloc > 0); - return(n_alloc); -} - -/************************************************************** -The following function sets the number of allocated elements -for an array of offsets. */ -UNIV_INLINE -void -rec_offs_set_n_alloc( -/*=================*/ - ulint* offsets, /* out: array for rec_get_offsets(), - must be allocated */ - ulint n_alloc) /* in: number of elements */ -{ - ut_ad(offsets); - ut_ad(n_alloc > 0); - UNIV_MEM_ASSERT_AND_ALLOC(offsets, n_alloc * sizeof *offsets); - offsets[0] = n_alloc; -} - -/************************************************************** -The following function returns the number of fields in a record. */ -UNIV_INLINE -ulint -rec_offs_n_fields( -/*==============*/ - /* out: number of fields */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - ulint n_fields; - ut_ad(offsets); - n_fields = offsets[1]; - ut_ad(n_fields > 0); - ut_ad(n_fields <= REC_MAX_N_FIELDS); - ut_ad(n_fields + REC_OFFS_HEADER_SIZE - <= rec_offs_get_n_alloc(offsets)); - return(n_fields); -} - -/**************************************************************** -Validates offsets returned by rec_get_offsets(). 
*/ -UNIV_INLINE -ibool -rec_offs_validate( -/*==============*/ - /* out: TRUE if valid */ - rec_t* rec, /* in: record or NULL */ - dict_index_t* index, /* in: record descriptor or NULL */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - ulint i = rec_offs_n_fields(offsets); - ulint last = ULINT_MAX; - ulint comp = *rec_offs_base(offsets) & REC_OFFS_COMPACT; - - if (rec) { - ut_ad((ulint) rec == offsets[2]); - if (!comp) { - ut_a(rec_get_n_fields_old(rec) >= i); - } - } - if (index) { - ulint max_n_fields; - ut_ad((ulint) index == offsets[3]); - max_n_fields = ut_max( - dict_index_get_n_fields(index), - dict_index_get_n_unique_in_tree(index) + 1); - if (comp && rec) { - switch (rec_get_status(rec)) { - case REC_STATUS_ORDINARY: - break; - case REC_STATUS_NODE_PTR: - max_n_fields = dict_index_get_n_unique_in_tree( - index) + 1; - break; - case REC_STATUS_INFIMUM: - case REC_STATUS_SUPREMUM: - max_n_fields = 1; - break; - default: - ut_error; - } - } - /* index->n_def == 0 for dummy indexes if !comp */ - ut_a(!comp || index->n_def); - ut_a(!index->n_def || i <= max_n_fields); - } - while (i--) { - ulint curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK; - ut_a(curr <= last); - last = curr; - } - return(TRUE); -} -/**************************************************************** -Updates debug data in offsets, in order to avoid bogus -rec_offs_validate() failures. 
*/ -UNIV_INLINE -void -rec_offs_make_valid( -/*================*/ - rec_t* rec __attribute__((unused)), - /* in: record */ - dict_index_t* index __attribute__((unused)), - /* in: record descriptor */ - ulint* offsets __attribute__((unused))) - /* in: array returned by rec_get_offsets() */ -{ -#ifdef UNIV_DEBUG - ut_ad(rec_get_n_fields(rec, index) >= rec_offs_n_fields(offsets)); - offsets[2] = (ulint) rec; - offsets[3] = (ulint) index; -#endif /* UNIV_DEBUG */ -} - -/**************************************************************** -The following function is used to get a pointer to the nth -data field in a record. */ -UNIV_INLINE -byte* -rec_get_nth_field( -/*==============*/ - /* out: pointer to the field */ - rec_t* rec, /* in: record */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint n, /* in: index of the field */ - ulint* len) /* out: length of the field; UNIV_SQL_NULL - if SQL null */ -{ - byte* field; - ulint length; - ut_ad(rec); - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(n < rec_offs_n_fields(offsets)); - ut_ad(len); - - if (UNIV_UNLIKELY(n == 0)) { - field = rec; - } else { - field = rec + (rec_offs_base(offsets)[n] & REC_OFFS_MASK); - } - - length = rec_offs_base(offsets)[1 + n]; - - if (length & REC_OFFS_SQL_NULL) { - length = UNIV_SQL_NULL; - } else { - length &= REC_OFFS_MASK; - length -= field - rec; - } - - *len = length; - return(field); -} - -/********************************************************** -Determine if the offsets are for a record in the new -compact format. */ -UNIV_INLINE -ulint -rec_offs_comp( -/*==========*/ - /* out: nonzero if compact format */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - ut_ad(rec_offs_validate(NULL, NULL, offsets)); - return(*rec_offs_base(offsets) & REC_OFFS_COMPACT); -} - -/********************************************************** -Returns nonzero if the extern bit is set in nth field of rec. 
*/ -UNIV_INLINE -ulint -rec_offs_nth_extern( -/*================*/ - /* out: nonzero if externally stored */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint n) /* in: nth field */ -{ - ut_ad(rec_offs_validate(NULL, NULL, offsets)); - ut_ad(n < rec_offs_n_fields(offsets)); - return(UNIV_UNLIKELY(rec_offs_base(offsets)[1 + n] - & REC_OFFS_EXTERNAL)); -} - -/********************************************************** -Returns nonzero if the SQL NULL bit is set in nth field of rec. */ -UNIV_INLINE -ulint -rec_offs_nth_sql_null( -/*==================*/ - /* out: nonzero if SQL NULL */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint n) /* in: nth field */ -{ - ut_ad(rec_offs_validate(NULL, NULL, offsets)); - ut_ad(n < rec_offs_n_fields(offsets)); - return(UNIV_UNLIKELY(rec_offs_base(offsets)[1 + n] - & REC_OFFS_SQL_NULL)); -} - -/********************************************************** -Gets the physical size of a field. */ -UNIV_INLINE -ulint -rec_offs_nth_size( -/*==============*/ - /* out: length of field */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint n) /* in: nth field */ -{ - ut_ad(rec_offs_validate(NULL, NULL, offsets)); - ut_ad(n < rec_offs_n_fields(offsets)); - if (!n) { - return(rec_offs_base(offsets)[1 + n] & REC_OFFS_MASK); - } - return((rec_offs_base(offsets)[1 + n] - rec_offs_base(offsets)[n]) - & REC_OFFS_MASK); -} - -/********************************************************** -Returns TRUE if the extern bit is set in any of the fields -of an old-style record. 
*/ -UNIV_INLINE -ibool -rec_offs_any_extern( -/*================*/ - /* out: TRUE if a field is stored externally */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - ulint i; - for (i = rec_offs_n_fields(offsets); i--; ) { - if (rec_offs_nth_extern(offsets, i)) { - return(TRUE); - } - } - return(FALSE); -} - -/*************************************************************** -Sets the value of the ith field extern storage bit. */ -UNIV_INLINE -void -rec_set_nth_field_extern_bit( -/*=========================*/ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: record descriptor */ - ulint i, /* in: ith field */ - ibool val, /* in: value to set */ - mtr_t* mtr) /* in: mtr holding an X-latch to the page - where rec is, or NULL; in the NULL case - we do not write to log about the change */ -{ - if (dict_table_is_comp(index->table)) { - rec_set_nth_field_extern_bit_new(rec, index, i, val, mtr); - } else { - rec_set_nth_field_extern_bit_old(rec, i, val, mtr); - } -} - -/********************************************************** -Returns the offset of n - 1th field end if the record is stored in the 1-byte -offsets form. If the field is SQL null, the flag is ORed in the returned -value. This function and the 2-byte counterpart are defined here because the -C-compiler was not able to sum negative and positive constant offsets, and -warned of constant arithmetic overflow within the compiler. */ -UNIV_INLINE -ulint -rec_1_get_prev_field_end_info( -/*==========================*/ - /* out: offset of the start of the PREVIOUS field, SQL - null flag ORed */ - rec_t* rec, /* in: record */ - ulint n) /* in: field index */ -{ - ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields_old(rec)); - - return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n))); -} - -/********************************************************** -Returns the offset of n - 1th field end if the record is stored in the 2-byte -offsets form. 
If the field is SQL null, the flag is ORed in the returned -value. */ -UNIV_INLINE -ulint -rec_2_get_prev_field_end_info( -/*==========================*/ - /* out: offset of the start of the PREVIOUS field, SQL - null flag ORed */ - rec_t* rec, /* in: record */ - ulint n) /* in: field index */ -{ - ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields_old(rec)); - - return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n))); -} - -/********************************************************** -Sets the field end info for the nth field if the record is stored in the -1-byte format. */ -UNIV_INLINE -void -rec_1_set_field_end_info( -/*=====================*/ - rec_t* rec, /* in: record */ - ulint n, /* in: field index */ - ulint info) /* in: value to set */ -{ - ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields_old(rec)); - - mach_write_to_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1), info); -} - -/********************************************************** -Sets the field end info for the nth field if the record is stored in the -2-byte format. */ -UNIV_INLINE -void -rec_2_set_field_end_info( -/*=====================*/ - rec_t* rec, /* in: record */ - ulint n, /* in: field index */ - ulint info) /* in: value to set */ -{ - ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields_old(rec)); - - mach_write_to_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2), info); -} - -/********************************************************** -Returns the offset of nth field start if the record is stored in the 1-byte -offsets form. 
*/ -UNIV_INLINE -ulint -rec_1_get_field_start_offs( -/*=======================*/ - /* out: offset of the start of the field */ - rec_t* rec, /* in: record */ - ulint n) /* in: field index */ -{ - ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields_old(rec)); - - if (n == 0) { - - return(0); - } - - return(rec_1_get_prev_field_end_info(rec, n) - & ~REC_1BYTE_SQL_NULL_MASK); -} - -/********************************************************** -Returns the offset of nth field start if the record is stored in the 2-byte -offsets form. */ -UNIV_INLINE -ulint -rec_2_get_field_start_offs( -/*=======================*/ - /* out: offset of the start of the field */ - rec_t* rec, /* in: record */ - ulint n) /* in: field index */ -{ - ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields_old(rec)); - - if (n == 0) { - - return(0); - } - - return(rec_2_get_prev_field_end_info(rec, n) - & ~(REC_2BYTE_SQL_NULL_MASK | REC_2BYTE_EXTERN_MASK)); -} - -/********************************************************** -The following function is used to read the offset of the start of a data field -in the record. The start of an SQL null field is the end offset of the -previous non-null field, or 0, if none exists. If n is the number of the last -field + 1, then the end offset of the last field is returned. */ -UNIV_INLINE -ulint -rec_get_field_start_offs( -/*=====================*/ - /* out: offset of the start of the field */ - rec_t* rec, /* in: record */ - ulint n) /* in: field index */ -{ - ut_ad(rec); - ut_ad(n <= rec_get_n_fields_old(rec)); - - if (n == 0) { - - return(0); - } - - if (rec_get_1byte_offs_flag(rec)) { - - return(rec_1_get_field_start_offs(rec, n)); - } - - return(rec_2_get_field_start_offs(rec, n)); -} - -/**************************************************************** -Gets the physical size of an old-style field. -Also an SQL null may have a field of size > 0, -if the data type is of a fixed size. 
*/ -UNIV_INLINE -ulint -rec_get_nth_field_size( -/*===================*/ - /* out: field size in bytes */ - rec_t* rec, /* in: record */ - ulint n) /* in: index of the field */ -{ - ulint os; - ulint next_os; - - os = rec_get_field_start_offs(rec, n); - next_os = rec_get_field_start_offs(rec, n + 1); - - ut_ad(next_os - os < UNIV_PAGE_SIZE); - - return(next_os - os); -} - -/*************************************************************** -This is used to modify the value of an already existing field in a record. -The previous value must have exactly the same size as the new value. If len -is UNIV_SQL_NULL then the field is treated as an SQL null. -For records in ROW_FORMAT=COMPACT (new-style records), len must not be -UNIV_SQL_NULL unless the field already is SQL null. */ -UNIV_INLINE -void -rec_set_nth_field( -/*==============*/ - rec_t* rec, /* in: record */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint n, /* in: index number of the field */ - const void* data, /* in: pointer to the data - if not SQL null */ - ulint len) /* in: length of the data or UNIV_SQL_NULL */ -{ - byte* data2; - ulint len2; - - ut_ad(rec); - ut_ad(rec_offs_validate(rec, NULL, offsets)); - - if (UNIV_UNLIKELY(len == UNIV_SQL_NULL)) { - if (!rec_offs_nth_sql_null(offsets, n)) { - ut_a(!rec_offs_comp(offsets)); - rec_set_nth_field_sql_null(rec, n); - } - - return; - } - - data2 = rec_get_nth_field(rec, offsets, n, &len2); - if (len2 == UNIV_SQL_NULL) { - ut_ad(!rec_offs_comp(offsets)); - rec_set_nth_field_null_bit(rec, n, FALSE); - ut_ad(len == rec_get_nth_field_size(rec, n)); - } else { - ut_ad(len2 == len); - } - - ut_memcpy(data2, data, len); -} - -/************************************************************** -The following function returns the data size of an old-style physical -record, that is the sum of field lengths. SQL null fields -are counted as length 0 fields. 
The value returned by the function -is the distance from record origin to record end in bytes. */ -UNIV_INLINE -ulint -rec_get_data_size_old( -/*==================*/ - /* out: size */ - rec_t* rec) /* in: physical record */ -{ - ut_ad(rec); - - return(rec_get_field_start_offs(rec, rec_get_n_fields_old(rec))); -} - -/************************************************************** -The following function sets the number of fields in offsets. */ -UNIV_INLINE -void -rec_offs_set_n_fields( -/*==================*/ - ulint* offsets, /* in/out: array returned by - rec_get_offsets() */ - ulint n_fields) /* in: number of fields */ -{ - ut_ad(offsets); - ut_ad(n_fields > 0); - ut_ad(n_fields <= REC_MAX_N_FIELDS); - ut_ad(n_fields + REC_OFFS_HEADER_SIZE - <= rec_offs_get_n_alloc(offsets)); - offsets[1] = n_fields; -} - -/************************************************************** -The following function returns the data size of a physical -record, that is the sum of field lengths. SQL null fields -are counted as length 0 fields. The value returned by the function -is the distance from record origin to record end in bytes. */ -UNIV_INLINE -ulint -rec_offs_data_size( -/*===============*/ - /* out: size */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - ulint size; - - ut_ad(rec_offs_validate(NULL, NULL, offsets)); - size = rec_offs_base(offsets)[rec_offs_n_fields(offsets)] - & REC_OFFS_MASK; - ut_ad(size < UNIV_PAGE_SIZE); - return(size); -} - -/************************************************************** -Returns the total size of record minus data size of record. The value -returned by the function is the distance from record start to record origin -in bytes. 
*/ -UNIV_INLINE -ulint -rec_offs_extra_size( -/*================*/ - /* out: size */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - ulint size; - ut_ad(rec_offs_validate(NULL, NULL, offsets)); - size = *rec_offs_base(offsets) & ~REC_OFFS_COMPACT; - ut_ad(size < UNIV_PAGE_SIZE); - return(size); -} - -/************************************************************** -Returns the total size of a physical record. */ -UNIV_INLINE -ulint -rec_offs_size( -/*==========*/ - /* out: size */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - return(rec_offs_data_size(offsets) + rec_offs_extra_size(offsets)); -} - -/************************************************************** -Returns a pointer to the end of the record. */ -UNIV_INLINE -byte* -rec_get_end( -/*========*/ - /* out: pointer to end */ - rec_t* rec, /* in: pointer to record */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - return(rec + rec_offs_data_size(offsets)); -} - -/************************************************************** -Returns a pointer to the start of the record. */ -UNIV_INLINE -byte* -rec_get_start( -/*==========*/ - /* out: pointer to start */ - rec_t* rec, /* in: pointer to record */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - return(rec - rec_offs_extra_size(offsets)); -} - -/******************************************************************* -Copies a physical record to a buffer. 
*/ -UNIV_INLINE -rec_t* -rec_copy( -/*=====*/ - /* out: pointer to the origin of the copy */ - void* buf, /* in: buffer */ - const rec_t* rec, /* in: physical record */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - ulint extra_len; - ulint data_len; - - ut_ad(rec && buf); - ut_ad(rec_offs_validate((rec_t*) rec, NULL, offsets)); - ut_ad(rec_validate((rec_t*) rec, offsets)); - - extra_len = rec_offs_extra_size(offsets); - data_len = rec_offs_data_size(offsets); - - ut_memcpy(buf, rec - extra_len, extra_len + data_len); - - return((byte*)buf + extra_len); -} - -/************************************************************** -Returns the extra size of an old-style physical record if we know its -data size and number of fields. */ -UNIV_INLINE -ulint -rec_get_converted_extra_size( -/*=========================*/ - /* out: extra size */ - ulint data_size, /* in: data size */ - ulint n_fields) /* in: number of fields */ -{ - if (data_size <= REC_1BYTE_OFFS_LIMIT) { - - return(REC_N_OLD_EXTRA_BYTES + n_fields); - } - - return(REC_N_OLD_EXTRA_BYTES + 2 * n_fields); -} - -/************************************************************** -The following function returns the size of a data tuple when converted to -a new-style physical record. */ - -ulint -rec_get_converted_size_new( -/*=======================*/ - /* out: size */ - dict_index_t* index, /* in: record descriptor */ - dtuple_t* dtuple);/* in: data tuple */ -/************************************************************** -The following function returns the size of a data tuple when converted to -a physical record. 
*/ -UNIV_INLINE -ulint -rec_get_converted_size( -/*===================*/ - /* out: size */ - dict_index_t* index, /* in: record descriptor */ - dtuple_t* dtuple) /* in: data tuple */ -{ - ulint data_size; - ulint extra_size; - - ut_ad(index); - ut_ad(dtuple); - ut_ad(dtuple_check_typed(dtuple)); - - ut_ad(index->type & DICT_UNIVERSAL - || dtuple_get_n_fields(dtuple) - == (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK) - == REC_STATUS_NODE_PTR) - ? dict_index_get_n_unique_in_tree(index) + 1 - : dict_index_get_n_fields(index))); - - if (dict_table_is_comp(index->table)) { - return(rec_get_converted_size_new(index, dtuple)); - } - - data_size = dtuple_get_data_size(dtuple); - - extra_size = rec_get_converted_extra_size( - data_size, dtuple_get_n_fields(dtuple)); - - return(data_size + extra_size); -} - -/**************************************************************** -Folds a prefix of a physical record to a ulint. Folds only existing fields, -that is, checks that we do not run out of the record. 
*/ -UNIV_INLINE -ulint -rec_fold( -/*=====*/ - /* out: the folded value */ - rec_t* rec, /* in: the physical record */ - const ulint* offsets, /* in: array returned by - rec_get_offsets() */ - ulint n_fields, /* in: number of complete - fields to fold */ - ulint n_bytes, /* in: number of bytes to fold - in an incomplete last field */ - dulint tree_id) /* in: index tree id */ -{ - ulint i; - byte* data; - ulint len; - ulint fold; - ulint n_fields_rec; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ut_ad(rec_validate((rec_t*) rec, offsets)); - ut_ad(n_fields + n_bytes > 0); - - n_fields_rec = rec_offs_n_fields(offsets); - ut_ad(n_fields <= n_fields_rec); - ut_ad(n_fields < n_fields_rec || n_bytes == 0); - - if (n_fields > n_fields_rec) { - n_fields = n_fields_rec; - } - - if (n_fields == n_fields_rec) { - n_bytes = 0; - } - - fold = ut_fold_dulint(tree_id); - - for (i = 0; i < n_fields; i++) { - data = rec_get_nth_field(rec, offsets, i, &len); - - if (len != UNIV_SQL_NULL) { - fold = ut_fold_ulint_pair(fold, - ut_fold_binary(data, len)); - } - } - - if (n_bytes > 0) { - data = rec_get_nth_field(rec, offsets, i, &len); - - if (len != UNIV_SQL_NULL) { - if (len > n_bytes) { - len = n_bytes; - } - - fold = ut_fold_ulint_pair(fold, - ut_fold_binary(data, len)); - } - } - - return(fold); -} diff --git a/storage/innobase/include/rem0types.h b/storage/innobase/include/rem0types.h deleted file mode 100644 index 79c162392d2..00000000000 --- a/storage/innobase/include/rem0types.h +++ /dev/null @@ -1,20 +0,0 @@ -/************************************************************************ -Record manager global types - -(c) 1994-1996 Innobase Oy - -Created 5/30/1994 Heikki Tuuri -*************************************************************************/ - -#ifndef rem0types_h -#define rem0types_h - -/* We define the physical record simply as an array of bytes */ -typedef byte rec_t; - -/* Maximum values for various fields (for non-blob tuples) */ -#define REC_MAX_N_FIELDS 
(1024 - 1) -#define REC_MAX_HEAP_NO (2 * 8192 - 1) -#define REC_MAX_N_OWNED (16 - 1) - -#endif diff --git a/storage/innobase/include/row0ins.h b/storage/innobase/include/row0ins.h deleted file mode 100644 index b4bcc8ac5ca..00000000000 --- a/storage/innobase/include/row0ins.h +++ /dev/null @@ -1,169 +0,0 @@ -/****************************************************** -Insert into a table - -(c) 1996 Innobase Oy - -Created 4/20/1996 Heikki Tuuri -*******************************************************/ - -#ifndef row0ins_h -#define row0ins_h - -#include "univ.i" -#include "data0data.h" -#include "que0types.h" -#include "dict0types.h" -#include "trx0types.h" -#include "row0types.h" - -/******************************************************************* -Checks if foreign key constraint fails for an index entry. Sets shared locks -which lock either the success or the failure of the constraint. NOTE that -the caller must have a shared latch on dict_foreign_key_check_lock. */ - -ulint -row_ins_check_foreign_constraint( -/*=============================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_NO_REFERENCED_ROW, - or DB_ROW_IS_REFERENCED */ - ibool check_ref,/* in: TRUE If we want to check that - the referenced table is ok, FALSE if we - want to to check the foreign key table */ - dict_foreign_t* foreign,/* in: foreign constraint; NOTE that the - tables mentioned in it must be in the - dictionary cache if they exist at all */ - dict_table_t* table, /* in: if check_ref is TRUE, then the foreign - table, else the referenced table */ - dtuple_t* entry, /* in: index entry for index */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************* -Creates an insert node struct. */ - -ins_node_t* -ins_node_create( -/*============*/ - /* out, own: insert node struct */ - ulint ins_type, /* in: INS_VALUES, ... 
*/ - dict_table_t* table, /* in: table where to insert */ - mem_heap_t* heap); /* in: mem heap where created */ -/************************************************************************* -Sets a new row to insert for an INS_DIRECT node. This function is only used -if we have constructed the row separately, which is a rare case; this -function is quite slow. */ - -void -ins_node_set_new_row( -/*=================*/ - ins_node_t* node, /* in: insert node */ - dtuple_t* row); /* in: new row (or first row) for the node */ -/******************************************************************* -Tries to insert an index entry to an index. If the index is clustered -and a record with the same unique key is found, the other record is -necessarily marked deleted by a committed transaction, or a unique key -violation error occurs. The delete marked record is then updated to an -existing record, and we must write an undo log record on the delete -marked record. If the index is secondary, and a record with exactly the -same fields is found, the other record is necessarily marked deleted. -It is then unmarked. Otherwise, the entry is just inserted to the index. */ - -ulint -row_ins_index_entry_low( -/*====================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL - if pessimistic retry needed, or error code */ - ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, - depending on whether we wish optimistic or - pessimistic descent down the index tree */ - dict_index_t* index, /* in: index */ - dtuple_t* entry, /* in: index entry to insert */ - ulint* ext_vec,/* in: array containing field numbers of - externally stored fields in entry, or NULL */ - ulint n_ext_vec,/* in: number of fields in ext_vec */ - que_thr_t* thr); /* in: query thread */ -/******************************************************************* -Inserts an index entry to index. Tries first optimistic, then pessimistic -descent down the tree. 
If the entry matches enough to a delete marked record, -performs the insert by updating or delete unmarking the delete marked -record. */ - -ulint -row_ins_index_entry( -/*================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DUPLICATE_KEY, or some other error code */ - dict_index_t* index, /* in: index */ - dtuple_t* entry, /* in: index entry to insert */ - ulint* ext_vec,/* in: array containing field numbers of - externally stored fields in entry, or NULL */ - ulint n_ext_vec,/* in: number of fields in ext_vec */ - que_thr_t* thr); /* in: query thread */ -/*************************************************************** -Inserts a row to a table. */ - -ulint -row_ins( -/*====*/ - /* out: DB_SUCCESS if operation successfully - completed, else error code or DB_LOCK_WAIT */ - ins_node_t* node, /* in: row insert node */ - que_thr_t* thr); /* in: query thread */ -/*************************************************************** -Inserts a row to a table. This is a high-level function used in -SQL execution graphs. 
*/ - -que_thr_t* -row_ins_step( -/*=========*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ - -/* Insert node structure */ - -struct ins_node_struct{ - que_common_t common; /* node type: QUE_NODE_INSERT */ - ulint ins_type;/* INS_VALUES, INS_SEARCHED, or INS_DIRECT */ - dtuple_t* row; /* row to insert */ - dict_table_t* table; /* table where to insert */ - sel_node_t* select; /* select in searched insert */ - que_node_t* values_list;/* list of expressions to evaluate and - insert in an INS_VALUES insert */ - ulint state; /* node execution state */ - dict_index_t* index; /* NULL, or the next index where the index - entry should be inserted */ - dtuple_t* entry; /* NULL, or entry to insert in the index; - after a successful insert of the entry, - this should be reset to NULL */ - UT_LIST_BASE_NODE_T(dtuple_t) - entry_list;/* list of entries, one for each index */ - byte* row_id_buf;/* buffer for the row id sys field in row */ - dulint trx_id; /* trx id or the last trx which executed the - node */ - byte* trx_id_buf;/* buffer for the trx id sys field in row */ - mem_heap_t* entry_sys_heap; - /* memory heap used as auxiliary storage; - entry_list and sys fields are stored here; - if this is NULL, entry list should be created - and buffers for sys fields in row allocated */ - ulint magic_n; -}; - -#define INS_NODE_MAGIC_N 15849075 - -/* Insert node types */ -#define INS_SEARCHED 0 /* INSERT INTO ... SELECT ... */ -#define INS_VALUES 1 /* INSERT INTO ... VALUES ... 
*/ -#define INS_DIRECT 2 /* this is for internal use in dict0crea: - insert the row directly */ - -/* Node execution states */ -#define INS_NODE_SET_IX_LOCK 1 /* we should set an IX lock on table */ -#define INS_NODE_ALLOC_ROW_ID 2 /* row id should be allocated */ -#define INS_NODE_INSERT_ENTRIES 3 /* index entries should be built and - inserted */ - -#ifndef UNIV_NONINL -#include "row0ins.ic" -#endif - -#endif diff --git a/storage/innobase/include/row0ins.ic b/storage/innobase/include/row0ins.ic deleted file mode 100644 index 80a232d41ee..00000000000 --- a/storage/innobase/include/row0ins.ic +++ /dev/null @@ -1,9 +0,0 @@ -/****************************************************** -Insert into a table - -(c) 1996 Innobase Oy - -Created 4/20/1996 Heikki Tuuri -*******************************************************/ - - diff --git a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h deleted file mode 100644 index 5430190fa51..00000000000 --- a/storage/innobase/include/row0mysql.h +++ /dev/null @@ -1,743 +0,0 @@ -/****************************************************** -Interface between Innobase row operations and MySQL. -Contains also create table and other data dictionary operations. - -(c) 2000 Innobase Oy - -Created 9/17/2000 Heikki Tuuri -*******************************************************/ - -#ifndef row0mysql_h -#define row0mysql_h - -#include "univ.i" -#include "data0data.h" -#include "que0types.h" -#include "dict0types.h" -#include "trx0types.h" -#include "row0types.h" -#include "btr0pcur.h" -#include "trx0types.h" - -extern ibool row_rollback_on_timeout; - -typedef struct row_prebuilt_struct row_prebuilt_t; - -/*********************************************************************** -Frees the blob heap in prebuilt when no longer needed. 
*/ - -void -row_mysql_prebuilt_free_blob_heap( -/*==============================*/ - row_prebuilt_t* prebuilt); /* in: prebuilt struct of a - ha_innobase:: table handle */ -/*********************************************************************** -Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row -format. */ - -byte* -row_mysql_store_true_var_len( -/*=========================*/ - /* out: pointer to the data, we skip the 1 or 2 bytes - at the start that are used to store the len */ - byte* dest, /* in: where to store */ - ulint len, /* in: length, must fit in two bytes */ - ulint lenlen);/* in: storage length of len: either 1 or 2 bytes */ -/*********************************************************************** -Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and -returns a pointer to the data. */ - -byte* -row_mysql_read_true_varchar( -/*========================*/ - /* out: pointer to the data, we skip the 1 or 2 bytes - at the start that are used to store the len */ - ulint* len, /* out: variable-length field length */ - byte* field, /* in: field in the MySQL format */ - ulint lenlen);/* in: storage length of len: either 1 or 2 bytes */ -/*********************************************************************** -Stores a reference to a BLOB in the MySQL format. */ - -void -row_mysql_store_blob_ref( -/*=====================*/ - byte* dest, /* in: where to store */ - ulint col_len, /* in: dest buffer size: determines into - how many bytes the BLOB length is stored, - this may vary from 1 to 4 bytes */ - byte* data, /* in: BLOB data */ - ulint len); /* in: BLOB length */ -/*********************************************************************** -Reads a reference to a BLOB in the MySQL format. 
*/ - -byte* -row_mysql_read_blob_ref( -/*====================*/ - /* out: pointer to BLOB data */ - ulint* len, /* out: BLOB length */ - byte* ref, /* in: BLOB reference in the MySQL format */ - ulint col_len); /* in: BLOB reference length (not BLOB - length) */ -/****************************************************************** -Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format. -The counterpart of this function is row_sel_field_store_in_mysql_format() in -row0sel.c. */ - -byte* -row_mysql_store_col_in_innobase_format( -/*===================================*/ - /* out: up to which byte we used - buf in the conversion */ - dfield_t* dfield, /* in/out: dfield where dtype - information must be already set when - this function is called! */ - byte* buf, /* in/out: buffer for a converted - integer value; this must be at least - col_len long then! */ - ibool row_format_col, /* TRUE if the mysql_data is from - a MySQL row, FALSE if from a MySQL - key value; - in MySQL, a true VARCHAR storage - format differs in a row and in a - key value: in a key value the length - is always stored in 2 bytes! */ - byte* mysql_data, /* in: MySQL column value, not - SQL NULL; NOTE that dfield may also - get a pointer to mysql_data, - therefore do not discard this as long - as dfield is used! */ - ulint col_len, /* in: MySQL column length; NOTE that - this is the storage length of the - column in the MySQL format row, not - necessarily the length of the actual - payload data; if the column is a true - VARCHAR then this is irrelevant */ - ulint comp); /* in: nonzero=compact format */ -/******************************************************************** -Handles user errors and lock waits detected by the database engine. 
*/ - -ibool -row_mysql_handle_errors( -/*====================*/ - /* out: TRUE if it was a lock wait and - we should continue running the query thread */ - ulint* new_err,/* out: possible new error encountered in - rollback, or the old error which was - during the function entry */ - trx_t* trx, /* in: transaction */ - que_thr_t* thr, /* in: query thread */ - trx_savept_t* savept);/* in: savepoint */ -/************************************************************************ -Create a prebuilt struct for a MySQL table handle. */ - -row_prebuilt_t* -row_create_prebuilt( -/*================*/ - /* out, own: a prebuilt struct */ - dict_table_t* table); /* in: Innobase table handle */ -/************************************************************************ -Free a prebuilt struct for a MySQL table handle. */ - -void -row_prebuilt_free( -/*==============*/ - row_prebuilt_t* prebuilt); /* in, own: prebuilt struct */ -/************************************************************************* -Updates the transaction pointers in query graphs stored in the prebuilt -struct. */ - -void -row_update_prebuilt_trx( -/*====================*/ - /* out: prebuilt dtuple */ - row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL - handle */ - trx_t* trx); /* in: transaction handle */ -/************************************************************************* -Unlocks an AUTO_INC type lock possibly reserved by trx. */ - -void -row_unlock_table_autoinc_for_mysql( -/*===============================*/ - trx_t* trx); /* in: transaction */ -/************************************************************************* -Sets an AUTO_INC type lock on the table mentioned in prebuilt. The -AUTO_INC lock gives exclusive access to the auto-inc counter of the -table. The lock is reserved only for the duration of an SQL statement. -It is not compatible with another AUTO_INC or exclusive lock on the -table. 
*/ - -int -row_lock_table_autoinc_for_mysql( -/*=============================*/ - /* out: error code or DB_SUCCESS */ - row_prebuilt_t* prebuilt); /* in: prebuilt struct in the MySQL - table handle */ -/************************************************************************* -Sets a table lock on the table mentioned in prebuilt. */ - -int -row_lock_table_for_mysql( -/*=====================*/ - /* out: error code or DB_SUCCESS */ - row_prebuilt_t* prebuilt, /* in: prebuilt struct in the MySQL - table handle */ - dict_table_t* table, /* in: table to lock, or NULL - if prebuilt->table should be - locked as - prebuilt->select_lock_type */ - ulint mode); /* in: lock mode of table - (ignored if table==NULL) */ - -/************************************************************************* -Does an insert for MySQL. */ - -int -row_insert_for_mysql( -/*=================*/ - /* out: error code or DB_SUCCESS */ - byte* mysql_rec, /* in: row in the MySQL format */ - row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL - handle */ -/************************************************************************* -Builds a dummy query graph used in selects. */ - -void -row_prebuild_sel_graph( -/*===================*/ - row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL - handle */ -/************************************************************************* -Gets pointer to a prebuilt update vector used in updates. If the update -graph has not yet been built in the prebuilt struct, then this function -first builds it. */ - -upd_t* -row_get_prebuilt_update_vector( -/*===========================*/ - /* out: prebuilt update vector */ - row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL - handle */ -/************************************************************************* -Checks if a table is such that we automatically created a clustered -index on it (on row id). 
*/ - -ibool -row_table_got_default_clust_index( -/*==============================*/ - dict_table_t* table); -/************************************************************************* -Calculates the key number used inside MySQL for an Innobase index. We have -to take into account if we generated a default clustered index for the table */ - -ulint -row_get_mysql_key_number_for_index( -/*===============================*/ - dict_index_t* index); -/************************************************************************* -Does an update or delete of a row for MySQL. */ - -int -row_update_for_mysql( -/*=================*/ - /* out: error code or DB_SUCCESS */ - byte* mysql_rec, /* in: the row to be updated, in - the MySQL format */ - row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL - handle */ -/************************************************************************* -This can only be used when srv_locks_unsafe_for_binlog is TRUE or -session is using a READ COMMITTED isolation level. Before -calling this function we must use trx_reset_new_rec_lock_info() and -trx_register_new_rec_lock() to store the information which new record locks -really were set. This function removes a newly set lock under prebuilt->pcur, -and also under prebuilt->clust_pcur. Currently, this is only used and tested -in the case of an UPDATE or a DELETE statement, where the row lock is of the -LOCK_X type. -Thus, this implements a 'mini-rollback' that releases the latest record -locks we set. */ - -int -row_unlock_for_mysql( -/*=================*/ - /* out: error code or DB_SUCCESS */ - row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL - handle */ - ibool has_latches_on_recs);/* TRUE if called so that we have - the latches on the records under pcur - and clust_pcur, and we do not need to - reposition the cursors. */ -/************************************************************************* -Creates an query graph node of 'update' type to be used in the MySQL -interface. 
*/ - -upd_node_t* -row_create_update_node_for_mysql( -/*=============================*/ - /* out, own: update node */ - dict_table_t* table, /* in: table to update */ - mem_heap_t* heap); /* in: mem heap from which allocated */ -/************************************************************************** -Does a cascaded delete or set null in a foreign key operation. */ - -ulint -row_update_cascade_for_mysql( -/*=========================*/ - /* out: error code or DB_SUCCESS */ - que_thr_t* thr, /* in: query thread */ - upd_node_t* node, /* in: update node used in the cascade - or set null operation */ - dict_table_t* table); /* in: table where we do the operation */ -/************************************************************************* -Locks the data dictionary exclusively for performing a table create or other -data dictionary modification operation. */ - -void -row_mysql_lock_data_dictionary( -/*===========================*/ - trx_t* trx); /* in: transaction */ -/************************************************************************* -Unlocks the data dictionary exclusive lock. */ - -void -row_mysql_unlock_data_dictionary( -/*=============================*/ - trx_t* trx); /* in: transaction */ -/************************************************************************* -Locks the data dictionary in shared mode from modifications, for performing -foreign key check, rollback, or other operation invisible to MySQL. */ - -void -row_mysql_freeze_data_dictionary( -/*=============================*/ - trx_t* trx); /* in: transaction */ -/************************************************************************* -Unlocks the data dictionary shared lock. */ - -void -row_mysql_unfreeze_data_dictionary( -/*===============================*/ - trx_t* trx); /* in: transaction */ -/************************************************************************* -Creates a table for MySQL. 
If the name of the table ends in -one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor", -"innodb_table_monitor", then this will also start the printing of monitor -output by the master thread. If the table name ends in "innodb_mem_validate", -InnoDB will try to invoke mem_validate(). */ - -int -row_create_table_for_mysql( -/*=======================*/ - /* out: error code or DB_SUCCESS */ - dict_table_t* table, /* in: table definition */ - trx_t* trx); /* in: transaction handle */ -/************************************************************************* -Does an index creation operation for MySQL. TODO: currently failure -to create an index results in dropping the whole table! This is no problem -currently as all indexes must be created at the same time as the table. */ - -int -row_create_index_for_mysql( -/*=======================*/ - /* out: error number or DB_SUCCESS */ - dict_index_t* index, /* in: index definition */ - trx_t* trx, /* in: transaction handle */ - const ulint* field_lengths); /* in: if not NULL, must contain - dict_index_get_n_fields(index) - actual field lengths for the - index columns, which are - then checked for not being too - large. */ -/************************************************************************* -Scans a table create SQL string and adds to the data dictionary -the foreign key constraints declared in the string. This function -should be called after the indexes for a table have been created. -Each foreign key constraint must be accompanied with indexes in -bot participating tables. The indexes are allowed to contain more -fields than mentioned in the constraint. 
*/ - -int -row_table_add_foreign_constraints( -/*==============================*/ - /* out: error code or DB_SUCCESS */ - trx_t* trx, /* in: transaction */ - const char* sql_string, /* in: table create statement where - foreign keys are declared like: - FOREIGN KEY (a, b) REFERENCES table2(c, d), - table2 can be written also with the - database name before it: test.table2 */ - const char* name, /* in: table full name in the - normalized form - database_name/table_name */ - ibool reject_fks); /* in: if TRUE, fail with error - code DB_CANNOT_ADD_CONSTRAINT if - any foreign keys are found. */ - -/************************************************************************* -The master thread in srv0srv.c calls this regularly to drop tables which -we must drop in background after queries to them have ended. Such lazy -dropping of tables is needed in ALTER TABLE on Unix. */ - -ulint -row_drop_tables_for_mysql_in_background(void); -/*=========================================*/ - /* out: how many tables dropped - + remaining tables in list */ -/************************************************************************* -Get the background drop list length. NOTE: the caller must own the kernel -mutex! */ - -ulint -row_get_background_drop_list_len_low(void); -/*======================================*/ - /* out: how many tables in list */ -/************************************************************************* -Truncates a table for MySQL. */ - -int -row_truncate_table_for_mysql( -/*=========================*/ - /* out: error code or DB_SUCCESS */ - dict_table_t* table, /* in: table handle */ - trx_t* trx); /* in: transaction handle */ -/************************************************************************* -Drops a table for MySQL. If the name of the dropped table ends in -one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor", -"innodb_table_monitor", then this will also stop the printing of monitor -output by the master thread. 
*/ - -int -row_drop_table_for_mysql( -/*=====================*/ - /* out: error code or DB_SUCCESS */ - const char* name, /* in: table name */ - trx_t* trx, /* in: transaction handle */ - ibool drop_db);/* in: TRUE=dropping whole database */ - -/************************************************************************* -Discards the tablespace of a table which stored in an .ibd file. Discarding -means that this function deletes the .ibd file and assigns a new table id for -the table. Also the flag table->ibd_file_missing is set TRUE. */ - -int -row_discard_tablespace_for_mysql( -/*=============================*/ - /* out: error code or DB_SUCCESS */ - const char* name, /* in: table name */ - trx_t* trx); /* in: transaction handle */ -/********************************************************************* -Imports a tablespace. The space id in the .ibd file must match the space id -of the table in the data dictionary. */ - -int -row_import_tablespace_for_mysql( -/*============================*/ - /* out: error code or DB_SUCCESS */ - const char* name, /* in: table name */ - trx_t* trx); /* in: transaction handle */ -/************************************************************************* -Drops a database for MySQL. */ - -int -row_drop_database_for_mysql( -/*========================*/ - /* out: error code or DB_SUCCESS */ - const char* name, /* in: database name which ends to '/' */ - trx_t* trx); /* in: transaction handle */ -/************************************************************************* -Renames a table for MySQL. */ - -int -row_rename_table_for_mysql( -/*=======================*/ - /* out: error code or DB_SUCCESS */ - const char* old_name, /* in: old table name */ - const char* new_name, /* in: new table name */ - trx_t* trx); /* in: transaction handle */ -/************************************************************************* -Checks a table for corruption. 
*/ - -ulint -row_check_table_for_mysql( -/*======================*/ - /* out: DB_ERROR or DB_SUCCESS */ - row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL - handle */ - -/************************************************************************* -Determines if a table is a magic monitor table. */ - -ibool -row_is_magic_monitor_table( -/*=======================*/ - /* out: TRUE if monitor table */ - const char* table_name); /* in: name of the table, in the - form database/table_name */ - -/* A struct describing a place for an individual column in the MySQL -row format which is presented to the table handler in ha_innobase. -This template struct is used to speed up row transformations between -Innobase and MySQL. */ - -typedef struct mysql_row_templ_struct mysql_row_templ_t; -struct mysql_row_templ_struct { - ulint col_no; /* column number of the column */ - ulint rec_field_no; /* field number of the column in an - Innobase record in the current index; - not defined if template_type is - ROW_MYSQL_WHOLE_ROW */ - ulint mysql_col_offset; /* offset of the column in the MySQL - row format */ - ulint mysql_col_len; /* length of the column in the MySQL - row format */ - ulint mysql_null_byte_offset; /* MySQL NULL bit byte offset in a - MySQL record */ - ulint mysql_null_bit_mask; /* bit mask to get the NULL bit, - zero if column cannot be NULL */ - ulint type; /* column type in Innobase mtype - numbers DATA_CHAR... 
*/ - ulint mysql_type; /* MySQL type code; this is always - < 256 */ - ulint mysql_length_bytes; /* if mysql_type - == DATA_MYSQL_TRUE_VARCHAR, this tells - whether we should use 1 or 2 bytes to - store the MySQL true VARCHAR data - length at the start of row in the MySQL - format (NOTE that the MySQL key value - format always uses 2 bytes for the data - len) */ - ulint charset; /* MySQL charset-collation code - of the column, or zero */ - ulint mbminlen; /* minimum length of a char, in bytes, - or zero if not a char type */ - ulint mbmaxlen; /* maximum length of a char, in bytes, - or zero if not a char type */ - ulint is_unsigned; /* if a column type is an integer - type and this field is != 0, then - it is an unsigned integer type */ -}; - -#define MYSQL_FETCH_CACHE_SIZE 8 -/* After fetching this many rows, we start caching them in fetch_cache */ -#define MYSQL_FETCH_CACHE_THRESHOLD 4 - -#define ROW_PREBUILT_ALLOCATED 78540783 -#define ROW_PREBUILT_FREED 26423527 - -/* A struct for (sometimes lazily) prebuilt structures in an Innobase table -handle used within MySQL; these are used to save CPU time. */ - -struct row_prebuilt_struct { - ulint magic_n; /* this magic number is set to - ROW_PREBUILT_ALLOCATED when created - and to ROW_PREBUILT_FREED when the - struct has been freed; used in - debugging */ - dict_table_t* table; /* Innobase table handle */ - trx_t* trx; /* current transaction handle */ - ibool sql_stat_start; /* TRUE when we start processing of - an SQL statement: we may have to set - an intention lock on the table, - create a consistent read view etc. 
*/ - ibool mysql_has_locked; /* this is set TRUE when MySQL - calls external_lock on this handle - with a lock flag, and set FALSE when - with the F_UNLOCK flag */ - ibool clust_index_was_generated; - /* if the user did not define a - primary key in MySQL, then Innobase - automatically generated a clustered - index where the ordering column is - the row id: in this case this flag - is set to TRUE */ - dict_index_t* index; /* current index for a search, if - any */ - ulint read_just_key; /* set to 1 when MySQL calls - ha_innobase::extra with the - argument HA_EXTRA_KEYREAD; it is enough - to read just columns defined in - the index (i.e., no read of the - clustered index record necessary) */ - ibool used_in_HANDLER;/* TRUE if we have been using this - handle in a MySQL HANDLER low level - index cursor command: then we must - store the pcur position even in a - unique search from a clustered index, - because HANDLER allows NEXT and PREV - in such a situation */ - ulint template_type; /* ROW_MYSQL_WHOLE_ROW, - ROW_MYSQL_REC_FIELDS, - ROW_MYSQL_DUMMY_TEMPLATE, or - ROW_MYSQL_NO_TEMPLATE */ - ulint n_template; /* number of elements in the - template */ - ulint null_bitmap_len;/* number of bytes in the SQL NULL - bitmap at the start of a row in the - MySQL format */ - ibool need_to_access_clustered; /* if we are fetching - columns through a secondary index - and at least one column is not in - the secondary index, then this is - set to TRUE */ - ibool templ_contains_blob;/* TRUE if the template contains - BLOB column(s) */ - mysql_row_templ_t* mysql_template;/* template used to transform - rows fast between MySQL and Innobase - formats; memory for this template - is not allocated from 'heap' */ - mem_heap_t* heap; /* memory heap from which - these auxiliary structures are - allocated when needed */ - ins_node_t* ins_node; /* Innobase SQL insert node - used to perform inserts - to the table */ - byte* ins_upd_rec_buff;/* buffer for storing data converted - to the Innobase 
format from the MySQL - format */ - const byte* default_rec; /* the default values of all columns - (a "default row") in MySQL format */ - ulint hint_need_to_fetch_extra_cols; - /* normally this is set to 0; if this - is set to ROW_RETRIEVE_PRIMARY_KEY, - then we should at least retrieve all - columns in the primary key; if this - is set to ROW_RETRIEVE_ALL_COLS, then - we must retrieve all columns in the - key (if read_just_key == 1), or all - columns in the table */ - upd_node_t* upd_node; /* Innobase SQL update node used - to perform updates and deletes */ - que_fork_t* ins_graph; /* Innobase SQL query graph used - in inserts */ - que_fork_t* upd_graph; /* Innobase SQL query graph used - in updates or deletes */ - btr_pcur_t* pcur; /* persistent cursor used in selects - and updates */ - btr_pcur_t* clust_pcur; /* persistent cursor used in - some selects and updates */ - que_fork_t* sel_graph; /* dummy query graph used in - selects */ - dtuple_t* search_tuple; /* prebuilt dtuple used in selects */ - byte row_id[DATA_ROW_ID_LEN]; - /* if the clustered index was - generated, the row id of the - last row fetched is stored - here */ - dtuple_t* clust_ref; /* prebuilt dtuple used in - sel/upd/del */ - ulint select_lock_type;/* LOCK_NONE, LOCK_S, or LOCK_X */ - ulint stored_select_lock_type;/* this field is used to - remember the original select_lock_type - that was decided in ha_innodb.cc, - ::store_lock(), ::external_lock(), - etc. */ - ulint row_read_type; /* ROW_READ_WITH_LOCKS if row locks - should be the obtained for records - under an UPDATE or DELETE cursor. - If innodb_locks_unsafe_for_binlog - is TRUE, this can be set to - ROW_READ_TRY_SEMI_CONSISTENT, so that - if the row under an UPDATE or DELETE - cursor was locked by another - transaction, InnoDB will resort - to reading the last committed value - ('semi-consistent read'). Then, - this field will be set to - ROW_READ_DID_SEMI_CONSISTENT to - indicate that. 
If the row does not - match the WHERE condition, MySQL will - invoke handler::unlock_row() to - clear the flag back to - ROW_READ_TRY_SEMI_CONSISTENT and - to simply skip the row. If - the row matches, the next call to - row_search_for_mysql() will lock - the row. - This eliminates lock waits in some - cases; note that this breaks - serializability. */ - ulint new_rec_locks; /* normally 0; if - srv_locks_unsafe_for_binlog is - TRUE or session is using READ - COMMITTED isolation level, in a - cursor search, if we set a new - record lock on an index, this is - incremented; this is used in - releasing the locks under the - cursors if we are performing an - UPDATE and we determine after - retrieving the row that it does - not need to be locked; thus, - these can be used to implement a - 'mini-rollback' that releases - the latest record locks */ - ulint mysql_prefix_len;/* byte offset of the end of - the last requested column */ - ulint mysql_row_len; /* length in bytes of a row in the - MySQL format */ - ulint n_rows_fetched; /* number of rows fetched after - positioning the current cursor */ - ulint fetch_direction;/* ROW_SEL_NEXT or ROW_SEL_PREV */ - byte* fetch_cache[MYSQL_FETCH_CACHE_SIZE]; - /* a cache for fetched rows if we - fetch many rows from the same cursor: - it saves CPU time to fetch them in a - batch; we reserve mysql_row_len - bytes for each such row; these - pointers point 4 bytes past the - allocated mem buf start, because - there is a 4 byte magic number at the - start and at the end */ - ibool keep_other_fields_on_keyread; /* when using fetch - cache with HA_EXTRA_KEYREAD, don't - overwrite other fields in mysql row - row buffer.*/ - ulint fetch_cache_first;/* position of the first not yet - fetched row in fetch_cache */ - ulint n_fetch_cached; /* number of not yet fetched rows - in fetch_cache */ - mem_heap_t* blob_heap; /* in SELECTS BLOB fields are copied - to this heap */ - mem_heap_t* old_vers_heap; /* memory heap where a previous - version is 
built in consistent read */ - /*----------------------*/ - ulonglong autoinc_last_value;/* last value of AUTO-INC interval */ - ulonglong autoinc_increment;/* The increment step of the auto - increment column. Value must be - greater than or equal to 1. Required to - calculate the next value */ - ulonglong autoinc_offset; /* The offset passed to - get_auto_increment() by MySQL. Required - to calculate the next value */ - ulint autoinc_error; /* The actual error code encountered - while trying to init or read the - autoinc value from the table. We - store it here so that we can return - it to MySQL */ - /*----------------------*/ - ulint magic_n2; /* this should be the same as - magic_n */ -}; - -#define ROW_PREBUILT_FETCH_MAGIC_N 465765687 - -#define ROW_MYSQL_WHOLE_ROW 0 -#define ROW_MYSQL_REC_FIELDS 1 -#define ROW_MYSQL_NO_TEMPLATE 2 -#define ROW_MYSQL_DUMMY_TEMPLATE 3 /* dummy template used in - row_scan_and_check_index */ - -/* Values for hint_need_to_fetch_extra_cols */ -#define ROW_RETRIEVE_PRIMARY_KEY 1 -#define ROW_RETRIEVE_ALL_COLS 2 - -/* Values for row_read_type */ -#define ROW_READ_WITH_LOCKS 0 -#define ROW_READ_TRY_SEMI_CONSISTENT 1 -#define ROW_READ_DID_SEMI_CONSISTENT 2 - -#ifndef UNIV_NONINL -#include "row0mysql.ic" -#endif - -#endif diff --git a/storage/innobase/include/row0mysql.ic b/storage/innobase/include/row0mysql.ic deleted file mode 100644 index aa8a70d8761..00000000000 --- a/storage/innobase/include/row0mysql.ic +++ /dev/null @@ -1,7 +0,0 @@ -/****************************************************** -MySQL interface for Innobase - -(C) 2001 Innobase Oy - -Created 1/23/2001 Heikki Tuuri -*******************************************************/ diff --git a/storage/innobase/include/row0purge.h b/storage/innobase/include/row0purge.h deleted file mode 100644 index 174dd239eb5..00000000000 --- a/storage/innobase/include/row0purge.h +++ /dev/null @@ -1,79 +0,0 @@ -/****************************************************** -Purge obsolete records - 
-(c) 1997 Innobase Oy - -Created 3/14/1997 Heikki Tuuri -*******************************************************/ - -#ifndef row0purge_h -#define row0purge_h - -#include "univ.i" -#include "data0data.h" -#include "btr0types.h" -#include "btr0pcur.h" -#include "dict0types.h" -#include "trx0types.h" -#include "que0types.h" -#include "row0types.h" - -/************************************************************************ -Creates a purge node to a query graph. */ - -purge_node_t* -row_purge_node_create( -/*==================*/ - /* out, own: purge node */ - que_thr_t* parent, /* in: parent node, i.e., a thr node */ - mem_heap_t* heap); /* in: memory heap where created */ -/*************************************************************** -Does the purge operation for a single undo log record. This is a high-level -function used in an SQL execution graph. */ - -que_thr_t* -row_purge_step( -/*===========*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ - -/* Purge node structure */ - -struct purge_node_struct{ - que_common_t common; /* node type: QUE_NODE_PURGE */ - /*----------------------*/ - /* Local storage for this graph node */ - dulint roll_ptr;/* roll pointer to undo log record */ - trx_undo_rec_t* undo_rec;/* undo log record */ - trx_undo_inf_t* reservation;/* reservation for the undo log record in - the purge array */ - dulint undo_no;/* undo number of the record */ - ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC, - ... 
*/ - btr_pcur_t pcur; /* persistent cursor used in searching the - clustered index record */ - ibool found_clust;/* TRUE if the clustered index record - determined by ref was found in the clustered - index, and we were able to position pcur on - it */ - dict_table_t* table; /* table where purge is done */ - ulint cmpl_info;/* compiler analysis info of an update */ - upd_t* update; /* update vector for a clustered index - record */ - dtuple_t* ref; /* NULL, or row reference to the next row to - handle */ - dtuple_t* row; /* NULL, or a copy (also fields copied to - heap) of the indexed fields of the row to - handle */ - dict_index_t* index; /* NULL, or the next index whose record should - be handled */ - mem_heap_t* heap; /* memory heap used as auxiliary storage for - row; this must be emptied after a successful - purge of a row */ -}; - -#ifndef UNIV_NONINL -#include "row0purge.ic" -#endif - -#endif diff --git a/storage/innobase/include/row0purge.ic b/storage/innobase/include/row0purge.ic deleted file mode 100644 index 50aabf0bc1b..00000000000 --- a/storage/innobase/include/row0purge.ic +++ /dev/null @@ -1,8 +0,0 @@ - -/****************************************************** -Purge obsolete records - -(c) 1997 Innobase Oy - -Created 3/14/1997 Heikki Tuuri -*******************************************************/ diff --git a/storage/innobase/include/row0row.h b/storage/innobase/include/row0row.h deleted file mode 100644 index bea7627cd86..00000000000 --- a/storage/innobase/include/row0row.h +++ /dev/null @@ -1,250 +0,0 @@ -/****************************************************** -General row routines - -(c) 1996 Innobase Oy - -Created 4/20/1996 Heikki Tuuri -*******************************************************/ - -#ifndef row0row_h -#define row0row_h - -#include "univ.i" -#include "data0data.h" -#include "dict0types.h" -#include "trx0types.h" -#include "que0types.h" -#include "mtr0mtr.h" -#include "rem0types.h" -#include "read0types.h" -#include "btr0types.h" - 
-/************************************************************************* -Reads the trx id field from a clustered index record. */ -UNIV_INLINE -dulint -row_get_rec_trx_id( -/*===============*/ - /* out: value of the field */ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets);/* in: rec_get_offsets(rec, index) */ -/************************************************************************* -Reads the roll pointer field from a clustered index record. */ -UNIV_INLINE -dulint -row_get_rec_roll_ptr( -/*=================*/ - /* out: value of the field */ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets);/* in: rec_get_offsets(rec, index) */ -/************************************************************************* -Writes the trx id field to a clustered index record. */ -UNIV_INLINE -void -row_set_rec_trx_id( -/*===============*/ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - dulint trx_id);/* in: value of the field */ -/************************************************************************* -Sets the roll pointer field in a clustered index record. */ -UNIV_INLINE -void -row_set_rec_roll_ptr( -/*=================*/ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - dulint roll_ptr);/* in: value of the field */ -/********************************************************************* -When an insert to a table is performed, this function builds the entry which -has to be inserted to an index on the table. 
*/ - -dtuple_t* -row_build_index_entry( -/*==================*/ - /* out: index entry which should be inserted */ - dtuple_t* row, /* in: row which should be inserted to the - table */ - dict_index_t* index, /* in: index on the table */ - mem_heap_t* heap); /* in: memory heap from which the memory for - the index entry is allocated */ -/*********************************************************************** -An inverse function to dict_row_build_index_entry. Builds a row from a -record in a clustered index. */ - -dtuple_t* -row_build( -/*======*/ - /* out, own: row built; see the NOTE below! */ - ulint type, /* in: ROW_COPY_POINTERS or ROW_COPY_DATA; - the latter copies also the data fields to - heap while the first only places pointers to - data fields on the index page, and thus is - more efficient */ - dict_index_t* index, /* in: clustered index */ - rec_t* rec, /* in: record in the clustered index; - NOTE: in the case ROW_COPY_POINTERS - the data fields in the row will point - directly into this record, therefore, - the buffer page of this record must be - at least s-latched and the latch held - as long as the row dtuple is used! */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) - or NULL, in which case this function - will invoke rec_get_offsets() */ - mem_heap_t* heap); /* in: memory heap from which the memory - needed is allocated */ -/*********************************************************************** -Converts an index record to a typed data tuple. */ - -dtuple_t* -row_rec_to_index_entry( -/*===================*/ - /* out, own: index entry built; see the - NOTE below! 
*/ - ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS: - the former copies also the data fields to - heap as the latter only places pointers to - data fields on the index page */ - dict_index_t* index, /* in: index */ - rec_t* rec, /* in: record in the index; - NOTE: in the case ROW_COPY_POINTERS - the data fields in the row will point - directly into this record, therefore, - the buffer page of this record must be - at least s-latched and the latch held - as long as the dtuple is used! */ - mem_heap_t* heap); /* in: memory heap from which the memory - needed is allocated */ -/*********************************************************************** -Builds from a secondary index record a row reference with which we can -search the clustered index record. */ - -dtuple_t* -row_build_row_ref( -/*==============*/ - /* out, own: row reference built; see the - NOTE below! */ - ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS: - the former copies also the data fields to - heap, whereas the latter only places pointers - to data fields on the index page */ - dict_index_t* index, /* in: index */ - rec_t* rec, /* in: record in the index; - NOTE: in the case ROW_COPY_POINTERS - the data fields in the row will point - directly into this record, therefore, - the buffer page of this record must be - at least s-latched and the latch held - as long as the row reference is used! */ - mem_heap_t* heap); /* in: memory heap from which the memory - needed is allocated */ -/*********************************************************************** -Builds from a secondary index record a row reference with which we can -search the clustered index record. */ - -void -row_build_row_ref_in_tuple( -/*=======================*/ - dtuple_t* ref, /* in/out: row reference built; see the - NOTE below! 
*/ - dict_index_t* index, /* in: index */ - rec_t* rec, /* in: record in the index; - NOTE: the data fields in ref will point - directly into this record, therefore, - the buffer page of this record must be - at least s-latched and the latch held - as long as the row reference is used! */ - trx_t* trx); /* in: transaction */ -/*********************************************************************** -From a row build a row reference with which we can search the clustered -index record. */ - -void -row_build_row_ref_from_row( -/*=======================*/ - dtuple_t* ref, /* in/out: row reference built; see the - NOTE below! ref must have the right number - of fields! */ - dict_table_t* table, /* in: table */ - dtuple_t* row); /* in: row - NOTE: the data fields in ref will point - directly into data of this row */ -/*********************************************************************** -Builds from a secondary index record a row reference with which we can -search the clustered index record. */ -UNIV_INLINE -void -row_build_row_ref_fast( -/*===================*/ - dtuple_t* ref, /* in: typed data tuple where the - reference is built */ - const ulint* map, /* in: array of field numbers in rec - telling how ref should be built from - the fields of rec */ - rec_t* rec, /* in: record in the index; must be - preserved while ref is used, as we do - not copy field values to heap */ - const ulint* offsets);/* in: array returned by rec_get_offsets() */ -/******************************************************************* -Searches the clustered index record for a row, if we have the row -reference. */ - -ibool -row_search_on_row_ref( -/*==================*/ - /* out: TRUE if found */ - btr_pcur_t* pcur, /* in/out: persistent cursor, which must - be closed by the caller */ - ulint mode, /* in: BTR_MODIFY_LEAF, ... 
*/ - dict_table_t* table, /* in: table */ - dtuple_t* ref, /* in: row reference */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************* -Fetches the clustered index record for a secondary index record. The latches -on the secondary index record are preserved. */ - -rec_t* -row_get_clust_rec( -/*==============*/ - /* out: record or NULL, if no record found */ - ulint mode, /* in: BTR_MODIFY_LEAF, ... */ - rec_t* rec, /* in: record in a secondary index */ - dict_index_t* index, /* in: secondary index */ - dict_index_t** clust_index,/* out: clustered index */ - mtr_t* mtr); /* in: mtr */ -/******************************************************************* -Searches an index record. */ - -ibool -row_search_index_entry( -/*===================*/ - /* out: TRUE if found */ - dict_index_t* index, /* in: index */ - dtuple_t* entry, /* in: index entry */ - ulint mode, /* in: BTR_MODIFY_LEAF, ... */ - btr_pcur_t* pcur, /* in/out: persistent cursor, which must - be closed by the caller */ - mtr_t* mtr); /* in: mtr */ - - -#define ROW_COPY_DATA 1 -#define ROW_COPY_POINTERS 2 - -/* The allowed latching order of index records is the following: -(1) a secondary index record -> -(2) the clustered index record -> -(3) rollback segment data for the clustered index record. - -No new latches may be obtained while the kernel mutex is reserved. -However, the kernel mutex can be reserved while latches are owned. 
*/ - -#ifndef UNIV_NONINL -#include "row0row.ic" -#endif - -#endif diff --git a/storage/innobase/include/row0row.ic b/storage/innobase/include/row0row.ic deleted file mode 100644 index de417f3d971..00000000000 --- a/storage/innobase/include/row0row.ic +++ /dev/null @@ -1,182 +0,0 @@ -/****************************************************** -General row routines - -(c) 1996 Innobase Oy - -Created 4/20/1996 Heikki Tuuri -*******************************************************/ - -#include "dict0dict.h" -#include "rem0rec.h" -#include "trx0undo.h" - -/************************************************************************* -Reads the trx id or roll ptr field from a clustered index record: this function -is slower than the specialized inline functions. */ - -dulint -row_get_rec_sys_field( -/*==================*/ - /* out: value of the field */ - ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets);/* in: rec_get_offsets(rec, index) */ -/************************************************************************* -Sets the trx id or roll ptr field in a clustered index record: this function -is slower than the specialized inline functions. */ - -void -row_set_rec_sys_field( -/*==================*/ - /* out: value of the field */ - ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - dulint val); /* in: value to set */ - -/************************************************************************* -Reads the trx id field from a clustered index record. 
*/ -UNIV_INLINE -dulint -row_get_rec_trx_id( -/*===============*/ - /* out: value of the field */ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets)/* in: rec_get_offsets(rec, index) */ -{ - ulint offset; - - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(rec_offs_validate(rec, index, offsets)); - - offset = index->trx_id_offset; - - if (offset) { - return(trx_read_trx_id(rec + offset)); - } else { - return(row_get_rec_sys_field(DATA_TRX_ID, - rec, index, offsets)); - } -} - -/************************************************************************* -Reads the roll pointer field from a clustered index record. */ -UNIV_INLINE -dulint -row_get_rec_roll_ptr( -/*=================*/ - /* out: value of the field */ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets)/* in: rec_get_offsets(rec, index) */ -{ - ulint offset; - - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(rec_offs_validate(rec, index, offsets)); - - offset = index->trx_id_offset; - - if (offset) { - return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN)); - } else { - return(row_get_rec_sys_field(DATA_ROLL_PTR, - rec, index, offsets)); - } -} - -/************************************************************************* -Writes the trx id field to a clustered index record. 
*/ -UNIV_INLINE -void -row_set_rec_trx_id( -/*===============*/ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - dulint trx_id) /* in: value of the field */ -{ - ulint offset; - - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(rec_offs_validate(rec, index, offsets)); - - offset = index->trx_id_offset; - - if (offset) { - trx_write_trx_id(rec + offset, trx_id); - } else { - row_set_rec_sys_field(DATA_TRX_ID, - rec, index, offsets, trx_id); - } -} - -/************************************************************************* -Sets the roll pointer field in a clustered index record. */ -UNIV_INLINE -void -row_set_rec_roll_ptr( -/*=================*/ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - dulint roll_ptr)/* in: value of the field */ -{ - ulint offset; - - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(rec_offs_validate(rec, index, offsets)); - - offset = index->trx_id_offset; - - if (offset) { - trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr); - } else { - row_set_rec_sys_field(DATA_ROLL_PTR, - rec, index, offsets, roll_ptr); - } -} - -/*********************************************************************** -Builds from a secondary index record a row reference with which we can -search the clustered index record. 
*/ -UNIV_INLINE -void -row_build_row_ref_fast( -/*===================*/ - dtuple_t* ref, /* in: typed data tuple where the - reference is built */ - const ulint* map, /* in: array of field numbers in rec - telling how ref should be built from - the fields of rec */ - rec_t* rec, /* in: record in the index; must be - preserved while ref is used, as we do - not copy field values to heap */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - dfield_t* dfield; - byte* field; - ulint len; - ulint ref_len; - ulint field_no; - ulint i; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - ref_len = dtuple_get_n_fields(ref); - - for (i = 0; i < ref_len; i++) { - dfield = dtuple_get_nth_field(ref, i); - - field_no = *(map + i); - - if (field_no != ULINT_UNDEFINED) { - - field = rec_get_nth_field(rec, offsets, - field_no, &len); - dfield_set_data(dfield, field, len); - } - } -} diff --git a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h deleted file mode 100644 index a0a4ccb973b..00000000000 --- a/storage/innobase/include/row0sel.h +++ /dev/null @@ -1,392 +0,0 @@ -/****************************************************** -Select - -(c) 1997 Innobase Oy - -Created 12/19/1997 Heikki Tuuri -*******************************************************/ - -#ifndef row0sel_h -#define row0sel_h - -#include "univ.i" -#include "data0data.h" -#include "que0types.h" -#include "dict0types.h" -#include "trx0types.h" -#include "row0types.h" -#include "que0types.h" -#include "pars0sym.h" -#include "btr0pcur.h" -#include "read0read.h" -#include "row0mysql.h" - -/************************************************************************* -Creates a select node struct. 
*/ - -sel_node_t* -sel_node_create( -/*============*/ - /* out, own: select node struct */ - mem_heap_t* heap); /* in: memory heap where created */ -/************************************************************************* -Frees the memory private to a select node when a query graph is freed, -does not free the heap where the node was originally created. */ - -void -sel_node_free_private( -/*==================*/ - sel_node_t* node); /* in: select node struct */ -/************************************************************************* -Frees a prefetch buffer for a column, including the dynamically allocated -memory for data stored there. */ - -void -sel_col_prefetch_buf_free( -/*======================*/ - sel_buf_t* prefetch_buf); /* in, own: prefetch buffer */ -/************************************************************************* -Gets the plan node for the nth table in a join. */ -UNIV_INLINE -plan_t* -sel_node_get_nth_plan( -/*==================*/ - sel_node_t* node, - ulint i); -/************************************************************************** -Performs a select step. This is a high-level function used in SQL execution -graphs. */ - -que_thr_t* -row_sel_step( -/*=========*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************** -Performs an execution step of an open or close cursor statement node. */ -UNIV_INLINE -que_thr_t* -open_step( -/*======*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************** -Performs a fetch for a cursor. 
*/ - -que_thr_t* -fetch_step( -/*=======*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ -/******************************************************************** -Sample callback function for fetch that prints each row.*/ - -void* -row_fetch_print( -/*============*/ - /* out: always returns non-NULL */ - void* row, /* in: sel_node_t* */ - void* user_arg); /* in: not used */ -/******************************************************************** -Callback function for fetch that stores an unsigned 4 byte integer to the -location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length -= 4. */ - -void* -row_fetch_store_uint4( -/*==================*/ - /* out: always returns NULL */ - void* row, /* in: sel_node_t* */ - void* user_arg); /* in: data pointer */ -/*************************************************************** -Prints a row in a select result. */ - -que_thr_t* -row_printf_step( -/*============*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ -/******************************************************************** -Converts a key value stored in MySQL format to an Innobase dtuple. The last -field of the key value may be just a prefix of a fixed length field: hence -the parameter key_len. But currently we do not allow search keys where the -last field is only a prefix of the full key field len and print a warning if -such appears. */ - -void -row_sel_convert_mysql_key_to_innobase( -/*==================================*/ - dtuple_t* tuple, /* in: tuple where to build; - NOTE: we assume that the type info - in the tuple is already according - to index! 
*/ - byte* buf, /* in: buffer to use in field - conversions */ - ulint buf_len, /* in: buffer length */ - dict_index_t* index, /* in: index of the key value */ - byte* key_ptr, /* in: MySQL key value */ - ulint key_len, /* in: MySQL key value length */ - trx_t* trx); /* in: transaction */ -/************************************************************************ -Searches for rows in the database. This is used in the interface to -MySQL. This function opens a cursor, and also implements fetch next -and fetch prev. NOTE that if we do a search with a full key value -from a unique index (ROW_SEL_EXACT), then we will not store the cursor -position and fetch next or fetch prev must not be tried to the cursor! */ - -ulint -row_search_for_mysql( -/*=================*/ - /* out: DB_SUCCESS, - DB_RECORD_NOT_FOUND, - DB_END_OF_INDEX, DB_DEADLOCK, - DB_LOCK_TABLE_FULL, - or DB_TOO_BIG_RECORD */ - byte* buf, /* in/out: buffer for the fetched - row in the MySQL format */ - ulint mode, /* in: search mode PAGE_CUR_L, ... */ - row_prebuilt_t* prebuilt, /* in: prebuilt struct for the - table handle; this contains the info - of search_tuple, index; if search - tuple contains 0 fields then we - position the cursor at the start or - the end of the index, depending on - 'mode' */ - ulint match_mode, /* in: 0 or ROW_SEL_EXACT or - ROW_SEL_EXACT_PREFIX */ - ulint direction); /* in: 0 or ROW_SEL_NEXT or - ROW_SEL_PREV; NOTE: if this is != 0, - then prebuilt must have a pcur - with stored position! In opening of a - cursor 'direction' should be 0. */ -/*********************************************************************** -Checks if MySQL at the moment is allowed for this table to retrieve a -consistent read result, or store it to the query cache. 
*/ - -ibool -row_search_check_if_query_cache_permitted( -/*======================================*/ - /* out: TRUE if storing or retrieving - from the query cache is permitted */ - trx_t* trx, /* in: transaction object */ - const char* norm_name); /* in: concatenation of database name, - '/' char, table name */ -/*********************************************************************** -Read the max AUTOINC value from an index. */ - -ulint -row_search_max_autoinc( -/*===================*/ - /* out: DB_SUCCESS if all OK else - error code */ - dict_index_t* index, /* in: index to search */ - const char* col_name, /* in: autoinc column name */ - ib_ulonglong* value); /* out: AUTOINC value read */ - -/* A structure for caching column values for prefetched rows */ -struct sel_buf_struct{ - byte* data; /* data, or NULL; if not NULL, this field - has allocated memory which must be explicitly - freed; can be != NULL even when len is - UNIV_SQL_NULL */ - ulint len; /* data length or UNIV_SQL_NULL */ - ulint val_buf_size; - /* size of memory buffer allocated for data: - this can be more than len; this is defined - when data != NULL */ -}; - -struct plan_struct{ - dict_table_t* table; /* table struct in the dictionary - cache */ - dict_index_t* index; /* table index used in the search */ - btr_pcur_t pcur; /* persistent cursor used to search - the index */ - ibool asc; /* TRUE if cursor traveling upwards */ - ibool pcur_is_open; /* TRUE if pcur has been positioned - and we can try to fetch new rows */ - ibool cursor_at_end; /* TRUE if the cursor is open but - we know that there are no more - qualifying rows left to retrieve from - the index tree; NOTE though, that - there may still be unprocessed rows in - the prefetch stack; always FALSE when - pcur_is_open is FALSE */ - ibool stored_cursor_rec_processed; - /* TRUE if the pcur position has been - stored and the record it is positioned - on has already been processed */ - que_node_t** tuple_exps; /* array of expressions which 
are used - to calculate the field values in the - search tuple: there is one expression - for each field in the search tuple */ - dtuple_t* tuple; /* search tuple */ - ulint mode; /* search mode: PAGE_CUR_G, ... */ - ulint n_exact_match; /* number of first fields in the search - tuple which must be exactly matched */ - ibool unique_search; /* TRUE if we are searching an - index record with a unique key */ - ulint n_rows_fetched; /* number of rows fetched using pcur - after it was opened */ - ulint n_rows_prefetched;/* number of prefetched rows cached - for fetch: fetching several rows in - the same mtr saves CPU time */ - ulint first_prefetched;/* index of the first cached row in - select buffer arrays for each column */ - ibool no_prefetch; /* no prefetch for this table */ - sym_node_list_t columns; /* symbol table nodes for the columns - to retrieve from the table */ - UT_LIST_BASE_NODE_T(func_node_t) - end_conds; /* conditions which determine the - fetch limit of the index segment we - have to look at: when one of these - fails, the result set has been - exhausted for the cursor in this - index; these conditions are normalized - so that in a comparison the column - for this table is the first argument */ - UT_LIST_BASE_NODE_T(func_node_t) - other_conds; /* the rest of search conditions we can - test at this table in a join */ - ibool must_get_clust; /* TRUE if index is a non-clustered - index and we must also fetch the - clustered index record; this is the - case if the non-clustered record does - not contain all the needed columns, or - if this is a single-table explicit - cursor, or a searched update or - delete */ - ulint* clust_map; /* map telling how clust_ref is built - from the fields of a non-clustered - record */ - dtuple_t* clust_ref; /* the reference to the clustered - index entry is built here if index is - a non-clustered index */ - btr_pcur_t clust_pcur; /* if index is non-clustered, we use - this pcur to search the clustered - index */ - 
mem_heap_t* old_vers_heap; /* memory heap used in building an old - version of a row, or NULL */ -}; - -struct sel_node_struct{ - que_common_t common; /* node type: QUE_NODE_SELECT */ - ulint state; /* node state */ - que_node_t* select_list; /* select list */ - sym_node_t* into_list; /* variables list or NULL */ - sym_node_t* table_list; /* table list */ - ibool asc; /* TRUE if the rows should be fetched - in an ascending order */ - ibool set_x_locks; /* TRUE if the cursor is for update or - delete, which means that a row x-lock - should be placed on the cursor row */ - ibool select_will_do_update; - /* TRUE if the select is for a searched - update which can be performed in-place: - in this case the select will take care - of the update */ - ulint latch_mode; /* BTR_SEARCH_LEAF, or BTR_MODIFY_LEAF - if select_will_do_update is TRUE */ - ulint row_lock_mode; /* LOCK_X or LOCK_S */ - ulint n_tables; /* number of tables */ - ulint fetch_table; /* number of the next table to access - in the join */ - plan_t* plans; /* array of n_tables many plan nodes - containing the search plan and the - search data structures */ - que_node_t* search_cond; /* search condition */ - read_view_t* read_view; /* if the query is a non-locking - consistent read, its read view is - placed here, otherwise NULL */ - ibool consistent_read;/* TRUE if the select is a consistent, - non-locking read */ - order_node_t* order_by; /* order by column definition, or - NULL */ - ibool is_aggregate; /* TRUE if the select list consists of - aggregate functions */ - ibool aggregate_already_fetched; - /* TRUE if the aggregate row has - already been fetched for the current - cursor */ - ibool can_get_updated;/* this is TRUE if the select - is in a single-table explicit - cursor which can get updated - within the stored procedure, - or in a searched update or - delete; NOTE that to determine - of an explicit cursor if it - can get updated, the parser - checks from a stored procedure - if it contains 
positioned - update or delete statements */ - sym_node_t* explicit_cursor;/* not NULL if an explicit cursor */ - UT_LIST_BASE_NODE_T(sym_node_t) - copy_variables; /* variables whose values we have to - copy when an explicit cursor is opened, - so that they do not change between - fetches */ -}; - -/* Select node states */ -#define SEL_NODE_CLOSED 0 /* it is a declared cursor which is not - currently open */ -#define SEL_NODE_OPEN 1 /* intention locks not yet set on - tables */ -#define SEL_NODE_FETCH 2 /* intention locks have been set */ -#define SEL_NODE_NO_MORE_ROWS 3 /* cursor has reached the result set - end */ - -/* Fetch statement node */ -struct fetch_node_struct{ - que_common_t common; /* type: QUE_NODE_FETCH */ - sel_node_t* cursor_def; /* cursor definition */ - sym_node_t* into_list; /* variables to set */ - - pars_user_func_t* - func; /* User callback function or NULL. - The first argument to the function - is a sel_node_t*, containing the - results of the SELECT operation for - one row. If the function returns - NULL, it is not interested in - further rows and the cursor is - modified so (cursor % NOTFOUND) is - true. If it returns not-NULL, - continue normally. See - row_fetch_print() for an example - (and a useful debugging tool). 
*/ -}; - -/* Open or close cursor statement node */ -struct open_node_struct{ - que_common_t common; /* type: QUE_NODE_OPEN */ - ulint op_type; /* ROW_SEL_OPEN_CURSOR or - ROW_SEL_CLOSE_CURSOR */ - sel_node_t* cursor_def; /* cursor definition */ -}; - -/* Row printf statement node */ -struct row_printf_node_struct{ - que_common_t common; /* type: QUE_NODE_ROW_PRINTF */ - sel_node_t* sel_node; /* select */ -}; - -#define ROW_SEL_OPEN_CURSOR 0 -#define ROW_SEL_CLOSE_CURSOR 1 - -/* Flags for the MySQL interface */ -#define ROW_SEL_NEXT 1 -#define ROW_SEL_PREV 2 - -#define ROW_SEL_EXACT 1 /* search using a complete key value */ -#define ROW_SEL_EXACT_PREFIX 2 /* search using a key prefix which - must match to rows: the prefix may - contain an incomplete field (the - last field in prefix may be just - a prefix of a fixed length column) */ - -#ifndef UNIV_NONINL -#include "row0sel.ic" -#endif - -#endif diff --git a/storage/innobase/include/row0sel.ic b/storage/innobase/include/row0sel.ic deleted file mode 100644 index 1f92b99271e..00000000000 --- a/storage/innobase/include/row0sel.ic +++ /dev/null @@ -1,88 +0,0 @@ -/****************************************************** -Select - -(c) 1997 Innobase Oy - -Created 12/19/1997 Heikki Tuuri -*******************************************************/ - -#include "que0que.h" - -/************************************************************************* -Gets the plan node for the nth table in a join. 
*/ -UNIV_INLINE -plan_t* -sel_node_get_nth_plan( -/*==================*/ - /* out: plan node */ - sel_node_t* node, /* in: select node */ - ulint i) /* in: get ith plan node */ -{ - ut_ad(i < node->n_tables); - - return(node->plans + i); -} - -/************************************************************************* -Resets the cursor defined by sel_node to the SEL_NODE_OPEN state, which means -that it will start fetching from the start of the result set again, regardless -of where it was before, and it will set intention locks on the tables. */ -UNIV_INLINE -void -sel_node_reset_cursor( -/*==================*/ - sel_node_t* node) /* in: select node */ -{ - node->state = SEL_NODE_OPEN; -} - -/************************************************************************** -Performs an execution step of an open or close cursor statement node. */ -UNIV_INLINE -que_thr_t* -open_step( -/*======*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - sel_node_t* sel_node; - open_node_t* node; - ulint err; - - ut_ad(thr); - - node = thr->run_node; - ut_ad(que_node_get_type(node) == QUE_NODE_OPEN); - - sel_node = node->cursor_def; - - err = DB_SUCCESS; - - if (node->op_type == ROW_SEL_OPEN_CURSOR) { - - /* if (sel_node->state == SEL_NODE_CLOSED) { */ - - sel_node_reset_cursor(sel_node); - /* } else { - err = DB_ERROR; - } */ - } else { - if (sel_node->state != SEL_NODE_CLOSED) { - - sel_node->state = SEL_NODE_CLOSED; - } else { - err = DB_ERROR; - } - } - - if (UNIV_EXPECT(err, DB_SUCCESS) != DB_SUCCESS) { - /* SQL error detected */ - fprintf(stderr, "SQL error %lu\n", (ulong) err); - - ut_error; - } - - thr->run_node = que_node_get_parent(node); - - return(thr); -} diff --git a/storage/innobase/include/row0types.h b/storage/innobase/include/row0types.h deleted file mode 100644 index 56ca8711848..00000000000 --- a/storage/innobase/include/row0types.h +++ /dev/null @@ -1,37 +0,0 @@ -/****************************************************** 
-Row operation global types - -(c) 1996 Innobase Oy - -Created 12/27/1996 Heikki Tuuri -*******************************************************/ - -#ifndef row0types_h -#define row0types_h - -typedef struct plan_struct plan_t; - -typedef struct upd_struct upd_t; - -typedef struct upd_field_struct upd_field_t; - -typedef struct upd_node_struct upd_node_t; - -typedef struct del_node_struct del_node_t; - -typedef struct ins_node_struct ins_node_t; - -typedef struct sel_node_struct sel_node_t; - -typedef struct open_node_struct open_node_t; - -typedef struct fetch_node_struct fetch_node_t; - -typedef struct row_printf_node_struct row_printf_node_t; -typedef struct sel_buf_struct sel_buf_t; - -typedef struct undo_node_struct undo_node_t; - -typedef struct purge_node_struct purge_node_t; - -#endif diff --git a/storage/innobase/include/row0uins.h b/storage/innobase/include/row0uins.h deleted file mode 100644 index e28d5363048..00000000000 --- a/storage/innobase/include/row0uins.h +++ /dev/null @@ -1,36 +0,0 @@ -/****************************************************** -Fresh insert undo - -(c) 1996 Innobase Oy - -Created 2/25/1997 Heikki Tuuri -*******************************************************/ - -#ifndef row0uins_h -#define row0uins_h - -#include "univ.i" -#include "data0data.h" -#include "dict0types.h" -#include "trx0types.h" -#include "que0types.h" -#include "row0types.h" -#include "mtr0mtr.h" - -/*************************************************************** -Undoes a fresh insert of a row to a table. A fresh insert means that -the same clustered index unique key did not have any record, even delete -marked, at the time of the insert. 
*/ - -ulint -row_undo_ins( -/*=========*/ - /* out: DB_SUCCESS */ - undo_node_t* node); /* in: row undo node */ - - -#ifndef UNIV_NONINL -#include "row0uins.ic" -#endif - -#endif diff --git a/storage/innobase/include/row0uins.ic b/storage/innobase/include/row0uins.ic deleted file mode 100644 index 2b3d5a10f95..00000000000 --- a/storage/innobase/include/row0uins.ic +++ /dev/null @@ -1,8 +0,0 @@ -/****************************************************** -Fresh insert undo - -(c) 1996 Innobase Oy - -Created 2/25/1997 Heikki Tuuri -*******************************************************/ - diff --git a/storage/innobase/include/row0umod.h b/storage/innobase/include/row0umod.h deleted file mode 100644 index f22945e6f12..00000000000 --- a/storage/innobase/include/row0umod.h +++ /dev/null @@ -1,35 +0,0 @@ -/****************************************************** -Undo modify of a row - -(c) 1997 Innobase Oy - -Created 2/27/1997 Heikki Tuuri -*******************************************************/ - -#ifndef row0umod_h -#define row0umod_h - -#include "univ.i" -#include "data0data.h" -#include "dict0types.h" -#include "trx0types.h" -#include "que0types.h" -#include "row0types.h" -#include "mtr0mtr.h" - -/*************************************************************** -Undoes a modify operation on a row of a table. 
*/ - -ulint -row_undo_mod( -/*=========*/ - /* out: DB_SUCCESS or error code */ - undo_node_t* node, /* in: row undo node */ - que_thr_t* thr); /* in: query thread */ - - -#ifndef UNIV_NONINL -#include "row0umod.ic" -#endif - -#endif diff --git a/storage/innobase/include/row0umod.ic b/storage/innobase/include/row0umod.ic deleted file mode 100644 index fcbf4dbc1f3..00000000000 --- a/storage/innobase/include/row0umod.ic +++ /dev/null @@ -1,7 +0,0 @@ -/****************************************************** -Undo modify of a row - -(c) 1997 Innobase Oy - -Created 2/27/1997 Heikki Tuuri -*******************************************************/ diff --git a/storage/innobase/include/row0undo.h b/storage/innobase/include/row0undo.h deleted file mode 100644 index 0be09ed1822..00000000000 --- a/storage/innobase/include/row0undo.h +++ /dev/null @@ -1,115 +0,0 @@ -/****************************************************** -Row undo - -(c) 1997 Innobase Oy - -Created 1/8/1997 Heikki Tuuri -*******************************************************/ - -#ifndef row0undo_h -#define row0undo_h - -#include "univ.i" -#include "mtr0mtr.h" -#include "trx0sys.h" -#include "btr0types.h" -#include "btr0pcur.h" -#include "dict0types.h" -#include "trx0types.h" -#include "que0types.h" -#include "row0types.h" - -/************************************************************************ -Creates a row undo node to a query graph. */ - -undo_node_t* -row_undo_node_create( -/*=================*/ - /* out, own: undo node */ - trx_t* trx, /* in: transaction */ - que_thr_t* parent, /* in: parent node, i.e., a thr node */ - mem_heap_t* heap); /* in: memory heap where created */ -/*************************************************************** -Looks for the clustered index record when node has the row reference. -The pcur in node is used in the search. If found, stores the row to node, -and stores the position of pcur, and detaches it. The pcur must be closed -by the caller in any case. 
*/ - -ibool -row_undo_search_clust_to_pcur( -/*==========================*/ - /* out: TRUE if found; NOTE the node->pcur - must be closed by the caller, regardless of - the return value */ - undo_node_t* node); /* in: row undo node */ -/*************************************************************** -Undoes a row operation in a table. This is a high-level function used -in SQL execution graphs. */ - -que_thr_t* -row_undo_step( -/*==========*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ - -/* A single query thread will try to perform the undo for all successive -versions of a clustered index record, if the transaction has modified it -several times during the execution which is rolled back. It may happen -that the task is transferred to another query thread, if the other thread -is assigned to handle an undo log record in the chain of different versions -of the record, and the other thread happens to get the x-latch to the -clustered index record at the right time. - If a query thread notices that the clustered index record it is looking -for is missing, or the roll ptr field in the record doed not point to the -undo log record the thread was assigned to handle, then it gives up the undo -task for that undo log record, and fetches the next. This situation can occur -just in the case where the transaction modified the same record several times -and another thread is currently doing the undo for successive versions of -that index record. */ - -/* Undo node structure */ - -struct undo_node_struct{ - que_common_t common; /* node type: QUE_NODE_UNDO */ - ulint state; /* node execution state */ - trx_t* trx; /* trx for which undo is done */ - dulint roll_ptr;/* roll pointer to undo log record */ - trx_undo_rec_t* undo_rec;/* undo log record */ - dulint undo_no;/* undo number of the record */ - ulint rec_type;/* undo log record type: TRX_UNDO_INSERT_REC, - ... 
*/ - dulint new_roll_ptr; /* roll ptr to restore to clustered index - record */ - dulint new_trx_id; /* trx id to restore to clustered index - record */ - btr_pcur_t pcur; /* persistent cursor used in searching the - clustered index record */ - dict_table_t* table; /* table where undo is done */ - ulint cmpl_info;/* compiler analysis of an update */ - upd_t* update; /* update vector for a clustered index - record */ - dtuple_t* ref; /* row reference to the next row to handle */ - dtuple_t* row; /* a copy (also fields copied to heap) of the - row to handle */ - dict_index_t* index; /* the next index whose record should be - handled */ - mem_heap_t* heap; /* memory heap used as auxiliary storage for - row; this must be emptied after undo is tried - on a row */ -}; - -/* Execution states for an undo node */ -#define UNDO_NODE_FETCH_NEXT 1 /* we should fetch the next undo log - record */ -#define UNDO_NODE_PREV_VERS 2 /* the roll ptr to previous version of - a row is stored in node, and undo - should be done based on it */ -#define UNDO_NODE_INSERT 3 -#define UNDO_NODE_MODIFY 4 - - -#ifndef UNIV_NONINL -#include "row0undo.ic" -#endif - -#endif diff --git a/storage/innobase/include/row0undo.ic b/storage/innobase/include/row0undo.ic deleted file mode 100644 index e7f89c7de67..00000000000 --- a/storage/innobase/include/row0undo.ic +++ /dev/null @@ -1,7 +0,0 @@ -/****************************************************** -Row undo - -(c) 1997 Innobase Oy - -Created 1/8/1997 Heikki Tuuri -*******************************************************/ diff --git a/storage/innobase/include/row0upd.h b/storage/innobase/include/row0upd.h deleted file mode 100644 index efbc6d6facf..00000000000 --- a/storage/innobase/include/row0upd.h +++ /dev/null @@ -1,432 +0,0 @@ -/****************************************************** -Update of a row - -(c) 1996 Innobase Oy - -Created 12/27/1996 Heikki Tuuri -*******************************************************/ - -#ifndef row0upd_h -#define 
row0upd_h - -#include "univ.i" -#include "data0data.h" -#include "btr0types.h" -#include "btr0pcur.h" -#include "dict0types.h" -#include "trx0types.h" -#include "que0types.h" -#include "row0types.h" -#include "pars0types.h" - -/************************************************************************* -Creates an update vector object. */ -UNIV_INLINE -upd_t* -upd_create( -/*=======*/ - /* out, own: update vector object */ - ulint n, /* in: number of fields */ - mem_heap_t* heap); /* in: heap from which memory allocated */ -/************************************************************************* -Returns the number of fields in the update vector == number of columns -to be updated by an update vector. */ -UNIV_INLINE -ulint -upd_get_n_fields( -/*=============*/ - /* out: number of fields */ - upd_t* update); /* in: update vector */ -/************************************************************************* -Returns the nth field of an update vector. */ -UNIV_INLINE -upd_field_t* -upd_get_nth_field( -/*==============*/ - /* out: update vector field */ - upd_t* update, /* in: update vector */ - ulint n); /* in: field position in update vector */ -/************************************************************************* -Sets an index field number to be updated by an update vector field. */ -UNIV_INLINE -void -upd_field_set_field_no( -/*===================*/ - upd_field_t* upd_field, /* in: update vector field */ - ulint field_no, /* in: field number in a clustered - index */ - dict_index_t* index, /* in: index */ - trx_t* trx); /* in: transaction */ -/************************************************************************* -Writes into the redo log the values of trx id and roll ptr and enough info -to determine their positions within a clustered index record. 
*/ - -byte* -row_upd_write_sys_vals_to_log( -/*==========================*/ - /* out: new pointer to mlog */ - dict_index_t* index, /* in: clustered index */ - trx_t* trx, /* in: transaction */ - dulint roll_ptr,/* in: roll ptr of the undo log record */ - byte* log_ptr,/* pointer to a buffer of size > 20 opened - in mlog */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************* -Updates the trx id and roll ptr field in a clustered index record when -a row is updated or marked deleted. */ -UNIV_INLINE -void -row_upd_rec_sys_fields( -/*===================*/ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - trx_t* trx, /* in: transaction */ - dulint roll_ptr);/* in: roll ptr of the undo log record */ -/************************************************************************* -Sets the trx id or roll ptr field of a clustered index entry. */ - -void -row_upd_index_entry_sys_field( -/*==========================*/ - dtuple_t* entry, /* in: index entry, where the memory buffers - for sys fields are already allocated: - the function just copies the new values to - them */ - dict_index_t* index, /* in: clustered index */ - ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ - dulint val); /* in: value to write */ -/************************************************************************* -Creates an update node for a query graph. */ - -upd_node_t* -upd_node_create( -/*============*/ - /* out, own: update node */ - mem_heap_t* heap); /* in: mem heap where created */ -/*************************************************************** -Writes to the redo log the new values of the fields occurring in the index. 
*/ - -void -row_upd_index_write_log( -/*====================*/ - upd_t* update, /* in: update vector */ - byte* log_ptr,/* in: pointer to mlog buffer: must contain at least - MLOG_BUF_MARGIN bytes of free space; the buffer is - closed within this function */ - mtr_t* mtr); /* in: mtr into whose log to write */ -/*************************************************************** -Returns TRUE if row update changes size of some field in index or if some -field to be updated is stored externally in rec or update. */ - -ibool -row_upd_changes_field_size_or_external( -/*===================================*/ - /* out: TRUE if the update changes the size of - some field in index or the field is external - in rec or update */ - dict_index_t* index, /* in: index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - upd_t* update);/* in: update vector */ -/*************************************************************** -Replaces the new column values stored in the update vector to the record -given. No field size changes are allowed. This function is used only for -a clustered index */ - -void -row_upd_rec_in_place( -/*=================*/ - rec_t* rec, /* in/out: record where replaced */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - upd_t* update);/* in: update vector */ -/******************************************************************* -Builds an update vector from those fields which in a secondary index entry -differ from a record that has the equal ordering fields. NOTE: we compare -the fields as binary strings! 
*/ - -upd_t* -row_upd_build_sec_rec_difference_binary( -/*====================================*/ - /* out, own: update vector of differing - fields */ - dict_index_t* index, /* in: index */ - dtuple_t* entry, /* in: entry to insert */ - rec_t* rec, /* in: secondary index record */ - trx_t* trx, /* in: transaction */ - mem_heap_t* heap); /* in: memory heap from which allocated */ -/******************************************************************* -Builds an update vector from those fields, excluding the roll ptr and -trx id fields, which in an index entry differ from a record that has -the equal ordering fields. NOTE: we compare the fields as binary strings! */ - -upd_t* -row_upd_build_difference_binary( -/*============================*/ - /* out, own: update vector of differing - fields, excluding roll ptr and trx id */ - dict_index_t* index, /* in: clustered index */ - dtuple_t* entry, /* in: entry to insert */ - ulint* ext_vec,/* in: array containing field numbers of - externally stored fields in entry, or NULL */ - ulint n_ext_vec,/* in: number of fields in ext_vec */ - rec_t* rec, /* in: clustered index record */ - trx_t* trx, /* in: transaction */ - mem_heap_t* heap); /* in: memory heap from which allocated */ -/*************************************************************** -Replaces the new column values stored in the update vector to the index entry -given. */ - -void -row_upd_index_replace_new_col_vals_index_pos( -/*=========================================*/ - dtuple_t* entry, /* in/out: index entry where replaced */ - dict_index_t* index, /* in: index; NOTE that this may also be a - non-clustered index */ - upd_t* update, /* in: an update vector built for the index so - that the field number in an upd_field is the - index position */ - ibool order_only, - /* in: if TRUE, limit the replacement to - ordering fields of index; note that this - does not work for non-clustered indexes. 
*/ - mem_heap_t* heap); /* in: memory heap to which we allocate and - copy the new values, set this as NULL if you - do not want allocation */ -/*************************************************************** -Replaces the new column values stored in the update vector to the index entry -given. */ - -void -row_upd_index_replace_new_col_vals( -/*===============================*/ - dtuple_t* entry, /* in/out: index entry where replaced */ - dict_index_t* index, /* in: index; NOTE that this may also be a - non-clustered index */ - upd_t* update, /* in: an update vector built for the - CLUSTERED index so that the field number in - an upd_field is the clustered index position */ - mem_heap_t* heap); /* in: memory heap to which we allocate and - copy the new values, set this as NULL if you - do not want allocation */ -/*************************************************************** -Checks if an update vector changes an ordering field of an index record. -This function is fast if the update vector is short or the number of ordering -fields in the index is small. Otherwise, this can be quadratic. -NOTE: we compare the fields as binary strings! */ - -ibool -row_upd_changes_ord_field_binary( -/*=============================*/ - /* out: TRUE if update vector changes - an ordering field in the index record; - NOTE: the fields are compared as binary - strings */ - dtuple_t* row, /* in: old value of row, or NULL if the - row and the data values in update are not - known when this function is called, e.g., at - compile time */ - dict_index_t* index, /* in: index of the record */ - upd_t* update);/* in: update vector for the row; NOTE: the - field numbers in this MUST be clustered index - positions! */ -/*************************************************************** -Checks if an update vector changes an ordering field of an index record. -This function is fast if the update vector is short or the number of ordering -fields in the index is small. 
Otherwise, this can be quadratic. -NOTE: we compare the fields as binary strings! */ - -ibool -row_upd_changes_some_index_ord_field_binary( -/*========================================*/ - /* out: TRUE if update vector may change - an ordering field in an index record */ - dict_table_t* table, /* in: table */ - upd_t* update);/* in: update vector for the row */ -/*************************************************************** -Updates a row in a table. This is a high-level function used -in SQL execution graphs. */ - -que_thr_t* -row_upd_step( -/*=========*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************* -Performs an in-place update for the current clustered index record in -select. */ - -void -row_upd_in_place_in_select( -/*=======================*/ - sel_node_t* sel_node, /* in: select node */ - que_thr_t* thr, /* in: query thread */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************* -Parses the log data of system field values. */ - -byte* -row_upd_parse_sys_vals( -/*===================*/ - /* out: log data end or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - ulint* pos, /* out: TRX_ID position in record */ - dulint* trx_id, /* out: trx id */ - dulint* roll_ptr);/* out: roll ptr */ -/************************************************************************* -Updates the trx id and roll ptr field in a clustered index record in database -recovery. 
*/ - -void -row_upd_rec_sys_fields_in_recovery( -/*===============================*/ - rec_t* rec, /* in: record */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint pos, /* in: TRX_ID position in rec */ - dulint trx_id, /* in: transaction id */ - dulint roll_ptr);/* in: roll ptr of the undo log record */ -/************************************************************************* -Parses the log data written by row_upd_index_write_log. */ - -byte* -row_upd_index_parse( -/*================*/ - /* out: log data end or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - mem_heap_t* heap, /* in: memory heap where update vector is - built */ - upd_t** update_out);/* out: update vector */ - - -/* Update vector field */ -struct upd_field_struct{ - ulint field_no; /* field number in an index, usually - the clustered index, but in updating - a secondary index record in btr0cur.c - this is the position in the secondary - index */ - que_node_t* exp; /* expression for calculating a new - value: it refers to column values and - constants in the symbol table of the - query graph */ - dfield_t new_val; /* new value for the column */ - ibool extern_storage; /* this is set to TRUE if dfield - actually contains a reference to - an externally stored field */ -}; - -/* Update vector structure */ -struct upd_struct{ - ulint info_bits; /* new value of info bits to record; - default is 0 */ - ulint n_fields; /* number of update fields */ - upd_field_t* fields; /* array of update fields */ -}; - -/* Update node structure which also implements the delete operation -of a row */ - -struct upd_node_struct{ - que_common_t common; /* node type: QUE_NODE_UPDATE */ - ibool is_delete;/* TRUE if delete, FALSE if update */ - ibool searched_update; - /* TRUE if searched update, FALSE if - positioned */ - ibool select_will_do_update; - /* TRUE if a searched update where ordering - fields will not be updated, and the size of - the fields will not 
change: in this case the - select node will take care of the update */ - ibool in_mysql_interface; - /* TRUE if the update node was created - for the MySQL interface */ - dict_foreign_t* foreign;/* NULL or pointer to a foreign key - constraint if this update node is used in - doing an ON DELETE or ON UPDATE operation */ - upd_node_t* cascade_node;/* NULL or an update node template which - is used to implement ON DELETE/UPDATE CASCADE - or ... SET NULL for foreign keys */ - mem_heap_t* cascade_heap;/* NULL or a mem heap where the cascade - node is created */ - sel_node_t* select; /* query graph subtree implementing a base - table cursor: the rows returned will be - updated */ - btr_pcur_t* pcur; /* persistent cursor placed on the clustered - index record which should be updated or - deleted; the cursor is stored in the graph - of 'select' field above, except in the case - of the MySQL interface */ - dict_table_t* table; /* table where updated */ - upd_t* update; /* update vector for the row */ - ulint update_n_fields; - /* when this struct is used to implement - a cascade operation for foreign keys, we store - here the size of the buffer allocated for use - as the update vector */ - sym_node_list_t columns;/* symbol table nodes for the columns - to retrieve from the table */ - ibool has_clust_rec_x_lock; - /* TRUE if the select which retrieves the - records to update already sets an x-lock on - the clustered record; note that it must always - set at least an s-lock */ - ulint cmpl_info;/* information extracted during query - compilation; speeds up execution: - UPD_NODE_NO_ORD_CHANGE and - UPD_NODE_NO_SIZE_CHANGE, ORed */ - /*----------------------*/ - /* Local storage for this graph node */ - ulint state; /* node execution state */ - dict_index_t* index; /* NULL, or the next index whose record should - be updated */ - dtuple_t* row; /* NULL, or a copy (also fields copied to - heap) of the row to update; this must be reset - to NULL after a successful update */ - 
ulint* ext_vec;/* array describing which fields are stored - externally in the clustered index record of - row */ - ulint n_ext_vec;/* number of fields in ext_vec */ - mem_heap_t* heap; /* memory heap used as auxiliary storage; - this must be emptied after a successful - update */ - /*----------------------*/ - sym_node_t* table_sym;/* table node in symbol table */ - que_node_t* col_assign_list; - /* column assignment list */ - ulint magic_n; -}; - -#define UPD_NODE_MAGIC_N 1579975 - -/* Node execution states */ -#define UPD_NODE_SET_IX_LOCK 1 /* execution came to the node from - a node above and if the field - has_clust_rec_x_lock is FALSE, we - should set an intention x-lock on - the table */ -#define UPD_NODE_UPDATE_CLUSTERED 2 /* clustered index record should be - updated */ -#define UPD_NODE_INSERT_CLUSTERED 3 /* clustered index record should be - inserted, old record is already delete - marked */ -#define UPD_NODE_UPDATE_ALL_SEC 4 /* an ordering field of the clustered - index record was changed, or this is - a delete operation: should update - all the secondary index records */ -#define UPD_NODE_UPDATE_SOME_SEC 5 /* secondary index entries should be - looked at and updated if an ordering - field changed */ - -/* Compilation info flags: these must fit within 3 bits; see trx0rec.h */ -#define UPD_NODE_NO_ORD_CHANGE 1 /* no secondary index record will be - changed in the update and no ordering - field of the clustered index */ -#define UPD_NODE_NO_SIZE_CHANGE 2 /* no record field size will be - changed in the update */ - -#ifndef UNIV_NONINL -#include "row0upd.ic" -#endif - -#endif diff --git a/storage/innobase/include/row0upd.ic b/storage/innobase/include/row0upd.ic deleted file mode 100644 index 6173849e68f..00000000000 --- a/storage/innobase/include/row0upd.ic +++ /dev/null @@ -1,122 +0,0 @@ -/****************************************************** -Update of a row - -(c) 1996 Innobase Oy - -Created 12/27/1996 Heikki Tuuri 
-*******************************************************/ - -#include "mtr0log.h" -#include "trx0trx.h" -#include "trx0undo.h" -#include "row0row.h" -#include "btr0sea.h" - -/************************************************************************* -Creates an update vector object. */ -UNIV_INLINE -upd_t* -upd_create( -/*=======*/ - /* out, own: update vector object */ - ulint n, /* in: number of fields */ - mem_heap_t* heap) /* in: heap from which memory allocated */ -{ - upd_t* update; - ulint i; - - update = mem_heap_alloc(heap, sizeof(upd_t)); - - update->info_bits = 0; - update->n_fields = n; - update->fields = mem_heap_alloc(heap, sizeof(upd_field_t) * n); - - for (i = 0; i < n; i++) { - update->fields[i].extern_storage = 0; - } - - return(update); -} - -/************************************************************************* -Returns the number of fields in the update vector == number of columns -to be updated by an update vector. */ -UNIV_INLINE -ulint -upd_get_n_fields( -/*=============*/ - /* out: number of fields */ - upd_t* update) /* in: update vector */ -{ - ut_ad(update); - - return(update->n_fields); -} - -/************************************************************************* -Returns the nth field of an update vector. */ -UNIV_INLINE -upd_field_t* -upd_get_nth_field( -/*==============*/ - /* out: update vector field */ - upd_t* update, /* in: update vector */ - ulint n) /* in: field position in update vector */ -{ - ut_ad(update); - ut_ad(n < update->n_fields); - - return(update->fields + n); -} - -/************************************************************************* -Sets an index field number to be updated by an update vector field. 
*/ -UNIV_INLINE -void -upd_field_set_field_no( -/*===================*/ - upd_field_t* upd_field, /* in: update vector field */ - ulint field_no, /* in: field number in a clustered - index */ - dict_index_t* index, /* in: index */ - trx_t* trx) /* in: transaction */ -{ - upd_field->field_no = field_no; - - if (UNIV_UNLIKELY(field_no >= dict_index_get_n_fields(index))) { - fprintf(stderr, - "InnoDB: Error: trying to access field %lu in ", - (ulong) field_no); - dict_index_name_print(stderr, trx, index); - fprintf(stderr, "\n" - "InnoDB: but index only has %lu fields\n", - (ulong) dict_index_get_n_fields(index)); - } - - dict_col_copy_type(dict_index_get_nth_col(index, field_no), - dfield_get_type(&(upd_field->new_val))); -} - -/************************************************************************* -Updates the trx id and roll ptr field in a clustered index record when -a row is updated or marked deleted. */ -UNIV_INLINE -void -row_upd_rec_sys_fields( -/*===================*/ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - trx_t* trx, /* in: transaction */ - dulint roll_ptr)/* in: roll ptr of the undo log record */ -{ - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(rec_offs_validate(rec, index, offsets)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!buf_block_align(rec)->is_hashed - || rw_lock_own(&btr_search_latch, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - row_set_rec_trx_id(rec, index, offsets, trx->id); - row_set_rec_roll_ptr(rec, index, offsets, roll_ptr); -} diff --git a/storage/innobase/include/row0vers.h b/storage/innobase/include/row0vers.h deleted file mode 100644 index e1377112d2a..00000000000 --- a/storage/innobase/include/row0vers.h +++ /dev/null @@ -1,126 +0,0 @@ -/****************************************************** -Row versions - -(c) 1997 Innobase Oy - -Created 2/6/1997 Heikki Tuuri -*******************************************************/ - -#ifndef 
row0vers_h -#define row0vers_h - -#include "univ.i" -#include "data0data.h" -#include "dict0types.h" -#include "trx0types.h" -#include "que0types.h" -#include "rem0types.h" -#include "mtr0mtr.h" -#include "read0types.h" - -/********************************************************************* -Finds out if an active transaction has inserted or modified a secondary -index record. NOTE: the kernel mutex is temporarily released in this -function! */ - -trx_t* -row_vers_impl_x_locked_off_kernel( -/*==============================*/ - /* out: NULL if committed, else the active - transaction; NOTE that the kernel mutex is - temporarily released! */ - rec_t* rec, /* in: record in a secondary index */ - dict_index_t* index, /* in: the secondary index */ - const ulint* offsets);/* in: rec_get_offsets(rec, index) */ -/********************************************************************* -Finds out if we must preserve a delete marked earlier version of a clustered -index record, because it is >= the purge view. */ - -ibool -row_vers_must_preserve_del_marked( -/*==============================*/ - /* out: TRUE if earlier version should be preserved */ - dulint trx_id, /* in: transaction id in the version */ - mtr_t* mtr); /* in: mtr holding the latch on the clustered index - record; it will also hold the latch on purge_view */ -/********************************************************************* -Finds out if a version of the record, where the version >= the current -purge view, should have ientry as its secondary index entry. We check -if there is any not delete marked version of the record where the trx -id >= purge view, and the secondary index entry == ientry; exactly in -this case we return TRUE. 
*/ - -ibool -row_vers_old_has_index_entry( -/*=========================*/ - /* out: TRUE if earlier version should have */ - ibool also_curr,/* in: TRUE if also rec is included in the - versions to search; otherwise only versions - prior to it are searched */ - rec_t* rec, /* in: record in the clustered index; the - caller must have a latch on the page */ - mtr_t* mtr, /* in: mtr holding the latch on rec; it will - also hold the latch on purge_view */ - dict_index_t* index, /* in: the secondary index */ - dtuple_t* ientry); /* in: the secondary index entry */ -/********************************************************************* -Constructs the version of a clustered index record which a consistent -read should see. We assume that the trx id stored in rec is such that -the consistent read should not see rec in its present version. */ - -ulint -row_vers_build_for_consistent_read( -/*===============================*/ - /* out: DB_SUCCESS or DB_MISSING_HISTORY */ - rec_t* rec, /* in: record in a clustered index; the - caller must have a latch on the page; this - latch locks the top of the stack of versions - of this records */ - mtr_t* mtr, /* in: mtr holding the latch on rec; it will - also hold the latch on purge_view */ - dict_index_t* index, /* in: the clustered index */ - ulint** offsets,/* in/out: offsets returned by - rec_get_offsets(rec, index) */ - read_view_t* view, /* in: the consistent read view */ - mem_heap_t** offset_heap,/* in/out: memory heap from which - the offsets are allocated */ - mem_heap_t* in_heap,/* in: memory heap from which the memory for - old_vers is allocated; memory for possible - intermediate versions is allocated and freed - locally within the function */ - rec_t** old_vers);/* out, own: old version, or NULL if the - record does not exist in the view, that is, - it was freshly inserted afterwards */ - -/********************************************************************* -Constructs the last committed version of a clustered index 
record, -which should be seen by a semi-consistent read. */ - -ulint -row_vers_build_for_semi_consistent_read( -/*====================================*/ - /* out: DB_SUCCESS or DB_MISSING_HISTORY */ - rec_t* rec, /* in: record in a clustered index; the - caller must have a latch on the page; this - latch locks the top of the stack of versions - of this records */ - mtr_t* mtr, /* in: mtr holding the latch on rec */ - dict_index_t* index, /* in: the clustered index */ - ulint** offsets,/* in/out: offsets returned by - rec_get_offsets(rec, index) */ - mem_heap_t** offset_heap,/* in/out: memory heap from which - the offsets are allocated */ - mem_heap_t* in_heap,/* in: memory heap from which the memory for - old_vers is allocated; memory for possible - intermediate versions is allocated and freed - locally within the function */ - rec_t** old_vers);/* out, own: rec, old version, or NULL if the - record does not exist in the view, that is, - it was freshly inserted afterwards */ - - -#ifndef UNIV_NONINL -#include "row0vers.ic" -#endif - -#endif diff --git a/storage/innobase/include/row0vers.ic b/storage/innobase/include/row0vers.ic deleted file mode 100644 index ab1e264635b..00000000000 --- a/storage/innobase/include/row0vers.ic +++ /dev/null @@ -1,13 +0,0 @@ -/****************************************************** -Row versions - -(c) 1997 Innobase Oy - -Created 2/6/1997 Heikki Tuuri -*******************************************************/ - -#include "row0row.h" -#include "dict0dict.h" -#include "read0read.h" -#include "page0page.h" -#include "log0recv.h" diff --git a/storage/innobase/include/srv0que.h b/storage/innobase/include/srv0que.h deleted file mode 100644 index 05c339cdd32..00000000000 --- a/storage/innobase/include/srv0que.h +++ /dev/null @@ -1,53 +0,0 @@ -/****************************************************** -Server query execution - -(c) 1996 Innobase Oy - -Created 6/5/1996 Heikki Tuuri -*******************************************************/ - - 
-#ifndef srv0que_h -#define srv0que_h - -#include "univ.i" -#include "que0types.h" - -/************************************************************************** -Checks if there is work to do in the server task queue. If there is, the -thread starts processing a task. Before leaving, it again checks the task -queue and picks a new task if any exists. This is called by a SRV_WORKER -thread. */ - -void -srv_que_task_queue_check(void); -/*==========================*/ -/************************************************************************** -Performs round-robin on the server tasks. This is called by a SRV_WORKER -thread every second or so. */ - -que_thr_t* -srv_que_round_robin( -/*================*/ - /* out: the new (may be == thr) query thread - to run */ - que_thr_t* thr); /* in: query thread */ -/************************************************************************** -Enqueues a task to server task queue and releases a worker thread, if -there exists one suspended. */ - -void -srv_que_task_enqueue( -/*=================*/ - que_thr_t* thr); /* in: query thread */ -/************************************************************************** -Enqueues a task to server task queue and releases a worker thread, if -there exists one suspended. 
*/ - -void -srv_que_task_enqueue_low( -/*=====================*/ - que_thr_t* thr); /* in: query thread */ - -#endif - diff --git a/storage/innobase/include/srv0srv.h b/storage/innobase/include/srv0srv.h deleted file mode 100644 index 05300e38430..00000000000 --- a/storage/innobase/include/srv0srv.h +++ /dev/null @@ -1,572 +0,0 @@ -/****************************************************** -The server main program - -(c) 1995 Innobase Oy - -Created 10/10/1995 Heikki Tuuri -*******************************************************/ - - -#ifndef srv0srv_h -#define srv0srv_h - -#include "univ.i" -#include "sync0sync.h" -#include "os0sync.h" -#include "que0types.h" -#include "trx0types.h" - -extern const char* srv_main_thread_op_info; - -/* Prefix used by MySQL to indicate pre-5.1 table name encoding */ -extern const char srv_mysql50_table_name_prefix[9]; - -/* When this event is set the lock timeout and InnoDB monitor -thread starts running */ -extern os_event_t srv_lock_timeout_thread_event; - -/* If the last data file is auto-extended, we add this many pages to it -at a time */ -#define SRV_AUTO_EXTEND_INCREMENT \ - (srv_auto_extend_increment * ((1024 * 1024) / UNIV_PAGE_SIZE)) - -/* This is set to TRUE if the MySQL user has set it in MySQL */ -extern ibool srv_lower_case_table_names; - -/* Mutex for locking srv_monitor_file */ -extern mutex_t srv_monitor_file_mutex; -/* Temporary file for innodb monitor output */ -extern FILE* srv_monitor_file; -/* Mutex for locking srv_dict_tmpfile. -This mutex has a very high rank; threads reserving it should not -be holding any InnoDB latches. */ -extern mutex_t srv_dict_tmpfile_mutex; -/* Temporary file for output from the data dictionary */ -extern FILE* srv_dict_tmpfile; -/* Mutex for locking srv_misc_tmpfile. -This mutex has a very low rank; threads reserving it should not -acquire any further latches or sleep before releasing this one. 
*/ -extern mutex_t srv_misc_tmpfile_mutex; -/* Temporary file for miscellanous diagnostic output */ -extern FILE* srv_misc_tmpfile; - -/* Server parameters which are read from the initfile */ - -extern char* srv_data_home; -#ifdef UNIV_LOG_ARCHIVE -extern char* srv_arch_dir; -#endif /* UNIV_LOG_ARCHIVE */ - -extern ibool srv_file_per_table; -extern ibool srv_locks_unsafe_for_binlog; - -extern ulint srv_n_data_files; -extern char** srv_data_file_names; -extern ulint* srv_data_file_sizes; -extern ulint* srv_data_file_is_raw_partition; - -extern ibool srv_auto_extend_last_data_file; -extern ulint srv_last_file_size_max; -extern ulong srv_auto_extend_increment; - -extern ibool srv_created_new_raw; - -#define SRV_NEW_RAW 1 -#define SRV_OLD_RAW 2 - -extern char** srv_log_group_home_dirs; - -extern ulint srv_n_log_groups; -extern ulint srv_n_log_files; -extern ulint srv_log_file_size; -extern ulint srv_log_buffer_size; -extern ulong srv_flush_log_at_trx_commit; - -extern byte srv_latin1_ordering[256];/* The sort order table of the latin1 - character set */ -extern ulint srv_pool_size; -extern ulint srv_awe_window_size; -extern ulint srv_mem_pool_size; -extern ulint srv_lock_table_size; - -extern ibool srv_thread_concurrency_timer_based; - -/* Number of background IO threads for read and write. Replaces - * srv_n_file_io_threads. */ -extern ulint srv_n_read_io_threads; -extern ulint srv_n_write_io_threads; -/* Max number of adjacent IO requests to merge into one large request. 
*/ -extern ulint srv_max_merged_io; - -/* Number of IO operations per second the server can do */ -extern ulint srv_io_capacity; - -/* Flush dirty pages when below max dirty percent */ -extern ibool srv_extra_dirty_writes; - - - -#ifdef UNIV_LOG_ARCHIVE -extern ibool srv_log_archive_on; -extern ibool srv_archive_recovery; -extern dulint srv_archive_recovery_limit_lsn; -#endif /* UNIV_LOG_ARCHIVE */ - -extern ulint srv_lock_wait_timeout; - -extern char* srv_file_flush_method_str; -extern ulint srv_unix_file_flush_method; -extern ulint srv_win_file_flush_method; - -extern ulint srv_max_n_open_files; - -extern ulint srv_max_dirty_pages_pct; - -extern ulint srv_force_recovery; -extern ulong srv_thread_concurrency; -extern ulong srv_commit_concurrency; - -extern ulint srv_max_n_threads; - -extern lint srv_conc_n_threads; - -extern ulint srv_fast_shutdown; /* If this is 1, do not do a - purge and index buffer merge. - If this 2, do not even flush the - buffer pool to data files at the - shutdown: we effectively 'crash' - InnoDB (but lose no committed - transactions). 
*/ -extern ibool srv_innodb_status; - -extern ibool srv_use_doublewrite_buf; -extern ibool srv_use_checksums; - -extern ibool srv_set_thread_priorities; -extern int srv_query_thread_priority; - -extern ulong srv_max_buf_pool_modified_pct; -extern ulong srv_max_purge_lag; -extern ibool srv_use_awe; -extern ibool srv_use_adaptive_hash_indexes; -/*-------------------------------------------*/ - -extern ulint srv_n_rows_inserted; -extern ulint srv_n_rows_updated; -extern ulint srv_n_rows_deleted; -extern ulint srv_n_rows_read; - -extern ibool srv_print_innodb_monitor; -extern ibool srv_print_innodb_lock_monitor; -extern ibool srv_print_innodb_tablespace_monitor; -extern ibool srv_print_verbose_log; -extern ibool srv_print_innodb_table_monitor; - -extern ibool srv_lock_timeout_and_monitor_active; -extern ibool srv_error_monitor_active; - -extern ulong srv_n_spin_wait_rounds; -extern ulong srv_n_free_tickets_to_enter; -extern ulong srv_thread_sleep_delay; -extern ulint srv_spin_wait_delay; -extern ibool srv_priority_boost; - -extern ulint srv_pool_size; -extern ulint srv_mem_pool_size; -extern ulint srv_lock_table_size; - -extern ibool srv_print_thread_releases; -extern ibool srv_print_lock_waits; -extern ibool srv_print_buf_io; -extern ibool srv_print_log_io; -extern ibool srv_print_latch_waits; - -extern ulint srv_activity_count; -extern ulint srv_fatal_semaphore_wait_threshold; -extern ulint srv_dml_needed_delay; - -extern mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs, - query threads, and lock table: we allocate - it from dynamic memory to get it to the - same DRAM page as other hotspot semaphores */ -#define kernel_mutex (*kernel_mutex_temp) - -#define SRV_MAX_N_IO_THREADS 100 - -/* Array of English strings describing the current state of an -i/o handler thread */ -extern const char* srv_io_thread_op_info[]; -extern const char* srv_io_thread_function[]; - -/* the number of the log write requests done */ -extern ulint srv_log_write_requests; 
- -/* the number of physical writes to the log performed */ -extern ulint srv_log_writes; - -/* amount of data written to the log files in bytes */ -extern ulint srv_os_log_written; - -/* amount of writes being done to the log files */ -extern ulint srv_os_log_pending_writes; - -/* we increase this counter, when there we don't have enough space in the -log buffer and have to flush it */ -extern ulint srv_log_waits; - -/* variable that counts amount of data read in total (in bytes) */ -extern ulint srv_data_read; - -/* here we count the amount of data written in total (in bytes) */ -extern ulint srv_data_written; - -/* this variable counts the amount of times, when the doublewrite buffer -was flushed */ -extern ulint srv_dblwr_writes; - -/* here we store the number of pages that have been flushed to the -doublewrite buffer */ -extern ulint srv_dblwr_pages_written; - -/* in this variable we store the number of write requests issued */ -extern ulint srv_buf_pool_write_requests; - -/* here we store the number of times when we had to wait for a free page -in the buffer pool. It happens when the buffer pool is full and we need -to make a flush, in order to be able to read or create a page. */ -extern ulint srv_buf_pool_wait_free; - -/* variable to count the number of pages that were written from the -buffer pool to disk */ -extern ulint srv_buf_pool_flushed; - -/* variable to count the number of buffer pool reads that led to the -reading of a disk page */ -extern ulint srv_buf_pool_reads; - -/* variable to count the number of sequential read-aheads were done */ -extern ulint srv_read_ahead_seq; - -/* variable to count the number of random read-aheads were done */ -extern ulint srv_read_ahead_rnd; - -/* Number of threads that may have missed a lock wait wakeup */ -extern ulint sync_wake_ups; - -/* An option to enable the fix for "Bug#43660 SHOW INDEXES/ANALYZE does -NOT update cardinality for indexes of InnoDB table". 
By default we are -running with the fix disabled because MySQL 5.1 is frozen for such -behavioral changes. */ -extern char srv_use_legacy_cardinality_algorithm; - -/* In this structure we store status variables to be passed to MySQL */ -typedef struct export_var_struct export_struc; - -extern export_struc export_vars; - -typedef struct srv_sys_struct srv_sys_t; - -/* The server system */ -extern srv_sys_t* srv_sys; - -/* Alternatives for the file flush option in Unix; see the InnoDB manual -about what these mean */ -#define SRV_UNIX_FSYNC 1 /* This is the default */ -#define SRV_UNIX_O_DSYNC 2 -#define SRV_UNIX_LITTLESYNC 3 -#define SRV_UNIX_NOSYNC 4 -#define SRV_UNIX_O_DIRECT 5 - -/* Alternatives for file i/o in Windows */ -#define SRV_WIN_IO_NORMAL 1 -#define SRV_WIN_IO_UNBUFFERED 2 /* This is the default */ - -/* Alternatives for srv_force_recovery. Non-zero values are intended -to help the user get a damaged database up so that he can dump intact -tables and rows with SELECT INTO OUTFILE. The database must not otherwise -be used with these options! A bigger number below means that all precautions -of lower numbers are included. */ - -#define SRV_FORCE_IGNORE_CORRUPT 1 /* let the server run even if it - detects a corrupt page */ -#define SRV_FORCE_NO_BACKGROUND 2 /* prevent the main thread from - running: if a crash would occur - in purge, this prevents it */ -#define SRV_FORCE_NO_TRX_UNDO 3 /* do not run trx rollback after - recovery */ -#define SRV_FORCE_NO_IBUF_MERGE 4 /* prevent also ibuf operations: - if they would cause a crash, better - not do them */ -#define SRV_FORCE_NO_UNDO_LOG_SCAN 5 /* do not look at undo logs when - starting the database: InnoDB will - treat even incomplete transactions - as committed */ -#define SRV_FORCE_NO_LOG_REDO 6 /* do not do the log roll-forward - in connection with recovery */ - -/************************************************************************* -Boots Innobase server. 
*/ - -ulint -srv_boot(void); -/*==========*/ - /* out: DB_SUCCESS or error code */ -/************************************************************************* -Initializes the server. */ - -void -srv_init(void); -/*==========*/ -/************************************************************************* -Frees the OS fast mutex created in srv_boot(). */ - -void -srv_free(void); -/*==========*/ -/************************************************************************* -Initializes the synchronization primitives, memory system, and the thread -local storage. */ - -void -srv_general_init(void); -/*==================*/ -/************************************************************************* -Gets the number of threads in the system. */ - -ulint -srv_get_n_threads(void); -/*===================*/ -/************************************************************************* -Returns the calling thread type. */ - -ulint -srv_get_thread_type(void); -/*=====================*/ - /* out: SRV_COM, ... */ -/************************************************************************* -Sets the info describing an i/o thread current state. */ - -void -srv_set_io_thread_op_info( -/*======================*/ - ulint i, /* in: the 'segment' of the i/o thread */ - const char* str); /* in: constant char string describing the - state */ -/************************************************************************* -Releases threads of the type given from suspension in the thread table. -NOTE! The server mutex has to be reserved by the caller! */ - -ulint -srv_release_threads( -/*================*/ - /* out: number of threads released: this may be - < n if not enough threads were suspended at the - moment */ - ulint type, /* in: thread type */ - ulint n); /* in: number of threads to release */ -/************************************************************************* -The master thread controlling the server. 
*/ - -os_thread_ret_t -srv_master_thread( -/*==============*/ - /* out: a dummy parameter */ - void* arg); /* in: a dummy parameter required by - os_thread_create */ -/*********************************************************************** -Tells the Innobase server that there has been activity in the database -and wakes up the master thread if it is suspended (not sleeping). Used -in the MySQL interface. Note that there is a small chance that the master -thread stays suspended (we do not protect our operation with the kernel -mutex, for performace reasons). */ - -void -srv_active_wake_master_thread(void); -/*===============================*/ -/*********************************************************************** -Wakes up the master thread if it is suspended or being suspended. */ - -void -srv_wake_master_thread(void); -/*========================*/ -/************************************************************************* -Puts an OS thread to wait if there are too many concurrent threads -(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */ - -void -srv_conc_enter_innodb( -/*==================*/ - trx_t* trx); /* in: transaction object associated with the - thread */ -/************************************************************************* -This lets a thread enter InnoDB regardless of the number of threads inside -InnoDB. This must be called when a thread ends a lock wait. */ - -void -srv_conc_force_enter_innodb( -/*========================*/ - trx_t* trx); /* in: transaction object associated with the - thread */ -/************************************************************************* -This must be called when a thread exits InnoDB in a lock wait or at the -end of an SQL statement. 
*/ - -void -srv_conc_force_exit_innodb( -/*=======================*/ - trx_t* trx); /* in: transaction object associated with the - thread */ -/************************************************************************* -This must be called when a thread exits InnoDB. */ - -void -srv_conc_exit_innodb( -/*=================*/ - trx_t* trx); /* in: transaction object associated with the - thread */ -/******************************************************************* -Puts a MySQL OS thread to wait for a lock to be released. If an error -occurs during the wait trx->error_state associated with thr is -!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK -are possible errors. DB_DEADLOCK is returned if selective deadlock -resolution chose this transaction as a victim. */ - -void -srv_suspend_mysql_thread( -/*=====================*/ - que_thr_t* thr); /* in: query thread associated with the MySQL - OS thread */ -/************************************************************************ -Releases a MySQL OS thread waiting for a lock to be released, if the -thread is already suspended. */ - -void -srv_release_mysql_thread_if_suspended( -/*==================================*/ - que_thr_t* thr); /* in: query thread associated with the - MySQL OS thread */ -/************************************************************************* -A thread which wakes up threads whose lock wait may have lasted too long. -This also prints the info output by various InnoDB monitors. */ - -os_thread_ret_t -srv_lock_timeout_and_monitor_thread( -/*================================*/ - /* out: a dummy parameter */ - void* arg); /* in: a dummy parameter required by - os_thread_create */ -/************************************************************************* -A thread which prints warnings about semaphore waits which have lasted -too long. These can be used to track bugs which cause hangs. 
*/ - -os_thread_ret_t -srv_error_monitor_thread( -/*=====================*/ - /* out: a dummy parameter */ - void* arg); /* in: a dummy parameter required by - os_thread_create */ -/********************************************************************** -Outputs to a file the output of the InnoDB Monitor. */ - -void -srv_printf_innodb_monitor( -/*======================*/ - FILE* file); /* in: output stream */ - -/********************************************************************** -Function to pass InnoDB status variables to MySQL */ - -void -srv_export_innodb_status(void); -/*=====================*/ - -/* Types for the threads existing in the system. Threads of types 4 - 9 -are called utility threads. Note that utility threads are mainly disk -bound, except that version threads 6 - 7 may also be CPU bound, if -cleaning versions from the buffer pool. */ - -#define SRV_COM 1 /* threads serving communication and queries */ -#define SRV_CONSOLE 2 /* thread serving console */ -#define SRV_WORKER 3 /* threads serving parallelized queries and - queries released from lock wait */ -#define SRV_BUFFER 4 /* thread flushing dirty buffer blocks, - not currently in use */ -#define SRV_RECOVERY 5 /* threads finishing a recovery, - not currently in use */ -#define SRV_INSERT 6 /* thread flushing the insert buffer to disk, - not currently in use */ -#define SRV_MASTER 7 /* the master thread, (whose type number must - be biggest) */ - -/* Thread slot in the thread table */ -typedef struct srv_slot_struct srv_slot_t; - -/* Thread table is an array of slots */ -typedef srv_slot_t srv_table_t; - -/* In this structure we store status variables to be passed to MySQL */ -struct export_var_struct{ - ulint innodb_data_pending_reads; - ulint innodb_data_pending_writes; - ulint innodb_data_pending_fsyncs; - ulint innodb_data_fsyncs; - ulint innodb_data_read; - ulint innodb_data_writes; - ulint innodb_data_written; - ulint innodb_data_reads; - ulint innodb_buffer_pool_pages_total; - ulint 
innodb_buffer_pool_pages_data; - ulint innodb_buffer_pool_pages_dirty; - ulint innodb_buffer_pool_pages_misc; - ulint innodb_buffer_pool_pages_free; -#ifdef UNIV_DEBUG - ulint innodb_buffer_pool_pages_latched; -#endif /* UNIV_DEBUG */ - ulint innodb_buffer_pool_read_requests; - ulint innodb_buffer_pool_reads; - ulint innodb_buffer_pool_wait_free; - ulint innodb_buffer_pool_pages_flushed; - ulint innodb_buffer_pool_write_requests; - ulint innodb_buffer_pool_read_ahead_seq; - ulint innodb_buffer_pool_read_ahead_rnd; - ulint innodb_dblwr_pages_written; - ulint innodb_dblwr_writes; - ibool innodb_have_sync_atomic; - ibool innodb_heap_enabled; - ulint innodb_log_waits; - ulint innodb_log_write_requests; - ulint innodb_log_writes; - ulint innodb_os_log_written; - ulint innodb_os_log_fsyncs; - ulint innodb_os_log_pending_writes; - ulint innodb_os_log_pending_fsyncs; - ulint innodb_page_size; - ulint innodb_pages_created; - ulint innodb_pages_read; - ulint innodb_pages_written; - ulint innodb_row_lock_waits; - ulint innodb_row_lock_current_waits; - ib_longlong innodb_row_lock_time; - ulint innodb_row_lock_time_avg; - ulint innodb_row_lock_time_max; - ulint innodb_rows_read; - ulint innodb_rows_inserted; - ulint innodb_rows_updated; - ulint innodb_rows_deleted; - ulint innodb_wake_ups; -}; - -/* The server system struct */ -struct srv_sys_struct{ - srv_table_t* threads; /* server thread table */ - UT_LIST_BASE_NODE_T(que_thr_t) - tasks; /* task queue */ - dict_index_t* dummy_ind1; /* dummy index for old-style - supremum and infimum records */ - dict_index_t* dummy_ind2; /* dummy index for new-style - supremum and infimum records */ -}; - -extern ulint srv_n_threads_active[]; - -#endif diff --git a/storage/innobase/include/srv0srv.ic b/storage/innobase/include/srv0srv.ic deleted file mode 100644 index 73e0729660f..00000000000 --- a/storage/innobase/include/srv0srv.ic +++ /dev/null @@ -1,7 +0,0 @@ -/****************************************************** -Server main program - 
-(c) 1995 Innobase Oy - -Created 10/4/1995 Heikki Tuuri -*******************************************************/ diff --git a/storage/innobase/include/srv0start.h b/storage/innobase/include/srv0start.h deleted file mode 100644 index a04930d6516..00000000000 --- a/storage/innobase/include/srv0start.h +++ /dev/null @@ -1,112 +0,0 @@ -/****************************************************** -Starts the Innobase database server - -(c) 1995-2000 Innobase Oy - -Created 10/10/1995 Heikki Tuuri -*******************************************************/ - - -#ifndef srv0start_h -#define srv0start_h - -#include "univ.i" -#include "ut0byte.h" - -/************************************************************************* -Normalizes a directory path for Windows: converts slashes to backslashes. */ - -void -srv_normalize_path_for_win( -/*=======================*/ - char* str); /* in/out: null-terminated character string */ -/************************************************************************* -Reads the data files and their sizes from a character string given in -the .cnf file. */ - -ibool -srv_parse_data_file_paths_and_sizes( -/*================================*/ - /* out: TRUE if ok, FALSE if parsing - error */ - char* str, /* in: the data file path string */ - char*** data_file_names, /* out, own: array of data file - names */ - ulint** data_file_sizes, /* out, own: array of data file sizes - in megabytes */ - ulint** data_file_is_raw_partition,/* out, own: array of flags - showing which data files are raw - partitions */ - ulint* n_data_files, /* out: number of data files */ - ibool* is_auto_extending, /* out: TRUE if the last data file is - auto-extending */ - ulint* max_auto_extend_size); /* out: max auto extend size for the - last file if specified, 0 if not */ -/************************************************************************* -Reads log group home directories from a character string given in -the .cnf file. 
*/ - -ibool -srv_parse_log_group_home_dirs( -/*==========================*/ - /* out: TRUE if ok, FALSE if parsing - error */ - char* str, /* in: character string */ - char*** log_group_home_dirs); /* out, own: log group home dirs */ -/************************************************************************* -Adds a slash or a backslash to the end of a string if it is missing -and the string is not empty. */ - -char* -srv_add_path_separator_if_needed( -/*=============================*/ - /* out: string which has the separator if the - string is not empty */ - char* str); /* in: null-terminated character string */ -/******************************************************************** -Starts Innobase and creates a new database if database files -are not found and the user wants. Server parameters are -read from a file of name "srv_init" in the ib_home directory. */ - -int -innobase_start_or_create_for_mysql(void); -/*====================================*/ - /* out: DB_SUCCESS or error code */ -/******************************************************************** -Shuts down the Innobase database. 
*/ -int -innobase_shutdown_for_mysql(void); -/*=============================*/ - /* out: DB_SUCCESS or error code */ -extern dulint srv_shutdown_lsn; -extern dulint srv_start_lsn; - -#ifdef __NETWARE__ -void set_panic_flag_for_netware(void); -#endif - -#ifdef HAVE_DARWIN_THREADS -extern ibool srv_have_fullfsync; -#endif - -extern ulint srv_sizeof_trx_t_in_ha_innodb_cc; - -extern ibool srv_is_being_started; -extern ibool srv_startup_is_before_trx_rollback_phase; -extern ibool srv_is_being_shut_down; - -extern ibool srv_start_raw_disk_in_use; - -/* At a shutdown the value first climbs from 0 to SRV_SHUTDOWN_CLEANUP -and then to SRV_SHUTDOWN_LAST_PHASE, and so on */ - -extern ulint srv_shutdown_state; - -#define SRV_SHUTDOWN_CLEANUP 1 -#define SRV_SHUTDOWN_LAST_PHASE 2 -#define SRV_SHUTDOWN_EXIT_THREADS 3 - -/* Log 'spaces' have id's >= this */ -#define SRV_LOG_SPACE_FIRST_ID 0xFFFFFFF0UL - -#endif diff --git a/storage/innobase/include/sync0arr.h b/storage/innobase/include/sync0arr.h deleted file mode 100644 index fae26b7a63e..00000000000 --- a/storage/innobase/include/sync0arr.h +++ /dev/null @@ -1,122 +0,0 @@ -/****************************************************** -The wait array used in synchronization primitives - -(c) 1995 Innobase Oy - -Created 9/5/1995 Heikki Tuuri -*******************************************************/ - -#ifndef sync0arr_h -#define sync0arr_h - -#include "univ.i" -#include "ut0lst.h" -#include "ut0mem.h" -#include "os0thread.h" - -typedef struct sync_cell_struct sync_cell_t; -typedef struct sync_array_struct sync_array_t; - -#define SYNC_ARRAY_OS_MUTEX 1 -#define SYNC_ARRAY_MUTEX 2 - -/*********************************************************************** -Creates a synchronization wait array. It is protected by a mutex -which is automatically reserved when the functions operating on it -are called. 
*/ - -sync_array_t* -sync_array_create( -/*==============*/ - /* out, own: created wait array */ - ulint n_cells, /* in: number of cells in the array - to create */ - ulint protection); /* in: either SYNC_ARRAY_OS_MUTEX or - SYNC_ARRAY_MUTEX: determines the type - of mutex protecting the data structure */ -/********************************************************************** -Frees the resources in a wait array. */ - -void -sync_array_free( -/*============*/ - sync_array_t* arr); /* in, own: sync wait array */ -/********************************************************************** -Reserves a wait array cell for waiting for an object. -The event of the cell is reset to nonsignalled state. */ - -void -sync_array_reserve_cell( -/*====================*/ - sync_array_t* arr, /* in: wait array */ - void* object, /* in: pointer to the object to wait for */ - ulint type, /* in: lock request type */ - const char* file, /* in: file where requested */ - ulint line, /* in: line where requested */ - ulint* index); /* out: index of the reserved cell */ -/********************************************************************** -This function should be called when a thread starts to wait on -a wait array cell. In the debug version this function checks -if the wait for a semaphore will result in a deadlock, in which -case prints info and asserts. */ - -void -sync_array_wait_event( -/*==================*/ - sync_array_t* arr, /* in: wait array */ - ulint index); /* in: index of the reserved cell */ -/********************************************************************** -Frees the cell. NOTE! sync_array_wait_event frees the cell -automatically! */ - -void -sync_array_free_cell( -/*=================*/ - sync_array_t* arr, /* in: wait array */ - ulint index); /* in: index of the cell in array */ -/************************************************************************** -Note that one of the wait objects was signalled. 
*/ - -void -sync_array_object_signalled( -/*========================*/ - sync_array_t* arr); /* in: wait array */ -/************************************************************************** -If the wakeup algorithm does not work perfectly at semaphore relases, -this function will do the waking (see the comment in mutex_exit). This -function should be called about every 1 second in the server. */ - -void -sync_arr_wake_threads_if_sema_free(void); -/*====================================*/ -/************************************************************************** -Prints warnings of long semaphore waits to stderr. */ - -ibool -sync_array_print_long_waits(void); -/*=============================*/ - /* out: TRUE if fatal semaphore wait threshold - was exceeded */ -/************************************************************************ -Validates the integrity of the wait array. Checks -that the number of reserved cells equals the count variable. */ - -void -sync_array_validate( -/*================*/ - sync_array_t* arr); /* in: sync wait array */ -/************************************************************************** -Prints info of the wait array. 
*/ - -void -sync_array_print_info( -/*==================*/ - FILE* file, /* in: file where to print */ - sync_array_t* arr); /* in: wait array */ - - -#ifndef UNIV_NONINL -#include "sync0arr.ic" -#endif - -#endif diff --git a/storage/innobase/include/sync0arr.ic b/storage/innobase/include/sync0arr.ic deleted file mode 100644 index dbe35c033e5..00000000000 --- a/storage/innobase/include/sync0arr.ic +++ /dev/null @@ -1,10 +0,0 @@ -/****************************************************** -The wait array for synchronization primitives - -Inline code - -(c) 1995 Innobase Oy - -Created 9/5/1995 Heikki Tuuri -*******************************************************/ - diff --git a/storage/innobase/include/sync0rw.h b/storage/innobase/include/sync0rw.h deleted file mode 100644 index 6de26535689..00000000000 --- a/storage/innobase/include/sync0rw.h +++ /dev/null @@ -1,517 +0,0 @@ -/****************************************************** -The read-write lock (for threads, not for database transactions) - -(c) 1995 Innobase Oy - -Created 9/11/1995 Heikki Tuuri -*******************************************************/ - -#ifndef sync0rw_h -#define sync0rw_h - -#include "univ.i" -#include "ut0lst.h" -#include "sync0sync.h" -#include "os0sync.h" - -/* The following undef is to prevent a name conflict with a macro -in MySQL: */ -#undef rw_lock_t - -/* Latch types; these are used also in btr0btr.h: keep the numerical values -smaller than 30 and the order of the numerical values like below! */ -#define RW_S_LATCH 1 -#define RW_X_LATCH 2 -#define RW_NO_LATCH 3 - -/* We decrement lock_word by this amount for each x_lock. It is also the -start value for the lock_word, meaning that it limits the maximum number -of concurrent read locks before the rw_lock breaks. 
The current value of -0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/ -#define X_LOCK_DECR 0x00100000 - -typedef struct rw_lock_struct rw_lock_t; -#ifdef UNIV_SYNC_DEBUG -typedef struct rw_lock_debug_struct rw_lock_debug_t; -#endif /* UNIV_SYNC_DEBUG */ - -typedef UT_LIST_BASE_NODE_T(rw_lock_t) rw_lock_list_t; - -extern rw_lock_list_t rw_lock_list; -extern mutex_t rw_lock_list_mutex; - -#ifdef UNIV_SYNC_DEBUG -/* The global mutex which protects debug info lists of all rw-locks. -To modify the debug info list of an rw-lock, this mutex has to be - -acquired in addition to the mutex protecting the lock. */ -extern mutex_t rw_lock_debug_mutex; -extern os_event_t rw_lock_debug_event; /* If deadlock detection does - not get immediately the mutex it - may wait for this event */ -extern ibool rw_lock_debug_waiters; /* This is set to TRUE, if - there may be waiters for the event */ -#endif /* UNIV_SYNC_DEBUG */ - -extern ib_longlong rw_s_spin_wait_count; -extern ib_longlong rw_s_spin_round_count; -extern ib_longlong rw_s_exit_count; -extern ib_longlong rw_s_os_wait_count; -extern ib_longlong rw_x_spin_wait_count; -extern ib_longlong rw_x_spin_round_count; -extern ib_longlong rw_x_os_wait_count; -extern ib_longlong rw_x_exit_count; - -/********************************************************************** -Creates, or rather, initializes an rw-lock object in a specified memory -location (which must be appropriately aligned). The rw-lock is initialized -to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free -is necessary only if the memory block containing it is freed. 
*/ -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG -# define rw_lock_create(L, level) \ - rw_lock_create_func((L), (level), #L, __FILE__, __LINE__) -# else /* UNIV_SYNC_DEBUG */ -# define rw_lock_create(L, level) \ - rw_lock_create_func((L), #L, __FILE__, __LINE__) -# endif /* UNIV_SYNC_DEBUG */ -#else /* UNIV_DEBUG */ -# define rw_lock_create(L, level) \ - rw_lock_create_func((L), __FILE__, __LINE__) -#endif /* UNIV_DEBUG */ - -/********************************************************************** -Creates, or rather, initializes an rw-lock object in a specified memory -location (which must be appropriately aligned). The rw-lock is initialized -to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free -is necessary only if the memory block containing it is freed. */ - -void -rw_lock_create_func( -/*================*/ - rw_lock_t* lock, /* in: pointer to memory */ -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /* in: level */ -# endif /* UNIV_SYNC_DEBUG */ - const char* cmutex_name, /* in: mutex name */ -#endif /* UNIV_DEBUG */ - const char* cfile_name, /* in: file name where created */ - ulint cline); /* in: file line where created */ -/********************************************************************** -Calling this function is obligatory only if the memory buffer containing -the rw-lock is freed. Removes an rw-lock object from the global list. The -rw-lock is checked to be in the non-locked state. */ - -void -rw_lock_free( -/*=========*/ - rw_lock_t* lock); /* in: rw-lock */ -#ifdef UNIV_DEBUG -/********************************************************************** -Checks that the rw-lock has been initialized and that there are no -simultaneous shared and exclusive locks. */ - -ibool -rw_lock_validate( -/*=============*/ - rw_lock_t* lock); -#endif /* UNIV_DEBUG */ -/********************************************************************** -Low-level function which tries to lock an rw-lock in s-mode. Performs no -spinning. 
*/ -UNIV_INLINE -ibool -rw_lock_s_lock_low( -/*===============*/ - /* out: TRUE if success */ - rw_lock_t* lock, /* in: pointer to rw-lock */ - ulint pass, - /* in: pass value; != 0, if the lock will be - passed to another thread to unlock */ - const char* file_name, /* in: file name where lock requested */ - ulint line); /* in: line where requested */ -/****************************************************************** -NOTE! The following macros should be used in rw s-locking, not the -corresponding function. */ - -#define rw_lock_s_lock(M) rw_lock_s_lock_func(\ - (M), 0, __FILE__, __LINE__) -/****************************************************************** -NOTE! The following macros should be used in rw s-locking, not the -corresponding function. */ - -#define rw_lock_s_lock_gen(M, P) rw_lock_s_lock_func(\ - (M), (P), __FILE__, __LINE__) -/****************************************************************** -NOTE! The following macros should be used in rw s-locking, not the -corresponding function. */ - -#define rw_lock_s_lock_nowait(M, F, L) rw_lock_s_lock_low(\ - (M), 0, (F), (L)) -/********************************************************************** -NOTE! Use the corresponding macro, not directly this function, except if -you supply the file name and line number. Lock an rw-lock in shared mode -for the current thread. If the rw-lock is locked in exclusive mode, or -there is an exclusive lock request waiting, the function spins a preset -time (controlled by SYNC_SPIN_ROUNDS), waiting for the lock, before -suspending the thread. */ -UNIV_INLINE -void -rw_lock_s_lock_func( -/*================*/ - rw_lock_t* lock, /* in: pointer to rw-lock */ - ulint pass, /* in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/* in: file name where lock requested */ - ulint line); /* in: line where requested */ -/********************************************************************** -NOTE! 
Use the corresponding macro, not directly this function! Lock an -rw-lock in exclusive mode for the current thread if the lock can be -obtained immediately. */ -UNIV_INLINE -ibool -rw_lock_x_lock_func_nowait( -/*=======================*/ - /* out: TRUE if success */ - rw_lock_t* lock, /* in: pointer to rw-lock */ - const char* file_name,/* in: file name where lock requested */ - ulint line); /* in: line where requested */ -/********************************************************************** -Releases a shared mode lock. */ -UNIV_INLINE -void -rw_lock_s_unlock_func( -/*==================*/ - rw_lock_t* lock /* in: rw-lock */ -#ifdef UNIV_SYNC_DEBUG - ,ulint pass /* in: pass value; != 0, if the lock may have - been passed to another thread to unlock */ -#endif - ); -/*********************************************************************** -Releases a shared mode lock. */ - -#ifdef UNIV_SYNC_DEBUG -#define rw_lock_s_unlock(L) rw_lock_s_unlock_func(L, 0) -#else -#define rw_lock_s_unlock(L) rw_lock_s_unlock_func(L) -#endif -/*********************************************************************** -Releases a shared mode lock. */ - -#ifdef UNIV_SYNC_DEBUG -#define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L, P) -#else -#define rw_lock_s_unlock_gen(L, P) rw_lock_s_unlock_func(L) -#endif -/****************************************************************** -NOTE! The following macro should be used in rw x-locking, not the -corresponding function. */ - -#define rw_lock_x_lock(M) rw_lock_x_lock_func(\ - (M), 0, __FILE__, __LINE__) -/****************************************************************** -NOTE! The following macro should be used in rw x-locking, not the -corresponding function. */ - -#define rw_lock_x_lock_gen(M, P) rw_lock_x_lock_func(\ - (M), (P), __FILE__, __LINE__) -/****************************************************************** -NOTE! The following macros should be used in rw x-locking, not the -corresponding function. 
*/ - -#define rw_lock_x_lock_nowait(M) rw_lock_x_lock_func_nowait(\ - (M), __FILE__, __LINE__) -/********************************************************************** -NOTE! Use the corresponding macro, not directly this function! Lock an -rw-lock in exclusive mode for the current thread. If the rw-lock is locked -in shared or exclusive mode, or there is an exclusive lock request waiting, -the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting -for the lock, before suspending the thread. If the same thread has an x-lock -on the rw-lock, locking succeed, with the following exception: if pass != 0, -only a single x-lock may be taken on the lock. NOTE: If the same thread has -an s-lock, locking does not succeed! */ - -void -rw_lock_x_lock_func( -/*================*/ - rw_lock_t* lock, /* in: pointer to rw-lock */ - ulint pass, /* in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/* in: file name where lock requested */ - ulint line); /* in: line where requested */ -/********************************************************************** -Releases an exclusive mode lock. */ -UNIV_INLINE -void -rw_lock_x_unlock_func( -/*==================*/ - rw_lock_t* lock /* in: rw-lock */ -#ifdef UNIV_SYNC_DEBUG - ,ulint pass /* in: pass value; != 0, if the lock may have - been passed to another thread to unlock */ -#endif - ); -/*********************************************************************** -Releases an exclusive mode lock. */ - -#ifdef UNIV_SYNC_DEBUG -#define rw_lock_x_unlock(L) rw_lock_x_unlock_func(L, 0) -#else -#define rw_lock_x_unlock(L) rw_lock_x_unlock_func(L) -#endif -/*********************************************************************** -Releases an exclusive mode lock. 
*/ - -#ifdef UNIV_SYNC_DEBUG -#define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L, P) -#else -#define rw_lock_x_unlock_gen(L, P) rw_lock_x_unlock_func(L) -#endif -/********************************************************************** -Low-level function which locks an rw-lock in s-mode when we know that it -is possible and none else is currently accessing the rw-lock structure. -Then we can do the locking without reserving the mutex. */ -UNIV_INLINE -void -rw_lock_s_lock_direct( -/*==================*/ - rw_lock_t* lock, /* in: pointer to rw-lock */ - const char* file_name, /* in: file name where requested */ - ulint line /* in: line where lock requested */ -); -/********************************************************************** -Low-level function which locks an rw-lock in x-mode when we know that it -is not locked and none else is currently accessing the rw-lock structure. -Then we can do the locking without reserving the mutex. */ -UNIV_INLINE -void -rw_lock_x_lock_direct( -/*==================*/ - rw_lock_t* lock, /* in: pointer to rw-lock */ - const char* file_name, /* in: file name where requested */ - ulint line /* in: line where lock requested */ -); -/********************************************************************** -This function is used in the insert buffer to move the ownership of an -x-latch on a buffer frame to the current thread. The x-latch was set by -the buffer read operation and it protected the buffer frame while the -read was done. The ownership is moved because we want that the current -thread is able to acquire a second x-latch which is stored in an mtr. -This, in turn, is needed to pass the debug checks of index page -operations. 
*/ - -void -rw_lock_x_lock_move_ownership( -/*==========================*/ - rw_lock_t* lock); /* in: lock which was x-locked in the - buffer read */ -/********************************************************************** -Releases a shared mode lock when we know there are no waiters and none -else will access the lock during the time this function is executed. */ -UNIV_INLINE -void -rw_lock_s_unlock_direct( -/*====================*/ - rw_lock_t* lock); /* in: rw-lock */ -/********************************************************************** -Releases an exclusive mode lock when we know there are no waiters, and -none else will access the lock durint the time this function is executed. */ -UNIV_INLINE -void -rw_lock_x_unlock_direct( -/*====================*/ - rw_lock_t* lock); /* in: rw-lock */ -/********************************************************************** -Returns the value of writer_count for the lock. Does not reserve the lock -mutex, so the caller must be sure it is not changed during the call. */ -UNIV_INLINE -ulint -rw_lock_get_x_lock_count( -/*=====================*/ - /* out: value of writer_count */ - rw_lock_t* lock); /* in: rw-lock */ -/************************************************************************ -Accessor functions for rw lock. */ -UNIV_INLINE -ulint -rw_lock_get_waiters( -/*================*/ - rw_lock_t* lock); -UNIV_INLINE -ulint -rw_lock_get_writer( -/*===============*/ - rw_lock_t* lock); -UNIV_INLINE -ulint -rw_lock_get_reader_count( -/*=====================*/ - rw_lock_t* lock); -/********************************************************************** -Decrements lock_word the specified amount if it is greater than 0. -This is used by both s_lock and x_lock operations. 
*/ -UNIV_INLINE -ibool -rw_lock_lock_word_decr( - /* out: TRUE if decr occurs */ - rw_lock_t* lock, /* in: rw-lock */ - ulint amount); /* in: amount to decrement */ -/********************************************************************** -Increments lock_word the specified amount and returns new value. */ -UNIV_INLINE -lint -rw_lock_lock_word_incr( - /* out: TRUE if decr occurs */ - rw_lock_t* lock, - ulint amount); /* in: rw-lock */ -#ifdef UNIV_SYNC_DEBUG -/********************************************************************** -Checks if the thread has locked the rw-lock in the specified mode, with -the pass value == 0. */ - -ibool -rw_lock_own( -/*========*/ - rw_lock_t* lock, /* in: rw-lock */ - ulint lock_type); /* in: lock type: RW_LOCK_SHARED, - RW_LOCK_EX */ -#endif /* UNIV_SYNC_DEBUG */ -/********************************************************************** -Checks if somebody has locked the rw-lock in the specified mode. */ - -ibool -rw_lock_is_locked( -/*==============*/ - rw_lock_t* lock, /* in: rw-lock */ - ulint lock_type); /* in: lock type: RW_LOCK_SHARED, - RW_LOCK_EX */ -#ifdef UNIV_SYNC_DEBUG -/******************************************************************* -Prints debug info of an rw-lock. */ - -void -rw_lock_print( -/*==========*/ - rw_lock_t* lock); /* in: rw-lock */ -/******************************************************************* -Prints debug info of currently locked rw-locks. */ - -void -rw_lock_list_print_info( -/*====================*/ - FILE* file); /* in: file where to print */ -/******************************************************************* -Returns the number of currently locked rw-locks. -Works only in the debug version. */ - -ulint -rw_lock_n_locked(void); -/*==================*/ - -/*#####################################################################*/ - -/********************************************************************** -Acquires the debug mutex. 
We cannot use the mutex defined in sync0sync, -because the debug mutex is also acquired in sync0arr while holding the OS -mutex protecting the sync array, and the ordinary mutex_enter might -recursively call routines in sync0arr, leading to a deadlock on the OS -mutex. */ - -void -rw_lock_debug_mutex_enter(void); -/*==========================*/ -/********************************************************************** -Releases the debug mutex. */ - -void -rw_lock_debug_mutex_exit(void); -/*==========================*/ -/************************************************************************* -Prints info of a debug struct. */ - -void -rw_lock_debug_print( -/*================*/ - rw_lock_debug_t* info); /* in: debug struct */ -#endif /* UNIV_SYNC_DEBUG */ - -/* NOTE! The structure appears here only for the compiler to know its size. -Do not use its fields directly! The structure used in the spin lock -implementation of a read-write lock. Several threads may have a shared lock -simultaneously in this lock, but only one writer may have an exclusive lock, -in which case no shared locks are allowed. To prevent starving of a writer -blocked by readers, a writer may queue for x-lock by decrementing lock_word: -no new readers will be let in while the thread waits for readers to exit. */ - -struct rw_lock_struct { - volatile lint lock_word; - /* Holds the state of the lock. */ - volatile ulint waiters;/* 1: there are waiters */ - volatile ulint pass; /* Default value 0. This is set to some - value != 0 given by the caller of an x-lock - operation, if the x-lock is to be passed to - another thread to unlock (which happens in - asynchronous i/o). */ - volatile os_thread_id_t writer_thread; - /* Thread id of writer thread */ - os_event_t event; /* Used by sync0arr.c for thread queueing */ - os_event_t wait_ex_event; - /* Event for next-writer to wait on. A thread - must decrement lock_word before waiting. 
*/ -#ifndef UNIV_SYNC_ATOMIC - mutex_t mutex; /* The mutex protecting rw_lock_struct */ -#endif /* UNIV_SYNC_ATOMIC */ - - UT_LIST_NODE_T(rw_lock_t) list; - /* All allocated rw locks are put into a - list */ -#ifdef UNIV_SYNC_DEBUG - UT_LIST_BASE_NODE_T(rw_lock_debug_t) debug_list; - /* In the debug version: pointer to the debug - info list of the lock */ - ulint level; /* Level in the global latching order. */ -#endif /* UNIV_SYNC_DEBUG */ - ulint count_os_wait; /* Count of os_waits. May not be accurate */ - const char* cfile_name;/* File name where lock created */ - /* last s-lock file/line is not guaranteed to be correct */ - const char* last_s_file_name;/* File name where last s-locked */ - const char* last_x_file_name;/* File name where last x-locked */ - ibool writer_is_wait_ex; - /* This is TRUE if the writer field is - RW_LOCK_WAIT_EX; this field is located far - from the memory update hotspot fields which - are at the start of this struct, thus we can - peek this field without causing much memory - bus traffic */ - unsigned cline:14; /* Line where created */ - unsigned last_s_line:14; /* Line number where last time s-locked */ - unsigned last_x_line:14; /* Line number where last time x-locked */ - ulint magic_n; -}; - -#define RW_LOCK_MAGIC_N 22643 - -#ifdef UNIV_SYNC_DEBUG -/* The structure for storing debug info of an rw-lock */ -struct rw_lock_debug_struct { - - os_thread_id_t thread_id; /* The thread id of the thread which - locked the rw-lock */ - ulint pass; /* Pass value given in the lock operation */ - ulint lock_type; /* Type of the lock: RW_LOCK_EX, - RW_LOCK_SHARED, RW_LOCK_WAIT_EX */ - const char* file_name;/* File name where the lock was obtained */ - ulint line; /* Line where the rw-lock was locked */ - UT_LIST_NODE_T(rw_lock_debug_t) list; - /* Debug structs are linked in a two-way - list */ -}; -#endif /* UNIV_SYNC_DEBUG */ - -#ifndef UNIV_NONINL -#include "sync0rw.ic" -#endif - -#endif diff --git a/storage/innobase/include/sync0rw.ic 
b/storage/innobase/include/sync0rw.ic deleted file mode 100644 index e3f1d881cb4..00000000000 --- a/storage/innobase/include/sync0rw.ic +++ /dev/null @@ -1,559 +0,0 @@ -/****************************************************** -The read-write lock (for threads) - -(c) 1995 Innobase Oy - -Created 9/11/1995 Heikki Tuuri -*******************************************************/ - -/********************************************************************** -Lock an rw-lock in shared mode for the current thread. If the rw-lock is -locked in exclusive mode, or there is an exclusive lock request waiting, -the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), -waiting for the lock before suspending the thread. */ - -void -rw_lock_s_lock_spin( -/*================*/ - rw_lock_t* lock, /* in: pointer to rw-lock */ - ulint pass, /* in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/* in: file name where lock requested */ - ulint line); /* in: line where requested */ -#ifdef UNIV_SYNC_DEBUG -/********************************************************************** -Inserts the debug information for an rw-lock. */ - -void -rw_lock_add_debug_info( -/*===================*/ - rw_lock_t* lock, /* in: rw-lock */ - ulint pass, /* in: pass value */ - ulint lock_type, /* in: lock type */ - const char* file_name, /* in: file where requested */ - ulint line); /* in: line where requested */ -/********************************************************************** -Removes a debug information struct for an rw-lock. */ - -void -rw_lock_remove_debug_info( -/*======================*/ - rw_lock_t* lock, /* in: rw-lock */ - ulint pass, /* in: pass value */ - ulint lock_type); /* in: lock type */ -#endif /* UNIV_SYNC_DEBUG */ - -/************************************************************************ -Accessor functions for rw lock. 
*/ -UNIV_INLINE -ulint -rw_lock_get_waiters( -/*================*/ - rw_lock_t* lock) -{ - return(lock->waiters); -} -UNIV_INLINE -void -rw_lock_set_waiters( -/*================*/ - rw_lock_t* lock) -{ -#ifdef UNIV_SYNC_ATOMIC - os_compare_and_swap(&(lock->waiters), 0, 1); -#else /* UNIV_SYNC_ATOMIC */ - lock->waiters = 1; -#endif /* UNIV_SYNC_ATOMIC */ -} -UNIV_INLINE -void -rw_lock_reset_waiters( -/*================*/ - rw_lock_t* lock) -{ -#ifdef UNIV_SYNC_ATOMIC - os_compare_and_swap(&(lock->waiters), 1, 0); -#else /* UNIV_SYNC_ATOMIC */ - lock->waiters = 0; -#endif /* UNIV_SYNC_ATOMIC */ -} - -/********************************************************************** -Returns the write-status of the lock - this function made more sense -with the old rw_lock implementation. - */ -UNIV_INLINE -ulint -rw_lock_get_writer( -/*===============*/ - rw_lock_t* lock) -{ - lint lock_word = lock->lock_word; - if(lock_word > 0) { - /* return NOT_LOCKED in s-lock state, like the writer - member of the old lock implementation. */ - return RW_LOCK_NOT_LOCKED; - } else if (((-lock_word) % X_LOCK_DECR) == 0) { - return RW_LOCK_EX; - } else { - ut_ad(lock_word > -X_LOCK_DECR); - return RW_LOCK_WAIT_EX; - } -} - -UNIV_INLINE -ulint -rw_lock_get_reader_count( -/*=====================*/ - rw_lock_t* lock) -{ - lint lock_word = lock->lock_word; - if(lock_word > 0) { - /* s-locked, no x-waiters */ - return(X_LOCK_DECR - lock_word); - } else if (lock_word < 0 && lock_word > -X_LOCK_DECR) { - /* s-locked, with x-waiters */ - return (ulint)(-lock_word); - } - return 0; -} - -#ifndef UNIV_SYNC_ATOMIC -UNIV_INLINE -mutex_t* -rw_lock_get_mutex( -/*==============*/ - rw_lock_t* lock) -{ - return(&(lock->mutex)); -} -#endif - -/********************************************************************** -Returns the value of writer_count for the lock. Does not reserve the lock -mutex, so the caller must be sure it is not changed during the call. 
*/ -UNIV_INLINE -ulint -rw_lock_get_x_lock_count( -/*=====================*/ - /* out: value of writer_count */ - rw_lock_t* lock) /* in: rw-lock */ -{ - lint lock_copy = lock->lock_word; - /* If there is a reader, lock_word is not divisible by X_LOCK_DECR */ - if(lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) { - return 0; - } - return ((-lock_copy) / X_LOCK_DECR) + 1; -} - -/********************************************************************** -Two different implementations for decrementing the lock_word of a rw_lock: -one for systems supporting atomic operations, one for others. This does -does not support recusive x-locks: they should be handled by the caller and -need not be atomic since they are performed by the current lock holder. -Returns true if the decrement was made, false if not. */ -UNIV_INLINE -ibool -rw_lock_lock_word_decr( - /* out: TRUE if decr occurs */ - rw_lock_t* lock, /* in: rw-lock */ - ulint amount) /* in: amount of decrement */ -{ - -#ifdef UNIV_SYNC_ATOMIC - - lint local_lock_word = lock->lock_word; - while (local_lock_word > 0) { - if(os_compare_and_swap(&(lock->lock_word), - local_lock_word, - local_lock_word - amount)) { - return TRUE; - } - local_lock_word = lock->lock_word; - } - return(FALSE); - -#else /* UNIV_SYNC_ATOMIC */ - - ibool success = FALSE; - mutex_enter(&(lock->mutex)); - if(lock->lock_word > 0) { - lock->lock_word -= amount; - success = TRUE; - } - mutex_exit(&(lock->mutex)); - return success; - -#endif /* UNIV_SYNC_ATOMIC */ - -} - -/********************************************************************** -Two different implementations for incrementing the lock_word of a rw_lock: -one for systems supporting atomic operations, one for others. -Returns the value of lock_word after increment. 
*/ -UNIV_INLINE -lint -rw_lock_lock_word_incr( - /* out: lock->lock_word after increment */ - rw_lock_t* lock, /* in: rw-lock */ - ulint amount) /* in: amount of increment */ -{ - -#ifdef UNIV_SYNC_ATOMIC - - return(os_atomic_increment(&(lock->lock_word), amount)); - -#else /* UNIV_SYNC_ATOMIC */ - - lint local_lock_word; - - mutex_enter(&(lock->mutex)); - - lock->lock_word += amount; - local_lock_word = lock->lock_word; - - mutex_exit(&(lock->mutex)); - - return local_lock_word; - -#endif /* UNIV_SYNC_ATOMIC */ - -} - -/********************************************************************** -Low-level function which tries to lock an rw-lock in s-mode. Performs no -spinning. */ -UNIV_INLINE -ibool -rw_lock_s_lock_low( -/*===============*/ - /* out: TRUE if success */ - rw_lock_t* lock, /* in: pointer to rw-lock */ - ulint pass __attribute__((unused)), - /* in: pass value; != 0, if the lock will be - passed to another thread to unlock */ - const char* file_name, /* in: file name where lock requested */ - ulint line) /* in: line where requested */ -{ - /* TODO: study performance of UNIV_LIKELY branch prediction hints. */ - if (!rw_lock_lock_word_decr(lock, 1)) { - /* Locking did not succeed */ - return(FALSE); - } - -#ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line); -#endif - /* These debugging values are not set safely: they may be incorrect - or even refer to a line that is invalid for the file name. */ - lock->last_s_file_name = file_name; - lock->last_s_line = line; - - return(TRUE); /* locking succeeded */ -} - -/* TODO: The "direct" functions are not used. Remove them? */ -/********************************************************************** -Low-level function which locks an rw-lock in s-mode when we know that it -is possible and none else is currently accessing the rw-lock structure. -Then we can do the locking without reserving the mutex. 
*/ -UNIV_INLINE -void -rw_lock_s_lock_direct( -/*==================*/ - rw_lock_t* lock, /* in: pointer to rw-lock */ - const char* file_name, /* in: file name where requested */ - ulint line) /* in: line where lock requested */ -{ - ut_ad(lock->lock_word == X_LOCK_DECR); - - /* Indicate there is a new reader by decrementing lock_word */ - lock->lock_word--; - - lock->last_s_file_name = file_name; - lock->last_s_line = line; - -#ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name, line); -#endif -} - -/* TODO: The "direct" functions are not used. Remove them? */ -/********************************************************************** -Low-level function which locks an rw-lock in x-mode when we know that it -is not locked and none else is currently accessing the rw-lock structure. -Then we can do the locking without reserving the mutex. */ -UNIV_INLINE -void -rw_lock_x_lock_direct( -/*==================*/ - rw_lock_t* lock, /* in: pointer to rw-lock */ - const char* file_name, /* in: file name where requested */ - ulint line) /* in: line where lock requested */ -{ - ut_ad(rw_lock_validate(lock)); - ut_ad(lock->lock_word == X_LOCK_DECR); - - lock->lock_word -= X_LOCK_DECR; - lock->writer_thread = os_thread_get_curr_id(); - lock->pass = 0; - - lock->last_x_file_name = file_name; - lock->last_x_line = line; - -#ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line); -#endif -} - -/********************************************************************** -NOTE! Use the corresponding macro, not directly this function! Lock an -rw-lock in shared mode for the current thread. If the rw-lock is locked -in exclusive mode, or there is an exclusive lock request waiting, the -function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for -the lock, before suspending the thread. 
*/ -UNIV_INLINE -void -rw_lock_s_lock_func( -/*================*/ - rw_lock_t* lock, /* in: pointer to rw-lock */ - ulint pass, /* in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/* in: file name where lock requested */ - ulint line) /* in: line where requested */ -{ - /* NOTE: As we do not know the thread ids for threads which have - s-locked a latch, and s-lockers will be served only after waiting - x-lock requests have been fulfilled, then if this thread already - owns an s-lock here, it may end up in a deadlock with another thread - which requests an x-lock here. Therefore, we will forbid recursive - s-locking of a latch: the following assert will warn the programmer - of the possibility of this kind of a deadlock. If we want to implement - safe recursive s-locking, we should keep in a list the thread ids of - the threads which have s-locked a latch. This would use some CPU - time. */ - -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */ -#endif /* UNIV_SYNC_DEBUG */ - - /* TODO: study performance of UNIV_LIKELY branch prediction hints. */ - if (rw_lock_s_lock_low(lock, pass, file_name, line)) { - - return; /* Success */ - } else { - /* Did not succeed, try spin wait */ - - rw_lock_s_lock_spin(lock, pass, file_name, line); - - return; - } -} - -/********************************************************************** -NOTE! Use the corresponding macro, not directly this function! Lock an -rw-lock in exclusive mode for the current thread if the lock can be -obtained immediately. 
*/ -UNIV_INLINE -ibool -rw_lock_x_lock_func_nowait( -/*=======================*/ - /* out: TRUE if success */ - rw_lock_t* lock, /* in: pointer to rw-lock */ - const char* file_name,/* in: file name where lock requested */ - ulint line) /* in: line where requested */ -{ - os_thread_id_t curr_thread = os_thread_get_curr_id(); - - ibool success; - -#ifdef UNIV_SYNC_ATOMIC - success = os_compare_and_swap(&(lock->lock_word), X_LOCK_DECR, 0); -#else - - success = FALSE; - mutex_enter(&(lock->mutex)); - if(lock->lock_word == X_LOCK_DECR) { - lock->lock_word = 0; - success = TRUE; - } - mutex_exit(&(lock->mutex)); - -#endif - if(success) { - lock->writer_thread = curr_thread; - lock->pass = 0; - - } else if (!(lock->pass) && - os_thread_eq(lock->writer_thread, curr_thread)) { - /* Must verify pass first: otherwise another thread can - call move_ownership suddenly allowing recursive locks. - and after we have verified our thread_id matches - (though move_ownership has since changed it).*/ - - /* Relock: this lock_word modification is safe since no other - threads can modify (lock, unlock, or reserve) lock_word while - there is an exclusive writer and this is the writer thread. */ - lock->lock_word -= X_LOCK_DECR; - - ut_ad(((-lock->lock_word) % X_LOCK_DECR) == 0); - - } else { - /* Failure */ - return(FALSE); - } -#ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line); -#endif - - lock->last_x_file_name = file_name; - lock->last_x_line = line; - - ut_ad(rw_lock_validate(lock)); - - return(TRUE); -} - -/********************************************************************** -Releases a shared mode lock. 
*/ -UNIV_INLINE -void -rw_lock_s_unlock_func( -/*==================*/ - rw_lock_t* lock /* in: rw-lock */ -#ifdef UNIV_SYNC_DEBUG - ,ulint pass /* in: pass value; != 0, if the lock may have - been passed to another thread to unlock */ -#endif - ) -{ - ut_ad((lock->lock_word % X_LOCK_DECR) != 0); - -#ifdef UNIV_SYNC_DEBUG - rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED); -#endif - - /* Increment lock_word to indicate 1 less reader */ - if(rw_lock_lock_word_incr(lock, 1) == 0) { - - /* wait_ex waiter exists. It may not be asleep, but we signal - anyway. We do not wake other waiters, because they can't - exist without wait_ex waiter and wait_ex waiter goes first.*/ - os_event_set(lock->wait_ex_event); - sync_array_object_signalled(sync_primary_wait_array); - - } - - ut_ad(rw_lock_validate(lock)); - -#ifdef UNIV_SYNC_PERF_STAT - rw_s_exit_count++; -#endif -} - -/* TODO: The "direct" functions are not used. Remove them? */ -/********************************************************************** -Releases a shared mode lock when we know there are no waiters and none -else will access the lock during the time this function is executed. */ -UNIV_INLINE -void -rw_lock_s_unlock_direct( -/*====================*/ - rw_lock_t* lock) /* in: rw-lock */ -{ - ut_ad(lock->lock_word < X_LOCK_DECR); - -#ifdef UNIV_SYNC_DEBUG - rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED); -#endif - - /* Decrease reader count by incrementing lock_word */ - lock->lock_word++; - - ut_ad(!rw_lock_get_waiters(lock)); - ut_ad(rw_lock_validate(lock)); -#ifdef UNIV_SYNC_PERF_STAT - rw_s_exit_count++; -#endif -} - -/********************************************************************** -Releases an exclusive mode lock. 
*/ -UNIV_INLINE -void -rw_lock_x_unlock_func( -/*==================*/ - rw_lock_t* lock /* in: rw-lock */ -#ifdef UNIV_SYNC_DEBUG - ,ulint pass /* in: pass value; != 0, if the lock may have - been passed to another thread to unlock */ -#endif - ) -{ - uint local_pass; - ut_ad((lock->lock_word % X_LOCK_DECR) == 0); - - /* - Must reset pass while we still have the lock. - If we are not the last unlocker, we correct it later in the function, - which is harmless since we still hold the lock. - */ - local_pass = lock->pass; - lock->pass = 1; - -#ifdef UNIV_SYNC_DEBUG - rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX); -#endif - - if(rw_lock_lock_word_incr(lock, X_LOCK_DECR) == X_LOCK_DECR) { - /* Lock is now free. May have to signal read/write waiters. - We do not need to signal wait_ex waiters, since they cannot - exist when there is a writer. */ - if(rw_lock_get_waiters(lock)) { - rw_lock_reset_waiters(lock); - os_event_set(lock->event); - sync_array_object_signalled(sync_primary_wait_array); - } - - } else { - /* We still hold x-lock, so we correct pass. */ - lock->pass = local_pass; - } - - ut_ad(rw_lock_validate(lock)); - -#ifdef UNIV_SYNC_PERF_STAT - rw_x_exit_count++; -#endif -} - -/* TODO: The "direct" functions are not used. Remove them? */ -/********************************************************************** -Releases an exclusive mode lock when we know there are no waiters, and -none else will access the lock durint the time this function is executed. 
*/ -UNIV_INLINE -void -rw_lock_x_unlock_direct( -/*====================*/ - rw_lock_t* lock) /* in: rw-lock */ -{ - /* Reset the exclusive lock if this thread no longer has an x-mode - lock */ - - ut_ad((lock->lock_word % X_LOCK_DECR) == 0); - -#ifdef UNIV_SYNC_DEBUG - rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX); -#endif - lock->pass = 1; - lock->lock_word += X_LOCK_DECR; - - ut_ad(!rw_lock_get_waiters(lock)); - ut_ad(rw_lock_validate(lock)); - -#ifdef UNIV_SYNC_PERF_STAT - rw_x_exit_count++; -#endif -} diff --git a/storage/innobase/include/sync0sync.h b/storage/innobase/include/sync0sync.h deleted file mode 100644 index ae6c72bcd15..00000000000 --- a/storage/innobase/include/sync0sync.h +++ /dev/null @@ -1,561 +0,0 @@ -/****************************************************** -Mutex, the basic synchronization primitive - -(c) 1995 Innobase Oy - -Created 9/5/1995 Heikki Tuuri -*******************************************************/ - -#ifndef sync0sync_h -#define sync0sync_h - -#include "univ.i" -#include "sync0types.h" -#include "ut0lst.h" -#include "ut0mem.h" -#include "os0thread.h" -#include "os0sync.h" -#include "sync0arr.h" -#ifndef WIN32 -#include "my_atomic.h" -#endif - -#ifndef UNIV_HOTBACKUP -extern my_bool timed_mutexes; -#endif /* UNIV_HOTBACKUP */ - -/********************************************************************** -Initializes the synchronization data structures. */ - -void -sync_init(void); -/*===========*/ -/********************************************************************** -Frees the resources in synchronization data structures. */ - -void -sync_close(void); -/*===========*/ -/********************************************************************** -Creates, or rather, initializes a mutex object to a specified memory -location (which must be appropriately aligned). The mutex is initialized -in the reset state. Explicit freeing of the mutex with mutex_free is -necessary only if the memory block containing it is freed. 
*/ - -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG -# define mutex_create(M, level) \ - mutex_create_func((M), #M, (level), __FILE__, __LINE__) -# else -# define mutex_create(M, level) \ - mutex_create_func((M), #M, __FILE__, __LINE__) -# endif -#else -# define mutex_create(M, level) \ - mutex_create_func((M), __FILE__, __LINE__) -#endif - -/********************************************************************** -Creates, or rather, initializes a mutex object in a specified memory -location (which must be appropriately aligned). The mutex is initialized -in the reset state. Explicit freeing of the mutex with mutex_free is -necessary only if the memory block containing it is freed. */ - -void -mutex_create_func( -/*==============*/ - mutex_t* mutex, /* in: pointer to memory */ -#ifdef UNIV_DEBUG - const char* cmutex_name, /* in: mutex name */ -# ifdef UNIV_SYNC_DEBUG - ulint level, /* in: level */ -# endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cfile_name, /* in: file name where created */ - ulint cline); /* in: file line where created */ -/********************************************************************** -Calling this function is obligatory only if the memory buffer containing -the mutex is freed. Removes a mutex object from the mutex list. The mutex -is checked to be in the reset state. */ - -#undef mutex_free /* Fix for MacOS X */ -void -mutex_free( -/*=======*/ - mutex_t* mutex); /* in: mutex */ -/****************************************************************** -NOTE! The following macro should be used in mutex locking, not the -corresponding function. */ - -#define mutex_enter(M) mutex_enter_func((M), __FILE__, __LINE__) -/********************************************************************** -A noninlined function that reserves a mutex. In ha_innodb.cc we have disabled -inlining of InnoDB functions, and no inlined functions should be called from -there. That is why we need to duplicate the inlined function here. 
*/ - -void -mutex_enter_noninline( -/*==================*/ - mutex_t* mutex); /* in: mutex */ -/****************************************************************** -NOTE! The following macro should be used in mutex locking, not the -corresponding function. */ - -/* NOTE! currently same as mutex_enter! */ - -#define mutex_enter_fast(M) mutex_enter_func((M), __FILE__, __LINE__) -#define mutex_enter_fast_func mutex_enter_func; -/********************************************************************** -NOTE! Use the corresponding macro in the header file, not this function -directly. Locks a mutex for the current thread. If the mutex is reserved -the function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting -for the mutex before suspending the thread. */ -UNIV_INLINE -void -mutex_enter_func( -/*=============*/ - mutex_t* mutex, /* in: pointer to mutex */ - const char* file_name, /* in: file name where locked */ - ulint line); /* in: line where locked */ -/****************************************************************** -NOTE! The following macro should be used in mutex locking, not the -corresponding function. */ - -#define mutex_enter_nowait(M) \ - mutex_enter_nowait_func((M), __FILE__, __LINE__) -/************************************************************************ -NOTE! Use the corresponding macro in the header file, not this function -directly. Tries to lock the mutex for the current thread. If the lock is not -acquired immediately, returns with return value 1. */ - -ulint -mutex_enter_nowait_func( -/*====================*/ - /* out: 0 if succeed, 1 if not */ - mutex_t* mutex, /* in: pointer to mutex */ - const char* file_name, /* in: file name where mutex - requested */ - ulint line); /* in: line where requested */ -/********************************************************************** -Unlocks a mutex owned by the current thread. 
*/ -UNIV_INLINE -void -mutex_exit( -/*=======*/ - mutex_t* mutex); /* in: pointer to mutex */ -/********************************************************************** -Releases a mutex. */ - -void -mutex_exit_noninline( -/*=================*/ - mutex_t* mutex); /* in: mutex */ -/********************************************************************** -Returns TRUE if no mutex or rw-lock is currently locked. -Works only in the debug version. */ - -ibool -sync_all_freed(void); -/*================*/ -/*##################################################################### -FUNCTION PROTOTYPES FOR DEBUGGING */ -/*********************************************************************** -Prints wait info of the sync system. */ - -void -sync_print_wait_info( -/*=================*/ - FILE* file); /* in: file where to print */ -/*********************************************************************** -Prints info of the sync system. */ - -void -sync_print( -/*=======*/ - FILE* file); /* in: file where to print */ -#ifdef UNIV_DEBUG -/********************************************************************** -Checks that the mutex has been initialized. */ - -ibool -mutex_validate( -/*===========*/ - const mutex_t* mutex); -/********************************************************************** -Checks that the current thread owns the mutex. Works only -in the debug version. */ - -ibool -mutex_own( -/*======*/ - /* out: TRUE if owns */ - const mutex_t* mutex); /* in: mutex */ -#endif /* UNIV_DEBUG */ -#ifdef UNIV_SYNC_DEBUG -/********************************************************************** -Adds a latch and its level in the thread level array. Allocates the memory -for the array if called first time for this OS thread. Makes the checks -against other latch levels stored in the array for this thread. 
*/ - -void -sync_thread_add_level( -/*==================*/ - void* latch, /* in: pointer to a mutex or an rw-lock */ - ulint level); /* in: level in the latching order; if - SYNC_LEVEL_VARYING, nothing is done */ -/********************************************************************** -Removes a latch from the thread level array if it is found there. */ - -ibool -sync_thread_reset_level( -/*====================*/ - /* out: TRUE if found from the array; it is no error - if the latch is not found, as we presently are not - able to determine the level for every latch - reservation the program does */ - void* latch); /* in: pointer to a mutex or an rw-lock */ -/********************************************************************** -Checks that the level array for the current thread is empty. */ - -ibool -sync_thread_levels_empty(void); -/*==========================*/ - /* out: TRUE if empty */ -/********************************************************************** -Checks that the level array for the current thread is empty. */ - -ibool -sync_thread_levels_empty_gen( -/*=========================*/ - /* out: TRUE if empty except the - exceptions specified below */ - ibool dict_mutex_allowed); /* in: TRUE if dictionary mutex is - allowed to be owned by the thread, - also purge_is_running mutex is - allowed */ -/********************************************************************** -Gets the debug information for a reserved mutex. */ - -void -mutex_get_debug_info( -/*=================*/ - mutex_t* mutex, /* in: mutex */ - const char** file_name, /* out: file where requested */ - ulint* line, /* out: line where requested */ - os_thread_id_t* thread_id); /* out: id of the thread which owns - the mutex */ -/********************************************************************** -Counts currently reserved mutexes. Works only in the debug version. 
*/ - -ulint -mutex_n_reserved(void); -/*==================*/ -#endif /* UNIV_SYNC_DEBUG */ -/********************************************************************** -NOT to be used outside this module except in debugging! Gets the value -of the lock word. */ -UNIV_INLINE -byte -mutex_get_lock_word( -/*================*/ - const mutex_t* mutex); /* in: mutex */ -#ifdef UNIV_SYNC_DEBUG -/********************************************************************** -NOT to be used outside this module except in debugging! Gets the waiters -field in a mutex. */ -UNIV_INLINE -ulint -mutex_get_waiters( -/*==============*/ - /* out: value to set */ - const mutex_t* mutex); /* in: mutex */ -#endif /* UNIV_SYNC_DEBUG */ - -/* - LATCHING ORDER WITHIN THE DATABASE - ================================== - -The mutex or latch in the central memory object, for instance, a rollback -segment object, must be acquired before acquiring the latch or latches to -the corresponding file data structure. In the latching order below, these -file page object latches are placed immediately below the corresponding -central memory object latch or mutex. - -Synchronization object Notes ----------------------- ----- - -Dictionary mutex If we have a pointer to a dictionary -| object, e.g., a table, it can be -| accessed without reserving the -| dictionary mutex. We must have a -| reservation, a memoryfix, to the -| appropriate table object in this case, -| and the table must be explicitly -| released later. -V -Dictionary header -| -V -Secondary index tree latch The tree latch protects also all -| the B-tree non-leaf pages. These -V can be read with the page only -Secondary index non-leaf bufferfixed to save CPU time, -| no s-latch is needed on the page. -| Modification of a page requires an -| x-latch on the page, however. If a -| thread owns an x-latch to the tree, -| it is allowed to latch non-leaf pages -| even after it has acquired the fsp -| latch. 
-V -Secondary index leaf The latch on the secondary index leaf -| can be kept while accessing the -| clustered index, to save CPU time. -V -Clustered index tree latch To increase concurrency, the tree -| latch is usually released when the -| leaf page latch has been acquired. -V -Clustered index non-leaf -| -V -Clustered index leaf -| -V -Transaction system header -| -V -Transaction undo mutex The undo log entry must be written -| before any index page is modified. -| Transaction undo mutex is for the undo -| logs the analogue of the tree latch -| for a B-tree. If a thread has the -| trx undo mutex reserved, it is allowed -| to latch the undo log pages in any -| order, and also after it has acquired -| the fsp latch. -V -Rollback segment mutex The rollback segment mutex must be -| reserved, if, e.g., a new page must -| be added to an undo log. The rollback -| segment and the undo logs in its -| history list can be seen as an -| analogue of a B-tree, and the latches -| reserved similarly, using a version of -| lock-coupling. If an undo log must be -| extended by a page when inserting an -| undo log record, this corresponds to -| a pessimistic insert in a B-tree. -V -Rollback segment header -| -V -Purge system latch -| -V -Undo log pages If a thread owns the trx undo mutex, -| or for a log in the history list, the -| rseg mutex, it is allowed to latch -| undo log pages in any order, and even -| after it has acquired the fsp latch. -| If a thread does not have the -| appropriate mutex, it is allowed to -| latch only a single undo log page in -| a mini-transaction. -V -File space management latch If a mini-transaction must allocate -| several file pages, it can do that, -| because it keeps the x-latch to the -| file space management in its memo. -V -File system pages -| -V -Kernel mutex If a kernel operation needs a file -| page allocation, it must reserve the -| fsp x-latch before acquiring the kernel -| mutex. 
-V -Search system mutex -| -V -Buffer pool mutex -| -V -Log mutex -| -Any other latch -| -V -Memory pool mutex */ - -/* Latching order levels */ - -/* User transaction locks are higher than any of the latch levels below: -no latches are allowed when a thread goes to wait for a normal table -or row lock! */ -#define SYNC_USER_TRX_LOCK 9999 -#define SYNC_NO_ORDER_CHECK 3000 /* this can be used to suppress - latching order checking */ -#define SYNC_LEVEL_VARYING 2000 /* Level is varying. Only used with - buffer pool page locks, which do not - have a fixed level, but instead have - their level set after the page is - locked; see e.g. - ibuf_bitmap_get_map_page(). */ -#define SYNC_DICT_OPERATION 1001 /* table create, drop, etc. reserve - this in X-mode, implicit or backround - operations purge, rollback, foreign - key checks reserve this in S-mode */ -#define SYNC_DICT 1000 -#define SYNC_DICT_AUTOINC_MUTEX 999 -#define SYNC_DICT_HEADER 995 -#define SYNC_IBUF_HEADER 914 -#define SYNC_IBUF_PESS_INSERT_MUTEX 912 -#define SYNC_IBUF_MUTEX 910 /* ibuf mutex is really below - SYNC_FSP_PAGE: we assign a value this - high only to make the program to pass - the debug checks */ -/*-------------------------------*/ -#define SYNC_INDEX_TREE 900 -#define SYNC_TREE_NODE_NEW 892 -#define SYNC_TREE_NODE_FROM_HASH 891 -#define SYNC_TREE_NODE 890 -#define SYNC_PURGE_SYS 810 -#define SYNC_PURGE_LATCH 800 -#define SYNC_TRX_UNDO 700 -#define SYNC_RSEG 600 -#define SYNC_RSEG_HEADER_NEW 591 -#define SYNC_RSEG_HEADER 590 -#define SYNC_TRX_UNDO_PAGE 570 -#define SYNC_EXTERN_STORAGE 500 -#define SYNC_FSP 400 -#define SYNC_FSP_PAGE 395 -/*------------------------------------- Insert buffer headers */ -/*------------------------------------- ibuf_mutex */ -/*------------------------------------- Insert buffer tree */ -#define SYNC_IBUF_BITMAP_MUTEX 351 -#define SYNC_IBUF_BITMAP 350 -/*------------------------------------- MySQL query cache mutex */ -/*------------------------------------- MySQL 
binlog mutex */ -/*-------------------------------*/ -#define SYNC_KERNEL 300 -#define SYNC_REC_LOCK 299 -#define SYNC_TRX_LOCK_HEAP 298 -#define SYNC_TRX_SYS_HEADER 290 -#define SYNC_LOG 170 -#define SYNC_RECV 168 -#define SYNC_WORK_QUEUE 161 -#define SYNC_SEARCH_SYS 160 /* NOTE that if we have a memory - heap that can be extended to the - buffer pool, its logical level is - SYNC_SEARCH_SYS, as memory allocation - can call routines there! Otherwise - the level is SYNC_MEM_HASH. */ -#define SYNC_BUF_POOL 150 -#define SYNC_BUF_BLOCK 149 -#define SYNC_DOUBLEWRITE 140 -#define SYNC_ANY_LATCH 135 -#define SYNC_THR_LOCAL 133 -#define SYNC_MEM_HASH 131 -#define SYNC_MEM_POOL 130 - -/* Codes used to designate lock operations */ -#define RW_LOCK_NOT_LOCKED 350 -#define RW_LOCK_EX 351 -#define RW_LOCK_EXCLUSIVE 351 -#define RW_LOCK_SHARED 352 -#define RW_LOCK_WAIT_EX 353 -#define SYNC_MUTEX 354 - -/* NOTE! The structure appears here only for the compiler to know its size. -Do not use its fields directly! The structure used in the spin lock -implementation of a mutual exclusion semaphore. */ - -struct mutex_struct { - os_event_t event; /* Used by sync0arr.c for the wait queue */ - - byte lock_word; /* This byte is the target of the atomic - test-and-set instruction in Win32 and - x86 32/64 with GCC 4.1.0 or later version */ -#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER) -#elif defined(MY_ATOMIC_NOLOCK) - /* We have my_atomic_* routines that are - intrinsically atomic, so no need for the - mutex. */ -#else - os_fast_mutex_t - os_fast_mutex; /* In other systems we use this OS mutex - in place of lock_word */ -#endif - ulint waiters; /* This ulint is set to 1 if there are (or - may be) threads waiting in the global wait - array for this mutex to be released. - Otherwise, this is 0. */ - UT_LIST_NODE_T(mutex_t) list; /* All allocated mutexes are put into - a list. Pointers to the next and prev. 
*/ -#ifdef UNIV_SYNC_DEBUG - const char* file_name; /* File where the mutex was locked */ - ulint line; /* Line where the mutex was locked */ - ulint level; /* Level in the global latching order */ -#endif /* UNIV_SYNC_DEBUG */ - const char* cfile_name;/* File name where mutex created */ - ulint cline; /* Line where created */ -#ifdef UNIV_DEBUG - os_thread_id_t thread_id; /* The thread id of the thread - which locked the mutex. */ - ulint magic_n; -# define MUTEX_MAGIC_N (ulint)979585 -#endif /* UNIV_DEBUG */ -#ifndef UNIV_HOTBACKUP - ulong count_os_wait; /* count of os_wait */ -# ifdef UNIV_DEBUG - ulong count_using; /* count of times mutex used */ - ulong count_spin_loop; /* count of spin loops */ - ulong count_spin_rounds; /* count of spin rounds */ - ulong count_os_yield; /* count of os_wait */ - ulonglong lspent_time; /* mutex os_wait timer msec */ - ulonglong lmax_spent_time; /* mutex os_wait timer msec */ - const char* cmutex_name;/* mutex name */ - ulint mutex_type;/* 0 - usual mutex 1 - rw_lock mutex */ -# endif /* UNIV_DEBUG */ -#endif /* !UNIV_HOTBACKUP */ -}; - -/* The global array of wait cells for implementation of the databases own -mutexes and read-write locks. Appears here for debugging purposes only! */ - -extern sync_array_t* sync_primary_wait_array; - -/* Constant determining how long spin wait is continued before suspending -the thread. A value 600 rounds on a 1995 100 MHz Pentium seems to correspond -to 20 microseconds. */ - -#define SYNC_SPIN_ROUNDS srv_n_spin_wait_rounds - -/* The number of system calls made in this module. Intended for performance -monitoring. */ - -extern ib_longlong mutex_exit_count; - -#ifdef UNIV_SYNC_DEBUG -/* Latching order checks start when this is set TRUE */ -extern ibool sync_order_checks_on; -#endif /* UNIV_SYNC_DEBUG */ - -/* This variable is set to TRUE when sync_init is called */ -extern ibool sync_initialized; - -/* Global list of database mutexes (not OS mutexes) created. 
*/ -typedef UT_LIST_BASE_NODE_T(mutex_t) ut_list_base_node_t; -extern ut_list_base_node_t mutex_list; - -/* Mutex protecting the mutex_list variable */ -extern mutex_t mutex_list_mutex; - - -#ifndef UNIV_NONINL -#include "sync0sync.ic" -#endif - -#endif diff --git a/storage/innobase/include/sync0sync.ic b/storage/innobase/include/sync0sync.ic deleted file mode 100644 index f5a85e0e7fb..00000000000 --- a/storage/innobase/include/sync0sync.ic +++ /dev/null @@ -1,248 +0,0 @@ -/****************************************************** -Mutex, the basic synchronization primitive - -(c) 1995 Innobase Oy - -Created 9/5/1995 Heikki Tuuri -*******************************************************/ - -/********************************************************************** -Sets the waiters field in a mutex. */ - -void -mutex_set_waiters( -/*==============*/ - mutex_t* mutex, /* in: mutex */ - ulint n); /* in: value to set */ -/********************************************************************** -Reserves a mutex for the current thread. If the mutex is reserved, the -function spins a preset time (controlled by SYNC_SPIN_ROUNDS) waiting -for the mutex before suspending the thread. */ - -void -mutex_spin_wait( -/*============*/ - mutex_t* mutex, /* in: pointer to mutex */ - const char* file_name, /* in: file name where mutex - requested */ - ulint line); /* in: line where requested */ -#ifdef UNIV_SYNC_DEBUG -/********************************************************************** -Sets the debug information for a reserved mutex. */ - -void -mutex_set_debug_info( -/*=================*/ - mutex_t* mutex, /* in: mutex */ - const char* file_name, /* in: file where requested */ - ulint line); /* in: line where requested */ -#endif /* UNIV_SYNC_DEBUG */ -/********************************************************************** -Releases the threads waiting in the primary wait array for this mutex. 
*/ - -void -mutex_signal_object( -/*================*/ - mutex_t* mutex); /* in: mutex */ - -/********************************************************************** -Performs an atomic test-and-set instruction to the lock_word field of a -mutex. */ -UNIV_INLINE -byte -mutex_test_and_set( -/*===============*/ - /* out: the previous value of lock_word: 0 or - 1 */ - mutex_t* mutex) /* in: mutex */ -{ -#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER) - byte res; - byte* lw; /* assembler code is used to ensure that - lock_word is loaded from memory */ - ut_ad(mutex); - ut_ad(sizeof(byte) == 1); - - lw = &(mutex->lock_word); - - __asm MOV ECX, lw - __asm MOV EDX, 1 - __asm XCHG DL, BYTE PTR [ECX] - __asm MOV res, DL - - /* The fence below would prevent this thread from - reading the data structure protected by the mutex - before the test-and-set operation is committed, but - the fence is apparently not needed: - - In a posting to comp.arch newsgroup (August 10, 1997) - Andy Glew said that in P6 a LOCKed instruction like - XCHG establishes a fence with respect to memory reads - and writes and thus an explicit fence is not - needed. In P5 he seemed to agree with a previous - newsgroup poster that LOCKed instructions serialize - all instruction execution, and, consequently, also - memory operations. This is confirmed in Intel Software - Dev. Manual, Vol. 3. */ - - /* mutex_fence(); */ - - return(res); -#elif defined(MY_ATOMIC_NOLOCK) - return ((byte)my_atomic_swap8( - (int8 volatile *)&(mutex->lock_word), 1)); -#else - ibool ret; - - ret = os_fast_mutex_trylock(&(mutex->os_fast_mutex)); - - if (ret == 0) { - /* We check that os_fast_mutex_trylock does not leak - and allow race conditions */ - ut_a(mutex->lock_word == 0); - - mutex->lock_word = 1; - } - - return((byte)ret); -#endif -} - -/********************************************************************** -Performs a reset instruction to the lock_word field of a mutex. 
This -instruction also serializes memory operations to the program order. */ -UNIV_INLINE -void -mutex_reset_lock_word( -/*==================*/ - mutex_t* mutex) /* in: mutex */ -{ -#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER) - byte* lw; /* assembler code is used to ensure that - lock_word is loaded from memory */ - ut_ad(mutex); - - lw = &(mutex->lock_word); - - __asm MOV EDX, 0 - __asm MOV ECX, lw - __asm XCHG DL, BYTE PTR [ECX] -#elif defined(MY_ATOMIC_NOLOCK) - /* In theory __sync_lock_release should be used to release the lock. - Unfortunately, it does not work properly alone. The workaround is - that more conservative __sync_lock_test_and_set is used instead. */ - (void)my_atomic_swap8((int8 volatile *)&(mutex->lock_word), 0); -#else - mutex->lock_word = 0; - - os_fast_mutex_unlock(&(mutex->os_fast_mutex)); -#endif -} - -/********************************************************************** -Gets the value of the lock word. */ -UNIV_INLINE -byte -mutex_get_lock_word( -/*================*/ - const mutex_t* mutex) /* in: mutex */ -{ - const volatile byte* ptr; /* declared volatile to ensure that - lock_word is loaded from memory */ - ut_ad(mutex); - - ptr = &(mutex->lock_word); - - return(*ptr); -} - -/********************************************************************** -Gets the waiters field in a mutex. */ -UNIV_INLINE -ulint -mutex_get_waiters( -/*==============*/ - /* out: value to set */ - const mutex_t* mutex) /* in: mutex */ -{ - const volatile ulint* ptr; /* declared volatile to ensure that - the value is read from memory */ - ut_ad(mutex); - - ptr = &(mutex->waiters); - - return(*ptr); /* Here we assume that the read of a single - word from memory is atomic */ -} - -/********************************************************************** -Unlocks a mutex owned by the current thread. 
*/ -UNIV_INLINE -void -mutex_exit( -/*=======*/ - mutex_t* mutex) /* in: pointer to mutex */ -{ - ut_ad(mutex_own(mutex)); - - ut_d(mutex->thread_id = (os_thread_id_t) ULINT_UNDEFINED); - -#ifdef UNIV_SYNC_DEBUG - sync_thread_reset_level(mutex); -#endif - mutex_reset_lock_word(mutex); - - /* A problem: we assume that mutex_reset_lock word - is a memory barrier, that is when we read the waiters - field next, the read must be serialized in memory - after the reset. A speculative processor might - perform the read first, which could leave a waiting - thread hanging indefinitely. - - Our current solution call every second - sync_arr_wake_threads_if_sema_free() - to wake up possible hanging threads if - they are missed in mutex_signal_object. */ - - if (mutex_get_waiters(mutex) != 0) { - - mutex_signal_object(mutex); - } - -#ifdef UNIV_SYNC_PERF_STAT - mutex_exit_count++; -#endif -} - -/********************************************************************** -Locks a mutex for the current thread. If the mutex is reserved, the function -spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting for the mutex -before suspending the thread. */ -UNIV_INLINE -void -mutex_enter_func( -/*=============*/ - mutex_t* mutex, /* in: pointer to mutex */ - const char* file_name, /* in: file name where locked */ - ulint line) /* in: line where locked */ -{ - ut_ad(mutex_validate(mutex)); - ut_ad(!mutex_own(mutex)); - - /* Note that we do not peek at the value of lock_word before trying - the atomic test_and_set; we could peek, and possibly save time. */ - -#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP - mutex->count_using++; -#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ - - if (!mutex_test_and_set(mutex)) { - ut_d(mutex->thread_id = os_thread_get_curr_id()); -#ifdef UNIV_SYNC_DEBUG - mutex_set_debug_info(mutex, file_name, line); -#endif - return; /* Succeeded! 
*/ - } - - mutex_spin_wait(mutex, file_name, line); -} diff --git a/storage/innobase/include/sync0types.h b/storage/innobase/include/sync0types.h deleted file mode 100644 index 57478426f25..00000000000 --- a/storage/innobase/include/sync0types.h +++ /dev/null @@ -1,16 +0,0 @@ -/****************************************************** -Global types for sync - -(c) 1995 Innobase Oy - -Created 9/5/1995 Heikki Tuuri -*******************************************************/ - -#ifndef sync0types_h -#define sync0types_h - -#define mutex_t ib_mutex_t -typedef struct mutex_struct mutex_t; - - -#endif diff --git a/storage/innobase/include/thr0loc.h b/storage/innobase/include/thr0loc.h deleted file mode 100644 index 32e2dc3ae93..00000000000 --- a/storage/innobase/include/thr0loc.h +++ /dev/null @@ -1,67 +0,0 @@ -/****************************************************** -The thread local storage - -(c) 1995 Innobase Oy - -Created 10/5/1995 Heikki Tuuri -*******************************************************/ - -/* This module implements storage private to each thread, -a capability useful in some situations like storing the -OS handle to the current thread, or its priority. */ - -#ifndef thr0loc_h -#define thr0loc_h - -#include "univ.i" -#include "os0thread.h" - -/******************************************************************** -Initializes the thread local storage module. */ - -void -thr_local_init(void); -/*================*/ -/*********************************************************************** -Creates a local storage struct for the calling new thread. */ - -void -thr_local_create(void); -/*==================*/ -/*********************************************************************** -Frees the local storage struct for the specified thread. */ - -void -thr_local_free( -/*===========*/ - os_thread_id_t id); /* in: thread id */ -/*********************************************************************** -Gets the slot number in the thread table of a thread. 
*/ - -ulint -thr_local_get_slot_no( -/*==================*/ - /* out: slot number */ - os_thread_id_t id); /* in: thread id of the thread */ -/*********************************************************************** -Sets in the local storage the slot number in the thread table of a thread. */ - -void -thr_local_set_slot_no( -/*==================*/ - os_thread_id_t id, /* in: thread id of the thread */ - ulint slot_no);/* in: slot number */ -/*********************************************************************** -Returns pointer to the 'in_ibuf' field within the current thread local -storage. */ - -ibool* -thr_local_get_in_ibuf_field(void); -/*=============================*/ - /* out: pointer to the in_ibuf field */ - -#ifndef UNIV_NONINL -#include "thr0loc.ic" -#endif - -#endif diff --git a/storage/innobase/include/thr0loc.ic b/storage/innobase/include/thr0loc.ic deleted file mode 100644 index b8b8136180c..00000000000 --- a/storage/innobase/include/thr0loc.ic +++ /dev/null @@ -1,7 +0,0 @@ -/****************************************************** -Thread local storage - -(c) 1995 Innobase Oy - -Created 10/4/1995 Heikki Tuuri -*******************************************************/ diff --git a/storage/innobase/include/trx0purge.h b/storage/innobase/include/trx0purge.h deleted file mode 100644 index c4aab91a93a..00000000000 --- a/storage/innobase/include/trx0purge.h +++ /dev/null @@ -1,169 +0,0 @@ -/****************************************************** -Purge old versions - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef trx0purge_h -#define trx0purge_h - -#include "univ.i" -#include "trx0types.h" -#include "mtr0mtr.h" -#include "trx0sys.h" -#include "que0types.h" -#include "page0page.h" -#include "usr0sess.h" -#include "fil0fil.h" - -/* The global data structure coordinating a purge */ -extern trx_purge_t* purge_sys; - -/* A dummy undo record used as a return value when we have a whole 
undo log -which needs no purge */ -extern trx_undo_rec_t trx_purge_dummy_rec; - -/************************************************************************ -Calculates the file address of an undo log header when we have the file -address of its history list node. */ -UNIV_INLINE -fil_addr_t -trx_purge_get_log_from_hist( -/*========================*/ - /* out: file address of the log */ - fil_addr_t node_addr); /* in: file address of the history - list node of the log */ -/********************************************************************* -Checks if trx_id is >= purge_view: then it is guaranteed that its update -undo log still exists in the system. */ - -ibool -trx_purge_update_undo_must_exist( -/*=============================*/ - /* out: TRUE if is sure that it is preserved, also - if the function returns FALSE, it is possible that - the undo log still exists in the system */ - dulint trx_id);/* in: transaction id */ -/************************************************************************ -Creates the global purge system control structure and inits the history -mutex. */ - -void -trx_purge_sys_create(void); -/*======================*/ -/************************************************************************ -Adds the update undo log as the first log in the history list. Removes the -update undo log segment from the rseg slot if it is too big for reuse. */ - -void -trx_purge_add_update_undo_to_history( -/*=================================*/ - trx_t* trx, /* in: transaction */ - page_t* undo_page, /* in: update undo log header page, - x-latched */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************ -Fetches the next undo log record from the history list to purge. It must be -released with the corresponding release function. 
*/ - -trx_undo_rec_t* -trx_purge_fetch_next_rec( -/*=====================*/ - /* out: copy of an undo log record, or - pointer to the dummy undo log record - &trx_purge_dummy_rec if the whole undo log - can skipped in purge; NULL if none left */ - dulint* roll_ptr,/* out: roll pointer to undo record */ - trx_undo_inf_t** cell, /* out: storage cell for the record in the - purge array */ - mem_heap_t* heap); /* in: memory heap where copied */ -/*********************************************************************** -Releases a reserved purge undo record. */ - -void -trx_purge_rec_release( -/*==================*/ - trx_undo_inf_t* cell); /* in: storage cell */ -/*********************************************************************** -This function runs a purge batch. */ - -ulint -trx_purge(void); -/*===========*/ - /* out: number of undo log pages handled in - the batch */ -/********************************************************************** -Prints information of the purge system to stderr. */ - -void -trx_purge_sys_print(void); -/*======================*/ - -/* The control structure used in the purge operation */ -struct trx_purge_struct{ - ulint state; /* Purge system state */ - sess_t* sess; /* System session running the purge - query */ - trx_t* trx; /* System transaction running the purge - query: this trx is not in the trx list - of the trx system and it never ends */ - que_t* query; /* The query graph which will do the - parallelized purge operation */ - rw_lock_t latch; /* The latch protecting the purge view. - A purge operation must acquire an - x-latch here for the instant at which - it changes the purge view: an undo - log operation can prevent this by - obtaining an s-latch here. 
*/ - read_view_t* view; /* The purge will not remove undo logs - which are >= this view (purge view) */ - mutex_t mutex; /* Mutex protecting the fields below */ - ulint n_pages_handled;/* Approximate number of undo log - pages processed in purge */ - ulint handle_limit; /* Target of how many pages to get - processed in the current purge */ - /*------------------------------*/ - /* The following two fields form the 'purge pointer' which advances - during a purge, and which is used in history list truncation */ - - dulint purge_trx_no; /* Purge has advanced past all - transactions whose number is less - than this */ - dulint purge_undo_no; /* Purge has advanced past all records - whose undo number is less than this */ - /*-----------------------------*/ - ibool next_stored; /* TRUE if the info of the next record - to purge is stored below: if yes, then - the transaction number and the undo - number of the record are stored in - purge_trx_no and purge_undo_no above */ - trx_rseg_t* rseg; /* Rollback segment for the next undo - record to purge */ - ulint page_no; /* Page number for the next undo - record to purge, page number of the - log header, if dummy record */ - ulint offset; /* Page offset for the next undo - record to purge, 0 if the dummy - record */ - ulint hdr_page_no; /* Header page of the undo log where - the next record to purge belongs */ - ulint hdr_offset; /* Header byte offset on the page */ - /*-----------------------------*/ - trx_undo_arr_t* arr; /* Array of transaction numbers and - undo numbers of the undo records - currently under processing in purge */ - mem_heap_t* heap; /* Temporary storage used during a - purge: can be emptied after purge - completes */ -}; - -#define TRX_PURGE_ON 1 /* purge operation is running */ -#define TRX_STOP_PURGE 2 /* purge operation is stopped, or - it should be stopped */ -#ifndef UNIV_NONINL -#include "trx0purge.ic" -#endif - -#endif diff --git a/storage/innobase/include/trx0purge.ic 
b/storage/innobase/include/trx0purge.ic deleted file mode 100644 index 9f1c0ed96f8..00000000000 --- a/storage/innobase/include/trx0purge.ic +++ /dev/null @@ -1,26 +0,0 @@ -/****************************************************** -Purge old versions - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "trx0undo.h" - -/************************************************************************ -Calculates the file address of an undo log header when we have the file -address of its history list node. */ -UNIV_INLINE -fil_addr_t -trx_purge_get_log_from_hist( -/*========================*/ - /* out: file address of the log */ - fil_addr_t node_addr) /* in: file address of the history - list node of the log */ -{ - node_addr.boffset -= TRX_UNDO_HISTORY_NODE; - - return(node_addr); -} - diff --git a/storage/innobase/include/trx0rec.h b/storage/innobase/include/trx0rec.h deleted file mode 100644 index 6447b6a2e35..00000000000 --- a/storage/innobase/include/trx0rec.h +++ /dev/null @@ -1,303 +0,0 @@ -/****************************************************** -Transaction undo log record - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef trx0rec_h -#define trx0rec_h - -#include "univ.i" -#include "trx0types.h" -#include "row0types.h" -#include "mtr0mtr.h" -#include "trx0sys.h" -#include "dict0types.h" -#include "que0types.h" -#include "data0data.h" -#include "rem0types.h" - -/*************************************************************************** -Copies the undo record to the heap. */ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_rec_copy( -/*==============*/ - /* out, own: copy of undo log record */ - trx_undo_rec_t* undo_rec, /* in: undo log record */ - mem_heap_t* heap); /* in: heap where copied */ -/************************************************************************** -Reads the undo log record type. 
*/ -UNIV_INLINE -ulint -trx_undo_rec_get_type( -/*==================*/ - /* out: record type */ - trx_undo_rec_t* undo_rec); /* in: undo log record */ -/************************************************************************** -Reads from an undo log record the record compiler info. */ -UNIV_INLINE -ulint -trx_undo_rec_get_cmpl_info( -/*=======================*/ - /* out: compiler info */ - trx_undo_rec_t* undo_rec); /* in: undo log record */ -/************************************************************************** -Returns TRUE if an undo log record contains an extern storage field. */ -UNIV_INLINE -ibool -trx_undo_rec_get_extern_storage( -/*============================*/ - /* out: TRUE if extern */ - trx_undo_rec_t* undo_rec); /* in: undo log record */ -/************************************************************************** -Reads the undo log record number. */ -UNIV_INLINE -dulint -trx_undo_rec_get_undo_no( -/*=====================*/ - /* out: undo no */ - trx_undo_rec_t* undo_rec); /* in: undo log record */ -/************************************************************************** -Reads from an undo log record the general parameters. */ - -byte* -trx_undo_rec_get_pars( -/*==================*/ - /* out: remaining part of undo log - record after reading these values */ - trx_undo_rec_t* undo_rec, /* in: undo log record */ - ulint* type, /* out: undo record type: - TRX_UNDO_INSERT_REC, ... */ - ulint* cmpl_info, /* out: compiler info, relevant only - for update type records */ - ibool* updated_extern, /* out: TRUE if we updated an - externally stored fild */ - dulint* undo_no, /* out: undo log record number */ - dulint* table_id); /* out: table id */ -/*********************************************************************** -Builds a row reference from an undo log record. 
*/ - -byte* -trx_undo_rec_get_row_ref( -/*=====================*/ - /* out: pointer to remaining part of undo - record */ - byte* ptr, /* in: remaining part of a copy of an undo log - record, at the start of the row reference; - NOTE that this copy of the undo log record must - be preserved as long as the row reference is - used, as we do NOT copy the data in the - record! */ - dict_index_t* index, /* in: clustered index */ - dtuple_t** ref, /* out, own: row reference */ - mem_heap_t* heap); /* in: memory heap from which the memory - needed is allocated */ -/*********************************************************************** -Skips a row reference from an undo log record. */ - -byte* -trx_undo_rec_skip_row_ref( -/*======================*/ - /* out: pointer to remaining part of undo - record */ - byte* ptr, /* in: remaining part in update undo log - record, at the start of the row reference */ - dict_index_t* index); /* in: clustered index */ -/************************************************************************** -Reads from an undo log update record the system field values of the old -version. */ - -byte* -trx_undo_update_rec_get_sys_cols( -/*=============================*/ - /* out: remaining part of undo log - record after reading these values */ - byte* ptr, /* in: remaining part of undo log - record after reading general - parameters */ - dulint* trx_id, /* out: trx id */ - dulint* roll_ptr, /* out: roll ptr */ - ulint* info_bits); /* out: info bits state */ -/*********************************************************************** -Builds an update vector based on a remaining part of an undo log record. 
*/ - -byte* -trx_undo_update_rec_get_update( -/*===========================*/ - /* out: remaining part of the record, - NULL if an error detected, which means that - the record is corrupted */ - byte* ptr, /* in: remaining part in update undo log - record, after reading the row reference - NOTE that this copy of the undo log record must - be preserved as long as the update vector is - used, as we do NOT copy the data in the - record! */ - dict_index_t* index, /* in: clustered index */ - ulint type, /* in: TRX_UNDO_UPD_EXIST_REC, - TRX_UNDO_UPD_DEL_REC, or - TRX_UNDO_DEL_MARK_REC; in the last case, - only trx id and roll ptr fields are added to - the update vector */ - dulint trx_id, /* in: transaction id from this undorecord */ - dulint roll_ptr,/* in: roll pointer from this undo record */ - ulint info_bits,/* in: info bits from this undo record */ - trx_t* trx, /* in: transaction */ - mem_heap_t* heap, /* in: memory heap from which the memory - needed is allocated */ - upd_t** upd); /* out, own: update vector */ -/*********************************************************************** -Builds a partial row from an update undo log record. It contains the -columns which occur as ordering in any index of the table. */ - -byte* -trx_undo_rec_get_partial_row( -/*=========================*/ - /* out: pointer to remaining part of undo - record */ - byte* ptr, /* in: remaining part in update undo log - record of a suitable type, at the start of - the stored index columns; - NOTE that this copy of the undo log record must - be preserved as long as the partial row is - used, as we do NOT copy the data in the - record! 
*/ - dict_index_t* index, /* in: clustered index */ - dtuple_t** row, /* out, own: partial row */ - mem_heap_t* heap); /* in: memory heap from which the memory - needed is allocated */ -/*************************************************************************** -Writes information to an undo log about an insert, update, or a delete marking -of a clustered index record. This information is used in a rollback of the -transaction and in consistent reads that must look to the history of this -transaction. */ - -ulint -trx_undo_report_row_operation( -/*==========================*/ - /* out: DB_SUCCESS or error code */ - ulint flags, /* in: if BTR_NO_UNDO_LOG_FLAG bit is - set, does nothing */ - ulint op_type, /* in: TRX_UNDO_INSERT_OP or - TRX_UNDO_MODIFY_OP */ - que_thr_t* thr, /* in: query thread */ - dict_index_t* index, /* in: clustered index */ - dtuple_t* clust_entry, /* in: in the case of an insert, - index entry to insert into the - clustered index, otherwise NULL */ - upd_t* update, /* in: in the case of an update, - the update vector, otherwise NULL */ - ulint cmpl_info, /* in: compiler info on secondary - index updates */ - rec_t* rec, /* in: case of an update or delete - marking, the record in the clustered - index, otherwise NULL */ - dulint* roll_ptr); /* out: rollback pointer to the - inserted undo log record, - ut_dulint_zero if BTR_NO_UNDO_LOG - flag was specified */ -/********************************************************************** -Copies an undo record to heap. This function can be called if we know that -the undo log record exists. */ - -trx_undo_rec_t* -trx_undo_get_undo_rec_low( -/*======================*/ - /* out, own: copy of the record */ - dulint roll_ptr, /* in: roll pointer to record */ - mem_heap_t* heap); /* in: memory heap where copied */ -/********************************************************************** -Copies an undo record to heap. 
*/ - -ulint -trx_undo_get_undo_rec( -/*==================*/ - /* out: DB_SUCCESS, or - DB_MISSING_HISTORY if the undo log - has been truncated and we cannot - fetch the old version; NOTE: the - caller must have latches on the - clustered index page and purge_view */ - dulint roll_ptr, /* in: roll pointer to record */ - dulint trx_id, /* in: id of the trx that generated - the roll pointer: it points to an - undo log of this transaction */ - trx_undo_rec_t** undo_rec, /* out, own: copy of the record */ - mem_heap_t* heap); /* in: memory heap where copied */ -/*********************************************************************** -Build a previous version of a clustered index record. This function checks -that the caller has a latch on the index page of the clustered index record -and an s-latch on the purge_view. This guarantees that the stack of versions -is locked. */ - -ulint -trx_undo_prev_version_build( -/*========================*/ - /* out: DB_SUCCESS, or DB_MISSING_HISTORY if - the previous version is not >= purge_view, - which means that it may have been removed, - DB_ERROR if corrupted record */ - rec_t* index_rec,/* in: clustered index record in the - index tree */ - mtr_t* index_mtr,/* in: mtr which contains the latch to - index_rec page and purge_view */ - rec_t* rec, /* in: version of a clustered index record */ - dict_index_t* index, /* in: clustered index */ - ulint* offsets,/* in: rec_get_offsets(rec, index) */ - mem_heap_t* heap, /* in: memory heap from which the memory - needed is allocated */ - rec_t** old_vers);/* out, own: previous version, or NULL if - rec is the first inserted version, or if - history data has been deleted */ -/*************************************************************** -Parses a redo log record of adding an undo log record. 
*/ - -byte* -trx_undo_parse_add_undo_rec( -/*========================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page); /* in: page or NULL */ -/*************************************************************** -Parses a redo log record of erasing of an undo page end. */ - -byte* -trx_undo_parse_erase_page_end( -/*==========================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ - -/* Types of an undo log record: these have to be smaller than 16, as the -compilation info multiplied by 16 is ORed to this value in an undo log -record */ -#define TRX_UNDO_INSERT_REC 11 /* fresh insert into clustered index */ -#define TRX_UNDO_UPD_EXIST_REC 12 /* update of a non-delete-marked - record */ -#define TRX_UNDO_UPD_DEL_REC 13 /* update of a delete marked record to - a not delete marked record; also the - fields of the record can change */ -#define TRX_UNDO_DEL_MARK_REC 14 /* delete marking of a record; fields - do not change */ -#define TRX_UNDO_CMPL_INFO_MULT 16 /* compilation info is multiplied by - this and ORed to the type above */ -#define TRX_UNDO_UPD_EXTERN 128 /* This bit can be ORed to type_cmpl - to denote that we updated external - storage fields: used by purge to - free the external storage */ - -/* Operation type flags used in trx_undo_report_row_operation */ -#define TRX_UNDO_INSERT_OP 1 -#define TRX_UNDO_MODIFY_OP 2 - -#ifndef UNIV_NONINL -#include "trx0rec.ic" -#endif - -#endif diff --git a/storage/innobase/include/trx0rec.ic b/storage/innobase/include/trx0rec.ic deleted file mode 100644 index a1ddc127ec7..00000000000 --- a/storage/innobase/include/trx0rec.ic +++ /dev/null @@ -1,86 +0,0 @@ -/****************************************************** -Transaction undo log record - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri 
-*******************************************************/ - -/************************************************************************** -Reads from an undo log record the record type. */ -UNIV_INLINE -ulint -trx_undo_rec_get_type( -/*==================*/ - /* out: record type */ - trx_undo_rec_t* undo_rec) /* in: undo log record */ -{ - return(mach_read_from_1(undo_rec + 2) & (TRX_UNDO_CMPL_INFO_MULT - 1)); -} - -/************************************************************************** -Reads from an undo log record the record compiler info. */ -UNIV_INLINE -ulint -trx_undo_rec_get_cmpl_info( -/*=======================*/ - /* out: compiler info */ - trx_undo_rec_t* undo_rec) /* in: undo log record */ -{ - return(mach_read_from_1(undo_rec + 2) / TRX_UNDO_CMPL_INFO_MULT); -} - -/************************************************************************** -Returns TRUE if an undo log record contains an extern storage field. */ -UNIV_INLINE -ibool -trx_undo_rec_get_extern_storage( -/*============================*/ - /* out: TRUE if extern */ - trx_undo_rec_t* undo_rec) /* in: undo log record */ -{ - if (mach_read_from_1(undo_rec + 2) & TRX_UNDO_UPD_EXTERN) { - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************** -Reads the undo log record number. */ -UNIV_INLINE -dulint -trx_undo_rec_get_undo_no( -/*=====================*/ - /* out: undo no */ - trx_undo_rec_t* undo_rec) /* in: undo log record */ -{ - byte* ptr; - - ptr = undo_rec + 3; - - return(mach_dulint_read_much_compressed(ptr)); -} - -/*************************************************************************** -Copies the undo record to the heap. 
*/ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_rec_copy( -/*==============*/ - /* out, own: copy of undo log record */ - trx_undo_rec_t* undo_rec, /* in: undo log record */ - mem_heap_t* heap) /* in: heap where copied */ -{ - ulint len; - trx_undo_rec_t* rec_copy; - - len = mach_read_from_2(undo_rec) + buf_frame_align(undo_rec) - - undo_rec; - rec_copy = mem_heap_alloc(heap, len); - - ut_memcpy(rec_copy, undo_rec, len); - - return(rec_copy); -} diff --git a/storage/innobase/include/trx0roll.h b/storage/innobase/include/trx0roll.h deleted file mode 100644 index c1eca3d5753..00000000000 --- a/storage/innobase/include/trx0roll.h +++ /dev/null @@ -1,314 +0,0 @@ -/****************************************************** -Transaction rollback - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef trx0roll_h -#define trx0roll_h - -#include "univ.i" -#include "trx0trx.h" -#include "trx0types.h" -#include "mtr0mtr.h" -#include "trx0sys.h" - -#define trx_roll_free_all_savepoints(s) trx_roll_savepoints_free((s), NULL) - -/*********************************************************************** -Returns a transaction savepoint taken at this point in time. */ - -trx_savept_t -trx_savept_take( -/*============*/ - /* out: savepoint */ - trx_t* trx); /* in: transaction */ -/*********************************************************************** -Creates an undo number array. */ - -trx_undo_arr_t* -trx_undo_arr_create(void); -/*=====================*/ -/*********************************************************************** -Frees an undo number array. */ - -void -trx_undo_arr_free( -/*==============*/ - trx_undo_arr_t* arr); /* in: undo number array */ -/*********************************************************************** -Returns pointer to nth element in an undo number array. 
*/ -UNIV_INLINE -trx_undo_inf_t* -trx_undo_arr_get_nth_info( -/*======================*/ - /* out: pointer to the nth element */ - trx_undo_arr_t* arr, /* in: undo number array */ - ulint n); /* in: position */ -/*************************************************************************** -Tries to truncate the undo logs. */ - -void -trx_roll_try_truncate( -/*==================*/ - trx_t* trx); /* in: transaction */ -/************************************************************************ -Pops the topmost record when the two undo logs of a transaction are seen -as a single stack of records ordered by their undo numbers. Inserts the -undo number of the popped undo record to the array of currently processed -undo numbers in the transaction. When the query thread finishes processing -of this undo record, it must be released with trx_undo_rec_release. */ - -trx_undo_rec_t* -trx_roll_pop_top_rec_of_trx( -/*========================*/ - /* out: undo log record copied to heap, NULL - if none left, or if the undo number of the - top record would be less than the limit */ - trx_t* trx, /* in: transaction */ - dulint limit, /* in: least undo number we need */ - dulint* roll_ptr,/* out: roll pointer to undo record */ - mem_heap_t* heap); /* in: memory heap where copied */ -/************************************************************************ -Reserves an undo log record for a query thread to undo. This should be -called if the query thread gets the undo log record not using the pop -function above. */ - -ibool -trx_undo_rec_reserve( -/*=================*/ - /* out: TRUE if succeeded */ - trx_t* trx, /* in: transaction */ - dulint undo_no);/* in: undo number of the record */ -/*********************************************************************** -Releases a reserved undo record. 
*/ - -void -trx_undo_rec_release( -/*=================*/ - trx_t* trx, /* in: transaction */ - dulint undo_no);/* in: undo number */ -/************************************************************************* -Starts a rollback operation. */ - -void -trx_rollback( -/*=========*/ - trx_t* trx, /* in: transaction */ - trx_sig_t* sig, /* in: signal starting the rollback */ - que_thr_t** next_thr);/* in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread */ -/*********************************************************************** -Rollback or clean up transactions which have no user session. If the -transaction already was committed, then we clean up a possible insert -undo log. If the transaction was not yet committed, then we roll it back. -Note: this is done in a background thread. */ - -os_thread_ret_t -trx_rollback_or_clean_all_without_sess( -/*===================================*/ - /* out: a dummy parameter */ - void* arg __attribute__((unused))); - /* in: a dummy parameter required by - os_thread_create */ -/******************************************************************** -Finishes a transaction rollback. */ - -void -trx_finish_rollback_off_kernel( -/*===========================*/ - que_t* graph, /* in: undo graph which can now be freed */ - trx_t* trx, /* in: transaction */ - que_thr_t** next_thr);/* in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread; if this parameter is - NULL, it is ignored */ -/******************************************************************** -Builds an undo 'query' graph for a transaction. The actual rollback is -performed by executing this query graph like a query subprocedure call. -The reply about the completion of the rollback will be sent by this -graph. 
*/ - -que_t* -trx_roll_graph_build( -/*=================*/ - /* out, own: the query graph */ - trx_t* trx); /* in: trx handle */ -/************************************************************************* -Creates a rollback command node struct. */ - -roll_node_t* -roll_node_create( -/*=============*/ - /* out, own: rollback node struct */ - mem_heap_t* heap); /* in: mem heap where created */ -/*************************************************************** -Performs an execution step for a rollback command node in a query graph. */ - -que_thr_t* -trx_rollback_step( -/*==============*/ - /* out: query thread to run next, or NULL */ - que_thr_t* thr); /* in: query thread */ -/*********************************************************************** -Rollback a transaction used in MySQL. */ - -int -trx_rollback_for_mysql( -/*===================*/ - /* out: error code or DB_SUCCESS */ - trx_t* trx); /* in: transaction handle */ -/*********************************************************************** -Rollback the latest SQL statement for MySQL. */ - -int -trx_rollback_last_sql_stat_for_mysql( -/*=================================*/ - /* out: error code or DB_SUCCESS */ - trx_t* trx); /* in: transaction handle */ -/*********************************************************************** -Rollback a transaction used in MySQL. */ - -int -trx_general_rollback_for_mysql( -/*===========================*/ - /* out: error code or DB_SUCCESS */ - trx_t* trx, /* in: transaction handle */ - ibool partial,/* in: TRUE if partial rollback requested */ - trx_savept_t* savept);/* in: pointer to savepoint undo number, if - partial rollback requested */ -/*********************************************************************** -Rolls a transaction back to a named savepoint. Modifications after the -savepoint are undone but InnoDB does NOT release the corresponding locks -which are stored in memory. 
If a lock is 'implicit', that is, a new inserted -row holds a lock where the lock information is carried by the trx id stored in -the row, these locks are naturally released in the rollback. Savepoints which -were set after this savepoint are deleted. */ - -ulint -trx_rollback_to_savepoint_for_mysql( -/*================================*/ - /* out: if no savepoint - of the name found then - DB_NO_SAVEPOINT, - otherwise DB_SUCCESS */ - trx_t* trx, /* in: transaction handle */ - const char* savepoint_name, /* in: savepoint name */ - ib_longlong* mysql_binlog_cache_pos);/* out: the MySQL binlog cache - position corresponding to this - savepoint; MySQL needs this - information to remove the - binlog entries of the queries - executed after the savepoint */ -/*********************************************************************** -Creates a named savepoint. If the transaction is not yet started, starts it. -If there is already a savepoint of the same name, this call erases that old -savepoint and replaces it with a new. Savepoints are deleted in a transaction -commit or rollback. */ - -ulint -trx_savepoint_for_mysql( -/*====================*/ - /* out: always DB_SUCCESS */ - trx_t* trx, /* in: transaction handle */ - const char* savepoint_name, /* in: savepoint name */ - ib_longlong binlog_cache_pos); /* in: MySQL binlog cache - position corresponding to this - connection at the time of the - savepoint */ - -/*********************************************************************** -Releases a named savepoint. Savepoints which -were set after this savepoint are deleted. */ - -ulint -trx_release_savepoint_for_mysql( -/*============================*/ - /* out: if no savepoint - of the name found then - DB_NO_SAVEPOINT, - otherwise DB_SUCCESS */ - trx_t* trx, /* in: transaction handle */ - const char* savepoint_name); /* in: savepoint name */ - -/*********************************************************************** -Frees a single savepoint struct. 
*/ - -void -trx_roll_savepoint_free( -/*=====================*/ - trx_t* trx, /* in: transaction handle */ - trx_named_savept_t* savep); /* in: savepoint to free */ - -/*********************************************************************** -Frees savepoint structs starting from savep, if savep == NULL then -free all savepoints. */ - -void -trx_roll_savepoints_free( -/*=====================*/ - trx_t* trx, /* in: transaction handle */ - trx_named_savept_t* savep); /* in: free all savepoints > this one; - if this is NULL, free all savepoints - of trx */ - -extern sess_t* trx_dummy_sess; - -/* A cell in the array used during a rollback and a purge */ -struct trx_undo_inf_struct{ - dulint trx_no; /* transaction number: not defined during - a rollback */ - dulint undo_no; /* undo number of an undo record */ - ibool in_use; /* TRUE if the cell is in use */ -}; - -/* During a rollback and a purge, undo numbers of undo records currently being -processed are stored in this array */ - -struct trx_undo_arr_struct{ - ulint n_cells; /* number of cells in the array */ - ulint n_used; /* number of cells currently in use */ - trx_undo_inf_t* infos; /* the array of undo infos */ - mem_heap_t* heap; /* memory heap from which allocated */ -}; - -/* Rollback command node in a query graph */ -struct roll_node_struct{ - que_common_t common; /* node type: QUE_NODE_ROLLBACK */ - ulint state; /* node execution state */ - ibool partial;/* TRUE if we want a partial rollback */ - trx_savept_t savept; /* savepoint to which to roll back, in the - case of a partial rollback */ -}; - -/* A savepoint set with SQL's "SAVEPOINT savepoint_id" command */ -struct trx_named_savept_struct{ - char* name; /* savepoint name */ - trx_savept_t savept; /* the undo number corresponding to - the savepoint */ - ib_longlong mysql_binlog_cache_pos; - /* the MySQL binlog cache position - corresponding to this savepoint, not - defined if the MySQL binlogging is not - enabled */ - UT_LIST_NODE_T(trx_named_savept_t) - 
trx_savepoints; /* the list of savepoints of a - transaction */ -}; - -/* Rollback node states */ -#define ROLL_NODE_SEND 1 -#define ROLL_NODE_WAIT 2 - -#ifndef UNIV_NONINL -#include "trx0roll.ic" -#endif - -#endif diff --git a/storage/innobase/include/trx0roll.ic b/storage/innobase/include/trx0roll.ic deleted file mode 100644 index dfde83ac478..00000000000 --- a/storage/innobase/include/trx0roll.ic +++ /dev/null @@ -1,23 +0,0 @@ -/****************************************************** -Transaction rollback - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -/*********************************************************************** -Returns pointer to nth element in an undo number array. */ -UNIV_INLINE -trx_undo_inf_t* -trx_undo_arr_get_nth_info( -/*======================*/ - /* out: pointer to the nth element */ - trx_undo_arr_t* arr, /* in: undo number array */ - ulint n) /* in: position */ -{ - ut_ad(arr); - ut_ad(n < arr->n_cells); - - return(arr->infos + n); -} diff --git a/storage/innobase/include/trx0rseg.h b/storage/innobase/include/trx0rseg.h deleted file mode 100644 index 46ba010bd1d..00000000000 --- a/storage/innobase/include/trx0rseg.h +++ /dev/null @@ -1,193 +0,0 @@ -/****************************************************** -Rollback segment - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef trx0rseg_h -#define trx0rseg_h - -#include "univ.i" -#include "trx0types.h" -#include "trx0sys.h" - -/********************************************************************** -Gets a rollback segment header. 
*/ -UNIV_INLINE -trx_rsegf_t* -trx_rsegf_get( -/*==========*/ - /* out: rollback segment header, page - x-latched */ - ulint space, /* in: space where placed */ - ulint page_no, /* in: page number of the header */ - mtr_t* mtr); /* in: mtr */ -/********************************************************************** -Gets a newly created rollback segment header. */ -UNIV_INLINE -trx_rsegf_t* -trx_rsegf_get_new( -/*==============*/ - /* out: rollback segment header, page - x-latched */ - ulint space, /* in: space where placed */ - ulint page_no, /* in: page number of the header */ - mtr_t* mtr); /* in: mtr */ -/******************************************************************* -Gets the file page number of the nth undo log slot. */ -UNIV_INLINE -ulint -trx_rsegf_get_nth_undo( -/*===================*/ - /* out: page number of the undo log segment */ - trx_rsegf_t* rsegf, /* in: rollback segment header */ - ulint n, /* in: index of slot */ - mtr_t* mtr); /* in: mtr */ -/******************************************************************* -Sets the file page number of the nth undo log slot. */ -UNIV_INLINE -void -trx_rsegf_set_nth_undo( -/*===================*/ - trx_rsegf_t* rsegf, /* in: rollback segment header */ - ulint n, /* in: index of slot */ - ulint page_no,/* in: page number of the undo log segment */ - mtr_t* mtr); /* in: mtr */ -/******************************************************************** -Looks for a free slot for an undo log segment. */ -UNIV_INLINE -ulint -trx_rsegf_undo_find_free( -/*=====================*/ - /* out: slot index or ULINT_UNDEFINED if not - found */ - trx_rsegf_t* rsegf, /* in: rollback segment header */ - mtr_t* mtr); /* in: mtr */ -/********************************************************************** -Looks for a rollback segment, based on the rollback segment id. 
*/ - -trx_rseg_t* -trx_rseg_get_on_id( -/*===============*/ - /* out: rollback segment */ - ulint id); /* in: rollback segment id */ -/******************************************************************** -Creates a rollback segment header. This function is called only when -a new rollback segment is created in the database. */ - -ulint -trx_rseg_header_create( -/*===================*/ - /* out: page number of the created segment, - FIL_NULL if fail */ - ulint space, /* in: space id */ - ulint max_size, /* in: max size in pages */ - ulint* slot_no, /* out: rseg id == slot number in trx sys */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************* -Creates the memory copies for rollback segments and initializes the -rseg list and array in trx_sys at a database startup. */ - -void -trx_rseg_list_and_array_init( -/*=========================*/ - trx_sysf_t* sys_header, /* in: trx system header */ - mtr_t* mtr); /* in: mtr */ -/******************************************************************** -Creates a new rollback segment to the database. 
*/ - -trx_rseg_t* -trx_rseg_create( -/*============*/ - /* out: the created segment object, NULL if - fail */ - ulint space, /* in: space id */ - ulint max_size, /* in: max size in pages */ - ulint* id, /* out: rseg id */ - mtr_t* mtr); /* in: mtr */ - - -/* Number of undo log slots in a rollback segment file copy */ -#define TRX_RSEG_N_SLOTS 1024 - -/* Maximum number of transactions supported by a single rollback segment */ -#define TRX_RSEG_MAX_N_TRXS (TRX_RSEG_N_SLOTS / 2) - -/* The rollback segment memory object */ -struct trx_rseg_struct{ - /*--------------------------------------------------------*/ - ulint id; /* rollback segment id == the index of - its slot in the trx system file copy */ - mutex_t mutex; /* mutex protecting the fields in this - struct except id; NOTE that the latching - order must always be kernel mutex -> - rseg mutex */ - ulint space; /* space where the rollback segment - header is placed */ - ulint page_no;/* page number of the rollback segment - header */ - ulint max_size;/* maximum allowed size in pages */ - ulint curr_size;/* current size in pages */ - /*--------------------------------------------------------*/ - /* Fields for update undo logs */ - UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_list; - /* List of update undo logs */ - UT_LIST_BASE_NODE_T(trx_undo_t) update_undo_cached; - /* List of update undo log segments - cached for fast reuse */ - /*--------------------------------------------------------*/ - /* Fields for insert undo logs */ - UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_list; - /* List of insert undo logs */ - UT_LIST_BASE_NODE_T(trx_undo_t) insert_undo_cached; - /* List of insert undo log segments - cached for fast reuse */ - /*--------------------------------------------------------*/ - ulint last_page_no; /* Page number of the last not yet - purged log header in the history list; - FIL_NULL if all list purged */ - ulint last_offset; /* Byte offset of the last not yet - purged log header */ - dulint 
last_trx_no; /* Transaction number of the last not - yet purged log */ - ibool last_del_marks; /* TRUE if the last not yet purged log - needs purging */ - /*--------------------------------------------------------*/ - UT_LIST_NODE_T(trx_rseg_t) rseg_list; - /* the list of the rollback segment - memory objects */ -}; - -/* Undo log segment slot in a rollback segment header */ -/*-------------------------------------------------------------*/ -#define TRX_RSEG_SLOT_PAGE_NO 0 /* Page number of the header page of - an undo log segment */ -/*-------------------------------------------------------------*/ -/* Slot size */ -#define TRX_RSEG_SLOT_SIZE 4 - -/* The offset of the rollback segment header on its page */ -#define TRX_RSEG FSEG_PAGE_DATA - -/* Transaction rollback segment header */ -/*-------------------------------------------------------------*/ -#define TRX_RSEG_MAX_SIZE 0 /* Maximum allowed size for rollback - segment in pages */ -#define TRX_RSEG_HISTORY_SIZE 4 /* Number of file pages occupied - by the logs in the history list */ -#define TRX_RSEG_HISTORY 8 /* The update undo logs for committed - transactions */ -#define TRX_RSEG_FSEG_HEADER (8 + FLST_BASE_NODE_SIZE) - /* Header for the file segment where - this page is placed */ -#define TRX_RSEG_UNDO_SLOTS (8 + FLST_BASE_NODE_SIZE + FSEG_HEADER_SIZE) - /* Undo log segment slots */ -/*-------------------------------------------------------------*/ - -#ifndef UNIV_NONINL -#include "trx0rseg.ic" -#endif - -#endif diff --git a/storage/innobase/include/trx0rseg.ic b/storage/innobase/include/trx0rseg.ic deleted file mode 100644 index eb1893587a6..00000000000 --- a/storage/innobase/include/trx0rseg.ic +++ /dev/null @@ -1,126 +0,0 @@ -/****************************************************** -Rollback segment - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "srv0srv.h" - 
-/********************************************************************** -Gets a rollback segment header. */ -UNIV_INLINE -trx_rsegf_t* -trx_rsegf_get( -/*==========*/ - /* out: rollback segment header, page - x-latched */ - ulint space, /* in: space where placed */ - ulint page_no, /* in: page number of the header */ - mtr_t* mtr) /* in: mtr */ -{ - trx_rsegf_t* header; - - header = TRX_RSEG + buf_page_get(space, page_no, RW_X_LATCH, mtr); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(header, SYNC_RSEG_HEADER); -#endif /* UNIV_SYNC_DEBUG */ - - return(header); -} - -/********************************************************************** -Gets a newly created rollback segment header. */ -UNIV_INLINE -trx_rsegf_t* -trx_rsegf_get_new( -/*==============*/ - /* out: rollback segment header, page - x-latched */ - ulint space, /* in: space where placed */ - ulint page_no, /* in: page number of the header */ - mtr_t* mtr) /* in: mtr */ -{ - trx_rsegf_t* header; - - header = TRX_RSEG + buf_page_get(space, page_no, RW_X_LATCH, mtr); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(header, SYNC_RSEG_HEADER_NEW); -#endif /* UNIV_SYNC_DEBUG */ - - return(header); -} - -/******************************************************************* -Gets the file page number of the nth undo log slot. */ -UNIV_INLINE -ulint -trx_rsegf_get_nth_undo( -/*===================*/ - /* out: page number of the undo log segment */ - trx_rsegf_t* rsegf, /* in: rollback segment header */ - ulint n, /* in: index of slot */ - mtr_t* mtr) /* in: mtr */ -{ - if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) { - fprintf(stderr, - "InnoDB: Error: trying to get slot %lu of rseg\n", - (ulong) n); - ut_error; - } - - return(mtr_read_ulint(rsegf + TRX_RSEG_UNDO_SLOTS - + n * TRX_RSEG_SLOT_SIZE, MLOG_4BYTES, mtr)); -} - -/******************************************************************* -Sets the file page number of the nth undo log slot. 
*/ -UNIV_INLINE -void -trx_rsegf_set_nth_undo( -/*===================*/ - trx_rsegf_t* rsegf, /* in: rollback segment header */ - ulint n, /* in: index of slot */ - ulint page_no,/* in: page number of the undo log segment */ - mtr_t* mtr) /* in: mtr */ -{ - if (UNIV_UNLIKELY(n >= TRX_RSEG_N_SLOTS)) { - fprintf(stderr, - "InnoDB: Error: trying to set slot %lu of rseg\n", - (ulong) n); - ut_error; - } - - mlog_write_ulint(rsegf + TRX_RSEG_UNDO_SLOTS + n * TRX_RSEG_SLOT_SIZE, - page_no, MLOG_4BYTES, mtr); -} - -/******************************************************************** -Looks for a free slot for an undo log segment. */ -UNIV_INLINE -ulint -trx_rsegf_undo_find_free( -/*=====================*/ - /* out: slot index or ULINT_UNDEFINED if not - found */ - trx_rsegf_t* rsegf, /* in: rollback segment header */ - mtr_t* mtr) /* in: mtr */ -{ - ulint i; - ulint page_no; - - for (i = 0; i < TRX_RSEG_N_SLOTS; i++) { - - page_no = trx_rsegf_get_nth_undo(rsegf, i, mtr); - - if (page_no == FIL_NULL) { - - return(i); - } - } - - return(ULINT_UNDEFINED); -} diff --git a/storage/innobase/include/trx0sys.h b/storage/innobase/include/trx0sys.h deleted file mode 100644 index a8da5cd51a3..00000000000 --- a/storage/innobase/include/trx0sys.h +++ /dev/null @@ -1,453 +0,0 @@ -/****************************************************** -Transaction system - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef trx0sys_h -#define trx0sys_h - -#include "univ.i" - -#include "trx0types.h" -#include "mtr0mtr.h" -#include "mtr0log.h" -#include "ut0byte.h" -#include "mem0mem.h" -#include "sync0sync.h" -#include "ut0lst.h" -#include "buf0buf.h" -#include "fil0fil.h" -#include "fut0lst.h" -#include "fsp0fsp.h" -#include "read0types.h" - -/* In a MySQL replication slave, in crash recovery we store the master log -file name and position here. We have successfully got the updates to InnoDB -up to this position. 
If .._pos is -1, it means no crash recovery was needed, -or there was no master log position info inside InnoDB. */ - -extern char trx_sys_mysql_master_log_name[]; -extern ib_longlong trx_sys_mysql_master_log_pos; - -/* If this MySQL server uses binary logging, after InnoDB has been inited -and if it has done a crash recovery, we store the binlog file name and position -here. If .._pos is -1, it means there was no binlog position info inside -InnoDB. */ - -extern char trx_sys_mysql_bin_log_name[]; -extern ib_longlong trx_sys_mysql_bin_log_pos; - -/* The transaction system */ -extern trx_sys_t* trx_sys; - -/* Doublewrite system */ -extern trx_doublewrite_t* trx_doublewrite; -extern ibool trx_doublewrite_must_reset_space_ids; -extern ibool trx_sys_multiple_tablespace_format; - -/******************************************************************** -Creates the doublewrite buffer to a new InnoDB installation. The header of the -doublewrite buffer is placed on the trx system header page. */ - -void -trx_sys_create_doublewrite_buf(void); -/*================================*/ -/******************************************************************** -At a database startup initializes the doublewrite buffer memory structure if -we already have a doublewrite buffer created in the data files. If we are -upgrading to an InnoDB version which supports multiple tablespaces, then this -function performs the necessary update operations. If we are in a crash -recovery, this function uses a possible doublewrite buffer to restore -half-written pages in the data files. */ - -void -trx_sys_doublewrite_init_or_restore_pages( -/*======================================*/ - ibool restore_corrupt_pages); -/******************************************************************** -Marks the trx sys header when we have successfully upgraded to the >= 4.1.x -multiple tablespace format. 
*/ - -void -trx_sys_mark_upgraded_to_multiple_tablespaces(void); -/*===============================================*/ -/******************************************************************** -Determines if a page number is located inside the doublewrite buffer. */ - -ibool -trx_doublewrite_page_inside( -/*========================*/ - /* out: TRUE if the location is inside - the two blocks of the doublewrite buffer */ - ulint page_no); /* in: page number */ -/******************************************************************* -Checks if a page address is the trx sys header page. */ -UNIV_INLINE -ibool -trx_sys_hdr_page( -/*=============*/ - /* out: TRUE if trx sys header page */ - ulint space, /* in: space */ - ulint page_no);/* in: page number */ -/********************************************************************* -Creates and initializes the central memory structures for the transaction -system. This is called when the database is started. */ - -void -trx_sys_init_at_db_start(void); -/*==========================*/ -/********************************************************************* -Creates and initializes the transaction system at the database creation. */ - -void -trx_sys_create(void); -/*================*/ -/******************************************************************** -Looks for a free slot for a rollback segment in the trx system file copy. */ - -ulint -trx_sysf_rseg_find_free( -/*====================*/ - /* out: slot index or ULINT_UNDEFINED - if not found */ - mtr_t* mtr); /* in: mtr */ -/******************************************************************* -Gets the pointer in the nth slot of the rseg array. */ -UNIV_INLINE -trx_rseg_t* -trx_sys_get_nth_rseg( -/*=================*/ - /* out: pointer to rseg object, NULL if slot - not in use */ - trx_sys_t* sys, /* in: trx system */ - ulint n); /* in: index of slot */ -/******************************************************************* -Sets the pointer in the nth slot of the rseg array. 
*/ -UNIV_INLINE -void -trx_sys_set_nth_rseg( -/*=================*/ - trx_sys_t* sys, /* in: trx system */ - ulint n, /* in: index of slot */ - trx_rseg_t* rseg); /* in: pointer to rseg object, NULL if slot - not in use */ -/************************************************************************** -Gets a pointer to the transaction system file copy and x-locks its page. */ -UNIV_INLINE -trx_sysf_t* -trx_sysf_get( -/*=========*/ - /* out: pointer to system file copy, page x-locked */ - mtr_t* mtr); /* in: mtr */ -/********************************************************************* -Gets the space of the nth rollback segment slot in the trx system -file copy. */ -UNIV_INLINE -ulint -trx_sysf_rseg_get_space( -/*====================*/ - /* out: space id */ - trx_sysf_t* sys_header, /* in: trx sys file copy */ - ulint i, /* in: slot index == rseg id */ - mtr_t* mtr); /* in: mtr */ -/********************************************************************* -Gets the page number of the nth rollback segment slot in the trx system -file copy. */ -UNIV_INLINE -ulint -trx_sysf_rseg_get_page_no( -/*======================*/ - /* out: page number, FIL_NULL - if slot unused */ - trx_sysf_t* sys_header, /* in: trx sys file copy */ - ulint i, /* in: slot index == rseg id */ - mtr_t* mtr); /* in: mtr */ -/********************************************************************* -Sets the space id of the nth rollback segment slot in the trx system -file copy. */ -UNIV_INLINE -void -trx_sysf_rseg_set_space( -/*====================*/ - trx_sysf_t* sys_header, /* in: trx sys file copy */ - ulint i, /* in: slot index == rseg id */ - ulint space, /* in: space id */ - mtr_t* mtr); /* in: mtr */ -/********************************************************************* -Sets the page number of the nth rollback segment slot in the trx system -file copy. 
*/ -UNIV_INLINE -void -trx_sysf_rseg_set_page_no( -/*======================*/ - trx_sysf_t* sys_header, /* in: trx sys file copy */ - ulint i, /* in: slot index == rseg id */ - ulint page_no, /* in: page number, FIL_NULL if - the slot is reset to unused */ - mtr_t* mtr); /* in: mtr */ -/********************************************************************* -Allocates a new transaction id. */ -UNIV_INLINE -dulint -trx_sys_get_new_trx_id(void); -/*========================*/ - /* out: new, allocated trx id */ -/********************************************************************* -Allocates a new transaction number. */ -UNIV_INLINE -dulint -trx_sys_get_new_trx_no(void); -/*========================*/ - /* out: new, allocated trx number */ -/********************************************************************* -Writes a trx id to an index page. In case that the id size changes in -some future version, this function should be used instead of -mach_write_... */ -UNIV_INLINE -void -trx_write_trx_id( -/*=============*/ - byte* ptr, /* in: pointer to memory where written */ - dulint id); /* in: id */ -/********************************************************************* -Reads a trx id from an index page. In case that the id size changes in -some future version, this function should be used instead of -mach_read_... */ -UNIV_INLINE -dulint -trx_read_trx_id( -/*============*/ - /* out: id */ - byte* ptr); /* in: pointer to memory from where to read */ -/******************************************************************** -Looks for the trx handle with the given id in trx_list. */ -UNIV_INLINE -trx_t* -trx_get_on_id( -/*==========*/ - /* out: the trx handle or NULL if not found */ - dulint trx_id); /* in: trx id to search for */ -/******************************************************************** -Returns the minumum trx id in trx list. This is the smallest id for which -the trx can possibly be active. 
(But, you must look at the trx->conc_state to -find out if the minimum trx id transaction itself is active, or already -committed.) */ -UNIV_INLINE -dulint -trx_list_get_min_trx_id(void); -/*=========================*/ - /* out: the minimum trx id, or trx_sys->max_trx_id - if the trx list is empty */ -/******************************************************************** -Checks if a transaction with the given id is active. */ -UNIV_INLINE -ibool -trx_is_active( -/*==========*/ - /* out: TRUE if active */ - dulint trx_id);/* in: trx id of the transaction */ -/******************************************************************** -Checks that trx is in the trx list. */ - -ibool -trx_in_trx_list( -/*============*/ - /* out: TRUE if is in */ - trx_t* in_trx);/* in: trx */ -/********************************************************************* -Updates the offset information about the end of the MySQL binlog entry -which corresponds to the transaction just being committed. In a MySQL -replication slave updates the latest master binlog position up to which -replication has proceeded. */ - -void -trx_sys_update_mysql_binlog_offset( -/*===============================*/ - const char* file_name,/* in: MySQL log file name */ - ib_longlong offset, /* in: position in that log file */ - ulint field, /* in: offset of the MySQL log info field in - the trx sys header */ - mtr_t* mtr); /* in: mtr */ -/********************************************************************* -Prints to stderr the MySQL binlog offset info in the trx system header if -the magic number shows it valid. */ - -void -trx_sys_print_mysql_binlog_offset(void); -/*===================================*/ -#ifdef UNIV_HOTBACKUP -/********************************************************************* -Prints to stderr the MySQL binlog info in the system header if the -magic number shows it valid. 
*/ - -void -trx_sys_print_mysql_binlog_offset_from_page( -/*========================================*/ - byte* page); /* in: buffer containing the trx system header page, - i.e., page number TRX_SYS_PAGE_NO in the tablespace */ -#endif /* UNIV_HOTBACKUP */ -/********************************************************************* -Prints to stderr the MySQL master log offset info in the trx system header if -the magic number shows it valid. */ - -void -trx_sys_print_mysql_master_log_pos(void); -/*====================================*/ - -/* The automatically created system rollback segment has this id */ -#define TRX_SYS_SYSTEM_RSEG_ID 0 - -/* Space id and page no where the trx system file copy resides */ -#define TRX_SYS_SPACE 0 /* the SYSTEM tablespace */ -#define TRX_SYS_PAGE_NO FSP_TRX_SYS_PAGE_NO - -/* The offset of the transaction system header on the page */ -#define TRX_SYS FSEG_PAGE_DATA - -/* Transaction system header */ -/*-------------------------------------------------------------*/ -#define TRX_SYS_TRX_ID_STORE 0 /* the maximum trx id or trx number - modulo TRX_SYS_TRX_ID_UPDATE_MARGIN - written to a file page by any - transaction; the assignment of - transaction ids continues from this - number rounded up by .._MARGIN plus - .._MARGIN when the database is - started */ -#define TRX_SYS_FSEG_HEADER 8 /* segment header for the tablespace - segment the trx system is created - into */ -#define TRX_SYS_RSEGS (8 + FSEG_HEADER_SIZE) - /* the start of the array of rollback - segment specification slots */ -/*-------------------------------------------------------------*/ - -/* Max number of rollback segments: the number of segment specification slots -in the transaction system array; rollback segment id must fit in one byte, -therefore 256; each slot is currently 8 bytes in size */ -#define TRX_SYS_N_RSEGS 256 - -#define TRX_SYS_MYSQL_LOG_NAME_LEN 512 -#define TRX_SYS_MYSQL_LOG_MAGIC_N 873422344 - -/* The offset of the MySQL replication info in the trx system 
header; -this contains the same fields as TRX_SYS_MYSQL_LOG_INFO below */ -#define TRX_SYS_MYSQL_MASTER_LOG_INFO (UNIV_PAGE_SIZE - 2000) - -/* The offset of the MySQL binlog offset info in the trx system header */ -#define TRX_SYS_MYSQL_LOG_INFO (UNIV_PAGE_SIZE - 1000) -#define TRX_SYS_MYSQL_LOG_MAGIC_N_FLD 0 /* magic number which shows - if we have valid data in the - MySQL binlog info; the value - is ..._MAGIC_N if yes */ -#define TRX_SYS_MYSQL_LOG_OFFSET_HIGH 4 /* high 4 bytes of the offset - within that file */ -#define TRX_SYS_MYSQL_LOG_OFFSET_LOW 8 /* low 4 bytes of the offset - within that file */ -#define TRX_SYS_MYSQL_LOG_NAME 12 /* MySQL log file name */ - -/* The offset of the doublewrite buffer header on the trx system header page */ -#define TRX_SYS_DOUBLEWRITE (UNIV_PAGE_SIZE - 200) -/*-------------------------------------------------------------*/ -#define TRX_SYS_DOUBLEWRITE_FSEG 0 /* fseg header of the fseg - containing the doublewrite - buffer */ -#define TRX_SYS_DOUBLEWRITE_MAGIC FSEG_HEADER_SIZE - /* 4-byte magic number which - shows if we already have - created the doublewrite - buffer */ -#define TRX_SYS_DOUBLEWRITE_BLOCK1 (4 + FSEG_HEADER_SIZE) - /* page number of the - first page in the first - sequence of 64 - (= FSP_EXTENT_SIZE) consecutive - pages in the doublewrite - buffer */ -#define TRX_SYS_DOUBLEWRITE_BLOCK2 (8 + FSEG_HEADER_SIZE) - /* page number of the - first page in the second - sequence of 64 consecutive - pages in the doublewrite - buffer */ -#define TRX_SYS_DOUBLEWRITE_REPEAT 12 /* we repeat the above 3 - numbers so that if the trx - sys header is half-written - to disk, we still may be able - to recover the information */ -#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED (24 + FSEG_HEADER_SIZE) - /* If this is not yet set to - .._N, we must reset the - doublewrite buffer, because - starting from 4.1.x the space - id of a data page is stored to - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO */ 
-/*-------------------------------------------------------------*/ -#define TRX_SYS_DOUBLEWRITE_MAGIC_N 536853855 -#define TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N 1783657386 - - -#define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE FSP_EXTENT_SIZE - -/* Doublewrite control struct */ -struct trx_doublewrite_struct{ - mutex_t mutex; /* mutex protecting the first_free field and - write_buf */ - ulint block1; /* the page number of the first - doublewrite block (64 pages) */ - ulint block2; /* page number of the second block */ - ulint first_free; /* first free position in write_buf measured - in units of UNIV_PAGE_SIZE */ - byte* write_buf; /* write buffer used in writing to the - doublewrite buffer, aligned to an - address divisible by UNIV_PAGE_SIZE - (which is required by Windows aio) */ - byte* write_buf_unaligned; /* pointer to write_buf, but unaligned */ - buf_block_t** - buf_block_arr; /* array to store pointers to the buffer - blocks which have been cached to write_buf */ -}; - -/* The transaction system central memory data structure; protected by the -kernel mutex */ -struct trx_sys_struct{ - dulint max_trx_id; /* The smallest number not yet - assigned as a transaction id or - transaction number */ - UT_LIST_BASE_NODE_T(trx_t) trx_list; - /* List of active and committed in - memory transactions, sorted on trx id, - biggest first */ - UT_LIST_BASE_NODE_T(trx_t) mysql_trx_list; - /* List of transactions created - for MySQL */ - UT_LIST_BASE_NODE_T(trx_rseg_t) rseg_list; - /* List of rollback segment objects */ - trx_rseg_t* latest_rseg; /* Latest rollback segment in the - round-robin assignment of rollback - segments to transactions */ - trx_rseg_t* rseg_array[TRX_SYS_N_RSEGS]; - /* Pointer array to rollback segments; - NULL if slot not in use */ - ulint rseg_history_len;/* Length of the TRX_RSEG_HISTORY - list (update undo logs for committed - transactions), protected by - rseg->mutex */ - UT_LIST_BASE_NODE_T(read_view_t) view_list; - /* List of read views sorted on trx no, - 
biggest first */ -}; - -/* When a trx id which is zero modulo this number (which must be a power of -two) is assigned, the field TRX_SYS_TRX_ID_STORE on the transaction system -page is updated */ -#define TRX_SYS_TRX_ID_WRITE_MARGIN 256 - -#ifndef UNIV_NONINL -#include "trx0sys.ic" -#endif - -#endif diff --git a/storage/innobase/include/trx0sys.ic b/storage/innobase/include/trx0sys.ic deleted file mode 100644 index 86b71df08d6..00000000000 --- a/storage/innobase/include/trx0sys.ic +++ /dev/null @@ -1,366 +0,0 @@ -/****************************************************** -Transaction system - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "srv0srv.h" -#include "trx0trx.h" -#include "data0type.h" - -/* The typedef for rseg slot in the file copy */ -typedef byte trx_sysf_rseg_t; - -/* Rollback segment specification slot offsets */ -/*-------------------------------------------------------------*/ -#define TRX_SYS_RSEG_SPACE 0 /* space where the the segment - header is placed; starting with - MySQL/InnoDB 5.1.7, this is - UNIV_UNDEFINED if the slot is unused */ -#define TRX_SYS_RSEG_PAGE_NO 4 /* page number where the the segment - header is placed; this is FIL_NULL - if the slot is unused */ -/*-------------------------------------------------------------*/ -/* Size of a rollback segment specification slot */ -#define TRX_SYS_RSEG_SLOT_SIZE 8 - -/********************************************************************* -Writes the value of max_trx_id to the file based trx system header. */ - -void -trx_sys_flush_max_trx_id(void); -/*==========================*/ - -/******************************************************************* -Checks if a page address is the trx sys header page. 
*/ -UNIV_INLINE -ibool -trx_sys_hdr_page( -/*=============*/ - /* out: TRUE if trx sys header page */ - ulint space, /* in: space */ - ulint page_no)/* in: page number */ -{ - if ((space == TRX_SYS_SPACE) && (page_no == TRX_SYS_PAGE_NO)) { - - return(TRUE); - } - - return(FALSE); -} - -/******************************************************************* -Gets the pointer in the nth slot of the rseg array. */ -UNIV_INLINE -trx_rseg_t* -trx_sys_get_nth_rseg( -/*=================*/ - /* out: pointer to rseg object, NULL if slot - not in use */ - trx_sys_t* sys, /* in: trx system */ - ulint n) /* in: index of slot */ -{ - ut_ad(mutex_own(&(kernel_mutex))); - ut_ad(n < TRX_SYS_N_RSEGS); - - return(sys->rseg_array[n]); -} - -/******************************************************************* -Sets the pointer in the nth slot of the rseg array. */ -UNIV_INLINE -void -trx_sys_set_nth_rseg( -/*=================*/ - trx_sys_t* sys, /* in: trx system */ - ulint n, /* in: index of slot */ - trx_rseg_t* rseg) /* in: pointer to rseg object, NULL if slot - not in use */ -{ - ut_ad(n < TRX_SYS_N_RSEGS); - - sys->rseg_array[n] = rseg; -} - -/************************************************************************** -Gets a pointer to the transaction system header and x-latches its page. */ -UNIV_INLINE -trx_sysf_t* -trx_sysf_get( -/*=========*/ - /* out: pointer to system header, page x-latched. */ - mtr_t* mtr) /* in: mtr */ -{ - trx_sysf_t* header; - - ut_ad(mtr); - - header = TRX_SYS + buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, - RW_X_LATCH, mtr); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(header, SYNC_TRX_SYS_HEADER); -#endif /* UNIV_SYNC_DEBUG */ - - return(header); -} - -/********************************************************************* -Gets the space of the nth rollback segment slot in the trx system -file copy. 
*/ -UNIV_INLINE -ulint -trx_sysf_rseg_get_space( -/*====================*/ - /* out: space id */ - trx_sysf_t* sys_header, /* in: trx sys header */ - ulint i, /* in: slot index == rseg id */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(mutex_own(&(kernel_mutex))); - ut_ad(sys_header); - ut_ad(i < TRX_SYS_N_RSEGS); - - return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS - + i * TRX_SYS_RSEG_SLOT_SIZE - + TRX_SYS_RSEG_SPACE, MLOG_4BYTES, mtr)); -} - -/********************************************************************* -Gets the page number of the nth rollback segment slot in the trx system -header. */ -UNIV_INLINE -ulint -trx_sysf_rseg_get_page_no( -/*======================*/ - /* out: page number, FIL_NULL - if slot unused */ - trx_sysf_t* sys_header, /* in: trx system header */ - ulint i, /* in: slot index == rseg id */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(sys_header); - ut_ad(mutex_own(&(kernel_mutex))); - ut_ad(i < TRX_SYS_N_RSEGS); - - return(mtr_read_ulint(sys_header + TRX_SYS_RSEGS - + i * TRX_SYS_RSEG_SLOT_SIZE - + TRX_SYS_RSEG_PAGE_NO, MLOG_4BYTES, mtr)); -} - -/********************************************************************* -Sets the space id of the nth rollback segment slot in the trx system -file copy. */ -UNIV_INLINE -void -trx_sysf_rseg_set_space( -/*====================*/ - trx_sysf_t* sys_header, /* in: trx sys file copy */ - ulint i, /* in: slot index == rseg id */ - ulint space, /* in: space id */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(mutex_own(&(kernel_mutex))); - ut_ad(sys_header); - ut_ad(i < TRX_SYS_N_RSEGS); - - mlog_write_ulint(sys_header + TRX_SYS_RSEGS - + i * TRX_SYS_RSEG_SLOT_SIZE - + TRX_SYS_RSEG_SPACE, - space, - MLOG_4BYTES, mtr); -} - -/********************************************************************* -Sets the page number of the nth rollback segment slot in the trx system -header. 
*/ -UNIV_INLINE -void -trx_sysf_rseg_set_page_no( -/*======================*/ - trx_sysf_t* sys_header, /* in: trx sys header */ - ulint i, /* in: slot index == rseg id */ - ulint page_no, /* in: page number, FIL_NULL if the - slot is reset to unused */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(mutex_own(&(kernel_mutex))); - ut_ad(sys_header); - ut_ad(i < TRX_SYS_N_RSEGS); - - mlog_write_ulint(sys_header + TRX_SYS_RSEGS - + i * TRX_SYS_RSEG_SLOT_SIZE - + TRX_SYS_RSEG_PAGE_NO, - page_no, - MLOG_4BYTES, mtr); -} - -/********************************************************************* -Writes a trx id to an index page. In case that the id size changes in -some future version, this function should be used instead of -mach_write_... */ -UNIV_INLINE -void -trx_write_trx_id( -/*=============*/ - byte* ptr, /* in: pointer to memory where written */ - dulint id) /* in: id */ -{ - ut_ad(DATA_TRX_ID_LEN == 6); - - mach_write_to_6(ptr, id); -} - -/********************************************************************* -Reads a trx id from an index page. In case that the id size changes in -some future version, this function should be used instead of -mach_read_... */ -UNIV_INLINE -dulint -trx_read_trx_id( -/*============*/ - /* out: id */ - byte* ptr) /* in: pointer to memory from where to read */ -{ - ut_ad(DATA_TRX_ID_LEN == 6); - - return(mach_read_from_6(ptr)); -} - -/******************************************************************** -Looks for the trx handle with the given id in trx_list. 
*/ -UNIV_INLINE -trx_t* -trx_get_on_id( -/*==========*/ - /* out: the trx handle or NULL if not found */ - dulint trx_id) /* in: trx id to search for */ -{ - trx_t* trx; - - ut_ad(mutex_own(&(kernel_mutex))); - - trx = UT_LIST_GET_FIRST(trx_sys->trx_list); - - while (trx != NULL) { - if (0 == ut_dulint_cmp(trx_id, trx->id)) { - - return(trx); - } - - trx = UT_LIST_GET_NEXT(trx_list, trx); - } - - return(NULL); -} - -/******************************************************************** -Returns the minumum trx id in trx list. This is the smallest id for which -the trx can possibly be active. (But, you must look at the trx->conc_state to -find out if the minimum trx id transaction itself is active, or already -committed.) */ -UNIV_INLINE -dulint -trx_list_get_min_trx_id(void) -/*=========================*/ - /* out: the minimum trx id, or trx_sys->max_trx_id - if the trx list is empty */ -{ - trx_t* trx; - - ut_ad(mutex_own(&(kernel_mutex))); - - trx = UT_LIST_GET_LAST(trx_sys->trx_list); - - if (trx == NULL) { - - return(trx_sys->max_trx_id); - } - - return(trx->id); -} - -/******************************************************************** -Checks if a transaction with the given id is active. 
*/ -UNIV_INLINE -ibool -trx_is_active( -/*==========*/ - /* out: TRUE if active */ - dulint trx_id) /* in: trx id of the transaction */ -{ - trx_t* trx; - - ut_ad(mutex_own(&(kernel_mutex))); - - if (ut_dulint_cmp(trx_id, trx_list_get_min_trx_id()) < 0) { - - return(FALSE); - } - - if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) { - - /* There must be corruption: we return TRUE because this - function is only called by lock_clust_rec_some_has_impl() - and row_vers_impl_x_locked_off_kernel() and they have - diagnostic prints in this case */ - - return(TRUE); - } - - trx = trx_get_on_id(trx_id); - if (trx && (trx->conc_state == TRX_ACTIVE - || trx->conc_state == TRX_PREPARED)) { - - return(TRUE); - } - - return(FALSE); -} - -/********************************************************************* -Allocates a new transaction id. */ -UNIV_INLINE -dulint -trx_sys_get_new_trx_id(void) -/*========================*/ - /* out: new, allocated trx id */ -{ - dulint id; - - ut_ad(mutex_own(&kernel_mutex)); - - /* VERY important: after the database is started, max_trx_id value is - divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the following if - will evaluate to TRUE when this function is first time called, - and the value for trx id will be written to disk-based header! - Thus trx id values will not overlap when the database is - repeatedly started! */ - - if (ut_dulint_get_low(trx_sys->max_trx_id) - % TRX_SYS_TRX_ID_WRITE_MARGIN == 0) { - - trx_sys_flush_max_trx_id(); - } - - id = trx_sys->max_trx_id; - - UT_DULINT_INC(trx_sys->max_trx_id); - - return(id); -} - -/********************************************************************* -Allocates a new transaction number. 
*/ -UNIV_INLINE -dulint -trx_sys_get_new_trx_no(void) -/*========================*/ - /* out: new, allocated trx number */ -{ - ut_ad(mutex_own(&kernel_mutex)); - - return(trx_sys_get_new_trx_id()); -} diff --git a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h deleted file mode 100644 index f0833bc6f21..00000000000 --- a/storage/innobase/include/trx0trx.h +++ /dev/null @@ -1,713 +0,0 @@ -/****************************************************** -The transaction - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef trx0trx_h -#define trx0trx_h - -#include "univ.i" -#include "trx0types.h" -#include "lock0types.h" -#include "usr0types.h" -#include "que0types.h" -#include "mem0mem.h" -#include "read0types.h" -#include "dict0types.h" -#include "trx0xa.h" - -extern ulint trx_n_mysql_transactions; - -/************************************************************************ -Releases the search latch if trx has reserved it. */ - -void -trx_search_latch_release_if_reserved( -/*=================================*/ - trx_t* trx); /* in: transaction */ -/********************************************************************** -Set detailed error message for the transaction. */ -void -trx_set_detailed_error( -/*===================*/ - trx_t* trx, /* in: transaction struct */ - const char* msg); /* in: detailed error message */ -/***************************************************************** -Set detailed error message for the transaction from a file. Note that the -file is rewinded before reading from it. */ - -void -trx_set_detailed_error_from_file( -/*=============================*/ - trx_t* trx, /* in: transaction struct */ - FILE* file); /* in: file to read message from */ -/******************************************************************** -Retrieves the error_info field from a trx. 
*/ - -void* -trx_get_error_info( -/*===============*/ - /* out: the error info */ - trx_t* trx); /* in: trx object */ -/******************************************************************** -Creates and initializes a transaction object. */ - -trx_t* -trx_create( -/*=======*/ - /* out, own: the transaction */ - sess_t* sess); /* in: session or NULL */ -/************************************************************************ -Creates a transaction object for MySQL. */ - -trx_t* -trx_allocate_for_mysql(void); -/*========================*/ - /* out, own: transaction object */ -/************************************************************************ -Creates a transaction object for background operations by the master thread. */ - -trx_t* -trx_allocate_for_background(void); -/*=============================*/ - /* out, own: transaction object */ -/************************************************************************ -Frees a transaction object. */ - -void -trx_free( -/*=====*/ - trx_t* trx); /* in, own: trx object */ -/************************************************************************ -Frees a transaction object for MySQL. */ - -void -trx_free_for_mysql( -/*===============*/ - trx_t* trx); /* in, own: trx object */ -/************************************************************************ -Frees a transaction object of a background operation of the master thread. */ - -void -trx_free_for_background( -/*====================*/ - trx_t* trx); /* in, own: trx object */ -/******************************************************************** -Creates trx objects for transactions and initializes the trx list of -trx_sys at database start. Rollback segment and undo log lists must -already exist when this function is called, because the lists of -transactions to be rolled back or cleaned up are built based on the -undo log lists. 
*/ - -void -trx_lists_init_at_db_start(void); -/*============================*/ -/******************************************************************** -Starts a new transaction. */ - -ibool -trx_start( -/*======*/ - /* out: TRUE if success, FALSE if the rollback - segment could not support this many transactions */ - trx_t* trx, /* in: transaction */ - ulint rseg_id);/* in: rollback segment id; if ULINT_UNDEFINED - is passed, the system chooses the rollback segment - automatically in a round-robin fashion */ -/******************************************************************** -Starts a new transaction. */ - -ibool -trx_start_low( -/*==========*/ - /* out: TRUE */ - trx_t* trx, /* in: transaction */ - ulint rseg_id);/* in: rollback segment id; if ULINT_UNDEFINED - is passed, the system chooses the rollback segment - automatically in a round-robin fashion */ -/***************************************************************** -Starts the transaction if it is not yet started. */ -UNIV_INLINE -void -trx_start_if_not_started( -/*=====================*/ - trx_t* trx); /* in: transaction */ -/***************************************************************** -Starts the transaction if it is not yet started. Assumes we have reserved -the kernel mutex! */ -UNIV_INLINE -void -trx_start_if_not_started_low( -/*=========================*/ - trx_t* trx); /* in: transaction */ -/***************************************************************** -Starts the transaction if it is not yet started. */ - -void -trx_start_if_not_started_noninline( -/*===============================*/ - trx_t* trx); /* in: transaction */ -/******************************************************************** -Commits a transaction. */ - -void -trx_commit_off_kernel( -/*==================*/ - trx_t* trx); /* in: transaction */ -/******************************************************************** -Cleans up a transaction at database startup. 
The cleanup is needed if -the transaction already got to the middle of a commit when the database -crashed, andf we cannot roll it back. */ - -void -trx_cleanup_at_db_startup( -/*======================*/ - trx_t* trx); /* in: transaction */ -/************************************************************************** -Does the transaction commit for MySQL. */ - -ulint -trx_commit_for_mysql( -/*=================*/ - /* out: 0 or error number */ - trx_t* trx); /* in: trx handle */ -/************************************************************************** -Does the transaction prepare for MySQL. */ - -ulint -trx_prepare_for_mysql( -/*==================*/ - /* out: 0 or error number */ - trx_t* trx); /* in: trx handle */ -/************************************************************************** -This function is used to find number of prepared transactions and -their transaction objects for a recovery. */ - -int -trx_recover_for_mysql( -/*==================*/ - /* out: number of prepared transactions */ - XID* xid_list, /* in/out: prepared transactions */ - ulint len); /* in: number of slots in xid_list */ -/*********************************************************************** -This function is used to find one X/Open XA distributed transaction -which is in the prepared state */ -trx_t * -trx_get_trx_by_xid( -/*===============*/ - /* out: trx or NULL */ - XID* xid); /* in: X/Open XA transaction identification */ -/************************************************************************** -If required, flushes the log to disk if we called trx_commit_for_mysql() -with trx->flush_log_later == TRUE. */ - -ulint -trx_commit_complete_for_mysql( -/*==========================*/ - /* out: 0 or error number */ - trx_t* trx); /* in: trx handle */ -/************************************************************************** -Marks the latest SQL statement ended. 
*/ - -void -trx_mark_sql_stat_end( -/*==================*/ - trx_t* trx); /* in: trx handle */ -/************************************************************************ -Assigns a read view for a consistent read query. All the consistent reads -within the same transaction will get the same read view, which is created -when this function is first called for a new started transaction. */ - -read_view_t* -trx_assign_read_view( -/*=================*/ - /* out: consistent read view */ - trx_t* trx); /* in: active transaction */ -/*************************************************************** -The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to -the TRX_QUE_RUNNING state and releases query threads which were -waiting for a lock in the wait_thrs list. */ - -void -trx_end_lock_wait( -/*==============*/ - trx_t* trx); /* in: transaction */ -/******************************************************************** -Sends a signal to a trx object. */ - -void -trx_sig_send( -/*=========*/ - trx_t* trx, /* in: trx handle */ - ulint type, /* in: signal type */ - ulint sender, /* in: TRX_SIG_SELF or - TRX_SIG_OTHER_SESS */ - que_thr_t* receiver_thr, /* in: query thread which wants the - reply, or NULL; if type is - TRX_SIG_END_WAIT, this must be NULL */ - trx_savept_t* savept, /* in: possible rollback savepoint, or - NULL */ - que_thr_t** next_thr); /* in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread; if the parameter - is NULL, it is ignored */ -/******************************************************************** -Send the reply message when a signal in the queue of the trx has -been handled. 
*/ - -void -trx_sig_reply( -/*==========*/ - trx_sig_t* sig, /* in: signal */ - que_thr_t** next_thr); /* in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread */ -/******************************************************************** -Removes the signal object from a trx signal queue. */ - -void -trx_sig_remove( -/*===========*/ - trx_t* trx, /* in: trx handle */ - trx_sig_t* sig); /* in, own: signal */ -/******************************************************************** -Starts handling of a trx signal. */ - -void -trx_sig_start_handle( -/*=================*/ - trx_t* trx, /* in: trx handle */ - que_thr_t** next_thr); /* in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread */ -/******************************************************************** -Ends signal handling. If the session is in the error state, and -trx->graph_before_signal_handling != NULL, returns control to the error -handling routine of the graph (currently only returns the control to the -graph root which then sends an error message to the client). */ - -void -trx_end_signal_handling( -/*====================*/ - trx_t* trx); /* in: trx */ -/************************************************************************* -Creates a commit command node struct. */ - -commit_node_t* -commit_node_create( -/*===============*/ - /* out, own: commit node struct */ - mem_heap_t* heap); /* in: mem heap where created */ -/*************************************************************** -Performs an execution step for a commit type node in a query graph. 
*/ - -que_thr_t* -trx_commit_step( -/*============*/ - /* out: query thread to run next, or NULL */ - que_thr_t* thr); /* in: query thread */ - -/************************************************************************** -Prints info about a transaction to the given file. The caller must own the -kernel mutex and must have called -innobase_mysql_prepare_print_arbitrary_thd(), unless he knows that MySQL -or InnoDB cannot meanwhile change the info printed here. */ - -void -trx_print( -/*======*/ - FILE* f, /* in: output stream */ - trx_t* trx, /* in: transaction */ - ulint max_query_len); /* in: max query length to print, or 0 to - use the default max length */ - -#ifndef UNIV_HOTBACKUP -/************************************************************************** -Determines if the currently running transaction has been interrupted. */ - -ibool -trx_is_interrupted( -/*===============*/ - /* out: TRUE if interrupted */ - trx_t* trx); /* in: transaction */ -#else /* !UNIV_HOTBACKUP */ -#define trx_is_interrupted(trx) FALSE -#endif /* !UNIV_HOTBACKUP */ - -/*********************************************************************** -Compares the "weight" (or size) of two transactions. The weight of one -transaction is estimated as the number of altered rows + the number of -locked rows. Transactions that have edited non-transactional tables are -considered heavier than ones that have not. 
*/ - -int -trx_weight_cmp( -/*===========*/ - /* out: <0, 0 or >0; similar to strcmp(3) */ - trx_t* a, /* in: the first transaction to be compared */ - trx_t* b); /* in: the second transaction to be compared */ - -/* Signal to a transaction */ -struct trx_sig_struct{ - ulint type; /* signal type */ - ulint sender; /* TRX_SIG_SELF or - TRX_SIG_OTHER_SESS */ - que_thr_t* receiver; /* non-NULL if the sender of the signal - wants reply after the operation induced - by the signal is completed */ - trx_savept_t savept; /* possible rollback savepoint */ - UT_LIST_NODE_T(trx_sig_t) - signals; /* queue of pending signals to the - transaction */ - UT_LIST_NODE_T(trx_sig_t) - reply_signals; /* list of signals for which the sender - transaction is waiting a reply */ -}; - -#define TRX_MAGIC_N 91118598 - -/* The transaction handle; every session has a trx object which is freed only -when the session is freed; in addition there may be session-less transactions -rolling back after a database recovery */ - -struct trx_struct{ - ulint magic_n; - /* All the next fields are protected by the kernel mutex, except the - undo logs which are protected by undo_mutex */ - const char* op_info; /* English text describing the - current operation, or an empty - string */ - unsigned is_purge:1; /* 0=user transaction, 1=purge */ - ulint conc_state; /* state of the trx from the point - of view of concurrency control: - TRX_ACTIVE, TRX_COMMITTED_IN_MEMORY, - ... */ - time_t start_time; /* time the trx object was created - or the state last time became - TRX_ACTIVE */ - ulint isolation_level;/* TRX_ISO_REPEATABLE_READ, ... 
*/ - ibool check_foreigns; /* normally TRUE, but if the user - wants to suppress foreign key checks, - (in table imports, for example) we - set this FALSE */ - ibool check_unique_secondary; - /* normally TRUE, but if the user - wants to speed up inserts by - suppressing unique key checks - for secondary indexes when we decide - if we can use the insert buffer for - them, we set this FALSE */ - dulint id; /* transaction id */ - XID xid; /* X/Open XA transaction - identification to identify a - transaction branch */ - ibool support_xa; /* normally we do the XA two-phase - commit steps, but by setting this to - FALSE, one can save CPU time and about - 150 bytes in the undo log size as then - we skip XA steps */ - dulint no; /* transaction serialization number == - max trx id when the transaction is - moved to COMMITTED_IN_MEMORY state */ - ibool flush_log_later;/* when we commit the transaction - in MySQL's binlog write, we will - flush the log to disk later in - a separate call */ - ibool must_flush_log_later;/* this flag is set to TRUE in - trx_commit_off_kernel() if - flush_log_later was TRUE, and there - were modifications by the transaction; - in that case we must flush the log - in trx_commit_complete_for_mysql() */ - dulint commit_lsn; /* lsn at the time of the commit */ - ibool dict_operation; /* TRUE if the trx is used to create - a table, create an index, or drop a - table. This is a hint that the table - may need to be dropped in crash - recovery. */ - dulint table_id; /* table id if the preceding field is - TRUE */ - /*------------------------------*/ - unsigned duplicates:2; /* TRX_DUP_IGNORE | TRX_DUP_REPLACE */ - unsigned active_trans:2; /* 1 - if a transaction in MySQL - is active. 
2 - if prepare_commit_mutex - was taken */ - void* mysql_thd; /* MySQL thread handle corresponding - to this trx, or NULL */ - char** mysql_query_str;/* pointer to the field in mysqld_thd - which contains the pointer to the - current SQL query string */ - const char* mysql_log_file_name; - /* if MySQL binlog is used, this field - contains a pointer to the latest file - name; this is NULL if binlog is not - used */ - ib_longlong mysql_log_offset;/* if MySQL binlog is used, this field - contains the end offset of the binlog - entry */ - os_thread_id_t mysql_thread_id;/* id of the MySQL thread associated - with this transaction object */ - ulint mysql_process_no;/* since in Linux, 'top' reports - process id's and not thread id's, we - store the process number too */ - /*------------------------------*/ - ulint n_mysql_tables_in_use; /* number of Innobase tables - used in the processing of the current - SQL statement in MySQL */ - ulint mysql_n_tables_locked; - /* how many tables the current SQL - statement uses, except those - in consistent read */ - ibool dict_operation_lock_mode; - /* 0, RW_S_LATCH, or RW_X_LATCH: - the latch mode trx currently holds - on dict_operation_lock */ - ibool has_search_latch; - /* TRUE if this trx has latched the - search system latch in S-mode */ - ulint search_latch_timeout; - /* If we notice that someone is - waiting for our S-lock on the search - latch to be released, we wait in - row0sel.c for BTR_SEA_TIMEOUT new - searches until we try to keep - the search latch again over - calls from MySQL; this is intended - to reduce contention on the search - latch */ - /*------------------------------*/ - ibool declared_to_be_inside_innodb; - /* this is TRUE if we have declared - this transaction in - srv_conc_enter_innodb to be inside the - InnoDB engine */ - ulint n_tickets_to_enter_innodb; - /* this can be > 0 only when - declared_to_... 
is TRUE; when we come - to srv_conc_innodb_enter, if the value - here is > 0, we decrement this by 1 */ - /*------------------------------*/ - lock_t* auto_inc_lock; /* possible auto-inc lock reserved by - the transaction; note that it is also - in the lock list trx_locks */ - UT_LIST_NODE_T(trx_t) - trx_list; /* list of transactions */ - UT_LIST_NODE_T(trx_t) - mysql_trx_list; /* list of transactions created for - MySQL */ - /*------------------------------*/ - ulint error_state; /* 0 if no error, otherwise error - number; NOTE That ONLY the thread - doing the transaction is allowed to - set this field: this is NOT protected - by the kernel mutex */ - void* error_info; /* if the error number indicates a - duplicate key error, a pointer to - the problematic index is stored here */ - sess_t* sess; /* session of the trx, NULL if none */ - ulint que_state; /* TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT, - ... */ - que_t* graph; /* query currently run in the session, - or NULL if none; NOTE that the query - belongs to the session, and it can - survive over a transaction commit, if - it is a stored procedure with a COMMIT - WORK statement, for instance */ - ulint n_active_thrs; /* number of active query threads */ - ibool handling_signals;/* this is TRUE as long as the trx - is handling signals */ - que_t* graph_before_signal_handling; - /* value of graph when signal handling - for this trx started: this is used to - return control to the original query - graph for error processing */ - trx_sig_t sig; /* one signal object can be allocated - in this space, avoiding mem_alloc */ - UT_LIST_BASE_NODE_T(trx_sig_t) - signals; /* queue of processed or pending - signals to the trx */ - UT_LIST_BASE_NODE_T(trx_sig_t) - reply_signals; /* list of signals sent by the query - threads of this trx for which a thread - is waiting for a reply; if this trx is - killed, the reply requests in the list - must be canceled */ - /*------------------------------*/ - lock_t* wait_lock; /* if trx 
execution state is - TRX_QUE_LOCK_WAIT, this points to - the lock request, otherwise this is - NULL */ - ibool was_chosen_as_deadlock_victim; - /* when the transaction decides to wait - for a lock, it sets this to FALSE; - if another transaction chooses this - transaction as a victim in deadlock - resolution, it sets this to TRUE */ - time_t wait_started; /* lock wait started at this time */ - UT_LIST_BASE_NODE_T(que_thr_t) - wait_thrs; /* query threads belonging to this - trx that are in the QUE_THR_LOCK_WAIT - state */ - ulint deadlock_mark; /* a mark field used in deadlock - checking algorithm */ - /*------------------------------*/ - mem_heap_t* lock_heap; /* memory heap for the locks of the - transaction */ - UT_LIST_BASE_NODE_T(lock_t) - trx_locks; /* locks reserved by the transaction */ - /*------------------------------*/ - mem_heap_t* global_read_view_heap; - /* memory heap for the global read - view */ - read_view_t* global_read_view; - /* consistent read view associated - to a transaction or NULL */ - read_view_t* read_view; /* consistent read view used in the - transaction or NULL, this read view - if defined can be normal read view - associated to a transaction (i.e. - same as global_read_view) or read view - associated to a cursor */ - /*------------------------------*/ - UT_LIST_BASE_NODE_T(trx_named_savept_t) - trx_savepoints; /* savepoints set with SAVEPOINT ..., - oldest first */ - /*------------------------------*/ - mutex_t undo_mutex; /* mutex protecting the fields in this - section (down to undo_no_arr), EXCEPT - last_sql_stat_start, which can be - accessed only when we know that there - cannot be any activity in the undo - logs! 
*/ - dulint undo_no; /* next undo log record number to - assign; since the undo log is - private for a transaction, this - is a simple ascending sequence - with no gaps; thus it represents - the number of modified/inserted - rows in a transaction */ - trx_savept_t last_sql_stat_start; - /* undo_no when the last sql statement - was started: in case of an error, trx - is rolled back down to this undo - number; see note at undo_mutex! */ - trx_rseg_t* rseg; /* rollback segment assigned to the - transaction, or NULL if not assigned - yet */ - trx_undo_t* insert_undo; /* pointer to the insert undo log, or - NULL if no inserts performed yet */ - trx_undo_t* update_undo; /* pointer to the update undo log, or - NULL if no update performed yet */ - dulint roll_limit; /* least undo number to undo during - a rollback */ - ulint pages_undone; /* number of undo log pages undone - since the last undo log truncation */ - trx_undo_arr_t* undo_no_arr; /* array of undo numbers of undo log - records which are currently processed - by a rollback operation */ - ulint n_autoinc_rows; /* no. of AUTO-INC rows required for - an SQL statement. This is useful for - multi-row INSERTs */ - /*------------------------------*/ - char detailed_error[256]; /* detailed error message for last - error, or empty. 
*/ -}; - -#define TRX_MAX_N_THREADS 32 /* maximum number of - concurrent threads running a - single operation of a - transaction, e.g., a parallel - query */ -/* Transaction concurrency states (trx->conc_state) */ -#define TRX_NOT_STARTED 1 -#define TRX_ACTIVE 2 -#define TRX_COMMITTED_IN_MEMORY 3 -#define TRX_PREPARED 4 /* Support for 2PC/XA */ - -/* Transaction execution states when trx->conc_state == TRX_ACTIVE */ -#define TRX_QUE_RUNNING 1 /* transaction is running */ -#define TRX_QUE_LOCK_WAIT 2 /* transaction is waiting for a lock */ -#define TRX_QUE_ROLLING_BACK 3 /* transaction is rolling back */ -#define TRX_QUE_COMMITTING 4 /* transaction is committing */ - -/* Transaction isolation levels (trx->isolation_level) */ -#define TRX_ISO_READ_UNCOMMITTED 1 /* dirty read: non-locking - SELECTs are performed so that - we do not look at a possible - earlier version of a record; - thus they are not 'consistent' - reads under this isolation - level; otherwise like level - 2 */ - -#define TRX_ISO_READ_COMMITTED 2 /* somewhat Oracle-like - isolation, except that in - range UPDATE and DELETE we - must block phantom rows - with next-key locks; - SELECT ... FOR UPDATE and ... - LOCK IN SHARE MODE only lock - the index records, NOT the - gaps before them, and thus - allow free inserting; - each consistent read reads its - own snapshot */ - -#define TRX_ISO_REPEATABLE_READ 3 /* this is the default; - all consistent reads in the - same trx read the same - snapshot; - full next-key locking used - in locking reads to block - insertions into gaps */ - -#define TRX_ISO_SERIALIZABLE 4 /* all plain SELECTs are - converted to LOCK IN SHARE - MODE reads */ - -/* Treatment of duplicate values (trx->duplicates; for example, in inserts). -Multiple flags can be combined with bitwise OR. 
*/ -#define TRX_DUP_IGNORE 1 /* duplicate rows are to be updated */ -#define TRX_DUP_REPLACE 2 /* duplicate rows are to be replaced */ - - -/* Types of a trx signal */ -#define TRX_SIG_NO_SIGNAL 100 -#define TRX_SIG_TOTAL_ROLLBACK 1 -#define TRX_SIG_ROLLBACK_TO_SAVEPT 2 -#define TRX_SIG_COMMIT 3 -#define TRX_SIG_ERROR_OCCURRED 4 -#define TRX_SIG_BREAK_EXECUTION 5 - -/* Sender types of a signal */ -#define TRX_SIG_SELF 1 /* sent by the session itself, or - by an error occurring within this - session */ -#define TRX_SIG_OTHER_SESS 2 /* sent by another session (which - must hold rights to this) */ - -/* Commit command node in a query graph */ -struct commit_node_struct{ - que_common_t common; /* node type: QUE_NODE_COMMIT */ - ulint state; /* node execution state */ -}; - -/* Commit node states */ -#define COMMIT_NODE_SEND 1 -#define COMMIT_NODE_WAIT 2 - - -#ifndef UNIV_NONINL -#include "trx0trx.ic" -#endif - -#endif diff --git a/storage/innobase/include/trx0trx.ic b/storage/innobase/include/trx0trx.ic deleted file mode 100644 index 09b2f822ff7..00000000000 --- a/storage/innobase/include/trx0trx.ic +++ /dev/null @@ -1,40 +0,0 @@ -/****************************************************** -The transaction - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -/***************************************************************** -Starts the transaction if it is not yet started. */ -UNIV_INLINE -void -trx_start_if_not_started( -/*=====================*/ - trx_t* trx) /* in: transaction */ -{ - ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY); - - if (trx->conc_state == TRX_NOT_STARTED) { - - trx_start(trx, ULINT_UNDEFINED); - } -} - -/***************************************************************** -Starts the transaction if it is not yet started. Assumes we have reserved -the kernel mutex! 
*/ -UNIV_INLINE -void -trx_start_if_not_started_low( -/*=========================*/ - trx_t* trx) /* in: transaction */ -{ - ut_ad(trx->conc_state != TRX_COMMITTED_IN_MEMORY); - - if (trx->conc_state == TRX_NOT_STARTED) { - - trx_start_low(trx, ULINT_UNDEFINED); - } -} diff --git a/storage/innobase/include/trx0types.h b/storage/innobase/include/trx0types.h deleted file mode 100644 index 0e6ee79498c..00000000000 --- a/storage/innobase/include/trx0types.h +++ /dev/null @@ -1,45 +0,0 @@ -/****************************************************** -Transaction system global type definitions - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef trx0types_h -#define trx0types_h - -#include "lock0types.h" -#include "ut0byte.h" - -/* Memory objects */ -typedef struct trx_struct trx_t; -typedef struct trx_sys_struct trx_sys_t; -typedef struct trx_doublewrite_struct trx_doublewrite_t; -typedef struct trx_sig_struct trx_sig_t; -typedef struct trx_rseg_struct trx_rseg_t; -typedef struct trx_undo_struct trx_undo_t; -typedef struct trx_undo_arr_struct trx_undo_arr_t; -typedef struct trx_undo_inf_struct trx_undo_inf_t; -typedef struct trx_purge_struct trx_purge_t; -typedef struct roll_node_struct roll_node_t; -typedef struct commit_node_struct commit_node_t; -typedef struct trx_named_savept_struct trx_named_savept_t; - -/* Transaction savepoint */ -typedef struct trx_savept_struct trx_savept_t; -struct trx_savept_struct{ - dulint least_undo_no; /* least undo number to undo */ -}; - -/* File objects */ -typedef byte trx_sysf_t; -typedef byte trx_rsegf_t; -typedef byte trx_usegf_t; -typedef byte trx_ulogf_t; -typedef byte trx_upagef_t; - -/* Undo log record */ -typedef byte trx_undo_rec_t; - -#endif diff --git a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h deleted file mode 100644 index 7f10e407746..00000000000 --- a/storage/innobase/include/trx0undo.h +++ /dev/null @@ -1,503 +0,0 @@ 
-/****************************************************** -Transaction undo log - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#ifndef trx0undo_h -#define trx0undo_h - -#include "univ.i" -#include "trx0types.h" -#include "mtr0mtr.h" -#include "trx0sys.h" -#include "page0types.h" -#include "trx0xa.h" - -/*************************************************************************** -Builds a roll pointer dulint. */ -UNIV_INLINE -dulint -trx_undo_build_roll_ptr( -/*====================*/ - /* out: roll pointer */ - ibool is_insert, /* in: TRUE if insert undo log */ - ulint rseg_id, /* in: rollback segment id */ - ulint page_no, /* in: page number */ - ulint offset); /* in: offset of the undo entry within page */ -/*************************************************************************** -Decodes a roll pointer dulint. */ -UNIV_INLINE -void -trx_undo_decode_roll_ptr( -/*=====================*/ - dulint roll_ptr, /* in: roll pointer */ - ibool* is_insert, /* out: TRUE if insert undo log */ - ulint* rseg_id, /* out: rollback segment id */ - ulint* page_no, /* out: page number */ - ulint* offset); /* out: offset of the undo entry within page */ -/*************************************************************************** -Returns TRUE if the roll pointer is of the insert type. */ -UNIV_INLINE -ibool -trx_undo_roll_ptr_is_insert( -/*========================*/ - /* out: TRUE if insert undo log */ - dulint roll_ptr); /* in: roll pointer */ -/********************************************************************* -Writes a roll ptr to an index page. In case that the size changes in -some future version, this function should be used instead of -mach_write_... 
*/ -UNIV_INLINE -void -trx_write_roll_ptr( -/*===============*/ - byte* ptr, /* in: pointer to memory where written */ - dulint roll_ptr); /* in: roll ptr */ -/********************************************************************* -Reads a roll ptr from an index page. In case that the roll ptr size -changes in some future version, this function should be used instead of -mach_read_... */ -UNIV_INLINE -dulint -trx_read_roll_ptr( -/*==============*/ - /* out: roll ptr */ - byte* ptr); /* in: pointer to memory from where to read */ -/********************************************************************** -Gets an undo log page and x-latches it. */ -UNIV_INLINE -page_t* -trx_undo_page_get( -/*==============*/ - /* out: pointer to page x-latched */ - ulint space, /* in: space where placed */ - ulint page_no, /* in: page number */ - mtr_t* mtr); /* in: mtr */ -/********************************************************************** -Gets an undo log page and s-latches it. */ -UNIV_INLINE -page_t* -trx_undo_page_get_s_latched( -/*========================*/ - /* out: pointer to page s-latched */ - ulint space, /* in: space where placed */ - ulint page_no, /* in: page number */ - mtr_t* mtr); /* in: mtr */ -/********************************************************************** -Returns the previous undo record on the page in the specified log, or -NULL if none exists. */ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_page_get_prev_rec( -/*=======================*/ - /* out: pointer to record, NULL if none */ - trx_undo_rec_t* rec, /* in: undo log record */ - ulint page_no,/* in: undo log header page number */ - ulint offset);/* in: undo log header offset on page */ -/********************************************************************** -Returns the next undo log record on the page in the specified log, or -NULL if none exists. 
*/ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_page_get_next_rec( -/*=======================*/ - /* out: pointer to record, NULL if none */ - trx_undo_rec_t* rec, /* in: undo log record */ - ulint page_no,/* in: undo log header page number */ - ulint offset);/* in: undo log header offset on page */ -/********************************************************************** -Returns the last undo record on the page in the specified undo log, or -NULL if none exists. */ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_page_get_last_rec( -/*=======================*/ - /* out: pointer to record, NULL if none */ - page_t* undo_page,/* in: undo log page */ - ulint page_no,/* in: undo log header page number */ - ulint offset); /* in: undo log header offset on page */ -/********************************************************************** -Returns the first undo record on the page in the specified undo log, or -NULL if none exists. */ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_page_get_first_rec( -/*========================*/ - /* out: pointer to record, NULL if none */ - page_t* undo_page,/* in: undo log page */ - ulint page_no,/* in: undo log header page number */ - ulint offset);/* in: undo log header offset on page */ -/*************************************************************************** -Gets the previous record in an undo log. */ - -trx_undo_rec_t* -trx_undo_get_prev_rec( -/*==================*/ - /* out: undo log record, the page s-latched, - NULL if none */ - trx_undo_rec_t* rec, /* in: undo record */ - ulint page_no,/* in: undo log header page number */ - ulint offset, /* in: undo log header offset on page */ - mtr_t* mtr); /* in: mtr */ -/*************************************************************************** -Gets the next record in an undo log. 
*/ - -trx_undo_rec_t* -trx_undo_get_next_rec( -/*==================*/ - /* out: undo log record, the page s-latched, - NULL if none */ - trx_undo_rec_t* rec, /* in: undo record */ - ulint page_no,/* in: undo log header page number */ - ulint offset, /* in: undo log header offset on page */ - mtr_t* mtr); /* in: mtr */ -/*************************************************************************** -Gets the first record in an undo log. */ - -trx_undo_rec_t* -trx_undo_get_first_rec( -/*===================*/ - /* out: undo log record, the page latched, NULL if - none */ - ulint space, /* in: undo log header space */ - ulint page_no,/* in: undo log header page number */ - ulint offset, /* in: undo log header offset on page */ - ulint mode, /* in: latching mode: RW_S_LATCH or RW_X_LATCH */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************ -Tries to add a page to the undo log segment where the undo log is placed. */ - -ulint -trx_undo_add_page( -/*==============*/ - /* out: page number if success, else - FIL_NULL */ - trx_t* trx, /* in: transaction */ - trx_undo_t* undo, /* in: undo log memory object */ - mtr_t* mtr); /* in: mtr which does not have a latch to any - undo log page; the caller must have reserved - the rollback segment mutex */ -/*************************************************************************** -Truncates an undo log from the end. This function is used during a rollback -to free space from an undo log. */ - -void -trx_undo_truncate_end( -/*==================*/ - trx_t* trx, /* in: transaction whose undo log it is */ - trx_undo_t* undo, /* in: undo log */ - dulint limit); /* in: all undo records with undo number - >= this value should be truncated */ -/*************************************************************************** -Truncates an undo log from the start. This function is used during a purge -operation. 
*/ - -void -trx_undo_truncate_start( -/*====================*/ - trx_rseg_t* rseg, /* in: rollback segment */ - ulint space, /* in: space id of the log */ - ulint hdr_page_no, /* in: header page number */ - ulint hdr_offset, /* in: header offset on the page */ - dulint limit); /* in: all undo pages with undo numbers < - this value should be truncated; NOTE that - the function only frees whole pages; the - header page is not freed, but emptied, if - all the records there are < limit */ -/************************************************************************ -Initializes the undo log lists for a rollback segment memory copy. -This function is only called when the database is started or a new -rollback segment created. */ - -ulint -trx_undo_lists_init( -/*================*/ - /* out: the combined size of undo log segments - in pages */ - trx_rseg_t* rseg); /* in: rollback segment memory object */ -/************************************************************************** -Assigns an undo log for a transaction. A new undo log is created or a cached -undo log reused. */ - -ulint -trx_undo_assign_undo( -/*=================*/ - /* out: DB_SUCCESS if undo log assign - * successful, possible error codes are: - * ER_TOO_MANY_CONCURRENT_TRXS - * DB_OUT_OF_FILE_SPAC - * DB_OUT_OF_MEMORY */ - trx_t* trx, /* in: transaction */ - ulint type); /* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */ -/********************************************************************** -Sets the state of the undo log segment at a transaction finish. */ - -page_t* -trx_undo_set_state_at_finish( -/*=========================*/ - /* out: undo log segment header page, - x-latched */ - trx_rseg_t* rseg, /* in: rollback segment memory object */ - trx_t* trx, /* in: transaction */ - trx_undo_t* undo, /* in: undo log memory copy */ - mtr_t* mtr); /* in: mtr */ -/********************************************************************** -Sets the state of the undo log segment at a transaction prepare. 
*/ - -page_t* -trx_undo_set_state_at_prepare( -/*==========================*/ - /* out: undo log segment header page, - x-latched */ - trx_t* trx, /* in: transaction */ - trx_undo_t* undo, /* in: undo log memory copy */ - mtr_t* mtr); /* in: mtr */ - -/************************************************************************** -Adds the update undo log header as the first in the history list, and -frees the memory object, or puts it to the list of cached update undo log -segments. */ - -void -trx_undo_update_cleanup( -/*====================*/ - trx_t* trx, /* in: trx owning the update undo log */ - page_t* undo_page, /* in: update undo log header page, - x-latched */ - mtr_t* mtr); /* in: mtr */ -/********************************************************************** -Frees or caches an insert undo log after a transaction commit or rollback. -Knowledge of inserts is not needed after a commit or rollback, therefore -the data can be discarded. */ - -void -trx_undo_insert_cleanup( -/*====================*/ - trx_t* trx); /* in: transaction handle */ -/*************************************************************** -Parses the redo log entry of an undo log page initialization. */ - -byte* -trx_undo_parse_page_init( -/*=====================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ -/*************************************************************** -Parses the redo log entry of an undo log page header create or reuse. 
*/ - -byte* -trx_undo_parse_page_header( -/*=======================*/ - /* out: end of log record or NULL */ - ulint type, /* in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ -/*************************************************************** -Parses the redo log entry of an undo log page header discard. */ - -byte* -trx_undo_parse_discard_latest( -/*==========================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ - -/* Types of an undo log segment */ -#define TRX_UNDO_INSERT 1 /* contains undo entries for inserts */ -#define TRX_UNDO_UPDATE 2 /* contains undo entries for updates - and delete markings: in short, - modifys (the name 'UPDATE' is a - historical relic) */ -/* States of an undo log segment */ -#define TRX_UNDO_ACTIVE 1 /* contains an undo log of an active - transaction */ -#define TRX_UNDO_CACHED 2 /* cached for quick reuse */ -#define TRX_UNDO_TO_FREE 3 /* insert undo segment can be freed */ -#define TRX_UNDO_TO_PURGE 4 /* update undo segment will not be - reused: it can be freed in purge when - all undo data in it is removed */ -#define TRX_UNDO_PREPARED 5 /* contains an undo log of an - prepared transaction */ - -/* Transaction undo log memory object; this is protected by the undo_mutex -in the corresponding transaction object */ - -struct trx_undo_struct{ - /*-----------------------------*/ - ulint id; /* undo log slot number within the - rollback segment */ - ulint type; /* TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ - ulint state; /* state of the corresponding undo log - segment */ - ibool del_marks; /* relevant only in an update undo log: - this is TRUE if the transaction may - have delete marked records, because of - a delete of a row or an update of an - indexed 
field; purge is then - necessary; also TRUE if the transaction - has updated an externally stored - field */ - dulint trx_id; /* id of the trx assigned to the undo - log */ - XID xid; /* X/Open XA transaction - identification */ - ibool dict_operation; /* TRUE if a dict operation trx */ - dulint table_id; /* if a dict operation, then the table - id */ - trx_rseg_t* rseg; /* rseg where the undo log belongs */ - /*-----------------------------*/ - ulint space; /* space id where the undo log - placed */ - ulint hdr_page_no; /* page number of the header page in - the undo log */ - ulint hdr_offset; /* header offset of the undo log on the - page */ - ulint last_page_no; /* page number of the last page in the - undo log; this may differ from - top_page_no during a rollback */ - ulint size; /* current size in pages */ - /*-----------------------------*/ - ulint empty; /* TRUE if the stack of undo log - records is currently empty */ - ulint top_page_no; /* page number where the latest undo - log record was catenated; during - rollback the page from which the latest - undo record was chosen */ - ulint top_offset; /* offset of the latest undo record, - i.e., the topmost element in the undo - log if we think of it as a stack */ - dulint top_undo_no; /* undo number of the latest record */ - page_t* guess_page; /* guess for the buffer frame where - the top page might reside */ - /*-----------------------------*/ - UT_LIST_NODE_T(trx_undo_t) undo_list; - /* undo log objects in the rollback - segment are chained into lists */ -}; - -/* The offset of the undo log page header on pages of the undo log */ -#define TRX_UNDO_PAGE_HDR FSEG_PAGE_DATA -/*-------------------------------------------------------------*/ -/* Transaction undo log page header offsets */ -#define TRX_UNDO_PAGE_TYPE 0 /* TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ -#define TRX_UNDO_PAGE_START 2 /* Byte offset where the undo log - records for the LATEST transaction - start on this page (remember that - in an update 
undo log, the first page - can contain several undo logs) */ -#define TRX_UNDO_PAGE_FREE 4 /* On each page of the undo log this - field contains the byte offset of the - first free byte on the page */ -#define TRX_UNDO_PAGE_NODE 6 /* The file list node in the chain - of undo log pages */ -/*-------------------------------------------------------------*/ -#define TRX_UNDO_PAGE_HDR_SIZE (6 + FLST_NODE_SIZE) - -/* An update undo segment with just one page can be reused if it has -< this number bytes used; we must leave space at least for one new undo -log header on the page */ - -#define TRX_UNDO_PAGE_REUSE_LIMIT (3 * UNIV_PAGE_SIZE / 4) - -/* An update undo log segment may contain several undo logs on its first page -if the undo logs took so little space that the segment could be cached and -reused. All the undo log headers are then on the first page, and the last one -owns the undo log records on subsequent pages if the segment is bigger than -one page. If an undo log is stored in a segment, then on the first page it is -allowed to have zero undo records, but if the segment extends to several -pages, then all the rest of the pages must contain at least one undo log -record. */ - -/* The offset of the undo log segment header on the first page of the undo -log segment */ - -#define TRX_UNDO_SEG_HDR (TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE) -/*-------------------------------------------------------------*/ -#define TRX_UNDO_STATE 0 /* TRX_UNDO_ACTIVE, ... 
*/ -#define TRX_UNDO_LAST_LOG 2 /* Offset of the last undo log header - on the segment header page, 0 if - none */ -#define TRX_UNDO_FSEG_HEADER 4 /* Header for the file segment which - the undo log segment occupies */ -#define TRX_UNDO_PAGE_LIST (4 + FSEG_HEADER_SIZE) - /* Base node for the list of pages in - the undo log segment; defined only on - the undo log segment's first page */ -/*-------------------------------------------------------------*/ -/* Size of the undo log segment header */ -#define TRX_UNDO_SEG_HDR_SIZE (4 + FSEG_HEADER_SIZE + FLST_BASE_NODE_SIZE) - - -/* The undo log header. There can be several undo log headers on the first -page of an update undo log segment. */ -/*-------------------------------------------------------------*/ -#define TRX_UNDO_TRX_ID 0 /* Transaction id */ -#define TRX_UNDO_TRX_NO 8 /* Transaction number of the - transaction; defined only if the log - is in a history list */ -#define TRX_UNDO_DEL_MARKS 16 /* Defined only in an update undo - log: TRUE if the transaction may have - done delete markings of records, and - thus purge is necessary */ -#define TRX_UNDO_LOG_START 18 /* Offset of the first undo log record - of this log on the header page; purge - may remove undo log record from the - log start, and therefore this is not - necessarily the same as this log - header end offset */ -#define TRX_UNDO_XID_EXISTS 20 /* TRUE if undo log header includes - X/Open XA transaction identification - XID */ -#define TRX_UNDO_DICT_TRANS 21 /* TRUE if the transaction is a table - create, index create, or drop - transaction: in recovery - the transaction cannot be rolled back - in the usual way: a 'rollback' rather - means dropping the created or dropped - table, if it still exists */ -#define TRX_UNDO_TABLE_ID 22 /* Id of the table if the preceding - field is TRUE */ -#define TRX_UNDO_NEXT_LOG 30 /* Offset of the next undo log header - on this page, 0 if none */ -#define TRX_UNDO_PREV_LOG 32 /* Offset of the previous undo log - 
header on this page, 0 if none */ -#define TRX_UNDO_HISTORY_NODE 34 /* If the log is put to the history - list, the file list node is here */ -/*-------------------------------------------------------------*/ -#define TRX_UNDO_LOG_OLD_HDR_SIZE (34 + FLST_NODE_SIZE) - -/* Note: the writing of the undo log old header is coded by a log record -MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE. The appending of an XID to the -header is logged separately. In this sense, the XID is not really a member -of the undo log header. TODO: do not append the XID to the log header if XA -is not needed by the user. The XID wastes about 150 bytes of space in every -undo log. In the history list we may have millions of undo logs, which means -quite a large overhead. */ - -/* X/Open XA Transaction Identification (XID) */ - -#define TRX_UNDO_XA_FORMAT (TRX_UNDO_LOG_OLD_HDR_SIZE) -#define TRX_UNDO_XA_TRID_LEN (TRX_UNDO_XA_FORMAT + 4) -#define TRX_UNDO_XA_BQUAL_LEN (TRX_UNDO_XA_TRID_LEN + 4) -#define TRX_UNDO_XA_XID (TRX_UNDO_XA_BQUAL_LEN + 4) -/*--------------------------------------------------------------*/ -#define TRX_UNDO_LOG_XA_HDR_SIZE (TRX_UNDO_XA_XID + XIDDATASIZE) - /* Total size of the header with the XA XID */ - -#ifndef UNIV_NONINL -#include "trx0undo.ic" -#endif - -#endif diff --git a/storage/innobase/include/trx0undo.ic b/storage/innobase/include/trx0undo.ic deleted file mode 100644 index f28f36ade03..00000000000 --- a/storage/innobase/include/trx0undo.ic +++ /dev/null @@ -1,330 +0,0 @@ -/****************************************************** -Transaction undo log - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "data0type.h" - -/*************************************************************************** -Builds a roll pointer dulint. 
*/ -UNIV_INLINE -dulint -trx_undo_build_roll_ptr( -/*====================*/ - /* out: roll pointer */ - ibool is_insert, /* in: TRUE if insert undo log */ - ulint rseg_id, /* in: rollback segment id */ - ulint page_no, /* in: page number */ - ulint offset) /* in: offset of the undo entry within page */ -{ -#if DATA_ROLL_PTR_LEN != 7 -# error "DATA_ROLL_PTR_LEN != 7" -#endif - ut_ad(rseg_id < 128); - - return(ut_dulint_create(is_insert * 128 * 256 * 256 - + rseg_id * 256 * 256 - + (page_no / 256) / 256, - (page_no % (256 * 256)) * 256 * 256 - + offset)); -} - -/*************************************************************************** -Decodes a roll pointer dulint. */ -UNIV_INLINE -void -trx_undo_decode_roll_ptr( -/*=====================*/ - dulint roll_ptr, /* in: roll pointer */ - ibool* is_insert, /* out: TRUE if insert undo log */ - ulint* rseg_id, /* out: rollback segment id */ - ulint* page_no, /* out: page number */ - ulint* offset) /* out: offset of the undo entry within page */ -{ - ulint low; - ulint high; -#if DATA_ROLL_PTR_LEN != 7 -# error "DATA_ROLL_PTR_LEN != 7" -#endif -#if TRUE != 1 -# error "TRUE != 1" -#endif - high = ut_dulint_get_high(roll_ptr); - low = ut_dulint_get_low(roll_ptr); - - *offset = low % (256 * 256); - - *is_insert = high / (256 * 256 * 128); /* TRUE == 1 */ - *rseg_id = (high / (256 * 256)) % 128; - - *page_no = (high % (256 * 256)) * 256 * 256 - + (low / 256) / 256; -} - -/*************************************************************************** -Returns TRUE if the roll pointer is of the insert type. 
*/ -UNIV_INLINE -ibool -trx_undo_roll_ptr_is_insert( -/*========================*/ - /* out: TRUE if insert undo log */ - dulint roll_ptr) /* in: roll pointer */ -{ - ulint high; -#if DATA_ROLL_PTR_LEN != 7 -# error "DATA_ROLL_PTR_LEN != 7" -#endif -#if TRUE != 1 -# error "TRUE != 1" -#endif - high = ut_dulint_get_high(roll_ptr); - - return(high / (256 * 256 * 128)); -} - -/********************************************************************* -Writes a roll ptr to an index page. In case that the size changes in -some future version, this function should be used instead of -mach_write_... */ -UNIV_INLINE -void -trx_write_roll_ptr( -/*===============*/ - byte* ptr, /* in: pointer to memory where written */ - dulint roll_ptr) /* in: roll ptr */ -{ - ut_ad(DATA_ROLL_PTR_LEN == 7); - - mach_write_to_7(ptr, roll_ptr); -} - -/********************************************************************* -Reads a roll ptr from an index page. In case that the roll ptr size -changes in some future version, this function should be used instead of -mach_read_... */ -UNIV_INLINE -dulint -trx_read_roll_ptr( -/*==============*/ - /* out: roll ptr */ - byte* ptr) /* in: pointer to memory from where to read */ -{ -#if DATA_ROLL_PTR_LEN != 7 -# error "DATA_ROLL_PTR_LEN != 7" -#endif - return(mach_read_from_7(ptr)); -} - -/********************************************************************** -Gets an undo log page and x-latches it. */ -UNIV_INLINE -page_t* -trx_undo_page_get( -/*==============*/ - /* out: pointer to page x-latched */ - ulint space, /* in: space where placed */ - ulint page_no, /* in: page number */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* page; - - page = buf_page_get(space, page_no, RW_X_LATCH, mtr); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_TRX_UNDO_PAGE); -#endif /* UNIV_SYNC_DEBUG */ - - return(page); -} - -/********************************************************************** -Gets an undo log page and s-latches it. 
*/ -UNIV_INLINE -page_t* -trx_undo_page_get_s_latched( -/*========================*/ - /* out: pointer to page s-latched */ - ulint space, /* in: space where placed */ - ulint page_no, /* in: page number */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* page; - - page = buf_page_get(space, page_no, RW_S_LATCH, mtr); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_TRX_UNDO_PAGE); -#endif /* UNIV_SYNC_DEBUG */ - - return(page); -} - -/********************************************************************** -Returns the start offset of the undo log records of the specified undo -log on the page. */ -UNIV_INLINE -ulint -trx_undo_page_get_start( -/*====================*/ - /* out: start offset */ - page_t* undo_page,/* in: undo log page */ - ulint page_no,/* in: undo log header page number */ - ulint offset) /* in: undo log header offset on page */ -{ - ulint start; - - if (page_no == buf_frame_get_page_no(undo_page)) { - - start = mach_read_from_2(offset + undo_page - + TRX_UNDO_LOG_START); - } else { - start = TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE; - } - - return(start); -} - -/********************************************************************** -Returns the end offset of the undo log records of the specified undo -log on the page. 
*/ -UNIV_INLINE -ulint -trx_undo_page_get_end( -/*==================*/ - /* out: end offset */ - page_t* undo_page,/* in: undo log page */ - ulint page_no,/* in: undo log header page number */ - ulint offset) /* in: undo log header offset on page */ -{ - trx_ulogf_t* log_hdr; - ulint end; - - if (page_no == buf_frame_get_page_no(undo_page)) { - - log_hdr = undo_page + offset; - - end = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG); - - if (end == 0) { - end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_FREE); - } - } else { - end = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_FREE); - } - - return(end); -} - -/********************************************************************** -Returns the previous undo record on the page in the specified log, or -NULL if none exists. */ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_page_get_prev_rec( -/*=======================*/ - /* out: pointer to record, NULL if none */ - trx_undo_rec_t* rec, /* in: undo log record */ - ulint page_no,/* in: undo log header page number */ - ulint offset) /* in: undo log header offset on page */ -{ - page_t* undo_page; - ulint start; - - undo_page = buf_frame_align(rec); - - start = trx_undo_page_get_start(undo_page, page_no, offset); - - if (start + undo_page == rec) { - - return(NULL); - } - - return(undo_page + mach_read_from_2(rec - 2)); -} - -/********************************************************************** -Returns the next undo log record on the page in the specified log, or -NULL if none exists. 
*/ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_page_get_next_rec( -/*=======================*/ - /* out: pointer to record, NULL if none */ - trx_undo_rec_t* rec, /* in: undo log record */ - ulint page_no,/* in: undo log header page number */ - ulint offset) /* in: undo log header offset on page */ -{ - page_t* undo_page; - ulint end; - ulint next; - - undo_page = buf_frame_align(rec); - - end = trx_undo_page_get_end(undo_page, page_no, offset); - - next = mach_read_from_2(rec); - - if (next == end) { - - return(NULL); - } - - return(undo_page + next); -} - -/********************************************************************** -Returns the last undo record on the page in the specified undo log, or -NULL if none exists. */ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_page_get_last_rec( -/*=======================*/ - /* out: pointer to record, NULL if none */ - page_t* undo_page,/* in: undo log page */ - ulint page_no,/* in: undo log header page number */ - ulint offset) /* in: undo log header offset on page */ -{ - ulint start; - ulint end; - - start = trx_undo_page_get_start(undo_page, page_no, offset); - end = trx_undo_page_get_end(undo_page, page_no, offset); - - if (start == end) { - - return(NULL); - } - - return(undo_page + mach_read_from_2(undo_page + end - 2)); -} - -/********************************************************************** -Returns the first undo record on the page in the specified undo log, or -NULL if none exists. 
*/ -UNIV_INLINE -trx_undo_rec_t* -trx_undo_page_get_first_rec( -/*========================*/ - /* out: pointer to record, NULL if none */ - page_t* undo_page,/* in: undo log page */ - ulint page_no,/* in: undo log header page number */ - ulint offset) /* in: undo log header offset on page */ -{ - ulint start; - ulint end; - - start = trx_undo_page_get_start(undo_page, page_no, offset); - end = trx_undo_page_get_end(undo_page, page_no, offset); - - if (start == end) { - - return(NULL); - } - - return(undo_page + start); -} diff --git a/storage/innobase/include/trx0xa.h b/storage/innobase/include/trx0xa.h deleted file mode 100644 index df85cd663cb..00000000000 --- a/storage/innobase/include/trx0xa.h +++ /dev/null @@ -1,183 +0,0 @@ -/* - * Start of xa.h header - * - * Define a symbol to prevent multiple inclusions of this header file - */ -#ifndef XA_H -#define XA_H - -/* - * Transaction branch identification: XID and NULLXID: - */ -#ifndef XIDDATASIZE - -#define XIDDATASIZE 128 /* size in bytes */ -#define MAXGTRIDSIZE 64 /* maximum size in bytes of gtrid */ -#define MAXBQUALSIZE 64 /* maximum size in bytes of bqual */ - -struct xid_t { - long formatID; /* format identifier */ - long gtrid_length; /* value from 1 through 64 */ - long bqual_length; /* value from 1 through 64 */ - char data[XIDDATASIZE]; -}; -typedef struct xid_t XID; -#endif -/* - * A value of -1 in formatID means that the XID is null. - */ - - -#ifdef NOTDEFINED -/* Let us comment this out to remove compiler errors!!!!!!!!!!!! 
*/ - -/* - * Declarations of routines by which RMs call TMs: - */ -extern int ax_reg __P((int, XID *, long)); -extern int ax_unreg __P((int, long)); - -/* - * XA Switch Data Structure - */ -#define RMNAMESZ 32 /* length of resource manager name, */ - /* including the null terminator */ -#define MAXINFOSIZE 256 /* maximum size in bytes of xa_info */ - /* strings, including the null - terminator */ - - -struct xa_switch_t { - char name[RMNAMESZ]; /* name of resource manager */ - long flags; /* resource manager specific options */ - long version; /* must be 0 */ - int (*xa_open_entry) /* xa_open function pointer */ - __P((char *, int, long)); - int (*xa_close_entry) /* xa_close function pointer */ - __P((char *, int, long)); - int (*xa_start_entry) /* xa_start function pointer */ - __P((XID *, int, long)); - int (*xa_end_entry) /* xa_end function pointer */ - __P((XID *, int, long)); - int (*xa_rollback_entry) /* xa_rollback function pointer */ - __P((XID *, int, long)); - int (*xa_prepare_entry) /* xa_prepare function pointer */ - __P((XID *, int, long)); - int (*xa_commit_entry) /* xa_commit function pointer */ - __P((XID *, int, long)); - int (*xa_recover_entry) /* xa_recover function pointer */ - __P((XID *, long, int, long)); - int (*xa_forget_entry) /* xa_forget function pointer */ - __P((XID *, int, long)); - int (*xa_complete_entry) /* xa_complete function pointer */ - __P((int *, int *, int, long)); -}; -#endif /* NOTDEFINED */ - - -/* - * Flag definitions for the RM switch - */ -#define TMNOFLAGS 0x00000000L /* no resource manager features - selected */ -#define TMREGISTER 0x00000001L /* resource manager dynamically - registers */ -#define TMNOMIGRATE 0x00000002L /* resource manager does not support - association migration */ -#define TMUSEASYNC 0x00000004L /* resource manager supports - asynchronous operations */ -/* - * Flag definitions for xa_ and ax_ routines - */ -/* use TMNOFLAGGS, defined above, when not specifying other flags */ -#define TMASYNC 
0x80000000L /* perform routine asynchronously */ -#define TMONEPHASE 0x40000000L /* caller is using one-phase commit - optimisation */ -#define TMFAIL 0x20000000L /* dissociates caller and marks - transaction branch rollback-only */ -#define TMNOWAIT 0x10000000L /* return if blocking condition - exists */ -#define TMRESUME 0x08000000L /* caller is resuming association with - suspended transaction branch */ -#define TMSUCCESS 0x04000000L /* dissociate caller from transaction - branch */ -#define TMSUSPEND 0x02000000L /* caller is suspending, not ending, - association */ -#define TMSTARTRSCAN 0x01000000L /* start a recovery scan */ -#define TMENDRSCAN 0x00800000L /* end a recovery scan */ -#define TMMULTIPLE 0x00400000L /* wait for any asynchronous - operation */ -#define TMJOIN 0x00200000L /* caller is joining existing - transaction branch */ -#define TMMIGRATE 0x00100000L /* caller intends to perform - migration */ - -/* - * ax_() return codes (transaction manager reports to resource manager) - */ -#define TM_JOIN 2 /* caller is joining existing - transaction branch */ -#define TM_RESUME 1 /* caller is resuming association with - suspended transaction branch */ -#define TM_OK 0 /* normal execution */ -#define TMER_TMERR -1 /* an error occurred in the transaction - manager */ -#define TMER_INVAL -2 /* invalid arguments were given */ -#define TMER_PROTO -3 /* routine invoked in an improper - context */ - -/* - * xa_() return codes (resource manager reports to transaction manager) - */ -#define XA_RBBASE 100 /* The inclusive lower bound of the - rollback codes */ -#define XA_RBROLLBACK XA_RBBASE /* The rollback was caused by an - unspecified reason */ -#define XA_RBCOMMFAIL XA_RBBASE+1 /* The rollback was caused by a - communication failure */ -#define XA_RBDEADLOCK XA_RBBASE+2 /* A deadlock was detected */ -#define XA_RBINTEGRITY XA_RBBASE+3 /* A condition that violates the - integrity of the resources was - detected */ -#define XA_RBOTHER XA_RBBASE+4 /* The resource 
manager rolled back the - transaction branch for a reason not - on this list */ -#define XA_RBPROTO XA_RBBASE+5 /* A protocol error occurred in the - resource manager */ -#define XA_RBTIMEOUT XA_RBBASE+6 /* A transaction branch took - too long */ -#define XA_RBTRANSIENT XA_RBBASE+7 /* May retry the transaction branch */ -#define XA_RBEND XA_RBTRANSIENT /* The inclusive upper bound of the - rollback codes */ -#define XA_NOMIGRATE 9 /* resumption must occur where - suspension occurred */ -#define XA_HEURHAZ 8 /* the transaction branch may have - been heuristically completed */ -#define XA_HEURCOM 7 /* the transaction branch has been - heuristically committed */ -#define XA_HEURRB 6 /* the transaction branch has been - heuristically rolled back */ -#define XA_HEURMIX 5 /* the transaction branch has been - heuristically committed and rolled - back */ -#define XA_RETRY 4 /* routine returned with no effect and - may be re-issued */ -#define XA_RDONLY 3 /* the transaction branch was read-only - and has been committed */ -#define XA_OK 0 /* normal execution */ -#define XAER_ASYNC -2 /* asynchronous operation already - outstanding */ -#define XAER_RMERR -3 /* a resource manager error occurred in - the transaction branch */ -#define XAER_NOTA -4 /* the XID is not valid */ -#define XAER_INVAL -5 /* invalid arguments were given */ -#define XAER_PROTO -6 /* routine invoked in an improper - context */ -#define XAER_RMFAIL -7 /* resource manager unavailable */ -#define XAER_DUPID -8 /* the XID already exists */ -#define XAER_OUTSIDE -9 /* resource manager doing work outside - transaction */ -#endif /* ifndef XA_H */ -/* - * End of xa.h header - */ diff --git a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i deleted file mode 100644 index bb44a91a343..00000000000 --- a/storage/innobase/include/univ.i +++ /dev/null @@ -1,376 +0,0 @@ -/*************************************************************************** -Version control for database, common definitions, and 
include files - -(c) 1994 - 2000 Innobase Oy - -Created 1/20/1994 Heikki Tuuri -****************************************************************************/ - -#ifndef univ_i -#define univ_i - -#ifdef __SUNPRO_C -# include <sun_prefetch.h> -#endif - -#if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__) -# undef __WIN__ -# define __WIN__ - -# include <windows.h> - -# if !defined(WIN64) && !defined(_WIN64) -# define UNIV_CAN_USE_X86_ASSEMBLER -# endif - -# ifdef _NT_ -# define __NT__ -# endif - -#else -/* The defines used with MySQL */ - -/* Include two header files from MySQL to make the Unix flavor used -in compiling more Posix-compatible. These headers also define __WIN__ -if we are compiling on Windows. */ - -# include <my_global.h> -# include <my_pthread.h> - -/* Include <sys/stat.h> to get S_I... macros defined for os0file.c */ -# include <sys/stat.h> - -# undef PACKAGE -# undef VERSION - -/* Include the header file generated by GNU autoconf */ -# ifndef __WIN__ -# include "config.h" -# endif - -# ifdef HAVE_SCHED_H -# include <sched.h> -# endif - -/* When compiling for Itanium IA64, undefine the flag below to prevent use -of the 32-bit x86 assembler in mutex operations. */ - -# if defined(__WIN__) && !defined(WIN64) && !defined(_WIN64) -# define UNIV_CAN_USE_X86_ASSEMBLER -# endif - -/* We only try to do explicit inlining of functions with gcc and - Sun Studio */ - -# if !defined(__GNUC__) && !defined(__SUNPRO_C) -# undef UNIV_MUST_NOT_INLINE /* Remove compiler warning */ -# define UNIV_MUST_NOT_INLINE -# endif - -# ifdef HAVE_PREAD -# define HAVE_PWRITE -# endif - -#endif /* #if (defined(WIN32) || ... */ - -/* DEBUG VERSION CONTROL - ===================== */ - -/* The following flag will make InnoDB to initialize -all memory it allocates to zero. It hides Purify -warnings about reading unallocated memory unless -memory is read outside the allocated blocks. 
*/ -/* -#define UNIV_INIT_MEM_TO_ZERO -*/ - -/* Make a non-inline debug version */ - -#if 0 -#define UNIV_DEBUG_VALGRIND /* Enable extra - Valgrind instrumentation */ -#define UNIV_DEBUG /* Enable ut_ad() assertions */ -#define UNIV_LIST_DEBUG /* debug UT_LIST_ macros */ -#define UNIV_MEM_DEBUG /* detect memory leaks etc */ -#define UNIV_IBUF_DEBUG /* debug the insert buffer; -this limits the database to IBUF_COUNT_N_SPACES and IBUF_COUNT_N_PAGES, -and the insert buffer must be empty when the database is started */ -#define UNIV_SYNC_DEBUG /* debug mutex and latch -operations (very slow); also UNIV_DEBUG must be defined */ -#define UNIV_SEARCH_DEBUG /* debug B-tree comparisons */ -#define UNIV_SYNC_PERF_STAT /* operation counts for - rw-locks and mutexes */ -#define UNIV_SEARCH_PERF_STAT /* statistics for the - adaptive hash index */ -#define UNIV_SRV_PRINT_LATCH_WAITS /* enable diagnostic output - in sync0sync.c */ -#define UNIV_BTR_PRINT /* enable functions for - printing B-trees */ -#endif - -#define UNIV_BTR_DEBUG /* check B-tree links */ -#define UNIV_LIGHT_MEM_DEBUG /* light memory debugging */ - -#ifdef HAVE_purify -/* The following sets all new allocated memory to zero before use: -this can be used to eliminate unnecessary Purify warnings, but note that -it also masks many bugs Purify could detect. For detailed Purify analysis it -is best to remove the define below and look through the warnings one -by one. 
*/ -#define UNIV_SET_MEM_TO_ZERO -#endif - -/* Use malloc instead of innodb additional memory pool (great with tcmalloc) */ -#define UNIV_DISABLE_MEM_POOL - -#if defined(HAVE_GCC_ATOMIC_BUILTINS) || defined(HAVE_SOLARIS_ATOMIC) -/* - * We have a full set of atomic ops available - we will use them - */ -#define UNIV_SYNC_ATOMIC -#endif - -#if defined(WIN_ATOMICS32) || defined(WIN_ATOMICS64) -/* - * We have a full set of atomic ops available - we will use them - * This is on Windows - */ -#define UNIV_SYNC_ATOMIC -#endif - -/* -#define UNIV_SQL_DEBUG -#define UNIV_LOG_DEBUG -*/ - /* the above option prevents forcing of log to disk - at a buffer page write: it should be tested with this - option off; also some ibuf tests are suppressed */ -/* -#define UNIV_BASIC_LOG_DEBUG -*/ - /* the above option enables basic recovery debugging: - new allocated file pages are reset */ - -#if (!defined(UNIV_DEBUG) && !defined(INSIDE_HA_INNOBASE_CC) && !defined(UNIV_MUST_NOT_INLINE)) -/* Definition for inline version */ - -#ifdef __WIN__ -#define UNIV_INLINE __inline -#else -#define UNIV_INLINE static __inline__ -#endif - -#else -/* If we want to compile a noninlined version we use the following macro -definitions: */ - -#define UNIV_NONINL -#define UNIV_INLINE - -#endif /* UNIV_DEBUG */ - -#ifdef _WIN32 -#define UNIV_WORD_SIZE 4 -#elif defined(_WIN64) -#define UNIV_WORD_SIZE 8 -#else -/* MySQL config.h generated by GNU autoconf will define SIZEOF_LONG in Posix */ -#define UNIV_WORD_SIZE SIZEOF_LONG -#endif - -/* The following alignment is used in memory allocations in memory heap -management to ensure correct alignment for doubles etc. */ -#define UNIV_MEM_ALIGNMENT 8 - -/* The following alignment is used in aligning lints etc. */ -#define UNIV_WORD_ALIGNMENT UNIV_WORD_SIZE - -/* - DATABASE VERSION CONTROL - ======================== -*/ - -/* The universal page size of the database */ -#define UNIV_PAGE_SIZE (2 * 8192) /* NOTE! 
Currently, this has to be a - power of 2 */ -/* The 2-logarithm of UNIV_PAGE_SIZE: */ -#define UNIV_PAGE_SIZE_SHIFT 14 - -/* Maximum number of parallel threads in a parallelized operation */ -#define UNIV_MAX_PARALLELISM 32 - -/* - UNIVERSAL TYPE DEFINITIONS - ========================== -*/ - -/* Note that inside MySQL 'byte' is defined as char on Linux! */ -#define byte unsigned char - -/* Define an unsigned integer type that is exactly 32 bits. */ - -#if SIZEOF_INT == 4 -typedef unsigned int ib_uint32_t; -#elif SIZEOF_LONG == 4 -typedef unsigned long ib_uint32_t; -#else -#error "Neither int or long is 4 bytes" -#endif - -/* Another basic type we use is unsigned long integer which should be equal to -the word size of the machine, that is on a 32-bit platform 32 bits, and on a -64-bit platform 64 bits. We also give the printf format for the type as a -macro ULINTPF. */ - -#ifdef _WIN64 -typedef unsigned __int64 ulint; -#define ULINTPF "%I64u" -typedef __int64 lint; -#else -typedef unsigned long int ulint; -#define ULINTPF "%lu" -typedef long int lint; -#endif - -#ifdef __WIN__ -typedef __int64 ib_longlong; -typedef unsigned __int64 ib_ulonglong; -#else -/* Note: longlong and ulonglong come from MySQL headers. */ -typedef longlong ib_longlong; -typedef ulonglong ib_ulonglong; -#endif - -typedef unsigned long long int ullint; - -#ifndef __WIN__ -#if SIZEOF_LONG != SIZEOF_VOIDP -#error "Error: InnoDB's ulint must be of the same size as void*" -#endif -#endif - -/* The 'undefined' value for a ulint */ -#define ULINT_UNDEFINED ((ulint)(-1)) - -/* The undefined 32-bit unsigned integer */ -#define ULINT32_UNDEFINED 0xFFFFFFFF - -/* Maximum value for a ulint */ -#define ULINT_MAX ((ulint)(-2)) - -/* This 'ibool' type is used within Innobase. Remember that different included -headers may define 'bool' differently. Do not assume that 'bool' is a ulint! 
*/ -#define ibool ulint - -#ifndef TRUE - -#define TRUE 1 -#define FALSE 0 - -#endif - -/* The following number as the length of a logical field means that the field -has the SQL NULL as its value. NOTE that because we assume that the length -of a field is a 32-bit integer when we store it, for example, to an undo log -on disk, we must have also this number fit in 32 bits, also in 64-bit -computers! */ - -#define UNIV_SQL_NULL ULINT32_UNDEFINED - -/* Lengths which are not UNIV_SQL_NULL, but bigger than the following -number indicate that a field contains a reference to an externally -stored part of the field in the tablespace. The length field then -contains the sum of the following flag and the locally stored len. */ - -#define UNIV_EXTERN_STORAGE_FIELD (UNIV_SQL_NULL - UNIV_PAGE_SIZE) - -/* Some macros to improve branch prediction and reduce cache misses */ -#if defined(__GNUC__) && (__GNUC__ > 2) && ! defined(__INTEL_COMPILER) -/* Tell the compiler that 'expr' probably evaluates to 'constant'. */ -# define UNIV_EXPECT(expr,constant) __builtin_expect(expr, constant) -/* Tell the compiler that a pointer is likely to be NULL */ -# define UNIV_LIKELY_NULL(ptr) __builtin_expect((ulint) ptr, 0) -/* Minimize cache-miss latency by moving data at addr into a cache before -it is read. */ -# define UNIV_PREFETCH_R(addr) __builtin_prefetch(addr, 0, 3) -/* Minimize cache-miss latency by moving data at addr into a cache before -it is read or written. 
*/ -# define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3) -#elif defined(__SUNPRO_C) -# define UNIV_EXPECT(expr,value) (expr) -# define UNIV_LIKELY_NULL(expr) (expr) -# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many(addr) -# define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr) -#else -/* Dummy versions of the macros */ -# define UNIV_EXPECT(expr,value) (expr) -# define UNIV_LIKELY_NULL(expr) (expr) -# define UNIV_PREFETCH_R(addr) ((void) 0) -# define UNIV_PREFETCH_RW(addr) ((void) 0) -#endif -/* Tell the compiler that cond is likely to hold */ -#define UNIV_LIKELY(cond) UNIV_EXPECT(cond, TRUE) -/* Tell the compiler that cond is unlikely to hold */ -#define UNIV_UNLIKELY(cond) UNIV_EXPECT(cond, FALSE) - -/* Compile-time constant of the given array's size. */ -#define UT_ARR_SIZE(a) (sizeof(a) / sizeof((a)[0])) - -/* The return type from a thread's start function differs between Unix and -Windows, so define a typedef for it and a macro to use at the end of such -functions. 
*/ - -#ifdef __WIN__ -typedef ulint os_thread_ret_t; -#define OS_THREAD_DUMMY_RETURN return(0) -#else -typedef void* os_thread_ret_t; -#define OS_THREAD_DUMMY_RETURN return(NULL) -#endif - -#include <stdio.h> -#include "ut0dbg.h" -#include "ut0ut.h" -#include "db0err.h" -#ifdef UNIV_DEBUG_VALGRIND -# include <valgrind/memcheck.h> -# define UNIV_MEM_VALID(addr, size) VALGRIND_MAKE_MEM_DEFINED(addr, size) -# define UNIV_MEM_INVALID(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size) -# define UNIV_MEM_FREE(addr, size) VALGRIND_MAKE_MEM_NOACCESS(addr, size) -# define UNIV_MEM_ALLOC(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size) -# define UNIV_MEM_ASSERT_RW(addr, size) do { \ - const void* _p = (const void*) (ulint) \ - VALGRIND_CHECK_MEM_IS_DEFINED(addr, size); \ - if (UNIV_LIKELY_NULL(_p)) \ - fprintf(stderr, "%s:%d: %p[%u] undefined at %ld\n", \ - __FILE__, __LINE__, \ - (const void*) (addr), (unsigned) (size), (long) \ - (((const char*) _p) - ((const char*) (addr)))); \ - } while (0) -# define UNIV_MEM_ASSERT_W(addr, size) do { \ - const void* _p = (const void*) (ulint) \ - VALGRIND_CHECK_MEM_IS_ADDRESSABLE(addr, size); \ - if (UNIV_LIKELY_NULL(_p)) \ - fprintf(stderr, "%s:%d: %p[%u] unwritable at %ld\n", \ - __FILE__, __LINE__, \ - (const void*) (addr), (unsigned) (size), (long) \ - (((const char*) _p) - ((const char*) (addr)))); \ - } while (0) -#else -# define UNIV_MEM_VALID(addr, size) do {} while(0) -# define UNIV_MEM_INVALID(addr, size) do {} while(0) -# define UNIV_MEM_FREE(addr, size) do {} while(0) -# define UNIV_MEM_ALLOC(addr, size) do {} while(0) -# define UNIV_MEM_ASSERT_RW(addr, size) do {} while(0) -# define UNIV_MEM_ASSERT_W(addr, size) do {} while(0) -#endif -#define UNIV_MEM_ASSERT_AND_FREE(addr, size) do { \ - UNIV_MEM_ASSERT_W(addr, size); \ - UNIV_MEM_FREE(addr, size); \ -} while (0) -#define UNIV_MEM_ASSERT_AND_ALLOC(addr, size) do { \ - UNIV_MEM_ASSERT_W(addr, size); \ - UNIV_MEM_ALLOC(addr, size); \ -} while (0) - -#endif diff --git 
a/storage/innobase/include/usr0sess.h b/storage/innobase/include/usr0sess.h deleted file mode 100644 index 3ed1ea21a4d..00000000000 --- a/storage/innobase/include/usr0sess.h +++ /dev/null @@ -1,61 +0,0 @@ -/****************************************************** -Sessions - -(c) 1996 Innobase Oy - -Created 6/25/1996 Heikki Tuuri -*******************************************************/ - -#ifndef usr0sess_h -#define usr0sess_h - -#include "univ.i" -#include "ut0byte.h" -#include "trx0types.h" -#include "srv0srv.h" -#include "trx0types.h" -#include "usr0types.h" -#include "que0types.h" -#include "data0data.h" -#include "rem0rec.h" - -/************************************************************************* -Opens a session. */ - -sess_t* -sess_open(void); -/*============*/ - /* out, own: session object */ -/************************************************************************* -Closes a session, freeing the memory occupied by it, if it is in a state -where it should be closed. */ - -ibool -sess_try_close( -/*===========*/ - /* out: TRUE if closed */ - sess_t* sess); /* in, own: session object */ - -/* The session handle. 
All fields are protected by the kernel mutex */ -struct sess_struct{ - ulint state; /* state of the session */ - trx_t* trx; /* transaction object permanently - assigned for the session: the - transaction instance designated by the - trx id changes, but the memory - structure is preserved */ - UT_LIST_BASE_NODE_T(que_t) - graphs; /* query graphs belonging to this - session */ -}; - -/* Session states */ -#define SESS_ACTIVE 1 -#define SESS_ERROR 2 /* session contains an error message - which has not yet been communicated - to the client */ -#ifndef UNIV_NONINL -#include "usr0sess.ic" -#endif - -#endif diff --git a/storage/innobase/include/usr0sess.ic b/storage/innobase/include/usr0sess.ic deleted file mode 100644 index c851d5745b9..00000000000 --- a/storage/innobase/include/usr0sess.ic +++ /dev/null @@ -1,7 +0,0 @@ -/****************************************************** -Sessions - -(c) 1996 Innobase Oy - -Created 6/25/1996 Heikki Tuuri -*******************************************************/ diff --git a/storage/innobase/include/usr0types.h b/storage/innobase/include/usr0types.h deleted file mode 100644 index 311471c1a0e..00000000000 --- a/storage/innobase/include/usr0types.h +++ /dev/null @@ -1,14 +0,0 @@ -/****************************************************** -Users and sessions global types - -(c) 1996 Innobase Oy - -Created 6/25/1996 Heikki Tuuri -*******************************************************/ - -#ifndef usr0types_h -#define usr0types_h - -typedef struct sess_struct sess_t; - -#endif diff --git a/storage/innobase/include/ut0byte.h b/storage/innobase/include/ut0byte.h deleted file mode 100644 index 6533f1166ca..00000000000 --- a/storage/innobase/include/ut0byte.h +++ /dev/null @@ -1,250 +0,0 @@ -/********************************************************************** -Utilities for byte operations - -(c) 1994, 1995 Innobase Oy - -Created 1/20/1994 Heikki Tuuri -***********************************************************************/ - -#ifndef 
ut0byte_h -#define ut0byte_h - - -#include "univ.i" - -/* Type definition for a 64-bit unsigned integer, which works also -in 32-bit machines. NOTE! Access the fields only with the accessor -functions. This definition appears here only for the compiler to -know the size of a dulint. */ - -typedef struct dulint_struct dulint; -struct dulint_struct{ - ulint high; /* most significant 32 bits */ - ulint low; /* least significant 32 bits */ -}; - -/* Zero value for a dulint */ -extern dulint ut_dulint_zero; - -/* Maximum value for a dulint */ -extern dulint ut_dulint_max; - -/*********************************************************** -Creates a 64-bit dulint out of two ulints. */ -UNIV_INLINE -dulint -ut_dulint_create( -/*=============*/ - /* out: created dulint */ - ulint high, /* in: high-order 32 bits */ - ulint low); /* in: low-order 32 bits */ -/*********************************************************** -Gets the high-order 32 bits of a dulint. */ -UNIV_INLINE -ulint -ut_dulint_get_high( -/*===============*/ - /* out: 32 bits in ulint */ - dulint d); /* in: dulint */ -/*********************************************************** -Gets the low-order 32 bits of a dulint. */ -UNIV_INLINE -ulint -ut_dulint_get_low( -/*==============*/ - /* out: 32 bits in ulint */ - dulint d); /* in: dulint */ -/*********************************************************** -Converts a dulint (a struct of 2 ulints) to ib_longlong, which is a 64-bit -integer type. */ -UNIV_INLINE -ib_longlong -ut_conv_dulint_to_longlong( -/*=======================*/ - /* out: value in ib_longlong type */ - dulint d); /* in: dulint */ -/*********************************************************** -Tests if a dulint is zero. */ -UNIV_INLINE -ibool -ut_dulint_is_zero( -/*==============*/ - /* out: TRUE if zero */ - dulint a); /* in: dulint */ -/*********************************************************** -Compares two dulints. 
*/ -UNIV_INLINE -int -ut_dulint_cmp( -/*==========*/ - /* out: -1 if a < b, 0 if a == b, - 1 if a > b */ - dulint a, /* in: dulint */ - dulint b); /* in: dulint */ -/*********************************************************** -Calculates the max of two dulints. */ -UNIV_INLINE -dulint -ut_dulint_get_max( -/*==============*/ - /* out: max(a, b) */ - dulint a, /* in: dulint */ - dulint b); /* in: dulint */ -/*********************************************************** -Calculates the min of two dulints. */ -UNIV_INLINE -dulint -ut_dulint_get_min( -/*==============*/ - /* out: min(a, b) */ - dulint a, /* in: dulint */ - dulint b); /* in: dulint */ -/*********************************************************** -Adds a ulint to a dulint. */ -UNIV_INLINE -dulint -ut_dulint_add( -/*==========*/ - /* out: sum a + b */ - dulint a, /* in: dulint */ - ulint b); /* in: ulint */ -/*********************************************************** -Subtracts a ulint from a dulint. */ -UNIV_INLINE -dulint -ut_dulint_subtract( -/*===============*/ - /* out: a - b */ - dulint a, /* in: dulint */ - ulint b); /* in: ulint, b <= a */ -/*********************************************************** -Subtracts a dulint from another. NOTE that the difference must be positive -and smaller that 4G. */ -UNIV_INLINE -ulint -ut_dulint_minus( -/*============*/ - /* out: a - b */ - dulint a, /* in: dulint; NOTE a must be >= b and at most - 2 to power 32 - 1 greater */ - dulint b); /* in: dulint */ -/************************************************************ -Rounds a dulint downward to a multiple of a power of 2. */ -UNIV_INLINE -dulint -ut_dulint_align_down( -/*=================*/ - /* out: rounded value */ - dulint n, /* in: number to be rounded */ - ulint align_no); /* in: align by this number which must be a - power of 2 */ -/************************************************************ -Rounds a dulint upward to a multiple of a power of 2. 
*/ -UNIV_INLINE -dulint -ut_dulint_align_up( -/*===============*/ - /* out: rounded value */ - dulint n, /* in: number to be rounded */ - ulint align_no); /* in: align by this number which must be a - power of 2 */ -/*********************************************************** -Increments a dulint variable by 1. */ -#define UT_DULINT_INC(D)\ -{\ - if ((D).low == 0xFFFFFFFFUL) {\ - (D).high = (D).high + 1;\ - (D).low = 0;\ - } else {\ - (D).low = (D).low + 1;\ - }\ -} -/*********************************************************** -Tests if two dulints are equal. */ -#define UT_DULINT_EQ(D1, D2) (((D1).low == (D2).low)\ - && ((D1).high == (D2).high)) -/**************************************************************** -Sort function for dulint arrays. */ -void -ut_dulint_sort(dulint* arr, dulint* aux_arr, ulint low, ulint high); -/*===============================================================*/ -/************************************************************ -The following function calculates the value of an integer n rounded -to the least product of align_no which is >= n. align_no has to be a -power of 2. */ -UNIV_INLINE -ulint -ut_calc_align( -/*==========*/ - /* out: rounded value */ - ulint n, /* in: number to be rounded */ - ulint align_no); /* in: align by this number */ -/************************************************************ -The following function calculates the value of an integer n rounded -to the biggest product of align_no which is <= n. align_no has to be a -power of 2. */ -UNIV_INLINE -ulint -ut_calc_align_down( -/*===============*/ - /* out: rounded value */ - ulint n, /* in: number to be rounded */ - ulint align_no); /* in: align by this number */ -/************************************************************* -The following function rounds up a pointer to the nearest aligned address. 
*/ -UNIV_INLINE -void* -ut_align( -/*=====*/ - /* out: aligned pointer */ - void* ptr, /* in: pointer */ - ulint align_no); /* in: align by this number */ -/************************************************************* -The following function rounds down a pointer to the nearest -aligned address. */ -UNIV_INLINE -void* -ut_align_down( -/*==========*/ - /* out: aligned pointer */ - void* ptr, /* in: pointer */ - ulint align_no) /* in: align by this number */ - __attribute__((const)); -/************************************************************* -The following function computes the offset of a pointer from the nearest -aligned address. */ -UNIV_INLINE -ulint -ut_align_offset( -/*============*/ - /* out: distance from aligned - pointer */ - const void* ptr, /* in: pointer */ - ulint align_no) /* in: align by this number */ - __attribute__((const)); -/********************************************************************* -Gets the nth bit of a ulint. */ -UNIV_INLINE -ibool -ut_bit_get_nth( -/*===========*/ - /* out: TRUE if nth bit is 1; 0th bit is defined to - be the least significant */ - ulint a, /* in: ulint */ - ulint n); /* in: nth bit requested */ -/********************************************************************* -Sets the nth bit of a ulint. 
*/ -UNIV_INLINE -ulint -ut_bit_set_nth( -/*===========*/ - /* out: the ulint with the bit set as requested */ - ulint a, /* in: ulint */ - ulint n, /* in: nth bit requested */ - ibool val); /* in: value for the bit to set */ - -#ifndef UNIV_NONINL -#include "ut0byte.ic" -#endif - -#endif diff --git a/storage/innobase/include/ut0byte.ic b/storage/innobase/include/ut0byte.ic deleted file mode 100644 index 01b6c29d08f..00000000000 --- a/storage/innobase/include/ut0byte.ic +++ /dev/null @@ -1,397 +0,0 @@ -/****************************************************************** -Utilities for byte operations - -(c) 1994, 1995 Innobase Oy - -Created 5/30/1994 Heikki Tuuri -*******************************************************************/ - -/*********************************************************** -Creates a 64-bit dulint out of two ulints. */ -UNIV_INLINE -dulint -ut_dulint_create( -/*=============*/ - /* out: created dulint */ - ulint high, /* in: high-order 32 bits */ - ulint low) /* in: low-order 32 bits */ -{ - dulint res; - - ut_ad(high <= 0xFFFFFFFF); - ut_ad(low <= 0xFFFFFFFF); - - res.high = high; - res.low = low; - - return(res); -} - -/*********************************************************** -Gets the high-order 32 bits of a dulint. */ -UNIV_INLINE -ulint -ut_dulint_get_high( -/*===============*/ - /* out: 32 bits in ulint */ - dulint d) /* in: dulint */ -{ - return(d.high); -} - -/*********************************************************** -Gets the low-order 32 bits of a dulint. */ -UNIV_INLINE -ulint -ut_dulint_get_low( -/*==============*/ - /* out: 32 bits in ulint */ - dulint d) /* in: dulint */ -{ - return(d.low); -} - -/*********************************************************** -Converts a dulint (a struct of 2 ulints) to ib_longlong, which is a 64-bit -integer type. 
*/ -UNIV_INLINE -ib_longlong -ut_conv_dulint_to_longlong( -/*=======================*/ - /* out: value in ib_longlong type */ - dulint d) /* in: dulint */ -{ - return((ib_longlong)d.low - + (((ib_longlong)d.high) << 32)); -} - -/*********************************************************** -Tests if a dulint is zero. */ -UNIV_INLINE -ibool -ut_dulint_is_zero( -/*==============*/ - /* out: TRUE if zero */ - dulint a) /* in: dulint */ -{ - if ((a.low == 0) && (a.high == 0)) { - - return(TRUE); - } - - return(FALSE); -} - -/*********************************************************** -Compares two dulints. */ -UNIV_INLINE -int -ut_dulint_cmp( -/*==========*/ - /* out: -1 if a < b, 0 if a == b, - 1 if a > b */ - dulint a, /* in: dulint */ - dulint b) /* in: dulint */ -{ - if (a.high > b.high) { - return(1); - } else if (a.high < b.high) { - return(-1); - } else if (a.low > b.low) { - return(1); - } else if (a.low < b.low) { - return(-1); - } else { - return(0); - } -} - -/*********************************************************** -Calculates the max of two dulints. */ -UNIV_INLINE -dulint -ut_dulint_get_max( -/*==============*/ - /* out: max(a, b) */ - dulint a, /* in: dulint */ - dulint b) /* in: dulint */ -{ - if (ut_dulint_cmp(a, b) > 0) { - - return(a); - } - - return(b); -} - -/*********************************************************** -Calculates the min of two dulints. */ -UNIV_INLINE -dulint -ut_dulint_get_min( -/*==============*/ - /* out: min(a, b) */ - dulint a, /* in: dulint */ - dulint b) /* in: dulint */ -{ - if (ut_dulint_cmp(a, b) > 0) { - - return(b); - } - - return(a); -} - -/*********************************************************** -Adds a ulint to a dulint. 
*/ -UNIV_INLINE -dulint -ut_dulint_add( -/*==========*/ - /* out: sum a + b */ - dulint a, /* in: dulint */ - ulint b) /* in: ulint */ -{ - if (0xFFFFFFFFUL - b >= a.low) { - a.low += b; - - return(a); - } - - a.low = a.low - (0xFFFFFFFFUL - b) - 1; - - a.high++; - - return(a); -} - -/*********************************************************** -Subtracts a ulint from a dulint. */ -UNIV_INLINE -dulint -ut_dulint_subtract( -/*===============*/ - /* out: a - b */ - dulint a, /* in: dulint */ - ulint b) /* in: ulint, b <= a */ -{ - if (a.low >= b) { - a.low -= b; - - return(a); - } - - b -= a.low + 1; - - a.low = 0xFFFFFFFFUL - b; - - ut_ad(a.high > 0); - - a.high--; - - return(a); -} - -/*********************************************************** -Subtracts a dulint from another. NOTE that the difference must be positive -and smaller that 4G. */ -UNIV_INLINE -ulint -ut_dulint_minus( -/*============*/ - /* out: a - b */ - dulint a, /* in: dulint; NOTE a must be >= b and at most - 2 to power 32 - 1 greater */ - dulint b) /* in: dulint */ -{ - ulint diff; - - if (a.high == b.high) { - ut_ad(a.low >= b.low); - - return(a.low - b.low); - } - - ut_ad(a.high == b.high + 1); - - diff = (ulint)(0xFFFFFFFFUL - b.low); - diff += 1 + a.low; - - ut_ad(diff > a.low); - - return(diff); -} - -/************************************************************ -Rounds a dulint downward to a multiple of a power of 2. */ -UNIV_INLINE -dulint -ut_dulint_align_down( -/*=================*/ - /* out: rounded value */ - dulint n, /* in: number to be rounded */ - ulint align_no) /* in: align by this number which must be a - power of 2 */ -{ - ulint low, high; - - ut_ad(align_no > 0); - ut_ad(((align_no - 1) & align_no) == 0); - - low = ut_dulint_get_low(n); - high = ut_dulint_get_high(n); - - low = low & ~(align_no - 1); - - return(ut_dulint_create(high, low)); -} - -/************************************************************ -Rounds a dulint upward to a multiple of a power of 2. 
*/ -UNIV_INLINE -dulint -ut_dulint_align_up( -/*===============*/ - /* out: rounded value */ - dulint n, /* in: number to be rounded */ - ulint align_no) /* in: align by this number which must be a - power of 2 */ -{ - return(ut_dulint_align_down(ut_dulint_add(n, align_no - 1), align_no)); -} - -/************************************************************ -The following function calculates the value of an integer n rounded -to the least product of align_no which is >= n. align_no -has to be a power of 2. */ -UNIV_INLINE -ulint -ut_calc_align( -/*==========*/ - /* out: rounded value */ - ulint n, /* in: number to be rounded */ - ulint align_no) /* in: align by this number */ -{ - ut_ad(align_no > 0); - ut_ad(((align_no - 1) & align_no) == 0); - - return((n + align_no - 1) & ~(align_no - 1)); -} - -/************************************************************* -The following function rounds up a pointer to the nearest aligned address. */ -UNIV_INLINE -void* -ut_align( -/*=====*/ - /* out: aligned pointer */ - void* ptr, /* in: pointer */ - ulint align_no) /* in: align by this number */ -{ - ut_ad(align_no > 0); - ut_ad(((align_no - 1) & align_no) == 0); - ut_ad(ptr); - - ut_ad(sizeof(void*) == sizeof(ulint)); - - return((void*)((((ulint)ptr) + align_no - 1) & ~(align_no - 1))); -} - -/************************************************************ -The following function calculates the value of an integer n rounded -to the biggest product of align_no which is <= n. align_no has to be a -power of 2. */ -UNIV_INLINE -ulint -ut_calc_align_down( -/*===============*/ - /* out: rounded value */ - ulint n, /* in: number to be rounded */ - ulint align_no) /* in: align by this number */ -{ - ut_ad(align_no > 0); - ut_ad(((align_no - 1) & align_no) == 0); - - return(n & ~(align_no - 1)); -} - -/************************************************************* -The following function rounds down a pointer to the nearest -aligned address. 
*/ -UNIV_INLINE -void* -ut_align_down( -/*==========*/ - /* out: aligned pointer */ - void* ptr, /* in: pointer */ - ulint align_no) /* in: align by this number */ -{ - ut_ad(align_no > 0); - ut_ad(((align_no - 1) & align_no) == 0); - ut_ad(ptr); - - ut_ad(sizeof(void*) == sizeof(ulint)); - - return((void*)((((ulint)ptr)) & ~(align_no - 1))); -} - -/************************************************************* -The following function computes the offset of a pointer from the nearest -aligned address. */ -UNIV_INLINE -ulint -ut_align_offset( -/*============*/ - /* out: distance from - aligned pointer */ - const void* ptr, /* in: pointer */ - ulint align_no) /* in: align by this number */ -{ - ut_ad(align_no > 0); - ut_ad(((align_no - 1) & align_no) == 0); - ut_ad(ptr); - - ut_ad(sizeof(void*) == sizeof(ulint)); - - return(((ulint)ptr) & (align_no - 1)); -} - -/********************************************************************* -Gets the nth bit of a ulint. */ -UNIV_INLINE -ibool -ut_bit_get_nth( -/*===========*/ - /* out: TRUE if nth bit is 1; 0th bit is defined to - be the least significant */ - ulint a, /* in: ulint */ - ulint n) /* in: nth bit requested */ -{ - ut_ad(n < 8 * sizeof(ulint)); -#if TRUE != 1 -# error "TRUE != 1" -#endif - return(1 & (a >> n)); -} - -/********************************************************************* -Sets the nth bit of a ulint. 
*/ -UNIV_INLINE -ulint -ut_bit_set_nth( -/*===========*/ - /* out: the ulint with the bit set as requested */ - ulint a, /* in: ulint */ - ulint n, /* in: nth bit requested */ - ibool val) /* in: value for the bit to set */ -{ - ut_ad(n < 8 * sizeof(ulint)); -#if TRUE != 1 -# error "TRUE != 1" -#endif - if (val) { - return(((ulint) 1 << n) | a); - } else { - return(~((ulint) 1 << n) & a); - } -} diff --git a/storage/innobase/include/ut0dbg.h b/storage/innobase/include/ut0dbg.h deleted file mode 100644 index a317f35f4be..00000000000 --- a/storage/innobase/include/ut0dbg.h +++ /dev/null @@ -1,113 +0,0 @@ -/********************************************************************* -Debug utilities for Innobase - -(c) 1994, 1995 Innobase Oy - -Created 1/30/1994 Heikki Tuuri -**********************************************************************/ - -#ifndef ut0dbg_h -#define ut0dbg_h - -#include "univ.i" -#include <stdlib.h> -#include "os0thread.h" - -#if defined(__GNUC__) && (__GNUC__ > 2) -# define UT_DBG_FAIL(EXPR) UNIV_UNLIKELY(!((ulint)(EXPR))) -#else -extern ulint ut_dbg_zero; /* This is used to eliminate - compiler warnings */ -# define UT_DBG_FAIL(EXPR) !((ulint)(EXPR) + ut_dbg_zero) -#endif - -/***************************************************************** -Report a failed assertion. */ - -void -ut_dbg_assertion_failed( -/*====================*/ - const char* expr, /* in: the failed assertion */ - const char* file, /* in: source file containing the assertion */ - ulint line); /* in: line number of the assertion */ - -#ifdef __NETWARE__ -/* Flag for ignoring further assertion failures. -On NetWare, have a graceful exit rather than a segfault to avoid abends. */ -extern ibool panic_shutdown; -/* Abort the execution. */ -void ut_dbg_panic(void); -# define UT_DBG_PANIC ut_dbg_panic() -/* Stop threads in ut_a(). 
*/ -# define UT_DBG_STOP while (0) /* We do not do this on NetWare */ -#else /* __NETWARE__ */ -# if defined(__WIN__) || defined(__INTEL_COMPILER) -# undef UT_DBG_USE_ABORT -# elif defined(__GNUC__) && (__GNUC__ > 2) -# define UT_DBG_USE_ABORT -# endif - -# ifndef UT_DBG_USE_ABORT -/* A null pointer that will be dereferenced to trigger a memory trap */ -extern ulint* ut_dbg_null_ptr; -# endif - -# if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT) -/* Flag for indicating that all threads should stop. This will be set -by ut_dbg_assertion_failed(). */ -extern ibool ut_dbg_stop_threads; - -/***************************************************************** -Stop a thread after assertion failure. */ - -void -ut_dbg_stop_thread( -/*===============*/ - const char* file, - ulint line); -# endif - -# ifdef UT_DBG_USE_ABORT -/* Abort the execution. */ -# define UT_DBG_PANIC abort() -/* Stop threads (null operation) */ -# define UT_DBG_STOP while (0) -# else /* UT_DBG_USE_ABORT */ -/* Abort the execution. */ -# define UT_DBG_PANIC \ - if (*(ut_dbg_null_ptr)) ut_dbg_null_ptr = NULL -/* Stop threads in ut_a(). */ -# define UT_DBG_STOP do \ - if (UNIV_UNLIKELY(ut_dbg_stop_threads)) { \ - ut_dbg_stop_thread(__FILE__, (ulint) __LINE__); \ - } while (0) -# endif /* UT_DBG_USE_ABORT */ -#endif /* __NETWARE__ */ - -/* Abort execution if EXPR does not evaluate to nonzero. */ -#define ut_a(EXPR) do { \ - if (UT_DBG_FAIL(EXPR)) { \ - ut_dbg_assertion_failed(#EXPR, \ - __FILE__, (ulint) __LINE__); \ - UT_DBG_PANIC; \ - } \ - UT_DBG_STOP; \ -} while (0) - -/* Abort execution. 
*/ -#define ut_error do { \ - ut_dbg_assertion_failed(0, __FILE__, (ulint) __LINE__); \ - UT_DBG_PANIC; \ -} while (0) - -#ifdef UNIV_DEBUG -#define ut_ad(EXPR) ut_a(EXPR) -#define ut_d(EXPR) do {EXPR;} while (0) -#else -#define ut_ad(EXPR) -#define ut_d(EXPR) -#endif - -#define UT_NOT_USED(A) A = A - -#endif diff --git a/storage/innobase/include/ut0list.h b/storage/innobase/include/ut0list.h deleted file mode 100644 index c35cf202600..00000000000 --- a/storage/innobase/include/ut0list.h +++ /dev/null @@ -1,148 +0,0 @@ -/*********************************************************************** -A double-linked list. This differs from the one in ut0lst.h in that in this -one, each list node contains a pointer to the data, whereas the one in -ut0lst.h uses a strategy where the list pointers are embedded in the data -items themselves. - -Use this one when you need to store arbitrary data in the list where you -can't embed the list pointers in the data, if a data item needs to be -stored in multiple lists, etc. - -Note about the memory management: ib_list_t is a fixed-size struct whose -allocation/deallocation is done through ib_list_create/ib_list_free, but the -memory for the list nodes is allocated through a user-given memory heap, -which can either be the same for all nodes or vary per node. Most users will -probably want to create a memory heap to store the item-specific data, and -pass in this same heap to the list node creation functions, thus -automatically freeing the list node when the item's heap is freed. - -************************************************************************/ - - -#ifndef IB_LIST_H -#define IB_LIST_H - -#include "mem0mem.h" - -typedef struct ib_list_struct ib_list_t; -typedef struct ib_list_node_struct ib_list_node_t; -typedef struct ib_list_helper_struct ib_list_helper_t; - -/******************************************************************** -Create a new list using mem_alloc. 
Lists created with this function must be -freed with ib_list_free. */ - -ib_list_t* -ib_list_create(void); -/*=================*/ - /* out: list */ - - -/******************************************************************** -Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for -lists created with this function. */ - -ib_list_t* -ib_list_create_heap( -/*================*/ - /* out: list */ - mem_heap_t* heap); /* in: memory heap to use */ - -/******************************************************************** -Free a list. */ - -void -ib_list_free( -/*=========*/ - ib_list_t* list); /* in: list */ - -/******************************************************************** -Add the data to the start of the list. */ - -ib_list_node_t* -ib_list_add_first( -/*==============*/ - /* out: new list node*/ - ib_list_t* list, /* in: list */ - void* data, /* in: data */ - mem_heap_t* heap); /* in: memory heap to use */ - -/******************************************************************** -Add the data to the end of the list. */ - -ib_list_node_t* -ib_list_add_last( -/*=============*/ - /* out: new list node*/ - ib_list_t* list, /* in: list */ - void* data, /* in: data */ - mem_heap_t* heap); /* in: memory heap to use */ - -/******************************************************************** -Add the data after the indicated node. */ - -ib_list_node_t* -ib_list_add_after( -/*==============*/ - /* out: new list node*/ - ib_list_t* list, /* in: list */ - ib_list_node_t* prev_node, /* in: node preceding new node (can - be NULL) */ - void* data, /* in: data */ - mem_heap_t* heap); /* in: memory heap to use */ - -/******************************************************************** -Remove the node from the list. */ - -void -ib_list_remove( -/*===========*/ - ib_list_t* list, /* in: list */ - ib_list_node_t* node); /* in: node to remove */ - -/******************************************************************** -Get the first node in the list. 
*/ -UNIV_INLINE -ib_list_node_t* -ib_list_get_first( -/*==============*/ - /* out: first node, or NULL */ - ib_list_t* list); /* in: list */ - -/******************************************************************** -Get the last node in the list. */ -UNIV_INLINE -ib_list_node_t* -ib_list_get_last( -/*=============*/ - /* out: last node, or NULL */ - ib_list_t* list); /* in: list */ - -/* List. */ -struct ib_list_struct { - ib_list_node_t* first; /* first node */ - ib_list_node_t* last; /* last node */ - ibool is_heap_list; /* TRUE if this list was - allocated through a heap */ -}; - -/* A list node. */ -struct ib_list_node_struct { - ib_list_node_t* prev; /* previous node */ - ib_list_node_t* next; /* next node */ - void* data; /* user data */ -}; - -/* Quite often, the only additional piece of data you need is the per-item -memory heap, so we have this generic struct available to use in those -cases. */ -struct ib_list_helper_struct { - mem_heap_t* heap; /* memory heap */ - void* data; /* user data */ -}; - -#ifndef UNIV_NONINL -#include "ut0list.ic" -#endif - -#endif diff --git a/storage/innobase/include/ut0list.ic b/storage/innobase/include/ut0list.ic deleted file mode 100644 index c2d3e4557f0..00000000000 --- a/storage/innobase/include/ut0list.ic +++ /dev/null @@ -1,23 +0,0 @@ -/******************************************************************** -Get the first node in the list. */ -UNIV_INLINE -ib_list_node_t* -ib_list_get_first( -/*==============*/ - /* out: first node, or NULL */ - ib_list_t* list) /* in: list */ -{ - return(list->first); -} - -/******************************************************************** -Get the last node in the list. 
*/ -UNIV_INLINE -ib_list_node_t* -ib_list_get_last( -/*=============*/ - /* out: last node, or NULL */ - ib_list_t* list) /* in: list */ -{ - return(list->last); -} diff --git a/storage/innobase/include/ut0lst.h b/storage/innobase/include/ut0lst.h deleted file mode 100644 index ebe2803fe23..00000000000 --- a/storage/innobase/include/ut0lst.h +++ /dev/null @@ -1,227 +0,0 @@ -/********************************************************************** -List utilities - -(c) 1995 Innobase Oy - -Created 9/10/1995 Heikki Tuuri -***********************************************************************/ - -#ifndef ut0lst_h -#define ut0lst_h - -#include "univ.i" - -/* This module implements the two-way linear list which should be used -if a list is used in the database. Note that a single struct may belong -to two or more lists, provided that the list are given different names. -An example of the usage of the lists can be found in fil0fil.c. */ - -/*********************************************************************** -This macro expands to the unnamed type definition of a struct which acts -as the two-way list base node. The base node contains pointers -to both ends of the list and a count of nodes in the list (excluding -the base node from the count). TYPE should be the list node type name. */ - -#define UT_LIST_BASE_NODE_T(TYPE)\ -struct {\ - ulint count; /* count of nodes in list */\ - TYPE * start; /* pointer to list start, NULL if empty */\ - TYPE * end; /* pointer to list end, NULL if empty */\ -}\ - -/*********************************************************************** -This macro expands to the unnamed type definition of a struct which -should be embedded in the nodes of the list, the node type must be a struct. -This struct contains the pointers to next and previous nodes in the list. -The name of the field in the node struct should be the name given -to the list. TYPE should be the list node type name. 
Example of usage: - -typedef struct LRU_node_struct LRU_node_t; -struct LRU_node_struct { - UT_LIST_NODE_T(LRU_node_t) LRU_list; - ... -} -The example implements an LRU list of name LRU_list. Its nodes are of type -LRU_node_t. -*/ - -#define UT_LIST_NODE_T(TYPE)\ -struct {\ - TYPE * prev; /* pointer to the previous node,\ - NULL if start of list */\ - TYPE * next; /* pointer to next node, NULL if end of list */\ -}\ - -/*********************************************************************** -Initializes the base node of a two-way list. */ - -#define UT_LIST_INIT(BASE)\ -{\ - (BASE).count = 0;\ - (BASE).start = NULL;\ - (BASE).end = NULL;\ -}\ - -/*********************************************************************** -Adds the node as the first element in a two-way linked list. -BASE has to be the base node (not a pointer to it). N has to be -the pointer to the node to be added to the list. NAME is the list name. */ - -#define UT_LIST_ADD_FIRST(NAME, BASE, N)\ -{\ - ut_ad(N);\ - ((BASE).count)++;\ - ((N)->NAME).next = (BASE).start;\ - ((N)->NAME).prev = NULL;\ - if ((BASE).start != NULL) {\ - ut_ad((BASE).start != (N));\ - (((BASE).start)->NAME).prev = (N);\ - }\ - (BASE).start = (N);\ - if ((BASE).end == NULL) {\ - (BASE).end = (N);\ - }\ -}\ - -/*********************************************************************** -Adds the node as the last element in a two-way linked list. -BASE has to be the base node (not a pointer to it). N has to be -the pointer to the node to be added to the list. NAME is the list name. */ - -#define UT_LIST_ADD_LAST(NAME, BASE, N)\ -{\ - ut_ad(N);\ - ((BASE).count)++;\ - ((N)->NAME).prev = (BASE).end;\ - ((N)->NAME).next = NULL;\ - if ((BASE).end != NULL) {\ - ut_ad((BASE).end != (N));\ - (((BASE).end)->NAME).next = (N);\ - }\ - (BASE).end = (N);\ - if ((BASE).start == NULL) {\ - (BASE).start = (N);\ - }\ -}\ - -/*********************************************************************** -Inserts a NODE2 after NODE1 in a list. 
-BASE has to be the base node (not a pointer to it). NAME is the list -name, NODE1 and NODE2 are pointers to nodes. */ - -#define UT_LIST_INSERT_AFTER(NAME, BASE, NODE1, NODE2)\ -{\ - ut_ad(NODE1);\ - ut_ad(NODE2);\ - ut_ad((NODE1) != (NODE2));\ - ((BASE).count)++;\ - ((NODE2)->NAME).prev = (NODE1);\ - ((NODE2)->NAME).next = ((NODE1)->NAME).next;\ - if (((NODE1)->NAME).next != NULL) {\ - ((((NODE1)->NAME).next)->NAME).prev = (NODE2);\ - }\ - ((NODE1)->NAME).next = (NODE2);\ - if ((BASE).end == (NODE1)) {\ - (BASE).end = (NODE2);\ - }\ -}\ - -/* Invalidate the pointers in a list node. */ -#ifdef UNIV_LIST_DEBUG -# define UT_LIST_REMOVE_CLEAR(NAME, N) \ -((N)->NAME.prev = (N)->NAME.next = (void*) -1) -#else -# define UT_LIST_REMOVE_CLEAR(NAME, N) while (0) -#endif - -/*********************************************************************** -Removes a node from a two-way linked list. BASE has to be the base node -(not a pointer to it). N has to be the pointer to the node to be removed -from the list. NAME is the list name. */ - -#define UT_LIST_REMOVE(NAME, BASE, N) \ -do { \ - ut_ad(N); \ - ut_a((BASE).count > 0); \ - ((BASE).count)--; \ - if (((N)->NAME).next != NULL) { \ - ((((N)->NAME).next)->NAME).prev = ((N)->NAME).prev; \ - } else { \ - (BASE).end = ((N)->NAME).prev; \ - } \ - if (((N)->NAME).prev != NULL) { \ - ((((N)->NAME).prev)->NAME).next = ((N)->NAME).next; \ - } else { \ - (BASE).start = ((N)->NAME).next; \ - } \ - UT_LIST_REMOVE_CLEAR(NAME, N); \ -} while (0) - -/************************************************************************ -Gets the next node in a two-way list. NAME is the name of the list -and N is pointer to a node. */ - -#define UT_LIST_GET_NEXT(NAME, N)\ - (((N)->NAME).next) - -/************************************************************************ -Gets the previous node in a two-way list. NAME is the name of the list -and N is pointer to a node. 
*/ - -#define UT_LIST_GET_PREV(NAME, N)\ - (((N)->NAME).prev) - -/************************************************************************ -Alternative macro to get the number of nodes in a two-way list, i.e., -its length. BASE is the base node (not a pointer to it). */ - -#define UT_LIST_GET_LEN(BASE)\ - (BASE).count - -/************************************************************************ -Gets the first node in a two-way list, or returns NULL, -if the list is empty. BASE is the base node (not a pointer to it). */ - -#define UT_LIST_GET_FIRST(BASE)\ - (BASE).start - -/************************************************************************ -Gets the last node in a two-way list, or returns NULL, -if the list is empty. BASE is the base node (not a pointer to it). */ - -#define UT_LIST_GET_LAST(BASE)\ - (BASE).end - -/************************************************************************ -Checks the consistency of a two-way list. NAME is the name of the list, -TYPE is the node type, and BASE is the base node (not a pointer to it). 
*/ - -#define UT_LIST_VALIDATE(NAME, TYPE, BASE)\ -{\ - ulint ut_list_i_313;\ - TYPE * ut_list_node_313;\ -\ - ut_list_node_313 = (BASE).start;\ -\ - for (ut_list_i_313 = 0; ut_list_i_313 < (BASE).count;\ - ut_list_i_313++) {\ - ut_a(ut_list_node_313);\ - ut_list_node_313 = (ut_list_node_313->NAME).next;\ - }\ -\ - ut_a(ut_list_node_313 == NULL);\ -\ - ut_list_node_313 = (BASE).end;\ -\ - for (ut_list_i_313 = 0; ut_list_i_313 < (BASE).count;\ - ut_list_i_313++) {\ - ut_a(ut_list_node_313);\ - ut_list_node_313 = (ut_list_node_313->NAME).prev;\ - }\ -\ - ut_a(ut_list_node_313 == NULL);\ -}\ - - -#endif - diff --git a/storage/innobase/include/ut0mem.h b/storage/innobase/include/ut0mem.h deleted file mode 100644 index e56895bc142..00000000000 --- a/storage/innobase/include/ut0mem.h +++ /dev/null @@ -1,212 +0,0 @@ -/*********************************************************************** -Memory primitives - -(c) 1994, 1995 Innobase Oy - -Created 5/30/1994 Heikki Tuuri -************************************************************************/ - -#ifndef ut0mem_h -#define ut0mem_h - -#include "univ.i" -#include <string.h> -#include <stdlib.h> - -/* The total amount of memory currently allocated from the OS with malloc */ -extern ulint ut_total_allocated_memory; - -UNIV_INLINE -void* -ut_memcpy(void* dest, const void* sour, ulint n); - -UNIV_INLINE -void* -ut_memmove(void* dest, const void* sour, ulint n); - -UNIV_INLINE -int -ut_memcmp(const void* str1, const void* str2, ulint n); - - -/************************************************************************** -Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is -defined and set_to_zero is TRUE. 
*/ - -void* -ut_malloc_low( -/*==========*/ - /* out, own: allocated memory */ - ulint n, /* in: number of bytes to allocate */ - ibool set_to_zero, /* in: TRUE if allocated memory - should be set to zero if - UNIV_SET_MEM_TO_ZERO is defined */ - ibool assert_on_error); /* in: if TRUE, we crash mysqld if - the memory cannot be allocated */ -/************************************************************************** -Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is -defined. */ - -void* -ut_malloc( -/*======*/ - /* out, own: allocated memory */ - ulint n); /* in: number of bytes to allocate */ -/************************************************************************** -Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs -out. It cannot be used if we want to return an error message. Prints to -stderr a message if fails. */ - -ibool -ut_test_malloc( -/*===========*/ - /* out: TRUE if succeeded */ - ulint n); /* in: try to allocate this many bytes */ -/************************************************************************** -Frees a memory block allocated with ut_malloc. */ - -void -ut_free( -/*====*/ - void* ptr); /* in, own: memory block */ -/************************************************************************** -Implements realloc. This is needed by /pars/lexyy.c. Otherwise, you should not -use this function because the allocation functions in mem0mem.h are the -recommended ones in InnoDB. - -man realloc in Linux, 2004: - - realloc() changes the size of the memory block pointed to - by ptr to size bytes. The contents will be unchanged to - the minimum of the old and new sizes; newly allocated mem - ory will be uninitialized. If ptr is NULL, the call is - equivalent to malloc(size); if size is equal to zero, the - call is equivalent to free(ptr). Unless ptr is NULL, it - must have been returned by an earlier call to malloc(), - calloc() or realloc(). 
- -RETURN VALUE - realloc() returns a pointer to the newly allocated memory, - which is suitably aligned for any kind of variable and may - be different from ptr, or NULL if the request fails. If - size was equal to 0, either NULL or a pointer suitable to - be passed to free() is returned. If realloc() fails the - original block is left untouched - it is not freed or - moved. */ - -void* -ut_realloc( -/*=======*/ - /* out, own: pointer to new mem block or NULL */ - void* ptr, /* in: pointer to old block or NULL */ - ulint size); /* in: desired size */ -/************************************************************************** -Frees in shutdown all allocated memory not freed yet. */ - -void -ut_free_all_mem(void); -/*=================*/ - -UNIV_INLINE -char* -ut_strcpy(char* dest, const char* sour); - -UNIV_INLINE -ulint -ut_strlen(const char* str); - -UNIV_INLINE -int -ut_strcmp(const void* str1, const void* str2); - -/************************************************************************** -Copies up to size - 1 characters from the NUL-terminated string src to -dst, NUL-terminating the result. Returns strlen(src), so truncation -occurred if the return value >= size. */ - -ulint -ut_strlcpy( -/*=======*/ - /* out: strlen(src) */ - char* dst, /* in: destination buffer */ - const char* src, /* in: source buffer */ - ulint size); /* in: size of destination buffer */ - -/************************************************************************** -Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last -(size - 1) bytes of src, not the first. */ - -ulint -ut_strlcpy_rev( -/*===========*/ - /* out: strlen(src) */ - char* dst, /* in: destination buffer */ - const char* src, /* in: source buffer */ - ulint size); /* in: size of destination buffer */ - -/************************************************************************** -Compute strlen(ut_strcpyq(str, q)). 
*/ -UNIV_INLINE -ulint -ut_strlenq( -/*=======*/ - /* out: length of the string when quoted */ - const char* str, /* in: null-terminated string */ - char q); /* in: the quote character */ - -/************************************************************************** -Make a quoted copy of a NUL-terminated string. Leading and trailing -quotes will not be included; only embedded quotes will be escaped. -See also ut_strlenq() and ut_memcpyq(). */ - -char* -ut_strcpyq( -/*=======*/ - /* out: pointer to end of dest */ - char* dest, /* in: output buffer */ - char q, /* in: the quote character */ - const char* src); /* in: null-terminated string */ - -/************************************************************************** -Make a quoted copy of a fixed-length string. Leading and trailing -quotes will not be included; only embedded quotes will be escaped. -See also ut_strlenq() and ut_strcpyq(). */ - -char* -ut_memcpyq( -/*=======*/ - /* out: pointer to end of dest */ - char* dest, /* in: output buffer */ - char q, /* in: the quote character */ - const char* src, /* in: string to be quoted */ - ulint len); /* in: length of src */ - -/************************************************************************** -Return the number of times s2 occurs in s1. Overlapping instances of s2 -are only counted once. */ - -ulint -ut_strcount( -/*========*/ - /* out: the number of times s2 occurs in s1 */ - const char* s1, /* in: string to search in */ - const char* s2); /* in: string to search for */ - -/************************************************************************** -Replace every occurrence of s1 in str with s2. Overlapping instances of s1 -are only replaced once. 
*/ - -char * -ut_strreplace( -/*==========*/ - /* out, own: modified string, must be - freed with mem_free() */ - const char* str, /* in: string to operate on */ - const char* s1, /* in: string to replace */ - const char* s2); /* in: string to replace s1 with */ - -#ifndef UNIV_NONINL -#include "ut0mem.ic" -#endif - -#endif diff --git a/storage/innobase/include/ut0mem.ic b/storage/innobase/include/ut0mem.ic deleted file mode 100644 index e0253ebf618..00000000000 --- a/storage/innobase/include/ut0mem.ic +++ /dev/null @@ -1,70 +0,0 @@ -/*********************************************************************** -Memory primitives - -(c) 1994, 1995 Innobase Oy - -Created 5/30/1994 Heikki Tuuri -************************************************************************/ - -UNIV_INLINE -void* -ut_memcpy(void* dest, const void* sour, ulint n) -{ - return(memcpy(dest, sour, n)); -} - -UNIV_INLINE -void* -ut_memmove(void* dest, const void* sour, ulint n) -{ - return(memmove(dest, sour, n)); -} - -UNIV_INLINE -int -ut_memcmp(const void* str1, const void* str2, ulint n) -{ - return(memcmp(str1, str2, n)); -} - -UNIV_INLINE -char* -ut_strcpy(char* dest, const char* sour) -{ - return(strcpy(dest, sour)); -} - -UNIV_INLINE -ulint -ut_strlen(const char* str) -{ - return(strlen(str)); -} - -UNIV_INLINE -int -ut_strcmp(const void* str1, const void* str2) -{ - return(strcmp((const char*)str1, (const char*)str2)); -} - -/************************************************************************** -Compute strlen(ut_strcpyq(str, q)). 
*/ -UNIV_INLINE -ulint -ut_strlenq( -/*=======*/ - /* out: length of the string when quoted */ - const char* str, /* in: null-terminated string */ - char q) /* in: the quote character */ -{ - ulint len; - - for (len = 0; *str; len++, str++) { - if (*str == q) { - len++; - } - } - - return(len); -} diff --git a/storage/innobase/include/ut0rnd.h b/storage/innobase/include/ut0rnd.h deleted file mode 100644 index 3f3fce1075c..00000000000 --- a/storage/innobase/include/ut0rnd.h +++ /dev/null @@ -1,121 +0,0 @@ -/********************************************************************** -Random numbers and hashing - -(c) 1994, 1995 Innobase Oy - -Created 1/20/1994 Heikki Tuuri -***********************************************************************/ - -#ifndef ut0rnd_h -#define ut0rnd_h - -#include "univ.i" - -#include "ut0byte.h" - -/* The 'character code' for end of field or string (used -in folding records */ -#define UT_END_OF_FIELD 257 - -/************************************************************ -This is used to set the random number seed. */ -UNIV_INLINE -void -ut_rnd_set_seed( -/*============*/ - ulint seed); /* in: seed */ -/************************************************************ -The following function generates a series of 'random' ulint integers. */ -UNIV_INLINE -ulint -ut_rnd_gen_next_ulint( -/*==================*/ - /* out: the next 'random' number */ - ulint rnd); /* in: the previous random number value */ -/************************************************************* -The following function generates 'random' ulint integers which -enumerate the value space (let there be N of them) of ulint integers -in a pseudo-random fashion. Note that the same integer is repeated -always after N calls to the generator. */ -UNIV_INLINE -ulint -ut_rnd_gen_ulint(void); -/*==================*/ - /* out: the 'random' number */ -/************************************************************ -Generates a random integer from a given interval. 
*/ -UNIV_INLINE -ulint -ut_rnd_interval( -/*============*/ - /* out: the 'random' number */ - ulint low, /* in: low limit; can generate also this value */ - ulint high); /* in: high limit; can generate also this value */ -/************************************************************* -Generates a random iboolean value. */ -UNIV_INLINE -ibool -ut_rnd_gen_ibool(void); -/*=================*/ - /* out: the random value */ -/*********************************************************** -The following function generates a hash value for a ulint integer -to a hash table of size table_size, which should be a prime or some -random number to work reliably. */ -UNIV_INLINE -ulint -ut_hash_ulint( -/*==========*/ - /* out: hash value */ - ulint key, /* in: value to be hashed */ - ulint table_size); /* in: hash table size */ -/***************************************************************** -Folds a pair of ulints. */ -UNIV_INLINE -ulint -ut_fold_ulint_pair( -/*===============*/ - /* out: folded value */ - ulint n1, /* in: ulint */ - ulint n2); /* in: ulint */ -/***************************************************************** -Folds a dulint. */ -UNIV_INLINE -ulint -ut_fold_dulint( -/*===========*/ - /* out: folded value */ - dulint d); /* in: dulint */ -/***************************************************************** -Folds a character string ending in the null character. */ -UNIV_INLINE -ulint -ut_fold_string( -/*===========*/ - /* out: folded value */ - const char* str); /* in: null-terminated string */ -/***************************************************************** -Folds a binary string. */ -UNIV_INLINE -ulint -ut_fold_binary( -/*===========*/ - /* out: folded value */ - const byte* str, /* in: string of bytes */ - ulint len); /* in: length */ -/*************************************************************** -Looks for a prime number slightly greater than the given argument. -The prime is chosen so that it is not near any power of 2. 
*/ - -ulint -ut_find_prime( -/*==========*/ - /* out: prime */ - ulint n); /* in: positive number > 100 */ - - -#ifndef UNIV_NONINL -#include "ut0rnd.ic" -#endif - -#endif diff --git a/storage/innobase/include/ut0rnd.ic b/storage/innobase/include/ut0rnd.ic deleted file mode 100644 index 625c378489a..00000000000 --- a/storage/innobase/include/ut0rnd.ic +++ /dev/null @@ -1,221 +0,0 @@ -/****************************************************************** -Random numbers and hashing - -(c) 1994, 1995 Innobase Oy - -Created 5/30/1994 Heikki Tuuri -*******************************************************************/ - -#define UT_HASH_RANDOM_MASK 1463735687 -#define UT_HASH_RANDOM_MASK2 1653893711 -#define UT_RND1 151117737 -#define UT_RND2 119785373 -#define UT_RND3 85689495 -#define UT_RND4 76595339 -#define UT_SUM_RND2 98781234 -#define UT_SUM_RND3 126792457 -#define UT_SUM_RND4 63498502 -#define UT_XOR_RND1 187678878 -#define UT_XOR_RND2 143537923 - -extern ulint ut_rnd_ulint_counter; - -/************************************************************ -This is used to set the random number seed. */ -UNIV_INLINE -void -ut_rnd_set_seed( -/*============*/ - ulint seed) /* in: seed */ -{ - ut_rnd_ulint_counter = seed; -} - -/************************************************************ -The following function generates a series of 'random' ulint integers. 
*/ -UNIV_INLINE -ulint -ut_rnd_gen_next_ulint( -/*==================*/ - /* out: the next 'random' number */ - ulint rnd) /* in: the previous random number value */ -{ - ulint n_bits; - - n_bits = 8 * sizeof(ulint); - - rnd = UT_RND2 * rnd + UT_SUM_RND3; - rnd = UT_XOR_RND1 ^ rnd; - rnd = (rnd << 20) + (rnd >> (n_bits - 20)); - rnd = UT_RND3 * rnd + UT_SUM_RND4; - rnd = UT_XOR_RND2 ^ rnd; - rnd = (rnd << 20) + (rnd >> (n_bits - 20)); - rnd = UT_RND1 * rnd + UT_SUM_RND2; - - return(rnd); -} - -/************************************************************ -The following function generates 'random' ulint integers which -enumerate the value space of ulint integers in a pseudo random -fashion. Note that the same integer is repeated always after -2 to power 32 calls to the generator (if ulint is 32-bit). */ -UNIV_INLINE -ulint -ut_rnd_gen_ulint(void) -/*==================*/ - /* out: the 'random' number */ -{ - ulint rnd; - ulint n_bits; - - n_bits = 8 * sizeof(ulint); - - ut_rnd_ulint_counter = UT_RND1 * ut_rnd_ulint_counter + UT_RND2; - - rnd = ut_rnd_gen_next_ulint(ut_rnd_ulint_counter); - - return(rnd); -} - -/************************************************************ -Generates a random integer from a given interval. */ -UNIV_INLINE -ulint -ut_rnd_interval( -/*============*/ - /* out: the 'random' number */ - ulint low, /* in: low limit; can generate also this value */ - ulint high) /* in: high limit; can generate also this value */ -{ - ulint rnd; - - ut_ad(high >= low); - - if (low == high) { - - return(low); - } - - rnd = ut_rnd_gen_ulint(); - - return(low + (rnd % (high - low + 1))); -} - -/************************************************************* -Generates a random iboolean value. 
*/ -UNIV_INLINE -ibool -ut_rnd_gen_ibool(void) -/*=================*/ - /* out: the random value */ -{ - ulint x; - - x = ut_rnd_gen_ulint(); - - if (((x >> 20) + (x >> 15)) & 1) { - - return(TRUE); - } - - return(FALSE); -} - -/*********************************************************** -The following function generates a hash value for a ulint integer -to a hash table of size table_size, which should be a prime -or some random number for the hash table to work reliably. */ -UNIV_INLINE -ulint -ut_hash_ulint( -/*==========*/ - /* out: hash value */ - ulint key, /* in: value to be hashed */ - ulint table_size) /* in: hash table size */ -{ - key = key ^ UT_HASH_RANDOM_MASK2; - - return(key % table_size); -} - -/***************************************************************** -Folds a pair of ulints. */ -UNIV_INLINE -ulint -ut_fold_ulint_pair( -/*===============*/ - /* out: folded value */ - ulint n1, /* in: ulint */ - ulint n2) /* in: ulint */ -{ - return(((((n1 ^ n2 ^ UT_HASH_RANDOM_MASK2) << 8) + n1) - ^ UT_HASH_RANDOM_MASK) + n2); -} - -/***************************************************************** -Folds a dulint. */ -UNIV_INLINE -ulint -ut_fold_dulint( -/*===========*/ - /* out: folded value */ - dulint d) /* in: dulint */ -{ - return(ut_fold_ulint_pair(ut_dulint_get_low(d), - ut_dulint_get_high(d))); -} - -/***************************************************************** -Folds a character string ending in the null character. */ -UNIV_INLINE -ulint -ut_fold_string( -/*===========*/ - /* out: folded value */ - const char* str) /* in: null-terminated string */ -{ -#ifdef UNIV_DEBUG - ulint i = 0; -#endif - ulint fold = 0; - - ut_ad(str); - - while (*str != '\0') { - -#ifdef UNIV_DEBUG - i++; - ut_a(i < 100); -#endif - - fold = ut_fold_ulint_pair(fold, (ulint)(*str)); - str++; - } - - return(fold); -} - -/***************************************************************** -Folds a binary string. 
*/ -UNIV_INLINE -ulint -ut_fold_binary( -/*===========*/ - /* out: folded value */ - const byte* str, /* in: string of bytes */ - ulint len) /* in: length */ -{ - const byte* str_end = str + len; - ulint fold = 0; - - ut_ad(str); - - while (str < str_end) { - fold = ut_fold_ulint_pair(fold, (ulint)(*str)); - - str++; - } - - return(fold); -} diff --git a/storage/innobase/include/ut0sort.h b/storage/innobase/include/ut0sort.h deleted file mode 100644 index 87d30dee6f2..00000000000 --- a/storage/innobase/include/ut0sort.h +++ /dev/null @@ -1,91 +0,0 @@ -/********************************************************************** -Sort utility - -(c) 1995 Innobase Oy - -Created 11/9/1995 Heikki Tuuri -***********************************************************************/ - -#ifndef ut0sort_h -#define ut0sort_h - -#include "univ.i" - -/* This module gives a macro definition of the body of -a standard sort function for an array of elements of any -type. The comparison function is given as a parameter to -the macro. The sort algorithm is mergesort which has logarithmic -worst case. -*/ - -/*********************************************************************** -This macro expands to the body of a standard sort function. -The sort function uses mergesort and must be defined separately -for each type of array. -Also the comparison function has to be defined individually -for each array cell type. SORT_FUN is the sort function name. -The function takes the array to be sorted (ARR), -the array of auxiliary space (AUX_ARR) of same size, -and the low (LOW), inclusive, and high (HIGH), noninclusive, -limits for the sort interval as arguments. -CMP_FUN is the comparison function name. It takes as arguments -two elements from the array and returns 1, if the first is bigger, -0 if equal, and -1 if the second bigger. For an eaxmaple of use -see test program in tsut.c. 
*/ - -#define UT_SORT_FUNCTION_BODY(SORT_FUN, ARR, AUX_ARR, LOW, HIGH, CMP_FUN)\ -{\ - ulint ut_sort_mid77;\ - ulint ut_sort_i77;\ - ulint ut_sort_low77;\ - ulint ut_sort_high77;\ -\ - ut_ad((LOW) < (HIGH));\ - ut_ad(ARR);\ - ut_ad(AUX_ARR);\ -\ - if ((LOW) == (HIGH) - 1) {\ - return;\ - } else if ((LOW) == (HIGH) - 2) {\ - if (CMP_FUN((ARR)[LOW], (ARR)[(HIGH) - 1]) > 0) {\ - (AUX_ARR)[LOW] = (ARR)[LOW];\ - (ARR)[LOW] = (ARR)[(HIGH) - 1];\ - (ARR)[(HIGH) - 1] = (AUX_ARR)[LOW];\ - }\ - return;\ - }\ -\ - ut_sort_mid77 = ((LOW) + (HIGH)) / 2;\ -\ - SORT_FUN((ARR), (AUX_ARR), (LOW), ut_sort_mid77);\ - SORT_FUN((ARR), (AUX_ARR), ut_sort_mid77, (HIGH));\ -\ - ut_sort_low77 = (LOW);\ - ut_sort_high77 = ut_sort_mid77;\ -\ - for (ut_sort_i77 = (LOW); ut_sort_i77 < (HIGH); ut_sort_i77++) {\ -\ - if (ut_sort_low77 >= ut_sort_mid77) {\ - (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\ - ut_sort_high77++;\ - } else if (ut_sort_high77 >= (HIGH)) {\ - (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\ - ut_sort_low77++;\ - } else if (CMP_FUN((ARR)[ut_sort_low77],\ - (ARR)[ut_sort_high77]) > 0) {\ - (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_high77];\ - ut_sort_high77++;\ - } else {\ - (AUX_ARR)[ut_sort_i77] = (ARR)[ut_sort_low77];\ - ut_sort_low77++;\ - }\ - }\ -\ - for (ut_sort_i77 = (LOW); ut_sort_i77 < (HIGH); ut_sort_i77++) {\ - (ARR)[ut_sort_i77] = (AUX_ARR)[ut_sort_i77];\ - }\ -}\ - - -#endif - diff --git a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h deleted file mode 100644 index 95d7ba017f1..00000000000 --- a/storage/innobase/include/ut0ut.h +++ /dev/null @@ -1,323 +0,0 @@ -/********************************************************************** -Various utilities - -(c) 1994, 1995 Innobase Oy - -Created 1/20/1994 Heikki Tuuri -***********************************************************************/ - -#ifndef ut0ut_h -#define ut0ut_h - -#include "univ.i" -#include <time.h> -#ifndef MYSQL_SERVER -#include <ctype.h> -#endif - -typedef time_t ib_time_t; - 
-#ifdef HAVE_PAUSE_INSTRUCTION -#define PAUSE_INSTRUCTION() {__asm__ __volatile__ ("pause");} -#else -#ifdef HAVE_FAKE_PAUSE_INSTRUCTION -#define PAUSE_INSTRUCTION() {__asm__ __volatile__ ("rep; nop");} -#else -#ifdef UNIV_SYNC_ATOMIC -#define PAUSE_INSTRUCTION() \ - { \ - volatile lint volatile_var; \ - os_compare_and_swap(&volatile_var, 0, 1); \ - } -#else -#define PAUSE_INSTRUCTION() -#endif -#endif -#endif - -/************************************************************ -Gets the high 32 bits in a ulint. That is makes a shift >> 32, -but since there seem to be compiler bugs in both gcc and Visual C++, -we do this by a special conversion. */ - -ulint -ut_get_high32( -/*==========*/ - /* out: a >> 32 */ - ulint a); /* in: ulint */ -/********************************************************** -Calculates the minimum of two ulints. */ -UNIV_INLINE -ulint -ut_min( -/*===*/ - /* out: minimum */ - ulint n1, /* in: first number */ - ulint n2); /* in: second number */ -/********************************************************** -Calculates the maximum of two ulints. */ -UNIV_INLINE -ulint -ut_max( -/*===*/ - /* out: maximum */ - ulint n1, /* in: first number */ - ulint n2); /* in: second number */ -/******************************************************************** -Calculates minimum of two ulint-pairs. */ -UNIV_INLINE -void -ut_pair_min( -/*========*/ - ulint* a, /* out: more significant part of minimum */ - ulint* b, /* out: less significant part of minimum */ - ulint a1, /* in: more significant part of first pair */ - ulint b1, /* in: less significant part of first pair */ - ulint a2, /* in: more significant part of second pair */ - ulint b2); /* in: less significant part of second pair */ -/********************************************************** -Compares two ulints. 
*/ -UNIV_INLINE -int -ut_ulint_cmp( -/*=========*/ - /* out: 1 if a > b, 0 if a == b, -1 if a < b */ - ulint a, /* in: ulint */ - ulint b); /* in: ulint */ -/*********************************************************** -Compares two pairs of ulints. */ -UNIV_INLINE -int -ut_pair_cmp( -/*========*/ - /* out: -1 if a < b, 0 if a == b, - 1 if a > b */ - ulint a1, /* in: more significant part of first pair */ - ulint a2, /* in: less significant part of first pair */ - ulint b1, /* in: more significant part of second pair */ - ulint b2); /* in: less significant part of second pair */ -/***************************************************************** -Calculates fast the remainder when divided by a power of two. */ -UNIV_INLINE -ulint -ut_2pow_remainder( -/*==============*/ /* out: remainder */ - ulint n, /* in: number to be divided */ - ulint m); /* in: divisor; power of 2 */ -/***************************************************************** -Calculates fast value rounded to a multiple of a power of 2. */ -UNIV_INLINE -ulint -ut_2pow_round( -/*==========*/ /* out: value of n rounded down to nearest - multiple of m */ - ulint n, /* in: number to be rounded */ - ulint m); /* in: divisor; power of 2 */ -/***************************************************************** -Calculates fast the 2-logarithm of a number, rounded upward to an -integer. */ -UNIV_INLINE -ulint -ut_2_log( -/*=====*/ - /* out: logarithm in the base 2, rounded upward */ - ulint n); /* in: number */ -/***************************************************************** -Calculates 2 to power n. */ -UNIV_INLINE -ulint -ut_2_exp( -/*=====*/ - /* out: 2 to power n */ - ulint n); /* in: number */ -/***************************************************************** -Calculates fast the number rounded up to the nearest power of 2. 
*/ - -ulint -ut_2_power_up( -/*==========*/ - /* out: first power of 2 which is >= n */ - ulint n) /* in: number != 0 */ - __attribute__((const)); - -/* Determine how many bytes (groups of 8 bits) are needed to -store the given number of bits. */ -#define UT_BITS_IN_BYTES(b) (((b) + 7) / 8) - -/**************************************************************** -Sort function for ulint arrays. */ - -void -ut_ulint_sort(ulint* arr, ulint* aux_arr, ulint low, ulint high); -/*============================================================*/ -/************************************************************ -The following function returns elapsed CPU time in milliseconds. */ - -ulint -ut_clock(void); -/************************************************************** -Returns system time. We do not specify the format of the time returned: -the only way to manipulate it is to use the function ut_difftime. */ - -ib_time_t -ut_time(void); -/*=========*/ -/************************************************************** -Returns system time. -Upon successful completion, the value 0 is returned; otherwise the -value -1 is returned and the global variable errno is set to indicate the -error. */ - -int -ut_usectime( -/*========*/ - /* out: 0 on success, -1 otherwise */ - ulint* sec, /* out: seconds since the Epoch */ - ulint* ms); /* out: microseconds since the Epoch+*sec */ - -/************************************************************** -Returns diff in microseconds (end_sec,end_ms) - (start_sec,start_ms). */ - -ib_longlong -ut_usecdiff( -/*========*/ - ulint end_sec, /* in: seconds since the Epoch */ - ulint end_ms, /* in: microseconds since the Epoch+*sec1 */ - ulint start_sec, /* in: seconds since the Epoch */ - ulint start_ms); /* in: microseconds since the Epoch+*sec2 */ - -/************************************************************** -Returns the difference of two times in seconds. 
*/ - -double -ut_difftime( -/*========*/ - /* out: time2 - time1 expressed in seconds */ - ib_time_t time2, /* in: time */ - ib_time_t time1); /* in: time */ -/************************************************************** -Prints a timestamp to a file. */ - -void -ut_print_timestamp( -/*===============*/ - FILE* file); /* in: file where to print */ -/************************************************************** -Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */ - -void -ut_sprintf_timestamp( -/*=================*/ - char* buf); /* in: buffer where to sprintf */ -/************************************************************** -Sprintfs a timestamp to a buffer with no spaces and with ':' characters -replaced by '_'. */ - -void -ut_sprintf_timestamp_without_extra_chars( -/*=====================================*/ - char* buf); /* in: buffer where to sprintf */ -/************************************************************** -Returns current year, month, day. */ - -void -ut_get_year_month_day( -/*==================*/ - ulint* year, /* out: current year */ - ulint* month, /* out: month */ - ulint* day); /* out: day */ -/***************************************************************** -Runs an idle loop on CPU. The argument gives the desired delay -in microseconds on 100 MHz Pentium + Visual C++. */ - -ulint -ut_delay( -/*=====*/ - /* out: dummy value */ - ulint delay); /* in: delay in microseconds on 100 MHz Pentium */ -/***************************************************************** -Prints the contents of a memory buffer in hex and ascii. */ - -void -ut_print_buf( -/*=========*/ - FILE* file, /* in: file where to print */ - const void* buf, /* in: memory buffer */ - ulint len); /* in: length of the buffer */ - -/************************************************************************** -Outputs a NUL-terminated file name, quoted with apostrophes. 
*/ - -void -ut_print_filename( -/*==============*/ - FILE* f, /* in: output stream */ - const char* name); /* in: name to print */ - -/* Forward declaration of transaction handle */ -struct trx_struct; - -/************************************************************************** -Outputs a fixed-length string, quoted as an SQL identifier. -If the string contains a slash '/', the string will be -output as two identifiers separated by a period (.), -as in SQL database_name.identifier. */ - -void -ut_print_name( -/*==========*/ - FILE* f, /* in: output stream */ - struct trx_struct*trx, /* in: transaction */ - ibool table_id,/* in: TRUE=print a table name, - FALSE=print other identifier */ - const char* name); /* in: name to print */ - -/************************************************************************** -Outputs a fixed-length string, quoted as an SQL identifier. -If the string contains a slash '/', the string will be -output as two identifiers separated by a period (.), -as in SQL database_name.identifier. */ - -void -ut_print_namel( -/*===========*/ - FILE* f, /* in: output stream */ - struct trx_struct*trx, /* in: transaction (NULL=no quotes) */ - ibool table_id,/* in: TRUE=print a table name, - FALSE=print other identifier */ - const char* name, /* in: name to print */ - ulint namelen);/* in: length of name */ - -/************************************************************************** -Catenate files. */ - -void -ut_copy_file( -/*=========*/ - FILE* dest, /* in: output file */ - FILE* src); /* in: input file to be appended to output */ - -/************************************************************************** -snprintf(). */ - -#ifdef __WIN__ -int -ut_snprintf( - /* out: number of characters that would - have been printed if the size were - unlimited, not including the terminating - '\0'. 
*/ - char* str, /* out: string */ - size_t size, /* in: str size */ - const char* fmt, /* in: format */ - ...); /* in: format values */ -#else -#define ut_snprintf snprintf -#endif /* __WIN__ */ - -#ifndef UNIV_NONINL -#include "ut0ut.ic" -#endif - -#endif - diff --git a/storage/innobase/include/ut0ut.ic b/storage/innobase/include/ut0ut.ic deleted file mode 100644 index 412717a094e..00000000000 --- a/storage/innobase/include/ut0ut.ic +++ /dev/null @@ -1,174 +0,0 @@ -/****************************************************************** -Various utilities - -(c) 1994, 1995 Innobase Oy - -Created 5/30/1994 Heikki Tuuri -*******************************************************************/ - -/********************************************************** -Calculates the minimum of two ulints. */ -UNIV_INLINE -ulint -ut_min( -/*===*/ - /* out: minimum */ - ulint n1, /* in: first number */ - ulint n2) /* in: second number */ -{ - return((n1 <= n2) ? n1 : n2); -} - -/********************************************************** -Calculates the maximum of two ulints. */ -UNIV_INLINE -ulint -ut_max( -/*===*/ - /* out: maximum */ - ulint n1, /* in: first number */ - ulint n2) /* in: second number */ -{ - return((n1 <= n2) ? n2 : n1); -} - -/******************************************************************** -Calculates minimum of two ulint-pairs. */ -UNIV_INLINE -void -ut_pair_min( -/*========*/ - ulint* a, /* out: more significant part of minimum */ - ulint* b, /* out: less significant part of minimum */ - ulint a1, /* in: more significant part of first pair */ - ulint b1, /* in: less significant part of first pair */ - ulint a2, /* in: more significant part of second pair */ - ulint b2) /* in: less significant part of second pair */ -{ - if (a1 == a2) { - *a = a1; - *b = ut_min(b1, b2); - } else if (a1 < a2) { - *a = a1; - *b = b1; - } else { - *a = a2; - *b = b2; - } -} - -/********************************************************** -Compares two ulints. 
*/ -UNIV_INLINE -int -ut_ulint_cmp( -/*=========*/ - /* out: 1 if a > b, 0 if a == b, -1 if a < b */ - ulint a, /* in: ulint */ - ulint b) /* in: ulint */ -{ - if (a < b) { - return(-1); - } else if (a == b) { - return(0); - } else { - return(1); - } -} - -/*********************************************************** -Compares two pairs of ulints. */ -UNIV_INLINE -int -ut_pair_cmp( -/*========*/ - /* out: -1 if a < b, 0 if a == b, 1 if a > b */ - ulint a1, /* in: more significant part of first pair */ - ulint a2, /* in: less significant part of first pair */ - ulint b1, /* in: more significant part of second pair */ - ulint b2) /* in: less significant part of second pair */ -{ - if (a1 > b1) { - return(1); - } else if (a1 < b1) { - return(-1); - } else if (a2 > b2) { - return(1); - } else if (a2 < b2) { - return(-1); - } else { - return(0); - } -} - -/***************************************************************** -Calculates fast the remainder when divided by a power of two. */ -UNIV_INLINE -ulint -ut_2pow_remainder( -/*==============*/ /* out: remainder */ - ulint n, /* in: number to be divided */ - ulint m) /* in: divisor; power of 2 */ -{ - ut_ad(0x80000000UL % m == 0); - - return(n & (m - 1)); -} - -/***************************************************************** -Calculates fast a value rounded to a multiple of a power of 2. */ -UNIV_INLINE -ulint -ut_2pow_round( -/*==========*/ /* out: value of n rounded down to nearest - multiple of m */ - ulint n, /* in: number to be rounded */ - ulint m) /* in: divisor; power of 2 */ -{ - ut_ad(0x80000000UL % m == 0); - - return(n & ~(m - 1)); -} - -/***************************************************************** -Calculates fast the 2-logarithm of a number, rounded upward to an -integer. 
*/ -UNIV_INLINE -ulint -ut_2_log( -/*=====*/ - /* out: logarithm in the base 2, rounded upward */ - ulint n) /* in: number != 0 */ -{ - ulint res; - - res = 0; - - ut_ad(n > 0); - - n = n - 1; - - for (;;) { - n = n / 2; - - if (n == 0) { - break; - } - - res++; - } - - return(res + 1); -} - -/***************************************************************** -Calculates 2 to power n. */ -UNIV_INLINE -ulint -ut_2_exp( -/*=====*/ - /* out: 2 to power n */ - ulint n) /* in: number */ -{ - return((ulint) 1 << n); -} diff --git a/storage/innobase/include/ut0vec.h b/storage/innobase/include/ut0vec.h deleted file mode 100644 index e0cc4dfb009..00000000000 --- a/storage/innobase/include/ut0vec.h +++ /dev/null @@ -1,73 +0,0 @@ -#ifndef IB_VECTOR_H -#define IB_VECTOR_H - -#include "univ.i" -#include "mem0mem.h" - -typedef struct ib_vector_struct ib_vector_t; - -/* An automatically resizing vector datatype with the following properties: - - -Contains void* items. - - -The items are owned by the caller. - - -All memory allocation is done through a heap owned by the caller, who is - responsible for freeing it when done with the vector. - - -When the vector is resized, the old memory area is left allocated since it - uses the same heap as the new memory area, so this is best used for - relatively small or short-lived uses. -*/ - -/******************************************************************** -Create a new vector with the given initial size. */ - -ib_vector_t* -ib_vector_create( -/*=============*/ - /* out: vector */ - mem_heap_t* heap, /* in: heap */ - ulint size); /* in: initial size */ - -/******************************************************************** -Push a new element to the vector, increasing its size if necessary. */ - -void -ib_vector_push( -/*===========*/ - ib_vector_t* vec, /* in: vector */ - void* elem); /* in: data element */ - -/******************************************************************** -Get the number of elements in the vector. 
*/ -UNIV_INLINE -ulint -ib_vector_size( -/*===========*/ - /* out: number of elements in vector */ - ib_vector_t* vec); /* in: vector */ - -/******************************************************************** -Get the n'th element. */ -UNIV_INLINE -void* -ib_vector_get( -/*==========*/ - /* out: n'th element */ - ib_vector_t* vec, /* in: vector */ - ulint n); /* in: element index to get */ - -/* See comment at beginning of file. */ -struct ib_vector_struct { - mem_heap_t* heap; /* heap */ - void** data; /* data elements */ - ulint used; /* number of elements currently used */ - ulint total; /* number of elements allocated */ -}; - -#ifndef UNIV_NONINL -#include "ut0vec.ic" -#endif - -#endif diff --git a/storage/innobase/include/ut0vec.ic b/storage/innobase/include/ut0vec.ic deleted file mode 100644 index 417a17d951f..00000000000 --- a/storage/innobase/include/ut0vec.ic +++ /dev/null @@ -1,26 +0,0 @@ -/******************************************************************** -Get number of elements in vector. */ -UNIV_INLINE -ulint -ib_vector_size( -/*===========*/ - /* out: number of elements in vector */ - ib_vector_t* vec) /* in: vector */ -{ - return(vec->used); -} - -/******************************************************************** -Get n'th element. */ -UNIV_INLINE -void* -ib_vector_get( -/*==========*/ - /* out: n'th element */ - ib_vector_t* vec, /* in: vector */ - ulint n) /* in: element index to get */ -{ - ut_a(n < vec->used); - - return(vec->data[n]); -} diff --git a/storage/innobase/include/ut0wqueue.h b/storage/innobase/include/ut0wqueue.h deleted file mode 100644 index 57f2297beee..00000000000 --- a/storage/innobase/include/ut0wqueue.h +++ /dev/null @@ -1,60 +0,0 @@ -/*********************************************************************** -A Work queue. Threads can add work items to the queue and other threads can -wait for work items to be available and take them off the queue for -processing. 
- -************************************************************************/ - -#ifndef IB_WORK_QUEUE_H -#define IB_WORK_QUEUE_H - -#include "ut0list.h" -#include "mem0mem.h" -#include "os0sync.h" -#include "sync0types.h" - -typedef struct ib_wqueue_struct ib_wqueue_t; - -/******************************************************************** -Create a new work queue. */ - -ib_wqueue_t* -ib_wqueue_create(void); -/*===================*/ - /* out: work queue */ - -/******************************************************************** -Free a work queue. */ - -void -ib_wqueue_free( -/*===========*/ - ib_wqueue_t* wq); /* in: work queue */ - -/******************************************************************** -Add a work item to the queue. */ - -void -ib_wqueue_add( -/*==========*/ - ib_wqueue_t* wq, /* in: work queue */ - void* item, /* in: work item */ - mem_heap_t* heap); /* in: memory heap to use for allocating the - list node */ - -/******************************************************************** -Wait for a work item to appear in the queue. */ - -void* -ib_wqueue_wait( - /* out: work item */ - ib_wqueue_t* wq); /* in: work queue */ - -/* Work queue. */ -struct ib_wqueue_struct { - mutex_t mutex; /* mutex protecting everything */ - ib_list_t* items; /* work item list */ - os_event_t event; /* event we use to signal additions to list */ -}; - -#endif diff --git a/storage/innobase/lock/lock0iter.c b/storage/innobase/lock/lock0iter.c deleted file mode 100644 index 0afa7019c86..00000000000 --- a/storage/innobase/lock/lock0iter.c +++ /dev/null @@ -1,90 +0,0 @@ -/****************************************************** -Lock queue iterator. Can iterate over table and record -lock queues. 
- -(c) 2007 Innobase Oy - -Created July 16, 2007 Vasil Dimov -*******************************************************/ - -#define LOCK_MODULE_IMPLEMENTATION - -#include "univ.i" -#include "lock0iter.h" -#include "lock0lock.h" -#include "lock0priv.h" -#include "ut0dbg.h" -#include "ut0lst.h" - -/*********************************************************************** -Initialize lock queue iterator so that it starts to iterate from -"lock". bit_no specifies the record number within the heap where the -record is stored. It can be undefined (ULINT_UNDEFINED) in two cases: -1. If the lock is a table lock, thus we have a table lock queue; -2. If the lock is a record lock and it is a wait lock. In this case - bit_no is calculated in this function by using - lock_rec_find_set_bit(). There is exactly one bit set in the bitmap - of a wait lock. */ - -void -lock_queue_iterator_reset( -/*======================*/ - lock_queue_iterator_t* iter, /* out: iterator */ - lock_t* lock, /* in: lock to start from */ - ulint bit_no) /* in: record number in the - heap */ -{ - iter->current_lock = lock; - - if (bit_no != ULINT_UNDEFINED) { - - iter->bit_no = bit_no; - } else { - - switch (lock_get_type(lock)) { - case LOCK_TABLE: - iter->bit_no = ULINT_UNDEFINED; - break; - case LOCK_REC: - iter->bit_no = lock_rec_find_set_bit(lock); - ut_a(iter->bit_no != ULINT_UNDEFINED); - break; - default: - ut_error; - } - } -} - -/*********************************************************************** -Gets the previous lock in the lock queue, returns NULL if there are no -more locks (i.e. the current lock is the first one). The iterator is -receded (if not-NULL is returned). 
*/ - -lock_t* -lock_queue_iterator_get_prev( -/*=========================*/ - /* out: previous lock or NULL */ - lock_queue_iterator_t* iter) /* in/out: iterator */ -{ - lock_t* prev_lock; - - switch (lock_get_type(iter->current_lock)) { - case LOCK_REC: - prev_lock = lock_rec_get_prev( - iter->current_lock, iter->bit_no); - break; - case LOCK_TABLE: - prev_lock = UT_LIST_GET_PREV( - un_member.tab_lock.locks, iter->current_lock); - break; - default: - ut_error; - } - - if (prev_lock != NULL) { - - iter->current_lock = prev_lock; - } - - return(prev_lock); -} diff --git a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c deleted file mode 100644 index 5afd19aa7e7..00000000000 --- a/storage/innobase/lock/lock0lock.c +++ /dev/null @@ -1,5189 +0,0 @@ -/****************************************************** -The transaction lock system - -(c) 1996 Innobase Oy - -Created 5/7/1996 Heikki Tuuri -*******************************************************/ - -#define LOCK_MODULE_IMPLEMENTATION - -#include "lock0lock.h" -#include "lock0priv.h" - -#ifdef UNIV_NONINL -#include "lock0lock.ic" -#include "lock0priv.ic" -#endif - -#include "usr0sess.h" -#include "trx0purge.h" -#include "dict0mem.h" -#include "trx0sys.h" - - -/* 2 function prototypes copied from ha_innodb.cc: */ - -/***************************************************************** -If you want to print a thd that is not associated with the current thread, -you must call this function before reserving the InnoDB kernel_mutex, to -protect MySQL from setting thd->query NULL. If you print a thd of the current -thread, we know that MySQL cannot modify thd->query, and it is not necessary -to call this. Call innobase_mysql_end_print_arbitrary_thd() after you release -the kernel_mutex. -NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this -function! 
*/ - -void -innobase_mysql_prepare_print_arbitrary_thd(void); -/*============================================*/ - -/***************************************************************** -Releases the mutex reserved by innobase_mysql_prepare_print_arbitrary_thd(). -NOTE that /mysql/innobase/lock/lock0lock.c must contain the prototype for this -function! */ - -void -innobase_mysql_end_print_arbitrary_thd(void); -/*========================================*/ - -/* Restricts the length of search we will do in the waits-for -graph of transactions */ -#define LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK 1000000 - -/* Restricts the recursion depth of the search we will do in the waits-for -graph of transactions */ -#define LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK 200 - -/* When releasing transaction locks, this specifies how often we release -the kernel mutex for a moment to give also others access to it */ - -#define LOCK_RELEASE_KERNEL_INTERVAL 1000 - -/* Safety margin when creating a new record lock: this many extra records -can be inserted to the page without need to create a lock with a bigger -bitmap */ - -#define LOCK_PAGE_BITMAP_MARGIN 64 - -/* An explicit record lock affects both the record and the gap before it. -An implicit x-lock does not affect the gap, it only locks the index -record from read or update. - -If a transaction has modified or inserted an index record, then -it owns an implicit x-lock on the record. On a secondary index record, -a transaction has an implicit x-lock also if it has modified the -clustered index record, the max trx id of the page where the secondary -index record resides is >= trx id of the transaction (or database recovery -is running), and there are no explicit non-gap lock requests on the -secondary index record. 
- -This complicated definition for a secondary index comes from the -implementation: we want to be able to determine if a secondary index -record has an implicit x-lock, just by looking at the present clustered -index record, not at the historical versions of the record. The -complicated definition can be explained to the user so that there is -nondeterminism in the access path when a query is answered: we may, -or may not, access the clustered index record and thus may, or may not, -bump into an x-lock set there. - -Different transactions can have conflicting locks set on the gap at the -same time. The locks on the gap are purely inhibitive: an insert cannot -be made, or a select cursor may have to wait if a different transaction -has a conflicting lock on the gap. An x-lock on the gap does not give -the right to insert into the gap. - -An explicit lock can be placed on a user record or the supremum record of -a page. The locks on the supremum record are always thought to be of the gap -type, though the gap bit is not set. When we perform an update of a record -where the size of the record changes, we may temporarily store its explicit -locks on the infimum record of the page, though the infimum otherwise never -carries locks. - -A waiting record lock can also be of the gap type. A waiting lock request -can be granted when there is no conflicting mode lock request by another -transaction ahead of it in the explicit lock queue. - -In version 4.0.5 we added yet another explicit lock type: LOCK_REC_NOT_GAP. -It only locks the record it is placed on, not the gap before the record. -This lock type is necessary to emulate an Oracle-like READ COMMITTED isolation -level. - -------------------------------------------------------------------------- -RULE 1: If there is an implicit x-lock on a record, and there are non-gap -------- -lock requests waiting in the queue, then the transaction holding the implicit -x-lock also has an explicit non-gap record x-lock. 
Therefore, as locks are -released, we can grant locks to waiting lock requests purely by looking at -the explicit lock requests in the queue. - -RULE 3: Different transactions cannot have conflicting granted non-gap locks -------- -on a record at the same time. However, they can have conflicting granted gap -locks. -RULE 4: If there is a waiting lock request in a queue, no lock request, -------- -gap or not, can be inserted ahead of it in the queue. In record deletes -and page splits new gap type locks can be created by the database manager -for a transaction, and without rule 4, the waits-for graph of transactions -might become cyclic without the database noticing it, as the deadlock check -is only performed when a transaction itself requests a lock! -------------------------------------------------------------------------- - -An insert is allowed to a gap if there are no explicit lock requests by -other transactions on the next record. It does not matter if these lock -requests are granted or waiting, gap bit set or not, with the exception -that a gap type request set by another transaction to wait for -its turn to do an insert is ignored. On the other hand, an -implicit x-lock by another transaction does not prevent an insert, which -allows for more concurrency when using an Oracle-style sequence number -generator for the primary key with many transactions doing inserts -concurrently. - -A modify of a record is allowed if the transaction has an x-lock on the -record, or if other transactions do not have any non-gap lock requests on the -record. - -A read of a single user record with a cursor is allowed if the transaction -has a non-gap explicit, or an implicit lock on the record, or if the other -transactions have no x-lock requests on the record. At a page supremum a -read is always allowed. - -In summary, an implicit lock is seen as a granted x-lock only on the -record, not on the gap. 
An explicit lock with no gap bit set is a lock -both on the record and the gap. If the gap bit is set, the lock is only -on the gap. Different transactions cannot own conflicting locks on the -record at the same time, but they may own conflicting locks on the gap. -Granted locks on a record give an access right to the record, but gap type -locks just inhibit operations. - -NOTE: Finding out if some transaction has an implicit x-lock on a secondary -index record can be cumbersome. We may have to look at previous versions of -the corresponding clustered index record to find out if a delete marked -secondary index record was delete marked by an active transaction, not by -a committed one. - -FACT A: If a transaction has inserted a row, it can delete it any time -without need to wait for locks. - -PROOF: The transaction has an implicit x-lock on every index record inserted -for the row, and can thus modify each record without the need to wait. Q.E.D. - -FACT B: If a transaction has read some result set with a cursor, it can read -it again, and retrieves the same result set, if it has not modified the -result set in the meantime. Hence, there is no phantom problem. If the -biggest record, in the alphabetical order, touched by the cursor is removed, -a lock wait may occur, otherwise not. - -PROOF: When a read cursor proceeds, it sets an s-lock on each user record -it passes, and a gap type s-lock on each page supremum. The cursor must -wait until it has these locks granted. Then no other transaction can -have a granted x-lock on any of the user records, and therefore cannot -modify the user records. Neither can any other transaction insert into -the gaps which were passed over by the cursor. Page splits and merges, -and removal of obsolete versions of records do not affect this, because -when a user record or a page supremum is removed, the next record inherits -its locks as gap type locks, and therefore blocks inserts to the same gap. 
-Also, if a page supremum is inserted, it inherits its locks from the successor -record. When the cursor is positioned again at the start of the result set, -the records it will touch on its course are either records it touched -during the last pass or new inserted page supremums. It can immediately -access all these records, and when it arrives at the biggest record, it -notices that the result set is complete. If the biggest record was removed, -lock wait can occur because the next record only inherits a gap type lock, -and a wait may be needed. Q.E.D. */ - -/* If an index record should be changed or a new inserted, we must check -the lock on the record or the next. When a read cursor starts reading, -we will set a record level s-lock on each record it passes, except on the -initial record on which the cursor is positioned before we start to fetch -records. Our index tree search has the convention that the B-tree -cursor is positioned BEFORE the first possibly matching record in -the search. Optimizations are possible here: if the record is searched -on an equality condition to a unique key, we could actually set a special -lock on the record, a lock which would not prevent any insert before -this record. In the next key locking an x-lock set on a record also -prevents inserts just before that record. - There are special infimum and supremum records on each page. -A supremum record can be locked by a read cursor. This record cannot be -updated but the lock prevents insert of a user record to the end of -the page. - Next key locks will prevent the phantom problem where new rows -could appear to SELECT result sets after the select operation has been -performed. Prevention of phantoms ensures the serializability of -transactions. - What should we check if an insert of a new record is wanted? -Only the lock on the next record on the same page, because also the -supremum record can carry a lock. An s-lock prevents insertion, but -what about an x-lock? 
If it was set by a searched update, then there -is implicitly an s-lock, too, and the insert should be prevented. -What if our transaction owns an x-lock to the next record, but there is -a waiting s-lock request on the next record? If this s-lock was placed -by a read cursor moving in the ascending order in the index, we cannot -do the insert immediately, because when we finally commit our transaction, -the read cursor should see also the new inserted record. So we should -move the read cursor backward from the next record for it to pass over -the new inserted record. This move backward may be too cumbersome to -implement. If we in this situation just enqueue a second x-lock request -for our transaction on the next record, then the deadlock mechanism -notices a deadlock between our transaction and the s-lock request -transaction. This seems to be an ok solution. - We could have the convention that granted explicit record locks, -lock the corresponding records from changing, and also lock the gaps -before them from inserting. A waiting explicit lock request locks the gap -before from inserting. Implicit record x-locks, which we derive from the -transaction id in the clustered index record, only lock the record itself -from modification, not the gap before it from inserting. - How should we store update locks? If the search is done by a unique -key, we could just modify the record trx id. Otherwise, we could put a record -x-lock on the record. If the update changes ordering fields of the -clustered index record, the inserted new record needs no record lock in -lock table, the trx id is enough. The same holds for a secondary index -record. Searched delete is similar to update. - -PROBLEM: -What about waiting lock requests? If a transaction is waiting to make an -update to a record which another modified, how does the other transaction -know to send the end-lock-wait signal to the waiting transaction? 
If we have -the convention that a transaction may wait for just one lock at a time, how -do we preserve it if lock wait ends? - -PROBLEM: -Checking the trx id label of a secondary index record. In the case of a -modification, not an insert, is this necessary? A secondary index record -is modified only by setting or resetting its deleted flag. A secondary index -record contains fields to uniquely determine the corresponding clustered -index record. A secondary index record is therefore only modified if we -also modify the clustered index record, and the trx id checking is done -on the clustered index record, before we come to modify the secondary index -record. So, in the case of delete marking or unmarking a secondary index -record, we do not have to care about trx ids, only the locks in the lock -table must be checked. In the case of a select from a secondary index, the -trx id is relevant, and in this case we may have to search the clustered -index record. - -PROBLEM: How to update record locks when page is split or merged, or --------------------------------------------------------------------- -a record is deleted or updated? -If the size of fields in a record changes, we perform the update by -a delete followed by an insert. How can we retain the locks set or -waiting on the record? Because a record lock is indexed in the bitmap -by the heap number of the record, when we remove the record from the -record list, it is possible still to keep the lock bits. If the page -is reorganized, we could make a table of old and new heap numbers, -and permute the bitmaps in the locks accordingly. We can add to the -table a row telling where the updated record ended. If the update does -not require a reorganization of the page, we can simply move the lock -bits for the updated record to the position determined by its new heap -number (we may have to allocate a new lock, if we run out of the bitmap -in the old one). 
- A more complicated case is the one where the reinsertion of the -updated record is done pessimistically, because the structure of the -tree may change. - -PROBLEM: If a supremum record is removed in a page merge, or a record ---------------------------------------------------------------------- -removed in a purge, what to do to the waiting lock requests? In a split to -the right, we just move the lock requests to the new supremum. If a record -is removed, we could move the waiting lock request to its inheritor, the -next record in the index. But, the next record may already have lock -requests on its own queue. A new deadlock check should be made then. Maybe -it is easier just to release the waiting transactions. They can then enqueue -new lock requests on appropriate records. - -PROBLEM: When a record is inserted, what locks should it inherit from the -------------------------------------------------------------------------- -upper neighbor? An insert of a new supremum record in a page split is -always possible, but an insert of a new user record requires that the upper -neighbor does not have any lock requests by other transactions, granted or -waiting, in its lock queue. Solution: We can copy the locks as gap type -locks, so that also the waiting locks are transformed to granted gap type -locks on the inserted record. */ - -/* LOCK COMPATIBILITY MATRIX - * IS IX S X AI - * IS + + + - + - * IX + + - - + - * S + - + - - - * X - - - - - - * AI + + - - - - * - * Note that for rows, InnoDB only acquires S or X locks. - * For tables, InnoDB normally acquires IS or IX locks. - * S or X table locks are only acquired for LOCK TABLES. - * Auto-increment (AI) locks are needed because of - * statement-level MySQL binlog. - * See also lock_mode_compatible(). - */ - -#ifdef UNIV_DEBUG -ibool lock_print_waits = FALSE; -#endif /* UNIV_DEBUG */ - -/* The lock system */ -lock_sys_t* lock_sys = NULL; - -/* We store info on the latest deadlock error to this buffer. 
InnoDB -Monitor will then fetch it and print */ -ibool lock_deadlock_found = FALSE; -FILE* lock_latest_err_file; - -/* Flags for recursive deadlock search */ -#define LOCK_VICTIM_IS_START 1 -#define LOCK_VICTIM_IS_OTHER 2 - -/************************************************************************ -Checks if a lock request results in a deadlock. */ -static -ibool -lock_deadlock_occurs( -/*=================*/ - /* out: TRUE if a deadlock was detected and we - chose trx as a victim; FALSE if no deadlock, or - there was a deadlock, but we chose other - transaction(s) as victim(s) */ - lock_t* lock, /* in: lock the transaction is requesting */ - trx_t* trx); /* in: transaction */ -/************************************************************************ -Looks recursively for a deadlock. */ -static -ulint -lock_deadlock_recursive( -/*====================*/ - /* out: 0 if no deadlock found, - LOCK_VICTIM_IS_START if there was a deadlock - and we chose 'start' as the victim, - LOCK_VICTIM_IS_OTHER if a deadlock - was found and we chose some other trx as a - victim: we must do the search again in this - last case because there may be another - deadlock! */ - trx_t* start, /* in: recursion starting point */ - trx_t* trx, /* in: a transaction waiting for a lock */ - lock_t* wait_lock, /* in: the lock trx is waiting to be granted */ - ulint* cost, /* in/out: number of calculation steps thus - far: if this exceeds LOCK_MAX_N_STEPS_... - we return LOCK_VICTIM_IS_START */ - ulint depth); /* in: recursion depth: if this exceeds - LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we - return LOCK_VICTIM_IS_START */ - -/************************************************************************* -Gets the nth bit of a record lock. 
*/ -UNIV_INLINE -ibool -lock_rec_get_nth_bit( -/*=================*/ - /* out: TRUE if bit set */ - lock_t* lock, /* in: record lock */ - ulint i) /* in: index of the bit */ -{ - ulint byte_index; - ulint bit_index; - ulint b; - - ut_ad(lock); - ut_ad(lock_get_type(lock) == LOCK_REC); - - if (i >= lock->un_member.rec_lock.n_bits) { - - return(FALSE); - } - - byte_index = i / 8; - bit_index = i % 8; - - b = (ulint)*((byte*)lock + sizeof(lock_t) + byte_index); - - return(ut_bit_get_nth(b, bit_index)); -} - -/*************************************************************************/ - -#define lock_mutex_enter_kernel() mutex_enter(&kernel_mutex) -#define lock_mutex_exit_kernel() mutex_exit(&kernel_mutex) - -/************************************************************************* -Checks that a transaction id is sensible, i.e., not in the future. */ - -ibool -lock_check_trx_id_sanity( -/*=====================*/ - /* out: TRUE if ok */ - dulint trx_id, /* in: trx id */ - rec_t* rec, /* in: user record */ - dict_index_t* index, /* in: index */ - const ulint* offsets, /* in: rec_get_offsets(rec, index) */ - ibool has_kernel_mutex)/* in: TRUE if the caller owns the - kernel mutex */ -{ - ibool is_ok = TRUE; - - ut_ad(rec_offs_validate(rec, index, offsets)); - - if (!has_kernel_mutex) { - mutex_enter(&kernel_mutex); - } - - /* A sanity check: the trx_id in rec must be smaller than the global - trx id counter */ - - if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: transaction id associated" - " with record\n", - stderr); - rec_print_new(stderr, rec, offsets); - fputs("InnoDB: in ", stderr); - dict_index_name_print(stderr, NULL, index); - fprintf(stderr, "\n" - "InnoDB: is %lu %lu which is higher than the" - " global trx id counter %lu %lu!\n" - "InnoDB: The table is corrupt. 
You have to do" - " dump + drop + reimport.\n", - (ulong) ut_dulint_get_high(trx_id), - (ulong) ut_dulint_get_low(trx_id), - (ulong) ut_dulint_get_high(trx_sys->max_trx_id), - (ulong) ut_dulint_get_low(trx_sys->max_trx_id)); - - is_ok = FALSE; - } - - if (!has_kernel_mutex) { - mutex_exit(&kernel_mutex); - } - - return(is_ok); -} - -/************************************************************************* -Checks that a record is seen in a consistent read. */ - -ibool -lock_clust_rec_cons_read_sees( -/*==========================*/ - /* out: TRUE if sees, or FALSE if an earlier - version of the record should be retrieved */ - rec_t* rec, /* in: user record which should be read or - passed over by a read cursor */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - read_view_t* view) /* in: consistent read view */ -{ - dulint trx_id; - - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(page_rec_is_user_rec(rec)); - ut_ad(rec_offs_validate(rec, index, offsets)); - - /* NOTE that we call this function while holding the search - system latch. To obey the latching order we must NOT reserve the - kernel mutex here! */ - - trx_id = row_get_rec_trx_id(rec, index, offsets); - - return(read_view_sees_trx_id(view, trx_id)); -} - -/************************************************************************* -Checks that a non-clustered index record is seen in a consistent read. 
*/ - -ulint -lock_sec_rec_cons_read_sees( -/*========================*/ - /* out: TRUE if certainly sees, or FALSE if an - earlier version of the clustered index record - might be needed: NOTE that a non-clustered - index page contains so little information on - its modifications that also in the case FALSE, - the present version of rec may be the right, - but we must check this from the clustered - index record */ - rec_t* rec, /* in: user record which should be read or - passed over by a read cursor */ - dict_index_t* index, /* in: non-clustered index */ - read_view_t* view) /* in: consistent read view */ -{ - dulint max_trx_id; - - UT_NOT_USED(index); - - ut_ad(!(index->type & DICT_CLUSTERED)); - ut_ad(page_rec_is_user_rec(rec)); - - /* NOTE that we might call this function while holding the search - system latch. To obey the latching order we must NOT reserve the - kernel mutex here! */ - - if (recv_recovery_is_on()) { - - return(FALSE); - } - - max_trx_id = page_get_max_trx_id(buf_frame_align(rec)); - - if (ut_dulint_cmp(max_trx_id, view->up_limit_id) >= 0) { - - return(FALSE); - } - - return(TRUE); -} - -/************************************************************************* -Creates the lock system at database start. */ - -void -lock_sys_create( -/*============*/ - ulint n_cells) /* in: number of slots in lock hash table */ -{ - lock_sys = mem_alloc(sizeof(lock_sys_t)); - - lock_sys->rec_hash = hash_create(n_cells); - - /* hash_create_mutexes(lock_sys->rec_hash, 2, SYNC_REC_LOCK); */ - - lock_latest_err_file = os_file_create_tmpfile(); - ut_a(lock_latest_err_file); -} - -/************************************************************************* -Gets the size of a lock struct. */ - -ulint -lock_get_size(void) -/*===============*/ - /* out: size in bytes */ -{ - return((ulint)sizeof(lock_t)); -} - -/************************************************************************* -Gets the mode of a lock. 
*/ -UNIV_INLINE -ulint -lock_get_mode( -/*==========*/ - /* out: mode */ - const lock_t* lock) /* in: lock */ -{ - ut_ad(lock); - - return(lock->type_mode & LOCK_MODE_MASK); -} - -/************************************************************************* -Gets the wait flag of a lock. */ -UNIV_INLINE -ibool -lock_get_wait( -/*==========*/ - /* out: TRUE if waiting */ - lock_t* lock) /* in: lock */ -{ - ut_ad(lock); - - if (lock->type_mode & LOCK_WAIT) { - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************* -Gets the source table of an ALTER TABLE transaction. The table must be -covered by an IX or IS table lock. */ - -dict_table_t* -lock_get_src_table( -/*===============*/ - /* out: the source table of transaction, - if it is covered by an IX or IS table lock; - dest if there is no source table, and - NULL if the transaction is locking more than - two tables or an inconsistency is found */ - trx_t* trx, /* in: transaction */ - dict_table_t* dest, /* in: destination of ALTER TABLE */ - ulint* mode) /* out: lock mode of the source table */ -{ - dict_table_t* src; - lock_t* lock; - - src = NULL; - *mode = LOCK_NONE; - - for (lock = UT_LIST_GET_FIRST(trx->trx_locks); - lock; - lock = UT_LIST_GET_NEXT(trx_locks, lock)) { - lock_table_t* tab_lock; - ulint lock_mode; - if (!(lock_get_type(lock) & LOCK_TABLE)) { - /* We are only interested in table locks. */ - continue; - } - tab_lock = &lock->un_member.tab_lock; - if (dest == tab_lock->table) { - /* We are not interested in the destination table. */ - continue; - } else if (!src) { - /* This presumably is the source table. */ - src = tab_lock->table; - if (UT_LIST_GET_LEN(src->locks) != 1 - || UT_LIST_GET_FIRST(src->locks) != lock) { - /* We only support the case when - there is only one lock on this table. 
*/ - return(NULL); - } - } else if (src != tab_lock->table) { - /* The transaction is locking more than - two tables (src and dest): abort */ - return(NULL); - } - - /* Check that the source table is locked by - LOCK_IX or LOCK_IS. */ - lock_mode = lock_get_mode(lock); - switch (lock_mode) { - case LOCK_IX: - case LOCK_IS: - if (*mode != LOCK_NONE && *mode != lock_mode) { - /* There are multiple locks on src. */ - return(NULL); - } - *mode = lock_mode; - break; - } - } - - if (!src) { - /* No source table lock found: flag the situation to caller */ - src = dest; - } - - return(src); -} - -/************************************************************************* -Determine if the given table is exclusively "owned" by the given -transaction, i.e., transaction holds LOCK_IX and possibly LOCK_AUTO_INC -on the table. */ - -ibool -lock_is_table_exclusive( -/*====================*/ - /* out: TRUE if table is only locked by trx, - with LOCK_IX, and possibly LOCK_AUTO_INC */ - dict_table_t* table, /* in: table */ - trx_t* trx) /* in: transaction */ -{ - lock_t* lock; - ibool ok = FALSE; - - ut_ad(table); - ut_ad(trx); - - lock_mutex_enter_kernel(); - - for (lock = UT_LIST_GET_FIRST(table->locks); - lock; - lock = UT_LIST_GET_NEXT(locks, &lock->un_member.tab_lock)) { - if (lock->trx != trx) { - /* A lock on the table is held - by some other transaction. */ - goto not_ok; - } - - if (!(lock_get_type(lock) & LOCK_TABLE)) { - /* We are interested in table locks only. */ - continue; - } - - switch (lock_get_mode(lock)) { - case LOCK_IX: - ok = TRUE; - break; - case LOCK_AUTO_INC: - /* It is allowed for trx to hold an - auto_increment lock. */ - break; - default: -not_ok: - /* Other table locks than LOCK_IX are not allowed. */ - ok = FALSE; - goto func_exit; - } - } - -func_exit: - lock_mutex_exit_kernel(); - - return(ok); -} - -/************************************************************************* -Sets the wait flag of a lock and the back pointer in trx to lock. 
*/ -UNIV_INLINE -void -lock_set_lock_and_trx_wait( -/*=======================*/ - lock_t* lock, /* in: lock */ - trx_t* trx) /* in: trx */ -{ - ut_ad(lock); - ut_ad(trx->wait_lock == NULL); - - trx->wait_lock = lock; - lock->type_mode = lock->type_mode | LOCK_WAIT; -} - -/************************************************************************** -The back pointer to a waiting lock request in the transaction is set to NULL -and the wait bit in lock type_mode is reset. */ -UNIV_INLINE -void -lock_reset_lock_and_trx_wait( -/*=========================*/ - lock_t* lock) /* in: record lock */ -{ - ut_ad((lock->trx)->wait_lock == lock); - ut_ad(lock_get_wait(lock)); - - /* Reset the back pointer in trx to this waiting lock request */ - - (lock->trx)->wait_lock = NULL; - lock->type_mode = lock->type_mode & ~LOCK_WAIT; -} - -/************************************************************************* -Gets the gap flag of a record lock. */ -UNIV_INLINE -ibool -lock_rec_get_gap( -/*=============*/ - /* out: TRUE if gap flag set */ - lock_t* lock) /* in: record lock */ -{ - ut_ad(lock); - ut_ad(lock_get_type(lock) == LOCK_REC); - - if (lock->type_mode & LOCK_GAP) { - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************* -Gets the LOCK_REC_NOT_GAP flag of a record lock. */ -UNIV_INLINE -ibool -lock_rec_get_rec_not_gap( -/*=====================*/ - /* out: TRUE if LOCK_REC_NOT_GAP flag set */ - lock_t* lock) /* in: record lock */ -{ - ut_ad(lock); - ut_ad(lock_get_type(lock) == LOCK_REC); - - if (lock->type_mode & LOCK_REC_NOT_GAP) { - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************* -Gets the waiting insert flag of a record lock. 
*/ -UNIV_INLINE -ibool -lock_rec_get_insert_intention( -/*==========================*/ - /* out: TRUE if gap flag set */ - lock_t* lock) /* in: record lock */ -{ - ut_ad(lock); - ut_ad(lock_get_type(lock) == LOCK_REC); - - if (lock->type_mode & LOCK_INSERT_INTENTION) { - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************* -Calculates if lock mode 1 is stronger or equal to lock mode 2. */ -UNIV_INLINE -ibool -lock_mode_stronger_or_eq( -/*=====================*/ - /* out: TRUE if mode1 stronger or equal to mode2 */ - ulint mode1, /* in: lock mode */ - ulint mode2) /* in: lock mode */ -{ - ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX - || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC); - ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX - || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC); - if (mode1 == LOCK_X) { - - return(TRUE); - - } else if (mode1 == LOCK_AUTO_INC && mode2 == LOCK_AUTO_INC) { - - return(TRUE); - - } else if (mode1 == LOCK_S - && (mode2 == LOCK_S || mode2 == LOCK_IS)) { - return(TRUE); - - } else if (mode1 == LOCK_IS && mode2 == LOCK_IS) { - - return(TRUE); - - } else if (mode1 == LOCK_IX && (mode2 == LOCK_IX - || mode2 == LOCK_IS)) { - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************* -Calculates if lock mode 1 is compatible with lock mode 2. 
*/
UNIV_INLINE
ibool
lock_mode_compatible(
/*=================*/
			/* out: TRUE if mode1 compatible with mode2 */
	ulint	mode1,	/* in: lock mode */
	ulint	mode2)	/* in: lock mode */
{
	ut_ad(mode1 == LOCK_X || mode1 == LOCK_S || mode1 == LOCK_IX
	      || mode1 == LOCK_IS || mode1 == LOCK_AUTO_INC);
	ut_ad(mode2 == LOCK_X || mode2 == LOCK_S || mode2 == LOCK_IX
	      || mode2 == LOCK_IS || mode2 == LOCK_AUTO_INC);

	switch (mode1) {
	case LOCK_S:
		return((mode2 == LOCK_IS || mode2 == LOCK_S)
		       ? TRUE : FALSE);

	case LOCK_X:
		/* LOCK_X is compatible with nothing */
		return(FALSE);

	case LOCK_AUTO_INC:
		return((mode2 == LOCK_IS || mode2 == LOCK_IX)
		       ? TRUE : FALSE);

	case LOCK_IS:
		return((mode2 == LOCK_IS
			|| mode2 == LOCK_IX
			|| mode2 == LOCK_AUTO_INC
			|| mode2 == LOCK_S)
		       ? TRUE : FALSE);

	case LOCK_IX:
		return((mode2 == LOCK_IS
			|| mode2 == LOCK_AUTO_INC
			|| mode2 == LOCK_IX)
		       ? TRUE : FALSE);
	}

	return(FALSE);
}

/*************************************************************************
Checks if a lock request for a new lock has to wait for request lock2. 
*/ -UNIV_INLINE -ibool -lock_rec_has_to_wait( -/*=================*/ - /* out: TRUE if new lock has to wait for lock2 to be - removed */ - trx_t* trx, /* in: trx of new lock */ - ulint type_mode,/* in: precise mode of the new lock to set: - LOCK_S or LOCK_X, possibly ORed to - LOCK_GAP or LOCK_REC_NOT_GAP, LOCK_INSERT_INTENTION */ - lock_t* lock2, /* in: another record lock; NOTE that it is assumed - that this has a lock bit set on the same record as - in the new lock we are setting */ - ibool lock_is_on_supremum) /* in: TRUE if we are setting the lock - on the 'supremum' record of an index - page: we know then that the lock request - is really for a 'gap' type lock */ -{ - ut_ad(trx && lock2); - ut_ad(lock_get_type(lock2) == LOCK_REC); - - if (trx != lock2->trx - && !lock_mode_compatible(LOCK_MODE_MASK & type_mode, - lock_get_mode(lock2))) { - - /* We have somewhat complex rules when gap type record locks - cause waits */ - - if ((lock_is_on_supremum || (type_mode & LOCK_GAP)) - && !(type_mode & LOCK_INSERT_INTENTION)) { - - /* Gap type locks without LOCK_INSERT_INTENTION flag - do not need to wait for anything. This is because - different users can have conflicting lock types - on gaps. */ - - return(FALSE); - } - - if (!(type_mode & LOCK_INSERT_INTENTION) - && lock_rec_get_gap(lock2)) { - - /* Record lock (LOCK_ORDINARY or LOCK_REC_NOT_GAP - does not need to wait for a gap type lock */ - - return(FALSE); - } - - if ((type_mode & LOCK_GAP) - && lock_rec_get_rec_not_gap(lock2)) { - - /* Lock on gap does not need to wait for - a LOCK_REC_NOT_GAP type lock */ - - return(FALSE); - } - - if (lock_rec_get_insert_intention(lock2)) { - - /* No lock request needs to wait for an insert - intention lock to be removed. This is ok since our - rules allow conflicting locks on gaps. 
This eliminates - a spurious deadlock caused by a next-key lock waiting - for an insert intention lock; when the insert - intention lock was granted, the insert deadlocked on - the waiting next-key lock. - - Also, insert intention locks do not disturb each - other. */ - - return(FALSE); - } - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************* -Checks if a lock request lock1 has to wait for request lock2. */ - -ibool -lock_has_to_wait( -/*=============*/ - /* out: TRUE if lock1 has to wait for lock2 to be - removed */ - lock_t* lock1, /* in: waiting lock */ - lock_t* lock2) /* in: another lock; NOTE that it is assumed that this - has a lock bit set on the same record as in lock1 if - the locks are record locks */ -{ - ut_ad(lock1 && lock2); - - if (lock1->trx != lock2->trx - && !lock_mode_compatible(lock_get_mode(lock1), - lock_get_mode(lock2))) { - if (lock_get_type(lock1) == LOCK_REC) { - ut_ad(lock_get_type(lock2) == LOCK_REC); - - /* If this lock request is for a supremum record - then the second bit on the lock bitmap is set */ - - return(lock_rec_has_to_wait(lock1->trx, - lock1->type_mode, lock2, - lock_rec_get_nth_bit( - lock1, 1))); - } - - return(TRUE); - } - - return(FALSE); -} - -/*============== RECORD LOCK BASIC FUNCTIONS ============================*/ - -/************************************************************************* -Gets the number of bits in a record lock bitmap. */ -UNIV_INLINE -ulint -lock_rec_get_n_bits( -/*================*/ - /* out: number of bits */ - lock_t* lock) /* in: record lock */ -{ - return(lock->un_member.rec_lock.n_bits); -} - -/************************************************************************** -Sets the nth bit of a record lock to TRUE. 
*/ -UNIV_INLINE -void -lock_rec_set_nth_bit( -/*=================*/ - lock_t* lock, /* in: record lock */ - ulint i) /* in: index of the bit */ -{ - ulint byte_index; - ulint bit_index; - byte* ptr; - ulint b; - - ut_ad(lock); - ut_ad(lock_get_type(lock) == LOCK_REC); - ut_ad(i < lock->un_member.rec_lock.n_bits); - - byte_index = i / 8; - bit_index = i % 8; - - ptr = (byte*)lock + sizeof(lock_t) + byte_index; - - b = (ulint)*ptr; - - b = ut_bit_set_nth(b, bit_index, TRUE); - - *ptr = (byte)b; -} - -/************************************************************************** -Looks for a set bit in a record lock bitmap. Returns ULINT_UNDEFINED, -if none found. */ - -ulint -lock_rec_find_set_bit( -/*==================*/ - /* out: bit index == heap number of the record, or - ULINT_UNDEFINED if none found */ - lock_t* lock) /* in: record lock with at least one bit set */ -{ - ulint i; - - for (i = 0; i < lock_rec_get_n_bits(lock); i++) { - - if (lock_rec_get_nth_bit(lock, i)) { - - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/************************************************************************** -Resets the nth bit of a record lock. */ -UNIV_INLINE -void -lock_rec_reset_nth_bit( -/*===================*/ - lock_t* lock, /* in: record lock */ - ulint i) /* in: index of the bit which must be set to TRUE - when this function is called */ -{ - ulint byte_index; - ulint bit_index; - byte* ptr; - ulint b; - - ut_ad(lock); - ut_ad(lock_get_type(lock) == LOCK_REC); - ut_ad(i < lock->un_member.rec_lock.n_bits); - - byte_index = i / 8; - bit_index = i % 8; - - ptr = (byte*)lock + sizeof(lock_t) + byte_index; - - b = (ulint)*ptr; - - b = ut_bit_set_nth(b, bit_index, FALSE); - - *ptr = (byte)b; -} - -/************************************************************************* -Gets the first or next record lock on a page. 
*/ -UNIV_INLINE -lock_t* -lock_rec_get_next_on_page( -/*======================*/ - /* out: next lock, NULL if none exists */ - lock_t* lock) /* in: a record lock */ -{ - ulint space; - ulint page_no; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(lock_get_type(lock) == LOCK_REC); - - space = lock->un_member.rec_lock.space; - page_no = lock->un_member.rec_lock.page_no; - - for (;;) { - lock = HASH_GET_NEXT(hash, lock); - - if (!lock) { - - break; - } - - if ((lock->un_member.rec_lock.space == space) - && (lock->un_member.rec_lock.page_no == page_no)) { - - break; - } - } - - return(lock); -} - -/************************************************************************* -Gets the first record lock on a page, where the page is identified by its -file address. */ -UNIV_INLINE -lock_t* -lock_rec_get_first_on_page_addr( -/*============================*/ - /* out: first lock, NULL if none exists */ - ulint space, /* in: space */ - ulint page_no)/* in: page number */ -{ - lock_t* lock; - - ut_ad(mutex_own(&kernel_mutex)); - - lock = HASH_GET_FIRST(lock_sys->rec_hash, - lock_rec_hash(space, page_no)); - while (lock) { - if ((lock->un_member.rec_lock.space == space) - && (lock->un_member.rec_lock.page_no == page_no)) { - - break; - } - - lock = HASH_GET_NEXT(hash, lock); - } - - return(lock); -} - -/************************************************************************* -Returns TRUE if there are explicit record locks on a page. 
*/ - -ibool -lock_rec_expl_exist_on_page( -/*========================*/ - /* out: TRUE if there are explicit record locks on - the page */ - ulint space, /* in: space id */ - ulint page_no)/* in: page number */ -{ - ibool ret; - - mutex_enter(&kernel_mutex); - - if (lock_rec_get_first_on_page_addr(space, page_no)) { - ret = TRUE; - } else { - ret = FALSE; - } - - mutex_exit(&kernel_mutex); - - return(ret); -} - -/************************************************************************* -Gets the first record lock on a page, where the page is identified by a -pointer to it. */ -UNIV_INLINE -lock_t* -lock_rec_get_first_on_page( -/*=======================*/ - /* out: first lock, NULL if none exists */ - byte* ptr) /* in: pointer to somewhere on the page */ -{ - ulint hash; - lock_t* lock; - ulint space; - ulint page_no; - - ut_ad(mutex_own(&kernel_mutex)); - - hash = buf_frame_get_lock_hash_val(ptr); - - lock = HASH_GET_FIRST(lock_sys->rec_hash, hash); - - while (lock) { - space = buf_frame_get_space_id(ptr); - page_no = buf_frame_get_page_no(ptr); - - if ((lock->un_member.rec_lock.space == space) - && (lock->un_member.rec_lock.page_no == page_no)) { - - break; - } - - lock = HASH_GET_NEXT(hash, lock); - } - - return(lock); -} - -/************************************************************************* -Gets the next explicit lock request on a record. 
*/ -UNIV_INLINE -lock_t* -lock_rec_get_next( -/*==============*/ - /* out: next lock, NULL if none exists */ - rec_t* rec, /* in: record on a page */ - lock_t* lock) /* in: lock */ -{ - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(lock_get_type(lock) == LOCK_REC); - - if (page_rec_is_comp(rec)) { - do { - lock = lock_rec_get_next_on_page(lock); - } while (lock && !lock_rec_get_nth_bit( - lock, rec_get_heap_no(rec, TRUE))); - } else { - do { - lock = lock_rec_get_next_on_page(lock); - } while (lock && !lock_rec_get_nth_bit( - lock, rec_get_heap_no(rec, FALSE))); - } - - return(lock); -} - -/************************************************************************* -Gets the first explicit lock request on a record. */ -UNIV_INLINE -lock_t* -lock_rec_get_first( -/*===============*/ - /* out: first lock, NULL if none exists */ - rec_t* rec) /* in: record on a page */ -{ - lock_t* lock; - - ut_ad(mutex_own(&kernel_mutex)); - - lock = lock_rec_get_first_on_page(rec); - if (UNIV_LIKELY_NULL(lock)) { - ulint heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec)); - - while (lock && !lock_rec_get_nth_bit(lock, heap_no)) { - lock = lock_rec_get_next_on_page(lock); - } - } - - return(lock); -} - -/************************************************************************* -Resets the record lock bitmap to zero. NOTE: does not touch the wait_lock -pointer in the transaction! This function is used in lock object creation -and resetting. 
*/ -static -void -lock_rec_bitmap_reset( -/*==================*/ - lock_t* lock) /* in: record lock */ -{ - byte* ptr; - ulint n_bytes; - ulint i; - - ut_ad(lock_get_type(lock) == LOCK_REC); - - /* Reset to zero the bitmap which resides immediately after the lock - struct */ - - ptr = (byte*)lock + sizeof(lock_t); - - n_bytes = lock_rec_get_n_bits(lock) / 8; - - ut_ad((lock_rec_get_n_bits(lock) % 8) == 0); - - for (i = 0; i < n_bytes; i++) { - - *ptr = 0; - ptr++; - } -} - -/************************************************************************* -Copies a record lock to heap. */ -static -lock_t* -lock_rec_copy( -/*==========*/ - /* out: copy of lock */ - lock_t* lock, /* in: record lock */ - mem_heap_t* heap) /* in: memory heap */ -{ - lock_t* dupl_lock; - ulint size; - - ut_ad(lock_get_type(lock) == LOCK_REC); - - size = sizeof(lock_t) + lock_rec_get_n_bits(lock) / 8; - - dupl_lock = mem_heap_alloc(heap, size); - - ut_memcpy(dupl_lock, lock, size); - - return(dupl_lock); -} - -/************************************************************************* -Gets the previous record lock set on a record. 
*/ - -lock_t* -lock_rec_get_prev( -/*==============*/ - /* out: previous lock on the same record, NULL if - none exists */ - lock_t* in_lock,/* in: record lock */ - ulint heap_no)/* in: heap number of the record */ -{ - lock_t* lock; - ulint space; - ulint page_no; - lock_t* found_lock = NULL; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(lock_get_type(in_lock) == LOCK_REC); - - space = in_lock->un_member.rec_lock.space; - page_no = in_lock->un_member.rec_lock.page_no; - - lock = lock_rec_get_first_on_page_addr(space, page_no); - - for (;;) { - ut_ad(lock); - - if (lock == in_lock) { - - return(found_lock); - } - - if (lock_rec_get_nth_bit(lock, heap_no)) { - - found_lock = lock; - } - - lock = lock_rec_get_next_on_page(lock); - } -} - -/*============= FUNCTIONS FOR ANALYZING TABLE LOCK QUEUE ================*/ - -/************************************************************************* -Checks if a transaction has the specified table lock, or stronger. */ -UNIV_INLINE -lock_t* -lock_table_has( -/*===========*/ - /* out: lock or NULL */ - trx_t* trx, /* in: transaction */ - dict_table_t* table, /* in: table */ - ulint mode) /* in: lock mode */ -{ - lock_t* lock; - - ut_ad(mutex_own(&kernel_mutex)); - - /* Look for stronger locks the same trx already has on the table */ - - lock = UT_LIST_GET_LAST(table->locks); - - while (lock != NULL) { - - if (lock->trx == trx - && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) { - - /* The same trx already has locked the table in - a mode stronger or equal to the mode given */ - - ut_ad(!lock_get_wait(lock)); - - return(lock); - } - - lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock); - } - - return(NULL); -} - -/*============= FUNCTIONS FOR ANALYZING RECORD LOCK QUEUE ================*/ - -/************************************************************************* -Checks if a transaction has a GRANTED explicit lock on rec stronger or equal -to precise_mode. 
*/ -UNIV_INLINE -lock_t* -lock_rec_has_expl( -/*==============*/ - /* out: lock or NULL */ - ulint precise_mode,/* in: LOCK_S or LOCK_X possibly ORed to - LOCK_GAP or LOCK_REC_NOT_GAP, - for a supremum record we regard this always a gap - type request */ - rec_t* rec, /* in: record */ - trx_t* trx) /* in: transaction */ -{ - lock_t* lock; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad((precise_mode & LOCK_MODE_MASK) == LOCK_S - || (precise_mode & LOCK_MODE_MASK) == LOCK_X); - ut_ad(!(precise_mode & LOCK_INSERT_INTENTION)); - - lock = lock_rec_get_first(rec); - - while (lock) { - if (lock->trx == trx - && lock_mode_stronger_or_eq(lock_get_mode(lock), - precise_mode & LOCK_MODE_MASK) - && !lock_get_wait(lock) - && (!lock_rec_get_rec_not_gap(lock) - || (precise_mode & LOCK_REC_NOT_GAP) - || page_rec_is_supremum(rec)) - && (!lock_rec_get_gap(lock) - || (precise_mode & LOCK_GAP) - || page_rec_is_supremum(rec)) - && (!lock_rec_get_insert_intention(lock))) { - - return(lock); - } - - lock = lock_rec_get_next(rec, lock); - } - - return(NULL); -} - -#ifndef UNIV_HOTBACKUP -/************************************************************************* -Checks if some other transaction has a lock request in the queue. 
*/ -static -lock_t* -lock_rec_other_has_expl_req( -/*========================*/ - /* out: lock or NULL */ - ulint mode, /* in: LOCK_S or LOCK_X */ - ulint gap, /* in: LOCK_GAP if also gap locks are taken - into account, or 0 if not */ - ulint wait, /* in: LOCK_WAIT if also waiting locks are - taken into account, or 0 if not */ - rec_t* rec, /* in: record to look at */ - trx_t* trx) /* in: transaction, or NULL if requests by all - transactions are taken into account */ -{ - lock_t* lock; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(mode == LOCK_X || mode == LOCK_S); - ut_ad(gap == 0 || gap == LOCK_GAP); - ut_ad(wait == 0 || wait == LOCK_WAIT); - - lock = lock_rec_get_first(rec); - - while (lock) { - if (lock->trx != trx - && (gap - || !(lock_rec_get_gap(lock) - || page_rec_is_supremum(rec))) - && (wait || !lock_get_wait(lock)) - && lock_mode_stronger_or_eq(lock_get_mode(lock), mode)) { - - return(lock); - } - - lock = lock_rec_get_next(rec, lock); - } - - return(NULL); -} -#endif /* !UNIV_HOTBACKUP */ - -/************************************************************************* -Checks if some other transaction has a conflicting explicit lock request -in the queue, so that we have to wait. */ -static -lock_t* -lock_rec_other_has_conflicting( -/*===========================*/ - /* out: lock or NULL */ - ulint mode, /* in: LOCK_S or LOCK_X, - possibly ORed to LOCK_GAP or LOC_REC_NOT_GAP, - LOCK_INSERT_INTENTION */ - rec_t* rec, /* in: record to look at */ - trx_t* trx) /* in: our transaction */ -{ - lock_t* lock; - - ut_ad(mutex_own(&kernel_mutex)); - - lock = lock_rec_get_first(rec); - - while (lock) { - if (lock_rec_has_to_wait(trx, mode, lock, - page_rec_is_supremum(rec))) { - - return(lock); - } - - lock = lock_rec_get_next(rec, lock); - } - - return(NULL); -} - -/************************************************************************* -Looks for a suitable type record lock struct by the same trx on the same page. 
-This can be used to save space when a new record lock should be set on a page: -no new struct is needed, if a suitable old is found. */ -UNIV_INLINE -lock_t* -lock_rec_find_similar_on_page( -/*==========================*/ - /* out: lock or NULL */ - ulint type_mode, /* in: lock type_mode field */ - rec_t* rec, /* in: record */ - trx_t* trx) /* in: transaction */ -{ - lock_t* lock; - ulint heap_no; - - ut_ad(mutex_own(&kernel_mutex)); - - heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec)); - lock = lock_rec_get_first_on_page(rec); - - while (lock != NULL) { - if (lock->trx == trx - && lock->type_mode == type_mode - && lock_rec_get_n_bits(lock) > heap_no) { - - return(lock); - } - - lock = lock_rec_get_next_on_page(lock); - } - - return(NULL); -} - -/************************************************************************* -Checks if some transaction has an implicit x-lock on a record in a secondary -index. */ - -trx_t* -lock_sec_rec_some_has_impl_off_kernel( -/*==================================*/ - /* out: transaction which has the x-lock, or - NULL */ - rec_t* rec, /* in: user record */ - dict_index_t* index, /* in: secondary index */ - const ulint* offsets)/* in: rec_get_offsets(rec, index) */ -{ - page_t* page; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(!(index->type & DICT_CLUSTERED)); - ut_ad(page_rec_is_user_rec(rec)); - ut_ad(rec_offs_validate(rec, index, offsets)); - - page = buf_frame_align(rec); - - /* Some transaction may have an implicit x-lock on the record only - if the max trx id for the page >= min trx id for the trx list, or - database recovery is running. We do not write the changes of a page - max trx id to the log, and therefore during recovery, this value - for a page may be incorrect. */ - - if (!(ut_dulint_cmp(page_get_max_trx_id(page), - trx_list_get_min_trx_id()) >= 0) - && !recv_recovery_is_on()) { - - return(NULL); - } - - /* Ok, in this case it is possible that some transaction has an - implicit x-lock. 
We have to look in the clustered index. */ - - if (!lock_check_trx_id_sanity(page_get_max_trx_id(page), - rec, index, offsets, TRUE)) { - buf_page_print(page); - - /* The page is corrupt: try to avoid a crash by returning - NULL */ - return(NULL); - } - - return(row_vers_impl_x_locked_off_kernel(rec, index, offsets)); -} - -/************************************************************************* -Return approximate number or record locks (bits set in the bitmap) for -this transaction. Since delete-marked records may be removed, the -record count will not be precise. */ - -ulint -lock_number_of_rows_locked( -/*=======================*/ - trx_t* trx) /* in: transaction */ -{ - lock_t* lock; - ulint n_records = 0; - ulint n_bits; - ulint n_bit; - - lock = UT_LIST_GET_FIRST(trx->trx_locks); - - while (lock) { - if (lock_get_type(lock) == LOCK_REC) { - n_bits = lock_rec_get_n_bits(lock); - - for (n_bit = 0; n_bit < n_bits; n_bit++) { - if (lock_rec_get_nth_bit(lock, n_bit)) { - n_records++; - } - } - } - - lock = UT_LIST_GET_NEXT(trx_locks, lock); - } - - return (n_records); -} - -/*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/ - -/************************************************************************* -Creates a new record lock and inserts it to the lock queue. Does NOT check -for deadlocks or lock compatibility! 
*/
static
lock_t*
lock_rec_create(
/*============*/
				/* out: created lock */
	ulint		type_mode,/* in: lock mode and wait flag, type is
				ignored and replaced by LOCK_REC */
	rec_t*		rec,	/* in: record on page */
	dict_index_t*	index,	/* in: index of record */
	trx_t*		trx)	/* in: transaction */
{
	page_t*	page;
	lock_t*	lock;
	ulint	space;
	ulint	page_no;
	ulint	heap_no;
	ulint	n_bytes;

	ut_ad(mutex_own(&kernel_mutex));

	page = buf_frame_align(rec);
	space = buf_frame_get_space_id(page);
	page_no = buf_frame_get_page_no(page);
	heap_no = rec_get_heap_no(rec, page_is_comp(page));

	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));

	/* If rec is the supremum record, then we reset the gap and
	LOCK_REC_NOT_GAP bits, as all locks on the supremum are
	automatically of the gap type */

	if (rec == page_get_supremum_rec(page)) {
		ut_ad(!(type_mode & LOCK_REC_NOT_GAP));

		type_mode &= ~(LOCK_GAP | LOCK_REC_NOT_GAP);
	}

	/* Size the bitmap for the current number of heap records on the
	page plus a safety margin, rounded up to whole bytes */
	n_bytes = 1 + (page_dir_get_n_heap(page)
		       + LOCK_PAGE_BITMAP_MARGIN) / 8;

	lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t) + n_bytes);

	UT_LIST_ADD_LAST(trx_locks, trx->trx_locks, lock);

	lock->trx = trx;
	lock->index = index;

	/* Whatever type bits the caller passed are replaced by LOCK_REC */
	lock->type_mode = (type_mode & ~LOCK_TYPE_MASK) | LOCK_REC;

	lock->un_member.rec_lock.space = space;
	lock->un_member.rec_lock.page_no = page_no;
	lock->un_member.rec_lock.n_bits = n_bytes * 8;

	/* Reset to zero the bitmap which resides immediately after the
	lock struct, then set the bit corresponding to rec */

	lock_rec_bitmap_reset(lock);
	lock_rec_set_nth_bit(lock, heap_no);

	HASH_INSERT(lock_t, hash, lock_sys->rec_hash,
		    lock_rec_fold(space, page_no), lock);

	if (type_mode & LOCK_WAIT) {
		/* A waiting request: also set the back pointer in trx */

		lock_set_lock_and_trx_wait(lock, trx);
	}

	return(lock);
}

/*************************************************************************
Enqueues a 
waiting request for a lock which cannot be granted immediately. -Checks for deadlocks. */ -static -ulint -lock_rec_enqueue_waiting( -/*=====================*/ - /* out: DB_LOCK_WAIT, DB_DEADLOCK, or - DB_QUE_THR_SUSPENDED, or DB_SUCCESS; - DB_SUCCESS means that there was a deadlock, - but another transaction was chosen as a - victim, and we got the lock immediately: - no need to wait then */ - ulint type_mode,/* in: lock mode this transaction is - requesting: LOCK_S or LOCK_X, possibly ORed - with LOCK_GAP or LOCK_REC_NOT_GAP, ORed - with LOCK_INSERT_INTENTION if this waiting - lock request is set when performing an - insert of an index record */ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: index of record */ - que_thr_t* thr) /* in: query thread */ -{ - lock_t* lock; - trx_t* trx; - - ut_ad(mutex_own(&kernel_mutex)); - - /* Test if there already is some other reason to suspend thread: - we do not enqueue a lock request if the query thread should be - stopped anyway */ - - if (que_thr_stop(thr)) { - - ut_error; - - return(DB_QUE_THR_SUSPENDED); - } - - trx = thr_get_trx(thr); - - if (trx->dict_operation) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: a record lock wait happens" - " in a dictionary operation!\n" - "InnoDB: Table name ", stderr); - ut_print_name(stderr, trx, TRUE, index->table_name); - fputs(".\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", - stderr); - } - - /* Enqueue the lock request that will wait to be granted */ - lock = lock_rec_create(type_mode | LOCK_WAIT, rec, index, trx); - - /* Check if a deadlock occurs: if yes, remove the lock request and - return an error code */ - - if (lock_deadlock_occurs(lock, trx)) { - - lock_reset_lock_and_trx_wait(lock); - lock_rec_reset_nth_bit(lock, rec_get_heap_no( - rec, page_rec_is_comp(rec))); - - return(DB_DEADLOCK); - } - - /* If there was a deadlock but we chose another transaction as a - victim, it is possible that we already have the lock now 
granted! */ - - if (trx->wait_lock == NULL) { - - return(DB_SUCCESS); - } - - trx->que_state = TRX_QUE_LOCK_WAIT; - trx->was_chosen_as_deadlock_victim = FALSE; - trx->wait_started = time(NULL); - - ut_a(que_thr_stop(thr)); - -#ifdef UNIV_DEBUG - if (lock_print_waits) { - fprintf(stderr, "Lock wait for trx %lu in index ", - (ulong) ut_dulint_get_low(trx->id)); - ut_print_name(stderr, trx, FALSE, index->name); - } -#endif /* UNIV_DEBUG */ - - return(DB_LOCK_WAIT); -} - -/************************************************************************* -Adds a record lock request in the record queue. The request is normally -added as the last in the queue, but if there are no waiting lock requests -on the record, and the request to be added is not a waiting request, we -can reuse a suitable record lock object already existing on the same page, -just setting the appropriate bit in its bitmap. This is a low-level function -which does NOT check for deadlocks or lock compatibility! */ -static -lock_t* -lock_rec_add_to_queue( -/*==================*/ - /* out: lock where the bit was set */ - ulint type_mode,/* in: lock mode, wait, gap etc. 
flags; - type is ignored and replaced by LOCK_REC */ - rec_t* rec, /* in: record on page */ - dict_index_t* index, /* in: index of record */ - trx_t* trx) /* in: transaction */ -{ - lock_t* lock; - lock_t* similar_lock = NULL; - ulint heap_no; - ibool somebody_waits = FALSE; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP)) - || ((type_mode & LOCK_MODE_MASK) != LOCK_S) - || !lock_rec_other_has_expl_req(LOCK_X, 0, LOCK_WAIT, - rec, trx)); - ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP)) - || ((type_mode & LOCK_MODE_MASK) != LOCK_X) - || !lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT, - rec, trx)); - - type_mode = type_mode | LOCK_REC; - - /* If rec is the supremum record, then we can reset the gap bit, as - all locks on the supremum are automatically of the gap type, and we - try to avoid unnecessary memory consumption of a new record lock - struct for a gap type lock */ - - if (page_rec_is_supremum(rec)) { - ut_ad(!(type_mode & LOCK_REC_NOT_GAP)); - - /* There should never be LOCK_REC_NOT_GAP on a supremum - record, but let us play safe */ - - type_mode = type_mode & ~(LOCK_GAP | LOCK_REC_NOT_GAP); - } - - /* Look for a waiting lock request on the same record or on a gap */ - - heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec)); - lock = lock_rec_get_first_on_page(rec); - - while (lock != NULL) { - if (lock_get_wait(lock) - && (lock_rec_get_nth_bit(lock, heap_no))) { - - somebody_waits = TRUE; - } - - lock = lock_rec_get_next_on_page(lock); - } - - /* Look for a similar record lock on the same page: if one is found - and there are no waiting lock requests, we can just set the bit */ - - similar_lock = lock_rec_find_similar_on_page(type_mode, rec, trx); - - if (similar_lock && !somebody_waits && !(type_mode & LOCK_WAIT)) { - - lock_rec_set_nth_bit(similar_lock, heap_no); - - return(similar_lock); - } - - return(lock_rec_create(type_mode, rec, index, trx)); -} - 
-/************************************************************************* -This is a fast routine for locking a record in the most common cases: -there are no explicit locks on the page, or there is just one lock, owned -by this transaction, and of the right type_mode. This is a low-level function -which does NOT look at implicit locks! Checks lock compatibility within -explicit locks. This function sets a normal next-key lock, or in the case of -a page supremum record, a gap type lock. */ -UNIV_INLINE -ibool -lock_rec_lock_fast( -/*===============*/ - /* out: TRUE if locking succeeded */ - ibool impl, /* in: if TRUE, no lock is set if no wait - is necessary: we assume that the caller will - set an implicit lock */ - ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly - ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: index of record */ - que_thr_t* thr) /* in: query thread */ -{ - lock_t* lock; - ulint heap_no; - trx_t* trx; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad((LOCK_MODE_MASK & mode) != LOCK_S - || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); - ut_ad((LOCK_MODE_MASK & mode) != LOCK_X - || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); - ut_ad((LOCK_MODE_MASK & mode) == LOCK_S - || (LOCK_MODE_MASK & mode) == LOCK_X); - ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP - || mode - (LOCK_MODE_MASK & mode) == 0 - || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); - - heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec)); - - lock = lock_rec_get_first_on_page(rec); - - trx = thr_get_trx(thr); - - if (lock == NULL) { - if (!impl) { - lock_rec_create(mode, rec, index, trx); - } - - return(TRUE); - } - - if (lock_rec_get_next_on_page(lock)) { - - return(FALSE); - } - - if (lock->trx != trx - || lock->type_mode != (mode | LOCK_REC) - || lock_rec_get_n_bits(lock) <= heap_no) { - - return(FALSE); - } - - if (!impl) { - /* If the nth bit of the record lock is already set then 
we - do not set a new lock bit, otherwise we do set */ - - if (!lock_rec_get_nth_bit(lock, heap_no)) { - lock_rec_set_nth_bit(lock, heap_no); - } - } - - return(TRUE); -} - -/************************************************************************* -This is the general, and slower, routine for locking a record. This is a -low-level function which does NOT look at implicit locks! Checks lock -compatibility within explicit locks. This function sets a normal next-key -lock, or in the case of a page supremum record, a gap type lock. */ -static -ulint -lock_rec_lock_slow( -/*===============*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, or error - code */ - ibool impl, /* in: if TRUE, no lock is set if no wait is - necessary: we assume that the caller will set - an implicit lock */ - ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly - ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: index of record */ - que_thr_t* thr) /* in: query thread */ -{ - trx_t* trx; - ulint err; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad((LOCK_MODE_MASK & mode) != LOCK_S - || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); - ut_ad((LOCK_MODE_MASK & mode) != LOCK_X - || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); - ut_ad((LOCK_MODE_MASK & mode) == LOCK_S - || (LOCK_MODE_MASK & mode) == LOCK_X); - ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP - || mode - (LOCK_MODE_MASK & mode) == 0 - || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); - - trx = thr_get_trx(thr); - - if (lock_rec_has_expl(mode, rec, trx)) { - /* The trx already has a strong enough lock on rec: do - nothing */ - - err = DB_SUCCESS; - } else if (lock_rec_other_has_conflicting(mode, rec, trx)) { - - /* If another transaction has a non-gap conflicting request in - the queue, as this transaction does not have a lock strong - enough already granted on the record, we have to wait. 
*/ - - err = lock_rec_enqueue_waiting(mode, rec, index, thr); - } else { - if (!impl) { - /* Set the requested lock on the record */ - - lock_rec_add_to_queue(LOCK_REC | mode, rec, index, - trx); - } - - err = DB_SUCCESS; - } - - return(err); -} - -/************************************************************************* -Tries to lock the specified record in the mode requested. If not immediately -possible, enqueues a waiting lock request. This is a low-level function -which does NOT look at implicit locks! Checks lock compatibility within -explicit locks. This function sets a normal next-key lock, or in the case -of a page supremum record, a gap type lock. */ -static -ulint -lock_rec_lock( -/*==========*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, or error - code */ - ibool impl, /* in: if TRUE, no lock is set if no wait is - necessary: we assume that the caller will set - an implicit lock */ - ulint mode, /* in: lock mode: LOCK_X or LOCK_S possibly - ORed to either LOCK_GAP or LOCK_REC_NOT_GAP */ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: index of record */ - que_thr_t* thr) /* in: query thread */ -{ - ulint err; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad((LOCK_MODE_MASK & mode) != LOCK_S - || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); - ut_ad((LOCK_MODE_MASK & mode) != LOCK_X - || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); - ut_ad((LOCK_MODE_MASK & mode) == LOCK_S - || (LOCK_MODE_MASK & mode) == LOCK_X); - ut_ad(mode - (LOCK_MODE_MASK & mode) == LOCK_GAP - || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP - || mode - (LOCK_MODE_MASK & mode) == 0); - - if (lock_rec_lock_fast(impl, mode, rec, index, thr)) { - - /* We try a simplified and faster subroutine for the most - common cases */ - - err = DB_SUCCESS; - } else { - err = lock_rec_lock_slow(impl, mode, rec, index, thr); - } - - return(err); -} - -/************************************************************************* -Checks if a waiting record lock request 
still has to wait in a queue. */ -static -ibool -lock_rec_has_to_wait_in_queue( -/*==========================*/ - /* out: TRUE if still has to wait */ - lock_t* wait_lock) /* in: waiting record lock */ -{ - lock_t* lock; - ulint space; - ulint page_no; - ulint heap_no; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(lock_get_wait(wait_lock)); - ut_ad(lock_get_type(wait_lock) == LOCK_REC); - - space = wait_lock->un_member.rec_lock.space; - page_no = wait_lock->un_member.rec_lock.page_no; - heap_no = lock_rec_find_set_bit(wait_lock); - - lock = lock_rec_get_first_on_page_addr(space, page_no); - - while (lock != wait_lock) { - - if (lock_rec_get_nth_bit(lock, heap_no) - && lock_has_to_wait(wait_lock, lock)) { - - return(TRUE); - } - - lock = lock_rec_get_next_on_page(lock); - } - - return(FALSE); -} - -/***************************************************************** -Grants a lock to a waiting lock request and releases the waiting -transaction. */ -static -void -lock_grant( -/*=======*/ - lock_t* lock) /* in: waiting lock request */ -{ - ut_ad(mutex_own(&kernel_mutex)); - - lock_reset_lock_and_trx_wait(lock); - - if (lock_get_mode(lock) == LOCK_AUTO_INC) { - - if (lock->trx->auto_inc_lock != NULL) { - fprintf(stderr, - "InnoDB: Error: trx already had" - " an AUTO-INC lock!\n"); - } - - /* Store pointer to lock to trx so that we know to - release it at the end of the SQL statement */ - - lock->trx->auto_inc_lock = lock; - } - -#ifdef UNIV_DEBUG - if (lock_print_waits) { - fprintf(stderr, "Lock wait for trx %lu ends\n", - (ulong) ut_dulint_get_low(lock->trx->id)); - } -#endif /* UNIV_DEBUG */ - - /* If we are resolving a deadlock by choosing another transaction - as a victim, then our original transaction may not be in the - TRX_QUE_LOCK_WAIT state, and there is no need to end the lock wait - for it */ - - if (lock->trx->que_state == TRX_QUE_LOCK_WAIT) { - trx_end_lock_wait(lock->trx); - } -} - -/***************************************************************** -Cancels 
a waiting record lock request and releases the waiting transaction -that requested it. NOTE: does NOT check if waiting lock requests behind this -one can now be granted! */ -static -void -lock_rec_cancel( -/*============*/ - lock_t* lock) /* in: waiting record lock request */ -{ - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(lock_get_type(lock) == LOCK_REC); - - /* Reset the bit (there can be only one set bit) in the lock bitmap */ - lock_rec_reset_nth_bit(lock, lock_rec_find_set_bit(lock)); - - /* Reset the wait flag and the back pointer to lock in trx */ - - lock_reset_lock_and_trx_wait(lock); - - /* The following function releases the trx from lock wait */ - - trx_end_lock_wait(lock->trx); -} - -/***************************************************************** -Removes a record lock request, waiting or granted, from the queue and -grants locks to other transactions in the queue if they now are entitled -to a lock. NOTE: all record locks contained in in_lock are removed. */ -static -void -lock_rec_dequeue_from_page( -/*=======================*/ - lock_t* in_lock)/* in: record lock object: all record locks which - are contained in this lock object are removed; - transactions waiting behind will get their lock - requests granted, if they are now qualified to it */ -{ - ulint space; - ulint page_no; - lock_t* lock; - trx_t* trx; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(lock_get_type(in_lock) == LOCK_REC); - - trx = in_lock->trx; - - space = in_lock->un_member.rec_lock.space; - page_no = in_lock->un_member.rec_lock.page_no; - - HASH_DELETE(lock_t, hash, lock_sys->rec_hash, - lock_rec_fold(space, page_no), in_lock); - - UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock); - - /* Check if waiting locks in the queue can now be granted: grant - locks if there are no conflicting locks ahead. 
*/ - - lock = lock_rec_get_first_on_page_addr(space, page_no); - - while (lock != NULL) { - if (lock_get_wait(lock) - && !lock_rec_has_to_wait_in_queue(lock)) { - - /* Grant the lock */ - lock_grant(lock); - } - - lock = lock_rec_get_next_on_page(lock); - } -} - -/***************************************************************** -Removes a record lock request, waiting or granted, from the queue. */ -static -void -lock_rec_discard( -/*=============*/ - lock_t* in_lock)/* in: record lock object: all record locks which - are contained in this lock object are removed */ -{ - ulint space; - ulint page_no; - trx_t* trx; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(lock_get_type(in_lock) == LOCK_REC); - - trx = in_lock->trx; - - space = in_lock->un_member.rec_lock.space; - page_no = in_lock->un_member.rec_lock.page_no; - - HASH_DELETE(lock_t, hash, lock_sys->rec_hash, - lock_rec_fold(space, page_no), in_lock); - - UT_LIST_REMOVE(trx_locks, trx->trx_locks, in_lock); -} - -/***************************************************************** -Removes record lock objects set on an index page which is discarded. This -function does not move locks, or check for waiting locks, therefore the -lock bitmaps must already be reset when this function is called. 
*/ -static -void -lock_rec_free_all_from_discard_page( -/*================================*/ - page_t* page) /* in: page to be discarded */ -{ - ulint space; - ulint page_no; - lock_t* lock; - lock_t* next_lock; - - ut_ad(mutex_own(&kernel_mutex)); - - space = buf_frame_get_space_id(page); - page_no = buf_frame_get_page_no(page); - - lock = lock_rec_get_first_on_page_addr(space, page_no); - - while (lock != NULL) { - ut_ad(lock_rec_find_set_bit(lock) == ULINT_UNDEFINED); - ut_ad(!lock_get_wait(lock)); - - next_lock = lock_rec_get_next_on_page(lock); - - lock_rec_discard(lock); - - lock = next_lock; - } -} - -/*============= RECORD LOCK MOVING AND INHERITING ===================*/ - -/***************************************************************** -Resets the lock bits for a single record. Releases transactions waiting for -lock requests here. */ -static -void -lock_rec_reset_and_release_wait( -/*============================*/ - rec_t* rec) /* in: record whose locks bits should be reset */ -{ - lock_t* lock; - ulint heap_no; - - ut_ad(mutex_own(&kernel_mutex)); - - heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec)); - - lock = lock_rec_get_first(rec); - - while (lock != NULL) { - if (lock_get_wait(lock)) { - lock_rec_cancel(lock); - } else { - lock_rec_reset_nth_bit(lock, heap_no); - } - - lock = lock_rec_get_next(rec, lock); - } -} - -/***************************************************************** -Makes a record to inherit the locks (except LOCK_INSERT_INTENTION type) -of another record as gap type locks, but does not reset the lock bits of -the other record. Also waiting lock requests on rec are inherited as -GRANTED gap locks. 
*/

void
lock_rec_inherit_to_gap(
/*====================*/
	rec_t*	heir,	/* in: record which inherits */
	rec_t*	rec)	/* in: record from which inherited; does NOT reset
			the locks on this record */
{
	lock_t*	src;

	ut_ad(mutex_own(&kernel_mutex));

	for (src = lock_rec_get_first(rec);
	     src != NULL;
	     src = lock_rec_get_next(rec, src)) {

		/* Insert intention locks are never inherited */

		if (lock_rec_get_insert_intention(src)) {

			continue;
		}

		/* If srv_locks_unsafe_for_binlog is TRUE or the session is
		using the READ COMMITTED isolation level, X-locks set by an
		UPDATE or a DELETE are not inherited as gap type locks.
		S-locks set by a consistency constraint ARE still
		inherited. */

		if ((srv_locks_unsafe_for_binlog
		     || src->trx->isolation_level == TRX_ISO_READ_COMMITTED)
		    && lock_get_mode(src) == LOCK_X) {

			continue;
		}

		lock_rec_add_to_queue(LOCK_REC | lock_get_mode(src)
				      | LOCK_GAP,
				      heir, src->index, src->trx);
	}
}

/*****************************************************************
Makes a record to inherit the gap locks (except LOCK_INSERT_INTENTION type)
of another record as gap type locks, but does not reset the lock bits of the
other record. Also waiting lock requests are inherited as GRANTED gap locks.
*/ -static -void -lock_rec_inherit_to_gap_if_gap_lock( -/*================================*/ - rec_t* heir, /* in: record which inherits */ - rec_t* rec) /* in: record from which inherited; does NOT reset - the locks on this record */ -{ - lock_t* lock; - - ut_ad(mutex_own(&kernel_mutex)); - - lock = lock_rec_get_first(rec); - - while (lock != NULL) { - if (!lock_rec_get_insert_intention(lock) - && (page_rec_is_supremum(rec) - || !lock_rec_get_rec_not_gap(lock))) { - - lock_rec_add_to_queue(LOCK_REC | lock_get_mode(lock) - | LOCK_GAP, - heir, lock->index, lock->trx); - } - - lock = lock_rec_get_next(rec, lock); - } -} - -/***************************************************************** -Moves the locks of a record to another record and resets the lock bits of -the donating record. */ -static -void -lock_rec_move( -/*==========*/ - rec_t* receiver, /* in: record which gets locks; this record - must have no lock requests on it! */ - rec_t* donator, /* in: record which gives locks */ - ulint comp) /* in: nonzero=compact page format */ -{ - lock_t* lock; - ulint heap_no; - ulint type_mode; - - ut_ad(mutex_own(&kernel_mutex)); - - heap_no = rec_get_heap_no(donator, comp); - - lock = lock_rec_get_first(donator); - - ut_ad(lock_rec_get_first(receiver) == NULL); - - while (lock != NULL) { - type_mode = lock->type_mode; - - lock_rec_reset_nth_bit(lock, heap_no); - - if (lock_get_wait(lock)) { - lock_reset_lock_and_trx_wait(lock); - } - - /* Note that we FIRST reset the bit, and then set the lock: - the function works also if donator == receiver */ - - lock_rec_add_to_queue(type_mode, receiver, lock->index, - lock->trx); - lock = lock_rec_get_next(donator, lock); - } - - ut_ad(lock_rec_get_first(donator) == NULL); -} - -/***************************************************************** -Updates the lock table when we have reorganized a page. 
NOTE: we copy -also the locks set on the infimum of the page; the infimum may carry -locks if an update of a record is occurring on the page, and its locks -were temporarily stored on the infimum. */ - -void -lock_move_reorganize_page( -/*======================*/ - page_t* page, /* in: old index page, now reorganized */ - page_t* old_page) /* in: copy of the old, not reorganized page */ -{ - lock_t* lock; - lock_t* old_lock; - page_cur_t cur1; - page_cur_t cur2; - ulint old_heap_no; - UT_LIST_BASE_NODE_T(lock_t) old_locks; - mem_heap_t* heap = NULL; - rec_t* sup; - ulint comp; - - lock_mutex_enter_kernel(); - - lock = lock_rec_get_first_on_page(page); - - if (lock == NULL) { - lock_mutex_exit_kernel(); - - return; - } - - heap = mem_heap_create(256); - - /* Copy first all the locks on the page to heap and reset the - bitmaps in the original locks; chain the copies of the locks - using the trx_locks field in them. */ - - UT_LIST_INIT(old_locks); - - while (lock != NULL) { - - /* Make a copy of the lock */ - old_lock = lock_rec_copy(lock, heap); - - UT_LIST_ADD_LAST(trx_locks, old_locks, old_lock); - - /* Reset bitmap of lock */ - lock_rec_bitmap_reset(lock); - - if (lock_get_wait(lock)) { - lock_reset_lock_and_trx_wait(lock); - } - - lock = lock_rec_get_next_on_page(lock); - } - - sup = page_get_supremum_rec(page); - - lock = UT_LIST_GET_FIRST(old_locks); - - comp = page_is_comp(page); - ut_ad(comp == page_is_comp(old_page)); - - while (lock) { - /* NOTE: we copy also the locks set on the infimum and - supremum of the page; the infimum may carry locks if an - update of a record is occurring on the page, and its locks - were temporarily stored on the infimum */ - - page_cur_set_before_first(page, &cur1); - page_cur_set_before_first(old_page, &cur2); - - /* Set locks according to old locks */ - for (;;) { - ut_ad(comp || !memcmp(page_cur_get_rec(&cur1), - page_cur_get_rec(&cur2), - rec_get_data_size_old( - page_cur_get_rec( - &cur2)))); - old_heap_no = 
rec_get_heap_no(page_cur_get_rec(&cur2), - comp); - - if (lock_rec_get_nth_bit(lock, old_heap_no)) { - - /* NOTE that the old lock bitmap could be too - small for the new heap number! */ - - lock_rec_add_to_queue(lock->type_mode, - page_cur_get_rec(&cur1), - lock->index, lock->trx); - - /* if ((page_cur_get_rec(&cur1) == sup) - && lock_get_wait(lock)) { - fprintf(stderr, - "---\n--\n!!!Lock reorg: supr type %lu\n", - lock->type_mode); - } */ - } - - if (page_cur_get_rec(&cur1) == sup) { - - break; - } - - page_cur_move_to_next(&cur1); - page_cur_move_to_next(&cur2); - } - - /* Remember that we chained old locks on the trx_locks field */ - - lock = UT_LIST_GET_NEXT(trx_locks, lock); - } - - lock_mutex_exit_kernel(); - - mem_heap_free(heap); - -#if 0 - ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page), - buf_frame_get_page_no(page))); -#endif -} - -/***************************************************************** -Moves the explicit locks on user records to another page if a record -list end is moved to another page. */ - -void -lock_move_rec_list_end( -/*===================*/ - page_t* new_page, /* in: index page to move to */ - page_t* page, /* in: index page */ - rec_t* rec) /* in: record on page: this is the - first record moved */ -{ - lock_t* lock; - page_cur_t cur1; - page_cur_t cur2; - ulint heap_no; - rec_t* sup; - ulint type_mode; - ulint comp; - ut_ad(page == buf_frame_align(rec)); - - lock_mutex_enter_kernel(); - - /* Note: when we move locks from record to record, waiting locks - and possible granted gap type locks behind them are enqueued in - the original order, because new elements are inserted to a hash - table to the end of the hash chain, and lock_rec_add_to_queue - does not reuse locks if there are waiters in the queue. 
*/ - - sup = page_get_supremum_rec(page); - - lock = lock_rec_get_first_on_page(page); - - comp = page_is_comp(page); - - while (lock != NULL) { - - page_cur_position(rec, &cur1); - - if (page_cur_is_before_first(&cur1)) { - page_cur_move_to_next(&cur1); - } - - page_cur_set_before_first(new_page, &cur2); - page_cur_move_to_next(&cur2); - - /* Copy lock requests on user records to new page and - reset the lock bits on the old */ - - while (page_cur_get_rec(&cur1) != sup) { - ut_ad(comp || !memcmp(page_cur_get_rec(&cur1), - page_cur_get_rec(&cur2), - rec_get_data_size_old( - page_cur_get_rec( - &cur2)))); - heap_no = rec_get_heap_no(page_cur_get_rec(&cur1), - comp); - - if (lock_rec_get_nth_bit(lock, heap_no)) { - type_mode = lock->type_mode; - - lock_rec_reset_nth_bit(lock, heap_no); - - if (lock_get_wait(lock)) { - lock_reset_lock_and_trx_wait(lock); - } - - lock_rec_add_to_queue(type_mode, - page_cur_get_rec(&cur2), - lock->index, lock->trx); - } - - page_cur_move_to_next(&cur1); - page_cur_move_to_next(&cur2); - } - - lock = lock_rec_get_next_on_page(lock); - } - - lock_mutex_exit_kernel(); - -#if 0 - ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page), - buf_frame_get_page_no(page))); - ut_ad(lock_rec_validate_page(buf_frame_get_space_id(new_page), - buf_frame_get_page_no(new_page))); -#endif -} - -/***************************************************************** -Moves the explicit locks on user records to another page if a record -list start is moved to another page. 
*/ - -void -lock_move_rec_list_start( -/*=====================*/ - page_t* new_page, /* in: index page to move to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page: this is the - first record NOT copied */ - rec_t* old_end) /* in: old previous-to-last record on - new_page before the records were copied */ -{ - lock_t* lock; - page_cur_t cur1; - page_cur_t cur2; - ulint heap_no; - ulint type_mode; - ulint comp; - - ut_a(new_page); - - lock_mutex_enter_kernel(); - - lock = lock_rec_get_first_on_page(page); - comp = page_is_comp(page); - ut_ad(comp == page_is_comp(new_page)); - ut_ad(page == buf_frame_align(rec)); - - while (lock != NULL) { - - page_cur_set_before_first(page, &cur1); - page_cur_move_to_next(&cur1); - - page_cur_position(old_end, &cur2); - page_cur_move_to_next(&cur2); - - /* Copy lock requests on user records to new page and - reset the lock bits on the old */ - - while (page_cur_get_rec(&cur1) != rec) { - ut_ad(comp || !memcmp(page_cur_get_rec(&cur1), - page_cur_get_rec(&cur2), - rec_get_data_size_old( - page_cur_get_rec( - &cur2)))); - heap_no = rec_get_heap_no(page_cur_get_rec(&cur1), - comp); - - if (lock_rec_get_nth_bit(lock, heap_no)) { - type_mode = lock->type_mode; - - lock_rec_reset_nth_bit(lock, heap_no); - - if (lock_get_wait(lock)) { - lock_reset_lock_and_trx_wait(lock); - } - - lock_rec_add_to_queue(type_mode, - page_cur_get_rec(&cur2), - lock->index, lock->trx); - } - - page_cur_move_to_next(&cur1); - page_cur_move_to_next(&cur2); - } - - lock = lock_rec_get_next_on_page(lock); - } - - lock_mutex_exit_kernel(); -#if 0 - ut_ad(lock_rec_validate_page(buf_frame_get_space_id(page), - buf_frame_get_page_no(page))); - ut_ad(lock_rec_validate_page(buf_frame_get_space_id(new_page), - buf_frame_get_page_no(new_page))); -#endif -} - -/***************************************************************** -Updates the lock table when a page is split to the right. 
*/ - -void -lock_update_split_right( -/*====================*/ - page_t* right_page, /* in: right page */ - page_t* left_page) /* in: left page */ -{ - ulint comp; - lock_mutex_enter_kernel(); - comp = page_is_comp(left_page); - ut_ad(comp == page_is_comp(right_page)); - - /* Move the locks on the supremum of the left page to the supremum - of the right page */ - - lock_rec_move(page_get_supremum_rec(right_page), - page_get_supremum_rec(left_page), comp); - - /* Inherit the locks to the supremum of left page from the successor - of the infimum on right page */ - - lock_rec_inherit_to_gap(page_get_supremum_rec(left_page), - page_rec_get_next( - page_get_infimum_rec(right_page))); - - lock_mutex_exit_kernel(); -} - -/***************************************************************** -Updates the lock table when a page is merged to the right. */ - -void -lock_update_merge_right( -/*====================*/ - rec_t* orig_succ, /* in: original successor of infimum - on the right page before merge */ - page_t* left_page) /* in: merged index page which will be - discarded */ -{ - lock_mutex_enter_kernel(); - - /* Inherit the locks from the supremum of the left page to the - original successor of infimum on the right page, to which the left - page was merged */ - - lock_rec_inherit_to_gap(orig_succ, page_get_supremum_rec(left_page)); - - /* Reset the locks on the supremum of the left page, releasing - waiting transactions */ - - lock_rec_reset_and_release_wait(page_get_supremum_rec(left_page)); - - lock_rec_free_all_from_discard_page(left_page); - - lock_mutex_exit_kernel(); -} - -/***************************************************************** -Updates the lock table when the root page is copied to another in -btr_root_raise_and_insert. 
Note that we leave lock structs on the -root page, even though they do not make sense on other than leaf -pages: the reason is that in a pessimistic update the infimum record -of the root page will act as a dummy carrier of the locks of the record -to be updated. */ - -void -lock_update_root_raise( -/*===================*/ - page_t* new_page, /* in: index page to which copied */ - page_t* root) /* in: root page */ -{ - ulint comp; - lock_mutex_enter_kernel(); - comp = page_is_comp(root); - ut_ad(comp == page_is_comp(new_page)); - - /* Move the locks on the supremum of the root to the supremum - of new_page */ - - lock_rec_move(page_get_supremum_rec(new_page), - page_get_supremum_rec(root), comp); - lock_mutex_exit_kernel(); -} - -/***************************************************************** -Updates the lock table when a page is copied to another and the original page -is removed from the chain of leaf pages, except if page is the root! */ - -void -lock_update_copy_and_discard( -/*=========================*/ - page_t* new_page, /* in: index page to which copied */ - page_t* page) /* in: index page; NOT the root! */ -{ - ulint comp; - lock_mutex_enter_kernel(); - comp = page_is_comp(page); - ut_ad(comp == page_is_comp(new_page)); - - /* Move the locks on the supremum of the old page to the supremum - of new_page */ - - lock_rec_move(page_get_supremum_rec(new_page), - page_get_supremum_rec(page), comp); - lock_rec_free_all_from_discard_page(page); - - lock_mutex_exit_kernel(); -} - -/***************************************************************** -Updates the lock table when a page is split to the left. 
*/ - -void -lock_update_split_left( -/*===================*/ - page_t* right_page, /* in: right page */ - page_t* left_page) /* in: left page */ -{ - lock_mutex_enter_kernel(); - - /* Inherit the locks to the supremum of the left page from the - successor of the infimum on the right page */ - - lock_rec_inherit_to_gap(page_get_supremum_rec(left_page), - page_rec_get_next( - page_get_infimum_rec(right_page))); - - lock_mutex_exit_kernel(); -} - -/***************************************************************** -Updates the lock table when a page is merged to the left. */ - -void -lock_update_merge_left( -/*===================*/ - page_t* left_page, /* in: left page to which merged */ - rec_t* orig_pred, /* in: original predecessor of supremum - on the left page before merge */ - page_t* right_page) /* in: merged index page which will be - discarded */ -{ - rec_t* left_next_rec; - rec_t* left_supremum; - ulint comp; - lock_mutex_enter_kernel(); - comp = page_is_comp(left_page); - ut_ad(comp == page_is_comp(right_page)); - ut_ad(left_page == buf_frame_align(orig_pred)); - - left_next_rec = page_rec_get_next(orig_pred); - left_supremum = page_get_supremum_rec(left_page); - - if (UNIV_LIKELY(left_next_rec != left_supremum)) { - - /* Inherit the locks on the supremum of the left page to the - first record which was moved from the right page */ - - lock_rec_inherit_to_gap(left_next_rec, left_supremum); - - /* Reset the locks on the supremum of the left page, - releasing waiting transactions */ - - lock_rec_reset_and_release_wait(left_supremum); - } - - /* Move the locks from the supremum of right page to the supremum - of the left page */ - - lock_rec_move(left_supremum, page_get_supremum_rec(right_page), comp); - - lock_rec_free_all_from_discard_page(right_page); - - lock_mutex_exit_kernel(); -} - -/***************************************************************** -Resets the original locks on heir and replaces them with gap type locks -inherited from rec. 
*/ - -void -lock_rec_reset_and_inherit_gap_locks( -/*=================================*/ - rec_t* heir, /* in: heir record */ - rec_t* rec) /* in: record */ -{ - mutex_enter(&kernel_mutex); - - lock_rec_reset_and_release_wait(heir); - - lock_rec_inherit_to_gap(heir, rec); - - mutex_exit(&kernel_mutex); -} - -/***************************************************************** -Updates the lock table when a page is discarded. */ - -void -lock_update_discard( -/*================*/ - rec_t* heir, /* in: record which will inherit the locks */ - page_t* page) /* in: index page which will be discarded */ -{ - rec_t* rec; - - lock_mutex_enter_kernel(); - - if (NULL == lock_rec_get_first_on_page(page)) { - /* No locks exist on page, nothing to do */ - - lock_mutex_exit_kernel(); - - return; - } - - /* Inherit all the locks on the page to the record and reset all - the locks on the page */ - - rec = page_get_infimum_rec(page); - - for (;;) { - lock_rec_inherit_to_gap(heir, rec); - - /* Reset the locks on rec, releasing waiting transactions */ - - lock_rec_reset_and_release_wait(rec); - - if (page_rec_is_supremum(rec)) { - - break; - } - - rec = page_rec_get_next(rec); - } - - lock_rec_free_all_from_discard_page(page); - - lock_mutex_exit_kernel(); -} - -/***************************************************************** -Updates the lock table when a new user record is inserted. */ - -void -lock_update_insert( -/*===============*/ - rec_t* rec) /* in: the inserted record */ -{ - lock_mutex_enter_kernel(); - - /* Inherit the gap-locking locks for rec, in gap mode, from the next - record */ - - lock_rec_inherit_to_gap_if_gap_lock(rec, page_rec_get_next(rec)); - - lock_mutex_exit_kernel(); -} - -/***************************************************************** -Updates the lock table when a record is removed. 
*/ - -void -lock_update_delete( -/*===============*/ - rec_t* rec) /* in: the record to be removed */ -{ - lock_mutex_enter_kernel(); - - /* Let the next record inherit the locks from rec, in gap mode */ - - lock_rec_inherit_to_gap(page_rec_get_next(rec), rec); - - /* Reset the lock bits on rec and release waiting transactions */ - - lock_rec_reset_and_release_wait(rec); - - lock_mutex_exit_kernel(); -} - -/************************************************************************* -Stores on the page infimum record the explicit locks of another record. -This function is used to store the lock state of a record when it is -updated and the size of the record changes in the update. The record -is moved in such an update, perhaps to another page. The infimum record -acts as a dummy carrier record, taking care of lock releases while the -actual record is being moved. */ - -void -lock_rec_store_on_page_infimum( -/*===========================*/ - page_t* page, /* in: page containing the record */ - rec_t* rec) /* in: record whose lock state is stored - on the infimum record of the same page; lock - bits are reset on the record */ -{ - ut_ad(page == buf_frame_align(rec)); - - lock_mutex_enter_kernel(); - - lock_rec_move(page_get_infimum_rec(page), rec, page_is_comp(page)); - - lock_mutex_exit_kernel(); -} - -/************************************************************************* -Restores the state of explicit lock requests on a single record, where the -state was stored on the infimum of the page. 
*/ - -void -lock_rec_restore_from_page_infimum( -/*===============================*/ - rec_t* rec, /* in: record whose lock state is restored */ - page_t* page) /* in: page (rec is not necessarily on this page) - whose infimum stored the lock state; lock bits are - reset on the infimum */ -{ - ulint comp; - lock_mutex_enter_kernel(); - comp = page_is_comp(page); - ut_ad(!comp == !page_rec_is_comp(rec)); - - lock_rec_move(rec, page_get_infimum_rec(page), comp); - - lock_mutex_exit_kernel(); -} - -/*=========== DEADLOCK CHECKING ======================================*/ - -/************************************************************************ -Checks if a lock request results in a deadlock. */ -static -ibool -lock_deadlock_occurs( -/*=================*/ - /* out: TRUE if a deadlock was detected and we - chose trx as a victim; FALSE if no deadlock, or - there was a deadlock, but we chose other - transaction(s) as victim(s) */ - lock_t* lock, /* in: lock the transaction is requesting */ - trx_t* trx) /* in: transaction */ -{ - dict_table_t* table; - dict_index_t* index; - trx_t* mark_trx; - ulint ret; - ulint cost = 0; - - ut_ad(trx); - ut_ad(lock); - ut_ad(mutex_own(&kernel_mutex)); -retry: - /* We check that adding this trx to the waits-for graph - does not produce a cycle. 
First mark all active transactions - with 0: */ - - mark_trx = UT_LIST_GET_FIRST(trx_sys->trx_list); - - while (mark_trx) { - mark_trx->deadlock_mark = 0; - mark_trx = UT_LIST_GET_NEXT(trx_list, mark_trx); - } - - ret = lock_deadlock_recursive(trx, trx, lock, &cost, 0); - - if (ret == LOCK_VICTIM_IS_OTHER) { - /* We chose some other trx as a victim: retry if there still - is a deadlock */ - - goto retry; - } - - if (ret == LOCK_VICTIM_IS_START) { - if (lock_get_type(lock) & LOCK_TABLE) { - table = lock->un_member.tab_lock.table; - index = NULL; - } else { - index = lock->index; - table = index->table; - } - - lock_deadlock_found = TRUE; - - fputs("*** WE ROLL BACK TRANSACTION (2)\n", - lock_latest_err_file); - - return(TRUE); - } - - return(FALSE); -} - -/************************************************************************ -Looks recursively for a deadlock. */ -static -ulint -lock_deadlock_recursive( -/*====================*/ - /* out: 0 if no deadlock found, - LOCK_VICTIM_IS_START if there was a deadlock - and we chose 'start' as the victim, - LOCK_VICTIM_IS_OTHER if a deadlock - was found and we chose some other trx as a - victim: we must do the search again in this - last case because there may be another - deadlock! */ - trx_t* start, /* in: recursion starting point */ - trx_t* trx, /* in: a transaction waiting for a lock */ - lock_t* wait_lock, /* in: the lock trx is waiting to be granted */ - ulint* cost, /* in/out: number of calculation steps thus - far: if this exceeds LOCK_MAX_N_STEPS_... 
- we return LOCK_VICTIM_IS_START */ - ulint depth) /* in: recursion depth: if this exceeds - LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we - return LOCK_VICTIM_IS_START */ -{ - lock_t* lock; - ulint bit_no = ULINT_UNDEFINED; - trx_t* lock_trx; - ulint ret; - - ut_a(trx); - ut_a(start); - ut_a(wait_lock); - ut_ad(mutex_own(&kernel_mutex)); - - if (trx->deadlock_mark == 1) { - /* We have already exhaustively searched the subtree starting - from this trx */ - - return(0); - } - - *cost = *cost + 1; - - lock = wait_lock; - - if (lock_get_type(wait_lock) == LOCK_REC) { - - bit_no = lock_rec_find_set_bit(wait_lock); - - ut_a(bit_no != ULINT_UNDEFINED); - } - - /* Look at the locks ahead of wait_lock in the lock queue */ - - for (;;) { - if (lock_get_type(lock) & LOCK_TABLE) { - - lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, - lock); - } else { - ut_ad(lock_get_type(lock) == LOCK_REC); - ut_a(bit_no != ULINT_UNDEFINED); - - lock = lock_rec_get_prev(lock, bit_no); - } - - if (lock == NULL) { - /* We can mark this subtree as searched */ - trx->deadlock_mark = 1; - - return(FALSE); - } - - if (lock_has_to_wait(wait_lock, lock)) { - - ibool too_far - = depth > LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK - || *cost > LOCK_MAX_N_STEPS_IN_DEADLOCK_CHECK; - - lock_trx = lock->trx; - - if (lock_trx == start || too_far) { - - /* We came back to the recursion starting - point: a deadlock detected; or we have - searched the waits-for graph too long */ - - FILE* ef = lock_latest_err_file; - - rewind(ef); - ut_print_timestamp(ef); - - fputs("\n*** (1) TRANSACTION:\n", ef); - - trx_print(ef, wait_lock->trx, 3000); - - fputs("*** (1) WAITING FOR THIS LOCK" - " TO BE GRANTED:\n", ef); - - if (lock_get_type(wait_lock) == LOCK_REC) { - lock_rec_print(ef, wait_lock); - } else { - lock_table_print(ef, wait_lock); - } - - fputs("*** (2) TRANSACTION:\n", ef); - - trx_print(ef, lock->trx, 3000); - - fputs("*** (2) HOLDS THE LOCK(S):\n", ef); - - if (lock_get_type(lock) == LOCK_REC) { - lock_rec_print(ef, 
lock); - } else { - lock_table_print(ef, lock); - } - - fputs("*** (2) WAITING FOR THIS LOCK" - " TO BE GRANTED:\n", ef); - - if (lock_get_type(start->wait_lock) - == LOCK_REC) { - lock_rec_print(ef, start->wait_lock); - } else { - lock_table_print(ef, start->wait_lock); - } -#ifdef UNIV_DEBUG - if (lock_print_waits) { - fputs("Deadlock detected" - " or too long search\n", - stderr); - } -#endif /* UNIV_DEBUG */ - if (too_far) { - - fputs("TOO DEEP OR LONG SEARCH" - " IN THE LOCK TABLE" - " WAITS-FOR GRAPH\n", ef); - - return(LOCK_VICTIM_IS_START); - } - - if (trx_weight_cmp(wait_lock->trx, - start) >= 0) { - /* Our recursion starting point - transaction is 'smaller', let us - choose 'start' as the victim and roll - back it */ - - return(LOCK_VICTIM_IS_START); - } - - lock_deadlock_found = TRUE; - - /* Let us choose the transaction of wait_lock - as a victim to try to avoid deadlocking our - recursion starting point transaction */ - - fputs("*** WE ROLL BACK TRANSACTION (1)\n", - ef); - - wait_lock->trx->was_chosen_as_deadlock_victim - = TRUE; - - lock_cancel_waiting_and_release(wait_lock); - - /* Since trx and wait_lock are no longer - in the waits-for graph, we can return FALSE; - note that our selective algorithm can choose - several transactions as victims, but still - we may end up rolling back also the recursion - starting point transaction! */ - - return(LOCK_VICTIM_IS_OTHER); - } - - if (lock_trx->que_state == TRX_QUE_LOCK_WAIT) { - - /* Another trx ahead has requested lock in an - incompatible mode, and is itself waiting for - a lock */ - - ret = lock_deadlock_recursive( - start, lock_trx, - lock_trx->wait_lock, cost, depth + 1); - if (ret != 0) { - - return(ret); - } - } - } - }/* end of the 'for (;;)'-loop */ -} - -/*========================= TABLE LOCKS ==============================*/ - -/************************************************************************* -Creates a table lock object and adds it as the last in the lock queue -of the table. 
Does NOT check for deadlocks or lock compatibility. */ -UNIV_INLINE -lock_t* -lock_table_create( -/*==============*/ - /* out, own: new lock object */ - dict_table_t* table, /* in: database table in dictionary cache */ - ulint type_mode,/* in: lock mode possibly ORed with - LOCK_WAIT */ - trx_t* trx) /* in: trx */ -{ - lock_t* lock; - - ut_ad(table && trx); - ut_ad(mutex_own(&kernel_mutex)); - - if ((type_mode & LOCK_MODE_MASK) == LOCK_AUTO_INC) { - ++table->n_waiting_or_granted_auto_inc_locks; - } - - if (type_mode == LOCK_AUTO_INC) { - /* Only one trx can have the lock on the table - at a time: we may use the memory preallocated - to the table object */ - - lock = table->auto_inc_lock; - - ut_a(trx->auto_inc_lock == NULL); - trx->auto_inc_lock = lock; - } else { - lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t)); - } - - UT_LIST_ADD_LAST(trx_locks, trx->trx_locks, lock); - - lock->type_mode = type_mode | LOCK_TABLE; - lock->trx = trx; - - lock->un_member.tab_lock.table = table; - - UT_LIST_ADD_LAST(un_member.tab_lock.locks, table->locks, lock); - - if (type_mode & LOCK_WAIT) { - - lock_set_lock_and_trx_wait(lock, trx); - } - - return(lock); -} - -/***************************************************************** -Removes a table lock request from the queue and the trx list of locks; -this is a low-level function which does NOT check if waiting requests -can now be granted. 
*/ -UNIV_INLINE -void -lock_table_remove_low( -/*==================*/ - lock_t* lock) /* in: table lock */ -{ - dict_table_t* table; - trx_t* trx; - - ut_ad(mutex_own(&kernel_mutex)); - - table = lock->un_member.tab_lock.table; - trx = lock->trx; - - if (lock == trx->auto_inc_lock) { - trx->auto_inc_lock = NULL; - - ut_a(table->n_waiting_or_granted_auto_inc_locks > 0); - --table->n_waiting_or_granted_auto_inc_locks; - } - - UT_LIST_REMOVE(trx_locks, trx->trx_locks, lock); - UT_LIST_REMOVE(un_member.tab_lock.locks, table->locks, lock); -} - -/************************************************************************* -Enqueues a waiting request for a table lock which cannot be granted -immediately. Checks for deadlocks. */ -static -ulint -lock_table_enqueue_waiting( -/*=======================*/ - /* out: DB_LOCK_WAIT, DB_DEADLOCK, or - DB_QUE_THR_SUSPENDED, or DB_SUCCESS; - DB_SUCCESS means that there was a deadlock, - but another transaction was chosen as a - victim, and we got the lock immediately: - no need to wait then */ - ulint mode, /* in: lock mode this transaction is - requesting */ - dict_table_t* table, /* in: table */ - que_thr_t* thr) /* in: query thread */ -{ - lock_t* lock; - trx_t* trx; - - ut_ad(mutex_own(&kernel_mutex)); - - /* Test if there already is some other reason to suspend thread: - we do not enqueue a lock request if the query thread should be - stopped anyway */ - - if (que_thr_stop(thr)) { - ut_error; - - return(DB_QUE_THR_SUSPENDED); - } - - trx = thr_get_trx(thr); - - if (trx->dict_operation) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: a table lock wait happens" - " in a dictionary operation!\n" - "InnoDB: Table name ", stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs(".\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", - stderr); - } - - /* Enqueue the lock request that will wait to be granted */ - - lock = lock_table_create(table, mode | LOCK_WAIT, trx); - - /* Check if a 
deadlock occurs: if yes, remove the lock request and - return an error code */ - - if (lock_deadlock_occurs(lock, trx)) { - - lock_reset_lock_and_trx_wait(lock); - lock_table_remove_low(lock); - - return(DB_DEADLOCK); - } - - if (trx->wait_lock == NULL) { - /* Deadlock resolution chose another transaction as a victim, - and we accidentally got our lock granted! */ - - return(DB_SUCCESS); - } - - trx->que_state = TRX_QUE_LOCK_WAIT; - trx->was_chosen_as_deadlock_victim = FALSE; - trx->wait_started = time(NULL); - - ut_a(que_thr_stop(thr)); - - return(DB_LOCK_WAIT); -} - -/************************************************************************* -Checks if other transactions have an incompatible mode lock request in -the lock queue. */ -UNIV_INLINE -ibool -lock_table_other_has_incompatible( -/*==============================*/ - trx_t* trx, /* in: transaction, or NULL if all - transactions should be included */ - ulint wait, /* in: LOCK_WAIT if also waiting locks are - taken into account, or 0 if not */ - dict_table_t* table, /* in: table */ - ulint mode) /* in: lock mode */ -{ - lock_t* lock; - - ut_ad(mutex_own(&kernel_mutex)); - - lock = UT_LIST_GET_LAST(table->locks); - - while (lock != NULL) { - - if ((lock->trx != trx) - && (!lock_mode_compatible(lock_get_mode(lock), mode)) - && (wait || !(lock_get_wait(lock)))) { - - return(TRUE); - } - - lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock); - } - - return(FALSE); -} - -/************************************************************************* -Locks the specified database table in the mode given. If the lock cannot -be granted immediately, the query thread is put to wait. 
*/ - -ulint -lock_table( -/*=======*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ - ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, - does nothing */ - dict_table_t* table, /* in: database table in dictionary cache */ - ulint mode, /* in: lock mode */ - que_thr_t* thr) /* in: query thread */ -{ - trx_t* trx; - ulint err; - - ut_ad(table && thr); - - if (flags & BTR_NO_LOCKING_FLAG) { - - return(DB_SUCCESS); - } - - ut_a(flags == 0); - - trx = thr_get_trx(thr); - - lock_mutex_enter_kernel(); - - /* Look for stronger locks the same trx already has on the table */ - - if (lock_table_has(trx, table, mode)) { - - lock_mutex_exit_kernel(); - - return(DB_SUCCESS); - } - - /* We have to check if the new lock is compatible with any locks - other transactions have in the table lock queue. */ - - if (lock_table_other_has_incompatible(trx, LOCK_WAIT, table, mode)) { - - /* Another trx has a request on the table in an incompatible - mode: this trx may have to wait */ - - err = lock_table_enqueue_waiting(mode | flags, table, thr); - - lock_mutex_exit_kernel(); - - return(err); - } - - lock_table_create(table, mode | flags, trx); - - ut_a(!flags || mode == LOCK_S || mode == LOCK_X); - - lock_mutex_exit_kernel(); - - return(DB_SUCCESS); -} - -/************************************************************************* -Checks if there are any locks set on the table. */ - -ibool -lock_is_on_table( -/*=============*/ - /* out: TRUE if there are lock(s) */ - dict_table_t* table) /* in: database table in dictionary cache */ -{ - ibool ret; - - ut_ad(table); - - lock_mutex_enter_kernel(); - - if (UT_LIST_GET_LAST(table->locks)) { - ret = TRUE; - } else { - ret = FALSE; - } - - lock_mutex_exit_kernel(); - - return(ret); -} - -/************************************************************************* -Checks if a waiting table lock request still has to wait in a queue. 
*/ -static -ibool -lock_table_has_to_wait_in_queue( -/*============================*/ - /* out: TRUE if still has to wait */ - lock_t* wait_lock) /* in: waiting table lock */ -{ - dict_table_t* table; - lock_t* lock; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(lock_get_wait(wait_lock)); - - table = wait_lock->un_member.tab_lock.table; - - lock = UT_LIST_GET_FIRST(table->locks); - - while (lock != wait_lock) { - - if (lock_has_to_wait(wait_lock, lock)) { - - return(TRUE); - } - - lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock); - } - - return(FALSE); -} - -/***************************************************************** -Removes a table lock request, waiting or granted, from the queue and grants -locks to other transactions in the queue, if they now are entitled to a -lock. */ -static -void -lock_table_dequeue( -/*===============*/ - lock_t* in_lock)/* in: table lock object; transactions waiting - behind will get their lock requests granted, if - they are now qualified to it */ -{ - lock_t* lock; - - ut_ad(mutex_own(&kernel_mutex)); - ut_a(lock_get_type(in_lock) == LOCK_TABLE); - - lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, in_lock); - - lock_table_remove_low(in_lock); - - /* Check if waiting locks in the queue can now be granted: grant - locks if there are no conflicting locks ahead. */ - - while (lock != NULL) { - - if (lock_get_wait(lock) - && !lock_table_has_to_wait_in_queue(lock)) { - - /* Grant the lock */ - lock_grant(lock); - } - - lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock); - } -} - -/*=========================== LOCK RELEASE ==============================*/ - -/***************************************************************** -Removes a granted record lock of a transaction from the queue and grants -locks to other transactions waiting in the queue if they now are entitled -to a lock. 
*/ - -void -lock_rec_unlock( -/*============*/ - trx_t* trx, /* in: transaction that has set a record - lock */ - rec_t* rec, /* in: record */ - ulint lock_mode) /* in: LOCK_S or LOCK_X */ -{ - lock_t* lock; - lock_t* release_lock = NULL; - ulint heap_no; - - ut_ad(trx && rec); - - mutex_enter(&kernel_mutex); - - heap_no = rec_get_heap_no(rec, page_rec_is_comp(rec)); - - lock = lock_rec_get_first(rec); - - /* Find the last lock with the same lock_mode and transaction - from the record. */ - - while (lock != NULL) { - if (lock->trx == trx && lock_get_mode(lock) == lock_mode) { - release_lock = lock; - ut_a(!lock_get_wait(lock)); - } - - lock = lock_rec_get_next(rec, lock); - } - - /* If a record lock is found, release the record lock */ - - if (UNIV_LIKELY(release_lock != NULL)) { - lock_rec_reset_nth_bit(release_lock, heap_no); - } else { - mutex_exit(&kernel_mutex); - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: unlock row could not" - " find a %lu mode lock on the record\n", - (ulong) lock_mode); - - return; - } - - /* Check if we can now grant waiting lock requests */ - - lock = lock_rec_get_first(rec); - - while (lock != NULL) { - if (lock_get_wait(lock) - && !lock_rec_has_to_wait_in_queue(lock)) { - - /* Grant the lock */ - lock_grant(lock); - } - - lock = lock_rec_get_next(rec, lock); - } - - mutex_exit(&kernel_mutex); -} - -/************************************************************************* -Releases a table lock. -Releases possible other transactions waiting for this lock. */ - -void -lock_table_unlock( -/*==============*/ - lock_t* lock) /* in: lock */ -{ - mutex_enter(&kernel_mutex); - - lock_table_dequeue(lock); - - mutex_exit(&kernel_mutex); -} - -/************************************************************************* -Releases an auto-inc lock a transaction possibly has on a table. -Releases possible other transactions waiting for this lock. 
*/ - -void -lock_table_unlock_auto_inc( -/*=======================*/ - trx_t* trx) /* in: transaction */ -{ - if (trx->auto_inc_lock) { - mutex_enter(&kernel_mutex); - - lock_table_dequeue(trx->auto_inc_lock); - - mutex_exit(&kernel_mutex); - } -} - -/************************************************************************* -Releases transaction locks, and releases possible other transactions waiting -because of these locks. */ - -void -lock_release_off_kernel( -/*====================*/ - trx_t* trx) /* in: transaction */ -{ - dict_table_t* table; - ulint count; - lock_t* lock; - - ut_ad(mutex_own(&kernel_mutex)); - - lock = UT_LIST_GET_LAST(trx->trx_locks); - - count = 0; - - while (lock != NULL) { - - count++; - - if (lock_get_type(lock) == LOCK_REC) { - - lock_rec_dequeue_from_page(lock); - } else { - ut_ad(lock_get_type(lock) & LOCK_TABLE); - - if (lock_get_mode(lock) != LOCK_IS - && 0 != ut_dulint_cmp(trx->undo_no, - ut_dulint_zero)) { - - /* The trx may have modified the table. We - block the use of the MySQL query cache for - all currently active transactions. */ - - table = lock->un_member.tab_lock.table; - - table->query_cache_inv_trx_id - = trx_sys->max_trx_id; - } - - lock_table_dequeue(lock); - } - - if (count == LOCK_RELEASE_KERNEL_INTERVAL) { - /* Release the kernel mutex for a while, so that we - do not monopolize it */ - - lock_mutex_exit_kernel(); - - lock_mutex_enter_kernel(); - - count = 0; - } - - lock = UT_LIST_GET_LAST(trx->trx_locks); - } - - mem_heap_empty(trx->lock_heap); - - ut_a(trx->auto_inc_lock == NULL); -} - -/************************************************************************* -Cancels a waiting lock request and releases possible other transactions -waiting behind it. 
*/ - -void -lock_cancel_waiting_and_release( -/*============================*/ - lock_t* lock) /* in: waiting lock request */ -{ - ut_ad(mutex_own(&kernel_mutex)); - - if (lock_get_type(lock) == LOCK_REC) { - - lock_rec_dequeue_from_page(lock); - } else { - ut_ad(lock_get_type(lock) & LOCK_TABLE); - - lock_table_dequeue(lock); - } - - /* Reset the wait flag and the back pointer to lock in trx */ - - lock_reset_lock_and_trx_wait(lock); - - /* The following function releases the trx from lock wait */ - - trx_end_lock_wait(lock->trx); -} - -/* True if a lock mode is S or X */ -#define IS_LOCK_S_OR_X(lock) \ - (lock_get_mode(lock) == LOCK_S \ - || lock_get_mode(lock) == LOCK_X) - - -/************************************************************************* -Removes locks of a transaction on a table to be dropped. -If remove_also_table_sx_locks is TRUE then table-level S and X locks are -also removed in addition to other table-level and record-level locks. -No lock, that is going to be removed, is allowed to be a wait lock. 
*/ -static -void -lock_remove_all_on_table_for_trx( -/*=============================*/ - dict_table_t* table, /* in: table to be dropped */ - trx_t* trx, /* in: a transaction */ - ibool remove_also_table_sx_locks)/* in: also removes - table S and X locks */ -{ - lock_t* lock; - lock_t* prev_lock; - - ut_ad(mutex_own(&kernel_mutex)); - - lock = UT_LIST_GET_LAST(trx->trx_locks); - - while (lock != NULL) { - prev_lock = UT_LIST_GET_PREV(trx_locks, lock); - - if (lock_get_type(lock) == LOCK_REC - && lock->index->table == table) { - ut_a(!lock_get_wait(lock)); - - lock_rec_discard(lock); - } else if (lock_get_type(lock) & LOCK_TABLE - && lock->un_member.tab_lock.table == table - && (remove_also_table_sx_locks - || !IS_LOCK_S_OR_X(lock))) { - - ut_a(!lock_get_wait(lock)); - - lock_table_remove_low(lock); - } - - lock = prev_lock; - } -} - -/************************************************************************* -Removes locks on a table to be dropped or truncated. -If remove_also_table_sx_locks is TRUE then table-level S and X locks are -also removed in addition to other table-level and record-level locks. -No lock, that is going to be removed, is allowed to be a wait lock. */ - -void -lock_remove_all_on_table( -/*=====================*/ - dict_table_t* table, /* in: table to be dropped - or truncated */ - ibool remove_also_table_sx_locks)/* in: also removes - table S and X locks */ -{ - lock_t* lock; - lock_t* prev_lock; - - mutex_enter(&kernel_mutex); - - lock = UT_LIST_GET_FIRST(table->locks); - - while (lock != NULL) { - - prev_lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, - lock); - - /* If we should remove all locks (remove_also_table_sx_locks - is TRUE), or if the lock is not table-level S or X lock, - then check we are not going to remove a wait lock. 
*/ - if (remove_also_table_sx_locks - || !(lock_get_type(lock) == LOCK_TABLE - && IS_LOCK_S_OR_X(lock))) { - - ut_a(!lock_get_wait(lock)); - } - - lock_remove_all_on_table_for_trx(table, lock->trx, - remove_also_table_sx_locks); - - if (prev_lock == NULL) { - if (lock == UT_LIST_GET_FIRST(table->locks)) { - /* lock was not removed, pick its successor */ - lock = UT_LIST_GET_NEXT( - un_member.tab_lock.locks, lock); - } else { - /* lock was removed, pick the first one */ - lock = UT_LIST_GET_FIRST(table->locks); - } - } else if (UT_LIST_GET_NEXT(un_member.tab_lock.locks, - prev_lock) != lock) { - /* If lock was removed by - lock_remove_all_on_table_for_trx() then pick the - successor of prev_lock ... */ - lock = UT_LIST_GET_NEXT( - un_member.tab_lock.locks, prev_lock); - } else { - /* ... otherwise pick the successor of lock. */ - lock = UT_LIST_GET_NEXT( - un_member.tab_lock.locks, lock); - } - } - - mutex_exit(&kernel_mutex); -} - -/*===================== VALIDATION AND DEBUGGING ====================*/ - -/************************************************************************* -Prints info of a table lock. 
*/ - -void -lock_table_print( -/*=============*/ - FILE* file, /* in: file where to print */ - lock_t* lock) /* in: table type lock */ -{ - ut_ad(mutex_own(&kernel_mutex)); - ut_a(lock_get_type(lock) == LOCK_TABLE); - - fputs("TABLE LOCK table ", file); - ut_print_name(file, lock->trx, TRUE, - lock->un_member.tab_lock.table->name); - fprintf(file, " trx id %lu %lu", - (ulong) (lock->trx)->id.high, (ulong) (lock->trx)->id.low); - - if (lock_get_mode(lock) == LOCK_S) { - fputs(" lock mode S", file); - } else if (lock_get_mode(lock) == LOCK_X) { - fputs(" lock mode X", file); - } else if (lock_get_mode(lock) == LOCK_IS) { - fputs(" lock mode IS", file); - } else if (lock_get_mode(lock) == LOCK_IX) { - fputs(" lock mode IX", file); - } else if (lock_get_mode(lock) == LOCK_AUTO_INC) { - fputs(" lock mode AUTO-INC", file); - } else { - fprintf(file, " unknown lock mode %lu", - (ulong) lock_get_mode(lock)); - } - - if (lock_get_wait(lock)) { - fputs(" waiting", file); - } - - putc('\n', file); -} - -/************************************************************************* -Prints info of a record lock. 
*/ - -void -lock_rec_print( -/*===========*/ - FILE* file, /* in: file where to print */ - lock_t* lock) /* in: record type lock */ -{ - page_t* page; - ulint space; - ulint page_no; - ulint i; - mtr_t mtr; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_ad(mutex_own(&kernel_mutex)); - ut_a(lock_get_type(lock) == LOCK_REC); - - space = lock->un_member.rec_lock.space; - page_no = lock->un_member.rec_lock.page_no; - - fprintf(file, "RECORD LOCKS space id %lu page no %lu n bits %lu ", - (ulong) space, (ulong) page_no, - (ulong) lock_rec_get_n_bits(lock)); - dict_index_name_print(file, lock->trx, lock->index); - fprintf(file, " trx id %lu %lu", - (ulong) (lock->trx)->id.high, - (ulong) (lock->trx)->id.low); - - if (lock_get_mode(lock) == LOCK_S) { - fputs(" lock mode S", file); - } else if (lock_get_mode(lock) == LOCK_X) { - fputs(" lock_mode X", file); - } else { - ut_error; - } - - if (lock_rec_get_gap(lock)) { - fputs(" locks gap before rec", file); - } - - if (lock_rec_get_rec_not_gap(lock)) { - fputs(" locks rec but not gap", file); - } - - if (lock_rec_get_insert_intention(lock)) { - fputs(" insert intention", file); - } - - if (lock_get_wait(lock)) { - fputs(" waiting", file); - } - - mtr_start(&mtr); - - putc('\n', file); - - /* If the page is not in the buffer pool, we cannot load it - because we have the kernel mutex and ibuf operations would - break the latching order */ - - page = buf_page_get_gen(space, page_no, RW_NO_LATCH, - NULL, BUF_GET_IF_IN_POOL, - __FILE__, __LINE__, &mtr); - if (page) { - page = buf_page_get_nowait(space, page_no, RW_S_LATCH, &mtr); - - if (!page) { - /* Let us try to get an X-latch. If the current thread - is holding an X-latch on the page, we cannot get an - S-latch. 
*/ - - page = buf_page_get_nowait(space, page_no, RW_X_LATCH, - &mtr); - } - } - - if (page) { -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK); -#endif /* UNIV_SYNC_DEBUG */ - } - - for (i = 0; i < lock_rec_get_n_bits(lock); i++) { - - if (lock_rec_get_nth_bit(lock, i)) { - - fprintf(file, "Record lock, heap no %lu ", (ulong) i); - - if (page) { - rec_t* rec - = page_find_rec_with_heap_no(page, i); - offsets = rec_get_offsets( - rec, lock->index, offsets, - ULINT_UNDEFINED, &heap); - rec_print_new(file, rec, offsets); - } - - putc('\n', file); - } - } - - mtr_commit(&mtr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -} - -#ifndef UNIV_HOTBACKUP - -#ifdef UNIV_DEBUG -/* Print the number of lock structs from lock_print_info_summary() only -in non-production builds for performance reasons, see -http://bugs.mysql.com/36942 */ -#define PRINT_NUM_OF_LOCK_STRUCTS -#endif /* UNIV_DEBUG */ - -#ifdef PRINT_NUM_OF_LOCK_STRUCTS -/************************************************************************* -Calculates the number of record lock structs in the record lock hash table. */ -static -ulint -lock_get_n_rec_locks(void) -/*======================*/ -{ - lock_t* lock; - ulint n_locks = 0; - ulint i; - - ut_ad(mutex_own(&kernel_mutex)); - - for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) { - - lock = HASH_GET_FIRST(lock_sys->rec_hash, i); - - while (lock) { - n_locks++; - - lock = HASH_GET_NEXT(hash, lock); - } - } - - return(n_locks); -} -#endif /* PRINT_NUM_OF_LOCK_STRUCTS */ - -/************************************************************************* -Prints info of locks for all transactions. */ - -void -lock_print_info_summary( -/*====================*/ - FILE* file) /* in: file where to print */ -{ - /* We must protect the MySQL thd->query field with a MySQL mutex, and - because the MySQL mutex must be reserved before the kernel_mutex of - InnoDB, we call innobase_mysql_prepare_print_arbitrary_thd() here. 
*/ - - innobase_mysql_prepare_print_arbitrary_thd(); - lock_mutex_enter_kernel(); - - if (lock_deadlock_found) { - fputs("------------------------\n" - "LATEST DETECTED DEADLOCK\n" - "------------------------\n", file); - - ut_copy_file(file, lock_latest_err_file); - } - - fputs("------------\n" - "TRANSACTIONS\n" - "------------\n", file); - - fprintf(file, "Trx id counter %lu %lu\n", - (ulong) ut_dulint_get_high(trx_sys->max_trx_id), - (ulong) ut_dulint_get_low(trx_sys->max_trx_id)); - - fprintf(file, - "Purge done for trx's n:o < %lu %lu undo n:o < %lu %lu\n", - (ulong) ut_dulint_get_high(purge_sys->purge_trx_no), - (ulong) ut_dulint_get_low(purge_sys->purge_trx_no), - (ulong) ut_dulint_get_high(purge_sys->purge_undo_no), - (ulong) ut_dulint_get_low(purge_sys->purge_undo_no)); - - fprintf(file, - "History list length %lu\n", - (ulong) trx_sys->rseg_history_len); - -#ifdef PRINT_NUM_OF_LOCK_STRUCTS - fprintf(file, - "Total number of lock structs in row lock hash table %lu\n", - (ulong) lock_get_n_rec_locks()); -#endif /* PRINT_NUM_OF_LOCK_STRUCTS */ -} - -/************************************************************************* -Prints info of locks for each transaction. 
*/ - -void -lock_print_info_all_transactions( -/*=============================*/ - FILE* file) /* in: file where to print */ -{ - lock_t* lock; - ulint space; - ulint page_no; - page_t* page; - ibool load_page_first = TRUE; - ulint nth_trx = 0; - ulint nth_lock = 0; - ulint i; - mtr_t mtr; - trx_t* trx; - - fprintf(file, "LIST OF TRANSACTIONS FOR EACH SESSION:\n"); - - /* First print info on non-active transactions */ - - trx = UT_LIST_GET_FIRST(trx_sys->mysql_trx_list); - - while (trx) { - if (trx->conc_state == TRX_NOT_STARTED) { - fputs("---", file); - trx_print(file, trx, 600); - } - - trx = UT_LIST_GET_NEXT(mysql_trx_list, trx); - } - -loop: - trx = UT_LIST_GET_FIRST(trx_sys->trx_list); - - i = 0; - - /* Since we temporarily release the kernel mutex when - reading a database page in below, variable trx may be - obsolete now and we must loop through the trx list to - get probably the same trx, or some other trx. */ - - while (trx && (i < nth_trx)) { - trx = UT_LIST_GET_NEXT(trx_list, trx); - i++; - } - - if (trx == NULL) { - lock_mutex_exit_kernel(); - innobase_mysql_end_print_arbitrary_thd(); - - ut_ad(lock_validate()); - - return; - } - - if (nth_lock == 0) { - fputs("---", file); - trx_print(file, trx, 600); - - if (trx->read_view) { - fprintf(file, - "Trx read view will not see trx with" - " id >= %lu %lu, sees < %lu %lu\n", - (ulong) ut_dulint_get_high( - trx->read_view->low_limit_id), - (ulong) ut_dulint_get_low( - trx->read_view->low_limit_id), - (ulong) ut_dulint_get_high( - trx->read_view->up_limit_id), - (ulong) ut_dulint_get_low( - trx->read_view->up_limit_id)); - } - - if (trx->que_state == TRX_QUE_LOCK_WAIT) { - fprintf(file, - "------- TRX HAS BEEN WAITING %lu SEC" - " FOR THIS LOCK TO BE GRANTED:\n", - (ulong) difftime(time(NULL), - trx->wait_started)); - - if (lock_get_type(trx->wait_lock) == LOCK_REC) { - lock_rec_print(file, trx->wait_lock); - } else { - lock_table_print(file, trx->wait_lock); - } - - fputs("------------------\n", file); - } - 
} - - if (!srv_print_innodb_lock_monitor) { - nth_trx++; - goto loop; - } - - i = 0; - - /* Look at the note about the trx loop above why we loop here: - lock may be an obsolete pointer now. */ - - lock = UT_LIST_GET_FIRST(trx->trx_locks); - - while (lock && (i < nth_lock)) { - lock = UT_LIST_GET_NEXT(trx_locks, lock); - i++; - } - - if (lock == NULL) { - nth_trx++; - nth_lock = 0; - - goto loop; - } - - if (lock_get_type(lock) == LOCK_REC) { - space = lock->un_member.rec_lock.space; - page_no = lock->un_member.rec_lock.page_no; - - if (load_page_first) { - lock_mutex_exit_kernel(); - innobase_mysql_end_print_arbitrary_thd(); - - mtr_start(&mtr); - - page = buf_page_get_with_no_latch( - space, page_no, &mtr); - - mtr_commit(&mtr); - - load_page_first = FALSE; - - innobase_mysql_prepare_print_arbitrary_thd(); - lock_mutex_enter_kernel(); - - goto loop; - } - - lock_rec_print(file, lock); - } else { - ut_ad(lock_get_type(lock) & LOCK_TABLE); - - lock_table_print(file, lock); - } - - load_page_first = TRUE; - - nth_lock++; - - if (nth_lock >= 10) { - fputs("10 LOCKS PRINTED FOR THIS TRX:" - " SUPPRESSING FURTHER PRINTS\n", - file); - - nth_trx++; - nth_lock = 0; - - goto loop; - } - - goto loop; -} - -/************************************************************************* -Validates the lock queue on a table. 
*/ - -ibool -lock_table_queue_validate( -/*======================*/ - /* out: TRUE if ok */ - dict_table_t* table) /* in: table */ -{ - lock_t* lock; - - ut_ad(mutex_own(&kernel_mutex)); - - lock = UT_LIST_GET_FIRST(table->locks); - - while (lock) { - ut_a(((lock->trx)->conc_state == TRX_ACTIVE) - || ((lock->trx)->conc_state == TRX_PREPARED) - || ((lock->trx)->conc_state == TRX_COMMITTED_IN_MEMORY)); - - if (!lock_get_wait(lock)) { - - ut_a(!lock_table_other_has_incompatible( - lock->trx, 0, table, - lock_get_mode(lock))); - } else { - - ut_a(lock_table_has_to_wait_in_queue(lock)); - } - - lock = UT_LIST_GET_NEXT(un_member.tab_lock.locks, lock); - } - - return(TRUE); -} - -/************************************************************************* -Validates the lock queue on a single record. */ - -ibool -lock_rec_queue_validate( -/*====================*/ - /* out: TRUE if ok */ - rec_t* rec, /* in: record to look at */ - dict_index_t* index, /* in: index, or NULL if not known */ - const ulint* offsets)/* in: rec_get_offsets(rec, index) */ -{ - trx_t* impl_trx; - lock_t* lock; - - ut_a(rec); - ut_ad(rec_offs_validate(rec, index, offsets)); - ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets)); - - lock_mutex_enter_kernel(); - - if (!page_rec_is_user_rec(rec)) { - - lock = lock_rec_get_first(rec); - - while (lock) { - switch(lock->trx->conc_state) { - case TRX_ACTIVE: - case TRX_PREPARED: - case TRX_COMMITTED_IN_MEMORY: - break; - default: - ut_error; - } - - ut_a(trx_in_trx_list(lock->trx)); - - if (lock_get_wait(lock)) { - ut_a(lock_rec_has_to_wait_in_queue(lock)); - } - - if (index) { - ut_a(lock->index == index); - } - - lock = lock_rec_get_next(rec, lock); - } - - lock_mutex_exit_kernel(); - - return(TRUE); - } - - if (index && (index->type & DICT_CLUSTERED)) { - - impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets); - - if (impl_trx && lock_rec_other_has_expl_req( - LOCK_S, 0, LOCK_WAIT, rec, impl_trx)) { - - ut_a(lock_rec_has_expl(LOCK_X | 
LOCK_REC_NOT_GAP, - rec, impl_trx)); - } - } - - if (index && !(index->type & DICT_CLUSTERED)) { - - /* The kernel mutex may get released temporarily in the - next function call: we have to release lock table mutex - to obey the latching order */ - - impl_trx = lock_sec_rec_some_has_impl_off_kernel( - rec, index, offsets); - - if (impl_trx && lock_rec_other_has_expl_req( - LOCK_S, 0, LOCK_WAIT, rec, impl_trx)) { - - ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, - rec, impl_trx)); - } - } - - lock = lock_rec_get_first(rec); - - while (lock) { - ut_a(lock->trx->conc_state == TRX_ACTIVE - || lock->trx->conc_state == TRX_PREPARED - || lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY); - ut_a(trx_in_trx_list(lock->trx)); - - if (index) { - ut_a(lock->index == index); - } - - if (!lock_rec_get_gap(lock) && !lock_get_wait(lock)) { - - ulint mode; - - if (lock_get_mode(lock) == LOCK_S) { - mode = LOCK_X; - } else { - mode = LOCK_S; - } - ut_a(!lock_rec_other_has_expl_req( - mode, 0, 0, rec, lock->trx)); - - } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) { - - ut_a(lock_rec_has_to_wait_in_queue(lock)); - } - - lock = lock_rec_get_next(rec, lock); - } - - lock_mutex_exit_kernel(); - - return(TRUE); -} - -/************************************************************************* -Validates the record lock queues on a page. 
*/ - -ibool -lock_rec_validate_page( -/*===================*/ - /* out: TRUE if ok */ - ulint space, /* in: space id */ - ulint page_no)/* in: page number */ -{ - dict_index_t* index; - page_t* page; - lock_t* lock; - rec_t* rec; - ulint nth_lock = 0; - ulint nth_bit = 0; - ulint i; - mtr_t mtr; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_ad(!mutex_own(&kernel_mutex)); - - mtr_start(&mtr); - - page = buf_page_get(space, page_no, RW_X_LATCH, &mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK); -#endif /* UNIV_SYNC_DEBUG */ - - lock_mutex_enter_kernel(); -loop: - lock = lock_rec_get_first_on_page_addr(space, page_no); - - if (!lock) { - goto function_exit; - } - - for (i = 0; i < nth_lock; i++) { - - lock = lock_rec_get_next_on_page(lock); - - if (!lock) { - goto function_exit; - } - } - - ut_a(trx_in_trx_list(lock->trx)); - ut_a(lock->trx->conc_state == TRX_ACTIVE - || lock->trx->conc_state == TRX_PREPARED - || lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY); - - for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) { - - if (i == 1 || lock_rec_get_nth_bit(lock, i)) { - - index = lock->index; - rec = page_find_rec_with_heap_no(page, i); - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - - fprintf(stderr, - "Validating %lu %lu\n", - (ulong) space, (ulong) page_no); - - lock_mutex_exit_kernel(); - - lock_rec_queue_validate(rec, index, offsets); - - lock_mutex_enter_kernel(); - - nth_bit = i + 1; - - goto loop; - } - } - - nth_bit = 0; - nth_lock++; - - goto loop; - -function_exit: - lock_mutex_exit_kernel(); - - mtr_commit(&mtr); - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(TRUE); -} - -/************************************************************************* -Validates the lock system. 
*/ - -ibool -lock_validate(void) -/*===============*/ - /* out: TRUE if ok */ -{ - lock_t* lock; - trx_t* trx; - dulint limit; - ulint space; - ulint page_no; - ulint i; - - lock_mutex_enter_kernel(); - - trx = UT_LIST_GET_FIRST(trx_sys->trx_list); - - while (trx) { - lock = UT_LIST_GET_FIRST(trx->trx_locks); - - while (lock) { - if (lock_get_type(lock) & LOCK_TABLE) { - - lock_table_queue_validate( - lock->un_member.tab_lock.table); - } - - lock = UT_LIST_GET_NEXT(trx_locks, lock); - } - - trx = UT_LIST_GET_NEXT(trx_list, trx); - } - - for (i = 0; i < hash_get_n_cells(lock_sys->rec_hash); i++) { - - limit = ut_dulint_zero; - - for (;;) { - lock = HASH_GET_FIRST(lock_sys->rec_hash, i); - - while (lock) { - ut_a(trx_in_trx_list(lock->trx)); - - space = lock->un_member.rec_lock.space; - page_no = lock->un_member.rec_lock.page_no; - - if (ut_dulint_cmp( - ut_dulint_create(space, page_no), - limit) >= 0) { - break; - } - - lock = HASH_GET_NEXT(hash, lock); - } - - if (!lock) { - - break; - } - - lock_mutex_exit_kernel(); - - lock_rec_validate_page(space, page_no); - - lock_mutex_enter_kernel(); - - limit = ut_dulint_create(space, page_no + 1); - } - } - - lock_mutex_exit_kernel(); - - return(TRUE); -} -#endif /* !UNIV_HOTBACKUP */ -/*============ RECORD LOCK CHECKS FOR ROW OPERATIONS ====================*/ - -/************************************************************************* -Checks if locks of other transactions prevent an immediate insert of -a record. If they do, first tests if the query thread should anyway -be suspended for some reason; if not, then puts the transaction and -the query thread to the lock wait state and inserts a waiting request -for a gap x-lock to the lock queue. 
*/

ulint
lock_rec_insert_check_and_lock(
/*===========================*/
				/* out: DB_SUCCESS, DB_LOCK_WAIT,
				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
				does nothing */
	rec_t*		rec,	/* in: record after which to insert */
	dict_index_t*	index,	/* in: index */
	que_thr_t*	thr,	/* in: query thread */
	ibool*		inherit)/* out: set to TRUE if the new inserted
				record maybe should inherit LOCK_GAP type
				locks from the successor record */
{
	rec_t*	successor;
	trx_t*	trx;
	ulint	status;

	if (flags & BTR_NO_LOCKING_FLAG) {

		return(DB_SUCCESS);
	}

	ut_ad(rec);

	trx = thr_get_trx(thr);
	successor = page_rec_get_next(rec);

	*inherit = FALSE;

	lock_mutex_enter_kernel();

	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));

	if (lock_rec_get_first(successor) == NULL) {
		/* Simplest case, optimized for CPU time: there is no lock
		at all on the successor */

		lock_mutex_exit_kernel();

		if (!(index->type & DICT_CLUSTERED)) {

			/* Update the page max trx id field */
			page_update_max_trx_id(buf_frame_align(rec),
					       thr_get_trx(thr)->id);
		}

		return(DB_SUCCESS);
	}

	*inherit = TRUE;

	/* If another transaction has an explicit lock request which locks
	the gap, waiting or granted, on the successor, the insert has to
	wait.

	An exception is the case where the lock by another transaction is a
	gap type lock which it placed to wait for its turn to insert. We do
	not consider that kind of a lock conflicting with our insert. This
	eliminates an unnecessary deadlock which resulted when 2 transactions
	had to wait for their insert: both had waiting gap type lock requests
	on the successor. */

	status = DB_SUCCESS;

	if (lock_rec_other_has_conflicting(
		    LOCK_X | LOCK_GAP | LOCK_INSERT_INTENTION, successor,
		    trx)) {

		/* Note that we may get DB_SUCCESS also here! */
		status = lock_rec_enqueue_waiting(LOCK_X | LOCK_GAP
						  | LOCK_INSERT_INTENTION,
						  successor, index, thr);
	}

	lock_mutex_exit_kernel();

	if (!(index->type & DICT_CLUSTERED) && (status == DB_SUCCESS)) {

		/* Update the page max trx id field */
		page_update_max_trx_id(buf_frame_align(rec),
				       thr_get_trx(thr)->id);
	}

#ifdef UNIV_DEBUG
	{
		mem_heap_t*	heap		= NULL;
		ulint		offsets_[REC_OFFS_NORMAL_SIZE];
		const ulint*	offsets;
		offsets_[0] = (sizeof offsets_) / sizeof offsets_[0];

		offsets = rec_get_offsets(successor, index, offsets_,
					  ULINT_UNDEFINED, &heap);
		ut_ad(lock_rec_queue_validate(successor, index, offsets));
		if (UNIV_LIKELY_NULL(heap)) {
			mem_heap_free(heap);
		}
	}
#endif /* UNIV_DEBUG */

	return(status);
}

/*************************************************************************
If a transaction has an implicit x-lock on a record, but no explicit x-lock
set on the record, sets one for it. NOTE that in the case of a secondary
index, the kernel mutex may get temporarily released.
*/
static
void
lock_rec_convert_impl_to_expl(
/*==========================*/
	rec_t*		rec,	/* in: user record on page */
	dict_index_t*	index,	/* in: index of record */
	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
{
	trx_t*	holder;

	ut_ad(mutex_own(&kernel_mutex));
	ut_ad(page_rec_is_user_rec(rec));
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(!page_rec_is_comp(rec) == !rec_offs_comp(offsets));

	/* Look up the transaction, if any, reported as holding an implicit
	lock; the lookup routine depends on the index type */

	holder = (index->type & DICT_CLUSTERED)
		? lock_clust_rec_some_has_impl(rec, index, offsets)
		: lock_sec_rec_some_has_impl_off_kernel(rec, index, offsets);

	/* If that transaction has no explicit x-lock set on the record,
	set one for it */

	if (holder
	    && !lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec, holder)) {

		lock_rec_add_to_queue(LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP,
				      rec, index, holder);
	}
}

/*************************************************************************
Checks if locks of other transactions prevent an immediate modify (update,
delete mark, or delete unmark) of a clustered index record. If they do,
first tests if the query thread should anyway be suspended for some
reason; if not, then puts the transaction and the query thread to the
lock wait state and inserts a waiting request for a record x-lock to the
lock queue.
*/

ulint
lock_clust_rec_modify_check_and_lock(
/*=================================*/
				/* out: DB_SUCCESS, DB_LOCK_WAIT,
				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
				does nothing */
	rec_t*		rec,	/* in: record which should be modified */
	dict_index_t*	index,	/* in: clustered index */
	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
	que_thr_t*	thr)	/* in: query thread */
{
	ulint	status;

	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(index->type & DICT_CLUSTERED);

	if (flags & BTR_NO_LOCKING_FLAG) {

		return(DB_SUCCESS);
	}

	lock_mutex_enter_kernel();

	ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));

	/* First make an explicit lock out of a possible implicit x-lock
	of some transaction, then request our own record x-lock */

	lock_rec_convert_impl_to_expl(rec, index, offsets);

	status = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP,
			       rec, index, thr);

	lock_mutex_exit_kernel();

	ut_ad(lock_rec_queue_validate(rec, index, offsets));

	return(status);
}

/*************************************************************************
Checks if locks of other transactions prevent an immediate modify (delete
mark or delete unmark) of a secondary index record.
*/ - -ulint -lock_sec_rec_modify_check_and_lock( -/*===============================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ - ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, - does nothing */ - rec_t* rec, /* in: record which should be modified; - NOTE: as this is a secondary index, we - always have to modify the clustered index - record first: see the comment below */ - dict_index_t* index, /* in: secondary index */ - que_thr_t* thr) /* in: query thread */ -{ - ulint err; - - if (flags & BTR_NO_LOCKING_FLAG) { - - return(DB_SUCCESS); - } - - ut_ad(!(index->type & DICT_CLUSTERED)); - - /* Another transaction cannot have an implicit lock on the record, - because when we come here, we already have modified the clustered - index record, and this would not have been possible if another active - transaction had modified this secondary index record. */ - - lock_mutex_enter_kernel(); - - ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); - - err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, rec, index, thr); - - lock_mutex_exit_kernel(); - -#ifdef UNIV_DEBUG - { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - const ulint* offsets; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - offsets = rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap); - ut_ad(lock_rec_queue_validate(rec, index, offsets)); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } -#endif /* UNIV_DEBUG */ - - if (err == DB_SUCCESS) { - /* Update the page max trx id field */ - - page_update_max_trx_id(buf_frame_align(rec), - thr_get_trx(thr)->id); - } - - return(err); -} - -/************************************************************************* -Like the counterpart for a clustered index below, but now we read a -secondary index record. 
*/

ulint
lock_sec_rec_read_check_and_lock(
/*=============================*/
				/* out: DB_SUCCESS, DB_LOCK_WAIT,
				DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
	ulint		flags,	/* in: if BTR_NO_LOCKING_FLAG bit is set,
				does nothing */
	rec_t*		rec,	/* in: user record or page supremum record
				which should be read or passed over by a read
				cursor */
	dict_index_t*	index,	/* in: secondary index */
	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
	ulint		mode,	/* in: mode of the lock which the read cursor
				should set on records: LOCK_S or LOCK_X; the
				latter is possible in SELECT FOR UPDATE */
	ulint		gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
				LOCK_REC_NOT_GAP */
	que_thr_t*	thr)	/* in: query thread */
{
	ulint	status;
	ibool	impl_lock_possible;

	ut_ad(!(index->type & DICT_CLUSTERED));
	ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec));
	ut_ad(rec_offs_validate(rec, index, offsets));

	if (flags & BTR_NO_LOCKING_FLAG) {

		return(DB_SUCCESS);
	}

	lock_mutex_enter_kernel();

	ut_ad(mode != LOCK_X
	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX));
	ut_ad(mode != LOCK_S
	      || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS));

	/* Some transaction may have an implicit x-lock on the record only
	if the max trx id for the page >= min trx id for the trx list or a
	database recovery is running. */

	impl_lock_possible
		= (ut_dulint_cmp(page_get_max_trx_id(buf_frame_align(rec)),
				 trx_list_get_min_trx_id()) >= 0)
		|| recv_recovery_is_on();

	if (impl_lock_possible && !page_rec_is_supremum(rec)) {

		lock_rec_convert_impl_to_expl(rec, index, offsets);
	}

	status = lock_rec_lock(FALSE, mode | gap_mode, rec, index, thr);

	lock_mutex_exit_kernel();

	ut_ad(lock_rec_queue_validate(rec, index, offsets));

	return(status);
}

/*************************************************************************
Checks if locks of other transactions prevent an immediate read, or passing
over by a read cursor, of a clustered index record.
If they do, first tests -if the query thread should anyway be suspended for some reason; if not, then -puts the transaction and the query thread to the lock wait state and inserts a -waiting request for a record lock to the lock queue. Sets the requested mode -lock on the record. */ - -ulint -lock_clust_rec_read_check_and_lock( -/*===============================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ - ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, - does nothing */ - rec_t* rec, /* in: user record or page supremum record - which should be read or passed over by a read - cursor */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - ulint mode, /* in: mode of the lock which the read cursor - should set on records: LOCK_S or LOCK_X; the - latter is possible in SELECT FOR UPDATE */ - ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or - LOCK_REC_NOT_GAP */ - que_thr_t* thr) /* in: query thread */ -{ - ulint err; - - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); - ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP - || gap_mode == LOCK_REC_NOT_GAP); - ut_ad(rec_offs_validate(rec, index, offsets)); - - if (flags & BTR_NO_LOCKING_FLAG) { - - return(DB_SUCCESS); - } - - lock_mutex_enter_kernel(); - - ut_ad(mode != LOCK_X - || lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); - ut_ad(mode != LOCK_S - || lock_table_has(thr_get_trx(thr), index->table, LOCK_IS)); - - if (!page_rec_is_supremum(rec)) { - - lock_rec_convert_impl_to_expl(rec, index, offsets); - } - - err = lock_rec_lock(FALSE, mode | gap_mode, rec, index, thr); - - lock_mutex_exit_kernel(); - - ut_ad(lock_rec_queue_validate(rec, index, offsets)); - - return(err); -} -/************************************************************************* -Checks if locks of other transactions prevent an immediate read, or passing -over by a read cursor, 
of a clustered index record. If they do, first tests -if the query thread should anyway be suspended for some reason; if not, then -puts the transaction and the query thread to the lock wait state and inserts a -waiting request for a record lock to the lock queue. Sets the requested mode -lock on the record. This is an alternative version of -lock_clust_rec_read_check_and_lock() that does not require the parameter -"offsets". */ - -ulint -lock_clust_rec_read_check_and_lock_alt( -/*===================================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ - ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, - does nothing */ - rec_t* rec, /* in: user record or page supremum record - which should be read or passed over by a read - cursor */ - dict_index_t* index, /* in: clustered index */ - ulint mode, /* in: mode of the lock which the read cursor - should set on records: LOCK_S or LOCK_X; the - latter is possible in SELECT FOR UPDATE */ - ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or - LOCK_REC_NOT_GAP */ - que_thr_t* thr) /* in: query thread */ -{ - mem_heap_t* tmp_heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - ulint ret; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &tmp_heap); - ret = lock_clust_rec_read_check_and_lock(flags, rec, index, - offsets, mode, gap_mode, thr); - if (tmp_heap) { - mem_heap_free(tmp_heap); - } - return(ret); -} - diff --git a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c deleted file mode 100644 index 7eb5e3db422..00000000000 --- a/storage/innobase/log/log0log.c +++ /dev/null @@ -1,3354 +0,0 @@ -/****************************************************** -Database log - -(c) 1995-1997 Innobase Oy - -Created 12/9/1995 Heikki Tuuri -*******************************************************/ - -#include "log0log.h" - -#ifdef UNIV_NONINL -#include "log0log.ic" -#endif - 
-#include "mem0mem.h" -#include "buf0buf.h" -#include "buf0flu.h" -#include "srv0srv.h" -#include "log0recv.h" -#include "fil0fil.h" -#include "dict0boot.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "trx0sys.h" -#include "trx0trx.h" - -/* -General philosophy of InnoDB redo-logs: - -1) Every change to a contents of a data page must be done -through mtr, which in mtr_commit() writes log records -to the InnoDB redo log. - -2) Normally these changes are performed using a mlog_write_ulint() -or similar function. - -3) In some page level operations only a code number of a -c-function and its parameters are written to the log to -reduce the size of the log. - - 3a) You should not add parameters to these kind of functions - (e.g. trx_undo_header_create(), trx_undo_insert_header_reuse()) - - 3b) You should not add such functionality which either change - working when compared with the old or are dependent on data - outside of the page. These kind of functions should implement - self-contained page transformation and it should be unchanged - if you don't have very essential reasons to change log - semantics or format. 
- -*/ - -/* Current free limit of space 0; protected by the log sys mutex; 0 means -uninitialized */ -ulint log_fsp_current_free_limit = 0; - -/* Global log system variable */ -log_t* log_sys = NULL; - -#ifdef UNIV_DEBUG -ibool log_do_write = TRUE; - -ibool log_debug_writes = FALSE; -#endif /* UNIV_DEBUG */ - -/* These control how often we print warnings if the last checkpoint is too -old */ -ibool log_has_printed_chkp_warning = FALSE; -time_t log_last_warning_time; - -#ifdef UNIV_LOG_ARCHIVE -/* Pointer to this variable is used as the i/o-message when we do i/o to an -archive */ -byte log_archive_io; -#endif /* UNIV_LOG_ARCHIVE */ - -/* A margin for free space in the log buffer before a log entry is catenated */ -#define LOG_BUF_WRITE_MARGIN (4 * OS_FILE_LOG_BLOCK_SIZE) - -/* Margins for free space in the log buffer after a log entry is catenated */ -#define LOG_BUF_FLUSH_RATIO 2 -#define LOG_BUF_FLUSH_MARGIN (LOG_BUF_WRITE_MARGIN + 4 * UNIV_PAGE_SIZE) - -/* Margin for the free space in the smallest log group, before a new query -step which modifies the database, is started */ - -#define LOG_CHECKPOINT_FREE_PER_THREAD (4 * UNIV_PAGE_SIZE) -#define LOG_CHECKPOINT_EXTRA_FREE (8 * UNIV_PAGE_SIZE) - -/* This parameter controls asynchronous making of a new checkpoint; the value -should be bigger than LOG_POOL_PREFLUSH_RATIO_SYNC */ - -#define LOG_POOL_CHECKPOINT_RATIO_ASYNC 32 - -/* This parameter controls synchronous preflushing of modified buffer pages */ -#define LOG_POOL_PREFLUSH_RATIO_SYNC 16 - -/* The same ratio for asynchronous preflushing; this value should be less than -the previous */ -#define LOG_POOL_PREFLUSH_RATIO_ASYNC 8 - -/* Extra margin, in addition to one log file, used in archiving */ -#define LOG_ARCHIVE_EXTRA_MARGIN (4 * UNIV_PAGE_SIZE) - -/* This parameter controls asynchronous writing to the archive */ -#define LOG_ARCHIVE_RATIO_ASYNC 16 - -/* Codes used in unlocking flush latches */ -#define LOG_UNLOCK_NONE_FLUSHED_LOCK 1 -#define 
LOG_UNLOCK_FLUSH_LOCK 2 - -/* States of an archiving operation */ -#define LOG_ARCHIVE_READ 1 -#define LOG_ARCHIVE_WRITE 2 - -/********************************************************** -Completes a checkpoint write i/o to a log file. */ -static -void -log_io_complete_checkpoint(void); -/*============================*/ -#ifdef UNIV_LOG_ARCHIVE -/********************************************************** -Completes an archiving i/o. */ -static -void -log_io_complete_archive(void); -/*=========================*/ -#endif /* UNIV_LOG_ARCHIVE */ - -/******************************************************************** -Sets the global variable log_fsp_current_free_limit. Also makes a checkpoint, -so that we know that the limit has been written to a log checkpoint field -on disk. */ - -void -log_fsp_current_free_limit_set_and_checkpoint( -/*==========================================*/ - ulint limit) /* in: limit to set */ -{ - ibool success; - - mutex_enter(&(log_sys->mutex)); - - log_fsp_current_free_limit = limit; - - mutex_exit(&(log_sys->mutex)); - - /* Try to make a synchronous checkpoint */ - - success = FALSE; - - while (!success) { - success = log_checkpoint(TRUE, TRUE); - } -} - -/******************************************************************** -Returns the oldest modified block lsn in the pool, or log_sys->lsn if none -exists. */ -static -dulint -log_buf_pool_get_oldest_modification(void) -/*======================================*/ -{ - dulint lsn; - - ut_ad(mutex_own(&(log_sys->mutex))); - - lsn = buf_pool_get_oldest_modification(); - - if (ut_dulint_is_zero(lsn)) { - - lsn = log_sys->lsn; - } - - return(lsn); -} - -/**************************************************************** -Opens the log for log_write_low. The log must be closed with log_close and -released with log_release. 
*/ - -dulint -log_reserve_and_open( -/*=================*/ - /* out: start lsn of the log record */ - ulint len) /* in: length of data to be catenated */ -{ - log_t* log = log_sys; - ulint len_upper_limit; -#ifdef UNIV_LOG_ARCHIVE - ulint archived_lsn_age; - ulint dummy; -#endif /* UNIV_LOG_ARCHIVE */ -#ifdef UNIV_DEBUG - ulint count = 0; -#endif /* UNIV_DEBUG */ - - ut_a(len < log->buf_size / 2); -loop: - mutex_enter(&(log->mutex)); - - /* Calculate an upper limit for the space the string may take in the - log buffer */ - - len_upper_limit = LOG_BUF_WRITE_MARGIN + (5 * len) / 4; - - if (log->buf_free + len_upper_limit > log->buf_size) { - - mutex_exit(&(log->mutex)); - - /* Not enough free space, do a syncronous flush of the log - buffer */ - - log_buffer_flush_to_disk(); - - srv_log_waits++; - - ut_ad(++count < 50); - - goto loop; - } - -#ifdef UNIV_LOG_ARCHIVE - if (log->archiving_state != LOG_ARCH_OFF) { - - archived_lsn_age = ut_dulint_minus(log->lsn, - log->archived_lsn); - if (archived_lsn_age + len_upper_limit - > log->max_archived_lsn_age) { - /* Not enough free archived space in log groups: do a - synchronous archive write batch: */ - - mutex_exit(&(log->mutex)); - - ut_ad(len_upper_limit <= log->max_archived_lsn_age); - - log_archive_do(TRUE, &dummy); - - ut_ad(++count < 50); - - goto loop; - } - } -#endif /* UNIV_LOG_ARCHIVE */ - -#ifdef UNIV_LOG_DEBUG - log->old_buf_free = log->buf_free; - log->old_lsn = log->lsn; -#endif - return(log->lsn); -} - -/**************************************************************** -Writes to the log the string given. It is assumed that the caller holds the -log mutex. 
*/

void
log_write_low(
/*==========*/
	byte*	str,		/* in: string */
	ulint	str_len)	/* in: string length */
{
	log_t*	log		= log_sys;
	/* Data length at which a block counts as full */
	const ulint	full_len = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE;
	ulint	block_offs;
	ulint	chunk;
	ulint	data_len;
	byte*	log_block;

	ut_ad(mutex_own(&(log->mutex)));

	/* One round per log block touched; always at least one round */

	do {
		/* Calculate a part length */

		block_offs = log->buf_free % OS_FILE_LOG_BLOCK_SIZE;
		data_len = block_offs + str_len;

		if (data_len > full_len) {
			/* Only the head of the string fits: fill the block
			up to its trailer */

			data_len = full_len;
			chunk = full_len - block_offs;
		} else {
			/* The string fits within the current log block */

			chunk = str_len;
		}

		ut_memcpy(log->buf + log->buf_free, str, chunk);

		str_len -= chunk;
		str += chunk;

		log_block = ut_align_down(log->buf + log->buf_free,
					  OS_FILE_LOG_BLOCK_SIZE);
		log_block_set_data_len(log_block, data_len);

		if (data_len == full_len) {
			/* This block became full */
			log_block_set_data_len(log_block,
					       OS_FILE_LOG_BLOCK_SIZE);
			log_block_set_checkpoint_no(
				log_block, log_sys->next_checkpoint_no);

			/* The advance also covers this block's trailer and
			the next block's header */
			chunk += LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE;

			log->lsn = ut_dulint_add(log->lsn, chunk);

			/* Initialize the next block header */
			log_block_init(log_block + OS_FILE_LOG_BLOCK_SIZE,
				       log->lsn);
		} else {
			log->lsn = ut_dulint_add(log->lsn, chunk);
		}

		log->buf_free += chunk;

		ut_ad(log->buf_free <= log->buf_size);

	} while (str_len > 0);

	srv_log_write_requests++;
}

/****************************************************************
Closes the log.
*/ - -dulint -log_close(void) -/*===========*/ - /* out: lsn */ -{ - byte* log_block; - ulint first_rec_group; - dulint oldest_lsn; - dulint lsn; - log_t* log = log_sys; - ulint checkpoint_age; - - ut_ad(mutex_own(&(log->mutex))); - - lsn = log->lsn; - - log_block = ut_align_down(log->buf + log->buf_free, - OS_FILE_LOG_BLOCK_SIZE); - first_rec_group = log_block_get_first_rec_group(log_block); - - if (first_rec_group == 0) { - /* We initialized a new log block which was not written - full by the current mtr: the next mtr log record group - will start within this block at the offset data_len */ - - log_block_set_first_rec_group( - log_block, log_block_get_data_len(log_block)); - } - - if (log->buf_free > log->max_buf_free) { - - log->check_flush_or_checkpoint = TRUE; - } - - checkpoint_age = ut_dulint_minus(lsn, log->last_checkpoint_lsn); - - if (checkpoint_age >= log->log_group_capacity) { - /* TODO: split btr_store_big_rec_extern_fields() into small - steps so that we can release all latches in the middle, and - call log_free_check() to ensure we never write over log written - after the latest checkpoint. In principle, we should split all - big_rec operations, but other operations are smaller. 
*/ - - if (!log_has_printed_chkp_warning - || difftime(time(NULL), log_last_warning_time) > 15) { - - log_has_printed_chkp_warning = TRUE; - log_last_warning_time = time(NULL); - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: ERROR: the age of the last" - " checkpoint is %lu,\n" - "InnoDB: which exceeds the log group" - " capacity %lu.\n" - "InnoDB: If you are using big" - " BLOB or TEXT rows, you must set the\n" - "InnoDB: combined size of log files" - " at least 10 times bigger than the\n" - "InnoDB: largest such row.\n", - (ulong) checkpoint_age, - (ulong) log->log_group_capacity); - } - } - - if (checkpoint_age <= log->max_modified_age_async) { - - goto function_exit; - } - - oldest_lsn = buf_pool_get_oldest_modification(); - - if (ut_dulint_is_zero(oldest_lsn) - || (ut_dulint_minus(lsn, oldest_lsn) - > log->max_modified_age_async) - || checkpoint_age > log->max_checkpoint_age_async) { - - log->check_flush_or_checkpoint = TRUE; - } -function_exit: - -#ifdef UNIV_LOG_DEBUG - log_check_log_recs(log->buf + log->old_buf_free, - log->buf_free - log->old_buf_free, log->old_lsn); -#endif - - return(lsn); -} - -#ifdef UNIV_LOG_ARCHIVE -/********************************************************** -Pads the current log block full with dummy log records. Used in producing -consistent archived log files. 
*/ -static -void -log_pad_current_log_block(void) -/*===========================*/ -{ - byte b = MLOG_DUMMY_RECORD; - ulint pad_length; - ulint i; - dulint lsn; - - /* We retrieve lsn only because otherwise gcc crashed on HP-UX */ - lsn = log_reserve_and_open(OS_FILE_LOG_BLOCK_SIZE); - - pad_length = OS_FILE_LOG_BLOCK_SIZE - - (log_sys->buf_free % OS_FILE_LOG_BLOCK_SIZE) - - LOG_BLOCK_TRL_SIZE; - - for (i = 0; i < pad_length; i++) { - log_write_low(&b, 1); - } - - lsn = log_sys->lsn; - - log_close(); - log_release(); - - ut_a((ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE) - == LOG_BLOCK_HDR_SIZE); -} -#endif /* UNIV_LOG_ARCHIVE */ - -/********************************************************** -Calculates the data capacity of a log group, when the log file headers are not -included. */ - -ulint -log_group_get_capacity( -/*===================*/ - /* out: capacity in bytes */ - log_group_t* group) /* in: log group */ -{ - ut_ad(mutex_own(&(log_sys->mutex))); - - return((group->file_size - LOG_FILE_HDR_SIZE) * group->n_files); -} - -/********************************************************** -Calculates the offset within a log group, when the log file headers are not -included. */ -UNIV_INLINE -ulint -log_group_calc_size_offset( -/*=======================*/ - /* out: size offset (<= offset) */ - ulint offset, /* in: real offset within the log group */ - log_group_t* group) /* in: log group */ -{ - ut_ad(mutex_own(&(log_sys->mutex))); - - return(offset - LOG_FILE_HDR_SIZE * (1 + offset / group->file_size)); -} - -/********************************************************** -Calculates the offset within a log group, when the log file headers are -included. 
*/ -UNIV_INLINE -ulint -log_group_calc_real_offset( -/*=======================*/ - /* out: real offset (>= offset) */ - ulint offset, /* in: size offset within the log group */ - log_group_t* group) /* in: log group */ -{ - ut_ad(mutex_own(&(log_sys->mutex))); - - return(offset + LOG_FILE_HDR_SIZE - * (1 + offset / (group->file_size - LOG_FILE_HDR_SIZE))); -} - -/********************************************************** -Calculates the offset of an lsn within a log group. */ -static -ulint -log_group_calc_lsn_offset( -/*======================*/ - /* out: offset within the log group */ - dulint lsn, /* in: lsn, must be within 4 GB of - group->lsn */ - log_group_t* group) /* in: log group */ -{ - dulint gr_lsn; - ib_longlong gr_lsn_size_offset; - ib_longlong difference; - ib_longlong group_size; - ib_longlong offset; - - ut_ad(mutex_own(&(log_sys->mutex))); - - /* If total log file size is > 2 GB we can easily get overflows - with 32-bit integers. Use 64-bit integers instead. */ - - gr_lsn = group->lsn; - - gr_lsn_size_offset = (ib_longlong) - log_group_calc_size_offset(group->lsn_offset, group); - - group_size = (ib_longlong) log_group_get_capacity(group); - - if (ut_dulint_cmp(lsn, gr_lsn) >= 0) { - - difference = (ib_longlong) ut_dulint_minus(lsn, gr_lsn); - } else { - difference = (ib_longlong) ut_dulint_minus(gr_lsn, lsn); - - difference = difference % group_size; - - difference = group_size - difference; - } - - offset = (gr_lsn_size_offset + difference) % group_size; - - ut_a(offset < (((ib_longlong) 1) << 32)); /* offset must be < 4 GB */ - - /* fprintf(stderr, - "Offset is %lu gr_lsn_offset is %lu difference is %lu\n", - (ulint)offset,(ulint)gr_lsn_size_offset, (ulint)difference); - */ - - return(log_group_calc_real_offset((ulint)offset, group)); -} - -/*********************************************************************** -Calculates where in log files we find a specified lsn. 
*/ - -ulint -log_calc_where_lsn_is( -/*==================*/ - /* out: log file number */ - ib_longlong* log_file_offset, /* out: offset in that file - (including the header) */ - dulint first_header_lsn, /* in: first log file start - lsn */ - dulint lsn, /* in: lsn whose position to - determine */ - ulint n_log_files, /* in: total number of log - files */ - ib_longlong log_file_size) /* in: log file size - (including the header) */ -{ - ib_longlong ib_lsn; - ib_longlong ib_first_header_lsn; - ib_longlong capacity = log_file_size - LOG_FILE_HDR_SIZE; - ulint file_no; - ib_longlong add_this_many; - - ib_lsn = ut_conv_dulint_to_longlong(lsn); - ib_first_header_lsn = ut_conv_dulint_to_longlong(first_header_lsn); - - if (ib_lsn < ib_first_header_lsn) { - add_this_many = 1 + (ib_first_header_lsn - ib_lsn) - / (capacity * (ib_longlong)n_log_files); - ib_lsn += add_this_many - * capacity * (ib_longlong)n_log_files; - } - - ut_a(ib_lsn >= ib_first_header_lsn); - - file_no = ((ulint)((ib_lsn - ib_first_header_lsn) / capacity)) - % n_log_files; - *log_file_offset = (ib_lsn - ib_first_header_lsn) % capacity; - - *log_file_offset = *log_file_offset + LOG_FILE_HDR_SIZE; - - return(file_no); -} - -/************************************************************ -Sets the field values in group to correspond to a given lsn. For this function -to work, the values must already be correctly initialized to correspond to -some lsn, for instance, a checkpoint lsn. */ - -void -log_group_set_fields( -/*=================*/ - log_group_t* group, /* in: group */ - dulint lsn) /* in: lsn for which the values should be - set */ -{ - group->lsn_offset = log_group_calc_lsn_offset(lsn, group); - group->lsn = lsn; -} - -/********************************************************************* -Calculates the recommended highest values for lsn - last_checkpoint_lsn, -lsn - buf_get_oldest_modification(), and lsn - max_archive_lsn_age. 
*/ -static -ibool -log_calc_max_ages(void) -/*===================*/ - /* out: error value FALSE if the smallest log group is - too small to accommodate the number of OS threads in - the database server */ -{ - log_group_t* group; - ulint margin; - ulint free; - ibool success = TRUE; - ulint smallest_capacity; - ulint archive_margin; - ulint smallest_archive_margin; - - ut_ad(!mutex_own(&(log_sys->mutex))); - - mutex_enter(&(log_sys->mutex)); - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - ut_ad(group); - - smallest_capacity = ULINT_MAX; - smallest_archive_margin = ULINT_MAX; - - while (group) { - if (log_group_get_capacity(group) < smallest_capacity) { - - smallest_capacity = log_group_get_capacity(group); - } - - archive_margin = log_group_get_capacity(group) - - (group->file_size - LOG_FILE_HDR_SIZE) - - LOG_ARCHIVE_EXTRA_MARGIN; - - if (archive_margin < smallest_archive_margin) { - - smallest_archive_margin = archive_margin; - } - - group = UT_LIST_GET_NEXT(log_groups, group); - } - - /* Add extra safety */ - smallest_capacity = smallest_capacity - smallest_capacity / 10; - - /* For each OS thread we must reserve so much free space in the - smallest log group that it can accommodate the log entries produced - by single query steps: running out of free log space is a serious - system error which requires rebooting the database. 
*/ - - free = LOG_CHECKPOINT_FREE_PER_THREAD * (10 + srv_thread_concurrency) - + LOG_CHECKPOINT_EXTRA_FREE; - if (free >= smallest_capacity / 2) { - success = FALSE; - - goto failure; - } else { - margin = smallest_capacity - free; - } - - margin = ut_min(margin, log_sys->adm_checkpoint_interval); - - margin = margin - margin / 10; /* Add still some extra safety */ - - log_sys->log_group_capacity = smallest_capacity; - - log_sys->max_modified_age_async = margin - - margin / LOG_POOL_PREFLUSH_RATIO_ASYNC; - log_sys->max_modified_age_sync = margin - - margin / LOG_POOL_PREFLUSH_RATIO_SYNC; - - log_sys->max_checkpoint_age_async = margin - margin - / LOG_POOL_CHECKPOINT_RATIO_ASYNC; - log_sys->max_checkpoint_age = margin; - -#ifdef UNIV_LOG_ARCHIVE - log_sys->max_archived_lsn_age = smallest_archive_margin; - - log_sys->max_archived_lsn_age_async = smallest_archive_margin - - smallest_archive_margin / LOG_ARCHIVE_RATIO_ASYNC; -#endif /* UNIV_LOG_ARCHIVE */ -failure: - mutex_exit(&(log_sys->mutex)); - - if (!success) { - fprintf(stderr, - "InnoDB: Error: ib_logfiles are too small" - " for innodb_thread_concurrency %lu.\n" - "InnoDB: The combined size of ib_logfiles" - " should be bigger than\n" - "InnoDB: 200 kB * innodb_thread_concurrency.\n" - "InnoDB: To get mysqld to start up, set" - " innodb_thread_concurrency in my.cnf\n" - "InnoDB: to a lower value, for example, to 8." - " After an ERROR-FREE shutdown\n" - "InnoDB: of mysqld you can adjust the size of" - " ib_logfiles, as explained in\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "adding-and-removing.html\n" - "InnoDB: Cannot continue operation." - " Calling exit(1).\n", - (ulong)srv_thread_concurrency); - - exit(1); - } - - return(success); -} - -/********************************************************** -Initializes the log. 
*/ - -void -log_init(void) -/*==========*/ -{ - byte* buf; - - log_sys = mem_alloc(sizeof(log_t)); - - mutex_create(&log_sys->mutex, SYNC_LOG); - - mutex_enter(&(log_sys->mutex)); - - /* Start the lsn from one log block from zero: this way every - log record has a start lsn != zero, a fact which we will use */ - - log_sys->lsn = LOG_START_LSN; - - ut_a(LOG_BUFFER_SIZE >= 16 * OS_FILE_LOG_BLOCK_SIZE); - ut_a(LOG_BUFFER_SIZE >= 4 * UNIV_PAGE_SIZE); - - buf = ut_malloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE); - log_sys->buf = ut_align(buf, OS_FILE_LOG_BLOCK_SIZE); - - log_sys->buf_size = LOG_BUFFER_SIZE; - - memset(log_sys->buf, '\0', LOG_BUFFER_SIZE); - - log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO - - LOG_BUF_FLUSH_MARGIN; - log_sys->check_flush_or_checkpoint = TRUE; - UT_LIST_INIT(log_sys->log_groups); - - log_sys->n_log_ios = 0; - - log_sys->n_log_ios_old = log_sys->n_log_ios; - log_sys->last_printout_time = time(NULL); - /*----------------------------*/ - - log_sys->buf_next_to_write = 0; - - log_sys->write_lsn = ut_dulint_zero; - log_sys->current_flush_lsn = ut_dulint_zero; - log_sys->flushed_to_disk_lsn = ut_dulint_zero; - - log_sys->written_to_some_lsn = log_sys->lsn; - log_sys->written_to_all_lsn = log_sys->lsn; - - log_sys->n_pending_writes = 0; - - log_sys->no_flush_event = os_event_create(NULL); - - os_event_set(log_sys->no_flush_event); - - log_sys->one_flushed_event = os_event_create(NULL); - - os_event_set(log_sys->one_flushed_event); - - /*----------------------------*/ - log_sys->adm_checkpoint_interval = ULINT_MAX; - - log_sys->next_checkpoint_no = ut_dulint_zero; - log_sys->last_checkpoint_lsn = log_sys->lsn; - log_sys->n_pending_checkpoint_writes = 0; - - rw_lock_create(&log_sys->checkpoint_lock, SYNC_NO_ORDER_CHECK); - - log_sys->checkpoint_buf - = ut_align(mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE), - OS_FILE_LOG_BLOCK_SIZE); - memset(log_sys->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE); - /*----------------------------*/ - 
-#ifdef UNIV_LOG_ARCHIVE - /* Under MySQL, log archiving is always off */ - log_sys->archiving_state = LOG_ARCH_OFF; - log_sys->archived_lsn = log_sys->lsn; - log_sys->next_archived_lsn = ut_dulint_zero; - - log_sys->n_pending_archive_ios = 0; - - rw_lock_create(&log_sys->archive_lock, SYNC_NO_ORDER_CHECK); - - log_sys->archive_buf = NULL; - - /* ut_align( - ut_malloc(LOG_ARCHIVE_BUF_SIZE - + OS_FILE_LOG_BLOCK_SIZE), - OS_FILE_LOG_BLOCK_SIZE); */ - log_sys->archive_buf_size = 0; - - /* memset(log_sys->archive_buf, '\0', LOG_ARCHIVE_BUF_SIZE); */ - - log_sys->archiving_on = os_event_create(NULL); -#endif /* UNIV_LOG_ARCHIVE */ - - /*----------------------------*/ - - log_block_init(log_sys->buf, log_sys->lsn); - log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE); - - log_sys->buf_free = LOG_BLOCK_HDR_SIZE; - log_sys->lsn = ut_dulint_add(LOG_START_LSN, LOG_BLOCK_HDR_SIZE); - - mutex_exit(&(log_sys->mutex)); - -#ifdef UNIV_LOG_DEBUG - recv_sys_create(); - recv_sys_init(FALSE, buf_pool_get_curr_size()); - - recv_sys->parse_start_lsn = log_sys->lsn; - recv_sys->scanned_lsn = log_sys->lsn; - recv_sys->scanned_checkpoint_no = 0; - recv_sys->recovered_lsn = log_sys->lsn; - recv_sys->limit_lsn = ut_dulint_max; -#endif -} - -/********************************************************************** -Inits a log group to the log system. 
*/ - -void -log_group_init( -/*===========*/ - ulint id, /* in: group id */ - ulint n_files, /* in: number of log files */ - ulint file_size, /* in: log file size in bytes */ - ulint space_id, /* in: space id of the file space - which contains the log files of this - group */ - ulint archive_space_id __attribute__((unused))) - /* in: space id of the file space - which contains some archived log - files for this group; currently, only - for the first log group this is - used */ -{ - ulint i; - - log_group_t* group; - - group = mem_alloc(sizeof(log_group_t)); - - group->id = id; - group->n_files = n_files; - group->file_size = file_size; - group->space_id = space_id; - group->state = LOG_GROUP_OK; - group->lsn = LOG_START_LSN; - group->lsn_offset = LOG_FILE_HDR_SIZE; - group->n_pending_writes = 0; - - group->file_header_bufs = mem_alloc(sizeof(byte*) * n_files); -#ifdef UNIV_LOG_ARCHIVE - group->archive_file_header_bufs = mem_alloc(sizeof(byte*) * n_files); -#endif /* UNIV_LOG_ARCHIVE */ - - for (i = 0; i < n_files; i++) { - *(group->file_header_bufs + i) = ut_align( - mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE), - OS_FILE_LOG_BLOCK_SIZE); - - memset(*(group->file_header_bufs + i), '\0', - LOG_FILE_HDR_SIZE); - -#ifdef UNIV_LOG_ARCHIVE - *(group->archive_file_header_bufs + i) = ut_align( - mem_alloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE), - OS_FILE_LOG_BLOCK_SIZE); - memset(*(group->archive_file_header_bufs + i), '\0', - LOG_FILE_HDR_SIZE); -#endif /* UNIV_LOG_ARCHIVE */ - } - -#ifdef UNIV_LOG_ARCHIVE - group->archive_space_id = archive_space_id; - - group->archived_file_no = 0; - group->archived_offset = 0; -#endif /* UNIV_LOG_ARCHIVE */ - - group->checkpoint_buf = ut_align( - mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE), OS_FILE_LOG_BLOCK_SIZE); - - memset(group->checkpoint_buf, '\0', OS_FILE_LOG_BLOCK_SIZE); - - UT_LIST_ADD_LAST(log_groups, log_sys->log_groups, group); - - ut_a(log_calc_max_ages()); -} - 
-/********************************************************************** -Does the unlockings needed in flush i/o completion. */ -UNIV_INLINE -void -log_flush_do_unlocks( -/*=================*/ - ulint code) /* in: any ORed combination of LOG_UNLOCK_FLUSH_LOCK - and LOG_UNLOCK_NONE_FLUSHED_LOCK */ -{ - ut_ad(mutex_own(&(log_sys->mutex))); - - /* NOTE that we must own the log mutex when doing the setting of the - events: this is because transactions will wait for these events to - be set, and at that moment the log flush they were waiting for must - have ended. If the log mutex were not reserved here, the i/o-thread - calling this function might be preempted for a while, and when it - resumed execution, it might be that a new flush had been started, and - this function would erroneously signal the NEW flush as completed. - Thus, the changes in the state of these events are performed - atomically in conjunction with the changes in the state of - log_sys->n_pending_writes etc. */ - - if (code & LOG_UNLOCK_NONE_FLUSHED_LOCK) { - os_event_set(log_sys->one_flushed_event); - } - - if (code & LOG_UNLOCK_FLUSH_LOCK) { - os_event_set(log_sys->no_flush_event); - } -} - -/********************************************************************** -Checks if a flush is completed for a log group and does the completion -routine if yes. 
*/ -UNIV_INLINE -ulint -log_group_check_flush_completion( -/*=============================*/ - /* out: LOG_UNLOCK_NONE_FLUSHED_LOCK or 0 */ - log_group_t* group) /* in: log group */ -{ - ut_ad(mutex_own(&(log_sys->mutex))); - - if (!log_sys->one_flushed && group->n_pending_writes == 0) { -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "Log flushed first to group %lu\n", - (ulong) group->id); - } -#endif /* UNIV_DEBUG */ - log_sys->written_to_some_lsn = log_sys->write_lsn; - log_sys->one_flushed = TRUE; - - return(LOG_UNLOCK_NONE_FLUSHED_LOCK); - } - -#ifdef UNIV_DEBUG - if (log_debug_writes && (group->n_pending_writes == 0)) { - - fprintf(stderr, "Log flushed to group %lu\n", - (ulong) group->id); - } -#endif /* UNIV_DEBUG */ - return(0); -} - -/********************************************************** -Checks if a flush is completed and does the completion routine if yes. */ -static -ulint -log_sys_check_flush_completion(void) -/*================================*/ - /* out: LOG_UNLOCK_FLUSH_LOCK or 0 */ -{ - ulint move_start; - ulint move_end; - - ut_ad(mutex_own(&(log_sys->mutex))); - - if (log_sys->n_pending_writes == 0) { - - log_sys->written_to_all_lsn = log_sys->write_lsn; - log_sys->buf_next_to_write = log_sys->write_end_offset; - - if (log_sys->write_end_offset > log_sys->max_buf_free / 2) { - /* Move the log buffer content to the start of the - buffer */ - - move_start = ut_calc_align_down( - log_sys->write_end_offset, - OS_FILE_LOG_BLOCK_SIZE); - move_end = ut_calc_align(log_sys->buf_free, - OS_FILE_LOG_BLOCK_SIZE); - - ut_memmove(log_sys->buf, log_sys->buf + move_start, - move_end - move_start); - log_sys->buf_free -= move_start; - - log_sys->buf_next_to_write -= move_start; - } - - return(LOG_UNLOCK_FLUSH_LOCK); - } - - return(0); -} - -/********************************************************** -Completes an i/o to a log file. 
*/ - -void -log_io_complete( -/*============*/ - log_group_t* group) /* in: log group or a dummy pointer */ -{ - ulint unlock; - -#ifdef UNIV_LOG_ARCHIVE - if ((byte*)group == &log_archive_io) { - /* It was an archive write */ - - log_io_complete_archive(); - - return; - } -#endif /* UNIV_LOG_ARCHIVE */ - - if ((ulint)group & 0x1UL) { - /* It was a checkpoint write */ - group = (log_group_t*)((ulint)group - 1); - - if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC - && srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { - - fil_flush(group->space_id); - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "Checkpoint info written to group %lu\n", - group->id); - } -#endif /* UNIV_DEBUG */ - log_io_complete_checkpoint(); - - return; - } - - ut_error; /* We currently use synchronous writing of the - logs and cannot end up here! */ - - if (srv_unix_file_flush_method != SRV_UNIX_O_DSYNC - && srv_unix_file_flush_method != SRV_UNIX_NOSYNC - && srv_flush_log_at_trx_commit != 2) { - - fil_flush(group->space_id); - } - - mutex_enter(&(log_sys->mutex)); - - ut_a(group->n_pending_writes > 0); - ut_a(log_sys->n_pending_writes > 0); - - group->n_pending_writes--; - log_sys->n_pending_writes--; - - unlock = log_group_check_flush_completion(group); - unlock = unlock | log_sys_check_flush_completion(); - - log_flush_do_unlocks(unlock); - - mutex_exit(&(log_sys->mutex)); -} - -/********************************************************** -Writes a log file header to a log file space. 
*/ -static -void -log_group_file_header_flush( -/*========================*/ - log_group_t* group, /* in: log group */ - ulint nth_file, /* in: header to the nth file in the - log file space */ - dulint start_lsn) /* in: log file data starts at this - lsn */ -{ - byte* buf; - ulint dest_offset; - - ut_ad(mutex_own(&(log_sys->mutex))); - ut_a(nth_file < group->n_files); - - buf = *(group->file_header_bufs + nth_file); - - mach_write_to_4(buf + LOG_GROUP_ID, group->id); - mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn); - - /* Wipe over possible label of ibbackup --restore */ - memcpy(buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, " ", 4); - - dest_offset = nth_file * group->file_size; - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "Writing log file header to group %lu file %lu\n", - (ulong) group->id, (ulong) nth_file); - } -#endif /* UNIV_DEBUG */ - if (log_do_write) { - log_sys->n_log_ios++; - - srv_os_log_pending_writes++; - - fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, - dest_offset / UNIV_PAGE_SIZE, - dest_offset % UNIV_PAGE_SIZE, - OS_FILE_LOG_BLOCK_SIZE, - buf, group); - - srv_os_log_pending_writes--; - } -} - -/********************************************************** -Stores a 4-byte checksum to the trailer checksum field of a log block -before writing it to a log file. This checksum is used in recovery to -check the consistency of a log block. */ -static -void -log_block_store_checksum( -/*=====================*/ - byte* block) /* in/out: pointer to a log block */ -{ - log_block_set_checksum(block, log_block_calc_checksum(block)); -} - -/********************************************************** -Writes a buffer to a log file group. 
*/ - -void -log_group_write_buf( -/*================*/ - log_group_t* group, /* in: log group */ - byte* buf, /* in: buffer */ - ulint len, /* in: buffer len; must be divisible - by OS_FILE_LOG_BLOCK_SIZE */ - dulint start_lsn, /* in: start lsn of the buffer; must - be divisible by - OS_FILE_LOG_BLOCK_SIZE */ - ulint new_data_offset)/* in: start offset of new data in - buf: this parameter is used to decide - if we have to write a new log file - header */ -{ - ulint write_len; - ibool write_header; - ulint next_offset; - ulint i; - - ut_ad(mutex_own(&(log_sys->mutex))); - ut_a(len % OS_FILE_LOG_BLOCK_SIZE == 0); - ut_a(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); - - if (new_data_offset == 0) { - write_header = TRUE; - } else { - write_header = FALSE; - } -loop: - if (len == 0) { - - return; - } - - next_offset = log_group_calc_lsn_offset(start_lsn, group); - - if ((next_offset % group->file_size == LOG_FILE_HDR_SIZE) - && write_header) { - /* We start to write a new log file instance in the group */ - - log_group_file_header_flush(group, - next_offset / group->file_size, - start_lsn); - srv_os_log_written+= OS_FILE_LOG_BLOCK_SIZE; - srv_log_writes++; - } - - if ((next_offset % group->file_size) + len > group->file_size) { - - write_len = group->file_size - - (next_offset % group->file_size); - } else { - write_len = len; - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - - fprintf(stderr, - "Writing log file segment to group %lu" - " offset %lu len %lu\n" - "start lsn %lu %lu\n" - "First block n:o %lu last block n:o %lu\n", - (ulong) group->id, (ulong) next_offset, - (ulong) write_len, - (ulong) ut_dulint_get_high(start_lsn), - (ulong) ut_dulint_get_low(start_lsn), - (ulong) log_block_get_hdr_no(buf), - (ulong) log_block_get_hdr_no( - buf + write_len - OS_FILE_LOG_BLOCK_SIZE)); - ut_a(log_block_get_hdr_no(buf) - == log_block_convert_lsn_to_no(start_lsn)); - - for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) { - - 
ut_a(log_block_get_hdr_no(buf) + i - == log_block_get_hdr_no( - buf + i * OS_FILE_LOG_BLOCK_SIZE)); - } - } -#endif /* UNIV_DEBUG */ - /* Calculate the checksums for each log block and write them to - the trailer fields of the log blocks */ - - for (i = 0; i < write_len / OS_FILE_LOG_BLOCK_SIZE; i++) { - log_block_store_checksum(buf + i * OS_FILE_LOG_BLOCK_SIZE); - } - - if (log_do_write) { - log_sys->n_log_ios++; - - srv_os_log_pending_writes++; - - fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->space_id, - next_offset / UNIV_PAGE_SIZE, - next_offset % UNIV_PAGE_SIZE, write_len, buf, group); - - srv_os_log_pending_writes--; - - srv_os_log_written+= write_len; - srv_log_writes++; - } - - if (write_len < len) { - start_lsn = ut_dulint_add(start_lsn, write_len); - len -= write_len; - buf += write_len; - - write_header = TRUE; - - goto loop; - } -} - -/********************************************************** -This function is called, e.g., when a transaction wants to commit. It checks -that the log has been written to the log file up to the last log entry written -by the transaction. If there is a flush running, it waits and checks if the -flush flushed enough. If not, starts a new flush. */ - -void -log_write_up_to( -/*============*/ - dulint lsn, /* in: log sequence number up to which the log should - be written, ut_dulint_max if not specified */ - ulint wait, /* in: LOG_NO_WAIT, LOG_WAIT_ONE_GROUP, - or LOG_WAIT_ALL_GROUPS */ - ibool flush_to_disk) - /* in: TRUE if we want the written log also to be - flushed to disk */ -{ - log_group_t* group; - ulint start_offset; - ulint end_offset; - ulint area_start; - ulint area_end; -#ifdef UNIV_DEBUG - ulint loop_count = 0; -#endif /* UNIV_DEBUG */ - ulint unlock; - - if (recv_no_ibuf_operations) { - /* Recovery is running and no operations on the log files are - allowed yet (the variable name .._no_ibuf_.. 
is misleading) */ - - return; - } - -loop: -#ifdef UNIV_DEBUG - loop_count++; - - ut_ad(loop_count < 5); - -# if 0 - if (loop_count > 2) { - fprintf(stderr, "Log loop count %lu\n", loop_count); - } -# endif -#endif - - mutex_enter(&(log_sys->mutex)); - - if (flush_to_disk - && ut_dulint_cmp(log_sys->flushed_to_disk_lsn, lsn) >= 0) { - - mutex_exit(&(log_sys->mutex)); - - return; - } - - if (!flush_to_disk - && (ut_dulint_cmp(log_sys->written_to_all_lsn, lsn) >= 0 - || (ut_dulint_cmp(log_sys->written_to_some_lsn, lsn) - >= 0 - && wait != LOG_WAIT_ALL_GROUPS))) { - - mutex_exit(&(log_sys->mutex)); - - return; - } - - if (log_sys->n_pending_writes > 0) { - /* A write (+ possibly flush to disk) is running */ - - if (flush_to_disk - && ut_dulint_cmp(log_sys->current_flush_lsn, lsn) - >= 0) { - /* The write + flush will write enough: wait for it to - complete */ - - goto do_waits; - } - - if (!flush_to_disk - && ut_dulint_cmp(log_sys->write_lsn, lsn) >= 0) { - /* The write will write enough: wait for it to - complete */ - - goto do_waits; - } - - mutex_exit(&(log_sys->mutex)); - - /* Wait for the write to complete and try to start a new - write */ - - os_event_wait(log_sys->no_flush_event); - - goto loop; - } - - if (!flush_to_disk - && log_sys->buf_free == log_sys->buf_next_to_write) { - /* Nothing to write and no flush to disk requested */ - - mutex_exit(&(log_sys->mutex)); - - return; - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "Writing log from %lu %lu up to lsn %lu %lu\n", - (ulong) ut_dulint_get_high( - log_sys->written_to_all_lsn), - (ulong) ut_dulint_get_low( - log_sys->written_to_all_lsn), - (ulong) ut_dulint_get_high(log_sys->lsn), - (ulong) ut_dulint_get_low(log_sys->lsn)); - } -#endif /* UNIV_DEBUG */ - log_sys->n_pending_writes++; - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - group->n_pending_writes++; /* We assume here that we have only - one log group! 
*/ - - os_event_reset(log_sys->no_flush_event); - os_event_reset(log_sys->one_flushed_event); - - start_offset = log_sys->buf_next_to_write; - end_offset = log_sys->buf_free; - - area_start = ut_calc_align_down(start_offset, OS_FILE_LOG_BLOCK_SIZE); - area_end = ut_calc_align(end_offset, OS_FILE_LOG_BLOCK_SIZE); - - ut_ad(area_end - area_start > 0); - - log_sys->write_lsn = log_sys->lsn; - - if (flush_to_disk) { - log_sys->current_flush_lsn = log_sys->lsn; - } - - log_sys->one_flushed = FALSE; - - log_block_set_flush_bit(log_sys->buf + area_start, TRUE); - log_block_set_checkpoint_no( - log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE, - log_sys->next_checkpoint_no); - - /* Copy the last, incompletely written, log block a log block length - up, so that when the flush operation writes from the log buffer, the - segment to write will not be changed by writers to the log */ - - ut_memcpy(log_sys->buf + area_end, - log_sys->buf + area_end - OS_FILE_LOG_BLOCK_SIZE, - OS_FILE_LOG_BLOCK_SIZE); - - log_sys->buf_free += OS_FILE_LOG_BLOCK_SIZE; - log_sys->write_end_offset = log_sys->buf_free; - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - /* Do the write to the log files */ - - while (group) { - log_group_write_buf( - group, log_sys->buf + area_start, - area_end - area_start, - ut_dulint_align_down(log_sys->written_to_all_lsn, - OS_FILE_LOG_BLOCK_SIZE), - start_offset - area_start); - - log_group_set_fields(group, log_sys->write_lsn); - - group = UT_LIST_GET_NEXT(log_groups, group); - } - - mutex_exit(&(log_sys->mutex)); - - if (srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) { - /* O_DSYNC means the OS did not buffer the log file at all: - so we have also flushed to disk what we have written */ - - log_sys->flushed_to_disk_lsn = log_sys->write_lsn; - - } else if (flush_to_disk) { - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - fil_flush(group->space_id); - log_sys->flushed_to_disk_lsn = log_sys->write_lsn; - } - - mutex_enter(&(log_sys->mutex)); - - group = 
UT_LIST_GET_FIRST(log_sys->log_groups); - - ut_a(group->n_pending_writes == 1); - ut_a(log_sys->n_pending_writes == 1); - - group->n_pending_writes--; - log_sys->n_pending_writes--; - - unlock = log_group_check_flush_completion(group); - unlock = unlock | log_sys_check_flush_completion(); - - log_flush_do_unlocks(unlock); - - mutex_exit(&(log_sys->mutex)); - - return; - -do_waits: - mutex_exit(&(log_sys->mutex)); - - if (wait == LOG_WAIT_ONE_GROUP) { - os_event_wait(log_sys->one_flushed_event); - } else if (wait == LOG_WAIT_ALL_GROUPS) { - os_event_wait(log_sys->no_flush_event); - } else { - ut_ad(wait == LOG_NO_WAIT); - } -} - -/******************************************************************** -Does a syncronous flush of the log buffer to disk. */ - -void -log_buffer_flush_to_disk(void) -/*==========================*/ -{ - dulint lsn; - - mutex_enter(&(log_sys->mutex)); - - lsn = log_sys->lsn; - - mutex_exit(&(log_sys->mutex)); - - log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, TRUE); -} - -/******************************************************************** -Flush the log buffer. Force it to disk depending on the value of -innodb_flush_log_at_trx_commit. */ - -void -log_buffer_flush_maybe_sync(void) -/*==========================*/ -{ - dulint lsn; - - mutex_enter(&(log_sys->mutex)); - - lsn = log_sys->lsn; - - mutex_exit(&(log_sys->mutex)); - - /* Force log buffer to disk when innodb_flush_log_at_trx_commit = 1. */ - log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS, - srv_flush_log_at_trx_commit == 1 ? TRUE : FALSE); -} -/******************************************************************** -Tries to establish a big enough margin of free space in the log buffer, such -that a new log entry can be catenated without an immediate need for a flush. 
*/ -static -void -log_flush_margin(void) -/*==================*/ -{ - ibool do_flush = FALSE; - log_t* log = log_sys; - dulint lsn; - - mutex_enter(&(log->mutex)); - - if (log->buf_free > log->max_buf_free) { - - if (log->n_pending_writes > 0) { - /* A flush is running: hope that it will provide enough - free space */ - } else { - do_flush = TRUE; - lsn = log->lsn; - } - } - - mutex_exit(&(log->mutex)); - - if (do_flush) { - log_write_up_to(lsn, LOG_NO_WAIT, FALSE); - } -} - -/******************************************************************** -Advances the smallest lsn for which there are unflushed dirty blocks in the -buffer pool. NOTE: this function may only be called if the calling thread owns -no synchronization objects! */ - -ibool -log_preflush_pool_modified_pages( -/*=============================*/ - /* out: FALSE if there was a flush batch of - the same type running, which means that we - could not start this flush batch */ - dulint new_oldest, /* in: try to advance oldest_modified_lsn - at least to this lsn */ - ibool sync) /* in: TRUE if synchronous operation is - desired */ -{ - ulint n_pages; - - if (recv_recovery_on) { - /* If the recovery is running, we must first apply all - log records to their respective file pages to get the - right modify lsn values to these pages: otherwise, there - might be pages on disk which are not yet recovered to the - current lsn, and even after calling this function, we could - not know how up-to-date the disk version of the database is, - and we could not make a new checkpoint on the basis of the - info on the buffer pool only. */ - - recv_apply_hashed_log_recs(TRUE); - } - - n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX, new_oldest); - - if (sync) { - buf_flush_wait_batch_end(BUF_FLUSH_LIST); - } - - if (n_pages == ULINT_UNDEFINED) { - - return(FALSE); - } - - return(TRUE); -} - -/********************************************************** -Completes a checkpoint. 
*/ -static -void -log_complete_checkpoint(void) -/*=========================*/ -{ - ut_ad(mutex_own(&(log_sys->mutex))); - ut_ad(log_sys->n_pending_checkpoint_writes == 0); - - log_sys->next_checkpoint_no - = ut_dulint_add(log_sys->next_checkpoint_no, 1); - - log_sys->last_checkpoint_lsn = log_sys->next_checkpoint_lsn; - - rw_lock_x_unlock_gen(&(log_sys->checkpoint_lock), LOG_CHECKPOINT); -} - -/********************************************************** -Completes an asynchronous checkpoint info write i/o to a log file. */ -static -void -log_io_complete_checkpoint(void) -/*============================*/ -{ - mutex_enter(&(log_sys->mutex)); - - ut_ad(log_sys->n_pending_checkpoint_writes > 0); - - log_sys->n_pending_checkpoint_writes--; - - if (log_sys->n_pending_checkpoint_writes == 0) { - log_complete_checkpoint(); - } - - mutex_exit(&(log_sys->mutex)); -} - -/*********************************************************************** -Writes info to a checkpoint about a log group. */ -static -void -log_checkpoint_set_nth_group_info( -/*==============================*/ - byte* buf, /* in: buffer for checkpoint info */ - ulint n, /* in: nth slot */ - ulint file_no,/* in: archived file number */ - ulint offset) /* in: archived file offset */ -{ - ut_ad(n < LOG_MAX_N_GROUPS); - - mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY - + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO, file_no); - mach_write_to_4(buf + LOG_CHECKPOINT_GROUP_ARRAY - + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET, offset); -} - -/*********************************************************************** -Gets info from a checkpoint about a log group. 
*/ - -void -log_checkpoint_get_nth_group_info( -/*==============================*/ - byte* buf, /* in: buffer containing checkpoint info */ - ulint n, /* in: nth slot */ - ulint* file_no,/* out: archived file number */ - ulint* offset) /* out: archived file offset */ -{ - ut_ad(n < LOG_MAX_N_GROUPS); - - *file_no = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY - + 8 * n + LOG_CHECKPOINT_ARCHIVED_FILE_NO); - *offset = mach_read_from_4(buf + LOG_CHECKPOINT_GROUP_ARRAY - + 8 * n + LOG_CHECKPOINT_ARCHIVED_OFFSET); -} - -/********************************************************** -Writes the checkpoint info to a log group header. */ -static -void -log_group_checkpoint( -/*=================*/ - log_group_t* group) /* in: log group */ -{ - log_group_t* group2; -#ifdef UNIV_LOG_ARCHIVE - dulint archived_lsn; - dulint next_archived_lsn; -#endif /* UNIV_LOG_ARCHIVE */ - ulint write_offset; - ulint fold; - byte* buf; - ulint i; - - ut_ad(mutex_own(&(log_sys->mutex))); -#if LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE -# error "LOG_CHECKPOINT_SIZE > OS_FILE_LOG_BLOCK_SIZE" -#endif - - buf = group->checkpoint_buf; - - mach_write_to_8(buf + LOG_CHECKPOINT_NO, log_sys->next_checkpoint_no); - mach_write_to_8(buf + LOG_CHECKPOINT_LSN, - log_sys->next_checkpoint_lsn); - - mach_write_to_4(buf + LOG_CHECKPOINT_OFFSET, - log_group_calc_lsn_offset( - log_sys->next_checkpoint_lsn, group)); - - mach_write_to_4(buf + LOG_CHECKPOINT_LOG_BUF_SIZE, log_sys->buf_size); - -#ifdef UNIV_LOG_ARCHIVE - if (log_sys->archiving_state == LOG_ARCH_OFF) { - archived_lsn = ut_dulint_max; - } else { - archived_lsn = log_sys->archived_lsn; - - if (0 != ut_dulint_cmp(archived_lsn, - log_sys->next_archived_lsn)) { - next_archived_lsn = log_sys->next_archived_lsn; - /* For debugging only */ - } - } - - mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, archived_lsn); -#else /* UNIV_LOG_ARCHIVE */ - mach_write_to_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN, ut_dulint_max); -#endif /* UNIV_LOG_ARCHIVE */ - - for (i 
= 0; i < LOG_MAX_N_GROUPS; i++) { - log_checkpoint_set_nth_group_info(buf, i, 0, 0); - } - - group2 = UT_LIST_GET_FIRST(log_sys->log_groups); - - while (group2) { - log_checkpoint_set_nth_group_info(buf, group2->id, -#ifdef UNIV_LOG_ARCHIVE - group2->archived_file_no, - group2->archived_offset -#else /* UNIV_LOG_ARCHIVE */ - 0, 0 -#endif /* UNIV_LOG_ARCHIVE */ - ); - - group2 = UT_LIST_GET_NEXT(log_groups, group2); - } - - fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1); - mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_1, fold); - - fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN, - LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN); - mach_write_to_4(buf + LOG_CHECKPOINT_CHECKSUM_2, fold); - - /* Starting from InnoDB-3.23.50, we also write info on allocated - size in the tablespace */ - - mach_write_to_4(buf + LOG_CHECKPOINT_FSP_FREE_LIMIT, - log_fsp_current_free_limit); - - mach_write_to_4(buf + LOG_CHECKPOINT_FSP_MAGIC_N, - LOG_CHECKPOINT_FSP_MAGIC_N_VAL); - - /* We alternate the physical place of the checkpoint info in the first - log file */ - - if (ut_dulint_get_low(log_sys->next_checkpoint_no) % 2 == 0) { - write_offset = LOG_CHECKPOINT_1; - } else { - write_offset = LOG_CHECKPOINT_2; - } - - if (log_do_write) { - if (log_sys->n_pending_checkpoint_writes == 0) { - - rw_lock_x_lock_gen(&(log_sys->checkpoint_lock), - LOG_CHECKPOINT); - } - - log_sys->n_pending_checkpoint_writes++; - - log_sys->n_log_ios++; - - /* We send as the last parameter the group machine address - added with 1, as we want to distinguish between a normal log - file write and a checkpoint field write */ - - fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->space_id, - write_offset / UNIV_PAGE_SIZE, - write_offset % UNIV_PAGE_SIZE, - OS_FILE_LOG_BLOCK_SIZE, - buf, ((byte*)group + 1)); - - ut_ad(((ulint)group & 0x1UL) == 0); - } -} - -/********************************************************** -Writes info to a buffer of a log group when log files are created in -backup restoration. 
*/

void
log_reset_first_header_and_checkpoint(
/*==================================*/
	byte*	hdr_buf,/* in: buffer which will be written to the start
			of the first log file */
	dulint	start)	/* in: lsn of the start of the first log file;
			we pretend that there is a checkpoint at
			start + LOG_BLOCK_HDR_SIZE */
{
	char*	label;
	byte*	cp;
	dulint	checkpoint_lsn;
	ulint	checksum;

	/* File header: group id 0 and the start lsn of the file */

	mach_write_to_4(hdr_buf + LOG_GROUP_ID, 0);
	mach_write_to_8(hdr_buf + LOG_FILE_START_LSN, start);

	checkpoint_lsn = ut_dulint_add(start, LOG_BLOCK_HDR_SIZE);

	/* Label of ibbackup --restore, followed directly by a timestamp
	that overwrites the terminating NUL of the label */

	label = (char*) hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP;

	strcpy(label, "ibbackup ");
	ut_sprintf_timestamp(label + (sizeof "ibbackup ") - 1);

	/* Checkpoint number zero in the first checkpoint field */

	cp = hdr_buf + LOG_CHECKPOINT_1;

	mach_write_to_8(cp + LOG_CHECKPOINT_NO, ut_dulint_zero);
	mach_write_to_8(cp + LOG_CHECKPOINT_LSN, checkpoint_lsn);
	mach_write_to_4(cp + LOG_CHECKPOINT_OFFSET,
			LOG_FILE_HDR_SIZE + LOG_BLOCK_HDR_SIZE);
	mach_write_to_4(cp + LOG_CHECKPOINT_LOG_BUF_SIZE, 2 * 1024 * 1024);
	mach_write_to_8(cp + LOG_CHECKPOINT_ARCHIVED_LSN, ut_dulint_max);

	checksum = ut_fold_binary(cp, LOG_CHECKPOINT_CHECKSUM_1);
	mach_write_to_4(cp + LOG_CHECKPOINT_CHECKSUM_1, checksum);

	checksum = ut_fold_binary(cp + LOG_CHECKPOINT_LSN,
				  LOG_CHECKPOINT_CHECKSUM_2
				  - LOG_CHECKPOINT_LSN);
	mach_write_to_4(cp + LOG_CHECKPOINT_CHECKSUM_2, checksum);

	/* Since InnoDB-3.23.50 the allocated size of the tablespace
	should be written here too, but it is not known at this point */
}

/**********************************************************
Reads a checkpoint info from a log group header to log_sys->checkpoint_buf.
*/ - -void -log_group_read_checkpoint_info( -/*===========================*/ - log_group_t* group, /* in: log group */ - ulint field) /* in: LOG_CHECKPOINT_1 or LOG_CHECKPOINT_2 */ -{ - ut_ad(mutex_own(&(log_sys->mutex))); - - log_sys->n_log_ios++; - - fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->space_id, - field / UNIV_PAGE_SIZE, field % UNIV_PAGE_SIZE, - OS_FILE_LOG_BLOCK_SIZE, log_sys->checkpoint_buf, NULL); -} - -/********************************************************** -Writes checkpoint info to groups. */ - -void -log_groups_write_checkpoint_info(void) -/*==================================*/ -{ - log_group_t* group; - - ut_ad(mutex_own(&(log_sys->mutex))); - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - while (group) { - log_group_checkpoint(group); - - group = UT_LIST_GET_NEXT(log_groups, group); - } -} - -/********************************************************** -Makes a checkpoint. Note that this function does not flush dirty -blocks from the buffer pool: it only checks what is lsn of the oldest -modification in the pool, and writes information about the lsn in -log files. Use log_make_checkpoint_at to flush also the pool. 
*/ - -ibool -log_checkpoint( -/*===========*/ - /* out: TRUE if success, FALSE if a checkpoint - write was already running */ - ibool sync, /* in: TRUE if synchronous operation is - desired */ - ibool write_always) /* in: the function normally checks if the - the new checkpoint would have a greater - lsn than the previous one: if not, then no - physical write is done; by setting this - parameter TRUE, a physical write will always be - made to log files */ -{ - dulint oldest_lsn; - - if (recv_recovery_is_on()) { - recv_apply_hashed_log_recs(TRUE); - } - - if (srv_unix_file_flush_method != SRV_UNIX_NOSYNC) { - fil_flush_file_spaces(FIL_TABLESPACE); - } - - mutex_enter(&(log_sys->mutex)); - - oldest_lsn = log_buf_pool_get_oldest_modification(); - - mutex_exit(&(log_sys->mutex)); - - /* Because log also contains headers and dummy log records, - if the buffer pool contains no dirty buffers, oldest_lsn - gets the value log_sys->lsn from the previous function, - and we must make sure that the log is flushed up to that - lsn. If there are dirty buffers in the buffer pool, then our - write-ahead-logging algorithm ensures that the log has been flushed - up to oldest_lsn. 
*/ - - log_write_up_to(oldest_lsn, LOG_WAIT_ALL_GROUPS, TRUE); - - mutex_enter(&(log_sys->mutex)); - - if (!write_always - && ut_dulint_cmp(log_sys->last_checkpoint_lsn, oldest_lsn) >= 0) { - - mutex_exit(&(log_sys->mutex)); - - return(TRUE); - } - - ut_ad(ut_dulint_cmp(log_sys->written_to_all_lsn, oldest_lsn) >= 0); - - if (log_sys->n_pending_checkpoint_writes > 0) { - /* A checkpoint write is running */ - - mutex_exit(&(log_sys->mutex)); - - if (sync) { - /* Wait for the checkpoint write to complete */ - rw_lock_s_lock(&(log_sys->checkpoint_lock)); - rw_lock_s_unlock(&(log_sys->checkpoint_lock)); - } - - return(FALSE); - } - - log_sys->next_checkpoint_lsn = oldest_lsn; - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, "Making checkpoint no %lu at lsn %lu %lu\n", - (ulong) ut_dulint_get_low(log_sys->next_checkpoint_no), - (ulong) ut_dulint_get_high(oldest_lsn), - (ulong) ut_dulint_get_low(oldest_lsn)); - } -#endif /* UNIV_DEBUG */ - - log_groups_write_checkpoint_info(); - - mutex_exit(&(log_sys->mutex)); - - if (sync) { - /* Wait for the checkpoint write to complete */ - rw_lock_s_lock(&(log_sys->checkpoint_lock)); - rw_lock_s_unlock(&(log_sys->checkpoint_lock)); - } - - return(TRUE); -} - -/******************************************************************** -Makes a checkpoint at a given lsn or later. 
*/

void
log_make_checkpoint_at(
/*===================*/
	dulint	lsn,		/* in: make a checkpoint at this or a later
				lsn, if ut_dulint_max, makes a checkpoint at
				the latest lsn */
	ibool	write_always)	/* in: the function normally checks if the
				the new checkpoint would have a greater
				lsn than the previous one: if not, then no
				physical write is done; by setting this
				parameter TRUE, a physical write will always be
				made to log files */
{
	/* Synchronous preflush of the pool: retry until it reports
	success */

	while (!log_preflush_pool_modified_pages(lsn, TRUE)) {
		/* try again */
	}

	/* Synchronous checkpoint: retry until it reports success */

	while (!log_checkpoint(TRUE, write_always)) {
		/* try again */
	}
}

/********************************************************************
Tries to establish a big enough margin of free space in the log groups, such
that a new log entry can be catenated without an immediate need for a
checkpoint. NOTE: this function may only be called if the calling thread
owns no synchronization objects!
*/ -static -void -log_checkpoint_margin(void) -/*=======================*/ -{ - log_t* log = log_sys; - ulint age; - ulint checkpoint_age; - ulint advance; - dulint oldest_lsn; - ibool sync; - ibool checkpoint_sync; - ibool do_checkpoint; - ibool success; -loop: - sync = FALSE; - checkpoint_sync = FALSE; - do_checkpoint = FALSE; - - mutex_enter(&(log->mutex)); - - if (log->check_flush_or_checkpoint == FALSE) { - mutex_exit(&(log->mutex)); - - return; - } - - oldest_lsn = log_buf_pool_get_oldest_modification(); - - age = ut_dulint_minus(log->lsn, oldest_lsn); - - if (age > log->max_modified_age_sync) { - - /* A flush is urgent: we have to do a synchronous preflush */ - - sync = TRUE; - advance = 2 * (age - log->max_modified_age_sync); - } else if (age > log->max_modified_age_async) { - - /* A flush is not urgent: we do an asynchronous preflush */ - advance = age - log->max_modified_age_async; - } else { - advance = 0; - } - - checkpoint_age = ut_dulint_minus(log->lsn, log->last_checkpoint_lsn); - - if (checkpoint_age > log->max_checkpoint_age) { - /* A checkpoint is urgent: we do it synchronously */ - - checkpoint_sync = TRUE; - - do_checkpoint = TRUE; - - } else if (checkpoint_age > log->max_checkpoint_age_async) { - /* A checkpoint is not urgent: do it asynchronously */ - - do_checkpoint = TRUE; - - log->check_flush_or_checkpoint = FALSE; - } else { - log->check_flush_or_checkpoint = FALSE; - } - - mutex_exit(&(log->mutex)); - - if (advance) { - dulint new_oldest = ut_dulint_add(oldest_lsn, advance); - - success = log_preflush_pool_modified_pages(new_oldest, sync); - - /* If the flush succeeded, this thread has done its part - and can proceed. If it did not succeed, there was another - thread doing a flush at the same time. If sync was FALSE, - the flush was not urgent, and we let this thread proceed. - Otherwise, we let it start from the beginning again. 
*/ - - if (sync && !success) { - mutex_enter(&(log->mutex)); - - log->check_flush_or_checkpoint = TRUE; - - mutex_exit(&(log->mutex)); - goto loop; - } - } - - if (do_checkpoint) { - log_checkpoint(checkpoint_sync, FALSE); - - if (checkpoint_sync) { - - goto loop; - } - } -} - -/********************************************************** -Reads a specified log segment to a buffer. */ - -void -log_group_read_log_seg( -/*===================*/ - ulint type, /* in: LOG_ARCHIVE or LOG_RECOVER */ - byte* buf, /* in: buffer where to read */ - log_group_t* group, /* in: log group */ - dulint start_lsn, /* in: read area start */ - dulint end_lsn) /* in: read area end */ -{ - ulint len; - ulint source_offset; - ibool sync; - - ut_ad(mutex_own(&(log_sys->mutex))); - - sync = FALSE; - - if (type == LOG_RECOVER) { - sync = TRUE; - } -loop: - source_offset = log_group_calc_lsn_offset(start_lsn, group); - - len = ut_dulint_minus(end_lsn, start_lsn); - - ut_ad(len != 0); - - if ((source_offset % group->file_size) + len > group->file_size) { - - len = group->file_size - (source_offset % group->file_size); - } - -#ifdef UNIV_LOG_ARCHIVE - if (type == LOG_ARCHIVE) { - - log_sys->n_pending_archive_ios++; - } -#endif /* UNIV_LOG_ARCHIVE */ - - log_sys->n_log_ios++; - - fil_io(OS_FILE_READ | OS_FILE_LOG, sync, group->space_id, - source_offset / UNIV_PAGE_SIZE, source_offset % UNIV_PAGE_SIZE, - len, buf, NULL); - - start_lsn = ut_dulint_add(start_lsn, len); - buf += len; - - if (ut_dulint_cmp(start_lsn, end_lsn) != 0) { - - goto loop; - } -} - -#ifdef UNIV_LOG_ARCHIVE -/********************************************************** -Generates an archived log file name. 
*/ - -void -log_archived_file_name_gen( -/*=======================*/ - char* buf, /* in: buffer where to write */ - ulint id __attribute__((unused)), - /* in: group id; - currently we only archive the first group */ - ulint file_no)/* in: file number */ -{ - sprintf(buf, "%sib_arch_log_%010lu", srv_arch_dir, (ulong) file_no); -} - -/********************************************************** -Writes a log file header to a log file space. */ -static -void -log_group_archive_file_header_write( -/*================================*/ - log_group_t* group, /* in: log group */ - ulint nth_file, /* in: header to the nth file in the - archive log file space */ - ulint file_no, /* in: archived file number */ - dulint start_lsn) /* in: log file data starts at this - lsn */ -{ - byte* buf; - ulint dest_offset; - - ut_ad(mutex_own(&(log_sys->mutex))); - - ut_a(nth_file < group->n_files); - - buf = *(group->archive_file_header_bufs + nth_file); - - mach_write_to_4(buf + LOG_GROUP_ID, group->id); - mach_write_to_8(buf + LOG_FILE_START_LSN, start_lsn); - mach_write_to_4(buf + LOG_FILE_NO, file_no); - - mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, FALSE); - - dest_offset = nth_file * group->file_size; - - log_sys->n_log_ios++; - - fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id, - dest_offset / UNIV_PAGE_SIZE, - dest_offset % UNIV_PAGE_SIZE, - 2 * OS_FILE_LOG_BLOCK_SIZE, - buf, &log_archive_io); -} - -/********************************************************** -Writes a log file header to a completed archived log file. 
*/ -static -void -log_group_archive_completed_header_write( -/*=====================================*/ - log_group_t* group, /* in: log group */ - ulint nth_file, /* in: header to the nth file in the - archive log file space */ - dulint end_lsn) /* in: end lsn of the file */ -{ - byte* buf; - ulint dest_offset; - - ut_ad(mutex_own(&(log_sys->mutex))); - ut_a(nth_file < group->n_files); - - buf = *(group->archive_file_header_bufs + nth_file); - - mach_write_to_4(buf + LOG_FILE_ARCH_COMPLETED, TRUE); - mach_write_to_8(buf + LOG_FILE_END_LSN, end_lsn); - - dest_offset = nth_file * group->file_size + LOG_FILE_ARCH_COMPLETED; - - log_sys->n_log_ios++; - - fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, group->archive_space_id, - dest_offset / UNIV_PAGE_SIZE, - dest_offset % UNIV_PAGE_SIZE, - OS_FILE_LOG_BLOCK_SIZE, - buf + LOG_FILE_ARCH_COMPLETED, - &log_archive_io); -} - -/********************************************************** -Does the archive writes for a single log group. */ -static -void -log_group_archive( -/*==============*/ - log_group_t* group) /* in: log group */ -{ - os_file_t file_handle; - dulint start_lsn; - dulint end_lsn; - char name[1024]; - byte* buf; - ulint len; - ibool ret; - ulint next_offset; - ulint n_files; - ulint open_mode; - - ut_ad(mutex_own(&(log_sys->mutex))); - - start_lsn = log_sys->archived_lsn; - - ut_a(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); - - end_lsn = log_sys->next_archived_lsn; - - ut_a(ut_dulint_get_low(end_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); - - buf = log_sys->archive_buf; - - n_files = 0; - - next_offset = group->archived_offset; -loop: - if ((next_offset % group->file_size == 0) - || (fil_space_get_size(group->archive_space_id) == 0)) { - - /* Add the file to the archive file space; create or open the - file */ - - if (next_offset % group->file_size == 0) { - open_mode = OS_FILE_CREATE; - } else { - open_mode = OS_FILE_OPEN; - } - - log_archived_file_name_gen(name, group->id, - group->archived_file_no + 
n_files); - - file_handle = os_file_create(name, open_mode, OS_FILE_AIO, - OS_DATA_FILE, &ret); - - if (!ret && (open_mode == OS_FILE_CREATE)) { - file_handle = os_file_create( - name, OS_FILE_OPEN, OS_FILE_AIO, - OS_DATA_FILE, &ret); - } - - if (!ret) { - fprintf(stderr, - "InnoDB: Cannot create or open" - " archive log file %s.\n" - "InnoDB: Cannot continue operation.\n" - "InnoDB: Check that the log archive" - " directory exists,\n" - "InnoDB: you have access rights to it, and\n" - "InnoDB: there is space available.\n", name); - exit(1); - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, "Created archive file %s\n", name); - } -#endif /* UNIV_DEBUG */ - - ret = os_file_close(file_handle); - - ut_a(ret); - - /* Add the archive file as a node to the space */ - - fil_node_create(name, group->file_size / UNIV_PAGE_SIZE, - group->archive_space_id, FALSE); - - if (next_offset % group->file_size == 0) { - log_group_archive_file_header_write( - group, n_files, - group->archived_file_no + n_files, - start_lsn); - - next_offset += LOG_FILE_HDR_SIZE; - } - } - - len = ut_dulint_minus(end_lsn, start_lsn); - - if (group->file_size < (next_offset % group->file_size) + len) { - - len = group->file_size - (next_offset % group->file_size); - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "Archiving starting at lsn %lu %lu, len %lu" - " to group %lu\n", - (ulong) ut_dulint_get_high(start_lsn), - (ulong) ut_dulint_get_low(start_lsn), - (ulong) len, (ulong) group->id); - } -#endif /* UNIV_DEBUG */ - - log_sys->n_pending_archive_ios++; - - log_sys->n_log_ios++; - - fil_io(OS_FILE_WRITE | OS_FILE_LOG, FALSE, group->archive_space_id, - next_offset / UNIV_PAGE_SIZE, next_offset % UNIV_PAGE_SIZE, - ut_calc_align(len, OS_FILE_LOG_BLOCK_SIZE), buf, - &log_archive_io); - - start_lsn = ut_dulint_add(start_lsn, len); - next_offset += len; - buf += len; - - if (next_offset % group->file_size == 0) { - n_files++; - } - - if (ut_dulint_cmp(end_lsn, 
start_lsn) != 0) { - - goto loop; - } - - group->next_archived_file_no = group->archived_file_no + n_files; - group->next_archived_offset = next_offset % group->file_size; - - ut_a(group->next_archived_offset % OS_FILE_LOG_BLOCK_SIZE == 0); -} - -/********************************************************* -(Writes to the archive of each log group.) Currently, only the first -group is archived. */ -static -void -log_archive_groups(void) -/*====================*/ -{ - log_group_t* group; - - ut_ad(mutex_own(&(log_sys->mutex))); - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - log_group_archive(group); -} - -/********************************************************* -Completes the archiving write phase for (each log group), currently, -the first log group. */ -static -void -log_archive_write_complete_groups(void) -/*===================================*/ -{ - log_group_t* group; - ulint end_offset; - ulint trunc_files; - ulint n_files; - dulint start_lsn; - dulint end_lsn; - ulint i; - - ut_ad(mutex_own(&(log_sys->mutex))); - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - group->archived_file_no = group->next_archived_file_no; - group->archived_offset = group->next_archived_offset; - - /* Truncate from the archive file space all but the last - file, or if it has been written full, all files */ - - n_files = (UNIV_PAGE_SIZE - * fil_space_get_size(group->archive_space_id)) - / group->file_size; - ut_ad(n_files > 0); - - end_offset = group->archived_offset; - - if (end_offset % group->file_size == 0) { - - trunc_files = n_files; - } else { - trunc_files = n_files - 1; - } - -#ifdef UNIV_DEBUG - if (log_debug_writes && trunc_files) { - fprintf(stderr, - "Complete file(s) archived to group %lu\n", - (ulong) group->id); - } -#endif /* UNIV_DEBUG */ - - /* Calculate the archive file space start lsn */ - start_lsn = ut_dulint_subtract( - log_sys->next_archived_lsn, - end_offset - LOG_FILE_HDR_SIZE + trunc_files - * (group->file_size - LOG_FILE_HDR_SIZE)); - 
end_lsn = start_lsn; - - for (i = 0; i < trunc_files; i++) { - - end_lsn = ut_dulint_add(end_lsn, - group->file_size - LOG_FILE_HDR_SIZE); - - /* Write a notice to the headers of archived log - files that the file write has been completed */ - - log_group_archive_completed_header_write(group, i, end_lsn); - } - - fil_space_truncate_start(group->archive_space_id, - trunc_files * group->file_size); - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fputs("Archiving writes completed\n", stderr); - } -#endif /* UNIV_DEBUG */ -} - -/********************************************************** -Completes an archiving i/o. */ -static -void -log_archive_check_completion_low(void) -/*==================================*/ -{ - ut_ad(mutex_own(&(log_sys->mutex))); - - if (log_sys->n_pending_archive_ios == 0 - && log_sys->archiving_phase == LOG_ARCHIVE_READ) { - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fputs("Archiving read completed\n", stderr); - } -#endif /* UNIV_DEBUG */ - - /* Archive buffer has now been read in: start archive writes */ - - log_sys->archiving_phase = LOG_ARCHIVE_WRITE; - - log_archive_groups(); - } - - if (log_sys->n_pending_archive_ios == 0 - && log_sys->archiving_phase == LOG_ARCHIVE_WRITE) { - - log_archive_write_complete_groups(); - - log_sys->archived_lsn = log_sys->next_archived_lsn; - - rw_lock_x_unlock_gen(&(log_sys->archive_lock), LOG_ARCHIVE); - } -} - -/********************************************************** -Completes an archiving i/o. 
*/ -static -void -log_io_complete_archive(void) -/*=========================*/ -{ - log_group_t* group; - - mutex_enter(&(log_sys->mutex)); - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - mutex_exit(&(log_sys->mutex)); - - fil_flush(group->archive_space_id); - - mutex_enter(&(log_sys->mutex)); - - ut_ad(log_sys->n_pending_archive_ios > 0); - - log_sys->n_pending_archive_ios--; - - log_archive_check_completion_low(); - - mutex_exit(&(log_sys->mutex)); -} - -/************************************************************************ -Starts an archiving operation. */ - -ibool -log_archive_do( -/*===========*/ - /* out: TRUE if succeed, FALSE if an archiving - operation was already running */ - ibool sync, /* in: TRUE if synchronous operation is desired */ - ulint* n_bytes)/* out: archive log buffer size, 0 if nothing to - archive */ -{ - ibool calc_new_limit; - dulint start_lsn; - dulint limit_lsn; - - calc_new_limit = TRUE; -loop: - mutex_enter(&(log_sys->mutex)); - - if (log_sys->archiving_state == LOG_ARCH_OFF) { - mutex_exit(&(log_sys->mutex)); - - *n_bytes = 0; - - return(TRUE); - - } else if (log_sys->archiving_state == LOG_ARCH_STOPPED - || log_sys->archiving_state == LOG_ARCH_STOPPING2) { - - mutex_exit(&(log_sys->mutex)); - - os_event_wait(log_sys->archiving_on); - - mutex_enter(&(log_sys->mutex)); - - goto loop; - } - - start_lsn = log_sys->archived_lsn; - - if (calc_new_limit) { - ut_a(log_sys->archive_buf_size % OS_FILE_LOG_BLOCK_SIZE == 0); - limit_lsn = ut_dulint_add(start_lsn, - log_sys->archive_buf_size); - - *n_bytes = log_sys->archive_buf_size; - - if (ut_dulint_cmp(limit_lsn, log_sys->lsn) >= 0) { - - limit_lsn = ut_dulint_align_down( - log_sys->lsn, OS_FILE_LOG_BLOCK_SIZE); - } - } - - if (ut_dulint_cmp(log_sys->archived_lsn, limit_lsn) >= 0) { - - mutex_exit(&(log_sys->mutex)); - - *n_bytes = 0; - - return(TRUE); - } - - if (ut_dulint_cmp(log_sys->written_to_all_lsn, limit_lsn) < 0) { - - mutex_exit(&(log_sys->mutex)); - - 
log_write_up_to(limit_lsn, LOG_WAIT_ALL_GROUPS, TRUE); - - calc_new_limit = FALSE; - - goto loop; - } - - if (log_sys->n_pending_archive_ios > 0) { - /* An archiving operation is running */ - - mutex_exit(&(log_sys->mutex)); - - if (sync) { - rw_lock_s_lock(&(log_sys->archive_lock)); - rw_lock_s_unlock(&(log_sys->archive_lock)); - } - - *n_bytes = log_sys->archive_buf_size; - - return(FALSE); - } - - rw_lock_x_lock_gen(&(log_sys->archive_lock), LOG_ARCHIVE); - - log_sys->archiving_phase = LOG_ARCHIVE_READ; - - log_sys->next_archived_lsn = limit_lsn; - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "Archiving from lsn %lu %lu to lsn %lu %lu\n", - (ulong) ut_dulint_get_high(log_sys->archived_lsn), - (ulong) ut_dulint_get_low(log_sys->archived_lsn), - (ulong) ut_dulint_get_high(limit_lsn), - (ulong) ut_dulint_get_low(limit_lsn)); - } -#endif /* UNIV_DEBUG */ - - /* Read the log segment to the archive buffer */ - - log_group_read_log_seg(LOG_ARCHIVE, log_sys->archive_buf, - UT_LIST_GET_FIRST(log_sys->log_groups), - start_lsn, limit_lsn); - - mutex_exit(&(log_sys->mutex)); - - if (sync) { - rw_lock_s_lock(&(log_sys->archive_lock)); - rw_lock_s_unlock(&(log_sys->archive_lock)); - } - - *n_bytes = log_sys->archive_buf_size; - - return(TRUE); -} - -/******************************************************************** -Writes the log contents to the archive at least up to the lsn when this -function was called. 
*/ -static -void -log_archive_all(void) -/*=================*/ -{ - dulint present_lsn; - ulint dummy; - - mutex_enter(&(log_sys->mutex)); - - if (log_sys->archiving_state == LOG_ARCH_OFF) { - mutex_exit(&(log_sys->mutex)); - - return; - } - - present_lsn = log_sys->lsn; - - mutex_exit(&(log_sys->mutex)); - - log_pad_current_log_block(); - - for (;;) { - mutex_enter(&(log_sys->mutex)); - - if (ut_dulint_cmp(present_lsn, log_sys->archived_lsn) <= 0) { - - mutex_exit(&(log_sys->mutex)); - - return; - } - - mutex_exit(&(log_sys->mutex)); - - log_archive_do(TRUE, &dummy); - } -} - -/********************************************************* -Closes the possible open archive log file (for each group) the first group, -and if it was open, increments the group file count by 2, if desired. */ -static -void -log_archive_close_groups( -/*=====================*/ - ibool increment_file_count) /* in: TRUE if we want to increment - the file count */ -{ - log_group_t* group; - ulint trunc_len; - - ut_ad(mutex_own(&(log_sys->mutex))); - - if (log_sys->archiving_state == LOG_ARCH_OFF) { - - return; - } - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - trunc_len = UNIV_PAGE_SIZE - * fil_space_get_size(group->archive_space_id); - if (trunc_len > 0) { - ut_a(trunc_len == group->file_size); - - /* Write a notice to the headers of archived log - files that the file write has been completed */ - - log_group_archive_completed_header_write( - group, 0, log_sys->archived_lsn); - - fil_space_truncate_start(group->archive_space_id, - trunc_len); - if (increment_file_count) { - group->archived_offset = 0; - group->archived_file_no += 2; - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "Incrementing arch file no to %lu" - " in log group %lu\n", - (ulong) group->archived_file_no + 2, - (ulong) group->id); - } -#endif /* UNIV_DEBUG */ - } -} - -/******************************************************************** -Writes the log contents to the archive up to the lsn 
when this function was -called, and stops the archiving. When archiving is started again, the archived -log file numbers start from 2 higher, so that the archiving will not write -again to the archived log files which exist when this function returns. */ - -ulint -log_archive_stop(void) -/*==================*/ - /* out: DB_SUCCESS or DB_ERROR */ -{ - ibool success; - - mutex_enter(&(log_sys->mutex)); - - if (log_sys->archiving_state != LOG_ARCH_ON) { - - mutex_exit(&(log_sys->mutex)); - - return(DB_ERROR); - } - - log_sys->archiving_state = LOG_ARCH_STOPPING; - - mutex_exit(&(log_sys->mutex)); - - log_archive_all(); - - mutex_enter(&(log_sys->mutex)); - - log_sys->archiving_state = LOG_ARCH_STOPPING2; - os_event_reset(log_sys->archiving_on); - - mutex_exit(&(log_sys->mutex)); - - /* Wait for a possible archiving operation to end */ - - rw_lock_s_lock(&(log_sys->archive_lock)); - rw_lock_s_unlock(&(log_sys->archive_lock)); - - mutex_enter(&(log_sys->mutex)); - - /* Close all archived log files, incrementing the file count by 2, - if appropriate */ - - log_archive_close_groups(TRUE); - - mutex_exit(&(log_sys->mutex)); - - /* Make a checkpoint, so that if recovery is needed, the file numbers - of new archived log files will start from the right value */ - - success = FALSE; - - while (!success) { - success = log_checkpoint(TRUE, TRUE); - } - - mutex_enter(&(log_sys->mutex)); - - log_sys->archiving_state = LOG_ARCH_STOPPED; - - mutex_exit(&(log_sys->mutex)); - - return(DB_SUCCESS); -} - -/******************************************************************** -Starts again archiving which has been stopped. 
*/ - -ulint -log_archive_start(void) -/*===================*/ - /* out: DB_SUCCESS or DB_ERROR */ -{ - mutex_enter(&(log_sys->mutex)); - - if (log_sys->archiving_state != LOG_ARCH_STOPPED) { - - mutex_exit(&(log_sys->mutex)); - - return(DB_ERROR); - } - - log_sys->archiving_state = LOG_ARCH_ON; - - os_event_set(log_sys->archiving_on); - - mutex_exit(&(log_sys->mutex)); - - return(DB_SUCCESS); -} - -/******************************************************************** -Stop archiving the log so that a gap may occur in the archived log files. */ - -ulint -log_archive_noarchivelog(void) -/*==========================*/ - /* out: DB_SUCCESS or DB_ERROR */ -{ -loop: - mutex_enter(&(log_sys->mutex)); - - if (log_sys->archiving_state == LOG_ARCH_STOPPED - || log_sys->archiving_state == LOG_ARCH_OFF) { - - log_sys->archiving_state = LOG_ARCH_OFF; - - os_event_set(log_sys->archiving_on); - - mutex_exit(&(log_sys->mutex)); - - return(DB_SUCCESS); - } - - mutex_exit(&(log_sys->mutex)); - - log_archive_stop(); - - os_thread_sleep(500000); - - goto loop; -} - -/******************************************************************** -Start archiving the log so that a gap may occur in the archived log files. */ - -ulint -log_archive_archivelog(void) -/*========================*/ - /* out: DB_SUCCESS or DB_ERROR */ -{ - mutex_enter(&(log_sys->mutex)); - - if (log_sys->archiving_state == LOG_ARCH_OFF) { - - log_sys->archiving_state = LOG_ARCH_ON; - - log_sys->archived_lsn - = ut_dulint_align_down(log_sys->lsn, - OS_FILE_LOG_BLOCK_SIZE); - mutex_exit(&(log_sys->mutex)); - - return(DB_SUCCESS); - } - - mutex_exit(&(log_sys->mutex)); - - return(DB_ERROR); -} - -/******************************************************************** -Tries to establish a big enough margin of free space in the log groups, such -that a new log entry can be catenated without an immediate need for -archiving. 
*/ -static -void -log_archive_margin(void) -/*====================*/ -{ - log_t* log = log_sys; - ulint age; - ibool sync; - ulint dummy; -loop: - mutex_enter(&(log->mutex)); - - if (log->archiving_state == LOG_ARCH_OFF) { - mutex_exit(&(log->mutex)); - - return; - } - - age = ut_dulint_minus(log->lsn, log->archived_lsn); - - if (age > log->max_archived_lsn_age) { - - /* An archiving is urgent: we have to do synchronous i/o */ - - sync = TRUE; - - } else if (age > log->max_archived_lsn_age_async) { - - /* An archiving is not urgent: we do asynchronous i/o */ - - sync = FALSE; - } else { - /* No archiving required yet */ - - mutex_exit(&(log->mutex)); - - return; - } - - mutex_exit(&(log->mutex)); - - log_archive_do(sync, &dummy); - - if (sync == TRUE) { - /* Check again that enough was written to the archive */ - - goto loop; - } -} -#endif /* UNIV_LOG_ARCHIVE */ - -/************************************************************************ -Checks that there is enough free space in the log to start a new query step. -Flushes the log buffer or makes a new checkpoint if necessary. NOTE: this -function may only be called if the calling thread owns no synchronization -objects! */ - -void -log_check_margins(void) -/*===================*/ -{ -loop: - log_flush_margin(); - - log_checkpoint_margin(); - -#ifdef UNIV_LOG_ARCHIVE - log_archive_margin(); -#endif /* UNIV_LOG_ARCHIVE */ - - mutex_enter(&(log_sys->mutex)); - - if (log_sys->check_flush_or_checkpoint) { - - mutex_exit(&(log_sys->mutex)); - - goto loop; - } - - mutex_exit(&(log_sys->mutex)); -} - -/******************************************************************** -Makes a checkpoint at the latest lsn and writes it to first page of each -data file in the database, so that we know that the file spaces contain -all modifications up to that lsn. This can only be called at database -shutdown. This function also writes all log in log files to the log archive. 
*/ - -void -logs_empty_and_mark_files_at_shutdown(void) -/*=======================================*/ -{ - dulint lsn; - ulint arch_log_no; - - if (srv_print_verbose_log) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Starting shutdown...\n"); - } - /* Wait until the master thread and all other operations are idle: our - algorithm only works if the server is idle at shutdown */ - - srv_shutdown_state = SRV_SHUTDOWN_CLEANUP; -loop: - os_thread_sleep(100000); - - mutex_enter(&kernel_mutex); - - /* We need the monitor threads to stop before we proceed with a - normal shutdown. In case of very fast shutdown, however, we can - proceed without waiting for monitor threads. */ - - if (srv_fast_shutdown < 2 - && (srv_error_monitor_active - || srv_lock_timeout_and_monitor_active)) { - - mutex_exit(&kernel_mutex); - - goto loop; - } - - /* Check that there are no longer transactions. We need this wait even - for the 'very fast' shutdown, because the InnoDB layer may have - committed or prepared transactions and we don't want to lose them. */ - - if (trx_n_mysql_transactions > 0 - || UT_LIST_GET_LEN(trx_sys->trx_list) > 0) { - - mutex_exit(&kernel_mutex); - - goto loop; - } - - if (srv_fast_shutdown == 2) { - /* In this fastest shutdown we do not flush the buffer pool: - it is essentially a 'crash' of the InnoDB server. Make sure - that the log is all flushed to disk, so that we can recover - all committed transactions in a crash recovery. We must not - write the lsn stamps to the data files, since at a startup - InnoDB deduces from the stamps if the previous shutdown was - clean. */ - - log_buffer_flush_to_disk(); - - return; /* We SKIP ALL THE REST !! 
*/ - } - - /* Check that the master thread is suspended */ - - if (srv_n_threads_active[SRV_MASTER] != 0) { - - mutex_exit(&kernel_mutex); - - goto loop; - } - - mutex_exit(&kernel_mutex); - - mutex_enter(&(log_sys->mutex)); - - if (log_sys->n_pending_checkpoint_writes -#ifdef UNIV_LOG_ARCHIVE - || log_sys->n_pending_archive_ios -#endif /* UNIV_LOG_ARCHIVE */ - || log_sys->n_pending_writes) { - - mutex_exit(&(log_sys->mutex)); - - goto loop; - } - - mutex_exit(&(log_sys->mutex)); - - if (!buf_pool_check_no_pending_io()) { - - goto loop; - } - -#ifdef UNIV_LOG_ARCHIVE - log_archive_all(); -#endif /* UNIV_LOG_ARCHIVE */ - - log_make_checkpoint_at(ut_dulint_max, TRUE); - - mutex_enter(&(log_sys->mutex)); - - lsn = log_sys->lsn; - - if ((ut_dulint_cmp(lsn, log_sys->last_checkpoint_lsn) != 0) -#ifdef UNIV_LOG_ARCHIVE - || (srv_log_archive_on - && ut_dulint_cmp(lsn, - ut_dulint_add(log_sys->archived_lsn, - LOG_BLOCK_HDR_SIZE)) - != 0) -#endif /* UNIV_LOG_ARCHIVE */ - ) { - - mutex_exit(&(log_sys->mutex)); - - goto loop; - } - - arch_log_no = 0; - -#ifdef UNIV_LOG_ARCHIVE - UT_LIST_GET_FIRST(log_sys->log_groups)->archived_file_no; - - if (0 == UT_LIST_GET_FIRST(log_sys->log_groups)->archived_offset) { - - arch_log_no--; - } - - log_archive_close_groups(TRUE); -#endif /* UNIV_LOG_ARCHIVE */ - - mutex_exit(&(log_sys->mutex)); - - mutex_enter(&kernel_mutex); - /* Check that the master thread has stayed suspended */ - if (srv_n_threads_active[SRV_MASTER] != 0) { - fprintf(stderr, - "InnoDB: Warning: the master thread woke up" - " during shutdown\n"); - - mutex_exit(&kernel_mutex); - - goto loop; - } - mutex_exit(&kernel_mutex); - - fil_flush_file_spaces(FIL_TABLESPACE); - fil_flush_file_spaces(FIL_LOG); - - /* The call fil_write_flushed_lsn_to_data_files() will pass the buffer - pool: therefore it is essential that the buffer pool has been - completely flushed to disk! (We do not call fil_write... if the - 'very fast' shutdown is enabled.) 
*/ - - if (!buf_all_freed()) { - - goto loop; - } - - srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE; - - /* Make some checks that the server really is quiet */ - ut_a(srv_n_threads_active[SRV_MASTER] == 0); - ut_a(buf_all_freed()); - ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn)); - - if (ut_dulint_cmp(lsn, srv_start_lsn) < 0) { - fprintf(stderr, - "InnoDB: Error: log sequence number" - " at shutdown %lu %lu\n" - "InnoDB: is lower than at startup %lu %lu!\n", - (ulong) ut_dulint_get_high(lsn), - (ulong) ut_dulint_get_low(lsn), - (ulong) ut_dulint_get_high(srv_start_lsn), - (ulong) ut_dulint_get_low(srv_start_lsn)); - } - - srv_shutdown_lsn = lsn; - - fil_write_flushed_lsn_to_data_files(lsn, arch_log_no); - - fil_flush_file_spaces(FIL_TABLESPACE); - - fil_close_all_files(); - - /* Make some checks that the server really is quiet */ - ut_a(srv_n_threads_active[SRV_MASTER] == 0); - ut_a(buf_all_freed()); - ut_a(0 == ut_dulint_cmp(lsn, log_sys->lsn)); -} - -/********************************************************** -Checks by parsing that the catenated log segment for a single mtr is -consistent. 
*/ - -ibool -log_check_log_recs( -/*===============*/ - byte* buf, /* in: pointer to the start of the log segment - in the log_sys->buf log buffer */ - ulint len, /* in: segment length in bytes */ - dulint buf_start_lsn) /* in: buffer start lsn */ -{ - dulint contiguous_lsn; - dulint scanned_lsn; - byte* start; - byte* end; - byte* buf1; - byte* scan_buf; - - ut_ad(mutex_own(&(log_sys->mutex))); - - if (len == 0) { - - return(TRUE); - } - - start = ut_align_down(buf, OS_FILE_LOG_BLOCK_SIZE); - end = ut_align(buf + len, OS_FILE_LOG_BLOCK_SIZE); - - buf1 = mem_alloc((end - start) + OS_FILE_LOG_BLOCK_SIZE); - scan_buf = ut_align(buf1, OS_FILE_LOG_BLOCK_SIZE); - - ut_memcpy(scan_buf, start, end - start); - - recv_scan_log_recs(TRUE, - (buf_pool->n_frames - - recv_n_pool_free_frames) * UNIV_PAGE_SIZE, - FALSE, scan_buf, end - start, - ut_dulint_align_down(buf_start_lsn, - OS_FILE_LOG_BLOCK_SIZE), - &contiguous_lsn, &scanned_lsn); - - ut_a(ut_dulint_cmp(scanned_lsn, ut_dulint_add(buf_start_lsn, len)) - == 0); - ut_a(ut_dulint_cmp(recv_sys->recovered_lsn, scanned_lsn) == 0); - - mem_free(buf1); - - return(TRUE); -} - -/********************************************************** -Peeks the current lsn. */ - -ibool -log_peek_lsn( -/*=========*/ - /* out: TRUE if success, FALSE if could not get the - log system mutex */ - dulint* lsn) /* out: if returns TRUE, current lsn is here */ -{ - if (0 == mutex_enter_nowait(&(log_sys->mutex))) { - *lsn = log_sys->lsn; - - mutex_exit(&(log_sys->mutex)); - - return(TRUE); - } - - return(FALSE); -} - -/********************************************************** -Prints info of the log. 
*/

void
log_print(
/*======*/
	FILE*	file)	/* in: file where to print */
{
	time_t	now;
	double	elapsed;
	double	ios_per_sec;

	mutex_enter(&(log_sys->mutex));

	fprintf(file,
		"Log sequence number %lu %lu\n"
		"Log flushed up to %lu %lu\n"
		"Last checkpoint at %lu %lu\n",
		(ulong) ut_dulint_get_high(log_sys->lsn),
		(ulong) ut_dulint_get_low(log_sys->lsn),
		(ulong) ut_dulint_get_high(log_sys->flushed_to_disk_lsn),
		(ulong) ut_dulint_get_low(log_sys->flushed_to_disk_lsn),
		(ulong) ut_dulint_get_high(log_sys->last_checkpoint_lsn),
		(ulong) ut_dulint_get_low(log_sys->last_checkpoint_lsn));

	now = time(NULL);

	/* The constant 0.001 keeps the divisor below nonzero when no time
	has passed since the previous printout */
	elapsed = 0.001 + difftime(now, log_sys->last_printout_time);

	ios_per_sec = (log_sys->n_log_ios - log_sys->n_log_ios_old)
		/ elapsed;

	fprintf(file,
		"%lu pending log writes, %lu pending chkp writes\n"
		"%lu log i/o's done, %.2f log i/o's/second\n",
		(ulong) log_sys->n_pending_writes,
		(ulong) log_sys->n_pending_checkpoint_writes,
		(ulong) log_sys->n_log_ios,
		ios_per_sec);

	/* Start a new measurement interval */
	log_sys->last_printout_time = now;
	log_sys->n_log_ios_old = log_sys->n_log_ios;

	mutex_exit(&(log_sys->mutex));
}

/**************************************************************************
Refreshes the statistics used to print per-second averages.
*/ - -void -log_refresh_stats(void) -/*===================*/ -{ - log_sys->n_log_ios_old = log_sys->n_log_ios; - log_sys->last_printout_time = time(NULL); -} diff --git a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c deleted file mode 100644 index aef58b7b576..00000000000 --- a/storage/innobase/log/log0recv.c +++ /dev/null @@ -1,3398 +0,0 @@ -/****************************************************** -Recovery - -(c) 1997 Innobase Oy - -Created 9/20/1997 Heikki Tuuri -*******************************************************/ - -#include "log0recv.h" - -#ifdef UNIV_NONINL -#include "log0recv.ic" -#endif - -#include "mem0mem.h" -#include "buf0buf.h" -#include "buf0flu.h" -#include "buf0rea.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "mtr0mtr.h" -#include "mtr0log.h" -#include "page0page.h" -#include "page0cur.h" -#include "btr0btr.h" -#include "btr0cur.h" -#include "ibuf0ibuf.h" -#include "trx0undo.h" -#include "trx0rec.h" -#include "trx0roll.h" -#include "btr0cur.h" -#include "btr0cur.h" -#include "btr0cur.h" -#include "dict0boot.h" -#include "fil0fil.h" -#include "sync0sync.h" - -#ifdef UNIV_HOTBACKUP -/* This is set to FALSE if the backup was originally taken with the -ibbackup --include regexp option: then we do not want to create tables in -directories which were not included */ -ibool recv_replay_file_ops = TRUE; -#endif /* UNIV_HOTBACKUP */ - -/* Log records are stored in the hash table in chunks at most of this size; -this must be less than UNIV_PAGE_SIZE as it is stored in the buffer pool */ -#define RECV_DATA_BLOCK_SIZE (MEM_MAX_ALLOC_IN_BUF - sizeof(recv_data_t)) - -/* Read-ahead area in applying log records to file pages */ -#define RECV_READ_AHEAD_AREA 32 - -recv_sys_t* recv_sys = NULL; -ibool recv_recovery_on = FALSE; -ibool recv_recovery_from_backup_on = FALSE; - -ibool recv_needed_recovery = FALSE; - -ibool recv_lsn_checks_on = FALSE; - -/* There are two conditions under which we scan the logs, the first -is normal 
startup and the second is when we do a recovery from an -archive. -This flag is set if we are doing a scan from the last checkpoint during -startup. If we find log entries that were written after the last checkpoint -we know that the server was not cleanly shutdown. We must then initialize -the crash recovery environment before attempting to store these entries in -the log hash table. */ -ibool recv_log_scan_is_startup_type = FALSE; - -/* If the following is TRUE, the buffer pool file pages must be invalidated -after recovery and no ibuf operations are allowed; this becomes TRUE if -the log record hash table becomes too full, and log records must be merged -to file pages already before the recovery is finished: in this case no -ibuf operations are allowed, as they could modify the pages read in the -buffer pool before the pages have been recovered to the up-to-date state */ - -/* Recovery is running and no operations on the log files are allowed -yet: the variable name is misleading */ - -ibool recv_no_ibuf_operations = FALSE; - -/* The following counter is used to decide when to print info on -log scan */ -ulint recv_scan_print_counter = 0; - -ibool recv_is_from_backup = FALSE; -#ifdef UNIV_HOTBACKUP -ibool recv_is_making_a_backup = FALSE; -#else -# define recv_is_making_a_backup FALSE -#endif /* UNIV_HOTBACKUP */ - -ulint recv_previous_parsed_rec_type = 999999; -ulint recv_previous_parsed_rec_offset = 0; -ulint recv_previous_parsed_rec_is_multi = 0; - -ulint recv_max_parsed_page_no = 0; - -/* This many frames must be left free in the buffer pool when we scan -the log and store the scanned log records in the buffer pool: we will -use these free frames to read in pages when we start applying the -log records to the database. */ - -ulint recv_n_pool_free_frames = 256; - -/* The maximum lsn we see for a page during the recovery process. 
If this -is bigger than the lsn we are able to scan up to, that is an indication that -the recovery failed and the database may be corrupt. */ - -dulint recv_max_page_lsn; - -/* prototypes */ - -/*********************************************************** -Initialize crash recovery environment. Can be called iff -recv_needed_recovery == FALSE. */ -static -void -recv_init_crash_recovery(void); -/*===========================*/ - -/************************************************************ -Creates the recovery system. */ - -void -recv_sys_create(void) -/*=================*/ -{ - if (recv_sys != NULL) { - - return; - } - - recv_sys = mem_alloc(sizeof(recv_sys_t)); - - mutex_create(&recv_sys->mutex, SYNC_RECV); - - recv_sys->heap = NULL; - recv_sys->addr_hash = NULL; -} - -/************************************************************ -Inits the recovery system for a recovery operation. */ - -void -recv_sys_init( -/*==========*/ - ibool recover_from_backup, /* in: TRUE if this is called - to recover from a hot backup */ - ulint available_memory) /* in: available memory in bytes */ -{ - if (recv_sys->heap != NULL) { - - return; - } - - mutex_enter(&(recv_sys->mutex)); - - if (!recover_from_backup) { - recv_sys->heap = mem_heap_create_in_buffer(256); - } else { - recv_sys->heap = mem_heap_create(256); - recv_is_from_backup = TRUE; - } - - recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE); - recv_sys->len = 0; - recv_sys->recovered_offset = 0; - - recv_sys->addr_hash = hash_create(available_memory / 64); - recv_sys->n_addrs = 0; - - recv_sys->apply_log_recs = FALSE; - recv_sys->apply_batch_on = FALSE; - - recv_sys->last_block_buf_start = mem_alloc(2 * OS_FILE_LOG_BLOCK_SIZE); - - recv_sys->last_block = ut_align(recv_sys->last_block_buf_start, - OS_FILE_LOG_BLOCK_SIZE); - recv_sys->found_corrupt_log = FALSE; - - recv_max_page_lsn = ut_dulint_zero; - - mutex_exit(&(recv_sys->mutex)); -} - -/************************************************************ -Empties the hash table 
when it has been fully processed. */ -static -void -recv_sys_empty_hash(void) -/*=====================*/ -{ - ut_ad(mutex_own(&(recv_sys->mutex))); - - if (recv_sys->n_addrs != 0) { - fprintf(stderr, - "InnoDB: Error: %lu pages with log records" - " were left unprocessed!\n" - "InnoDB: Maximum page number with" - " log records on it %lu\n", - (ulong) recv_sys->n_addrs, - (ulong) recv_max_parsed_page_no); - ut_error; - } - - hash_table_free(recv_sys->addr_hash); - mem_heap_empty(recv_sys->heap); - - recv_sys->addr_hash = hash_create(buf_pool_get_curr_size() / 256); -} - -#ifndef UNIV_LOG_DEBUG -/************************************************************ -Frees the recovery system. */ -static -void -recv_sys_free(void) -/*===============*/ -{ - mutex_enter(&(recv_sys->mutex)); - - hash_table_free(recv_sys->addr_hash); - mem_heap_free(recv_sys->heap); - ut_free(recv_sys->buf); - mem_free(recv_sys->last_block_buf_start); - - recv_sys->addr_hash = NULL; - recv_sys->heap = NULL; - - mutex_exit(&(recv_sys->mutex)); -} -#endif /* UNIV_LOG_DEBUG */ - -/************************************************************ -Truncates possible corrupted or extra records from a log group. 
*/ -static -void -recv_truncate_group( -/*================*/ - log_group_t* group, /* in: log group */ - dulint recovered_lsn, /* in: recovery succeeded up to this - lsn */ - dulint limit_lsn, /* in: this was the limit for - recovery */ - dulint checkpoint_lsn, /* in: recovery was started from this - checkpoint */ - dulint archived_lsn) /* in: the log has been archived up to - this lsn */ -{ - dulint start_lsn; - dulint end_lsn; - dulint finish_lsn1; - dulint finish_lsn2; - dulint finish_lsn; - ulint len; - ulint i; - - if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) { - /* Checkpoint was taken in the NOARCHIVELOG mode */ - archived_lsn = checkpoint_lsn; - } - - finish_lsn1 = ut_dulint_add(ut_dulint_align_down( - archived_lsn, - OS_FILE_LOG_BLOCK_SIZE), - log_group_get_capacity(group)); - - finish_lsn2 = ut_dulint_add(ut_dulint_align_up( - recovered_lsn, - OS_FILE_LOG_BLOCK_SIZE), - recv_sys->last_log_buf_size); - - if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) { - /* We do not know how far we should erase log records: erase - as much as possible */ - - finish_lsn = finish_lsn1; - } else { - /* It is enough to erase the length of the log buffer */ - finish_lsn = ut_dulint_get_min(finish_lsn1, finish_lsn2); - } - - ut_a(RECV_SCAN_SIZE <= log_sys->buf_size); - - /* Write the log buffer full of zeros */ - for (i = 0; i < RECV_SCAN_SIZE; i++) { - - *(log_sys->buf + i) = '\0'; - } - - start_lsn = ut_dulint_align_down(recovered_lsn, - OS_FILE_LOG_BLOCK_SIZE); - - if (ut_dulint_cmp(start_lsn, recovered_lsn) != 0) { - /* Copy the last incomplete log block to the log buffer and - edit its data length: */ - - ut_memcpy(log_sys->buf, recv_sys->last_block, - OS_FILE_LOG_BLOCK_SIZE); - log_block_set_data_len(log_sys->buf, ut_dulint_minus( - recovered_lsn, start_lsn)); - } - - if (ut_dulint_cmp(start_lsn, finish_lsn) >= 0) { - - return; - } - - for (;;) { - end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE); - - if (ut_dulint_cmp(end_lsn, finish_lsn) > 0) { - - end_lsn 
= finish_lsn; - } - - len = ut_dulint_minus(end_lsn, start_lsn); - - log_group_write_buf(group, log_sys->buf, len, start_lsn, 0); - if (ut_dulint_cmp(end_lsn, finish_lsn) >= 0) { - - return; - } - - /* Write the log buffer full of zeros */ - for (i = 0; i < RECV_SCAN_SIZE; i++) { - - *(log_sys->buf + i) = '\0'; - } - - start_lsn = end_lsn; - } -} - -/************************************************************ -Copies the log segment between group->recovered_lsn and recovered_lsn from the -most up-to-date log group to group, so that it contains the latest log data. */ -static -void -recv_copy_group( -/*============*/ - log_group_t* up_to_date_group, /* in: the most up-to-date log - group */ - log_group_t* group, /* in: copy to this log - group */ - dulint recovered_lsn) /* in: recovery succeeded up - to this lsn */ -{ - dulint start_lsn; - dulint end_lsn; - ulint len; - - if (ut_dulint_cmp(group->scanned_lsn, recovered_lsn) >= 0) { - - return; - } - - ut_a(RECV_SCAN_SIZE <= log_sys->buf_size); - - start_lsn = ut_dulint_align_down(group->scanned_lsn, - OS_FILE_LOG_BLOCK_SIZE); - for (;;) { - end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE); - - if (ut_dulint_cmp(end_lsn, recovered_lsn) > 0) { - end_lsn = ut_dulint_align_up(recovered_lsn, - OS_FILE_LOG_BLOCK_SIZE); - } - - log_group_read_log_seg(LOG_RECOVER, log_sys->buf, - up_to_date_group, start_lsn, end_lsn); - - len = ut_dulint_minus(end_lsn, start_lsn); - - log_group_write_buf(group, log_sys->buf, len, start_lsn, 0); - - if (ut_dulint_cmp(end_lsn, recovered_lsn) >= 0) { - - return; - } - - start_lsn = end_lsn; - } -} - -/************************************************************ -Copies a log segment from the most up-to-date log group to the other log -groups, so that they all contain the latest log data. Also writes the info -about the latest checkpoint to the groups, and inits the fields in the group -memory structs to up-to-date values. 
*/ -static -void -recv_synchronize_groups( -/*====================*/ - log_group_t* up_to_date_group) /* in: the most up-to-date - log group */ -{ - log_group_t* group; - dulint start_lsn; - dulint end_lsn; - dulint recovered_lsn; - dulint limit_lsn; - - recovered_lsn = recv_sys->recovered_lsn; - limit_lsn = recv_sys->limit_lsn; - - /* Read the last recovered log block to the recovery system buffer: - the block is always incomplete */ - - start_lsn = ut_dulint_align_down(recovered_lsn, - OS_FILE_LOG_BLOCK_SIZE); - end_lsn = ut_dulint_align_up(recovered_lsn, OS_FILE_LOG_BLOCK_SIZE); - - ut_a(ut_dulint_cmp(start_lsn, end_lsn) != 0); - - log_group_read_log_seg(LOG_RECOVER, recv_sys->last_block, - up_to_date_group, start_lsn, end_lsn); - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - while (group) { - if (group != up_to_date_group) { - - /* Copy log data if needed */ - - recv_copy_group(group, up_to_date_group, - recovered_lsn); - } - - /* Update the fields in the group struct to correspond to - recovered_lsn */ - - log_group_set_fields(group, recovered_lsn); - - group = UT_LIST_GET_NEXT(log_groups, group); - } - - /* Copy the checkpoint info to the groups; remember that we have - incremented checkpoint_no by one, and the info will not be written - over the max checkpoint info, thus making the preservation of max - checkpoint info on disk certain */ - - log_groups_write_checkpoint_info(); - - mutex_exit(&(log_sys->mutex)); - - /* Wait for the checkpoint write to complete */ - rw_lock_s_lock(&(log_sys->checkpoint_lock)); - rw_lock_s_unlock(&(log_sys->checkpoint_lock)); - - mutex_enter(&(log_sys->mutex)); -} - -/*************************************************************************** -Checks the consistency of the checkpoint info */ -static -ibool -recv_check_cp_is_consistent( -/*========================*/ - /* out: TRUE if ok */ - byte* buf) /* in: buffer containing checkpoint info */ -{ - ulint fold; - - fold = ut_fold_binary(buf, LOG_CHECKPOINT_CHECKSUM_1); 
- - if ((fold & 0xFFFFFFFFUL) != mach_read_from_4( - buf + LOG_CHECKPOINT_CHECKSUM_1)) { - return(FALSE); - } - - fold = ut_fold_binary(buf + LOG_CHECKPOINT_LSN, - LOG_CHECKPOINT_CHECKSUM_2 - LOG_CHECKPOINT_LSN); - - if ((fold & 0xFFFFFFFFUL) != mach_read_from_4( - buf + LOG_CHECKPOINT_CHECKSUM_2)) { - return(FALSE); - } - - return(TRUE); -} - -/************************************************************ -Looks for the maximum consistent checkpoint from the log groups. */ -static -ulint -recv_find_max_checkpoint( -/*=====================*/ - /* out: error code or DB_SUCCESS */ - log_group_t** max_group, /* out: max group */ - ulint* max_field) /* out: LOG_CHECKPOINT_1 or - LOG_CHECKPOINT_2 */ -{ - log_group_t* group; - dulint max_no; - dulint checkpoint_no; - ulint field; - byte* buf; - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - max_no = ut_dulint_zero; - *max_group = NULL; - *max_field = 0; - - buf = log_sys->checkpoint_buf; - - while (group) { - group->state = LOG_GROUP_CORRUPTED; - - for (field = LOG_CHECKPOINT_1; field <= LOG_CHECKPOINT_2; - field += LOG_CHECKPOINT_2 - LOG_CHECKPOINT_1) { - - log_group_read_checkpoint_info(group, field); - - if (!recv_check_cp_is_consistent(buf)) { -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "InnoDB: Checkpoint in group" - " %lu at %lu invalid, %lu\n", - (ulong) group->id, - (ulong) field, - (ulong) mach_read_from_4( - buf - + LOG_CHECKPOINT_CHECKSUM_1)); - - } -#endif /* UNIV_DEBUG */ - goto not_consistent; - } - - group->state = LOG_GROUP_OK; - - group->lsn = mach_read_from_8( - buf + LOG_CHECKPOINT_LSN); - group->lsn_offset = mach_read_from_4( - buf + LOG_CHECKPOINT_OFFSET); - checkpoint_no = mach_read_from_8( - buf + LOG_CHECKPOINT_NO); - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "InnoDB: Checkpoint number %lu" - " found in group %lu\n", - (ulong) ut_dulint_get_low( - checkpoint_no), - (ulong) group->id); - } -#endif /* UNIV_DEBUG */ - - if 
(ut_dulint_cmp(checkpoint_no, max_no) >= 0) { - *max_group = group; - *max_field = field; - max_no = checkpoint_no; - } - -not_consistent: - ; - } - - group = UT_LIST_GET_NEXT(log_groups, group); - } - - if (*max_group == NULL) { - - fprintf(stderr, - "InnoDB: No valid checkpoint found.\n" - "InnoDB: If this error appears when you are" - " creating an InnoDB database,\n" - "InnoDB: the problem may be that during" - " an earlier attempt you managed\n" - "InnoDB: to create the InnoDB data files," - " but log file creation failed.\n" - "InnoDB: If that is the case, please refer to\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "error-creating-innodb.html\n"); - return(DB_ERROR); - } - - return(DB_SUCCESS); -} - -/*********************************************************************** -Reads the checkpoint info needed in hot backup. */ - -ibool -recv_read_cp_info_for_backup( -/*=========================*/ - /* out: TRUE if success */ - byte* hdr, /* in: buffer containing the log group header */ - dulint* lsn, /* out: checkpoint lsn */ - ulint* offset, /* out: checkpoint offset in the log group */ - ulint* fsp_limit,/* out: fsp limit of space 0, 1000000000 if the - database is running with < version 3.23.50 of InnoDB */ - dulint* cp_no, /* out: checkpoint number */ - dulint* first_header_lsn) - /* out: lsn of of the start of the first log file */ -{ - ulint max_cp = 0; - dulint max_cp_no = ut_dulint_zero; - byte* cp_buf; - - cp_buf = hdr + LOG_CHECKPOINT_1; - - if (recv_check_cp_is_consistent(cp_buf)) { - max_cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO); - max_cp = LOG_CHECKPOINT_1; - } - - cp_buf = hdr + LOG_CHECKPOINT_2; - - if (recv_check_cp_is_consistent(cp_buf)) { - if (ut_dulint_cmp(mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO), - max_cp_no) > 0) { - max_cp = LOG_CHECKPOINT_2; - } - } - - if (max_cp == 0) { - return(FALSE); - } - - cp_buf = hdr + max_cp; - - *lsn = mach_read_from_8(cp_buf + LOG_CHECKPOINT_LSN); - *offset = mach_read_from_4(cp_buf 
+ LOG_CHECKPOINT_OFFSET); - - /* If the user is running a pre-3.23.50 version of InnoDB, its - checkpoint data does not contain the fsp limit info */ - if (mach_read_from_4(cp_buf + LOG_CHECKPOINT_FSP_MAGIC_N) - == LOG_CHECKPOINT_FSP_MAGIC_N_VAL) { - - *fsp_limit = mach_read_from_4( - cp_buf + LOG_CHECKPOINT_FSP_FREE_LIMIT); - - if (*fsp_limit == 0) { - *fsp_limit = 1000000000; - } - } else { - *fsp_limit = 1000000000; - } - - /* fprintf(stderr, "fsp limit %lu MB\n", *fsp_limit); */ - - *cp_no = mach_read_from_8(cp_buf + LOG_CHECKPOINT_NO); - - *first_header_lsn = mach_read_from_8(hdr + LOG_FILE_START_LSN); - - return(TRUE); -} - -/********************************************************** -Checks the 4-byte checksum to the trailer checksum field of a log block. -We also accept a log block in the old format < InnoDB-3.23.52 where the -checksum field contains the log block number. */ -static -ibool -log_block_checksum_is_ok_or_old_format( -/*===================================*/ - /* out: TRUE if ok, or if the log block may be in the - format of InnoDB version < 3.23.52 */ - byte* block) /* in: pointer to a log block */ -{ -#ifdef UNIV_LOG_DEBUG - return(TRUE); -#endif /* UNIV_LOG_DEBUG */ - if (log_block_calc_checksum(block) == log_block_get_checksum(block)) { - - return(TRUE); - } - - if (log_block_get_hdr_no(block) == log_block_get_checksum(block)) { - - /* We assume the log block is in the format of - InnoDB version < 3.23.52 and the block is ok */ -#if 0 - fprintf(stderr, - "InnoDB: Scanned old format < InnoDB-3.23.52" - " log block number %lu\n", - log_block_get_hdr_no(block)); -#endif - return(TRUE); - } - - return(FALSE); -} - -/*********************************************************************** -Scans the log segment and n_bytes_scanned is set to the length of valid -log scanned. 
*/ - -void -recv_scan_log_seg_for_backup( -/*=========================*/ - byte* buf, /* in: buffer containing log data */ - ulint buf_len, /* in: data length in that buffer */ - dulint* scanned_lsn, /* in/out: lsn of buffer start, - we return scanned lsn */ - ulint* scanned_checkpoint_no, - /* in/out: 4 lowest bytes of the - highest scanned checkpoint number so - far */ - ulint* n_bytes_scanned)/* out: how much we were able to - scan, smaller than buf_len if log - data ended here */ -{ - ulint data_len; - byte* log_block; - ulint no; - - *n_bytes_scanned = 0; - - for (log_block = buf; log_block < buf + buf_len; - log_block += OS_FILE_LOG_BLOCK_SIZE) { - - no = log_block_get_hdr_no(log_block); - -#if 0 - fprintf(stderr, "Log block header no %lu\n", no); -#endif - - if (no != log_block_convert_lsn_to_no(*scanned_lsn) - || !log_block_checksum_is_ok_or_old_format(log_block)) { -#if 0 - fprintf(stderr, - "Log block n:o %lu, scanned lsn n:o %lu\n", - no, log_block_convert_lsn_to_no(*scanned_lsn)); -#endif - /* Garbage or an incompletely written log block */ - - log_block += OS_FILE_LOG_BLOCK_SIZE; -#if 0 - fprintf(stderr, - "Next log block n:o %lu\n", - log_block_get_hdr_no(log_block)); -#endif - break; - } - - if (*scanned_checkpoint_no > 0 - && log_block_get_checkpoint_no(log_block) - < *scanned_checkpoint_no - && *scanned_checkpoint_no - - log_block_get_checkpoint_no(log_block) - > 0x80000000UL) { - - /* Garbage from a log buffer flush which was made - before the most recent database recovery */ -#if 0 - fprintf(stderr, - "Scanned cp n:o %lu, block cp n:o %lu\n", - *scanned_checkpoint_no, - log_block_get_checkpoint_no(log_block)); -#endif - break; - } - - data_len = log_block_get_data_len(log_block); - - *scanned_checkpoint_no - = log_block_get_checkpoint_no(log_block); - *scanned_lsn = ut_dulint_add(*scanned_lsn, data_len); - - *n_bytes_scanned += data_len; - - if (data_len < OS_FILE_LOG_BLOCK_SIZE) { - /* Log data ends here */ - -#if 0 - fprintf(stderr, "Log block 
data len %lu\n", - data_len); -#endif - break; - } - } -} - -/*********************************************************************** -Tries to parse a single log record body and also applies it to a page if -specified. File ops are parsed, but not applied in this function. */ -static -byte* -recv_parse_or_apply_log_rec_body( -/*=============================*/ - /* out: log record end, NULL if not a complete - record */ - byte type, /* in: type */ - byte* ptr, /* in: pointer to a buffer */ - byte* end_ptr,/* in: pointer to the buffer end */ - page_t* page, /* in: buffer page or NULL; if not NULL, then the log - record is applied to the page, and the log record - should be complete then */ - mtr_t* mtr) /* in: mtr or NULL; should be non-NULL if and only if - page is non-NULL */ -{ - dict_index_t* index = NULL; - - switch (type) { - case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES: - ptr = mlog_parse_nbytes(type, ptr, end_ptr, page); - break; - case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT: - if (NULL != (ptr = mlog_parse_index( - ptr, end_ptr, - type == MLOG_COMP_REC_INSERT, - &index))) { - ut_a(!page - || (ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table)); - ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr, - index, page, mtr); - } - break; - case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK: - if (NULL != (ptr = mlog_parse_index( - ptr, end_ptr, - type == MLOG_COMP_REC_CLUST_DELETE_MARK, - &index))) { - ut_a(!page - || (ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table)); - ptr = btr_cur_parse_del_mark_set_clust_rec( - ptr, end_ptr, index, page); - } - break; - case MLOG_COMP_REC_SEC_DELETE_MARK: - /* This log record type is obsolete, but we process it for - backward compatibility with MySQL 5.0.3 and 5.0.4. 
*/ - ut_a(!page || page_is_comp(page)); - ptr = mlog_parse_index(ptr, end_ptr, TRUE, &index); - if (!ptr) { - break; - } - /* Fall through */ - case MLOG_REC_SEC_DELETE_MARK: - ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr, page); - break; - case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE: - if (NULL != (ptr = mlog_parse_index( - ptr, end_ptr, - type == MLOG_COMP_REC_UPDATE_IN_PLACE, - &index))) { - ut_a(!page - || (ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table)); - ptr = btr_cur_parse_update_in_place(ptr, end_ptr, - page, index); - } - break; - case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE: - case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE: - if (NULL != (ptr = mlog_parse_index( - ptr, end_ptr, - type == MLOG_COMP_LIST_END_DELETE - || type == MLOG_COMP_LIST_START_DELETE, - &index))) { - ut_a(!page - || (ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table)); - ptr = page_parse_delete_rec_list(type, ptr, end_ptr, - index, page, mtr); - } - break; - case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED: - if (NULL != (ptr = mlog_parse_index( - ptr, end_ptr, - type == MLOG_COMP_LIST_END_COPY_CREATED, - &index))) { - ut_a(!page - || (ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table)); - ptr = page_parse_copy_rec_list_to_created_page( - ptr, end_ptr, index, page, mtr); - } - break; - case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE: - if (NULL != (ptr = mlog_parse_index( - ptr, end_ptr, - type == MLOG_COMP_PAGE_REORGANIZE, - &index))) { - ut_a(!page - || (ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table)); - ptr = btr_parse_page_reorganize(ptr, end_ptr, index, - page, mtr); - } - break; - case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE: - ptr = page_parse_create(ptr, end_ptr, - type == MLOG_COMP_PAGE_CREATE, - page, mtr); - break; - case MLOG_UNDO_INSERT: - ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page); - break; - case 
MLOG_UNDO_ERASE_END: - ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr); - break; - case MLOG_UNDO_INIT: - ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr); - break; - case MLOG_UNDO_HDR_DISCARD: - ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr); - break; - case MLOG_UNDO_HDR_CREATE: - case MLOG_UNDO_HDR_REUSE: - ptr = trx_undo_parse_page_header(type, ptr, end_ptr, - page, mtr); - break; - case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK: - ptr = btr_parse_set_min_rec_mark( - ptr, end_ptr, type == MLOG_COMP_REC_MIN_MARK, - page, mtr); - break; - case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE: - if (NULL != (ptr = mlog_parse_index( - ptr, end_ptr, - type == MLOG_COMP_REC_DELETE, - &index))) { - ut_a(!page - || (ibool)!!page_is_comp(page) - == dict_table_is_comp(index->table)); - ptr = page_cur_parse_delete_rec(ptr, end_ptr, - index, page, mtr); - } - break; - case MLOG_IBUF_BITMAP_INIT: - ptr = ibuf_parse_bitmap_init(ptr, end_ptr, page, mtr); - break; - case MLOG_INIT_FILE_PAGE: - ptr = fsp_parse_init_file_page(ptr, end_ptr, page); - break; - case MLOG_WRITE_STRING: - ptr = mlog_parse_string(ptr, end_ptr, page); - break; - case MLOG_FILE_CREATE: - case MLOG_FILE_RENAME: - case MLOG_FILE_DELETE: - ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE, - ULINT_UNDEFINED); - break; - default: - ptr = NULL; - recv_sys->found_corrupt_log = TRUE; - } - - if (index) { - dict_table_t* table = index->table; - - dict_mem_index_free(index); - dict_mem_table_free(table); - } - - return(ptr); -} - -/************************************************************************* -Calculates the fold value of a page file address: used in inserting or -searching for a log record in the hash table. 
*/ -UNIV_INLINE -ulint -recv_fold( -/*======*/ - /* out: folded value */ - ulint space, /* in: space */ - ulint page_no)/* in: page number */ -{ - return(ut_fold_ulint_pair(space, page_no)); -} - -/************************************************************************* -Calculates the hash value of a page file address: used in inserting or -searching for a log record in the hash table. */ -UNIV_INLINE -ulint -recv_hash( -/*======*/ - /* out: folded value */ - ulint space, /* in: space */ - ulint page_no)/* in: page number */ -{ - return(hash_calc_hash(recv_fold(space, page_no), recv_sys->addr_hash)); -} - -/************************************************************************* -Gets the hashed file address struct for a page. */ -static -recv_addr_t* -recv_get_fil_addr_struct( -/*=====================*/ - /* out: file address struct, NULL if not found from - the hash table */ - ulint space, /* in: space id */ - ulint page_no)/* in: page number */ -{ - recv_addr_t* recv_addr; - - recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, - recv_hash(space, page_no)); - while (recv_addr) { - if ((recv_addr->space == space) - && (recv_addr->page_no == page_no)) { - - break; - } - - recv_addr = HASH_GET_NEXT(addr_hash, recv_addr); - } - - return(recv_addr); -} - -/*********************************************************************** -Adds a new log record to the hash table of log records. 
*/
static
void
recv_add_to_hash_table(
/*===================*/
	byte	type,		/* in: log record type */
	ulint	space,		/* in: space id */
	ulint	page_no,	/* in: page number */
	byte*	body,		/* in: log record body */
	byte*	rec_end,	/* in: log record end */
	dulint	start_lsn,	/* in: start lsn of the mtr */
	dulint	end_lsn)	/* in: end lsn of the mtr */
{
	recv_t*		rec;
	recv_addr_t*	addr;
	recv_data_t*	chunk;
	recv_data_t**	link;
	ulint		chunk_len;

	if (fil_tablespace_deleted_or_being_deleted_in_mem(space, -1)) {
		/* The tablespace does not exist any more: do not store the
		log record */

		return;
	}

	/* Record header: type, total body length and the mtr lsn range */
	rec = mem_heap_alloc(recv_sys->heap, sizeof(recv_t));
	rec->type = type;
	rec->len = rec_end - body;
	rec->start_lsn = start_lsn;
	rec->end_lsn = end_lsn;

	addr = recv_get_fil_addr_struct(space, page_no);

	if (addr == NULL) {
		/* First log record for this page: create the address
		struct and insert it into the hash table */
		addr = mem_heap_alloc(recv_sys->heap, sizeof(recv_addr_t));
		addr->space = space;
		addr->page_no = page_no;
		addr->state = RECV_NOT_PROCESSED;

		UT_LIST_INIT(addr->rec_list);

		HASH_INSERT(recv_addr_t, addr_hash, recv_sys->addr_hash,
			    recv_fold(space, page_no), addr);
		recv_sys->n_addrs++;
#if 0
		fprintf(stderr, "Inserting log rec for space %lu, page %lu\n",
			space, page_no);
#endif
	}

	UT_LIST_ADD_LAST(rec_list, addr->rec_list, rec);

	/* Store the log record body in chunks of less than UNIV_PAGE_SIZE:
	recv_sys->heap grows into the buffer pool, and bigger chunks could not
	be allocated */

	/* link always points to the pointer field that has to receive the
	next chunk: first rec->data, then the next field of the previous
	chunk */
	link = &(rec->data);

	for (; rec_end > body; body += chunk_len) {

		chunk_len = rec_end - body;

		if (chunk_len > RECV_DATA_BLOCK_SIZE) {
			chunk_len = RECV_DATA_BLOCK_SIZE;
		}

		/* The payload bytes are stored right after the chunk
		header */
		chunk = mem_heap_alloc(recv_sys->heap,
				       sizeof(recv_data_t) + chunk_len);
		*link = chunk;

		ut_memcpy(((byte*)chunk) + sizeof(recv_data_t), body,
			  chunk_len);

		link = &(chunk->next);
	}

	/* Terminate the chunk list */
	*link = NULL;
}
-/************************************************************************* -Copies the log record body from recv to buf. */ -static -void -recv_data_copy_to_buf( -/*==================*/ - byte* buf, /* in: buffer of length at least recv->len */ - recv_t* recv) /* in: log record */ -{ - recv_data_t* recv_data; - ulint part_len; - ulint len; - - len = recv->len; - recv_data = recv->data; - - while (len > 0) { - if (len > RECV_DATA_BLOCK_SIZE) { - part_len = RECV_DATA_BLOCK_SIZE; - } else { - part_len = len; - } - - ut_memcpy(buf, ((byte*)recv_data) + sizeof(recv_data_t), - part_len); - buf += part_len; - len -= part_len; - - recv_data = recv_data->next; - } -} - -/**************************************************************************** -Applies the hashed log records to the page, if the page lsn is less than the -lsn of a log record. This can be called when a buffer page has just been -read in, or also for a page already in the buffer pool. */ - -void -recv_recover_page( -/*==============*/ - ibool recover_backup, /* in: TRUE if we are recovering a backup - page: then we do not acquire any latches - since the page was read in outside the - buffer pool */ - ibool just_read_in, /* in: TRUE if the i/o-handler calls this for - a freshly read page */ - page_t* page, /* in: buffer page */ - ulint space, /* in: space id */ - ulint page_no) /* in: page number */ -{ - buf_block_t* block = NULL; - recv_addr_t* recv_addr; - recv_t* recv; - byte* buf; - dulint start_lsn; - dulint end_lsn; - dulint page_lsn; - dulint page_newest_lsn; - ibool modification_to_page; - ibool success; - mtr_t mtr; - - mutex_enter(&(recv_sys->mutex)); - - if (recv_sys->apply_log_recs == FALSE) { - - /* Log records should not be applied now */ - - mutex_exit(&(recv_sys->mutex)); - - return; - } - - recv_addr = recv_get_fil_addr_struct(space, page_no); - - if ((recv_addr == NULL) - || (recv_addr->state == RECV_BEING_PROCESSED) - || (recv_addr->state == RECV_PROCESSED)) { - - 
mutex_exit(&(recv_sys->mutex)); - - return; - } - -#if 0 - fprintf(stderr, "Recovering space %lu, page %lu\n", space, page_no); -#endif - - recv_addr->state = RECV_BEING_PROCESSED; - - mutex_exit(&(recv_sys->mutex)); - - mtr_start(&mtr); - mtr_set_log_mode(&mtr, MTR_LOG_NONE); - - if (!recover_backup) { - block = buf_block_align(page); - - if (just_read_in) { - /* Move the ownership of the x-latch on the - page to this OS thread, so that we can acquire - a second x-latch on it. This is needed for the - operations to the page to pass the debug - checks. */ - - rw_lock_x_lock_move_ownership(&(block->lock)); - } - - success = buf_page_get_known_nowait(RW_X_LATCH, page, - BUF_KEEP_OLD, - __FILE__, __LINE__, - &mtr); - ut_a(success); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK); -#endif /* UNIV_SYNC_DEBUG */ - } - - /* Read the newest modification lsn from the page */ - page_lsn = mach_read_from_8(page + FIL_PAGE_LSN); - - if (!recover_backup) { - /* It may be that the page has been modified in the buffer - pool: read the newest modification lsn there */ - - page_newest_lsn = buf_frame_get_newest_modification(page); - - if (!ut_dulint_is_zero(page_newest_lsn)) { - - page_lsn = page_newest_lsn; - } - } else { - /* In recovery from a backup we do not really use the buffer - pool */ - - page_newest_lsn = ut_dulint_zero; - } - - modification_to_page = FALSE; - start_lsn = end_lsn = ut_dulint_zero; - - recv = UT_LIST_GET_FIRST(recv_addr->rec_list); - - while (recv) { - end_lsn = recv->end_lsn; - - if (recv->len > RECV_DATA_BLOCK_SIZE) { - /* We have to copy the record body to a separate - buffer */ - - buf = mem_alloc(recv->len); - - recv_data_copy_to_buf(buf, recv); - } else { - buf = ((byte*)(recv->data)) + sizeof(recv_data_t); - } - - if (recv->type == MLOG_INIT_FILE_PAGE) { - page_lsn = page_newest_lsn; - - mach_write_to_8(page + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM, - ut_dulint_zero); - mach_write_to_8(page + FIL_PAGE_LSN, 
ut_dulint_zero); - } - - if (ut_dulint_cmp(recv->start_lsn, page_lsn) >= 0) { - - if (!modification_to_page) { - - modification_to_page = TRUE; - start_lsn = recv->start_lsn; - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "InnoDB: Applying log rec" - " type %lu len %lu" - " to space %lu page no %lu\n", - (ulong) recv->type, (ulong) recv->len, - (ulong) recv_addr->space, - (ulong) recv_addr->page_no); - } -#endif /* UNIV_DEBUG */ - - recv_parse_or_apply_log_rec_body(recv->type, buf, - buf + recv->len, - page, &mtr); - mach_write_to_8(page + UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM, - ut_dulint_add(recv->start_lsn, - recv->len)); - mach_write_to_8(page + FIL_PAGE_LSN, - ut_dulint_add(recv->start_lsn, - recv->len)); - } - - if (recv->len > RECV_DATA_BLOCK_SIZE) { - mem_free(buf); - } - - recv = UT_LIST_GET_NEXT(rec_list, recv); - } - - mutex_enter(&(recv_sys->mutex)); - - if (ut_dulint_cmp(recv_max_page_lsn, page_lsn) < 0) { - recv_max_page_lsn = page_lsn; - } - - recv_addr->state = RECV_PROCESSED; - - ut_a(recv_sys->n_addrs); - recv_sys->n_addrs--; - - mutex_exit(&(recv_sys->mutex)); - - if (!recover_backup && modification_to_page) { - ut_a(block); - - buf_flush_recv_note_modification(block, start_lsn, end_lsn); - } - - /* Make sure that committing mtr does not change the modification - lsn values of page */ - - mtr.modifications = FALSE; - - mtr_commit(&mtr); -} - -/*********************************************************************** -Reads in pages which have hashed log records, from an area around a given -page number. 
*/
static
ulint
recv_read_in_area(
/*==============*/
			/* out: number of pages found */
	ulint	space,	/* in: space */
	ulint	page_no)/* in: page number */
{
	recv_addr_t*	addr;
	ulint		page_nos[RECV_READ_AHEAD_AREA];
	ulint		area_start;
	ulint		n_found = 0;

	/* The area is aligned on a multiple of RECV_READ_AHEAD_AREA */
	area_start = page_no - (page_no % RECV_READ_AHEAD_AREA);

	for (page_no = area_start;
	     page_no < area_start + RECV_READ_AHEAD_AREA;
	     page_no++) {

		addr = recv_get_fil_addr_struct(space, page_no);

		if (addr == NULL || buf_page_peek(space, page_no)) {
			/* No hashed log records for the page, or
			buf_page_peek() reports the page */

			continue;
		}

		mutex_enter(&(recv_sys->mutex));

		if (addr->state == RECV_NOT_PROCESSED) {
			/* Claim the page for reading and collect it */

			addr->state = RECV_BEING_READ;

			page_nos[n_found] = page_no;
			n_found++;
		}

		mutex_exit(&(recv_sys->mutex));
	}

	buf_read_recv_pages(FALSE, space, page_nos, n_found);

	return(n_found);
}

/***********************************************************************
Empties the hash table of stored log records, applying them to appropriate
pages.
*/ - -void -recv_apply_hashed_log_recs( -/*=======================*/ - ibool allow_ibuf) /* in: if TRUE, also ibuf operations are - allowed during the application; if FALSE, - no ibuf operations are allowed, and after - the application all file pages are flushed to - disk and invalidated in buffer pool: this - alternative means that no new log records - can be generated during the application; - the caller must in this case own the log - mutex */ -{ - recv_addr_t* recv_addr; - page_t* page; - ulint i; - ulint space; - ulint page_no; - ulint n_pages; - ibool has_printed = FALSE; - mtr_t mtr; -loop: - mutex_enter(&(recv_sys->mutex)); - - if (recv_sys->apply_batch_on) { - - mutex_exit(&(recv_sys->mutex)); - - os_thread_sleep(500000); - - goto loop; - } - - ut_ad(!allow_ibuf == mutex_own(&log_sys->mutex)); - - if (!allow_ibuf) { - recv_no_ibuf_operations = TRUE; - } - - recv_sys->apply_log_recs = TRUE; - recv_sys->apply_batch_on = TRUE; - - for (i = 0; i < hash_get_n_cells(recv_sys->addr_hash); i++) { - - recv_addr = HASH_GET_FIRST(recv_sys->addr_hash, i); - - while (recv_addr) { - space = recv_addr->space; - page_no = recv_addr->page_no; - - if (recv_addr->state == RECV_NOT_PROCESSED) { - if (!has_printed) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Starting an" - " apply batch of log records" - " to the database...\n" - "InnoDB: Progress in percents: ", - stderr); - has_printed = TRUE; - } - - mutex_exit(&(recv_sys->mutex)); - - if (buf_page_peek(space, page_no)) { - - mtr_start(&mtr); - - page = buf_page_get(space, page_no, - RW_X_LATCH, &mtr); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level( - page, SYNC_NO_ORDER_CHECK); -#endif /* UNIV_SYNC_DEBUG */ - recv_recover_page(FALSE, FALSE, page, - space, page_no); - mtr_commit(&mtr); - } else { - recv_read_in_area(space, page_no); - } - - mutex_enter(&(recv_sys->mutex)); - } - - recv_addr = HASH_GET_NEXT(addr_hash, recv_addr); - } - - if (has_printed - && (i * 100) / hash_get_n_cells(recv_sys->addr_hash) - != 
((i + 1) * 100) - / hash_get_n_cells(recv_sys->addr_hash)) { - - fprintf(stderr, "%lu ", (ulong) - ((i * 100) - / hash_get_n_cells(recv_sys->addr_hash))); - } - } - - /* Wait until all the pages have been processed */ - - while (recv_sys->n_addrs != 0) { - - mutex_exit(&(recv_sys->mutex)); - - os_thread_sleep(500000); - - mutex_enter(&(recv_sys->mutex)); - } - - if (has_printed) { - - fprintf(stderr, "\n"); - } - - if (!allow_ibuf) { - /* Flush all the file pages to disk and invalidate them in - the buffer pool */ - - mutex_exit(&(recv_sys->mutex)); - mutex_exit(&(log_sys->mutex)); - - n_pages = buf_flush_batch(BUF_FLUSH_LIST, ULINT_MAX, - ut_dulint_max); - ut_a(n_pages != ULINT_UNDEFINED); - - buf_flush_wait_batch_end(BUF_FLUSH_LIST); - - buf_pool_invalidate(); - - mutex_enter(&(log_sys->mutex)); - mutex_enter(&(recv_sys->mutex)); - - recv_no_ibuf_operations = FALSE; - } - - recv_sys->apply_log_recs = FALSE; - recv_sys->apply_batch_on = FALSE; - - recv_sys_empty_hash(); - - if (has_printed) { - fprintf(stderr, "InnoDB: Apply batch completed\n"); - } - - mutex_exit(&(recv_sys->mutex)); -} - -/* This page is allocated from the buffer pool and used in the function -below */ -static page_t* recv_backup_application_page = NULL; - -/*********************************************************************** -Applies log records in the hash table to a backup. 
*/ - -void -recv_apply_log_recs_for_backup(void) -/*================================*/ -{ - recv_addr_t* recv_addr; - ulint n_hash_cells; - byte* page; - ulint actual_size; - ibool success; - ulint error; - ulint i; - - recv_sys->apply_log_recs = TRUE; - recv_sys->apply_batch_on = TRUE; - - if (recv_backup_application_page == NULL) { - recv_backup_application_page = buf_frame_alloc(); - } - - page = recv_backup_application_page; - - fputs("InnoDB: Starting an apply batch of log records" - " to the database...\n" - "InnoDB: Progress in percents: ", stderr); - - n_hash_cells = hash_get_n_cells(recv_sys->addr_hash); - - for (i = 0; i < n_hash_cells; i++) { - /* The address hash table is externally chained */ - recv_addr = hash_get_nth_cell(recv_sys->addr_hash, i)->node; - - while (recv_addr != NULL) { - - if (!fil_tablespace_exists_in_mem(recv_addr->space)) { -#if 0 - fprintf(stderr, - "InnoDB: Warning: cannot apply" - " log record to" - " tablespace %lu page %lu,\n" - "InnoDB: because tablespace with" - " that id does not exist.\n", - recv_addr->space, recv_addr->page_no); -#endif - recv_addr->state = RECV_PROCESSED; - - ut_a(recv_sys->n_addrs); - recv_sys->n_addrs--; - - goto skip_this_recv_addr; - } - - /* We simulate a page read made by the buffer pool, to - make sure the recovery apparatus works ok, for - example, the buf_frame_align() function. We must init - the block corresponding to buf_pool->frame_zero - (== page). 
*/ - - buf_page_init_for_backup_restore( - recv_addr->space, recv_addr->page_no, - buf_block_align(page)); - - /* Extend the tablespace's last file if the page_no - does not fall inside its bounds; we assume the last - file is auto-extending, and ibbackup copied the file - when it still was smaller */ - - success = fil_extend_space_to_desired_size( - &actual_size, - recv_addr->space, recv_addr->page_no + 1); - if (!success) { - fprintf(stderr, - "InnoDB: Fatal error: cannot extend" - " tablespace %lu to hold %lu pages\n", - recv_addr->space, recv_addr->page_no); - - exit(1); - } - - /* Read the page from the tablespace file using the - fil0fil.c routines */ - - error = fil_io(OS_FILE_READ, TRUE, recv_addr->space, - recv_addr->page_no, 0, UNIV_PAGE_SIZE, - page, NULL); - if (error != DB_SUCCESS) { - fprintf(stderr, - "InnoDB: Fatal error: cannot read" - " from tablespace" - " %lu page number %lu\n", - (ulong) recv_addr->space, - (ulong) recv_addr->page_no); - - exit(1); - } - - /* Apply the log records to this page */ - recv_recover_page(TRUE, FALSE, page, recv_addr->space, - recv_addr->page_no); - - /* Write the page back to the tablespace file using the - fil0fil.c routines */ - - buf_flush_init_for_writing( - page, mach_read_from_8(page + FIL_PAGE_LSN), - recv_addr->space, recv_addr->page_no); - - error = fil_io(OS_FILE_WRITE, TRUE, recv_addr->space, - recv_addr->page_no, 0, UNIV_PAGE_SIZE, - page, NULL); -skip_this_recv_addr: - recv_addr = HASH_GET_NEXT(addr_hash, recv_addr); - } - - if ((100 * i) / n_hash_cells - != (100 * (i + 1)) / n_hash_cells) { - fprintf(stderr, "%lu ", - (ulong) ((100 * i) / n_hash_cells)); - fflush(stderr); - } - } - - recv_sys_empty_hash(); -} - -/*********************************************************************** -Tries to parse a single log record and returns its length. 
*/
static
ulint
recv_parse_log_rec(
/*===============*/
			/* out: length of the record, or 0 if the record was
			not complete */
	byte*	ptr,	/* in: pointer to a buffer */
	byte*	end_ptr,/* in: pointer to the buffer end */
	byte*	type,	/* out: type */
	ulint*	space,	/* out: space id */
	ulint*	page_no,/* out: page number */
	byte**	body)	/* out: log record body start */
{
	byte*	body_start;
	byte*	rec_end;

	*body = NULL;

	if (ptr == end_ptr) {
		/* The buffer is empty */

		return(0);
	}

	/* MLOG_MULTI_REC_END and MLOG_DUMMY_RECORD are one byte long:
	only *type (and, for the dummy record, *space) is set for them */

	if (*ptr == MLOG_MULTI_REC_END) {

		*type = *ptr;

		return(1);
	}

	if (*ptr == MLOG_DUMMY_RECORD) {
		*type = *ptr;

		*space = ULINT_UNDEFINED - 1; /* For debugging */

		return(1);
	}

	body_start = mlog_parse_initial_log_record(ptr, end_ptr, type, space,
						   page_no);
	*body = body_start;

	if (UNIV_UNLIKELY(!body_start)) {
		/* The initial part of the record could not be parsed */

		return(0);
	}

	/* Check that page_no is sensible */

	if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {

		recv_sys->found_corrupt_log = TRUE;

		return(0);
	}

	/* With NULL page and mtr arguments the body is only parsed */
	rec_end = recv_parse_or_apply_log_rec_body(*type, body_start, end_ptr,
						   NULL, NULL);
	if (UNIV_UNLIKELY(rec_end == NULL)) {

		return(0);
	}

	/* Keep track of the biggest page number which has been parsed */
	if (*page_no > recv_max_parsed_page_no) {
		recv_max_parsed_page_no = *page_no;
	}

	return(rec_end - ptr);
}

/***********************************************************
Calculates the new value for lsn when more data is added to the log.
*/ -static -dulint -recv_calc_lsn_on_data_add( -/*======================*/ - dulint lsn, /* in: old lsn */ - ulint len) /* in: this many bytes of data is added, log block - headers not included */ -{ - ulint frag_len; - ulint lsn_len; - - frag_len = (ut_dulint_get_low(lsn) % OS_FILE_LOG_BLOCK_SIZE) - - LOG_BLOCK_HDR_SIZE; - ut_ad(frag_len < OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - - LOG_BLOCK_TRL_SIZE); - lsn_len = len + ((len + frag_len) - / (OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_HDR_SIZE - - LOG_BLOCK_TRL_SIZE)) - * (LOG_BLOCK_HDR_SIZE + LOG_BLOCK_TRL_SIZE); - - return(ut_dulint_add(lsn, lsn_len)); -} - -/*********************************************************** -Checks that the parser recognizes incomplete initial segments of a log -record as incomplete. */ - -void -recv_check_incomplete_log_recs( -/*===========================*/ - byte* ptr, /* in: pointer to a complete log record */ - ulint len) /* in: length of the log record */ -{ - ulint i; - byte type; - ulint space; - ulint page_no; - byte* body; - - for (i = 0; i < len; i++) { - ut_a(0 == recv_parse_log_rec(ptr, ptr + i, &type, &space, - &page_no, &body)); - } -} - -/*********************************************************** -Prints diagnostic info of corrupt log. 
*/ -static -void -recv_report_corrupt_log( -/*====================*/ - byte* ptr, /* in: pointer to corrupt log record */ - byte type, /* in: type of the record */ - ulint space, /* in: space id, this may also be garbage */ - ulint page_no)/* in: page number, this may also be garbage */ -{ - fprintf(stderr, - "InnoDB: ############### CORRUPT LOG RECORD FOUND\n" - "InnoDB: Log record type %lu, space id %lu, page number %lu\n" - "InnoDB: Log parsing proceeded successfully up to %lu %lu\n" - "InnoDB: Previous log record type %lu, is multi %lu\n" - "InnoDB: Recv offset %lu, prev %lu\n", - (ulong) type, (ulong) space, (ulong) page_no, - (ulong) ut_dulint_get_high(recv_sys->recovered_lsn), - (ulong) ut_dulint_get_low(recv_sys->recovered_lsn), - (ulong) recv_previous_parsed_rec_type, - (ulong) recv_previous_parsed_rec_is_multi, - (ulong) (ptr - recv_sys->buf), - (ulong) recv_previous_parsed_rec_offset); - - if ((ulint)(ptr - recv_sys->buf + 100) - > recv_previous_parsed_rec_offset - && (ulint)(ptr - recv_sys->buf + 100 - - recv_previous_parsed_rec_offset) - < 200000) { - fputs("InnoDB: Hex dump of corrupt log starting" - " 100 bytes before the start\n" - "InnoDB: of the previous log rec,\n" - "InnoDB: and ending 100 bytes after the start" - " of the corrupt rec:\n", - stderr); - - ut_print_buf(stderr, - recv_sys->buf - + recv_previous_parsed_rec_offset - 100, - ptr - recv_sys->buf + 200 - - recv_previous_parsed_rec_offset); - putc('\n', stderr); - } - - fputs("InnoDB: WARNING: the log file may have been corrupt and it\n" - "InnoDB: is possible that the log scan did not proceed\n" - "InnoDB: far enough in recovery! 
Please run CHECK TABLE\n" - "InnoDB: on your InnoDB tables to check that they are ok!\n" - "InnoDB: If mysqld crashes after this recovery, look at\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "forcing-recovery.html\n" - "InnoDB: about forcing recovery.\n", stderr); - - fflush(stderr); -} - -/*********************************************************** -Parses log records from a buffer and stores them to a hash table to wait -merging to file pages. */ -static -ibool -recv_parse_log_recs( -/*================*/ - /* out: currently always returns FALSE */ - ibool store_to_hash) /* in: TRUE if the records should be stored - to the hash table; this is set to FALSE if just - debug checking is needed */ -{ - byte* ptr; - byte* end_ptr; - ulint single_rec; - ulint len; - ulint total_len; - dulint new_recovered_lsn; - dulint old_lsn; - byte type; - ulint space; - ulint page_no; - byte* body; - ulint n_recs; - - ut_ad(mutex_own(&(log_sys->mutex))); - ut_ad(!ut_dulint_is_zero(recv_sys->parse_start_lsn)); -loop: - ptr = recv_sys->buf + recv_sys->recovered_offset; - - end_ptr = recv_sys->buf + recv_sys->len; - - if (ptr == end_ptr) { - - return(FALSE); - } - - single_rec = (ulint)*ptr & MLOG_SINGLE_REC_FLAG; - - if (single_rec || *ptr == MLOG_DUMMY_RECORD) { - /* The mtr only modified a single page, or this is a file op */ - - old_lsn = recv_sys->recovered_lsn; - - /* Try to parse a log record, fetching its type, space id, - page no, and a pointer to the body of the log record */ - - len = recv_parse_log_rec(ptr, end_ptr, &type, &space, - &page_no, &body); - - if (len == 0 || recv_sys->found_corrupt_log) { - if (recv_sys->found_corrupt_log) { - - recv_report_corrupt_log(ptr, - type, space, page_no); - } - - return(FALSE); - } - - new_recovered_lsn = recv_calc_lsn_on_data_add(old_lsn, len); - - if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn) - > 0) { - /* The log record filled a log block, and we require - that also the next log block should have been 
scanned - in */ - - return(FALSE); - } - - recv_previous_parsed_rec_type = (ulint)type; - recv_previous_parsed_rec_offset = recv_sys->recovered_offset; - recv_previous_parsed_rec_is_multi = 0; - - recv_sys->recovered_offset += len; - recv_sys->recovered_lsn = new_recovered_lsn; - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "InnoDB: Parsed a single log rec" - " type %lu len %lu space %lu page no %lu\n", - (ulong) type, (ulong) len, (ulong) space, - (ulong) page_no); - } -#endif /* UNIV_DEBUG */ - - if (type == MLOG_DUMMY_RECORD) { - /* Do nothing */ - - } else if (store_to_hash && (type == MLOG_FILE_CREATE - || type == MLOG_FILE_RENAME - || type == MLOG_FILE_DELETE)) { -#ifdef UNIV_HOTBACKUP - if (recv_replay_file_ops) { - - /* In ibbackup --apply-log, replay an .ibd file - operation, if possible; note that - fil_path_to_mysql_datadir is set in ibbackup to - point to the datadir we should use there */ - - if (NULL == fil_op_log_parse_or_replay( - body, end_ptr, type, TRUE, - space)) { - fprintf(stderr, - "InnoDB: Error: file op" - " log record of type %lu" - " space %lu not complete in\n" - "InnoDB: the replay phase." 
- " Path %s\n", - (ulint)type, space, - (char*)(body + 2)); - - ut_a(0); - } - } -#endif - /* In normal mysqld crash recovery we do not try to - replay file operations */ - } else if (store_to_hash) { - recv_add_to_hash_table(type, space, page_no, body, - ptr + len, old_lsn, - recv_sys->recovered_lsn); - } else { -#ifdef UNIV_LOG_DEBUG - recv_check_incomplete_log_recs(ptr, len); -#endif/* UNIV_LOG_DEBUG */ - } - } else { - /* Check that all the records associated with the single mtr - are included within the buffer */ - - total_len = 0; - n_recs = 0; - - for (;;) { - len = recv_parse_log_rec(ptr, end_ptr, &type, &space, - &page_no, &body); - if (len == 0 || recv_sys->found_corrupt_log) { - - if (recv_sys->found_corrupt_log) { - - recv_report_corrupt_log( - ptr, type, space, page_no); - } - - return(FALSE); - } - - recv_previous_parsed_rec_type = (ulint)type; - recv_previous_parsed_rec_offset - = recv_sys->recovered_offset + total_len; - recv_previous_parsed_rec_is_multi = 1; - - if ((!store_to_hash) && (type != MLOG_MULTI_REC_END)) { -#ifdef UNIV_LOG_DEBUG - recv_check_incomplete_log_recs(ptr, len); -#endif /* UNIV_LOG_DEBUG */ - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "InnoDB: Parsed a multi log rec" - " type %lu len %lu" - " space %lu page no %lu\n", - (ulong) type, (ulong) len, - (ulong) space, (ulong) page_no); - } -#endif /* UNIV_DEBUG */ - - total_len += len; - n_recs++; - - ptr += len; - - if (type == MLOG_MULTI_REC_END) { - - /* Found the end mark for the records */ - - break; - } - } - - new_recovered_lsn = recv_calc_lsn_on_data_add( - recv_sys->recovered_lsn, total_len); - - if (ut_dulint_cmp(new_recovered_lsn, recv_sys->scanned_lsn) - > 0) { - /* The log record filled a log block, and we require - that also the next log block should have been scanned - in */ - - return(FALSE); - } - - /* Add all the records to the hash table */ - - ptr = recv_sys->buf + recv_sys->recovered_offset; - - for (;;) { - old_lsn = 
recv_sys->recovered_lsn; - len = recv_parse_log_rec(ptr, end_ptr, &type, &space, - &page_no, &body); - if (recv_sys->found_corrupt_log) { - - recv_report_corrupt_log(ptr, - type, space, page_no); - } - - ut_a(len != 0); - ut_a(0 == ((ulint)*ptr & MLOG_SINGLE_REC_FLAG)); - - recv_sys->recovered_offset += len; - recv_sys->recovered_lsn - = recv_calc_lsn_on_data_add(old_lsn, len); - if (type == MLOG_MULTI_REC_END) { - - /* Found the end mark for the records */ - - break; - } - - if (store_to_hash) { - recv_add_to_hash_table(type, space, page_no, - body, ptr + len, - old_lsn, - new_recovered_lsn); - } - - ptr += len; - } - } - - goto loop; -} - -/*********************************************************** -Adds data from a new log block to the parsing buffer of recv_sys if -recv_sys->parse_start_lsn is non-zero. */ -static -ibool -recv_sys_add_to_parsing_buf( -/*========================*/ - /* out: TRUE if more data added */ - byte* log_block, /* in: log block */ - dulint scanned_lsn) /* in: lsn of how far we were able to find - data in this log block */ -{ - ulint more_len; - ulint data_len; - ulint start_offset; - ulint end_offset; - - ut_ad(ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) >= 0); - - if (ut_dulint_is_zero(recv_sys->parse_start_lsn)) { - /* Cannot start parsing yet because no start point for - it found */ - - return(FALSE); - } - - data_len = log_block_get_data_len(log_block); - - if (ut_dulint_cmp(recv_sys->parse_start_lsn, scanned_lsn) >= 0) { - - return(FALSE); - - } else if (ut_dulint_cmp(recv_sys->scanned_lsn, scanned_lsn) >= 0) { - - return(FALSE); - - } else if (ut_dulint_cmp(recv_sys->parse_start_lsn, - recv_sys->scanned_lsn) > 0) { - more_len = ut_dulint_minus(scanned_lsn, - recv_sys->parse_start_lsn); - } else { - more_len = ut_dulint_minus(scanned_lsn, recv_sys->scanned_lsn); - } - - if (more_len == 0) { - - return(FALSE); - } - - ut_ad(data_len >= more_len); - - start_offset = data_len - more_len; - - if (start_offset < 
LOG_BLOCK_HDR_SIZE) { - start_offset = LOG_BLOCK_HDR_SIZE; - } - - end_offset = data_len; - - if (end_offset > OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE) { - end_offset = OS_FILE_LOG_BLOCK_SIZE - LOG_BLOCK_TRL_SIZE; - } - - ut_ad(start_offset <= end_offset); - - if (start_offset < end_offset) { - ut_memcpy(recv_sys->buf + recv_sys->len, - log_block + start_offset, end_offset - start_offset); - - recv_sys->len += end_offset - start_offset; - - ut_a(recv_sys->len <= RECV_PARSING_BUF_SIZE); - } - - return(TRUE); -} - -/*********************************************************** -Moves the parsing buffer data left to the buffer start. */ -static -void -recv_sys_justify_left_parsing_buf(void) -/*===================================*/ -{ - ut_memmove(recv_sys->buf, recv_sys->buf + recv_sys->recovered_offset, - recv_sys->len - recv_sys->recovered_offset); - - recv_sys->len -= recv_sys->recovered_offset; - - recv_sys->recovered_offset = 0; -} - -/*********************************************************** -Scans log from a buffer and stores new log data to the parsing buffer. Parses -and hashes the log records if new data found. 
*/ - -ibool -recv_scan_log_recs( -/*===============*/ - /* out: TRUE if limit_lsn has been reached, or - not able to scan any more in this log group */ - ibool apply_automatically,/* in: TRUE if we want this function to - apply log records automatically when the - hash table becomes full; in the hot backup tool - the tool does the applying, not this - function */ - ulint available_memory,/* in: we let the hash table of recs to grow - to this size, at the maximum */ - ibool store_to_hash, /* in: TRUE if the records should be stored - to the hash table; this is set to FALSE if just - debug checking is needed */ - byte* buf, /* in: buffer containing a log segment or - garbage */ - ulint len, /* in: buffer length */ - dulint start_lsn, /* in: buffer start lsn */ - dulint* contiguous_lsn, /* in/out: it is known that all log groups - contain contiguous log data up to this lsn */ - dulint* group_scanned_lsn)/* out: scanning succeeded up to this lsn */ -{ - byte* log_block; - ulint no; - dulint scanned_lsn; - ibool finished; - ulint data_len; - ibool more_data; - - ut_ad(ut_dulint_get_low(start_lsn) % OS_FILE_LOG_BLOCK_SIZE == 0); - ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0); - ut_ad(len > 0); - ut_a(apply_automatically <= TRUE); - ut_a(store_to_hash <= TRUE); - - finished = FALSE; - - log_block = buf; - scanned_lsn = start_lsn; - more_data = FALSE; - - while (log_block < buf + len && !finished) { - - no = log_block_get_hdr_no(log_block); - /* - fprintf(stderr, "Log block header no %lu\n", no); - - fprintf(stderr, "Scanned lsn no %lu\n", - log_block_convert_lsn_to_no(scanned_lsn)); - */ - if (no != log_block_convert_lsn_to_no(scanned_lsn) - || !log_block_checksum_is_ok_or_old_format(log_block)) { - - if (no == log_block_convert_lsn_to_no(scanned_lsn) - && !log_block_checksum_is_ok_or_old_format( - log_block)) { - fprintf(stderr, - "InnoDB: Log block no %lu at" - " lsn %lu %lu has\n" - "InnoDB: ok header, but checksum field" - " contains %lu, should be %lu\n", - (ulong) no, - 
(ulong) ut_dulint_get_high( - scanned_lsn), - (ulong) ut_dulint_get_low(scanned_lsn), - (ulong) log_block_get_checksum( - log_block), - (ulong) log_block_calc_checksum( - log_block)); - } - - /* Garbage or an incompletely written log block */ - - finished = TRUE; - - break; - } - - if (log_block_get_flush_bit(log_block)) { - /* This block was a start of a log flush operation: - we know that the previous flush operation must have - been completed for all log groups before this block - can have been flushed to any of the groups. Therefore, - we know that log data is contiguous up to scanned_lsn - in all non-corrupt log groups. */ - - if (ut_dulint_cmp(scanned_lsn, *contiguous_lsn) > 0) { - *contiguous_lsn = scanned_lsn; - } - } - - data_len = log_block_get_data_len(log_block); - - if ((store_to_hash || (data_len == OS_FILE_LOG_BLOCK_SIZE)) - && (ut_dulint_cmp(ut_dulint_add(scanned_lsn, data_len), - recv_sys->scanned_lsn) > 0) - && (recv_sys->scanned_checkpoint_no > 0) - && (log_block_get_checkpoint_no(log_block) - < recv_sys->scanned_checkpoint_no) - && (recv_sys->scanned_checkpoint_no - - log_block_get_checkpoint_no(log_block) - > 0x80000000UL)) { - - /* Garbage from a log buffer flush which was made - before the most recent database recovery */ - - finished = TRUE; -#ifdef UNIV_LOG_DEBUG - /* This is not really an error, but currently - we stop here in the debug version: */ - - ut_error; -#endif - break; - } - - if (ut_dulint_is_zero(recv_sys->parse_start_lsn) - && (log_block_get_first_rec_group(log_block) > 0)) { - - /* We found a point from which to start the parsing - of log records */ - - recv_sys->parse_start_lsn - = ut_dulint_add(scanned_lsn, - log_block_get_first_rec_group( - log_block)); - recv_sys->scanned_lsn = recv_sys->parse_start_lsn; - recv_sys->recovered_lsn = recv_sys->parse_start_lsn; - } - - scanned_lsn = ut_dulint_add(scanned_lsn, data_len); - - if (ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) > 0) { - - /* We have found more entries. 
If this scan is - of startup type, we must initiate crash recovery - environment before parsing these log records. */ - - if (recv_log_scan_is_startup_type - && !recv_needed_recovery) { - - fprintf(stderr, - "InnoDB: Log scan progressed" - " past the checkpoint lsn %lu %lu\n", - (ulong) ut_dulint_get_high( - recv_sys->scanned_lsn), - (ulong) ut_dulint_get_low( - recv_sys->scanned_lsn)); - recv_init_crash_recovery(); - } - - /* We were able to find more log data: add it to the - parsing buffer if parse_start_lsn is already - non-zero */ - - if (recv_sys->len + 4 * OS_FILE_LOG_BLOCK_SIZE - >= RECV_PARSING_BUF_SIZE) { - fprintf(stderr, - "InnoDB: Error: log parsing" - " buffer overflow." - " Recovery may have failed!\n"); - - recv_sys->found_corrupt_log = TRUE; - - } else if (!recv_sys->found_corrupt_log) { - more_data = recv_sys_add_to_parsing_buf( - log_block, scanned_lsn); - } - - recv_sys->scanned_lsn = scanned_lsn; - recv_sys->scanned_checkpoint_no - = log_block_get_checkpoint_no(log_block); - } - - if (data_len < OS_FILE_LOG_BLOCK_SIZE) { - /* Log data for this group ends here */ - - finished = TRUE; - } else { - log_block += OS_FILE_LOG_BLOCK_SIZE; - } - } - - *group_scanned_lsn = scanned_lsn; - - if (recv_needed_recovery - || (recv_is_from_backup && !recv_is_making_a_backup)) { - recv_scan_print_counter++; - - if (finished || (recv_scan_print_counter % 80 == 0)) { - - fprintf(stderr, - "InnoDB: Doing recovery: scanned up to" - " log sequence number %lu %lu\n", - (ulong) ut_dulint_get_high(*group_scanned_lsn), - (ulong) ut_dulint_get_low(*group_scanned_lsn)); - } - } - - if (more_data && !recv_sys->found_corrupt_log) { - /* Try to parse more log records */ - - recv_parse_log_recs(store_to_hash); - - if (store_to_hash && mem_heap_get_size(recv_sys->heap) - > available_memory - && apply_automatically) { - - /* Hash table of log records has grown too big: - empty it; FALSE means no ibuf operations - allowed, as we cannot add new records to the - log yet: they 
would be produced by ibuf - operations */ - - recv_apply_hashed_log_recs(FALSE); - } - - if (recv_sys->recovered_offset > RECV_PARSING_BUF_SIZE / 4) { - /* Move parsing buffer data to the buffer start */ - - recv_sys_justify_left_parsing_buf(); - } - } - - return(finished); -} - -/*********************************************************** -Scans log from a buffer and stores new log data to the parsing buffer. Parses -and hashes the log records if new data found. */ -static -void -recv_group_scan_log_recs( -/*=====================*/ - log_group_t* group, /* in: log group */ - dulint* contiguous_lsn, /* in/out: it is known that all log groups - contain contiguous log data up to this lsn */ - dulint* group_scanned_lsn)/* out: scanning succeeded up to this lsn */ -{ - ibool finished; - dulint start_lsn; - dulint end_lsn; - - finished = FALSE; - - start_lsn = *contiguous_lsn; - - while (!finished) { - end_lsn = ut_dulint_add(start_lsn, RECV_SCAN_SIZE); - - log_group_read_log_seg(LOG_RECOVER, log_sys->buf, - group, start_lsn, end_lsn); - - finished = recv_scan_log_recs( - TRUE, (buf_pool->n_frames - recv_n_pool_free_frames) - * UNIV_PAGE_SIZE, TRUE, log_sys->buf, RECV_SCAN_SIZE, - start_lsn, contiguous_lsn, group_scanned_lsn); - start_lsn = end_lsn; - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "InnoDB: Scanned group %lu up to" - " log sequence number %lu %lu\n", - (ulong) group->id, - (ulong) ut_dulint_get_high(*group_scanned_lsn), - (ulong) ut_dulint_get_low(*group_scanned_lsn)); - } -#endif /* UNIV_DEBUG */ -} - -/*********************************************************** -Initialize crash recovery environment. Can be called iff -recv_needed_recovery == FALSE. 
*/ -static -void -recv_init_crash_recovery(void) -/*==========================*/ -{ - ut_a(!recv_needed_recovery); - - recv_needed_recovery = TRUE; - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Database was not" - " shut down normally!\n" - "InnoDB: Starting crash recovery.\n"); - - fprintf(stderr, - "InnoDB: Reading tablespace information" - " from the .ibd files...\n"); - - fil_load_single_table_tablespaces(); - - /* If we are using the doublewrite method, we will - check if there are half-written pages in data files, - and restore them from the doublewrite buffer if - possible */ - - if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { - - fprintf(stderr, - "InnoDB: Restoring possible" - " half-written data pages from" - " the doublewrite\n" - "InnoDB: buffer...\n"); - trx_sys_doublewrite_init_or_restore_pages(TRUE); - } -} - -/************************************************************ -Recovers from a checkpoint. When this function returns, the database is able -to start processing of new user transactions, but the function -recv_recovery_from_checkpoint_finish should be called later to complete -the recovery and free the resources used in it. 
*/ - -ulint -recv_recovery_from_checkpoint_start( -/*================================*/ - /* out: error code or DB_SUCCESS */ - ulint type, /* in: LOG_CHECKPOINT or LOG_ARCHIVE */ - dulint limit_lsn, /* in: recover up to this lsn if possible */ - dulint min_flushed_lsn,/* in: min flushed lsn from data files */ - dulint max_flushed_lsn)/* in: max flushed lsn from data files */ -{ - log_group_t* group; - log_group_t* max_cp_group; - log_group_t* up_to_date_group; - ulint max_cp_field; - dulint checkpoint_lsn; - dulint checkpoint_no; - dulint old_scanned_lsn; - dulint group_scanned_lsn; - dulint contiguous_lsn; - dulint archived_lsn; - ulint capacity; - byte* buf; - byte log_hdr_buf[LOG_FILE_HDR_SIZE]; - ulint err; - - ut_ad((type != LOG_CHECKPOINT) - || (ut_dulint_cmp(limit_lsn, ut_dulint_max) == 0)); - - if (type == LOG_CHECKPOINT) { - recv_sys_create(); - recv_sys_init(FALSE, buf_pool_get_curr_size()); - } - - if (srv_force_recovery >= SRV_FORCE_NO_LOG_REDO) { - fprintf(stderr, - "InnoDB: The user has set SRV_FORCE_NO_LOG_REDO on\n"); - fprintf(stderr, - "InnoDB: Skipping log redo\n"); - - return(DB_SUCCESS); - } - - recv_recovery_on = TRUE; - - recv_sys->limit_lsn = limit_lsn; - - mutex_enter(&(log_sys->mutex)); - - /* Look for the latest checkpoint from any of the log groups */ - - err = recv_find_max_checkpoint(&max_cp_group, &max_cp_field); - - if (err != DB_SUCCESS) { - - mutex_exit(&(log_sys->mutex)); - - return(err); - } - - log_group_read_checkpoint_info(max_cp_group, max_cp_field); - - buf = log_sys->checkpoint_buf; - - checkpoint_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_LSN); - checkpoint_no = mach_read_from_8(buf + LOG_CHECKPOINT_NO); - archived_lsn = mach_read_from_8(buf + LOG_CHECKPOINT_ARCHIVED_LSN); - - /* Read the first log file header to print a note if this is - a recovery from a restored InnoDB Hot Backup */ - - fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, max_cp_group->space_id, - 0, 0, LOG_FILE_HDR_SIZE, - log_hdr_buf, max_cp_group); - - if (0 
== ut_memcmp(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, - (byte*)"ibbackup", (sizeof "ibbackup") - 1)) { - /* This log file was created by ibbackup --restore: print - a note to the user about it */ - - fprintf(stderr, - "InnoDB: The log file was created by" - " ibbackup --apply-log at\n" - "InnoDB: %s\n", - log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP); - fprintf(stderr, - "InnoDB: NOTE: the following crash recovery" - " is part of a normal restore.\n"); - - /* Wipe over the label now */ - - memset(log_hdr_buf + LOG_FILE_WAS_CREATED_BY_HOT_BACKUP, - ' ', 4); - /* Write to the log file to wipe over the label */ - fil_io(OS_FILE_WRITE | OS_FILE_LOG, TRUE, - max_cp_group->space_id, - 0, 0, OS_FILE_LOG_BLOCK_SIZE, - log_hdr_buf, max_cp_group); - } - -#ifdef UNIV_LOG_ARCHIVE - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - while (group) { - log_checkpoint_get_nth_group_info(buf, group->id, - &(group->archived_file_no), - &(group->archived_offset)); - - group = UT_LIST_GET_NEXT(log_groups, group); - } -#endif /* UNIV_LOG_ARCHIVE */ - - if (type == LOG_CHECKPOINT) { - /* Start reading the log groups from the checkpoint lsn up. The - variable contiguous_lsn contains an lsn up to which the log is - known to be contiguously written to all log groups. 
*/ - - recv_sys->parse_start_lsn = checkpoint_lsn; - recv_sys->scanned_lsn = checkpoint_lsn; - recv_sys->scanned_checkpoint_no = 0; - recv_sys->recovered_lsn = checkpoint_lsn; - - srv_start_lsn = checkpoint_lsn; - } - - contiguous_lsn = ut_dulint_align_down(recv_sys->scanned_lsn, - OS_FILE_LOG_BLOCK_SIZE); - if (type == LOG_ARCHIVE) { - /* Try to recover the remaining part from logs: first from - the logs of the archived group */ - - group = recv_sys->archive_group; - capacity = log_group_get_capacity(group); - - if ((ut_dulint_cmp(recv_sys->scanned_lsn, ut_dulint_add( - checkpoint_lsn, capacity)) > 0) - || (ut_dulint_cmp(checkpoint_lsn, ut_dulint_add( - recv_sys->scanned_lsn, capacity)) - > 0)) { - - mutex_exit(&(log_sys->mutex)); - - /* The group does not contain enough log: probably - an archived log file was missing or corrupt */ - - return(DB_ERROR); - } - - recv_group_scan_log_recs(group, &contiguous_lsn, - &group_scanned_lsn); - if (ut_dulint_cmp(recv_sys->scanned_lsn, checkpoint_lsn) < 0) { - - mutex_exit(&(log_sys->mutex)); - - /* The group did not contain enough log: an archived - log file was missing or invalid, or the log group - was corrupt */ - - return(DB_ERROR); - } - - group->scanned_lsn = group_scanned_lsn; - up_to_date_group = group; - } else { - up_to_date_group = max_cp_group; - } - - ut_ad(RECV_SCAN_SIZE <= log_sys->buf_size); - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - if ((type == LOG_ARCHIVE) && (group == recv_sys->archive_group)) { - group = UT_LIST_GET_NEXT(log_groups, group); - } - - /* Set the flag to publish that we are doing startup scan. 
*/ - recv_log_scan_is_startup_type = (type == LOG_CHECKPOINT); - while (group) { - old_scanned_lsn = recv_sys->scanned_lsn; - - recv_group_scan_log_recs(group, &contiguous_lsn, - &group_scanned_lsn); - group->scanned_lsn = group_scanned_lsn; - - if (ut_dulint_cmp(old_scanned_lsn, group_scanned_lsn) < 0) { - /* We found a more up-to-date group */ - - up_to_date_group = group; - } - - if ((type == LOG_ARCHIVE) - && (group == recv_sys->archive_group)) { - group = UT_LIST_GET_NEXT(log_groups, group); - } - - group = UT_LIST_GET_NEXT(log_groups, group); - } - - /* Done with startup scan. Clear the flag. */ - recv_log_scan_is_startup_type = FALSE; - if (type == LOG_CHECKPOINT) { - /* NOTE: we always do a 'recovery' at startup, but only if - there is something wrong we will print a message to the - user about recovery: */ - - if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0 - || ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) { - - if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) - < 0) { - fprintf(stderr, - "InnoDB: #########################" - "#################################\n" - "InnoDB: " - "WARNING!\n" - "InnoDB: The log sequence number" - " in ibdata files is higher\n" - "InnoDB: than the log sequence number" - " in the ib_logfiles! 
Are you sure\n" - "InnoDB: you are using the right" - " ib_logfiles to start up" - " the database?\n" - "InnoDB: Log sequence number in" - " ib_logfiles is %lu %lu, log\n" - "InnoDB: sequence numbers stamped" - " to ibdata file headers are between\n" - "InnoDB: %lu %lu and %lu %lu.\n" - "InnoDB: #########################" - "#################################\n", - (ulong) ut_dulint_get_high( - checkpoint_lsn), - (ulong) ut_dulint_get_low( - checkpoint_lsn), - (ulong) ut_dulint_get_high( - min_flushed_lsn), - (ulong) ut_dulint_get_low( - min_flushed_lsn), - (ulong) ut_dulint_get_high( - max_flushed_lsn), - (ulong) ut_dulint_get_low( - max_flushed_lsn)); - - - } - - if (!recv_needed_recovery) { - fprintf(stderr, - "InnoDB: The log sequence number" - " in ibdata files does not match\n" - "InnoDB: the log sequence number" - " in the ib_logfiles!\n"); - recv_init_crash_recovery(); - } - - } - if (!recv_needed_recovery) { - /* Init the doublewrite buffer memory structure */ - trx_sys_doublewrite_init_or_restore_pages(FALSE); - } - } - - /* We currently have only one log group */ - if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: ERROR: We were only able to scan the log" - " up to\n" - "InnoDB: %lu %lu, but a checkpoint was at %lu %lu.\n" - "InnoDB: It is possible that" - " the database is now corrupt!\n", - (ulong) ut_dulint_get_high(group_scanned_lsn), - (ulong) ut_dulint_get_low(group_scanned_lsn), - (ulong) ut_dulint_get_high(checkpoint_lsn), - (ulong) ut_dulint_get_low(checkpoint_lsn)); - } - - if (ut_dulint_cmp(group_scanned_lsn, recv_max_page_lsn) < 0) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: ERROR: We were only able to scan the log" - " up to %lu %lu\n" - "InnoDB: but a database page a had an lsn %lu %lu." 
- " It is possible that the\n" - "InnoDB: database is now corrupt!\n", - (ulong) ut_dulint_get_high(group_scanned_lsn), - (ulong) ut_dulint_get_low(group_scanned_lsn), - (ulong) ut_dulint_get_high(recv_max_page_lsn), - (ulong) ut_dulint_get_low(recv_max_page_lsn)); - } - - if (ut_dulint_cmp(recv_sys->recovered_lsn, checkpoint_lsn) < 0) { - - mutex_exit(&(log_sys->mutex)); - - if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) >= 0) { - - return(DB_SUCCESS); - } - - ut_error; - - return(DB_ERROR); - } - - /* Synchronize the uncorrupted log groups to the most up-to-date log - group; we also copy checkpoint info to groups */ - - log_sys->next_checkpoint_lsn = checkpoint_lsn; - log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1); - -#ifdef UNIV_LOG_ARCHIVE - log_sys->archived_lsn = archived_lsn; -#endif /* UNIV_LOG_ARCHIVE */ - - recv_synchronize_groups(up_to_date_group); - - if (!recv_needed_recovery) { - ut_a(ut_dulint_cmp(checkpoint_lsn, - recv_sys->recovered_lsn) == 0); - - } else { - srv_start_lsn = recv_sys->recovered_lsn; - } - - log_sys->lsn = recv_sys->recovered_lsn; - - ut_memcpy(log_sys->buf, recv_sys->last_block, OS_FILE_LOG_BLOCK_SIZE); - - log_sys->buf_free = ut_dulint_get_low(log_sys->lsn) - % OS_FILE_LOG_BLOCK_SIZE; - log_sys->buf_next_to_write = log_sys->buf_free; - log_sys->written_to_some_lsn = log_sys->lsn; - log_sys->written_to_all_lsn = log_sys->lsn; - - log_sys->last_checkpoint_lsn = checkpoint_lsn; - - log_sys->next_checkpoint_no = ut_dulint_add(checkpoint_no, 1); - -#ifdef UNIV_LOG_ARCHIVE - if (ut_dulint_cmp(archived_lsn, ut_dulint_max) == 0) { - - log_sys->archiving_state = LOG_ARCH_OFF; - } -#endif /* UNIV_LOG_ARCHIVE */ - - mutex_enter(&(recv_sys->mutex)); - - recv_sys->apply_log_recs = TRUE; - - mutex_exit(&(recv_sys->mutex)); - - mutex_exit(&(log_sys->mutex)); - - recv_lsn_checks_on = TRUE; - - /* The database is now ready to start almost normal processing of user - transactions: transaction rollbacks and the application of 
the log - records in the hash table can be run in background. */ - - return(DB_SUCCESS); -} - -/************************************************************ -Completes recovery from a checkpoint. */ - -void -recv_recovery_from_checkpoint_finish(void) -/*======================================*/ -{ - int i; - - /* Apply the hashed log records to the respective file pages */ - - if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) { - - recv_apply_hashed_log_recs(TRUE); - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "InnoDB: Log records applied to the database\n"); - } -#endif /* UNIV_DEBUG */ - - if (recv_needed_recovery) { - trx_sys_print_mysql_master_log_pos(); - trx_sys_print_mysql_binlog_offset(); - } - - if (recv_sys->found_corrupt_log) { - - fprintf(stderr, - "InnoDB: WARNING: the log file may have been" - " corrupt and it\n" - "InnoDB: is possible that the log scan or parsing" - " did not proceed\n" - "InnoDB: far enough in recovery. Please run" - " CHECK TABLE\n" - "InnoDB: on your InnoDB tables to check that" - " they are ok!\n" - "InnoDB: It may be safest to recover your" - " InnoDB database from\n" - "InnoDB: a backup!\n"); - } - - /* Free the resources of the recovery system */ - - recv_recovery_on = FALSE; - -#ifndef UNIV_LOG_DEBUG - recv_sys_free(); -#endif - -#ifdef UNIV_SYNC_DEBUG - /* Wait for a while so that created threads have time to suspend - themselves before we switch the latching order checks on */ - os_thread_sleep(1000000); - - /* Switch latching order checks on in sync0sync.c */ - sync_order_checks_on = TRUE; -#endif - if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) { - /* Rollback the uncommitted transactions which have no user - session */ - - os_thread_create(trx_rollback_or_clean_all_without_sess, - (void *)&i, NULL); - } -} - -/********************************************************** -Resets the logs. The contents of log files will be lost! 
*/ - -void -recv_reset_logs( -/*============*/ - dulint lsn, /* in: reset to this lsn rounded up to - be divisible by OS_FILE_LOG_BLOCK_SIZE, - after which we add LOG_BLOCK_HDR_SIZE */ -#ifdef UNIV_LOG_ARCHIVE - ulint arch_log_no, /* in: next archived log file number */ -#endif /* UNIV_LOG_ARCHIVE */ - ibool new_logs_created)/* in: TRUE if resetting logs is done - at the log creation; FALSE if it is done - after archive recovery */ -{ - log_group_t* group; - - ut_ad(mutex_own(&(log_sys->mutex))); - - log_sys->lsn = ut_dulint_align_up(lsn, OS_FILE_LOG_BLOCK_SIZE); - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - while (group) { - group->lsn = log_sys->lsn; - group->lsn_offset = LOG_FILE_HDR_SIZE; -#ifdef UNIV_LOG_ARCHIVE - group->archived_file_no = arch_log_no; - group->archived_offset = 0; -#endif /* UNIV_LOG_ARCHIVE */ - - if (!new_logs_created) { - recv_truncate_group(group, group->lsn, group->lsn, - group->lsn, group->lsn); - } - - group = UT_LIST_GET_NEXT(log_groups, group); - } - - log_sys->buf_next_to_write = 0; - log_sys->written_to_some_lsn = log_sys->lsn; - log_sys->written_to_all_lsn = log_sys->lsn; - - log_sys->next_checkpoint_no = ut_dulint_zero; - log_sys->last_checkpoint_lsn = ut_dulint_zero; - -#ifdef UNIV_LOG_ARCHIVE - log_sys->archived_lsn = log_sys->lsn; -#endif /* UNIV_LOG_ARCHIVE */ - - log_block_init(log_sys->buf, log_sys->lsn); - log_block_set_first_rec_group(log_sys->buf, LOG_BLOCK_HDR_SIZE); - - log_sys->buf_free = LOG_BLOCK_HDR_SIZE; - log_sys->lsn = ut_dulint_add(log_sys->lsn, LOG_BLOCK_HDR_SIZE); - - mutex_exit(&(log_sys->mutex)); - - /* Reset the checkpoint fields in logs */ - - log_make_checkpoint_at(ut_dulint_max, TRUE); - log_make_checkpoint_at(ut_dulint_max, TRUE); - - mutex_enter(&(log_sys->mutex)); -} - -#ifdef UNIV_HOTBACKUP -/********************************************************** -Creates new log files after a backup has been restored. 
*/ - -void -recv_reset_log_files_for_backup( -/*============================*/ - const char* log_dir, /* in: log file directory path */ - ulint n_log_files, /* in: number of log files */ - ulint log_file_size, /* in: log file size */ - dulint lsn) /* in: new start lsn, must be - divisible by OS_FILE_LOG_BLOCK_SIZE */ -{ - os_file_t log_file; - ibool success; - byte* buf; - ulint i; - ulint log_dir_len; - char name[5000]; - static const char ib_logfile_basename[] = "ib_logfile"; - - log_dir_len = strlen(log_dir); - /* full path name of ib_logfile consists of log dir path + basename - + number. This must fit in the name buffer. - */ - ut_a(log_dir_len + strlen(ib_logfile_basename) + 11 < sizeof(name)); - - buf = ut_malloc(LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE); - memset(buf, '\0', LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE); - - for (i = 0; i < n_log_files; i++) { - - sprintf(name, "%s%s%lu", log_dir, - ib_logfile_basename, (ulong)i); - - log_file = os_file_create_simple(name, OS_FILE_CREATE, - OS_FILE_READ_WRITE, &success); - if (!success) { - fprintf(stderr, - "InnoDB: Cannot create %s. 
Check that" - " the file does not exist yet.\n", name); - - exit(1); - } - - fprintf(stderr, - "Setting log file size to %lu %lu\n", - (ulong) ut_get_high32(log_file_size), - (ulong) log_file_size & 0xFFFFFFFFUL); - - success = os_file_set_size(name, log_file, - log_file_size & 0xFFFFFFFFUL, - ut_get_high32(log_file_size)); - - if (!success) { - fprintf(stderr, - "InnoDB: Cannot set %s size to %lu %lu\n", - name, (ulong) ut_get_high32(log_file_size), - (ulong) (log_file_size & 0xFFFFFFFFUL)); - exit(1); - } - - os_file_flush(log_file); - os_file_close(log_file); - } - - /* We pretend there is a checkpoint at lsn + LOG_BLOCK_HDR_SIZE */ - - log_reset_first_header_and_checkpoint(buf, lsn); - - log_block_init_in_old_format(buf + LOG_FILE_HDR_SIZE, lsn); - log_block_set_first_rec_group(buf + LOG_FILE_HDR_SIZE, - LOG_BLOCK_HDR_SIZE); - sprintf(name, "%s%s%lu", log_dir, ib_logfile_basename, (ulong)0); - - log_file = os_file_create_simple(name, OS_FILE_OPEN, - OS_FILE_READ_WRITE, &success); - if (!success) { - fprintf(stderr, "InnoDB: Cannot open %s.\n", name); - - exit(1); - } - - os_file_write(name, log_file, buf, 0, 0, - LOG_FILE_HDR_SIZE + OS_FILE_LOG_BLOCK_SIZE); - os_file_flush(log_file); - os_file_close(log_file); - - ut_free(buf); -} -#endif /* UNIV_HOTBACKUP */ - -#ifdef UNIV_LOG_ARCHIVE -/********************************************************** -Reads from the archive of a log group and performs recovery. 
*/ -static -ibool -log_group_recover_from_archive_file( -/*================================*/ - /* out: TRUE if no more complete - consistent archive files */ - log_group_t* group) /* in: log group */ -{ - os_file_t file_handle; - dulint start_lsn; - dulint file_end_lsn; - dulint dummy_lsn; - dulint scanned_lsn; - ulint len; - ibool ret; - byte* buf; - ulint read_offset; - ulint file_size; - ulint file_size_high; - int input_char; - char name[10000]; - - ut_a(0); - -try_open_again: - buf = log_sys->buf; - - /* Add the file to the archive file space; open the file */ - - log_archived_file_name_gen(name, group->id, group->archived_file_no); - - file_handle = os_file_create(name, OS_FILE_OPEN, - OS_FILE_LOG, OS_FILE_AIO, &ret); - - if (ret == FALSE) { -ask_again: - fprintf(stderr, - "InnoDB: Do you want to copy additional" - " archived log files\n" - "InnoDB: to the directory\n"); - fprintf(stderr, - "InnoDB: or were these all the files needed" - " in recovery?\n"); - fprintf(stderr, - "InnoDB: (Y == copy more files; N == this is all)?"); - - input_char = getchar(); - - if (input_char == (int) 'N') { - - return(TRUE); - } else if (input_char == (int) 'Y') { - - goto try_open_again; - } else { - goto ask_again; - } - } - - ret = os_file_get_size(file_handle, &file_size, &file_size_high); - ut_a(ret); - - ut_a(file_size_high == 0); - - fprintf(stderr, "InnoDB: Opened archived log file %s\n", name); - - ret = os_file_close(file_handle); - - if (file_size < LOG_FILE_HDR_SIZE) { - fprintf(stderr, - "InnoDB: Archive file header incomplete %s\n", name); - - return(TRUE); - } - - ut_a(ret); - - /* Add the archive file as a node to the space */ - - fil_node_create(name, 1 + file_size / UNIV_PAGE_SIZE, - group->archive_space_id, FALSE); -#if RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE -# error "RECV_SCAN_SIZE < LOG_FILE_HDR_SIZE" -#endif - - /* Read the archive file header */ - fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, group->archive_space_id, 0, 0, - LOG_FILE_HDR_SIZE, buf, NULL); - - 
/* Check if the archive file header is consistent */ - - if (mach_read_from_4(buf + LOG_GROUP_ID) != group->id - || mach_read_from_4(buf + LOG_FILE_NO) - != group->archived_file_no) { - fprintf(stderr, - "InnoDB: Archive file header inconsistent %s\n", name); - - return(TRUE); - } - - if (!mach_read_from_4(buf + LOG_FILE_ARCH_COMPLETED)) { - fprintf(stderr, - "InnoDB: Archive file not completely written %s\n", - name); - - return(TRUE); - } - - start_lsn = mach_read_from_8(buf + LOG_FILE_START_LSN); - file_end_lsn = mach_read_from_8(buf + LOG_FILE_END_LSN); - - if (ut_dulint_is_zero(recv_sys->scanned_lsn)) { - - if (ut_dulint_cmp(recv_sys->parse_start_lsn, start_lsn) < 0) { - fprintf(stderr, - "InnoDB: Archive log file %s" - " starts from too big a lsn\n", - name); - return(TRUE); - } - - recv_sys->scanned_lsn = start_lsn; - } - - if (ut_dulint_cmp(recv_sys->scanned_lsn, start_lsn) != 0) { - - fprintf(stderr, - "InnoDB: Archive log file %s starts from" - " a wrong lsn\n", - name); - return(TRUE); - } - - read_offset = LOG_FILE_HDR_SIZE; - - for (;;) { - len = RECV_SCAN_SIZE; - - if (read_offset + len > file_size) { - len = ut_calc_align_down(file_size - read_offset, - OS_FILE_LOG_BLOCK_SIZE); - } - - if (len == 0) { - - break; - } - -#ifdef UNIV_DEBUG - if (log_debug_writes) { - fprintf(stderr, - "InnoDB: Archive read starting at" - " lsn %lu %lu, len %lu from file %s\n", - (ulong) ut_dulint_get_high(start_lsn), - (ulong) ut_dulint_get_low(start_lsn), - (ulong) len, name); - } -#endif /* UNIV_DEBUG */ - - fil_io(OS_FILE_READ | OS_FILE_LOG, TRUE, - group->archive_space_id, read_offset / UNIV_PAGE_SIZE, - read_offset % UNIV_PAGE_SIZE, len, buf, NULL); - - ret = recv_scan_log_recs( - TRUE, (buf_pool->n_frames - recv_n_pool_free_frames) - * UNIV_PAGE_SIZE, TRUE, buf, len, start_lsn, - &dummy_lsn, &scanned_lsn); - - if (ut_dulint_cmp(scanned_lsn, file_end_lsn) == 0) { - - return(FALSE); - } - - if (ret) { - fprintf(stderr, - "InnoDB: Archive log file %s" - " does not 
scan right\n", - name); - return(TRUE); - } - - read_offset += len; - start_lsn = ut_dulint_add(start_lsn, len); - - ut_ad(ut_dulint_cmp(start_lsn, scanned_lsn) == 0); - } - - return(FALSE); -} - -/************************************************************ -Recovers from archived log files, and also from log files, if they exist. */ - -ulint -recv_recovery_from_archive_start( -/*=============================*/ - /* out: error code or DB_SUCCESS */ - dulint min_flushed_lsn,/* in: min flushed lsn field from the - data files */ - dulint limit_lsn, /* in: recover up to this lsn if possible */ - ulint first_log_no) /* in: number of the first archived log file - to use in the recovery; the file will be - searched from INNOBASE_LOG_ARCH_DIR specified - in server config file */ -{ - log_group_t* group; - ulint group_id; - ulint trunc_len; - ibool ret; - ulint err; - - ut_a(0); - - recv_sys_create(); - recv_sys_init(FALSE, buf_pool_get_curr_size()); - - recv_recovery_on = TRUE; - recv_recovery_from_backup_on = TRUE; - - recv_sys->limit_lsn = limit_lsn; - - group_id = 0; - - group = UT_LIST_GET_FIRST(log_sys->log_groups); - - while (group) { - if (group->id == group_id) { - - break; - } - - group = UT_LIST_GET_NEXT(log_groups, group); - } - - if (!group) { - fprintf(stderr, - "InnoDB: There is no log group defined with id %lu!\n", - (ulong) group_id); - return(DB_ERROR); - } - - group->archived_file_no = first_log_no; - - recv_sys->parse_start_lsn = min_flushed_lsn; - - recv_sys->scanned_lsn = ut_dulint_zero; - recv_sys->scanned_checkpoint_no = 0; - recv_sys->recovered_lsn = recv_sys->parse_start_lsn; - - recv_sys->archive_group = group; - - ret = FALSE; - - mutex_enter(&(log_sys->mutex)); - - while (!ret) { - ret = log_group_recover_from_archive_file(group); - - /* Close and truncate a possible processed archive file - from the file space */ - - trunc_len = UNIV_PAGE_SIZE - * fil_space_get_size(group->archive_space_id); - if (trunc_len > 0) { - 
fil_space_truncate_start(group->archive_space_id, - trunc_len); - } - - group->archived_file_no++; - } - - if (ut_dulint_cmp(recv_sys->recovered_lsn, limit_lsn) < 0) { - - if (ut_dulint_is_zero(recv_sys->scanned_lsn)) { - - recv_sys->scanned_lsn = recv_sys->parse_start_lsn; - } - - mutex_exit(&(log_sys->mutex)); - - err = recv_recovery_from_checkpoint_start(LOG_ARCHIVE, - limit_lsn, - ut_dulint_max, - ut_dulint_max); - if (err != DB_SUCCESS) { - - return(err); - } - - mutex_enter(&(log_sys->mutex)); - } - - if (ut_dulint_cmp(limit_lsn, ut_dulint_max) != 0) { - - recv_apply_hashed_log_recs(FALSE); - - recv_reset_logs(recv_sys->recovered_lsn, 0, FALSE); - } - - mutex_exit(&(log_sys->mutex)); - - return(DB_SUCCESS); -} - -/************************************************************ -Completes recovery from archive. */ - -void -recv_recovery_from_archive_finish(void) -/*===================================*/ -{ - recv_recovery_from_checkpoint_finish(); - - recv_recovery_from_backup_on = FALSE; -} -#endif /* UNIV_LOG_ARCHIVE */ diff --git a/storage/innobase/mach/mach0data.c b/storage/innobase/mach/mach0data.c deleted file mode 100644 index b92293fd037..00000000000 --- a/storage/innobase/mach/mach0data.c +++ /dev/null @@ -1,119 +0,0 @@ -/********************************************************************** -Utilities for converting data from the database file -to the machine format. - -(c) 1995 Innobase Oy - -Created 11/28/1995 Heikki Tuuri -***********************************************************************/ - -#include "mach0data.h" - -#ifdef UNIV_NONINL -#include "mach0data.ic" -#endif - -/************************************************************* -Reads a ulint in a compressed form if the log record fully contains it. 
*/ - -byte* -mach_parse_compressed( -/*==================*/ - /* out: pointer to end of the stored field, NULL if - not complete */ - byte* ptr, /* in: pointer to buffer from where to read */ - byte* end_ptr,/* in: pointer to end of the buffer */ - ulint* val) /* out: read value (< 2^32) */ -{ - ulint flag; - - ut_ad(ptr && end_ptr && val); - - if (ptr >= end_ptr) { - - return(NULL); - } - - flag = mach_read_from_1(ptr); - - if (flag < 0x80UL) { - *val = flag; - return(ptr + 1); - - } else if (flag < 0xC0UL) { - if (end_ptr < ptr + 2) { - return(NULL); - } - - *val = mach_read_from_2(ptr) & 0x7FFFUL; - - return(ptr + 2); - - } else if (flag < 0xE0UL) { - if (end_ptr < ptr + 3) { - return(NULL); - } - - *val = mach_read_from_3(ptr) & 0x3FFFFFUL; - - return(ptr + 3); - } else if (flag < 0xF0UL) { - if (end_ptr < ptr + 4) { - return(NULL); - } - - *val = mach_read_from_4(ptr) & 0x1FFFFFFFUL; - - return(ptr + 4); - } else { - ut_ad(flag == 0xF0UL); - - if (end_ptr < ptr + 5) { - return(NULL); - } - - *val = mach_read_from_4(ptr + 1); - return(ptr + 5); - } -} - -/************************************************************* -Reads a dulint in a compressed form if the log record fully contains it. 
*/ - -byte* -mach_dulint_parse_compressed( -/*=========================*/ - /* out: pointer to end of the stored field, NULL if - not complete */ - byte* ptr, /* in: pointer to buffer from where to read */ - byte* end_ptr,/* in: pointer to end of the buffer */ - dulint* val) /* out: read value */ -{ - ulint high; - ulint low; - ulint size; - - ut_ad(ptr && end_ptr && val); - - if (end_ptr < ptr + 5) { - - return(NULL); - } - - high = mach_read_compressed(ptr); - - size = mach_get_compressed_size(high); - - ptr += size; - - if (end_ptr < ptr + 4) { - - return(NULL); - } - - low = mach_read_from_4(ptr); - - *val = ut_dulint_create(high, low); - - return(ptr + 4); -} diff --git a/storage/innobase/mem/mem0dbg.c b/storage/innobase/mem/mem0dbg.c deleted file mode 100644 index 72452907c3f..00000000000 --- a/storage/innobase/mem/mem0dbg.c +++ /dev/null @@ -1,984 +0,0 @@ -/************************************************************************ -The memory management: the debug code. This is not a compilation module, -but is included in mem0mem.* ! - -(c) 1994, 1995 Innobase Oy - -Created 6/9/1994 Heikki Tuuri -*************************************************************************/ - -#ifdef UNIV_MEM_DEBUG -mutex_t mem_hash_mutex; /* The mutex which protects in the - debug version the hash table containing - the list of live memory heaps, and - also the global variables below. */ - -/* The following variables contain information about the -extent of memory allocations. Only used in the debug version. -Protected by mem_hash_mutex above. 
*/ - -static ulint mem_n_created_heaps = 0; -static ulint mem_n_allocations = 0; -static ulint mem_total_allocated_memory = 0; -ulint mem_current_allocated_memory = 0; -static ulint mem_max_allocated_memory = 0; -static ulint mem_last_print_info = 0; - -/* Size of the hash table for memory management tracking */ -#define MEM_HASH_SIZE 997 - -/* The node of the list containing currently allocated memory heaps */ - -typedef struct mem_hash_node_struct mem_hash_node_t; -struct mem_hash_node_struct { - UT_LIST_NODE_T(mem_hash_node_t) - list; /* hash list node */ - mem_heap_t* heap; /* memory heap */ - const char* file_name;/* file where heap was created*/ - ulint line; /* file line of creation */ - ulint nth_heap;/* this is the nth heap created */ - UT_LIST_NODE_T(mem_hash_node_t) - all_list;/* list of all created heaps */ -}; - -typedef UT_LIST_BASE_NODE_T(mem_hash_node_t) mem_hash_cell_t; - -/* The hash table of allocated heaps */ -static mem_hash_cell_t mem_hash_table[MEM_HASH_SIZE]; - -/* The base node of the list of all allocated heaps */ -static mem_hash_cell_t mem_all_list_base; - -static ibool mem_hash_initialized = FALSE; - - -UNIV_INLINE -mem_hash_cell_t* -mem_hash_get_nth_cell(ulint i); - -/* Accessor function for the hash table. Returns a pointer to the -table cell. 
*/ -UNIV_INLINE -mem_hash_cell_t* -mem_hash_get_nth_cell(ulint i) -{ - ut_a(i < MEM_HASH_SIZE); - - return(&(mem_hash_table[i])); -} - -/* Accessor functions for a memory field in the debug version */ - -void -mem_field_header_set_len(byte* field, ulint len) -{ - mach_write_to_4(field - 2 * sizeof(ulint), len); -} - -ulint -mem_field_header_get_len(byte* field) -{ - return(mach_read_from_4(field - 2 * sizeof(ulint))); -} - -void -mem_field_header_set_check(byte* field, ulint check) -{ - mach_write_to_4(field - sizeof(ulint), check); -} - -ulint -mem_field_header_get_check(byte* field) -{ - return(mach_read_from_4(field - sizeof(ulint))); -} - -void -mem_field_trailer_set_check(byte* field, ulint check) -{ - mach_write_to_4(field + mem_field_header_get_len(field), check); -} - -ulint -mem_field_trailer_get_check(byte* field) -{ - return(mach_read_from_4(field - + mem_field_header_get_len(field))); -} -#endif /* UNIV_MEM_DEBUG */ - -/********************************************************************** -Initializes the memory system. */ - -void -mem_init( -/*=====*/ - ulint size) /* in: common pool size in bytes */ -{ -#ifdef UNIV_MEM_DEBUG - - ulint i; - - /* Initialize the hash table */ - ut_a(FALSE == mem_hash_initialized); - - mutex_create(&mem_hash_mutex, SYNC_MEM_HASH); - - for (i = 0; i < MEM_HASH_SIZE; i++) { - UT_LIST_INIT(*mem_hash_get_nth_cell(i)); - } - - UT_LIST_INIT(mem_all_list_base); - - mem_hash_initialized = TRUE; -#endif - - mem_comm_pool = mem_pool_create(size); -} - -#ifdef UNIV_MEM_DEBUG -/********************************************************************** -Initializes an allocated memory field in the debug version. 
*/ - -void -mem_field_init( -/*===========*/ - byte* buf, /* in: memory field */ - ulint n) /* in: how many bytes the user requested */ -{ - ulint rnd; - byte* usr_buf; - - usr_buf = buf + MEM_FIELD_HEADER_SIZE; - - /* In the debug version write the length field and the - check fields to the start and the end of the allocated storage. - The field header consists of a length field and - a random number field, in this order. The field trailer contains - the same random number as a check field. */ - - mem_field_header_set_len(usr_buf, n); - - rnd = ut_rnd_gen_ulint(); - - mem_field_header_set_check(usr_buf, rnd); - mem_field_trailer_set_check(usr_buf, rnd); - - /* Update the memory allocation information */ - - mutex_enter(&mem_hash_mutex); - - mem_total_allocated_memory += n; - mem_current_allocated_memory += n; - mem_n_allocations++; - - if (mem_current_allocated_memory > mem_max_allocated_memory) { - mem_max_allocated_memory = mem_current_allocated_memory; - } - - mutex_exit(&mem_hash_mutex); - - /* In the debug version set the buffer to a random - combination of 0xBA and 0xBE */ - - mem_init_buf(usr_buf, n); -} - -/********************************************************************** -Erases an allocated memory field in the debug version. */ - -void -mem_field_erase( -/*============*/ - byte* buf, /* in: memory field */ - ulint n __attribute__((unused))) - /* in: how many bytes the user requested */ -{ - byte* usr_buf; - - usr_buf = buf + MEM_FIELD_HEADER_SIZE; - - mutex_enter(&mem_hash_mutex); - mem_current_allocated_memory -= n; - mutex_exit(&mem_hash_mutex); - - /* Check that the field lengths agree */ - ut_ad(n == (ulint)mem_field_header_get_len(usr_buf)); - - /* In the debug version, set the freed space to a random - combination of 0xDE and 0xAD */ - - mem_erase_buf(buf, MEM_SPACE_NEEDED(n)); -} - -/******************************************************************* -Initializes a buffer to a random combination of hex BA and BE. 
-Used to initialize allocated memory. */ - -void -mem_init_buf( -/*=========*/ - byte* buf, /* in: pointer to buffer */ - ulint n) /* in: length of buffer */ -{ - byte* ptr; - - UNIV_MEM_ASSERT_W(buf, n); - - for (ptr = buf; ptr < buf + n; ptr++) { - - if (ut_rnd_gen_ibool()) { - *ptr = 0xBA; - } else { - *ptr = 0xBE; - } - } - - UNIV_MEM_INVALID(buf, n); -} - -/******************************************************************* -Initializes a buffer to a random combination of hex DE and AD. -Used to erase freed memory.*/ - -void -mem_erase_buf( -/*==========*/ - byte* buf, /* in: pointer to buffer */ - ulint n) /* in: length of buffer */ -{ - byte* ptr; - - UNIV_MEM_ASSERT_W(buf, n); - - for (ptr = buf; ptr < buf + n; ptr++) { - if (ut_rnd_gen_ibool()) { - *ptr = 0xDE; - } else { - *ptr = 0xAD; - } - } - - UNIV_MEM_FREE(buf, n); -} - -/******************************************************************* -Inserts a created memory heap to the hash table of current allocated -memory heaps. */ - -void -mem_hash_insert( -/*============*/ - mem_heap_t* heap, /* in: the created heap */ - const char* file_name, /* in: file name of creation */ - ulint line) /* in: line where created */ -{ - mem_hash_node_t* new_node; - ulint cell_no ; - - ut_ad(mem_heap_check(heap)); - - mutex_enter(&mem_hash_mutex); - - cell_no = ut_hash_ulint((ulint)heap, MEM_HASH_SIZE); - - /* Allocate a new node to the list */ - new_node = ut_malloc(sizeof(mem_hash_node_t)); - - new_node->heap = heap; - new_node->file_name = file_name; - new_node->line = line; - new_node->nth_heap = mem_n_created_heaps; - - /* Insert into lists */ - UT_LIST_ADD_FIRST(list, *mem_hash_get_nth_cell(cell_no), new_node); - - UT_LIST_ADD_LAST(all_list, mem_all_list_base, new_node); - - mem_n_created_heaps++; - - mutex_exit(&mem_hash_mutex); -} - -/******************************************************************* -Removes a memory heap (which is going to be freed by the caller) -from the list of live memory heaps. 
The user size of the heap
-(how much memory in bytes was allocated for the user of the heap, not
-the total space occupied by the heap) is subtracted from
-mem_current_allocated_memory; the function itself returns nothing.
-Also validates the heap. If the heap is not found in the hash table, or
-validation reports an inconsistency, a diagnostic is written to stderr
-and ut_error is invoked.
-NOTE: This function does not free the storage occupied by the
-heap itself, only the node in the list of heaps. */
-
-void
-mem_hash_remove(
-/*============*/
-	mem_heap_t*	heap,	/* in: the heap to be freed */
-	const char*	file_name, /* in: file name of freeing */
-	ulint		line)	/* in: line where freed */
-{
-	mem_hash_node_t*	node;
-	ulint			cell_no;
-	ibool			error;
-	ulint			size;
-
-	ut_ad(mem_heap_check(heap));
-
-	mutex_enter(&mem_hash_mutex);
-
-	cell_no = ut_hash_ulint((ulint)heap, MEM_HASH_SIZE);
-
-	/* Look for the heap in the hash table list */
-	node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(cell_no));
-
-	while (node != NULL) {
-		if (node->heap == heap) {
-
-			break;
-		}
-
-		node = UT_LIST_GET_NEXT(list, node);
-	}
-
-	if (node == NULL) {
-		fprintf(stderr,
-			"Memory heap or buffer freed in %s line %lu"
-			" did not exist.\n",
-			file_name, (ulong) line);
-		ut_error;
-	}
-
-	/* Remove from lists */
-	UT_LIST_REMOVE(list, *mem_hash_get_nth_cell(cell_no), node);
-
-	UT_LIST_REMOVE(all_list, mem_all_list_base, node);
-
-	/* Validate the heap which will be freed */
-	mem_heap_validate_or_print(node->heap, NULL, FALSE, &error, &size,
-				   NULL, NULL);
-	if (error) {
-		/* Every ulint printed with %lu is cast to ulong, as in the
-		other diagnostics of this file, so that the argument type
-		always matches the conversion. */
-		fprintf(stderr,
-			"Inconsistency in memory heap or"
-			" buffer n:o %lu created\n"
-			"in %s line %lu and tried to free in %s line %lu.\n"
-			"Hex dump of 400 bytes around memory heap"
-			" first block start:\n",
-			(ulong) node->nth_heap, node->file_name,
-			(ulong) node->line,
-			file_name, (ulong) line);
-		ut_print_buf(stderr, (byte*)node->heap - 200, 400);
-		fputs("\nDump of the mem heap:\n", stderr);
-		mem_heap_validate_or_print(node->heap, NULL, TRUE, &error,
-					   &size, NULL, NULL);
-		ut_error;
-	}
-
-	/* Free the memory occupied by the node struct */
-	ut_free(node);
-
-	mem_current_allocated_memory -= size;
-
-	mutex_exit(&mem_hash_mutex);
-}
-#endif /* UNIV_MEM_DEBUG */
-
-#if defined UNIV_MEM_DEBUG || defined UNIV_DEBUG
-/*******************************************************************
-Checks a memory heap for consistency and prints the contents if requested.
-Outputs the sum of sizes of buffers given to the user (only in
-the debug version), the physical size of the heap and the number of
-blocks in the heap. In case of error returns 0 as sizes and number
-of blocks, and *error is left TRUE. */
-
-void
-mem_heap_validate_or_print(
-/*=======================*/
-	mem_heap_t*	heap,	/* in: memory heap */
-	byte*		top __attribute__((unused)),
-				/* in: calculate and validate only until
-				this top pointer in the heap is reached,
-				if this pointer is NULL, ignored; only
-				consulted when UNIV_MEM_DEBUG is defined */
-	ibool		print,	/* in: if TRUE, prints the contents
-				of the heap; works only in
-				the debug version */
-	ibool*		error,	/* out: TRUE if error */
-	ulint*		us_size,/* out: allocated memory
-				(for the user) in the heap,
-				if a NULL pointer is passed as this
-				argument, it is ignored; without
-				UNIV_MEM_DEBUG the fields are not
-				traced and this is always 0 */
-	ulint*		ph_size,/* out: physical size of the heap,
-				if a NULL pointer is passed as this
-				argument, it is ignored */
-	ulint*		n_blocks) /* out: number of blocks in the heap,
-				if a NULL pointer is passed as this
-				argument, it is ignored */
-{
-	mem_block_t*	block;
-	ulint		total_len	= 0;
-	ulint		block_count	= 0;
-	ulint		phys_len	= 0;
-#ifdef UNIV_MEM_DEBUG
-	ulint		len;
-	byte*		field;
-	byte*		user_field;
-	ulint		check_field;
-#endif
-
-	/* Pessimistically, we set the parameters to error values */
-	if (us_size != NULL) {
-		*us_size = 0;
-	}
-	if (ph_size != NULL) {
-		*ph_size = 0;
-	}
-	if (n_blocks != NULL) {
-		*n_blocks = 0;
-	}
-	*error = TRUE;
-
-	block = heap;
-
-	if (block->magic_n != MEM_BLOCK_MAGIC_N) {
-		return;
-	}
-
-	if (print) {
-		fputs("Memory heap:", stderr);
-	}
-
-	while (block != NULL) {
-		phys_len += mem_block_get_len(block);
-
-		if ((block->type == MEM_HEAP_BUFFER)
-		    && (mem_block_get_len(block) > UNIV_PAGE_SIZE)) {
-
-			fprintf(stderr,
-				"InnoDB: Error: mem block %p"
-				" length %lu > UNIV_PAGE_SIZE\n",
-				(void*) block,
-				(ulong) mem_block_get_len(block));
-			/* error */
-
-			return;
-		}
-
-#ifdef UNIV_MEM_DEBUG
-		/* We can trace the fields of the block only in the debug
-		version */
-		if (print) {
-			/* block_count is unsigned: print it with %lu */
-			fprintf(stderr, " Block %lu:", (ulong) block_count);
-		}
-
-		field = (byte*)block + mem_block_get_start(block);
-
-		if (top && (field == top)) {
-
-			goto completed;
-		}
-
-		while (field < (byte*)block + mem_block_get_free(block)) {
-
-			/* Calculate the pointer to the storage
-			which was given to the user */
-
-			user_field = field + MEM_FIELD_HEADER_SIZE;
-
-			len = mem_field_header_get_len(user_field);
-
-			if (print) {
-				ut_print_buf(stderr, user_field, len);
-			}
-
-			total_len += len;
-			check_field = mem_field_header_get_check(user_field);
-
-			if (check_field
-			    != mem_field_trailer_get_check(user_field)) {
-				/* error: header and trailer check fields
-				differ. Pointers are printed with %p and
-				integers are cast to ulong, matching the
-				"mem block %p" message above. */
-
-				fprintf(stderr,
-					"InnoDB: Error: block %p mem"
-					" field %p len %lu\n"
-					"InnoDB: header check field is"
-					" %lx but trailer %lx\n",
-					(void*) block,
-					(void*) field, (ulong) len,
-					(ulong) check_field,
-					(ulong) mem_field_trailer_get_check(
-						user_field));
-
-				return;
-			}
-
-			/* Move to next field */
-			field = field + MEM_SPACE_NEEDED(len);
-
-			if (top && (field == top)) {
-
-				goto completed;
-			}
-
-		}
-
-		/* At the end check that we have arrived to the first free
-		position */
-
-		if (field != (byte*)block + mem_block_get_free(block)) {
-			/* error */
-
-			fprintf(stderr,
-				"InnoDB: Error: block %p end of"
-				" mem fields %p\n"
-				"InnoDB: but block free at %p\n",
-				(void*) block, (void*) field,
-				(void*) ((byte*)block
-					 + mem_block_get_free(block)));
-
-			return;
-		}
-
-#endif
-
-		block = UT_LIST_GET_NEXT(list, block);
-		block_count++;
-	}
-#ifdef UNIV_MEM_DEBUG
-completed:
-#endif
-	/* No inconsistency was found: publish the totals */
-	if (us_size != NULL) {
-		*us_size = total_len;
-	}
-	if (ph_size != NULL) {
-		*ph_size = phys_len;
-	}
-	if (n_blocks != NULL) {
-		*n_blocks = block_count;
-	}
-	*error = FALSE;
-}
-
-/******************************************************************
-Dumps a memory heap to stderr: the heap contents are printed by
-mem_heap_validate_or_print() in print mode, followed by one summary line
-with the heap type, user size, physical size and number of blocks.
-Asserts afterwards that the validation pass reported no error. */
-static
-void
-mem_heap_print(
-/*===========*/
-	mem_heap_t*	heap)	/* in: memory heap */
-{
-	ulint	block_cnt;
-	ulint	phys_bytes;
-	ulint	user_bytes;
-	ibool	corrupt;
-
-	ut_ad(mem_heap_check(heap));
-
-	mem_heap_validate_or_print(heap, NULL, TRUE, &corrupt,
-				   &user_bytes, &phys_bytes, &block_cnt);
-
-	fprintf(stderr,
-		"\nheap type: %lu; size: user size %lu;"
-		" physical size %lu; blocks %lu.\n",
-		(ulong) heap->type, (ulong) user_bytes,
-		(ulong) phys_bytes, (ulong) block_cnt);
-
-	ut_a(!corrupt);
-}
-
-/******************************************************************
-Checks the contents of a memory heap for consistency. If an error is
-detected the heap is printed first, and then the assertion fails. */
-
-ibool
-mem_heap_validate(
-/*==============*/
-				/* out: TRUE if ok */
-	mem_heap_t*	heap)	/* in: memory heap */
-{
-	ibool	corrupt;
-
-	ut_ad(mem_heap_check(heap));
-
-	/* Only the error flag is needed here. The size and block count
-	outputs are optional and are skipped when NULL is passed. */
-	mem_heap_validate_or_print(heap, NULL, FALSE, &corrupt,
-				   NULL, NULL, NULL);
-	if (corrupt) {
-		mem_heap_print(heap);
-	}
-
-	ut_a(!corrupt);
-
-	return(TRUE);
-}
-#endif /* UNIV_MEM_DEBUG || UNIV_DEBUG */
-
-#ifdef UNIV_DEBUG
-/******************************************************************
-Asserts that the magic number of the given heap (or heap block) is
-MEM_BLOCK_MAGIC_N. */
-
-ibool
-mem_heap_check(
-/*===========*/
-				/* out: TRUE if ok */
-	mem_heap_t*	heap)	/* in: memory heap */
-{
-	ut_a(MEM_BLOCK_MAGIC_N == heap->magic_n);
-
-	return(TRUE);
-}
-#endif /* UNIV_DEBUG */
-
-#ifdef UNIV_MEM_DEBUG
-/*********************************************************************
-TRUE if no memory is currently allocated.
*/
-
-ibool
-mem_all_freed(void)
-/*===============*/
-			/* out: TRUE if no heaps exist */
-{
-	mem_hash_node_t*	node;
-	ulint			heap_count	= 0;
-	ulint			i;
-
-	/* Asserts that the allocation system is consistent before the
-	heaps are counted. */
-	mem_validate();
-
-	mutex_enter(&mem_hash_mutex);
-
-	/* Count the heaps registered in every cell of the hash table */
-	for (i = 0; i < MEM_HASH_SIZE; i++) {
-
-		node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(i));
-		while (node != NULL) {
-			heap_count++;
-			node = UT_LIST_GET_NEXT(list, node);
-		}
-	}
-
-	mutex_exit(&mem_hash_mutex);
-
-	if (heap_count == 0) {
-
-		/* With no heaps left, nothing may remain reserved in the
-		common pool either. */
-		ut_a(mem_pool_get_reserved(mem_comm_pool) == 0);
-
-		return(TRUE);
-	} else {
-		return(FALSE);
-	}
-}
-
-/*********************************************************************
-Validates the dynamic memory allocation system: validates the common
-pool, validates every heap registered in the hash table, and checks the
-allocation counters against each other. Does not assert on heap or
-counter inconsistencies; they are reported through the return value. */
-
-ibool
-mem_validate_no_assert(void)
-/*========================*/
-			/* out: TRUE if error */
-{
-	mem_hash_node_t*	node;
-	ulint			n_heaps	= 0;
-	ibool			error	= FALSE;
-	ulint			i;
-
-	mem_pool_validate(mem_comm_pool);
-
-	mutex_enter(&mem_hash_mutex);
-
-	for (i = 0; i < MEM_HASH_SIZE; i++) {
-
-		node = UT_LIST_GET_FIRST(*mem_hash_get_nth_cell(i));
-
-		while (node != NULL) {
-			n_heaps++;
-
-			/* Only the error flag is used; the optional size
-			and block count outputs are skipped with NULL. */
-			mem_heap_validate_or_print(node->heap, NULL,
-						   FALSE, &error,
-						   NULL, NULL, NULL);
-
-			if (error) {
-				/* node->line is a ulint: cast it to ulong
-				for %lu, as the other messages in this
-				file do. */
-				fprintf(stderr,
-					"\nERROR!!!!!!!!!!!!!!!!!!!"
-					"!!!!!!!!!!!!!!!!!!!!!!!\n\n"
-					"Inconsistency in memory heap"
-					" or buffer created\n"
-					"in %s line %lu.\n",
-					node->file_name,
-					(ulong) node->line);
-
-				mutex_exit(&mem_hash_mutex);
-
-				return(TRUE);
-			}
-
-			node = UT_LIST_GET_NEXT(list, node);
-		}
-	}
-
-	/* Cross-check the global allocation counters */
-
-	if ((n_heaps == 0) && (mem_current_allocated_memory != 0)) {
-		error = TRUE;
-	}
-
-	if (mem_total_allocated_memory < mem_current_allocated_memory) {
-		error = TRUE;
-	}
-
-	if (mem_max_allocated_memory > mem_total_allocated_memory) {
-		error = TRUE;
-	}
-
-	if (mem_n_created_heaps < n_heaps) {
-		error = TRUE;
-	}
-
-	mutex_exit(&mem_hash_mutex);
-
-	return(error);
-}
-
-/****************************************************************
-Validates the dynamic memory allocation system and asserts that
-mem_validate_no_assert() found no error. */
-
-ibool
-mem_validate(void)
-/*==============*/
-			/* out: TRUE if ok */
-{
-	ut_a(!mem_validate_no_assert());
-
-	return(TRUE);
-}
-#endif /* UNIV_MEM_DEBUG */
-
-/****************************************************************
-Tries to find neighboring memory allocation blocks and dumps to stderr
-the neighborhood of a given pointer.
*/
-
-void
-mem_analyze_corruption(
-/*===================*/
-	void*	ptr)	/* in: pointer to place of possible corruption */
-{
-	byte*	p;	/* current scan position */
-	ulint	i;	/* magic numbers reported so far in one direction */
-	ulint	dist;	/* bytes stepped away from ptr; printed with each hit */
-	/* Hex dump of 500 bytes, starting 250 bytes before ptr */
-	fputs("InnoDB: Apparent memory corruption: mem dump ", stderr);
-	ut_print_buf(stderr, (byte*)ptr - 250, 500);
-
-	fputs("\nInnoDB: Scanning backward trying to find"
-	      " previous allocated mem blocks\n", stderr);
-	/* Backward pass: report 10 magic numbers found below ptr.
-	The inner loop has no bound of its own; it steps down one byte at
-	a time until a live or freed block magic number is seen.
-	NOTE(review): nothing here stops the scan from leaving readable
-	memory - confirm this is acceptable for a last-resort diagnostic. */
-	p = (byte*)ptr;
-	dist = 0;
-
-	for (i = 0; i < 10; i++) {
-		for (;;) {
-			if (((ulint)p) % 4 == 0) {
-				/* Only 4-byte aligned addresses are probed.
-				On a hit, the string at p + sizeof(ulint) and
-				the ulint at p + 8 + sizeof(ulint) are printed
-				as file name and line; presumably these are
-				the fields following magic_n in the block
-				header - TODO confirm against the
-				mem_block_t definition. */
-				if (*((ulint*)p) == MEM_BLOCK_MAGIC_N) {
-					fprintf(stderr,
-						"Mem block at - %lu,"
-						" file %s, line %lu\n",
-						(ulong) dist,
-						(p + sizeof(ulint)),
-						(ulong)
-						(*(ulint*)(p + 8
-							   + sizeof(ulint))));
-
-					break;
-				}
-
-				if (*((ulint*)p) == MEM_FREED_BLOCK_MAGIC_N) {
-					fprintf(stderr,
-						"Freed mem block at - %lu,"
-						" file %s, line %lu\n",
-						(ulong) dist,
-						(p + sizeof(ulint)),
-						(ulong)
-						(*(ulint*)(p + 8
-							   + sizeof(ulint))));
-
-					break;
-				}
-			}
-
-			p--;
-			dist++;
-		}
-		/* Step past the magic number just reported, so that the
-		next search starts beyond it */
-		p--;
-		dist++;
-	}
-
-	fprintf(stderr,
-		"InnoDB: Scanning forward trying to find next"
-		" allocated mem blocks\n");
-	/* Forward pass: the same search, stepping upward from ptr */
-	p = (byte*)ptr;
-	dist = 0;
-
-	for (i = 0; i < 10; i++) {
-		for (;;) {
-			if (((ulint)p) % 4 == 0) {
-
-				if (*((ulint*)p) == MEM_BLOCK_MAGIC_N) {
-					fprintf(stderr,
-						"Mem block at + %lu, file %s,"
-						" line %lu\n",
-						(ulong) dist,
-						(p + sizeof(ulint)),
-						(ulong)
-						(*(ulint*)(p + 8
-							   + sizeof(ulint))));
-
-					break;
-				}
-
-				if (*((ulint*)p) == MEM_FREED_BLOCK_MAGIC_N) {
-					fprintf(stderr,
-						"Freed mem block at + %lu,"
-						" file %s, line %lu\n",
-						(ulong) dist,
-						(p + sizeof(ulint)),
-						(ulong)
-						(*(ulint*)(p + 8
-							   + sizeof(ulint))));
-
-					break;
-				}
-			}
-
-			p++;
-			dist++;
-		}
-		/* Step past the magic number just reported */
-		p++;
-		dist++;
-	}
-}
-
-/*********************************************************************
-Prints information of dynamic memory usage and currently allocated
-memory heaps or buffers. Can only be used in the debug version.
*/ -static -void -mem_print_info_low( -/*===============*/ - ibool print_all) /* in: if TRUE, all heaps are printed, - else only the heaps allocated after the - previous call of this function */ -{ -#ifdef UNIV_MEM_DEBUG - mem_hash_node_t* node; - ulint n_heaps = 0; - ulint allocated_mem; - ulint ph_size; - ulint total_allocated_mem = 0; - ibool error; - ulint n_blocks; -#endif - FILE* outfile; - - /* outfile = fopen("ibdebug", "a"); */ - - outfile = stdout; - - fprintf(outfile, "\n"); - fprintf(outfile, - "________________________________________________________\n"); - fprintf(outfile, "MEMORY ALLOCATION INFORMATION\n\n"); - -#ifndef UNIV_MEM_DEBUG - - UT_NOT_USED(print_all); - - mem_pool_print_info(outfile, mem_comm_pool); - - fprintf(outfile, - "Sorry, non-debug version cannot give more memory info\n"); - - /* fclose(outfile); */ - - return; -#else - mutex_enter(&mem_hash_mutex); - - fprintf(outfile, "LIST OF CREATED HEAPS AND ALLOCATED BUFFERS: \n\n"); - - if (!print_all) { - fprintf(outfile, "AFTER THE LAST PRINT INFO\n"); - } - - node = UT_LIST_GET_FIRST(mem_all_list_base); - - while (node != NULL) { - n_heaps++; - - if (!print_all && node->nth_heap < mem_last_print_info) { - - goto next_heap; - } - - mem_heap_validate_or_print(node->heap, NULL, - FALSE, &error, &allocated_mem, - &ph_size, &n_blocks); - total_allocated_mem += allocated_mem; - - fprintf(outfile, - "%lu: file %s line %lu of size %lu phys.size %lu" - " with %lu blocks, type %lu\n", - node->nth_heap, node->file_name, node->line, - allocated_mem, ph_size, n_blocks, - (node->heap)->type); -next_heap: - node = UT_LIST_GET_NEXT(all_list, node); - } - - fprintf(outfile, "\n"); - - fprintf(outfile, "Current allocated memory : %lu\n", - mem_current_allocated_memory); - fprintf(outfile, "Current allocated heaps and buffers : %lu\n", - n_heaps); - fprintf(outfile, "Cumulative allocated memory : %lu\n", - mem_total_allocated_memory); - fprintf(outfile, "Maximum allocated memory : %lu\n", - 
mem_max_allocated_memory); - fprintf(outfile, "Cumulative created heaps and buffers : %lu\n", - mem_n_created_heaps); - fprintf(outfile, "Cumulative number of allocations : %lu\n", - mem_n_allocations); - - mem_last_print_info = mem_n_created_heaps; - - mutex_exit(&mem_hash_mutex); - - mem_pool_print_info(outfile, mem_comm_pool); - - /* mem_validate(); */ - - /* fclose(outfile); */ -#endif -} - -/********************************************************************* -Prints information of dynamic memory usage and currently allocated memory -heaps or buffers. Can only be used in the debug version. */ - -void -mem_print_info(void) -/*================*/ -{ - mem_print_info_low(TRUE); -} - -/********************************************************************* -Prints information of dynamic memory usage and currently allocated memory -heaps or buffers since the last ..._print_info or..._print_new_info. */ - -void -mem_print_new_info(void) -/*====================*/ -{ - mem_print_info_low(FALSE); -} diff --git a/storage/innobase/mem/mem0mem.c b/storage/innobase/mem/mem0mem.c deleted file mode 100644 index f4fd178a39c..00000000000 --- a/storage/innobase/mem/mem0mem.c +++ /dev/null @@ -1,577 +0,0 @@ -/************************************************************************ -The memory management - -(c) 1994, 1995 Innobase Oy - -Created 6/9/1994 Heikki Tuuri -*************************************************************************/ - - -#include "mem0mem.h" -#ifdef UNIV_NONINL -#include "mem0mem.ic" -#endif - -#include "mach0data.h" -#include "buf0buf.h" -#include "btr0sea.h" -#include "srv0srv.h" -#include "mem0dbg.c" -#include <stdarg.h> - -/* - THE MEMORY MANAGEMENT - ===================== - -The basic element of the memory management is called a memory -heap. A memory heap is conceptually a -stack from which memory can be allocated. The stack may grow infinitely. -The top element of the stack may be freed, or -the whole stack can be freed at one time. 
The advantage of the -memory heap concept is that we can avoid using the malloc and free -functions of C, which are quite expensive: for example, on the Solaris + GCC -system (50 MHz Sparc, 1993) the pair takes 3 microseconds, and -on Win NT + 100MHz Pentium, 2.5 microseconds. -When we use a memory heap, -we can allocate larger blocks of memory at a time and thus -reduce overhead. The method is slightly more efficient when we -allocate the memory from the index page buffer pool, as we can -claim a new page fast. This is called buffer allocation. -When we allocate the memory from the dynamic memory of the -C environment, that is called dynamic allocation. - -The default mode of operation of the memory heap is the following. -First, when the heap is created, an initial block of memory is -allocated. In dynamic allocation this may be about 50 bytes. -If more space is needed, additional blocks are allocated -and they are put into a linked list. -After the initial block, each allocated block is twice the size of the -previous, until a threshold is attained, after which the sizes -of the blocks stay the same. An exception is, of course, the case -where the caller requests a memory buffer whose size is -bigger than the threshold. In that case a block big enough must -be allocated. - -The heap is physically arranged so that if the current block -becomes full, a new block is allocated and always inserted in the -chain of blocks as the last block. - -In the debug version of the memory management, all the allocated -heaps are kept in a list (which is implemented as a hash table). -Thus we can notice if the caller tries to free an already freed -heap. In addition, each buffer given to the caller contains -a start field at the start and a trailer field at the end of the buffer. - -The start field has the following content: -A. sizeof(ulint) bytes of field length (in the standard byte order) -B. sizeof(ulint) bytes of check field (a random number) - -The trailer field contains: -A. 
sizeof(ulint) bytes of check field (the same random number as at the start) - -Thus we can notice if something has been copied over the -borders of the buffer, which is illegal. -The memory in the buffers is initialized to a random byte sequence. -After freeing, all the blocks in the heap are set to random bytes -to help us discover errors which result from the use of -buffers in an already freed heap. */ - -#ifdef MEM_PERIODIC_CHECK - -ibool mem_block_list_inited; -/* List of all mem blocks allocated; protected by the mem_comm_pool mutex */ -UT_LIST_BASE_NODE_T(mem_block_t) mem_block_list; - -#endif - -/******************************************************************* -NOTE: Use the corresponding macro instead of this function. -Allocates a single buffer of memory from the dynamic memory of -the C compiler. Is like malloc of C. The buffer must be freed -with mem_free. */ - -void* -mem_alloc_func_noninline( -/*=====================*/ - /* out, own: free storage */ - ulint n, /* in: desired number of bytes */ - const char* file_name, /* in: file name where created */ - ulint line) /* in: line where created */ -{ - return(mem_alloc_func(n, file_name, line)); -} - -/************************************************************************** -Duplicates a NUL-terminated string, allocated from a memory heap. */ - -char* -mem_heap_strdup( -/*============*/ - /* out, own: a copy of the string */ - mem_heap_t* heap, /* in: memory heap where string is allocated */ - const char* str) /* in: string to be copied */ -{ - return(mem_heap_dup(heap, str, strlen(str) + 1)); -} - -/************************************************************************** -Duplicate a block of data, allocated from a memory heap. 
*/
-
-void*
-mem_heap_dup(
-/*=========*/
-				/* out, own: a copy of the data */
-	mem_heap_t*	heap,	/* in: memory heap where copy is allocated */
-	const void*	data,	/* in: data to be copied */
-	ulint		len)	/* in: length of data, in bytes */
-{
-	void*	copy;
-
-	/* Reserve len bytes in the heap and fill them from data */
-	copy = mem_heap_alloc(heap, len);
-	memcpy(copy, data, len);
-
-	return(copy);
-}
-
-/**************************************************************************
-Allocates len1 + len2 bytes from a memory heap and stores the bytes of
-block 1 immediately followed by the bytes of block 2 there. */
-
-void*
-mem_heap_cat(
-/*=========*/
-				/* out, own: the result */
-	mem_heap_t*	heap,	/* in: memory heap where result is allocated */
-	const void*	b1,	/* in: block 1 */
-	ulint		len1,	/* in: length of b1, in bytes */
-	const void*	b2,	/* in: block 2 */
-	ulint		len2)	/* in: length of b2, in bytes */
-{
-	char*	joined;
-
-	joined = mem_heap_alloc(heap, len1 + len2);
-
-	/* b1 fills the first len1 bytes, b2 the len2 bytes after them */
-	memcpy(joined, b1, len1);
-	memcpy(joined + len1, b2, len2);
-
-	return(joined);
-}
-
-/**************************************************************************
-Builds the NUL-terminated concatenation of two NUL-terminated strings in
-storage allocated from a memory heap. */
-
-char*
-mem_heap_strcat(
-/*============*/
-				/* out, own: the result */
-	mem_heap_t*	heap,	/* in: memory heap where string is allocated */
-	const char*	s1,	/* in: string 1 */
-	const char*	s2)	/* in: string 2 */
-{
-	ulint	head_len = strlen(s1);
-	ulint	tail_len = strlen(s2);
-	char*	joined;
-
-	/* One extra byte for the terminating NUL */
-	joined = mem_heap_alloc(heap, head_len + tail_len + 1);
-
-	memcpy(joined, s1, head_len);
-
-	/* Copying tail_len + 1 bytes carries the NUL of s2 along, which
-	terminates the result. */
-	memcpy(joined + head_len, s2, tail_len + 1);
-
-	return(joined);
-}
-
-
-/********************************************************************
-Helper function for mem_heap_printf.
*/ -static -ulint -mem_heap_printf_low( -/*================*/ - /* out: length of formatted string, - including terminating NUL */ - char* buf, /* in/out: buffer to store formatted string - in, or NULL to just calculate length */ - const char* format, /* in: format string */ - va_list ap) /* in: arguments */ -{ - ulint len = 0; - - while (*format) { - - /* Does this format specifier have the 'l' length modifier. */ - ibool is_long = FALSE; - - /* Length of one parameter. */ - size_t plen; - - if (*format++ != '%') { - /* Non-format character. */ - - len++; - - if (buf) { - *buf++ = *(format - 1); - } - - continue; - } - - if (*format == 'l') { - is_long = TRUE; - format++; - } - - switch (*format++) { - case 's': - /* string */ - { - char* s = va_arg(ap, char*); - - /* "%ls" is a non-sensical format specifier. */ - ut_a(!is_long); - - plen = strlen(s); - len += plen; - - if (buf) { - memcpy(buf, s, plen); - buf += plen; - } - } - - break; - - case 'u': - /* unsigned int */ - { - char tmp[32]; - unsigned long val; - - /* We only support 'long' values for now. */ - ut_a(is_long); - - val = va_arg(ap, unsigned long); - - plen = sprintf(tmp, "%lu", val); - len += plen; - - if (buf) { - memcpy(buf, tmp, plen); - buf += plen; - } - } - - break; - - case '%': - - /* "%l%" is a non-sensical format specifier. */ - ut_a(!is_long); - - len++; - - if (buf) { - *buf++ = '%'; - } - - break; - - default: - ut_error; - } - } - - /* For the NUL character. */ - len++; - - if (buf) { - *buf = '\0'; - } - - return(len); -} - -/******************************************************************** -A simple (s)printf replacement that dynamically allocates the space for the -formatted string from the given heap. This supports a very limited set of -the printf syntax: types 's' and 'u' and length modifier 'l' (which is -required for the 'u' type). 
*/ - -char* -mem_heap_printf( -/*============*/ - /* out: heap-allocated formatted string */ - mem_heap_t* heap, /* in: memory heap */ - const char* format, /* in: format string */ - ...) -{ - va_list ap; - char* str; - ulint len; - - /* Calculate length of string */ - len = 0; - va_start(ap, format); - len = mem_heap_printf_low(NULL, format, ap); - va_end(ap); - - /* Now create it for real. */ - str = mem_heap_alloc(heap, len); - va_start(ap, format); - mem_heap_printf_low(str, format, ap); - va_end(ap); - - return(str); -} - -/******************************************************************* -Creates a memory heap block where data can be allocated. */ - -mem_block_t* -mem_heap_create_block( -/*==================*/ - /* out, own: memory heap block, NULL if - did not succeed (only possible for - MEM_HEAP_BTR_SEARCH type heaps) */ - mem_heap_t* heap, /* in: memory heap or NULL if first block - should be created */ - ulint n, /* in: number of bytes needed for user data, or - if init_block is not NULL, its size in bytes */ - void* init_block, /* in: init block in fast create, - type must be MEM_HEAP_DYNAMIC */ - ulint type, /* in: type of heap: MEM_HEAP_DYNAMIC or - MEM_HEAP_BUFFER */ - const char* file_name,/* in: file name where created */ - ulint line) /* in: line where created */ -{ - mem_block_t* block; - ulint len; - - ut_ad((type == MEM_HEAP_DYNAMIC) || (type == MEM_HEAP_BUFFER) - || (type == MEM_HEAP_BUFFER + MEM_HEAP_BTR_SEARCH)); - - if (heap && heap->magic_n != MEM_BLOCK_MAGIC_N) { - mem_analyze_corruption(heap); - } - - /* In dynamic allocation, calculate the size: block header + data. 
*/ - - if (init_block != NULL) { - ut_ad(type == MEM_HEAP_DYNAMIC); - ut_ad(n > MEM_BLOCK_START_SIZE + MEM_BLOCK_HEADER_SIZE); - len = n; - block = init_block; - - } else if (type == MEM_HEAP_DYNAMIC) { - - len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n); - block = mem_area_alloc(len, mem_comm_pool); - } else { - ut_ad(n <= MEM_MAX_ALLOC_IN_BUF); - - len = MEM_BLOCK_HEADER_SIZE + MEM_SPACE_NEEDED(n); - - if (len < UNIV_PAGE_SIZE / 2) { - - block = mem_area_alloc(len, mem_comm_pool); - } else { - len = UNIV_PAGE_SIZE; - - if ((type & MEM_HEAP_BTR_SEARCH) && heap) { - /* We cannot allocate the block from the - buffer pool, but must get the free block from - the heap header free block field */ - - block = (mem_block_t*)heap->free_block; - heap->free_block = NULL; - } else { - block = (mem_block_t*)buf_frame_alloc(); - } - } - } - - if (block == NULL) { - /* Only MEM_HEAP_BTR_SEARCH allocation should ever fail. */ - ut_a(type & MEM_HEAP_BTR_SEARCH); - - return(NULL); - } - - block->magic_n = MEM_BLOCK_MAGIC_N; - ut_strlcpy_rev(block->file_name, file_name, sizeof(block->file_name)); - block->line = line; - -#ifdef MEM_PERIODIC_CHECK - mem_pool_mutex_enter(); - - if (!mem_block_list_inited) { - mem_block_list_inited = TRUE; - UT_LIST_INIT(mem_block_list); - } - - UT_LIST_ADD_LAST(mem_block_list, mem_block_list, block); - - mem_pool_mutex_exit(); -#endif - mem_block_set_len(block, len); - mem_block_set_type(block, type); - mem_block_set_free(block, MEM_BLOCK_HEADER_SIZE); - mem_block_set_start(block, MEM_BLOCK_HEADER_SIZE); - - block->free_block = NULL; - block->init_block = (init_block != NULL); - - ut_ad((ulint)MEM_BLOCK_HEADER_SIZE < len); - - return(block); -} - -/******************************************************************* -Adds a new block to a memory heap. 
*/ - -mem_block_t* -mem_heap_add_block( -/*===============*/ - /* out: created block, NULL if did not - succeed (only possible for - MEM_HEAP_BTR_SEARCH type heaps)*/ - mem_heap_t* heap, /* in: memory heap */ - ulint n) /* in: number of bytes user needs */ -{ - mem_block_t* block; - mem_block_t* new_block; - ulint new_size; - - ut_ad(mem_heap_check(heap)); - - block = UT_LIST_GET_LAST(heap->base); - - /* We have to allocate a new block. The size is always at least - doubled until the standard size is reached. After that the size - stays the same, except in cases where the caller needs more space. */ - - new_size = 2 * mem_block_get_len(block); - - if (heap->type != MEM_HEAP_DYNAMIC) { - /* From the buffer pool we allocate buffer frames */ - ut_a(n <= MEM_MAX_ALLOC_IN_BUF); - - if (new_size > MEM_MAX_ALLOC_IN_BUF) { - new_size = MEM_MAX_ALLOC_IN_BUF; - } - } else if (new_size > MEM_BLOCK_STANDARD_SIZE) { - - new_size = MEM_BLOCK_STANDARD_SIZE; - } - - if (new_size < n) { - new_size = n; - } - - new_block = mem_heap_create_block(heap, new_size, NULL, heap->type, - heap->file_name, heap->line); - if (new_block == NULL) { - - return(NULL); - } - - /* Add the new block as the last block */ - - UT_LIST_INSERT_AFTER(list, heap->base, block, new_block); - - return(new_block); -} - -/********************************************************************** -Frees a block from a memory heap. 
*/ - -void -mem_heap_block_free( -/*================*/ - mem_heap_t* heap, /* in: heap */ - mem_block_t* block) /* in: block to free */ -{ - ulint type; - ulint len; - ibool init_block; - - if (block->magic_n != MEM_BLOCK_MAGIC_N) { - mem_analyze_corruption(block); - } - - UT_LIST_REMOVE(list, heap->base, block); - -#ifdef MEM_PERIODIC_CHECK - mem_pool_mutex_enter(); - - UT_LIST_REMOVE(mem_block_list, mem_block_list, block); - - mem_pool_mutex_exit(); -#endif - type = heap->type; - len = block->len; - init_block = block->init_block; - block->magic_n = MEM_FREED_BLOCK_MAGIC_N; - -#ifdef UNIV_MEM_DEBUG - /* In the debug version we set the memory to a random combination - of hex 0xDE and 0xAD. */ - - mem_erase_buf((byte*)block, len); -#else /* UNIV_MEM_DEBUG */ - UNIV_MEM_ASSERT_AND_FREE(block, len); -#endif /* UNIV_MEM_DEBUG */ - - if (init_block) { - /* Do not have to free: do nothing */ - - } else if (type == MEM_HEAP_DYNAMIC) { - - mem_area_free(block, mem_comm_pool); - } else { - ut_ad(type & MEM_HEAP_BUFFER); - - if (len >= UNIV_PAGE_SIZE / 2) { - buf_frame_free((byte*)block); - } else { - mem_area_free(block, mem_comm_pool); - } - } -} - -/********************************************************************** -Frees the free_block field from a memory heap. */ - -void -mem_heap_free_block_free( -/*=====================*/ - mem_heap_t* heap) /* in: heap */ -{ - if (heap->free_block) { - - buf_frame_free(heap->free_block); - - heap->free_block = NULL; - } -} - -#ifdef MEM_PERIODIC_CHECK -/********************************************************************** -Goes through the list of all allocated mem blocks, checks their magic -numbers, and reports possible corruption. 
*/ - -void -mem_validate_all_blocks(void) -/*=========================*/ -{ - mem_block_t* block; - - mem_pool_mutex_enter(); - - block = UT_LIST_GET_FIRST(mem_block_list); - - while (block) { - if (block->magic_n != MEM_BLOCK_MAGIC_N) { - mem_analyze_corruption(block); - } - - block = UT_LIST_GET_NEXT(mem_block_list, block); - } - - mem_pool_mutex_exit(); -} -#endif diff --git a/storage/innobase/mem/mem0pool.c b/storage/innobase/mem/mem0pool.c deleted file mode 100644 index 315f719ca09..00000000000 --- a/storage/innobase/mem/mem0pool.c +++ /dev/null @@ -1,682 +0,0 @@ -/************************************************************************ -The lowest-level memory management - -(c) 1997 Innobase Oy - -Created 5/12/1997 Heikki Tuuri -*************************************************************************/ - -#include "mem0pool.h" -#ifdef UNIV_NONINL -#include "mem0pool.ic" -#endif - -#include "sync0sync.h" -#include "ut0mem.h" -#include "ut0lst.h" -#include "ut0byte.h" -#include "mem0mem.h" - -/* We would like to use also the buffer frames to allocate memory. This -would be desirable, because then the memory consumption of the database -would be fixed, and we might even lock the buffer pool to the main memory. -The problem here is that the buffer management routines can themselves call -memory allocation, while the buffer pool mutex is reserved. - -The main components of the memory consumption are: - -1. buffer pool, -2. parsed and optimized SQL statements, -3. data dictionary cache, -4. log buffer, -5. locks for each transaction, -6. hash table for the adaptive index, -7. state and buffers for each SQL query currently being executed, -8. session for each user, and -9. stack for each OS thread. - -Items 1 and 2 are managed by an LRU algorithm. Items 5 and 6 can potentially -consume very much memory. Items 7 and 8 should consume quite little memory, -and the OS should take care of item 9, which too should consume little memory. 
- -A solution to the memory management: - -1. the buffer pool size is set separately; -2. log buffer size is set separately; -3. the common pool size for all the other entries, except 8, is set separately. - -Problems: we may waste memory if the common pool is set too big. Another -problem is the locks, which may take very much space in big transactions. -Then the shared pool size should be set very big. We can allow locks to take -space from the buffer pool, but the SQL optimizer is then unaware of the -usable size of the buffer pool. We could also combine the objects in the -common pool and the buffers in the buffer pool into a single LRU list and -manage it uniformly, but this approach does not take into account the parsing -and other costs unique to SQL statements. - -The locks for a transaction can be seen as a part of the state of the -transaction. Hence, they should be stored in the common pool. We still -have the problem of a very big update transaction, for example, which -will set very many x-locks on rows, and the locks will consume a lot -of memory, say, half of the buffer pool size. - -Another problem is what to do if we are not able to malloc a requested -block of memory from the common pool. Then we can request memory from -the operating system. If it does not help, a system error results. - -Because 5 and 6 may potentially consume very much memory, we let them grow -into the buffer pool. We may let the locks of a transaction take frames -from the buffer pool, when the corresponding memory heap block has grown to -the size of a buffer frame. Similarly for the hash node cells of the locks, -and for the adaptive index. Thus, for each individual transaction, its locks -can occupy at most about the size of the buffer frame of memory in the common -pool, and after that its locks will grow into the buffer pool. 
*/ - -/* Mask used to extract the free bit from area->size */ -#define MEM_AREA_FREE 1 - -/* The smallest memory area total size */ -#define MEM_AREA_MIN_SIZE (2 * MEM_AREA_EXTRA_SIZE) - - -/* Data structure for a memory pool. The space is allocated using the buddy -algorithm, where free list i contains areas of size 2 to power i. */ -struct mem_pool_struct{ - byte* buf; /* memory pool */ - ulint size; /* memory common pool size */ - ulint reserved; /* amount of currently allocated - memory */ - mutex_t mutex; /* mutex protecting this struct */ - UT_LIST_BASE_NODE_T(mem_area_t) - free_list[64]; /* lists of free memory areas: an - area is put to the list whose number - is the 2-logarithm of the area size */ -}; - -/* The common memory pool */ -mem_pool_t* mem_comm_pool = NULL; - -/* We use this counter to check that the mem pool mutex does not leak; -this is to track a strange assertion failure reported at -mysql@lists.mysql.com */ - -ulint mem_n_threads_inside = 0; - -/************************************************************************ -Reserves the mem pool mutex. */ - -void -mem_pool_mutex_enter(void) -/*======================*/ -{ - mutex_enter(&(mem_comm_pool->mutex)); -} - -/************************************************************************ -Releases the mem pool mutex. */ - -void -mem_pool_mutex_exit(void) -/*=====================*/ -{ - mutex_exit(&(mem_comm_pool->mutex)); -} - -/************************************************************************ -Returns memory area size. */ -UNIV_INLINE -ulint -mem_area_get_size( -/*==============*/ - /* out: size */ - mem_area_t* area) /* in: area */ -{ - return(area->size_and_free & ~MEM_AREA_FREE); -} - -/************************************************************************ -Sets memory area size. 
*/ -UNIV_INLINE -void -mem_area_set_size( -/*==============*/ - mem_area_t* area, /* in: area */ - ulint size) /* in: size */ -{ - area->size_and_free = (area->size_and_free & MEM_AREA_FREE) - | size; -} - -/************************************************************************ -Returns memory area free bit. */ -UNIV_INLINE -ibool -mem_area_get_free( -/*==============*/ - /* out: TRUE if free */ - mem_area_t* area) /* in: area */ -{ -#if TRUE != MEM_AREA_FREE -# error "TRUE != MEM_AREA_FREE" -#endif - return(area->size_and_free & MEM_AREA_FREE); -} - -/************************************************************************ -Sets memory area free bit. */ -UNIV_INLINE -void -mem_area_set_free( -/*==============*/ - mem_area_t* area, /* in: area */ - ibool free) /* in: free bit value */ -{ -#if TRUE != MEM_AREA_FREE -# error "TRUE != MEM_AREA_FREE" -#endif - area->size_and_free = (area->size_and_free & ~MEM_AREA_FREE) - | free; -} - -/************************************************************************ -Creates a memory pool. */ - -mem_pool_t* -mem_pool_create( -/*============*/ - /* out: memory pool */ - ulint size) /* in: pool size in bytes */ -{ - mem_pool_t* pool; - mem_area_t* area; - ulint i; - ulint used; - - ut_a(size > 10000); - - pool = ut_malloc(sizeof(mem_pool_t)); - - /* We do not set the memory to zero (FALSE) in the pool, - but only when allocated at a higher level in mem0mem.c. - This is to avoid masking useful Purify warnings. 
*/ - - pool->buf = ut_malloc_low(size, FALSE, TRUE); - pool->size = size; - - mutex_create(&pool->mutex, SYNC_MEM_POOL); - - /* Initialize the free lists */ - - for (i = 0; i < 64; i++) { - - UT_LIST_INIT(pool->free_list[i]); - } - - used = 0; - - while (size - used >= MEM_AREA_MIN_SIZE) { - - i = ut_2_log(size - used); - - if (ut_2_exp(i) > size - used) { - - /* ut_2_log rounds upward */ - - i--; - } - - area = (mem_area_t*)(pool->buf + used); - - mem_area_set_size(area, ut_2_exp(i)); - mem_area_set_free(area, TRUE); - UNIV_MEM_FREE(MEM_AREA_EXTRA_SIZE + (byte*) area, - ut_2_exp(i) - MEM_AREA_EXTRA_SIZE); - - UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area); - - used = used + ut_2_exp(i); - } - - ut_ad(size >= used); - - pool->reserved = 0; - - return(pool); -} - -/************************************************************************ -Fills the specified free list. */ -static -ibool -mem_pool_fill_free_list( -/*====================*/ - /* out: TRUE if we were able to insert a - block to the free list */ - ulint i, /* in: free list index */ - mem_pool_t* pool) /* in: memory pool */ -{ - mem_area_t* area; - mem_area_t* area2; - ibool ret; - - ut_ad(mutex_own(&(pool->mutex))); - - if (i >= 63) { - /* We come here when we have run out of space in the - memory pool: */ - - return(FALSE); - } - - area = UT_LIST_GET_FIRST(pool->free_list[i + 1]); - - if (area == NULL) { - if (UT_LIST_GET_LEN(pool->free_list[i + 1]) > 0) { - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: mem pool free list %lu" - " length is %lu\n" - "InnoDB: though the list is empty!\n", - (ulong) i + 1, - (ulong) - UT_LIST_GET_LEN(pool->free_list[i + 1])); - } - - ret = mem_pool_fill_free_list(i + 1, pool); - - if (ret == FALSE) { - - return(FALSE); - } - - area = UT_LIST_GET_FIRST(pool->free_list[i + 1]); - } - - if (UT_LIST_GET_LEN(pool->free_list[i + 1]) == 0) { - mem_analyze_corruption(area); - - ut_error; - } - - UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area); 
- - area2 = (mem_area_t*)(((byte*)area) + ut_2_exp(i)); - UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE); - - mem_area_set_size(area2, ut_2_exp(i)); - mem_area_set_free(area2, TRUE); - - UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area2); - - mem_area_set_size(area, ut_2_exp(i)); - - UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area); - - return(TRUE); -} - -/************************************************************************ -Allocates memory from a pool. NOTE: This low-level function should only be -used in mem0mem.*! */ - -void* -mem_area_alloc( -/*===========*/ - /* out, own: allocated memory buffer */ - ulint size, /* in: allocated size in bytes; for optimum - space usage, the size should be a power of 2 - minus MEM_AREA_EXTRA_SIZE */ - mem_pool_t* pool) /* in: memory pool */ -{ -#ifdef UNIV_DISABLE_MEM_POOL - (void)pool; /* Remove compiler warning */ - return malloc(size); -#else /* UNIV_DISABLE_MEM_POOL */ - mem_area_t* area; - ulint n; - ibool ret; - - n = ut_2_log(ut_max(size + MEM_AREA_EXTRA_SIZE, MEM_AREA_MIN_SIZE)); - - mutex_enter(&(pool->mutex)); - mem_n_threads_inside++; - - ut_a(mem_n_threads_inside == 1); - - area = UT_LIST_GET_FIRST(pool->free_list[n]); - - if (area == NULL) { - ret = mem_pool_fill_free_list(n, pool); - - if (ret == FALSE) { - /* Out of memory in memory pool: we try to allocate - from the operating system with the regular malloc: */ - - mem_n_threads_inside--; - mutex_exit(&(pool->mutex)); - - return(ut_malloc(size)); - } - - area = UT_LIST_GET_FIRST(pool->free_list[n]); - } - - if (!mem_area_get_free(area)) { - fprintf(stderr, - "InnoDB: Error: Removing element from mem pool" - " free list %lu though the\n" - "InnoDB: element is not marked free!\n", - (ulong) n); - - mem_analyze_corruption(area); - - /* Try to analyze a strange assertion failure reported at - mysql@lists.mysql.com where the free bit IS 1 in the - hex dump above */ - - if (mem_area_get_free(area)) { - fprintf(stderr, - "InnoDB: Probably a race condition" 
- " because now the area is marked free!\n"); - } - - ut_error; - } - - if (UT_LIST_GET_LEN(pool->free_list[n]) == 0) { - fprintf(stderr, - "InnoDB: Error: Removing element from mem pool" - " free list %lu\n" - "InnoDB: though the list length is 0!\n", - (ulong) n); - mem_analyze_corruption(area); - - ut_error; - } - - ut_ad(mem_area_get_size(area) == ut_2_exp(n)); - - mem_area_set_free(area, FALSE); - - UT_LIST_REMOVE(free_list, pool->free_list[n], area); - - pool->reserved += mem_area_get_size(area); - - mem_n_threads_inside--; - mutex_exit(&(pool->mutex)); - - ut_ad(mem_pool_validate(pool)); - UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area, - ut_2_exp(n) - MEM_AREA_EXTRA_SIZE); - - return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area))); -#endif /* UNIV_DISABLE_MEM_POOL */ -} - -/************************************************************************ -Gets the buddy of an area, if it exists in pool. */ -UNIV_INLINE -mem_area_t* -mem_area_get_buddy( -/*===============*/ - /* out: the buddy, NULL if no buddy in pool */ - mem_area_t* area, /* in: memory area */ - ulint size, /* in: memory area size */ - mem_pool_t* pool) /* in: memory pool */ -{ - mem_area_t* buddy; - - ut_ad(size != 0); - - if (((((byte*)area) - pool->buf) % (2 * size)) == 0) { - - /* The buddy is in a higher address */ - - buddy = (mem_area_t*)(((byte*)area) + size); - - if ((((byte*)buddy) - pool->buf) + size > pool->size) { - - /* The buddy is not wholly contained in the pool: - there is no buddy */ - - buddy = NULL; - } - } else { - /* The buddy is in a lower address; NOTE that area cannot - be at the pool lower end, because then we would end up to - the upper branch in this if-clause: the remainder would be - 0 */ - - buddy = (mem_area_t*)(((byte*)area) - size); - } - - return(buddy); -} - -/************************************************************************ -Frees memory to a pool. 
*/ - -void -mem_area_free( -/*==========*/ - void* ptr, /* in, own: pointer to allocated memory - buffer */ - mem_pool_t* pool) /* in: memory pool */ -{ -#ifdef UNIV_DISABLE_MEM_POOL - (void)pool; /* Remove compiler warning */ - free(ptr); -#else /* UNIV_DISABLE_MEM_POOL */ - mem_area_t* area; - mem_area_t* buddy; - void* new_ptr; - ulint size; - ulint n; - - /* It may be that the area was really allocated from the OS with - regular malloc: check if ptr points within our memory pool */ - - if ((byte*)ptr < pool->buf || (byte*)ptr >= pool->buf + pool->size) { - ut_free(ptr); - - return; - } - - area = (mem_area_t*) (((byte*)ptr) - MEM_AREA_EXTRA_SIZE); - - if (mem_area_get_free(area)) { - fprintf(stderr, - "InnoDB: Error: Freeing element to mem pool" - " free list though the\n" - "InnoDB: element is marked free!\n"); - - mem_analyze_corruption(area); - ut_error; - } - - size = mem_area_get_size(area); - UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE); - - if (size == 0) { - fprintf(stderr, - "InnoDB: Error: Mem area size is 0. 
Possibly a" - " memory overrun of the\n" - "InnoDB: previous allocated area!\n"); - - mem_analyze_corruption(area); - ut_error; - } - -#ifdef UNIV_LIGHT_MEM_DEBUG - if (((byte*)area) + size < pool->buf + pool->size) { - - ulint next_size; - - next_size = mem_area_get_size( - (mem_area_t*)(((byte*)area) + size)); - if (ut_2_power_up(next_size) != next_size) { - fprintf(stderr, - "InnoDB: Error: Memory area size %lu," - " next area size %lu not a power of 2!\n" - "InnoDB: Possibly a memory overrun of" - " the buffer being freed here.\n", - (ulong) size, (ulong) next_size); - mem_analyze_corruption(area); - - ut_error; - } - } -#endif - buddy = mem_area_get_buddy(area, size, pool); - - n = ut_2_log(size); - - mutex_enter(&(pool->mutex)); - mem_n_threads_inside++; - - ut_a(mem_n_threads_inside == 1); - - if (buddy && mem_area_get_free(buddy) - && (size == mem_area_get_size(buddy))) { - - /* The buddy is in a free list */ - - if ((byte*)buddy < (byte*)area) { - new_ptr = ((byte*)buddy) + MEM_AREA_EXTRA_SIZE; - - mem_area_set_size(buddy, 2 * size); - mem_area_set_free(buddy, FALSE); - } else { - new_ptr = ptr; - - mem_area_set_size(area, 2 * size); - } - - /* Remove the buddy from its free list and merge it to area */ - - UT_LIST_REMOVE(free_list, pool->free_list[n], buddy); - - pool->reserved += ut_2_exp(n); - - mem_n_threads_inside--; - mutex_exit(&(pool->mutex)); - - mem_area_free(new_ptr, pool); - - return; - } else { - UT_LIST_ADD_FIRST(free_list, pool->free_list[n], area); - - mem_area_set_free(area, TRUE); - - ut_ad(pool->reserved >= size); - - pool->reserved -= size; - } - - mem_n_threads_inside--; - mutex_exit(&(pool->mutex)); - - ut_ad(mem_pool_validate(pool)); -#endif /* UNIV_DISABLE_MEM_POOL */ -} - -/************************************************************************ -Validates a memory pool. 
*/ - -ibool -mem_pool_validate( -/*==============*/ - /* out: TRUE if ok */ - mem_pool_t* pool) /* in: memory pool */ -{ - mem_area_t* area; - mem_area_t* buddy; - ulint free; - ulint i; - - mutex_enter(&(pool->mutex)); - - free = 0; - - for (i = 0; i < 64; i++) { - - UT_LIST_VALIDATE(free_list, mem_area_t, pool->free_list[i]); - - area = UT_LIST_GET_FIRST(pool->free_list[i]); - - while (area != NULL) { - ut_a(mem_area_get_free(area)); - ut_a(mem_area_get_size(area) == ut_2_exp(i)); - - buddy = mem_area_get_buddy(area, ut_2_exp(i), pool); - - ut_a(!buddy || !mem_area_get_free(buddy) - || (ut_2_exp(i) != mem_area_get_size(buddy))); - - area = UT_LIST_GET_NEXT(free_list, area); - - free += ut_2_exp(i); - } - } - - ut_a(free + pool->reserved == pool->size); - - mutex_exit(&(pool->mutex)); - - return(TRUE); -} - -/************************************************************************ -Prints info of a memory pool. */ - -void -mem_pool_print_info( -/*================*/ - FILE* outfile,/* in: output file to write to */ - mem_pool_t* pool) /* in: memory pool */ -{ - ulint i; - - mem_pool_validate(pool); - - fprintf(outfile, "INFO OF A MEMORY POOL\n"); - - mutex_enter(&(pool->mutex)); - - for (i = 0; i < 64; i++) { - if (UT_LIST_GET_LEN(pool->free_list[i]) > 0) { - - fprintf(outfile, - "Free list length %lu for" - " blocks of size %lu\n", - (ulong) UT_LIST_GET_LEN(pool->free_list[i]), - (ulong) ut_2_exp(i)); - } - } - - fprintf(outfile, "Pool size %lu, reserved %lu.\n", (ulong) pool->size, - (ulong) pool->reserved); - mutex_exit(&(pool->mutex)); -} - -/************************************************************************ -Returns the amount of reserved memory. 
*/ - -ulint -mem_pool_get_reserved( -/*==================*/ - /* out: reserved memory in bytes */ - mem_pool_t* pool) /* in: memory pool */ -{ - ulint reserved; - - mutex_enter(&(pool->mutex)); - - reserved = pool->reserved; - - mutex_exit(&(pool->mutex)); - - return(reserved); -} diff --git a/storage/innobase/mtr/mtr0log.c b/storage/innobase/mtr/mtr0log.c deleted file mode 100644 index e5d572bbfa7..00000000000 --- a/storage/innobase/mtr/mtr0log.c +++ /dev/null @@ -1,575 +0,0 @@ -/****************************************************** -Mini-transaction log routines - -(c) 1995 Innobase Oy - -Created 12/7/1995 Heikki Tuuri -*******************************************************/ - -#include "mtr0log.h" - -#ifdef UNIV_NONINL -#include "mtr0log.ic" -#endif - -#include "buf0buf.h" -#include "dict0boot.h" -#include "log0recv.h" -#include "page0page.h" - -/************************************************************ -Catenates n bytes to the mtr log. */ - -void -mlog_catenate_string( -/*=================*/ - mtr_t* mtr, /* in: mtr */ - const byte* str, /* in: string to write */ - ulint len) /* in: string length */ -{ - dyn_array_t* mlog; - - if (mtr_get_log_mode(mtr) == MTR_LOG_NONE) { - - return; - } - - mlog = &(mtr->log); - - dyn_push_string(mlog, str, len); -} - -/************************************************************ -Writes the initial part of a log record consisting of one-byte item -type and four-byte space and page numbers. Also pushes info -to the mtr memo that a buffer page has been modified. */ - -void -mlog_write_initial_log_record( -/*==========================*/ - byte* ptr, /* in: pointer to (inside) a buffer frame holding the - file page where modification is made */ - byte type, /* in: log item type: MLOG_1BYTE, ... 
*/ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - byte* log_ptr; - - ut_ad(type <= MLOG_BIGGEST_TYPE); - ut_ad(type > MLOG_8BYTES); - - if (ptr < buf_pool->frame_zero || ptr >= buf_pool->high_end) { - fprintf(stderr, - "InnoDB: Error: trying to write to" - " a stray memory location %p\n", (void*) ptr); - ut_error; - } - - log_ptr = mlog_open(mtr, 11); - - /* If no logging is requested, we may return now */ - if (log_ptr == NULL) { - - return; - } - - log_ptr = mlog_write_initial_log_record_fast(ptr, type, log_ptr, mtr); - - mlog_close(mtr, log_ptr); -} - -/************************************************************ -Parses an initial log record written by mlog_write_initial_log_record. */ - -byte* -mlog_parse_initial_log_record( -/*==========================*/ - /* out: parsed record end, NULL if not a complete - record */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - byte* type, /* out: log record type: MLOG_1BYTE, ... */ - ulint* space, /* out: space id */ - ulint* page_no)/* out: page number */ -{ - if (end_ptr < ptr + 1) { - - return(NULL); - } - - *type = (byte)((ulint)*ptr & ~MLOG_SINGLE_REC_FLAG); - ut_ad(*type <= MLOG_BIGGEST_TYPE); - - ptr++; - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - ptr = mach_parse_compressed(ptr, end_ptr, space); - - if (ptr == NULL) { - - return(NULL); - } - - ptr = mach_parse_compressed(ptr, end_ptr, page_no); - - return(ptr); -} - -/************************************************************ -Parses a log record written by mlog_write_ulint or mlog_write_dulint. */ - -byte* -mlog_parse_nbytes( -/*==============*/ - /* out: parsed record end, NULL if not a complete - record or a corrupt record */ - ulint type, /* in: log record type: MLOG_1BYTE, ... 
*/ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - byte* page) /* in: page where to apply the log record, or NULL */ -{ - ulint offset; - ulint val; - dulint dval; - - ut_a(type <= MLOG_8BYTES); - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - offset = mach_read_from_2(ptr); - ptr += 2; - - if (offset >= UNIV_PAGE_SIZE) { - recv_sys->found_corrupt_log = TRUE; - - return(NULL); - } - - if (type == MLOG_8BYTES) { - ptr = mach_dulint_parse_compressed(ptr, end_ptr, &dval); - - if (ptr == NULL) { - - return(NULL); - } - - if (page) { - mach_write_to_8(page + offset, dval); - } - - return(ptr); - } - - ptr = mach_parse_compressed(ptr, end_ptr, &val); - - if (ptr == NULL) { - - return(NULL); - } - - if (type == MLOG_1BYTE) { - if (val > 0xFFUL) { - recv_sys->found_corrupt_log = TRUE; - - return(NULL); - } - } else if (type == MLOG_2BYTES) { - if (val > 0xFFFFUL) { - recv_sys->found_corrupt_log = TRUE; - - return(NULL); - } - } else { - if (type != MLOG_4BYTES) { - recv_sys->found_corrupt_log = TRUE; - - return(NULL); - } - } - - if (page) { - if (type == MLOG_1BYTE) { - mach_write_to_1(page + offset, val); - } else if (type == MLOG_2BYTES) { - mach_write_to_2(page + offset, val); - } else { - ut_a(type == MLOG_4BYTES); - mach_write_to_4(page + offset, val); - } - } - - return(ptr); -} - -/************************************************************ -Writes 1 - 4 bytes to a file page buffered in the buffer pool. -Writes the corresponding log record to the mini-transaction log. 
*/

void
mlog_write_ulint(
/*=============*/
	byte*	ptr,	/* in: pointer where to write */
	ulint	val,	/* in: value to write */
	byte	type,	/* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */
	mtr_t*	mtr)	/* in: mini-transaction handle */
{
	byte*	rec_ptr;

	/* The target must lie between frame_zero and high_end of the
	buffer pool */

	if (!(ptr >= buf_pool->frame_zero && ptr < buf_pool->high_end)) {
		fprintf(stderr,
			"InnoDB: Error: trying to write to"
			" a stray memory location %p\n", (void*) ptr);
		ut_error;
	}

	/* First modify the page itself ... */

	if (type == MLOG_1BYTE) {
		mach_write_to_1(ptr, val);
	} else if (type == MLOG_2BYTES) {
		mach_write_to_2(ptr, val);
	} else {
		ut_ad(type == MLOG_4BYTES);
		mach_write_to_4(ptr, val);
	}

	/* ... then log the change: room for the initial record part
	(11 bytes are reserved for it), the 2-byte offset within the page,
	and the value in compressed form (presumably at most 5 bytes) */

	rec_ptr = mlog_open(mtr, 11 + 2 + 5);

	if (rec_ptr != NULL) {
		rec_ptr = mlog_write_initial_log_record_fast(ptr, type,
							     rec_ptr, mtr);

		mach_write_to_2(rec_ptr, ptr - buf_frame_align(ptr));
		rec_ptr += 2;

		rec_ptr += mach_write_compressed(rec_ptr, val);

		mlog_close(mtr, rec_ptr);
	}

	/* A NULL from mlog_open means that no logging is requested */
}

/************************************************************
Writes 8 bytes to a file page buffered in the buffer pool.
Writes the corresponding log record to the mini-transaction log.
*/

void
mlog_write_dulint(
/*==============*/
	byte*	ptr,	/* in: pointer where to write */
	dulint	val,	/* in: value to write */
	mtr_t*	mtr)	/* in: mini-transaction handle */
{
	byte*	rec_ptr;

	/* The target must lie between frame_zero and high_end of the
	buffer pool */

	if (UNIV_UNLIKELY(ptr < buf_pool->frame_zero)
	    || UNIV_UNLIKELY(ptr >= buf_pool->high_end)) {
		fprintf(stderr,
			"InnoDB: Error: trying to write to"
			" a stray memory location %p\n", (void*) ptr);
		ut_error;
	}

	ut_ad(ptr && mtr);

	/* Modify the page first, then log the change */

	mach_write_to_8(ptr, val);

	/* Room for the initial record part (11 bytes are reserved for it),
	the 2-byte offset within the page, and the value in compressed
	form (presumably at most 9 bytes) */

	rec_ptr = mlog_open(mtr, 11 + 2 + 9);

	if (rec_ptr != NULL) {
		rec_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_8BYTES,
							     rec_ptr, mtr);

		mach_write_to_2(rec_ptr, ptr - buf_frame_align(ptr));
		rec_ptr += 2;

		rec_ptr += mach_dulint_write_compressed(rec_ptr, val);

		mlog_close(mtr, rec_ptr);
	}

	/* A NULL from mlog_open means that no logging is requested */
}

/************************************************************
Writes a string to a file page buffered in the buffer pool. Writes the
corresponding log record to the mini-transaction log.
*/ - -void -mlog_write_string( -/*==============*/ - byte* ptr, /* in: pointer where to write */ - const byte* str, /* in: string to write */ - ulint len, /* in: string length */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - byte* log_ptr; - - if (UNIV_UNLIKELY(ptr < buf_pool->frame_zero) - || UNIV_UNLIKELY(ptr >= buf_pool->high_end)) { - fprintf(stderr, - "InnoDB: Error: trying to write to" - " a stray memory location %p\n", (void*) ptr); - ut_error; - } - ut_ad(ptr && mtr); - ut_a(len < UNIV_PAGE_SIZE); - - ut_memcpy(ptr, str, len); - - log_ptr = mlog_open(mtr, 30); - - /* If no logging is requested, we may return now */ - if (log_ptr == NULL) { - - return; - } - - log_ptr = mlog_write_initial_log_record_fast(ptr, MLOG_WRITE_STRING, - log_ptr, mtr); - mach_write_to_2(log_ptr, ptr - buf_frame_align(ptr)); - log_ptr += 2; - - mach_write_to_2(log_ptr, len); - log_ptr += 2; - - mlog_close(mtr, log_ptr); - - mlog_catenate_string(mtr, str, len); -} - -/************************************************************ -Parses a log record written by mlog_write_string. */ - -byte* -mlog_parse_string( -/*==============*/ - /* out: parsed record end, NULL if not a complete - record */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - byte* page) /* in: page where to apply the log record, or NULL */ -{ - ulint offset; - ulint len; - - if (end_ptr < ptr + 4) { - - return(NULL); - } - - offset = mach_read_from_2(ptr); - ptr += 2; - - if (offset >= UNIV_PAGE_SIZE) { - recv_sys->found_corrupt_log = TRUE; - - return(NULL); - } - - len = mach_read_from_2(ptr); - ptr += 2; - - ut_a(len + offset < UNIV_PAGE_SIZE); - - if (end_ptr < ptr + len) { - - return(NULL); - } - - if (page) { - ut_memcpy(page + offset, ptr, len); - } - - return(ptr + len); -} - -/************************************************************ -Opens a buffer for mlog, writes the initial log record and, -if needed, the field lengths of an index. 
*/ - -byte* -mlog_open_and_write_index( -/*======================*/ - /* out: buffer, NULL if log mode - MTR_LOG_NONE */ - mtr_t* mtr, /* in: mtr */ - byte* rec, /* in: index record or page */ - dict_index_t* index, /* in: record descriptor */ - byte type, /* in: log item type */ - ulint size) /* in: requested buffer size in bytes - (if 0, calls mlog_close() and returns NULL) */ -{ - byte* log_ptr; - const byte* log_start; - const byte* log_end; - - ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); - - if (!page_rec_is_comp(rec)) { - log_start = log_ptr = mlog_open(mtr, 11 + size); - if (!log_ptr) { - return(NULL); /* logging is disabled */ - } - log_ptr = mlog_write_initial_log_record_fast(rec, type, - log_ptr, mtr); - log_end = log_ptr + 11 + size; - } else { - ulint i; - ulint n = dict_index_get_n_fields(index); - /* total size needed */ - ulint total = 11 + size + (n + 2) * 2; - ulint alloc = total; - /* allocate at most DYN_ARRAY_DATA_SIZE at a time */ - if (alloc > DYN_ARRAY_DATA_SIZE) { - alloc = DYN_ARRAY_DATA_SIZE; - } - log_start = log_ptr = mlog_open(mtr, alloc); - if (!log_ptr) { - return(NULL); /* logging is disabled */ - } - log_end = log_ptr + alloc; - log_ptr = mlog_write_initial_log_record_fast(rec, type, - log_ptr, mtr); - mach_write_to_2(log_ptr, n); - log_ptr += 2; - mach_write_to_2(log_ptr, - dict_index_get_n_unique_in_tree(index)); - log_ptr += 2; - for (i = 0; i < n; i++) { - dict_field_t* field; - const dict_col_t* col; - ulint len; - - field = dict_index_get_nth_field(index, i); - col = dict_field_get_col(field); - len = field->fixed_len; - ut_ad(len < 0x7fff); - if (len == 0 - && (col->len > 255 || col->mtype == DATA_BLOB)) { - /* variable-length field - with maximum length > 255 */ - len = 0x7fff; - } - if (col->prtype & DATA_NOT_NULL) { - len |= 0x8000; - } - if (log_ptr + 2 > log_end) { - mlog_close(mtr, log_ptr); - ut_a(total > (ulint) (log_ptr - log_start)); - total -= log_ptr - log_start; - alloc = total; - if 
(alloc > DYN_ARRAY_DATA_SIZE) { - alloc = DYN_ARRAY_DATA_SIZE; - } - log_start = log_ptr = mlog_open(mtr, alloc); - if (!log_ptr) { - return(NULL); /* logging is disabled */ - } - log_end = log_ptr + alloc; - } - mach_write_to_2(log_ptr, len); - log_ptr += 2; - } - } - if (size == 0) { - mlog_close(mtr, log_ptr); - log_ptr = NULL; - } else if (log_ptr + size > log_end) { - mlog_close(mtr, log_ptr); - log_ptr = mlog_open(mtr, size); - } - return(log_ptr); -} - -/************************************************************ -Parses a log record written by mlog_open_and_write_index. */ - -byte* -mlog_parse_index( -/*=============*/ - /* out: parsed record end, - NULL if not a complete record */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - /* out: new value of log_ptr */ - ibool comp, /* in: TRUE=compact record format */ - dict_index_t** index) /* out, own: dummy index */ -{ - ulint i, n, n_uniq; - dict_table_t* table; - dict_index_t* ind; - - ut_ad(comp == FALSE || comp == TRUE); - - if (comp) { - if (end_ptr < ptr + 4) { - return(NULL); - } - n = mach_read_from_2(ptr); - ptr += 2; - n_uniq = mach_read_from_2(ptr); - ptr += 2; - ut_ad(n_uniq <= n); - if (end_ptr < ptr + n * 2) { - return(NULL); - } - } else { - n = n_uniq = 1; - } - table = dict_mem_table_create("LOG_DUMMY", DICT_HDR_SPACE, n, - comp ? DICT_TF_COMPACT : 0); - ind = dict_mem_index_create("LOG_DUMMY", "LOG_DUMMY", - DICT_HDR_SPACE, 0, n); - ind->table = table; - ind->n_uniq = (unsigned int) n_uniq; - if (n_uniq != n) { - ut_a(n_uniq + DATA_ROLL_PTR <= n); - ind->type = DICT_CLUSTERED; - } - if (comp) { - for (i = 0; i < n; i++) { - ulint len = mach_read_from_2(ptr); - ptr += 2; - /* The high-order bit of len is the NOT NULL flag; - the rest is 0 or 0x7fff for variable-length fields, - and 1..0x7ffe for fixed-length fields. */ - dict_mem_table_add_col( - table, NULL, NULL, - ((len + 1) & 0x7fff) <= 1 - ? DATA_BINARY : DATA_FIXBINARY, - len & 0x8000 ? 
DATA_NOT_NULL : 0, - len & 0x7fff); - - dict_index_add_col(ind, table, (dict_col_t*) - dict_table_get_nth_col(table, i), - 0); - } - dict_table_add_system_columns(table, table->heap); - if (n_uniq != n) { - /* Identify DB_TRX_ID and DB_ROLL_PTR in the index. */ - ut_a(DATA_TRX_ID_LEN - == dict_index_get_nth_col(ind, DATA_TRX_ID - 1 - + n_uniq)->len); - ut_a(DATA_ROLL_PTR_LEN - == dict_index_get_nth_col(ind, DATA_ROLL_PTR - 1 - + n_uniq)->len); - ind->fields[DATA_TRX_ID - 1 + n_uniq].col - = &table->cols[n + DATA_TRX_ID]; - ind->fields[DATA_ROLL_PTR - 1 + n_uniq].col - = &table->cols[n + DATA_ROLL_PTR]; - } - } - /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ - ind->cached = TRUE; - *index = ind; - return(ptr); -} diff --git a/storage/innobase/mtr/mtr0mtr.c b/storage/innobase/mtr/mtr0mtr.c deleted file mode 100644 index 365fa15878a..00000000000 --- a/storage/innobase/mtr/mtr0mtr.c +++ /dev/null @@ -1,336 +0,0 @@ -/****************************************************** -Mini-transaction buffer - -(c) 1995 Innobase Oy - -Created 11/26/1995 Heikki Tuuri -*******************************************************/ - -#include "mtr0mtr.h" - -#ifdef UNIV_NONINL -#include "mtr0mtr.ic" -#endif - -#include "buf0buf.h" -#include "page0types.h" -#include "mtr0log.h" -#include "log0log.h" - -/******************************************************************* -Starts a mini-transaction and creates a mini-transaction handle -and buffer in the memory buffer given by the caller. */ - -mtr_t* -mtr_start_noninline( -/*================*/ - /* out: mtr buffer which also acts as - the mtr handle */ - mtr_t* mtr) /* in: memory buffer for the mtr buffer */ -{ - return(mtr_start(mtr)); -} - -/********************************************************************* -Releases the item in the slot given. 
*/ -UNIV_INLINE -void -mtr_memo_slot_release( -/*==================*/ - mtr_t* mtr, /* in: mtr */ - mtr_memo_slot_t* slot) /* in: memo slot */ -{ - void* object; - ulint type; - - ut_ad(mtr && slot); - - object = slot->object; - type = slot->type; - - if (UNIV_LIKELY(object != NULL)) { - if (type <= MTR_MEMO_BUF_FIX) { - buf_page_release((buf_block_t*)object, type, mtr); - } else if (type == MTR_MEMO_S_LOCK) { - rw_lock_s_unlock((rw_lock_t*)object); -#ifdef UNIV_DEBUG - } else if (type == MTR_MEMO_X_LOCK) { - rw_lock_x_unlock((rw_lock_t*)object); - } else { - ut_ad(type == MTR_MEMO_MODIFY); - ut_ad(mtr_memo_contains(mtr, object, - MTR_MEMO_PAGE_X_FIX)); -#else - } else { - rw_lock_x_unlock((rw_lock_t*)object); -#endif - } - } - - slot->object = NULL; -} - -/************************************************************** -Releases the mlocks and other objects stored in an mtr memo. They are released -in the order opposite to which they were pushed to the memo. NOTE! It is -essential that the x-rw-lock on a modified buffer page is not released before -buf_page_note_modification is called for that page! Otherwise, some thread -might race to modify it, and the flush list sort order on lsn would be -destroyed. */ -UNIV_INLINE -void -mtr_memo_pop_all( -/*=============*/ - mtr_t* mtr) /* in: mtr */ -{ - mtr_memo_slot_t* slot; - dyn_array_t* memo; - ulint offset; - - ut_ad(mtr); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_COMMITTING); /* Currently only used in - commit */ - memo = &(mtr->memo); - - offset = dyn_array_get_data_size(memo); - - while (offset > 0) { - offset -= sizeof(mtr_memo_slot_t); - slot = dyn_array_get_element(memo, offset); - - mtr_memo_slot_release(mtr, slot); - } -} - -/**************************************************************** -Writes the contents of a mini-transaction log, if any, to the database log. 
*/ -static -void -mtr_log_reserve_and_write( -/*======================*/ - mtr_t* mtr) /* in: mtr */ -{ - dyn_array_t* mlog; - dyn_block_t* block; - ulint data_size; - ibool success; - byte* first_data; - - ut_ad(mtr); - - mlog = &(mtr->log); - - first_data = dyn_block_get_data(mlog); - - if (mtr->n_log_recs > 1) { - mlog_catenate_ulint(mtr, MLOG_MULTI_REC_END, MLOG_1BYTE); - } else { - *first_data = (byte)((ulint)*first_data - | MLOG_SINGLE_REC_FLAG); - } - - if (mlog->heap == NULL) { - mtr->end_lsn = log_reserve_and_write_fast( - first_data, dyn_block_get_used(mlog), - &(mtr->start_lsn), &success); - if (success) { - - return; - } - } - - data_size = dyn_array_get_data_size(mlog); - - /* Open the database log for log_write_low */ - mtr->start_lsn = log_reserve_and_open(data_size); - - if (mtr->log_mode == MTR_LOG_ALL) { - - block = mlog; - - while (block != NULL) { - log_write_low(dyn_block_get_data(block), - dyn_block_get_used(block)); - block = dyn_array_get_next_block(mlog, block); - } - } else { - ut_ad(mtr->log_mode == MTR_LOG_NONE); - /* Do nothing */ - } - - mtr->end_lsn = log_close(); -} - -/******************************************************************* -Commits a mini-transaction. */ - -void -mtr_commit( -/*=======*/ - mtr_t* mtr) /* in: mini-transaction */ -{ - ut_ad(mtr); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_ACTIVE); -#ifdef UNIV_DEBUG - mtr->state = MTR_COMMITTING; -#endif - if (mtr->modifications) { - mtr_log_reserve_and_write(mtr); - } - - /* We first update the modification info to buffer pages, and only - after that release the log mutex: this guarantees that when the log - mutex is free, all buffer pages contain an up-to-date info of their - modifications. This fact is used in making a checkpoint when we look - at the oldest modification of any page in the buffer pool. It is also - required when we insert modified buffer pages in to the flush list - which must be sorted on oldest_modification. 
*/ - - mtr_memo_pop_all(mtr); - - if (mtr->modifications) { - log_release(); - } - -#ifdef UNIV_DEBUG - mtr->state = MTR_COMMITTED; -#endif - dyn_array_free(&(mtr->memo)); - dyn_array_free(&(mtr->log)); -} - -/************************************************************** -Releases the latches stored in an mtr memo down to a savepoint. -NOTE! The mtr must not have made changes to buffer pages after the -savepoint, as these can be handled only by mtr_commit. */ - -void -mtr_rollback_to_savepoint( -/*======================*/ - mtr_t* mtr, /* in: mtr */ - ulint savepoint) /* in: savepoint */ -{ - mtr_memo_slot_t* slot; - dyn_array_t* memo; - ulint offset; - - ut_ad(mtr); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_ACTIVE); - - memo = &(mtr->memo); - - offset = dyn_array_get_data_size(memo); - ut_ad(offset >= savepoint); - - while (offset > savepoint) { - offset -= sizeof(mtr_memo_slot_t); - - slot = dyn_array_get_element(memo, offset); - - ut_ad(slot->type != MTR_MEMO_MODIFY); - mtr_memo_slot_release(mtr, slot); - } -} - -/******************************************************* -Releases an object in the memo stack. */ - -void -mtr_memo_release( -/*=============*/ - mtr_t* mtr, /* in: mtr */ - void* object, /* in: object */ - ulint type) /* in: object type: MTR_MEMO_S_LOCK, ... */ -{ - mtr_memo_slot_t* slot; - dyn_array_t* memo; - ulint offset; - - ut_ad(mtr); - ut_ad(mtr->magic_n == MTR_MAGIC_N); - ut_ad(mtr->state == MTR_ACTIVE); - - memo = &(mtr->memo); - - offset = dyn_array_get_data_size(memo); - - while (offset > 0) { - offset -= sizeof(mtr_memo_slot_t); - - slot = dyn_array_get_element(memo, offset); - - if ((object == slot->object) && (type == slot->type)) { - - mtr_memo_slot_release(mtr, slot); - - break; - } - } -} - -/************************************************************ -Reads 1 - 4 bytes from a file page buffered in the buffer pool. 
*/ - -ulint -mtr_read_ulint( -/*===========*/ - /* out: value read */ - byte* ptr, /* in: pointer from where to read */ - ulint type, /* in: MLOG_1BYTE, MLOG_2BYTES, MLOG_4BYTES */ - mtr_t* mtr __attribute__((unused))) - /* in: mini-transaction handle */ -{ - ut_ad(mtr->state == MTR_ACTIVE); - ut_ad(mtr_memo_contains(mtr, buf_block_align(ptr), - MTR_MEMO_PAGE_S_FIX) - || mtr_memo_contains(mtr, buf_block_align(ptr), - MTR_MEMO_PAGE_X_FIX)); - if (type == MLOG_1BYTE) { - return(mach_read_from_1(ptr)); - } else if (type == MLOG_2BYTES) { - return(mach_read_from_2(ptr)); - } else { - ut_ad(type == MLOG_4BYTES); - return(mach_read_from_4(ptr)); - } -} - -/************************************************************ -Reads 8 bytes from a file page buffered in the buffer pool. */ - -dulint -mtr_read_dulint( -/*============*/ - /* out: value read */ - byte* ptr, /* in: pointer from where to read */ - mtr_t* mtr __attribute__((unused))) - /* in: mini-transaction handle */ -{ - ut_ad(mtr->state == MTR_ACTIVE); - ut_ad(ptr && mtr); - ut_ad(mtr_memo_contains(mtr, buf_block_align(ptr), - MTR_MEMO_PAGE_S_FIX) - || mtr_memo_contains(mtr, buf_block_align(ptr), - MTR_MEMO_PAGE_X_FIX)); - return(mach_read_from_8(ptr)); -} - -#ifdef UNIV_DEBUG -/************************************************************* -Prints info of an mtr handle. 
*/ - -void -mtr_print( -/*======*/ - mtr_t* mtr) /* in: mtr */ -{ - fprintf(stderr, - "Mini-transaction handle: memo size %lu bytes" - " log size %lu bytes\n", - (ulong) dyn_array_get_data_size(&(mtr->memo)), - (ulong) dyn_array_get_data_size(&(mtr->log))); -} -#endif /* UNIV_DEBUG */ diff --git a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c deleted file mode 100644 index 4a9d3334e7d..00000000000 --- a/storage/innobase/os/os0file.c +++ /dev/null @@ -1,4550 +0,0 @@ -/****************************************************** -The interface to the operating system file i/o primitives - -(c) 1995 Innobase Oy - -Created 10/21/1995 Heikki Tuuri -*******************************************************/ - -#include "os0file.h" -#include "os0sync.h" -#include "os0thread.h" -#include "ut0mem.h" -#include "srv0srv.h" -#include "srv0start.h" -#include "fil0fil.h" -#include "buf0buf.h" - -#if defined(UNIV_HOTBACKUP) && defined(__WIN__) -/* Add includes for the _stat() call to compile on Windows */ -#include <sys/types.h> -#include <sys/stat.h> -#include <errno.h> -#endif /* UNIV_HOTBACKUP */ - -#ifdef POSIX_ASYNC_IO -/* We assume in this case that the OS has standard Posix aio (at least SunOS -2.6, HP-UX 11i and AIX 4.3 have) */ - -#endif - -/* This specifies the file permissions InnoDB uses when it creates files in -Unix; the value of os_innodb_umask is initialized in ha_innodb.cc to -my_umask */ - -#ifndef __WIN__ -ulint os_innodb_umask = S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP; -#else -ulint os_innodb_umask = 0; -#endif - -#ifdef UNIV_DO_FLUSH -/* If the following is set to TRUE, we do not call os_file_flush in every -os_file_write. We can set this TRUE when the doublewrite buffer is used. */ -ibool os_do_not_call_flush_at_each_write = FALSE; -#else -/* We do not call os_file_flush in every os_file_write. 
*/ -#endif /* UNIV_DO_FLUSH */ - -/* We use these mutexes to protect lseek + file i/o operation, if the -OS does not provide an atomic pread or pwrite, or similar */ -#define OS_FILE_N_SEEK_MUTEXES 16 -os_mutex_t os_file_seek_mutexes[OS_FILE_N_SEEK_MUTEXES]; - -/* In simulated aio, merge at most this many consecutive i/os */ -#define OS_AIO_MERGE_N_CONSECUTIVE 64 - -/* If this flag is TRUE, then we will use the native aio of the -OS (provided we compiled Innobase with it in), otherwise we will -use simulated aio we build below with threads */ - -ibool os_aio_use_native_aio = FALSE; - -ibool os_aio_print_debug = FALSE; - -/* State for the state of an IO request in simulated AIO. - Protocol for simulated aio: - client requests IO: find slot with reserved = FALSE. Add entry with - status = OS_AIO_NOT_ISSUED. - IO thread wakes: find adjacent slots with reserved = TRUE and status = - OS_AIO_NOT_ISSUED. Change status for slots to - OS_AIO_ISSUED. - IO operation completes: set status for slots to OS_AIO_DONE. set status - for the first slot to OS_AIO_CLAIMED and return - result for that slot. - When there are multiple read and write threads, they all compete to execute - the requests in the array (os_aio_array_t). This avoids the need to load - balance requests at the time the request is made at the cost of waking all - threads when a request is available. -*/ -typedef enum { - OS_AIO_NOT_ISSUED, /* Available to be processed by an IO thread. */ - OS_AIO_ISSUED, /* Being processed by an IO thread. */ - OS_AIO_DONE, /* Request processed. */ - OS_AIO_CLAIMED /* Result being returned to client. */ -} os_aio_status; - -/* The aio array slot structure */ -typedef struct os_aio_slot_struct os_aio_slot_t; - -struct os_aio_slot_struct{ - ibool is_read; /* TRUE if a read operation */ - ulint pos; /* index of the slot in the aio - array */ - ibool reserved; /* TRUE if this slot is reserved */ - os_aio_status status; /* Status for current request. Valid when reserved - is TRUE. 
Used only in simulated aio. */ - time_t reservation_time;/* time when reserved */ - ulint len; /* length of the block to read or - write */ - byte* buf; /* buffer used in i/o */ - ulint type; /* OS_FILE_READ or OS_FILE_WRITE */ - ulint offset; /* 32 low bits of file offset in - bytes */ - ulint offset_high; /* 32 high bits of file offset */ - os_file_t file; /* file where to read or write */ - const char* name; /* file name or path */ - fil_node_t* message1; /* message which is given by the */ - void* message2; /* the requester of an aio operation - and which can be used to identify - which pending aio operation was - completed */ -#ifdef WIN_ASYNC_IO - os_event_t event; /* event object we need in the - OVERLAPPED struct */ - OVERLAPPED control; /* Windows control block for the - aio request */ -#elif defined(POSIX_ASYNC_IO) - struct aiocb control; /* Posix control block for aio - request */ -#endif -}; - -/* The aio array structure */ -typedef struct os_aio_array_struct os_aio_array_t; - -struct os_aio_array_struct{ - os_mutex_t mutex; /* the mutex protecting the aio array */ - os_event_t not_full; /* The event which is set to the signaled - state when there is space in the aio - outside the ibuf segment */ - os_event_t is_empty; /* The event which is set to the signaled - state when there are no pending i/os - in this array */ - ulint n_slots; /* Total number of slots in the aio array. - This must be divisible by n_threads. */ - ulint n_reserved;/* Number of reserved slots in the - aio array outside the ibuf segment */ - os_aio_slot_t* slots; /* Pointer to the slots in the array */ -#ifdef __WIN__ - os_native_event_t* native_events; - /* Pointer to an array of OS native event - handles where we copied the handles from - slots, in the same order. 
This can be used - in WaitForMultipleObjects; used only in - Windows */ -#endif -}; - -/* Array of events used in simulated aio */ -os_event_t* os_aio_segment_wait_events = NULL; - -/* Number of threads for reading and writing. */ -ulint os_aio_read_threads = 0; -ulint os_aio_write_threads = 0; - -/* Number for the first global segment for reading. */ -const ulint os_aio_first_read_segment = 2; - -/* Number for the first global segment for writing. Set to -2 + os_aio_read_write_threads. */ -ulint os_aio_first_write_segment = 0; - -/* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These -are NULL when the module has not yet been initialized. */ -static os_aio_array_t* os_aio_read_array = NULL; -static os_aio_array_t* os_aio_write_array = NULL; -static os_aio_array_t* os_aio_ibuf_array = NULL; -static os_aio_array_t* os_aio_log_array = NULL; -static os_aio_array_t* os_aio_sync_array = NULL; - -/* Per thread buffer used for merged IO requests. Used by -os_aio_simulated_handle so that a buffer doesn't have to be allocated -for each request. */ -static char* os_aio_thread_buffer[SRV_MAX_N_IO_THREADS]; -static ulint os_aio_thread_buffer_size[SRV_MAX_N_IO_THREADS]; - -/* Count pages read and written per thread */ -static ulint os_aio_thread_io_reads[SRV_MAX_N_IO_THREADS]; -static ulint os_aio_thread_io_writes[SRV_MAX_N_IO_THREADS]; - -/* Number of IO operations done. One request can be for N pages. */ -static ulint os_aio_thread_io_requests[SRV_MAX_N_IO_THREADS]; - -/* usecs spent blocked on an IO request */ -static double os_aio_thread_io_wait[SRV_MAX_N_IO_THREADS]; -/* max usecs spent blocked on an IO request */ -static double os_aio_thread_max_io_wait[SRV_MAX_N_IO_THREADS]; - -/* Number of IO global segments. An IO handler thread is created for each -global segment, except for the segment associated with os_aio_sync_array. -Several segments can be associated with os_aio_{read,write}_array. One -segment is created for each of the other arrays. 
This is also the number -of valid entries in srv_io_thread_reads, srv_io_thread_writes, -srv_io_thread_op_info, srv_io_thread_function and os_aio_segment_wait_events. */ -static ulint os_aio_n_segments = ULINT_UNDEFINED; - -/* Set to TRUE to temporarily block reads from being scheduled while a batch -of read requests is added to allow them to be merged by the IO handler thread -if they are adjacent. Declared volatile because we don't want this to be -read from a register in a loop when another thread may change the value in -memory. -*/ -static volatile ibool os_aio_recommend_sleep_for_read_threads = FALSE; - -ulint os_n_file_reads = 0; -ulint os_bytes_read_since_printout = 0; -ulint os_n_file_writes = 0; -ulint os_n_fsyncs = 0; -ulint os_n_file_reads_old = 0; -ulint os_n_file_writes_old = 0; -ulint os_n_fsyncs_old = 0; -time_t os_last_printout; - -ibool os_has_said_disk_full = FALSE; - -/* The mutex protecting the following counts of pending I/O operations */ -static os_mutex_t os_file_count_mutex; -ulint os_file_n_pending_preads = 0; -ulint os_file_n_pending_pwrites = 0; -ulint os_n_pending_writes = 0; -ulint os_n_pending_reads = 0; - -static double time_usecs() { - ulint sec, ms; - if (ut_usectime(&sec, &ms)) - return 0; - else - return sec * 1000000.0 + ms; -} - -/*************************************************************************** -Gets the operating system version. Currently works only on Windows. 
*/ - -ulint -os_get_os_version(void) -/*===================*/ - /* out: OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000 */ -{ -#ifdef __WIN__ - OSVERSIONINFO os_info; - - os_info.dwOSVersionInfoSize = sizeof(OSVERSIONINFO); - - ut_a(GetVersionEx(&os_info)); - - if (os_info.dwPlatformId == VER_PLATFORM_WIN32s) { - return(OS_WIN31); - } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_WINDOWS) { - return(OS_WIN95); - } else if (os_info.dwPlatformId == VER_PLATFORM_WIN32_NT) { - if (os_info.dwMajorVersion <= 4) { - return(OS_WINNT); - } else { - return(OS_WIN2000); - } - } else { - ut_error; - return(0); - } -#else - ut_error; - - return(0); -#endif -} - -/*************************************************************************** -Retrieves the last error number if an error occurs in a file io function. -The number should be retrieved before any other OS calls (because they may -overwrite the error number). If the number is not known to this program, -the OS error number + 100 is returned. */ - -ulint -os_file_get_last_error( -/*===================*/ - /* out: error number, or OS error - number + 100 */ - ibool report_all_errors) /* in: TRUE if we want an error message - printed of all errors */ -{ - ulint err; - -#ifdef __WIN__ - - err = (ulint) GetLastError(); - - if (report_all_errors - || (err != ERROR_DISK_FULL && err != ERROR_FILE_EXISTS)) { - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Operating system error number %lu" - " in a file operation.\n", (ulong) err); - - if (err == ERROR_PATH_NOT_FOUND) { - fprintf(stderr, - "InnoDB: The error means the system" - " cannot find the path specified.\n"); - - if (srv_is_being_started) { - fprintf(stderr, - "InnoDB: If you are installing InnoDB," - " remember that you must create\n" - "InnoDB: directories yourself, InnoDB" - " does not create them.\n"); - } - } else if (err == ERROR_ACCESS_DENIED) { - fprintf(stderr, - "InnoDB: The error means mysqld does not have" - " the access rights to\n" - "InnoDB: the 
directory. It may also be" - " you have created a subdirectory\n" - "InnoDB: of the same name as a data file.\n"); - } else if (err == ERROR_SHARING_VIOLATION - || err == ERROR_LOCK_VIOLATION) { - fprintf(stderr, - "InnoDB: The error means that another program" - " is using InnoDB's files.\n" - "InnoDB: This might be a backup or antivirus" - " software or another instance\n" - "InnoDB: of MySQL." - " Please close it to get rid of this error.\n"); - } else { - fprintf(stderr, - "InnoDB: Some operating system error numbers" - " are described at\n" - "InnoDB: " - "http://dev.mysql.com/doc/refman/5.1/en/" - "operating-system-error-codes.html\n"); - } - } - - fflush(stderr); - - if (err == ERROR_FILE_NOT_FOUND) { - return(OS_FILE_NOT_FOUND); - } else if (err == ERROR_DISK_FULL) { - return(OS_FILE_DISK_FULL); - } else if (err == ERROR_FILE_EXISTS) { - return(OS_FILE_ALREADY_EXISTS); - } else if (err == ERROR_SHARING_VIOLATION - || err == ERROR_LOCK_VIOLATION) { - return(OS_FILE_SHARING_VIOLATION); - } else { - return(100 + err); - } -#else - err = (ulint) errno; - - if (report_all_errors - || (err != ENOSPC && err != EEXIST)) { - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Operating system error number %lu" - " in a file operation.\n", (ulong) err); - - if (err == ENOENT) { - fprintf(stderr, - "InnoDB: The error means the system" - " cannot find the path specified.\n"); - - if (srv_is_being_started) { - fprintf(stderr, - "InnoDB: If you are installing InnoDB," - " remember that you must create\n" - "InnoDB: directories yourself, InnoDB" - " does not create them.\n"); - } - } else if (err == EACCES) { - fprintf(stderr, - "InnoDB: The error means mysqld does not have" - " the access rights to\n" - "InnoDB: the directory.\n"); - } else { - if (strerror((int)err) != NULL) { - fprintf(stderr, - "InnoDB: Error number %lu" - " means '%s'.\n", - err, strerror((int)err)); - } - - fprintf(stderr, - "InnoDB: Some operating system" - " error numbers are described 
at\n" - "InnoDB: " - "http://dev.mysql.com/doc/refman/5.1/en/" - "operating-system-error-codes.html\n"); - } - } - - fflush(stderr); - - if (err == ENOSPC) { - return(OS_FILE_DISK_FULL); -#ifdef POSIX_ASYNC_IO - } else if (err == EAGAIN) { - return(OS_FILE_AIO_RESOURCES_RESERVED); -#endif - } else if (err == ENOENT) { - return(OS_FILE_NOT_FOUND); - } else if (err == EEXIST) { - return(OS_FILE_ALREADY_EXISTS); - } else if (err == EXDEV || err == ENOTDIR || err == EISDIR) { - return(OS_FILE_PATH_ERROR); - } else { - return(100 + err); - } -#endif -} - -/******************************************************************** -Does error handling when a file operation fails. -Conditionally exits (calling exit(3)) based on should_exit value and the -error type */ - -static -ibool -os_file_handle_error_cond_exit( -/*===========================*/ - /* out: TRUE if we should retry the - operation */ - const char* name, /* in: name of a file or NULL */ - const char* operation, /* in: operation */ - ibool should_exit) /* in: call exit(3) if unknown error - and this parameter is TRUE */ -{ - ulint err; - - err = os_file_get_last_error(FALSE); - - if (err == OS_FILE_DISK_FULL) { - /* We only print a warning about disk full once */ - - if (os_has_said_disk_full) { - - return(FALSE); - } - - if (name) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Encountered a problem with" - " file %s\n", name); - } - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Disk is full. 
Try to clean the disk" - " to free space.\n"); - - os_has_said_disk_full = TRUE; - - fflush(stderr); - - return(FALSE); - } else if (err == OS_FILE_AIO_RESOURCES_RESERVED) { - - return(TRUE); - } else if (err == OS_FILE_ALREADY_EXISTS - || err == OS_FILE_PATH_ERROR) { - - return(FALSE); - } else if (err == OS_FILE_SHARING_VIOLATION) { - - os_thread_sleep(10000000); /* 10 sec */ - return(TRUE); - } else { - if (name) { - fprintf(stderr, "InnoDB: File name %s\n", name); - } - - fprintf(stderr, "InnoDB: File operation call: '%s'.\n", - operation); - - if (should_exit) { - fprintf(stderr, "InnoDB: Cannot continue operation.\n"); - - fflush(stderr); - - exit(1); - } - } - - return(FALSE); -} - -/******************************************************************** -Does error handling when a file operation fails. */ -static -ibool -os_file_handle_error( -/*=================*/ - /* out: TRUE if we should retry the - operation */ - const char* name, /* in: name of a file or NULL */ - const char* operation)/* in: operation */ -{ - /* exit in case of unknown error */ - return(os_file_handle_error_cond_exit(name, operation, TRUE)); -} - -/******************************************************************** -Does error handling when a file operation fails. */ -static -ibool -os_file_handle_error_no_exit( -/*=========================*/ - /* out: TRUE if we should retry the - operation */ - const char* name, /* in: name of a file or NULL */ - const char* operation)/* in: operation */ -{ - /* don't exit in case of unknown error */ - return(os_file_handle_error_cond_exit(name, operation, FALSE)); -} - -#undef USE_FILE_LOCK -#define USE_FILE_LOCK -#if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__NETWARE__) -/* InnoDB Hot Backup does not lock the data files. - * On Windows, mandatory locking is used. - */ -# undef USE_FILE_LOCK -#endif -#ifdef USE_FILE_LOCK -/******************************************************************** -Obtain an exclusive lock on a file. 
*/ -static -int -os_file_lock( -/*=========*/ - /* out: 0 on success */ - int fd, /* in: file descriptor */ - const char* name) /* in: file name */ -{ - struct flock lk; - lk.l_type = F_WRLCK; - lk.l_whence = SEEK_SET; - lk.l_start = lk.l_len = 0; - if (fcntl(fd, F_SETLK, &lk) == -1) { - fprintf(stderr, - "InnoDB: Unable to lock %s, error: %d\n", name, errno); - - if (errno == EAGAIN || errno == EACCES) { - fprintf(stderr, - "InnoDB: Check that you do not already have" - " another mysqld process\n" - "InnoDB: using the same InnoDB data" - " or log files.\n"); - } - - return(-1); - } - - return(0); -} -#endif /* USE_FILE_LOCK */ - -/******************************************************************** -Creates the seek mutexes used in positioned reads and writes. */ - -void -os_io_init_simple(void) -/*===================*/ -{ - ulint i; - - os_file_count_mutex = os_mutex_create(NULL); - - for (i = 0; i < OS_FILE_N_SEEK_MUTEXES; i++) { - os_file_seek_mutexes[i] = os_mutex_create(NULL); - } -} - -#if !defined(UNIV_HOTBACKUP) && !defined(__NETWARE__) -/************************************************************************* -Creates a temporary file that will be deleted on close. -This function is defined in ha_innodb.cc. */ - -int -innobase_mysql_tmpfile(void); -/*========================*/ - /* out: temporary file descriptor, or < 0 on error */ -#endif /* !UNIV_HOTBACKUP && !__NETWARE__ */ - -/*************************************************************************** -Creates a temporary file. This function is like tmpfile(3), but -the temporary file is created in the MySQL temporary directory. -On Netware, this function is like tmpfile(3), because the C run-time -library of Netware does not expose the delete-on-close flag. 
*/ - -FILE* -os_file_create_tmpfile(void) -/*========================*/ - /* out: temporary file handle, or NULL on error */ -{ -#ifdef UNIV_HOTBACKUP - ut_error; - - return(NULL); -#else -# ifdef __NETWARE__ - FILE* file = tmpfile(); -# else /* __NETWARE__ */ - FILE* file = NULL; - int fd = innobase_mysql_tmpfile(); - - if (fd >= 0) { - file = fdopen(fd, "w+b"); - } -# endif /* __NETWARE__ */ - - if (!file) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: unable to create temporary file;" - " errno: %d\n", errno); -# ifndef __NETWARE__ - if (fd >= 0) { - close(fd); - } -# endif /* !__NETWARE__ */ - } - - return(file); -#endif /* UNIV_HOTBACKUP */ -} - -/*************************************************************************** -The os_file_opendir() function opens a directory stream corresponding to the -directory named by the dirname argument. The directory stream is positioned -at the first entry. In both Unix and Windows we automatically skip the '.' -and '..' items at the start of the directory listing. */ - -os_file_dir_t -os_file_opendir( -/*============*/ - /* out: directory stream, NULL if - error */ - const char* dirname, /* in: directory name; it must not - contain a trailing '\' or '/' */ - ibool error_is_fatal) /* in: TRUE if we should treat an - error as a fatal error; if we try to - open symlinks then we do not wish a - fatal error if it happens not to be - a directory */ -{ - os_file_dir_t dir; -#ifdef __WIN__ - LPWIN32_FIND_DATA lpFindFileData; - char path[OS_FILE_MAX_PATH + 3]; - - ut_a(strlen(dirname) < OS_FILE_MAX_PATH); - - strcpy(path, dirname); - strcpy(path + strlen(path), "\\*"); - - /* Note that in Windows opening the 'directory stream' also retrieves - the first entry in the directory. Since it is '.', that is no problem, - as we will skip over the '.' and '..' entries anyway. 
*/ - - lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA)); - - dir = FindFirstFile((LPCTSTR) path, lpFindFileData); - - ut_free(lpFindFileData); - - if (dir == INVALID_HANDLE_VALUE) { - - if (error_is_fatal) { - os_file_handle_error(dirname, "opendir"); - } - - return(NULL); - } - - return(dir); -#else - dir = opendir(dirname); - - if (dir == NULL && error_is_fatal) { - os_file_handle_error(dirname, "opendir"); - } - - return(dir); -#endif -} - -/*************************************************************************** -Closes a directory stream. */ - -int -os_file_closedir( -/*=============*/ - /* out: 0 if success, -1 if failure */ - os_file_dir_t dir) /* in: directory stream */ -{ -#ifdef __WIN__ - BOOL ret; - - ret = FindClose(dir); - - if (!ret) { - os_file_handle_error_no_exit(NULL, "closedir"); - - return(-1); - } - - return(0); -#else - int ret; - - ret = closedir(dir); - - if (ret) { - os_file_handle_error_no_exit(NULL, "closedir"); - } - - return(ret); -#endif -} - -/*************************************************************************** -This function returns information of the next file in the directory. We jump -over the '.' and '..' entries in the directory. 
*/ - -int -os_file_readdir_next_file( -/*======================*/ - /* out: 0 if ok, -1 if error, 1 if at the end - of the directory */ - const char* dirname,/* in: directory name or path */ - os_file_dir_t dir, /* in: directory stream */ - os_file_stat_t* info) /* in/out: buffer where the info is returned */ -{ -#ifdef __WIN__ - LPWIN32_FIND_DATA lpFindFileData; - BOOL ret; - - lpFindFileData = ut_malloc(sizeof(WIN32_FIND_DATA)); -next_file: - ret = FindNextFile(dir, lpFindFileData); - - if (ret) { - ut_a(strlen((char *) lpFindFileData->cFileName) - < OS_FILE_MAX_PATH); - - if (strcmp((char *) lpFindFileData->cFileName, ".") == 0 - || strcmp((char *) lpFindFileData->cFileName, "..") == 0) { - - goto next_file; - } - - strcpy(info->name, (char *) lpFindFileData->cFileName); - - info->size = (ib_longlong)(lpFindFileData->nFileSizeLow) - + (((ib_longlong)(lpFindFileData->nFileSizeHigh)) - << 32); - - if (lpFindFileData->dwFileAttributes - & FILE_ATTRIBUTE_REPARSE_POINT) { - /* TODO: test Windows symlinks */ - /* TODO: MySQL has apparently its own symlink - implementation in Windows, dbname.sym can - redirect a database directory: - http://dev.mysql.com/doc/refman/5.1/en/ - windows-symbolic-links.html */ - info->type = OS_FILE_TYPE_LINK; - } else if (lpFindFileData->dwFileAttributes - & FILE_ATTRIBUTE_DIRECTORY) { - info->type = OS_FILE_TYPE_DIR; - } else { - /* It is probably safest to assume that all other - file types are normal. Better to check them rather - than blindly skip them. 
*/ - - info->type = OS_FILE_TYPE_FILE; - } - } - - ut_free(lpFindFileData); - - if (ret) { - return(0); - } else if (GetLastError() == ERROR_NO_MORE_FILES) { - - return(1); - } else { - os_file_handle_error_no_exit(dirname, - "readdir_next_file"); - return(-1); - } -#else - struct dirent* ent; - char* full_path; - int ret; - struct stat statinfo; -#ifdef HAVE_READDIR_R - char dirent_buf[sizeof(struct dirent) - + _POSIX_PATH_MAX + 100]; - /* In /mysys/my_lib.c, _POSIX_PATH_MAX + 1 is used as - the max file name len; but in most standards, the - length is NAME_MAX; we add 100 to be even safer */ -#endif - -next_file: - -#ifdef HAVE_READDIR_R - ret = readdir_r(dir, (struct dirent*)dirent_buf, &ent); - - if (ret != 0) { - fprintf(stderr, - "InnoDB: cannot read directory %s, error %lu\n", - dirname, (ulong)ret); - - return(-1); - } - - if (ent == NULL) { - /* End of directory */ - - return(1); - } - - ut_a(strlen(ent->d_name) < _POSIX_PATH_MAX + 100 - 1); -#else - ent = readdir(dir); - - if (ent == NULL) { - - return(1); - } -#endif - ut_a(strlen(ent->d_name) < OS_FILE_MAX_PATH); - - if (strcmp(ent->d_name, ".") == 0 || strcmp(ent->d_name, "..") == 0) { - - goto next_file; - } - - strcpy(info->name, ent->d_name); - - full_path = ut_malloc(strlen(dirname) + strlen(ent->d_name) + 10); - - sprintf(full_path, "%s/%s", dirname, ent->d_name); - - ret = stat(full_path, &statinfo); - - if (ret) { - os_file_handle_error_no_exit(full_path, "stat"); - - ut_free(full_path); - - return(-1); - } - - info->size = (ib_longlong)statinfo.st_size; - - if (S_ISDIR(statinfo.st_mode)) { - info->type = OS_FILE_TYPE_DIR; - } else if (S_ISLNK(statinfo.st_mode)) { - info->type = OS_FILE_TYPE_LINK; - } else if (S_ISREG(statinfo.st_mode)) { - info->type = OS_FILE_TYPE_FILE; - } else { - info->type = OS_FILE_TYPE_UNKNOWN; - } - - ut_free(full_path); - - return(0); -#endif -} - -/********************************************************************* -This function attempts to create a directory named 
pathname. The new directory -gets default permissions. On Unix the permissions are (0770 & ~umask). If the -directory exists already, nothing is done and the call succeeds, unless the -fail_if_exists arguments is true. */ - -ibool -os_file_create_directory( -/*=====================*/ - /* out: TRUE if call succeeds, - FALSE on error */ - const char* pathname, /* in: directory name as - null-terminated string */ - ibool fail_if_exists) /* in: if TRUE, pre-existing directory - is treated as an error. */ -{ -#ifdef __WIN__ - BOOL rcode; - - rcode = CreateDirectory((LPCTSTR) pathname, NULL); - if (!(rcode != 0 - || (GetLastError() == ERROR_ALREADY_EXISTS - && !fail_if_exists))) { - /* failure */ - os_file_handle_error(pathname, "CreateDirectory"); - - return(FALSE); - } - - return (TRUE); -#else - int rcode; - - rcode = mkdir(pathname, 0770); - - if (!(rcode == 0 || (errno == EEXIST && !fail_if_exists))) { - /* failure */ - os_file_handle_error(pathname, "mkdir"); - - return(FALSE); - } - - return (TRUE); -#endif -} - -/******************************************************************** -A simple function to open or create a file. 
*/ - -os_file_t -os_file_create_simple( -/*==================*/ - /* out, own: handle to the file, not defined - if error, error number can be retrieved with - os_file_get_last_error */ - const char* name, /* in: name of the file or path as a - null-terminated string */ - ulint create_mode,/* in: OS_FILE_OPEN if an existing file is - opened (if does not exist, error), or - OS_FILE_CREATE if a new file is created - (if exists, error), or - OS_FILE_CREATE_PATH if new file - (if exists, error) and subdirectories along - its path are created (if needed)*/ - ulint access_type,/* in: OS_FILE_READ_ONLY or - OS_FILE_READ_WRITE */ - ibool* success)/* out: TRUE if succeed, FALSE if error */ -{ -#ifdef __WIN__ - os_file_t file; - DWORD create_flag; - DWORD access; - DWORD attributes = 0; - ibool retry; - -try_again: - ut_a(name); - - if (create_mode == OS_FILE_OPEN) { - create_flag = OPEN_EXISTING; - } else if (create_mode == OS_FILE_CREATE) { - create_flag = CREATE_NEW; - } else if (create_mode == OS_FILE_CREATE_PATH) { - /* create subdirs along the path if needed */ - *success = os_file_create_subdirs_if_needed(name); - if (!*success) { - ut_error; - } - create_flag = CREATE_NEW; - create_mode = OS_FILE_CREATE; - } else { - create_flag = 0; - ut_error; - } - - if (access_type == OS_FILE_READ_ONLY) { - access = GENERIC_READ; - } else if (access_type == OS_FILE_READ_WRITE) { - access = GENERIC_READ | GENERIC_WRITE; - } else { - access = 0; - ut_error; - } - - file = CreateFile((LPCTSTR) name, - access, - FILE_SHARE_READ | FILE_SHARE_WRITE, - /* file can be read and written also - by other processes */ - NULL, /* default security attributes */ - create_flag, - attributes, - NULL); /* no template file */ - - if (file == INVALID_HANDLE_VALUE) { - *success = FALSE; - - retry = os_file_handle_error(name, - create_mode == OS_FILE_OPEN ? 
- "open" : "create"); - if (retry) { - goto try_again; - } - } else { - *success = TRUE; - } - - return(file); -#else /* __WIN__ */ - os_file_t file; - int create_flag; - ibool retry; - -try_again: - ut_a(name); - - if (create_mode == OS_FILE_OPEN) { - if (access_type == OS_FILE_READ_ONLY) { - create_flag = O_RDONLY; - } else { - create_flag = O_RDWR; - } - } else if (create_mode == OS_FILE_CREATE) { - create_flag = O_RDWR | O_CREAT | O_EXCL; - } else if (create_mode == OS_FILE_CREATE_PATH) { - /* create subdirs along the path if needed */ - *success = os_file_create_subdirs_if_needed(name); - if (!*success) { - return (-1); - } - create_flag = O_RDWR | O_CREAT | O_EXCL; - create_mode = OS_FILE_CREATE; - } else { - create_flag = 0; - ut_error; - } - - if (create_mode == OS_FILE_CREATE) { - file = open(name, create_flag, S_IRUSR | S_IWUSR - | S_IRGRP | S_IWGRP); - } else { - file = open(name, create_flag); - } - - if (file == -1) { - *success = FALSE; - - retry = os_file_handle_error(name, - create_mode == OS_FILE_OPEN ? - "open" : "create"); - if (retry) { - goto try_again; - } -#ifdef USE_FILE_LOCK - } else if (access_type == OS_FILE_READ_WRITE - && os_file_lock(file, name)) { - *success = FALSE; - close(file); - file = -1; -#endif - } else { - *success = TRUE; - } - - return(file); -#endif /* __WIN__ */ -} - -/******************************************************************** -A simple function to open or create a file. 
*/

os_file_t
os_file_create_simple_no_error_handling(
/*====================================*/
				/* out, own: handle to the file; the value is
				not defined on error, in which case the error
				number can be fetched with
				os_file_get_last_error */
	const char*	name,	/* in: file name or path as a
				null-terminated string */
	ulint		create_mode,/* in: OS_FILE_OPEN opens an existing
				file (error if it does not exist);
				OS_FILE_CREATE creates a new file (error if
				it exists) */
	ulint		access_type,/* in: OS_FILE_READ_ONLY,
				OS_FILE_READ_WRITE, or
				OS_FILE_READ_ALLOW_DELETE; the last one is
				used by a backup program reading the file */
	ibool*		success)/* out: TRUE on success, FALSE on error */
{
#ifdef __WIN__
	os_file_t	handle;
	DWORD		disposition;
	DWORD		desired_access;
	DWORD		flags_and_attrs	= 0;
	DWORD		sharing		= FILE_SHARE_READ | FILE_SHARE_WRITE;

	ut_a(name);

	if (create_mode == OS_FILE_OPEN) {
		disposition = OPEN_EXISTING;
	} else if (create_mode == OS_FILE_CREATE) {
		disposition = CREATE_NEW;
	} else {
		disposition = 0;
		ut_error;
	}

	if (access_type == OS_FILE_READ_ONLY) {
		desired_access = GENERIC_READ;
	} else if (access_type == OS_FILE_READ_WRITE) {
		desired_access = GENERIC_READ | GENERIC_WRITE;
	} else if (access_type == OS_FILE_READ_ALLOW_DELETE) {
		desired_access = GENERIC_READ;
		/* A backup program has to give mysqld the maximum freedom
		to do what it likes with the file */
		sharing = FILE_SHARE_DELETE | FILE_SHARE_READ
			| FILE_SHARE_WRITE;
	} else {
		desired_access = 0;
		ut_error;
	}

	handle = CreateFile((LPCTSTR) name,
			    desired_access,
			    sharing,
			    NULL,	/* default security attributes */
			    disposition,
			    flags_and_attrs,
			    NULL);	/* no template file */

	*success = (handle == INVALID_HANDLE_VALUE) ? FALSE : TRUE;

	return(handle);
#else /* __WIN__ */
	os_file_t	fd;
	int		open_flags;

	ut_a(name);

	if (create_mode == OS_FILE_OPEN) {
		/* NOTE(review): OS_FILE_READ_ALLOW_DELETE ends up as O_RDWR
		here although the Windows branch opens it read-only; confirm
		against the callers before changing. */
		open_flags = (access_type == OS_FILE_READ_ONLY)
			? O_RDONLY : O_RDWR;
	} else if (create_mode == OS_FILE_CREATE) {
		open_flags = O_RDWR | O_CREAT | O_EXCL;
	} else {
		open_flags = 0;
		ut_error;
	}

	if (create_mode == OS_FILE_CREATE) {
		/* A permission argument is needed with O_CREAT */
		fd = open(name, open_flags,
			  S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP);
	} else {
		fd = open(name, open_flags);
	}

	if (fd == -1) {
		*success = FALSE;

		return(fd);
	}

#ifdef USE_FILE_LOCK
	/* A non-zero return from os_file_lock() is treated as failure */
	if (access_type == OS_FILE_READ_WRITE && os_file_lock(fd, name)) {
		*success = FALSE;
		close(fd);

		return(-1);
	}
#endif
	*success = TRUE;

	return(fd);
#endif /* __WIN__ */
}

/********************************************************************
Tries to disable OS caching on an opened file descriptor. A failure is only
reported on stderr; the caller continues with a cached descriptor. */

void
os_file_set_nocache(
/*================*/
	int		fd,		/* in: file descriptor to alter */
	const char*	file_name,	/* in: used in the diagnostic message */
	const char*	operation_name)	/* in: used in the diagnostic message;
					this function is called right after
					opening or creating a file, so this
					is either "open" or "create" */
{
	/* some versions of Solaris may not have DIRECTIO_ON */
#if defined(UNIV_SOLARIS) && defined(DIRECTIO_ON)
	if (directio(fd, DIRECTIO_ON) == -1) {
		int	errno_save;
		errno_save = (int)errno;
		ut_print_timestamp(stderr);
		fprintf(stderr,
			"  InnoDB: Failed to set DIRECTIO_ON "
			"on file %s: %s: %s, continuing anyway\n",
			file_name, operation_name, strerror(errno_save));
	}
#elif defined(O_DIRECT)
	/* F_SETFL replaces the whole set of file status flags, so fetch the
	current flags and add O_DIRECT to them instead of overwriting them
	with O_DIRECT alone. */
	int	cur_flags = fcntl(fd, F_GETFL);

	if (cur_flags == -1
	    || fcntl(fd, F_SETFL, cur_flags | O_DIRECT) == -1) {
		int	errno_save;
		errno_save = (int)errno;
		ut_print_timestamp(stderr);
		fprintf(stderr,
			"  InnoDB: Failed to set O_DIRECT "
			"on file %s: %s: %s, continuing anyway\n",
			file_name, operation_name, strerror(errno_save));
		if (errno_save == EINVAL) {
			ut_print_timestamp(stderr);
			fprintf(stderr,
				"  InnoDB: O_DIRECT is known to result in "
				"'Invalid argument' on Linux on tmpfs, "
				"see MySQL Bug#26662\n");
		}
	}
#endif
}

-/******************************************************************** -Opens an existing file or creates a new. */ - -os_file_t -os_file_create( -/*===========*/ - /* out, own: handle to the file, not defined - if error, error number can be retrieved with - os_file_get_last_error */ - const char* name, /* in: name of the file or path as a - null-terminated string */ - ulint create_mode,/* in: OS_FILE_OPEN if an existing file - is opened (if does not exist, error), or - OS_FILE_CREATE if a new file is created - (if exists, error), - OS_FILE_OVERWRITE if a new file is created - or an old overwritten; - OS_FILE_OPEN_RAW, if a raw device or disk - partition should be opened */ - ulint purpose,/* in: OS_FILE_AIO, if asynchronous, - non-buffered i/o is desired, - OS_FILE_NORMAL, if any normal file; - NOTE that it also depends on type, os_aio_.. - and srv_.. variables whether we really use - async i/o or unbuffered i/o: look in the - function source code for the exact rules */ - ulint type, /* in: OS_DATA_FILE or OS_LOG_FILE */ - ibool* success)/* out: TRUE if succeed, FALSE if error */ -{ -#ifdef __WIN__ - os_file_t file; - DWORD share_mode = FILE_SHARE_READ; - DWORD create_flag; - DWORD attributes; - ibool retry; -try_again: - ut_a(name); - - if (create_mode == OS_FILE_OPEN_RAW) { - create_flag = OPEN_EXISTING; - share_mode = FILE_SHARE_WRITE; - } else if (create_mode == OS_FILE_OPEN - || create_mode == OS_FILE_OPEN_RETRY) { - create_flag = OPEN_EXISTING; - } else if (create_mode == OS_FILE_CREATE) { - create_flag = CREATE_NEW; - } else if (create_mode == OS_FILE_OVERWRITE) { - create_flag = CREATE_ALWAYS; - } else { - create_flag = 0; - ut_error; - } - - if (purpose == OS_FILE_AIO) { - /* If specified, use asynchronous (overlapped) io and no - buffering of writes in the OS */ - attributes = 0; -#ifdef WIN_ASYNC_IO - if (os_aio_use_native_aio) { - attributes = attributes | FILE_FLAG_OVERLAPPED; - } -#endif -#ifdef UNIV_NON_BUFFERED_IO - if (type == OS_LOG_FILE && 
srv_flush_log_at_trx_commit == 2) { - /* Do not use unbuffered i/o to log files because - value 2 denotes that we do not flush the log at every - commit, but only once per second */ - } else if (srv_win_file_flush_method - == SRV_WIN_IO_UNBUFFERED) { - attributes = attributes | FILE_FLAG_NO_BUFFERING; - } -#endif - } else if (purpose == OS_FILE_NORMAL) { - attributes = 0; -#ifdef UNIV_NON_BUFFERED_IO - if (type == OS_LOG_FILE && srv_flush_log_at_trx_commit == 2) { - /* Do not use unbuffered i/o to log files because - value 2 denotes that we do not flush the log at every - commit, but only once per second */ - } else if (srv_win_file_flush_method - == SRV_WIN_IO_UNBUFFERED) { - attributes = attributes | FILE_FLAG_NO_BUFFERING; - } -#endif - } else { - attributes = 0; - ut_error; - } - - file = CreateFile((LPCTSTR) name, - GENERIC_READ | GENERIC_WRITE, /* read and write - access */ - share_mode, /* File can be read also by other - processes; we must give the read - permission because of ibbackup. We do - not give the write permission to - others because if one would succeed to - start 2 instances of mysqld on the - SAME files, that could cause severe - database corruption! When opening - raw disk partitions, Microsoft manuals - say that we must give also the write - permission. */ - NULL, /* default security attributes */ - create_flag, - attributes, - NULL); /* no template file */ - - if (file == INVALID_HANDLE_VALUE) { - *success = FALSE; - - /* When srv_file_per_table is on, file creation failure may not - be critical to the whole instance. Do not crash the server in - case of unknown errors. */ - if (srv_file_per_table) { - retry = os_file_handle_error_no_exit(name, - create_mode == OS_FILE_CREATE ? - "create" : "open"); - } else { - retry = os_file_handle_error(name, - create_mode == OS_FILE_CREATE ? 
- "create" : "open"); - } - - if (retry) { - goto try_again; - } - } else { - *success = TRUE; - } - - return(file); -#else /* __WIN__ */ - os_file_t file; - int create_flag; - ibool retry; - const char* mode_str = NULL; - const char* type_str = NULL; - const char* purpose_str = NULL; - -try_again: - ut_a(name); - - if (create_mode == OS_FILE_OPEN || create_mode == OS_FILE_OPEN_RAW - || create_mode == OS_FILE_OPEN_RETRY) { - mode_str = "OPEN"; - create_flag = O_RDWR; - } else if (create_mode == OS_FILE_CREATE) { - mode_str = "CREATE"; - create_flag = O_RDWR | O_CREAT | O_EXCL; - } else if (create_mode == OS_FILE_OVERWRITE) { - mode_str = "OVERWRITE"; - create_flag = O_RDWR | O_CREAT | O_TRUNC; - } else { - create_flag = 0; - ut_error; - } - - if (type == OS_LOG_FILE) { - type_str = "LOG"; - } else if (type == OS_DATA_FILE) { - type_str = "DATA"; - } else { - ut_error; - } - - if (purpose == OS_FILE_AIO) { - purpose_str = "AIO"; - } else if (purpose == OS_FILE_NORMAL) { - purpose_str = "NORMAL"; - } else { - ut_error; - } - -#if 0 - fprintf(stderr, "Opening file %s, mode %s, type %s, purpose %s\n", - name, mode_str, type_str, purpose_str); -#endif -#ifdef O_SYNC - /* We let O_SYNC only affect log files; note that we map O_DSYNC to - O_SYNC because the datasync options seemed to corrupt files in 2001 - in both Linux and Solaris */ - if (type == OS_LOG_FILE - && srv_unix_file_flush_method == SRV_UNIX_O_DSYNC) { - -# if 0 - fprintf(stderr, "Using O_SYNC for file %s\n", name); -# endif - - create_flag = create_flag | O_SYNC; - } -#endif /* O_SYNC */ - - file = open(name, create_flag, os_innodb_umask); - - if (file == -1) { - *success = FALSE; - - /* When srv_file_per_table is on, file creation failure may not - be critical to the whole instance. Do not crash the server in - case of unknown errors. */ - if (srv_file_per_table) { - retry = os_file_handle_error_no_exit(name, - create_mode == OS_FILE_CREATE ? 
- "create" : "open"); - } else { - retry = os_file_handle_error(name, - create_mode == OS_FILE_CREATE ? - "create" : "open"); - } - - if (retry) { - goto try_again; - } else { - return(file /* -1 */); - } - } - /* else */ - - *success = TRUE; - - /* We disable OS caching (O_DIRECT) only on data files */ - if (type != OS_LOG_FILE - && srv_unix_file_flush_method == SRV_UNIX_O_DIRECT) { - - os_file_set_nocache(file, name, mode_str); - } - -#ifdef USE_FILE_LOCK - if (create_mode != OS_FILE_OPEN_RAW && os_file_lock(file, name)) { - - if (create_mode == OS_FILE_OPEN_RETRY) { - int i; - ut_print_timestamp(stderr); - fputs(" InnoDB: Retrying to lock" - " the first data file\n", - stderr); - for (i = 0; i < 100; i++) { - os_thread_sleep(1000000); - if (!os_file_lock(file, name)) { - *success = TRUE; - return(file); - } - } - ut_print_timestamp(stderr); - fputs(" InnoDB: Unable to open the first data file\n", - stderr); - } - - *success = FALSE; - close(file); - file = -1; - } -#endif /* USE_FILE_LOCK */ - - return(file); -#endif /* __WIN__ */ -} - -/*************************************************************************** -Deletes a file if it exists. The file has to be closed before calling this. 
*/ - -ibool -os_file_delete_if_exists( -/*=====================*/ - /* out: TRUE if success */ - const char* name) /* in: file path as a null-terminated string */ -{ -#ifdef __WIN__ - BOOL ret; - ulint count = 0; -loop: - /* In Windows, deleting an .ibd file may fail if ibbackup is copying - it */ - - ret = DeleteFile((LPCTSTR)name); - - if (ret) { - return(TRUE); - } - - if (GetLastError() == ERROR_FILE_NOT_FOUND) { - /* the file does not exist, this not an error */ - - return(TRUE); - } - - count++; - - if (count > 100 && 0 == (count % 10)) { - fprintf(stderr, - "InnoDB: Warning: cannot delete file %s\n" - "InnoDB: Are you running ibbackup" - " to back up the file?\n", name); - - os_file_get_last_error(TRUE); /* print error information */ - } - - os_thread_sleep(1000000); /* sleep for a second */ - - if (count > 2000) { - - return(FALSE); - } - - goto loop; -#else - int ret; - - ret = unlink((const char*)name); - - if (ret != 0 && errno != ENOENT) { - os_file_handle_error_no_exit(name, "delete"); - - return(FALSE); - } - - return(TRUE); -#endif -} - -/*************************************************************************** -Deletes a file. The file has to be closed before calling this. 
*/ - -ibool -os_file_delete( -/*===========*/ - /* out: TRUE if success */ - const char* name) /* in: file path as a null-terminated string */ -{ -#ifdef __WIN__ - BOOL ret; - ulint count = 0; -loop: - /* In Windows, deleting an .ibd file may fail if ibbackup is copying - it */ - - ret = DeleteFile((LPCTSTR)name); - - if (ret) { - return(TRUE); - } - - if (GetLastError() == ERROR_FILE_NOT_FOUND) { - /* If the file does not exist, we classify this as a 'mild' - error and return */ - - return(FALSE); - } - - count++; - - if (count > 100 && 0 == (count % 10)) { - fprintf(stderr, - "InnoDB: Warning: cannot delete file %s\n" - "InnoDB: Are you running ibbackup" - " to back up the file?\n", name); - - os_file_get_last_error(TRUE); /* print error information */ - } - - os_thread_sleep(1000000); /* sleep for a second */ - - if (count > 2000) { - - return(FALSE); - } - - goto loop; -#else - int ret; - - ret = unlink((const char*)name); - - if (ret != 0) { - os_file_handle_error_no_exit(name, "delete"); - - return(FALSE); - } - - return(TRUE); -#endif -} - -/*************************************************************************** -Renames a file (can also move it to another directory). It is safest that the -file is closed before calling this function. */ - -ibool -os_file_rename( -/*===========*/ - /* out: TRUE if success */ - const char* oldpath,/* in: old file path as a null-terminated - string */ - const char* newpath)/* in: new file path */ -{ -#ifdef __WIN__ - BOOL ret; - - ret = MoveFile((LPCTSTR)oldpath, (LPCTSTR)newpath); - - if (ret) { - return(TRUE); - } - - os_file_handle_error_no_exit(oldpath, "rename"); - - return(FALSE); -#else - int ret; - - ret = rename((const char*)oldpath, (const char*)newpath); - - if (ret != 0) { - os_file_handle_error_no_exit(oldpath, "rename"); - - return(FALSE); - } - - return(TRUE); -#endif -} - -/*************************************************************************** -Closes a file handle. 
In case of error, error number can be retrieved with -os_file_get_last_error. */ - -ibool -os_file_close( -/*==========*/ - /* out: TRUE if success */ - os_file_t file) /* in, own: handle to a file */ -{ -#ifdef __WIN__ - BOOL ret; - - ut_a(file); - - ret = CloseHandle(file); - - if (ret) { - return(TRUE); - } - - os_file_handle_error(NULL, "close"); - - return(FALSE); -#else - int ret; - - ret = close(file); - - if (ret == -1) { - os_file_handle_error(NULL, "close"); - - return(FALSE); - } - - return(TRUE); -#endif -} - -/*************************************************************************** -Closes a file handle. */ - -ibool -os_file_close_no_error_handling( -/*============================*/ - /* out: TRUE if success */ - os_file_t file) /* in, own: handle to a file */ -{ -#ifdef __WIN__ - BOOL ret; - - ut_a(file); - - ret = CloseHandle(file); - - if (ret) { - return(TRUE); - } - - return(FALSE); -#else - int ret; - - ret = close(file); - - if (ret == -1) { - - return(FALSE); - } - - return(TRUE); -#endif -} - -/*************************************************************************** -Gets a file size. 
*/ - -ibool -os_file_get_size( -/*=============*/ - /* out: TRUE if success */ - os_file_t file, /* in: handle to a file */ - ulint* size, /* out: least significant 32 bits of file - size */ - ulint* size_high)/* out: most significant 32 bits of size */ -{ -#ifdef __WIN__ - DWORD high; - DWORD low; - - low = GetFileSize(file, &high); - - if ((low == 0xFFFFFFFF) && (GetLastError() != NO_ERROR)) { - return(FALSE); - } - - *size = low; - *size_high = high; - - return(TRUE); -#else - off_t offs; - - offs = lseek(file, 0, SEEK_END); - - if (offs == ((off_t)-1)) { - - return(FALSE); - } - - if (sizeof(off_t) > 4) { - *size = (ulint)(offs & 0xFFFFFFFFUL); - *size_high = (ulint)(offs >> 32); - } else { - *size = (ulint) offs; - *size_high = 0; - } - - return(TRUE); -#endif -} - -/*************************************************************************** -Gets file size as a 64-bit integer ib_longlong. */ - -ib_longlong -os_file_get_size_as_iblonglong( -/*===========================*/ - /* out: size in bytes, -1 if error */ - os_file_t file) /* in: handle to a file */ -{ - ulint size; - ulint size_high; - ibool success; - - success = os_file_get_size(file, &size, &size_high); - - if (!success) { - - return(-1); - } - - return((((ib_longlong)size_high) << 32) + (ib_longlong)size); -} - -/*************************************************************************** -Write the specified number of zeros to a newly created file. 
*/

ibool
os_file_set_size(
/*=============*/
				/* out: TRUE if all zeros were written and
				the file was flushed; FALSE if a write or
				the flush failed */
	const char*	name,	/* in: name of the file or path as a
				null-terminated string */
	os_file_t	file,	/* in: handle to a file */
	ulint		size,	/* in: least significant 32 bits of file
				size */
	ulint		size_high)/* in: most significant 32 bits of size */
{
	const ib_longlong	progress_step
		= (ib_longlong)(100 * 1024 * 1024);
	ib_longlong	current_size;
	ib_longlong	desired_size;
	ibool		ret;
	byte*		buf;
	byte*		buf2;
	ulint		n_pages;
	ulint		buf_size;

	ut_a(size == (size & 0xFFFFFFFF));

	current_size = 0;
	desired_size = (ib_longlong)size + (((ib_longlong)size_high) << 32);

	/* Write up to 64 pages at a time, but never less than one page: a
	zero-sized buffer would make every write below 0 bytes long, so a
	non-zero size smaller than one page would never be reached and the
	loop would spin forever. */
	n_pages = ut_min(64, (ulint) (desired_size / UNIV_PAGE_SIZE));

	if (n_pages == 0) {
		n_pages = 1;
	}

	buf_size = n_pages * UNIV_PAGE_SIZE;
	buf2 = ut_malloc(buf_size + UNIV_PAGE_SIZE);

	/* Align the buffer for possible raw i/o */
	buf = ut_align(buf2, UNIV_PAGE_SIZE);

	/* Write buffer full of zeros */
	memset(buf, 0, buf_size);

	if (desired_size >= progress_step) {

		fprintf(stderr, "InnoDB: Progress in MB:");
	}

	while (current_size < desired_size) {
		ulint	n_bytes;

		/* The last chunk may be shorter than the buffer */
		if (desired_size - current_size < (ib_longlong) buf_size) {
			n_bytes = (ulint) (desired_size - current_size);
		} else {
			n_bytes = buf_size;
		}

		ret = os_file_write(name, file, buf,
				    (ulint)(current_size & 0xFFFFFFFF),
				    (ulint)(current_size >> 32),
				    n_bytes);
		if (!ret) {
			ut_free(buf2);

			return(FALSE);
		}

		/* Print about progress for each 100 MB written */
		if ((ib_longlong) (current_size + n_bytes) / progress_step
		    != current_size / progress_step) {

			fprintf(stderr, " %lu00",
				(ulong) ((current_size + n_bytes)
					 / progress_step));
		}

		current_size += n_bytes;
	}

	if (desired_size >= progress_step) {

		fprintf(stderr, "\n");
	}

	ut_free(buf2);

	ret = os_file_flush(file);

	return(ret ? TRUE : FALSE);
}

-/*************************************************************************** -Truncates a file at its current position. */ - -ibool -os_file_set_eof( -/*============*/ - /* out: TRUE if success */ - FILE* file) /* in: file to be truncated */ -{ -#ifdef __WIN__ - HANDLE h = (HANDLE) _get_osfhandle(fileno(file)); - return(SetEndOfFile(h)); -#else /* __WIN__ */ - return(!ftruncate(fileno(file), ftell(file))); -#endif /* __WIN__ */ -} - -#ifndef __WIN__ -/*************************************************************************** -Wrapper to fsync(2) that retries the call on some errors. -Returns the value 0 if successful; otherwise the value -1 is returned and -the global variable errno is set to indicate the error. */ - -static -int -os_file_fsync( -/*==========*/ - /* out: 0 if success, -1 otherwise */ - os_file_t file) /* in: handle to a file */ -{ - int ret; - int failures; - ibool retry; - - failures = 0; - - do { - ret = fsync(file); - - os_n_fsyncs++; - - if (ret == -1 && errno == ENOLCK) { - - if (failures % 100 == 0) { - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: fsync(): " - "No locks available; retrying\n"); - } - - os_thread_sleep(200000 /* 0.2 sec */); - - failures++; - - retry = TRUE; - } else { - - retry = FALSE; - } - } while (retry); - - return(ret); -} -#endif /* !__WIN__ */ - -/*************************************************************************** -Flushes the write buffers of a given file to the disk. 
*/ - -ibool -os_file_flush( -/*==========*/ - /* out: TRUE if success */ - os_file_t file) /* in, own: handle to a file */ -{ -#ifdef __WIN__ - BOOL ret; - - ut_a(file); - - os_n_fsyncs++; - - ret = FlushFileBuffers(file); - - if (ret) { - return(TRUE); - } - - /* Since Windows returns ERROR_INVALID_FUNCTION if the 'file' is - actually a raw device, we choose to ignore that error if we are using - raw disks */ - - if (srv_start_raw_disk_in_use && GetLastError() - == ERROR_INVALID_FUNCTION) { - return(TRUE); - } - - os_file_handle_error(NULL, "flush"); - - /* It is a fatal error if a file flush does not succeed, because then - the database can get corrupt on disk */ - ut_error; - - return(FALSE); -#else - int ret; - -#if defined(HAVE_DARWIN_THREADS) -# ifndef F_FULLFSYNC - /* The following definition is from the Mac OS X 10.3 <sys/fcntl.h> */ -# define F_FULLFSYNC 51 /* fsync + ask the drive to flush to the media */ -# elif F_FULLFSYNC != 51 -# error "F_FULLFSYNC != 51: ABI incompatibility with Mac OS X 10.3" -# endif - /* Apple has disabled fsync() for internal disk drives in OS X. That - caused corruption for a user when he tested a power outage. Let us in - OS X use a nonstandard flush method recommended by an Apple - engineer. */ - - if (!srv_have_fullfsync) { - /* If we are not on an operating system that supports this, - then fall back to a plain fsync. */ - - ret = os_file_fsync(file); - } else { - ret = fcntl(file, F_FULLFSYNC, NULL); - - if (ret) { - /* If we are not on a file system that supports this, - then fall back to a plain fsync. 
*/ - ret = os_file_fsync(file); - } - } -#else - ret = os_file_fsync(file); -#endif - - if (ret == 0) { - return(TRUE); - } - - /* Since Linux returns EINVAL if the 'file' is actually a raw device, - we choose to ignore that error if we are using raw disks */ - - if (srv_start_raw_disk_in_use && errno == EINVAL) { - - return(TRUE); - } - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: the OS said file flush did not succeed\n"); - - os_file_handle_error(NULL, "flush"); - - /* It is a fatal error if a file flush does not succeed, because then - the database can get corrupt on disk */ - ut_error; - - return(FALSE); -#endif -} - -#ifndef __WIN__ -/*********************************************************************** -Does a synchronous read operation in Posix. */ -static -ssize_t -os_file_pread( -/*==========*/ - /* out: number of bytes read, -1 if error */ - os_file_t file, /* in: handle to a file */ - void* buf, /* in: buffer where to read */ - ulint n, /* in: number of bytes to read */ - ulint offset, /* in: least significant 32 bits of file - offset from where to read */ - ulint offset_high) /* in: most significant 32 bits of - offset */ -{ - off_t offs; - ssize_t n_bytes; - - ut_a((offset & 0xFFFFFFFFUL) == offset); - - /* If off_t is > 4 bytes in size, then we assume we can pass a - 64-bit address */ - - if (sizeof(off_t) > 4) { - offs = (off_t)offset + (((off_t)offset_high) << 32); - - } else { - offs = (off_t)offset; - - if (offset_high > 0) { - fprintf(stderr, - "InnoDB: Error: file read at offset > 4 GB\n"); - } - } - - os_n_file_reads++; - -#if defined(HAVE_PREAD) && !defined(HAVE_BROKEN_PREAD) - os_mutex_enter(os_file_count_mutex); - os_file_n_pending_preads++; - os_n_pending_reads++; - os_mutex_exit(os_file_count_mutex); - - n_bytes = pread(file, buf, (ssize_t)n, offs); - - os_mutex_enter(os_file_count_mutex); - os_file_n_pending_preads--; - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - return(n_bytes); -#else - { - 
off_t ret_offset; - ssize_t ret; - ulint i; - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads++; - os_mutex_exit(os_file_count_mutex); - - /* Protect the seek / read operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); - - ret_offset = lseek(file, offs, SEEK_SET); - - if (ret_offset < 0) { - ret = -1; - } else { - ret = read(file, buf, (ssize_t)n); - } - - os_mutex_exit(os_file_seek_mutexes[i]); - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - return(ret); - } -#endif -} - -/*********************************************************************** -Does a synchronous write operation in Posix. */ -static -ssize_t -os_file_pwrite( -/*===========*/ - /* out: number of bytes written, -1 if error */ - os_file_t file, /* in: handle to a file */ - const void* buf, /* in: buffer from where to write */ - ulint n, /* in: number of bytes to write */ - ulint offset, /* in: least significant 32 bits of file - offset where to write */ - ulint offset_high) /* in: most significant 32 bits of - offset */ -{ - ssize_t ret; - off_t offs; - - ut_a((offset & 0xFFFFFFFFUL) == offset); - - /* If off_t is > 4 bytes in size, then we assume we can pass a - 64-bit address */ - - if (sizeof(off_t) > 4) { - offs = (off_t)offset + (((off_t)offset_high) << 32); - } else { - offs = (off_t)offset; - - if (offset_high > 0) { - fprintf(stderr, - "InnoDB: Error: file write" - " at offset > 4 GB\n"); - } - } - - os_n_file_writes++; - -#if defined(HAVE_PWRITE) && !defined(HAVE_BROKEN_PREAD) - os_mutex_enter(os_file_count_mutex); - os_file_n_pending_pwrites++; - os_n_pending_writes++; - os_mutex_exit(os_file_count_mutex); - - ret = pwrite(file, buf, (ssize_t)n, offs); - - os_mutex_enter(os_file_count_mutex); - os_file_n_pending_pwrites--; - os_n_pending_writes--; - os_mutex_exit(os_file_count_mutex); - -# ifdef UNIV_DO_FLUSH - if (srv_unix_file_flush_method != 
SRV_UNIX_LITTLESYNC - && srv_unix_file_flush_method != SRV_UNIX_NOSYNC - && !os_do_not_call_flush_at_each_write) { - - /* Always do fsync to reduce the probability that when - the OS crashes, a database page is only partially - physically written to disk. */ - - ut_a(TRUE == os_file_flush(file)); - } -# endif /* UNIV_DO_FLUSH */ - - return(ret); -#else - { - off_t ret_offset; - ulint i; - - os_mutex_enter(os_file_count_mutex); - os_n_pending_writes++; - os_mutex_exit(os_file_count_mutex); - - /* Protect the seek / write operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); - - ret_offset = lseek(file, offs, SEEK_SET); - - if (ret_offset < 0) { - ret = -1; - - goto func_exit; - } - - ret = write(file, buf, (ssize_t)n); - -# ifdef UNIV_DO_FLUSH - if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC - && srv_unix_file_flush_method != SRV_UNIX_NOSYNC - && !os_do_not_call_flush_at_each_write) { - - /* Always do fsync to reduce the probability that when - the OS crashes, a database page is only partially - physically written to disk. */ - - ut_a(TRUE == os_file_flush(file)); - } -# endif /* UNIV_DO_FLUSH */ - -func_exit: - os_mutex_exit(os_file_seek_mutexes[i]); - - os_mutex_enter(os_file_count_mutex); - os_n_pending_writes--; - os_mutex_exit(os_file_count_mutex); - - return(ret); - } -#endif -} -#endif - -/*********************************************************************** -Requests a synchronous positioned read operation. 
*/ - -ibool -os_file_read( -/*=========*/ - /* out: TRUE if request was - successful, FALSE if fail */ - os_file_t file, /* in: handle to a file */ - void* buf, /* in: buffer where to read */ - ulint offset, /* in: least significant 32 bits of file - offset where to read */ - ulint offset_high, /* in: most significant 32 bits of - offset */ - ulint n) /* in: number of bytes to read */ -{ -#ifdef __WIN__ - BOOL ret; - DWORD len; - DWORD ret2; - DWORD low; - DWORD high; - ibool retry; - ulint i; - - ut_a((offset & 0xFFFFFFFFUL) == offset); - - os_n_file_reads++; - os_bytes_read_since_printout += n; - -try_again: - ut_ad(file); - ut_ad(buf); - ut_ad(n > 0); - - low = (DWORD) offset; - high = (DWORD) offset_high; - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads++; - os_mutex_exit(os_file_count_mutex); - - /* Protect the seek / read operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - - os_mutex_exit(os_file_seek_mutexes[i]); - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - goto error_handling; - } - - ret = ReadFile(file, buf, (DWORD) n, &len, NULL); - - os_mutex_exit(os_file_seek_mutexes[i]); - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - if (ret && len == n) { - return(TRUE); - } -#else - ibool retry; - ssize_t ret; - - os_bytes_read_since_printout += n; - -try_again: - ret = os_file_pread(file, buf, n, offset, offset_high); - - if ((ulint)ret == n) { - - return(TRUE); - } - - fprintf(stderr, - "InnoDB: Error: tried to read %lu bytes at offset %lu %lu.\n" - "InnoDB: Was only able to read %ld.\n", - (ulong)n, (ulong)offset_high, - (ulong)offset, (long)ret); -#endif -#ifdef __WIN__ -error_handling: -#endif - retry = os_file_handle_error(NULL, "read"); - - if 
(retry) { - goto try_again; - } - - fprintf(stderr, - "InnoDB: Fatal error: cannot read from file." - " OS error number %lu.\n", -#ifdef __WIN__ - (ulong) GetLastError() -#else - (ulong) errno -#endif - ); - fflush(stderr); - - ut_error; - - return(FALSE); -} - -/*********************************************************************** -Requests a synchronous positioned read operation. This function does not do -any error handling. In case of error it returns FALSE. */ - -ibool -os_file_read_no_error_handling( -/*===========================*/ - /* out: TRUE if request was - successful, FALSE if fail */ - os_file_t file, /* in: handle to a file */ - void* buf, /* in: buffer where to read */ - ulint offset, /* in: least significant 32 bits of file - offset where to read */ - ulint offset_high, /* in: most significant 32 bits of - offset */ - ulint n) /* in: number of bytes to read */ -{ -#ifdef __WIN__ - BOOL ret; - DWORD len; - DWORD ret2; - DWORD low; - DWORD high; - ibool retry; - ulint i; - - ut_a((offset & 0xFFFFFFFFUL) == offset); - - os_n_file_reads++; - os_bytes_read_since_printout += n; - -try_again: - ut_ad(file); - ut_ad(buf); - ut_ad(n > 0); - - low = (DWORD) offset; - high = (DWORD) offset_high; - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads++; - os_mutex_exit(os_file_count_mutex); - - /* Protect the seek / read operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - - os_mutex_exit(os_file_seek_mutexes[i]); - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - goto error_handling; - } - - ret = ReadFile(file, buf, (DWORD) n, &len, NULL); - - os_mutex_exit(os_file_seek_mutexes[i]); - - os_mutex_enter(os_file_count_mutex); - os_n_pending_reads--; - os_mutex_exit(os_file_count_mutex); - - if (ret && len == n) { - 
return(TRUE); - } -#else - ibool retry; - ssize_t ret; - - os_bytes_read_since_printout += n; - -try_again: - ret = os_file_pread(file, buf, n, offset, offset_high); - - if ((ulint)ret == n) { - - return(TRUE); - } -#endif -#ifdef __WIN__ -error_handling: -#endif - retry = os_file_handle_error_no_exit(NULL, "read"); - - if (retry) { - goto try_again; - } - - return(FALSE); -} - -/*********************************************************************** -Rewind file to its start, read at most size - 1 bytes from it to str, and -NUL-terminate str. All errors are silently ignored. This function is -mostly meant to be used with temporary files. */ - -void -os_file_read_string( -/*================*/ - FILE* file, /* in: file to read from */ - char* str, /* in: buffer where to read */ - ulint size) /* in: size of buffer */ -{ - size_t flen; - - if (size == 0) { - return; - } - - rewind(file); - flen = fread(str, 1, size - 1, file); - str[flen] = '\0'; -} - -/*********************************************************************** -Requests a synchronous write operation. 
*/ - -ibool -os_file_write( -/*==========*/ - /* out: TRUE if request was - successful, FALSE if fail */ - const char* name, /* in: name of the file or path as a - null-terminated string */ - os_file_t file, /* in: handle to a file */ - const void* buf, /* in: buffer from which to write */ - ulint offset, /* in: least significant 32 bits of file - offset where to write */ - ulint offset_high, /* in: most significant 32 bits of - offset */ - ulint n) /* in: number of bytes to write */ -{ -#ifdef __WIN__ - BOOL ret; - DWORD len; - DWORD ret2; - DWORD low; - DWORD high; - ulint i; - ulint n_retries = 0; - ulint err; - - ut_a((offset & 0xFFFFFFFF) == offset); - - os_n_file_writes++; - - ut_ad(file); - ut_ad(buf); - ut_ad(n > 0); -retry: - low = (DWORD) offset; - high = (DWORD) offset_high; - - os_mutex_enter(os_file_count_mutex); - os_n_pending_writes++; - os_mutex_exit(os_file_count_mutex); - - /* Protect the seek / write operation with a mutex */ - i = ((ulint) file) % OS_FILE_N_SEEK_MUTEXES; - - os_mutex_enter(os_file_seek_mutexes[i]); - - ret2 = SetFilePointer(file, low, &high, FILE_BEGIN); - - if (ret2 == 0xFFFFFFFF && GetLastError() != NO_ERROR) { - - os_mutex_exit(os_file_seek_mutexes[i]); - - os_mutex_enter(os_file_count_mutex); - os_n_pending_writes--; - os_mutex_exit(os_file_count_mutex); - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: File pointer positioning to" - " file %s failed at\n" - "InnoDB: offset %lu %lu. Operating system" - " error number %lu.\n" - "InnoDB: Some operating system error numbers" - " are described at\n" - "InnoDB: " - "http://dev.mysql.com/doc/refman/5.1/en/" - "operating-system-error-codes.html\n", - name, (ulong) offset_high, (ulong) offset, - (ulong) GetLastError()); - - return(FALSE); - } - - ret = WriteFile(file, buf, (DWORD) n, &len, NULL); - - /* Always do fsync to reduce the probability that when the OS crashes, - a database page is only partially physically written to disk. 
*/ - -# ifdef UNIV_DO_FLUSH - if (!os_do_not_call_flush_at_each_write) { - ut_a(TRUE == os_file_flush(file)); - } -# endif /* UNIV_DO_FLUSH */ - - os_mutex_exit(os_file_seek_mutexes[i]); - - os_mutex_enter(os_file_count_mutex); - os_n_pending_writes--; - os_mutex_exit(os_file_count_mutex); - - if (ret && len == n) { - - return(TRUE); - } - - /* If some background file system backup tool is running, then, at - least in Windows 2000, we may get here a specific error. Let us - retry the operation 100 times, with 1 second waits. */ - - if (GetLastError() == ERROR_LOCK_VIOLATION && n_retries < 100) { - - os_thread_sleep(1000000); - - n_retries++; - - goto retry; - } - - if (!os_has_said_disk_full) { - - err = (ulint)GetLastError(); - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: Write to file %s failed" - " at offset %lu %lu.\n" - "InnoDB: %lu bytes should have been written," - " only %lu were written.\n" - "InnoDB: Operating system error number %lu.\n" - "InnoDB: Check that your OS and file system" - " support files of this size.\n" - "InnoDB: Check also that the disk is not full" - " or a disk quota exceeded.\n", - name, (ulong) offset_high, (ulong) offset, - (ulong) n, (ulong) len, (ulong) err); - - if (strerror((int)err) != NULL) { - fprintf(stderr, - "InnoDB: Error number %lu means '%s'.\n", - (ulong) err, strerror((int)err)); - } - - fprintf(stderr, - "InnoDB: Some operating system error numbers" - " are described at\n" - "InnoDB: " - "http://dev.mysql.com/doc/refman/5.1/en/" - "operating-system-error-codes.html\n"); - - os_has_said_disk_full = TRUE; - } - - return(FALSE); -#else - ssize_t ret; - - ret = os_file_pwrite(file, buf, n, offset, offset_high); - - if ((ulint)ret == n) { - - return(TRUE); - } - - if (!os_has_said_disk_full) { - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: Write to file %s failed" - " at offset %lu %lu.\n" - "InnoDB: %lu bytes should have been written," - " only %ld were written.\n" - 
"InnoDB: Operating system error number %lu.\n" - "InnoDB: Check that your OS and file system" - " support files of this size.\n" - "InnoDB: Check also that the disk is not full" - " or a disk quota exceeded.\n", - name, offset_high, offset, n, (long int)ret, - (ulint)errno); - if (strerror(errno) != NULL) { - fprintf(stderr, - "InnoDB: Error number %lu means '%s'.\n", - (ulint)errno, strerror(errno)); - } - - fprintf(stderr, - "InnoDB: Some operating system error numbers" - " are described at\n" - "InnoDB: " - "http://dev.mysql.com/doc/refman/5.1/en/" - "operating-system-error-codes.html\n"); - - os_has_said_disk_full = TRUE; - } - - return(FALSE); -#endif -} - -/*********************************************************************** -Check the existence and type of the given file. */ - -ibool -os_file_status( -/*===========*/ - /* out: TRUE if call succeeded */ - const char* path, /* in: pathname of the file */ - ibool* exists, /* out: TRUE if file exists */ - os_file_type_t* type) /* out: type of the file (if it exists) */ -{ -#ifdef __WIN__ - int ret; - struct _stat statinfo; - - ret = _stat(path, &statinfo); - if (ret && (errno == ENOENT || errno == ENOTDIR)) { - /* file does not exist */ - *exists = FALSE; - return(TRUE); - } else if (ret) { - /* file exists, but stat call failed */ - - os_file_handle_error_no_exit(path, "stat"); - - return(FALSE); - } - - if (_S_IFDIR & statinfo.st_mode) { - *type = OS_FILE_TYPE_DIR; - } else if (_S_IFREG & statinfo.st_mode) { - *type = OS_FILE_TYPE_FILE; - } else { - *type = OS_FILE_TYPE_UNKNOWN; - } - - *exists = TRUE; - - return(TRUE); -#else - int ret; - struct stat statinfo; - - ret = stat(path, &statinfo); - if (ret && (errno == ENOENT || errno == ENOTDIR)) { - /* file does not exist */ - *exists = FALSE; - return(TRUE); - } else if (ret) { - /* file exists, but stat call failed */ - - os_file_handle_error_no_exit(path, "stat"); - - return(FALSE); - } - - if (S_ISDIR(statinfo.st_mode)) { - *type = OS_FILE_TYPE_DIR; - } 
else if (S_ISLNK(statinfo.st_mode)) { - *type = OS_FILE_TYPE_LINK; - } else if (S_ISREG(statinfo.st_mode)) { - *type = OS_FILE_TYPE_FILE; - } else { - *type = OS_FILE_TYPE_UNKNOWN; - } - - *exists = TRUE; - - return(TRUE); -#endif -} - -/*********************************************************************** -This function returns information about the specified file */ - -ibool -os_file_get_status( -/*===============*/ - /* out: TRUE if stat - information found */ - const char* path, /* in: pathname of the file */ - os_file_stat_t* stat_info) /* information of a file in a - directory */ -{ -#ifdef __WIN__ - int ret; - struct _stat statinfo; - - ret = _stat(path, &statinfo); - if (ret && (errno == ENOENT || errno == ENOTDIR)) { - /* file does not exist */ - - return(FALSE); - } else if (ret) { - /* file exists, but stat call failed */ - - os_file_handle_error_no_exit(path, "stat"); - - return(FALSE); - } - if (_S_IFDIR & statinfo.st_mode) { - stat_info->type = OS_FILE_TYPE_DIR; - } else if (_S_IFREG & statinfo.st_mode) { - stat_info->type = OS_FILE_TYPE_FILE; - } else { - stat_info->type = OS_FILE_TYPE_UNKNOWN; - } - - stat_info->ctime = statinfo.st_ctime; - stat_info->atime = statinfo.st_atime; - stat_info->mtime = statinfo.st_mtime; - stat_info->size = statinfo.st_size; - - return(TRUE); -#else - int ret; - struct stat statinfo; - - ret = stat(path, &statinfo); - - if (ret && (errno == ENOENT || errno == ENOTDIR)) { - /* file does not exist */ - - return(FALSE); - } else if (ret) { - /* file exists, but stat call failed */ - - os_file_handle_error_no_exit(path, "stat"); - - return(FALSE); - } - - if (S_ISDIR(statinfo.st_mode)) { - stat_info->type = OS_FILE_TYPE_DIR; - } else if (S_ISLNK(statinfo.st_mode)) { - stat_info->type = OS_FILE_TYPE_LINK; - } else if (S_ISREG(statinfo.st_mode)) { - stat_info->type = OS_FILE_TYPE_FILE; - } else { - stat_info->type = OS_FILE_TYPE_UNKNOWN; - } - - stat_info->ctime = statinfo.st_ctime; - stat_info->atime = statinfo.st_atime; - 
stat_info->mtime = statinfo.st_mtime; - stat_info->size = statinfo.st_size; - - return(TRUE); -#endif -} - -/* path name separator character */ -#ifdef __WIN__ -# define OS_FILE_PATH_SEPARATOR '\\' -#else -# define OS_FILE_PATH_SEPARATOR '/' -#endif - -/******************************************************************** -The function os_file_dirname returns a directory component of a -null-terminated pathname string. In the usual case, dirname returns -the string up to, but not including, the final '/', and basename -is the component following the final '/'. Trailing '/' charac -ters are not counted as part of the pathname. - -If path does not contain a slash, dirname returns the string ".". - -Concatenating the string returned by dirname, a "/", and the basename -yields a complete pathname. - -The return value is a copy of the directory component of the pathname. -The copy is allocated from heap. It is the caller responsibility -to free it after it is no longer needed. - -The following list of examples (taken from SUSv2) shows the strings -returned by dirname and basename for different paths: - - path dirname basename - "/usr/lib" "/usr" "lib" - "/usr/" "/" "usr" - "usr" "." "usr" - "/" "/" "/" - "." "." "." - ".." "." ".." -*/ - -char* -os_file_dirname( -/*============*/ - /* out, own: directory component of the - pathname */ - const char* path) /* in: pathname */ -{ - /* Find the offset of the last slash */ - const char* last_slash = strrchr(path, OS_FILE_PATH_SEPARATOR); - if (!last_slash) { - /* No slash in the path, return "." */ - - return(mem_strdup(".")); - } - - /* Ok, there is a slash */ - - if (last_slash == path) { - /* last slash is the first char of the path */ - - return(mem_strdup("/")); - } - - /* Non-trivial directory component */ - - return(mem_strdupl(path, last_slash - path)); -} - -/******************************************************************** -Creates all missing subdirectories along the given path. 
*/ - -ibool -os_file_create_subdirs_if_needed( -/*=============================*/ - /* out: TRUE if call succeeded - FALSE otherwise */ - const char* path) /* in: path name */ -{ - char* subdir; - ibool success, subdir_exists; - os_file_type_t type; - - subdir = os_file_dirname(path); - if (strlen(subdir) == 1 - && (*subdir == OS_FILE_PATH_SEPARATOR || *subdir == '.')) { - /* subdir is root or cwd, nothing to do */ - mem_free(subdir); - - return(TRUE); - } - - /* Test if subdir exists */ - success = os_file_status(subdir, &subdir_exists, &type); - if (success && !subdir_exists) { - /* subdir does not exist, create it */ - success = os_file_create_subdirs_if_needed(subdir); - if (!success) { - mem_free(subdir); - - return(FALSE); - } - success = os_file_create_directory(subdir, FALSE); - } - - mem_free(subdir); - - return(success); -} - -/******************************************************************** -Returns a pointer to the nth slot in the aio array. */ -static -os_aio_slot_t* -os_aio_array_get_nth_slot( -/*======================*/ - /* out: pointer to slot */ - os_aio_array_t* array, /* in: aio array */ - ulint index) /* in: index of the slot */ -{ - ut_a(index < array->n_slots); - - return((array->slots) + index); -} - -/**************************************************************************** -Creates an aio wait array. 
*/ -static -os_aio_array_t* -os_aio_array_create( -/*================*/ - /* out, own: aio array */ - ulint n) /* in: maximum number of pending aio operations - allowed */ -{ - os_aio_array_t* array; - ulint i; - os_aio_slot_t* slot; -#ifdef WIN_ASYNC_IO - OVERLAPPED* over; -#endif - ut_a(n > 0); - - array = ut_malloc(sizeof(os_aio_array_t)); - - array->mutex = os_mutex_create(NULL); - array->not_full = os_event_create(NULL); - array->is_empty = os_event_create(NULL); - - os_event_set(array->is_empty); - - array->n_slots = n; - array->n_reserved = 0; - array->slots = ut_malloc(n * sizeof(os_aio_slot_t)); -#ifdef __WIN__ - array->native_events = ut_malloc(n * sizeof(os_native_event_t)); -#endif - for (i = 0; i < n; i++) { - slot = os_aio_array_get_nth_slot(array, i); - - slot->pos = i; - slot->reserved = FALSE; -#ifdef WIN_ASYNC_IO - slot->event = os_event_create(NULL); - - over = &(slot->control); - - over->hEvent = slot->event->handle; - - *((array->native_events) + i) = over->hEvent; -#endif - } - - return(array); -} - -/**************************************************************************** -Initializes the asynchronous io system. Calls also os_io_init_simple. -Creates an aio array for each of non-ibuf read, non-ibuf write, ibuf IO, -log IO, and synchronous IO. The caller must create i/o handler thread for all -but the synchronous aio array. Multiple threads can access the same array for -the non-ibuf read (prefetch) and write (flush dirty buffer pages) arrays. -Return the number of AIO handler threads. 
*/ - -ulint -os_aio_init( -/*========*/ - ulint ios_per_array, /* in: maximum number of pending aio operations - allowed per array */ - ulint n_read_threads, /* in: number of read threads */ - ulint n_write_threads, /* in: number of write threads */ - ulint n_slots_sync) /* in: number of slots in the sync aio array */ -{ - ulint i; - ulint n_segments = 2 + n_read_threads + n_write_threads; -#ifdef POSIX_ASYNC_IO - sigset_t sigset; -#endif - ut_a(ios_per_array >= OS_AIO_N_PENDING_IOS_PER_THREAD); - ut_a(n_read_threads >= 1 && n_read_threads <= 64); - ut_a(n_write_threads >= 1 && n_write_threads <= 64); - ut_a(n_segments < SRV_MAX_N_IO_THREADS); - - os_io_init_simple(); - - for (i = 0; i < n_segments; i++) { - srv_set_io_thread_op_info(i, "not started yet"); - os_aio_thread_io_reads[i] = 0; - os_aio_thread_io_writes[i] = 0; - os_aio_thread_io_requests[i] = 0; - os_aio_thread_buffer[i] = 0; - os_aio_thread_buffer_size[i] = 0; - os_aio_thread_io_wait[i] = 0; - os_aio_thread_max_io_wait[i] = 0; - } - - os_aio_read_threads = n_read_threads; - os_aio_write_threads = n_write_threads; - os_aio_first_write_segment = os_aio_first_read_segment + os_aio_read_threads; - - fprintf(stderr, - "InnoDB: ios_per_array %lu read threads %lu write threads %lu\n", - ios_per_array, os_aio_read_threads, os_aio_write_threads); - - os_aio_ibuf_array = os_aio_array_create(ios_per_array); - - srv_io_thread_function[0] = "insert buffer thread"; - - os_aio_log_array = os_aio_array_create(ios_per_array); - - srv_io_thread_function[1] = "log thread"; - - os_aio_read_array = os_aio_array_create(ios_per_array); - for (i = os_aio_first_read_segment; i < os_aio_first_write_segment; i++) { - ut_a(i < SRV_MAX_N_IO_THREADS); - srv_io_thread_function[i] = "read thread"; - } - - os_aio_write_array = os_aio_array_create(ios_per_array); - for (i = os_aio_first_write_segment; i < n_segments; i++) { - ut_a(i < SRV_MAX_N_IO_THREADS); - srv_io_thread_function[i] = "write thread"; - } - - os_aio_sync_array = 
os_aio_array_create(n_slots_sync); - - os_aio_n_segments = 2 + os_aio_read_threads + os_aio_write_threads; - - os_aio_validate(); - - os_aio_segment_wait_events = ut_malloc(n_segments * sizeof(void*)); - - for (i = 0; i < n_segments; i++) { - os_aio_segment_wait_events[i] = os_event_create(NULL); - } - - os_last_printout = time(NULL); - -#ifdef POSIX_ASYNC_IO - /* Block aio signals from the current thread and its children: - for this to work, the current thread must be the first created - in the database, so that all its children will inherit its - signal mask */ - - /* TODO: to work MySQL needs the SIGALARM signal; the following - will not work yet! */ - sigemptyset(&sigset); - sigaddset(&sigset, SIGRTMIN + 1 + 0); - sigaddset(&sigset, SIGRTMIN + 1 + 1); - sigaddset(&sigset, SIGRTMIN + 1 + 2); - sigaddset(&sigset, SIGRTMIN + 1 + 3); - - pthread_sigmask(SIG_BLOCK, &sigset, NULL); */ -#endif - return os_aio_n_segments; -} - -#ifdef WIN_ASYNC_IO -/**************************************************************************** -Wakes up all async i/o threads in the array in Windows async i/o at -shutdown. */ -static -void -os_aio_array_wake_win_aio_at_shutdown( -/*==================================*/ - os_aio_array_t* array) /* in: aio array */ -{ - ulint i; - - for (i = 0; i < array->n_slots; i++) { - - os_event_set((array->slots + i)->event); - } -} -#endif - -/**************************************************************************** -Wakes up all async i/o threads so that they know to exit themselves in -shutdown. 
*/ - -void -os_aio_wake_all_threads_at_shutdown(void) -/*=====================================*/ -{ - ulint i; - -#ifdef WIN_ASYNC_IO - /* This code wakes up all ai/o threads in Windows native aio */ - os_aio_array_wake_win_aio_at_shutdown(os_aio_read_array); - os_aio_array_wake_win_aio_at_shutdown(os_aio_write_array); - os_aio_array_wake_win_aio_at_shutdown(os_aio_ibuf_array); - os_aio_array_wake_win_aio_at_shutdown(os_aio_log_array); -#endif - /* This loop wakes up all simulated ai/o threads */ - - for (i = 0; i < os_aio_n_segments; i++) { - - os_event_set(os_aio_segment_wait_events[i]); - } -} - -/**************************************************************************** -Waits until there are no pending writes in os_aio_write_array. There can -be other, synchronous, pending writes. */ - -void -os_aio_wait_until_no_pending_writes(void) -/*=====================================*/ -{ - os_event_wait(os_aio_write_array->is_empty); -} - -/************************************************************************** -Calculates aio array from global segment number. */ -static -os_aio_array_t* -os_aio_get_array( -/*===============================*/ - /* out: aio wait array */ - ulint global_segment)/* in: global segment number */ -{ - ut_a(global_segment < os_aio_n_segments); - - if (global_segment == 0) { - return os_aio_ibuf_array; - - } else if (global_segment == 1) { - return os_aio_log_array; - - } else if (global_segment < os_aio_first_write_segment) { - return os_aio_read_array; - - } else { - return os_aio_write_array; - } -} - -/*********************************************************************** -Gets an integer value designating a specified aio array. This is used -to give numbers to signals in Posix aio. 
*/ - -#if !defined(WIN_ASYNC_IO) && defined(POSIX_ASYNC_IO) -static -ulint -os_aio_get_array_no( -/*================*/ - os_aio_array_t* array) /* in: aio array */ -{ - if (array == os_aio_ibuf_array) { - - return(0); - - } else if (array == os_aio_log_array) { - - return(1); - - } else if (array == os_aio_read_array) { - - return(2); - } else if (array == os_aio_write_array) { - - return(3); - } else { - ut_error; - - return(0); - } -} - -/*********************************************************************** -Gets the aio array for its number. */ -static -os_aio_array_t* -os_aio_get_array_from_no( -/*=====================*/ - /* out: aio array */ - ulint n) /* in: array number */ -{ - if (n == 0) { - return(os_aio_ibuf_array); - } else if (n == 1) { - - return(os_aio_log_array); - } else if (n == 2) { - - return(os_aio_read_array); - } else if (n == 3) { - - return(os_aio_write_array); - } else { - ut_error; - - return(NULL); - } -} -#endif /* if !defined(WIN_ASYNC_IO) && defined(POSIX_ASYNC_IO) */ - -/*********************************************************************** -Requests for a slot in the aio array. If no slot is available, waits until -not_full-event becomes signaled. 
*/ -static -os_aio_slot_t* -os_aio_array_reserve_slot( -/*======================*/ - /* out: pointer to slot */ - ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */ - os_aio_array_t* array, /* in: aio array */ - fil_node_t* message1,/* in: message to be passed along with - the aio operation */ - void* message2,/* in: message to be passed along with - the aio operation */ - os_file_t file, /* in: file handle */ - const char* name, /* in: name of the file or path as a - null-terminated string */ - void* buf, /* in: buffer where to read or from which - to write */ - ulint offset, /* in: least significant 32 bits of file - offset */ - ulint offset_high, /* in: most significant 32 bits of - offset */ - ulint len) /* in: length of the block to read or write */ -{ - os_aio_slot_t* slot; -#ifdef WIN_ASYNC_IO - OVERLAPPED* control; - -#elif defined(POSIX_ASYNC_IO) - - struct aiocb* control; -#endif - ulint i; -loop: - os_mutex_enter(array->mutex); - - if (array->n_reserved == array->n_slots) { - os_mutex_exit(array->mutex); - - if (!os_aio_use_native_aio) { - /* If the handler threads are suspended, wake them - so that we get more slots */ - - os_aio_simulated_wake_handler_threads(); - } - - os_event_wait(array->not_full); - - goto loop; - } - - for (i = 0;; i++) { - slot = os_aio_array_get_nth_slot(array, i); - - if (slot->reserved == FALSE) { - break; - } - } - ut_a(i < array->n_slots); - array->n_reserved++; - - if (array->n_reserved == 1) { - os_event_reset(array->is_empty); - } - - if (array->n_reserved == array->n_slots) { - os_event_reset(array->not_full); - } - - slot->reserved = TRUE; - slot->reservation_time = time(NULL); - slot->message1 = message1; - slot->message2 = message2; - slot->file = file; - slot->name = name; - slot->len = len; - slot->type = type; - slot->buf = buf; - slot->offset = offset; - slot->offset_high = offset_high; - slot->status = OS_AIO_NOT_ISSUED; - -#ifdef WIN_ASYNC_IO - control = &(slot->control); - control->Offset = (DWORD)offset; - 
control->OffsetHigh = (DWORD)offset_high; - os_event_reset(slot->event); - -#elif defined(POSIX_ASYNC_IO) - -#if (UNIV_WORD_SIZE == 8) - offset = offset + (offset_high << 32); -#else - ut_a(offset_high == 0); -#endif - control = &(slot->control); - control->aio_fildes = file; - control->aio_buf = buf; - control->aio_nbytes = len; - control->aio_offset = offset; - control->aio_reqprio = 0; - control->aio_sigevent.sigev_notify = SIGEV_SIGNAL; - control->aio_sigevent.sigev_signo - = SIGRTMIN + 1 + os_aio_get_array_no(array); - /* TODO: How to choose the signal numbers? */ - /* - fprintf(stderr, "AIO signal number %lu\n", - (ulint) control->aio_sigevent.sigev_signo); - */ - control->aio_sigevent.sigev_value.sival_ptr = slot; -#endif - os_mutex_exit(array->mutex); - - return(slot); -} - -/*********************************************************************** -Frees a slot in the aio array. */ -static -void -os_aio_array_free_slot( -/*===================*/ - os_aio_array_t* array, /* in: aio array */ - os_aio_slot_t* slot) /* in: pointer to slot */ -{ - ut_ad(array); - ut_ad(slot); - - os_mutex_enter(array->mutex); - - ut_ad(slot->reserved); - - slot->reserved = FALSE; - slot->status = OS_AIO_NOT_ISSUED; - - array->n_reserved--; - - if (array->n_reserved == array->n_slots - 1) { - os_event_set(array->not_full); - } - - if (array->n_reserved == 0) { - os_event_set(array->is_empty); - } - -#ifdef WIN_ASYNC_IO - os_event_reset(slot->event); -#endif - os_mutex_exit(array->mutex); -} - -/************************************************************************** -Wakes up a simulated aio i/o-handler thread if it has something to do. 
*/
static
void
os_aio_simulated_wake_handler_thread(
/*=================================*/
	os_aio_array_t*	array)	/* in: aio array for which wakeup is done */
{
	os_aio_slot_t*	slot;
	ulint		n_slots;
	ulint		i;
	ulint		seg;
	ulint		seg_end;
	ibool		has_work	= FALSE;

	ut_ad(!os_aio_use_native_aio);
	n_slots = array->n_slots;

	/* Scan the slots under the array mutex and stop at the first
	one that still needs attention from a handler thread */

	os_mutex_enter(array->mutex);

	for (i = 0; i < n_slots && !has_work; i++) {
		slot = os_aio_array_get_nth_slot(array, i);

		/* OS_AIO_NOT_ISSUED means the read or write request has
		yet to be done. OS_AIO_DONE means the request has been
		done but it was part of a set of requests merged into
		one read or write call and was not the first block in
		the request, so the handling of the IO completion for
		that block has not been done. */
		has_work = slot->reserved
			&& (slot->status == OS_AIO_NOT_ISSUED
			    || slot->status == OS_AIO_DONE);
	}

	os_mutex_exit(array->mutex);

	if (!has_work) {

		return;
	}

	/* Work out which segment wait events belong to this array */
	if (array == os_aio_ibuf_array) {
		seg = 0;
		seg_end = 1;

	} else if (array == os_aio_log_array) {
		seg = 1;
		seg_end = 2;

	} else if (array == os_aio_read_array) {
		seg = os_aio_first_read_segment;
		seg_end = os_aio_first_write_segment;

	} else if (array == os_aio_write_array) {
		seg = os_aio_first_write_segment;
		seg_end = os_aio_n_segments;

	} else {
		ut_a(0);

		seg = 0;
		seg_end = 0;
	}

	for (; seg < seg_end; seg++) {
		os_event_set(os_aio_segment_wait_events[seg]);
	}
}

/**************************************************************************
Wakes up simulated aio i/o-handler threads if they have something to do.
*/ - -void -os_aio_simulated_wake_handler_threads(void) -/*=======================================*/ -{ - if (os_aio_use_native_aio) { - /* We do not use simulated aio: do nothing */ - - return; - } - - os_aio_recommend_sleep_for_read_threads = FALSE; - - os_aio_simulated_wake_handler_thread(os_aio_ibuf_array); - os_aio_simulated_wake_handler_thread(os_aio_log_array); - os_aio_simulated_wake_handler_thread(os_aio_read_array); - os_aio_simulated_wake_handler_thread(os_aio_write_array); -} - -/************************************************************************** -This function can be called if one wants to post a batch of reads and -prefers an i/o-handler thread to handle them all at once later. You must -call os_aio_simulated_wake_handler_threads later to ensure the threads -are not left sleeping! */ - -void -os_aio_simulated_put_read_threads_to_sleep(void) -/*============================================*/ -{ - ulint g; - - /* TODO(mcallaghan): provide similar function for write? */ - os_aio_recommend_sleep_for_read_threads = TRUE; - - for (g = os_aio_first_read_segment; g < os_aio_first_write_segment; g++) { - os_event_reset(os_aio_segment_wait_events[g]); - } -} - -/*********************************************************************** -Requests an asynchronous i/o operation. */ - -ibool -os_aio( -/*===*/ - /* out: TRUE if request was queued - successfully, FALSE if fail */ - ulint type, /* in: OS_FILE_READ or OS_FILE_WRITE */ - ulint mode, /* in: OS_AIO_NORMAL, ..., possibly ORed - to OS_AIO_SIMULATED_WAKE_LATER: the - last flag advises this function not to wake - i/o-handler threads, but the caller will - do the waking explicitly later, in this - way the caller can post several requests in - a batch; NOTE that the batch must not be - so big that it exhausts the slots in aio - arrays! NOTE that a simulated batch - may introduce hidden chances of deadlocks, - because i/os are not actually handled until - all have been posted: use with great - caution! 
*/ - const char* name, /* in: name of the file or path as a - null-terminated string */ - os_file_t file, /* in: handle to a file */ - void* buf, /* in: buffer where to read or from which - to write */ - ulint offset, /* in: least significant 32 bits of file - offset where to read or write */ - ulint offset_high, /* in: most significant 32 bits of - offset */ - ulint n, /* in: number of bytes to read or write */ - fil_node_t* message1,/* in: messages for the aio handler (these - can be used to identify a completed aio - operation); if mode is OS_AIO_SYNC, these - are ignored */ - void* message2) -{ - os_aio_array_t* array; - os_aio_slot_t* slot; -#ifdef WIN_ASYNC_IO - ibool retval; - BOOL ret = TRUE; - DWORD len = (DWORD) n; - struct fil_node_struct * dummy_mess1; - void* dummy_mess2; - ulint dummy_type; -#endif - ulint err = 0; - ibool retry; - ulint wake_later; - - ut_ad(file); - ut_ad(buf); - ut_ad(n > 0); - ut_ad(n % OS_FILE_LOG_BLOCK_SIZE == 0); - ut_ad(offset % OS_FILE_LOG_BLOCK_SIZE == 0); - ut_ad(os_aio_validate()); - - wake_later = mode & OS_AIO_SIMULATED_WAKE_LATER; - mode = mode & (~OS_AIO_SIMULATED_WAKE_LATER); - - if (mode == OS_AIO_SYNC -#ifdef WIN_ASYNC_IO - && !os_aio_use_native_aio -#endif - ) { - /* This is actually an ordinary synchronous read or write: - no need to use an i/o-handler thread. NOTE that if we use - Windows async i/o, Windows does not allow us to use - ordinary synchronous os_file_read etc. on the same file, - therefore we have built a special mechanism for synchronous - wait in the Windows case. 
*/ - - if (type == OS_FILE_READ) { - return(os_file_read(file, buf, offset, - offset_high, n)); - } - - ut_a(type == OS_FILE_WRITE); - - return(os_file_write(name, file, buf, offset, offset_high, n)); - } - -try_again: - if (mode == OS_AIO_NORMAL) { - if (type == OS_FILE_READ) { - array = os_aio_read_array; - } else { - array = os_aio_write_array; - } - } else if (mode == OS_AIO_IBUF) { - ut_ad(type == OS_FILE_READ); - /* Reduce probability of deadlock bugs in connection with ibuf: - do not let the ibuf i/o handler sleep */ - - wake_later = FALSE; - - array = os_aio_ibuf_array; - } else if (mode == OS_AIO_LOG) { - - array = os_aio_log_array; - } else if (mode == OS_AIO_SYNC) { - array = os_aio_sync_array; - } else { - array = NULL; /* Eliminate compiler warning */ - ut_error; - } - - slot = os_aio_array_reserve_slot(type, array, message1, message2, file, - name, buf, offset, offset_high, n); - if (type == OS_FILE_READ) { - if (os_aio_use_native_aio) { -#ifdef WIN_ASYNC_IO - os_n_file_reads++; - os_bytes_read_since_printout += len; - - ret = ReadFile(file, buf, (DWORD)n, &len, - &(slot->control)); -#elif defined(POSIX_ASYNC_IO) - slot->control.aio_lio_opcode = LIO_READ; - err = (ulint) aio_read(&(slot->control)); - fprintf(stderr, "Starting POSIX aio read %lu\n", err); -#endif - } else { - if (!wake_later) { - os_aio_simulated_wake_handler_thread(array); - } - } - } else if (type == OS_FILE_WRITE) { - if (os_aio_use_native_aio) { -#ifdef WIN_ASYNC_IO - os_n_file_writes++; - ret = WriteFile(file, buf, (DWORD)n, &len, - &(slot->control)); -#elif defined(POSIX_ASYNC_IO) - slot->control.aio_lio_opcode = LIO_WRITE; - err = (ulint) aio_write(&(slot->control)); - fprintf(stderr, "Starting POSIX aio write %lu\n", err); -#endif - } else { - if (!wake_later) { - os_aio_simulated_wake_handler_thread(array); - } - } - } else { - ut_error; - } - -#ifdef WIN_ASYNC_IO - if (os_aio_use_native_aio) { - if ((ret && len == n) - || (!ret && GetLastError() == ERROR_IO_PENDING)) { - /* 
aio was queued successfully! */ - - if (mode == OS_AIO_SYNC) { - /* We want a synchronous i/o operation on a - file where we also use async i/o: in Windows - we must use the same wait mechanism as for - async i/o */ - - retval = os_aio_windows_handle(ULINT_UNDEFINED, - slot->pos, - &dummy_mess1, - &dummy_mess2, - &dummy_type); - - return(retval); - } - - return(TRUE); - } - - err = 1; /* Fall through the next if */ - } -#endif - if (err == 0) { - /* aio was queued successfully! */ - - return(TRUE); - } - - os_aio_array_free_slot(array, slot); - - retry = os_file_handle_error(name, - type == OS_FILE_READ - ? "aio read" : "aio write"); - if (retry) { - - goto try_again; - } - - return(FALSE); -} - -#ifdef WIN_ASYNC_IO -/************************************************************************** -This function is only used in Windows asynchronous i/o. -Waits for an aio operation to complete. This function is used to wait the -for completed requests. The aio array of pending requests is divided -into segments. The thread specifies which segment or slot it wants to wait -for. NOTE: this function will also take care of freeing the aio slot, -therefore no other thread is allowed to do the freeing! 
*/ - -ibool -os_aio_windows_handle( -/*==================*/ - /* out: TRUE if the aio operation succeeded */ - ulint global_segment, /* in: the number of the segment in the aio - arrays to wait for; segment 0 is the ibuf - i/o thread, segment 1 the log i/o thread, - then follow the non-ibuf read threads, and as - the last are the non-ibuf write threads; if - this is ULINT_UNDEFINED, then it means that - sync aio is used, and this parameter is - ignored */ - ulint pos, /* this parameter is used only in sync aio: - wait for the aio slot at this position */ - fil_node_t**message1, /* out: the messages passed with the aio - request; note that also in the case where - the aio operation failed, these output - parameters are valid and can be used to - restart the operation, for example */ - void** message2, - ulint* type) /* out: OS_FILE_WRITE or ..._READ */ -{ - os_aio_array_t* array; - os_aio_slot_t* slot; - ulint n; - ulint i; - ibool ret_val; - BOOL ret; - DWORD len; - - if (global_segment == ULINT_UNDEFINED) { - array = os_aio_sync_array; - } else { - array = os_aio_get_array(global_segment); - } - - /* NOTE! We only access constant fields in os_aio_array. 
Therefore - we do not have to acquire the protecting mutex yet */ - - ut_ad(os_aio_validate()); - - n = array->n_slots; - - if (array == os_aio_sync_array) { - os_event_wait(os_aio_array_get_nth_slot(array, pos)->event); - i = pos; - } else { - srv_set_io_thread_op_info(global_segment, "wait Windows aio"); - i = os_event_wait_multiple(n, (array->native_events)); - } - - os_mutex_enter(array->mutex); - - slot = os_aio_array_get_nth_slot(array, i); - - ut_a(slot->reserved); - - if (global_segment != ULINT_UNDEFINED) { - srv_set_io_thread_op_info(global_segment, - "get windows aio return value"); - } - - ret = GetOverlappedResult(slot->file, &(slot->control), &len, TRUE); - - *message1 = slot->message1; - *message2 = slot->message2; - - *type = slot->type; - - if (ret && len == slot->len) { - ret_val = TRUE; - -# ifdef UNIV_DO_FLUSH - if (slot->type == OS_FILE_WRITE - && !os_do_not_call_flush_at_each_write) { - ut_a(TRUE == os_file_flush(slot->file)); - } -# endif /* UNIV_DO_FLUSH */ - } else { - os_file_handle_error(slot->name, "Windows aio"); - - ret_val = FALSE; - } - - os_mutex_exit(array->mutex); - - os_aio_array_free_slot(array, slot); - - return(ret_val); -} -#endif - -#ifdef POSIX_ASYNC_IO - -/************************************************************************** -This function is only used in Posix asynchronous i/o. Waits for an aio -operation to complete. 
*/ - -ibool -os_aio_posix_handle( -/*================*/ - /* out: TRUE if the aio operation succeeded */ - ulint array_no, /* in: array number 0 - 3 */ - fil_node_t**message1, /* out: the messages passed with the aio - request; note that also in the case where - the aio operation failed, these output - parameters are valid and can be used to - restart the operation, for example */ - void** message2) -{ - os_aio_array_t* array; - os_aio_slot_t* slot; - siginfo_t info; - sigset_t sigset; - sigset_t proc_sigset; - sigset_t thr_sigset; - int ret; - int i; - int sig; - - sigemptyset(&sigset); - sigaddset(&sigset, SIGRTMIN + 1 + array_no); - - pthread_sigmask(SIG_UNBLOCK, &sigset, NULL); - -#if 0 - sigprocmask(0, NULL, &proc_sigset); - pthread_sigmask(0, NULL, &thr_sigset); - - for (i = 32 ; i < 40; i++) { - fprintf(stderr, "%lu : %lu %lu\n", (ulint)i, - (ulint) sigismember(&proc_sigset, i), - (ulint) sigismember(&thr_sigset, i)); - } -#endif - - ret = sigwaitinfo(&sigset, &info); - - if (sig != SIGRTMIN + 1 + array_no) { - - ut_error; - - return(FALSE); - } - - fputs("Handling POSIX aio\n", stderr); - - array = os_aio_get_array_from_no(array_no); - - os_mutex_enter(array->mutex); - - slot = info.si_value.sival_ptr; - - ut_a(slot->reserved); - - *message1 = slot->message1; - *message2 = slot->message2; - -# ifdef UNIV_DO_FLUSH - if (slot->type == OS_FILE_WRITE - && !os_do_not_call_flush_at_each_write) { - ut_a(TRUE == os_file_flush(slot->file)); - } -# endif /* UNIV_DO_FLUSH */ - - os_mutex_exit(array->mutex); - - os_aio_array_free_slot(array, slot); - - return(TRUE); -} -#endif - -/************************************************************************** -Do a 'last millisecond' check that the page end is sensible; -reported page checksum errors from Linux seem to wipe over the page end. 
*/ -static -void -os_file_check_page_trailers( -/*========================*/ - byte* combined_buf, /* in: combined write buffer */ - ulint total_len) /* in: size of combined_buf, in bytes - (a multiple of UNIV_PAGE_SIZE) */ -{ - ulint len; - - for (len = 0; len + UNIV_PAGE_SIZE <= total_len; - len += UNIV_PAGE_SIZE) { - byte* buf = combined_buf + len; - - if (UNIV_UNLIKELY - (memcmp(buf + (FIL_PAGE_LSN + 4), - buf + (UNIV_PAGE_SIZE - - FIL_PAGE_END_LSN_OLD_CHKSUM + 4), 4))) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: ERROR: The page to be written" - " seems corrupt!\n" - "InnoDB: Writing a block of %lu bytes," - " currently at offset %lu\n", - (ulong)total_len, (ulong)len); - buf_page_print(buf); - fprintf(stderr, - "InnoDB: ERROR: The page to be written" - " seems corrupt!\n"); - } - } -} - -/************************************************************************** -Does simulated aio. This function should be called by an i/o-handler -thread. */ - -ibool -os_aio_simulated_handle( -/*====================*/ - /* out: TRUE if the aio operation succeeded */ - ulint global_segment, /* in: the number of the segment in the aio - arrays to wait for; segment 0 is the ibuf - i/o thread, segment 1 the log i/o thread, - then follow the non-ibuf read threads, and as - the last are the non-ibuf write threads */ - fil_node_t**message1, /* out: the messages passed with the aio - request; note that also in the case where - the aio operation failed, these output - parameters are valid and can be used to - restart the operation, for example */ - void** message2, - ulint* type) /* out: OS_FILE_WRITE or ..._READ */ -{ - os_aio_array_t* array; - os_aio_slot_t* slot; - os_aio_slot_t* slot2; - os_aio_slot_t* consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE]; - os_aio_slot_t* lowest_request; - os_aio_slot_t* oldest_request; - ulint n_consecutive; - ulint total_len; - ulint offs; - ulint lowest_offset; - ulint oldest_offset; - ulint biggest_age; - ulint age; - byte* 
combined_buf; - byte* combined_buf2; - ibool ret; - ulint n; - ulint i; - - double start_usecs, stop_usecs, elapsed_usecs; - time_t now; - array = os_aio_get_array(global_segment); - -restart: - /* NOTE! We only access constant fields in os_aio_array. Therefore - we do not have to acquire the protecting mutex yet */ - - srv_set_io_thread_op_info(global_segment, - "looking for i/o requests (a)"); - ut_ad(os_aio_validate()); - - n = array->n_slots; - - /* Look through n slots */ - - if (array == os_aio_read_array - && os_aio_recommend_sleep_for_read_threads) { - - /* Give other threads chance to add several i/os to the array - at once. */ - - goto recommended_sleep; - } - - os_mutex_enter(array->mutex); - - srv_set_io_thread_op_info(global_segment, - "looking for i/o requests (b)"); - - /* Check if there is a slot for which the i/o has already been - done */ - - for (i = 0; i < n; i++) { - slot = os_aio_array_get_nth_slot(array, i); - - if (slot->reserved && slot->status == OS_AIO_DONE) { - - if (os_aio_print_debug) { - fprintf(stderr, - "InnoDB: i/o for slot %lu" - " already done, returning\n", - (ulong) i); - } - - ret = TRUE; - - goto slot_io_done; - } - } - - biggest_age = 0; - now = time(NULL); - oldest_request = lowest_request = NULL; - oldest_offset = lowest_offset = ULINT_MAX; - - /* Find the oldest request and the request with the smallest offset */ - for (i = 0; i < n; i++) { - slot = os_aio_array_get_nth_slot(array, i); - - if (slot->reserved && slot->status == OS_AIO_NOT_ISSUED) { - age = (ulint)difftime(now, slot->reservation_time); - - /* If there are at least 2 seconds old requests, then pick the oldest - one to prevent starvation. If several requests have the same age, - then pick the one at the lowest offset. 
*/ - if ((age >= 2 && age > biggest_age) - || (age >= 2 && age == biggest_age - && slot->offset < oldest_offset)) { - - /* Found an i/o request */ - biggest_age = age; - oldest_request = slot; - oldest_offset = slot->offset; - } - - /* Look for an i/o request at the lowest offset in the array - * (we ignore the high 32 bits of the offset) */ - if (slot->offset < lowest_offset) { - /* Found an i/o request */ - lowest_request = slot; - - - - lowest_offset = slot->offset; - } - } - } - - if (!lowest_request && !oldest_request) { - - /* No i/o requested at the moment */ - - goto wait_for_io; - } - - if (oldest_request) { - slot = oldest_request; - } else { - slot = lowest_request; - } - consecutive_ios[0] = slot; - n_consecutive = 1; - - /* Check if there are several consecutive blocks to read or write */ - -consecutive_loop: - for (i = 0; i < n; i++) { - slot2 = os_aio_array_get_nth_slot(array, i); - - if (slot2->reserved && slot2 != slot - && slot2->offset == slot->offset + slot->len - /* check that sum does not wrap over */ - && slot->offset + slot->len > slot->offset - && slot2->offset_high == slot->offset_high - && slot2->type == slot->type - && slot2->file == slot->file - && slot2->status == OS_AIO_NOT_ISSUED) { - - /* Found a consecutive i/o request */ - - consecutive_ios[n_consecutive] = slot2; - n_consecutive++; - - slot = slot2; - - if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE && - n_consecutive < srv_max_merged_io) { - - goto consecutive_loop; - } else { - break; - } - } - } - - srv_set_io_thread_op_info(global_segment, "consecutive i/o requests"); - - /* We have now collected n_consecutive i/o requests in the array; - allocate a single buffer which can hold all data, and perform the - i/o */ - - total_len = 0; - slot = consecutive_ios[0]; - - for (i = 0; i < n_consecutive; i++) { - total_len += consecutive_ios[i]->len; - ut_a(consecutive_ios[i]->status == OS_AIO_NOT_ISSUED); - consecutive_ios[i]->status = OS_AIO_ISSUED; - } - - if (n_consecutive == 1) { - 
/* We can use the buffer of the i/o request */ - combined_buf = slot->buf; - combined_buf2 = NULL; - } else { - if ((total_len + UNIV_PAGE_SIZE) > os_aio_thread_buffer_size[global_segment]) { - - if (os_aio_thread_buffer[global_segment]) - ut_free(os_aio_thread_buffer[global_segment]); - - os_aio_thread_buffer[global_segment] = ut_malloc(total_len + UNIV_PAGE_SIZE); - - os_aio_thread_buffer_size[global_segment] = total_len + UNIV_PAGE_SIZE; - } - combined_buf2 = os_aio_thread_buffer[global_segment]; - - ut_a(combined_buf2); - - combined_buf = ut_align(combined_buf2, UNIV_PAGE_SIZE); - } - - /* We release the array mutex for the time of the i/o: NOTE that - this assumes that there is just one i/o-handler thread serving - a single segment of slots! */ - - ut_a(slot->reserved); - ut_a(slot->status == OS_AIO_ISSUED); - - os_mutex_exit(array->mutex); - - if (slot->type == OS_FILE_WRITE && n_consecutive > 1) { - /* Copy the buffers to the combined buffer */ - offs = 0; - - for (i = 0; i < n_consecutive; i++) { - - ut_memcpy(combined_buf + offs, consecutive_ios[i]->buf, - consecutive_ios[i]->len); - offs += consecutive_ios[i]->len; - } - } - - srv_set_io_thread_op_info(global_segment, "doing file i/o"); - - if (os_aio_print_debug) { - fprintf(stderr, - "InnoDB: doing i/o of type %lu at offset %lu %lu," - " length %lu\n", - (ulong) slot->type, (ulong) slot->offset_high, - (ulong) slot->offset, (ulong) total_len); - } - - /* Do the i/o with ordinary, synchronous i/o functions: */ - if (slot->type == OS_FILE_WRITE) { - os_aio_thread_io_writes[global_segment] += n_consecutive; - if (array == os_aio_write_array) { - if ((total_len % UNIV_PAGE_SIZE != 0) - || (slot->offset % UNIV_PAGE_SIZE != 0)) { - fprintf(stderr, - "InnoDB: Error: trying a displaced" - " write to %s %lu %lu, len %lu\n", - slot->name, (ulong) slot->offset_high, - (ulong) slot->offset, - (ulong) total_len); - ut_error; - } - - os_file_check_page_trailers(combined_buf, total_len); - } - start_usecs = 
time_usecs(); - ret = os_file_write(slot->name, slot->file, combined_buf, - slot->offset, slot->offset_high, - total_len); - stop_usecs = time_usecs(); - elapsed_usecs = stop_usecs - start_usecs; - if (elapsed_usecs < 0) elapsed_usecs = 0; - - if (array == os_aio_write_array) { - os_file_check_page_trailers(combined_buf, total_len); - } - } else { - start_usecs = time_usecs(); - os_aio_thread_io_reads[global_segment] += n_consecutive; - ret = os_file_read(slot->file, combined_buf, - slot->offset, slot->offset_high, total_len); - stop_usecs = time_usecs(); - elapsed_usecs = stop_usecs - start_usecs; - if (elapsed_usecs < 0) elapsed_usecs = 0; - } - if (elapsed_usecs > os_aio_thread_max_io_wait[global_segment]) - os_aio_thread_max_io_wait[global_segment] = elapsed_usecs; - os_aio_thread_io_wait[global_segment] += elapsed_usecs; - os_aio_thread_io_requests[global_segment]++; - - ut_a(ret); - srv_set_io_thread_op_info(global_segment, "file i/o done"); - -#if 0 - fprintf(stderr, - "aio: %lu consecutive %lu:th segment, first offs %lu blocks\n", - n_consecutive, global_segment, slot->offset / UNIV_PAGE_SIZE); -#endif - - if (slot->type == OS_FILE_READ && n_consecutive > 1) { - /* Copy the combined buffer to individual buffers */ - offs = 0; - - for (i = 0; i < n_consecutive; i++) { - - ut_memcpy(consecutive_ios[i]->buf, combined_buf + offs, - consecutive_ios[i]->len); - offs += consecutive_ios[i]->len; - } - } - - os_mutex_enter(array->mutex); - - /* Mark the i/os done in slots */ - - for (i = 0; i < n_consecutive; i++) { - ut_a(consecutive_ios[i]->status == OS_AIO_ISSUED); - consecutive_ios[i]->status = OS_AIO_DONE; - } - - /* We return the messages for the first slot now, and if there were - several slots, the messages will be returned with subsequent calls - of this function */ - -slot_io_done: - - ut_a(slot->reserved); - ut_a(slot->status == OS_AIO_DONE); - slot->status = OS_AIO_CLAIMED; - - *message1 = slot->message1; - *message2 = slot->message2; - - *type = 
slot->type; - - os_mutex_exit(array->mutex); - - os_aio_array_free_slot(array, slot); - srv_set_io_thread_op_info(global_segment, "exited handler"); - - return(ret); - -wait_for_io: - srv_set_io_thread_op_info(global_segment, "resetting wait event"); - - /* We wait here until there again can be i/os in the segment - of this thread */ - - os_event_reset(os_aio_segment_wait_events[global_segment]); - - os_mutex_exit(array->mutex); - -recommended_sleep: - srv_set_io_thread_op_info(global_segment, "waiting for i/o request"); - - os_event_wait(os_aio_segment_wait_events[global_segment]); - - if (os_aio_print_debug) { - fprintf(stderr, - "InnoDB: i/o handler thread for i/o" - " segment %lu wakes up\n", - (ulong) global_segment); - } - - goto restart; -} - -/************************************************************************** -Validates the consistency of an aio array. */ -static -ibool -os_aio_array_validate( -/*==================*/ - /* out: TRUE if ok */ - os_aio_array_t* array) /* in: aio wait array */ -{ - os_aio_slot_t* slot; - ulint n_reserved = 0; - ulint i; - - ut_a(array); - - os_mutex_enter(array->mutex); - - ut_a(array->n_slots > 0); - - for (i = 0; i < array->n_slots; i++) { - slot = os_aio_array_get_nth_slot(array, i); - - if (slot->reserved) { - n_reserved++; - ut_a(slot->len > 0); - } - } - - ut_a(array->n_reserved == n_reserved); - - os_mutex_exit(array->mutex); - - return(TRUE); -} - -/************************************************************************** -Validates the consistency the aio system. */ - -ibool -os_aio_validate(void) -/*=================*/ - /* out: TRUE if ok */ -{ - os_aio_array_validate(os_aio_read_array); - os_aio_array_validate(os_aio_write_array); - os_aio_array_validate(os_aio_ibuf_array); - os_aio_array_validate(os_aio_log_array); - os_aio_array_validate(os_aio_sync_array); - - return(TRUE); -} - -/************************************************************************** -Prints info of the aio arrays. 
*/ - -void -os_aio_print( -/*=========*/ - FILE* file) /* in: file where to print */ -{ - os_aio_array_t* array; - os_aio_slot_t* slot; - ulint n_reserved; - time_t current_time; - double time_elapsed; - double avg_bytes_read; - ulint i; - ulint num_issued, num_done, num_claimed; - - for (i = 0; i < os_aio_n_segments; i++) { - fprintf(file, - "I/O thread %lu state: %s (%s) reads %lu writes %lu " - "requests %lu io secs %lf io msecs/request %lf max_io_wait %lf", - i, srv_io_thread_op_info[i], srv_io_thread_function[i], - os_aio_thread_io_reads[i], os_aio_thread_io_writes[i], - os_aio_thread_io_requests[i], - os_aio_thread_io_wait[i] / 1000000.0, - os_aio_thread_io_requests[i] ? - os_aio_thread_io_wait[i] / os_aio_thread_io_requests[i] / 1000.0 : 0.0, - os_aio_thread_max_io_wait[i] / 1000.0); - -#ifndef __WIN__ - if (os_aio_segment_wait_events[i]->is_set) { - fprintf(file, " ev set"); - } -#endif - - fprintf(file, "\n"); - } - - fputs("Pending normal aio reads:", file); - - array = os_aio_read_array; -loop: - ut_a(array); - - os_mutex_enter(array->mutex); - - ut_a(array->n_slots > 0); - n_reserved = 0; - num_done = num_issued = num_claimed = 0; - - for (i = 0; i < array->n_slots; i++) { - slot = os_aio_array_get_nth_slot(array, i); - - if (slot->reserved) { - if (slot->status == OS_AIO_ISSUED) - num_issued++; - else if (slot->status == OS_AIO_DONE) - num_done++; - else { - ut_ad(slot->status == OS_AIO_CLAIMED); - num_claimed++; - } - n_reserved++; -#if 0 - fprintf(stderr, "Reserved slot, messages %p %p\n", - (void*) slot->message1, - (void*) slot->message2); -#endif - ut_a(slot->len > 0); - } - } - - ut_a(array->n_reserved == n_reserved); - - fprintf(file, " %lu", (ulong) n_reserved); - - os_mutex_exit(array->mutex); - - if (array == os_aio_read_array) { - fputs(", aio writes:", file); - - array = os_aio_write_array; - - goto loop; - } - - if (array == os_aio_write_array) { - fputs(",\n ibuf aio reads:", file); - array = os_aio_ibuf_array; - - goto loop; - } - - if 
(array == os_aio_ibuf_array) { - fputs(", log i/o's:", file); - array = os_aio_log_array; - - goto loop; - } - - if (array == os_aio_log_array) { - fputs(", sync i/o's:", file); - array = os_aio_sync_array; - - goto loop; - } - - putc('\n', file); - fprintf(file, - "Summary of background IO slot status: %lu issued, " - "%lu done, %lu claimed, sleep set %d\n", - num_issued, num_done, num_claimed, - (int)os_aio_recommend_sleep_for_read_threads); - - putc('\n', file); - current_time = time(NULL); - time_elapsed = 0.001 + difftime(current_time, os_last_printout); - - fprintf(file, - "Pending flushes (fsync) log: %lu; buffer pool: %lu\n" - "%lu OS file reads, %lu OS file writes, %lu OS fsyncs\n", - (ulong) fil_n_pending_log_flushes, - (ulong) fil_n_pending_tablespace_flushes, - (ulong) os_n_file_reads, (ulong) os_n_file_writes, - (ulong) os_n_fsyncs); - - if (os_file_n_pending_preads != 0 || os_file_n_pending_pwrites != 0) { - fprintf(file, - "%lu pending preads, %lu pending pwrites\n", - (ulong) os_file_n_pending_preads, - (ulong) os_file_n_pending_pwrites); - } - - if (os_n_file_reads == os_n_file_reads_old) { - avg_bytes_read = 0.0; - } else { - avg_bytes_read = (double) os_bytes_read_since_printout - / (os_n_file_reads - os_n_file_reads_old); - } - - fprintf(file, - "%.2f reads/s, %lu avg bytes/read," - " %.2f writes/s, %.2f fsyncs/s\n", - (os_n_file_reads - os_n_file_reads_old) - / time_elapsed, - (ulong)avg_bytes_read, - (os_n_file_writes - os_n_file_writes_old) - / time_elapsed, - (os_n_fsyncs - os_n_fsyncs_old) - / time_elapsed); - - os_n_file_reads_old = os_n_file_reads; - os_n_file_writes_old = os_n_file_writes; - os_n_fsyncs_old = os_n_fsyncs; - os_bytes_read_since_printout = 0; - - os_last_printout = current_time; -} - -/************************************************************************** -Refreshes the statistics used to print per-second averages. 
*/ - -void -os_aio_refresh_stats(void) -/*======================*/ -{ - os_n_file_reads_old = os_n_file_reads; - os_n_file_writes_old = os_n_file_writes; - os_n_fsyncs_old = os_n_fsyncs; - os_bytes_read_since_printout = 0; - - os_last_printout = time(NULL); -} - -#ifdef UNIV_DEBUG -/************************************************************************** -Checks that all slots in the system have been freed, that is, there are -no pending io operations. */ - -ibool -os_aio_all_slots_free(void) -/*=======================*/ - /* out: TRUE if all free */ -{ - os_aio_array_t* array; - ulint n_res = 0; - - array = os_aio_read_array; - - os_mutex_enter(array->mutex); - - n_res += array->n_reserved; - - os_mutex_exit(array->mutex); - - array = os_aio_write_array; - - os_mutex_enter(array->mutex); - - n_res += array->n_reserved; - - os_mutex_exit(array->mutex); - - array = os_aio_ibuf_array; - - os_mutex_enter(array->mutex); - - n_res += array->n_reserved; - - os_mutex_exit(array->mutex); - - array = os_aio_log_array; - - os_mutex_enter(array->mutex); - - n_res += array->n_reserved; - - os_mutex_exit(array->mutex); - - array = os_aio_sync_array; - - os_mutex_enter(array->mutex); - - n_res += array->n_reserved; - - os_mutex_exit(array->mutex); - - if (n_res == 0) { - - return(TRUE); - } - - return(FALSE); -} -#endif /* UNIV_DEBUG */ diff --git a/storage/innobase/os/os0proc.c b/storage/innobase/os/os0proc.c deleted file mode 100644 index a99fe8b6a0e..00000000000 --- a/storage/innobase/os/os0proc.c +++ /dev/null @@ -1,674 +0,0 @@ -/****************************************************** -The interface to the operating system -process control primitives - -(c) 1995 Innobase Oy - -Created 9/30/1995 Heikki Tuuri -*******************************************************/ - -#include "os0proc.h" -#ifdef UNIV_NONINL -#include "os0proc.ic" -#endif - -#include "ut0mem.h" -#include "ut0byte.h" - - -/* -How to get AWE to compile on Windows? 
-------------------------------------- - -In the project settings of the innobase project the Visual C++ source, -__WIN2000__ has to be defined. - -The Visual C++ has to be relatively recent and _WIN32_WINNT has to be -defined to a value >= 0x0500 when windows.h is included. - -#define _WIN32_WINNT 0x0500 - -Where does AWE work? -------------------- - -See the error message in os_awe_allocate_physical_mem(). - -How to assign privileges for mysqld to use AWE? ------------------------------------------------ - -See the error message in os_awe_enable_lock_pages_in_mem(). - -Use Windows AWE functions in this order ---------------------------------------- - -(1) os_awe_enable_lock_pages_in_mem(); -(2) os_awe_allocate_physical_mem(); -(3) os_awe_allocate_virtual_mem_window(); -(4) os_awe_map_physical_mem_to_window(). - -To test 'AWE' in a computer which does not have the AWE API, -you can compile with UNIV_SIMULATE_AWE defined in this file. -*/ - -#ifdef UNIV_SIMULATE_AWE -/* If we simulate AWE, we allocate the 'physical memory' here */ -byte* os_awe_simulate_mem; -ulint os_awe_simulate_mem_size; -os_awe_t* os_awe_simulate_page_info; -byte* os_awe_simulate_window; -ulint os_awe_simulate_window_size; -/* In simulated AWE the following contains a NULL pointer or a pointer -to a mapped 'physical page' for each 4 kB page in the AWE window */ -byte** os_awe_simulate_map; -#endif - -#ifdef __WIN2000__ -os_awe_t* os_awe_page_info; -ulint os_awe_n_pages; -byte* os_awe_window; -ulint os_awe_window_size; -#endif - -ibool os_use_large_pages; -/* Large page size. This may be a boot-time option on some platforms */ -ulint os_large_page_size; - -/******************************************************************** -Windows AWE support. Tries to enable the "lock pages in memory" privilege for -the current process so that the current process can allocate memory-locked -virtual address space to act as the window where AWE maps physical memory. 
*/ - -ibool -os_awe_enable_lock_pages_in_mem(void) -/*=================================*/ - /* out: TRUE if success, FALSE if error; - prints error info to stderr if no success */ -{ -#ifdef UNIV_SIMULATE_AWE - - return(TRUE); - -#elif defined(__WIN2000__) - struct { - DWORD Count; - LUID_AND_ATTRIBUTES Privilege[1]; - } Info; - HANDLE hProcess; - HANDLE Token; - BOOL Result; - - hProcess = GetCurrentProcess(); - - /* Open the token of the current process */ - - Result = OpenProcessToken(hProcess, - TOKEN_ADJUST_PRIVILEGES, &Token); - if (Result != TRUE) { - fprintf(stderr, - "InnoDB: AWE: Cannot open process token, error %lu\n", - (ulint)GetLastError()); - return(FALSE); - } - - Info.Count = 1; - - Info.Privilege[0].Attributes = SE_PRIVILEGE_ENABLED; - - /* Get the local unique identifier (LUID) of the SE_LOCK_MEMORY - privilege */ - - Result = LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, - &(Info.Privilege[0].Luid)); - if (Result != TRUE) { - fprintf(stderr, - "InnoDB: AWE: Cannot get local privilege" - " value for %s, error %lu.\n", - SE_LOCK_MEMORY_NAME, (ulint)GetLastError()); - - return(FALSE); - } - - /* Try to adjust the privilege */ - - Result = AdjustTokenPrivileges(Token, FALSE, - (PTOKEN_PRIVILEGES)&Info, - 0, NULL, NULL); - /* Check the result */ - - if (Result != TRUE) { - fprintf(stderr, - "InnoDB: AWE: Cannot adjust process token privileges," - " error %u.\n", - GetLastError()); - return(FALSE); - } else if (GetLastError() != ERROR_SUCCESS) { - fprintf(stderr, - "InnoDB: AWE: Cannot enable SE_LOCK_MEMORY privilege," - " error %lu.\n" - "InnoDB: In Windows XP Home you cannot use AWE." 
- " In Windows 2000 and XP\n" - "InnoDB: Professional you must go to the" - " Control Panel, to\n" - "InnoDB: Security Settings, to Local Policies," - " and enable\n" - "InnoDB: the 'lock pages in memory' privilege" - " for the user who runs\n" - "InnoDB: the MySQL server.\n", GetLastError()); - - return(FALSE); - } - - CloseHandle(Token); - - return(TRUE); -#else -#ifdef __WIN__ - fprintf(stderr, - "InnoDB: AWE: Error: to use AWE you must use" - " a ...-nt MySQL executable.\n"); -#endif - return(FALSE); -#endif -} - -/******************************************************************** -Allocates physical RAM memory up to 64 GB in an Intel 32-bit x86 -processor. */ - -ibool -os_awe_allocate_physical_mem( -/*=========================*/ - /* out: TRUE if success */ - os_awe_t** page_info, /* out, own: array of opaque data containing - the info for allocated physical memory pages; - each allocated 4 kB physical memory page has - one slot of type os_awe_t in the array */ - ulint n_megabytes) /* in: number of megabytes to allocate */ -{ -#ifdef UNIV_SIMULATE_AWE - os_awe_simulate_page_info = ut_malloc - (sizeof(os_awe_t) * n_megabytes - * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE)); - - os_awe_simulate_mem - = ut_align(ut_malloc(4096 + 1024 * 1024 * n_megabytes), 4096); - os_awe_simulate_mem_size = n_megabytes * 1024 * 1024; - - *page_info = os_awe_simulate_page_info; - - return(TRUE); - -#elif defined(__WIN2000__) - BOOL bResult; - os_awe_t NumberOfPages; /* Question: why does Windows - use the name ULONG_PTR for - a scalar integer type? Maybe - because we may also refer to - &NumberOfPages? 
*/ - os_awe_t NumberOfPagesInitial; - SYSTEM_INFO sSysInfo; - int PFNArraySize; - - if (n_megabytes > 64 * 1024) { - - fprintf(stderr, - "InnoDB: AWE: Error: tried to allocate %lu MB.\n" - "InnoDB: AWE cannot allocate more than" - " 64 GB in any computer.\n", n_megabytes); - - return(FALSE); - } - - GetSystemInfo(&sSysInfo); /* fill the system information structure */ - - if ((ulint)OS_AWE_X86_PAGE_SIZE != (ulint)sSysInfo.dwPageSize) { - fprintf(stderr, - "InnoDB: AWE: Error: this computer has a page size" - " of %lu.\n" - "InnoDB: Should be 4096 bytes for" - " InnoDB AWE support to work.\n", - (ulint)sSysInfo.dwPageSize); - - return(FALSE); - } - - /* Calculate the number of pages of memory to request */ - - NumberOfPages = n_megabytes * ((1024 * 1024) / OS_AWE_X86_PAGE_SIZE); - - /* Calculate the size of page_info for allocated physical pages */ - - PFNArraySize = NumberOfPages * sizeof(os_awe_t); - - *page_info = (os_awe_t*)HeapAlloc(GetProcessHeap(), 0, PFNArraySize); - - if (*page_info == NULL) { - fprintf(stderr, - "InnoDB: AWE: Failed to allocate page info" - " array from process heap, error %lu\n", - (ulint)GetLastError()); - - return(FALSE); - } - - ut_total_allocated_memory += PFNArraySize; - - /* Enable this process' privilege to lock pages to physical memory */ - - if (!os_awe_enable_lock_pages_in_mem()) { - - return(FALSE); - } - - /* Allocate the physical memory */ - - NumberOfPagesInitial = NumberOfPages; - - os_awe_page_info = *page_info; - os_awe_n_pages = (ulint)NumberOfPages; - - /* Compilation note: if the compiler complains the function is not - defined, see the note at the start of this file */ - - bResult = AllocateUserPhysicalPages(GetCurrentProcess(), - &NumberOfPages, *page_info); - if (bResult != TRUE) { - fprintf(stderr, - "InnoDB: AWE: Cannot allocate physical pages," - " error %lu.\n", - (ulint)GetLastError()); - - return(FALSE); - } - - if (NumberOfPagesInitial != NumberOfPages) { - fprintf(stderr, - "InnoDB: AWE: Error: allocated 
only %lu pages" - " of %lu requested.\n" - "InnoDB: Check that you have enough free RAM.\n" - "InnoDB: In Windows XP Professional and" - " 2000 Professional\n" - "InnoDB: Windows PAE size is max 4 GB." - " In 2000 and .NET\n" - "InnoDB: Advanced Servers and 2000 Datacenter Server" - " it is 32 GB,\n" - "InnoDB: and in .NET Datacenter Server it is 64 GB.\n" - "InnoDB: A Microsoft web page said that" - " the processor must be an Intel\n" - "InnoDB: processor.\n", - (ulint)NumberOfPages, - (ulint)NumberOfPagesInitial); - - return(FALSE); - } - - fprintf(stderr, - "InnoDB: Using Address Windowing Extensions (AWE);" - " allocated %lu MB\n", - n_megabytes); - - return(TRUE); -#else - UT_NOT_USED(n_megabytes); - UT_NOT_USED(page_info); - - return(FALSE); -#endif -} - -/******************************************************************** -Allocates a window in the virtual address space where we can map then -pages of physical memory. */ - -byte* -os_awe_allocate_virtual_mem_window( -/*===============================*/ - /* out, own: allocated memory, or NULL if did not - succeed */ - ulint size) /* in: virtual memory allocation size in bytes, must - be < 2 GB */ -{ -#ifdef UNIV_SIMULATE_AWE - ulint i; - - os_awe_simulate_window = ut_align(ut_malloc(4096 + size), 4096); - os_awe_simulate_window_size = size; - - os_awe_simulate_map = ut_malloc(sizeof(byte*) * (size / 4096)); - - for (i = 0; i < (size / 4096); i++) { - *(os_awe_simulate_map + i) = NULL; - } - - return(os_awe_simulate_window); - -#elif defined(__WIN2000__) - byte* ptr; - - if (size > (ulint)0x7FFFFFFFUL) { - fprintf(stderr, - "InnoDB: AWE: Cannot allocate %lu bytes" - " of virtual memory\n", size); - - return(NULL); - } - - ptr = VirtualAlloc(NULL, (SIZE_T)size, MEM_RESERVE | MEM_PHYSICAL, - PAGE_READWRITE); - if (ptr == NULL) { - fprintf(stderr, - "InnoDB: AWE: Cannot allocate %lu bytes" - " of virtual memory, error %lu\n", - size, (ulint)GetLastError()); - - return(NULL); - } - - os_awe_window = ptr; - 
os_awe_window_size = size; - - ut_total_allocated_memory += size; - - return(ptr); -#else - UT_NOT_USED(size); - - return(NULL); -#endif -} - -/******************************************************************** -With this function you can map parts of physical memory allocated with -the ..._allocate_physical_mem to the virtual address space allocated with -the previous function. Intel implements this so that the process page -tables are updated accordingly. A test on a 1.5 GHz AMD processor and XP -showed that this takes < 1 microsecond, much better than the estimated 80 us -for copying a 16 kB page memory to memory. But, the operation will at least -partially invalidate the translation lookaside buffer (TLB) of all -processors. Under a real-world load the performance hit may be bigger. */ - -ibool -os_awe_map_physical_mem_to_window( -/*==============================*/ - /* out: TRUE if success; the function - calls exit(1) in case of an error */ - byte* ptr, /* in: a page-aligned pointer to - somewhere in the virtual address - space window; we map the physical mem - pages here */ - ulint n_mem_pages, /* in: number of 4 kB mem pages to - map */ - os_awe_t* page_info) /* in: array of page infos for those - pages; each page has one slot in the - array */ -{ -#ifdef UNIV_SIMULATE_AWE - ulint i; - byte** map; - byte* page; - byte* phys_page; - - ut_a(ptr >= os_awe_simulate_window); - ut_a(ptr < os_awe_simulate_window + os_awe_simulate_window_size); - ut_a(page_info >= os_awe_simulate_page_info); - ut_a(page_info < os_awe_simulate_page_info - + (os_awe_simulate_mem_size / 4096)); - - /* First look if some other 'physical pages' are mapped at ptr, - and copy them back to where they were if yes */ - - map = os_awe_simulate_map - + ((ulint)(ptr - os_awe_simulate_window)) / 4096; - page = ptr; - - for (i = 0; i < n_mem_pages; i++) { - if (*map != NULL) { - ut_memcpy(*map, page, 4096); - } - map++; - page += 4096; - } - - /* Then copy to ptr the 'physical pages' determined 
by page_info; we - assume page_info is a segment of the array we created at the start */ - - phys_page = os_awe_simulate_mem - + (ulint)(page_info - os_awe_simulate_page_info) - * 4096; - - ut_memcpy(ptr, phys_page, n_mem_pages * 4096); - - /* Update the map */ - - map = os_awe_simulate_map - + ((ulint)(ptr - os_awe_simulate_window)) / 4096; - - for (i = 0; i < n_mem_pages; i++) { - *map = phys_page; - - map++; - phys_page += 4096; - } - - return(TRUE); - -#elif defined(__WIN2000__) - BOOL bResult; - os_awe_t n_pages; - - n_pages = (os_awe_t)n_mem_pages; - - if (!(ptr >= os_awe_window)) { - fprintf(stderr, - "InnoDB: AWE: Error: trying to map to address %lx" - " but AWE window start %lx\n", - (ulint)ptr, (ulint)os_awe_window); - ut_a(0); - } - - if (!(ptr <= os_awe_window + os_awe_window_size - UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: AWE: Error: trying to map to address %lx" - " but AWE window end %lx\n", - (ulint)ptr, (ulint)os_awe_window + os_awe_window_size); - ut_a(0); - } - - if (!(page_info >= os_awe_page_info)) { - fprintf(stderr, - "InnoDB: AWE: Error: trying to map page info" - " at %lx but array start %lx\n", - (ulint)page_info, (ulint)os_awe_page_info); - ut_a(0); - } - - if (!(page_info <= os_awe_page_info + (os_awe_n_pages - 4))) { - fprintf(stderr, - "InnoDB: AWE: Error: trying to map page info" - " at %lx but array end %lx\n", - (ulint)page_info, - (ulint)(os_awe_page_info + os_awe_n_pages)); - ut_a(0); - } - - bResult = MapUserPhysicalPages((PVOID)ptr, n_pages, page_info); - - if (bResult != TRUE) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: AWE: Mapping of %lu physical pages" - " to address %lx failed,\n" - "InnoDB: error %lu.\n" - "InnoDB: Cannot continue operation.\n", - n_mem_pages, (ulint)ptr, (ulint)GetLastError()); - exit(1); - } - - return(TRUE); -#else - UT_NOT_USED(ptr); - UT_NOT_USED(n_mem_pages); - UT_NOT_USED(page_info); - - return(FALSE); -#endif -} - 
-/******************************************************************** -Converts the current process id to a number. It is not guaranteed that the -number is unique. In Linux returns the 'process number' of the current -thread. That number is the same as one sees in 'top', for example. In Linux -the thread id is not the same as one sees in 'top'. */ - -ulint -os_proc_get_number(void) -/*====================*/ -{ -#ifdef __WIN__ - return((ulint)GetCurrentProcessId()); -#else - return((ulint)getpid()); -#endif -} - -/******************************************************************** -Allocates non-cacheable memory. */ - -void* -os_mem_alloc_nocache( -/*=================*/ - /* out: allocated memory */ - ulint n) /* in: number of bytes */ -{ -#ifdef __WIN__ - void* ptr; - - ptr = VirtualAlloc(NULL, n, MEM_COMMIT, - PAGE_READWRITE | PAGE_NOCACHE); - ut_a(ptr); - - return(ptr); -#else - return(ut_malloc(n)); -#endif -} - -/******************************************************************** -Allocates large pages memory. */ - -void* -os_mem_alloc_large( -/*===============*/ - /* out: allocated memory */ - ulint n, /* in: number of bytes */ - ibool set_to_zero, /* in: TRUE if allocated memory - should be set to zero if - UNIV_SET_MEM_TO_ZERO is defined */ - ibool assert_on_error)/* in: if TRUE, we crash mysqld if - the memory cannot be allocated */ -{ -#ifdef HAVE_LARGE_PAGES - ulint size; - int shmid; - void *ptr = NULL; - struct shmid_ds buf; - - if (!os_use_large_pages || !os_large_page_size) { - goto skip; - } - -#ifdef UNIV_LINUX - /* Align block size to os_large_page_size */ - size = ((n - 1) & ~(os_large_page_size - 1)) + os_large_page_size; - - shmid = shmget(IPC_PRIVATE, (size_t)size, SHM_HUGETLB | SHM_R | SHM_W); - if (shmid < 0) { - fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to allocate" - " %lu bytes. 
errno %d\n", n, errno); - } else { - ptr = shmat(shmid, NULL, 0); - if (ptr == (void *)-1) { - fprintf(stderr, "InnoDB: HugeTLB: Warning: Failed to" - " attach shared memory segment, errno %d\n", - errno); - } - - /* Remove the shared memory segment so that it will be - automatically freed after memory is detached or - process exits */ - shmctl(shmid, IPC_RMID, &buf); - } -#endif - - if (ptr) { - if (set_to_zero) { -#ifdef UNIV_SET_MEM_TO_ZERO - memset(ptr, '\0', size); -#endif - } - - return(ptr); - } - - fprintf(stderr, "InnoDB HugeTLB: Warning: Using conventional" - " memory pool\n"); -skip: -#endif /* HAVE_LARGE_PAGES */ - - return(ut_malloc_low(n, set_to_zero, assert_on_error)); -} - -/******************************************************************** -Frees large pages memory. */ - -void -os_mem_free_large( -/*==============*/ - void *ptr) /* in: number of bytes */ -{ -#ifdef HAVE_LARGE_PAGES - if (os_use_large_pages && os_large_page_size -#ifdef UNIV_LINUX - && !shmdt(ptr) -#endif - ) { - return; - } -#endif - - ut_free(ptr); -} - -/******************************************************************** -Sets the priority boost for threads released from waiting within the current -process. */ - -void -os_process_set_priority_boost( -/*==========================*/ - ibool do_boost) /* in: TRUE if priority boost should be done, - FALSE if not */ -{ -#ifdef __WIN__ - ibool no_boost; - - if (do_boost) { - no_boost = FALSE; - } else { - no_boost = TRUE; - } - -#if TRUE != 1 -# error "TRUE != 1" -#endif - - /* Does not do anything currently! 
- SetProcessPriorityBoost(GetCurrentProcess(), no_boost); - */ - fputs("Warning: process priority boost setting" - " currently not functional!\n", - stderr); -#else - UT_NOT_USED(do_boost); -#endif -} diff --git a/storage/innobase/os/os0sync.c b/storage/innobase/os/os0sync.c deleted file mode 100644 index 18fd38f3f9b..00000000000 --- a/storage/innobase/os/os0sync.c +++ /dev/null @@ -1,753 +0,0 @@ -/****************************************************** -The interface to the operating system -synchronization primitives. - -(c) 1995 Innobase Oy - -Created 9/6/1995 Heikki Tuuri -*******************************************************/ - -#include "os0sync.h" -#ifdef UNIV_NONINL -#include "os0sync.ic" -#endif - -#ifdef __WIN__ -#include <windows.h> -#endif - -#include "ut0mem.h" -#include "srv0start.h" - -/* Type definition for an operating system mutex struct */ -struct os_mutex_struct{ - os_event_t event; /* Used by sync0arr.c for queing threads */ - void* handle; /* OS handle to mutex */ - ulint count; /* we use this counter to check - that the same thread does not - recursively lock the mutex: we - do not assume that the OS mutex - supports recursive locking, though - NT seems to do that */ - UT_LIST_NODE_T(os_mutex_str_t) os_mutex_list; - /* list of all 'slow' OS mutexes created */ -}; - -/* Mutex protecting counts and the lists of OS mutexes and events */ -os_mutex_t os_sync_mutex; -ibool os_sync_mutex_inited = FALSE; -ibool os_sync_free_called = FALSE; - -/* This is incremented by 1 in os_thread_create and decremented by 1 in -os_thread_exit */ -ulint os_thread_count = 0; - -/* The list of all events created */ -UT_LIST_BASE_NODE_T(os_event_struct_t) os_event_list; - -/* The list of all OS 'slow' mutexes */ -UT_LIST_BASE_NODE_T(os_mutex_str_t) os_mutex_list; - -ulint os_event_count = 0; -ulint os_mutex_count = 0; -ulint os_fast_mutex_count = 0; - -/* Because a mutex is embedded inside an event and there is an -event embedded inside a mutex, on free, this 
generates a recursive call. -This version of the free event function doesn't acquire the global lock */ -static void os_event_free_internal(os_event_t event); - -/************************************************************* -Initializes global event and OS 'slow' mutex lists. */ - -void -os_sync_init(void) -/*==============*/ -{ - UT_LIST_INIT(os_event_list); - UT_LIST_INIT(os_mutex_list); - - os_sync_mutex = os_mutex_create(NULL); - - os_sync_mutex_inited = TRUE; -} - -/************************************************************* -Frees created events and OS 'slow' mutexes. */ - -void -os_sync_free(void) -/*==============*/ -{ - os_event_t event; - os_mutex_t mutex; - - os_sync_free_called = TRUE; - event = UT_LIST_GET_FIRST(os_event_list); - - while (event) { - - os_event_free(event); - - event = UT_LIST_GET_FIRST(os_event_list); - } - - mutex = UT_LIST_GET_FIRST(os_mutex_list); - - while (mutex) { - if (mutex == os_sync_mutex) { - /* Set the flag to FALSE so that we do not try to - reserve os_sync_mutex any more in remaining freeing - operations in shutdown */ - os_sync_mutex_inited = FALSE; - } - - os_mutex_free(mutex); - - mutex = UT_LIST_GET_FIRST(os_mutex_list); - } - os_sync_free_called = FALSE; -} - -/************************************************************* -Creates an event semaphore, i.e., a semaphore which may just have two -states: signaled and nonsignaled. The created event is manual reset: it -must be reset explicitly by calling sync_os_reset_event. 
*/ - -os_event_t -os_event_create( -/*============*/ - /* out: the event handle */ - const char* name) /* in: the name of the event, if NULL - the event is created without a name */ -{ -#ifdef __WIN__ - os_event_t event; - - event = ut_malloc(sizeof(struct os_event_struct)); - - event->handle = CreateEvent(NULL, /* No security attributes */ - TRUE, /* Manual reset */ - FALSE, /* Initial state nonsignaled */ - (LPCTSTR) name); - if (!event->handle) { - fprintf(stderr, - "InnoDB: Could not create a Windows event semaphore;" - " Windows error %lu\n", - (ulong) GetLastError()); - } -#else /* Unix */ - os_event_t event; - - UT_NOT_USED(name); - - event = ut_malloc(sizeof(struct os_event_struct)); - - os_fast_mutex_init(&(event->os_mutex)); - -#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10) - ut_a(0 == pthread_cond_init(&(event->cond_var), - pthread_condattr_default)); -#else - ut_a(0 == pthread_cond_init(&(event->cond_var), NULL)); -#endif - event->is_set = FALSE; - - /* We return this value in os_event_reset(), which can then be - be used to pass to the os_event_wait_low(). The value of zero - is reserved in os_event_wait_low() for the case when the - caller does not want to pass any signal_count value. To - distinguish between the two cases we initialize signal_count - to 1 here. 
*/ - event->signal_count = 1; -#endif /* __WIN__ */ - - /* The os_sync_mutex can be NULL because during startup an event - can be created [ because it's embedded in the mutex/rwlock ] before - this module has been initialized */ - if (os_sync_mutex != NULL) { - os_mutex_enter(os_sync_mutex); - } - - /* Put to the list of events */ - UT_LIST_ADD_FIRST(os_event_list, os_event_list, event); - - os_event_count++; - - if (os_sync_mutex != NULL) { - os_mutex_exit(os_sync_mutex); - } - - return(event); -} - -#ifdef __WIN__ -/************************************************************* -Creates an auto-reset event semaphore, i.e., an event which is automatically -reset when a single thread is released. Works only in Windows. */ - -os_event_t -os_event_create_auto( -/*=================*/ - /* out: the event handle */ - const char* name) /* in: the name of the event, if NULL - the event is created without a name */ -{ - os_event_t event; - - event = ut_malloc(sizeof(struct os_event_struct)); - - event->handle = CreateEvent(NULL, /* No security attributes */ - FALSE, /* Auto-reset */ - FALSE, /* Initial state nonsignaled */ - (LPCTSTR) name); - - if (!event->handle) { - fprintf(stderr, - "InnoDB: Could not create a Windows auto" - " event semaphore; Windows error %lu\n", - (ulong) GetLastError()); - } - - /* Put to the list of events */ - os_mutex_enter(os_sync_mutex); - - UT_LIST_ADD_FIRST(os_event_list, os_event_list, event); - - os_event_count++; - - os_mutex_exit(os_sync_mutex); - - return(event); -} -#endif - -/************************************************************** -Sets an event semaphore to the signaled state: lets waiting threads -proceed. 
*/ - -void -os_event_set( -/*=========*/ - os_event_t event) /* in: event to set */ -{ -#ifdef __WIN__ - ut_a(event); - ut_a(SetEvent(event->handle)); -#else - ut_a(event); - - os_fast_mutex_lock(&(event->os_mutex)); - - if (event->is_set) { - /* Do nothing */ - } else { - event->is_set = TRUE; - event->signal_count += 1; - ut_a(0 == pthread_cond_broadcast(&(event->cond_var))); - } - - os_fast_mutex_unlock(&(event->os_mutex)); -#endif -} - -/************************************************************** -Resets an event semaphore to the nonsignaled state. Waiting threads will -stop to wait for the event. -The return value should be passed to os_even_wait_low() if it is desired -that this thread should not wait in case of an intervening call to -os_event_set() between this os_event_reset() and the -os_event_wait_low() call. See comments for os_event_wait_low(). */ - -ib_longlong -os_event_reset( -/*===========*/ - /* out: current signal_count. */ - os_event_t event) /* in: event to reset */ -{ - ib_longlong ret = 0; - -#ifdef __WIN__ - ut_a(event); - - ut_a(ResetEvent(event->handle)); -#else - ut_a(event); - - os_fast_mutex_lock(&(event->os_mutex)); - - if (!event->is_set) { - /* Do nothing */ - } else { - event->is_set = FALSE; - } - ret = event->signal_count; - - os_fast_mutex_unlock(&(event->os_mutex)); -#endif - return(ret); -} - -/************************************************************** -Frees an event object, without acquiring the global lock. 
*/ -static -void -os_event_free_internal( -/*===================*/ - os_event_t event) /* in: event to free */ -{ -#ifdef __WIN__ - ut_a(event); - - ut_a(CloseHandle(event->handle)); -#else - ut_a(event); - - /* This is to avoid freeing the mutex twice */ - os_fast_mutex_free(&(event->os_mutex)); - - ut_a(0 == pthread_cond_destroy(&(event->cond_var))); -#endif - /* Remove from the list of events */ - - UT_LIST_REMOVE(os_event_list, os_event_list, event); - - os_event_count--; - - ut_free(event); -} - -/************************************************************** -Frees an event object. */ - -void -os_event_free( -/*==========*/ - os_event_t event) /* in: event to free */ - -{ -#ifdef __WIN__ - ut_a(event); - - ut_a(CloseHandle(event->handle)); -#else - ut_a(event); - - os_fast_mutex_free(&(event->os_mutex)); - ut_a(0 == pthread_cond_destroy(&(event->cond_var))); -#endif - /* Remove from the list of events */ - - os_mutex_enter(os_sync_mutex); - - UT_LIST_REMOVE(os_event_list, os_event_list, event); - - os_event_count--; - - os_mutex_exit(os_sync_mutex); - - ut_free(event); -} - -/************************************************************** -Waits for an event object until it is in the signaled state. If -srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS this also exits the -waiting thread when the event becomes signaled (or immediately if the -event is already in the signaled state). - -Typically, if the event has been signalled after the os_event_reset() -we'll return immediately because event->is_set == TRUE. -There are, however, situations (e.g.: sync_array code) where we may -lose this information. For example: - -thread A calls os_event_reset() -thread B calls os_event_set() [event->is_set == TRUE] -thread C calls os_event_reset() [event->is_set == FALSE] -thread A calls os_event_wait() [infinite wait!] -thread C calls os_event_wait() [infinite wait!] 
- -Where such a scenario is possible, to avoid infinite wait, the -value returned by os_event_reset() should be passed in as -reset_sig_count. */ - -void -os_event_wait_low( -/*==============*/ - os_event_t event, /* in: event to wait */ - ib_longlong reset_sig_count)/* in: zero or the value - returned by previous call of - os_event_reset(). */ -{ -#ifdef __WIN__ - DWORD err; - - ut_a(event); - - UT_NOT_USED(reset_sig_count); - - /* Specify an infinite time limit for waiting */ - err = WaitForSingleObject(event->handle, INFINITE); - - ut_a(err == WAIT_OBJECT_0); - - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); - } -#else - ib_longlong old_signal_count; - - os_fast_mutex_lock(&(event->os_mutex)); - - if (reset_sig_count) { - old_signal_count = reset_sig_count; - } else { - old_signal_count = event->signal_count; - } - - for (;;) { - if (event->is_set == TRUE - || event->signal_count != old_signal_count) { - - os_fast_mutex_unlock(&(event->os_mutex)); - - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - - os_thread_exit(NULL); - } - /* Ok, we may return */ - - return; - } - - pthread_cond_wait(&(event->cond_var), &(event->os_mutex)); - - /* Solaris manual said that spurious wakeups may occur: we - have to check if the event really has been signaled after - we came here to wait */ - } -#endif -} - -/************************************************************** -Waits for an event object until it is in the signaled state or -a timeout is exceeded. In Unix the timeout is always infinite. 
*/ - -ulint -os_event_wait_time( -/*===============*/ - /* out: 0 if success, OS_SYNC_TIME_EXCEEDED if - timeout was exceeded */ - os_event_t event, /* in: event to wait */ - ulint time) /* in: timeout in microseconds, or - OS_SYNC_INFINITE_TIME */ -{ -#ifdef __WIN__ - DWORD err; - - ut_a(event); - - if (time != OS_SYNC_INFINITE_TIME) { - err = WaitForSingleObject(event->handle, (DWORD) time / 1000); - } else { - err = WaitForSingleObject(event->handle, INFINITE); - } - - if (err == WAIT_OBJECT_0) { - - return(0); - } else if (err == WAIT_TIMEOUT) { - - return(OS_SYNC_TIME_EXCEEDED); - } else { - ut_error; - return(1000000); /* dummy value to eliminate compiler warn. */ - } -#else - UT_NOT_USED(time); - - /* In Posix this is just an ordinary, infinite wait */ - - os_event_wait(event); - - return(0); -#endif -} - -#ifdef __WIN__ -/************************************************************** -Waits for any event in an OS native event array. Returns if even a single -one is signaled or becomes signaled. */ - -ulint -os_event_wait_multiple( -/*===================*/ - /* out: index of the event - which was signaled */ - ulint n, /* in: number of events in the - array */ - os_native_event_t* native_event_array) - /* in: pointer to an array of event - handles */ -{ - DWORD index; - - ut_a(native_event_array); - ut_a(n > 0); - - index = WaitForMultipleObjects((DWORD) n, native_event_array, - FALSE, /* Wait for any 1 event */ - INFINITE); /* Infinite wait time - limit */ - ut_a(index >= WAIT_OBJECT_0); /* NOTE: Pointless comparision */ - ut_a(index < WAIT_OBJECT_0 + n); - - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - os_thread_exit(NULL); - } - - return(index - WAIT_OBJECT_0); -} -#endif - -/************************************************************* -Creates an operating system mutex semaphore. Because these are slow, the -mutex semaphore of InnoDB itself (mutex_t) should be used where possible. 
*/ - -os_mutex_t -os_mutex_create( -/*============*/ - /* out: the mutex handle */ - const char* name) /* in: the name of the mutex, if NULL - the mutex is created without a name */ -{ -#ifdef __WIN__ - HANDLE mutex; - os_mutex_t mutex_str; - - mutex = CreateMutex(NULL, /* No security attributes */ - FALSE, /* Initial state: no owner */ - (LPCTSTR) name); - ut_a(mutex); -#else - os_fast_mutex_t* mutex; - os_mutex_t mutex_str; - - UT_NOT_USED(name); - - mutex = ut_malloc(sizeof(os_fast_mutex_t)); - - os_fast_mutex_init(mutex); -#endif - mutex_str = ut_malloc(sizeof(os_mutex_str_t)); - - mutex_str->handle = mutex; - mutex_str->count = 0; - mutex_str->event = os_event_create(NULL); - - if (os_sync_mutex_inited) { - /* When creating os_sync_mutex itself we cannot reserve it */ - os_mutex_enter(os_sync_mutex); - } - - UT_LIST_ADD_FIRST(os_mutex_list, os_mutex_list, mutex_str); - - os_mutex_count++; - - if (os_sync_mutex_inited) { - os_mutex_exit(os_sync_mutex); - } - - return(mutex_str); -} - -/************************************************************** -Acquires ownership of a mutex semaphore. */ - -void -os_mutex_enter( -/*===========*/ - os_mutex_t mutex) /* in: mutex to acquire */ -{ -#ifdef __WIN__ - DWORD err; - - ut_a(mutex); - - /* Specify infinite time limit for waiting */ - err = WaitForSingleObject(mutex->handle, INFINITE); - - ut_a(err == WAIT_OBJECT_0); - - (mutex->count)++; - ut_a(mutex->count == 1); -#else - os_fast_mutex_lock(mutex->handle); - - (mutex->count)++; - - ut_a(mutex->count == 1); -#endif -} - -/************************************************************** -Releases ownership of a mutex. */ - -void -os_mutex_exit( -/*==========*/ - os_mutex_t mutex) /* in: mutex to release */ -{ - ut_a(mutex); - - ut_a(mutex->count == 1); - - (mutex->count)--; -#ifdef __WIN__ - ut_a(ReleaseMutex(mutex->handle)); -#else - os_fast_mutex_unlock(mutex->handle); -#endif -} - -/************************************************************** -Frees a mutex object. 
*/ - -void -os_mutex_free( -/*==========*/ - os_mutex_t mutex) /* in: mutex to free */ -{ - ut_a(mutex); - - if (!os_sync_free_called) { - os_event_free_internal(mutex->event); - } - - if (os_sync_mutex_inited) { - os_mutex_enter(os_sync_mutex); - } - - UT_LIST_REMOVE(os_mutex_list, os_mutex_list, mutex); - - os_mutex_count--; - - if (os_sync_mutex_inited) { - os_mutex_exit(os_sync_mutex); - } - -#ifdef __WIN__ - ut_a(CloseHandle(mutex->handle)); - - ut_free(mutex); -#else - os_fast_mutex_free(mutex->handle); - ut_free(mutex->handle); - ut_free(mutex); -#endif -} - -/************************************************************* -Initializes an operating system fast mutex semaphore. */ - -void -os_fast_mutex_init( -/*===============*/ - os_fast_mutex_t* fast_mutex) /* in: fast mutex */ -{ -#ifdef __WIN__ - ut_a(fast_mutex); - - InitializeCriticalSection((LPCRITICAL_SECTION) fast_mutex); -#else -#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10) - ut_a(0 == pthread_mutex_init(fast_mutex, pthread_mutexattr_default)); -#else - ut_a(0 == pthread_mutex_init(fast_mutex, MY_MUTEX_INIT_FAST)); -#endif -#endif - if (os_sync_mutex_inited) { - /* When creating os_sync_mutex itself (in Unix) we cannot - reserve it */ - - os_mutex_enter(os_sync_mutex); - } - - os_fast_mutex_count++; - - if (os_sync_mutex_inited) { - os_mutex_exit(os_sync_mutex); - } -} - -/************************************************************** -Acquires ownership of a fast mutex. */ - -void -os_fast_mutex_lock( -/*===============*/ - os_fast_mutex_t* fast_mutex) /* in: mutex to acquire */ -{ -#ifdef __WIN__ - EnterCriticalSection((LPCRITICAL_SECTION) fast_mutex); -#else - pthread_mutex_lock(fast_mutex); -#endif -} - -/************************************************************** -Releases ownership of a fast mutex. 
*/ - -void -os_fast_mutex_unlock( -/*=================*/ - os_fast_mutex_t* fast_mutex) /* in: mutex to release */ -{ -#ifdef __WIN__ - LeaveCriticalSection(fast_mutex); -#else - pthread_mutex_unlock(fast_mutex); -#endif -} - -/************************************************************** -Frees a mutex object. */ - -void -os_fast_mutex_free( -/*===============*/ - os_fast_mutex_t* fast_mutex) /* in: mutex to free */ -{ -#ifdef __WIN__ - ut_a(fast_mutex); - - DeleteCriticalSection((LPCRITICAL_SECTION) fast_mutex); -#else - int ret; - - ret = pthread_mutex_destroy(fast_mutex); - - if (ret != 0) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: error: return value %lu when calling\n" - "InnoDB: pthread_mutex_destroy().\n", (ulint)ret); - fprintf(stderr, - "InnoDB: Byte contents of the pthread mutex at %p:\n", - (void*) fast_mutex); - ut_print_buf(stderr, fast_mutex, sizeof(os_fast_mutex_t)); - fprintf(stderr, "\n"); - } -#endif - if (os_sync_mutex_inited) { - /* When freeing the last mutexes, we have - already freed os_sync_mutex */ - - os_mutex_enter(os_sync_mutex); - } - - os_fast_mutex_count--; - - if (os_sync_mutex_inited) { - os_mutex_exit(os_sync_mutex); - } -} diff --git a/storage/innobase/os/os0thread.c b/storage/innobase/os/os0thread.c deleted file mode 100644 index a0b1e51d359..00000000000 --- a/storage/innobase/os/os0thread.c +++ /dev/null @@ -1,358 +0,0 @@ -/****************************************************** -The interface to the operating system thread control primitives - -(c) 1995 Innobase Oy - -Created 9/8/1995 Heikki Tuuri -*******************************************************/ - -#include "os0thread.h" -#ifdef UNIV_NONINL -#include "os0thread.ic" -#endif - -#ifdef __WIN__ -#include <windows.h> -#endif - -#include "srv0srv.h" -#include "os0sync.h" - -/******************************************************************* -Compares two thread ids for equality. 
*/ - -ibool -os_thread_eq( -/*=========*/ - /* out: TRUE if equal */ - os_thread_id_t a, /* in: OS thread or thread id */ - os_thread_id_t b) /* in: OS thread or thread id */ -{ -#ifdef __WIN__ - if (a == b) { - return(TRUE); - } - - return(FALSE); -#else - if (pthread_equal(a, b)) { - return(TRUE); - } - - return(FALSE); -#endif -} - -/******************************************************************** -Converts an OS thread id to a ulint. It is NOT guaranteed that the ulint is -unique for the thread though! */ - -ulint -os_thread_pf( -/*=========*/ - os_thread_id_t a) -{ -#ifdef UNIV_HPUX10 - /* In HP-UX-10.20 a pthread_t is a struct of 3 fields: field1, field2, - field3. We do not know if field1 determines the thread uniquely. */ - - return((ulint)(a.field1)); -#else - return((ulint)a); -#endif -} - -/********************************************************************* -Returns the thread identifier of current thread. Currently the thread -identifier in Unix is the thread handle itself. Note that in HP-UX -pthread_t is a struct of 3 fields. */ - -os_thread_id_t -os_thread_get_curr_id(void) -/*=======================*/ -{ -#ifdef __WIN__ - return(GetCurrentThreadId()); -#else - return(pthread_self()); -#endif -} - -/******************************************************************** -Creates a new thread of execution. The execution starts from -the function given. The start function takes a void* parameter -and returns an ulint. 
*/ - -os_thread_t -os_thread_create( -/*=============*/ - /* out: handle to the thread */ -#ifndef __WIN__ - os_posix_f_t start_f, -#else - ulint (*start_f)(void*), /* in: pointer to function - from which to start */ -#endif - void* arg, /* in: argument to start - function */ - os_thread_id_t* thread_id) /* out: id of the created - thread, or NULL */ -{ -#ifdef __WIN__ - os_thread_t thread; - DWORD win_thread_id; - - os_mutex_enter(os_sync_mutex); - os_thread_count++; - os_mutex_exit(os_sync_mutex); - - thread = CreateThread(NULL, /* no security attributes */ - 0, /* default size stack */ - (LPTHREAD_START_ROUTINE)start_f, - arg, - 0, /* thread runs immediately */ - &win_thread_id); - - if (srv_set_thread_priorities) { - - /* Set created thread priority the same as a normal query - in MYSQL: we try to prevent starvation of threads by - assigning same priority QUERY_PRIOR to all */ - - ut_a(SetThreadPriority(thread, srv_query_thread_priority)); - } - - if (thread_id) { - *thread_id = win_thread_id; - } - - return(thread); -#else - int ret; - os_thread_t pthread; - pthread_attr_t attr; - -#if !(defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)) - pthread_attr_init(&attr); -#endif - -#ifdef UNIV_AIX - /* We must make sure a thread stack is at least 32 kB, otherwise - InnoDB might crash; we do not know if the default stack size on - AIX is always big enough. An empirical test on AIX-4.3 suggested - the size was 96 kB, though. 
*/ - - ret = pthread_attr_setstacksize(&attr, - (size_t)(PTHREAD_STACK_MIN - + 32 * 1024)); - if (ret) { - fprintf(stderr, - "InnoDB: Error: pthread_attr_setstacksize" - " returned %d\n", ret); - exit(1); - } -#endif -#ifdef __NETWARE__ - ret = pthread_attr_setstacksize(&attr, - (size_t) NW_THD_STACKSIZE); - if (ret) { - fprintf(stderr, - "InnoDB: Error: pthread_attr_setstacksize" - " returned %d\n", ret); - exit(1); - } -#endif - os_mutex_enter(os_sync_mutex); - os_thread_count++; - os_mutex_exit(os_sync_mutex); - -#if defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10) - ret = pthread_create(&pthread, pthread_attr_default, start_f, arg); -#else - ret = pthread_create(&pthread, &attr, start_f, arg); -#endif - if (ret) { - fprintf(stderr, - "InnoDB: Error: pthread_create returned %d\n", ret); - exit(1); - } - -#if !(defined(UNIV_HOTBACKUP) && defined(UNIV_HPUX10)) - pthread_attr_destroy(&attr); -#endif - if (srv_set_thread_priorities) { - - my_pthread_setprio(pthread, srv_query_thread_priority); - } - - if (thread_id) { - *thread_id = pthread; - } - - return(pthread); -#endif -} - -/********************************************************************* -Exits the current thread. */ - -void -os_thread_exit( -/*===========*/ - void* exit_value) /* in: exit value; in Windows this void* - is cast as a DWORD */ -{ -#ifdef UNIV_DEBUG_THREAD_CREATION - fprintf(stderr, "Thread exits, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); -#endif - os_mutex_enter(os_sync_mutex); - os_thread_count--; - os_mutex_exit(os_sync_mutex); - -#ifdef __WIN__ - ExitThread((DWORD)exit_value); -#else - pthread_exit(exit_value); -#endif -} - -#ifdef HAVE_PTHREAD_JOIN -int -os_thread_join( -/*===========*/ - os_thread_id_t thread_id) /* in: id of the thread to join */ -{ - return(pthread_join(thread_id, NULL)); -} -#endif -/********************************************************************* -Returns handle to the current thread. 
*/ - -os_thread_t -os_thread_get_curr(void) -/*====================*/ -{ -#ifdef __WIN__ - return(GetCurrentThread()); -#else - return(pthread_self()); -#endif -} - -/********************************************************************* -Advises the os to give up remainder of the thread's time slice. */ - -void -os_thread_yield(void) -/*=================*/ -{ -#if defined(__WIN__) - Sleep(0); -#elif (defined(HAVE_SCHED_YIELD) && defined(HAVE_SCHED_H)) - sched_yield(); -#elif defined(HAVE_PTHREAD_YIELD_ZERO_ARG) - pthread_yield(); -#elif defined(HAVE_PTHREAD_YIELD_ONE_ARG) - pthread_yield(0); -#else - os_thread_sleep(0); -#endif -} - -/********************************************************************* -The thread sleeps at least the time given in microseconds. */ - -void -os_thread_sleep( -/*============*/ - ulint tm) /* in: time in microseconds */ -{ -#ifdef __WIN__ - Sleep((DWORD) tm / 1000); -#elif defined(__NETWARE__) - delay(tm / 1000); -#else - struct timeval t; - - t.tv_sec = tm / 1000000; - t.tv_usec = tm % 1000000; - - select(0, NULL, NULL, NULL, &t); -#endif -} - -/********************************************************************** -Sets a thread priority. */ - -void -os_thread_set_priority( -/*===================*/ - os_thread_t handle, /* in: OS handle to the thread */ - ulint pri) /* in: priority */ -{ -#ifdef __WIN__ - int os_pri; - - if (pri == OS_THREAD_PRIORITY_BACKGROUND) { - os_pri = THREAD_PRIORITY_BELOW_NORMAL; - } else if (pri == OS_THREAD_PRIORITY_NORMAL) { - os_pri = THREAD_PRIORITY_NORMAL; - } else if (pri == OS_THREAD_PRIORITY_ABOVE_NORMAL) { - os_pri = THREAD_PRIORITY_HIGHEST; - } else { - ut_error; - } - - ut_a(SetThreadPriority(handle, os_pri)); -#else - UT_NOT_USED(handle); - UT_NOT_USED(pri); -#endif -} - -/********************************************************************** -Gets a thread priority. 
*/ - -ulint -os_thread_get_priority( -/*===================*/ - /* out: priority */ - os_thread_t handle __attribute__((unused))) - /* in: OS handle to the thread */ -{ -#ifdef __WIN__ - int os_pri; - ulint pri; - - os_pri = GetThreadPriority(handle); - - if (os_pri == THREAD_PRIORITY_BELOW_NORMAL) { - pri = OS_THREAD_PRIORITY_BACKGROUND; - } else if (os_pri == THREAD_PRIORITY_NORMAL) { - pri = OS_THREAD_PRIORITY_NORMAL; - } else if (os_pri == THREAD_PRIORITY_HIGHEST) { - pri = OS_THREAD_PRIORITY_ABOVE_NORMAL; - } else { - ut_error; - } - - return(pri); -#else - return(0); -#endif -} - -/********************************************************************** -Gets the last operating system error code for the calling thread. */ - -ulint -os_thread_get_last_error(void) -/*==========================*/ -{ -#ifdef __WIN__ - return(GetLastError()); -#else - return(0); -#endif -} diff --git a/storage/innobase/page/page0cur.c b/storage/innobase/page/page0cur.c deleted file mode 100644 index 70b7de194fd..00000000000 --- a/storage/innobase/page/page0cur.c +++ /dev/null @@ -1,1510 +0,0 @@ -/************************************************************************ -The page cursor - -(c) 1994-1996 Innobase Oy - -Created 10/4/1994 Heikki Tuuri -*************************************************************************/ - -#include "page0cur.h" -#ifdef UNIV_NONINL -#include "page0cur.ic" -#endif - -#include "rem0cmp.h" -#include "mtr0log.h" -#include "log0recv.h" -#include "rem0cmp.h" -#include "srv0srv.h" -#include "ut0ut.h" - -static ulint page_rnd = 976722341; - -#ifdef PAGE_CUR_ADAPT -# ifdef UNIV_SEARCH_PERF_STAT -ulint page_cur_short_succ = 0; -# endif /* UNIV_SEARCH_PERF_STAT */ - -/*********************************************************************** -This is a linear congruential generator PRNG. Returns a pseudo random -number between 0 and 2^64-1 inclusive. 
The formula and the constants -being used are: -X[n+1] = (a * X[n] + c) mod m -where: -X[0] = ut_usectime() -a = 1103515245 (3^5 * 5 * 7 * 129749) -c = 12345 (3 * 5 * 823) -m = 18446744073709551616 (2^64) -*/ -#define LCG_a 1103515245 -#define LCG_c 12345 -static -unsigned long long -page_cur_lcg_prng() -/*===============*/ - /* out: number between 0 and 2^64-1 */ -{ - static unsigned long long lcg_current = 0; - static ibool initialized = FALSE; - ulint time_sec; - ulint time_ms; - - if (!initialized) { - ut_usectime(&time_sec, &time_ms); - lcg_current = (unsigned long long) (time_sec * 1000000 - + time_ms); - initialized = TRUE; - } - - /* no need to "% 2^64" explicitly because lcg_current is - 64 bit and this will be done anyway */ - lcg_current = LCG_a * lcg_current + LCG_c; - - return(lcg_current); -} - -/******************************************************************** -Tries a search shortcut based on the last insert. */ -UNIV_INLINE -ibool -page_cur_try_search_shortcut( -/*=========================*/ - /* out: TRUE on success */ - page_t* page, /* in: index page */ - dict_index_t* index, /* in: record descriptor */ - dtuple_t* tuple, /* in: data tuple */ - ulint* iup_matched_fields, - /* in/out: already matched fields in upper - limit record */ - ulint* iup_matched_bytes, - /* in/out: already matched bytes in a field - not yet completely matched */ - ulint* ilow_matched_fields, - /* in/out: already matched fields in lower - limit record */ - ulint* ilow_matched_bytes, - /* in/out: already matched bytes in a field - not yet completely matched */ - page_cur_t* cursor) /* out: page cursor */ -{ - rec_t* rec; - rec_t* next_rec; - ulint low_match; - ulint low_bytes; - ulint up_match; - ulint up_bytes; -#ifdef UNIV_SEARCH_DEBUG - page_cur_t cursor2; -#endif - ibool success = FALSE; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - 
ut_ad(dtuple_check_typed(tuple)); - - rec = page_header_get_ptr(page, PAGE_LAST_INSERT); - offsets = rec_get_offsets(rec, index, offsets, - dtuple_get_n_fields(tuple), &heap); - - ut_ad(rec); - ut_ad(page_rec_is_user_rec(rec)); - - ut_pair_min(&low_match, &low_bytes, - *ilow_matched_fields, *ilow_matched_bytes, - *iup_matched_fields, *iup_matched_bytes); - - up_match = low_match; - up_bytes = low_bytes; - - if (page_cmp_dtuple_rec_with_match(tuple, rec, offsets, - &low_match, &low_bytes) < 0) { - goto exit_func; - } - - next_rec = page_rec_get_next(rec); - offsets = rec_get_offsets(next_rec, index, offsets, - dtuple_get_n_fields(tuple), &heap); - - if (page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets, - &up_match, &up_bytes) >= 0) { - goto exit_func; - } - - cursor->rec = rec; - -#ifdef UNIV_SEARCH_DEBUG - page_cur_search_with_match(page, index, tuple, PAGE_CUR_DBG, - iup_matched_fields, - iup_matched_bytes, - ilow_matched_fields, - ilow_matched_bytes, - &cursor2); - ut_a(cursor2.rec == cursor->rec); - - if (next_rec != page_get_supremum_rec(page)) { - - ut_a(*iup_matched_fields == up_match); - ut_a(*iup_matched_bytes == up_bytes); - } - - ut_a(*ilow_matched_fields == low_match); - ut_a(*ilow_matched_bytes == low_bytes); -#endif - if (!page_rec_is_supremum(next_rec)) { - - *iup_matched_fields = up_match; - *iup_matched_bytes = up_bytes; - } - - *ilow_matched_fields = low_match; - *ilow_matched_bytes = low_bytes; - -#ifdef UNIV_SEARCH_PERF_STAT - page_cur_short_succ++; -#endif - success = TRUE; -exit_func: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(success); -} - -#endif - -#ifdef PAGE_CUR_LE_OR_EXTENDS -/******************************************************************** -Checks if the nth field in a record is a character type field which extends -the nth field in tuple, i.e., the field is longer or equal in length and has -common first characters. 
*/ -static -ibool -page_cur_rec_field_extends( -/*=======================*/ - /* out: TRUE if rec field - extends tuple field */ - dtuple_t* tuple, /* in: data tuple */ - rec_t* rec, /* in: record */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint n) /* in: compare nth field */ -{ - dtype_t* type; - dfield_t* dfield; - byte* rec_f; - ulint rec_f_len; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - dfield = dtuple_get_nth_field(tuple, n); - - type = dfield_get_type(dfield); - - rec_f = rec_get_nth_field(rec, offsets, n, &rec_f_len); - - if (type->mtype == DATA_VARCHAR - || type->mtype == DATA_CHAR - || type->mtype == DATA_FIXBINARY - || type->mtype == DATA_BINARY - || type->mtype == DATA_BLOB - || type->mtype == DATA_VARMYSQL - || type->mtype == DATA_MYSQL) { - - if (dfield_get_len(dfield) != UNIV_SQL_NULL - && rec_f_len != UNIV_SQL_NULL - && rec_f_len >= dfield_get_len(dfield) - && !cmp_data_data_slow(type, - dfield_get_data(dfield), - dfield_get_len(dfield), - rec_f, dfield_get_len(dfield))) { - - return(TRUE); - } - } - - return(FALSE); -} -#endif /* PAGE_CUR_LE_OR_EXTENDS */ - -/******************************************************************** -Searches the right position for a page cursor. 
*/ - -void -page_cur_search_with_match( -/*=======================*/ - page_t* page, /* in: index page */ - dict_index_t* index, /* in: record descriptor */ - dtuple_t* tuple, /* in: data tuple */ - ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, - or PAGE_CUR_GE */ - ulint* iup_matched_fields, - /* in/out: already matched fields in upper - limit record */ - ulint* iup_matched_bytes, - /* in/out: already matched bytes in a field - not yet completely matched */ - ulint* ilow_matched_fields, - /* in/out: already matched fields in lower - limit record */ - ulint* ilow_matched_bytes, - /* in/out: already matched bytes in a field - not yet completely matched */ - page_cur_t* cursor) /* out: page cursor */ -{ - ulint up; - ulint low; - ulint mid; - page_dir_slot_t* slot; - rec_t* up_rec; - rec_t* low_rec; - rec_t* mid_rec; - ulint up_matched_fields; - ulint up_matched_bytes; - ulint low_matched_fields; - ulint low_matched_bytes; - ulint cur_matched_fields; - ulint cur_matched_bytes; - int cmp; -#ifdef UNIV_SEARCH_DEBUG - int dbg_cmp; - ulint dbg_matched_fields; - ulint dbg_matched_bytes; -#endif - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_ad(page && tuple && iup_matched_fields && iup_matched_bytes - && ilow_matched_fields && ilow_matched_bytes && cursor); - ut_ad(dtuple_validate(tuple)); - ut_ad(dtuple_check_typed(tuple)); -#ifdef UNIV_DEBUG -# ifdef PAGE_CUR_DBG - if (mode != PAGE_CUR_DBG) -# endif /* PAGE_CUR_DBG */ -# ifdef PAGE_CUR_LE_OR_EXTENDS - if (mode != PAGE_CUR_LE_OR_EXTENDS) -# endif /* PAGE_CUR_LE_OR_EXTENDS */ - ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE - || mode == PAGE_CUR_G || mode == PAGE_CUR_GE); -#endif /* UNIV_DEBUG */ - - page_check_dir(page); - -#ifdef PAGE_CUR_ADAPT - if ((page_header_get_field(page, PAGE_LEVEL) == 0) - && (mode == PAGE_CUR_LE) - && (page_header_get_field(page, PAGE_N_DIRECTION) > 3) - && 
(page_header_get_ptr(page, PAGE_LAST_INSERT)) - && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) { - - if (page_cur_try_search_shortcut( - page, index, tuple, - iup_matched_fields, iup_matched_bytes, - ilow_matched_fields, ilow_matched_bytes, - cursor)) { - return; - } - } -# ifdef PAGE_CUR_DBG - if (mode == PAGE_CUR_DBG) { - mode = PAGE_CUR_LE; - } -# endif -#endif - - /* The following flag does not work for non-latin1 char sets because - cmp_full_field does not tell how many bytes matched */ -#ifdef PAGE_CUR_LE_OR_EXTENDS - ut_a(mode != PAGE_CUR_LE_OR_EXTENDS); -#endif /* PAGE_CUR_LE_OR_EXTENDS */ - - /* If mode PAGE_CUR_G is specified, we are trying to position the - cursor to answer a query of the form "tuple < X", where tuple is - the input parameter, and X denotes an arbitrary physical record on - the page. We want to position the cursor on the first X which - satisfies the condition. */ - - up_matched_fields = *iup_matched_fields; - up_matched_bytes = *iup_matched_bytes; - low_matched_fields = *ilow_matched_fields; - low_matched_bytes = *ilow_matched_bytes; - - /* Perform binary search. First the search is done through the page - directory, after that as a linear search in the list of records - owned by the upper limit directory slot. 
*/ - - low = 0; - up = page_dir_get_n_slots(page) - 1; - - /* Perform binary search until the lower and upper limit directory - slots come to the distance 1 of each other */ - - while (up - low > 1) { - mid = (low + up) / 2; - slot = page_dir_get_nth_slot(page, mid); - mid_rec = page_dir_slot_get_rec(slot); - - ut_pair_min(&cur_matched_fields, &cur_matched_bytes, - low_matched_fields, low_matched_bytes, - up_matched_fields, up_matched_bytes); - - offsets = rec_get_offsets(mid_rec, index, offsets, - dtuple_get_n_fields_cmp(tuple), - &heap); - - cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets, - &cur_matched_fields, - &cur_matched_bytes); - if (UNIV_LIKELY(cmp > 0)) { -low_slot_match: - low = mid; - low_matched_fields = cur_matched_fields; - low_matched_bytes = cur_matched_bytes; - - } else if (UNIV_EXPECT(cmp, -1)) { -#ifdef PAGE_CUR_LE_OR_EXTENDS - if (mode == PAGE_CUR_LE_OR_EXTENDS - && page_cur_rec_field_extends( - tuple, mid_rec, offsets, - cur_matched_fields)) { - - goto low_slot_match; - } -#endif /* PAGE_CUR_LE_OR_EXTENDS */ -up_slot_match: - up = mid; - up_matched_fields = cur_matched_fields; - up_matched_bytes = cur_matched_bytes; - - } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE -#ifdef PAGE_CUR_LE_OR_EXTENDS - || mode == PAGE_CUR_LE_OR_EXTENDS -#endif /* PAGE_CUR_LE_OR_EXTENDS */ - ) { - - goto low_slot_match; - } else { - - goto up_slot_match; - } - } - - slot = page_dir_get_nth_slot(page, low); - low_rec = page_dir_slot_get_rec(slot); - slot = page_dir_get_nth_slot(page, up); - up_rec = page_dir_slot_get_rec(slot); - - /* Perform linear search until the upper and lower records come to - distance 1 of each other. 
*/ - - while (page_rec_get_next(low_rec) != up_rec) { - - mid_rec = page_rec_get_next(low_rec); - - ut_pair_min(&cur_matched_fields, &cur_matched_bytes, - low_matched_fields, low_matched_bytes, - up_matched_fields, up_matched_bytes); - - offsets = rec_get_offsets(mid_rec, index, offsets, - dtuple_get_n_fields_cmp(tuple), - &heap); - - cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets, - &cur_matched_fields, - &cur_matched_bytes); - if (UNIV_LIKELY(cmp > 0)) { -low_rec_match: - low_rec = mid_rec; - low_matched_fields = cur_matched_fields; - low_matched_bytes = cur_matched_bytes; - - } else if (UNIV_EXPECT(cmp, -1)) { -#ifdef PAGE_CUR_LE_OR_EXTENDS - if (mode == PAGE_CUR_LE_OR_EXTENDS - && page_cur_rec_field_extends( - tuple, mid_rec, offsets, - cur_matched_fields)) { - - goto low_rec_match; - } -#endif /* PAGE_CUR_LE_OR_EXTENDS */ -up_rec_match: - up_rec = mid_rec; - up_matched_fields = cur_matched_fields; - up_matched_bytes = cur_matched_bytes; - } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE -#ifdef PAGE_CUR_LE_OR_EXTENDS - || mode == PAGE_CUR_LE_OR_EXTENDS -#endif /* PAGE_CUR_LE_OR_EXTENDS */ - ) { - - goto low_rec_match; - } else { - - goto up_rec_match; - } - } - -#ifdef UNIV_SEARCH_DEBUG - - /* Check that the lower and upper limit records have the - right alphabetical order compared to tuple. 
*/ - dbg_matched_fields = 0; - dbg_matched_bytes = 0; - - offsets = rec_get_offsets(low_rec, index, offsets, - ULINT_UNDEFINED, &heap); - dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec, offsets, - &dbg_matched_fields, - &dbg_matched_bytes); - if (mode == PAGE_CUR_G) { - ut_a(dbg_cmp >= 0); - } else if (mode == PAGE_CUR_GE) { - ut_a(dbg_cmp == 1); - } else if (mode == PAGE_CUR_L) { - ut_a(dbg_cmp == 1); - } else if (mode == PAGE_CUR_LE) { - ut_a(dbg_cmp >= 0); - } - - if (low_rec != page_get_infimum_rec(page)) { - - ut_a(low_matched_fields == dbg_matched_fields); - ut_a(low_matched_bytes == dbg_matched_bytes); - } - - dbg_matched_fields = 0; - dbg_matched_bytes = 0; - - offsets = rec_get_offsets(up_rec, index, offsets, - ULINT_UNDEFINED, &heap); - dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec, offsets, - &dbg_matched_fields, - &dbg_matched_bytes); - if (mode == PAGE_CUR_G) { - ut_a(dbg_cmp == -1); - } else if (mode == PAGE_CUR_GE) { - ut_a(dbg_cmp <= 0); - } else if (mode == PAGE_CUR_L) { - ut_a(dbg_cmp <= 0); - } else if (mode == PAGE_CUR_LE) { - ut_a(dbg_cmp == -1); - } - - if (up_rec != page_get_supremum_rec(page)) { - - ut_a(up_matched_fields == dbg_matched_fields); - ut_a(up_matched_bytes == dbg_matched_bytes); - } -#endif - if (mode <= PAGE_CUR_GE) { - cursor->rec = up_rec; - } else { - cursor->rec = low_rec; - } - - *iup_matched_fields = up_matched_fields; - *iup_matched_bytes = up_matched_bytes; - *ilow_matched_fields = low_matched_fields; - *ilow_matched_bytes = low_matched_bytes; - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -} - -/*************************************************************** -Positions a page cursor on a randomly chosen user record on a page. If there -are no user records, sets the cursor on the infimum record. 
*/ - -void -page_cur_open_on_rnd_user_rec( -/*==========================*/ - page_t* page, /* in: page */ - page_cur_t* cursor) /* in/out: page cursor */ -{ - ulint rnd; - rec_t* rec; - - if (page_get_n_recs(page) == 0) { - page_cur_position(page_get_infimum_rec(page), cursor); - - return; - } - - if (srv_use_legacy_cardinality_algorithm) { - page_rnd += 87584577; - - rnd = page_rnd % page_get_n_recs(page); - } else { - rnd = (ulint) page_cur_lcg_prng() % page_get_n_recs(page); - } - - rec = page_get_infimum_rec(page); - - rec = page_rec_get_next(rec); - - while (rnd > 0) { - rec = page_rec_get_next(rec); - - rnd--; - } - - page_cur_position(rec, cursor); -} - -/*************************************************************** -Writes the log record of a record insert on a page. */ -static -void -page_cur_insert_rec_write_log( -/*==========================*/ - rec_t* insert_rec, /* in: inserted physical record */ - ulint rec_size, /* in: insert_rec size */ - rec_t* cursor_rec, /* in: record the - cursor is pointing to */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - ulint cur_rec_size; - ulint extra_size; - ulint cur_extra_size; - ulint min_rec_size; - byte* ins_ptr; - byte* cur_ptr; - ulint extra_info_yes; - byte* log_ptr; - byte* log_end; - ulint i; - ulint comp; - - ut_a(rec_size < UNIV_PAGE_SIZE); - ut_ad(buf_frame_align(insert_rec) == buf_frame_align(cursor_rec)); - ut_ad(!page_rec_is_comp(insert_rec) - == !dict_table_is_comp(index->table)); - comp = page_rec_is_comp(insert_rec); - - { - mem_heap_t* heap = NULL; - ulint cur_offs_[REC_OFFS_NORMAL_SIZE]; - ulint ins_offs_[REC_OFFS_NORMAL_SIZE]; - - ulint* cur_offs; - ulint* ins_offs; - - *cur_offs_ = (sizeof cur_offs_) / sizeof *cur_offs_; - *ins_offs_ = (sizeof ins_offs_) / sizeof *ins_offs_; - - cur_offs = rec_get_offsets(cursor_rec, index, cur_offs_, - ULINT_UNDEFINED, &heap); - ins_offs = rec_get_offsets(insert_rec, index, ins_offs_, - ULINT_UNDEFINED, 
&heap); - - extra_size = rec_offs_extra_size(ins_offs); - cur_extra_size = rec_offs_extra_size(cur_offs); - ut_ad(rec_size == rec_offs_size(ins_offs)); - cur_rec_size = rec_offs_size(cur_offs); - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } - - ins_ptr = insert_rec - extra_size; - - i = 0; - - if (cur_extra_size == extra_size) { - min_rec_size = ut_min(cur_rec_size, rec_size); - - cur_ptr = cursor_rec - cur_extra_size; - - /* Find out the first byte in insert_rec which differs from - cursor_rec; skip the bytes in the record info */ - - for (;;) { - if (i >= min_rec_size) { - - break; - } else if (*ins_ptr == *cur_ptr) { - i++; - ins_ptr++; - cur_ptr++; - } else if ((i < extra_size) - && (i >= extra_size - - (comp - ? REC_N_NEW_EXTRA_BYTES - : REC_N_OLD_EXTRA_BYTES))) { - i = extra_size; - ins_ptr = insert_rec; - cur_ptr = cursor_rec; - } else { - break; - } - } - } - - if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) { - - log_ptr = mlog_open_and_write_index(mtr, insert_rec, index, - comp - ? 
MLOG_COMP_REC_INSERT - : MLOG_REC_INSERT, - 2 + 5 + 1 + 5 + 5 - + MLOG_BUF_MARGIN); - - if (!log_ptr) { - /* Logging in mtr is switched off during crash - recovery: in that case mlog_open returns NULL */ - return; - } - - log_end = &log_ptr[2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN]; - /* Write the cursor rec offset as a 2-byte ulint */ - mach_write_to_2(log_ptr, cursor_rec - - buf_frame_align(cursor_rec)); - log_ptr += 2; - } else { - log_ptr = mlog_open(mtr, 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN); - if (!log_ptr) { - /* Logging in mtr is switched off during crash - recovery: in that case mlog_open returns NULL */ - return; - } - log_end = &log_ptr[5 + 1 + 5 + 5 + MLOG_BUF_MARGIN]; - } - - if ((rec_get_info_and_status_bits(insert_rec, comp) - != rec_get_info_and_status_bits(cursor_rec, comp)) - || (extra_size != cur_extra_size) - || (rec_size != cur_rec_size)) { - - extra_info_yes = 1; - } else { - extra_info_yes = 0; - } - - /* Write the record end segment length and the extra info storage - flag */ - log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i) - + extra_info_yes); - if (extra_info_yes) { - /* Write the info bits */ - mach_write_to_1(log_ptr, - rec_get_info_and_status_bits(insert_rec, - comp)); - log_ptr++; - - /* Write the record origin offset */ - log_ptr += mach_write_compressed(log_ptr, extra_size); - - /* Write the mismatch index */ - log_ptr += mach_write_compressed(log_ptr, i); - - ut_a(i < UNIV_PAGE_SIZE); - ut_a(extra_size < UNIV_PAGE_SIZE); - } - - /* Write to the log the inserted index record end segment which - differs from the cursor record */ - - rec_size -= i; - - if (log_ptr + rec_size <= log_end) { - memcpy(log_ptr, ins_ptr, rec_size); - mlog_close(mtr, log_ptr + rec_size); - } else { - mlog_close(mtr, log_ptr); - ut_a(rec_size < UNIV_PAGE_SIZE); - mlog_catenate_string(mtr, ins_ptr, rec_size); - } -} - -/*************************************************************** -Parses a log record of a record insert on a page. 
*/

byte*
page_cur_parse_insert_rec(
/*======================*/
				/* out: end of log record or NULL */
	ibool		is_short,/* in: TRUE if short inserts */
	byte*		ptr,	/* in: buffer */
	byte*		end_ptr,/* in: buffer end */
	dict_index_t*	index,	/* in: record descriptor */
	page_t*		page,	/* in: page or NULL */
	mtr_t*		mtr)	/* in: mtr or NULL */
{
	ulint		extra_info_yes;
	ulint		offset = 0; /* remove warning */
	ulint		origin_offset;
	ulint		end_seg_len;
	ulint		mismatch_index;
	rec_t*		cursor_rec;
	byte		stack_buf[1024];
	byte*		buf;
	byte*		log_start	= ptr;
	ulint		info_and_status_bits = 0; /* remove warning */
	page_cur_t	cursor;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	if (!is_short) {
		/* Read the cursor rec offset as a 2-byte ulint */

		if (end_ptr < ptr + 2) {

			return(NULL);
		}

		offset = mach_read_from_2(ptr);

		if (offset >= UNIV_PAGE_SIZE) {

			recv_sys->found_corrupt_log = TRUE;

			return(NULL);
		}

		ptr += 2;
	}

	/* The compressed value carries the end segment length in
	its upper bits and the extra-info flag in bit 0. */
	ptr = mach_parse_compressed(ptr, end_ptr, &end_seg_len);

	if (ptr == NULL) {

		return(NULL);
	}

	extra_info_yes = end_seg_len & 0x1UL;
	end_seg_len >>= 1;

	if (end_seg_len >= UNIV_PAGE_SIZE) {
		recv_sys->found_corrupt_log = TRUE;

		return(NULL);
	}

	if (extra_info_yes) {
		/* Read the info bits */

		if (end_ptr < ptr + 1) {

			return(NULL);
		}

		info_and_status_bits = mach_read_from_1(ptr);
		ptr++;

		/* Record origin offset */
		ptr = mach_parse_compressed(ptr, end_ptr, &origin_offset);

		if (ptr == NULL) {

			return(NULL);
		}

		ut_a(origin_offset < UNIV_PAGE_SIZE);

		/* Mismatch index */
		ptr = mach_parse_compressed(ptr, end_ptr, &mismatch_index);

		if (ptr == NULL) {

			return(NULL);
		}

		ut_a(mismatch_index < UNIV_PAGE_SIZE);
	}

	if (end_ptr < ptr + end_seg_len) {

		/* The record tail is not completely in the buffer */
		return(NULL);
	}

	if (page == NULL) {

		/* Parse only: nothing to apply */
		return(ptr + end_seg_len);
	}

	ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table));

	/* Read from the log the inserted index record end segment which
	differs from the cursor record */

	if (is_short) {
		/* Short inserts always go after the last record on
		the page */
		cursor_rec = page_rec_get_prev(page_get_supremum_rec(page));
	} else {
		cursor_rec = page + offset;
	}

	offsets = rec_get_offsets(cursor_rec, index, offsets,
				  ULINT_UNDEFINED, &heap);

	if (extra_info_yes == 0) {
		/* Nothing was logged explicitly: take the values from
		the cursor record */
		info_and_status_bits = rec_get_info_and_status_bits(
			cursor_rec, page_is_comp(page));
		origin_offset = rec_offs_extra_size(offsets);
		mismatch_index = rec_offs_size(offsets) - end_seg_len;
	}

	/* Use the stack buffer when the record fits, otherwise
	allocate */
	if (mismatch_index + end_seg_len < sizeof stack_buf) {
		buf = stack_buf;
	} else {
		buf = mem_alloc(mismatch_index + end_seg_len);
	}

	/* Build the inserted record to buf */

	if (mismatch_index >= UNIV_PAGE_SIZE) {
		fprintf(stderr,
			"Is short %lu, info_and_status_bits %lu, offset %lu, "
			"o_offset %lu\n"
			"mismatch index %lu, end_seg_len %lu\n"
			"parsed len %lu\n",
			(ulong) is_short, (ulong) info_and_status_bits,
			(ulong) offset,
			(ulong) origin_offset,
			(ulong) mismatch_index, (ulong) end_seg_len,
			(ulong) (ptr - log_start));

		fputs("Dump of 300 bytes of log:\n", stderr);
		ut_print_buf(stderr, log_start, 300);

		buf_page_print(page);

		ut_error;
	}

	/* Prefix shared with the cursor record, then the logged tail */
	ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index);
	ut_memcpy(buf + mismatch_index, ptr, end_seg_len);

	rec_set_info_and_status_bits(buf + origin_offset, page_is_comp(page),
				     info_and_status_bits);

	page_cur_position(cursor_rec, &cursor);

	offsets = rec_get_offsets(buf + origin_offset, index, offsets,
				  ULINT_UNDEFINED, &heap);
	page_cur_rec_insert(&cursor, buf + origin_offset, index, offsets, mtr);

	if (buf != stack_buf) {

		mem_free(buf);
	}

	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}

	return(ptr + end_seg_len);
}

/***************************************************************
Inserts a record next to page cursor. Returns pointer to inserted record if
succeed, i.e., enough space available, NULL otherwise.
The record to be -inserted can be in a data tuple or as a physical record. The other parameter -must then be NULL. The cursor stays at the same position. */ - -rec_t* -page_cur_insert_rec_low( -/*====================*/ - /* out: pointer to record if succeed, NULL - otherwise */ - page_cur_t* cursor, /* in: a page cursor */ - dtuple_t* tuple, /* in: pointer to a data tuple or NULL */ - dict_index_t* index, /* in: record descriptor */ - rec_t* rec, /* in: pointer to a physical record or NULL */ - ulint* offsets,/* in: rec_get_offsets(rec, index) or NULL */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - byte* insert_buf = NULL; - ulint rec_size; - byte* page; /* the relevant page */ - rec_t* last_insert; /* cursor position at previous - insert */ - rec_t* insert_rec; /* inserted record */ - ulint heap_no; /* heap number of the inserted - record */ - rec_t* current_rec; /* current record after which the - new record is inserted */ - rec_t* next_rec; /* next record after current before - the insertion */ - ulint owner_slot; /* the slot which owns the - inserted record */ - rec_t* owner_rec; - ulint n_owned; - mem_heap_t* heap = NULL; - ulint comp; - - ut_ad(cursor && mtr); - ut_ad(tuple || rec); - ut_ad(!(tuple && rec)); - ut_ad(rec || dtuple_check_typed(tuple)); - - page = page_cur_get_page(cursor); - comp = page_is_comp(page); - ut_ad(dict_table_is_comp(index->table) == !!comp); - - ut_ad(cursor->rec != page_get_supremum_rec(page)); - - /* 1. Get the size of the physical record in the page */ - if (tuple != NULL) { - rec_size = rec_get_converted_size(index, tuple); - } else { - if (!offsets) { - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - } - ut_ad(rec_offs_validate(rec, index, offsets)); - rec_size = rec_offs_size(offsets); - } - - /* 2. 
Try to find suitable space from page memory management */ - insert_buf = page_mem_alloc(page, rec_size, index, &heap_no); - - if (insert_buf == NULL) { - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(NULL); - } - - /* 3. Create the record */ - if (tuple != NULL) { - insert_rec = rec_convert_dtuple_to_rec(insert_buf, - index, tuple); - offsets = rec_get_offsets(insert_rec, index, offsets, - ULINT_UNDEFINED, &heap); - } else { - insert_rec = rec_copy(insert_buf, rec, offsets); - ut_ad(rec_offs_validate(rec, index, offsets)); - rec_offs_make_valid(insert_rec, index, offsets); - } - - ut_ad(insert_rec); - ut_ad(rec_size == rec_offs_size(offsets)); - - /* 4. Insert the record in the linked list of records */ - current_rec = cursor->rec; - - ut_ad(!comp || rec_get_status(current_rec) <= REC_STATUS_INFIMUM); - ut_ad(!comp || rec_get_status(insert_rec) < REC_STATUS_INFIMUM); - - next_rec = page_rec_get_next(current_rec); - ut_ad(!comp || rec_get_status(next_rec) != REC_STATUS_INFIMUM); - page_rec_set_next(insert_rec, next_rec); - page_rec_set_next(current_rec, insert_rec); - - page_header_set_field(page, PAGE_N_RECS, 1 + page_get_n_recs(page)); - - /* 5. Set the n_owned field in the inserted record to zero, - and set the heap_no field */ - - rec_set_n_owned(insert_rec, comp, 0); - rec_set_heap_no(insert_rec, comp, heap_no); - - /* 6. 
Update the last insertion info in page header */ - - last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT); - ut_ad(!last_insert || !comp - || rec_get_node_ptr_flag(last_insert) - == rec_get_node_ptr_flag(insert_rec)); - - if (last_insert == NULL) { - page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION); - page_header_set_field(page, PAGE_N_DIRECTION, 0); - - } else if ((last_insert == current_rec) - && (page_header_get_field(page, PAGE_DIRECTION) - != PAGE_LEFT)) { - - page_header_set_field(page, PAGE_DIRECTION, PAGE_RIGHT); - page_header_set_field(page, PAGE_N_DIRECTION, - page_header_get_field( - page, PAGE_N_DIRECTION) + 1); - - } else if ((page_rec_get_next(insert_rec) == last_insert) - && (page_header_get_field(page, PAGE_DIRECTION) - != PAGE_RIGHT)) { - - page_header_set_field(page, PAGE_DIRECTION, PAGE_LEFT); - page_header_set_field(page, PAGE_N_DIRECTION, - page_header_get_field( - page, PAGE_N_DIRECTION) + 1); - } else { - page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION); - page_header_set_field(page, PAGE_N_DIRECTION, 0); - } - - page_header_set_ptr(page, PAGE_LAST_INSERT, insert_rec); - - /* 7. It remains to update the owner record. */ - - owner_rec = page_rec_find_owner_rec(insert_rec); - n_owned = rec_get_n_owned(owner_rec, comp); - rec_set_n_owned(owner_rec, comp, n_owned + 1); - - /* 8. Now we have incremented the n_owned field of the owner - record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED, - we have to split the corresponding directory slot in two. */ - - if (n_owned == PAGE_DIR_SLOT_MAX_N_OWNED) { - owner_slot = page_dir_find_owner_slot(owner_rec); - page_dir_split_slot(page, owner_slot); - } - - /* 9. 
Write log record of the insert */ - page_cur_insert_rec_write_log(insert_rec, rec_size, current_rec, - index, mtr); - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(insert_rec); -} - -/************************************************************** -Writes a log record of copying a record list end to a new created page. */ -UNIV_INLINE -byte* -page_copy_rec_list_to_created_page_write_log( -/*=========================================*/ - /* out: 4-byte field where to - write the log data length */ - page_t* page, /* in: index page */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr) /* in: mtr */ -{ - byte* log_ptr; - - ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); - - log_ptr = mlog_open_and_write_index(mtr, page, index, - page_is_comp(page) - ? MLOG_COMP_LIST_END_COPY_CREATED - : MLOG_LIST_END_COPY_CREATED, 4); - ut_a(log_ptr); - mlog_close(mtr, log_ptr + 4); - - return(log_ptr); -} - -/************************************************************** -Parses a log record of copying a record list end to a new created page. 
*/

byte*
page_parse_copy_rec_list_to_created_page(
/*=====================================*/
				/* out: end of log record or NULL */
	byte*		ptr,	/* in: buffer */
	byte*		end_ptr,/* in: buffer end */
	dict_index_t*	index,	/* in: record descriptor */
	page_t*		page,	/* in: page or NULL */
	mtr_t*		mtr)	/* in: mtr or NULL */
{
	ulint	body_len;	/* length of the logged inserts */
	byte*	body_end;	/* first byte after the logged inserts */

	/* The record starts with a 4-byte length of the data that
	follows */
	if (ptr + 4 > end_ptr) {

		return(NULL);
	}

	body_len = mach_read_from_4(ptr);
	ptr += 4;

	body_end = ptr + body_len;

	if (body_end > end_ptr) {

		/* The whole record is not yet in the buffer */
		return(NULL);
	}

	if (!page) {

		/* Parse only: nothing to apply */
		return(body_end);
	}

	/* Replay the short-form inserts one after another */
	for (; ptr < body_end; ) {
		ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr,
						index, page, mtr);
	}

	ut_a(ptr == body_end);

	/* Reset the last-insert information in the page header */
	page_header_set_ptr(page, PAGE_LAST_INSERT, NULL);
	page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION);
	page_header_set_field(page, PAGE_N_DIRECTION, 0);

	return(body_end);
}

/*****************************************************************
Copies records from page to a newly created page, from a given record onward,
including that record. Infimum and supremum records are not copied.
*/ - -void -page_copy_rec_list_end_to_created_page( -/*===================================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: first record to copy */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr) /* in: mtr */ -{ - page_dir_slot_t* slot = 0; /* remove warning */ - byte* heap_top; - rec_t* insert_rec = 0; /* remove warning */ - rec_t* prev_rec; - ulint count; - ulint n_recs; - ulint slot_index; - ulint rec_size; - ulint log_mode; - byte* log_ptr; - ulint log_data_len; - ulint comp = page_is_comp(page); - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_ad(page_dir_get_n_heap(new_page) == 2); - ut_ad(page != new_page); - ut_ad(comp == page_is_comp(new_page)); - - if (rec == page_get_infimum_rec(page)) { - - rec = page_rec_get_next(rec); - } - - if (rec == page_get_supremum_rec(page)) { - - return; - } - -#ifdef UNIV_DEBUG - /* To pass the debug tests we have to set these dummy values - in the debug version */ - page_dir_set_n_slots(new_page, UNIV_PAGE_SIZE / 2); - page_header_set_ptr(new_page, PAGE_HEAP_TOP, - new_page + UNIV_PAGE_SIZE - 1); -#endif - - log_ptr = page_copy_rec_list_to_created_page_write_log(new_page, - index, mtr); - - log_data_len = dyn_array_get_data_size(&(mtr->log)); - - /* Individual inserts are logged in a shorter form */ - - log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS); - - prev_rec = page_get_infimum_rec(new_page); - if (comp) { - heap_top = new_page + PAGE_NEW_SUPREMUM_END; - } else { - heap_top = new_page + PAGE_OLD_SUPREMUM_END; - } - count = 0; - slot_index = 0; - n_recs = 0; - - /* should be do ... 
until, comment by Jani */ - while (rec != page_get_supremum_rec(page)) { - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - insert_rec = rec_copy(heap_top, rec, offsets); - - rec_set_next_offs(prev_rec, comp, insert_rec - new_page); - - rec_set_n_owned(insert_rec, comp, 0); - rec_set_heap_no(insert_rec, comp, 2 + n_recs); - - rec_size = rec_offs_size(offsets); - - heap_top = heap_top + rec_size; - - ut_ad(heap_top < new_page + UNIV_PAGE_SIZE); - - count++; - n_recs++; - - if (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2) { - - slot_index++; - - slot = page_dir_get_nth_slot(new_page, slot_index); - - page_dir_slot_set_rec(slot, insert_rec); - page_dir_slot_set_n_owned(slot, count); - - count = 0; - } - - page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec, - index, mtr); - prev_rec = insert_rec; - rec = page_rec_get_next(rec); - } - - if ((slot_index > 0) && (count + 1 - + (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2 - <= PAGE_DIR_SLOT_MAX_N_OWNED)) { - /* We can merge the two last dir slots. This operation is - here to make this function imitate exactly the equivalent - task made using page_cur_insert_rec, which we use in database - recovery to reproduce the task performed by this function. - To be able to check the correctness of recovery, it is good - that it imitates exactly. */ - - count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2; - - page_dir_slot_set_n_owned(slot, 0); - - slot_index--; - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len; - - ut_a(log_data_len < 100 * UNIV_PAGE_SIZE); - - mach_write_to_4(log_ptr, log_data_len); - - rec_set_next_offs(insert_rec, comp, - comp ? 
PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM); - - slot = page_dir_get_nth_slot(new_page, 1 + slot_index); - - page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page)); - page_dir_slot_set_n_owned(slot, count + 1); - - page_dir_set_n_slots(new_page, 2 + slot_index); - page_header_set_ptr(new_page, PAGE_HEAP_TOP, heap_top); - page_dir_set_n_heap(new_page, 2 + n_recs); - page_header_set_field(new_page, PAGE_N_RECS, n_recs); - - page_header_set_ptr(new_page, PAGE_LAST_INSERT, NULL); - page_header_set_field(new_page, PAGE_DIRECTION, PAGE_NO_DIRECTION); - page_header_set_field(new_page, PAGE_N_DIRECTION, 0); - - /* Restore the log mode */ - - mtr_set_log_mode(mtr, log_mode); -} - -/*************************************************************** -Writes log record of a record delete on a page. */ -UNIV_INLINE -void -page_cur_delete_rec_write_log( -/*==========================*/ - rec_t* rec, /* in: record to be deleted */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - byte* log_ptr; - - ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); - - log_ptr = mlog_open_and_write_index(mtr, rec, index, - page_rec_is_comp(rec) - ? MLOG_COMP_REC_DELETE - : MLOG_REC_DELETE, 2); - - if (!log_ptr) { - /* Logging in mtr is switched off during crash recovery: - in that case mlog_open returns NULL */ - return; - } - - /* Write the cursor rec offset as a 2-byte ulint */ - mach_write_to_2(log_ptr, page_offset(rec)); - - mlog_close(mtr, log_ptr + 2); -} - -/*************************************************************** -Parses log record of a record delete on a page. 
*/ - -byte* -page_cur_parse_delete_rec( -/*======================*/ - /* out: pointer to record end or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - dict_index_t* index, /* in: record descriptor */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ -{ - ulint offset; - page_cur_t cursor; - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - /* Read the cursor rec offset as a 2-byte ulint */ - offset = mach_read_from_2(ptr); - ptr += 2; - - ut_a(offset <= UNIV_PAGE_SIZE); - - if (page) { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - rec_t* rec = page + offset; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - page_cur_position(rec, &cursor); - - page_cur_delete_rec(&cursor, index, - rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap), - mtr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } - - return(ptr); -} - -/*************************************************************** -Deletes a record at the page cursor. The cursor is moved to the next -record after the deleted one. */ - -void -page_cur_delete_rec( -/*================*/ - page_cur_t* cursor, /* in: a page cursor */ - dict_index_t* index, /* in: record descriptor */ - const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */ - mtr_t* mtr) /* in: mini-transaction handle */ -{ - page_dir_slot_t* cur_dir_slot; - page_dir_slot_t* prev_slot; - page_t* page; - rec_t* current_rec; - rec_t* prev_rec = NULL; - rec_t* next_rec; - ulint cur_slot_no; - ulint cur_n_owned; - rec_t* rec; - - ut_ad(cursor && mtr); - - page = page_cur_get_page(cursor); - current_rec = cursor->rec; - ut_ad(rec_offs_validate(current_rec, index, offsets)); - ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); - - /* The record must not be the supremum or infimum record. 
*/ - ut_ad(current_rec != page_get_supremum_rec(page)); - ut_ad(current_rec != page_get_infimum_rec(page)); - - /* Save to local variables some data associated with current_rec */ - cur_slot_no = page_dir_find_owner_slot(current_rec); - cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no); - cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot); - - /* 0. Write the log record */ - page_cur_delete_rec_write_log(current_rec, index, mtr); - - /* 1. Reset the last insert info in the page header and increment - the modify clock for the frame */ - - page_header_set_ptr(page, PAGE_LAST_INSERT, NULL); - - /* The page gets invalid for optimistic searches: increment the - frame modify clock */ - - buf_frame_modify_clock_inc(page); - - /* 2. Find the next and the previous record. Note that the cursor is - left at the next record. */ - - ut_ad(cur_slot_no > 0); - prev_slot = page_dir_get_nth_slot(page, cur_slot_no - 1); - - rec = page_dir_slot_get_rec(prev_slot); - - /* rec now points to the record of the previous directory slot. Look - for the immediate predecessor of current_rec in a loop. */ - - while(current_rec != rec) { - prev_rec = rec; - rec = page_rec_get_next(rec); - } - - page_cur_move_to_next(cursor); - next_rec = cursor->rec; - - /* 3. Remove the record from the linked list of records */ - - page_rec_set_next(prev_rec, next_rec); - page_header_set_field(page, PAGE_N_RECS, - (ulint)(page_get_n_recs(page) - 1)); - - /* 4. If the deleted record is pointed to by a dir slot, update the - record pointer in slot. In the following if-clause we assume that - prev_rec is owned by the same slot, i.e., PAGE_DIR_SLOT_MIN_N_OWNED - >= 2. */ - -#if PAGE_DIR_SLOT_MIN_N_OWNED < 2 -# error "PAGE_DIR_SLOT_MIN_N_OWNED < 2" -#endif - ut_ad(cur_n_owned > 1); - - if (current_rec == page_dir_slot_get_rec(cur_dir_slot)) { - page_dir_slot_set_rec(cur_dir_slot, prev_rec); - } - - /* 5. 
Update the number of owned records of the slot */ - - page_dir_slot_set_n_owned(cur_dir_slot, cur_n_owned - 1); - - /* 6. Free the memory occupied by the record */ - page_mem_free(page, current_rec, offsets); - - /* 7. Now we have decremented the number of owned records of the slot. - If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the - slots. */ - - if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) { - page_dir_balance_slot(page, cur_slot_no); - } -} - -#ifdef UNIV_COMPILE_TEST_FUNCS - -/*********************************************************************** -Print the first n numbers, generated by page_cur_lcg_prng() to make sure -(visually) that it works properly. */ -void -test_page_cur_lcg_prng( -/*===================*/ - int n) /* in: print first n numbers */ -{ - int i; - unsigned long long rnd; - - for (i = 0; i < n; i++) { - rnd = page_cur_lcg_prng(); - printf("%llu\t%%2=%llu %%3=%llu %%5=%llu %%7=%llu %%11=%llu\n", - rnd, - rnd % 2, - rnd % 3, - rnd % 5, - rnd % 7, - rnd % 11); - } -} - -#endif /* UNIV_COMPILE_TEST_FUNCS */ diff --git a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c deleted file mode 100644 index 543cf9e34eb..00000000000 --- a/storage/innobase/page/page0page.c +++ /dev/null @@ -1,2038 +0,0 @@ -/****************************************************** -Index page routines - -(c) 1994-1996 Innobase Oy - -Created 2/2/1994 Heikki Tuuri -*******************************************************/ - -#define THIS_MODULE -#include "page0page.h" -#ifdef UNIV_NONINL -#include "page0page.ic" -#endif -#undef THIS_MODULE - -#include "page0cur.h" -#include "lock0lock.h" -#include "fut0lst.h" -#include "btr0sea.h" -#include "buf0buf.h" -#include "srv0srv.h" -#include "btr0btr.h" - -/* THE INDEX PAGE - ============== - -The index page consists of a page header which contains the page's -id and other information. 
On top of it are the index records -in a heap linked into a one way linear list according to alphabetic order. - -Just below page end is an array of pointers which we call page directory, -to about every sixth record in the list. The pointers are placed in -the directory in the alphabetical order of the records pointed to, -enabling us to make binary search using the array. Each slot n:o I -in the directory points to a record, where a 4-bit field contains a count -of those records which are in the linear list between pointer I and -the pointer I - 1 in the directory, including the record -pointed to by pointer I and not including the record pointed to by I - 1. -We say that the record pointed to by slot I, or that slot I, owns -these records. The count is always kept in the range 4 to 8, with -the exception that it is 1 for the first slot, and 1--8 for the last slot. - -An essentially binary search can be performed in the list of index -records, like we could do if we had pointer to every record in the -page directory. The data structure is, however, more efficient when -we are doing inserts, because most inserts are just pushed on a heap. -Only every 8th insert requires block move in the directory pointer -table, which itself is quite small. A record is deleted from the page -by just taking it off the linear list and updating the number of owned -records-field of the record which owns it, and updating the page directory, -if necessary. A special case is the one when the record owns itself. -Because the overhead of inserts is so small, we may also increase the -page size from the projected default of 8 kB to 64 kB without too -much loss of efficiency in inserts. Bigger page becomes actual -when the disk transfer rate compared to seek and latency time rises. -On the present system, the page size is set so that the page transfer -time (3 ms) is 20 % of the disk random access time (15 ms). 
- -When the page is split, merged, or becomes full but contains deleted -records, we have to reorganize the page. - -Assuming a page size of 8 kB, a typical index page of a secondary -index contains 300 index entries, and the size of the page directory -is 50 x 4 bytes = 200 bytes. */ - -/******************************************************************* -Looks for the directory slot which owns the given record. */ - -ulint -page_dir_find_owner_slot( -/*=====================*/ - /* out: the directory slot number */ - rec_t* rec) /* in: the physical record */ -{ - page_t* page; - register uint16 rec_offs_bytes; - register page_dir_slot_t* slot; - register const page_dir_slot_t* first_slot; - register rec_t* r = rec; - - ut_ad(page_rec_check(rec)); - - page = buf_frame_align(rec); - first_slot = page_dir_get_nth_slot(page, 0); - slot = page_dir_get_nth_slot(page, page_dir_get_n_slots(page) - 1); - - if (page_is_comp(page)) { - while (rec_get_n_owned(r, TRUE) == 0) { - r = page + rec_get_next_offs(r, TRUE); - ut_ad(r >= page + PAGE_NEW_SUPREMUM); - ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR)); - } - } else { - while (rec_get_n_owned(r, FALSE) == 0) { - r = page + rec_get_next_offs(r, FALSE); - ut_ad(r >= page + PAGE_OLD_SUPREMUM); - ut_ad(r < page + (UNIV_PAGE_SIZE - PAGE_DIR)); - } - } - - rec_offs_bytes = mach_encode_2(r - page); - - while (UNIV_LIKELY(*(uint16*) slot != rec_offs_bytes)) { - - if (UNIV_UNLIKELY(slot == first_slot)) { - fprintf(stderr, - "InnoDB: Probable data corruption on" - " page %lu\n" - "InnoDB: Original record ", - (ulong) buf_frame_get_page_no(page)); - - if (page_is_comp(page)) { - fputs("(compact record)", stderr); - } else { - rec_print_old(stderr, rec); - } - - fputs("\n" - "InnoDB: on that page.\n" - "InnoDB: Cannot find the dir slot for record ", - stderr); - if (page_is_comp(page)) { - fputs("(compact record)", stderr); - } else { - rec_print_old(stderr, page - + mach_decode_2(rec_offs_bytes)); - } - fputs("\n" - "InnoDB: on that 
page!\n", stderr); - - buf_page_print(page); - - ut_error; - } - - slot += PAGE_DIR_SLOT_SIZE; - } - - return(((ulint) (first_slot - slot)) / PAGE_DIR_SLOT_SIZE); -} - -/****************************************************************** -Used to check the consistency of a directory slot. */ -static -ibool -page_dir_slot_check( -/*================*/ - /* out: TRUE if succeed */ - page_dir_slot_t* slot) /* in: slot */ -{ - page_t* page; - ulint n_slots; - ulint n_owned; - - ut_a(slot); - - page = buf_frame_align(slot); - - n_slots = page_dir_get_n_slots(page); - - ut_a(slot <= page_dir_get_nth_slot(page, 0)); - ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1)); - - ut_a(page_rec_check(page_dir_slot_get_rec(slot))); - - n_owned = rec_get_n_owned(page_dir_slot_get_rec(slot), - page_is_comp(page)); - - if (slot == page_dir_get_nth_slot(page, 0)) { - ut_a(n_owned == 1); - } else if (slot == page_dir_get_nth_slot(page, n_slots - 1)) { - ut_a(n_owned >= 1); - ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED); - } else { - ut_a(n_owned >= PAGE_DIR_SLOT_MIN_N_OWNED); - ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED); - } - - return(TRUE); -} - -/***************************************************************** -Sets the max trx id field value. */ - -void -page_set_max_trx_id( -/*================*/ - page_t* page, /* in: page */ - dulint trx_id) /* in: transaction id */ -{ - buf_block_t* block; - - ut_ad(page); - - block = buf_block_align(page); - - if (block->is_hashed) { - rw_lock_x_lock(&btr_search_latch); - } - - /* It is not necessary to write this change to the redo log, as - during a database recovery we assume that the max trx id of every - page is the maximum trx id assigned before the crash. */ - - mach_write_to_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID, trx_id); - - if (block->is_hashed) { - rw_lock_x_unlock(&btr_search_latch); - } -} - -/***************************************************************** -Calculates free space if a page is emptied. 
*/ - -ulint -page_get_free_space_of_empty_noninline( -/*===================================*/ - /* out: free space */ - ulint comp) /* in: nonzero=compact page format */ -{ - return(page_get_free_space_of_empty(comp)); -} - -/**************************************************************** -Allocates a block of memory from an index page. */ - -byte* -page_mem_alloc( -/*===========*/ - /* out: pointer to start of allocated - buffer, or NULL if allocation fails */ - page_t* page, /* in: index page */ - ulint need, /* in: number of bytes needed */ - dict_index_t* index, /* in: record descriptor */ - ulint* heap_no)/* out: this contains the heap number - of the allocated record - if allocation succeeds */ -{ - rec_t* rec; - byte* block; - ulint avl_space; - ulint garbage; - - ut_ad(page && heap_no); - - /* If there are records in the free list, look if the first is - big enough */ - - rec = page_header_get_ptr(page, PAGE_FREE); - - if (rec) { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - - if (rec_offs_size(offsets) >= need) { - page_header_set_ptr(page, PAGE_FREE, - page_rec_get_next(rec)); - - garbage = page_header_get_field(page, PAGE_GARBAGE); - ut_ad(garbage >= need); - - page_header_set_field(page, PAGE_GARBAGE, - garbage - need); - - *heap_no = rec_get_heap_no(rec, page_is_comp(page)); - - block = rec_get_start(rec, offsets); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(block); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } - - /* Could not find space from the free list, try top of heap */ - - avl_space = page_get_max_insert_size(page, 1); - - if (avl_space >= need) { - block = page_header_get_ptr(page, PAGE_HEAP_TOP); - - page_header_set_ptr(page, PAGE_HEAP_TOP, block + need); - *heap_no = page_dir_get_n_heap(page); - - 
page_dir_set_n_heap(page, 1 + *heap_no); - - return(block); - } - - return(NULL); -} - -/************************************************************** -Writes a log record of page creation. */ -UNIV_INLINE -void -page_create_write_log( -/*==================*/ - buf_frame_t* frame, /* in: a buffer frame where the page is - created */ - mtr_t* mtr, /* in: mini-transaction handle */ - ulint comp) /* in: nonzero=compact page format */ -{ - mlog_write_initial_log_record(frame, comp - ? MLOG_COMP_PAGE_CREATE - : MLOG_PAGE_CREATE, mtr); -} - -/*************************************************************** -Parses a redo log record of creating a page. */ - -byte* -page_parse_create( -/*==============*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr __attribute__((unused)), /* in: buffer end */ - ulint comp, /* in: nonzero=compact page format */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ -{ - ut_ad(ptr && end_ptr); - - /* The record is empty, except for the record initial part */ - - if (page) { - page_create(page, mtr, comp); - } - - return(ptr); -} - -/************************************************************** -The index page creation function. */ - -page_t* -page_create( -/*========*/ - /* out: pointer to the page */ - buf_frame_t* frame, /* in: a buffer frame where the page is - created */ - mtr_t* mtr, /* in: mini-transaction handle */ - ulint comp) /* in: nonzero=compact page format */ -{ - page_dir_slot_t* slot; - mem_heap_t* heap; - dtuple_t* tuple; - dfield_t* field; - byte* heap_top; - rec_t* infimum_rec; - rec_t* supremum_rec; - page_t* page; - dict_index_t* index; - ulint* offsets; - - index = comp ? 
srv_sys->dummy_ind2 : srv_sys->dummy_ind1; - - ut_ad(frame && mtr); -#if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA -# error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA" -#endif -#if PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA -# error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA" -#endif - - /* 1. INCREMENT MODIFY CLOCK */ - buf_frame_modify_clock_inc(frame); - - /* 2. WRITE LOG INFORMATION */ - page_create_write_log(frame, mtr, comp); - - page = frame; - - fil_page_set_type(page, FIL_PAGE_INDEX); - - heap = mem_heap_create(200); - - /* 3. CREATE THE INFIMUM AND SUPREMUM RECORDS */ - - /* Create first a data tuple for infimum record */ - tuple = dtuple_create(heap, 1); - dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM); - field = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(field, "infimum", 8); - dtype_set(dfield_get_type(field), - DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8); - /* Set the corresponding physical record to its place in the page - record heap */ - - heap_top = page + PAGE_DATA; - - infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple); - - ut_a(infimum_rec == page - + (comp ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)); - - rec_set_n_owned(infimum_rec, comp, 1); - rec_set_heap_no(infimum_rec, comp, 0); - offsets = rec_get_offsets(infimum_rec, index, NULL, - ULINT_UNDEFINED, &heap); - - heap_top = rec_get_end(infimum_rec, offsets); - - /* Create then a tuple for supremum */ - - tuple = dtuple_create(heap, 1); - dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM); - field = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(field, "supremum", comp ? 8 : 9); - dtype_set(dfield_get_type(field), - DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, comp ? 8 : 9); - - supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple); - - ut_a(supremum_rec == page - + (comp ? 
PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM)); - - rec_set_n_owned(supremum_rec, comp, 1); - rec_set_heap_no(supremum_rec, comp, 1); - - offsets = rec_get_offsets(supremum_rec, index, offsets, - ULINT_UNDEFINED, &heap); - heap_top = rec_get_end(supremum_rec, offsets); - - ut_ad(heap_top == page - + (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END)); - - mem_heap_free(heap); - - /* 4. INITIALIZE THE PAGE */ - - page_header_set_field(page, PAGE_N_DIR_SLOTS, 2); - page_header_set_ptr(page, PAGE_HEAP_TOP, heap_top); - page_header_set_field(page, PAGE_N_HEAP, comp ? 0x8002 : 2); - page_header_set_ptr(page, PAGE_FREE, NULL); - page_header_set_field(page, PAGE_GARBAGE, 0); - page_header_set_ptr(page, PAGE_LAST_INSERT, NULL); - page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION); - page_header_set_field(page, PAGE_N_DIRECTION, 0); - page_header_set_field(page, PAGE_N_RECS, 0); - page_set_max_trx_id(page, ut_dulint_zero); - memset(heap_top, 0, UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START - - (heap_top - page)); - - /* 5. SET POINTERS IN RECORDS AND DIR SLOTS */ - - /* Set the slots to point to infimum and supremum. */ - - slot = page_dir_get_nth_slot(page, 0); - page_dir_slot_set_rec(slot, infimum_rec); - - slot = page_dir_get_nth_slot(page, 1); - page_dir_slot_set_rec(slot, supremum_rec); - - /* Set the next pointers in infimum and supremum */ - - rec_set_next_offs(infimum_rec, comp, (ulint)(supremum_rec - page)); - rec_set_next_offs(supremum_rec, comp, 0); - - return(page); -} - -/***************************************************************** -Differs from page_copy_rec_list_end, because this function does not -touch the lock table and max trx id on page. 
*/

void
page_copy_rec_list_end_no_locks(
/*============================*/
	page_t*		new_page,	/* in: index page to copy to */
	page_t*		page,		/* in: index page */
	rec_t*		rec,		/* in: record on page */
	dict_index_t*	index,		/* in: record descriptor */
	mtr_t*		mtr)		/* in: mtr */
{
	page_cur_t	src;
	page_cur_t	dst;
	rec_t*		sup;
	rec_t*		src_rec;
	rec_t*		ins_rec;
	mem_heap_t*	tmp_heap	= NULL;
	ulint		offs_buf[REC_OFFS_NORMAL_SIZE];
	ulint*		offs		= offs_buf;

	offs_buf[0] = sizeof(offs_buf) / sizeof(offs_buf[0]);

	page_cur_position(rec, &src);

	/* Step over the infimum if the copy was asked to start there */
	if (page_cur_is_before_first(&src)) {

		page_cur_move_to_next(&src);
	}

	ut_a((ibool)!!page_is_comp(new_page)
	     == dict_table_is_comp(index->table));
	ut_a(page_is_comp(new_page) == page_is_comp(page));
	ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint)
	     (page_is_comp(new_page)
	      ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM));

	page_cur_set_before_first(new_page, &dst);

	/* Insert the records of the original page, one by one, into the
	new page until the supremum of the original page is reached */

	sup = page_get_supremum_rec(page);

	while ((src_rec = page_cur_get_rec(&src)) != sup) {

		offs = rec_get_offsets(src_rec, index, offs,
				       ULINT_UNDEFINED, &tmp_heap);

		ins_rec = page_cur_rec_insert(&dst, src_rec, index,
					      offs, mtr);

		if (UNIV_UNLIKELY(!ins_rec)) {
			/* Diagnostics for an assertion failure that was
			reported on the mailing list on June 18th, 2003 */

			buf_page_print(new_page);
			buf_page_print(page);
			ut_print_timestamp(stderr);

			fprintf(stderr,
				"InnoDB: rec offset %lu, cur1 offset %lu,"
				" cur2 offset %lu\n",
				(ulong)(rec - page),
				(ulong)(page_cur_get_rec(&src) - page),
				(ulong)(page_cur_get_rec(&dst) - new_page));

			ut_error;
		}

		page_cur_move_to_next(&src);
		page_cur_move_to_next(&dst);
	}

	if (UNIV_LIKELY_NULL(tmp_heap)) {
		mem_heap_free(tmp_heap);
	}
}

/*****************************************************************
Copies records from page to new_page, from a given record onward,
including that record. 
Infimum and supremum records are not copied.
The records are copied to the start of the record list on new_page. */

void
page_copy_rec_list_end(
/*===================*/
	page_t*		new_page,	/* in: index page to copy to */
	page_t*		page,		/* in: index page */
	rec_t*		rec,		/* in: record on page */
	dict_index_t*	index,		/* in: record descriptor */
	mtr_t*		mtr)		/* in: mtr */
{
	/* A heap count of 2 selects the variant that copies to a created
	page; otherwise the records are inserted one at a time */
	if (page_dir_get_n_heap(new_page) != 2) {
		page_copy_rec_list_end_no_locks(new_page, page, rec,
						index, mtr);
	} else {
		page_copy_rec_list_end_to_created_page(new_page, page, rec,
						       index, mtr);
	}

	/* Bring the lock table, MAX_TRX_ID and the possible hash index
	up to date */

	lock_move_rec_list_end(new_page, page, rec);

	page_update_max_trx_id(new_page, page_get_max_trx_id(page));

	btr_search_move_or_delete_hash_entries(new_page, page, index);
}

/*****************************************************************
Copies records from page to new_page, up to the given record,
NOT including that record. Infimum and supremum records are not copied.
The records are copied to the end of the record list on new_page. 
*/ - -void -page_copy_rec_list_start( -/*=====================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr) /* in: mtr */ -{ - page_cur_t cur1; - page_cur_t cur2; - rec_t* old_end; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - page_cur_set_before_first(page, &cur1); - - if (rec == page_cur_get_rec(&cur1)) { - - return; - } - - page_cur_move_to_next(&cur1); - - page_cur_set_after_last(new_page, &cur2); - page_cur_move_to_prev(&cur2); - old_end = page_cur_get_rec(&cur2); - - /* Copy records from the original page to the new page */ - - while (page_cur_get_rec(&cur1) != rec) { - rec_t* ins_rec; - rec_t* cur1_rec = page_cur_get_rec(&cur1); - offsets = rec_get_offsets(cur1_rec, index, offsets, - ULINT_UNDEFINED, &heap); - ins_rec = page_cur_rec_insert(&cur2, cur1_rec, index, - offsets, mtr); - ut_a(ins_rec); - - page_cur_move_to_next(&cur1); - page_cur_move_to_next(&cur2); - } - - /* Update the lock table, MAX_TRX_ID, and possible hash index */ - - lock_move_rec_list_start(new_page, page, rec, old_end); - - page_update_max_trx_id(new_page, page_get_max_trx_id(page)); - - btr_search_move_or_delete_hash_entries(new_page, page, index); - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -} - -/************************************************************** -Writes a log record of a record list end or start deletion. */ -UNIV_INLINE -void -page_delete_rec_list_write_log( -/*===========================*/ - rec_t* rec, /* in: record on page */ - dict_index_t* index, /* in: record descriptor */ - byte type, /* in: operation type: - MLOG_LIST_END_DELETE, ... 
*/ - mtr_t* mtr) /* in: mtr */ -{ - byte* log_ptr; - ut_ad(type == MLOG_LIST_END_DELETE - || type == MLOG_LIST_START_DELETE - || type == MLOG_COMP_LIST_END_DELETE - || type == MLOG_COMP_LIST_START_DELETE); - - log_ptr = mlog_open_and_write_index(mtr, rec, index, type, 2); - if (log_ptr) { - /* Write the parameter as a 2-byte ulint */ - mach_write_to_2(log_ptr, page_offset(rec)); - mlog_close(mtr, log_ptr + 2); - } -} - -/************************************************************** -Parses a log record of a record list end or start deletion. */ - -byte* -page_parse_delete_rec_list( -/*=======================*/ - /* out: end of log record or NULL */ - byte type, /* in: MLOG_LIST_END_DELETE, - MLOG_LIST_START_DELETE, - MLOG_COMP_LIST_END_DELETE or - MLOG_COMP_LIST_START_DELETE */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - dict_index_t* index, /* in: record descriptor */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ -{ - ulint offset; - - ut_ad(type == MLOG_LIST_END_DELETE - || type == MLOG_LIST_START_DELETE - || type == MLOG_COMP_LIST_END_DELETE - || type == MLOG_COMP_LIST_START_DELETE); - - /* Read the record offset as a 2-byte ulint */ - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - offset = mach_read_from_2(ptr); - ptr += 2; - - if (!page) { - - return(ptr); - } - - ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); - - if (type == MLOG_LIST_END_DELETE - || type == MLOG_COMP_LIST_END_DELETE) { - page_delete_rec_list_end(page, page + offset, index, - ULINT_UNDEFINED, - ULINT_UNDEFINED, mtr); - } else { - page_delete_rec_list_start(page, page + offset, index, mtr); - } - - return(ptr); -} - -/***************************************************************** -Deletes records from a page from a given record onward, including that record. -The infimum and supremum records are not deleted. 
*/ - -void -page_delete_rec_list_end( -/*=====================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - dict_index_t* index, /* in: record descriptor */ - ulint n_recs, /* in: number of records to delete, - or ULINT_UNDEFINED if not known */ - ulint size, /* in: the sum of the sizes of the - records in the end of the chain to - delete, or ULINT_UNDEFINED if not known */ - mtr_t* mtr) /* in: mtr */ -{ - page_dir_slot_t* slot; - ulint slot_index; - rec_t* last_rec; - rec_t* prev_rec; - rec_t* free; - rec_t* rec2; - ulint count; - ulint n_owned; - rec_t* sup; - ulint comp; - - /* Reset the last insert info in the page header and increment - the modify clock for the frame */ - - ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE); - page_header_set_ptr(page, PAGE_LAST_INSERT, NULL); - - /* The page gets invalid for optimistic searches: increment the - frame modify clock */ - - buf_frame_modify_clock_inc(page); - - sup = page_get_supremum_rec(page); - - comp = page_is_comp(page); - if (page_rec_is_infimum_low(rec - page)) { - rec = page_rec_get_next(rec); - } - - page_delete_rec_list_write_log(rec, index, comp - ? 
MLOG_COMP_LIST_END_DELETE - : MLOG_LIST_END_DELETE, mtr); - - if (rec == sup) { - - return; - } - - prev_rec = page_rec_get_prev(rec); - - last_rec = page_rec_get_prev(sup); - - if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - /* Calculate the sum of sizes and the number of records */ - size = 0; - n_recs = 0; - rec2 = rec; - - while (rec2 != sup) { - ulint s; - offsets = rec_get_offsets(rec2, index, offsets, - ULINT_UNDEFINED, &heap); - s = rec_offs_size(offsets); - ut_ad(rec2 - page + s - rec_offs_extra_size(offsets) - < UNIV_PAGE_SIZE); - ut_ad(size + s < UNIV_PAGE_SIZE); - size += s; - n_recs++; - - rec2 = page_rec_get_next(rec2); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } - - ut_ad(size < UNIV_PAGE_SIZE); - - /* Update the page directory; there is no need to balance the number - of the records owned by the supremum record, as it is allowed to be - less than PAGE_DIR_SLOT_MIN_N_OWNED */ - - rec2 = rec; - count = 0; - - while (rec_get_n_owned(rec2, comp) == 0) { - count++; - - rec2 = page_rec_get_next(rec2); - } - - ut_ad(rec_get_n_owned(rec2, comp) - count > 0); - - n_owned = rec_get_n_owned(rec2, comp) - count; - - slot_index = page_dir_find_owner_slot(rec2); - slot = page_dir_get_nth_slot(page, slot_index); - - page_dir_slot_set_rec(slot, sup); - page_dir_slot_set_n_owned(slot, n_owned); - - page_dir_set_n_slots(page, slot_index + 1); - - /* Remove the record chain segment from the record chain */ - page_rec_set_next(prev_rec, page_get_supremum_rec(page)); - - /* Catenate the deleted chain segment to the page free list */ - - free = page_header_get_ptr(page, PAGE_FREE); - - page_rec_set_next(last_rec, free); - page_header_set_ptr(page, PAGE_FREE, rec); - - page_header_set_field(page, PAGE_GARBAGE, size - + page_header_get_field(page, PAGE_GARBAGE)); - - 
page_header_set_field(page, PAGE_N_RECS, - (ulint)(page_get_n_recs(page) - n_recs)); -} - -/***************************************************************** -Deletes records from page, up to the given record, NOT including -that record. Infimum and supremum records are not deleted. */ - -void -page_delete_rec_list_start( -/*=======================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr) /* in: mtr */ -{ - page_cur_t cur1; - ulint log_mode; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - mem_heap_t* heap = NULL; - byte type; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); - - if (page_is_comp(page)) { - type = MLOG_COMP_LIST_START_DELETE; - } else { - type = MLOG_LIST_START_DELETE; - } - - page_delete_rec_list_write_log(rec, index, type, mtr); - - page_cur_set_before_first(page, &cur1); - - if (rec == page_cur_get_rec(&cur1)) { - - return; - } - - page_cur_move_to_next(&cur1); - - /* Individual deletes are not logged */ - - log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); - - while (page_cur_get_rec(&cur1) != rec) { - offsets = rec_get_offsets(page_cur_get_rec(&cur1), index, - offsets, ULINT_UNDEFINED, &heap); - page_cur_delete_rec(&cur1, index, offsets, mtr); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - /* Restore log mode */ - - mtr_set_log_mode(mtr, log_mode); -} - -/***************************************************************** -Moves record list end to another page. Moved records include -split_rec. 
*/ - -void -page_move_rec_list_end( -/*===================*/ - page_t* new_page, /* in: index page where to move */ - page_t* page, /* in: index page */ - rec_t* split_rec, /* in: first record to move */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr) /* in: mtr */ -{ - ulint old_data_size; - ulint new_data_size; - ulint old_n_recs; - ulint new_n_recs; - - old_data_size = page_get_data_size(new_page); - old_n_recs = page_get_n_recs(new_page); - - page_copy_rec_list_end(new_page, page, split_rec, index, mtr); - - new_data_size = page_get_data_size(new_page); - new_n_recs = page_get_n_recs(new_page); - - ut_ad(new_data_size >= old_data_size); - - page_delete_rec_list_end(page, split_rec, index, - new_n_recs - old_n_recs, - new_data_size - old_data_size, mtr); -} - -/***************************************************************** -Moves record list start to another page. Moved records do not include -split_rec. */ - -void -page_move_rec_list_start( -/*=====================*/ - page_t* new_page, /* in: index page where to move */ - page_t* page, /* in: index page */ - rec_t* split_rec, /* in: first record not to move */ - dict_index_t* index, /* in: record descriptor */ - mtr_t* mtr) /* in: mtr */ -{ - page_copy_rec_list_start(new_page, page, split_rec, index, mtr); - - page_delete_rec_list_start(page, split_rec, index, mtr); -} - -/*************************************************************************** -This is a low-level operation which is used in a database index creation -to update the page number of a created B-tree to a data dictionary record. 
*/ - -void -page_rec_write_index_page_no( -/*=========================*/ - rec_t* rec, /* in: record to update */ - ulint i, /* in: index of the field to update */ - ulint page_no,/* in: value to write */ - mtr_t* mtr) /* in: mtr */ -{ - byte* data; - ulint len; - - data = rec_get_nth_field_old(rec, i, &len); - - ut_ad(len == 4); - - mlog_write_ulint(data, page_no, MLOG_4BYTES, mtr); -} - -/****************************************************************** -Used to delete n slots from the directory. This function updates -also n_owned fields in the records, so that the first slot after -the deleted ones inherits the records of the deleted slots. */ -UNIV_INLINE -void -page_dir_delete_slots( -/*==================*/ - page_t* page, /* in: the index page */ - ulint start, /* in: first slot to be deleted */ - ulint n) /* in: number of slots to delete (currently - only n == 1 allowed) */ -{ - page_dir_slot_t* slot; - ulint i; - ulint sum_owned = 0; - ulint n_slots; - rec_t* rec; - - ut_ad(n == 1); - ut_ad(start > 0); - ut_ad(start + n < page_dir_get_n_slots(page)); - - n_slots = page_dir_get_n_slots(page); - - /* 1. Reset the n_owned fields of the slots to be - deleted */ - for (i = start; i < start + n; i++) { - slot = page_dir_get_nth_slot(page, i); - sum_owned += page_dir_slot_get_n_owned(slot); - page_dir_slot_set_n_owned(slot, 0); - } - - /* 2. Update the n_owned value of the first non-deleted slot */ - - slot = page_dir_get_nth_slot(page, start + n); - page_dir_slot_set_n_owned(slot, - sum_owned + page_dir_slot_get_n_owned(slot)); - - /* 3. Destroy start and other slots by copying slots */ - for (i = start + n; i < n_slots; i++) { - slot = page_dir_get_nth_slot(page, i); - rec = page_dir_slot_get_rec(slot); - - slot = page_dir_get_nth_slot(page, i - n); - page_dir_slot_set_rec(slot, rec); - } - - /* 4. 
Update the page header */ - page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots - n); -} - -/****************************************************************** -Used to add n slots to the directory. Does not set the record pointers -in the added slots or update n_owned values: this is the responsibility -of the caller. */ -UNIV_INLINE -void -page_dir_add_slots( -/*===============*/ - page_t* page, /* in: the index page */ - ulint start, /* in: the slot above which the new slots are added */ - ulint n) /* in: number of slots to add (currently only n == 1 - allowed) */ -{ - page_dir_slot_t* slot; - ulint n_slots; - ulint i; - rec_t* rec; - - ut_ad(n == 1); - - n_slots = page_dir_get_n_slots(page); - - ut_ad(start < n_slots - 1); - - /* Update the page header */ - page_dir_set_n_slots(page, n_slots + n); - - /* Move slots up */ - - for (i = n_slots - 1; i > start; i--) { - - slot = page_dir_get_nth_slot(page, i); - rec = page_dir_slot_get_rec(slot); - - slot = page_dir_get_nth_slot(page, i + n); - page_dir_slot_set_rec(slot, rec); - } -} - -/******************************************************************** -Splits a directory slot which owns too many records. */ - -void -page_dir_split_slot( -/*================*/ - page_t* page, /* in: the index page in question */ - ulint slot_no) /* in: the directory slot */ -{ - rec_t* rec; - page_dir_slot_t* new_slot; - page_dir_slot_t* prev_slot; - page_dir_slot_t* slot; - ulint i; - ulint n_owned; - - ut_ad(page); - ut_ad(slot_no > 0); - - slot = page_dir_get_nth_slot(page, slot_no); - - n_owned = page_dir_slot_get_n_owned(slot); - ut_ad(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED + 1); - - /* 1. We loop to find a record approximately in the middle of the - records owned by the slot. */ - - prev_slot = page_dir_get_nth_slot(page, slot_no - 1); - rec = page_dir_slot_get_rec(prev_slot); - - for (i = 0; i < n_owned / 2; i++) { - rec = page_rec_get_next(rec); - } - - ut_ad(n_owned / 2 >= PAGE_DIR_SLOT_MIN_N_OWNED); - - /* 2. 
We add one directory slot immediately below the slot to be - split. */ - - page_dir_add_slots(page, slot_no - 1, 1); - - /* The added slot is now number slot_no, and the old slot is - now number slot_no + 1 */ - - new_slot = page_dir_get_nth_slot(page, slot_no); - slot = page_dir_get_nth_slot(page, slot_no + 1); - - /* 3. We store the appropriate values to the new slot. */ - - page_dir_slot_set_rec(new_slot, rec); - page_dir_slot_set_n_owned(new_slot, n_owned / 2); - - /* 4. Finally, we update the number of records field of the - original slot */ - - page_dir_slot_set_n_owned(slot, n_owned - (n_owned / 2)); -} - -/***************************************************************** -Tries to balance the given directory slot with too few records with the upper -neighbor, so that there are at least the minimum number of records owned by -the slot; this may result in the merging of two slots. */ - -void -page_dir_balance_slot( -/*==================*/ - page_t* page, /* in: index page */ - ulint slot_no) /* in: the directory slot */ -{ - page_dir_slot_t* slot; - page_dir_slot_t* up_slot; - ulint n_owned; - ulint up_n_owned; - rec_t* old_rec; - rec_t* new_rec; - - ut_ad(page); - ut_ad(slot_no > 0); - - slot = page_dir_get_nth_slot(page, slot_no); - - /* The last directory slot cannot be balanced with the upper - neighbor, as there is none. 
*/ - - if (slot_no == page_dir_get_n_slots(page) - 1) { - - return; - } - - up_slot = page_dir_get_nth_slot(page, slot_no + 1); - - n_owned = page_dir_slot_get_n_owned(slot); - up_n_owned = page_dir_slot_get_n_owned(up_slot); - - ut_ad(n_owned == PAGE_DIR_SLOT_MIN_N_OWNED - 1); - - /* If the upper slot has the minimum value of n_owned, we will merge - the two slots, therefore we assert: */ - ut_ad(2 * PAGE_DIR_SLOT_MIN_N_OWNED - 1 <= PAGE_DIR_SLOT_MAX_N_OWNED); - - if (up_n_owned > PAGE_DIR_SLOT_MIN_N_OWNED) { - - /* In this case we can just transfer one record owned - by the upper slot to the property of the lower slot */ - old_rec = page_dir_slot_get_rec(slot); - new_rec = page_rec_get_next(old_rec); - - rec_set_n_owned(old_rec, page_is_comp(page), 0); - rec_set_n_owned(new_rec, page_is_comp(page), n_owned + 1); - - page_dir_slot_set_rec(slot, new_rec); - - page_dir_slot_set_n_owned(up_slot, up_n_owned -1); - } else { - /* In this case we may merge the two slots */ - page_dir_delete_slots(page, slot_no, 1); - } -} - -/**************************************************************** -Returns the middle record of the record list. If there are an even number -of records in the list, returns the first record of the upper half-list. 
*/ - -rec_t* -page_get_middle_rec( -/*================*/ - /* out: middle record */ - page_t* page) /* in: page */ -{ - page_dir_slot_t* slot; - ulint middle; - ulint i; - ulint n_owned; - ulint count; - rec_t* rec; - - /* This many records we must leave behind */ - middle = (page_get_n_recs(page) + 2) / 2; - - count = 0; - - for (i = 0;; i++) { - - slot = page_dir_get_nth_slot(page, i); - n_owned = page_dir_slot_get_n_owned(slot); - - if (count + n_owned > middle) { - break; - } else { - count += n_owned; - } - } - - ut_ad(i > 0); - slot = page_dir_get_nth_slot(page, i - 1); - rec = page_dir_slot_get_rec(slot); - rec = page_rec_get_next(rec); - - /* There are now count records behind rec */ - - for (i = 0; i < middle - count; i++) { - rec = page_rec_get_next(rec); - } - - return(rec); -} - -/******************************************************************* -Returns the number of records before the given record in chain. -The number includes infimum and supremum records. */ - -ulint -page_rec_get_n_recs_before( -/*=======================*/ - /* out: number of records */ - rec_t* rec) /* in: the physical record */ -{ - page_dir_slot_t* slot; - rec_t* slot_rec; - page_t* page; - ulint i; - ulint comp; - lint n = 0; - - ut_ad(page_rec_check(rec)); - - page = buf_frame_align(rec); - comp = page_is_comp(page); - - while (rec_get_n_owned(rec, comp) == 0) { - - rec = page_rec_get_next(rec); - n--; - } - - for (i = 0; ; i++) { - slot = page_dir_get_nth_slot(page, i); - slot_rec = page_dir_slot_get_rec(slot); - - n += rec_get_n_owned(slot_rec, comp); - - if (rec == slot_rec) { - - break; - } - } - - n--; - - ut_ad(n >= 0); - - return((ulint) n); -} - -/**************************************************************** -Prints record contents including the data relevant only in -the index page context. 
*/ - -void -page_rec_print( -/*===========*/ - rec_t* rec, /* in: physical record */ - const ulint* offsets)/* in: record descriptor */ -{ - ulint comp = page_is_comp(buf_frame_align(rec)); - - ut_a(!comp == !rec_offs_comp(offsets)); - rec_print_new(stderr, rec, offsets); - fprintf(stderr, - " n_owned: %lu; heap_no: %lu; next rec: %lu\n", - (ulong) rec_get_n_owned(rec, comp), - (ulong) rec_get_heap_no(rec, comp), - (ulong) rec_get_next_offs(rec, comp)); - - page_rec_check(rec); - rec_validate(rec, offsets); -} - -/******************************************************************* -This is used to print the contents of the directory for -debugging purposes. */ - -void -page_dir_print( -/*===========*/ - page_t* page, /* in: index page */ - ulint pr_n) /* in: print n first and n last entries */ -{ - ulint n; - ulint i; - page_dir_slot_t* slot; - - n = page_dir_get_n_slots(page); - - fprintf(stderr, "--------------------------------\n" - "PAGE DIRECTORY\n" - "Page address %p\n" - "Directory stack top at offs: %lu; number of slots: %lu\n", - page, (ulong)(page_dir_get_nth_slot(page, n - 1) - page), - (ulong) n); - for (i = 0; i < n; i++) { - slot = page_dir_get_nth_slot(page, i); - if ((i == pr_n) && (i < n - pr_n)) { - fputs(" ... \n", stderr); - } - if ((i < pr_n) || (i >= n - pr_n)) { - fprintf(stderr, - "Contents of slot: %lu: n_owned: %lu," - " rec offs: %lu\n", - (ulong) i, - (ulong) page_dir_slot_get_n_owned(slot), - (ulong)(page_dir_slot_get_rec(slot) - page)); - } - } - fprintf(stderr, "Total of %lu records\n" - "--------------------------------\n", - (ulong) (2 + page_get_n_recs(page))); -} - -/******************************************************************* -This is used to print the contents of the page record list for -debugging purposes. 
*/ - -void -page_print_list( -/*============*/ - page_t* page, /* in: index page */ - dict_index_t* index, /* in: dictionary index of the page */ - ulint pr_n) /* in: print n first and n last entries */ -{ - page_cur_t cur; - ulint count; - ulint n_recs; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table)); - - fprintf(stderr, - "--------------------------------\n" - "PAGE RECORD LIST\n" - "Page address %p\n", page); - - n_recs = page_get_n_recs(page); - - page_cur_set_before_first(page, &cur); - count = 0; - for (;;) { - offsets = rec_get_offsets(cur.rec, index, offsets, - ULINT_UNDEFINED, &heap); - page_rec_print(cur.rec, offsets); - - if (count == pr_n) { - break; - } - if (page_cur_is_after_last(&cur)) { - break; - } - page_cur_move_to_next(&cur); - count++; - } - - if (n_recs > 2 * pr_n) { - fputs(" ... \n", stderr); - } - - while (!page_cur_is_after_last(&cur)) { - page_cur_move_to_next(&cur); - - if (count + pr_n >= n_recs) { - offsets = rec_get_offsets(cur.rec, index, offsets, - ULINT_UNDEFINED, &heap); - page_rec_print(cur.rec, offsets); - } - count++; - } - - fprintf(stderr, - "Total of %lu records \n" - "--------------------------------\n", - (ulong) (count + 1)); - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -} - -/******************************************************************* -Prints the info in a page header. */ - -void -page_header_print( -/*==============*/ - page_t* page) -{ - fprintf(stderr, - "--------------------------------\n" - "PAGE HEADER INFO\n" - "Page address %p, n records %lu (%s)\n" - "n dir slots %lu, heap top %lu\n" - "Page n heap %lu, free %lu, garbage %lu\n" - "Page last insert %lu, direction %lu, n direction %lu\n", - page, (ulong) page_header_get_field(page, PAGE_N_RECS), - page_is_comp(page) ? 
"compact format" : "original format", - (ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS), - (ulong) page_header_get_field(page, PAGE_HEAP_TOP), - (ulong) page_dir_get_n_heap(page), - (ulong) page_header_get_field(page, PAGE_FREE), - (ulong) page_header_get_field(page, PAGE_GARBAGE), - (ulong) page_header_get_field(page, PAGE_LAST_INSERT), - (ulong) page_header_get_field(page, PAGE_DIRECTION), - (ulong) page_header_get_field(page, PAGE_N_DIRECTION)); -} - -/******************************************************************* -This is used to print the contents of the page for -debugging purposes. */ - -void -page_print( -/*=======*/ - page_t* page, /* in: index page */ - dict_index_t* index, /* in: dictionary index of the page */ - ulint dn, /* in: print dn first and last entries - in directory */ - ulint rn) /* in: print rn first and last records - in directory */ -{ - page_header_print(page); - page_dir_print(page, dn); - page_print_list(page, index, rn); -} - -/******************************************************************* -The following is used to validate a record on a page. This function -differs from rec_validate as it can also check the n_owned field and -the heap_no field. 
*/ - -ibool -page_rec_validate( -/*==============*/ - /* out: TRUE if ok */ - rec_t* rec, /* in: physical record */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - ulint n_owned; - ulint heap_no; - page_t* page; - ulint comp; - - page = buf_frame_align(rec); - comp = page_is_comp(page); - ut_a(!comp == !rec_offs_comp(offsets)); - - page_rec_check(rec); - rec_validate(rec, offsets); - - n_owned = rec_get_n_owned(rec, comp); - heap_no = rec_get_heap_no(rec, comp); - - if (!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED)) { - fprintf(stderr, - "InnoDB: Dir slot of rec %lu, n owned too big %lu\n", - (ulong)(rec - page), (ulong) n_owned); - return(FALSE); - } - - if (!(heap_no < page_dir_get_n_heap(page))) { - fprintf(stderr, - "InnoDB: Heap no of rec %lu too big %lu %lu\n", - (ulong)(rec - page), (ulong) heap_no, - (ulong) page_dir_get_n_heap(page)); - return(FALSE); - } - - return(TRUE); -} - -/******************************************************************* -Checks that the first directory slot points to the infimum record and -the last to the supremum. This function is intended to track if the -bug fixed in 4.0.14 has caused corruption to users' databases. */ - -void -page_check_dir( -/*===========*/ - page_t* page) /* in: index page */ -{ - ulint n_slots; - - n_slots = page_dir_get_n_slots(page); - - if (page_dir_slot_get_rec(page_dir_get_nth_slot(page, 0)) - != page_get_infimum_rec(page)) { - - fprintf(stderr, - "InnoDB: Page directory corruption:" - " infimum not pointed to\n"); - buf_page_print(page); - } - - if (page_dir_slot_get_rec(page_dir_get_nth_slot(page, n_slots - 1)) - != page_get_supremum_rec(page)) { - - fprintf(stderr, - "InnoDB: Page directory corruption:" - " supremum not pointed to\n"); - buf_page_print(page); - } -} - -/******************************************************************* -This function checks the consistency of an index page when we do not -know the index. 
This is also resilient so that this should never crash -even if the page is total garbage. */ - -ibool -page_simple_validate( -/*=================*/ - /* out: TRUE if ok */ - page_t* page) /* in: index page */ -{ - page_cur_t cur; - page_dir_slot_t* slot; - ulint slot_no; - ulint n_slots; - rec_t* rec; - byte* rec_heap_top; - ulint count; - ulint own_count; - ibool ret = FALSE; - ulint comp = page_is_comp(page); - - /* Check first that the record heap and the directory do not - overlap. */ - - n_slots = page_dir_get_n_slots(page); - - if (n_slots > UNIV_PAGE_SIZE / 4) { - fprintf(stderr, - "InnoDB: Nonsensical number %lu of page dir slots\n", - (ulong) n_slots); - - goto func_exit; - } - - rec_heap_top = page_header_get_ptr(page, PAGE_HEAP_TOP); - - if (rec_heap_top > page_dir_get_nth_slot(page, n_slots - 1)) { - - fprintf(stderr, - "InnoDB: Record heap and dir overlap on a page," - " heap top %lu, dir %lu\n", - (ulong) - (page_header_get_ptr(page, PAGE_HEAP_TOP) - page), - (ulong) - (page_dir_get_nth_slot(page, n_slots - 1) - page)); - - goto func_exit; - } - - /* Validate the record list in a loop checking also that it is - consistent with the page record directory. 
*/ - - count = 0; - own_count = 1; - slot_no = 0; - slot = page_dir_get_nth_slot(page, slot_no); - - page_cur_set_before_first(page, &cur); - - for (;;) { - rec = (&cur)->rec; - - if (rec > rec_heap_top) { - fprintf(stderr, - "InnoDB: Record %lu is above" - " rec heap top %lu\n", - (ulong)(rec - page), - (ulong)(rec_heap_top - page)); - - goto func_exit; - } - - if (rec_get_n_owned(rec, comp) != 0) { - /* This is a record pointed to by a dir slot */ - if (rec_get_n_owned(rec, comp) != own_count) { - - fprintf(stderr, - "InnoDB: Wrong owned count %lu, %lu," - " rec %lu\n", - (ulong) rec_get_n_owned(rec, comp), - (ulong) own_count, - (ulong)(rec - page)); - - goto func_exit; - } - - if (page_dir_slot_get_rec(slot) != rec) { - fprintf(stderr, - "InnoDB: Dir slot does not point" - " to right rec %lu\n", - (ulong)(rec - page)); - - goto func_exit; - } - - own_count = 0; - - if (!page_cur_is_after_last(&cur)) { - slot_no++; - slot = page_dir_get_nth_slot(page, slot_no); - } - } - - if (page_cur_is_after_last(&cur)) { - - break; - } - - if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA - || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) { - fprintf(stderr, - "InnoDB: Next record offset" - " nonsensical %lu for rec %lu\n", - (ulong) rec_get_next_offs(rec, comp), - (ulong)(rec - page)); - - goto func_exit; - } - - count++; - - if (count > UNIV_PAGE_SIZE) { - fprintf(stderr, - "InnoDB: Page record list appears" - " to be circular %lu\n", - (ulong) count); - goto func_exit; - } - - page_cur_move_to_next(&cur); - own_count++; - } - - if (rec_get_n_owned(rec, comp) == 0) { - fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n"); - - goto func_exit; - } - - if (slot_no != n_slots - 1) { - fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n", - (ulong) slot_no, (ulong) (n_slots - 1)); - goto func_exit; - } - - if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) { - fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n", - (ulong) page_header_get_field(page, 
PAGE_N_RECS) + 2, - (ulong) (count + 1)); - - goto func_exit; - } - - /* Check then the free list */ - rec = page_header_get_ptr(page, PAGE_FREE); - - while (rec != NULL) { - if (rec < page + FIL_PAGE_DATA - || rec >= page + UNIV_PAGE_SIZE) { - fprintf(stderr, - "InnoDB: Free list record has" - " a nonsensical offset %lu\n", - (ulong) (rec - page)); - - goto func_exit; - } - - if (rec > rec_heap_top) { - fprintf(stderr, - "InnoDB: Free list record %lu" - " is above rec heap top %lu\n", - (ulong) (rec - page), - (ulong) (rec_heap_top - page)); - - goto func_exit; - } - - count++; - - if (count > UNIV_PAGE_SIZE) { - fprintf(stderr, - "InnoDB: Page free list appears" - " to be circular %lu\n", - (ulong) count); - goto func_exit; - } - - rec = page_rec_get_next(rec); - } - - if (page_dir_get_n_heap(page) != count + 1) { - - fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n", - (ulong) page_dir_get_n_heap(page), - (ulong) (count + 1)); - - goto func_exit; - } - - ret = TRUE; - -func_exit: - return(ret); -} - -/******************************************************************* -This function checks the consistency of an index page. 
*/ - -ibool -page_validate( -/*==========*/ - /* out: TRUE if ok */ - page_t* page, /* in: index page */ - dict_index_t* index) /* in: data dictionary index containing - the page record type definition */ -{ - page_dir_slot_t* slot; - mem_heap_t* heap; - page_cur_t cur; - byte* buf; - ulint count; - ulint own_count; - ulint slot_no; - ulint data_size; - rec_t* rec; - rec_t* old_rec = NULL; - ulint offs; - ulint n_slots; - ibool ret = FALSE; - ulint i; - ulint comp = page_is_comp(page); - ulint* offsets = NULL; - ulint* old_offsets = NULL; - - if ((ibool)!!comp != dict_table_is_comp(index->table)) { - fputs("InnoDB: 'compact format' flag mismatch\n", stderr); - goto func_exit2; - } - if (!page_simple_validate(page)) { - goto func_exit2; - } - - heap = mem_heap_create(UNIV_PAGE_SIZE + 200); - - /* The following buffer is used to check that the - records in the page record heap do not overlap */ - - buf = mem_heap_alloc(heap, UNIV_PAGE_SIZE); - memset(buf, 0, UNIV_PAGE_SIZE); - - /* Check first that the record heap and the directory do not - overlap. */ - - n_slots = page_dir_get_n_slots(page); - - if (!(page_header_get_ptr(page, PAGE_HEAP_TOP) - <= page_dir_get_nth_slot(page, n_slots - 1))) { - - fputs("InnoDB: Record heap and dir overlap on a page ", - stderr); - dict_index_name_print(stderr, NULL, index); - fprintf(stderr, ", %p, %p\n", - page_header_get_ptr(page, PAGE_HEAP_TOP), - page_dir_get_nth_slot(page, n_slots - 1)); - - goto func_exit; - } - - /* Validate the record list in a loop checking also that - it is consistent with the directory. 
*/ - count = 0; - data_size = 0; - own_count = 1; - slot_no = 0; - slot = page_dir_get_nth_slot(page, slot_no); - - page_cur_set_before_first(page, &cur); - - for (;;) { - rec = cur.rec; - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - - if (comp && page_rec_is_user_rec(rec) - && rec_get_node_ptr_flag(rec) - != (ibool) - (btr_page_get_level_low(page) != 0)) { - fputs("InnoDB: node_ptr flag mismatch\n", stderr); - goto func_exit; - } - - if (!page_rec_validate(rec, offsets)) { - goto func_exit; - } - - /* Check that the records are in the ascending order */ - if ((count >= 2) && (!page_cur_is_after_last(&cur))) { - if (!(1 == cmp_rec_rec(rec, old_rec, - offsets, old_offsets, index))) { - fprintf(stderr, - "InnoDB: Records in wrong order" - " on page %lu ", - (ulong) buf_frame_get_page_no(page)); - dict_index_name_print(stderr, NULL, index); - fputs("\nInnoDB: previous record ", stderr); - rec_print_new(stderr, old_rec, old_offsets); - fputs("\nInnoDB: record ", stderr); - rec_print_new(stderr, rec, offsets); - putc('\n', stderr); - - goto func_exit; - } - } - - if (page_rec_is_user_rec(rec)) { - - data_size += rec_offs_size(offsets); - } - - offs = rec_get_start(rec, offsets) - page; - - for (i = 0; i < rec_offs_size(offsets); i++) { - if (!buf[offs + i] == 0) { - /* No other record may overlap this */ - - fputs("InnoDB: Record overlaps another\n", - stderr); - goto func_exit; - } - - buf[offs + i] = 1; - } - - if (rec_get_n_owned(rec, comp) != 0) { - /* This is a record pointed to by a dir slot */ - if (rec_get_n_owned(rec, comp) != own_count) { - fprintf(stderr, - "InnoDB: Wrong owned count %lu, %lu\n", - (ulong) rec_get_n_owned(rec, comp), - (ulong) own_count); - goto func_exit; - } - - if (page_dir_slot_get_rec(slot) != rec) { - fputs("InnoDB: Dir slot does not" - " point to right rec\n", - stderr); - goto func_exit; - } - - page_dir_slot_check(slot); - - own_count = 0; - if (!page_cur_is_after_last(&cur)) { - slot_no++; - slot = 
page_dir_get_nth_slot(page, slot_no); - } - } - - if (page_cur_is_after_last(&cur)) { - break; - } - - if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA - || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) { - fprintf(stderr, - "InnoDB: Next record offset wrong %lu\n", - (ulong) rec_get_next_offs(rec, comp)); - goto func_exit; - } - - count++; - page_cur_move_to_next(&cur); - own_count++; - old_rec = rec; - /* set old_offsets to offsets; recycle offsets */ - { - ulint* offs = old_offsets; - old_offsets = offsets; - offsets = offs; - } - } - - if (rec_get_n_owned(rec, comp) == 0) { - fputs("InnoDB: n owned is zero\n", stderr); - goto func_exit; - } - - if (slot_no != n_slots - 1) { - fprintf(stderr, "InnoDB: n slots wrong %lu %lu\n", - (ulong) slot_no, (ulong) (n_slots - 1)); - goto func_exit; - } - - if (page_header_get_field(page, PAGE_N_RECS) + 2 != count + 1) { - fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n", - (ulong) page_header_get_field(page, PAGE_N_RECS) + 2, - (ulong) (count + 1)); - goto func_exit; - } - - if (data_size != page_get_data_size(page)) { - fprintf(stderr, - "InnoDB: Summed data size %lu, returned by func %lu\n", - (ulong) data_size, (ulong) page_get_data_size(page)); - goto func_exit; - } - - /* Check then the free list */ - rec = page_header_get_ptr(page, PAGE_FREE); - - while (rec != NULL) { - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - if (!page_rec_validate(rec, offsets)) { - - goto func_exit; - } - - count++; - offs = rec_get_start(rec, offsets) - page; - - for (i = 0; i < rec_offs_size(offsets); i++) { - - if (buf[offs + i] != 0) { - fputs("InnoDB: Record overlaps another" - " in free list\n", stderr); - goto func_exit; - } - - buf[offs + i] = 1; - } - - rec = page_rec_get_next(rec); - } - - if (page_dir_get_n_heap(page) != count + 1) { - fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n", - (ulong) page_dir_get_n_heap(page), - (ulong) count + 1); - goto func_exit; - } - - ret = TRUE; - -func_exit: - 
mem_heap_free(heap); - - if (ret == FALSE) { -func_exit2: - fprintf(stderr, "InnoDB: Apparent corruption in page %lu in ", - (ulong) buf_frame_get_page_no(page)); - dict_index_name_print(stderr, NULL, index); - putc('\n', stderr); - buf_page_print(page); - } - - return(ret); -} - -/******************************************************************* -Looks in the page record list for a record with the given heap number. */ - -rec_t* -page_find_rec_with_heap_no( -/*=======================*/ - /* out: record, NULL if not found */ - page_t* page, /* in: index page */ - ulint heap_no)/* in: heap number */ -{ - page_cur_t cur; - - page_cur_set_before_first(page, &cur); - - for (;;) { - if (rec_get_heap_no(cur.rec, page_is_comp(page)) == heap_no) { - - return(cur.rec); - } - - if (page_cur_is_after_last(&cur)) { - - return(NULL); - } - - page_cur_move_to_next(&cur); - } -} diff --git a/storage/innobase/pars/lexyy.c b/storage/innobase/pars/lexyy.c deleted file mode 100644 index b65de138573..00000000000 --- a/storage/innobase/pars/lexyy.c +++ /dev/null @@ -1,2762 +0,0 @@ -#include "univ.i" -#line 2 "_flex_tmp.c" - -#line 4 "_flex_tmp.c" - -#define YY_INT_ALIGNED short int - -/* A lexical scanner generated by flex */ - -#define FLEX_SCANNER -#define YY_FLEX_MAJOR_VERSION 2 -#define YY_FLEX_MINOR_VERSION 5 -#define YY_FLEX_SUBMINOR_VERSION 31 -#if YY_FLEX_SUBMINOR_VERSION > 0 -#define FLEX_BETA -#endif - -/* First, we deal with platform-specific or compiler-specific issues. */ - -/* begin standard C headers. */ -#include <stdio.h> -#include <string.h> -#include <errno.h> -#include <stdlib.h> - -/* end standard C headers. */ - -/* flex integer type definitions */ - -#ifndef FLEXINT_H -#define FLEXINT_H - -/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. 
*/ - -#if defined __STDC_VERSION__ && __STDC_VERSION__ >= 199901L -#include <inttypes.h> -typedef int8_t flex_int8_t; -typedef uint8_t flex_uint8_t; -typedef int16_t flex_int16_t; -typedef uint16_t flex_uint16_t; -typedef int32_t flex_int32_t; -typedef uint32_t flex_uint32_t; -#else -typedef signed char flex_int8_t; -typedef short int flex_int16_t; -typedef int flex_int32_t; -typedef unsigned char flex_uint8_t; -typedef unsigned short int flex_uint16_t; -typedef unsigned int flex_uint32_t; -#endif /* ! C99 */ - -/* Limits of integral types. */ -#ifndef INT8_MIN -#define INT8_MIN (-128) -#endif -#ifndef INT16_MIN -#define INT16_MIN (-32767-1) -#endif -#ifndef INT32_MIN -#define INT32_MIN (-2147483647-1) -#endif -#ifndef INT8_MAX -#define INT8_MAX (127) -#endif -#ifndef INT16_MAX -#define INT16_MAX (32767) -#endif -#ifndef INT32_MAX -#define INT32_MAX (2147483647) -#endif -#ifndef UINT8_MAX -#define UINT8_MAX (255U) -#endif -#ifndef UINT16_MAX -#define UINT16_MAX (65535U) -#endif -#ifndef UINT32_MAX -#define UINT32_MAX (4294967295U) -#endif - -#endif /* ! FLEXINT_H */ - -#ifdef __cplusplus - -/* The "const" storage-class-modifier is valid. */ -#define YY_USE_CONST - -#else /* ! __cplusplus */ - -#if __STDC__ - -#define YY_USE_CONST - -#endif /* __STDC__ */ -#endif /* ! __cplusplus */ - -#ifdef YY_USE_CONST -#define yyconst const -#else -#define yyconst -#endif - -/* Returned upon end-of-file. */ -#define YY_NULL 0 - -/* Promotes a possibly negative, possibly signed char to an unsigned - * integer for use as an array index. If the signed char is negative, - * we want to instead treat it as an 8-bit unsigned char, hence the - * double cast. - */ -#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) - -/* Enter a start condition. This macro really ought to take a parameter, - * but we do it the disgusting crufty way forced on us by the ()-less - * definition of BEGIN. 
- */ -#define BEGIN (yy_start) = 1 + 2 * - -/* Translate the current start state into a value that can be later handed - * to BEGIN to return to the state. The YYSTATE alias is for lex - * compatibility. - */ -#define YY_START (((yy_start) - 1) / 2) -#define YYSTATE YY_START - -/* Action number for EOF rule of a given start state. */ -#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) - -/* Special action meaning "start processing a new file". */ -#define YY_NEW_FILE yyrestart(yyin ) - -#define YY_END_OF_BUFFER_CHAR 0 - -/* Size of default input buffer. */ -#ifndef YY_BUF_SIZE -#define YY_BUF_SIZE 16384 -#endif - -#ifndef YY_TYPEDEF_YY_BUFFER_STATE -#define YY_TYPEDEF_YY_BUFFER_STATE -typedef struct yy_buffer_state *YY_BUFFER_STATE; -#endif - -extern int yyleng; - -extern FILE *yyin, *yyout; - -#define EOB_ACT_CONTINUE_SCAN 0 -#define EOB_ACT_END_OF_FILE 1 -#define EOB_ACT_LAST_MATCH 2 - - #define YY_LESS_LINENO(n) - -/* Return all but the first "n" matched characters back to the input stream. */ -#define yyless(n) \ - do \ - { \ - /* Undo effects of setting up yytext. */ \ - int yyless_macro_arg = (n); \ - YY_LESS_LINENO(yyless_macro_arg);\ - *yy_cp = (yy_hold_char); \ - YY_RESTORE_YY_MORE_OFFSET \ - (yy_c_buf_p) = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ - YY_DO_BEFORE_ACTION; /* set up yytext again */ \ - } \ - while ( 0 ) - -#define unput(c) yyunput( c, (yytext_ptr) ) - -/* The following is because we cannot portably get our hands on size_t - * (without autoconf's help, which isn't available because we want - * flex-generated scanners to compile on their own). 
- */ - -#ifndef YY_TYPEDEF_YY_SIZE_T -#define YY_TYPEDEF_YY_SIZE_T -typedef unsigned int yy_size_t; -#endif - -#ifndef YY_STRUCT_YY_BUFFER_STATE -#define YY_STRUCT_YY_BUFFER_STATE -struct yy_buffer_state - { - FILE *yy_input_file; - - char *yy_ch_buf; /* input buffer */ - char *yy_buf_pos; /* current position in input buffer */ - - /* Size of input buffer in bytes, not including room for EOB - * characters. - */ - yy_size_t yy_buf_size; - - /* Number of characters read into yy_ch_buf, not including EOB - * characters. - */ - int yy_n_chars; - - /* Whether we "own" the buffer - i.e., we know we created it, - * and can realloc() it to grow it, and should free() it to - * delete it. - */ - int yy_is_our_buffer; - - /* Whether this is an "interactive" input source; if so, and - * if we're using stdio for input, then we want to use getc() - * instead of fread(), to make sure we stop fetching input after - * each newline. - */ - int yy_is_interactive; - - /* Whether we're considered to be at the beginning of a line. - * If so, '^' rules will be active on the next match, otherwise - * not. - */ - int yy_at_bol; - - int yy_bs_lineno; /**< The line count. */ - int yy_bs_column; /**< The column count. */ - - /* Whether to try to fill the input buffer when we reach the - * end of it. - */ - int yy_fill_buffer; - - int yy_buffer_status; - -#define YY_BUFFER_NEW 0 -#define YY_BUFFER_NORMAL 1 - /* When an EOF's been seen but there's still some text to process - * then we mark the buffer as YY_EOF_PENDING, to indicate that we - * shouldn't try reading from the input source any more. We might - * still have a bunch of tokens to match, though, because of - * possible backing-up. - * - * When we actually see the EOF, we change the status to "new" - * (via yyrestart()), so that the user can continue scanning by - * just pointing yyin at a new input file. - */ -#define YY_BUFFER_EOF_PENDING 2 - - }; -#endif /* !YY_STRUCT_YY_BUFFER_STATE */ - -/* Stack of input buffers. 
*/ -static size_t yy_buffer_stack_top = 0; /**< index of top of stack. */ -static size_t yy_buffer_stack_max = 0; /**< capacity of stack. */ -static YY_BUFFER_STATE * yy_buffer_stack = 0; /**< Stack as an array. */ - -/* We provide macros for accessing buffer states in case in the - * future we want to put the buffer states in a more general - * "scanner state". - * - * Returns the top of the stack, or NULL. - */ -#define YY_CURRENT_BUFFER ( (yy_buffer_stack) \ - ? (yy_buffer_stack)[(yy_buffer_stack_top)] \ - : NULL) - -/* Same as previous macro, but useful when we know that the buffer stack is not - * NULL or when we need an lvalue. For internal use only. - */ -#define YY_CURRENT_BUFFER_LVALUE (yy_buffer_stack)[(yy_buffer_stack_top)] - -/* yy_hold_char holds the character lost when yytext is formed. */ -static char yy_hold_char; -static int yy_n_chars; /* number of characters read into yy_ch_buf */ -int yyleng; - -/* Points to current character in buffer. */ -static char *yy_c_buf_p = (char *) 0; -static int yy_init = 1; /* whether we need to initialize */ -static int yy_start = 0; /* start state number */ - -/* Flag which is used to allow yywrap()'s to do buffer switches - * instead of setting up a fresh yyin. A bit of a hack ... 
- */ -static int yy_did_buffer_switch_on_eof; - -void yyrestart (FILE *input_file ); -void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ); -YY_BUFFER_STATE yy_create_buffer (FILE *file,int size ); -void yy_delete_buffer (YY_BUFFER_STATE b ); -void yy_flush_buffer (YY_BUFFER_STATE b ); -void yypush_buffer_state (YY_BUFFER_STATE new_buffer ); -void yypop_buffer_state (void ); - -static void yyensure_buffer_stack (void ); -static void yy_load_buffer_state (void ); -static void yy_init_buffer (YY_BUFFER_STATE b,FILE *file ); - -#define YY_FLUSH_BUFFER yy_flush_buffer(YY_CURRENT_BUFFER ) - -YY_BUFFER_STATE yy_scan_buffer (char *base,yy_size_t size ); -YY_BUFFER_STATE yy_scan_string (yyconst char *yy_str ); -YY_BUFFER_STATE yy_scan_bytes (yyconst char *bytes,int len ); - -void *yyalloc (yy_size_t ); -void *yyrealloc (void *,yy_size_t ); -void yyfree (void * ); - -#define yy_new_buffer yy_create_buffer - -#define yy_set_interactive(is_interactive) \ - { \ - if ( ! YY_CURRENT_BUFFER ){ \ - yyensure_buffer_stack (); \ - YY_CURRENT_BUFFER_LVALUE = \ - yy_create_buffer(yyin,YY_BUF_SIZE ); \ - } \ - YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ - } - -#define yy_set_bol(at_bol) \ - { \ - if ( ! 
YY_CURRENT_BUFFER ){\ - yyensure_buffer_stack (); \ - YY_CURRENT_BUFFER_LVALUE = \ - yy_create_buffer(yyin,YY_BUF_SIZE ); \ - } \ - YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ - } - -#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) - -/* Begin user sect3 */ - -#define yywrap(n) 1 -#define YY_SKIP_YYWRAP - -typedef unsigned char YY_CHAR; - -FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; - -typedef int yy_state_type; - -extern int yylineno; - -int yylineno = 1; - -extern char *yytext; -#define yytext_ptr yytext - -static yy_state_type yy_get_previous_state (void ); -static yy_state_type yy_try_NUL_trans (yy_state_type current_state ); -static int yy_get_next_buffer (void ); -static void yy_fatal_error (yyconst char msg[] ); - -/* Done after the current pattern has been matched and before the - * corresponding action - sets up yytext. - */ -#define YY_DO_BEFORE_ACTION \ - (yytext_ptr) = yy_bp; \ - yyleng = (size_t) (yy_cp - yy_bp); \ - (yy_hold_char) = *yy_cp; \ - *yy_cp = '\0'; \ - (yy_c_buf_p) = yy_cp; - -#define YY_NUM_RULES 119 -#define YY_END_OF_BUFFER 120 -/* This struct is not used in this scanner, - but its presence is necessary. 
*/ -struct yy_trans_info - { - flex_int32_t yy_verify; - flex_int32_t yy_nxt; - }; -static yyconst flex_int16_t yy_accept[399] = - { 0, - 0, 0, 114, 114, 0, 0, 0, 0, 120, 118, - 117, 117, 8, 118, 109, 5, 98, 104, 107, 105, - 102, 106, 118, 108, 1, 118, 103, 101, 99, 100, - 112, 92, 92, 92, 92, 92, 92, 92, 92, 92, - 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, - 110, 111, 114, 115, 6, 7, 9, 10, 117, 4, - 93, 113, 2, 1, 3, 94, 95, 97, 96, 92, - 92, 92, 92, 92, 92, 44, 92, 92, 92, 92, - 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, - 92, 92, 28, 17, 25, 92, 92, 92, 92, 92, - - 54, 61, 92, 14, 92, 92, 92, 92, 92, 92, - 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, - 92, 92, 114, 115, 115, 116, 6, 7, 9, 10, - 2, 13, 45, 92, 92, 92, 92, 92, 92, 92, - 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, - 92, 27, 92, 92, 92, 41, 92, 92, 92, 92, - 21, 92, 92, 92, 92, 15, 92, 92, 92, 18, - 92, 92, 92, 92, 92, 80, 92, 92, 92, 51, - 92, 12, 92, 36, 92, 92, 92, 92, 92, 92, - 92, 92, 92, 92, 92, 92, 92, 92, 20, 24, - - 92, 92, 92, 92, 92, 92, 92, 92, 92, 92, - 46, 92, 92, 30, 92, 87, 92, 92, 39, 92, - 92, 92, 92, 92, 48, 92, 89, 32, 91, 92, - 11, 64, 92, 92, 92, 42, 92, 92, 92, 92, - 92, 92, 92, 92, 92, 92, 29, 92, 92, 92, - 92, 92, 92, 92, 92, 92, 85, 92, 26, 92, - 66, 92, 92, 92, 37, 92, 92, 92, 92, 92, - 92, 92, 31, 65, 23, 92, 57, 92, 75, 92, - 92, 92, 43, 92, 92, 92, 92, 92, 92, 92, - 92, 90, 92, 92, 56, 92, 92, 92, 92, 92, - - 92, 92, 40, 33, 79, 19, 92, 83, 74, 55, - 92, 63, 92, 52, 92, 92, 92, 47, 92, 76, - 92, 78, 92, 92, 34, 92, 92, 92, 35, 72, - 92, 92, 92, 92, 58, 92, 50, 49, 92, 92, - 53, 62, 92, 92, 92, 22, 92, 92, 73, 81, - 92, 92, 77, 92, 68, 92, 92, 92, 92, 38, - 92, 88, 67, 92, 84, 92, 92, 92, 86, 92, - 59, 92, 16, 92, 70, 69, 92, 92, 82, 92, - 92, 92, 92, 92, 92, 92, 92, 92, 92, 71, - 92, 92, 92, 92, 92, 92, 60, 0 - - } ; - -static yyconst flex_int32_t yy_ec[256] = - { 0, - 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 2, 1, 4, 1, 5, 6, 
1, 7, 8, - 9, 10, 11, 12, 13, 14, 15, 16, 16, 16, - 16, 16, 16, 16, 16, 16, 16, 17, 18, 19, - 20, 21, 22, 1, 23, 24, 25, 26, 27, 28, - 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, - 39, 40, 41, 42, 43, 44, 45, 46, 47, 32, - 1, 1, 1, 1, 48, 1, 32, 32, 32, 32, - - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, - 32, 32, 49, 1, 50, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1 - } ; - -static yyconst flex_int32_t yy_meta[51] = - { 0, - 1, 1, 1, 2, 1, 1, 3, 1, 1, 4, - 1, 1, 1, 1, 1, 5, 1, 1, 1, 6, - 1, 1, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 1, 1 - } ; - -static yyconst flex_int16_t yy_base[409] = - { 0, - 0, 0, 437, 436, 438, 437, 439, 438, 441, 448, - 49, 51, 448, 0, 448, 448, 448, 448, 448, 448, - 448, 448, 426, 429, 41, 418, 448, 38, 448, 417, - 448, 20, 33, 32, 46, 40, 44, 0, 54, 52, - 399, 48, 60, 395, 65, 67, 81, 27, 411, 75, - 448, 448, 0, 98, 0, 426, 0, 428, 113, 0, - 448, 448, 415, 54, 410, 448, 448, 448, 448, 0, - 403, 68, 399, 391, 389, 0, 402, 80, 84, 397, - 383, 96, 381, 394, 379, 393, 387, 375, 379, 375, - 377, 377, 0, 98, 0, 376, 97, 385, 368, 375, - - 0, 0, 381, 381, 364, 94, 103, 379, 98, 65, - 381, 369, 109, 361, 377, 373, 351, 97, 372, 363, - 115, 356, 0, 137, 138, 448, 0, 388, 0, 390, - 377, 0, 0, 365, 360, 367, 365, 348, 346, 345, - 350, 359, 347, 359, 95, 347, 353, 354, 336, 336, - 123, 0, 334, 350, 351, 0, 338, 347, 344, 122, - 124, 341, 336, 330, 340, 338, 331, 328, 336, 0, - 326, 336, 334, 325, 315, 309, 322, 307, 327, 0, - 313, 0, 311, 0, 325, 316, 313, 131, 309, 316, - 323, 302, 304, 309, 309, 301, 304, 
299, 0, 0, - - 311, 295, 305, 312, 292, 291, 305, 294, 307, 287, - 0, 297, 279, 0, 298, 0, 295, 282, 0, 281, - 276, 281, 280, 290, 0, 276, 0, 0, 0, 280, - 0, 0, 276, 273, 287, 0, 272, 272, 270, 286, - 271, 283, 280, 264, 282, 277, 0, 272, 272, 258, - 257, 270, 256, 270, 269, 268, 0, 252, 0, 246, - 0, 265, 249, 248, 0, 262, 252, 247, 246, 258, - 248, 247, 0, 0, 0, 251, 0, 239, 0, 253, - 249, 235, 0, 249, 250, 233, 238, 231, 249, 231, - 228, 0, 229, 226, 0, 231, 243, 230, 237, 227, - - 235, 220, 0, 0, 0, 212, 219, 0, 0, 0, - 216, 0, 230, 0, 231, 218, 217, 0, 213, 0, - 216, 0, 208, 210, 0, 209, 223, 216, 0, 0, - 219, 222, 204, 219, 0, 215, 0, 0, 199, 213, - 0, 0, 197, 196, 201, 0, 210, 195, 0, 0, - 201, 197, 0, 192, 0, 204, 204, 192, 202, 0, - 179, 0, 0, 199, 0, 183, 177, 183, 0, 174, - 0, 193, 0, 192, 0, 0, 183, 187, 0, 174, - 174, 180, 166, 189, 181, 180, 166, 151, 118, 0, - 130, 136, 127, 123, 119, 111, 0, 448, 167, 173, - - 179, 152, 181, 124, 187, 193, 199, 205 - } ; - -static yyconst flex_int16_t yy_def[409] = - { 0, - 398, 1, 399, 399, 400, 400, 401, 401, 398, 398, - 398, 398, 398, 402, 398, 398, 398, 398, 398, 398, - 398, 398, 398, 398, 398, 403, 398, 398, 398, 398, - 398, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 398, 398, 405, 406, 407, 398, 408, 398, 398, 402, - 398, 398, 398, 398, 403, 398, 398, 398, 398, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 405, 406, 406, 398, 407, 398, 408, 398, - 398, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 
404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 404, 404, 404, - 404, 404, 404, 404, 404, 404, 404, 0, 398, 398, - - 398, 398, 398, 398, 398, 398, 398, 398 - } ; - -static yyconst flex_int16_t yy_nxt[499] = - { 0, - 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, - 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, - 30, 31, 32, 33, 34, 35, 36, 37, 38, 38, - 39, 38, 38, 40, 41, 42, 43, 44, 38, 45, - 46, 47, 48, 49, 50, 38, 38, 38, 51, 52, - 59, 59, 59, 59, 63, 71, 64, 67, 68, 73, - 72, 77, 118, 74, 119, 78, 75, 63, 79, 64, - 88, 80, 82, 85, 81, 86, 83, 89, 96, 76, - 90, 93, 84, 91, 99, 87, 92, 101, 97, 94, - 100, 107, 133, 110, 95, 102, 111, 103, 179, 104, - - 108, 109, 105, 115, 121, 112, 180, 125, 134, 113, - 116, 122, 126, 114, 59, 59, 139, 117, 141, 142, - 146, 163, 140, 159, 171, 173, 143, 189, 70, 147, - 172, 177, 183, 164, 207, 208, 148, 190, 160, 161, - 174, 193, 178, 184, 175, 194, 398, 125, 222, 214, - 224, 398, 126, 215, 248, 249, 60, 397, 396, 395, - 225, 394, 393, 223, 392, 391, 250, 
53, 53, 53, - 53, 53, 53, 55, 55, 55, 55, 55, 55, 57, - 57, 57, 57, 57, 57, 65, 65, 123, 123, 123, - 390, 123, 123, 124, 124, 124, 124, 124, 124, 127, - - 127, 389, 127, 127, 127, 129, 388, 129, 129, 129, - 129, 387, 386, 385, 384, 383, 382, 381, 380, 379, - 378, 377, 376, 375, 374, 373, 372, 371, 370, 369, - 368, 367, 366, 365, 364, 363, 362, 361, 360, 359, - 358, 357, 356, 355, 354, 353, 352, 351, 350, 349, - 348, 347, 346, 345, 344, 343, 342, 341, 340, 339, - 338, 337, 336, 335, 334, 333, 332, 331, 330, 329, - 328, 327, 326, 325, 324, 323, 322, 321, 320, 319, - 318, 317, 316, 315, 314, 313, 312, 311, 310, 309, - 308, 307, 306, 305, 304, 303, 302, 301, 300, 299, - - 298, 297, 296, 295, 294, 293, 292, 291, 290, 289, - 288, 287, 286, 285, 284, 283, 282, 281, 280, 279, - 278, 277, 276, 275, 274, 273, 272, 271, 270, 269, - 268, 267, 266, 265, 264, 263, 262, 261, 260, 259, - 258, 257, 256, 255, 254, 253, 252, 251, 247, 246, - 245, 244, 243, 242, 241, 240, 239, 238, 237, 236, - 235, 234, 233, 232, 231, 230, 229, 228, 227, 226, - 221, 220, 219, 218, 217, 216, 213, 212, 211, 210, - 209, 206, 205, 204, 203, 202, 201, 200, 199, 198, - 197, 196, 131, 130, 128, 195, 192, 191, 188, 187, - - 186, 185, 182, 181, 176, 170, 169, 168, 167, 166, - 165, 162, 158, 157, 156, 155, 154, 153, 152, 151, - 150, 149, 145, 144, 138, 137, 136, 135, 132, 398, - 131, 130, 128, 120, 106, 98, 69, 66, 62, 61, - 398, 58, 58, 56, 56, 54, 54, 9, 398, 398, - 398, 398, 398, 398, 398, 398, 398, 398, 398, 398, - 398, 398, 398, 398, 398, 398, 398, 398, 398, 398, - 398, 398, 398, 398, 398, 398, 398, 398, 398, 398, - 398, 398, 398, 398, 398, 398, 398, 398, 398, 398, - 398, 398, 398, 398, 398, 398, 398, 398 - - } ; - -static yyconst flex_int16_t yy_chk[499] = - { 0, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 11, 11, 12, 12, 25, 32, 25, 28, 28, 33, - 32, 34, 48, 33, 48, 34, 33, 64, 34, 64, - 
37, 34, 35, 36, 34, 36, 35, 37, 40, 33, - 37, 39, 35, 37, 42, 36, 37, 43, 40, 39, - 42, 45, 72, 46, 39, 43, 46, 43, 110, 43, - - 45, 45, 43, 47, 50, 46, 110, 54, 72, 46, - 47, 50, 54, 46, 59, 59, 78, 47, 79, 79, - 82, 97, 78, 94, 106, 107, 79, 118, 404, 82, - 106, 109, 113, 97, 145, 145, 82, 118, 94, 94, - 107, 121, 109, 113, 107, 121, 124, 125, 160, 151, - 161, 124, 125, 151, 188, 188, 402, 396, 395, 394, - 161, 393, 392, 160, 391, 389, 188, 399, 399, 399, - 399, 399, 399, 400, 400, 400, 400, 400, 400, 401, - 401, 401, 401, 401, 401, 403, 403, 405, 405, 405, - 388, 405, 405, 406, 406, 406, 406, 406, 406, 407, - - 407, 387, 407, 407, 407, 408, 386, 408, 408, 408, - 408, 385, 384, 383, 382, 381, 380, 378, 377, 374, - 372, 370, 368, 367, 366, 364, 361, 359, 358, 357, - 356, 354, 352, 351, 348, 347, 345, 344, 343, 340, - 339, 336, 334, 333, 332, 331, 328, 327, 326, 324, - 323, 321, 319, 317, 316, 315, 313, 311, 307, 306, - 302, 301, 300, 299, 298, 297, 296, 294, 293, 291, - 290, 289, 288, 287, 286, 285, 284, 282, 281, 280, - 278, 276, 272, 271, 270, 269, 268, 267, 266, 264, - 263, 262, 260, 258, 256, 255, 254, 253, 252, 251, - - 250, 249, 248, 246, 245, 244, 243, 242, 241, 240, - 239, 238, 237, 235, 234, 233, 230, 226, 224, 223, - 222, 221, 220, 218, 217, 215, 213, 212, 210, 209, - 208, 207, 206, 205, 204, 203, 202, 201, 198, 197, - 196, 195, 194, 193, 192, 191, 190, 189, 187, 186, - 185, 183, 181, 179, 178, 177, 176, 175, 174, 173, - 172, 171, 169, 168, 167, 166, 165, 164, 163, 162, - 159, 158, 157, 155, 154, 153, 150, 149, 148, 147, - 146, 144, 143, 142, 141, 140, 139, 138, 137, 136, - 135, 134, 131, 130, 128, 122, 120, 119, 117, 116, - - 115, 114, 112, 111, 108, 105, 104, 103, 100, 99, - 98, 96, 92, 91, 90, 89, 88, 87, 86, 85, - 84, 83, 81, 80, 77, 75, 74, 73, 71, 65, - 63, 58, 56, 49, 44, 41, 30, 26, 24, 23, - 9, 8, 7, 6, 5, 4, 3, 398, 398, 398, - 398, 398, 398, 398, 398, 398, 398, 398, 398, 398, - 398, 398, 398, 398, 398, 398, 398, 398, 398, 398, - 398, 398, 398, 
398, 398, 398, 398, 398, 398, 398, - 398, 398, 398, 398, 398, 398, 398, 398, 398, 398, - 398, 398, 398, 398, 398, 398, 398, 398 - - } ; - -static yy_state_type yy_last_accepting_state; -static char *yy_last_accepting_cpos; - -extern int yy_flex_debug; -int yy_flex_debug = 0; - -/* The intent behind this definition is that it'll catch - * any uses of REJECT which flex missed. - */ -#define REJECT reject_used_but_not_detected -#define yymore() yymore_used_but_not_detected -#define YY_MORE_ADJ 0 -#define YY_RESTORE_YY_MORE_OFFSET -char *yytext; -#line 1 "pars0lex.l" -/****************************************************** -SQL parser lexical analyzer: input file for the GNU Flex lexer generator - -(c) 1997 Innobase Oy - -Created 12/14/1997 Heikki Tuuri -Published under the GPL version 2 - -The InnoDB parser is frozen because MySQL takes care of SQL parsing. -Therefore we normally keep the InnoDB parser C files as they are, and do -not automatically generate them from pars0grm.y and pars0lex.l. - -How to make the InnoDB parser and lexer C files: - -1. Run ./make_flex.sh to generate lexer files. - -2. Run ./make_bison.sh to generate parser files. - -These instructions seem to work at least with bison-1.875d and flex-2.5.31 on -Linux. -*******************************************************/ -#define YY_NO_INPUT 1 -#define YY_NO_UNISTD_H 1 -#line 38 "pars0lex.l" -#define YYSTYPE que_node_t* - -#include "univ.i" -#include "pars0pars.h" -#include "pars0grm.h" -#include "pars0sym.h" -#include "mem0mem.h" -#include "os0proc.h" - -#define malloc(A) ut_malloc(A) -#define free(A) ut_free(A) -#define realloc(P, A) ut_realloc(P, A) -#define exit(A) ut_error - -#define YY_INPUT(buf, result, max_size) pars_get_lex_chars(buf, &result, max_size) - -/* String buffer for removing quotes */ -static ulint stringbuf_len_alloc = 0; /* Allocated length */ -static ulint stringbuf_len = 0; /* Current length */ -static char* stringbuf; /* Start of buffer */ -/* Appends a string to the buffer. 
*/ -static -void -string_append( -/*==========*/ - const char* str, /* in: string to be appended */ - ulint len) /* in: length of the string */ -{ - if (stringbuf == NULL) { - stringbuf = malloc(1); - stringbuf_len_alloc = 1; - } - - if (stringbuf_len + len > stringbuf_len_alloc) { - while (stringbuf_len + len > stringbuf_len_alloc) { - stringbuf_len_alloc <<= 1; - } - stringbuf = realloc(stringbuf, stringbuf_len_alloc); - } - - memcpy(stringbuf + stringbuf_len, str, len); - stringbuf_len += len; -} - - - - -#line 759 "_flex_tmp.c" - -#define INITIAL 0 -#define comment 1 -#define quoted 2 -#define id 3 - -#ifndef YY_NO_UNISTD_H -/* Special case for "unistd.h", since it is non-ANSI. We include it way - * down here because we want the user's section 1 to have been scanned first. - * The user has a chance to override it with an option. - */ -#include <unistd.h> -#endif - -#ifndef YY_EXTRA_TYPE -#define YY_EXTRA_TYPE void * -#endif - -/* Macros after this point can all be overridden by user definitions in - * section 1. - */ - -#ifndef YY_SKIP_YYWRAP -#ifdef __cplusplus -extern "C" int yywrap (void ); -#else -extern int yywrap (void ); -#endif -#endif - -#ifndef yytext_ptr -static void yy_flex_strncpy (char *,yyconst char *,int ); -#endif - -#ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * ); -#endif - -#ifndef YY_NO_INPUT - -#ifdef __cplusplus -static int yyinput (void ); -#else -static int input (void ); -#endif - -#endif - -/* Amount of stuff to slurp up with each read. */ -#ifndef YY_READ_BUF_SIZE -#define YY_READ_BUF_SIZE 8192 -#endif - -/* Copy whatever the last rule matched to the standard output. */ -#ifndef ECHO -/* This used to be an fputs(), but since the string might contain NUL's, - * we now use fwrite(). - */ -#define ECHO (void) fwrite( yytext, yyleng, 1, yyout ) -#endif - -/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, - * is returned in "result". 
- */ -#ifndef YY_INPUT -#define YY_INPUT(buf,result,max_size) \ - if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ - { \ - int c = '*'; \ - size_t n; \ - for ( n = 0; n < max_size && \ - (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ - buf[n] = (char) c; \ - if ( c == '\n' ) \ - buf[n++] = (char) c; \ - if ( c == EOF && ferror( yyin ) ) \ - YY_FATAL_ERROR( "input in flex scanner failed" ); \ - result = n; \ - } \ - else \ - { \ - errno=0; \ - while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \ - { \ - if( errno != EINTR) \ - { \ - YY_FATAL_ERROR( "input in flex scanner failed" ); \ - break; \ - } \ - errno=0; \ - clearerr(yyin); \ - } \ - }\ -\ - -#endif - -/* No semi-colon after return; correct usage is to write "yyterminate();" - - * we don't want an extra ';' after the "return" because that will cause - * some compilers to complain about unreachable statements. - */ -#ifndef yyterminate -#define yyterminate() return YY_NULL -#endif - -/* Number of entries by which start-condition stack grows. */ -#ifndef YY_START_STACK_INCR -#define YY_START_STACK_INCR 25 -#endif - -/* Report a fatal error. */ -#ifndef YY_FATAL_ERROR -#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) -#endif - -/* end tables serialization structures and prototypes */ - -/* Default declaration of generated scanner - a define so the user can - * easily add parameters. - */ -#ifndef YY_DECL -#define YY_DECL_IS_OURS 1 - -extern int yylex (void); - -#define YY_DECL int yylex (void) -#endif /* !YY_DECL */ - -/* Code executed at the beginning of each rule, after yytext and yyleng - * have been set up. - */ -#ifndef YY_USER_ACTION -#define YY_USER_ACTION -#endif - -/* Code executed at the end of each rule. */ -#ifndef YY_BREAK -#define YY_BREAK break; -#endif - -#define YY_RULE_SETUP \ - YY_USER_ACTION - -/** The main scanner function which does all the work. 
- */ -YY_DECL -{ - register yy_state_type yy_current_state; - register char *yy_cp, *yy_bp; - register int yy_act; - -#line 92 "pars0lex.l" - - -#line 914 "_flex_tmp.c" - - if ( (yy_init) ) - { - (yy_init) = 0; - -#ifdef YY_USER_INIT - YY_USER_INIT; -#endif - - if ( ! (yy_start) ) - (yy_start) = 1; /* first start state */ - - if ( ! yyin ) - yyin = stdin; - - if ( ! yyout ) - yyout = stdout; - - if ( ! YY_CURRENT_BUFFER ) { - yyensure_buffer_stack (); - YY_CURRENT_BUFFER_LVALUE = - yy_create_buffer(yyin,YY_BUF_SIZE ); - } - - yy_load_buffer_state( ); - } - - while ( 1 ) /* loops until end-of-file is reached */ - { - yy_cp = (yy_c_buf_p); - - /* Support of yytext. */ - *yy_cp = (yy_hold_char); - - /* yy_bp points to the position in yy_ch_buf of the start of - * the current run. - */ - yy_bp = yy_cp; - - yy_current_state = (yy_start); -yy_match: - do - { - register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; - if ( yy_accept[yy_current_state] ) - { - (yy_last_accepting_state) = yy_current_state; - (yy_last_accepting_cpos) = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 399 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - ++yy_cp; - } - while ( yy_current_state != 398 ); - yy_cp = (yy_last_accepting_cpos); - yy_current_state = (yy_last_accepting_state); - -yy_find_action: - yy_act = yy_accept[yy_current_state]; - - YY_DO_BEFORE_ACTION; - -do_action: /* This label is used only to access EOF actions. 
*/ - - switch ( yy_act ) - { /* beginning of action switch */ - case 0: /* must back up */ - /* undo the effects of YY_DO_BEFORE_ACTION */ - *yy_cp = (yy_hold_char); - yy_cp = (yy_last_accepting_cpos); - yy_current_state = (yy_last_accepting_state); - goto yy_find_action; - -case 1: -YY_RULE_SETUP -#line 94 "pars0lex.l" -{ - yylval = sym_tab_add_int_lit(pars_sym_tab_global, - atoi(yytext)); - return(PARS_INT_LIT); -} - YY_BREAK -case 2: -YY_RULE_SETUP -#line 100 "pars0lex.l" -{ - ut_error; /* not implemented */ - - return(PARS_FLOAT_LIT); -} - YY_BREAK -case 3: -YY_RULE_SETUP -#line 106 "pars0lex.l" -{ - ulint type; - - yylval = sym_tab_add_bound_lit(pars_sym_tab_global, - yytext + 1, &type); - - return((int) type); -} - YY_BREAK -case 4: -YY_RULE_SETUP -#line 115 "pars0lex.l" -{ - yylval = sym_tab_add_bound_id(pars_sym_tab_global, - yytext + 1); - - return(PARS_ID_TOKEN); -} - YY_BREAK -case 5: -YY_RULE_SETUP -#line 122 "pars0lex.l" -{ -/* Quoted character string literals are handled in an explicit -start state 'quoted'. This state is entered and the buffer for -the scanned string is emptied upon encountering a starting quote. - -In the state 'quoted', only two actions are possible (defined below). */ - BEGIN(quoted); - stringbuf_len = 0; -} - YY_BREAK -case 6: -/* rule 6 can match eol */ -YY_RULE_SETUP -#line 131 "pars0lex.l" -{ - /* Got a sequence of characters other than "'": - append to string buffer */ - string_append(yytext, yyleng); -} - YY_BREAK -case 7: -YY_RULE_SETUP -#line 136 "pars0lex.l" -{ - /* Got a sequence of "'" characters: - append half of them to string buffer, - as "''" represents a single "'". - We apply truncating division, - so that "'''" will result in "'". */ - - string_append(yytext, yyleng / 2); - - /* If we got an odd number of quotes, then the - last quote we got is the terminating quote. - At the end of the string, we return to the - initial start state and report the scanned - string literal. 
*/ - - if (yyleng % 2) { - BEGIN(INITIAL); - yylval = sym_tab_add_str_lit( - pars_sym_tab_global, - (byte*) stringbuf, stringbuf_len); - return(PARS_STR_LIT); - } -} - YY_BREAK -case 8: -YY_RULE_SETUP -#line 160 "pars0lex.l" -{ -/* Quoted identifiers are handled in an explicit start state 'id'. -This state is entered and the buffer for the scanned string is emptied -upon encountering a starting quote. - -In the state 'id', only two actions are possible (defined below). */ - BEGIN(id); - stringbuf_len = 0; -} - YY_BREAK -case 9: -/* rule 9 can match eol */ -YY_RULE_SETUP -#line 169 "pars0lex.l" -{ - /* Got a sequence of characters other than '"': - append to string buffer */ - string_append(yytext, yyleng); -} - YY_BREAK -case 10: -YY_RULE_SETUP -#line 174 "pars0lex.l" -{ - /* Got a sequence of '"' characters: - append half of them to string buffer, - as '""' represents a single '"'. - We apply truncating division, - so that '"""' will result in '"'. */ - - string_append(yytext, yyleng / 2); - - /* If we got an odd number of quotes, then the - last quote we got is the terminating quote. - At the end of the string, we return to the - initial start state and report the scanned - identifier. 
*/ - - if (yyleng % 2) { - BEGIN(INITIAL); - yylval = sym_tab_add_id( - pars_sym_tab_global, - (byte*) stringbuf, stringbuf_len); - - return(PARS_ID_TOKEN); - } -} - YY_BREAK -case 11: -YY_RULE_SETUP -#line 199 "pars0lex.l" -{ - yylval = sym_tab_add_null_lit(pars_sym_tab_global); - - return(PARS_NULL_LIT); -} - YY_BREAK -case 12: -YY_RULE_SETUP -#line 205 "pars0lex.l" -{ - /* Implicit cursor name */ - yylval = sym_tab_add_str_lit(pars_sym_tab_global, - (byte*) yytext, yyleng); - return(PARS_SQL_TOKEN); -} - YY_BREAK -case 13: -YY_RULE_SETUP -#line 212 "pars0lex.l" -{ - return(PARS_AND_TOKEN); -} - YY_BREAK -case 14: -YY_RULE_SETUP -#line 216 "pars0lex.l" -{ - return(PARS_OR_TOKEN); -} - YY_BREAK -case 15: -YY_RULE_SETUP -#line 220 "pars0lex.l" -{ - return(PARS_NOT_TOKEN); -} - YY_BREAK -case 16: -YY_RULE_SETUP -#line 224 "pars0lex.l" -{ - return(PARS_PROCEDURE_TOKEN); -} - YY_BREAK -case 17: -YY_RULE_SETUP -#line 228 "pars0lex.l" -{ - return(PARS_IN_TOKEN); -} - YY_BREAK -case 18: -YY_RULE_SETUP -#line 232 "pars0lex.l" -{ - return(PARS_OUT_TOKEN); -} - YY_BREAK -case 19: -YY_RULE_SETUP -#line 236 "pars0lex.l" -{ - return(PARS_BINARY_TOKEN); -} - YY_BREAK -case 20: -YY_RULE_SETUP -#line 240 "pars0lex.l" -{ - return(PARS_BLOB_TOKEN); -} - YY_BREAK -case 21: -YY_RULE_SETUP -#line 244 "pars0lex.l" -{ - return(PARS_INT_TOKEN); -} - YY_BREAK -case 22: -YY_RULE_SETUP -#line 248 "pars0lex.l" -{ - return(PARS_INT_TOKEN); -} - YY_BREAK -case 23: -YY_RULE_SETUP -#line 252 "pars0lex.l" -{ - return(PARS_FLOAT_TOKEN); -} - YY_BREAK -case 24: -YY_RULE_SETUP -#line 256 "pars0lex.l" -{ - return(PARS_CHAR_TOKEN); -} - YY_BREAK -case 25: -YY_RULE_SETUP -#line 260 "pars0lex.l" -{ - return(PARS_IS_TOKEN); -} - YY_BREAK -case 26: -YY_RULE_SETUP -#line 264 "pars0lex.l" -{ - return(PARS_BEGIN_TOKEN); -} - YY_BREAK -case 27: -YY_RULE_SETUP -#line 268 "pars0lex.l" -{ - return(PARS_END_TOKEN); -} - YY_BREAK -case 28: -YY_RULE_SETUP -#line 272 "pars0lex.l" -{ - return(PARS_IF_TOKEN); -} - 
YY_BREAK -case 29: -YY_RULE_SETUP -#line 276 "pars0lex.l" -{ - return(PARS_THEN_TOKEN); -} - YY_BREAK -case 30: -YY_RULE_SETUP -#line 280 "pars0lex.l" -{ - return(PARS_ELSE_TOKEN); -} - YY_BREAK -case 31: -YY_RULE_SETUP -#line 284 "pars0lex.l" -{ - return(PARS_ELSIF_TOKEN); -} - YY_BREAK -case 32: -YY_RULE_SETUP -#line 288 "pars0lex.l" -{ - return(PARS_LOOP_TOKEN); -} - YY_BREAK -case 33: -YY_RULE_SETUP -#line 292 "pars0lex.l" -{ - return(PARS_WHILE_TOKEN); -} - YY_BREAK -case 34: -YY_RULE_SETUP -#line 296 "pars0lex.l" -{ - return(PARS_RETURN_TOKEN); -} - YY_BREAK -case 35: -YY_RULE_SETUP -#line 300 "pars0lex.l" -{ - return(PARS_SELECT_TOKEN); -} - YY_BREAK -case 36: -YY_RULE_SETUP -#line 304 "pars0lex.l" -{ - return(PARS_SUM_TOKEN); -} - YY_BREAK -case 37: -YY_RULE_SETUP -#line 308 "pars0lex.l" -{ - return(PARS_COUNT_TOKEN); -} - YY_BREAK -case 38: -YY_RULE_SETUP -#line 312 "pars0lex.l" -{ - return(PARS_DISTINCT_TOKEN); -} - YY_BREAK -case 39: -YY_RULE_SETUP -#line 316 "pars0lex.l" -{ - return(PARS_FROM_TOKEN); -} - YY_BREAK -case 40: -YY_RULE_SETUP -#line 320 "pars0lex.l" -{ - return(PARS_WHERE_TOKEN); -} - YY_BREAK -case 41: -YY_RULE_SETUP -#line 324 "pars0lex.l" -{ - return(PARS_FOR_TOKEN); -} - YY_BREAK -case 42: -YY_RULE_SETUP -#line 328 "pars0lex.l" -{ - return(PARS_READ_TOKEN); -} - YY_BREAK -case 43: -YY_RULE_SETUP -#line 332 "pars0lex.l" -{ - return(PARS_ORDER_TOKEN); -} - YY_BREAK -case 44: -YY_RULE_SETUP -#line 336 "pars0lex.l" -{ - return(PARS_BY_TOKEN); -} - YY_BREAK -case 45: -YY_RULE_SETUP -#line 340 "pars0lex.l" -{ - return(PARS_ASC_TOKEN); -} - YY_BREAK -case 46: -YY_RULE_SETUP -#line 344 "pars0lex.l" -{ - return(PARS_DESC_TOKEN); -} - YY_BREAK -case 47: -YY_RULE_SETUP -#line 348 "pars0lex.l" -{ - return(PARS_INSERT_TOKEN); -} - YY_BREAK -case 48: -YY_RULE_SETUP -#line 352 "pars0lex.l" -{ - return(PARS_INTO_TOKEN); -} - YY_BREAK -case 49: -YY_RULE_SETUP -#line 356 "pars0lex.l" -{ - return(PARS_VALUES_TOKEN); -} - YY_BREAK -case 50: -YY_RULE_SETUP 
-#line 360 "pars0lex.l" -{ - return(PARS_UPDATE_TOKEN); -} - YY_BREAK -case 51: -YY_RULE_SETUP -#line 364 "pars0lex.l" -{ - return(PARS_SET_TOKEN); -} - YY_BREAK -case 52: -YY_RULE_SETUP -#line 368 "pars0lex.l" -{ - return(PARS_DELETE_TOKEN); -} - YY_BREAK -case 53: -YY_RULE_SETUP -#line 372 "pars0lex.l" -{ - return(PARS_CURRENT_TOKEN); -} - YY_BREAK -case 54: -YY_RULE_SETUP -#line 376 "pars0lex.l" -{ - return(PARS_OF_TOKEN); -} - YY_BREAK -case 55: -YY_RULE_SETUP -#line 380 "pars0lex.l" -{ - return(PARS_CREATE_TOKEN); -} - YY_BREAK -case 56: -YY_RULE_SETUP -#line 384 "pars0lex.l" -{ - return(PARS_TABLE_TOKEN); -} - YY_BREAK -case 57: -YY_RULE_SETUP -#line 388 "pars0lex.l" -{ - return(PARS_INDEX_TOKEN); -} - YY_BREAK -case 58: -YY_RULE_SETUP -#line 392 "pars0lex.l" -{ - return(PARS_UNIQUE_TOKEN); -} - YY_BREAK -case 59: -YY_RULE_SETUP -#line 396 "pars0lex.l" -{ - return(PARS_CLUSTERED_TOKEN); -} - YY_BREAK -case 60: -YY_RULE_SETUP -#line 400 "pars0lex.l" -{ - return(PARS_DOES_NOT_FIT_IN_MEM_TOKEN); -} - YY_BREAK -case 61: -YY_RULE_SETUP -#line 404 "pars0lex.l" -{ - return(PARS_ON_TOKEN); -} - YY_BREAK -case 62: -YY_RULE_SETUP -#line 408 "pars0lex.l" -{ - return(PARS_DECLARE_TOKEN); -} - YY_BREAK -case 63: -YY_RULE_SETUP -#line 412 "pars0lex.l" -{ - return(PARS_CURSOR_TOKEN); -} - YY_BREAK -case 64: -YY_RULE_SETUP -#line 416 "pars0lex.l" -{ - return(PARS_OPEN_TOKEN); -} - YY_BREAK -case 65: -YY_RULE_SETUP -#line 420 "pars0lex.l" -{ - return(PARS_FETCH_TOKEN); -} - YY_BREAK -case 66: -YY_RULE_SETUP -#line 424 "pars0lex.l" -{ - return(PARS_CLOSE_TOKEN); -} - YY_BREAK -case 67: -YY_RULE_SETUP -#line 428 "pars0lex.l" -{ - return(PARS_NOTFOUND_TOKEN); -} - YY_BREAK -case 68: -YY_RULE_SETUP -#line 432 "pars0lex.l" -{ - return(PARS_TO_CHAR_TOKEN); -} - YY_BREAK -case 69: -YY_RULE_SETUP -#line 436 "pars0lex.l" -{ - return(PARS_TO_NUMBER_TOKEN); -} - YY_BREAK -case 70: -YY_RULE_SETUP -#line 440 "pars0lex.l" -{ - return(PARS_TO_BINARY_TOKEN); -} - YY_BREAK -case 71: 
-YY_RULE_SETUP -#line 444 "pars0lex.l" -{ - return(PARS_BINARY_TO_NUMBER_TOKEN); -} - YY_BREAK -case 72: -YY_RULE_SETUP -#line 448 "pars0lex.l" -{ - return(PARS_SUBSTR_TOKEN); -} - YY_BREAK -case 73: -YY_RULE_SETUP -#line 452 "pars0lex.l" -{ - return(PARS_REPLSTR_TOKEN); -} - YY_BREAK -case 74: -YY_RULE_SETUP -#line 456 "pars0lex.l" -{ - return(PARS_CONCAT_TOKEN); -} - YY_BREAK -case 75: -YY_RULE_SETUP -#line 460 "pars0lex.l" -{ - return(PARS_INSTR_TOKEN); -} - YY_BREAK -case 76: -YY_RULE_SETUP -#line 464 "pars0lex.l" -{ - return(PARS_LENGTH_TOKEN); -} - YY_BREAK -case 77: -YY_RULE_SETUP -#line 468 "pars0lex.l" -{ - return(PARS_SYSDATE_TOKEN); -} - YY_BREAK -case 78: -YY_RULE_SETUP -#line 472 "pars0lex.l" -{ - return(PARS_PRINTF_TOKEN); -} - YY_BREAK -case 79: -YY_RULE_SETUP -#line 476 "pars0lex.l" -{ - return(PARS_ASSERT_TOKEN); -} - YY_BREAK -case 80: -YY_RULE_SETUP -#line 480 "pars0lex.l" -{ - return(PARS_RND_TOKEN); -} - YY_BREAK -case 81: -YY_RULE_SETUP -#line 484 "pars0lex.l" -{ - return(PARS_RND_STR_TOKEN); -} - YY_BREAK -case 82: -YY_RULE_SETUP -#line 488 "pars0lex.l" -{ - return(PARS_ROW_PRINTF_TOKEN); -} - YY_BREAK -case 83: -YY_RULE_SETUP -#line 492 "pars0lex.l" -{ - return(PARS_COMMIT_TOKEN); -} - YY_BREAK -case 84: -YY_RULE_SETUP -#line 496 "pars0lex.l" -{ - return(PARS_ROLLBACK_TOKEN); -} - YY_BREAK -case 85: -YY_RULE_SETUP -#line 500 "pars0lex.l" -{ - return(PARS_WORK_TOKEN); -} - YY_BREAK -case 86: -YY_RULE_SETUP -#line 504 "pars0lex.l" -{ - return(PARS_UNSIGNED_TOKEN); -} - YY_BREAK -case 87: -YY_RULE_SETUP -#line 508 "pars0lex.l" -{ - return(PARS_EXIT_TOKEN); -} - YY_BREAK -case 88: -YY_RULE_SETUP -#line 512 "pars0lex.l" -{ - return(PARS_FUNCTION_TOKEN); -} - YY_BREAK -case 89: -YY_RULE_SETUP -#line 516 "pars0lex.l" -{ - return(PARS_LOCK_TOKEN); -} - YY_BREAK -case 90: -YY_RULE_SETUP -#line 520 "pars0lex.l" -{ - return(PARS_SHARE_TOKEN); -} - YY_BREAK -case 91: -YY_RULE_SETUP -#line 524 "pars0lex.l" -{ - return(PARS_MODE_TOKEN); -} - YY_BREAK 
-case 92: -YY_RULE_SETUP -#line 528 "pars0lex.l" -{ - yylval = sym_tab_add_id(pars_sym_tab_global, - (byte*)yytext, - ut_strlen(yytext)); - return(PARS_ID_TOKEN); -} - YY_BREAK -case 93: -YY_RULE_SETUP -#line 535 "pars0lex.l" -{ - return(PARS_DDOT_TOKEN); -} - YY_BREAK -case 94: -YY_RULE_SETUP -#line 539 "pars0lex.l" -{ - return(PARS_ASSIGN_TOKEN); -} - YY_BREAK -case 95: -YY_RULE_SETUP -#line 543 "pars0lex.l" -{ - return(PARS_LE_TOKEN); -} - YY_BREAK -case 96: -YY_RULE_SETUP -#line 547 "pars0lex.l" -{ - return(PARS_GE_TOKEN); -} - YY_BREAK -case 97: -YY_RULE_SETUP -#line 551 "pars0lex.l" -{ - return(PARS_NE_TOKEN); -} - YY_BREAK -case 98: -YY_RULE_SETUP -#line 555 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 99: -YY_RULE_SETUP -#line 560 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 100: -YY_RULE_SETUP -#line 565 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 101: -YY_RULE_SETUP -#line 570 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 102: -YY_RULE_SETUP -#line 575 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 103: -YY_RULE_SETUP -#line 580 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 104: -YY_RULE_SETUP -#line 585 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 105: -YY_RULE_SETUP -#line 590 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 106: -YY_RULE_SETUP -#line 595 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 107: -YY_RULE_SETUP -#line 600 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 108: -YY_RULE_SETUP -#line 605 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 109: -YY_RULE_SETUP -#line 610 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 110: -YY_RULE_SETUP -#line 615 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 111: -YY_RULE_SETUP -#line 620 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 112: -YY_RULE_SETUP -#line 
625 "pars0lex.l" -{ - - return((int)(*yytext)); -} - YY_BREAK -case 113: -YY_RULE_SETUP -#line 630 "pars0lex.l" -BEGIN(comment); /* eat up comment */ - YY_BREAK -case 114: -/* rule 114 can match eol */ -YY_RULE_SETUP -#line 632 "pars0lex.l" - - YY_BREAK -case 115: -/* rule 115 can match eol */ -YY_RULE_SETUP -#line 633 "pars0lex.l" - - YY_BREAK -case 116: -YY_RULE_SETUP -#line 634 "pars0lex.l" -BEGIN(INITIAL); - YY_BREAK -case 117: -/* rule 117 can match eol */ -YY_RULE_SETUP -#line 636 "pars0lex.l" -/* eat up whitespace */ - YY_BREAK -case 118: -YY_RULE_SETUP -#line 639 "pars0lex.l" -{ - fprintf(stderr,"Unrecognized character: %02x\n", - *yytext); - - ut_error; - - return(0); -} - YY_BREAK -case 119: -YY_RULE_SETUP -#line 648 "pars0lex.l" -YY_FATAL_ERROR( "flex scanner jammed" ); - YY_BREAK -#line 1916 "_flex_tmp.c" -case YY_STATE_EOF(INITIAL): -case YY_STATE_EOF(comment): -case YY_STATE_EOF(quoted): -case YY_STATE_EOF(id): - yyterminate(); - - case YY_END_OF_BUFFER: - { - /* Amount of text matched not including the EOB char. */ - int yy_amount_of_matched_text = (int) (yy_cp - (yytext_ptr)) - 1; - - /* Undo the effects of YY_DO_BEFORE_ACTION. */ - *yy_cp = (yy_hold_char); - YY_RESTORE_YY_MORE_OFFSET - - if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) - { - /* We're scanning a new file or input source. It's - * possible that this happened because the user - * just pointed yyin at a new source and called - * yylex(). If so, then we have to assure - * consistency between YY_CURRENT_BUFFER and our - * globals. Here is the right place to do so, because - * this is the first action (other than possibly a - * back-up) that will match for the new input source. 
- */ - (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; - YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; - YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; - } - - /* Note that here we test for yy_c_buf_p "<=" to the position - * of the first EOB in the buffer, since yy_c_buf_p will - * already have been incremented past the NUL character - * (since all states make transitions on EOB to the - * end-of-buffer state). Contrast this with the test - * in input(). - */ - if ( (yy_c_buf_p) <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) - { /* This was really a NUL. */ - yy_state_type yy_next_state; - - (yy_c_buf_p) = (yytext_ptr) + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state( ); - - /* Okay, we're now positioned to make the NUL - * transition. We couldn't have - * yy_get_previous_state() go ahead and do it - * for us because it doesn't know how to deal - * with the possibility of jamming (and we don't - * want to build jamming into it because then it - * will run more slowly). - */ - - yy_next_state = yy_try_NUL_trans( yy_current_state ); - - yy_bp = (yytext_ptr) + YY_MORE_ADJ; - - if ( yy_next_state ) - { - /* Consume the NUL. */ - yy_cp = ++(yy_c_buf_p); - yy_current_state = yy_next_state; - goto yy_match; - } - - else - { - yy_cp = (yy_last_accepting_cpos); - yy_current_state = (yy_last_accepting_state); - goto yy_find_action; - } - } - - else switch ( yy_get_next_buffer( ) ) - { - case EOB_ACT_END_OF_FILE: - { - (yy_did_buffer_switch_on_eof) = 0; - - if ( yywrap( ) ) - { - /* Note: because we've taken care in - * yy_get_next_buffer() to have set up - * yytext, we can now set up - * yy_c_buf_p so that if some total - * hoser (like flex itself) wants to - * call the scanner after we return the - * YY_NULL, it'll still work - another - * YY_NULL will get returned. - */ - (yy_c_buf_p) = (yytext_ptr) + YY_MORE_ADJ; - - yy_act = YY_STATE_EOF(YY_START); - goto do_action; - } - - else - { - if ( ! 
(yy_did_buffer_switch_on_eof) ) - YY_NEW_FILE; - } - break; - } - - case EOB_ACT_CONTINUE_SCAN: - (yy_c_buf_p) = - (yytext_ptr) + yy_amount_of_matched_text; - - yy_current_state = yy_get_previous_state( ); - - yy_cp = (yy_c_buf_p); - yy_bp = (yytext_ptr) + YY_MORE_ADJ; - goto yy_match; - - case EOB_ACT_LAST_MATCH: - (yy_c_buf_p) = - &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)]; - - yy_current_state = yy_get_previous_state( ); - - yy_cp = (yy_c_buf_p); - yy_bp = (yytext_ptr) + YY_MORE_ADJ; - goto yy_find_action; - } - break; - } - - default: - YY_FATAL_ERROR( - "fatal flex scanner internal error--no action found" ); - } /* end of action switch */ - } /* end of scanning one token */ -} /* end of yylex */ - -/* yy_get_next_buffer - try to read in a new buffer - * - * Returns a code representing an action: - * EOB_ACT_LAST_MATCH - - * EOB_ACT_CONTINUE_SCAN - continue scanning from current position - * EOB_ACT_END_OF_FILE - end of file - */ -static int yy_get_next_buffer (void) -{ - register char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; - register char *source = (yytext_ptr); - register int number_to_move, i; - int ret_val; - - if ( (yy_c_buf_p) > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] ) - YY_FATAL_ERROR( - "fatal flex scanner internal error--end of buffer missed" ); - - if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) - { /* Don't try to fill the buffer, so this is an EOF. */ - if ( (yy_c_buf_p) - (yytext_ptr) - YY_MORE_ADJ == 1 ) - { - /* We matched a single character, the EOB, so - * treat this as a final EOF. - */ - return EOB_ACT_END_OF_FILE; - } - - else - { - /* We matched some text prior to the EOB, first - * process it. - */ - return EOB_ACT_LAST_MATCH; - } - } - - /* Try to read more data. */ - - /* First move last chars to start of buffer. 
*/ - number_to_move = (int) ((yy_c_buf_p) - (yytext_ptr)) - 1; - - for ( i = 0; i < number_to_move; ++i ) - *(dest++) = *(source++); - - if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) - /* don't do the read, it's not guaranteed to return an EOF, - * just force an EOF - */ - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars) = 0; - - else - { - size_t num_to_read = - YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; - - while ( num_to_read <= 0 ) - { /* Not enough room in the buffer - grow it. */ - - /* just a shorter name for the current buffer */ - YY_BUFFER_STATE b = YY_CURRENT_BUFFER; - - int yy_c_buf_p_offset = - (int) ((yy_c_buf_p) - b->yy_ch_buf); - - if ( b->yy_is_our_buffer ) - { - int new_size = b->yy_buf_size * 2; - - if ( new_size <= 0 ) - b->yy_buf_size += b->yy_buf_size / 8; - else - b->yy_buf_size *= 2; - - b->yy_ch_buf = (char *) - /* Include room in for 2 EOB chars. */ - yyrealloc((void *) b->yy_ch_buf,b->yy_buf_size + 2 ); - } - else - /* Can't grow it, we don't own it. */ - b->yy_ch_buf = 0; - - if ( ! b->yy_ch_buf ) - YY_FATAL_ERROR( - "fatal error - scanner input buffer overflow" ); - - (yy_c_buf_p) = &b->yy_ch_buf[yy_c_buf_p_offset]; - - num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - - number_to_move - 1; - - } - - if ( num_to_read > YY_READ_BUF_SIZE ) - num_to_read = YY_READ_BUF_SIZE; - - /* Read in more data. 
*/ - YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), - (yy_n_chars), num_to_read ); - - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); - } - - if ( (yy_n_chars) == 0 ) - { - if ( number_to_move == YY_MORE_ADJ ) - { - ret_val = EOB_ACT_END_OF_FILE; - yyrestart(yyin ); - } - - else - { - ret_val = EOB_ACT_LAST_MATCH; - YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = - YY_BUFFER_EOF_PENDING; - } - } - - else - ret_val = EOB_ACT_CONTINUE_SCAN; - - (yy_n_chars) += number_to_move; - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] = YY_END_OF_BUFFER_CHAR; - YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars) + 1] = YY_END_OF_BUFFER_CHAR; - - (yytext_ptr) = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; - - return ret_val; -} - -/* yy_get_previous_state - get the state just before the EOB char was reached */ - - static yy_state_type yy_get_previous_state (void) -{ - register yy_state_type yy_current_state; - register char *yy_cp; - - yy_current_state = (yy_start); - - for ( yy_cp = (yytext_ptr) + YY_MORE_ADJ; yy_cp < (yy_c_buf_p); ++yy_cp ) - { - register YY_CHAR yy_c = (*yy_cp ? 
yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); - if ( yy_accept[yy_current_state] ) - { - (yy_last_accepting_state) = yy_current_state; - (yy_last_accepting_cpos) = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 399 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - } - - return yy_current_state; -} - -/* yy_try_NUL_trans - try to make a transition on the NUL character - * - * synopsis - * next_state = yy_try_NUL_trans( current_state ); - */ - static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state ) -{ - register int yy_is_jam; - register char *yy_cp = (yy_c_buf_p); - - register YY_CHAR yy_c = 1; - if ( yy_accept[yy_current_state] ) - { - (yy_last_accepting_state) = yy_current_state; - (yy_last_accepting_cpos) = yy_cp; - } - while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) - { - yy_current_state = (int) yy_def[yy_current_state]; - if ( yy_current_state >= 399 ) - yy_c = yy_meta[(unsigned int) yy_c]; - } - yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; - yy_is_jam = (yy_current_state == 398); - - return yy_is_jam ? 0 : yy_current_state; -} - -#ifndef YY_NO_INPUT -#ifdef __cplusplus - static int yyinput (void) -#else - static int input (void) -#endif - -{ - int c; - - *(yy_c_buf_p) = (yy_hold_char); - - if ( *(yy_c_buf_p) == YY_END_OF_BUFFER_CHAR ) - { - /* yy_c_buf_p now points to the character we want to return. - * If this occurs *before* the EOB characters, then it's a - * valid NUL; if not, then we've hit the end of the buffer. - */ - if ( (yy_c_buf_p) < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[(yy_n_chars)] ) - /* This was really a NUL. 
*/ - *(yy_c_buf_p) = '\0'; - - else - { /* need more input */ - int offset = (int)((yy_c_buf_p) - (yytext_ptr)); - ++(yy_c_buf_p); - - switch ( yy_get_next_buffer( ) ) - { - case EOB_ACT_LAST_MATCH: - /* This happens because yy_g_n_b() - * sees that we've accumulated a - * token and flags that we need to - * try matching the token before - * proceeding. But for input(), - * there's no matching to consider. - * So convert the EOB_ACT_LAST_MATCH - * to EOB_ACT_END_OF_FILE. - */ - - /* Reset buffer status. */ - yyrestart(yyin ); - - /*FALLTHROUGH*/ - - case EOB_ACT_END_OF_FILE: - { - if ( yywrap( ) ) - return EOF; - - if ( ! (yy_did_buffer_switch_on_eof) ) - YY_NEW_FILE; -#ifdef __cplusplus - return yyinput(); -#else - return input(); -#endif - } - - case EOB_ACT_CONTINUE_SCAN: - (yy_c_buf_p) = (yytext_ptr) + offset; - break; - } - } - } - - c = *(unsigned char *) (yy_c_buf_p); /* cast for 8-bit char's */ - *(yy_c_buf_p) = '\0'; /* preserve yytext */ - (yy_hold_char) = *++(yy_c_buf_p); - - return c; -} -#endif /* ifndef YY_NO_INPUT */ - -/** Immediately switch to a different input stream. - * @param input_file A readable stream. - * - * @note This function does not reset the start condition to @c INITIAL . - */ - void yyrestart (FILE * input_file ) -{ - - if ( ! YY_CURRENT_BUFFER ){ - yyensure_buffer_stack (); - YY_CURRENT_BUFFER_LVALUE = - yy_create_buffer(yyin,YY_BUF_SIZE ); - } - - yy_init_buffer(YY_CURRENT_BUFFER,input_file ); - yy_load_buffer_state( ); -} - -/** Switch to a different input buffer. - * @param new_buffer The new input buffer. - * - */ - void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer ) -{ - - /* TODO. We should be able to replace this entire function body - * with - * yypop_buffer_state(); - * yypush_buffer_state(new_buffer); - */ - yyensure_buffer_stack (); - if ( YY_CURRENT_BUFFER == new_buffer ) - return; - - if ( YY_CURRENT_BUFFER ) - { - /* Flush out information for old buffer. 
*/ - *(yy_c_buf_p) = (yy_hold_char); - YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); - } - - YY_CURRENT_BUFFER_LVALUE = new_buffer; - yy_load_buffer_state( ); - - /* We don't actually know whether we did this switch during - * EOF (yywrap()) processing, but the only time this flag - * is looked at is after yywrap() is called, so it's safe - * to go ahead and always set it. - */ - (yy_did_buffer_switch_on_eof) = 1; -} - -static void yy_load_buffer_state (void) -{ - (yy_n_chars) = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; - (yytext_ptr) = (yy_c_buf_p) = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; - yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; - (yy_hold_char) = *(yy_c_buf_p); -} - -/** Allocate and initialize an input buffer state. - * @param file A readable stream. - * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. - * - * @return the allocated buffer state. - */ - YY_BUFFER_STATE yy_create_buffer (FILE * file, int size ) -{ - YY_BUFFER_STATE b; - - b = (YY_BUFFER_STATE) yyalloc(sizeof( struct yy_buffer_state ) ); - if ( ! b ) - YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); - - b->yy_buf_size = size; - - /* yy_ch_buf has to be 2 characters longer than the size given because - * we need to put in 2 end-of-buffer characters. - */ - b->yy_ch_buf = (char *) yyalloc(b->yy_buf_size + 2 ); - if ( ! b->yy_ch_buf ) - YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); - - b->yy_is_our_buffer = 1; - - yy_init_buffer(b,file ); - - return b; -} - -/** Destroy the buffer. - * @param b a buffer created with yy_create_buffer() - * - */ - void yy_delete_buffer (YY_BUFFER_STATE b ) -{ - - if ( ! b ) - return; - - if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. 
*/ - YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; - - if ( b->yy_is_our_buffer ) - yyfree((void *) b->yy_ch_buf ); - - yyfree((void *) b ); -} - -/* Initializes or reinitializes a buffer. - * This function is sometimes called more than once on the same buffer, - * such as during a yyrestart() or at EOF. - */ - static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file ) - -{ - int oerrno = errno; - - yy_flush_buffer(b ); - - b->yy_input_file = file; - b->yy_fill_buffer = 1; - - /* If b is the current buffer, then yy_init_buffer was _probably_ - * called from yyrestart() or through yy_get_next_buffer. - * In that case, we don't want to reset the lineno or column. - */ - if (b != YY_CURRENT_BUFFER){ - b->yy_bs_lineno = 1; - b->yy_bs_column = 0; - } - - b->yy_is_interactive = 0; - - errno = oerrno; -} - -/** Discard all buffered characters. On the next scan, YY_INPUT will be called. - * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. - * - */ - void yy_flush_buffer (YY_BUFFER_STATE b ) -{ - if ( ! b ) - return; - - b->yy_n_chars = 0; - - /* We always need two end-of-buffer characters. The first causes - * a transition to the end-of-buffer state. The second causes - * a jam in that state. - */ - b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; - b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; - - b->yy_buf_pos = &b->yy_ch_buf[0]; - - b->yy_at_bol = 1; - b->yy_buffer_status = YY_BUFFER_NEW; - - if ( b == YY_CURRENT_BUFFER ) - yy_load_buffer_state( ); -} - -/** Pushes the new state onto the stack. The new state becomes - * the current state. This function will allocate the stack - * if necessary. - * @param new_buffer The new state. - * - */ -void yypush_buffer_state (YY_BUFFER_STATE new_buffer ) -{ - if (new_buffer == NULL) - return; - - yyensure_buffer_stack(); - - /* This block is copied from yy_switch_to_buffer. */ - if ( YY_CURRENT_BUFFER ) - { - /* Flush out information for old buffer. 
*/ - *(yy_c_buf_p) = (yy_hold_char); - YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = (yy_c_buf_p); - YY_CURRENT_BUFFER_LVALUE->yy_n_chars = (yy_n_chars); - } - - /* Only push if top exists. Otherwise, replace top. */ - if (YY_CURRENT_BUFFER) - (yy_buffer_stack_top)++; - YY_CURRENT_BUFFER_LVALUE = new_buffer; - - /* copied from yy_switch_to_buffer. */ - yy_load_buffer_state( ); - (yy_did_buffer_switch_on_eof) = 1; -} - -/** Removes and deletes the top of the stack, if present. - * The next element becomes the new top. - * - */ -void yypop_buffer_state (void) -{ - if (!YY_CURRENT_BUFFER) - return; - - yy_delete_buffer(YY_CURRENT_BUFFER ); - YY_CURRENT_BUFFER_LVALUE = NULL; - if ((yy_buffer_stack_top) > 0) - --(yy_buffer_stack_top); - - if (YY_CURRENT_BUFFER) { - yy_load_buffer_state( ); - (yy_did_buffer_switch_on_eof) = 1; - } -} - -/* Allocates the stack if it does not exist. - * Guarantees space for at least one push. - */ -static void yyensure_buffer_stack (void) -{ - int num_to_alloc; - - if (!(yy_buffer_stack)) { - - /* First allocation is just for 2 elements, since we don't know if this - * scanner will even need a stack. We use 2 instead of 1 to avoid an - * immediate realloc on the next call. - */ - num_to_alloc = 1; - (yy_buffer_stack) = (struct yy_buffer_state**)yyalloc - (num_to_alloc * sizeof(struct yy_buffer_state*) - ); - - memset((yy_buffer_stack), 0, num_to_alloc * sizeof(struct yy_buffer_state*)); - - (yy_buffer_stack_max) = num_to_alloc; - (yy_buffer_stack_top) = 0; - return; - } - - if ((yy_buffer_stack_top) >= ((yy_buffer_stack_max)) - 1){ - - /* Increase the buffer to prepare for a possible push. 
*/ - int grow_size = 8 /* arbitrary grow size */; - - num_to_alloc = (yy_buffer_stack_max) + grow_size; - (yy_buffer_stack) = (struct yy_buffer_state**)yyrealloc - ((yy_buffer_stack), - num_to_alloc * sizeof(struct yy_buffer_state*) - ); - - /* zero only the new slots.*/ - memset((yy_buffer_stack) + (yy_buffer_stack_max), 0, grow_size * sizeof(struct yy_buffer_state*)); - (yy_buffer_stack_max) = num_to_alloc; - } -} - -#ifndef YY_EXIT_FAILURE -#define YY_EXIT_FAILURE 2 -#endif - -static void yy_fatal_error (yyconst char* msg ) -{ - (void) fprintf( stderr, "%s\n", msg ); - exit( YY_EXIT_FAILURE ); -} - -/* Redefine yyless() so it works in section 3 code. */ - -#undef yyless -#define yyless(n) \ - do \ - { \ - /* Undo effects of setting up yytext. */ \ - int yyless_macro_arg = (n); \ - YY_LESS_LINENO(yyless_macro_arg);\ - yytext[yyleng] = (yy_hold_char); \ - (yy_c_buf_p) = yytext + yyless_macro_arg; \ - (yy_hold_char) = *(yy_c_buf_p); \ - *(yy_c_buf_p) = '\0'; \ - yyleng = yyless_macro_arg; \ - } \ - while ( 0 ) - -/* Accessor methods (get/set functions) to struct members. */ - -/** Get the current line number. - * - */ -int yyget_lineno (void) -{ - - return yylineno; -} - -/** Get the input stream. - * - */ -FILE *yyget_in (void) -{ - return yyin; -} - -/** Get the output stream. - * - */ -FILE *yyget_out (void) -{ - return yyout; -} - -/** Get the length of the current token. - * - */ -int yyget_leng (void) -{ - return yyleng; -} - -/** Get the current token. - * - */ - -char *yyget_text (void) -{ - return yytext; -} - -/** Set the current line number. - * @param line_number - * - */ -void yyset_lineno (int line_number ) -{ - - yylineno = line_number; -} - -/** Set the input stream. This does not discard the current - * input buffer. - * @param in_str A readable stream. 
- * - * @see yy_switch_to_buffer - */ -void yyset_in (FILE * in_str ) -{ - yyin = in_str ; -} - -void yyset_out (FILE * out_str ) -{ - yyout = out_str ; -} - -int yyget_debug (void) -{ - return yy_flex_debug; -} - -void yyset_debug (int bdebug ) -{ - yy_flex_debug = bdebug ; -} - -/* yylex_destroy is for both reentrant and non-reentrant scanners. */ -int yylex_destroy (void) -{ - - /* Pop the buffer stack, destroying each element. */ - while(YY_CURRENT_BUFFER){ - yy_delete_buffer(YY_CURRENT_BUFFER ); - YY_CURRENT_BUFFER_LVALUE = NULL; - yypop_buffer_state(); - } - - /* Destroy the stack itself. */ - yyfree((yy_buffer_stack) ); - (yy_buffer_stack) = NULL; - - return 0; -} - -/* - * Internal utility routines. - */ - -#ifndef yytext_ptr -static void yy_flex_strncpy (char* s1, yyconst char * s2, int n ) -{ - register int i; - for ( i = 0; i < n; ++i ) - s1[i] = s2[i]; -} -#endif - -#ifdef YY_NEED_STRLEN -static int yy_flex_strlen (yyconst char * s ) -{ - register int n; - for ( n = 0; s[n]; ++n ) - ; - - return n; -} -#endif - -void *yyalloc (yy_size_t size ) -{ - return (void *) malloc( size ); -} - -void *yyrealloc (void * ptr, yy_size_t size ) -{ - /* The cast to (char *) in the following accommodates both - * implementations that use char* generic pointers, and those - * that use void* generic pointers. It works with the latter - * because both ANSI C and C++ allow castless assignment from - * any pointer type to void*, and deal with argument conversions - * as though doing an assignment. 
- */ - return (void *) realloc( (char *) ptr, size ); -} - -void yyfree (void * ptr ) -{ - free( (char *) ptr ); /* see yyrealloc() for (char *) cast */ -} - -#define YYTABLES_NAME "yytables" - -#undef YY_NEW_FILE -#undef YY_FLUSH_BUFFER -#undef yy_set_bol -#undef yy_new_buffer -#undef yy_set_interactive -#undef yytext_ptr -#undef YY_DO_BEFORE_ACTION - -#ifdef YY_DECL_IS_OURS -#undef YY_DECL_IS_OURS -#undef YY_DECL -#endif -#line 648 "pars0lex.l" - - - diff --git a/storage/innobase/pars/make_bison.sh b/storage/innobase/pars/make_bison.sh deleted file mode 100755 index c11456230c4..00000000000 --- a/storage/innobase/pars/make_bison.sh +++ /dev/null @@ -1,10 +0,0 @@ -#!/bin/bash -# -# generate parser files from bison input files. - -set -eu - -bison -d pars0grm.y -mv pars0grm.tab.c pars0grm.c -mv pars0grm.tab.h pars0grm.h -cp pars0grm.h ../include diff --git a/storage/innobase/pars/make_flex.sh b/storage/innobase/pars/make_flex.sh deleted file mode 100755 index c015327bf8c..00000000000 --- a/storage/innobase/pars/make_flex.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -# -# generate lexer files from flex input files. - -set -eu - -TMPFILE=_flex_tmp.c -OUTFILE=lexyy.c - -flex -o $TMPFILE pars0lex.l - -# AIX needs its includes done in a certain order, so include "univ.i" first -# to be sure we get it right. -echo '#include "univ.i"' > $OUTFILE - -# flex assigns a pointer to an int in one place without a cast, resulting in -# a warning on Win64. this adds the cast. -sed -e 's/int offset = (yy_c_buf_p) - (yytext_ptr);/int offset = (int)((yy_c_buf_p) - (yytext_ptr));/;' < $TMPFILE >> $OUTFILE - -rm $TMPFILE diff --git a/storage/innobase/pars/pars0grm.c b/storage/innobase/pars/pars0grm.c deleted file mode 100644 index 2e39b05bada..00000000000 --- a/storage/innobase/pars/pars0grm.c +++ /dev/null @@ -1,2571 +0,0 @@ -/* A Bison parser, made by GNU Bison 1.875d. 
*/ - -/* Skeleton parser for Yacc-like parsing with Bison, - Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -/* As a special exception, when this file is copied by Bison into a - Bison output file, you may use that output file without restriction. - This special exception was added by the Free Software Foundation - in version 1.24 of Bison. */ - -/* Written by Richard Stallman by simplifying the original so called - ``semantic'' parser. */ - -/* All symbols defined below should begin with yy or YY, to avoid - infringing on user name space. This should be done even for local - variables, as they might otherwise be expanded by user macros. - There are some unavoidable exceptions within include files to - define necessary library symbols; they are noted "INFRINGES ON - USER NAME SPACE" below. */ - -/* Identify Bison output. */ -#define YYBISON 1 - -/* Skeleton name. */ -#define YYSKELETON_NAME "yacc.c" - -/* Pure parsers. */ -#define YYPURE 0 - -/* Using locations. */ -#define YYLSP_NEEDED 0 - - - -/* Tokens. */ -#ifndef YYTOKENTYPE -# define YYTOKENTYPE - /* Put the tokens into the symbol table, so that GDB and other debuggers - know about them. 
*/ - enum yytokentype { - PARS_INT_LIT = 258, - PARS_FLOAT_LIT = 259, - PARS_STR_LIT = 260, - PARS_FIXBINARY_LIT = 261, - PARS_BLOB_LIT = 262, - PARS_NULL_LIT = 263, - PARS_ID_TOKEN = 264, - PARS_AND_TOKEN = 265, - PARS_OR_TOKEN = 266, - PARS_NOT_TOKEN = 267, - PARS_GE_TOKEN = 268, - PARS_LE_TOKEN = 269, - PARS_NE_TOKEN = 270, - PARS_PROCEDURE_TOKEN = 271, - PARS_IN_TOKEN = 272, - PARS_OUT_TOKEN = 273, - PARS_BINARY_TOKEN = 274, - PARS_BLOB_TOKEN = 275, - PARS_INT_TOKEN = 276, - PARS_INTEGER_TOKEN = 277, - PARS_FLOAT_TOKEN = 278, - PARS_CHAR_TOKEN = 279, - PARS_IS_TOKEN = 280, - PARS_BEGIN_TOKEN = 281, - PARS_END_TOKEN = 282, - PARS_IF_TOKEN = 283, - PARS_THEN_TOKEN = 284, - PARS_ELSE_TOKEN = 285, - PARS_ELSIF_TOKEN = 286, - PARS_LOOP_TOKEN = 287, - PARS_WHILE_TOKEN = 288, - PARS_RETURN_TOKEN = 289, - PARS_SELECT_TOKEN = 290, - PARS_SUM_TOKEN = 291, - PARS_COUNT_TOKEN = 292, - PARS_DISTINCT_TOKEN = 293, - PARS_FROM_TOKEN = 294, - PARS_WHERE_TOKEN = 295, - PARS_FOR_TOKEN = 296, - PARS_DDOT_TOKEN = 297, - PARS_READ_TOKEN = 298, - PARS_ORDER_TOKEN = 299, - PARS_BY_TOKEN = 300, - PARS_ASC_TOKEN = 301, - PARS_DESC_TOKEN = 302, - PARS_INSERT_TOKEN = 303, - PARS_INTO_TOKEN = 304, - PARS_VALUES_TOKEN = 305, - PARS_UPDATE_TOKEN = 306, - PARS_SET_TOKEN = 307, - PARS_DELETE_TOKEN = 308, - PARS_CURRENT_TOKEN = 309, - PARS_OF_TOKEN = 310, - PARS_CREATE_TOKEN = 311, - PARS_TABLE_TOKEN = 312, - PARS_INDEX_TOKEN = 313, - PARS_UNIQUE_TOKEN = 314, - PARS_CLUSTERED_TOKEN = 315, - PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316, - PARS_ON_TOKEN = 317, - PARS_ASSIGN_TOKEN = 318, - PARS_DECLARE_TOKEN = 319, - PARS_CURSOR_TOKEN = 320, - PARS_SQL_TOKEN = 321, - PARS_OPEN_TOKEN = 322, - PARS_FETCH_TOKEN = 323, - PARS_CLOSE_TOKEN = 324, - PARS_NOTFOUND_TOKEN = 325, - PARS_TO_CHAR_TOKEN = 326, - PARS_TO_NUMBER_TOKEN = 327, - PARS_TO_BINARY_TOKEN = 328, - PARS_BINARY_TO_NUMBER_TOKEN = 329, - PARS_SUBSTR_TOKEN = 330, - PARS_REPLSTR_TOKEN = 331, - PARS_CONCAT_TOKEN = 332, - PARS_INSTR_TOKEN = 333, - 
PARS_LENGTH_TOKEN = 334, - PARS_SYSDATE_TOKEN = 335, - PARS_PRINTF_TOKEN = 336, - PARS_ASSERT_TOKEN = 337, - PARS_RND_TOKEN = 338, - PARS_RND_STR_TOKEN = 339, - PARS_ROW_PRINTF_TOKEN = 340, - PARS_COMMIT_TOKEN = 341, - PARS_ROLLBACK_TOKEN = 342, - PARS_WORK_TOKEN = 343, - PARS_UNSIGNED_TOKEN = 344, - PARS_EXIT_TOKEN = 345, - PARS_FUNCTION_TOKEN = 346, - PARS_LOCK_TOKEN = 347, - PARS_SHARE_TOKEN = 348, - PARS_MODE_TOKEN = 349, - NEG = 350 - }; -#endif -#define PARS_INT_LIT 258 -#define PARS_FLOAT_LIT 259 -#define PARS_STR_LIT 260 -#define PARS_FIXBINARY_LIT 261 -#define PARS_BLOB_LIT 262 -#define PARS_NULL_LIT 263 -#define PARS_ID_TOKEN 264 -#define PARS_AND_TOKEN 265 -#define PARS_OR_TOKEN 266 -#define PARS_NOT_TOKEN 267 -#define PARS_GE_TOKEN 268 -#define PARS_LE_TOKEN 269 -#define PARS_NE_TOKEN 270 -#define PARS_PROCEDURE_TOKEN 271 -#define PARS_IN_TOKEN 272 -#define PARS_OUT_TOKEN 273 -#define PARS_BINARY_TOKEN 274 -#define PARS_BLOB_TOKEN 275 -#define PARS_INT_TOKEN 276 -#define PARS_INTEGER_TOKEN 277 -#define PARS_FLOAT_TOKEN 278 -#define PARS_CHAR_TOKEN 279 -#define PARS_IS_TOKEN 280 -#define PARS_BEGIN_TOKEN 281 -#define PARS_END_TOKEN 282 -#define PARS_IF_TOKEN 283 -#define PARS_THEN_TOKEN 284 -#define PARS_ELSE_TOKEN 285 -#define PARS_ELSIF_TOKEN 286 -#define PARS_LOOP_TOKEN 287 -#define PARS_WHILE_TOKEN 288 -#define PARS_RETURN_TOKEN 289 -#define PARS_SELECT_TOKEN 290 -#define PARS_SUM_TOKEN 291 -#define PARS_COUNT_TOKEN 292 -#define PARS_DISTINCT_TOKEN 293 -#define PARS_FROM_TOKEN 294 -#define PARS_WHERE_TOKEN 295 -#define PARS_FOR_TOKEN 296 -#define PARS_DDOT_TOKEN 297 -#define PARS_READ_TOKEN 298 -#define PARS_ORDER_TOKEN 299 -#define PARS_BY_TOKEN 300 -#define PARS_ASC_TOKEN 301 -#define PARS_DESC_TOKEN 302 -#define PARS_INSERT_TOKEN 303 -#define PARS_INTO_TOKEN 304 -#define PARS_VALUES_TOKEN 305 -#define PARS_UPDATE_TOKEN 306 -#define PARS_SET_TOKEN 307 -#define PARS_DELETE_TOKEN 308 -#define PARS_CURRENT_TOKEN 309 -#define PARS_OF_TOKEN 310 -#define 
PARS_CREATE_TOKEN 311 -#define PARS_TABLE_TOKEN 312 -#define PARS_INDEX_TOKEN 313 -#define PARS_UNIQUE_TOKEN 314 -#define PARS_CLUSTERED_TOKEN 315 -#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316 -#define PARS_ON_TOKEN 317 -#define PARS_ASSIGN_TOKEN 318 -#define PARS_DECLARE_TOKEN 319 -#define PARS_CURSOR_TOKEN 320 -#define PARS_SQL_TOKEN 321 -#define PARS_OPEN_TOKEN 322 -#define PARS_FETCH_TOKEN 323 -#define PARS_CLOSE_TOKEN 324 -#define PARS_NOTFOUND_TOKEN 325 -#define PARS_TO_CHAR_TOKEN 326 -#define PARS_TO_NUMBER_TOKEN 327 -#define PARS_TO_BINARY_TOKEN 328 -#define PARS_BINARY_TO_NUMBER_TOKEN 329 -#define PARS_SUBSTR_TOKEN 330 -#define PARS_REPLSTR_TOKEN 331 -#define PARS_CONCAT_TOKEN 332 -#define PARS_INSTR_TOKEN 333 -#define PARS_LENGTH_TOKEN 334 -#define PARS_SYSDATE_TOKEN 335 -#define PARS_PRINTF_TOKEN 336 -#define PARS_ASSERT_TOKEN 337 -#define PARS_RND_TOKEN 338 -#define PARS_RND_STR_TOKEN 339 -#define PARS_ROW_PRINTF_TOKEN 340 -#define PARS_COMMIT_TOKEN 341 -#define PARS_ROLLBACK_TOKEN 342 -#define PARS_WORK_TOKEN 343 -#define PARS_UNSIGNED_TOKEN 344 -#define PARS_EXIT_TOKEN 345 -#define PARS_FUNCTION_TOKEN 346 -#define PARS_LOCK_TOKEN 347 -#define PARS_SHARE_TOKEN 348 -#define PARS_MODE_TOKEN 349 -#define NEG 350 - - - - -/* Copy the first part of user declarations. */ -#line 13 "pars0grm.y" - -/* The value of the semantic attribute is a pointer to a query tree node -que_node_t */ - -#include "univ.i" -#include <math.h> /* Can't be before univ.i */ -#include "pars0pars.h" -#include "mem0mem.h" -#include "que0types.h" -#include "que0que.h" -#include "row0sel.h" - -#define YYSTYPE que_node_t* - -/* #define __STDC__ */ - -int -yylex(void); - - -/* Enabling traces. */ -#ifndef YYDEBUG -# define YYDEBUG 0 -#endif - -/* Enabling verbose error messages. */ -#ifdef YYERROR_VERBOSE -# undef YYERROR_VERBOSE -# define YYERROR_VERBOSE 1 -#else -# define YYERROR_VERBOSE 0 -#endif - -#if ! defined (YYSTYPE) && ! 
defined (YYSTYPE_IS_DECLARED) -typedef int YYSTYPE; -# define yystype YYSTYPE /* obsolescent; will be withdrawn */ -# define YYSTYPE_IS_DECLARED 1 -# define YYSTYPE_IS_TRIVIAL 1 -#endif - - - -/* Copy the second part of user declarations. */ - - -/* Line 214 of yacc.c. */ -#line 297 "pars0grm.tab.c" - -#if ! defined (yyoverflow) || YYERROR_VERBOSE - -# ifndef YYFREE -# define YYFREE free -# endif -# ifndef YYMALLOC -# define YYMALLOC malloc -# endif - -/* The parser invokes alloca or malloc; define the necessary symbols. */ - -# ifdef YYSTACK_USE_ALLOCA -# if YYSTACK_USE_ALLOCA -# define YYSTACK_ALLOC alloca -# endif -# else -# if defined (alloca) || defined (_ALLOCA_H) -# define YYSTACK_ALLOC alloca -# else -# ifdef __GNUC__ -# define YYSTACK_ALLOC __builtin_alloca -# endif -# endif -# endif - -# ifdef YYSTACK_ALLOC - /* Pacify GCC's `empty if-body' warning. */ -# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0) -# else -# if defined (__STDC__) || defined (__cplusplus) -# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ -# define YYSIZE_T size_t -# endif -# define YYSTACK_ALLOC YYMALLOC -# define YYSTACK_FREE YYFREE -# endif -#endif /* ! defined (yyoverflow) || YYERROR_VERBOSE */ - - -#if (! defined (yyoverflow) \ - && (! defined (__cplusplus) \ - || (defined (YYSTYPE_IS_TRIVIAL) && YYSTYPE_IS_TRIVIAL))) - -/* A type that is properly aligned for any stack member. */ -union yyalloc -{ - short int yyss; - YYSTYPE yyvs; - }; - -/* The size of the maximum gap between one aligned stack and the next. */ -# define YYSTACK_GAP_MAXIMUM (sizeof (union yyalloc) - 1) - -/* The size of an array large to enough to hold all stacks, each with - N elements. */ -# define YYSTACK_BYTES(N) \ - ((N) * (sizeof (short int) + sizeof (YYSTYPE)) \ - + YYSTACK_GAP_MAXIMUM) - -/* Copy COUNT objects from FROM to TO. The source and destination do - not overlap. 
*/ -# ifndef YYCOPY -# if defined (__GNUC__) && 1 < __GNUC__ -# define YYCOPY(To, From, Count) \ - __builtin_memcpy (To, From, (Count) * sizeof (*(From))) -# else -# define YYCOPY(To, From, Count) \ - do \ - { \ - register YYSIZE_T yyi; \ - for (yyi = 0; yyi < (Count); yyi++) \ - (To)[yyi] = (From)[yyi]; \ - } \ - while (0) -# endif -# endif - -/* Relocate STACK from its old location to the new one. The - local variables YYSIZE and YYSTACKSIZE give the old and new number of - elements in the stack, and YYPTR gives the new location of the - stack. Advance YYPTR to a properly aligned location for the next - stack. */ -# define YYSTACK_RELOCATE(Stack) \ - do \ - { \ - YYSIZE_T yynewbytes; \ - YYCOPY (&yyptr->Stack, Stack, yysize); \ - Stack = &yyptr->Stack; \ - yynewbytes = yystacksize * sizeof (*Stack) + YYSTACK_GAP_MAXIMUM; \ - yyptr += yynewbytes / sizeof (*yyptr); \ - } \ - while (0) - -#endif - -#if defined (__STDC__) || defined (__cplusplus) - typedef signed char yysigned_char; -#else - typedef short int yysigned_char; -#endif - -/* YYFINAL -- State number of the termination state. */ -#define YYFINAL 5 -/* YYLAST -- Last index in YYTABLE. */ -#define YYLAST 752 - -/* YYNTOKENS -- Number of terminals. */ -#define YYNTOKENS 111 -/* YYNNTS -- Number of nonterminals. */ -#define YYNNTS 70 -/* YYNRULES -- Number of rules. */ -#define YYNRULES 175 -/* YYNRULES -- Number of states. */ -#define YYNSTATES 339 - -/* YYTRANSLATE(YYLEX) -- Bison symbol number corresponding to YYLEX. */ -#define YYUNDEFTOK 2 -#define YYMAXUTOK 350 - -#define YYTRANSLATE(YYX) \ - ((unsigned int) (YYX) <= YYMAXUTOK ? yytranslate[YYX] : YYUNDEFTOK) - -/* YYTRANSLATE[YYLEX] -- Bison symbol number corresponding to YYLEX. 
*/ -static const unsigned char yytranslate[] = -{ - 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 103, 2, 2, - 105, 106, 100, 99, 108, 98, 2, 101, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 104, - 96, 95, 97, 107, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 109, 2, 110, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, - 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, - 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, - 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, - 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, - 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, - 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, - 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, - 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, - 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, - 102 -}; - -#if YYDEBUG -/* YYPRHS[YYN] -- Index of the first RHS symbol of rule number YYN in - YYRHS. 
*/ -static const unsigned short int yyprhs[] = -{ - 0, 0, 3, 6, 8, 11, 14, 17, 20, 23, - 26, 29, 32, 35, 38, 41, 44, 47, 50, 53, - 56, 59, 62, 65, 68, 71, 73, 76, 78, 83, - 85, 87, 89, 91, 93, 95, 97, 101, 105, 109, - 113, 116, 120, 124, 128, 132, 136, 140, 144, 148, - 152, 155, 159, 163, 165, 167, 169, 171, 173, 175, - 177, 179, 181, 183, 185, 186, 188, 192, 199, 204, - 206, 208, 210, 214, 216, 220, 221, 223, 227, 228, - 230, 234, 236, 241, 247, 252, 253, 255, 259, 261, - 265, 267, 268, 271, 272, 275, 276, 281, 282, 284, - 286, 287, 292, 301, 305, 311, 314, 318, 320, 324, - 329, 334, 337, 340, 344, 347, 350, 353, 357, 362, - 364, 367, 368, 371, 373, 381, 388, 399, 401, 403, - 406, 409, 414, 419, 425, 427, 431, 432, 436, 437, - 439, 440, 443, 444, 446, 454, 456, 460, 461, 463, - 464, 466, 477, 480, 483, 485, 487, 489, 491, 493, - 497, 501, 502, 504, 508, 512, 513, 515, 518, 525, - 530, 532, 534, 535, 537, 540 -}; - -/* YYRHS -- A `-1'-separated list of the rules' RHS. */ -static const short int yyrhs[] = -{ - 112, 0, -1, 180, 104, -1, 118, -1, 119, 104, - -1, 151, 104, -1, 152, 104, -1, 153, 104, -1, - 150, 104, -1, 154, 104, -1, 146, 104, -1, 133, - 104, -1, 135, 104, -1, 145, 104, -1, 143, 104, - -1, 144, 104, -1, 140, 104, -1, 141, 104, -1, - 155, 104, -1, 157, 104, -1, 156, 104, -1, 169, - 104, -1, 170, 104, -1, 164, 104, -1, 168, 104, - -1, 113, -1, 114, 113, -1, 9, -1, 116, 105, - 124, 106, -1, 3, -1, 4, -1, 5, -1, 6, - -1, 7, -1, 8, -1, 66, -1, 115, 99, 115, - -1, 115, 98, 115, -1, 115, 100, 115, -1, 115, - 101, 115, -1, 98, 115, -1, 105, 115, 106, -1, - 115, 95, 115, -1, 115, 96, 115, -1, 115, 97, - 115, -1, 115, 13, 115, -1, 115, 14, 115, -1, - 115, 15, 115, -1, 115, 10, 115, -1, 115, 11, - 115, -1, 12, 115, -1, 9, 103, 70, -1, 66, - 103, 70, -1, 71, -1, 72, -1, 73, -1, 74, - -1, 75, -1, 77, -1, 78, -1, 79, -1, 80, - -1, 83, -1, 84, -1, -1, 107, -1, 117, 108, - 107, -1, 109, 9, 105, 117, 106, 110, -1, 120, - 105, 124, 106, -1, 76, -1, 81, -1, 82, -1, - 9, 
105, 106, -1, 9, -1, 122, 108, 9, -1, - -1, 9, -1, 123, 108, 9, -1, -1, 115, -1, - 124, 108, 115, -1, 115, -1, 37, 105, 100, 106, - -1, 37, 105, 38, 9, 106, -1, 36, 105, 115, - 106, -1, -1, 125, -1, 126, 108, 125, -1, 100, - -1, 126, 49, 123, -1, 126, -1, -1, 40, 115, - -1, -1, 41, 51, -1, -1, 92, 17, 93, 94, - -1, -1, 46, -1, 47, -1, -1, 44, 45, 9, - 131, -1, 35, 127, 39, 122, 128, 129, 130, 132, - -1, 48, 49, 9, -1, 134, 50, 105, 124, 106, - -1, 134, 133, -1, 9, 95, 115, -1, 136, -1, - 137, 108, 136, -1, 40, 54, 55, 9, -1, 51, - 9, 52, 137, -1, 139, 128, -1, 139, 138, -1, - 53, 39, 9, -1, 142, 128, -1, 142, 138, -1, - 85, 133, -1, 9, 63, 115, -1, 31, 115, 29, - 114, -1, 147, -1, 148, 147, -1, -1, 30, 114, - -1, 148, -1, 28, 115, 29, 114, 149, 27, 28, - -1, 33, 115, 32, 114, 27, 32, -1, 41, 9, - 17, 115, 42, 115, 32, 114, 27, 32, -1, 90, - -1, 34, -1, 67, 9, -1, 69, 9, -1, 68, - 9, 49, 123, -1, 68, 9, 49, 121, -1, 9, - 171, 160, 161, 162, -1, 158, -1, 159, 108, 158, - -1, -1, 105, 3, 106, -1, -1, 89, -1, -1, - 12, 8, -1, -1, 61, -1, 56, 57, 9, 105, - 159, 106, 163, -1, 9, -1, 165, 108, 9, -1, - -1, 59, -1, -1, 60, -1, 56, 166, 167, 58, - 9, 62, 9, 105, 165, 106, -1, 86, 88, -1, - 87, 88, -1, 21, -1, 22, -1, 24, -1, 19, - -1, 20, -1, 9, 17, 171, -1, 9, 18, 171, - -1, -1, 172, -1, 173, 108, 172, -1, 9, 171, - 104, -1, -1, 174, -1, 175, 174, -1, 64, 65, - 9, 25, 133, 104, -1, 64, 91, 9, 104, -1, - 176, -1, 177, -1, -1, 178, -1, 179, 178, -1, - 16, 9, 105, 173, 106, 25, 175, 179, 26, 114, - 27, -1 -}; - -/* YYRLINE[YYN] -- source line where rule number YYN was defined. 
*/ -static const unsigned short int yyrline[] = -{ - 0, 138, 138, 141, 142, 143, 144, 145, 146, 147, - 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, - 158, 159, 160, 161, 162, 166, 167, 172, 173, 175, - 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, - 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, - 196, 197, 199, 204, 205, 206, 207, 209, 210, 211, - 212, 213, 214, 215, 218, 220, 221, 225, 230, 235, - 236, 237, 241, 245, 246, 251, 252, 253, 258, 259, - 260, 264, 265, 270, 276, 283, 284, 285, 290, 292, - 294, 298, 299, 303, 304, 309, 310, 315, 316, 317, - 321, 322, 327, 337, 342, 344, 349, 353, 354, 359, - 365, 372, 377, 382, 388, 393, 398, 403, 408, 414, - 415, 420, 421, 423, 427, 434, 440, 448, 452, 456, - 462, 468, 470, 475, 480, 481, 486, 487, 492, 493, - 499, 500, 506, 507, 513, 519, 520, 525, 526, 530, - 531, 535, 543, 548, 553, 554, 555, 556, 557, 561, - 564, 570, 571, 572, 577, 581, 583, 584, 588, 594, - 599, 600, 603, 605, 606, 610 -}; -#endif - -#if YYDEBUG || YYERROR_VERBOSE -/* YYTNME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. - First, the terminals, then, starting at YYNTOKENS, nonterminals. 
*/ -static const char *const yytname[] = -{ - "$end", "error", "$undefined", "PARS_INT_LIT", "PARS_FLOAT_LIT", - "PARS_STR_LIT", "PARS_FIXBINARY_LIT", "PARS_BLOB_LIT", "PARS_NULL_LIT", - "PARS_ID_TOKEN", "PARS_AND_TOKEN", "PARS_OR_TOKEN", "PARS_NOT_TOKEN", - "PARS_GE_TOKEN", "PARS_LE_TOKEN", "PARS_NE_TOKEN", - "PARS_PROCEDURE_TOKEN", "PARS_IN_TOKEN", "PARS_OUT_TOKEN", - "PARS_BINARY_TOKEN", "PARS_BLOB_TOKEN", "PARS_INT_TOKEN", - "PARS_INTEGER_TOKEN", "PARS_FLOAT_TOKEN", "PARS_CHAR_TOKEN", - "PARS_IS_TOKEN", "PARS_BEGIN_TOKEN", "PARS_END_TOKEN", "PARS_IF_TOKEN", - "PARS_THEN_TOKEN", "PARS_ELSE_TOKEN", "PARS_ELSIF_TOKEN", - "PARS_LOOP_TOKEN", "PARS_WHILE_TOKEN", "PARS_RETURN_TOKEN", - "PARS_SELECT_TOKEN", "PARS_SUM_TOKEN", "PARS_COUNT_TOKEN", - "PARS_DISTINCT_TOKEN", "PARS_FROM_TOKEN", "PARS_WHERE_TOKEN", - "PARS_FOR_TOKEN", "PARS_DDOT_TOKEN", "PARS_READ_TOKEN", - "PARS_ORDER_TOKEN", "PARS_BY_TOKEN", "PARS_ASC_TOKEN", "PARS_DESC_TOKEN", - "PARS_INSERT_TOKEN", "PARS_INTO_TOKEN", "PARS_VALUES_TOKEN", - "PARS_UPDATE_TOKEN", "PARS_SET_TOKEN", "PARS_DELETE_TOKEN", - "PARS_CURRENT_TOKEN", "PARS_OF_TOKEN", "PARS_CREATE_TOKEN", - "PARS_TABLE_TOKEN", "PARS_INDEX_TOKEN", "PARS_UNIQUE_TOKEN", - "PARS_CLUSTERED_TOKEN", "PARS_DOES_NOT_FIT_IN_MEM_TOKEN", - "PARS_ON_TOKEN", "PARS_ASSIGN_TOKEN", "PARS_DECLARE_TOKEN", - "PARS_CURSOR_TOKEN", "PARS_SQL_TOKEN", "PARS_OPEN_TOKEN", - "PARS_FETCH_TOKEN", "PARS_CLOSE_TOKEN", "PARS_NOTFOUND_TOKEN", - "PARS_TO_CHAR_TOKEN", "PARS_TO_NUMBER_TOKEN", "PARS_TO_BINARY_TOKEN", - "PARS_BINARY_TO_NUMBER_TOKEN", "PARS_SUBSTR_TOKEN", "PARS_REPLSTR_TOKEN", - "PARS_CONCAT_TOKEN", "PARS_INSTR_TOKEN", "PARS_LENGTH_TOKEN", - "PARS_SYSDATE_TOKEN", "PARS_PRINTF_TOKEN", "PARS_ASSERT_TOKEN", - "PARS_RND_TOKEN", "PARS_RND_STR_TOKEN", "PARS_ROW_PRINTF_TOKEN", - "PARS_COMMIT_TOKEN", "PARS_ROLLBACK_TOKEN", "PARS_WORK_TOKEN", - "PARS_UNSIGNED_TOKEN", "PARS_EXIT_TOKEN", "PARS_FUNCTION_TOKEN", - "PARS_LOCK_TOKEN", "PARS_SHARE_TOKEN", "PARS_MODE_TOKEN", "'='", "'<'", - 
"'>'", "'-'", "'+'", "'*'", "'/'", "NEG", "'%'", "';'", "'('", "')'", - "'?'", "','", "'{'", "'}'", "$accept", "top_statement", "statement", - "statement_list", "exp", "function_name", "question_mark_list", - "stored_procedure_call", "predefined_procedure_call", - "predefined_procedure_name", "user_function_call", "table_list", - "variable_list", "exp_list", "select_item", "select_item_list", - "select_list", "search_condition", "for_update_clause", - "lock_shared_clause", "order_direction", "order_by_clause", - "select_statement", "insert_statement_start", "insert_statement", - "column_assignment", "column_assignment_list", "cursor_positioned", - "update_statement_start", "update_statement_searched", - "update_statement_positioned", "delete_statement_start", - "delete_statement_searched", "delete_statement_positioned", - "row_printf_statement", "assignment_statement", "elsif_element", - "elsif_list", "else_part", "if_statement", "while_statement", - "for_statement", "exit_statement", "return_statement", - "open_cursor_statement", "close_cursor_statement", "fetch_statement", - "column_def", "column_def_list", "opt_column_len", "opt_unsigned", - "opt_not_null", "not_fit_in_memory", "create_table", "column_list", - "unique_def", "clustered_def", "create_index", "commit_statement", - "rollback_statement", "type_name", "parameter_declaration", - "parameter_declaration_list", "variable_declaration", - "variable_declaration_list", "cursor_declaration", - "function_declaration", "declaration", "declaration_list", - "procedure_definition", 0 -}; -#endif - -# ifdef YYPRINT -/* YYTOKNUM[YYLEX-NUM] -- Internal token number corresponding to - token YYLEX-NUM. 
*/ -static const unsigned short int yytoknum[] = -{ - 0, 256, 257, 258, 259, 260, 261, 262, 263, 264, - 265, 266, 267, 268, 269, 270, 271, 272, 273, 274, - 275, 276, 277, 278, 279, 280, 281, 282, 283, 284, - 285, 286, 287, 288, 289, 290, 291, 292, 293, 294, - 295, 296, 297, 298, 299, 300, 301, 302, 303, 304, - 305, 306, 307, 308, 309, 310, 311, 312, 313, 314, - 315, 316, 317, 318, 319, 320, 321, 322, 323, 324, - 325, 326, 327, 328, 329, 330, 331, 332, 333, 334, - 335, 336, 337, 338, 339, 340, 341, 342, 343, 344, - 345, 346, 347, 348, 349, 61, 60, 62, 45, 43, - 42, 47, 350, 37, 59, 40, 41, 63, 44, 123, - 125 -}; -# endif - -/* YYR1[YYN] -- Symbol number of symbol that rule YYN derives. */ -static const unsigned char yyr1[] = -{ - 0, 111, 112, 113, 113, 113, 113, 113, 113, 113, - 113, 113, 113, 113, 113, 113, 113, 113, 113, 113, - 113, 113, 113, 113, 113, 114, 114, 115, 115, 115, - 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, - 115, 115, 115, 115, 115, 115, 115, 115, 115, 115, - 115, 115, 115, 116, 116, 116, 116, 116, 116, 116, - 116, 116, 116, 116, 117, 117, 117, 118, 119, 120, - 120, 120, 121, 122, 122, 123, 123, 123, 124, 124, - 124, 125, 125, 125, 125, 126, 126, 126, 127, 127, - 127, 128, 128, 129, 129, 130, 130, 131, 131, 131, - 132, 132, 133, 134, 135, 135, 136, 137, 137, 138, - 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, - 148, 149, 149, 149, 150, 151, 152, 153, 154, 155, - 156, 157, 157, 158, 159, 159, 160, 160, 161, 161, - 162, 162, 163, 163, 164, 165, 165, 166, 166, 167, - 167, 168, 169, 170, 171, 171, 171, 171, 171, 172, - 172, 173, 173, 173, 174, 175, 175, 175, 176, 177, - 178, 178, 179, 179, 179, 180 -}; - -/* YYR2[YYN] -- Number of symbols composing right hand side of rule YYN. 
*/ -static const unsigned char yyr2[] = -{ - 0, 2, 2, 1, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, - 2, 2, 2, 2, 2, 1, 2, 1, 4, 1, - 1, 1, 1, 1, 1, 1, 3, 3, 3, 3, - 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, - 2, 3, 3, 1, 1, 1, 1, 1, 1, 1, - 1, 1, 1, 1, 0, 1, 3, 6, 4, 1, - 1, 1, 3, 1, 3, 0, 1, 3, 0, 1, - 3, 1, 4, 5, 4, 0, 1, 3, 1, 3, - 1, 0, 2, 0, 2, 0, 4, 0, 1, 1, - 0, 4, 8, 3, 5, 2, 3, 1, 3, 4, - 4, 2, 2, 3, 2, 2, 2, 3, 4, 1, - 2, 0, 2, 1, 7, 6, 10, 1, 1, 2, - 2, 4, 4, 5, 1, 3, 0, 3, 0, 1, - 0, 2, 0, 1, 7, 1, 3, 0, 1, 0, - 1, 10, 2, 2, 1, 1, 1, 1, 1, 3, - 3, 0, 1, 3, 3, 0, 1, 2, 6, 4, - 1, 1, 0, 1, 2, 11 -}; - -/* YYDEFACT[STATE-NAME] -- Default rule to reduce with in state - STATE-NUM when YYTABLE doesn't specify something else to do. Zero - means the default is an error. */ -static const unsigned char yydefact[] = -{ - 0, 0, 0, 0, 0, 1, 2, 161, 0, 162, - 0, 0, 0, 0, 0, 157, 158, 154, 155, 156, - 159, 160, 165, 163, 0, 166, 172, 0, 0, 167, - 170, 171, 173, 0, 164, 0, 0, 0, 174, 0, - 0, 0, 0, 0, 128, 85, 0, 0, 0, 0, - 147, 0, 0, 0, 69, 70, 71, 0, 0, 0, - 127, 0, 25, 0, 3, 0, 0, 0, 0, 0, - 91, 0, 0, 91, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 169, 0, 29, 30, 31, 32, 33, 34, 27, - 0, 35, 53, 54, 55, 56, 57, 58, 59, 60, - 61, 62, 63, 0, 0, 0, 0, 0, 0, 0, - 88, 81, 86, 90, 0, 0, 0, 0, 0, 0, - 148, 149, 129, 0, 130, 116, 152, 153, 0, 175, - 26, 4, 78, 11, 0, 105, 12, 0, 111, 112, - 16, 17, 114, 115, 14, 15, 13, 10, 8, 5, - 6, 7, 9, 18, 20, 19, 23, 24, 21, 22, - 0, 117, 0, 50, 0, 40, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 78, 0, 0, 0, 75, 0, 0, 0, 103, 0, - 113, 0, 150, 0, 75, 64, 79, 0, 78, 0, - 92, 168, 51, 52, 41, 48, 49, 45, 46, 47, - 121, 42, 43, 44, 37, 36, 38, 39, 0, 0, - 0, 0, 0, 76, 89, 87, 73, 91, 0, 0, - 107, 110, 0, 0, 76, 132, 131, 65, 0, 68, - 0, 0, 0, 0, 0, 119, 123, 0, 28, 0, - 84, 0, 82, 0, 0, 0, 93, 0, 0, 0, - 0, 134, 0, 0, 0, 0, 0, 80, 104, 109, - 122, 0, 120, 0, 125, 83, 77, 74, 0, 95, - 0, 106, 108, 136, 142, 0, 0, 72, 67, 66, - 0, 
124, 94, 0, 100, 0, 0, 138, 143, 144, - 135, 0, 118, 0, 0, 102, 0, 0, 139, 140, - 0, 0, 0, 0, 137, 0, 133, 145, 0, 96, - 97, 126, 141, 151, 0, 98, 99, 101, 146 -}; - -/* YYDEFGOTO[NTERM-NUM]. */ -static const short int yydefgoto[] = -{ - -1, 2, 62, 63, 206, 116, 248, 64, 65, 66, - 245, 237, 234, 207, 122, 123, 124, 148, 289, 304, - 337, 315, 67, 68, 69, 240, 241, 149, 70, 71, - 72, 73, 74, 75, 76, 77, 255, 256, 257, 78, - 79, 80, 81, 82, 83, 84, 85, 271, 272, 307, - 319, 326, 309, 86, 328, 131, 203, 87, 88, 89, - 20, 9, 10, 25, 26, 30, 31, 32, 33, 3 -}; - -/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing - STATE-NUM. */ -#define YYPACT_NINF -177 -static const short int yypact[] = -{ - 28, 38, 54, -46, -29, -177, -177, 56, 50, -177, - -75, 8, 8, 46, 56, -177, -177, -177, -177, -177, - -177, -177, 63, -177, 8, -177, 2, -26, -51, -177, - -177, -177, -177, -13, -177, 71, 72, 587, -177, 57, - -21, 26, 272, 272, -177, 13, 91, 55, 96, 67, - -22, 99, 100, 103, -177, -177, -177, 75, 29, 35, - -177, 116, -177, 396, -177, 22, 23, 27, -9, 30, - 87, 31, 32, 87, 47, 49, 52, 58, 59, 60, - 61, 62, 65, 66, 74, 77, 78, 86, 89, 102, - 75, -177, 272, -177, -177, -177, -177, -177, -177, 39, - 272, 51, -177, -177, -177, -177, -177, -177, -177, -177, - -177, -177, -177, 272, 272, 361, 25, 489, 45, 90, - -177, 651, -177, -39, 93, 142, 124, 108, 152, 170, - -177, 131, -177, 143, -177, -177, -177, -177, 98, -177, - -177, -177, 272, -177, 110, -177, -177, 256, -177, -177, - -177, -177, -177, -177, -177, -177, -177, -177, -177, -177, - -177, -177, -177, -177, -177, -177, -177, -177, -177, -177, - 112, 651, 137, 101, 147, 204, 88, 272, 272, 272, - 272, 272, 587, 272, 272, 272, 272, 272, 272, 272, - 272, 587, 272, -30, 211, 168, 212, 272, -177, 213, - -177, 118, -177, 167, 217, 122, 651, -63, 272, 175, - 651, -177, -177, -177, -177, 101, 101, 21, 21, 651, - 332, 21, 21, 21, -6, -6, 204, 204, -60, 460, - 198, 222, 126, -177, 125, -177, -177, -33, 584, 140, - -177, 128, 228, 
229, 139, -177, 125, -177, -53, -177, - 272, -49, 240, 587, 272, -177, 224, 226, -177, 225, - -177, 150, -177, 258, 272, 260, 230, 272, 272, 213, - 8, -177, -45, 208, 166, 164, 176, 651, -177, -177, - 587, 631, -177, 254, -177, -177, -177, -177, 234, 194, - 638, 651, -177, 182, 227, 228, 280, -177, -177, -177, - 587, -177, -177, 273, 247, 587, 289, 214, -177, -177, - -177, 195, 587, 209, 261, -177, 524, 199, -177, 295, - 292, 215, 299, 279, -177, 304, -177, -177, -44, -177, - -8, -177, -177, -177, 305, -177, -177, -177, -177 -}; - -/* YYPGOTO[NTERM-NUM]. */ -static const short int yypgoto[] = -{ - -177, -177, -62, -176, -40, -177, -177, -177, -177, -177, - -177, -177, 109, -166, 120, -177, -177, -69, -177, -177, - -177, -177, -34, -177, -177, 48, -177, 243, -177, -177, - -177, -177, -177, -177, -177, -177, 64, -177, -177, -177, - -177, -177, -177, -177, -177, -177, -177, 24, -177, -177, - -177, -177, -177, -177, -177, -177, -177, -177, -177, -177, - -12, 307, -177, 297, -177, -177, -177, 285, -177, -177 -}; - -/* YYTABLE[YYPACT[STATE-NUM]]. What to do in state STATE-NUM. If - positive, shift that token. If negative, reduce the rule which - number is the opposite. If zero, do what YYDEFACT says. - If YYTABLE_NINF, syntax error. 
*/ -#define YYTABLE_NINF -1 -static const unsigned short int yytable[] = -{ - 21, 140, 115, 117, 152, 121, 220, 264, 231, 181, - 194, 24, 27, 37, 35, 229, 93, 94, 95, 96, - 97, 98, 99, 135, 228, 100, 45, 15, 16, 17, - 18, 13, 19, 14, 145, 129, 181, 130, 335, 336, - 36, 144, 251, 249, 1, 250, 258, 4, 250, 118, - 119, 28, 171, 275, 5, 276, 170, 278, 6, 250, - 173, 294, 333, 295, 334, 8, 28, 11, 12, 195, - 232, 22, 24, 175, 176, 265, 7, 280, 34, 101, - 39, 40, 90, 91, 102, 103, 104, 105, 106, 92, - 107, 108, 109, 110, 188, 189, 111, 112, 177, 178, - 125, 179, 180, 181, 126, 127, 128, 210, 132, 133, - 45, 113, 134, 120, 179, 180, 181, 136, 114, 186, - 187, 188, 189, 137, 312, 138, 141, 147, 142, 316, - 190, 143, 196, 198, 146, 150, 151, 215, 216, 217, - 218, 219, 172, 221, 222, 223, 224, 225, 226, 227, - 192, 154, 230, 155, 174, 121, 156, 238, 140, 197, - 199, 200, 157, 158, 159, 160, 161, 140, 266, 162, - 163, 93, 94, 95, 96, 97, 98, 99, 164, 201, - 100, 165, 166, 183, 184, 185, 186, 187, 188, 189, - 167, 202, 204, 168, 214, 193, 183, 184, 185, 186, - 187, 188, 189, 205, 118, 119, 169, 212, 177, 178, - 277, 179, 180, 181, 281, 208, 211, 213, 140, 181, - 233, 236, 239, 242, 210, 243, 244, 290, 291, 247, - 252, 261, 262, 263, 101, 268, 269, 270, 273, 102, - 103, 104, 105, 106, 274, 107, 108, 109, 110, 279, - 140, 111, 112, 283, 140, 254, 285, 284, 293, 93, - 94, 95, 96, 97, 98, 99, 113, 286, 100, 287, - 296, 288, 297, 114, 298, 93, 94, 95, 96, 97, - 98, 99, 301, 299, 100, 302, 303, 306, 308, 311, - 313, 314, 317, 183, 184, 185, 186, 187, 188, 189, - 320, 327, 321, 318, 260, 324, 322, 325, 330, 329, - 209, 331, 332, 246, 338, 235, 153, 292, 38, 310, - 282, 23, 101, 29, 0, 0, 0, 102, 103, 104, - 105, 106, 0, 107, 108, 109, 110, 0, 101, 111, - 112, 41, 0, 102, 103, 104, 105, 106, 0, 107, - 108, 109, 110, 0, 113, 111, 112, 0, 0, 0, - 42, 114, 253, 254, 0, 43, 44, 45, 0, 0, - 113, 177, 178, 46, 179, 180, 181, 114, 0, 0, - 47, 0, 0, 48, 0, 49, 0, 0, 50, 0, - 182, 0, 0, 0, 0, 
0, 0, 0, 0, 51, - 52, 53, 0, 0, 0, 41, 0, 0, 54, 0, - 0, 0, 0, 55, 56, 0, 0, 57, 58, 59, - 0, 0, 60, 139, 42, 0, 0, 0, 0, 43, - 44, 45, 0, 0, 0, 0, 0, 46, 0, 0, - 0, 61, 0, 0, 47, 0, 0, 48, 0, 49, - 0, 0, 50, 0, 0, 0, 183, 184, 185, 186, - 187, 188, 189, 51, 52, 53, 0, 0, 0, 41, - 0, 0, 54, 0, 0, 0, 0, 55, 56, 0, - 0, 57, 58, 59, 0, 0, 60, 259, 42, 0, - 0, 0, 0, 43, 44, 45, 0, 0, 0, 177, - 178, 46, 179, 180, 181, 61, 0, 0, 47, 0, - 0, 48, 0, 49, 0, 0, 50, 0, 0, 0, - 0, 191, 0, 0, 0, 0, 0, 51, 52, 53, - 0, 0, 0, 41, 0, 0, 54, 0, 0, 0, - 0, 55, 56, 0, 0, 57, 58, 59, 0, 0, - 60, 323, 42, 0, 0, 0, 0, 43, 44, 45, - 0, 0, 0, 0, 0, 46, 0, 0, 0, 61, - 0, 0, 47, 0, 0, 48, 0, 49, 0, 0, - 50, 0, 0, 0, 183, 184, 185, 186, 187, 188, - 189, 51, 52, 53, 177, 178, 41, 179, 180, 181, - 54, 0, 0, 0, 0, 55, 56, 0, 0, 57, - 58, 59, 0, 0, 60, 42, 0, 0, 0, 0, - 43, 44, 45, 0, 0, 0, 267, 0, 46, 0, - 0, 0, 0, 61, 0, 47, 0, 0, 48, 0, - 49, 177, 178, 50, 179, 180, 181, 0, 177, 178, - 0, 179, 180, 181, 51, 52, 53, 0, 0, 0, - 300, 177, 178, 54, 179, 180, 181, 0, 55, 56, - 305, 0, 57, 58, 59, 0, 0, 60, 0, 183, - 184, 185, 186, 187, 188, 189, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 61, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 183, 184, 185, 186, - 187, 188, 189, 183, 184, 185, 186, 187, 188, 189, - 0, 0, 0, 0, 0, 0, 183, 184, 185, 186, - 187, 188, 189 -}; - -static const short int yycheck[] = -{ - 12, 63, 42, 43, 73, 45, 182, 40, 38, 15, - 49, 9, 24, 26, 65, 191, 3, 4, 5, 6, - 7, 8, 9, 57, 190, 12, 35, 19, 20, 21, - 22, 106, 24, 108, 68, 57, 15, 59, 46, 47, - 91, 50, 208, 106, 16, 108, 106, 9, 108, 36, - 37, 64, 92, 106, 0, 108, 90, 106, 104, 108, - 100, 106, 106, 108, 108, 9, 64, 17, 18, 108, - 100, 25, 9, 113, 114, 108, 105, 253, 104, 66, - 9, 9, 25, 104, 71, 72, 73, 74, 75, 63, - 77, 78, 79, 80, 100, 101, 83, 84, 10, 11, - 9, 13, 14, 15, 49, 9, 39, 147, 9, 9, - 35, 98, 9, 100, 13, 14, 15, 88, 105, 98, - 99, 100, 101, 88, 300, 9, 104, 40, 105, 305, 
- 105, 104, 39, 9, 104, 104, 104, 177, 178, 179, - 180, 181, 103, 183, 184, 185, 186, 187, 188, 189, - 105, 104, 192, 104, 103, 195, 104, 197, 220, 17, - 52, 9, 104, 104, 104, 104, 104, 229, 237, 104, - 104, 3, 4, 5, 6, 7, 8, 9, 104, 9, - 12, 104, 104, 95, 96, 97, 98, 99, 100, 101, - 104, 60, 49, 104, 106, 105, 95, 96, 97, 98, - 99, 100, 101, 105, 36, 37, 104, 70, 10, 11, - 250, 13, 14, 15, 254, 105, 104, 70, 280, 15, - 9, 9, 9, 105, 264, 58, 9, 267, 268, 107, - 55, 9, 106, 108, 66, 95, 108, 9, 9, 71, - 72, 73, 74, 75, 105, 77, 78, 79, 80, 9, - 312, 83, 84, 27, 316, 31, 106, 32, 270, 3, - 4, 5, 6, 7, 8, 9, 98, 9, 12, 9, - 62, 41, 106, 105, 110, 3, 4, 5, 6, 7, - 8, 9, 28, 107, 12, 51, 92, 105, 61, 9, - 17, 44, 3, 95, 96, 97, 98, 99, 100, 101, - 105, 9, 93, 89, 106, 106, 45, 12, 9, 94, - 54, 32, 8, 204, 9, 195, 73, 269, 33, 295, - 256, 14, 66, 26, -1, -1, -1, 71, 72, 73, - 74, 75, -1, 77, 78, 79, 80, -1, 66, 83, - 84, 9, -1, 71, 72, 73, 74, 75, -1, 77, - 78, 79, 80, -1, 98, 83, 84, -1, -1, -1, - 28, 105, 30, 31, -1, 33, 34, 35, -1, -1, - 98, 10, 11, 41, 13, 14, 15, 105, -1, -1, - 48, -1, -1, 51, -1, 53, -1, -1, 56, -1, - 29, -1, -1, -1, -1, -1, -1, -1, -1, 67, - 68, 69, -1, -1, -1, 9, -1, -1, 76, -1, - -1, -1, -1, 81, 82, -1, -1, 85, 86, 87, - -1, -1, 90, 27, 28, -1, -1, -1, -1, 33, - 34, 35, -1, -1, -1, -1, -1, 41, -1, -1, - -1, 109, -1, -1, 48, -1, -1, 51, -1, 53, - -1, -1, 56, -1, -1, -1, 95, 96, 97, 98, - 99, 100, 101, 67, 68, 69, -1, -1, -1, 9, - -1, -1, 76, -1, -1, -1, -1, 81, 82, -1, - -1, 85, 86, 87, -1, -1, 90, 27, 28, -1, - -1, -1, -1, 33, 34, 35, -1, -1, -1, 10, - 11, 41, 13, 14, 15, 109, -1, -1, 48, -1, - -1, 51, -1, 53, -1, -1, 56, -1, -1, -1, - -1, 32, -1, -1, -1, -1, -1, 67, 68, 69, - -1, -1, -1, 9, -1, -1, 76, -1, -1, -1, - -1, 81, 82, -1, -1, 85, 86, 87, -1, -1, - 90, 27, 28, -1, -1, -1, -1, 33, 34, 35, - -1, -1, -1, -1, -1, 41, -1, -1, -1, 109, - -1, -1, 48, -1, -1, 51, -1, 53, -1, -1, - 56, -1, -1, -1, 95, 96, 97, 98, 99, 100, - 101, 67, 68, 69, 
10, 11, 9, 13, 14, 15, - 76, -1, -1, -1, -1, 81, 82, -1, -1, 85, - 86, 87, -1, -1, 90, 28, -1, -1, -1, -1, - 33, 34, 35, -1, -1, -1, 42, -1, 41, -1, - -1, -1, -1, 109, -1, 48, -1, -1, 51, -1, - 53, 10, 11, 56, 13, 14, 15, -1, 10, 11, - -1, 13, 14, 15, 67, 68, 69, -1, -1, -1, - 29, 10, 11, 76, 13, 14, 15, -1, 81, 82, - 32, -1, 85, 86, 87, -1, -1, 90, -1, 95, - 96, 97, 98, 99, 100, 101, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 109, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, - -1, -1, -1, -1, -1, -1, 95, 96, 97, 98, - 99, 100, 101, 95, 96, 97, 98, 99, 100, 101, - -1, -1, -1, -1, -1, -1, 95, 96, 97, 98, - 99, 100, 101 -}; - -/* YYSTOS[STATE-NUM] -- The (internal number of the) accessing - symbol of state STATE-NUM. */ -static const unsigned char yystos[] = -{ - 0, 16, 112, 180, 9, 0, 104, 105, 9, 172, - 173, 17, 18, 106, 108, 19, 20, 21, 22, 24, - 171, 171, 25, 172, 9, 174, 175, 171, 64, 174, - 176, 177, 178, 179, 104, 65, 91, 26, 178, 9, - 9, 9, 28, 33, 34, 35, 41, 48, 51, 53, - 56, 67, 68, 69, 76, 81, 82, 85, 86, 87, - 90, 109, 113, 114, 118, 119, 120, 133, 134, 135, - 139, 140, 141, 142, 143, 144, 145, 146, 150, 151, - 152, 153, 154, 155, 156, 157, 164, 168, 169, 170, - 25, 104, 63, 3, 4, 5, 6, 7, 8, 9, - 12, 66, 71, 72, 73, 74, 75, 77, 78, 79, - 80, 83, 84, 98, 105, 115, 116, 115, 36, 37, - 100, 115, 125, 126, 127, 9, 49, 9, 39, 57, - 59, 166, 9, 9, 9, 133, 88, 88, 9, 27, - 113, 104, 105, 104, 50, 133, 104, 40, 128, 138, - 104, 104, 128, 138, 104, 104, 104, 104, 104, 104, - 104, 104, 104, 104, 104, 104, 104, 104, 104, 104, - 133, 115, 103, 115, 103, 115, 115, 10, 11, 13, - 14, 15, 29, 95, 96, 97, 98, 99, 100, 101, - 105, 32, 105, 105, 49, 108, 39, 17, 9, 52, - 9, 9, 60, 167, 49, 105, 115, 124, 105, 54, - 115, 104, 70, 70, 106, 115, 115, 115, 115, 115, - 114, 115, 115, 115, 115, 115, 115, 115, 124, 114, - 115, 38, 100, 9, 123, 125, 9, 122, 115, 9, - 136, 137, 105, 58, 9, 121, 123, 107, 117, 106, - 108, 124, 55, 
30, 31, 147, 148, 149, 106, 27, - 106, 9, 106, 108, 40, 108, 128, 42, 95, 108, - 9, 158, 159, 9, 105, 106, 108, 115, 106, 9, - 114, 115, 147, 27, 32, 106, 9, 9, 41, 129, - 115, 115, 136, 171, 106, 108, 62, 106, 110, 107, - 29, 28, 51, 92, 130, 32, 105, 160, 61, 163, - 158, 9, 114, 17, 44, 132, 114, 3, 89, 161, - 105, 93, 45, 27, 106, 12, 162, 9, 165, 94, - 9, 32, 8, 106, 108, 46, 47, 131, 9 -}; - -#if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__) -# define YYSIZE_T __SIZE_TYPE__ -#endif -#if ! defined (YYSIZE_T) && defined (size_t) -# define YYSIZE_T size_t -#endif -#if ! defined (YYSIZE_T) -# if defined (__STDC__) || defined (__cplusplus) -# include <stddef.h> /* INFRINGES ON USER NAME SPACE */ -# define YYSIZE_T size_t -# endif -#endif -#if ! defined (YYSIZE_T) -# define YYSIZE_T unsigned int -#endif - -#define yyerrok (yyerrstatus = 0) -#define yyclearin (yychar = YYEMPTY) -#define YYEMPTY (-2) -#define YYEOF 0 - -#define YYACCEPT goto yyacceptlab -#define YYABORT goto yyabortlab -#define YYERROR goto yyerrorlab - - -/* Like YYERROR except do call yyerror. This remains here temporarily - to ease the transition to the new meaning of YYERROR, for GCC. - Once GCC version 2 has supplanted version 1, this can go. */ - -#define YYFAIL goto yyerrlab - -#define YYRECOVERING() (!!yyerrstatus) - -#define YYBACKUP(Token, Value) \ -do \ - if (yychar == YYEMPTY && yylen == 1) \ - { \ - yychar = (Token); \ - yylval = (Value); \ - yytoken = YYTRANSLATE (yychar); \ - YYPOPSTACK; \ - goto yybackup; \ - } \ - else \ - { \ - yyerror ("syntax error: cannot back up");\ - YYERROR; \ - } \ -while (0) - -#define YYTERROR 1 -#define YYERRCODE 256 - -/* YYLLOC_DEFAULT -- Compute the default location (before the actions - are run). 
*/ - -#ifndef YYLLOC_DEFAULT -# define YYLLOC_DEFAULT(Current, Rhs, N) \ - ((Current).first_line = (Rhs)[1].first_line, \ - (Current).first_column = (Rhs)[1].first_column, \ - (Current).last_line = (Rhs)[N].last_line, \ - (Current).last_column = (Rhs)[N].last_column) -#endif - -/* YYLEX -- calling `yylex' with the right arguments. */ - -#ifdef YYLEX_PARAM -# define YYLEX yylex (YYLEX_PARAM) -#else -# define YYLEX yylex () -#endif - -/* Enable debugging if requested. */ -#if YYDEBUG - -# ifndef YYFPRINTF -# include <stdio.h> /* INFRINGES ON USER NAME SPACE */ -# define YYFPRINTF fprintf -# endif - -# define YYDPRINTF(Args) \ -do { \ - if (yydebug) \ - YYFPRINTF Args; \ -} while (0) - -# define YYDSYMPRINT(Args) \ -do { \ - if (yydebug) \ - yysymprint Args; \ -} while (0) - -# define YYDSYMPRINTF(Title, Token, Value, Location) \ -do { \ - if (yydebug) \ - { \ - YYFPRINTF (stderr, "%s ", Title); \ - yysymprint (stderr, \ - Token, Value); \ - YYFPRINTF (stderr, "\n"); \ - } \ -} while (0) - -/*------------------------------------------------------------------. -| yy_stack_print -- Print the state stack from its BOTTOM up to its | -| TOP (included). | -`------------------------------------------------------------------*/ - -#if defined (__STDC__) || defined (__cplusplus) -static void -yy_stack_print (short int *bottom, short int *top) -#else -static void -yy_stack_print (bottom, top) - short int *bottom; - short int *top; -#endif -{ - YYFPRINTF (stderr, "Stack now"); - for (/* Nothing. */; bottom <= top; ++bottom) - YYFPRINTF (stderr, " %d", *bottom); - YYFPRINTF (stderr, "\n"); -} - -# define YY_STACK_PRINT(Bottom, Top) \ -do { \ - if (yydebug) \ - yy_stack_print ((Bottom), (Top)); \ -} while (0) - - -/*------------------------------------------------. -| Report that the YYRULE is going to be reduced. 
| -`------------------------------------------------*/ - -#if defined (__STDC__) || defined (__cplusplus) -static void -yy_reduce_print (int yyrule) -#else -static void -yy_reduce_print (yyrule) - int yyrule; -#endif -{ - int yyi; - unsigned int yylno = yyrline[yyrule]; - YYFPRINTF (stderr, "Reducing stack by rule %d (line %u), ", - yyrule - 1, yylno); - /* Print the symbols being reduced, and their result. */ - for (yyi = yyprhs[yyrule]; 0 <= yyrhs[yyi]; yyi++) - YYFPRINTF (stderr, "%s ", yytname [yyrhs[yyi]]); - YYFPRINTF (stderr, "-> %s\n", yytname [yyr1[yyrule]]); -} - -# define YY_REDUCE_PRINT(Rule) \ -do { \ - if (yydebug) \ - yy_reduce_print (Rule); \ -} while (0) - -/* Nonzero means print parse trace. It is left uninitialized so that - multiple parsers can coexist. */ -int yydebug; -#else /* !YYDEBUG */ -# define YYDPRINTF(Args) -# define YYDSYMPRINT(Args) -# define YYDSYMPRINTF(Title, Token, Value, Location) -# define YY_STACK_PRINT(Bottom, Top) -# define YY_REDUCE_PRINT(Rule) -#endif /* !YYDEBUG */ - - -/* YYINITDEPTH -- initial size of the parser's stacks. */ -#ifndef YYINITDEPTH -# define YYINITDEPTH 200 -#endif - -/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only - if the built-in stack extension method is used). - - Do not make this value too large; the results are undefined if - SIZE_MAX < YYSTACK_BYTES (YYMAXDEPTH) - evaluated with infinite-precision integer arithmetic. */ - -#if defined (YYMAXDEPTH) && YYMAXDEPTH == 0 -# undef YYMAXDEPTH -#endif - -#ifndef YYMAXDEPTH -# define YYMAXDEPTH 10000 -#endif - - - -#if YYERROR_VERBOSE - -# ifndef yystrlen -# if defined (__GLIBC__) && defined (_STRING_H) -# define yystrlen strlen -# else -/* Return the length of YYSTR. 
*/ -static YYSIZE_T -# if defined (__STDC__) || defined (__cplusplus) -yystrlen (const char *yystr) -# else -yystrlen (yystr) - const char *yystr; -# endif -{ - register const char *yys = yystr; - - while (*yys++ != '\0') - continue; - - return yys - yystr - 1; -} -# endif -# endif - -# ifndef yystpcpy -# if defined (__GLIBC__) && defined (_STRING_H) && defined (_GNU_SOURCE) -# define yystpcpy stpcpy -# else -/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in - YYDEST. */ -static char * -# if defined (__STDC__) || defined (__cplusplus) -yystpcpy (char *yydest, const char *yysrc) -# else -yystpcpy (yydest, yysrc) - char *yydest; - const char *yysrc; -# endif -{ - register char *yyd = yydest; - register const char *yys = yysrc; - - while ((*yyd++ = *yys++) != '\0') - continue; - - return yyd - 1; -} -# endif -# endif - -#endif /* !YYERROR_VERBOSE */ - - - -#if YYDEBUG -/*--------------------------------. -| Print this symbol on YYOUTPUT. | -`--------------------------------*/ - -#if defined (__STDC__) || defined (__cplusplus) -static void -yysymprint (FILE *yyoutput, int yytype, YYSTYPE *yyvaluep) -#else -static void -yysymprint (yyoutput, yytype, yyvaluep) - FILE *yyoutput; - int yytype; - YYSTYPE *yyvaluep; -#endif -{ - /* Pacify ``unused variable'' warnings. */ - (void) yyvaluep; - - if (yytype < YYNTOKENS) - { - YYFPRINTF (yyoutput, "token %s (", yytname[yytype]); -# ifdef YYPRINT - YYPRINT (yyoutput, yytoknum[yytype], *yyvaluep); -# endif - } - else - YYFPRINTF (yyoutput, "nterm %s (", yytname[yytype]); - - switch (yytype) - { - default: - break; - } - YYFPRINTF (yyoutput, ")"); -} - -#endif /* ! YYDEBUG */ -/*-----------------------------------------------. -| Release the memory associated to this symbol. 
| -`-----------------------------------------------*/ - -#if defined (__STDC__) || defined (__cplusplus) -static void -yydestruct (int yytype, YYSTYPE *yyvaluep) -#else -static void -yydestruct (yytype, yyvaluep) - int yytype; - YYSTYPE *yyvaluep; -#endif -{ - /* Pacify ``unused variable'' warnings. */ - (void) yyvaluep; - - switch (yytype) - { - - default: - break; - } -} - - -/* Prevent warnings from -Wmissing-prototypes. */ - -#ifdef YYPARSE_PARAM -# if defined (__STDC__) || defined (__cplusplus) -int yyparse (void *YYPARSE_PARAM); -# else -int yyparse (); -# endif -#else /* ! YYPARSE_PARAM */ -#if defined (__STDC__) || defined (__cplusplus) -int yyparse (void); -#else -int yyparse (); -#endif -#endif /* ! YYPARSE_PARAM */ - - - -/* The lookahead symbol. */ -int yychar; - -/* The semantic value of the lookahead symbol. */ -YYSTYPE yylval; - -/* Number of syntax errors so far. */ -int yynerrs; - - - -/*----------. -| yyparse. | -`----------*/ - -#ifdef YYPARSE_PARAM -# if defined (__STDC__) || defined (__cplusplus) -int yyparse (void *YYPARSE_PARAM) -# else -int yyparse (YYPARSE_PARAM) - void *YYPARSE_PARAM; -# endif -#else /* ! YYPARSE_PARAM */ -#if defined (__STDC__) || defined (__cplusplus) -int -yyparse (void) -#else -int -yyparse () - -#endif -#endif -{ - - register int yystate; - register int yyn; - int yyresult; - /* Number of tokens to shift before error messages enabled. */ - int yyerrstatus; - /* Lookahead token as an internal (translated) token number. */ - int yytoken = 0; - - /* Three stacks and their tools: - `yyss': related to states, - `yyvs': related to semantic values, - `yyls': related to locations. - - Refer to the stacks thru separate pointers, to allow yyoverflow - to reallocate them elsewhere. */ - - /* The state stack. */ - short int yyssa[YYINITDEPTH]; - short int *yyss = yyssa; - register short int *yyssp; - - /* The semantic value stack. 
*/ - YYSTYPE yyvsa[YYINITDEPTH]; - YYSTYPE *yyvs = yyvsa; - register YYSTYPE *yyvsp; - - - -#define YYPOPSTACK (yyvsp--, yyssp--) - - YYSIZE_T yystacksize = YYINITDEPTH; - - /* The variables used to return semantic value and location from the - action routines. */ - YYSTYPE yyval; - - - /* When reducing, the number of symbols on the RHS of the reduced - rule. */ - int yylen; - - YYDPRINTF ((stderr, "Starting parse\n")); - - yystate = 0; - yyerrstatus = 0; - yynerrs = 0; - yychar = YYEMPTY; /* Cause a token to be read. */ - - /* Initialize stack pointers. - Waste one element of value and location stack - so that they stay on the same level as the state stack. - The wasted elements are never initialized. */ - - yyssp = yyss; - yyvsp = yyvs; - - - goto yysetstate; - -/*------------------------------------------------------------. -| yynewstate -- Push a new state, which is found in yystate. | -`------------------------------------------------------------*/ - yynewstate: - /* In all cases, when you get here, the value and location stacks - have just been pushed. so pushing a state here evens the stacks. - */ - yyssp++; - - yysetstate: - *yyssp = yystate; - - if (yyss + yystacksize - 1 <= yyssp) - { - /* Get the current used size of the three stacks, in elements. */ - YYSIZE_T yysize = yyssp - yyss + 1; - -#ifdef yyoverflow - { - /* Give user a chance to reallocate the stack. Use copies of - these so that the &'s don't force the real ones into - memory. */ - YYSTYPE *yyvs1 = yyvs; - short int *yyss1 = yyss; - - - /* Each stack pointer address is followed by the size of the - data in use in that stack, in bytes. This used to be a - conditional around just the two extra args, but that might - be undefined if yyoverflow is a macro. 
*/ - yyoverflow ("parser stack overflow", - &yyss1, yysize * sizeof (*yyssp), - &yyvs1, yysize * sizeof (*yyvsp), - - &yystacksize); - - yyss = yyss1; - yyvs = yyvs1; - } -#else /* no yyoverflow */ -# ifndef YYSTACK_RELOCATE - goto yyoverflowlab; -# else - /* Extend the stack our own way. */ - if (YYMAXDEPTH <= yystacksize) - goto yyoverflowlab; - yystacksize *= 2; - if (YYMAXDEPTH < yystacksize) - yystacksize = YYMAXDEPTH; - - { - short int *yyss1 = yyss; - union yyalloc *yyptr = - (union yyalloc *) YYSTACK_ALLOC (YYSTACK_BYTES (yystacksize)); - if (! yyptr) - goto yyoverflowlab; - YYSTACK_RELOCATE (yyss); - YYSTACK_RELOCATE (yyvs); - -# undef YYSTACK_RELOCATE - if (yyss1 != yyssa) - YYSTACK_FREE (yyss1); - } -# endif -#endif /* no yyoverflow */ - - yyssp = yyss + yysize - 1; - yyvsp = yyvs + yysize - 1; - - - YYDPRINTF ((stderr, "Stack size increased to %lu\n", - (unsigned long int) yystacksize)); - - if (yyss + yystacksize - 1 <= yyssp) - YYABORT; - } - - YYDPRINTF ((stderr, "Entering state %d\n", yystate)); - - goto yybackup; - -/*-----------. -| yybackup. | -`-----------*/ -yybackup: - -/* Do appropriate processing given the current state. */ -/* Read a lookahead token if we need one and don't already have one. */ -/* yyresume: */ - - /* First try to decide what to do without reference to lookahead token. */ - - yyn = yypact[yystate]; - if (yyn == YYPACT_NINF) - goto yydefault; - - /* Not known => get a lookahead token if don't already have one. */ - - /* YYCHAR is either YYEMPTY or YYEOF or a valid lookahead symbol. */ - if (yychar == YYEMPTY) - { - YYDPRINTF ((stderr, "Reading a token: ")); - yychar = YYLEX; - } - - if (yychar <= YYEOF) - { - yychar = yytoken = YYEOF; - YYDPRINTF ((stderr, "Now at end of input.\n")); - } - else - { - yytoken = YYTRANSLATE (yychar); - YYDSYMPRINTF ("Next token is", yytoken, &yylval, &yylloc); - } - - /* If the proper action on seeing token YYTOKEN is to reduce or to - detect an error, take that action. 
*/ - yyn += yytoken; - if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) - goto yydefault; - yyn = yytable[yyn]; - if (yyn <= 0) - { - if (yyn == 0 || yyn == YYTABLE_NINF) - goto yyerrlab; - yyn = -yyn; - goto yyreduce; - } - - if (yyn == YYFINAL) - YYACCEPT; - - /* Shift the lookahead token. */ - YYDPRINTF ((stderr, "Shifting token %s, ", yytname[yytoken])); - - /* Discard the token being shifted unless it is eof. */ - if (yychar != YYEOF) - yychar = YYEMPTY; - - *++yyvsp = yylval; - - - /* Count tokens shifted since error; after three, turn off error - status. */ - if (yyerrstatus) - yyerrstatus--; - - yystate = yyn; - goto yynewstate; - - -/*-----------------------------------------------------------. -| yydefault -- do the default action for the current state. | -`-----------------------------------------------------------*/ -yydefault: - yyn = yydefact[yystate]; - if (yyn == 0) - goto yyerrlab; - goto yyreduce; - - -/*-----------------------------. -| yyreduce -- Do a reduction. | -`-----------------------------*/ -yyreduce: - /* yyn is the number of a rule to reduce with. */ - yylen = yyr2[yyn]; - - /* If YYLEN is nonzero, implement the default value of the action: - `$$ = $1'. - - Otherwise, the following line sets YYVAL to garbage. - This behavior is undocumented and Bison - users should not rely upon it. Assigning to YYVAL - unconditionally makes the parser a bit smaller, and it avoids a - GCC warning that YYVAL may be used uninitialized. 
*/ - yyval = yyvsp[1-yylen]; - - - YY_REDUCE_PRINT (yyn); - switch (yyn) - { - case 25: -#line 166 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} - break; - - case 26: -#line 168 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-1], yyvsp[0]); ;} - break; - - case 27: -#line 172 "pars0grm.y" - { yyval = yyvsp[0];;} - break; - - case 28: -#line 174 "pars0grm.y" - { yyval = pars_func(yyvsp[-3], yyvsp[-1]); ;} - break; - - case 29: -#line 175 "pars0grm.y" - { yyval = yyvsp[0];;} - break; - - case 30: -#line 176 "pars0grm.y" - { yyval = yyvsp[0];;} - break; - - case 31: -#line 177 "pars0grm.y" - { yyval = yyvsp[0];;} - break; - - case 32: -#line 178 "pars0grm.y" - { yyval = yyvsp[0];;} - break; - - case 33: -#line 179 "pars0grm.y" - { yyval = yyvsp[0];;} - break; - - case 34: -#line 180 "pars0grm.y" - { yyval = yyvsp[0];;} - break; - - case 35: -#line 181 "pars0grm.y" - { yyval = yyvsp[0];;} - break; - - case 36: -#line 182 "pars0grm.y" - { yyval = pars_op('+', yyvsp[-2], yyvsp[0]); ;} - break; - - case 37: -#line 183 "pars0grm.y" - { yyval = pars_op('-', yyvsp[-2], yyvsp[0]); ;} - break; - - case 38: -#line 184 "pars0grm.y" - { yyval = pars_op('*', yyvsp[-2], yyvsp[0]); ;} - break; - - case 39: -#line 185 "pars0grm.y" - { yyval = pars_op('/', yyvsp[-2], yyvsp[0]); ;} - break; - - case 40: -#line 186 "pars0grm.y" - { yyval = pars_op('-', yyvsp[0], NULL); ;} - break; - - case 41: -#line 187 "pars0grm.y" - { yyval = yyvsp[-1]; ;} - break; - - case 42: -#line 188 "pars0grm.y" - { yyval = pars_op('=', yyvsp[-2], yyvsp[0]); ;} - break; - - case 43: -#line 189 "pars0grm.y" - { yyval = pars_op('<', yyvsp[-2], yyvsp[0]); ;} - break; - - case 44: -#line 190 "pars0grm.y" - { yyval = pars_op('>', yyvsp[-2], yyvsp[0]); ;} - break; - - case 45: -#line 191 "pars0grm.y" - { yyval = pars_op(PARS_GE_TOKEN, yyvsp[-2], yyvsp[0]); ;} - break; - - case 46: -#line 192 "pars0grm.y" - { yyval = pars_op(PARS_LE_TOKEN, yyvsp[-2], yyvsp[0]); ;} - break; - - case 47: 
-#line 193 "pars0grm.y" - { yyval = pars_op(PARS_NE_TOKEN, yyvsp[-2], yyvsp[0]); ;} - break; - - case 48: -#line 194 "pars0grm.y" - { yyval = pars_op(PARS_AND_TOKEN, yyvsp[-2], yyvsp[0]); ;} - break; - - case 49: -#line 195 "pars0grm.y" - { yyval = pars_op(PARS_OR_TOKEN, yyvsp[-2], yyvsp[0]); ;} - break; - - case 50: -#line 196 "pars0grm.y" - { yyval = pars_op(PARS_NOT_TOKEN, yyvsp[0], NULL); ;} - break; - - case 51: -#line 198 "pars0grm.y" - { yyval = pars_op(PARS_NOTFOUND_TOKEN, yyvsp[-2], NULL); ;} - break; - - case 52: -#line 200 "pars0grm.y" - { yyval = pars_op(PARS_NOTFOUND_TOKEN, yyvsp[-2], NULL); ;} - break; - - case 53: -#line 204 "pars0grm.y" - { yyval = &pars_to_char_token; ;} - break; - - case 54: -#line 205 "pars0grm.y" - { yyval = &pars_to_number_token; ;} - break; - - case 55: -#line 206 "pars0grm.y" - { yyval = &pars_to_binary_token; ;} - break; - - case 56: -#line 208 "pars0grm.y" - { yyval = &pars_binary_to_number_token; ;} - break; - - case 57: -#line 209 "pars0grm.y" - { yyval = &pars_substr_token; ;} - break; - - case 58: -#line 210 "pars0grm.y" - { yyval = &pars_concat_token; ;} - break; - - case 59: -#line 211 "pars0grm.y" - { yyval = &pars_instr_token; ;} - break; - - case 60: -#line 212 "pars0grm.y" - { yyval = &pars_length_token; ;} - break; - - case 61: -#line 213 "pars0grm.y" - { yyval = &pars_sysdate_token; ;} - break; - - case 62: -#line 214 "pars0grm.y" - { yyval = &pars_rnd_token; ;} - break; - - case 63: -#line 215 "pars0grm.y" - { yyval = &pars_rnd_str_token; ;} - break; - - case 67: -#line 226 "pars0grm.y" - { yyval = pars_stored_procedure_call(yyvsp[-4]); ;} - break; - - case 68: -#line 231 "pars0grm.y" - { yyval = pars_procedure_call(yyvsp[-3], yyvsp[-1]); ;} - break; - - case 69: -#line 235 "pars0grm.y" - { yyval = &pars_replstr_token; ;} - break; - - case 70: -#line 236 "pars0grm.y" - { yyval = &pars_printf_token; ;} - break; - - case 71: -#line 237 "pars0grm.y" - { yyval = &pars_assert_token; ;} - break; - - case 72: -#line 
241 "pars0grm.y" - { yyval = yyvsp[-2]; ;} - break; - - case 73: -#line 245 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} - break; - - case 74: -#line 247 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;} - break; - - case 75: -#line 251 "pars0grm.y" - { yyval = NULL; ;} - break; - - case 76: -#line 252 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} - break; - - case 77: -#line 254 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;} - break; - - case 78: -#line 258 "pars0grm.y" - { yyval = NULL; ;} - break; - - case 79: -#line 259 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]);;} - break; - - case 80: -#line 260 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;} - break; - - case 81: -#line 264 "pars0grm.y" - { yyval = yyvsp[0]; ;} - break; - - case 82: -#line 266 "pars0grm.y" - { yyval = pars_func(&pars_count_token, - que_node_list_add_last(NULL, - sym_tab_add_int_lit( - pars_sym_tab_global, 1))); ;} - break; - - case 83: -#line 271 "pars0grm.y" - { yyval = pars_func(&pars_count_token, - que_node_list_add_last(NULL, - pars_func(&pars_distinct_token, - que_node_list_add_last( - NULL, yyvsp[-1])))); ;} - break; - - case 84: -#line 277 "pars0grm.y" - { yyval = pars_func(&pars_sum_token, - que_node_list_add_last(NULL, - yyvsp[-1])); ;} - break; - - case 85: -#line 283 "pars0grm.y" - { yyval = NULL; ;} - break; - - case 86: -#line 284 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} - break; - - case 87: -#line 286 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;} - break; - - case 88: -#line 290 "pars0grm.y" - { yyval = pars_select_list(&pars_star_denoter, - NULL); ;} - break; - - case 89: -#line 293 "pars0grm.y" - { yyval = pars_select_list(yyvsp[-2], yyvsp[0]); ;} - break; - - case 90: -#line 294 "pars0grm.y" - { yyval = pars_select_list(yyvsp[0], NULL); ;} - break; - - case 91: -#line 298 
"pars0grm.y" - { yyval = NULL; ;} - break; - - case 92: -#line 299 "pars0grm.y" - { yyval = yyvsp[0]; ;} - break; - - case 93: -#line 303 "pars0grm.y" - { yyval = NULL; ;} - break; - - case 94: -#line 305 "pars0grm.y" - { yyval = &pars_update_token; ;} - break; - - case 95: -#line 309 "pars0grm.y" - { yyval = NULL; ;} - break; - - case 96: -#line 311 "pars0grm.y" - { yyval = &pars_share_token; ;} - break; - - case 97: -#line 315 "pars0grm.y" - { yyval = &pars_asc_token; ;} - break; - - case 98: -#line 316 "pars0grm.y" - { yyval = &pars_asc_token; ;} - break; - - case 99: -#line 317 "pars0grm.y" - { yyval = &pars_desc_token; ;} - break; - - case 100: -#line 321 "pars0grm.y" - { yyval = NULL; ;} - break; - - case 101: -#line 323 "pars0grm.y" - { yyval = pars_order_by(yyvsp[-1], yyvsp[0]); ;} - break; - - case 102: -#line 332 "pars0grm.y" - { yyval = pars_select_statement(yyvsp[-6], yyvsp[-4], yyvsp[-3], - yyvsp[-2], yyvsp[-1], yyvsp[0]); ;} - break; - - case 103: -#line 338 "pars0grm.y" - { yyval = yyvsp[0]; ;} - break; - - case 104: -#line 343 "pars0grm.y" - { yyval = pars_insert_statement(yyvsp[-4], yyvsp[-1], NULL); ;} - break; - - case 105: -#line 345 "pars0grm.y" - { yyval = pars_insert_statement(yyvsp[-1], NULL, yyvsp[0]); ;} - break; - - case 106: -#line 349 "pars0grm.y" - { yyval = pars_column_assignment(yyvsp[-2], yyvsp[0]); ;} - break; - - case 107: -#line 353 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} - break; - - case 108: -#line 355 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;} - break; - - case 109: -#line 361 "pars0grm.y" - { yyval = yyvsp[0]; ;} - break; - - case 110: -#line 367 "pars0grm.y" - { yyval = pars_update_statement_start(FALSE, - yyvsp[-2], yyvsp[0]); ;} - break; - - case 111: -#line 373 "pars0grm.y" - { yyval = pars_update_statement(yyvsp[-1], NULL, yyvsp[0]); ;} - break; - - case 112: -#line 378 "pars0grm.y" - { yyval = pars_update_statement(yyvsp[-1], yyvsp[0], NULL); ;} - break; - - 
case 113: -#line 383 "pars0grm.y" - { yyval = pars_update_statement_start(TRUE, - yyvsp[0], NULL); ;} - break; - - case 114: -#line 389 "pars0grm.y" - { yyval = pars_update_statement(yyvsp[-1], NULL, yyvsp[0]); ;} - break; - - case 115: -#line 394 "pars0grm.y" - { yyval = pars_update_statement(yyvsp[-1], yyvsp[0], NULL); ;} - break; - - case 116: -#line 399 "pars0grm.y" - { yyval = pars_row_printf_statement(yyvsp[0]); ;} - break; - - case 117: -#line 404 "pars0grm.y" - { yyval = pars_assignment_statement(yyvsp[-2], yyvsp[0]); ;} - break; - - case 118: -#line 410 "pars0grm.y" - { yyval = pars_elsif_element(yyvsp[-2], yyvsp[0]); ;} - break; - - case 119: -#line 414 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} - break; - - case 120: -#line 416 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-1], yyvsp[0]); ;} - break; - - case 121: -#line 420 "pars0grm.y" - { yyval = NULL; ;} - break; - - case 122: -#line 422 "pars0grm.y" - { yyval = yyvsp[0]; ;} - break; - - case 123: -#line 423 "pars0grm.y" - { yyval = yyvsp[0]; ;} - break; - - case 124: -#line 430 "pars0grm.y" - { yyval = pars_if_statement(yyvsp[-5], yyvsp[-3], yyvsp[-2]); ;} - break; - - case 125: -#line 436 "pars0grm.y" - { yyval = pars_while_statement(yyvsp[-4], yyvsp[-2]); ;} - break; - - case 126: -#line 444 "pars0grm.y" - { yyval = pars_for_statement(yyvsp[-8], yyvsp[-6], yyvsp[-4], yyvsp[-2]); ;} - break; - - case 127: -#line 448 "pars0grm.y" - { yyval = pars_exit_statement(); ;} - break; - - case 128: -#line 452 "pars0grm.y" - { yyval = pars_return_statement(); ;} - break; - - case 129: -#line 457 "pars0grm.y" - { yyval = pars_open_statement( - ROW_SEL_OPEN_CURSOR, yyvsp[0]); ;} - break; - - case 130: -#line 463 "pars0grm.y" - { yyval = pars_open_statement( - ROW_SEL_CLOSE_CURSOR, yyvsp[0]); ;} - break; - - case 131: -#line 469 "pars0grm.y" - { yyval = pars_fetch_statement(yyvsp[-2], yyvsp[0], NULL); ;} - break; - - case 132: -#line 471 "pars0grm.y" - { yyval = 
pars_fetch_statement(yyvsp[-2], NULL, yyvsp[0]); ;} - break; - - case 133: -#line 476 "pars0grm.y" - { yyval = pars_column_def(yyvsp[-4], yyvsp[-3], yyvsp[-2], yyvsp[-1], yyvsp[0]); ;} - break; - - case 134: -#line 480 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} - break; - - case 135: -#line 482 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;} - break; - - case 136: -#line 486 "pars0grm.y" - { yyval = NULL; ;} - break; - - case 137: -#line 488 "pars0grm.y" - { yyval = yyvsp[-1]; ;} - break; - - case 138: -#line 492 "pars0grm.y" - { yyval = NULL; ;} - break; - - case 139: -#line 494 "pars0grm.y" - { yyval = &pars_int_token; - /* pass any non-NULL pointer */ ;} - break; - - case 140: -#line 499 "pars0grm.y" - { yyval = NULL; ;} - break; - - case 141: -#line 501 "pars0grm.y" - { yyval = &pars_int_token; - /* pass any non-NULL pointer */ ;} - break; - - case 142: -#line 506 "pars0grm.y" - { yyval = NULL; ;} - break; - - case 143: -#line 508 "pars0grm.y" - { yyval = &pars_int_token; - /* pass any non-NULL pointer */ ;} - break; - - case 144: -#line 515 "pars0grm.y" - { yyval = pars_create_table(yyvsp[-4], yyvsp[-2], yyvsp[0]); ;} - break; - - case 145: -#line 519 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} - break; - - case 146: -#line 521 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;} - break; - - case 147: -#line 525 "pars0grm.y" - { yyval = NULL; ;} - break; - - case 148: -#line 526 "pars0grm.y" - { yyval = &pars_unique_token; ;} - break; - - case 149: -#line 530 "pars0grm.y" - { yyval = NULL; ;} - break; - - case 150: -#line 531 "pars0grm.y" - { yyval = &pars_clustered_token; ;} - break; - - case 151: -#line 539 "pars0grm.y" - { yyval = pars_create_index(yyvsp[-8], yyvsp[-7], yyvsp[-5], yyvsp[-3], yyvsp[-1]); ;} - break; - - case 152: -#line 544 "pars0grm.y" - { yyval = pars_commit_statement(); ;} - break; - - case 153: -#line 549 "pars0grm.y" - { yyval = 
pars_rollback_statement(); ;} - break; - - case 154: -#line 553 "pars0grm.y" - { yyval = &pars_int_token; ;} - break; - - case 155: -#line 554 "pars0grm.y" - { yyval = &pars_int_token; ;} - break; - - case 156: -#line 555 "pars0grm.y" - { yyval = &pars_char_token; ;} - break; - - case 157: -#line 556 "pars0grm.y" - { yyval = &pars_binary_token; ;} - break; - - case 158: -#line 557 "pars0grm.y" - { yyval = &pars_blob_token; ;} - break; - - case 159: -#line 562 "pars0grm.y" - { yyval = pars_parameter_declaration(yyvsp[-2], - PARS_INPUT, yyvsp[0]); ;} - break; - - case 160: -#line 565 "pars0grm.y" - { yyval = pars_parameter_declaration(yyvsp[-2], - PARS_OUTPUT, yyvsp[0]); ;} - break; - - case 161: -#line 570 "pars0grm.y" - { yyval = NULL; ;} - break; - - case 162: -#line 571 "pars0grm.y" - { yyval = que_node_list_add_last(NULL, yyvsp[0]); ;} - break; - - case 163: -#line 573 "pars0grm.y" - { yyval = que_node_list_add_last(yyvsp[-2], yyvsp[0]); ;} - break; - - case 164: -#line 578 "pars0grm.y" - { yyval = pars_variable_declaration(yyvsp[-2], yyvsp[-1]); ;} - break; - - case 168: -#line 590 "pars0grm.y" - { yyval = pars_cursor_declaration(yyvsp[-3], yyvsp[-1]); ;} - break; - - case 169: -#line 595 "pars0grm.y" - { yyval = pars_function_declaration(yyvsp[-1]); ;} - break; - - case 175: -#line 616 "pars0grm.y" - { yyval = pars_procedure_definition(yyvsp[-9], yyvsp[-7], - yyvsp[-1]); ;} - break; - - - } - -/* Line 1010 of yacc.c. */ -#line 2345 "pars0grm.tab.c" - - yyvsp -= yylen; - yyssp -= yylen; - - - YY_STACK_PRINT (yyss, yyssp); - - *++yyvsp = yyval; - - - /* Now `shift' the result of the reduction. Determine what state - that goes to, based on the state we popped back to and the rule - number reduced by. 
*/ - - yyn = yyr1[yyn]; - - yystate = yypgoto[yyn - YYNTOKENS] + *yyssp; - if (0 <= yystate && yystate <= YYLAST && yycheck[yystate] == *yyssp) - yystate = yytable[yystate]; - else - yystate = yydefgoto[yyn - YYNTOKENS]; - - goto yynewstate; - - -/*------------------------------------. -| yyerrlab -- here on detecting error | -`------------------------------------*/ -yyerrlab: - /* If not already recovering from an error, report this error. */ - if (!yyerrstatus) - { - ++yynerrs; -#if YYERROR_VERBOSE - yyn = yypact[yystate]; - - if (YYPACT_NINF < yyn && yyn < YYLAST) - { - YYSIZE_T yysize = 0; - int yytype = YYTRANSLATE (yychar); - const char* yyprefix; - char *yymsg; - int yyx; - - /* Start YYX at -YYN if negative to avoid negative indexes in - YYCHECK. */ - int yyxbegin = yyn < 0 ? -yyn : 0; - - /* Stay within bounds of both yycheck and yytname. */ - int yychecklim = YYLAST - yyn; - int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; - int yycount = 0; - - yyprefix = ", expecting "; - for (yyx = yyxbegin; yyx < yyxend; ++yyx) - if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR) - { - yysize += yystrlen (yyprefix) + yystrlen (yytname [yyx]); - yycount += 1; - if (yycount == 5) - { - yysize = 0; - break; - } - } - yysize += (sizeof ("syntax error, unexpected ") - + yystrlen (yytname[yytype])); - yymsg = (char *) YYSTACK_ALLOC (yysize); - if (yymsg != 0) - { - char *yyp = yystpcpy (yymsg, "syntax error, unexpected "); - yyp = yystpcpy (yyp, yytname[yytype]); - - if (yycount < 5) - { - yyprefix = ", expecting "; - for (yyx = yyxbegin; yyx < yyxend; ++yyx) - if (yycheck[yyx + yyn] == yyx && yyx != YYTERROR) - { - yyp = yystpcpy (yyp, yyprefix); - yyp = yystpcpy (yyp, yytname[yyx]); - yyprefix = " or "; - } - } - yyerror (yymsg); - YYSTACK_FREE (yymsg); - } - else - yyerror ("syntax error; also virtual memory exhausted"); - } - else -#endif /* YYERROR_VERBOSE */ - yyerror ("syntax error"); - } - - - - if (yyerrstatus == 3) - { - /* If just tried and failed to 
reuse lookahead token after an - error, discard it. */ - - if (yychar <= YYEOF) - { - /* If at end of input, pop the error token, - then the rest of the stack, then return failure. */ - if (yychar == YYEOF) - for (;;) - { - YYPOPSTACK; - if (yyssp == yyss) - YYABORT; - YYDSYMPRINTF ("Error: popping", yystos[*yyssp], yyvsp, yylsp); - yydestruct (yystos[*yyssp], yyvsp); - } - } - else - { - YYDSYMPRINTF ("Error: discarding", yytoken, &yylval, &yylloc); - yydestruct (yytoken, &yylval); - yychar = YYEMPTY; - - } - } - - /* Else will try to reuse lookahead token after shifting the error - token. */ - goto yyerrlab1; - - -/*---------------------------------------------------. -| yyerrorlab -- error raised explicitly by YYERROR. | -`---------------------------------------------------*/ -yyerrorlab: - -#ifdef __GNUC__ - /* Pacify GCC when the user code never invokes YYERROR and the label - yyerrorlab therefore never appears in user code. */ - if (0) - goto yyerrorlab; -#endif - - yyvsp -= yylen; - yyssp -= yylen; - yystate = *yyssp; - goto yyerrlab1; - - -/*-------------------------------------------------------------. -| yyerrlab1 -- common code for both syntax error and YYERROR. | -`-------------------------------------------------------------*/ -yyerrlab1: - yyerrstatus = 3; /* Each real token shifted decrements this. */ - - for (;;) - { - yyn = yypact[yystate]; - if (yyn != YYPACT_NINF) - { - yyn += YYTERROR; - if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYTERROR) - { - yyn = yytable[yyn]; - if (0 < yyn) - break; - } - } - - /* Pop the current state because it cannot handle the error token. 
*/ - if (yyssp == yyss) - YYABORT; - - YYDSYMPRINTF ("Error: popping", yystos[*yyssp], yyvsp, yylsp); - yydestruct (yystos[yystate], yyvsp); - YYPOPSTACK; - yystate = *yyssp; - YY_STACK_PRINT (yyss, yyssp); - } - - if (yyn == YYFINAL) - YYACCEPT; - - YYDPRINTF ((stderr, "Shifting error token, ")); - - *++yyvsp = yylval; - - - yystate = yyn; - goto yynewstate; - - -/*-------------------------------------. -| yyacceptlab -- YYACCEPT comes here. | -`-------------------------------------*/ -yyacceptlab: - yyresult = 0; - goto yyreturn; - -/*-----------------------------------. -| yyabortlab -- YYABORT comes here. | -`-----------------------------------*/ -yyabortlab: - yyresult = 1; - goto yyreturn; - -#ifndef yyoverflow -/*----------------------------------------------. -| yyoverflowlab -- parser overflow comes here. | -`----------------------------------------------*/ -yyoverflowlab: - yyerror ("parser stack overflow"); - yyresult = 2; - /* Fall through. */ -#endif - -yyreturn: -#ifndef yyoverflow - if (yyss != yyssa) - YYSTACK_FREE (yyss); -#endif - return yyresult; -} - - -#line 620 "pars0grm.y" - - diff --git a/storage/innobase/pars/pars0grm.h b/storage/innobase/pars/pars0grm.h deleted file mode 100644 index 0062b8314ee..00000000000 --- a/storage/innobase/pars/pars0grm.h +++ /dev/null @@ -1,234 +0,0 @@ -/* A Bison parser, made by GNU Bison 1.875d. */ - -/* Skeleton parser for Yacc-like parsing with Bison, - Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004 Free Software Foundation, Inc. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place - Suite 330, - Boston, MA 02111-1307, USA. */ - -/* As a special exception, when this file is copied by Bison into a - Bison output file, you may use that output file without restriction. - This special exception was added by the Free Software Foundation - in version 1.24 of Bison. */ - -/* Tokens. */ -#ifndef YYTOKENTYPE -# define YYTOKENTYPE - /* Put the tokens into the symbol table, so that GDB and other debuggers - know about them. */ - enum yytokentype { - PARS_INT_LIT = 258, - PARS_FLOAT_LIT = 259, - PARS_STR_LIT = 260, - PARS_FIXBINARY_LIT = 261, - PARS_BLOB_LIT = 262, - PARS_NULL_LIT = 263, - PARS_ID_TOKEN = 264, - PARS_AND_TOKEN = 265, - PARS_OR_TOKEN = 266, - PARS_NOT_TOKEN = 267, - PARS_GE_TOKEN = 268, - PARS_LE_TOKEN = 269, - PARS_NE_TOKEN = 270, - PARS_PROCEDURE_TOKEN = 271, - PARS_IN_TOKEN = 272, - PARS_OUT_TOKEN = 273, - PARS_BINARY_TOKEN = 274, - PARS_BLOB_TOKEN = 275, - PARS_INT_TOKEN = 276, - PARS_INTEGER_TOKEN = 277, - PARS_FLOAT_TOKEN = 278, - PARS_CHAR_TOKEN = 279, - PARS_IS_TOKEN = 280, - PARS_BEGIN_TOKEN = 281, - PARS_END_TOKEN = 282, - PARS_IF_TOKEN = 283, - PARS_THEN_TOKEN = 284, - PARS_ELSE_TOKEN = 285, - PARS_ELSIF_TOKEN = 286, - PARS_LOOP_TOKEN = 287, - PARS_WHILE_TOKEN = 288, - PARS_RETURN_TOKEN = 289, - PARS_SELECT_TOKEN = 290, - PARS_SUM_TOKEN = 291, - PARS_COUNT_TOKEN = 292, - PARS_DISTINCT_TOKEN = 293, - PARS_FROM_TOKEN = 294, - PARS_WHERE_TOKEN = 295, - PARS_FOR_TOKEN = 296, - PARS_DDOT_TOKEN = 297, - PARS_READ_TOKEN = 298, - PARS_ORDER_TOKEN = 299, - PARS_BY_TOKEN = 300, - PARS_ASC_TOKEN = 301, - PARS_DESC_TOKEN = 302, - PARS_INSERT_TOKEN = 303, - PARS_INTO_TOKEN = 304, - PARS_VALUES_TOKEN = 305, - PARS_UPDATE_TOKEN = 306, - PARS_SET_TOKEN = 307, - PARS_DELETE_TOKEN = 308, - PARS_CURRENT_TOKEN = 309, - 
PARS_OF_TOKEN = 310, - PARS_CREATE_TOKEN = 311, - PARS_TABLE_TOKEN = 312, - PARS_INDEX_TOKEN = 313, - PARS_UNIQUE_TOKEN = 314, - PARS_CLUSTERED_TOKEN = 315, - PARS_DOES_NOT_FIT_IN_MEM_TOKEN = 316, - PARS_ON_TOKEN = 317, - PARS_ASSIGN_TOKEN = 318, - PARS_DECLARE_TOKEN = 319, - PARS_CURSOR_TOKEN = 320, - PARS_SQL_TOKEN = 321, - PARS_OPEN_TOKEN = 322, - PARS_FETCH_TOKEN = 323, - PARS_CLOSE_TOKEN = 324, - PARS_NOTFOUND_TOKEN = 325, - PARS_TO_CHAR_TOKEN = 326, - PARS_TO_NUMBER_TOKEN = 327, - PARS_TO_BINARY_TOKEN = 328, - PARS_BINARY_TO_NUMBER_TOKEN = 329, - PARS_SUBSTR_TOKEN = 330, - PARS_REPLSTR_TOKEN = 331, - PARS_CONCAT_TOKEN = 332, - PARS_INSTR_TOKEN = 333, - PARS_LENGTH_TOKEN = 334, - PARS_SYSDATE_TOKEN = 335, - PARS_PRINTF_TOKEN = 336, - PARS_ASSERT_TOKEN = 337, - PARS_RND_TOKEN = 338, - PARS_RND_STR_TOKEN = 339, - PARS_ROW_PRINTF_TOKEN = 340, - PARS_COMMIT_TOKEN = 341, - PARS_ROLLBACK_TOKEN = 342, - PARS_WORK_TOKEN = 343, - PARS_UNSIGNED_TOKEN = 344, - PARS_EXIT_TOKEN = 345, - PARS_FUNCTION_TOKEN = 346, - PARS_LOCK_TOKEN = 347, - PARS_SHARE_TOKEN = 348, - PARS_MODE_TOKEN = 349, - NEG = 350 - }; -#endif -#define PARS_INT_LIT 258 -#define PARS_FLOAT_LIT 259 -#define PARS_STR_LIT 260 -#define PARS_FIXBINARY_LIT 261 -#define PARS_BLOB_LIT 262 -#define PARS_NULL_LIT 263 -#define PARS_ID_TOKEN 264 -#define PARS_AND_TOKEN 265 -#define PARS_OR_TOKEN 266 -#define PARS_NOT_TOKEN 267 -#define PARS_GE_TOKEN 268 -#define PARS_LE_TOKEN 269 -#define PARS_NE_TOKEN 270 -#define PARS_PROCEDURE_TOKEN 271 -#define PARS_IN_TOKEN 272 -#define PARS_OUT_TOKEN 273 -#define PARS_BINARY_TOKEN 274 -#define PARS_BLOB_TOKEN 275 -#define PARS_INT_TOKEN 276 -#define PARS_INTEGER_TOKEN 277 -#define PARS_FLOAT_TOKEN 278 -#define PARS_CHAR_TOKEN 279 -#define PARS_IS_TOKEN 280 -#define PARS_BEGIN_TOKEN 281 -#define PARS_END_TOKEN 282 -#define PARS_IF_TOKEN 283 -#define PARS_THEN_TOKEN 284 -#define PARS_ELSE_TOKEN 285 -#define PARS_ELSIF_TOKEN 286 -#define PARS_LOOP_TOKEN 287 -#define 
PARS_WHILE_TOKEN 288 -#define PARS_RETURN_TOKEN 289 -#define PARS_SELECT_TOKEN 290 -#define PARS_SUM_TOKEN 291 -#define PARS_COUNT_TOKEN 292 -#define PARS_DISTINCT_TOKEN 293 -#define PARS_FROM_TOKEN 294 -#define PARS_WHERE_TOKEN 295 -#define PARS_FOR_TOKEN 296 -#define PARS_DDOT_TOKEN 297 -#define PARS_READ_TOKEN 298 -#define PARS_ORDER_TOKEN 299 -#define PARS_BY_TOKEN 300 -#define PARS_ASC_TOKEN 301 -#define PARS_DESC_TOKEN 302 -#define PARS_INSERT_TOKEN 303 -#define PARS_INTO_TOKEN 304 -#define PARS_VALUES_TOKEN 305 -#define PARS_UPDATE_TOKEN 306 -#define PARS_SET_TOKEN 307 -#define PARS_DELETE_TOKEN 308 -#define PARS_CURRENT_TOKEN 309 -#define PARS_OF_TOKEN 310 -#define PARS_CREATE_TOKEN 311 -#define PARS_TABLE_TOKEN 312 -#define PARS_INDEX_TOKEN 313 -#define PARS_UNIQUE_TOKEN 314 -#define PARS_CLUSTERED_TOKEN 315 -#define PARS_DOES_NOT_FIT_IN_MEM_TOKEN 316 -#define PARS_ON_TOKEN 317 -#define PARS_ASSIGN_TOKEN 318 -#define PARS_DECLARE_TOKEN 319 -#define PARS_CURSOR_TOKEN 320 -#define PARS_SQL_TOKEN 321 -#define PARS_OPEN_TOKEN 322 -#define PARS_FETCH_TOKEN 323 -#define PARS_CLOSE_TOKEN 324 -#define PARS_NOTFOUND_TOKEN 325 -#define PARS_TO_CHAR_TOKEN 326 -#define PARS_TO_NUMBER_TOKEN 327 -#define PARS_TO_BINARY_TOKEN 328 -#define PARS_BINARY_TO_NUMBER_TOKEN 329 -#define PARS_SUBSTR_TOKEN 330 -#define PARS_REPLSTR_TOKEN 331 -#define PARS_CONCAT_TOKEN 332 -#define PARS_INSTR_TOKEN 333 -#define PARS_LENGTH_TOKEN 334 -#define PARS_SYSDATE_TOKEN 335 -#define PARS_PRINTF_TOKEN 336 -#define PARS_ASSERT_TOKEN 337 -#define PARS_RND_TOKEN 338 -#define PARS_RND_STR_TOKEN 339 -#define PARS_ROW_PRINTF_TOKEN 340 -#define PARS_COMMIT_TOKEN 341 -#define PARS_ROLLBACK_TOKEN 342 -#define PARS_WORK_TOKEN 343 -#define PARS_UNSIGNED_TOKEN 344 -#define PARS_EXIT_TOKEN 345 -#define PARS_FUNCTION_TOKEN 346 -#define PARS_LOCK_TOKEN 347 -#define PARS_SHARE_TOKEN 348 -#define PARS_MODE_TOKEN 349 -#define NEG 350 - - - - -#if ! defined (YYSTYPE) && ! 
defined (YYSTYPE_IS_DECLARED) -typedef int YYSTYPE; -# define yystype YYSTYPE /* obsolescent; will be withdrawn */ -# define YYSTYPE_IS_DECLARED 1 -# define YYSTYPE_IS_TRIVIAL 1 -#endif - -extern YYSTYPE yylval; - - - diff --git a/storage/innobase/pars/pars0grm.y b/storage/innobase/pars/pars0grm.y deleted file mode 100644 index a07be9975a1..00000000000 --- a/storage/innobase/pars/pars0grm.y +++ /dev/null @@ -1,620 +0,0 @@ -/****************************************************** -SQL parser: input file for the GNU Bison parser generator - -(c) 1997 Innobase Oy - -Created 12/14/1997 Heikki Tuuri -Published under the GPL version 2 - -See pars0lex.l for instructions on how to generate the C files for -the InnoDB parser. -*******************************************************/ - -%{ -/* The value of the semantic attribute is a pointer to a query tree node -que_node_t */ - -#include "univ.i" -#include <math.h> /* Can't be before univ.i */ -#include "pars0pars.h" -#include "mem0mem.h" -#include "que0types.h" -#include "que0que.h" -#include "row0sel.h" - -#define YYSTYPE que_node_t* - -/* #define __STDC__ */ - -int -yylex(void); -%} - -%token PARS_INT_LIT -%token PARS_FLOAT_LIT -%token PARS_STR_LIT -%token PARS_FIXBINARY_LIT -%token PARS_BLOB_LIT -%token PARS_NULL_LIT -%token PARS_ID_TOKEN -%token PARS_AND_TOKEN -%token PARS_OR_TOKEN -%token PARS_NOT_TOKEN -%token PARS_GE_TOKEN -%token PARS_LE_TOKEN -%token PARS_NE_TOKEN -%token PARS_PROCEDURE_TOKEN -%token PARS_IN_TOKEN -%token PARS_OUT_TOKEN -%token PARS_BINARY_TOKEN -%token PARS_BLOB_TOKEN -%token PARS_INT_TOKEN -%token PARS_INTEGER_TOKEN -%token PARS_FLOAT_TOKEN -%token PARS_CHAR_TOKEN -%token PARS_IS_TOKEN -%token PARS_BEGIN_TOKEN -%token PARS_END_TOKEN -%token PARS_IF_TOKEN -%token PARS_THEN_TOKEN -%token PARS_ELSE_TOKEN -%token PARS_ELSIF_TOKEN -%token PARS_LOOP_TOKEN -%token PARS_WHILE_TOKEN -%token PARS_RETURN_TOKEN -%token PARS_SELECT_TOKEN -%token PARS_SUM_TOKEN -%token PARS_COUNT_TOKEN -%token
PARS_DISTINCT_TOKEN -%token PARS_FROM_TOKEN -%token PARS_WHERE_TOKEN -%token PARS_FOR_TOKEN -%token PARS_DDOT_TOKEN -%token PARS_READ_TOKEN -%token PARS_ORDER_TOKEN -%token PARS_BY_TOKEN -%token PARS_ASC_TOKEN -%token PARS_DESC_TOKEN -%token PARS_INSERT_TOKEN -%token PARS_INTO_TOKEN -%token PARS_VALUES_TOKEN -%token PARS_UPDATE_TOKEN -%token PARS_SET_TOKEN -%token PARS_DELETE_TOKEN -%token PARS_CURRENT_TOKEN -%token PARS_OF_TOKEN -%token PARS_CREATE_TOKEN -%token PARS_TABLE_TOKEN -%token PARS_INDEX_TOKEN -%token PARS_UNIQUE_TOKEN -%token PARS_CLUSTERED_TOKEN -%token PARS_DOES_NOT_FIT_IN_MEM_TOKEN -%token PARS_ON_TOKEN -%token PARS_ASSIGN_TOKEN -%token PARS_DECLARE_TOKEN -%token PARS_CURSOR_TOKEN -%token PARS_SQL_TOKEN -%token PARS_OPEN_TOKEN -%token PARS_FETCH_TOKEN -%token PARS_CLOSE_TOKEN -%token PARS_NOTFOUND_TOKEN -%token PARS_TO_CHAR_TOKEN -%token PARS_TO_NUMBER_TOKEN -%token PARS_TO_BINARY_TOKEN -%token PARS_BINARY_TO_NUMBER_TOKEN -%token PARS_SUBSTR_TOKEN -%token PARS_REPLSTR_TOKEN -%token PARS_CONCAT_TOKEN -%token PARS_INSTR_TOKEN -%token PARS_LENGTH_TOKEN -%token PARS_SYSDATE_TOKEN -%token PARS_PRINTF_TOKEN -%token PARS_ASSERT_TOKEN -%token PARS_RND_TOKEN -%token PARS_RND_STR_TOKEN -%token PARS_ROW_PRINTF_TOKEN -%token PARS_COMMIT_TOKEN -%token PARS_ROLLBACK_TOKEN -%token PARS_WORK_TOKEN -%token PARS_UNSIGNED_TOKEN -%token PARS_EXIT_TOKEN -%token PARS_FUNCTION_TOKEN -%token PARS_LOCK_TOKEN -%token PARS_SHARE_TOKEN -%token PARS_MODE_TOKEN - -/* Operator precedence, lowest first; all levels are left-associative. -PARS_NE_TOKEN must share the comparison level: a token without a declared -precedence makes Bison fall back to shifting, so that "a <> b AND c" would be -parsed as "a <> (b AND c)" and "a + b <> c" as "a + (b <> c)". -The generated parser files have to be rebuilt (see pars0lex.l) after changing -these declarations. */ -%left PARS_AND_TOKEN PARS_OR_TOKEN -%left PARS_NOT_TOKEN -%left '=' '<' '>' PARS_GE_TOKEN PARS_LE_TOKEN PARS_NE_TOKEN -%left '-' '+' -%left '*' '/' -%left NEG /* negation--unary minus */ -%left '%' - -/* Grammar follows */ -%% - -top_statement: - procedure_definition ';' - -statement: - stored_procedure_call - | predefined_procedure_call ';' - | while_statement ';' - | for_statement ';' - | exit_statement ';' - | if_statement ';' - | return_statement ';' - | assignment_statement ';' - | select_statement ';' - | insert_statement ';' -
| row_printf_statement ';' - | delete_statement_searched ';' - | delete_statement_positioned ';' - | update_statement_searched ';' - | update_statement_positioned ';' - | open_cursor_statement ';' - | fetch_statement ';' - | close_cursor_statement ';' - | commit_statement ';' - | rollback_statement ';' - | create_table ';' - | create_index ';' -; - -statement_list: - statement { $$ = que_node_list_add_last(NULL, $1); } - | statement_list statement - { $$ = que_node_list_add_last($1, $2); } -; - -exp: - PARS_ID_TOKEN { $$ = $1;} - | function_name '(' exp_list ')' - { $$ = pars_func($1, $3); } - | PARS_INT_LIT { $$ = $1;} - | PARS_FLOAT_LIT { $$ = $1;} - | PARS_STR_LIT { $$ = $1;} - | PARS_FIXBINARY_LIT { $$ = $1;} - | PARS_BLOB_LIT { $$ = $1;} - | PARS_NULL_LIT { $$ = $1;} - | PARS_SQL_TOKEN { $$ = $1;} - | exp '+' exp { $$ = pars_op('+', $1, $3); } - | exp '-' exp { $$ = pars_op('-', $1, $3); } - | exp '*' exp { $$ = pars_op('*', $1, $3); } - | exp '/' exp { $$ = pars_op('/', $1, $3); } - | '-' exp %prec NEG { $$ = pars_op('-', $2, NULL); } - | '(' exp ')' { $$ = $2; } - | exp '=' exp { $$ = pars_op('=', $1, $3); } - | exp '<' exp { $$ = pars_op('<', $1, $3); } - | exp '>' exp { $$ = pars_op('>', $1, $3); } - | exp PARS_GE_TOKEN exp { $$ = pars_op(PARS_GE_TOKEN, $1, $3); } - | exp PARS_LE_TOKEN exp { $$ = pars_op(PARS_LE_TOKEN, $1, $3); } - | exp PARS_NE_TOKEN exp { $$ = pars_op(PARS_NE_TOKEN, $1, $3); } - | exp PARS_AND_TOKEN exp{ $$ = pars_op(PARS_AND_TOKEN, $1, $3); } - | exp PARS_OR_TOKEN exp { $$ = pars_op(PARS_OR_TOKEN, $1, $3); } - | PARS_NOT_TOKEN exp { $$ = pars_op(PARS_NOT_TOKEN, $2, NULL); } - | PARS_ID_TOKEN '%' PARS_NOTFOUND_TOKEN - { $$ = pars_op(PARS_NOTFOUND_TOKEN, $1, NULL); } - | PARS_SQL_TOKEN '%' PARS_NOTFOUND_TOKEN - { $$ = pars_op(PARS_NOTFOUND_TOKEN, $1, NULL); } -; - -function_name: - PARS_TO_CHAR_TOKEN { $$ = &pars_to_char_token; } - | PARS_TO_NUMBER_TOKEN { $$ = &pars_to_number_token; } - | PARS_TO_BINARY_TOKEN { $$ = 
&pars_to_binary_token; } - | PARS_BINARY_TO_NUMBER_TOKEN - { $$ = &pars_binary_to_number_token; } - | PARS_SUBSTR_TOKEN { $$ = &pars_substr_token; } - | PARS_CONCAT_TOKEN { $$ = &pars_concat_token; } - | PARS_INSTR_TOKEN { $$ = &pars_instr_token; } - | PARS_LENGTH_TOKEN { $$ = &pars_length_token; } - | PARS_SYSDATE_TOKEN { $$ = &pars_sysdate_token; } - | PARS_RND_TOKEN { $$ = &pars_rnd_token; } - | PARS_RND_STR_TOKEN { $$ = &pars_rnd_str_token; } -; - -question_mark_list: - /* Nothing */ - | '?' - | question_mark_list ',' '?' -; - -stored_procedure_call: - '{' PARS_ID_TOKEN '(' question_mark_list ')' '}' - { $$ = pars_stored_procedure_call($2); } -; - -predefined_procedure_call: - predefined_procedure_name '(' exp_list ')' - { $$ = pars_procedure_call($1, $3); } -; - -predefined_procedure_name: - PARS_REPLSTR_TOKEN { $$ = &pars_replstr_token; } - | PARS_PRINTF_TOKEN { $$ = &pars_printf_token; } - | PARS_ASSERT_TOKEN { $$ = &pars_assert_token; } -; - -user_function_call: - PARS_ID_TOKEN '(' ')' { $$ = $1; } -; - -table_list: - PARS_ID_TOKEN { $$ = que_node_list_add_last(NULL, $1); } - | table_list ',' PARS_ID_TOKEN - { $$ = que_node_list_add_last($1, $3); } -; - -variable_list: - /* Nothing */ { $$ = NULL; } - | PARS_ID_TOKEN { $$ = que_node_list_add_last(NULL, $1); } - | variable_list ',' PARS_ID_TOKEN - { $$ = que_node_list_add_last($1, $3); } -; - -exp_list: - /* Nothing */ { $$ = NULL; } - | exp { $$ = que_node_list_add_last(NULL, $1);} - | exp_list ',' exp { $$ = que_node_list_add_last($1, $3); } -; - -select_item: - exp { $$ = $1; } - | PARS_COUNT_TOKEN '(' '*' ')' - { $$ = pars_func(&pars_count_token, - que_node_list_add_last(NULL, - sym_tab_add_int_lit( - pars_sym_tab_global, 1))); } - | PARS_COUNT_TOKEN '(' PARS_DISTINCT_TOKEN PARS_ID_TOKEN ')' - { $$ = pars_func(&pars_count_token, - que_node_list_add_last(NULL, - pars_func(&pars_distinct_token, - que_node_list_add_last( - NULL, $4)))); } - | PARS_SUM_TOKEN '(' exp ')' - { $$ = pars_func(&pars_sum_token, - 
que_node_list_add_last(NULL, - $3)); } -; - -select_item_list: - /* Nothing */ { $$ = NULL; } - | select_item { $$ = que_node_list_add_last(NULL, $1); } - | select_item_list ',' select_item - { $$ = que_node_list_add_last($1, $3); } -; - -select_list: - '*' { $$ = pars_select_list(&pars_star_denoter, - NULL); } - | select_item_list PARS_INTO_TOKEN variable_list - { $$ = pars_select_list($1, $3); } - | select_item_list { $$ = pars_select_list($1, NULL); } -; - -search_condition: - /* Nothing */ { $$ = NULL; } - | PARS_WHERE_TOKEN exp { $$ = $2; } -; - -for_update_clause: - /* Nothing */ { $$ = NULL; } - | PARS_FOR_TOKEN PARS_UPDATE_TOKEN - { $$ = &pars_update_token; } -; - -lock_shared_clause: - /* Nothing */ { $$ = NULL; } - | PARS_LOCK_TOKEN PARS_IN_TOKEN PARS_SHARE_TOKEN PARS_MODE_TOKEN - { $$ = &pars_share_token; } -; - -order_direction: - /* Nothing */ { $$ = &pars_asc_token; } - | PARS_ASC_TOKEN { $$ = &pars_asc_token; } - | PARS_DESC_TOKEN { $$ = &pars_desc_token; } -; - -order_by_clause: - /* Nothing */ { $$ = NULL; } - | PARS_ORDER_TOKEN PARS_BY_TOKEN PARS_ID_TOKEN order_direction - { $$ = pars_order_by($3, $4); } -; - -select_statement: - PARS_SELECT_TOKEN select_list - PARS_FROM_TOKEN table_list - search_condition - for_update_clause - lock_shared_clause - order_by_clause { $$ = pars_select_statement($2, $4, $5, - $6, $7, $8); } -; - -insert_statement_start: - PARS_INSERT_TOKEN PARS_INTO_TOKEN - PARS_ID_TOKEN { $$ = $3; } -; - -insert_statement: - insert_statement_start PARS_VALUES_TOKEN '(' exp_list ')' - { $$ = pars_insert_statement($1, $4, NULL); } - | insert_statement_start select_statement - { $$ = pars_insert_statement($1, NULL, $2); } -; - -column_assignment: - PARS_ID_TOKEN '=' exp { $$ = pars_column_assignment($1, $3); } -; - -column_assignment_list: - column_assignment { $$ = que_node_list_add_last(NULL, $1); } - | column_assignment_list ',' column_assignment - { $$ = que_node_list_add_last($1, $3); } -; - -cursor_positioned: - PARS_WHERE_TOKEN 
- PARS_CURRENT_TOKEN PARS_OF_TOKEN - PARS_ID_TOKEN { $$ = $4; } -; - -update_statement_start: - PARS_UPDATE_TOKEN PARS_ID_TOKEN - PARS_SET_TOKEN - column_assignment_list { $$ = pars_update_statement_start(FALSE, - $2, $4); } -; - -update_statement_searched: - update_statement_start - search_condition { $$ = pars_update_statement($1, NULL, $2); } -; - -update_statement_positioned: - update_statement_start - cursor_positioned { $$ = pars_update_statement($1, $2, NULL); } -; - -delete_statement_start: - PARS_DELETE_TOKEN PARS_FROM_TOKEN - PARS_ID_TOKEN { $$ = pars_update_statement_start(TRUE, - $3, NULL); } -; - -delete_statement_searched: - delete_statement_start - search_condition { $$ = pars_update_statement($1, NULL, $2); } -; - -delete_statement_positioned: - delete_statement_start - cursor_positioned { $$ = pars_update_statement($1, $2, NULL); } -; - -row_printf_statement: - PARS_ROW_PRINTF_TOKEN select_statement - { $$ = pars_row_printf_statement($2); } -; - -assignment_statement: - PARS_ID_TOKEN PARS_ASSIGN_TOKEN exp - { $$ = pars_assignment_statement($1, $3); } -; - -elsif_element: - PARS_ELSIF_TOKEN - exp PARS_THEN_TOKEN statement_list - { $$ = pars_elsif_element($2, $4); } -; - -elsif_list: - elsif_element { $$ = que_node_list_add_last(NULL, $1); } - | elsif_list elsif_element - { $$ = que_node_list_add_last($1, $2); } -; - -else_part: - /* Nothing */ { $$ = NULL; } - | PARS_ELSE_TOKEN statement_list - { $$ = $2; } - | elsif_list { $$ = $1; } -; - -if_statement: - PARS_IF_TOKEN exp PARS_THEN_TOKEN statement_list - else_part - PARS_END_TOKEN PARS_IF_TOKEN - { $$ = pars_if_statement($2, $4, $5); } -; - -while_statement: - PARS_WHILE_TOKEN exp PARS_LOOP_TOKEN statement_list - PARS_END_TOKEN PARS_LOOP_TOKEN - { $$ = pars_while_statement($2, $4); } -; - -for_statement: - PARS_FOR_TOKEN PARS_ID_TOKEN PARS_IN_TOKEN - exp PARS_DDOT_TOKEN exp - PARS_LOOP_TOKEN statement_list - PARS_END_TOKEN PARS_LOOP_TOKEN - { $$ = pars_for_statement($2, $4, $6, $8); } -; - 
-exit_statement: - PARS_EXIT_TOKEN { $$ = pars_exit_statement(); } -; - -return_statement: - PARS_RETURN_TOKEN { $$ = pars_return_statement(); } -; - -open_cursor_statement: - PARS_OPEN_TOKEN PARS_ID_TOKEN - { $$ = pars_open_statement( - ROW_SEL_OPEN_CURSOR, $2); } -; - -close_cursor_statement: - PARS_CLOSE_TOKEN PARS_ID_TOKEN - { $$ = pars_open_statement( - ROW_SEL_CLOSE_CURSOR, $2); } -; - -fetch_statement: - PARS_FETCH_TOKEN PARS_ID_TOKEN PARS_INTO_TOKEN variable_list - { $$ = pars_fetch_statement($2, $4, NULL); } - | PARS_FETCH_TOKEN PARS_ID_TOKEN PARS_INTO_TOKEN user_function_call - { $$ = pars_fetch_statement($2, NULL, $4); } -; - -column_def: - PARS_ID_TOKEN type_name opt_column_len opt_unsigned opt_not_null - { $$ = pars_column_def($1, $2, $3, $4, $5); } -; - -column_def_list: - column_def { $$ = que_node_list_add_last(NULL, $1); } - | column_def_list ',' column_def - { $$ = que_node_list_add_last($1, $3); } -; - -opt_column_len: - /* Nothing */ { $$ = NULL; } - | '(' PARS_INT_LIT ')' - { $$ = $2; } -; - -opt_unsigned: - /* Nothing */ { $$ = NULL; } - | PARS_UNSIGNED_TOKEN - { $$ = &pars_int_token; - /* pass any non-NULL pointer */ } -; - -opt_not_null: - /* Nothing */ { $$ = NULL; } - | PARS_NOT_TOKEN PARS_NULL_LIT - { $$ = &pars_int_token; - /* pass any non-NULL pointer */ } -; - -not_fit_in_memory: - /* Nothing */ { $$ = NULL; } - | PARS_DOES_NOT_FIT_IN_MEM_TOKEN - { $$ = &pars_int_token; - /* pass any non-NULL pointer */ } -; - -create_table: - PARS_CREATE_TOKEN PARS_TABLE_TOKEN - PARS_ID_TOKEN '(' column_def_list ')' - not_fit_in_memory { $$ = pars_create_table($3, $5, $7); } -; - -column_list: - PARS_ID_TOKEN { $$ = que_node_list_add_last(NULL, $1); } - | column_list ',' PARS_ID_TOKEN - { $$ = que_node_list_add_last($1, $3); } -; - -unique_def: - /* Nothing */ { $$ = NULL; } - | PARS_UNIQUE_TOKEN { $$ = &pars_unique_token; } -; - -clustered_def: - /* Nothing */ { $$ = NULL; } - | PARS_CLUSTERED_TOKEN { $$ = &pars_clustered_token; } -; - -create_index: 
- PARS_CREATE_TOKEN unique_def - clustered_def - PARS_INDEX_TOKEN - PARS_ID_TOKEN PARS_ON_TOKEN PARS_ID_TOKEN - '(' column_list ')' { $$ = pars_create_index($2, $3, $5, $7, $9); } -; - -commit_statement: - PARS_COMMIT_TOKEN PARS_WORK_TOKEN - { $$ = pars_commit_statement(); } -; - -rollback_statement: - PARS_ROLLBACK_TOKEN PARS_WORK_TOKEN - { $$ = pars_rollback_statement(); } -; - -type_name: - PARS_INT_TOKEN { $$ = &pars_int_token; } - | PARS_INTEGER_TOKEN { $$ = &pars_int_token; } - | PARS_CHAR_TOKEN { $$ = &pars_char_token; } - | PARS_BINARY_TOKEN { $$ = &pars_binary_token; } - | PARS_BLOB_TOKEN { $$ = &pars_blob_token; } -; - -parameter_declaration: - PARS_ID_TOKEN PARS_IN_TOKEN type_name - { $$ = pars_parameter_declaration($1, - PARS_INPUT, $3); } - | PARS_ID_TOKEN PARS_OUT_TOKEN type_name - { $$ = pars_parameter_declaration($1, - PARS_OUTPUT, $3); } -; - -parameter_declaration_list: - /* Nothing */ { $$ = NULL; } - | parameter_declaration { $$ = que_node_list_add_last(NULL, $1); } - | parameter_declaration_list ',' parameter_declaration - { $$ = que_node_list_add_last($1, $3); } -; - -variable_declaration: - PARS_ID_TOKEN type_name ';' - { $$ = pars_variable_declaration($1, $2); } -; - -variable_declaration_list: - /* Nothing */ - | variable_declaration - | variable_declaration_list variable_declaration -; - -cursor_declaration: - PARS_DECLARE_TOKEN PARS_CURSOR_TOKEN PARS_ID_TOKEN - PARS_IS_TOKEN select_statement ';' - { $$ = pars_cursor_declaration($3, $5); } -; - -function_declaration: - PARS_DECLARE_TOKEN PARS_FUNCTION_TOKEN PARS_ID_TOKEN ';' - { $$ = pars_function_declaration($3); } -; - -declaration: - cursor_declaration - | function_declaration -; - -declaration_list: - /* Nothing */ - | declaration - | declaration_list declaration -; - -procedure_definition: - PARS_PROCEDURE_TOKEN PARS_ID_TOKEN '(' parameter_declaration_list ')' - PARS_IS_TOKEN - variable_declaration_list - declaration_list - PARS_BEGIN_TOKEN - statement_list - PARS_END_TOKEN { $$ = 
pars_procedure_definition($2, $4, - $10); } -; - -%% diff --git a/storage/innobase/pars/pars0lex.l b/storage/innobase/pars/pars0lex.l deleted file mode 100644 index ad65034fab0..00000000000 --- a/storage/innobase/pars/pars0lex.l +++ /dev/null @@ -1,648 +0,0 @@ -/****************************************************** -SQL parser lexical analyzer: input file for the GNU Flex lexer generator - -(c) 1997 Innobase Oy - -Created 12/14/1997 Heikki Tuuri -Published under the GPL version 2 - -The InnoDB parser is frozen because MySQL takes care of SQL parsing. -Therefore we normally keep the InnoDB parser C files as they are, and do -not automatically generate them from pars0grm.y and pars0lex.l. - -How to make the InnoDB parser and lexer C files: - -1. Run ./make_flex.sh to generate lexer files. - -2. Run ./make_bison.sh to generate parser files. - -These instructions seem to work at least with bison-1.875d and flex-2.5.31 on -Linux. -*******************************************************/ - -%option nostdinit -%option 8bit -%option warn -%option pointer -%option never-interactive -%option nodefault -%option noinput -%option nounput -%option noyywrap -%option noyy_scan_buffer -%option noyy_scan_bytes -%option noyy_scan_string -%option nounistd - -%{ -#define YYSTYPE que_node_t* - -#include "univ.i" -#include "pars0pars.h" -#include "pars0grm.h" -#include "pars0sym.h" -#include "mem0mem.h" -#include "os0proc.h" - -#define malloc(A) ut_malloc(A) -#define free(A) ut_free(A) -#define realloc(P, A) ut_realloc(P, A) -#define exit(A) ut_error - -#define YY_INPUT(buf, result, max_size) pars_get_lex_chars(buf, &result, max_size) - -/* String buffer for removing quotes */ -static ulint stringbuf_len_alloc = 0; /* Allocated length */ -static ulint stringbuf_len = 0; /* Current length */ -static char* stringbuf; /* Start of buffer */ -/* Appends a string to the buffer. 
*/ -static -void -string_append( -/*==========*/ - const char* str, /* in: string to be appended */ - ulint len) /* in: length of the string */ -{ - if (stringbuf == NULL) { - stringbuf = malloc(1); - stringbuf_len_alloc = 1; - } - - if (stringbuf_len + len > stringbuf_len_alloc) { - while (stringbuf_len + len > stringbuf_len_alloc) { - stringbuf_len_alloc <<= 1; - } - stringbuf = realloc(stringbuf, stringbuf_len_alloc); - } - - memcpy(stringbuf + stringbuf_len, str, len); - stringbuf_len += len; -} - -%} - -DIGIT [0-9] -ID [a-z_A-Z][a-z_A-Z0-9]* -BOUND_LIT \:[a-z_A-Z0-9]+ -BOUND_ID \$[a-z_A-Z0-9]+ - -%x comment -%x quoted -%x id -%% - -{DIGIT}+ { - yylval = sym_tab_add_int_lit(pars_sym_tab_global, - atoi(yytext)); - return(PARS_INT_LIT); -} - -{DIGIT}+"."{DIGIT}* { - ut_error; /* not implemented */ - - return(PARS_FLOAT_LIT); -} - -{BOUND_LIT} { - ulint type; - - yylval = sym_tab_add_bound_lit(pars_sym_tab_global, - yytext + 1, &type); - - return((int) type); -} - -{BOUND_ID} { - yylval = sym_tab_add_bound_id(pars_sym_tab_global, - yytext + 1); - - return(PARS_ID_TOKEN); -} - -"'" { -/* Quoted character string literals are handled in an explicit -start state 'quoted'. This state is entered and the buffer for -the scanned string is emptied upon encountering a starting quote. - -In the state 'quoted', only two actions are possible (defined below). */ - BEGIN(quoted); - stringbuf_len = 0; -} -<quoted>[^\']+ { - /* Got a sequence of characters other than "'": - append to string buffer */ - string_append(yytext, yyleng); -} -<quoted>"'"+ { - /* Got a sequence of "'" characters: - append half of them to string buffer, - as "''" represents a single "'". - We apply truncating division, - so that "'''" will result in "'". */ - - string_append(yytext, yyleng / 2); - - /* If we got an odd number of quotes, then the - last quote we got is the terminating quote. - At the end of the string, we return to the - initial start state and report the scanned - string literal. 
*/ - - if (yyleng % 2) { - BEGIN(INITIAL); - yylval = sym_tab_add_str_lit( - pars_sym_tab_global, - (byte*) stringbuf, stringbuf_len); - return(PARS_STR_LIT); - } -} - -\" { -/* Quoted identifiers are handled in an explicit start state 'id'. -This state is entered and the buffer for the scanned string is emptied -upon encountering a starting quote. - -In the state 'id', only two actions are possible (defined below). */ - BEGIN(id); - stringbuf_len = 0; -} -<id>[^\"]+ { - /* Got a sequence of characters other than '"': - append to string buffer */ - string_append(yytext, yyleng); -} -<id>\"+ { - /* Got a sequence of '"' characters: - append half of them to string buffer, - as '""' represents a single '"'. - We apply truncating division, - so that '"""' will result in '"'. */ - - string_append(yytext, yyleng / 2); - - /* If we got an odd number of quotes, then the - last quote we got is the terminating quote. - At the end of the string, we return to the - initial start state and report the scanned - identifier. 
*/ - - if (yyleng % 2) { - BEGIN(INITIAL); - yylval = sym_tab_add_id( - pars_sym_tab_global, - (byte*) stringbuf, stringbuf_len); - - return(PARS_ID_TOKEN); - } -} - -"NULL" { - yylval = sym_tab_add_null_lit(pars_sym_tab_global); - - return(PARS_NULL_LIT); -} - -"SQL" { - /* Implicit cursor name */ - yylval = sym_tab_add_str_lit(pars_sym_tab_global, - (byte*) yytext, yyleng); - return(PARS_SQL_TOKEN); -} - -"AND" { - return(PARS_AND_TOKEN); -} - -"OR" { - return(PARS_OR_TOKEN); -} - -"NOT" { - return(PARS_NOT_TOKEN); -} - -"PROCEDURE" { - return(PARS_PROCEDURE_TOKEN); -} - -"IN" { - return(PARS_IN_TOKEN); -} - -"OUT" { - return(PARS_OUT_TOKEN); -} - -"BINARY" { - return(PARS_BINARY_TOKEN); -} - -"BLOB" { - return(PARS_BLOB_TOKEN); -} - -"INT" { - return(PARS_INT_TOKEN); -} - -"INTEGER" { - return(PARS_INT_TOKEN); -} - -"FLOAT" { - return(PARS_FLOAT_TOKEN); -} - -"CHAR" { - return(PARS_CHAR_TOKEN); -} - -"IS" { - return(PARS_IS_TOKEN); -} - -"BEGIN" { - return(PARS_BEGIN_TOKEN); -} - -"END" { - return(PARS_END_TOKEN); -} - -"IF" { - return(PARS_IF_TOKEN); -} - -"THEN" { - return(PARS_THEN_TOKEN); -} - -"ELSE" { - return(PARS_ELSE_TOKEN); -} - -"ELSIF" { - return(PARS_ELSIF_TOKEN); -} - -"LOOP" { - return(PARS_LOOP_TOKEN); -} - -"WHILE" { - return(PARS_WHILE_TOKEN); -} - -"RETURN" { - return(PARS_RETURN_TOKEN); -} - -"SELECT" { - return(PARS_SELECT_TOKEN); -} - -"SUM" { - return(PARS_SUM_TOKEN); -} - -"COUNT" { - return(PARS_COUNT_TOKEN); -} - -"DISTINCT" { - return(PARS_DISTINCT_TOKEN); -} - -"FROM" { - return(PARS_FROM_TOKEN); -} - -"WHERE" { - return(PARS_WHERE_TOKEN); -} - -"FOR" { - return(PARS_FOR_TOKEN); -} - -"READ" { - return(PARS_READ_TOKEN); -} - -"ORDER" { - return(PARS_ORDER_TOKEN); -} - -"BY" { - return(PARS_BY_TOKEN); -} - -"ASC" { - return(PARS_ASC_TOKEN); -} - -"DESC" { - return(PARS_DESC_TOKEN); -} - -"INSERT" { - return(PARS_INSERT_TOKEN); -} - -"INTO" { - return(PARS_INTO_TOKEN); -} - -"VALUES" { - return(PARS_VALUES_TOKEN); -} - -"UPDATE" { - 
return(PARS_UPDATE_TOKEN); -} - -"SET" { - return(PARS_SET_TOKEN); -} - -"DELETE" { - return(PARS_DELETE_TOKEN); -} - -"CURRENT" { - return(PARS_CURRENT_TOKEN); -} - -"OF" { - return(PARS_OF_TOKEN); -} - -"CREATE" { - return(PARS_CREATE_TOKEN); -} - -"TABLE" { - return(PARS_TABLE_TOKEN); -} - -"INDEX" { - return(PARS_INDEX_TOKEN); -} - -"UNIQUE" { - return(PARS_UNIQUE_TOKEN); -} - -"CLUSTERED" { - return(PARS_CLUSTERED_TOKEN); -} - -"DOES_NOT_FIT_IN_MEMORY" { - return(PARS_DOES_NOT_FIT_IN_MEM_TOKEN); -} - -"ON" { - return(PARS_ON_TOKEN); -} - -"DECLARE" { - return(PARS_DECLARE_TOKEN); -} - -"CURSOR" { - return(PARS_CURSOR_TOKEN); -} - -"OPEN" { - return(PARS_OPEN_TOKEN); -} - -"FETCH" { - return(PARS_FETCH_TOKEN); -} - -"CLOSE" { - return(PARS_CLOSE_TOKEN); -} - -"NOTFOUND" { - return(PARS_NOTFOUND_TOKEN); -} - -"TO_CHAR" { - return(PARS_TO_CHAR_TOKEN); -} - -"TO_NUMBER" { - return(PARS_TO_NUMBER_TOKEN); -} - -"TO_BINARY" { - return(PARS_TO_BINARY_TOKEN); -} - -"BINARY_TO_NUMBER" { - return(PARS_BINARY_TO_NUMBER_TOKEN); -} - -"SUBSTR" { - return(PARS_SUBSTR_TOKEN); -} - -"REPLSTR" { - return(PARS_REPLSTR_TOKEN); -} - -"CONCAT" { - return(PARS_CONCAT_TOKEN); -} - -"INSTR" { - return(PARS_INSTR_TOKEN); -} - -"LENGTH" { - return(PARS_LENGTH_TOKEN); -} - -"SYSDATE" { - return(PARS_SYSDATE_TOKEN); -} - -"PRINTF" { - return(PARS_PRINTF_TOKEN); -} - -"ASSERT" { - return(PARS_ASSERT_TOKEN); -} - -"RND" { - return(PARS_RND_TOKEN); -} - -"RND_STR" { - return(PARS_RND_STR_TOKEN); -} - -"ROW_PRINTF" { - return(PARS_ROW_PRINTF_TOKEN); -} - -"COMMIT" { - return(PARS_COMMIT_TOKEN); -} - -"ROLLBACK" { - return(PARS_ROLLBACK_TOKEN); -} - -"WORK" { - return(PARS_WORK_TOKEN); -} - -"UNSIGNED" { - return(PARS_UNSIGNED_TOKEN); -} - -"EXIT" { - return(PARS_EXIT_TOKEN); -} - -"FUNCTION" { - return(PARS_FUNCTION_TOKEN); -} - -"LOCK" { - return(PARS_LOCK_TOKEN); -} - -"SHARE" { - return(PARS_SHARE_TOKEN); -} - -"MODE" { - return(PARS_MODE_TOKEN); -} - -{ID} { - yylval = 
sym_tab_add_id(pars_sym_tab_global, - (byte*)yytext, - ut_strlen(yytext)); - return(PARS_ID_TOKEN); -} - -".." { - return(PARS_DDOT_TOKEN); -} - -":=" { - return(PARS_ASSIGN_TOKEN); -} - -"<=" { - return(PARS_LE_TOKEN); -} - -">=" { - return(PARS_GE_TOKEN); -} - -"<>" { - return(PARS_NE_TOKEN); -} - -"(" { - - return((int)(*yytext)); -} - -"=" { - - return((int)(*yytext)); -} - -">" { - - return((int)(*yytext)); -} - -"<" { - - return((int)(*yytext)); -} - -"," { - - return((int)(*yytext)); -} - -";" { - - return((int)(*yytext)); -} - -")" { - - return((int)(*yytext)); -} - -"+" { - - return((int)(*yytext)); -} - -"-" { - - return((int)(*yytext)); -} - -"*" { - - return((int)(*yytext)); -} - -"/" { - - return((int)(*yytext)); -} - -"%" { - - return((int)(*yytext)); -} - -"{" { - - return((int)(*yytext)); -} - -"}" { - - return((int)(*yytext)); -} - -"?" { - - return((int)(*yytext)); -} - -"/*" BEGIN(comment); /* eat up comment */ - -<comment>[^*]* -<comment>"*"+[^*/]* -<comment>"*"+"/" BEGIN(INITIAL); - -[ \t\n]+ /* eat up whitespace */ - - -. 
{ - /* Report the offending byte in hex. The cast through unsigned char - keeps a byte >= 0x80 from being sign-extended by the integer - promotion (plain char may be signed), which would print ffffffXX. */ - fprintf(stderr,"Unrecognized character: %02x\n", - (unsigned char) *yytext); - - ut_error; - - return(0); -} - -%% diff --git a/storage/innobase/pars/pars0opt.c b/storage/innobase/pars/pars0opt.c deleted file mode 100644 index 2abe6720235..00000000000 --- a/storage/innobase/pars/pars0opt.c +++ /dev/null @@ -1,1208 +0,0 @@ -/****************************************************** -Simple SQL optimizer - -(c) 1997 Innobase Oy - -Created 12/21/1997 Heikki Tuuri -*******************************************************/ - -#include "pars0opt.h" - -#ifdef UNIV_NONINL -#include "pars0opt.ic" -#endif - -#include "row0sel.h" -#include "row0ins.h" -#include "row0upd.h" -#include "dict0dict.h" -#include "dict0mem.h" -#include "que0que.h" -#include "pars0grm.h" -#include "pars0pars.h" -#include "lock0lock.h" - -#define OPT_EQUAL 1 /* comparison by = */ -#define OPT_COMPARISON 2 /* comparison by <, >, <=, or >= */ - -#define OPT_NOT_COND 1 -#define OPT_END_COND 2 -#define OPT_TEST_COND 3 -#define OPT_SCROLL_COND 4 - - -/*********************************************************************** -Inverts a comparison operator. */ -static -int -opt_invert_cmp_op( -/*==============*/ - /* out: the equivalent operator when the order of - the arguments is switched */ - int op) /* in: operator */ -{ - if (op == '<') { - return('>'); - } else if (op == '>') { - return('<'); - } else if (op == '=') { - return('='); - } else if (op == PARS_LE_TOKEN) { - return(PARS_GE_TOKEN); - } else if (op == PARS_GE_TOKEN) { - return(PARS_LE_TOKEN); - } else { - ut_error; - } - - return(0); -} - -/*********************************************************************** -Checks if the value of an expression can be calculated BEFORE the nth table -in a join is accessed. If this is the case, it can possibly be used in an -index search for the nth table.
*/ -static -ibool -opt_check_exp_determined_before( -/*============================*/ - /* out: TRUE if already determined */ - que_node_t* exp, /* in: expression */ - sel_node_t* sel_node, /* in: select node */ - ulint nth_table) /* in: nth table will be accessed */ -{ - func_node_t* func_node; - sym_node_t* sym_node; - dict_table_t* table; - que_node_t* arg; - ulint i; - - ut_ad(exp && sel_node); - - if (que_node_get_type(exp) == QUE_NODE_FUNC) { - func_node = exp; - - arg = func_node->args; - - while (arg) { - if (!opt_check_exp_determined_before(arg, sel_node, - nth_table)) { - return(FALSE); - } - - arg = que_node_get_next(arg); - } - - return(TRUE); - } - - ut_a(que_node_get_type(exp) == QUE_NODE_SYMBOL); - - sym_node = exp; - - if (sym_node->token_type != SYM_COLUMN) { - - return(TRUE); - } - - for (i = 0; i < nth_table; i++) { - - table = sel_node_get_nth_plan(sel_node, i)->table; - - if (sym_node->table == table) { - - return(TRUE); - } - } - - return(FALSE); -} - -/*********************************************************************** -Looks in a comparison condition if a column value is already restricted by -it BEFORE the nth table is accessed. */ -static -que_node_t* -opt_look_for_col_in_comparison_before( -/*==================================*/ - /* out: expression restricting the - value of the column, or NULL if not - known */ - ulint cmp_type, /* in: OPT_EQUAL, OPT_COMPARISON */ - ulint col_no, /* in: column number */ - func_node_t* search_cond, /* in: comparison condition */ - sel_node_t* sel_node, /* in: select node */ - ulint nth_table, /* in: nth table in a join (a query - from a single table is considered a - join of 1 table) */ - ulint* op) /* out: comparison operator ('=', - PARS_GE_TOKEN, ... 
); this is inverted - if the column appears on the right - side */ -{ - sym_node_t* sym_node; - dict_table_t* table; - que_node_t* exp; - que_node_t* arg; - - ut_ad(search_cond); - - ut_a((search_cond->func == '<') - || (search_cond->func == '>') - || (search_cond->func == '=') - || (search_cond->func == PARS_GE_TOKEN) - || (search_cond->func == PARS_LE_TOKEN)); - - table = sel_node_get_nth_plan(sel_node, nth_table)->table; - - if ((cmp_type == OPT_EQUAL) && (search_cond->func != '=')) { - - return(NULL); - - } else if ((cmp_type == OPT_COMPARISON) - && (search_cond->func != '<') - && (search_cond->func != '>') - && (search_cond->func != PARS_GE_TOKEN) - && (search_cond->func != PARS_LE_TOKEN)) { - - return(NULL); - } - - arg = search_cond->args; - - if (que_node_get_type(arg) == QUE_NODE_SYMBOL) { - sym_node = arg; - - if ((sym_node->token_type == SYM_COLUMN) - && (sym_node->table == table) - && (sym_node->col_no == col_no)) { - - /* sym_node contains the desired column id */ - - /* Check if the expression on the right side of the - operator is already determined */ - - exp = que_node_get_next(arg); - - if (opt_check_exp_determined_before(exp, sel_node, - nth_table)) { - *op = search_cond->func; - - return(exp); - } - } - } - - exp = search_cond->args; - arg = que_node_get_next(arg); - - if (que_node_get_type(arg) == QUE_NODE_SYMBOL) { - sym_node = arg; - - if ((sym_node->token_type == SYM_COLUMN) - && (sym_node->table == table) - && (sym_node->col_no == col_no)) { - - if (opt_check_exp_determined_before(exp, sel_node, - nth_table)) { - *op = opt_invert_cmp_op(search_cond->func); - - return(exp); - } - } - } - - return(NULL); -} - -/*********************************************************************** -Looks in a search condition if a column value is already restricted by the -search condition BEFORE the nth table is accessed. 
Takes into account that -if we will fetch in an ascending order, we cannot utilize an upper limit for -a column value; in a descending order, respectively, a lower limit. */ -static -que_node_t* -opt_look_for_col_in_cond_before( -/*============================*/ - /* out: expression restricting the - value of the column, or NULL if not - known */ - ulint cmp_type, /* in: OPT_EQUAL, OPT_COMPARISON */ - ulint col_no, /* in: column number */ - func_node_t* search_cond, /* in: search condition or NULL */ - sel_node_t* sel_node, /* in: select node */ - ulint nth_table, /* in: nth table in a join (a query - from a single table is considered a - join of 1 table) */ - ulint* op) /* out: comparison operator ('=', - PARS_GE_TOKEN, ... ) */ -{ - func_node_t* new_cond; - que_node_t* exp; - - if (search_cond == NULL) { - - return(NULL); - } - - ut_a(que_node_get_type(search_cond) == QUE_NODE_FUNC); - ut_a(search_cond->func != PARS_OR_TOKEN); - ut_a(search_cond->func != PARS_NOT_TOKEN); - - if (search_cond->func == PARS_AND_TOKEN) { - new_cond = search_cond->args; - - exp = opt_look_for_col_in_cond_before(cmp_type, col_no, - new_cond, sel_node, - nth_table, op); - if (exp) { - - return(exp); - } - - new_cond = que_node_get_next(new_cond); - - exp = opt_look_for_col_in_cond_before(cmp_type, col_no, - new_cond, sel_node, - nth_table, op); - return(exp); - } - - exp = opt_look_for_col_in_comparison_before(cmp_type, col_no, - search_cond, sel_node, - nth_table, op); - if (exp == NULL) { - - return(NULL); - } - - /* If we will fetch in an ascending order, we cannot utilize an upper - limit for a column value; in a descending order, respectively, a lower - limit */ - - if (sel_node->asc && ((*op == '<') || (*op == PARS_LE_TOKEN))) { - - return(NULL); - - } else if (!sel_node->asc - && ((*op == '>') || (*op == PARS_GE_TOKEN))) { - - return(NULL); - } - - return(exp); -} - -/*********************************************************************** -Calculates the goodness for an index 
according to a select node. The -goodness is 4 times the number of first fields in index whose values we -already know exactly in the query. If we have a comparison condition for -an additional field, 2 point are added. If the index is unique, and we know -all the unique fields for the index we add 1024 points. For a clustered index -we add 1 point. */ -static -ulint -opt_calc_index_goodness( -/*====================*/ - /* out: goodness */ - dict_index_t* index, /* in: index */ - sel_node_t* sel_node, /* in: parsed select node */ - ulint nth_table, /* in: nth table in a join */ - que_node_t** index_plan, /* in/out: comparison expressions for - this index */ - ulint* last_op) /* out: last comparison operator, if - goodness > 1 */ -{ - que_node_t* exp; - ulint goodness; - ulint n_fields; - ulint col_no; - ulint op; - ulint j; - - goodness = 0; - - /* Note that as higher level node pointers in the B-tree contain - page addresses as the last field, we must not put more fields in - the search tuple than dict_index_get_n_unique_in_tree(index); see - the note in btr_cur_search_to_nth_level. 
*/ - - n_fields = dict_index_get_n_unique_in_tree(index); - - for (j = 0; j < n_fields; j++) { - - col_no = dict_index_get_nth_col_no(index, j); - - exp = opt_look_for_col_in_cond_before( - OPT_EQUAL, col_no, sel_node->search_cond, - sel_node, nth_table, &op); - if (exp) { - /* The value for this column is exactly known already - at this stage of the join */ - - index_plan[j] = exp; - *last_op = op; - goodness += 4; - } else { - /* Look for non-equality comparisons */ - - exp = opt_look_for_col_in_cond_before( - OPT_COMPARISON, col_no, sel_node->search_cond, - sel_node, nth_table, &op); - if (exp) { - index_plan[j] = exp; - *last_op = op; - goodness += 2; - } - - break; - } - } - - if (goodness >= 4 * dict_index_get_n_unique(index)) { - goodness += 1024; - - if (index->type & DICT_CLUSTERED) { - - goodness += 1024; - } - } - - /* We have to test for goodness here, as last_op may note be set */ - if (goodness && index->type & DICT_CLUSTERED) { - - goodness++; - } - - return(goodness); -} - -/*********************************************************************** -Calculates the number of matched fields based on an index goodness. */ -UNIV_INLINE -ulint -opt_calc_n_fields_from_goodness( -/*============================*/ - /* out: number of excatly or partially matched - fields */ - ulint goodness) /* in: goodness */ -{ - return(((goodness % 1024) + 2) / 4); -} - -/*********************************************************************** -Converts a comparison operator to the corresponding search mode PAGE_CUR_GE, -... */ -UNIV_INLINE -ulint -opt_op_to_search_mode( -/*==================*/ - /* out: search mode */ - ibool asc, /* in: TRUE if the rows should be fetched in an - ascending order */ - ulint op) /* in: operator '=', PARS_GE_TOKEN, ... 
*/ -{ - if (op == '=') { - if (asc) { - return(PAGE_CUR_GE); - } else { - return(PAGE_CUR_LE); - } - } else if (op == '<') { - ut_a(!asc); - return(PAGE_CUR_L); - } else if (op == '>') { - ut_a(asc); - return(PAGE_CUR_G); - } else if (op == PARS_GE_TOKEN) { - ut_a(asc); - return(PAGE_CUR_GE); - } else if (op == PARS_LE_TOKEN) { - ut_a(!asc); - return(PAGE_CUR_LE); - } else { - ut_error; - } - - return(0); -} - -/*********************************************************************** -Determines if a node is an argument node of a function node. */ -static -ibool -opt_is_arg( -/*=======*/ - /* out: TRUE if is an argument */ - que_node_t* arg_node, /* in: possible argument node */ - func_node_t* func_node) /* in: function node */ -{ - que_node_t* arg; - - arg = func_node->args; - - while (arg) { - if (arg == arg_node) { - - return(TRUE); - } - - arg = que_node_get_next(arg); - } - - return(FALSE); -} - -/*********************************************************************** -Decides if the fetching of rows should be made in a descending order, and -also checks that the chosen query plan produces a result which satisfies -the order-by. 
*/ -static -void -opt_check_order_by( -/*===============*/ - sel_node_t* sel_node) /* in: select node; asserts an error - if the plan does not agree with the - order-by */ -{ - order_node_t* order_node; - dict_table_t* order_table; - ulint order_col_no; - plan_t* plan; - ulint i; - - if (!sel_node->order_by) { - - return; - } - - order_node = sel_node->order_by; - order_col_no = order_node->column->col_no; - order_table = order_node->column->table; - - /* If there is an order-by clause, the first non-exactly matched field - in the index used for the last table in the table list should be the - column defined in the order-by clause, and for all the other tables - we should get only at most a single row, otherwise we cannot presently - calculate the order-by, as we have no sort utility */ - - for (i = 0; i < sel_node->n_tables; i++) { - - plan = sel_node_get_nth_plan(sel_node, i); - - if (i < sel_node->n_tables - 1) { - ut_a(dict_index_get_n_unique(plan->index) - <= plan->n_exact_match); - } else { - ut_a(plan->table == order_table); - - ut_a((dict_index_get_n_unique(plan->index) - <= plan->n_exact_match) - || (dict_index_get_nth_col_no(plan->index, - plan->n_exact_match) - == order_col_no)); - } - } -} - -/*********************************************************************** -Optimizes a select. Decides which indexes to tables to use. The tables -are accessed in the order that they were written to the FROM part in the -select statement. 
*/ -static -void -opt_search_plan_for_table( -/*======================*/ - sel_node_t* sel_node, /* in: parsed select node */ - ulint i, /* in: this is the ith table */ - dict_table_t* table) /* in: table */ -{ - plan_t* plan; - dict_index_t* index; - dict_index_t* best_index; - ulint n_fields; - ulint goodness; - ulint last_op = 75946965; /* Eliminate a Purify - warning */ - ulint best_goodness; - ulint best_last_op = 0; /* remove warning */ - que_node_t* index_plan[256]; - que_node_t* best_index_plan[256]; - - plan = sel_node_get_nth_plan(sel_node, i); - - plan->table = table; - plan->asc = sel_node->asc; - plan->pcur_is_open = FALSE; - plan->cursor_at_end = FALSE; - - /* Calculate goodness for each index of the table */ - - index = dict_table_get_first_index(table); - best_index = index; /* Eliminate compiler warning */ - best_goodness = 0; - - /* should be do ... until ? comment by Jani */ - while (index) { - goodness = opt_calc_index_goodness(index, sel_node, i, - index_plan, &last_op); - if (goodness > best_goodness) { - - best_index = index; - best_goodness = goodness; - n_fields = opt_calc_n_fields_from_goodness(goodness); - - ut_memcpy(best_index_plan, index_plan, - n_fields * sizeof(void*)); - best_last_op = last_op; - } - - index = dict_table_get_next_index(index); - } - - plan->index = best_index; - - n_fields = opt_calc_n_fields_from_goodness(best_goodness); - - if (n_fields == 0) { - plan->tuple = NULL; - plan->n_exact_match = 0; - } else { - plan->tuple = dtuple_create(pars_sym_tab_global->heap, - n_fields); - dict_index_copy_types(plan->tuple, plan->index, n_fields); - - plan->tuple_exps = mem_heap_alloc(pars_sym_tab_global->heap, - n_fields * sizeof(void*)); - - ut_memcpy(plan->tuple_exps, best_index_plan, - n_fields * sizeof(void*)); - if (best_last_op == '=') { - plan->n_exact_match = n_fields; - } else { - plan->n_exact_match = n_fields - 1; - } - - plan->mode = opt_op_to_search_mode(sel_node->asc, - best_last_op); - } - - if ((best_index->type 
& DICT_CLUSTERED) - && (plan->n_exact_match >= dict_index_get_n_unique(best_index))) { - - plan->unique_search = TRUE; - } else { - plan->unique_search = FALSE; - } - - plan->old_vers_heap = NULL; - - btr_pcur_init(&(plan->pcur)); - btr_pcur_init(&(plan->clust_pcur)); -} - -/*********************************************************************** -Looks at a comparison condition and decides if it can, and need, be tested for -a table AFTER the table has been accessed. */ -static -ulint -opt_classify_comparison( -/*====================*/ - /* out: OPT_NOT_COND if not for this - table, else OPT_END_COND, - OPT_TEST_COND, or OPT_SCROLL_COND, - where the last means that the - condition need not be tested, except - when scroll cursors are used */ - sel_node_t* sel_node, /* in: select node */ - ulint i, /* in: ith table in the join */ - func_node_t* cond) /* in: comparison condition */ -{ - plan_t* plan; - ulint n_fields; - ulint op; - ulint j; - - ut_ad(cond && sel_node); - - plan = sel_node_get_nth_plan(sel_node, i); - - /* Check if the condition is determined after the ith table has been - accessed, but not after the i - 1:th */ - - if (!opt_check_exp_determined_before(cond, sel_node, i + 1)) { - - return(OPT_NOT_COND); - } - - if ((i > 0) && opt_check_exp_determined_before(cond, sel_node, i)) { - - return(OPT_NOT_COND); - } - - /* If the condition is an exact match condition used in constructing - the search tuple, it is classified as OPT_END_COND */ - - if (plan->tuple) { - n_fields = dtuple_get_n_fields(plan->tuple); - } else { - n_fields = 0; - } - - for (j = 0; j < plan->n_exact_match; j++) { - - if (opt_is_arg(plan->tuple_exps[j], cond)) { - - return(OPT_END_COND); - } - } - - /* If the condition is an non-exact match condition used in - constructing the search tuple, it is classified as OPT_SCROLL_COND. 
- When the cursor is positioned, and if a non-scroll cursor is used, - there is no need to test this condition; if a scroll cursor is used - the testing is necessary when the cursor is reversed. */ - - if ((n_fields > plan->n_exact_match) - && opt_is_arg(plan->tuple_exps[n_fields - 1], cond)) { - - return(OPT_SCROLL_COND); - } - - /* If the condition is a non-exact match condition on the first field - in index for which there is no exact match, and it limits the search - range from the opposite side of the search tuple already BEFORE we - access the table, it is classified as OPT_END_COND */ - - if ((dict_index_get_n_fields(plan->index) > plan->n_exact_match) - && opt_look_for_col_in_comparison_before( - OPT_COMPARISON, - dict_index_get_nth_col_no(plan->index, - plan->n_exact_match), - cond, sel_node, i, &op)) { - - if (sel_node->asc && ((op == '<') || (op == PARS_LE_TOKEN))) { - - return(OPT_END_COND); - } - - if (!sel_node->asc && ((op == '>') || (op == PARS_GE_TOKEN))) { - - return(OPT_END_COND); - } - } - - /* Otherwise, cond is classified as OPT_TEST_COND */ - - return(OPT_TEST_COND); -} - -/*********************************************************************** -Recursively looks for test conditions for a table in a join. 
*/ -static -void -opt_find_test_conds( -/*================*/ - sel_node_t* sel_node, /* in: select node */ - ulint i, /* in: ith table in the join */ - func_node_t* cond) /* in: conjunction of search - conditions or NULL */ -{ - func_node_t* new_cond; - ulint class; - plan_t* plan; - - if (cond == NULL) { - - return; - } - - if (cond->func == PARS_AND_TOKEN) { - new_cond = cond->args; - - opt_find_test_conds(sel_node, i, new_cond); - - new_cond = que_node_get_next(new_cond); - - opt_find_test_conds(sel_node, i, new_cond); - - return; - } - - plan = sel_node_get_nth_plan(sel_node, i); - - class = opt_classify_comparison(sel_node, i, cond); - - if (class == OPT_END_COND) { - UT_LIST_ADD_LAST(cond_list, plan->end_conds, cond); - - } else if (class == OPT_TEST_COND) { - UT_LIST_ADD_LAST(cond_list, plan->other_conds, cond); - - } -} - -/*********************************************************************** -Normalizes a list of comparison conditions so that a column of the table -appears on the left side of the comparison if possible. This is accomplished -by switching the arguments of the operator. 
*/ -static -void -opt_normalize_cmp_conds( -/*====================*/ - func_node_t* cond, /* in: first in a list of comparison - conditions, or NULL */ - dict_table_t* table) /* in: table */ -{ - que_node_t* arg1; - que_node_t* arg2; - sym_node_t* sym_node; - - while (cond) { - arg1 = cond->args; - arg2 = que_node_get_next(arg1); - - if (que_node_get_type(arg2) == QUE_NODE_SYMBOL) { - - sym_node = arg2; - - if ((sym_node->token_type == SYM_COLUMN) - && (sym_node->table == table)) { - - /* Switch the order of the arguments */ - - cond->args = arg2; - que_node_list_add_last(NULL, arg2); - que_node_list_add_last(arg2, arg1); - - /* Invert the operator */ - cond->func = opt_invert_cmp_op(cond->func); - } - } - - cond = UT_LIST_GET_NEXT(cond_list, cond); - } -} - -/*********************************************************************** -Finds out the search condition conjuncts we can, and need, to test as the ith -table in a join is accessed. The search tuple can eliminate the need to test -some conjuncts. */ -static -void -opt_determine_and_normalize_test_conds( -/*===================================*/ - sel_node_t* sel_node, /* in: select node */ - ulint i) /* in: ith table in the join */ -{ - plan_t* plan; - - plan = sel_node_get_nth_plan(sel_node, i); - - UT_LIST_INIT(plan->end_conds); - UT_LIST_INIT(plan->other_conds); - - /* Recursively go through the conjuncts and classify them */ - - opt_find_test_conds(sel_node, i, sel_node->search_cond); - - opt_normalize_cmp_conds(UT_LIST_GET_FIRST(plan->end_conds), - plan->table); - - ut_a(UT_LIST_GET_LEN(plan->end_conds) >= plan->n_exact_match); -} - -/*********************************************************************** -Looks for occurrences of the columns of the table in the query subgraph and -adds them to the list of columns if an occurrence of the same column does not -already exist in the list. 
If the column is already in the list, puts a value -indirection to point to the occurrence in the column list, except if the -column occurrence we are looking at is in the column list, in which case -nothing is done. */ - -void -opt_find_all_cols( -/*==============*/ - ibool copy_val, /* in: if TRUE, new found columns are - added as columns to copy */ - dict_index_t* index, /* in: index of the table to use */ - sym_node_list_t* col_list, /* in: base node of a list where - to add new found columns */ - plan_t* plan, /* in: plan or NULL */ - que_node_t* exp) /* in: expression or condition or - NULL */ -{ - func_node_t* func_node; - que_node_t* arg; - sym_node_t* sym_node; - sym_node_t* col_node; - ulint col_pos; - - if (exp == NULL) { - - return; - } - - if (que_node_get_type(exp) == QUE_NODE_FUNC) { - func_node = exp; - - arg = func_node->args; - - while (arg) { - opt_find_all_cols(copy_val, index, col_list, plan, - arg); - arg = que_node_get_next(arg); - } - - return; - } - - ut_a(que_node_get_type(exp) == QUE_NODE_SYMBOL); - - sym_node = exp; - - if (sym_node->token_type != SYM_COLUMN) { - - return; - } - - if (sym_node->table != index->table) { - - return; - } - - /* Look for an occurrence of the same column in the plan column - list */ - - col_node = UT_LIST_GET_FIRST(*col_list); - - while (col_node) { - if (col_node->col_no == sym_node->col_no) { - - if (col_node == sym_node) { - /* sym_node was already in a list: do - nothing */ - - return; - } - - /* Put an indirection */ - sym_node->indirection = col_node; - sym_node->alias = col_node; - - return; - } - - col_node = UT_LIST_GET_NEXT(col_var_list, col_node); - } - - /* The same column did not occur in the list: add it */ - - UT_LIST_ADD_LAST(col_var_list, *col_list, sym_node); - - sym_node->copy_val = copy_val; - - /* Fill in the field_no fields in sym_node */ - - sym_node->field_nos[SYM_CLUST_FIELD_NO] = dict_index_get_nth_col_pos( - dict_table_get_first_index(index->table), sym_node->col_no); - if 
(!(index->type & DICT_CLUSTERED)) { - - ut_a(plan); - - col_pos = dict_index_get_nth_col_pos(index, sym_node->col_no); - - if (col_pos == ULINT_UNDEFINED) { - - plan->must_get_clust = TRUE; - } - - sym_node->field_nos[SYM_SEC_FIELD_NO] = col_pos; - } -} - -/*********************************************************************** -Looks for occurrences of the columns of the table in conditions which are -not yet determined AFTER the join operation has fetched a row in the ith -table. The values for these column must be copied to dynamic memory for -later use. */ -static -void -opt_find_copy_cols( -/*===============*/ - sel_node_t* sel_node, /* in: select node */ - ulint i, /* in: ith table in the join */ - func_node_t* search_cond) /* in: search condition or NULL */ -{ - func_node_t* new_cond; - plan_t* plan; - - if (search_cond == NULL) { - - return; - } - - ut_ad(que_node_get_type(search_cond) == QUE_NODE_FUNC); - - if (search_cond->func == PARS_AND_TOKEN) { - new_cond = search_cond->args; - - opt_find_copy_cols(sel_node, i, new_cond); - - new_cond = que_node_get_next(new_cond); - - opt_find_copy_cols(sel_node, i, new_cond); - - return; - } - - if (!opt_check_exp_determined_before(search_cond, sel_node, i + 1)) { - - /* Any ith table columns occurring in search_cond should be - copied, as this condition cannot be tested already on the - fetch from the ith table */ - - plan = sel_node_get_nth_plan(sel_node, i); - - opt_find_all_cols(TRUE, plan->index, &(plan->columns), plan, - search_cond); - } -} - -/*********************************************************************** -Classifies the table columns according to whether we use the column only while -holding the latch on the page, or whether we have to copy the column value to -dynamic memory. Puts the first occurrence of a column to either list in the -plan node, and puts indirections to later occurrences of the column. 
*/ -static -void -opt_classify_cols( -/*==============*/ - sel_node_t* sel_node, /* in: select node */ - ulint i) /* in: ith table in the join */ -{ - plan_t* plan; - que_node_t* exp; - - plan = sel_node_get_nth_plan(sel_node, i); - - /* The final value of the following field will depend on the - environment of the select statement: */ - - plan->must_get_clust = FALSE; - - UT_LIST_INIT(plan->columns); - - /* All select list columns should be copied: therefore TRUE as the - first argument */ - - exp = sel_node->select_list; - - while (exp) { - opt_find_all_cols(TRUE, plan->index, &(plan->columns), plan, - exp); - exp = que_node_get_next(exp); - } - - opt_find_copy_cols(sel_node, i, sel_node->search_cond); - - /* All remaining columns in the search condition are temporary - columns: therefore FALSE */ - - opt_find_all_cols(FALSE, plan->index, &(plan->columns), plan, - sel_node->search_cond); -} - -/*********************************************************************** -Fills in the info in plan which is used in accessing a clustered index -record. The columns must already be classified for the plan node. 
*/ -static -void -opt_clust_access( -/*=============*/ - sel_node_t* sel_node, /* in: select node */ - ulint n) /* in: nth table in select */ -{ - plan_t* plan; - dict_table_t* table; - dict_index_t* clust_index; - dict_index_t* index; - mem_heap_t* heap; - ulint n_fields; - ulint pos; - ulint i; - - plan = sel_node_get_nth_plan(sel_node, n); - - index = plan->index; - - /* The final value of the following field depends on the environment - of the select statement: */ - - plan->no_prefetch = FALSE; - - if (index->type & DICT_CLUSTERED) { - plan->clust_map = NULL; - plan->clust_ref = NULL; - - return; - } - - table = index->table; - - clust_index = dict_table_get_first_index(table); - - n_fields = dict_index_get_n_unique(clust_index); - - heap = pars_sym_tab_global->heap; - - plan->clust_ref = dtuple_create(heap, n_fields); - - dict_index_copy_types(plan->clust_ref, clust_index, n_fields); - - plan->clust_map = mem_heap_alloc(heap, n_fields * sizeof(ulint)); - - for (i = 0; i < n_fields; i++) { - pos = dict_index_get_nth_field_pos(index, clust_index, i); - - ut_a(pos != ULINT_UNDEFINED); - - /* We optimize here only queries to InnoDB's internal system - tables, and they should not contain column prefix indexes. */ - - if (dict_index_get_nth_field(index, pos)->prefix_len != 0 - || dict_index_get_nth_field(clust_index, i) - ->prefix_len != 0) { - fprintf(stderr, - "InnoDB: Error in pars0opt.c:" - " table %s has prefix_len != 0\n", - index->table_name); - } - - *(plan->clust_map + i) = pos; - - ut_ad(pos != ULINT_UNDEFINED); - } -} - -/*********************************************************************** -Optimizes a select. Decides which indexes to tables to use. The tables -are accessed in the order that they were written to the FROM part in the -select statement. 
*/ - -void -opt_search_plan( -/*============*/ - sel_node_t* sel_node) /* in: parsed select node */ -{ - sym_node_t* table_node; - dict_table_t* table; - order_node_t* order_by; - ulint i; - - sel_node->plans = mem_heap_alloc(pars_sym_tab_global->heap, - sel_node->n_tables * sizeof(plan_t)); - - /* Analyze the search condition to find out what we know at each - join stage about the conditions that the columns of a table should - satisfy */ - - table_node = sel_node->table_list; - - if (sel_node->order_by == NULL) { - sel_node->asc = TRUE; - } else { - order_by = sel_node->order_by; - - sel_node->asc = order_by->asc; - } - - for (i = 0; i < sel_node->n_tables; i++) { - - table = table_node->table; - - /* Choose index through which to access the table */ - - opt_search_plan_for_table(sel_node, i, table); - - /* Determine the search condition conjuncts we can test at - this table; normalize the end conditions */ - - opt_determine_and_normalize_test_conds(sel_node, i); - - table_node = que_node_get_next(table_node); - } - - table_node = sel_node->table_list; - - for (i = 0; i < sel_node->n_tables; i++) { - - /* Classify the table columns into those we only need to access - but not copy, and to those we must copy to dynamic memory */ - - opt_classify_cols(sel_node, i); - - /* Calculate possible info for accessing the clustered index - record */ - - opt_clust_access(sel_node, i); - - table_node = que_node_get_next(table_node); - } - - /* Check that the plan obeys a possible order-by clause: if not, - an assertion error occurs */ - - opt_check_order_by(sel_node); - -#ifdef UNIV_SQL_DEBUG - opt_print_query_plan(sel_node); -#endif -} - -/************************************************************************ -Prints info of a query plan. */ - -void -opt_print_query_plan( -/*=================*/ - sel_node_t* sel_node) /* in: select node */ -{ - plan_t* plan; - ulint n_fields; - ulint i; - - fputs("QUERY PLAN FOR A SELECT NODE\n", stderr); - - fputs(sel_node->asc ? "Asc. 
search; " : "Desc. search; ", stderr); - - if (sel_node->set_x_locks) { - fputs("sets row x-locks; ", stderr); - ut_a(sel_node->row_lock_mode == LOCK_X); - ut_a(!sel_node->consistent_read); - } else if (sel_node->consistent_read) { - fputs("consistent read; ", stderr); - } else { - ut_a(sel_node->row_lock_mode == LOCK_S); - fputs("sets row s-locks; ", stderr); - } - - putc('\n', stderr); - - for (i = 0; i < sel_node->n_tables; i++) { - plan = sel_node_get_nth_plan(sel_node, i); - - if (plan->tuple) { - n_fields = dtuple_get_n_fields(plan->tuple); - } else { - n_fields = 0; - } - - fputs("Table ", stderr); - dict_index_name_print(stderr, NULL, plan->index); - fprintf(stderr,"; exact m. %lu, match %lu, end conds %lu\n", - (unsigned long) plan->n_exact_match, - (unsigned long) n_fields, - (unsigned long) UT_LIST_GET_LEN(plan->end_conds)); - } -} diff --git a/storage/innobase/pars/pars0pars.c b/storage/innobase/pars/pars0pars.c deleted file mode 100644 index 89f6f862995..00000000000 --- a/storage/innobase/pars/pars0pars.c +++ /dev/null @@ -1,2200 +0,0 @@ -/****************************************************** -SQL parser - -(c) 1996 Innobase Oy - -Created 11/19/1996 Heikki Tuuri -*******************************************************/ - -/* Historical note: Innobase executed its first SQL string (CREATE TABLE) -on 1/27/1998 */ - -#include "pars0pars.h" - -#ifdef UNIV_NONINL -#include "pars0pars.ic" -#endif - -#include "row0sel.h" -#include "row0ins.h" -#include "row0upd.h" -#include "dict0dict.h" -#include "dict0mem.h" -#include "dict0crea.h" -#include "que0que.h" -#include "pars0grm.h" -#include "pars0opt.h" -#include "data0data.h" -#include "data0type.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "lock0lock.h" -#include "eval0eval.h" - -#ifdef UNIV_SQL_DEBUG -/* If the following is set TRUE, the lexer will print the SQL string -as it tokenizes it */ - -ibool pars_print_lexed = FALSE; -#endif /* UNIV_SQL_DEBUG */ - -/* Global variable used while parsing 
a single procedure or query : the code is -NOT re-entrant */ -sym_tab_t* pars_sym_tab_global; - -/* Global variables used to denote certain reserved words, used in -constructing the parsing tree */ - -pars_res_word_t pars_to_char_token = {PARS_TO_CHAR_TOKEN}; -pars_res_word_t pars_to_number_token = {PARS_TO_NUMBER_TOKEN}; -pars_res_word_t pars_to_binary_token = {PARS_TO_BINARY_TOKEN}; -pars_res_word_t pars_binary_to_number_token = {PARS_BINARY_TO_NUMBER_TOKEN}; -pars_res_word_t pars_substr_token = {PARS_SUBSTR_TOKEN}; -pars_res_word_t pars_replstr_token = {PARS_REPLSTR_TOKEN}; -pars_res_word_t pars_concat_token = {PARS_CONCAT_TOKEN}; -pars_res_word_t pars_instr_token = {PARS_INSTR_TOKEN}; -pars_res_word_t pars_length_token = {PARS_LENGTH_TOKEN}; -pars_res_word_t pars_sysdate_token = {PARS_SYSDATE_TOKEN}; -pars_res_word_t pars_printf_token = {PARS_PRINTF_TOKEN}; -pars_res_word_t pars_assert_token = {PARS_ASSERT_TOKEN}; -pars_res_word_t pars_rnd_token = {PARS_RND_TOKEN}; -pars_res_word_t pars_rnd_str_token = {PARS_RND_STR_TOKEN}; -pars_res_word_t pars_count_token = {PARS_COUNT_TOKEN}; -pars_res_word_t pars_sum_token = {PARS_SUM_TOKEN}; -pars_res_word_t pars_distinct_token = {PARS_DISTINCT_TOKEN}; -pars_res_word_t pars_binary_token = {PARS_BINARY_TOKEN}; -pars_res_word_t pars_blob_token = {PARS_BLOB_TOKEN}; -pars_res_word_t pars_int_token = {PARS_INT_TOKEN}; -pars_res_word_t pars_char_token = {PARS_CHAR_TOKEN}; -pars_res_word_t pars_float_token = {PARS_FLOAT_TOKEN}; -pars_res_word_t pars_update_token = {PARS_UPDATE_TOKEN}; -pars_res_word_t pars_asc_token = {PARS_ASC_TOKEN}; -pars_res_word_t pars_desc_token = {PARS_DESC_TOKEN}; -pars_res_word_t pars_open_token = {PARS_OPEN_TOKEN}; -pars_res_word_t pars_close_token = {PARS_CLOSE_TOKEN}; -pars_res_word_t pars_share_token = {PARS_SHARE_TOKEN}; -pars_res_word_t pars_unique_token = {PARS_UNIQUE_TOKEN}; -pars_res_word_t pars_clustered_token = {PARS_CLUSTERED_TOKEN}; - -/* Global variable used to denote the '*' in SELECT * 
FROM.. */ -#define PARS_STAR_DENOTER 12345678 -ulint pars_star_denoter = PARS_STAR_DENOTER; - - -/************************************************************************* -Determines the class of a function code. */ -static -ulint -pars_func_get_class( -/*================*/ - /* out: function class: PARS_FUNC_ARITH, ... */ - int func) /* in: function code: '=', PARS_GE_TOKEN, ... */ -{ - if ((func == '+') || (func == '-') || (func == '*') || (func == '/')) { - - return(PARS_FUNC_ARITH); - - } else if ((func == '=') || (func == '<') || (func == '>') - || (func == PARS_GE_TOKEN) || (func == PARS_LE_TOKEN) - || (func == PARS_NE_TOKEN)) { - - return(PARS_FUNC_CMP); - - } else if ((func == PARS_AND_TOKEN) || (func == PARS_OR_TOKEN) - || (func == PARS_NOT_TOKEN)) { - - return(PARS_FUNC_LOGICAL); - - } else if ((func == PARS_COUNT_TOKEN) || (func == PARS_SUM_TOKEN)) { - - return(PARS_FUNC_AGGREGATE); - - } else if ((func == PARS_TO_CHAR_TOKEN) - || (func == PARS_TO_NUMBER_TOKEN) - || (func == PARS_TO_BINARY_TOKEN) - || (func == PARS_BINARY_TO_NUMBER_TOKEN) - || (func == PARS_SUBSTR_TOKEN) - || (func == PARS_CONCAT_TOKEN) - || (func == PARS_LENGTH_TOKEN) - || (func == PARS_INSTR_TOKEN) - || (func == PARS_SYSDATE_TOKEN) - || (func == PARS_NOTFOUND_TOKEN) - || (func == PARS_PRINTF_TOKEN) - || (func == PARS_ASSERT_TOKEN) - || (func == PARS_RND_TOKEN) - || (func == PARS_RND_STR_TOKEN) - || (func == PARS_REPLSTR_TOKEN)) { - - return(PARS_FUNC_PREDEFINED); - } else { - return(PARS_FUNC_OTHER); - } -} - -/************************************************************************* -Parses an operator or predefined function expression. 
*/ -static -func_node_t* -pars_func_low( -/*==========*/ - /* out, own: function node in a query tree */ - int func, /* in: function token code */ - que_node_t* arg) /* in: first argument in the argument list */ -{ - func_node_t* node; - - node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(func_node_t)); - - node->common.type = QUE_NODE_FUNC; - dfield_set_data(&(node->common.val), NULL, 0); - node->common.val_buf_size = 0; - - node->func = func; - - node->class = pars_func_get_class(func); - - node->args = arg; - - UT_LIST_ADD_LAST(func_node_list, pars_sym_tab_global->func_node_list, - node); - return(node); -} - -/************************************************************************* -Parses a function expression. */ - -func_node_t* -pars_func( -/*======*/ - /* out, own: function node in a query tree */ - que_node_t* res_word,/* in: function name reserved word */ - que_node_t* arg) /* in: first argument in the argument list */ -{ - return(pars_func_low(((pars_res_word_t*)res_word)->code, arg)); -} - -/************************************************************************* -Parses an operator expression. */ - -func_node_t* -pars_op( -/*====*/ - /* out, own: function node in a query tree */ - int func, /* in: operator token code */ - que_node_t* arg1, /* in: first argument */ - que_node_t* arg2) /* in: second argument or NULL for an unary - operator */ -{ - que_node_list_add_last(NULL, arg1); - - if (arg2) { - que_node_list_add_last(arg1, arg2); - } - - return(pars_func_low(func, arg1)); -} - -/************************************************************************* -Parses an ORDER BY clause. Order by a single column only is supported. 
*/ - -order_node_t* -pars_order_by( -/*==========*/ - /* out, own: order-by node in a query tree */ - sym_node_t* column, /* in: column name */ - pars_res_word_t* asc) /* in: &pars_asc_token or pars_desc_token */ -{ - order_node_t* node; - - node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(order_node_t)); - - node->common.type = QUE_NODE_ORDER; - - node->column = column; - - if (asc == &pars_asc_token) { - node->asc = TRUE; - } else { - ut_a(asc == &pars_desc_token); - node->asc = FALSE; - } - - return(node); -} - -/************************************************************************* -Resolves the data type of a function in an expression. The argument data -types must already be resolved. */ -static -void -pars_resolve_func_data_type( -/*========================*/ - func_node_t* node) /* in: function node */ -{ - que_node_t* arg; - ulint func; - - ut_a(que_node_get_type(node) == QUE_NODE_FUNC); - - arg = node->args; - - func = node->func; - - if ((func == PARS_SUM_TOKEN) - || (func == '+') || (func == '-') || (func == '*') - || (func == '/') || (func == '+')) { - - /* Inherit the data type from the first argument (which must - not be the SQL null literal whose type is DATA_ERROR) */ - - dtype_copy(que_node_get_data_type(node), - que_node_get_data_type(arg)); - - ut_a(dtype_get_mtype(que_node_get_data_type(node)) - == DATA_INT); - } else if (func == PARS_COUNT_TOKEN) { - ut_a(arg); - dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4); - - } else if (func == PARS_TO_CHAR_TOKEN) { - ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT); - dtype_set(que_node_get_data_type(node), DATA_VARCHAR, - DATA_ENGLISH, 0); - } else if (func == PARS_TO_BINARY_TOKEN) { - if (dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT) { - dtype_set(que_node_get_data_type(node), DATA_VARCHAR, - DATA_ENGLISH, 0); - } else { - dtype_set(que_node_get_data_type(node), DATA_BINARY, - 0, 0); - } - } else if (func == PARS_TO_NUMBER_TOKEN) { - 
ut_a(dtype_get_mtype(que_node_get_data_type(arg)) - == DATA_VARCHAR); - dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4); - - } else if (func == PARS_BINARY_TO_NUMBER_TOKEN) { - ut_a(dtype_get_mtype(que_node_get_data_type(arg)) - == DATA_VARCHAR); - dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4); - - } else if (func == PARS_LENGTH_TOKEN) { - ut_a(dtype_get_mtype(que_node_get_data_type(arg)) - == DATA_VARCHAR); - dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4); - - } else if (func == PARS_INSTR_TOKEN) { - ut_a(dtype_get_mtype(que_node_get_data_type(arg)) - == DATA_VARCHAR); - dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4); - - } else if (func == PARS_SYSDATE_TOKEN) { - ut_a(arg == NULL); - dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4); - - } else if ((func == PARS_SUBSTR_TOKEN) - || (func == PARS_CONCAT_TOKEN)) { - - ut_a(dtype_get_mtype(que_node_get_data_type(arg)) - == DATA_VARCHAR); - dtype_set(que_node_get_data_type(node), DATA_VARCHAR, - DATA_ENGLISH, 0); - - } else if ((func == '>') || (func == '<') || (func == '=') - || (func == PARS_GE_TOKEN) - || (func == PARS_LE_TOKEN) - || (func == PARS_NE_TOKEN) - || (func == PARS_AND_TOKEN) - || (func == PARS_OR_TOKEN) - || (func == PARS_NOT_TOKEN) - || (func == PARS_NOTFOUND_TOKEN)) { - - /* We currently have no iboolean type: use integer type */ - dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4); - - } else if (func == PARS_RND_TOKEN) { - ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT); - - dtype_set(que_node_get_data_type(node), DATA_INT, 0, 4); - - } else if (func == PARS_RND_STR_TOKEN) { - ut_a(dtype_get_mtype(que_node_get_data_type(arg)) == DATA_INT); - - dtype_set(que_node_get_data_type(node), DATA_VARCHAR, - DATA_ENGLISH, 0); - } else { - ut_error; - } -} - -/************************************************************************* -Resolves the meaning of variables in an expression and the data types of -functions. 
It is an error if some identifier cannot be resolved here. */ -static -void -pars_resolve_exp_variables_and_types( -/*=================================*/ - sel_node_t* select_node, /* in: select node or NULL; if - this is not NULL then the variable - sym nodes are added to the - copy_variables list of select_node */ - que_node_t* exp_node) /* in: expression */ -{ - func_node_t* func_node; - que_node_t* arg; - sym_node_t* sym_node; - sym_node_t* node; - - ut_a(exp_node); - - if (que_node_get_type(exp_node) == QUE_NODE_FUNC) { - func_node = exp_node; - - arg = func_node->args; - - while (arg) { - pars_resolve_exp_variables_and_types(select_node, arg); - - arg = que_node_get_next(arg); - } - - pars_resolve_func_data_type(func_node); - - return; - } - - ut_a(que_node_get_type(exp_node) == QUE_NODE_SYMBOL); - - sym_node = exp_node; - - if (sym_node->resolved) { - - return; - } - - /* Not resolved yet: look in the symbol table for a variable - or a cursor or a function with the same name */ - - node = UT_LIST_GET_FIRST(pars_sym_tab_global->sym_list); - - while (node) { - if (node->resolved - && ((node->token_type == SYM_VAR) - || (node->token_type == SYM_CURSOR) - || (node->token_type == SYM_FUNCTION)) - && node->name - && (sym_node->name_len == node->name_len) - && (ut_memcmp(sym_node->name, node->name, - node->name_len) == 0)) { - - /* Found a variable or a cursor declared with - the same name */ - - break; - } - - node = UT_LIST_GET_NEXT(sym_list, node); - } - - if (!node) { - fprintf(stderr, "PARSER ERROR: Unresolved identifier %s\n", - sym_node->name); - } - - ut_a(node); - - sym_node->resolved = TRUE; - sym_node->token_type = SYM_IMPLICIT_VAR; - sym_node->alias = node; - sym_node->indirection = node; - - if (select_node) { - UT_LIST_ADD_LAST(col_var_list, select_node->copy_variables, - sym_node); - } - - dfield_set_type(que_node_get_val(sym_node), - que_node_get_data_type(node)); -} - -/************************************************************************* 
-Resolves the meaning of variables in an expression list. It is an error if -some identifier cannot be resolved here. Resolves also the data types of -functions. */ -static -void -pars_resolve_exp_list_variables_and_types( -/*======================================*/ - sel_node_t* select_node, /* in: select node or NULL */ - que_node_t* exp_node) /* in: expression list first node, or - NULL */ -{ - while (exp_node) { - pars_resolve_exp_variables_and_types(select_node, exp_node); - - exp_node = que_node_get_next(exp_node); - } -} - -/************************************************************************* -Resolves the columns in an expression. */ -static -void -pars_resolve_exp_columns( -/*=====================*/ - sym_node_t* table_node, /* in: first node in a table list */ - que_node_t* exp_node) /* in: expression */ -{ - func_node_t* func_node; - que_node_t* arg; - sym_node_t* sym_node; - dict_table_t* table; - sym_node_t* t_node; - ulint n_cols; - ulint i; - - ut_a(exp_node); - - if (que_node_get_type(exp_node) == QUE_NODE_FUNC) { - func_node = exp_node; - - arg = func_node->args; - - while (arg) { - pars_resolve_exp_columns(table_node, arg); - - arg = que_node_get_next(arg); - } - - return; - } - - ut_a(que_node_get_type(exp_node) == QUE_NODE_SYMBOL); - - sym_node = exp_node; - - if (sym_node->resolved) { - - return; - } - - /* Not resolved yet: look in the table list for a column with the - same name */ - - t_node = table_node; - - while (t_node) { - table = t_node->table; - - n_cols = dict_table_get_n_cols(table); - - for (i = 0; i < n_cols; i++) { - const dict_col_t* col - = dict_table_get_nth_col(table, i); - const char* col_name - = dict_table_get_col_name(table, i); - - if ((sym_node->name_len == ut_strlen(col_name)) - && (0 == ut_memcmp(sym_node->name, col_name, - sym_node->name_len))) { - /* Found */ - sym_node->resolved = TRUE; - sym_node->token_type = SYM_COLUMN; - sym_node->table = table; - sym_node->col_no = i; - sym_node->prefetch_buf = NULL; - - 
dict_col_copy_type( - col, - dfield_get_type(&sym_node - ->common.val)); - - return; - } - } - - t_node = que_node_get_next(t_node); - } -} - -/************************************************************************* -Resolves the meaning of columns in an expression list. */ -static -void -pars_resolve_exp_list_columns( -/*==========================*/ - sym_node_t* table_node, /* in: first node in a table list */ - que_node_t* exp_node) /* in: expression list first node, or - NULL */ -{ - while (exp_node) { - pars_resolve_exp_columns(table_node, exp_node); - - exp_node = que_node_get_next(exp_node); - } -} - -/************************************************************************* -Retrieves the table definition for a table name id. */ -static -void -pars_retrieve_table_def( -/*====================*/ - sym_node_t* sym_node) /* in: table node */ -{ - const char* table_name; - - ut_a(sym_node); - ut_a(que_node_get_type(sym_node) == QUE_NODE_SYMBOL); - - sym_node->resolved = TRUE; - sym_node->token_type = SYM_TABLE; - - table_name = (const char*) sym_node->name; - - sym_node->table = dict_table_get_low(table_name); - - ut_a(sym_node->table); -} - -/************************************************************************* -Retrieves the table definitions for a list of table name ids. */ -static -ulint -pars_retrieve_table_list_defs( -/*==========================*/ - /* out: number of tables */ - sym_node_t* sym_node) /* in: first table node in list */ -{ - ulint count = 0; - - if (sym_node == NULL) { - - return(count); - } - - while (sym_node) { - pars_retrieve_table_def(sym_node); - - count++; - - sym_node = que_node_get_next(sym_node); - } - - return(count); -} - -/************************************************************************* -Adds all columns to the select list if the query is SELECT * FROM ... 
*/ -static -void -pars_select_all_columns( -/*====================*/ - sel_node_t* select_node) /* in: select node already containing - the table list */ -{ - sym_node_t* col_node; - sym_node_t* table_node; - dict_table_t* table; - ulint i; - - select_node->select_list = NULL; - - table_node = select_node->table_list; - - while (table_node) { - table = table_node->table; - - for (i = 0; i < dict_table_get_n_user_cols(table); i++) { - const char* col_name = dict_table_get_col_name( - table, i); - - col_node = sym_tab_add_id(pars_sym_tab_global, - (byte*)col_name, - ut_strlen(col_name)); - - select_node->select_list = que_node_list_add_last( - select_node->select_list, col_node); - } - - table_node = que_node_get_next(table_node); - } -} - -/************************************************************************* -Parses a select list; creates a query graph node for the whole SELECT -statement. */ - -sel_node_t* -pars_select_list( -/*=============*/ - /* out, own: select node in a query - tree */ - que_node_t* select_list, /* in: select list */ - sym_node_t* into_list) /* in: variables list or NULL */ -{ - sel_node_t* node; - - node = sel_node_create(pars_sym_tab_global->heap); - - node->select_list = select_list; - node->into_list = into_list; - - pars_resolve_exp_list_variables_and_types(NULL, into_list); - - return(node); -} - -/************************************************************************* -Checks if the query is an aggregate query, in which case the selct list must -contain only aggregate function items. 
*/ -static -void -pars_check_aggregate( -/*=================*/ - sel_node_t* select_node) /* in: select node already containing - the select list */ -{ - que_node_t* exp_node; - func_node_t* func_node; - ulint n_nodes = 0; - ulint n_aggregate_nodes = 0; - - exp_node = select_node->select_list; - - while (exp_node) { - - n_nodes++; - - if (que_node_get_type(exp_node) == QUE_NODE_FUNC) { - - func_node = exp_node; - - if (func_node->class == PARS_FUNC_AGGREGATE) { - - n_aggregate_nodes++; - } - } - - exp_node = que_node_get_next(exp_node); - } - - if (n_aggregate_nodes > 0) { - ut_a(n_nodes == n_aggregate_nodes); - - select_node->is_aggregate = TRUE; - } else { - select_node->is_aggregate = FALSE; - } -} - -/************************************************************************* -Parses a select statement. */ - -sel_node_t* -pars_select_statement( -/*==================*/ - /* out, own: select node in a query - tree */ - sel_node_t* select_node, /* in: select node already containing - the select list */ - sym_node_t* table_list, /* in: table list */ - que_node_t* search_cond, /* in: search condition or NULL */ - pars_res_word_t* for_update, /* in: NULL or &pars_update_token */ - pars_res_word_t* lock_shared, /* in: NULL or &pars_share_token */ - order_node_t* order_by) /* in: NULL or an order-by node */ -{ - select_node->state = SEL_NODE_OPEN; - - select_node->table_list = table_list; - select_node->n_tables = pars_retrieve_table_list_defs(table_list); - - if (select_node->select_list == &pars_star_denoter) { - - /* SELECT * FROM ... 
*/ - pars_select_all_columns(select_node); - } - - if (select_node->into_list) { - ut_a(que_node_list_get_len(select_node->into_list) - == que_node_list_get_len(select_node->select_list)); - } - - UT_LIST_INIT(select_node->copy_variables); - - pars_resolve_exp_list_columns(table_list, select_node->select_list); - pars_resolve_exp_list_variables_and_types(select_node, - select_node->select_list); - pars_check_aggregate(select_node); - - select_node->search_cond = search_cond; - - if (search_cond) { - pars_resolve_exp_columns(table_list, search_cond); - pars_resolve_exp_variables_and_types(select_node, search_cond); - } - - if (for_update) { - ut_a(!lock_shared); - - select_node->set_x_locks = TRUE; - select_node->row_lock_mode = LOCK_X; - - select_node->consistent_read = FALSE; - select_node->read_view = NULL; - } else if (lock_shared){ - select_node->set_x_locks = FALSE; - select_node->row_lock_mode = LOCK_S; - - select_node->consistent_read = FALSE; - select_node->read_view = NULL; - } else { - select_node->set_x_locks = FALSE; - select_node->row_lock_mode = LOCK_S; - - select_node->consistent_read = TRUE; - } - - select_node->order_by = order_by; - - if (order_by) { - pars_resolve_exp_columns(table_list, order_by->column); - } - - /* The final value of the following fields depend on the environment - where the select statement appears: */ - - select_node->can_get_updated = FALSE; - select_node->explicit_cursor = NULL; - - opt_search_plan(select_node); - - return(select_node); -} - -/************************************************************************* -Parses a cursor declaration. 
*/ - -que_node_t* -pars_cursor_declaration( -/*====================*/ - /* out: sym_node */ - sym_node_t* sym_node, /* in: cursor id node in the symbol - table */ - sel_node_t* select_node) /* in: select node */ -{ - sym_node->resolved = TRUE; - sym_node->token_type = SYM_CURSOR; - sym_node->cursor_def = select_node; - - select_node->state = SEL_NODE_CLOSED; - select_node->explicit_cursor = sym_node; - - return(sym_node); -} - -/************************************************************************* -Parses a function declaration. */ - -que_node_t* -pars_function_declaration( -/*======================*/ - /* out: sym_node */ - sym_node_t* sym_node) /* in: function id node in the symbol - table */ -{ - sym_node->resolved = TRUE; - sym_node->token_type = SYM_FUNCTION; - - /* Check that the function exists. */ - ut_a(pars_info_get_user_func(pars_sym_tab_global->info, - sym_node->name)); - - return(sym_node); -} - -/************************************************************************* -Parses a delete or update statement start. */ - -upd_node_t* -pars_update_statement_start( -/*========================*/ - /* out, own: update node in a query - tree */ - ibool is_delete, /* in: TRUE if delete */ - sym_node_t* table_sym, /* in: table name node */ - col_assign_node_t* col_assign_list)/* in: column assignment list, NULL - if delete */ -{ - upd_node_t* node; - - node = upd_node_create(pars_sym_tab_global->heap); - - node->is_delete = is_delete; - - node->table_sym = table_sym; - node->col_assign_list = col_assign_list; - - return(node); -} - -/************************************************************************* -Parses a column assignment in an update. 
*/ - -col_assign_node_t* -pars_column_assignment( -/*===================*/ - /* out: column assignment node */ - sym_node_t* column, /* in: column to assign */ - que_node_t* exp) /* in: value to assign */ -{ - col_assign_node_t* node; - - node = mem_heap_alloc(pars_sym_tab_global->heap, - sizeof(col_assign_node_t)); - node->common.type = QUE_NODE_COL_ASSIGNMENT; - - node->col = column; - node->val = exp; - - return(node); -} - -/************************************************************************* -Processes an update node assignment list. */ -static -void -pars_process_assign_list( -/*=====================*/ - upd_node_t* node) /* in: update node */ -{ - col_assign_node_t* col_assign_list; - sym_node_t* table_sym; - col_assign_node_t* assign_node; - upd_field_t* upd_field; - dict_index_t* clust_index; - sym_node_t* col_sym; - ulint changes_ord_field; - ulint changes_field_size; - ulint n_assigns; - ulint i; - - table_sym = node->table_sym; - col_assign_list = node->col_assign_list; - clust_index = dict_table_get_first_index(node->table); - - assign_node = col_assign_list; - n_assigns = 0; - - while (assign_node) { - pars_resolve_exp_columns(table_sym, assign_node->col); - pars_resolve_exp_columns(table_sym, assign_node->val); - pars_resolve_exp_variables_and_types(NULL, assign_node->val); -#if 0 - ut_a(dtype_get_mtype( - dfield_get_type(que_node_get_val( - assign_node->col))) - == dtype_get_mtype( - dfield_get_type(que_node_get_val( - assign_node->val)))); -#endif - - /* Add to the update node all the columns found in assignment - values as columns to copy: therefore, TRUE */ - - opt_find_all_cols(TRUE, clust_index, &(node->columns), NULL, - assign_node->val); - n_assigns++; - - assign_node = que_node_get_next(assign_node); - } - - node->update = upd_create(n_assigns, pars_sym_tab_global->heap); - - assign_node = col_assign_list; - - changes_field_size = UPD_NODE_NO_SIZE_CHANGE; - - for (i = 0; i < n_assigns; i++) { - upd_field = 
upd_get_nth_field(node->update, i); - - col_sym = assign_node->col; - - upd_field_set_field_no(upd_field, dict_index_get_nth_col_pos( - clust_index, col_sym->col_no), - clust_index, NULL); - upd_field->exp = assign_node->val; - - if (!dict_col_get_fixed_size( - dict_index_get_nth_col(clust_index, - upd_field->field_no))) { - changes_field_size = 0; - } - - assign_node = que_node_get_next(assign_node); - } - - /* Find out if the update can modify an ordering field in any index */ - - changes_ord_field = UPD_NODE_NO_ORD_CHANGE; - - if (row_upd_changes_some_index_ord_field_binary(node->table, - node->update)) { - changes_ord_field = 0; - } - - node->cmpl_info = changes_ord_field | changes_field_size; -} - -/************************************************************************* -Parses an update or delete statement. */ - -upd_node_t* -pars_update_statement( -/*==================*/ - /* out, own: update node in a query - tree */ - upd_node_t* node, /* in: update node */ - sym_node_t* cursor_sym, /* in: pointer to a cursor entry in - the symbol table or NULL */ - que_node_t* search_cond) /* in: search condition or NULL */ -{ - sym_node_t* table_sym; - sel_node_t* sel_node; - plan_t* plan; - - table_sym = node->table_sym; - - pars_retrieve_table_def(table_sym); - node->table = table_sym->table; - - UT_LIST_INIT(node->columns); - - /* Make the single table node into a list of table nodes of length 1 */ - - que_node_list_add_last(NULL, table_sym); - - if (cursor_sym) { - pars_resolve_exp_variables_and_types(NULL, cursor_sym); - - sel_node = cursor_sym->alias->cursor_def; - - node->searched_update = FALSE; - } else { - sel_node = pars_select_list(NULL, NULL); - - pars_select_statement(sel_node, table_sym, search_cond, NULL, - &pars_share_token, NULL); - node->searched_update = TRUE; - sel_node->common.parent = node; - } - - node->select = sel_node; - - ut_a(!node->is_delete || (node->col_assign_list == NULL)); - ut_a(node->is_delete || (node->col_assign_list != NULL)); - 
- if (node->is_delete) { - node->cmpl_info = 0; - } else { - pars_process_assign_list(node); - } - - if (node->searched_update) { - node->has_clust_rec_x_lock = TRUE; - sel_node->set_x_locks = TRUE; - sel_node->row_lock_mode = LOCK_X; - } else { - node->has_clust_rec_x_lock = sel_node->set_x_locks; - } - - ut_a(sel_node->n_tables == 1); - ut_a(sel_node->consistent_read == FALSE); - ut_a(sel_node->order_by == NULL); - ut_a(sel_node->is_aggregate == FALSE); - - sel_node->can_get_updated = TRUE; - - node->state = UPD_NODE_UPDATE_CLUSTERED; - - plan = sel_node_get_nth_plan(sel_node, 0); - - plan->no_prefetch = TRUE; - - if (!((plan->index)->type & DICT_CLUSTERED)) { - - plan->must_get_clust = TRUE; - - node->pcur = &(plan->clust_pcur); - } else { - node->pcur = &(plan->pcur); - } - - if (!node->is_delete && node->searched_update - && (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE) - && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { - - /* The select node can perform the update in-place */ - - ut_a(plan->asc); - - node->select_will_do_update = TRUE; - sel_node->select_will_do_update = TRUE; - sel_node->latch_mode = BTR_MODIFY_LEAF; - } - - return(node); -} - -/************************************************************************* -Parses an insert statement. 
*/ - -ins_node_t* -pars_insert_statement( -/*==================*/ - /* out, own: update node in a query - tree */ - sym_node_t* table_sym, /* in: table name node */ - que_node_t* values_list, /* in: value expression list or NULL */ - sel_node_t* select) /* in: select condition or NULL */ -{ - ins_node_t* node; - dtuple_t* row; - ulint ins_type; - - ut_a(values_list || select); - ut_a(!values_list || !select); - - if (values_list) { - ins_type = INS_VALUES; - } else { - ins_type = INS_SEARCHED; - } - - pars_retrieve_table_def(table_sym); - - node = ins_node_create(ins_type, table_sym->table, - pars_sym_tab_global->heap); - - row = dtuple_create(pars_sym_tab_global->heap, - dict_table_get_n_cols(node->table)); - - dict_table_copy_types(row, table_sym->table); - - ins_node_set_new_row(node, row); - - node->select = select; - - if (select) { - select->common.parent = node; - - ut_a(que_node_list_get_len(select->select_list) - == dict_table_get_n_user_cols(table_sym->table)); - } - - node->values_list = values_list; - - if (node->values_list) { - pars_resolve_exp_list_variables_and_types(NULL, values_list); - - ut_a(que_node_list_get_len(values_list) - == dict_table_get_n_user_cols(table_sym->table)); - } - - return(node); -} - -/************************************************************************* -Set the type of a dfield. */ -static -void -pars_set_dfield_type( -/*=================*/ - dfield_t* dfield, /* in: dfield */ - pars_res_word_t* type, /* in: pointer to a type - token */ - ulint len, /* in: length, or 0 */ - ibool is_unsigned, /* in: if TRUE, column is - UNSIGNED. */ - ibool is_not_null) /* in: if TRUE, column is - NOT NULL. 
*/ -{ - ulint flags = 0; - - if (is_not_null) { - flags |= DATA_NOT_NULL; - } - - if (is_unsigned) { - flags |= DATA_UNSIGNED; - } - - if (type == &pars_int_token) { - ut_a(len == 0); - - dtype_set(dfield_get_type(dfield), DATA_INT, flags, 4); - - } else if (type == &pars_char_token) { - ut_a(len == 0); - - dtype_set(dfield_get_type(dfield), DATA_VARCHAR, - DATA_ENGLISH | flags, 0); - } else if (type == &pars_binary_token) { - ut_a(len != 0); - - dtype_set(dfield_get_type(dfield), DATA_FIXBINARY, - DATA_BINARY_TYPE | flags, len); - } else if (type == &pars_blob_token) { - ut_a(len == 0); - - dtype_set(dfield_get_type(dfield), DATA_BLOB, - DATA_BINARY_TYPE | flags, 0); - } else { - ut_error; - } -} - -/************************************************************************* -Parses a variable declaration. */ - -sym_node_t* -pars_variable_declaration( -/*======================*/ - /* out, own: symbol table node of type - SYM_VAR */ - sym_node_t* node, /* in: symbol table node allocated for the - id of the variable */ - pars_res_word_t* type) /* in: pointer to a type token */ -{ - node->resolved = TRUE; - node->token_type = SYM_VAR; - - node->param_type = PARS_NOT_PARAM; - - pars_set_dfield_type(que_node_get_val(node), type, 0, FALSE, FALSE); - - return(node); -} - -/************************************************************************* -Parses a procedure parameter declaration. 
*/ - -sym_node_t* -pars_parameter_declaration( -/*=======================*/ - /* out, own: symbol table node of type - SYM_VAR */ - sym_node_t* node, /* in: symbol table node allocated for the - id of the parameter */ - ulint param_type, - /* in: PARS_INPUT or PARS_OUTPUT */ - pars_res_word_t* type) /* in: pointer to a type token */ -{ - ut_a((param_type == PARS_INPUT) || (param_type == PARS_OUTPUT)); - - pars_variable_declaration(node, type); - - node->param_type = param_type; - - return(node); -} - -/************************************************************************* -Sets the parent field in a query node list. */ -static -void -pars_set_parent_in_list( -/*====================*/ - que_node_t* node_list, /* in: first node in a list */ - que_node_t* parent) /* in: parent value to set in all - nodes of the list */ -{ - que_common_t* common; - - common = node_list; - - while (common) { - common->parent = parent; - - common = que_node_get_next(common); - } -} - -/************************************************************************* -Parses an elsif element. */ - -elsif_node_t* -pars_elsif_element( -/*===============*/ - /* out: elsif node */ - que_node_t* cond, /* in: if-condition */ - que_node_t* stat_list) /* in: statement list */ -{ - elsif_node_t* node; - - node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(elsif_node_t)); - - node->common.type = QUE_NODE_ELSIF; - - node->cond = cond; - - pars_resolve_exp_variables_and_types(NULL, cond); - - node->stat_list = stat_list; - - return(node); -} - -/************************************************************************* -Parses an if-statement. 
*/ - -if_node_t* -pars_if_statement( -/*==============*/ - /* out: if-statement node */ - que_node_t* cond, /* in: if-condition */ - que_node_t* stat_list, /* in: statement list */ - que_node_t* else_part) /* in: else-part statement list - or elsif element list */ -{ - if_node_t* node; - elsif_node_t* elsif_node; - - node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(if_node_t)); - - node->common.type = QUE_NODE_IF; - - node->cond = cond; - - pars_resolve_exp_variables_and_types(NULL, cond); - - node->stat_list = stat_list; - - if (else_part && (que_node_get_type(else_part) == QUE_NODE_ELSIF)) { - - /* There is a list of elsif conditions */ - - node->else_part = NULL; - node->elsif_list = else_part; - - elsif_node = else_part; - - while (elsif_node) { - pars_set_parent_in_list(elsif_node->stat_list, node); - - elsif_node = que_node_get_next(elsif_node); - } - } else { - node->else_part = else_part; - node->elsif_list = NULL; - - pars_set_parent_in_list(else_part, node); - } - - pars_set_parent_in_list(stat_list, node); - - return(node); -} - -/************************************************************************* -Parses a while-statement. */ - -while_node_t* -pars_while_statement( -/*=================*/ - /* out: while-statement node */ - que_node_t* cond, /* in: while-condition */ - que_node_t* stat_list) /* in: statement list */ -{ - while_node_t* node; - - node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(while_node_t)); - - node->common.type = QUE_NODE_WHILE; - - node->cond = cond; - - pars_resolve_exp_variables_and_types(NULL, cond); - - node->stat_list = stat_list; - - pars_set_parent_in_list(stat_list, node); - - return(node); -} - -/************************************************************************* -Parses a for-loop-statement. 
*/ - -for_node_t* -pars_for_statement( -/*===============*/ - /* out: for-statement node */ - sym_node_t* loop_var, /* in: loop variable */ - que_node_t* loop_start_limit,/* in: loop start expression */ - que_node_t* loop_end_limit, /* in: loop end expression */ - que_node_t* stat_list) /* in: statement list */ -{ - for_node_t* node; - - node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(for_node_t)); - - node->common.type = QUE_NODE_FOR; - - pars_resolve_exp_variables_and_types(NULL, loop_var); - pars_resolve_exp_variables_and_types(NULL, loop_start_limit); - pars_resolve_exp_variables_and_types(NULL, loop_end_limit); - - node->loop_var = loop_var->indirection; - - ut_a(loop_var->indirection); - - node->loop_start_limit = loop_start_limit; - node->loop_end_limit = loop_end_limit; - - node->stat_list = stat_list; - - pars_set_parent_in_list(stat_list, node); - - return(node); -} - -/************************************************************************* -Parses an exit statement. */ - -exit_node_t* -pars_exit_statement(void) -/*=====================*/ - /* out: exit statement node */ -{ - exit_node_t* node; - - node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(exit_node_t)); - node->common.type = QUE_NODE_EXIT; - - return(node); -} - -/************************************************************************* -Parses a return-statement. */ - -return_node_t* -pars_return_statement(void) -/*=======================*/ - /* out: return-statement node */ -{ - return_node_t* node; - - node = mem_heap_alloc(pars_sym_tab_global->heap, - sizeof(return_node_t)); - node->common.type = QUE_NODE_RETURN; - - return(node); -} - -/************************************************************************* -Parses an assignment statement. 
*/ - -assign_node_t* -pars_assignment_statement( -/*======================*/ - /* out: assignment statement node */ - sym_node_t* var, /* in: variable to assign */ - que_node_t* val) /* in: value to assign */ -{ - assign_node_t* node; - - node = mem_heap_alloc(pars_sym_tab_global->heap, - sizeof(assign_node_t)); - node->common.type = QUE_NODE_ASSIGNMENT; - - node->var = var; - node->val = val; - - pars_resolve_exp_variables_and_types(NULL, var); - pars_resolve_exp_variables_and_types(NULL, val); - - ut_a(dtype_get_mtype(dfield_get_type(que_node_get_val(var))) - == dtype_get_mtype(dfield_get_type(que_node_get_val(val)))); - - return(node); -} - -/************************************************************************* -Parses a procedure call. */ - -func_node_t* -pars_procedure_call( -/*================*/ - /* out: function node */ - que_node_t* res_word,/* in: procedure name reserved word */ - que_node_t* args) /* in: argument list */ -{ - func_node_t* node; - - node = pars_func(res_word, args); - - pars_resolve_exp_list_variables_and_types(NULL, args); - - return(node); -} - -/************************************************************************* -Parses a fetch statement. into_list or user_func (but not both) must be -non-NULL. */ - -fetch_node_t* -pars_fetch_statement( -/*=================*/ - /* out: fetch statement node */ - sym_node_t* cursor, /* in: cursor node */ - sym_node_t* into_list, /* in: variables to set, or NULL */ - sym_node_t* user_func) /* in: user function name, or NULL */ -{ - sym_node_t* cursor_decl; - fetch_node_t* node; - - /* Logical XOR. 
*/ - ut_a(!into_list != !user_func); - - node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(fetch_node_t)); - - node->common.type = QUE_NODE_FETCH; - - pars_resolve_exp_variables_and_types(NULL, cursor); - - if (into_list) { - pars_resolve_exp_list_variables_and_types(NULL, into_list); - node->into_list = into_list; - node->func = NULL; - } else { - pars_resolve_exp_variables_and_types(NULL, user_func); - - node->func = pars_info_get_user_func(pars_sym_tab_global->info, - user_func->name); - ut_a(node->func); - - node->into_list = NULL; - } - - cursor_decl = cursor->alias; - - ut_a(cursor_decl->token_type == SYM_CURSOR); - - node->cursor_def = cursor_decl->cursor_def; - - if (into_list) { - ut_a(que_node_list_get_len(into_list) - == que_node_list_get_len(node->cursor_def->select_list)); - } - - return(node); -} - -/************************************************************************* -Parses an open or close cursor statement. */ - -open_node_t* -pars_open_statement( -/*================*/ - /* out: fetch statement node */ - ulint type, /* in: ROW_SEL_OPEN_CURSOR - or ROW_SEL_CLOSE_CURSOR */ - sym_node_t* cursor) /* in: cursor node */ -{ - sym_node_t* cursor_decl; - open_node_t* node; - - node = mem_heap_alloc(pars_sym_tab_global->heap, sizeof(open_node_t)); - - node->common.type = QUE_NODE_OPEN; - - pars_resolve_exp_variables_and_types(NULL, cursor); - - cursor_decl = cursor->alias; - - ut_a(cursor_decl->token_type == SYM_CURSOR); - - node->op_type = type; - node->cursor_def = cursor_decl->cursor_def; - - return(node); -} - -/************************************************************************* -Parses a row_printf-statement. 
*/ - -row_printf_node_t* -pars_row_printf_statement( -/*======================*/ - /* out: row_printf-statement node */ - sel_node_t* sel_node) /* in: select node */ -{ - row_printf_node_t* node; - - node = mem_heap_alloc(pars_sym_tab_global->heap, - sizeof(row_printf_node_t)); - node->common.type = QUE_NODE_ROW_PRINTF; - - node->sel_node = sel_node; - - sel_node->common.parent = node; - - return(node); -} - -/************************************************************************* -Parses a commit statement. */ - -commit_node_t* -pars_commit_statement(void) -/*=======================*/ -{ - return(commit_node_create(pars_sym_tab_global->heap)); -} - -/************************************************************************* -Parses a rollback statement. */ - -roll_node_t* -pars_rollback_statement(void) -/*=========================*/ -{ - return(roll_node_create(pars_sym_tab_global->heap)); -} - -/************************************************************************* -Parses a column definition at a table creation. */ - -sym_node_t* -pars_column_def( -/*============*/ - /* out: column sym table - node */ - sym_node_t* sym_node, /* in: column node in the - symbol table */ - pars_res_word_t* type, /* in: data type */ - sym_node_t* len, /* in: length of column, or - NULL */ - void* is_unsigned, /* in: if not NULL, column - is of type UNSIGNED. */ - void* is_not_null) /* in: if not NULL, column - is of type NOT NULL. */ -{ - ulint len2; - - if (len) { - len2 = eval_node_get_int_val(len); - } else { - len2 = 0; - } - - pars_set_dfield_type(que_node_get_val(sym_node), type, len2, - is_unsigned != NULL, is_not_null != NULL); - - return(sym_node); -} - -/************************************************************************* -Parses a table creation operation. 
*/ - -tab_node_t* -pars_create_table( -/*==============*/ - /* out: table create subgraph */ - sym_node_t* table_sym, /* in: table name node in the symbol - table */ - sym_node_t* column_defs, /* in: list of column names */ - void* not_fit_in_memory __attribute__((unused))) - /* in: a non-NULL pointer means that - this is a table which in simulations - should be simulated as not fitting - in memory; thread is put to sleep - to simulate disk accesses; NOTE that - this flag is not stored to the data - dictionary on disk, and the database - will forget about non-NULL value if - it has to reload the table definition - from disk */ -{ - dict_table_t* table; - sym_node_t* column; - tab_node_t* node; - dtype_t* dtype; - ulint n_cols; - - n_cols = que_node_list_get_len(column_defs); - - /* As the InnoDB SQL parser is for internal use only, - for creating some system tables, this function will only - create tables in the old (not compact) record format. */ - table = dict_mem_table_create(table_sym->name, 0, n_cols, 0); - -#ifdef UNIV_DEBUG - if (not_fit_in_memory != NULL) { - table->does_not_fit_in_memory = TRUE; - } -#endif /* UNIV_DEBUG */ - column = column_defs; - - while (column) { - dtype = dfield_get_type(que_node_get_val(column)); - - dict_mem_table_add_col(table, table->heap, - column->name, dtype->mtype, - dtype->prtype, dtype->len); - column->resolved = TRUE; - column->token_type = SYM_COLUMN; - - column = que_node_get_next(column); - } - - node = tab_create_graph_create(table, pars_sym_tab_global->heap); - - table_sym->resolved = TRUE; - table_sym->token_type = SYM_TABLE; - - return(node); -} - -/************************************************************************* -Parses an index creation operation. 
*/ - -ind_node_t* -pars_create_index( -/*==============*/ - /* out: index create subgraph */ - pars_res_word_t* unique_def, /* in: not NULL if a unique index */ - pars_res_word_t* clustered_def, /* in: not NULL if a clustered index */ - sym_node_t* index_sym, /* in: index name node in the symbol - table */ - sym_node_t* table_sym, /* in: table name node in the symbol - table */ - sym_node_t* column_list) /* in: list of column names */ -{ - dict_index_t* index; - sym_node_t* column; - ind_node_t* node; - ulint n_fields; - ulint ind_type; - - n_fields = que_node_list_get_len(column_list); - - ind_type = 0; - - if (unique_def) { - ind_type = ind_type | DICT_UNIQUE; - } - - if (clustered_def) { - ind_type = ind_type | DICT_CLUSTERED; - } - - index = dict_mem_index_create(table_sym->name, index_sym->name, 0, - ind_type, n_fields); - column = column_list; - - while (column) { - dict_mem_index_add_field(index, column->name, 0); - - column->resolved = TRUE; - column->token_type = SYM_COLUMN; - - column = que_node_get_next(column); - } - - node = ind_create_graph_create(index, pars_sym_tab_global->heap); - - table_sym->resolved = TRUE; - table_sym->token_type = SYM_TABLE; - - index_sym->resolved = TRUE; - index_sym->token_type = SYM_TABLE; - - return(node); -} - -/************************************************************************* -Parses a procedure definition. 
*/ - -que_fork_t* -pars_procedure_definition( -/*======================*/ - /* out: query fork node */ - sym_node_t* sym_node, /* in: procedure id node in the symbol - table */ - sym_node_t* param_list, /* in: parameter declaration list */ - que_node_t* stat_list) /* in: statement list */ -{ - proc_node_t* node; - que_fork_t* fork; - que_thr_t* thr; - mem_heap_t* heap; - - heap = pars_sym_tab_global->heap; - - fork = que_fork_create(NULL, NULL, QUE_FORK_PROCEDURE, heap); - fork->trx = NULL; - - thr = que_thr_create(fork, heap); - - node = mem_heap_alloc(heap, sizeof(proc_node_t)); - - node->common.type = QUE_NODE_PROC; - node->common.parent = thr; - - sym_node->token_type = SYM_PROCEDURE_NAME; - sym_node->resolved = TRUE; - - node->proc_id = sym_node; - node->param_list = param_list; - node->stat_list = stat_list; - - pars_set_parent_in_list(stat_list, node); - - node->sym_tab = pars_sym_tab_global; - - thr->child = node; - - pars_sym_tab_global->query_graph = fork; - - return(fork); -} - -/***************************************************************** -Parses a stored procedure call, when this is not within another stored -procedure, that is, the client issues a procedure call directly. -In MySQL/InnoDB, stored InnoDB procedures are invoked via the -parsed procedure tree, not via InnoDB SQL, so this function is not used. */ - -que_fork_t* -pars_stored_procedure_call( -/*=======================*/ - /* out: query graph */ - sym_node_t* sym_node __attribute__((unused))) - /* in: stored procedure name */ -{ - ut_error; - return(NULL); -} - -/***************************************************************** -Retrieves characters to the lexical analyzer. 
*/ - -void -pars_get_lex_chars( -/*===============*/ - char* buf, /* in/out: buffer where to copy */ - int* result, /* out: number of characters copied or EOF */ - int max_size) /* in: maximum number of characters which fit - in the buffer */ -{ - int len; - - len = pars_sym_tab_global->string_len - - pars_sym_tab_global->next_char_pos; - if (len == 0) { -#ifdef YYDEBUG - /* fputs("SQL string ends\n", stderr); */ -#endif - *result = 0; - - return; - } - - if (len > max_size) { - len = max_size; - } - -#ifdef UNIV_SQL_DEBUG - if (pars_print_lexed) { - - if (len >= 5) { - len = 5; - } - - fwrite(pars_sym_tab_global->sql_string - + pars_sym_tab_global->next_char_pos, - 1, len, stderr); - } -#endif /* UNIV_SQL_DEBUG */ - - ut_memcpy(buf, pars_sym_tab_global->sql_string - + pars_sym_tab_global->next_char_pos, len); - *result = len; - - pars_sym_tab_global->next_char_pos += len; -} - -/***************************************************************** -Called by yyparse on error. */ - -void -yyerror( -/*====*/ - const char* s __attribute__((unused))) - /* in: error message string */ -{ - ut_ad(s); - - fputs("PARSER ERROR: Syntax error in SQL string\n", stderr); - - ut_error; -} - -/***************************************************************** -Parses an SQL string returning the query graph. 
*/ - -que_t* -pars_sql( -/*=====*/ - /* out, own: the query graph */ - pars_info_t* info, /* in: extra information, or NULL */ - const char* str) /* in: SQL string */ -{ - sym_node_t* sym_node; - mem_heap_t* heap; - que_t* graph; - - ut_ad(str); - - heap = mem_heap_create(256); - - /* Currently, the parser is not reentrant: */ - ut_ad(mutex_own(&(dict_sys->mutex))); - - pars_sym_tab_global = sym_tab_create(heap); - - pars_sym_tab_global->string_len = strlen(str); - pars_sym_tab_global->sql_string = mem_heap_dup( - heap, str, pars_sym_tab_global->string_len + 1); - pars_sym_tab_global->next_char_pos = 0; - pars_sym_tab_global->info = info; - - yyparse(); - - sym_node = UT_LIST_GET_FIRST(pars_sym_tab_global->sym_list); - - while (sym_node) { - ut_a(sym_node->resolved); - - sym_node = UT_LIST_GET_NEXT(sym_list, sym_node); - } - - graph = pars_sym_tab_global->query_graph; - - graph->sym_tab = pars_sym_tab_global; - graph->info = info; - - /* fprintf(stderr, "SQL graph size %lu\n", mem_heap_get_size(heap)); */ - - return(graph); -} - -/********************************************************************** -Completes a query graph by adding query thread and fork nodes -above it and prepares the graph for running. The fork created is of -type QUE_FORK_MYSQL_INTERFACE. 
*/ - -que_thr_t* -pars_complete_graph_for_exec( -/*=========================*/ - /* out: query thread node to run */ - que_node_t* node, /* in: root node for an incomplete - query graph */ - trx_t* trx, /* in: transaction handle */ - mem_heap_t* heap) /* in: memory heap from which allocated */ -{ - que_fork_t* fork; - que_thr_t* thr; - - fork = que_fork_create(NULL, NULL, QUE_FORK_MYSQL_INTERFACE, heap); - fork->trx = trx; - - thr = que_thr_create(fork, heap); - - thr->child = node; - - que_node_set_parent(node, thr); - - trx->graph = NULL; - - return(thr); -} - -/******************************************************************** -Create parser info struct.*/ - -pars_info_t* -pars_info_create(void) -/*==================*/ - /* out, own: info struct */ -{ - pars_info_t* info; - mem_heap_t* heap; - - heap = mem_heap_create(512); - - info = mem_heap_alloc(heap, sizeof(*info)); - - info->heap = heap; - info->funcs = NULL; - info->bound_lits = NULL; - info->bound_ids = NULL; - info->graph_owns_us = TRUE; - - return(info); -} - -/******************************************************************** -Free info struct and everything it contains.*/ - -void -pars_info_free( -/*===========*/ - pars_info_t* info) /* in: info struct */ -{ - mem_heap_free(info->heap); -} - -/******************************************************************** -Add bound literal. */ - -void -pars_info_add_literal( -/*==================*/ - pars_info_t* info, /* in: info struct */ - const char* name, /* in: name */ - const void* address, /* in: address */ - ulint length, /* in: length of data */ - ulint type, /* in: type, e.g. DATA_FIXBINARY */ - ulint prtype) /* in: precise type, e.g. 
- DATA_UNSIGNED */ -{ - pars_bound_lit_t* pbl; - - ut_ad(!pars_info_get_bound_lit(info, name)); - - pbl = mem_heap_alloc(info->heap, sizeof(*pbl)); - - pbl->name = name; - pbl->address = address; - pbl->length = length; - pbl->type = type; - pbl->prtype = prtype; - - if (!info->bound_lits) { - info->bound_lits = ib_vector_create(info->heap, 8); - } - - ib_vector_push(info->bound_lits, pbl); -} - -/******************************************************************** -Equivalent to pars_info_add_literal(info, name, str, strlen(str), -DATA_VARCHAR, DATA_ENGLISH). */ - -void -pars_info_add_str_literal( -/*======================*/ - pars_info_t* info, /* in: info struct */ - const char* name, /* in: name */ - const char* str) /* in: string */ -{ - pars_info_add_literal(info, name, str, strlen(str), - DATA_VARCHAR, DATA_ENGLISH); -} - -/******************************************************************** -Equivalent to: - -char buf[4]; -mach_write_to_4(buf, val); -pars_info_add_literal(info, name, buf, 4, DATA_INT, 0); - -except that the buffer is dynamically allocated from the info struct's -heap. */ - -void -pars_info_add_int4_literal( -/*=======================*/ - pars_info_t* info, /* in: info struct */ - const char* name, /* in: name */ - lint val) /* in: value */ -{ - byte* buf = mem_heap_alloc(info->heap, 4); - - mach_write_to_4(buf, val); - pars_info_add_literal(info, name, buf, 4, DATA_INT, 0); -} - -/******************************************************************** -Equivalent to: - -char buf[8]; -mach_write_to_8(buf, val); -pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0); - -except that the buffer is dynamically allocated from the info struct's -heap. 
*/ - -void -pars_info_add_dulint_literal( -/*=========================*/ - pars_info_t* info, /* in: info struct */ - const char* name, /* in: name */ - dulint val) /* in: value */ -{ - byte* buf = mem_heap_alloc(info->heap, 8); - - mach_write_to_8(buf, val); - - pars_info_add_literal(info, name, buf, 8, DATA_FIXBINARY, 0); -} - -/******************************************************************** -Add user function. */ - -void -pars_info_add_function( -/*===================*/ - pars_info_t* info, /* in: info struct */ - const char* name, /* in: function name */ - pars_user_func_cb_t func, /* in: function address */ - void* arg) /* in: user-supplied argument */ -{ - pars_user_func_t* puf; - - ut_ad(!pars_info_get_user_func(info, name)); - - puf = mem_heap_alloc(info->heap, sizeof(*puf)); - - puf->name = name; - puf->func = func; - puf->arg = arg; - - if (!info->funcs) { - info->funcs = ib_vector_create(info->heap, 8); - } - - ib_vector_push(info->funcs, puf); -} - -/******************************************************************** -Add bound id. 
*/ - -void -pars_info_add_id( -/*=============*/ - pars_info_t* info, /* in: info struct */ - const char* name, /* in: name */ - const char* id) /* in: id */ -{ - pars_bound_id_t* bid; - - ut_ad(!pars_info_get_bound_id(info, name)); - - bid = mem_heap_alloc(info->heap, sizeof(*bid)); - - bid->name = name; - bid->id = id; - - if (!info->bound_ids) { - info->bound_ids = ib_vector_create(info->heap, 8); - } - - ib_vector_push(info->bound_ids, bid); -} - -/******************************************************************** -Get user function with the given name.*/ - -pars_user_func_t* -pars_info_get_user_func( -/*====================*/ - /* out: user func, or NULL if not - found */ - pars_info_t* info, /* in: info struct */ - const char* name) /* in: function name to find*/ -{ - ulint i; - ib_vector_t* vec; - - if (!info || !info->funcs) { - return(NULL); - } - - vec = info->funcs; - - for (i = 0; i < ib_vector_size(vec); i++) { - pars_user_func_t* puf = ib_vector_get(vec, i); - - if (strcmp(puf->name, name) == 0) { - return(puf); - } - } - - return(NULL); -} - -/******************************************************************** -Get bound literal with the given name.*/ - -pars_bound_lit_t* -pars_info_get_bound_lit( -/*====================*/ - /* out: bound literal, or NULL if - not found */ - pars_info_t* info, /* in: info struct */ - const char* name) /* in: bound literal name to find */ -{ - ulint i; - ib_vector_t* vec; - - if (!info || !info->bound_lits) { - return(NULL); - } - - vec = info->bound_lits; - - for (i = 0; i < ib_vector_size(vec); i++) { - pars_bound_lit_t* pbl = ib_vector_get(vec, i); - - if (strcmp(pbl->name, name) == 0) { - return(pbl); - } - } - - return(NULL); -} - -/******************************************************************** -Get bound id with the given name.*/ - -pars_bound_id_t* -pars_info_get_bound_id( -/*===================*/ - /* out: bound id, or NULL if not - found */ - pars_info_t* info, /* in: info struct */ - const char* 
name) /* in: bound id name to find */ -{ - ulint i; - ib_vector_t* vec; - - if (!info || !info->bound_ids) { - return(NULL); - } - - vec = info->bound_ids; - - for (i = 0; i < ib_vector_size(vec); i++) { - pars_bound_id_t* bid = ib_vector_get(vec, i); - - if (strcmp(bid->name, name) == 0) { - return(bid); - } - } - - return(NULL); -} diff --git a/storage/innobase/pars/pars0sym.c b/storage/innobase/pars/pars0sym.c deleted file mode 100644 index 2d56fff2d42..00000000000 --- a/storage/innobase/pars/pars0sym.c +++ /dev/null @@ -1,352 +0,0 @@ -/****************************************************** -SQL parser symbol table - -(c) 1997 Innobase Oy - -Created 12/15/1997 Heikki Tuuri -*******************************************************/ - -#include "pars0sym.h" - -#ifdef UNIV_NONINL -#include "pars0sym.ic" -#endif - -#include "mem0mem.h" -#include "data0type.h" -#include "data0data.h" -#include "pars0grm.h" -#include "pars0pars.h" -#include "que0que.h" -#include "eval0eval.h" -#include "row0sel.h" - -/********************************************************************** -Creates a symbol table for a single stored procedure or query. */ - -sym_tab_t* -sym_tab_create( -/*===========*/ - /* out, own: symbol table */ - mem_heap_t* heap) /* in: memory heap where to create */ -{ - sym_tab_t* sym_tab; - - sym_tab = mem_heap_alloc(heap, sizeof(sym_tab_t)); - - UT_LIST_INIT(sym_tab->sym_list); - UT_LIST_INIT(sym_tab->func_node_list); - - sym_tab->heap = heap; - - return(sym_tab); -} - -/********************************************************************** -Frees the memory allocated dynamically AFTER parsing phase for variables -etc. in the symbol table. Does not free the mem heap where the table was -originally created. Frees also SQL explicit cursor definitions. 
*/ - -void -sym_tab_free_private( -/*=================*/ - sym_tab_t* sym_tab) /* in, own: symbol table */ -{ - sym_node_t* sym; - func_node_t* func; - - sym = UT_LIST_GET_FIRST(sym_tab->sym_list); - - while (sym) { - eval_node_free_val_buf(sym); - - if (sym->prefetch_buf) { - sel_col_prefetch_buf_free(sym->prefetch_buf); - } - - if (sym->cursor_def) { - que_graph_free_recursive(sym->cursor_def); - } - - sym = UT_LIST_GET_NEXT(sym_list, sym); - } - - func = UT_LIST_GET_FIRST(sym_tab->func_node_list); - - while (func) { - eval_node_free_val_buf(func); - - func = UT_LIST_GET_NEXT(func_node_list, func); - } -} - -/********************************************************************** -Adds an integer literal to a symbol table. */ - -sym_node_t* -sym_tab_add_int_lit( -/*================*/ - /* out: symbol table node */ - sym_tab_t* sym_tab, /* in: symbol table */ - ulint val) /* in: integer value */ -{ - sym_node_t* node; - byte* data; - - node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t)); - - node->common.type = QUE_NODE_SYMBOL; - - node->resolved = TRUE; - node->token_type = SYM_LIT; - - node->indirection = NULL; - - dtype_set(&(node->common.val.type), DATA_INT, 0, 4); - - data = mem_heap_alloc(sym_tab->heap, 4); - mach_write_to_4(data, val); - - dfield_set_data(&(node->common.val), data, 4); - - node->common.val_buf_size = 0; - node->prefetch_buf = NULL; - node->cursor_def = NULL; - - UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node); - - node->sym_table = sym_tab; - - return(node); -} - -/********************************************************************** -Adds a string literal to a symbol table. 
*/ - -sym_node_t* -sym_tab_add_str_lit( -/*================*/ - /* out: symbol table node */ - sym_tab_t* sym_tab, /* in: symbol table */ - byte* str, /* in: string with no quotes around - it */ - ulint len) /* in: string length */ -{ - sym_node_t* node; - byte* data; - - node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t)); - - node->common.type = QUE_NODE_SYMBOL; - - node->resolved = TRUE; - node->token_type = SYM_LIT; - - node->indirection = NULL; - - dtype_set(&(node->common.val.type), DATA_VARCHAR, DATA_ENGLISH, 0); - - if (len) { - data = mem_heap_alloc(sym_tab->heap, len); - ut_memcpy(data, str, len); - } else { - data = NULL; - } - - dfield_set_data(&(node->common.val), data, len); - - node->common.val_buf_size = 0; - node->prefetch_buf = NULL; - node->cursor_def = NULL; - - UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node); - - node->sym_table = sym_tab; - - return(node); -} - -/********************************************************************** -Add a bound literal to a symbol table. 
*/ - -sym_node_t* -sym_tab_add_bound_lit( -/*==================*/ - /* out: symbol table node */ - sym_tab_t* sym_tab, /* in: symbol table */ - const char* name, /* in: name of bound literal */ - ulint* lit_type) /* out: type of literal (PARS_*_LIT) */ -{ - sym_node_t* node; - pars_bound_lit_t* blit; - ulint len = 0; - - blit = pars_info_get_bound_lit(sym_tab->info, name); - ut_a(blit); - - node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t)); - - node->common.type = QUE_NODE_SYMBOL; - - node->resolved = TRUE; - node->token_type = SYM_LIT; - - node->indirection = NULL; - - switch (blit->type) { - case DATA_FIXBINARY: - len = blit->length; - *lit_type = PARS_FIXBINARY_LIT; - break; - - case DATA_BLOB: - *lit_type = PARS_BLOB_LIT; - break; - - case DATA_VARCHAR: - *lit_type = PARS_STR_LIT; - break; - - case DATA_CHAR: - ut_a(blit->length > 0); - - len = blit->length; - *lit_type = PARS_STR_LIT; - break; - - case DATA_INT: - ut_a(blit->length > 0); - ut_a(blit->length <= 8); - - len = blit->length; - *lit_type = PARS_INT_LIT; - break; - - default: - ut_error; - } - - dtype_set(&(node->common.val.type), blit->type, blit->prtype, len); - - dfield_set_data(&(node->common.val), blit->address, blit->length); - - node->common.val_buf_size = 0; - node->prefetch_buf = NULL; - node->cursor_def = NULL; - - UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node); - - node->sym_table = sym_tab; - - return(node); -} - -/********************************************************************** -Adds an SQL null literal to a symbol table. 
*/ - -sym_node_t* -sym_tab_add_null_lit( -/*=================*/ - /* out: symbol table node */ - sym_tab_t* sym_tab) /* in: symbol table */ -{ - sym_node_t* node; - - node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t)); - - node->common.type = QUE_NODE_SYMBOL; - - node->resolved = TRUE; - node->token_type = SYM_LIT; - - node->indirection = NULL; - - node->common.val.type.mtype = DATA_ERROR; - - dfield_set_data(&(node->common.val), NULL, UNIV_SQL_NULL); - - node->common.val_buf_size = 0; - node->prefetch_buf = NULL; - node->cursor_def = NULL; - - UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node); - - node->sym_table = sym_tab; - - return(node); -} - -/********************************************************************** -Adds an identifier to a symbol table. */ - -sym_node_t* -sym_tab_add_id( -/*===========*/ - /* out: symbol table node */ - sym_tab_t* sym_tab, /* in: symbol table */ - byte* name, /* in: identifier name */ - ulint len) /* in: identifier length */ -{ - sym_node_t* node; - - node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t)); - - node->common.type = QUE_NODE_SYMBOL; - - node->resolved = FALSE; - node->indirection = NULL; - - node->name = mem_heap_strdupl(sym_tab->heap, (char*) name, len); - node->name_len = len; - - UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node); - - dfield_set_data(&(node->common.val), NULL, UNIV_SQL_NULL); - - node->common.val_buf_size = 0; - node->prefetch_buf = NULL; - node->cursor_def = NULL; - - node->sym_table = sym_tab; - - return(node); -} - -/********************************************************************** -Add a bound identifier to a symbol table. 
*/ - -sym_node_t* -sym_tab_add_bound_id( -/*===========*/ - /* out: symbol table node */ - sym_tab_t* sym_tab, /* in: symbol table */ - const char* name) /* in: name of bound id */ -{ - sym_node_t* node; - pars_bound_id_t* bid; - - bid = pars_info_get_bound_id(sym_tab->info, name); - ut_a(bid); - - node = mem_heap_alloc(sym_tab->heap, sizeof(sym_node_t)); - - node->common.type = QUE_NODE_SYMBOL; - - node->resolved = FALSE; - node->indirection = NULL; - - node->name = mem_heap_strdup(sym_tab->heap, bid->id); - node->name_len = strlen(node->name); - - UT_LIST_ADD_LAST(sym_list, sym_tab->sym_list, node); - - dfield_set_data(&(node->common.val), NULL, UNIV_SQL_NULL); - - node->common.val_buf_size = 0; - node->prefetch_buf = NULL; - node->cursor_def = NULL; - - node->sym_table = sym_tab; - - return(node); -} diff --git a/storage/innobase/plug.in b/storage/innobase/plug.in deleted file mode 100644 index f7d2abed751..00000000000 --- a/storage/innobase/plug.in +++ /dev/null @@ -1,44 +0,0 @@ -MYSQL_STORAGE_ENGINE(innobase, innodb, [InnoDB Storage Engine], - [Transactional Tables using InnoDB], [max,max-no-ndb]) -MYSQL_PLUGIN_DIRECTORY(innobase, [storage/innobase]) -MYSQL_PLUGIN_STATIC(innobase, [libinnobase.a]) -MYSQL_PLUGIN_DYNAMIC(innobase, [ha_innodb.la]) -MYSQL_PLUGIN_ACTIONS(innobase, [ - AC_CHECK_LIB(rt, aio_read, [innodb_system_libs="-lrt"]) - AC_SUBST(innodb_system_libs) - AC_CHECK_HEADERS(aio.h sched.h) - AC_CHECK_SIZEOF(int, 4) - AC_CHECK_SIZEOF(long, 4) - AC_CHECK_SIZEOF(void*, 4) - AC_CHECK_FUNCS(sched_yield) - AC_CHECK_FUNCS(fdatasync) - AC_CHECK_FUNCS(localtime_r) - AC_C_BIGENDIAN - case "$target_os" in - lin*) - CFLAGS="$CFLAGS -DUNIV_LINUX";; - hpux10*) - CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX -DUNIV_HPUX10";; - hp*) - CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE -DUNIV_HPUX";; - aix*) - CFLAGS="$CFLAGS -DUNIV_AIX";; - irix*|osf*|sysv5uw7*|openbsd*) - CFLAGS="$CFLAGS -DUNIV_MUST_NOT_INLINE";; - *solaris*|*SunOS*) - CFLAGS="$CFLAGS -DUNIV_SOLARIS";; - 
esac - INNODB_DYNAMIC_CFLAGS="-DMYSQL_DYNAMIC_PLUGIN" - case "$target_cpu" in - x86_64) - # The AMD64 ABI forbids absolute addresses in shared libraries - ;; - *86) - # Use absolute addresses on IA-32 - INNODB_DYNAMIC_CFLAGS="$INNODB_DYNAMIC_CFLAGS -prefer-non-pic" - ;; - esac - AC_SUBST(INNODB_DYNAMIC_CFLAGS) - ]) - -# vim: set ft=config: diff --git a/storage/innobase/que/que0que.c b/storage/innobase/que/que0que.c deleted file mode 100644 index bf83f28f04e..00000000000 --- a/storage/innobase/que/que0que.c +++ /dev/null @@ -1,1443 +0,0 @@ -/****************************************************** -Query graph - -(c) 1996 Innobase Oy - -Created 5/27/1996 Heikki Tuuri -*******************************************************/ - -#include "que0que.h" - -#ifdef UNIV_NONINL -#include "que0que.ic" -#endif - -#include "srv0que.h" -#include "usr0sess.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "row0undo.h" -#include "row0ins.h" -#include "row0upd.h" -#include "row0sel.h" -#include "row0purge.h" -#include "dict0crea.h" -#include "log0log.h" -#include "eval0proc.h" -#include "eval0eval.h" -#include "pars0types.h" - -#define QUE_PARALLELIZE_LIMIT (64 * 256 * 256 * 256) -#define QUE_ROUND_ROBIN_LIMIT (64 * 256 * 256 * 256) -#define QUE_MAX_LOOPS_WITHOUT_CHECK 16 - -/* If the following flag is set TRUE, the module will print trace info -of SQL execution in the UNIV_SQL_DEBUG version */ -ibool que_trace_on = FALSE; - -ibool que_always_false = FALSE; - -/* Short introduction to query graphs - ================================== - -A query graph consists of nodes linked to each other in various ways. The -execution starts at que_run_threads() which takes a que_thr_t parameter. -que_thr_t contains two fields that control query graph execution: run_node -and prev_node. run_node is the next node to execute and prev_node is the -last node executed. - -Each node has a pointer to a 'next' statement, i.e., its brother, and a -pointer to its parent node. 
The next pointer is NULL in the last statement -of a block. - -Loop nodes contain a link to the first statement of the enclosed statement -list. While the loop runs, que_thr_step() checks if execution to the loop -node came from its parent or from one of the statement nodes in the loop. If -it came from the parent of the loop node it starts executing the first -statement node in the loop. If it came from one of the statement nodes in -the loop, then it checks if the statement node has another statement node -following it, and runs it if so. - -To signify loop ending, the loop statements (see e.g. while_step()) set -que_thr_t->run_node to the loop node's parent node. This is noticed on the -next call of que_thr_step() and execution proceeds to the node pointed to by -the loop node's 'next' pointer. - -For example, the code: - -X := 1; -WHILE X < 5 LOOP - X := X + 1; - X := X + 1; -X := 5 - -will result in the following node hierarchy, with the X-axis indicating -'next' links and the Y-axis indicating parent/child links: - -A - W - A - | - | - A - A - -A = assign_node_t, W = while_node_t. */ - -/* How a stored procedure containing COMMIT or ROLLBACK commands -is executed? - -The commit or rollback can be seen as a subprocedure call. -The problem is that if there are several query threads -currently running within the transaction, their action could -mess the commit or rollback operation. Or, at the least, the -operation would be difficult to visualize and keep in control. - -Therefore the query thread requesting a commit or a rollback -sends to the transaction a signal, which moves the transaction -to TRX_QUE_SIGNALED state. All running query threads of the -transaction will eventually notice that the transaction is now in -this state and voluntarily suspend themselves. Only the last -query thread which suspends itself will trigger handling of -the signal. 
- -When the transaction starts to handle a rollback or commit -signal, it builds a query graph which, when executed, will -roll back or commit the incomplete transaction. The transaction -is moved to the TRX_QUE_ROLLING_BACK or TRX_QUE_COMMITTING state. -If specified, the SQL cursors opened by the transaction are closed. -When the execution of the graph completes, it is like returning -from a subprocedure: the query thread which requested the operation -starts running again. */ - -/************************************************************************** -Moves a thread from another state to the QUE_THR_RUNNING state. Increments -the n_active_thrs counters of the query graph and transaction. -***NOTE***: This is the only function in which such a transition is allowed -to happen! */ -static -void -que_thr_move_to_run_state( -/*======================*/ - que_thr_t* thr); /* in: an query thread */ - -/*************************************************************************** -Adds a query graph to the session's list of graphs. */ - -void -que_graph_publish( -/*==============*/ - que_t* graph, /* in: graph */ - sess_t* sess) /* in: session */ -{ - ut_ad(mutex_own(&kernel_mutex)); - - UT_LIST_ADD_LAST(graphs, sess->graphs, graph); -} - -/*************************************************************************** -Creates a query graph fork node. 
*/ - -que_fork_t* -que_fork_create( -/*============*/ - /* out, own: fork node */ - que_t* graph, /* in: graph, if NULL then this - fork node is assumed to be the - graph root */ - que_node_t* parent, /* in: parent node */ - ulint fork_type, /* in: fork type */ - mem_heap_t* heap) /* in: memory heap where created */ -{ - que_fork_t* fork; - - ut_ad(heap); - - fork = mem_heap_alloc(heap, sizeof(que_fork_t)); - - fork->common.type = QUE_NODE_FORK; - fork->n_active_thrs = 0; - - fork->state = QUE_FORK_COMMAND_WAIT; - - if (graph != NULL) { - fork->graph = graph; - } else { - fork->graph = fork; - } - - fork->common.parent = parent; - fork->fork_type = fork_type; - - fork->caller = NULL; - - UT_LIST_INIT(fork->thrs); - - fork->sym_tab = NULL; - fork->info = NULL; - - fork->heap = heap; - - return(fork); -} - -/*************************************************************************** -Creates a query graph thread node. */ - -que_thr_t* -que_thr_create( -/*===========*/ - /* out, own: query thread node */ - que_fork_t* parent, /* in: parent node, i.e., a fork node */ - mem_heap_t* heap) /* in: memory heap where created */ -{ - que_thr_t* thr; - - ut_ad(parent && heap); - - thr = mem_heap_alloc(heap, sizeof(que_thr_t)); - - thr->common.type = QUE_NODE_THR; - thr->common.parent = parent; - - thr->magic_n = QUE_THR_MAGIC_N; - - thr->graph = parent->graph; - - thr->state = QUE_THR_COMMAND_WAIT; - - thr->is_active = FALSE; - - thr->run_node = NULL; - thr->resource = 0; - thr->lock_state = QUE_THR_LOCK_NOLOCK; - - UT_LIST_ADD_LAST(thrs, parent->thrs, thr); - - return(thr); -} - -/************************************************************************** -Moves a suspended query thread to the QUE_THR_RUNNING state and may release -a single worker thread to execute it. This function should be used to end -the wait state of a query thread waiting for a lock or a stored procedure -completion. 
*/ - -void -que_thr_end_wait( -/*=============*/ - que_thr_t* thr, /* in: query thread in the - QUE_THR_LOCK_WAIT, - or QUE_THR_PROCEDURE_WAIT, or - QUE_THR_SIG_REPLY_WAIT state */ - que_thr_t** next_thr) /* in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread; if NULL is passed - as the parameter, it is ignored */ -{ - ibool was_active; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(thr); - ut_ad((thr->state == QUE_THR_LOCK_WAIT) - || (thr->state == QUE_THR_PROCEDURE_WAIT) - || (thr->state == QUE_THR_SIG_REPLY_WAIT)); - ut_ad(thr->run_node); - - thr->prev_node = thr->run_node; - - was_active = thr->is_active; - - que_thr_move_to_run_state(thr); - - if (was_active) { - - return; - } - - if (next_thr && *next_thr == NULL) { - *next_thr = thr; - } else { - ut_a(0); - srv_que_task_enqueue_low(thr); - } -} - -/************************************************************************** -Same as que_thr_end_wait, but no parameter next_thr available. */ - -void -que_thr_end_wait_no_next_thr( -/*=========================*/ - que_thr_t* thr) /* in: query thread in the QUE_THR_LOCK_WAIT, - or QUE_THR_PROCEDURE_WAIT, or - QUE_THR_SIG_REPLY_WAIT state */ -{ - ibool was_active; - - ut_a(thr->state == QUE_THR_LOCK_WAIT); /* In MySQL this is the - only possible state here */ - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(thr); - ut_ad((thr->state == QUE_THR_LOCK_WAIT) - || (thr->state == QUE_THR_PROCEDURE_WAIT) - || (thr->state == QUE_THR_SIG_REPLY_WAIT)); - - was_active = thr->is_active; - - que_thr_move_to_run_state(thr); - - if (was_active) { - - return; - } - - /* In MySQL we let the OS thread (not just the query thread) to wait - for the lock to be released: */ - - srv_release_mysql_thread_if_suspended(thr); - - /* srv_que_task_enqueue_low(thr); */ -} - -/************************************************************************** -Inits a query thread for a command. 
*/ -UNIV_INLINE -void -que_thr_init_command( -/*=================*/ - que_thr_t* thr) /* in: query thread */ -{ - thr->run_node = thr; - thr->prev_node = thr->common.parent; - - que_thr_move_to_run_state(thr); -} - -/************************************************************************** -Starts execution of a command in a query fork. Picks a query thread which -is not in the QUE_THR_RUNNING state and moves it to that state. If none -can be chosen, a situation which may arise in parallelized fetches, NULL -is returned. */ - -que_thr_t* -que_fork_start_command( -/*===================*/ - /* out: a query thread of the graph moved to - QUE_THR_RUNNING state, or NULL; the query - thread should be executed by que_run_threads - by the caller */ - que_fork_t* fork) /* in: a query fork */ -{ - que_thr_t* thr; - que_thr_t* suspended_thr = NULL; - que_thr_t* completed_thr = NULL; - - fork->state = QUE_FORK_ACTIVE; - - fork->last_sel_node = NULL; - - /* Choose the query thread to run: usually there is just one thread, - but in a parallelized select, which necessarily is non-scrollable, - there may be several to choose from */ - - /* First we try to find a query thread in the QUE_THR_COMMAND_WAIT - state. Then we try to find a query thread in the QUE_THR_SUSPENDED - state, finally we try to find a query thread in the QUE_THR_COMPLETED - state */ - - thr = UT_LIST_GET_FIRST(fork->thrs); - - /* We make a single pass over the thr list within which we note which - threads are ready to run. 
*/ - while (thr) { - switch (thr->state) { - case QUE_THR_COMMAND_WAIT: - - /* We have to send the initial message to query thread - to start it */ - - que_thr_init_command(thr); - - return(thr); - - case QUE_THR_SUSPENDED: - /* In this case the execution of the thread was - suspended: no initial message is needed because - execution can continue from where it was left */ - if (!suspended_thr) { - suspended_thr = thr; - } - - break; - - case QUE_THR_COMPLETED: - if (!completed_thr) { - completed_thr = thr; - } - - break; - - case QUE_THR_LOCK_WAIT: - ut_error; - - } - - thr = UT_LIST_GET_NEXT(thrs, thr); - } - - if (suspended_thr) { - - thr = suspended_thr; - que_thr_move_to_run_state(thr); - - } else if (completed_thr) { - - thr = completed_thr; - que_thr_init_command(thr); - } - - return(thr); -} - -/************************************************************************** -After signal handling is finished, returns control to a query graph error -handling routine. (Currently, just returns the control to the root of the -graph so that the graph can communicate an error message to the client.) 
*/ - -void -que_fork_error_handle( -/*==================*/ - trx_t* trx __attribute__((unused)), /* in: trx */ - que_t* fork) /* in: query graph which was run before signal - handling started, NULL not allowed */ -{ - que_thr_t* thr; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(trx->sess->state == SESS_ERROR); - ut_ad(UT_LIST_GET_LEN(trx->reply_signals) == 0); - ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); - - thr = UT_LIST_GET_FIRST(fork->thrs); - - while (thr != NULL) { - ut_ad(!thr->is_active); - ut_ad(thr->state != QUE_THR_SIG_REPLY_WAIT); - ut_ad(thr->state != QUE_THR_LOCK_WAIT); - - thr->run_node = thr; - thr->prev_node = thr->child; - thr->state = QUE_THR_COMPLETED; - - thr = UT_LIST_GET_NEXT(thrs, thr); - } - - thr = UT_LIST_GET_FIRST(fork->thrs); - - que_thr_move_to_run_state(thr); - - ut_a(0); - srv_que_task_enqueue_low(thr); -} - -/******************************************************************** -Tests if all the query threads in the same fork have a given state. */ -UNIV_INLINE -ibool -que_fork_all_thrs_in_state( -/*=======================*/ - /* out: TRUE if all the query threads in the - same fork were in the given state */ - que_fork_t* fork, /* in: query fork */ - ulint state) /* in: state */ -{ - que_thr_t* thr_node; - - thr_node = UT_LIST_GET_FIRST(fork->thrs); - - while (thr_node != NULL) { - if (thr_node->state != state) { - - return(FALSE); - } - - thr_node = UT_LIST_GET_NEXT(thrs, thr_node); - } - - return(TRUE); -} - -/************************************************************************** -Calls que_graph_free_recursive for statements in a statement list. */ -static -void -que_graph_free_stat_list( -/*=====================*/ - que_node_t* node) /* in: first query graph node in the list */ -{ - while (node) { - que_graph_free_recursive(node); - - node = que_node_get_next(node); - } -} - -/************************************************************************** -Frees a query graph, but not the heap where it was created. 
Does not free -explicit cursor declarations, they are freed in que_graph_free. */ - -void -que_graph_free_recursive( -/*=====================*/ - que_node_t* node) /* in: query graph node */ -{ - que_fork_t* fork; - que_thr_t* thr; - undo_node_t* undo; - sel_node_t* sel; - ins_node_t* ins; - upd_node_t* upd; - tab_node_t* cre_tab; - ind_node_t* cre_ind; - - if (node == NULL) { - - return; - } - - switch (que_node_get_type(node)) { - - case QUE_NODE_FORK: - fork = node; - - thr = UT_LIST_GET_FIRST(fork->thrs); - - while (thr) { - que_graph_free_recursive(thr); - - thr = UT_LIST_GET_NEXT(thrs, thr); - } - - break; - case QUE_NODE_THR: - - thr = node; - - if (thr->magic_n != QUE_THR_MAGIC_N) { - fprintf(stderr, - "que_thr struct appears corrupt;" - " magic n %lu\n", - (unsigned long) thr->magic_n); - mem_analyze_corruption(thr); - ut_error; - } - - thr->magic_n = QUE_THR_MAGIC_FREED; - - que_graph_free_recursive(thr->child); - - break; - case QUE_NODE_UNDO: - - undo = node; - - mem_heap_free(undo->heap); - - break; - case QUE_NODE_SELECT: - - sel = node; - - sel_node_free_private(sel); - - break; - case QUE_NODE_INSERT: - - ins = node; - - que_graph_free_recursive(ins->select); - - mem_heap_free(ins->entry_sys_heap); - - break; - case QUE_NODE_UPDATE: - - upd = node; - - if (upd->in_mysql_interface) { - - btr_pcur_free_for_mysql(upd->pcur); - } - - que_graph_free_recursive(upd->cascade_node); - - if (upd->cascade_heap) { - mem_heap_free(upd->cascade_heap); - } - - que_graph_free_recursive(upd->select); - - mem_heap_free(upd->heap); - - break; - case QUE_NODE_CREATE_TABLE: - cre_tab = node; - - que_graph_free_recursive(cre_tab->tab_def); - que_graph_free_recursive(cre_tab->col_def); - que_graph_free_recursive(cre_tab->commit_node); - - mem_heap_free(cre_tab->heap); - - break; - case QUE_NODE_CREATE_INDEX: - cre_ind = node; - - que_graph_free_recursive(cre_ind->ind_def); - que_graph_free_recursive(cre_ind->field_def); - que_graph_free_recursive(cre_ind->commit_node); - 
- mem_heap_free(cre_ind->heap); - - break; - case QUE_NODE_PROC: - que_graph_free_stat_list(((proc_node_t*)node)->stat_list); - - break; - case QUE_NODE_IF: - que_graph_free_stat_list(((if_node_t*)node)->stat_list); - que_graph_free_stat_list(((if_node_t*)node)->else_part); - que_graph_free_stat_list(((if_node_t*)node)->elsif_list); - - break; - case QUE_NODE_ELSIF: - que_graph_free_stat_list(((elsif_node_t*)node)->stat_list); - - break; - case QUE_NODE_WHILE: - que_graph_free_stat_list(((while_node_t*)node)->stat_list); - - break; - case QUE_NODE_FOR: - que_graph_free_stat_list(((for_node_t*)node)->stat_list); - - break; - - case QUE_NODE_ASSIGNMENT: - case QUE_NODE_EXIT: - case QUE_NODE_RETURN: - case QUE_NODE_COMMIT: - case QUE_NODE_ROLLBACK: - case QUE_NODE_LOCK: - case QUE_NODE_FUNC: - case QUE_NODE_ORDER: - case QUE_NODE_ROW_PRINTF: - case QUE_NODE_OPEN: - case QUE_NODE_FETCH: - /* No need to do anything */ - - break; - default: - fprintf(stderr, - "que_node struct appears corrupt; type %lu\n", - (unsigned long) que_node_get_type(node)); - mem_analyze_corruption(node); - ut_error; - } -} - -/************************************************************************** -Frees a query graph. */ - -void -que_graph_free( -/*===========*/ - que_t* graph) /* in: query graph; we assume that the memory - heap where this graph was created is private - to this graph: if not, then use - que_graph_free_recursive and free the heap - afterwards! */ -{ - ut_ad(graph); - - if (graph->sym_tab) { - /* The following call frees dynamic memory allocated - for variables etc. during execution. Frees also explicit - cursor definitions. 
*/ - - sym_tab_free_private(graph->sym_tab); - } - - if (graph->info && graph->info->graph_owns_us) { - pars_info_free(graph->info); - } - - que_graph_free_recursive(graph); - - mem_heap_free(graph->heap); -} - -/************************************************************************** -Checks if the query graph is in a state where it should be freed, and -frees it in that case. If the session is in a state where it should be -closed, also this is done. */ - -ibool -que_graph_try_free( -/*===============*/ - /* out: TRUE if freed */ - que_t* graph) /* in: query graph */ -{ - sess_t* sess; - - ut_ad(mutex_own(&kernel_mutex)); - - sess = (graph->trx)->sess; - - if ((graph->state == QUE_FORK_BEING_FREED) - && (graph->n_active_thrs == 0)) { - - UT_LIST_REMOVE(graphs, sess->graphs, graph); - que_graph_free(graph); - - sess_try_close(sess); - - return(TRUE); - } - - return(FALSE); -} - -/******************************************************************** -Performs an execution step on a thr node. */ -static -que_thr_t* -que_thr_node_step( -/*==============*/ - /* out: query thread to run next, or NULL - if none */ - que_thr_t* thr) /* in: query thread where run_node must - be the thread node itself */ -{ - ut_ad(thr->run_node == thr); - - if (thr->prev_node == thr->common.parent) { - /* If control to the node came from above, it is just passed - on */ - - thr->run_node = thr->child; - - return(thr); - } - - mutex_enter(&kernel_mutex); - - if (que_thr_peek_stop(thr)) { - - mutex_exit(&kernel_mutex); - - return(thr); - } - - /* Thread execution completed */ - - thr->state = QUE_THR_COMPLETED; - - mutex_exit(&kernel_mutex); - - return(NULL); -} - -/************************************************************************** -Moves a thread from another state to the QUE_THR_RUNNING state. Increments -the n_active_thrs counters of the query graph and transaction if thr was -not active. 
-***NOTE***: This and ..._mysql are the only functions in which such a -transition is allowed to happen! */ -static -void -que_thr_move_to_run_state( -/*======================*/ - que_thr_t* thr) /* in: an query thread */ -{ - trx_t* trx; - - ut_ad(thr->state != QUE_THR_RUNNING); - - trx = thr_get_trx(thr); - - if (!thr->is_active) { - - (thr->graph)->n_active_thrs++; - - trx->n_active_thrs++; - - thr->is_active = TRUE; - - ut_ad((thr->graph)->n_active_thrs == 1); - ut_ad(trx->n_active_thrs == 1); - } - - thr->state = QUE_THR_RUNNING; -} - -/************************************************************************** -Decrements the query thread reference counts in the query graph and the -transaction. May start signal handling, e.g., a rollback. -*** NOTE ***: -This and que_thr_stop_for_mysql are the only functions where the reference -count can be decremented and this function may only be called from inside -que_run_threads or que_thr_check_if_switch! These restrictions exist to make -the rollback code easier to maintain. */ -static -void -que_thr_dec_refer_count( -/*====================*/ - que_thr_t* thr, /* in: query thread */ - que_thr_t** next_thr) /* in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread */ -{ - que_fork_t* fork; - trx_t* trx; - sess_t* sess; - ulint fork_type; - ibool stopped; - - fork = thr->common.parent; - trx = thr_get_trx(thr); - sess = trx->sess; - - mutex_enter(&kernel_mutex); - - ut_a(thr->is_active); - - if (thr->state == QUE_THR_RUNNING) { - - stopped = que_thr_stop(thr); - - if (!stopped) { - /* The reason for the thr suspension or wait was - already canceled before we came here: continue - running the thread */ - - /* fputs("!!!!!!!! 
Wait already ended: continue thr\n", - stderr); */ - - if (next_thr && *next_thr == NULL) { - /* Normally srv_suspend_mysql_thread resets - the state to DB_SUCCESS before waiting, but - in this case we have to do it here, - otherwise nobody does it. */ - trx->error_state = DB_SUCCESS; - - *next_thr = thr; - } else { - ut_a(0); - srv_que_task_enqueue_low(thr); - } - - mutex_exit(&kernel_mutex); - - return; - } - } - - ut_ad(fork->n_active_thrs == 1); - ut_ad(trx->n_active_thrs == 1); - - fork->n_active_thrs--; - trx->n_active_thrs--; - - thr->is_active = FALSE; - - if (trx->n_active_thrs > 0) { - - mutex_exit(&kernel_mutex); - - return; - } - - fork_type = fork->fork_type; - - /* Check if all query threads in the same fork are completed */ - - if (que_fork_all_thrs_in_state(fork, QUE_THR_COMPLETED)) { - - if (fork_type == QUE_FORK_ROLLBACK) { - /* This is really the undo graph used in rollback, - no roll_node in this graph */ - - ut_ad(UT_LIST_GET_LEN(trx->signals) > 0); - ut_ad(trx->handling_signals == TRUE); - - trx_finish_rollback_off_kernel(fork, trx, next_thr); - - } else if (fork_type == QUE_FORK_PURGE) { - - /* Do nothing */ - } else if (fork_type == QUE_FORK_RECOVERY) { - - /* Do nothing */ - } else if (fork_type == QUE_FORK_MYSQL_INTERFACE) { - - /* Do nothing */ - } else { - ut_error; /* not used in MySQL */ - } - } - - if (UT_LIST_GET_LEN(trx->signals) > 0 && trx->n_active_thrs == 0) { - - /* If the trx is signaled and its query thread count drops to - zero, then we start processing a signal; from it we may get - a new query thread to run */ - - trx_sig_start_handle(trx, next_thr); - } - - if (trx->handling_signals && UT_LIST_GET_LEN(trx->signals) == 0) { - - trx_end_signal_handling(trx); - } - - mutex_exit(&kernel_mutex); -} - -/************************************************************************** -Stops a query thread if graph or trx is in a state requiring it. The -conditions are tested in the order (1) graph, (2) trx. 
The kernel mutex has
to be reserved. */

ibool
que_thr_stop(
				/* out: TRUE if stopped */
	que_thr_t*	thr)	/* in: query thread */
{
	que_t*	graph = thr->graph;
	trx_t*	trx = graph->trx;

	ut_ad(mutex_own(&kernel_mutex));

	if (graph->state == QUE_FORK_COMMAND_WAIT) {
		thr->state = QUE_THR_SUSPENDED;

		return(TRUE);
	}

	if (trx->que_state == TRX_QUE_LOCK_WAIT) {
		/* Register the thread among those waiting for a lock of
		this transaction */
		UT_LIST_ADD_FIRST(trx_thrs, trx->wait_thrs, thr);
		thr->state = QUE_THR_LOCK_WAIT;

		return(TRUE);
	}

	if (trx->error_state != DB_SUCCESS
	    && trx->error_state != DB_LOCK_WAIT) {

		/* Error handling built for the MySQL interface */
		thr->state = QUE_THR_COMPLETED;

		return(TRUE);
	}

	if (UT_LIST_GET_LEN(trx->signals) > 0
	    && graph->fork_type != QUE_FORK_ROLLBACK) {

		thr->state = QUE_THR_SUSPENDED;

		return(TRUE);
	}

	/* Nothing requires the thread to stop */
	ut_ad(graph->state == QUE_FORK_ACTIVE);

	return(FALSE);
}

/**************************************************************************
A patch for MySQL used to 'stop' a dummy query thread used in MySQL. The
query thread is stopped and made inactive, except in the case where
it was put to the lock wait state in lock0lock.c, but the lock has already
been granted or the transaction chosen as a victim in deadlock resolution.
*/ - -void -que_thr_stop_for_mysql( -/*===================*/ - que_thr_t* thr) /* in: query thread */ -{ - trx_t* trx; - - trx = thr_get_trx(thr); - - mutex_enter(&kernel_mutex); - - if (thr->state == QUE_THR_RUNNING) { - - if (trx->error_state != DB_SUCCESS - && trx->error_state != DB_LOCK_WAIT) { - - /* Error handling built for the MySQL interface */ - thr->state = QUE_THR_COMPLETED; - } else { - /* It must have been a lock wait but the lock was - already released, or this transaction was chosen - as a victim in selective deadlock resolution */ - - mutex_exit(&kernel_mutex); - - return; - } - } - - ut_ad(thr->is_active == TRUE); - ut_ad(trx->n_active_thrs == 1); - ut_ad(thr->graph->n_active_thrs == 1); - - thr->is_active = FALSE; - (thr->graph)->n_active_thrs--; - - trx->n_active_thrs--; - - mutex_exit(&kernel_mutex); -} - -/************************************************************************** -Moves a thread from another state to the QUE_THR_RUNNING state. Increments -the n_active_thrs counters of the query graph and transaction if thr was -not active. */ - -void -que_thr_move_to_run_state_for_mysql( -/*================================*/ - que_thr_t* thr, /* in: an query thread */ - trx_t* trx) /* in: transaction */ -{ - if (thr->magic_n != QUE_THR_MAGIC_N) { - fprintf(stderr, - "que_thr struct appears corrupt; magic n %lu\n", - (unsigned long) thr->magic_n); - - mem_analyze_corruption(thr); - - ut_error; - } - - if (!thr->is_active) { - - thr->graph->n_active_thrs++; - - trx->n_active_thrs++; - - thr->is_active = TRUE; - } - - thr->state = QUE_THR_RUNNING; -} - -/************************************************************************** -A patch for MySQL used to 'stop' a dummy query thread used in MySQL -select, when there is no error or lock wait. 
*/ - -void -que_thr_stop_for_mysql_no_error( -/*============================*/ - que_thr_t* thr, /* in: query thread */ - trx_t* trx) /* in: transaction */ -{ - ut_ad(thr->state == QUE_THR_RUNNING); - ut_ad(thr->is_active == TRUE); - ut_ad(trx->n_active_thrs == 1); - ut_ad(thr->graph->n_active_thrs == 1); - - if (thr->magic_n != QUE_THR_MAGIC_N) { - fprintf(stderr, - "que_thr struct appears corrupt; magic n %lu\n", - (unsigned long) thr->magic_n); - - mem_analyze_corruption(thr); - - ut_error; - } - - thr->state = QUE_THR_COMPLETED; - - thr->is_active = FALSE; - (thr->graph)->n_active_thrs--; - - trx->n_active_thrs--; -} - -/******************************************************************** -Get the first containing loop node (e.g. while_node_t or for_node_t) for the -given node, or NULL if the node is not within a loop. */ - -que_node_t* -que_node_get_containing_loop_node( -/*==============================*/ - /* out: containing loop node, or NULL. */ - que_node_t* node) /* in: node */ -{ - ut_ad(node); - - for (;;) { - ulint type; - - node = que_node_get_parent(node); - - if (!node) { - break; - } - - type = que_node_get_type(node); - - if ((type == QUE_NODE_FOR) || (type == QUE_NODE_WHILE)) { - break; - } - } - - return(node); -} - -/************************************************************************** -Prints info of an SQL query graph node. 
*/

void
que_node_print_info(
	que_node_t*	node)	/* in: query graph node */
{
	/* Node types that have a printable name; the first matching entry
	wins, any other type is reported as unknown */
	static const struct {
		ulint		type;
		const char*	label;
	} known_types[] = {
		{QUE_NODE_SELECT,	"SELECT"},
		{QUE_NODE_INSERT,	"INSERT"},
		{QUE_NODE_UPDATE,	"UPDATE"},
		{QUE_NODE_WHILE,	"WHILE"},
		{QUE_NODE_ASSIGNMENT,	"ASSIGNMENT"},
		{QUE_NODE_IF,		"IF"},
		{QUE_NODE_FETCH,	"FETCH"},
		{QUE_NODE_OPEN,		"OPEN"},
		{QUE_NODE_PROC,		"STORED PROCEDURE"},
		{QUE_NODE_FUNC,		"FUNCTION"},
		{QUE_NODE_LOCK,		"LOCK"},
		{QUE_NODE_THR,		"QUERY THREAD"},
		{QUE_NODE_COMMIT,	"COMMIT"},
		{QUE_NODE_UNDO,		"UNDO ROW"},
		{QUE_NODE_PURGE,	"PURGE ROW"},
		{QUE_NODE_ROLLBACK,	"ROLLBACK"},
		{QUE_NODE_CREATE_TABLE,	"CREATE TABLE"},
		{QUE_NODE_CREATE_INDEX,	"CREATE INDEX"},
		{QUE_NODE_FOR,		"FOR LOOP"},
		{QUE_NODE_RETURN,	"RETURN"},
		{QUE_NODE_EXIT,		"EXIT"}
	};
	const char*	str = "UNKNOWN NODE TYPE";
	ulint		type;
	ulint		i;

	type = que_node_get_type(node);

	for (i = 0; i < sizeof(known_types) / sizeof(known_types[0]); i++) {
		if (known_types[i].type == type) {
			str = known_types[i].label;

			break;
		}
	}

	fprintf(stderr, "Node type %lu: %s, address %p\n",
		(ulong) type, str, (void*) node);
}

/**************************************************************************
Performs an execution step on a query thread.
*/ -UNIV_INLINE -que_thr_t* -que_thr_step( -/*=========*/ - /* out: query thread to run next: it may - differ from the input parameter if, e.g., a - subprocedure call is made */ - que_thr_t* thr) /* in: query thread */ -{ - que_node_t* node; - que_thr_t* old_thr; - trx_t* trx; - ulint type; - - trx = thr_get_trx(thr); - - ut_ad(thr->state == QUE_THR_RUNNING); - ut_a(trx->error_state == DB_SUCCESS); - - thr->resource++; - - node = thr->run_node; - type = que_node_get_type(node); - - old_thr = thr; - -#ifdef UNIV_DEBUG - if (que_trace_on) { - fputs("To execute: ", stderr); - que_node_print_info(node); - } -#endif - if (type & QUE_NODE_CONTROL_STAT) { - if ((thr->prev_node != que_node_get_parent(node)) - && que_node_get_next(thr->prev_node)) { - - /* The control statements, like WHILE, always pass the - control to the next child statement if there is any - child left */ - - thr->run_node = que_node_get_next(thr->prev_node); - - } else if (type == QUE_NODE_IF) { - if_step(thr); - } else if (type == QUE_NODE_FOR) { - for_step(thr); - } else if (type == QUE_NODE_PROC) { - - /* We can access trx->undo_no without reserving - trx->undo_mutex, because there cannot be active query - threads doing updating or inserting at the moment! 
*/ - - if (thr->prev_node == que_node_get_parent(node)) { - trx->last_sql_stat_start.least_undo_no - = trx->undo_no; - } - - proc_step(thr); - } else if (type == QUE_NODE_WHILE) { - while_step(thr); - } else { - ut_error; - } - } else if (type == QUE_NODE_ASSIGNMENT) { - assign_step(thr); - } else if (type == QUE_NODE_SELECT) { - thr = row_sel_step(thr); - } else if (type == QUE_NODE_INSERT) { - thr = row_ins_step(thr); - } else if (type == QUE_NODE_UPDATE) { - thr = row_upd_step(thr); - } else if (type == QUE_NODE_FETCH) { - thr = fetch_step(thr); - } else if (type == QUE_NODE_OPEN) { - thr = open_step(thr); - } else if (type == QUE_NODE_FUNC) { - proc_eval_step(thr); - - } else if (type == QUE_NODE_LOCK) { - - ut_error; - /* - thr = que_lock_step(thr); - */ - } else if (type == QUE_NODE_THR) { - thr = que_thr_node_step(thr); - } else if (type == QUE_NODE_COMMIT) { - thr = trx_commit_step(thr); - } else if (type == QUE_NODE_UNDO) { - thr = row_undo_step(thr); - } else if (type == QUE_NODE_PURGE) { - thr = row_purge_step(thr); - } else if (type == QUE_NODE_RETURN) { - thr = return_step(thr); - } else if (type == QUE_NODE_EXIT) { - thr = exit_step(thr); - } else if (type == QUE_NODE_ROLLBACK) { - thr = trx_rollback_step(thr); - } else if (type == QUE_NODE_CREATE_TABLE) { - thr = dict_create_table_step(thr); - } else if (type == QUE_NODE_CREATE_INDEX) { - thr = dict_create_index_step(thr); - } else if (type == QUE_NODE_ROW_PRINTF) { - thr = row_printf_step(thr); - } else { - ut_error; - } - - if (type == QUE_NODE_EXIT) { - old_thr->prev_node = que_node_get_containing_loop_node(node); - } else { - old_thr->prev_node = node; - } - - if (thr) { - ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS); - } - - return(thr); -} - -/************************************************************************** -Run a query thread until it finishes or encounters e.g. a lock wait. 
*/ -static -void -que_run_threads_low( -/*================*/ - que_thr_t* thr) /* in: query thread */ -{ - que_thr_t* next_thr; - ulint cumul_resource; - ulint loop_count; - - ut_ad(thr->state == QUE_THR_RUNNING); - ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS); - ut_ad(!mutex_own(&kernel_mutex)); - - /* cumul_resource counts how much resources the OS thread (NOT the - query thread) has spent in this function */ - - loop_count = QUE_MAX_LOOPS_WITHOUT_CHECK; - cumul_resource = 0; -loop: - /* Check that there is enough space in the log to accommodate - possible log entries by this query step; if the operation can touch - more than about 4 pages, checks must be made also within the query - step! */ - - log_free_check(); - - /* Perform the actual query step: note that the query thread - may change if, e.g., a subprocedure call is made */ - - /*-------------------------*/ - next_thr = que_thr_step(thr); - /*-------------------------*/ - - ut_a(!next_thr || (thr_get_trx(next_thr)->error_state == DB_SUCCESS)); - - loop_count++; - - if (next_thr != thr) { - ut_a(next_thr == NULL); - - /* This can change next_thr to a non-NULL value if there was - a lock wait that already completed. */ - que_thr_dec_refer_count(thr, &next_thr); - - if (next_thr == NULL) { - - return; - } - - loop_count = QUE_MAX_LOOPS_WITHOUT_CHECK; - - thr = next_thr; - } - - goto loop; -} - -/************************************************************************** -Run a query thread. Handles lock waits. */ -void -que_run_threads( -/*============*/ - que_thr_t* thr) /* in: query thread */ -{ -loop: - ut_a(thr_get_trx(thr)->error_state == DB_SUCCESS); - que_run_threads_low(thr); - - mutex_enter(&kernel_mutex); - - switch (thr->state) { - - case QUE_THR_RUNNING: - /* There probably was a lock wait, but it already ended - before we came here: continue running thr */ - - mutex_exit(&kernel_mutex); - - goto loop; - - case QUE_THR_LOCK_WAIT: - mutex_exit(&kernel_mutex); - - /* The ..._mysql_... 
function works also for InnoDB's - internal threads. Let us wait that the lock wait ends. */ - - srv_suspend_mysql_thread(thr); - - if (thr_get_trx(thr)->error_state != DB_SUCCESS) { - /* thr was chosen as a deadlock victim or there was - a lock wait timeout */ - - que_thr_dec_refer_count(thr, NULL); - - return; - } - - goto loop; - - case QUE_THR_COMPLETED: - case QUE_THR_COMMAND_WAIT: - /* Do nothing */ - break; - - default: - ut_error; - } - - mutex_exit(&kernel_mutex); -} - -/************************************************************************* -Evaluate the given SQL. */ - -ulint -que_eval_sql( -/*=========*/ - /* out: error code or DB_SUCCESS */ - pars_info_t* info, /* in: info struct, or NULL */ - const char* sql, /* in: SQL string */ - ibool reserve_dict_mutex, - /* in: if TRUE, acquire/release - dict_sys->mutex around call to pars_sql. */ - trx_t* trx) /* in: trx */ -{ - que_thr_t* thr; - que_t* graph; - - ut_a(trx->error_state == DB_SUCCESS); - - if (reserve_dict_mutex) { - mutex_enter(&dict_sys->mutex); - } - - graph = pars_sql(info, sql); - - if (reserve_dict_mutex) { - mutex_exit(&dict_sys->mutex); - } - - ut_a(graph); - - graph->trx = trx; - trx->graph = NULL; - - graph->fork_type = QUE_FORK_MYSQL_INTERFACE; - - ut_a(thr = que_fork_start_command(graph)); - - que_run_threads(thr); - - que_graph_free(graph); - - return(trx->error_state); -} diff --git a/storage/innobase/read/read0read.c b/storage/innobase/read/read0read.c deleted file mode 100644 index 4068cf4fa69..00000000000 --- a/storage/innobase/read/read0read.c +++ /dev/null @@ -1,527 +0,0 @@ -/****************************************************** -Cursor read - -(c) 1997 Innobase Oy - -Created 2/16/1997 Heikki Tuuri -*******************************************************/ - -#include "read0read.h" - -#ifdef UNIV_NONINL -#include "read0read.ic" -#endif - -#include "srv0srv.h" -#include "trx0sys.h" - -/* -------------------------------------------------------------------------------- -FACT A: 
Cursor read view on a secondary index sees only committed versions -------- -of the records in the secondary index or those versions of rows created -by the transaction which created the cursor, before the cursor was created, even -if the transaction which created the cursor has changed that clustered index page. - -PROOF: We must show that a read always goes to the clustered index record -to check that the record is visible in the cursor read view. Consider e.g. the -following table and SQL-clauses: - -create table t1(a int not null, b int, primary key(a), index(b)); -insert into t1 values (1,1),(2,2); -commit; - -Now consider that we have a cursor for a query - -select b from t1 where b >= 1; - -This query will use the secondary key on the table t1. Now, after the first fetch -on this cursor, suppose we do an update: - -update t1 set b = 5 where b = 2; - -Now the second fetch of the cursor should not see record (2,5); instead it should -see record (2,2). - -We should also show that if we then execute delete from t1 where b = 5; we still -can see record (2,2). - -When we access a secondary key record, the maximum transaction id is fetched -from this record and this trx_id is compared to up_limit_id in the view. -If trx_id in the record is greater than or equal to up_limit_id in the view, -the clustered record is accessed. Because the trx_id of the creating -transaction was stored, when this view was created, in the list of -trx_ids not seen by this read view, a previous version of the -record is requested to be built. This is built using the clustered record. -If the secondary key record is delete marked, its corresponding -clustered record can already have been purged only if the record's -trx_id < low_limit_no. Purge can't remove any record deleted by a -transaction which was active when the cursor was created. But, we still -may have a deleted secondary key record but no clustered record. 
But, -this is not a problem because this case is handled in the -row_sel_get_clust_rec() function, which is called -whenever we note that this read view does not see the trx_id in the -record. Thus, we see the correct version. Q. E. D. - -------------------------------------------------------------------------------- -FACT B: Cursor read view on a clustered index sees only committed versions -------- -of the records in the clustered index or those versions of rows created -by the transaction which created the cursor, before the cursor was created, even -if the transaction which created the cursor has changed that clustered index page. - -PROOF: Consider e.g. the following table and SQL-clauses: - -create table t1(a int not null, b int, primary key(a)); -insert into t1(a) values (1),(2); -commit; - -Now consider that we have a cursor for a query - -select a from t1 where a >= 1; - -This query will use the clustered key on the table t1. Now, after the first fetch -on this cursor, suppose we do an update: - -update t1 set a = 5 where a = 2; - -Now the second fetch of the cursor should not see record (5); instead it should -see record (2). - -We should also show that if we execute delete from t1 where a = 5; after -the cursor is opened, we still can see record (2). - -When accessing a clustered record we always check if this read view sees -the trx_id stored in the clustered record. By default we don't see any changes -if record trx_id >= low_limit_id, i.e. the change was made by a transaction -which started after the transaction which created the cursor. If the row -was changed by such a future transaction, a previous version of the -clustered record is created. Thus we see only the committed version in -this case. We see all changes made by committed transactions, i.e. -record trx_id < up_limit_id. In this case we don't need to do anything, -we already see the correct version of the record. We don't see any changes -made by an active transaction, except the creating transaction. 
We have stored -trx_id of creating transaction to list of trx_ids when this view was -created. Thus we can easily see if this record was changed by the -creating transaction. Because we already have clustered record we can -access roll_ptr. Using this roll_ptr we can fetch undo record. -We can now check that undo_no of the undo record is less than undo_no of the -trancaction which created a view when cursor was created. We see this -clustered record only in case when record undo_no is less than undo_no -in the view. If this is not true we build based on undo_rec previous -version of the record. This record is found because purge can't remove -records accessed by active transaction. Thus we see correct version. Q. E. D. -------------------------------------------------------------------------------- -FACT C: Purge does not remove any delete marked row that is visible -------- -to cursor view. - -TODO: proof this - -*/ - -/************************************************************************* -Creates a read view object. */ -UNIV_INLINE -read_view_t* -read_view_create_low( -/*=================*/ - /* out, own: read view struct */ - ulint n, /* in: number of cells in the trx_ids array */ - mem_heap_t* heap) /* in: memory heap from which allocated */ -{ - read_view_t* view; - - view = mem_heap_alloc(heap, sizeof(read_view_t)); - - view->n_trx_ids = n; - view->trx_ids = mem_heap_alloc(heap, n * sizeof(dulint)); - - return(view); -} - -/************************************************************************* -Makes a copy of the oldest existing read view, with the exception that also -the creating trx of the oldest view is set as not visible in the 'copied' -view. Opens a new view if no views currently exist. The view must be closed -with ..._close. This is used in purge. 
*/

read_view_t*
read_view_oldest_copy_or_open_new(
/*==============================*/
					/* out, own: read view struct */
	dulint		cr_trx_id,	/* in: trx_id of creating
					transaction, or (0, 0) used in purge */
	mem_heap_t*	heap)		/* in: memory heap from which
					allocated */
{
	read_view_t*	old_view;
	read_view_t*	view_copy;
	ibool		needs_insert	= TRUE;
	ulint		insert_done	= 0;
	ulint		n;
	ulint		i;

	ut_ad(mutex_own(&kernel_mutex));

	old_view = UT_LIST_GET_LAST(trx_sys->view_list);

	if (old_view == NULL) {
		/* No views exist: open a fresh one instead of copying */

		return(read_view_open_now(cr_trx_id, heap));
	}

	n = old_view->n_trx_ids;

	if (ut_dulint_cmp(old_view->creator_trx_id,
			  ut_dulint_create(0, 0)) != 0) {
		/* One extra cell is needed for the creator of the old view */
		n++;
	} else {
		needs_insert = FALSE;
	}

	view_copy = read_view_create_low(n, heap);

	/* read_view_create_low() sets only n_trx_ids and trx_ids. Initialize
	type and undo_no in the same way as read_view_open_now() does, so that
	both return paths of this function hand out a fully initialized view
	(read_view_print(), for one, reads view->type). */

	view_copy->type = VIEW_NORMAL;
	view_copy->undo_no = ut_dulint_create(0, 0);

	/* Insert the id of the creator in the right place of the descending
	array of ids, if needs_insert is TRUE: */

	for (i = 0; i < n; i++) {
		if (needs_insert
		    && (i >= old_view->n_trx_ids
			|| ut_dulint_cmp(old_view->creator_trx_id,
					 read_view_get_nth_trx_id(old_view, i))
			> 0)) {

			read_view_set_nth_trx_id(view_copy, i,
						 old_view->creator_trx_id);
			needs_insert = FALSE;
			insert_done = 1;
		} else {
			/* After the creator id has been inserted, the copy
			is one cell ahead of the old view */

			read_view_set_nth_trx_id(view_copy, i,
						 read_view_get_nth_trx_id(
							 old_view,
							 i - insert_done));
		}
	}

	view_copy->creator_trx_id = cr_trx_id;

	view_copy->low_limit_no = old_view->low_limit_no;
	view_copy->low_limit_id = old_view->low_limit_id;

	if (n > 0) {
		/* The last active transaction has the smallest id: */
		view_copy->up_limit_id = read_view_get_nth_trx_id(
			view_copy, n - 1);
	} else {
		view_copy->up_limit_id = old_view->up_limit_id;
	}

	UT_LIST_ADD_LAST(view_list, trx_sys->view_list, view_copy);

	return(view_copy);
}

/*************************************************************************
Opens a read view where exactly the transactions serialized before this
point in time are seen in the view. 
*/

read_view_t*
read_view_open_now(
/*===============*/
					/* out, own: read view struct */
	dulint		cr_trx_id,	/* in: trx_id of creating
					transaction, or (0, 0) used in
					purge */
	mem_heap_t*	heap)		/* in: memory heap from which
					allocated */
{
	read_view_t*	view;
	trx_t*		trx;
	ulint		n_stored = 0;

	ut_ad(mutex_own(&kernel_mutex));

	/* Reserve one cell for every transaction currently in the trx list;
	the number of cells actually used is fixed up after the scan */

	view = read_view_create_low(UT_LIST_GET_LEN(trx_sys->trx_list), heap);

	view->type = VIEW_NORMAL;
	view->undo_no = ut_dulint_create(0, 0);
	view->creator_trx_id = cr_trx_id;

	/* No future transactions should be visible in the view */

	view->low_limit_no = trx_sys->max_trx_id;
	view->low_limit_id = view->low_limit_no;

	/* No active transaction should be visible, except cr_trx */

	for (trx = UT_LIST_GET_FIRST(trx_sys->trx_list);
	     trx;
	     trx = UT_LIST_GET_NEXT(trx_list, trx)) {

		if (ut_dulint_cmp(trx->id, cr_trx_id) == 0) {
			/* The creator sees its own changes */

			continue;
		}

		if (trx->conc_state != TRX_ACTIVE
		    && trx->conc_state != TRX_PREPARED) {

			continue;
		}

		read_view_set_nth_trx_id(view, n_stored, trx->id);

		n_stored++;

		/* NOTE that a transaction whose trx number is <
		trx_sys->max_trx_id can still be active, if it is
		in the middle of its commit! Note that when a
		transaction starts, we initialize trx->no to
		ut_dulint_max. */

		if (ut_dulint_cmp(view->low_limit_no, trx->no) > 0) {

			view->low_limit_no = trx->no;
		}
	}

	view->n_trx_ids = n_stored;

	if (n_stored == 0) {
		view->up_limit_id = view->low_limit_id;
	} else {
		/* The last active transaction has the smallest id: */
		view->up_limit_id = read_view_get_nth_trx_id(view,
							     n_stored - 1);
	}

	UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view);

	return(view);
}

/*************************************************************************
Closes a read view. 
*/ - -void -read_view_close( -/*============*/ - read_view_t* view) /* in: read view */ -{ - ut_ad(mutex_own(&kernel_mutex)); - - UT_LIST_REMOVE(view_list, trx_sys->view_list, view); -} - -/************************************************************************* -Closes a consistent read view for MySQL. This function is called at an SQL -statement end if the trx isolation level is <= TRX_ISO_READ_COMMITTED. */ - -void -read_view_close_for_mysql( -/*======================*/ - trx_t* trx) /* in: trx which has a read view */ -{ - ut_a(trx->global_read_view); - - mutex_enter(&kernel_mutex); - - read_view_close(trx->global_read_view); - - mem_heap_empty(trx->global_read_view_heap); - - trx->read_view = NULL; - trx->global_read_view = NULL; - - mutex_exit(&kernel_mutex); -} - -/************************************************************************* -Prints a read view to stderr. */ - -void -read_view_print( -/*============*/ - read_view_t* view) /* in: read view */ -{ - ulint n_ids; - ulint i; - - if (view->type == VIEW_HIGH_GRANULARITY) { - fprintf(stderr, - "High-granularity read view undo_n:o %lu %lu\n", - (ulong) ut_dulint_get_high(view->undo_no), - (ulong) ut_dulint_get_low(view->undo_no)); - } else { - fprintf(stderr, "Normal read view\n"); - } - - fprintf(stderr, "Read view low limit trx n:o %lu %lu\n", - (ulong) ut_dulint_get_high(view->low_limit_no), - (ulong) ut_dulint_get_low(view->low_limit_no)); - - fprintf(stderr, "Read view up limit trx id %lu %lu\n", - (ulong) ut_dulint_get_high(view->up_limit_id), - (ulong) ut_dulint_get_low(view->up_limit_id)); - - fprintf(stderr, "Read view low limit trx id %lu %lu\n", - (ulong) ut_dulint_get_high(view->low_limit_id), - (ulong) ut_dulint_get_low(view->low_limit_id)); - - fprintf(stderr, "Read view individually stored trx ids:\n"); - - n_ids = view->n_trx_ids; - - for (i = 0; i < n_ids; i++) { - fprintf(stderr, "Read view trx id %lu %lu\n", - (ulong) ut_dulint_get_high( - read_view_get_nth_trx_id(view, i)), - (ulong) 
ut_dulint_get_low( - read_view_get_nth_trx_id(view, i))); - } -} - -/************************************************************************* -Create a high-granularity consistent cursor view for mysql to be used -in cursors. In this consistent read view modifications done by the -creating transaction after the cursor is created or future transactions -are not visible. */ - -cursor_view_t* -read_cursor_view_create_for_mysql( -/*==============================*/ - trx_t* cr_trx) /* in: trx where cursor view is created */ -{ - cursor_view_t* curview; - read_view_t* view; - mem_heap_t* heap; - trx_t* trx; - ulint n; - - ut_a(cr_trx); - - /* Use larger heap than in trx_create when creating a read_view - because cursors are quite long. */ - - heap = mem_heap_create(512); - - curview = (cursor_view_t*) mem_heap_alloc(heap, sizeof(cursor_view_t)); - curview->heap = heap; - - /* Drop cursor tables from consideration when evaluating the need of - auto-commit */ - curview->n_mysql_tables_in_use = cr_trx->n_mysql_tables_in_use; - cr_trx->n_mysql_tables_in_use = 0; - - mutex_enter(&kernel_mutex); - - curview->read_view = read_view_create_low( - UT_LIST_GET_LEN(trx_sys->trx_list), curview->heap); - - view = curview->read_view; - view->creator_trx_id = cr_trx->id; - view->type = VIEW_HIGH_GRANULARITY; - view->undo_no = cr_trx->undo_no; - - /* No future transactions should be visible in the view */ - - view->low_limit_no = trx_sys->max_trx_id; - view->low_limit_id = view->low_limit_no; - - n = 0; - trx = UT_LIST_GET_FIRST(trx_sys->trx_list); - - /* No active transaction should be visible */ - - while (trx) { - - if (trx->conc_state == TRX_ACTIVE - || trx->conc_state == TRX_PREPARED) { - - read_view_set_nth_trx_id(view, n, trx->id); - - n++; - - /* NOTE that a transaction whose trx number is < - trx_sys->max_trx_id can still be active, if it is - in the middle of its commit! Note that when a - transaction starts, we initialize trx->no to - ut_dulint_max. 
*/ - - if (ut_dulint_cmp(view->low_limit_no, trx->no) > 0) { - - view->low_limit_no = trx->no; - } - } - - trx = UT_LIST_GET_NEXT(trx_list, trx); - } - - view->n_trx_ids = n; - - if (n > 0) { - /* The last active transaction has the smallest id: */ - view->up_limit_id = read_view_get_nth_trx_id(view, n - 1); - } else { - view->up_limit_id = view->low_limit_id; - } - - UT_LIST_ADD_FIRST(view_list, trx_sys->view_list, view); - - mutex_exit(&kernel_mutex); - - return(curview); -} - -/************************************************************************* -Close a given consistent cursor view for mysql and restore global read view -back to a transaction read view. */ - -void -read_cursor_view_close_for_mysql( -/*=============================*/ - trx_t* trx, /* in: trx */ - cursor_view_t* curview)/* in: cursor view to be closed */ -{ - ut_a(curview); - ut_a(curview->read_view); - ut_a(curview->heap); - - /* Add cursor's tables to the global count of active tables that - belong to this transaction */ - trx->n_mysql_tables_in_use += curview->n_mysql_tables_in_use; - - mutex_enter(&kernel_mutex); - - read_view_close(curview->read_view); - trx->read_view = trx->global_read_view; - - mutex_exit(&kernel_mutex); - - mem_heap_free(curview->heap); -} - -/************************************************************************* -This function sets a given consistent cursor view to a transaction -read view if given consistent cursor view is not NULL. Otherwise, function -restores a global read view to a transaction read view. 
*/ - -void -read_cursor_set_for_mysql( -/*======================*/ - trx_t* trx, /* in: transaction where cursor is set */ - cursor_view_t* curview)/* in: consistent cursor view to be set */ -{ - ut_a(trx); - - mutex_enter(&kernel_mutex); - - if (UNIV_LIKELY(curview != NULL)) { - trx->read_view = curview->read_view; - } else { - trx->read_view = trx->global_read_view; - } - - mutex_exit(&kernel_mutex); -} diff --git a/storage/innobase/rem/rem0cmp.c b/storage/innobase/rem/rem0cmp.c deleted file mode 100644 index ca0ec663548..00000000000 --- a/storage/innobase/rem/rem0cmp.c +++ /dev/null @@ -1,1064 +0,0 @@ -/*********************************************************************** -Comparison services for records - -(c) 1994-1996 Innobase Oy - -Created 7/1/1994 Heikki Tuuri -************************************************************************/ - -#include "rem0cmp.h" - -#ifdef UNIV_NONINL -#include "rem0cmp.ic" -#endif - -#include "srv0srv.h" - -/* ALPHABETICAL ORDER - ================== - -The records are put into alphabetical order in the following -way: let F be the first field where two records disagree. -If there is a character in some position n where the the -records disagree, the order is determined by comparison of -the characters at position n, possibly after -collating transformation. If there is no such character, -but the corresponding fields have different lengths, then -if the data type of the fields is paddable, -shorter field is padded with a padding character. If the -data type is not paddable, longer field is considered greater. -Finally, the SQL null is bigger than any other value. - -At the present, the comparison functions return 0 in the case, -where two records disagree only in the way that one -has more fields than the other. */ - -#ifdef UNIV_DEBUG -/***************************************************************** -Used in debug checking of cmp_dtuple_... . -This function is used to compare a data tuple to a physical record. 
If -dtuple has n fields then rec must have either m >= n fields, or it must -differ from dtuple in some of the m fields rec has. */ -static -int -cmp_debug_dtuple_rec_with_match( -/*============================*/ - /* out: 1, 0, -1, if dtuple is greater, equal, - less than rec, respectively, when only the - common first fields are compared */ - dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec, /* in: physical record which differs from - dtuple in some of the common fields, or which - has an equal number or more fields than - dtuple */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint* matched_fields);/* in/out: number of already - completely matched fields; when function - returns, contains the value for current - comparison */ -#endif /* UNIV_DEBUG */ -#ifndef UNIV_HOTBACKUP -/***************************************************************** -This function is used to compare two data fields for which the data type -is such that we must use MySQL code to compare them. The prototype here -must be a copy of the the one in ha_innobase.cc! */ -extern -int -innobase_mysql_cmp( -/*===============*/ - /* out: 1, 0, -1, if a is greater, - equal, less than b, respectively */ - int mysql_type, /* in: MySQL type */ - uint charset_number, /* in: number of the charset */ - unsigned char* a, /* in: data field */ - unsigned int a_length, /* in: data field length, - not UNIV_SQL_NULL */ - unsigned char* b, /* in: data field */ - unsigned int b_length); /* in: data field length, - not UNIV_SQL_NULL */ -#endif /* !UNIV_HOTBACKUP */ -/************************************************************************* -Transforms the character code so that it is ordered appropriately for the -language. This is only used for the latin1 char set. MySQL does the -comparisons for other char sets. 
*/ -UNIV_INLINE -ulint -cmp_collate( -/*========*/ - /* out: collation order position */ - ulint code) /* in: code of a character stored in database record */ -{ - return((ulint) srv_latin1_ordering[code]); -} - -/***************************************************************** -Returns TRUE if two columns are equal for comparison purposes. */ - -ibool -cmp_cols_are_equal( -/*===============*/ - /* out: TRUE if the columns are - considered equal in comparisons */ - const dict_col_t* col1, /* in: column 1 */ - const dict_col_t* col2, /* in: column 2 */ - ibool check_charsets) - /* in: whether to check charsets */ -{ - if (dtype_is_non_binary_string_type(col1->mtype, col1->prtype) - && dtype_is_non_binary_string_type(col2->mtype, col2->prtype)) { - - /* Both are non-binary string types: they can be compared if - and only if the charset-collation is the same */ - - if (check_charsets) { - return(dtype_get_charset_coll(col1->prtype) - == dtype_get_charset_coll(col2->prtype)); - } else { - return(TRUE); - } - } - - if (dtype_is_binary_string_type(col1->mtype, col1->prtype) - && dtype_is_binary_string_type(col2->mtype, col2->prtype)) { - - /* Both are binary string types: they can be compared */ - - return(TRUE); - } - - if (col1->mtype != col2->mtype) { - - return(FALSE); - } - - if (col1->mtype == DATA_INT - && (col1->prtype & DATA_UNSIGNED) - != (col2->prtype & DATA_UNSIGNED)) { - - /* The storage format of an unsigned integer is different - from a signed integer: in a signed integer we OR - 0x8000... to the value of positive integers. 
*/ - - return(FALSE); - } - - return(col1->mtype != DATA_INT || col1->len == col2->len); -} - -#ifndef UNIV_HOTBACKUP -/***************************************************************** -Innobase uses this function to compare two data fields for which the data type -is such that we must compare whole fields or call MySQL to do the comparison */ -static -int -cmp_whole_field( -/*============*/ - /* out: 1, 0, -1, if a is greater, - equal, less than b, respectively */ - ulint mtype, /* in: main type */ - ulint prtype, /* in: precise type */ - unsigned char* a, /* in: data field */ - unsigned int a_length, /* in: data field length, - not UNIV_SQL_NULL */ - unsigned char* b, /* in: data field */ - unsigned int b_length) /* in: data field length, - not UNIV_SQL_NULL */ -{ - float f_1; - float f_2; - double d_1; - double d_2; - int swap_flag = 1; - - switch (mtype) { - - case DATA_DECIMAL: - /* Remove preceding spaces */ - for (; a_length && *a == ' '; a++, a_length--); - for (; b_length && *b == ' '; b++, b_length--); - - if (*a == '-') { - if (*b != '-') { - return(-1); - } - - a++; b++; - a_length--; - b_length--; - - swap_flag = -1; - - } else if (*b == '-') { - - return(1); - } - - while (a_length > 0 && (*a == '+' || *a == '0')) { - a++; a_length--; - } - - while (b_length > 0 && (*b == '+' || *b == '0')) { - b++; b_length--; - } - - if (a_length != b_length) { - if (a_length < b_length) { - return(-swap_flag); - } - - return(swap_flag); - } - - while (a_length > 0 && *a == *b) { - - a++; b++; a_length--; - } - - if (a_length == 0) { - - return(0); - } - - if (*a > *b) { - return(swap_flag); - } - - return(-swap_flag); - case DATA_DOUBLE: - d_1 = mach_double_read(a); - d_2 = mach_double_read(b); - - if (d_1 > d_2) { - return(1); - } else if (d_2 > d_1) { - return(-1); - } - - return(0); - - case DATA_FLOAT: - f_1 = mach_float_read(a); - f_2 = mach_float_read(b); - - if (f_1 > f_2) { - return(1); - } else if (f_2 > f_1) { - return(-1); - } - - return(0); - case 
DATA_BLOB: - if (prtype & DATA_BINARY_TYPE) { - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: comparing a binary BLOB" - " with a character set sensitive\n" - "InnoDB: comparison!\n"); - } - /* fall through */ - case DATA_VARMYSQL: - case DATA_MYSQL: - return(innobase_mysql_cmp( - (int)(prtype & DATA_MYSQL_TYPE_MASK), - (uint)dtype_get_charset_coll(prtype), - a, a_length, b, b_length)); - default: - fprintf(stderr, - "InnoDB: unknown type number %lu\n", - (ulong) mtype); - ut_error; - } - - return(0); -} -#endif /* !UNIV_HOTBACKUP */ - -/***************************************************************** -This function is used to compare two data fields for which we know the -data type. */ - -int -cmp_data_data_slow( -/*===============*/ - /* out: 1, 0, -1, if data1 is greater, equal, - less than data2, respectively */ - ulint mtype, /* in: main type */ - ulint prtype, /* in: precise type */ - byte* data1, /* in: data field (== a pointer to a memory - buffer) */ - ulint len1, /* in: data field length or UNIV_SQL_NULL */ - byte* data2, /* in: data field (== a pointer to a memory - buffer) */ - ulint len2) /* in: data field length or UNIV_SQL_NULL */ -{ -#ifndef UNIV_HOTBACKUP - ulint data1_byte; - ulint data2_byte; - ulint cur_bytes; - - if (len1 == UNIV_SQL_NULL || len2 == UNIV_SQL_NULL) { - - if (len1 == len2) { - - return(0); - } - - if (len1 == UNIV_SQL_NULL) { - /* We define the SQL null to be the smallest possible - value of a field in the alphabetical order */ - - return(-1); - } - - return(1); - } - - if (mtype >= DATA_FLOAT - || (mtype == DATA_BLOB - && 0 == (prtype & DATA_BINARY_TYPE) - && dtype_get_charset_coll(prtype) - != DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) { - - return(cmp_whole_field(mtype, prtype, - data1, (unsigned) len1, - data2, (unsigned) len2)); - } - - /* Compare then the fields */ - - cur_bytes = 0; - - for (;;) { - if (len1 <= cur_bytes) { - if (len2 <= cur_bytes) { - - return(0); - } - - data1_byte = 
dtype_get_pad_char(mtype, prtype); - - if (data1_byte == ULINT_UNDEFINED) { - - return(-1); - } - } else { - data1_byte = *data1; - } - - if (len2 <= cur_bytes) { - data2_byte = dtype_get_pad_char(mtype, prtype); - - if (data2_byte == ULINT_UNDEFINED) { - - return(1); - } - } else { - data2_byte = *data2; - } - - if (data1_byte == data2_byte) { - /* If the bytes are equal, they will remain such even - after the collation transformation below */ - - goto next_byte; - } - - if (mtype <= DATA_CHAR - || (mtype == DATA_BLOB - && 0 == (prtype & DATA_BINARY_TYPE))) { - - data1_byte = cmp_collate(data1_byte); - data2_byte = cmp_collate(data2_byte); - } - - if (data1_byte > data2_byte) { - - return(1); - } else if (data1_byte < data2_byte) { - - return(-1); - } -next_byte: - /* Next byte */ - cur_bytes++; - data1++; - data2++; - } -#else /* !UNIV_HOTBACKUP */ - /* This function depends on MySQL code that is not included in - InnoDB Hot Backup builds. Besides, this function should never - be called in InnoDB Hot Backup. */ - ut_error; -#endif /* !UNIV_HOTBACKUP */ - - return(0); /* Not reached */ -} - -/***************************************************************** -This function is used to compare a data tuple to a physical record. -Only dtuple->n_fields_cmp first fields are taken into account for -the the data tuple! If we denote by n = n_fields_cmp, then rec must -have either m >= n fields, or it must differ from dtuple in some of -the m fields rec has. If rec has an externally stored field we do not -compare it but return with value 0 if such a comparison should be -made. 
*/

int
cmp_dtuple_rec_with_match(
/*======================*/
				/* out: 1, 0, -1, if dtuple is greater, equal,
				less than rec, respectively, when only the
				common first fields are compared, or
				until the first externally stored field in
				rec */
	dtuple_t*	dtuple,	/* in: data tuple */
	rec_t*		rec,	/* in: physical record which differs from
				dtuple in some of the common fields, or which
				has an equal number or more fields than
				dtuple */
	const ulint*	offsets,/* in: array returned by rec_get_offsets() */
	ulint*		matched_fields, /* in/out: number of already completely
				matched fields; when function returns,
				contains the value for current comparison */
	ulint*		matched_bytes) /* in/out: number of already matched
				bytes within the first field not completely
				matched; when function returns, contains the
				value for current comparison */
{
#ifndef UNIV_HOTBACKUP
	dfield_t*	dtuple_field;	/* current field in logical record */
	ulint		dtuple_f_len;	/* the length of the current field
					in the logical record */
	byte*		dtuple_b_ptr;	/* pointer to the current byte in
					logical field data */
	ulint		dtuple_byte;	/* value of current byte to be compared
					in dtuple */
	ulint		rec_f_len;	/* length of current field in rec */
	byte*		rec_b_ptr;	/* pointer to the current byte in
					rec field */
	ulint		rec_byte;	/* value of current byte to be
					compared in rec */
	ulint		cur_field;	/* current field number */
	ulint		cur_bytes;	/* number of already matched bytes
					in current field */
	int		ret = 3333;	/* return value; the initial value is
					outside the -1..1 range asserted at
					order_resolved */

	ut_ad(dtuple && rec && matched_fields && matched_bytes);
	ut_ad(dtuple_check_typed(dtuple));
	ut_ad(rec_offs_validate(rec, NULL, offsets));

	/* Resume from the position the caller has already matched */

	cur_field = *matched_fields;
	cur_bytes = *matched_bytes;

	ut_ad(cur_field <= dtuple_get_n_fields_cmp(dtuple));
	ut_ad(cur_field <= rec_offs_n_fields(offsets));

	if (cur_bytes == 0 && cur_field == 0) {
		ulint	rec_info = rec_get_info_bits(rec,
						     rec_offs_comp(offsets));
		ulint	tup_info = dtuple_get_info_bits(dtuple);

		/* The minimum record flag decides the order before any field
		is looked at: if rec carries the flag, the result is 0 when
		dtuple carries it too and 1 otherwise; if only dtuple carries
		it, dtuple is the smaller one */

		if (rec_info & REC_INFO_MIN_REC_FLAG) {
			ret = !(tup_info & REC_INFO_MIN_REC_FLAG);
			goto order_resolved;
		} else if (tup_info & REC_INFO_MIN_REC_FLAG) {
			ret = -1;
			goto order_resolved;
		}
	}

	/* Match fields in a loop; stop if we run out of fields in dtuple
	or find an externally stored field */

	while (cur_field < dtuple_get_n_fields_cmp(dtuple)) {

		ulint	mtype;
		ulint	prtype;

		dtuple_field = dtuple_get_nth_field(dtuple, cur_field);
		{
			const dtype_t*	type
				= dfield_get_type(dtuple_field);

			mtype = type->mtype;
			prtype = type->prtype;
		}

		dtuple_f_len = dfield_get_len(dtuple_field);

		rec_b_ptr = rec_get_nth_field(rec, offsets,
					      cur_field, &rec_f_len);

		/* If we have not yet matched any bytes, it may be that one or
		both the fields are SQL null, or the record or dtuple may be
		the predefined minimum record, or the field is externally
		stored */

		if (UNIV_LIKELY(cur_bytes == 0)) {
			if (rec_offs_nth_extern(offsets, cur_field)) {
				/* We do not compare to an externally
				stored field */

				ret = 0;

				goto order_resolved;
			}

			if (dtuple_f_len == UNIV_SQL_NULL) {
				if (rec_f_len == UNIV_SQL_NULL) {

					goto next_field;
				}

				ret = -1;
				goto order_resolved;
			} else if (rec_f_len == UNIV_SQL_NULL) {
				/* We define the SQL null to be the
				smallest possible value of a field
				in the alphabetical order */

				ret = 1;
				goto order_resolved;
			}
		}

		/* These types are compared as whole fields, not byte by
		byte */

		if (mtype >= DATA_FLOAT
		    || (mtype == DATA_BLOB
			&& 0 == (prtype & DATA_BINARY_TYPE)
			&& dtype_get_charset_coll(prtype)
			!= DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {

			ret = cmp_whole_field(mtype, prtype,
					      dfield_get_data(dtuple_field),
					      (unsigned) dtuple_f_len,
					      rec_b_ptr, (unsigned) rec_f_len);

			if (ret != 0) {
				cur_bytes = 0;

				goto order_resolved;
			} else {
				goto next_field;
			}
		}

		/* Set the pointers at the current byte */

		rec_b_ptr = rec_b_ptr + cur_bytes;
		dtuple_b_ptr = (byte*)dfield_get_data(dtuple_field)
			+ cur_bytes;
		/* Compare then the fields */

		for (;;) {
			if (UNIV_UNLIKELY(rec_f_len <= cur_bytes)) {
				if (dtuple_f_len <= cur_bytes) {

					goto next_field;
				}

				/* rec field has ended but the dtuple field
				has not: continue with the pad character, if
				the type has one */

				rec_byte = dtype_get_pad_char(mtype, prtype);

				if (rec_byte == ULINT_UNDEFINED) {
					ret = 1;

					goto order_resolved;
				}
			} else {
				rec_byte = *rec_b_ptr;
			}

			if (UNIV_UNLIKELY(dtuple_f_len <= cur_bytes)) {
				dtuple_byte = dtype_get_pad_char(mtype,
								 prtype);

				if (dtuple_byte == ULINT_UNDEFINED) {
					ret = -1;

					goto order_resolved;
				}
			} else {
				dtuple_byte = *dtuple_b_ptr;
			}

			if (dtuple_byte == rec_byte) {
				/* If the bytes are equal, they will
				remain such even after the collation
				transformation below */

				goto next_byte;
			}

			if (mtype <= DATA_CHAR
			    || (mtype == DATA_BLOB
				&& !(prtype & DATA_BINARY_TYPE))) {

				rec_byte = cmp_collate(rec_byte);
				dtuple_byte = cmp_collate(dtuple_byte);
			}

			/* Only the sign of the difference is used: it is
			normalized to -1 or 1 below */

			ret = (int) (dtuple_byte - rec_byte);
			if (UNIV_UNLIKELY(ret)) {
				if (ret < 0) {
					ret = -1;
					goto order_resolved;
				} else {
					ret = 1;
					goto order_resolved;
				}
			}
next_byte:
			/* Next byte */
			cur_bytes++;
			rec_b_ptr++;
			dtuple_b_ptr++;
		}

next_field:
		cur_field++;
		cur_bytes = 0;
	}

	ut_ad(cur_bytes == 0);

	ret = 0;	/* If we ran out of fields, dtuple was equal to rec
			up to the common fields */
order_resolved:
	ut_ad((ret >= - 1) && (ret <= 1));
	ut_ad(ret == cmp_debug_dtuple_rec_with_match(dtuple, rec, offsets,
						     matched_fields));
	ut_ad(*matched_fields == cur_field); /* In the debug version, the
					     above cmp_debug_... sets
					     *matched_fields to a value */
	*matched_fields = cur_field;
	*matched_bytes = cur_bytes;

	return(ret);
#else /* !UNIV_HOTBACKUP */
	/* This function depends on MySQL code that is not included in
	InnoDB Hot Backup builds. Besides, this function should never
	be called in InnoDB Hot Backup. */
	ut_error;
	return(0);
#endif /* !UNIV_HOTBACKUP */
}

/******************************************************************
Compares a data tuple to a physical record. This is a wrapper around
cmp_dtuple_rec_with_match() which starts the comparison from the first
byte of the first field and discards the match counters. */

int
cmp_dtuple_rec(
/*===========*/
				/* out: 1, 0, -1, if dtuple is greater, equal,
				less than rec, respectively; see the comments
				for cmp_dtuple_rec_with_match */
	dtuple_t*	dtuple,	/* in: data tuple */
	rec_t*		rec,	/* in: physical record */
	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
{
	ulint	matched_fields	= 0;
	ulint	matched_bytes	= 0;

	ut_ad(rec_offs_validate(rec, NULL, offsets));
	return(cmp_dtuple_rec_with_match(dtuple, rec, offsets,
					 &matched_fields, &matched_bytes));
}

/******************************************************************
Checks if a dtuple is a prefix of a record. The last field in dtuple
is allowed to be a prefix of the corresponding field in the record. */

ibool
cmp_dtuple_is_prefix_of_rec(
/*========================*/
				/* out: TRUE if prefix */
	dtuple_t*	dtuple,	/* in: data tuple */
	rec_t*		rec,	/* in: physical record */
	const ulint*	offsets)/* in: array returned by rec_get_offsets() */
{
	ulint	n_fields;
	ulint	matched_fields	= 0;
	ulint	matched_bytes	= 0;

	ut_ad(rec_offs_validate(rec, NULL, offsets));
	n_fields = dtuple_get_n_fields(dtuple);

	if (n_fields > rec_offs_n_fields(offsets)) {
		/* dtuple has more fields than rec: it cannot be a prefix */

		return(FALSE);
	}

	/* The comparison result itself is ignored: only the number of
	matched fields and bytes is of interest here */

	cmp_dtuple_rec_with_match(dtuple, rec, offsets,
				  &matched_fields, &matched_bytes);
	if (matched_fields == n_fields) {

		return(TRUE);
	}

	/* All fields but the last matched completely, and every byte of
	the last dtuple field matched: the last field is a prefix */

	if (matched_fields == n_fields - 1
	    && matched_bytes == dfield_get_len(
		    dtuple_get_nth_field(dtuple, n_fields - 1))) {
		return(TRUE);
	}

	return(FALSE);
}

/*****************************************************************
This function is used to compare two physical records. Only the common
first fields are compared, and if an externally stored field is
encountered, then 0 is returned. 
The comparison resumes at the position given in *matched_fields and *matched_bytes, and both are updated to the position reached on return. */
-
-int
-cmp_rec_rec_with_match(
-/*===================*/
-				/* out: 1, 0 , -1 if rec1 is greater, equal,
-				less, respectively, than rec2; only the common
-				first fields are compared */
-	rec_t*		rec1,	/* in: physical record */
-	rec_t*		rec2,	/* in: physical record */
-	const ulint*	offsets1,/* in: rec_get_offsets(rec1, index) */
-	const ulint*	offsets2,/* in: rec_get_offsets(rec2, index) */
-	dict_index_t*	index,	/* in: data dictionary index */
-	ulint*		matched_fields, /* in/out: number of already completely
-				matched fields; when the function returns,
-				contains the value for the current
-				comparison */
-	ulint*		matched_bytes) /* in/out: number of already matched
-				bytes within the first field not completely
-				matched; when the function returns, contains
-				the value for the current comparison */
-{
-#ifndef UNIV_HOTBACKUP
-	ulint	rec1_n_fields;	/* the number of fields in rec1 */
-	ulint	rec1_f_len;	/* length of current field in rec1 */
-	byte*	rec1_b_ptr;	/* pointer to the current byte in rec1 field */
-	ulint	rec1_byte;	/* value of current byte to be compared in
-				rec1 */
-	ulint	rec2_n_fields;	/* the number of fields in rec2 */
-	ulint	rec2_f_len;	/* length of current field in rec2 */
-	byte*	rec2_b_ptr;	/* pointer to the current byte in rec2 field */
-	ulint	rec2_byte;	/* value of current byte to be compared in
-				rec2 */
-	ulint	cur_field;	/* current field number */
-	ulint	cur_bytes;	/* number of already matched bytes in current
-				field */
-	int	ret = 3333;	/* return value; the initial value is outside
-				[-1, 1], so the assertion at order_resolved
-				would catch a path that never assigns it */
-	ulint	comp;		/* rec_offs_comp(offsets1); asserted below to
-				be the same for both records */
-
-	ut_ad(rec1 && rec2 && index);
-	ut_ad(rec_offs_validate(rec1, index, offsets1));
-	ut_ad(rec_offs_validate(rec2, index, offsets2));
-	ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2));
-
-	comp = rec_offs_comp(offsets1);
-	rec1_n_fields = rec_offs_n_fields(offsets1);
-	rec2_n_fields = rec_offs_n_fields(offsets2);
-
-	cur_field = *matched_fields;
-	cur_bytes = *matched_bytes;
-
-	/* Match fields in a loop; stop when either record runs out of
-	fields */
-
-	while ((cur_field < rec1_n_fields) && (cur_field < rec2_n_fields)) {
-
-		ulint	mtype;
-		ulint	prtype;
-
-		if (UNIV_UNLIKELY(index->type & DICT_UNIVERSAL)) {
-			/* This is for the insert buffer B-tree. */
-			mtype = DATA_BINARY;
-			prtype = 0;
-		} else {
-			const dict_col_t*	col
-				= dict_index_get_nth_col(index, cur_field);
-
-			mtype = col->mtype;
-			prtype = col->prtype;
-		}
-
-		rec1_b_ptr = rec_get_nth_field(rec1, offsets1,
-					       cur_field, &rec1_f_len);
-		rec2_b_ptr = rec_get_nth_field(rec2, offsets2,
-					       cur_field, &rec2_f_len);
-
-		if (cur_bytes == 0) {
-			if (cur_field == 0) {
-				/* Test if rec1 or rec2 is the predefined
-				minimum record: a minimum record is equal
-				to another minimum record and less than
-				any other record */
-				if (rec_get_info_bits(rec1, comp)
-				    & REC_INFO_MIN_REC_FLAG) {
-
-					if (rec_get_info_bits(rec2, comp)
-					    & REC_INFO_MIN_REC_FLAG) {
-						ret = 0;
-					} else {
-						ret = -1;
-					}
-
-					goto order_resolved;
-
-				} else if (rec_get_info_bits(rec2, comp)
-					   & REC_INFO_MIN_REC_FLAG) {
-
-					ret = 1;
-
-					goto order_resolved;
-				}
-			}
-
-			if (rec_offs_nth_extern(offsets1, cur_field)
-			    || rec_offs_nth_extern(offsets2, cur_field)) {
-				/* We do not compare to an externally
-				stored field */
-
-				ret = 0;
-
-				goto order_resolved;
-			}
-
-			if (rec1_f_len == UNIV_SQL_NULL
-			    || rec2_f_len == UNIV_SQL_NULL) {
-
-				if (rec1_f_len == rec2_f_len) {
-
-					goto next_field;
-
-				} else if (rec2_f_len == UNIV_SQL_NULL) {
-
-					/* We define the SQL null to be the
-					smallest possible value of a field
-					in the alphabetical order */
-
-					ret = 1;
-				} else {
-					ret = -1;
-				}
-
-				goto order_resolved;
-			}
-		}
-
-		if (mtype >= DATA_FLOAT
-		    || (mtype == DATA_BLOB
-			&& 0 == (prtype & DATA_BINARY_TYPE)
-			&& dtype_get_charset_coll(prtype)
-			!= DATA_MYSQL_LATIN1_SWEDISH_CHARSET_COLL)) {
-
-			ret = cmp_whole_field(mtype, prtype,
-					      rec1_b_ptr,
-					      (unsigned) rec1_f_len,
-					      rec2_b_ptr,
-					      (unsigned) rec2_f_len);
-			if (ret != 0) {
-				cur_bytes = 0;
-
-				goto order_resolved;
-			} else {
-				goto next_field;
-			}
-		}
-
-		/* Set the pointers at the current byte, skipping the
-		cur_bytes bytes that are already known to match */
-		rec1_b_ptr = rec1_b_ptr + cur_bytes;
-		rec2_b_ptr = rec2_b_ptr + cur_bytes;
-
-		/* Compare then the fields byte by byte; a field that has
-		run out of bytes is continued with the pad character of
-		the type, if dtype_get_pad_char() returns one */
-		for (;;) {
-			if (rec2_f_len <= cur_bytes) {
-
-				if (rec1_f_len <= cur_bytes) {
-
-					goto next_field;
-				}
-
-				rec2_byte = dtype_get_pad_char(mtype, prtype);
-
-				if (rec2_byte == ULINT_UNDEFINED) {
-					ret = 1;
-
-					goto order_resolved;
-				}
-			} else {
-				rec2_byte = *rec2_b_ptr;
-			}
-
-			if (rec1_f_len <= cur_bytes) {
-				rec1_byte = dtype_get_pad_char(mtype, prtype);
-
-				if (rec1_byte == ULINT_UNDEFINED) {
-					ret = -1;
-
-					goto order_resolved;
-				}
-			} else {
-				rec1_byte = *rec1_b_ptr;
-			}
-
-			if (rec1_byte == rec2_byte) {
-				/* If the bytes are equal, they will remain
-				such even after the collation transformation
-				below */
-
-				goto next_byte;
-			}
-
-			if (mtype <= DATA_CHAR
-			    || (mtype == DATA_BLOB
-				&& !(prtype & DATA_BINARY_TYPE))) {
-
-				rec1_byte = cmp_collate(rec1_byte);
-				rec2_byte = cmp_collate(rec2_byte);
-			}
-
-			if (rec1_byte < rec2_byte) {
-				ret = -1;
-				goto order_resolved;
-			} else if (rec1_byte > rec2_byte) {
-				ret = 1;
-				goto order_resolved;
-			}
-next_byte:
-			/* Next byte */
-
-			cur_bytes++;
-			rec1_b_ptr++;
-			rec2_b_ptr++;
-		}
-
-next_field:
-		cur_field++;
-		cur_bytes = 0;
-	}
-
-	ut_ad(cur_bytes == 0);
-
-	ret = 0;	/* If we ran out of fields, rec1 was equal to rec2 up
-			to the common fields */
-order_resolved:
-
-	ut_ad((ret >= - 1) && (ret <= 1));
-
-	*matched_fields = cur_field;
-	*matched_bytes = cur_bytes;
-
-	return(ret);
-#else /* !UNIV_HOTBACKUP */
-	/* This function depends on MySQL code that is not included in
-	InnoDB Hot Backup builds.  Besides, this function should never
-	be called in InnoDB Hot Backup. */
-	ut_error;
-	return(0);
-#endif /* !UNIV_HOTBACKUP */
-}
-
-#ifdef UNIV_DEBUG
-/*****************************************************************
-Used in debug checking of cmp_dtuple_... .
-This function is used to compare a data tuple to a physical record. If
-dtuple has n fields then rec must have either m >= n fields, or it must
-differ from dtuple in some of the m fields rec has. If encounters an
-externally stored field, returns 0.
*/ -static -int -cmp_debug_dtuple_rec_with_match( -/*============================*/ - /* out: 1, 0, -1, if dtuple is greater, equal, - less than rec, respectively, when only the - common first fields are compared */ - dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec, /* in: physical record which differs from - dtuple in some of the common fields, or which - has an equal number or more fields than - dtuple */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint* matched_fields) /* in/out: number of already - completely matched fields; when function - returns, contains the value for current - comparison */ -{ - dfield_t* dtuple_field; /* current field in logical record */ - ulint dtuple_f_len; /* the length of the current field - in the logical record */ - byte* dtuple_f_data; /* pointer to the current logical - field data */ - ulint rec_f_len; /* length of current field in rec */ - byte* rec_f_data; /* pointer to the current rec field */ - int ret = 3333; /* return value */ - ulint cur_field; /* current field number */ - - ut_ad(dtuple && rec && matched_fields); - ut_ad(dtuple_check_typed(dtuple)); - ut_ad(rec_offs_validate(rec, NULL, offsets)); - - ut_ad(*matched_fields <= dtuple_get_n_fields_cmp(dtuple)); - ut_ad(*matched_fields <= rec_offs_n_fields(offsets)); - - cur_field = *matched_fields; - - if (cur_field == 0) { - if (rec_get_info_bits(rec, rec_offs_comp(offsets)) - & REC_INFO_MIN_REC_FLAG) { - - ret = !(dtuple_get_info_bits(dtuple) - & REC_INFO_MIN_REC_FLAG); - - goto order_resolved; - } - - if (dtuple_get_info_bits(dtuple) & REC_INFO_MIN_REC_FLAG) { - ret = -1; - - goto order_resolved; - } - } - - /* Match fields in a loop; stop if we run out of fields in dtuple */ - - while (cur_field < dtuple_get_n_fields_cmp(dtuple)) { - - ulint mtype; - ulint prtype; - - dtuple_field = dtuple_get_nth_field(dtuple, cur_field); - { - const dtype_t* type - = dfield_get_type(dtuple_field); - - mtype = type->mtype; - prtype = type->prtype; - } - - 
dtuple_f_data = dfield_get_data(dtuple_field); - dtuple_f_len = dfield_get_len(dtuple_field); - - rec_f_data = rec_get_nth_field(rec, offsets, - cur_field, &rec_f_len); - - if (rec_offs_nth_extern(offsets, cur_field)) { - /* We do not compare to an externally stored field */ - - ret = 0; - - goto order_resolved; - } - - ret = cmp_data_data(mtype, prtype, dtuple_f_data, dtuple_f_len, - rec_f_data, rec_f_len); - if (ret != 0) { - goto order_resolved; - } - - cur_field++; - } - - ret = 0; /* If we ran out of fields, dtuple was equal to rec - up to the common fields */ -order_resolved: - ut_ad((ret >= - 1) && (ret <= 1)); - - *matched_fields = cur_field; - - return(ret); -} -#endif /* UNIV_DEBUG */ diff --git a/storage/innobase/rem/rem0rec.c b/storage/innobase/rem/rem0rec.c deleted file mode 100644 index 64f8e2d319c..00000000000 --- a/storage/innobase/rem/rem0rec.c +++ /dev/null @@ -1,1515 +0,0 @@ -/************************************************************************ -Record manager - -(c) 1994-2001 Innobase Oy - -Created 5/30/1994 Heikki Tuuri -*************************************************************************/ - -#include "rem0rec.h" - -#ifdef UNIV_NONINL -#include "rem0rec.ic" -#endif - -#include "mtr0mtr.h" -#include "mtr0log.h" - -/* PHYSICAL RECORD (OLD STYLE) - =========================== - -The physical record, which is the data type of all the records -found in index pages of the database, has the following format -(lower addresses and more significant bits inside a byte are below -represented on a higher text line): - -| offset of the end of the last field of data, the most significant - bit is set to 1 if and only if the field is SQL-null, - if the offset is 2-byte, then the second most significant - bit is set to 1 if the field is stored on another page: - mostly this will occur in the case of big BLOB fields | -... 
-| offset of the end of the first field of data + the SQL-null bit | -| 4 bits used to delete mark a record, and mark a predefined - minimum record in alphabetical order | -| 4 bits giving the number of records owned by this record - (this term is explained in page0page.h) | -| 13 bits giving the order number of this record in the - heap of the index page | -| 10 bits giving the number of fields in this record | -| 1 bit which is set to 1 if the offsets above are given in - one byte format, 0 if in two byte format | -| two bytes giving an absolute pointer to the next record in the page | -ORIGIN of the record -| first field of data | -... -| last field of data | - -The origin of the record is the start address of the first field -of data. The offsets are given relative to the origin. -The offsets of the data fields are stored in an inverted -order because then the offset of the first fields are near the -origin, giving maybe a better processor cache hit rate in searches. - -The offsets of the data fields are given as one-byte -(if there are less than 127 bytes of data in the record) -or two-byte unsigned integers. The most significant bit -is not part of the offset, instead it indicates the SQL-null -if the bit is set to 1. */ - -/* PHYSICAL RECORD (NEW STYLE) - =========================== - -The physical record, which is the data type of all the records -found in index pages of the database, has the following format -(lower addresses and more significant bits inside a byte are below -represented on a higher text line): - -| length of the last non-null variable-length field of data: - if the maximum length is 255, one byte; otherwise, - 0xxxxxxx (one byte, length=0..127), or 1exxxxxxxxxxxxxx (two bytes, - length=128..16383, extern storage flag) | -... 
-| length of first variable-length field of data | -| SQL-null flags (1 bit per nullable field), padded to full bytes | -| 4 bits used to delete mark a record, and mark a predefined - minimum record in alphabetical order | -| 4 bits giving the number of records owned by this record - (this term is explained in page0page.h) | -| 13 bits giving the order number of this record in the - heap of the index page | -| 3 bits record type: 000=conventional, 001=node pointer (inside B-tree), - 010=infimum, 011=supremum, 1xx=reserved | -| two bytes giving a relative pointer to the next record in the page | -ORIGIN of the record -| first field of data | -... -| last field of data | - -The origin of the record is the start address of the first field -of data. The offsets are given relative to the origin. -The offsets of the data fields are stored in an inverted -order because then the offset of the first fields are near the -origin, giving maybe a better processor cache hit rate in searches. - -The offsets of the data fields are given as one-byte -(if there are less than 127 bytes of data in the record) -or two-byte unsigned integers. The most significant bit -is not part of the offset, instead it indicates the SQL-null -if the bit is set to 1. */ - -/* CANONICAL COORDINATES. A record can be seen as a single -string of 'characters' in the following way: catenate the bytes -in each field, in the order of fields. An SQL-null field -is taken to be an empty sequence of bytes. Then after -the position of each field insert in the string -the 'character' <FIELD-END>, except that after an SQL-null field -insert <NULL-FIELD-END>. Now the ordinal position of each -byte in this canonical string is its canonical coordinate. -So, for the record ("AA", SQL-NULL, "BB", ""), the canonical -string is "AA<FIELD_END><NULL-FIELD-END>BB<FIELD-END><FIELD-END>". -We identify prefixes (= initial segments) of a record -with prefixes of the canonical string. 
The canonical -length of the prefix is the length of the corresponding -prefix of the canonical string. The canonical length of -a record is the length of its canonical string. - -For example, the maximal common prefix of records -("AA", SQL-NULL, "BB", "C") and ("AA", SQL-NULL, "B", "C") -is "AA<FIELD-END><NULL-FIELD-END>B", and its canonical -length is 5. - -A complete-field prefix of a record is a prefix which ends at the -end of some field (containing also <FIELD-END>). -A record is a complete-field prefix of another record, if -the corresponding canonical strings have the same property. */ - -ulint rec_dummy; /* this is used to fool compiler in - rec_validate */ - -/******************************************************************* -Validates the consistency of an old-style physical record. */ -static -ibool -rec_validate_old( -/*=============*/ - /* out: TRUE if ok */ - rec_t* rec); /* in: physical record */ - -/********************************************************** -The following function determines the offsets to each field in the -record. The offsets are written to a previously allocated array of -ulint, where rec_offs_n_fields(offsets) has been initialized to the -number of fields in the record. The rest of the array will be -initialized by this function. rec_offs_base(offsets)[0] will be set -to the extra size (if REC_OFFS_COMPACT is set, the record is in the -new format), and rec_offs_base(offsets)[1..n_fields] will be set to -offsets past the end of fields 0..n_fields, or to the beginning of -fields 1..n_fields+1. When the high-order bit of the offset at [i+1] -is set (REC_OFFS_SQL_NULL), the field i is NULL. When the second -high-order bit of the offset at [i+1] is set (REC_OFFS_EXTERNAL), the -field i is being stored externally. 
*/
-static
-void
-rec_init_offsets(
-/*=============*/
-	rec_t*		rec,	/* in: physical record */
-	dict_index_t*	index,	/* in: record descriptor */
-	ulint*		offsets)/* in/out: array of offsets;
-				in: n=rec_offs_n_fields(offsets) */
-{
-	ulint	i	= 0;	/* number of the field being resolved */
-	ulint	offs;		/* running end offset of the fields */
-
-	rec_offs_make_valid(rec, index, offsets);
-
-	if (dict_table_is_comp(index->table)) {
-		const byte*	nulls;	/* current byte of the SQL-null flags */
-		const byte*	lens;	/* next stored length byte to read */
-		dict_field_t*	field;
-		ulint		null_mask;	/* bit of *nulls that belongs
-						to the next nullable field */
-		ulint		status = rec_get_status(rec);
-		ulint		n_node_ptr_field = ULINT_UNDEFINED;
-
-		switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) {
-		case REC_STATUS_INFIMUM:
-		case REC_STATUS_SUPREMUM:
-			/* the field is 8 bytes long */
-			rec_offs_base(offsets)[0]
-				= REC_N_NEW_EXTRA_BYTES | REC_OFFS_COMPACT;
-			rec_offs_base(offsets)[1] = 8;
-			return;
-		case REC_STATUS_NODE_PTR:
-			n_node_ptr_field
-				= dict_index_get_n_unique_in_tree(index);
-			break;
-		case REC_STATUS_ORDINARY:
-			break;
-		}
-
-		/* Both the null flags and the stored lengths are read
-		towards lower addresses, starting just below the fixed
-		extra bytes of the record */
-		nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
-		lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
-		offs = 0;
-		null_mask = 1;
-
-		/* read the lengths of fields 0..n */
-		do {
-			ulint	len;
-			if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
-				len = offs += 4; /* the field after the unique fields of a node pointer record is 4 bytes long */
-				goto resolved;
-			}
-
-			field = dict_index_get_nth_field(index, i);
-			if (!(dict_field_get_col(field)->prtype
-			      & DATA_NOT_NULL)) {
-				/* nullable field => read the null flag */
-
-				if (UNIV_UNLIKELY(!(byte) null_mask)) { /* all 8 bits of this flag byte are used up */
-					nulls--;
-					null_mask = 1;
-				}
-
-				if (*nulls & null_mask) {
-					null_mask <<= 1;
-					/* No length is stored for NULL fields.
-					We do not advance offs, and we set
-					the length to zero and enable the
-					SQL NULL flag in offsets[]. */
-					len = offs | REC_OFFS_SQL_NULL;
-					goto resolved;
-				}
-				null_mask <<= 1;
-			}
-
-			if (UNIV_UNLIKELY(!field->fixed_len)) {
-				/* Variable-length field: read the length */
-				const dict_col_t*	col
-					= dict_field_get_col(field);
-				len = *lens--;
-				if (UNIV_UNLIKELY(col->len > 255)
-				    || UNIV_UNLIKELY(col->mtype
-						     == DATA_BLOB)) {
-					if (len & 0x80) {
-						/* 1exxxxxx xxxxxxxx: two-byte
-						length; the low 14 bits are
-						the length and bit 0x4000 is
-						the extern storage flag */
-						len <<= 8;
-						len |= *lens--;
-
-						offs += len & 0x3fff;
-						if (UNIV_UNLIKELY(len
-								  & 0x4000)) {
-							len = offs
-								| REC_OFFS_EXTERNAL;
-						} else {
-							len = offs;
-						}
-
-						goto resolved;
-					}
-				}
-
-				len = offs += len;
-			} else {
-				len = offs += field->fixed_len;
-			}
-resolved:
-			rec_offs_base(offsets)[i + 1] = len;
-		} while (++i < rec_offs_n_fields(offsets));
-
-		*rec_offs_base(offsets)
-			= (rec - (lens + 1)) | REC_OFFS_COMPACT; /* extra size: distance from the last length byte read to the record origin */
-	} else {
-		/* Old-style record: determine extra size and end offsets */
-		offs = REC_N_OLD_EXTRA_BYTES;
-		if (rec_get_1byte_offs_flag(rec)) {
-			offs += rec_offs_n_fields(offsets);
-			*rec_offs_base(offsets) = offs;
-			/* Determine offsets to fields; translate the
-			record's SQL-null bit to REC_OFFS_SQL_NULL */
-			do {
-				offs = rec_1_get_field_end_info(rec, i);
-				if (offs & REC_1BYTE_SQL_NULL_MASK) {
-					offs &= ~REC_1BYTE_SQL_NULL_MASK;
-					offs |= REC_OFFS_SQL_NULL;
-				}
-				rec_offs_base(offsets)[1 + i] = offs;
-			} while (++i < rec_offs_n_fields(offsets));
-		} else {
-			offs += 2 * rec_offs_n_fields(offsets);
-			*rec_offs_base(offsets) = offs;
-			/* Determine offsets to fields; translate the
-			record's SQL-null and extern bits to
-			REC_OFFS_SQL_NULL and REC_OFFS_EXTERNAL */
-			do {
-				offs = rec_2_get_field_end_info(rec, i);
-				if (offs & REC_2BYTE_SQL_NULL_MASK) {
-					offs &= ~REC_2BYTE_SQL_NULL_MASK;
-					offs |= REC_OFFS_SQL_NULL;
-				}
-				if (offs & REC_2BYTE_EXTERN_MASK) {
-					offs &= ~REC_2BYTE_EXTERN_MASK;
-					offs |= REC_OFFS_EXTERNAL;
-				}
-				rec_offs_base(offsets)[1 + i] = offs;
-			} while (++i < rec_offs_n_fields(offsets));
-		}
-	}
-}
-
-/**********************************************************
-The following function determines the offsets to each field
-in the record. It can reuse a previously returned array.
*/ - -ulint* -rec_get_offsets_func( -/*=================*/ - /* out: the new offsets */ - rec_t* rec, /* in: physical record */ - dict_index_t* index, /* in: record descriptor */ - ulint* offsets,/* in/out: array consisting of offsets[0] - allocated elements, or an array from - rec_get_offsets(), or NULL */ - ulint n_fields,/* in: maximum number of initialized fields - (ULINT_UNDEFINED if all fields) */ - mem_heap_t** heap, /* in/out: memory heap */ - const char* file, /* in: file name where called */ - ulint line) /* in: line number where called */ -{ - ulint n; - ulint size; - - ut_ad(rec); - ut_ad(index); - ut_ad(heap); - - if (dict_table_is_comp(index->table)) { - switch (UNIV_EXPECT(rec_get_status(rec), - REC_STATUS_ORDINARY)) { - case REC_STATUS_ORDINARY: - n = dict_index_get_n_fields(index); - break; - case REC_STATUS_NODE_PTR: - n = dict_index_get_n_unique_in_tree(index) + 1; - break; - case REC_STATUS_INFIMUM: - case REC_STATUS_SUPREMUM: - /* infimum or supremum record */ - n = 1; - break; - default: - ut_error; - return(NULL); - } - } else { - n = rec_get_n_fields_old(rec); - } - - if (UNIV_UNLIKELY(n_fields < n)) { - n = n_fields; - } - - size = n + (1 + REC_OFFS_HEADER_SIZE); - - if (UNIV_UNLIKELY(!offsets) - || UNIV_UNLIKELY(rec_offs_get_n_alloc(offsets) < size)) { - if (!*heap) { - *heap = mem_heap_create_func(size * sizeof(ulint), - NULL, MEM_HEAP_DYNAMIC, - file, line); - } - offsets = mem_heap_alloc(*heap, size * sizeof(ulint)); - rec_offs_set_n_alloc(offsets, size); - } - - rec_offs_set_n_fields(offsets, n); - rec_init_offsets(rec, index, offsets); - return(offsets); -} - -/**************************************************************** -The following function is used to get a pointer to the nth -data field in an old-style record. 
*/
-
-byte*
-rec_get_nth_field_old(
-/*==================*/
-			/* out: pointer to the field */
-	rec_t*	rec,	/* in: record */
-	ulint	n,	/* in: index of the field */
-	ulint*	len)	/* out: length of the field; UNIV_SQL_NULL if SQL
-			null */
-{
-	ulint	start_offs;	/* offset of the start of the field */
-	ulint	end_info;	/* end offset of the field with flag bits */
-	ulint	null_flag;	/* the SQL-null bit within end_info */
-	ulint	flag_bits;	/* all flag bits within end_info */
-
-	ut_ad(rec && len);
-	ut_ad(n < rec_get_n_fields_old(rec));
-
-	if (n > REC_MAX_N_FIELDS) {
-		fprintf(stderr, "Error: trying to access field %lu in rec\n",
-			(ulong) n);
-		ut_error;
-	}
-
-	if (rec == NULL) {
-		fputs("Error: rec is NULL pointer\n", stderr);
-		ut_error;
-	}
-
-	/* Fetch the field boundaries and pick the flag masks that go
-	with the offset format of this record */
-	if (rec_get_1byte_offs_flag(rec)) {
-		start_offs = rec_1_get_field_start_offs(rec, n);
-		end_info = rec_1_get_field_end_info(rec, n);
-
-		null_flag = REC_1BYTE_SQL_NULL_MASK;
-		flag_bits = REC_1BYTE_SQL_NULL_MASK;
-	} else {
-		start_offs = rec_2_get_field_start_offs(rec, n);
-		end_info = rec_2_get_field_end_info(rec, n);
-
-		null_flag = REC_2BYTE_SQL_NULL_MASK;
-		flag_bits = REC_2BYTE_SQL_NULL_MASK
-			| REC_2BYTE_EXTERN_MASK;
-	}
-
-	if (end_info & null_flag) {
-		*len = UNIV_SQL_NULL;
-	} else {
-		/* Strip the flag bits to get the plain end offset */
-		*len = (end_info & ~flag_bits) - start_offs;
-
-		ut_ad(*len < UNIV_PAGE_SIZE);
-	}
-
-	return(rec + start_offs);
-}
-
-/**************************************************************
-The following function returns the size of a data tuple when converted to
-a new-style physical record.
*/ - -ulint -rec_get_converted_size_new( -/*=======================*/ - /* out: size */ - dict_index_t* index, /* in: record descriptor */ - dtuple_t* dtuple) /* in: data tuple */ -{ - ulint size = REC_N_NEW_EXTRA_BYTES - + UT_BITS_IN_BYTES(index->n_nullable); - ulint i; - ulint n_fields; - ut_ad(index && dtuple); - ut_ad(dict_table_is_comp(index->table)); - - switch (dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK) { - case REC_STATUS_ORDINARY: - n_fields = dict_index_get_n_fields(index); - ut_ad(n_fields == dtuple_get_n_fields(dtuple)); - break; - case REC_STATUS_NODE_PTR: - n_fields = dict_index_get_n_unique_in_tree(index); - ut_ad(n_fields + 1 == dtuple_get_n_fields(dtuple)); - ut_ad(dtuple_get_nth_field(dtuple, n_fields)->len == 4); - size += 4; /* child page number */ - break; - case REC_STATUS_INFIMUM: - case REC_STATUS_SUPREMUM: - /* infimum or supremum record, 8 data bytes */ - return(REC_N_NEW_EXTRA_BYTES + 8); - default: - ut_error; - return(ULINT_UNDEFINED); - } - - /* read the lengths of fields 0..n */ - for (i = 0; i < n_fields; i++) { - dict_field_t* field; - ulint len; - const dict_col_t* col; - - field = dict_index_get_nth_field(index, i); - len = dtuple_get_nth_field(dtuple, i)->len; - col = dict_field_get_col(field); - - ut_ad(dict_col_type_assert_equal( - col, dfield_get_type(dtuple_get_nth_field( - dtuple, i)))); - - if (len == UNIV_SQL_NULL) { - /* No length is stored for NULL fields. */ - ut_ad(!(col->prtype & DATA_NOT_NULL)); - continue; - } - - ut_ad(len <= col->len || col->mtype == DATA_BLOB); - - if (field->fixed_len) { - ut_ad(len == field->fixed_len); - /* dict_index_add_col() should guarantee this */ - ut_ad(!field->prefix_len - || field->fixed_len == field->prefix_len); - } else if (len < 128 - || (col->len < 256 && col->mtype != DATA_BLOB)) { - size++; - } else { - /* For variable-length columns, we look up the - maximum length from the column itself. 
If this - is a prefix index column shorter than 256 bytes, - this will waste one byte. */ - size += 2; - } - size += len; - } - - return(size); -} - -/*************************************************************** -Sets the value of the ith field SQL null bit of an old-style record. */ - -void -rec_set_nth_field_null_bit( -/*=======================*/ - rec_t* rec, /* in: record */ - ulint i, /* in: ith field */ - ibool val) /* in: value to set */ -{ - ulint info; - - if (rec_get_1byte_offs_flag(rec)) { - - info = rec_1_get_field_end_info(rec, i); - - if (val) { - info = info | REC_1BYTE_SQL_NULL_MASK; - } else { - info = info & ~REC_1BYTE_SQL_NULL_MASK; - } - - rec_1_set_field_end_info(rec, i, info); - - return; - } - - info = rec_2_get_field_end_info(rec, i); - - if (val) { - info = info | REC_2BYTE_SQL_NULL_MASK; - } else { - info = info & ~REC_2BYTE_SQL_NULL_MASK; - } - - rec_2_set_field_end_info(rec, i, info); -} - -/*************************************************************** -Sets the value of the ith field extern storage bit of an old-style record. */ - -void -rec_set_nth_field_extern_bit_old( -/*=============================*/ - rec_t* rec, /* in: old-style record */ - ulint i, /* in: ith field */ - ibool val, /* in: value to set */ - mtr_t* mtr) /* in: mtr holding an X-latch to the page where - rec is, or NULL; in the NULL case we do not - write to log about the change */ -{ - ulint info; - - ut_a(!rec_get_1byte_offs_flag(rec)); - ut_a(i < rec_get_n_fields_old(rec)); - - info = rec_2_get_field_end_info(rec, i); - - if (val) { - info = info | REC_2BYTE_EXTERN_MASK; - } else { - info = info & ~REC_2BYTE_EXTERN_MASK; - } - - if (mtr) { - mlog_write_ulint(rec - REC_N_OLD_EXTRA_BYTES - 2 * (i + 1), - info, MLOG_2BYTES, mtr); - } else { - rec_2_set_field_end_info(rec, i, info); - } -} - -/*************************************************************** -Sets the value of the ith field extern storage bit of a new-style record. 
Does nothing if the bit already has the requested value. */
-
-void
-rec_set_nth_field_extern_bit_new(
-/*=============================*/
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: record descriptor */
-	ulint		ith,	/* in: ith field */
-	ibool		val,	/* in: value to set */
-	mtr_t*		mtr)	/* in: mtr holding an X-latch to the page
-				where rec is, or NULL; in the NULL case
-				we do not write to log about the change */
-{
-	byte*		nulls	= rec - (REC_N_NEW_EXTRA_BYTES + 1); /* current byte of the SQL-null flags */
-	byte*		lens	= nulls - UT_BITS_IN_BYTES(index->n_nullable); /* walks down over the stored lengths */
-	ulint		i;
-	ulint		n_fields;
-	ulint		null_mask	= 1; /* bit of *nulls for the next nullable field */
-	ut_ad(rec && index);
-	ut_ad(dict_table_is_comp(index->table));
-	ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY);
-
-	n_fields = dict_index_get_n_fields(index);
-
-	ut_ad(ith < n_fields);
-
-	/* read the lengths of fields 0..n; the record header has to be
-	walked from field 0 in order to locate the length bytes of
-	field ith */
-	for (i = 0; i < n_fields; i++) {
-		const dict_field_t*	field;
-		const dict_col_t*	col;
-
-		field = dict_index_get_nth_field(index, i);
-		col = dict_field_get_col(field);
-
-		if (!(col->prtype & DATA_NOT_NULL)) {
-			if (UNIV_UNLIKELY(!(byte) null_mask)) { /* all 8 bits of this flag byte are used up */
-				nulls--;
-				null_mask = 1;
-			}
-
-			if (*nulls & null_mask) {
-				null_mask <<= 1;
-				/* NULL fields cannot be external. */
-				ut_ad(i != ith);
-				continue;
-			}
-
-			null_mask <<= 1;
-		}
-		if (field->fixed_len) {
-			/* fixed-length fields cannot be external
-			(Fixed-length fields longer than
-			DICT_MAX_INDEX_COL_LEN will be treated as
-			variable-length ones in dict_index_add_col().) */
-			ut_ad(i != ith);
-			continue;
-		}
-		lens--;
-		if (col->len > 255 || col->mtype == DATA_BLOB) {
-			ulint	len = lens[1]; /* first length byte of field i */
-			if (len & 0x80) { /* 1exxxxxx: 2-byte length */
-				if (i == ith) {
-					if (!val == !(len & 0x40)) { /* 0x40 is the extern flag 'e' */
-						return; /* no change */
-					}
-					/* toggle the extern bit */
-					len ^= 0x40;
-					if (mtr) {
-						mlog_write_ulint(lens + 1,
-								 len,
-								 MLOG_1BYTE,
-								 mtr);
-					} else {
-						lens[1] = (byte) len;
-					}
-					return;
-				}
-				lens--; /* skip the second length byte */
-			} else {
-				/* short fields cannot be external */
-				ut_ad(i != ith);
-			}
-		} else {
-			/* short fields cannot be external */
-			ut_ad(i != ith);
-		}
-	}
-}
-
-/***************************************************************
-Sets TRUE the extern storage bits of fields mentioned in an array.
-Dispatches to the new-style or the old-style setter depending on
-dict_table_is_comp(index->table). */
-
-void
-rec_set_field_extern_bits(
-/*======================*/
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: record descriptor */
-	const ulint*	vec,	/* in: array of field numbers */
-	ulint		n_fields,/* in: number of field numbers in vec */
-	mtr_t*		mtr)	/* in: mtr holding an X-latch to the
-				page where rec is, or NULL;
-				in the NULL case we do not write
-				to log about the change */
-{
-	ulint	i;
-
-	if (dict_table_is_comp(index->table)) {
-		for (i = 0; i < n_fields; i++) {
-			rec_set_nth_field_extern_bit_new(rec, index, vec[i],
-							 TRUE, mtr);
-		}
-	} else {
-		for (i = 0; i < n_fields; i++) {
-			rec_set_nth_field_extern_bit_old(rec, vec[i],
-							 TRUE, mtr);
-		}
-	}
-}
-
-/***************************************************************
-Sets an old-style record field to SQL null.
-The physical size of the field is not changed.
*/ - -void -rec_set_nth_field_sql_null( -/*=======================*/ - rec_t* rec, /* in: record */ - ulint n) /* in: index of the field */ -{ - ulint offset; - - offset = rec_get_field_start_offs(rec, n); - - data_write_sql_null(rec + offset, rec_get_nth_field_size(rec, n)); - - rec_set_nth_field_null_bit(rec, n, TRUE); -} - -/************************************************************* -Builds an old-style physical record out of a data tuple and -stores it beginning from the start of the given buffer. */ -static -rec_t* -rec_convert_dtuple_to_rec_old( -/*==========================*/ - /* out: pointer to the origin of - physical record */ - byte* buf, /* in: start address of the physical record */ - dtuple_t* dtuple)/* in: data tuple */ -{ - dfield_t* field; - ulint n_fields; - ulint data_size; - rec_t* rec; - ulint end_offset; - ulint ored_offset; - byte* data; - ulint len; - ulint i; - - ut_ad(buf && dtuple); - ut_ad(dtuple_validate(dtuple)); - ut_ad(dtuple_check_typed(dtuple)); - - n_fields = dtuple_get_n_fields(dtuple); - data_size = dtuple_get_data_size(dtuple); - - ut_ad(n_fields > 0); - - /* Calculate the offset of the origin in the physical record */ - - rec = buf + rec_get_converted_extra_size(data_size, n_fields); -#ifdef UNIV_DEBUG - /* Suppress Valgrind warnings of ut_ad() - in mach_write_to_1(), mach_write_to_2() et al. 
*/ - memset(buf, 0xff, rec - buf + data_size); -#endif /* UNIV_DEBUG */ - /* Store the number of fields */ - rec_set_n_fields_old(rec, n_fields); - - /* Set the info bits of the record */ - rec_set_info_bits(rec, FALSE, - dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK); - - /* Store the data and the offsets */ - - end_offset = 0; - - if (data_size <= REC_1BYTE_OFFS_LIMIT) { - - rec_set_1byte_offs_flag(rec, TRUE); - - for (i = 0; i < n_fields; i++) { - - field = dtuple_get_nth_field(dtuple, i); - - data = dfield_get_data(field); - len = dfield_get_len(field); - - if (len == UNIV_SQL_NULL) { - len = dtype_get_sql_null_size( - dfield_get_type(field)); - data_write_sql_null(rec + end_offset, len); - - end_offset += len; - ored_offset = end_offset - | REC_1BYTE_SQL_NULL_MASK; - } else { - /* If the data is not SQL null, store it */ - ut_memcpy(rec + end_offset, data, len); - - end_offset += len; - ored_offset = end_offset; - } - - rec_1_set_field_end_info(rec, i, ored_offset); - } - } else { - rec_set_1byte_offs_flag(rec, FALSE); - - for (i = 0; i < n_fields; i++) { - - field = dtuple_get_nth_field(dtuple, i); - - data = dfield_get_data(field); - len = dfield_get_len(field); - - if (len == UNIV_SQL_NULL) { - len = dtype_get_sql_null_size( - dfield_get_type(field)); - data_write_sql_null(rec + end_offset, len); - - end_offset += len; - ored_offset = end_offset - | REC_2BYTE_SQL_NULL_MASK; - } else { - /* If the data is not SQL null, store it */ - ut_memcpy(rec + end_offset, data, len); - - end_offset += len; - ored_offset = end_offset; - } - - rec_2_set_field_end_info(rec, i, ored_offset); - } - } - - return(rec); -} - -/************************************************************* -Builds a new-style physical record out of a data tuple and -stores it beginning from the start of the given buffer. 
-The first loop only advances rec to find the record origin; the second loop (after init:) writes the null flags, the length bytes and the field data. */ -static -rec_t* -rec_convert_dtuple_to_rec_new( -/*==========================*/ - /* out: pointer to the origin - of physical record */ - byte* buf, /* in: start address of the physical record */ - dict_index_t* index, /* in: record descriptor */ - dtuple_t* dtuple) /* in: data tuple */ -{ - dfield_t* field; - dtype_t* type; - rec_t* rec = buf + REC_N_NEW_EXTRA_BYTES; - byte* end; - byte* nulls; - byte* lens; - ulint len; - ulint i; - ulint n_node_ptr_field; - ulint fixed_len; - ulint null_mask = 1; - const ulint n_fields = dtuple_get_n_fields(dtuple); - const ulint status = dtuple_get_info_bits(dtuple) - & REC_NEW_STATUS_MASK; - ut_ad(dict_table_is_comp(index->table)); - ut_ad(n_fields > 0); - - /* Try to ensure that the memset() between the for() loops - completes fast. The address is not exact, but UNIV_PREFETCH - should never generate a memory fault. */ - UNIV_PREFETCH_RW(rec - REC_N_NEW_EXTRA_BYTES - n_fields); - UNIV_PREFETCH_RW(rec); - - switch (UNIV_EXPECT(status, REC_STATUS_ORDINARY)) { - case REC_STATUS_ORDINARY: - ut_ad(n_fields <= dict_index_get_n_fields(index)); - n_node_ptr_field = ULINT_UNDEFINED; - break; - case REC_STATUS_NODE_PTR: - ut_ad(n_fields == dict_index_get_n_unique_in_tree(index) + 1); - n_node_ptr_field = n_fields - 1; - break; - case REC_STATUS_INFIMUM: - case REC_STATUS_SUPREMUM: - ut_ad(n_fields == 1); - n_node_ptr_field = ULINT_UNDEFINED; - goto init; - default: - ut_a(0); - return(0); - } - - /* Calculate the offset of the origin in the physical record. - We must loop over all fields to do this.
*/ - rec += UT_BITS_IN_BYTES(index->n_nullable); - - for (i = 0; i < n_fields; i++) { - if (UNIV_UNLIKELY(i == n_node_ptr_field)) { -#ifdef UNIV_DEBUG - field = dtuple_get_nth_field(dtuple, i); - type = dfield_get_type(field); - ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL); - ut_ad(dfield_get_len(field) == 4); -#endif /* UNIV_DEBUG */ - goto init; - } - field = dtuple_get_nth_field(dtuple, i); - type = dfield_get_type(field); - len = dfield_get_len(field); - fixed_len = dict_index_get_nth_field(index, i)->fixed_len; - - ut_ad(dict_col_type_assert_equal( - dict_field_get_col(dict_index_get_nth_field( - index, i)), - dfield_get_type(field))); - - if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { - if (len == UNIV_SQL_NULL) - continue; - } - /* only nullable fields can be null */ - ut_ad(len != UNIV_SQL_NULL); - if (fixed_len) { - ut_ad(len == fixed_len); - } else { - ut_ad(len <= dtype_get_len(type) - || dtype_get_mtype(type) == DATA_BLOB); - rec++; /* every variable-length field takes one length byte */ - if (len >= 128 - && (dtype_get_len(type) >= 256 - || dtype_get_mtype(type) == DATA_BLOB)) { - rec++; /* and a second one under the same condition that makes the second loop write two */ - } - } - } - -init: - end = rec; - nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1); - lens = nulls - UT_BITS_IN_BYTES(index->n_nullable); - /* clear the SQL-null flags */ - memset (lens + 1, 0, nulls - lens); - - /* Set the info bits of the record */ - rec_set_status(rec, status); - - rec_set_info_bits(rec, TRUE, - dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK); - - /* Store the data and the offsets */ - - for (i = 0; i < n_fields; i++) { - field = dtuple_get_nth_field(dtuple, i); - type = dfield_get_type(field); - len = dfield_get_len(field); - - if (UNIV_UNLIKELY(i == n_node_ptr_field)) { - ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL); - ut_ad(len == 4); - memcpy(end, dfield_get_data(field), len); - break; - } - fixed_len = dict_index_get_nth_field(index, i)->fixed_len; - - if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { - /* nullable field */ - ut_ad(index->n_nullable > 0); - - if (UNIV_UNLIKELY(!(byte) null_mask)) {
- nulls--; - null_mask = 1; - } - - ut_ad(*nulls < null_mask); - - /* set the null flag if necessary */ - if (len == UNIV_SQL_NULL) { - *nulls |= null_mask; - null_mask <<= 1; - continue; - } - - null_mask <<= 1; - } - /* only nullable fields can be null */ - ut_ad(len != UNIV_SQL_NULL); - if (fixed_len) { - ut_ad(len == fixed_len); - } else { - ut_ad(len <= dtype_get_len(type) - || dtype_get_mtype(type) == DATA_BLOB); - if (len < 128 - || (dtype_get_len(type) < 256 - && dtype_get_mtype(type) != DATA_BLOB)) { - - *lens-- = (byte) len; - } else { - /* the extern bits will be set later */ - ut_ad(len < 16384); - *lens-- = (byte) (len >> 8) | 0x80; /* high bits of len, with 0x80 set to mark a 2-byte length */ - *lens-- = (byte) len; - } - } - - memcpy(end, dfield_get_data(field), len); - end += len; - } - - return(rec); -} - -/************************************************************* -Builds a physical record out of a data tuple and -stores it beginning from the start of the given buffer. */ - -rec_t* -rec_convert_dtuple_to_rec( -/*======================*/ - /* out: pointer to the origin - of physical record */ - byte* buf, /* in: start address of the - physical record */ - dict_index_t* index, /* in: record descriptor */ - dtuple_t* dtuple) /* in: data tuple */ -{ - rec_t* rec; - - ut_ad(buf && index && dtuple); - ut_ad(dtuple_validate(dtuple)); - ut_ad(dtuple_check_typed(dtuple)); - - if (dict_table_is_comp(index->table)) { - rec = rec_convert_dtuple_to_rec_new(buf, index, dtuple); - } else { - rec = rec_convert_dtuple_to_rec_old(buf, dtuple); - } - -#ifdef UNIV_DEBUG - { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - const ulint* offsets; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - offsets = rec_get_offsets(rec, index, - offsets_, ULINT_UNDEFINED, &heap); - ut_ad(rec_validate(rec, offsets)); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } -#endif /* UNIV_DEBUG */ - return(rec); -} - -/****************************************************************** -Copies the first n
fields of a physical record to a data tuple. The fields
are copied to the memory heap. A field that is SQL NULL in the record
gets a NULL data pointer in the tuple. */

void
rec_copy_prefix_to_dtuple(
/*======================*/
	dtuple_t*	tuple,		/* in: data tuple */
	rec_t*		rec,		/* in: physical record */
	dict_index_t*	index,		/* in: record descriptor */
	ulint		n_fields,	/* in: number of fields to copy */
	mem_heap_t*	heap)		/* in: memory heap */
{
	dfield_t*	field;
	byte*		data;
	ulint		len;
	ulint		i;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets	= offsets_;
	/* The first slot of the offsets array holds its capacity */
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	offsets = rec_get_offsets(rec, index, offsets, n_fields, &heap);

	ut_ad(rec_validate(rec, offsets));
	ut_ad(dtuple_check_typed(tuple));

	dtuple_set_info_bits(tuple, rec_get_info_bits(
				     rec, dict_table_is_comp(index->table)));

	for (i = 0; i < n_fields; i++) {
		/* Reset per field: a SQL NULL field must not inherit the
		copy that was made for an earlier non-NULL field. */
		byte*	buf = NULL;

		field = dtuple_get_nth_field(tuple, i);
		data = rec_get_nth_field(rec, offsets, i, &len);

		if (len != UNIV_SQL_NULL) {
			buf = mem_heap_alloc(heap, len);

			ut_memcpy(buf, data, len);
		}

		dfield_set_data(field, buf, len);
	}
}

/******************************************************************
Copies the first n fields of an old-style physical record
to a new physical record in a buffer.
*/
static
rec_t*
rec_copy_prefix_to_buf_old(
/*=======================*/
				/* out, own: copied record */
	rec_t*	rec,		/* in: physical record */
	ulint	n_fields,	/* in: number of fields to copy */
	ulint	area_end,	/* in: end of the prefix data */
	byte**	buf,		/* in/out: memory buffer for the copied prefix,
				or NULL */
	ulint*	buf_size)	/* in/out: buffer size */
{
	/* One or two offset bytes per field precede the record origin,
	in addition to the fixed extra bytes. */
	ulint	offs_width	= rec_get_1byte_offs_flag(rec) ? 1 : 2;
	ulint	header_len	= REC_N_OLD_EXTRA_BYTES
		+ offs_width * n_fields;
	ulint	total_len	= header_len + area_end;
	rec_t*	prefix_rec;

	/* Keep the caller's buffer only if it exists and is big enough;
	otherwise release it (if any) and allocate an exact fit. */
	if (!(*buf != NULL && *buf_size >= total_len)) {
		if (*buf != NULL) {
			mem_free(*buf);
		}

		*buf = mem_alloc(total_len);
		*buf_size = total_len;
	}

	ut_memcpy(*buf, rec - header_len, total_len);

	prefix_rec = *buf + header_len;

	/* The copied header still carries the full field count */
	rec_set_n_fields_old(prefix_rec, n_fields);

	return(prefix_rec);
}

/******************************************************************
Copies the first n fields of a physical record to a new physical record in
a buffer.
*/ - -rec_t* -rec_copy_prefix_to_buf( -/*===================*/ - /* out, own: copied record */ - rec_t* rec, /* in: physical record */ - dict_index_t* index, /* in: record descriptor */ - ulint n_fields, /* in: number of fields to copy */ - byte** buf, /* in/out: memory buffer - for the copied prefix, or NULL */ - ulint* buf_size) /* in/out: buffer size */ -{ - byte* nulls; - byte* lens; - ulint i; - ulint prefix_len; - ulint null_mask; - ulint status; - - UNIV_PREFETCH_RW(*buf); - - if (!dict_table_is_comp(index->table)) { - ut_ad(rec_validate_old(rec)); - return(rec_copy_prefix_to_buf_old( - rec, n_fields, - rec_get_field_start_offs(rec, n_fields), - buf, buf_size)); - } - - status = rec_get_status(rec); - - switch (status) { - case REC_STATUS_ORDINARY: - ut_ad(n_fields <= dict_index_get_n_fields(index)); - break; - case REC_STATUS_NODE_PTR: - /* it doesn't make sense to copy the child page number field */ - ut_ad(n_fields <= dict_index_get_n_unique_in_tree(index)); - break; - case REC_STATUS_INFIMUM: - case REC_STATUS_SUPREMUM: - /* infimum or supremum record: no sense to copy anything */ - default: - ut_error; - return(NULL); - } - - nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1); - lens = nulls - UT_BITS_IN_BYTES(index->n_nullable); - UNIV_PREFETCH_R(lens); - prefix_len = 0; - null_mask = 1; - - /* read the lengths of fields 0..n */ - for (i = 0; i < n_fields; i++) { - const dict_field_t* field; - const dict_col_t* col; - - field = dict_index_get_nth_field(index, i); - col = dict_field_get_col(field); - - if (!(col->prtype & DATA_NOT_NULL)) { - /* nullable field => read the null flag */ - if (UNIV_UNLIKELY(!(byte) null_mask)) { - nulls--; - null_mask = 1; - } - - if (*nulls & null_mask) { - null_mask <<= 1; - continue; - } - - null_mask <<= 1; - } - - if (field->fixed_len) { - prefix_len += field->fixed_len; - } else { - ulint len = *lens--; - if (col->len > 255 || col->mtype == DATA_BLOB) { - if (len & 0x80) { - /* 1exxxxxx */ - len &= 0x3f; - len <<= 8; - len |= 
*lens--; - UNIV_PREFETCH_R(lens); - } - } - prefix_len += len; - } - } - - UNIV_PREFETCH_R(rec + prefix_len); - - prefix_len += rec - (lens + 1); - - if ((*buf == NULL) || (*buf_size < prefix_len)) { - if (*buf != NULL) { - mem_free(*buf); - } - - *buf = mem_alloc(prefix_len); - *buf_size = prefix_len; - } - - memcpy(*buf, lens + 1, prefix_len); - - return(*buf + (rec - (lens + 1))); -} - -/******************************************************************* -Validates the consistency of an old-style physical record. */ -static -ibool -rec_validate_old( -/*=============*/ - /* out: TRUE if ok */ - rec_t* rec) /* in: physical record */ -{ - byte* data; - ulint len; - ulint n_fields; - ulint len_sum = 0; - ulint sum = 0; - ulint i; - - ut_a(rec); - n_fields = rec_get_n_fields_old(rec); - - if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) { - fprintf(stderr, "InnoDB: Error: record has %lu fields\n", - (ulong) n_fields); - return(FALSE); - } - - for (i = 0; i < n_fields; i++) { - data = rec_get_nth_field_old(rec, i, &len); - - if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) { - fprintf(stderr, - "InnoDB: Error: record field %lu len %lu\n", - (ulong) i, - (ulong) len); - return(FALSE); - } - - if (len != UNIV_SQL_NULL) { - len_sum += len; - sum += *(data + len -1); /* dereference the - end of the field to - cause a memory trap - if possible */ - } else { - len_sum += rec_get_nth_field_size(rec, i); - } - } - - if (len_sum != rec_get_data_size_old(rec)) { - fprintf(stderr, - "InnoDB: Error: record len should be %lu, len %lu\n", - (ulong) len_sum, - rec_get_data_size_old(rec)); - return(FALSE); - } - - rec_dummy = sum; /* This is here only to fool the compiler */ - - return(TRUE); -} - -/******************************************************************* -Validates the consistency of a physical record. 
*/ - -ibool -rec_validate( -/*=========*/ - /* out: TRUE if ok */ - rec_t* rec, /* in: physical record */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - const byte* data; - ulint len; - ulint n_fields; - ulint len_sum = 0; - ulint sum = 0; - ulint i; - - ut_a(rec); - n_fields = rec_offs_n_fields(offsets); - - if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) { - fprintf(stderr, "InnoDB: Error: record has %lu fields\n", - (ulong) n_fields); - return(FALSE); - } - - ut_a(rec_offs_comp(offsets) || n_fields <= rec_get_n_fields_old(rec)); - - for (i = 0; i < n_fields; i++) { - data = rec_get_nth_field(rec, offsets, i, &len); - - if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) { - fprintf(stderr, - "InnoDB: Error: record field %lu len %lu\n", - (ulong) i, - (ulong) len); - return(FALSE); - } - - if (len != UNIV_SQL_NULL) { - len_sum += len; - sum += *(data + len -1); /* dereference the - end of the field to - cause a memory trap - if possible */ - } else if (!rec_offs_comp(offsets)) { - len_sum += rec_get_nth_field_size(rec, i); - } - } - - if (len_sum != (ulint)(rec_get_end(rec, offsets) - rec)) { - fprintf(stderr, - "InnoDB: Error: record len should be %lu, len %lu\n", - (ulong) len_sum, - (ulong) (rec_get_end(rec, offsets) - rec)); - return(FALSE); - } - - rec_dummy = sum; /* This is here only to fool the compiler */ - - if (!rec_offs_comp(offsets)) { - ut_a(rec_validate_old(rec)); - } - - return(TRUE); -} - -/******************************************************************* -Prints an old-style physical record. */ - -void -rec_print_old( -/*==========*/ - FILE* file, /* in: file where to print */ - rec_t* rec) /* in: physical record */ -{ - const byte* data; - ulint len; - ulint n; - ulint i; - - ut_ad(rec); - - n = rec_get_n_fields_old(rec); - - fprintf(file, "PHYSICAL RECORD: n_fields %lu;" - " %u-byte offsets; info bits %lu\n", - (ulong) n, - rec_get_1byte_offs_flag(rec) ? 
1 : 2, - (ulong) rec_get_info_bits(rec, FALSE)); - - for (i = 0; i < n; i++) { - - data = rec_get_nth_field_old(rec, i, &len); - - fprintf(file, " %lu:", (ulong) i); - - if (len != UNIV_SQL_NULL) { - if (len <= 30) { - - ut_print_buf(file, data, len); - } else { - ut_print_buf(file, data, 30); - - fputs("...(truncated)", file); - } - } else { - fprintf(file, " SQL NULL, size %lu ", - rec_get_nth_field_size(rec, i)); - } - putc(';', file); - } - - putc('\n', file); - - rec_validate_old(rec); -} - -/******************************************************************* -Prints a physical record. */ - -void -rec_print_new( -/*==========*/ - FILE* file, /* in: file where to print */ - rec_t* rec, /* in: physical record */ - const ulint* offsets)/* in: array returned by rec_get_offsets() */ -{ - const byte* data; - ulint len; - ulint i; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - - if (!rec_offs_comp(offsets)) { - rec_print_old(file, rec); - return; - } - - ut_ad(rec); - - fprintf(file, "PHYSICAL RECORD: n_fields %lu;" - " compact format; info bits %lu\n", - (ulong) rec_offs_n_fields(offsets), - (ulong) rec_get_info_bits(rec, TRUE)); - - for (i = 0; i < rec_offs_n_fields(offsets); i++) { - - data = rec_get_nth_field(rec, offsets, i, &len); - - fprintf(file, " %lu:", (ulong) i); - - if (len != UNIV_SQL_NULL) { - if (len <= 30) { - - ut_print_buf(file, data, len); - } else { - ut_print_buf(file, data, 30); - - fputs("...(truncated)", file); - } - } else { - fputs(" SQL NULL", file); - } - putc(';', file); - } - - putc('\n', file); - - rec_validate(rec, offsets); -} - -/******************************************************************* -Prints a physical record. 
*/ - -void -rec_print( -/*======*/ - FILE* file, /* in: file where to print */ - rec_t* rec, /* in: physical record */ - dict_index_t* index) /* in: record descriptor */ -{ - ut_ad(index); - /* Old-style records go straight to rec_print_old(); otherwise the offsets are computed first and passed to rec_print_new(). */ - if (!dict_table_is_comp(index->table)) { - rec_print_old(file, rec); - return; - } else { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; /* first slot holds the element count of the array */ - - rec_print_new(file, rec, - rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap)); - if (UNIV_LIKELY_NULL(heap)) { /* heap is non-NULL only if rec_get_offsets() set it -- presumably when the stack array was too small; confirm */ - mem_heap_free(heap); - } - } -} diff --git a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c deleted file mode 100644 index ad14b927170..00000000000 --- a/storage/innobase/row/row0ins.c +++ /dev/null @@ -1,2522 +0,0 @@ -/****************************************************** -Insert into a table - -(c) 1996 Innobase Oy - -Created 4/20/1996 Heikki Tuuri -*******************************************************/ - -#include "row0ins.h" - -#ifdef UNIV_NONINL -#include "row0ins.ic" -#endif - -#include "dict0dict.h" -#include "dict0boot.h" -#include "trx0undo.h" -#include "btr0btr.h" -#include "btr0cur.h" -#include "mach0data.h" -#include "que0que.h" -#include "row0upd.h" -#include "row0sel.h" -#include "row0row.h" -#include "rem0cmp.h" -#include "lock0lock.h" -#include "log0log.h" -#include "eval0eval.h" -#include "data0data.h" -#include "usr0sess.h" -#include "buf0lru.h" - -#define ROW_INS_PREV 1 -#define ROW_INS_NEXT 2 - - -/********************************************************************* -This prototype is copied from /mysql/sql/ha_innodb.cc. -Invalidates the MySQL query cache for the table. -NOTE that the exact prototype of this function has to be in -/innobase/row/row0ins.c!
*/ -extern -void -innobase_invalidate_query_cache( -/*============================*/ - trx_t* trx, /* in: transaction which modifies the table */ - char* full_name, /* in: concatenation of database name, null - char '\0', table name, null char'\0'; - NOTE that in Windows this is always - in LOWER CASE! */ - ulint full_name_len); /* in: full name length where also the null - chars count */ - -/************************************************************************* -Creates an insert node struct. */ - -ins_node_t* -ins_node_create( -/*============*/ - /* out, own: insert node struct */ - ulint ins_type, /* in: INS_VALUES, ... */ - dict_table_t* table, /* in: table where to insert */ - mem_heap_t* heap) /* in: mem heap where created */ -{ - ins_node_t* node; - - node = mem_heap_alloc(heap, sizeof(ins_node_t)); - - node->common.type = QUE_NODE_INSERT; - - node->ins_type = ins_type; - - node->state = INS_NODE_SET_IX_LOCK; - node->table = table; - node->index = NULL; - node->entry = NULL; - - node->select = NULL; - - node->trx_id = ut_dulint_zero; - - node->entry_sys_heap = mem_heap_create(128); - - node->magic_n = INS_NODE_MAGIC_N; - - return(node); -} - -/*************************************************************** -Creates an entry template for each index of a table. */ -static -void -ins_node_create_entry_list( -/*=======================*/ - ins_node_t* node) /* in: row insert node */ -{ - dict_index_t* index; - dtuple_t* entry; - - ut_ad(node->entry_sys_heap); - - UT_LIST_INIT(node->entry_list); - - index = dict_table_get_first_index(node->table); - - while (index != NULL) { - entry = row_build_index_entry(node->row, index, - node->entry_sys_heap); - UT_LIST_ADD_LAST(tuple_list, node->entry_list, entry); - - index = dict_table_get_next_index(index); - } -} - -/********************************************************************* -Adds system field buffers to a row. 
*/
static
void
row_ins_alloc_sys_fields(
/*=====================*/
	ins_node_t*	node)	/* in: insert node */
{
	dtuple_t*		row	= node->row;
	dict_table_t*		table	= node->table;
	mem_heap_t*		heap	= node->entry_sys_heap;
	const dict_col_t*	sys_col;
	dfield_t*		sys_field;
	byte*			buf;

	ut_ad(row && table && heap);
	ut_ad(dtuple_get_n_fields(row) == dict_table_get_n_cols(table));

	/* Row id: allocate the buffer and remember it in the node */

	sys_col = dict_table_get_sys_col(table, DATA_ROW_ID);
	sys_field = dtuple_get_nth_field(row, dict_col_get_no(sys_col));

	buf = mem_heap_alloc(heap, DATA_ROW_ID_LEN);
	dfield_set_data(sys_field, buf, DATA_ROW_ID_LEN);

	node->row_id_buf = buf;

	/* Trx id: allocate the buffer and remember it in the node */

	sys_col = dict_table_get_sys_col(table, DATA_TRX_ID);
	sys_field = dtuple_get_nth_field(row, dict_col_get_no(sys_col));

	buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN);
	dfield_set_data(sys_field, buf, DATA_TRX_ID_LEN);

	node->trx_id_buf = buf;

	/* Roll ptr: the buffer is attached to the row only; the node
	keeps no pointer to it */

	sys_col = dict_table_get_sys_col(table, DATA_ROLL_PTR);
	sys_field = dtuple_get_nth_field(row, dict_col_get_no(sys_col));

	buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN);
	dfield_set_data(sys_field, buf, DATA_ROLL_PTR_LEN);
}

/*************************************************************************
Sets a new row to insert for an INS_DIRECT node. This function is only used
if we have constructed the row separately, which is a rare case; this
function is quite slow.
*/ - -void -ins_node_set_new_row( -/*=================*/ - ins_node_t* node, /* in: insert node */ - dtuple_t* row) /* in: new row (or first row) for the node */ -{ - node->state = INS_NODE_SET_IX_LOCK; - node->index = NULL; - node->entry = NULL; - - node->row = row; - - mem_heap_empty(node->entry_sys_heap); - - /* Create templates for index entries */ - - ins_node_create_entry_list(node); - - /* Allocate from entry_sys_heap buffers for sys fields */ - - row_ins_alloc_sys_fields(node); - - /* As we allocated a new trx id buf, the trx id should be written - there again: */ - - node->trx_id = ut_dulint_zero; -} - -/*********************************************************************** -Does an insert operation by updating a delete-marked existing record -in the index. This situation can occur if the delete-marked record is -kept in the index for consistent reads. */ -static -ulint -row_ins_sec_index_entry_by_modify( -/*==============================*/ - /* out: DB_SUCCESS or error code */ - ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, - depending on whether mtr holds just a leaf - latch or also a tree latch */ - btr_cur_t* cursor, /* in: B-tree cursor */ - dtuple_t* entry, /* in: index entry to insert */ - que_thr_t* thr, /* in: query thread */ - mtr_t* mtr) /* in: mtr */ -{ - big_rec_t* dummy_big_rec; - mem_heap_t* heap; - upd_t* update; - rec_t* rec; - ulint err; - - rec = btr_cur_get_rec(cursor); - - ut_ad((cursor->index->type & DICT_CLUSTERED) == 0); - ut_ad(rec_get_deleted_flag(rec, - dict_table_is_comp(cursor->index->table))); - - /* We know that in the alphabetical ordering, entry and rec are - identified. But in their binary form there may be differences if - there are char fields in them. Therefore we have to calculate the - difference. 
*/ - - heap = mem_heap_create(1024); - - update = row_upd_build_sec_rec_difference_binary( - cursor->index, entry, rec, thr_get_trx(thr), heap); - if (mode == BTR_MODIFY_LEAF) { - /* Try an optimistic updating of the record, keeping changes - within the page */ - - err = btr_cur_optimistic_update(BTR_KEEP_SYS_FLAG, cursor, - update, 0, thr, mtr); - if (err == DB_OVERFLOW || err == DB_UNDERFLOW) { - err = DB_FAIL; - } - } else { - ut_a(mode == BTR_MODIFY_TREE); - if (buf_LRU_buf_pool_running_out()) { - - err = DB_LOCK_TABLE_FULL; - - goto func_exit; - } - - err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG, cursor, - &dummy_big_rec, update, - 0, thr, mtr); - } -func_exit: - mem_heap_free(heap); - - return(err); -} - -/*********************************************************************** -Does an insert operation by delete unmarking and updating a delete marked -existing record in the index. This situation can occur if the delete marked -record is kept in the index for consistent reads. 
*/
static
ulint
row_ins_clust_index_entry_by_modify(
/*================================*/
				/* out: DB_SUCCESS, DB_FAIL, or error code */
	ulint		mode,	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE,
				depending on whether mtr holds just a leaf
				latch or also a tree latch */
	btr_cur_t*	cursor,	/* in: B-tree cursor */
	big_rec_t**	big_rec,/* out: possible big rec vector of fields
				which have to be stored externally by the
				caller */
	dtuple_t*	entry,	/* in: index entry to insert */
	ulint*		ext_vec,/* in: array containing field numbers of
				externally stored fields in entry, or NULL */
	ulint		n_ext_vec,/* in: number of fields in ext_vec */
	que_thr_t*	thr,	/* in: query thread */
	mtr_t*		mtr)	/* in: mtr */
{
	mem_heap_t*	tmp_heap;
	rec_t*		old_rec;
	upd_t*		diff;
	ulint		err;

	ut_ad(cursor->index->type & DICT_CLUSTERED);

	*big_rec = NULL;

	old_rec = btr_cur_get_rec(cursor);

	ut_ad(rec_get_deleted_flag(old_rec,
				   dict_table_is_comp(cursor->index->table)));

	tmp_heap = mem_heap_create(1024);

	/* The update vector lists every field to be modified; NOTE that
	it may NOT contain the system columns trx_id or roll_ptr */

	diff = row_upd_build_difference_binary(cursor->index, entry, ext_vec,
					       n_ext_vec, old_rec,
					       thr_get_trx(thr), tmp_heap);

	if (mode != BTR_MODIFY_LEAF) {
		ut_a(mode == BTR_MODIFY_TREE);

		if (buf_LRU_buf_pool_running_out()) {
			err = DB_LOCK_TABLE_FULL;
		} else {
			err = btr_cur_pessimistic_update(0, cursor, big_rec,
							 diff, 0, thr, mtr);
		}
	} else {
		/* Optimistic attempt: keep the change within the page */
		err = btr_cur_optimistic_update(0, cursor, diff, 0, thr,
						mtr);
		switch (err) {
		case DB_OVERFLOW:
		case DB_UNDERFLOW:
			err = DB_FAIL;
			break;
		default:
			break;
		}
	}

	mem_heap_free(tmp_heap);

	return(err);
}

/*************************************************************************
Returns TRUE if in a cascaded update/delete an ancestor node of node
updates (not DELETE, but UPDATE)
table. */ -static -ibool -row_ins_cascade_ancestor_updates_table( -/*===================================*/ - /* out: TRUE if an ancestor updates table */ - que_node_t* node, /* in: node in a query graph */ - dict_table_t* table) /* in: table */ -{ - que_node_t* parent; - upd_node_t* upd_node; - - parent = que_node_get_parent(node); - - while (que_node_get_type(parent) == QUE_NODE_UPDATE) { - - upd_node = parent; - - if (upd_node->table == table && upd_node->is_delete == FALSE) { - - return(TRUE); - } - - parent = que_node_get_parent(parent); - - ut_a(parent); - } - - return(FALSE); -} - -/************************************************************************* -Returns the number of ancestor UPDATE or DELETE nodes of a -cascaded update/delete node. */ -static -ulint -row_ins_cascade_n_ancestors( -/*========================*/ - /* out: number of ancestors */ - que_node_t* node) /* in: node in a query graph */ -{ - que_node_t* parent; - ulint n_ancestors = 0; - - parent = que_node_get_parent(node); - - while (que_node_get_type(parent) == QUE_NODE_UPDATE) { - n_ancestors++; - - parent = que_node_get_parent(parent); - - ut_a(parent); - } - - return(n_ancestors); -} - -/********************************************************************** -Calculates the update vector node->cascade->update for a child table in -a cascaded update. 
*/ -static -ulint -row_ins_cascade_calc_update_vec( -/*============================*/ - /* out: number of fields in the - calculated update vector; the value - can also be 0 if no foreign key - fields changed; the returned value - is ULINT_UNDEFINED if the column - type in the child table is too short - to fit the new value in the parent - table: that means the update fails */ - upd_node_t* node, /* in: update node of the parent - table */ - dict_foreign_t* foreign, /* in: foreign key constraint whose - type is != 0 */ - mem_heap_t* heap) /* in: memory heap to use as - temporary storage */ -{ - upd_node_t* cascade = node->cascade_node; - dict_table_t* table = foreign->foreign_table; - dict_index_t* index = foreign->foreign_index; - upd_t* update; - upd_field_t* ufield; - dict_table_t* parent_table; - dict_index_t* parent_index; - upd_t* parent_update; - upd_field_t* parent_ufield; - ulint n_fields_updated; - ulint parent_field_no; - ulint i; - ulint j; - - ut_a(node); - ut_a(foreign); - ut_a(cascade); - ut_a(table); - ut_a(index); - - /* Calculate the appropriate update vector which will set the fields - in the child index record to the same value (possibly padded with - spaces if the column is a fixed length CHAR or FIXBINARY column) as - the referenced index record will get in the update. 
*/ - - parent_table = node->table; - ut_a(parent_table == foreign->referenced_table); - parent_index = foreign->referenced_index; - parent_update = node->update; - - update = cascade->update; - - update->info_bits = 0; - update->n_fields = foreign->n_fields; - - n_fields_updated = 0; - - for (i = 0; i < foreign->n_fields; i++) { - - parent_field_no = dict_table_get_nth_col_pos( - parent_table, - dict_index_get_nth_col_no(parent_index, i)); - - for (j = 0; j < parent_update->n_fields; j++) { - parent_ufield = parent_update->fields + j; - - if (parent_ufield->field_no == parent_field_no) { - - ulint min_size; - const dict_col_t* col; - - col = dict_index_get_nth_col(index, i); - - /* A field in the parent index record is - updated. Let us make the update vector - field for the child table. */ - - ufield = update->fields + n_fields_updated; - - ufield->field_no - = dict_table_get_nth_col_pos( - table, dict_col_get_no(col)); - ufield->exp = NULL; - - ufield->new_val = parent_ufield->new_val; - - /* Do not allow a NOT NULL column to be - updated as NULL */ - - if (ufield->new_val.len == UNIV_SQL_NULL - && (col->prtype & DATA_NOT_NULL)) { - - return(ULINT_UNDEFINED); - } - - /* If the new value would not fit in the - column, do not allow the update */ - - if (ufield->new_val.len != UNIV_SQL_NULL - && dtype_get_at_most_n_mbchars( - col->prtype, - col->mbminlen, col->mbmaxlen, - col->len, - ufield->new_val.len, - ufield->new_val.data) - < ufield->new_val.len) { - - return(ULINT_UNDEFINED); - } - - /* If the parent column type has a different - length than the child column type, we may - need to pad with spaces the new value of the - child column */ - - min_size = dict_col_get_min_size(col); - - if (min_size - && ufield->new_val.len != UNIV_SQL_NULL - && ufield->new_val.len < min_size) { - - char* pad_start; - const char* pad_end; - ufield->new_val.data = mem_heap_alloc( - heap, min_size); - pad_start = ((char*) ufield - ->new_val.data) - + ufield->new_val.len; - pad_end = 
((char*) ufield - ->new_val.data) - + min_size; - ufield->new_val.len = min_size; - ut_memcpy(ufield->new_val.data, - parent_ufield->new_val.data, - parent_ufield->new_val.len); - - switch (UNIV_EXPECT(col->mbminlen,1)) { - default: - ut_error; - case 1: - if (UNIV_UNLIKELY - (dtype_get_charset_coll( - col->prtype) - == DATA_MYSQL_BINARY_CHARSET_COLL)) { - /* Do not pad BINARY - columns. */ - return(ULINT_UNDEFINED); - } - - /* space=0x20 */ - memset(pad_start, 0x20, - pad_end - pad_start); - break; - case 2: - /* space=0x0020 */ - ut_a(!(ufield->new_val.len - % 2)); - ut_a(!(min_size % 2)); - do { - *pad_start++ = 0x00; - *pad_start++ = 0x20; - } while (pad_start < pad_end); - break; - } - } - - ufield->extern_storage = FALSE; - - n_fields_updated++; - } - } - } - - update->n_fields = n_fields_updated; - - return(n_fields_updated); -} - -/************************************************************************* -Set detailed error message associated with foreign key errors for -the given transaction. */ -static -void -row_ins_set_detailed( -/*=================*/ - trx_t* trx, /* in: transaction */ - dict_foreign_t* foreign) /* in: foreign key constraint */ -{ - mutex_enter(&srv_misc_tmpfile_mutex); - rewind(srv_misc_tmpfile); - - if (os_file_set_eof(srv_misc_tmpfile)) { - ut_print_name(srv_misc_tmpfile, trx, TRUE, - foreign->foreign_table_name); - dict_print_info_on_foreign_key_in_create_format( - srv_misc_tmpfile, trx, foreign, FALSE); - trx_set_detailed_error_from_file(trx, srv_misc_tmpfile); - } else { - trx_set_detailed_error(trx, "temp file operation failed"); - } - - mutex_exit(&srv_misc_tmpfile_mutex); -} - -/************************************************************************* -Reports a foreign key error associated with an update or a delete of a -parent table index entry. 
*/
-static
-void
-row_ins_foreign_report_err(
-/*=======================*/
-	const char*	errstr,		/* in: error string from the viewpoint
-					of the parent table; it is printed
-					directly before the parent table
-					index name */
-	que_thr_t*	thr,		/* in: query thread whose run_node
-					is an update node; its transaction is
-					obtained with thr_get_trx() */
-	dict_foreign_t*	foreign,	/* in: foreign key constraint */
-	rec_t*		rec,		/* in: a matching index record in the
-					child table, or NULL: then the report
-					says that the record is not
-					available */
-	dtuple_t*	entry)		/* in: index entry in the parent
-					table, or NULL: then no tuple is
-					printed */
-{
-	FILE*	ef	= dict_foreign_err_file;
-	trx_t*	trx	= thr_get_trx(thr);
-
-	row_ins_set_detailed(trx, foreign);	/* detailed message for trx */
-
-	mutex_enter(&dict_foreign_err_mutex);	/* held while ef is written */
-	rewind(ef);	/* the report is written from the start of the file */
-	ut_print_timestamp(ef);
-	fputs(" Transaction:\n", ef);
-	trx_print(ef, trx, 600);
-
-	fputs("Foreign key constraint fails for table ", ef);
-	ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
-	fputs(":\n", ef);
-	dict_print_info_on_foreign_key_in_create_format(ef, trx, foreign,
-							TRUE);
-	putc('\n', ef);
-	fputs(errstr, ef);
-	fputs(" in parent table, in index ", ef);
-	ut_print_name(ef, trx, FALSE, foreign->referenced_index->name);
-	if (entry) {
-		fputs(" tuple:\n", ef);
-		dtuple_print(ef, entry);
-	}
-	fputs("\nBut in child table ", ef);
-	ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
-	fputs(", in index ", ef);
-	ut_print_name(ef, trx, FALSE, foreign->foreign_index->name);
-	if (rec) {
-		fputs(", there is a record:\n", ef);
-		rec_print(ef, rec, foreign->foreign_index);
-	} else {
-		fputs(", the record is not available\n", ef);
-	}
-	putc('\n', ef);
-
-	mutex_exit(&dict_foreign_err_mutex);
-}
-
-/*************************************************************************
-Reports a foreign key error to dict_foreign_err_file when we are trying
-to add an index entry to a child table. Note that the adding may be the result
-of an update, too.
*/
-static
-void
-row_ins_foreign_report_add_err(
-/*===========================*/
-	trx_t*		trx,		/* in: transaction */
-	dict_foreign_t*	foreign,	/* in: foreign key constraint */
-	rec_t*		rec,		/* in: a record in the parent table:
-					it does not match entry because we
-					have an error! May be NULL: then no
-					record is printed. If it is the page
-					supremum, the record preceding it is
-					printed instead */
-	dtuple_t*	entry)		/* in: index entry to insert in the
-					child table, or NULL: then no tuple
-					is printed */
-{
-	FILE*	ef	= dict_foreign_err_file;
-
-	row_ins_set_detailed(trx, foreign);	/* detailed message for trx */
-
-	mutex_enter(&dict_foreign_err_mutex);	/* held while ef is written */
-	rewind(ef);	/* the report is written from the start of the file */
-	ut_print_timestamp(ef);
-	fputs(" Transaction:\n", ef);
-	trx_print(ef, trx, 600);
-	fputs("Foreign key constraint fails for table ", ef);
-	ut_print_name(ef, trx, TRUE, foreign->foreign_table_name);
-	fputs(":\n", ef);
-	dict_print_info_on_foreign_key_in_create_format(ef, trx, foreign,
-							TRUE);
-	fputs("\nTrying to add in child table, in index ", ef);
-	ut_print_name(ef, trx, FALSE, foreign->foreign_index->name);
-	if (entry) {
-		fputs(" tuple:\n", ef);
-		dtuple_print(ef, entry);
-	}
-	fputs("\nBut in parent table ", ef);
-	ut_print_name(ef, trx, TRUE, foreign->referenced_table_name);
-	fputs(", in index ", ef);
-	ut_print_name(ef, trx, FALSE, foreign->referenced_index->name);
-	fputs(",\nthe closest match we can find is record:\n", ef);
-	if (rec && page_rec_is_supremum(rec)) {
-		/* If the cursor ended on a supremum record, it is better
-		to report the previous record in the error message, so that
-		the user gets a more descriptive error message. */
-		rec = page_rec_get_prev(rec);
-	}
-
-	if (rec) {
-		rec_print(ef, rec, foreign->referenced_index);
-	}
-	putc('\n', ef);
-
-	mutex_exit(&dict_foreign_err_mutex);
-}
-
-/*************************************************************************
-Invalidate the query cache for the given table.
*/
-static
-void
-row_ins_invalidate_query_cache(
-/*===========================*/
-	que_thr_t*	thr,		/* in: query thread whose run_node
-					is an update node */
-	const char*	name)		/* in: table name prefixed with
-					database name and a '/' character;
-					the function asserts that the
-					separator is present */
-{
-	char*	buf;
-	char*	ptr;
-	ulint	len = strlen(name) + 1;	/* length of name plus one */
-
-	buf = mem_strdupl(name, len);	/* private copy that can be modified */
-
-	ptr = strchr(buf, '/');
-	ut_a(ptr);
-	*ptr = '\0';	/* buf now starts with the NUL-terminated database name */
-
-	/* We call a function in ha_innodb.cc. Note that len is still the
-	length computed from the whole name, not the length of the string
-	that buf was cut to above. */
-#ifndef UNIV_HOTBACKUP
-	innobase_invalidate_query_cache(thr_get_trx(thr), buf, len);
-#endif
-	mem_free(buf);
-}
-
-/*************************************************************************
-Performs referential actions or checks when a parent row is deleted or updated
-and the constraint had an ON DELETE or ON UPDATE condition which was not
-RESTRICT. If foreign->type defines neither a CASCADE nor a SET NULL action
-for the operation of the update node in thr->run_node, the error is reported
-and DB_ROW_IS_REFERENCED is returned at once. Otherwise the cascade node of
-the update node is created on first use, initialized for this constraint and
-executed with row_update_cascade_for_mysql(). On every path except the two
-immediate returns of DB_ROW_IS_REFERENCED, the position of pcur is stored,
-mtr is committed and started again, and the position of pcur is restored
-before returning. */
-static
-ulint
-row_ins_foreign_check_on_constraint(
-/*================================*/
-					/* out: DB_SUCCESS, DB_LOCK_WAIT,
-					or error code */
-	que_thr_t*	thr,		/* in: query thread whose run_node
-					is an update node */
-	dict_foreign_t*	foreign,	/* in: foreign key constraint whose
-					type is != 0 */
-	btr_pcur_t*	pcur,		/* in: cursor placed on a matching
-					index record in the child table */
-	dtuple_t*	entry,		/* in: index entry in the parent
-					table; only passed on to the error
-					reports */
-	mtr_t*		mtr)		/* in: mtr holding the latch of pcur
-					page */
-{
-	upd_node_t*	node;
-	upd_node_t*	cascade;
-	dict_table_t*	table		= foreign->foreign_table;
-	dict_index_t*	index;
-	dict_index_t*	clust_index;
-	dtuple_t*	ref;
-	mem_heap_t*	upd_vec_heap	= NULL;
-	rec_t*		rec;
-	rec_t*		clust_rec;
-	upd_t*		update;
-	ulint		n_to_update;
-	ulint		err;
-	ulint		i;
-	trx_t*		trx;
-	mem_heap_t*	tmp_heap	= NULL;
-
-	ut_a(thr);
-	ut_a(foreign);
-	ut_a(pcur);
-	ut_a(mtr);
-
-	trx = thr_get_trx(thr);
-
-	/* Since we are going to delete or update a row, we have to invalidate
-	the MySQL query cache for table. A deadlock of threads is not possible
-	here because the caller of this function does not hold any latches with
-	the sync0sync.h rank above the kernel mutex. The query cache mutex has
-	a rank just above the kernel mutex. */
-
-	row_ins_invalidate_query_cache(thr, table->name);
-
-	node = thr->run_node;
-
-	if (node->is_delete && 0 == (foreign->type
-				     & (DICT_FOREIGN_ON_DELETE_CASCADE
-					| DICT_FOREIGN_ON_DELETE_SET_NULL))) {
-
-		row_ins_foreign_report_err("Trying to delete",
-					   thr, foreign,
-					   btr_pcur_get_rec(pcur), entry);
-
-		return(DB_ROW_IS_REFERENCED);
-	}
-
-	if (!node->is_delete && 0 == (foreign->type
-				      & (DICT_FOREIGN_ON_UPDATE_CASCADE
-					 | DICT_FOREIGN_ON_UPDATE_SET_NULL))) {
-
-		/* This is an UPDATE */
-
-		row_ins_foreign_report_err("Trying to update",
-					   thr, foreign,
-					   btr_pcur_get_rec(pcur), entry);
-
-		return(DB_ROW_IS_REFERENCED);
-	}
-
-	if (node->cascade_node == NULL) {
-		/* Extend our query graph by creating a child to current
-		update node. The child is used in the cascade or set null
-		operation. */
-
-		node->cascade_heap = mem_heap_create(128);
-		node->cascade_node = row_create_update_node_for_mysql(
-			table, node->cascade_heap);
-		que_node_set_parent(node->cascade_node, node);
-	}
-
-	/* Initialize cascade_node to do the operation we want. Note that we
-	use the SAME cascade node to do all foreign key operations of the
-	SQL DELETE: the table of the cascade node may change if there are
-	several child tables to the table where the delete is done! */
-
-	cascade = node->cascade_node;
-
-	cascade->table = table;
-
-	cascade->foreign = foreign;
-
-	if (node->is_delete
-	    && (foreign->type & DICT_FOREIGN_ON_DELETE_CASCADE)) {
-		cascade->is_delete = TRUE;
-	} else {
-		cascade->is_delete = FALSE;
-
-		if (foreign->n_fields > cascade->update_n_fields) {
-			/* We have to make the update vector longer */
-
-			cascade->update = upd_create(foreign->n_fields,
-						     node->cascade_heap);
-			cascade->update_n_fields = foreign->n_fields;
-		}
-	}
-
-	/* We do not allow cyclic cascaded updating (DELETE is allowed,
-	but not UPDATE) of the same table, as this can lead to an infinite
-	cycle. Check that we are not updating the same table which is
-	already being modified in this cascade chain. We have to check
-	this also because the modification of the indexes of a 'parent'
-	table may still be incomplete, and we must avoid seeing the indexes
-	of the parent table in an inconsistent state! */
-
-	if (!cascade->is_delete
-	    && row_ins_cascade_ancestor_updates_table(cascade, table)) {
-
-		/* We do not know if this would break foreign key
-		constraints, but play safe and return an error */
-
-		err = DB_ROW_IS_REFERENCED;
-
-		row_ins_foreign_report_err(
-			"Trying an update, possibly causing a cyclic"
-			" cascaded update\n"
-			"in the child table,", thr, foreign,
-			btr_pcur_get_rec(pcur), entry);
-
-		goto nonstandard_exit_func;
-	}
-
-	if (row_ins_cascade_n_ancestors(cascade) >= 15) {
-		err = DB_ROW_IS_REFERENCED;	/* cascade chain is too deep */
-
-		row_ins_foreign_report_err(
-			"Trying a too deep cascaded delete or update\n",
-			thr, foreign, btr_pcur_get_rec(pcur), entry);
-
-		goto nonstandard_exit_func;
-	}
-
-	index = btr_pcur_get_btr_cur(pcur)->index;
-
-	ut_a(index == foreign->foreign_index);
-
-	rec = btr_pcur_get_rec(pcur);
-
-	if (index->type & DICT_CLUSTERED) {
-		/* pcur is already positioned in the clustered index of
-		the child table */
-
-		clust_index = index;
-		clust_rec = rec;
-	} else {
-		/* We have to look for the record in the clustered index
-		in the child table. The row reference is built in tmp_heap,
-		which is freed on both exit paths of this function. */
-
-		clust_index = dict_table_get_first_index(table);
-
-		tmp_heap = mem_heap_create(256);
-
-		ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec,
-					tmp_heap);
-		btr_pcur_open_with_no_init(clust_index, ref,
-					   PAGE_CUR_LE, BTR_SEARCH_LEAF,
-					   cascade->pcur, 0, mtr);
-
-		clust_rec = btr_pcur_get_rec(cascade->pcur);
-
-		if (!page_rec_is_user_rec(clust_rec)
-		    || btr_pcur_get_low_match(cascade->pcur)
-		    < dict_index_get_n_unique(clust_index)) {
-
-			fputs("InnoDB: error in cascade of a foreign key op\n"
-			      "InnoDB: ", stderr);
-			dict_index_name_print(stderr, trx, index);
-
-			fputs("\n"
-			      "InnoDB: record ", stderr);
-			rec_print(stderr, rec, index);
-			fputs("\n"
-			      "InnoDB: clustered record ", stderr);
-			rec_print(stderr, clust_rec, clust_index);
-			fputs("\n"
-			      "InnoDB: Submit a detailed bug report to"
-			      " http://bugs.mysql.com\n", stderr);
-
-			err = DB_SUCCESS;	/* NOTE: only reported to stderr; the caller sees success */
-
-			goto nonstandard_exit_func;
-		}
-	}
-
-	/* Set an X-lock on the row to delete or update in the child table */
-
-	err = lock_table(0, table, LOCK_IX, thr);
-
-	if (err == DB_SUCCESS) {
-		/* Here it suffices to use a LOCK_REC_NOT_GAP type lock;
-		we already have a normal shared lock on the appropriate
-		gap if the search criterion was not unique */
-
-		err = lock_clust_rec_read_check_and_lock_alt(
-			0, clust_rec, clust_index, LOCK_X, LOCK_REC_NOT_GAP,
-			thr);
-	}
-
-	if (err != DB_SUCCESS) {
-
-		goto nonstandard_exit_func;
-	}
-
-	if (rec_get_deleted_flag(clust_rec, dict_table_is_comp(table))) {
-		/* This can happen if there is a circular reference of
-		rows such that cascading delete comes to delete a row
-		already in the process of being delete marked */
-		err = DB_SUCCESS;
-
-		goto nonstandard_exit_func;
-	}
-
-	if ((node->is_delete
-	     && (foreign->type & DICT_FOREIGN_ON_DELETE_SET_NULL))
-	    || (!node->is_delete
-		&& (foreign->type & DICT_FOREIGN_ON_UPDATE_SET_NULL))) {
-
-		/* Build the appropriate update vector which sets
-		foreign->n_fields first fields in rec to SQL NULL */
-
-		update = cascade->update;
-
-		update->info_bits = 0;
-		update->n_fields = foreign->n_fields;
-
-		for (i = 0; i < foreign->n_fields; i++) {
-			(update->fields + i)->field_no
-				= dict_table_get_nth_col_pos(
-					table,
-					dict_index_get_nth_col_no(index, i));
-			(update->fields + i)->exp = NULL;
-			(update->fields + i)->new_val.len = UNIV_SQL_NULL;
-			(update->fields + i)->new_val.data = NULL;
-			(update->fields + i)->extern_storage = FALSE;
-		}
-	}
-
-	if (!node->is_delete
-	    && (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)) {
-
-		/* Build the appropriate update vector which sets changing
-		foreign->n_fields first fields in rec to new values.
-		The heap passed to the helper is freed on both exit paths
-		of this function. */
-
-		upd_vec_heap = mem_heap_create(256);
-
-		n_to_update = row_ins_cascade_calc_update_vec(node, foreign,
-							      upd_vec_heap);
-		if (n_to_update == ULINT_UNDEFINED) {
-			err = DB_ROW_IS_REFERENCED;
-
-			row_ins_foreign_report_err(
-				"Trying a cascaded update where the"
-				" updated value in the child\n"
-				"table would not fit in the length"
-				" of the column, or the value would\n"
-				"be NULL and the column is"
-				" declared as not NULL in the child table,",
-				thr, foreign, btr_pcur_get_rec(pcur), entry);
-
-			goto nonstandard_exit_func;
-		}
-
-		if (cascade->update->n_fields == 0) {
-
-			/* The update does not change any columns referred
-			to in this foreign key constraint: no need to do
-			anything */
-
-			err = DB_SUCCESS;
-
-			goto nonstandard_exit_func;
-		}
-	}
-
-	/* Store pcur position and initialize or store the cascade node
-	pcur stored position */
-
-	btr_pcur_store_position(pcur, mtr);
-
-	if (index == clust_index) {
-		btr_pcur_copy_stored_position(cascade->pcur, pcur);
-	} else {
-		btr_pcur_store_position(cascade->pcur, mtr);
-	}
-
-	mtr_commit(mtr);
-
-	ut_a(cascade->pcur->rel_pos == BTR_PCUR_ON);
-
-	cascade->state = UPD_NODE_UPDATE_CLUSTERED;
-
-	err = row_update_cascade_for_mysql(thr, cascade,
-					   foreign->foreign_table);
-
-	if (foreign->foreign_table->n_foreign_key_checks_running == 0) {	/* only reported; err is not changed */
-		fprintf(stderr,
-			"InnoDB: error: table %s has the
counter 0"
-			" though there is\n"
-			"InnoDB: a FOREIGN KEY check running on it.\n",
-			foreign->foreign_table->name);
-	}
-
-	/* Release the data dictionary latch for a while, so that we do not
-	starve other threads from doing CREATE TABLE etc. if we have a huge
-	cascaded operation running. The counter n_foreign_key_checks_running
-	will prevent other users from dropping or ALTERing the table when we
-	release the latch. */
-
-	row_mysql_unfreeze_data_dictionary(thr_get_trx(thr));
-	row_mysql_freeze_data_dictionary(thr_get_trx(thr));
-
-	mtr_start(mtr);
-
-	/* Restore pcur position */
-
-	btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
-
-	if (tmp_heap) {
-		mem_heap_free(tmp_heap);
-	}
-
-	if (upd_vec_heap) {
-		mem_heap_free(upd_vec_heap);
-	}
-
-	return(err);
-
-nonstandard_exit_func:	/* exit used when the cascade operation is not run */
-	if (tmp_heap) {
-		mem_heap_free(tmp_heap);
-	}
-
-	if (upd_vec_heap) {
-		mem_heap_free(upd_vec_heap);
-	}
-
-	btr_pcur_store_position(pcur, mtr);
-
-	mtr_commit(mtr);
-	mtr_start(mtr);
-
-	btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr);
-
-	return(err);
-}
-
-/*************************************************************************
-Sets a shared lock on a record. Used in locking possible duplicate key
-records and also in checking foreign key constraints.
*/
-static
-ulint
-row_ins_set_shared_rec_lock(
-/*========================*/
-				/* out: DB_SUCCESS or error code, as returned
-				by the lock_*_rec_read_check_and_lock() call
-				chosen by the index type */
-	ulint		type,	/* in: LOCK_ORDINARY, LOCK_GAP, or
-				LOCK_REC_NOT_GAP type lock */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: index; its DICT_CLUSTERED flag selects
-				the clustered or secondary index lock call */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	ulint	err;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	if (index->type & DICT_CLUSTERED) {
-		err = lock_clust_rec_read_check_and_lock(
-			0, rec, index, offsets, LOCK_S, type, thr);
-	} else {
-		err = lock_sec_rec_read_check_and_lock(
-			0, rec, index, offsets, LOCK_S, type, thr);
-	}
-
-	return(err);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*************************************************************************
-Sets an exclusive lock on a record. Used in locking possible duplicate key
-records. Identical to row_ins_set_shared_rec_lock() except that LOCK_X is
-requested instead of LOCK_S. Compiled only when UNIV_HOTBACKUP is not
-defined. */
-static
-ulint
-row_ins_set_exclusive_rec_lock(
-/*===========================*/
-				/* out: DB_SUCCESS or error code, as returned
-				by the lock_*_rec_read_check_and_lock() call
-				chosen by the index type */
-	ulint		type,	/* in: LOCK_ORDINARY, LOCK_GAP, or
-				LOCK_REC_NOT_GAP type lock */
-	rec_t*		rec,	/* in: record */
-	dict_index_t*	index,	/* in: index; its DICT_CLUSTERED flag selects
-				the clustered or secondary index lock call */
-	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	ulint	err;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	if (index->type & DICT_CLUSTERED) {
-		err = lock_clust_rec_read_check_and_lock(
-			0, rec, index, offsets, LOCK_X, type, thr);
-	} else {
-		err = lock_sec_rec_read_check_and_lock(
-			0, rec, index, offsets, LOCK_X, type, thr);
-	}
-
-	return(err);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************
-Checks if foreign key constraint fails for an index entry. Sets shared locks
-which lock either the success or the failure of the constraint. NOTE that
-the caller must have a shared latch on dict_operation_lock.
*/
-
-ulint
-row_ins_check_foreign_constraint(
-/*=============================*/
-				/* out: DB_SUCCESS,
-				DB_NO_REFERENCED_ROW,
-				or DB_ROW_IS_REFERENCED; an error code
-				from lock_table(), from
-				row_ins_set_shared_rec_lock() or from
-				row_ins_foreign_check_on_constraint() can
-				also be returned, where DB_DUPLICATE_KEY
-				from the last one is mapped to
-				DB_FOREIGN_DUPLICATE_KEY; after a lock
-				wait that did not end in DB_SUCCESS,
-				trx->error_state is returned */
-	ibool		check_ref,/* in: TRUE if we want to check that
-				the referenced table is ok, FALSE if we
-				want to check the foreign key table */
-	dict_foreign_t*	foreign,/* in: foreign constraint; NOTE that the
-				tables mentioned in it must be in the
-				dictionary cache if they exist at all */
-	dict_table_t*	table,	/* in: if check_ref is TRUE, then the foreign
-				table, else the referenced table */
-	dtuple_t*	entry,	/* in: index entry for index; its
-				n_fields_cmp is changed during the search
-				and restored afterwards */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	upd_node_t*	upd_node;
-	dict_table_t*	check_table;
-	dict_index_t*	check_index;
-	ulint		n_fields_cmp;
-	rec_t*		rec;
-	btr_pcur_t	pcur;
-	ibool		moved;
-	int		cmp;
-	ulint		err;
-	ulint		i;
-	mtr_t		mtr;
-	trx_t*		trx		= thr_get_trx(thr);
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;	/* element count is stored in the first element */
-
-run_again:	/* the check is restarted from here after a successful lock wait */
-#ifdef UNIV_SYNC_DEBUG
-	ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_SHARED));
-#endif /* UNIV_SYNC_DEBUG */
-
-	err = DB_SUCCESS;
-
-	if (trx->check_foreigns == FALSE) {
-		/* The user has suppressed foreign key checks currently for
-		this session */
-		goto exit_func;
-	}
-
-	/* If any of the foreign key fields in entry is SQL NULL, we
-	suppress the foreign key check: this is compatible with Oracle,
-	for example */
-
-	for (i = 0; i < foreign->n_fields; i++) {
-		if (UNIV_SQL_NULL == dfield_get_len(
-			    dtuple_get_nth_field(entry, i))) {
-
-			goto exit_func;
-		}
-	}
-
-	if (que_node_get_type(thr->run_node) == QUE_NODE_UPDATE) {
-		upd_node = thr->run_node;
-
-		if (!(upd_node->is_delete) && upd_node->foreign == foreign) {
-			/* If a cascaded update is done as defined by a
-			foreign key constraint, do not check that
-			constraint for the child row. In ON UPDATE CASCADE
-			the update of the parent row is only half done when
-			we come here: if we would check the constraint here
-			for the child row it would fail.
-
-			A QUESTION remains: if in the child table there are
-			several constraints which refer to the same parent
-			table, we should merge all updates to the child as
-			one update? And the updates can be contradictory!
-			Currently we just perform the update associated
-			with each foreign key constraint, one after
-			another, and the user has problems predicting in
-			which order they are performed. */
-
-			goto exit_func;
-		}
-	}
-
-	if (check_ref) {
-		check_table = foreign->referenced_table;
-		check_index = foreign->referenced_index;
-	} else {
-		check_table = foreign->foreign_table;
-		check_index = foreign->foreign_index;
-	}
-
-	if (check_table == NULL || check_table->ibd_file_missing) {
-		if (check_ref) {	/* otherwise err stays DB_SUCCESS */
-			FILE*	ef = dict_foreign_err_file;
-
-			row_ins_set_detailed(trx, foreign);
-
-			mutex_enter(&dict_foreign_err_mutex);
-			rewind(ef);
-			ut_print_timestamp(ef);
-			fputs(" Transaction:\n", ef);
-			trx_print(ef, trx, 600);
-			fputs("Foreign key constraint fails for table ", ef);
-			ut_print_name(ef, trx, TRUE,
-				      foreign->foreign_table_name);
-			fputs(":\n", ef);
-			dict_print_info_on_foreign_key_in_create_format(
-				ef, trx, foreign, TRUE);
-			fputs("\nTrying to add to index ", ef);
-			ut_print_name(ef, trx, FALSE,
-				      foreign->foreign_index->name);
-			fputs(" tuple:\n", ef);
-			dtuple_print(ef, entry);
-			fputs("\nBut the parent table ", ef);
-			ut_print_name(ef, trx, TRUE,
-				      foreign->referenced_table_name);
-			fputs("\nor its .ibd file does"
-			      " not currently exist!\n", ef);
-			mutex_exit(&dict_foreign_err_mutex);
-
-			err = DB_NO_REFERENCED_ROW;
-		}
-
-		goto exit_func;
-	}
-
-	ut_a(check_table);
-	ut_a(check_index);
-
-	if (check_table != table) {
-		/* We already have a LOCK_IX on table, but not necessarily
-		on check_table */
-
-		err = lock_table(0, check_table, LOCK_IS, thr);
-
-		if (err != DB_SUCCESS) {
-
-			goto do_possible_lock_wait;
-		}
-	}
-
-	mtr_start(&mtr);
-
-	/* Store old value on n_fields_cmp */
-
-	n_fields_cmp = dtuple_get_n_fields_cmp(entry);
-
-	dtuple_set_n_fields_cmp(entry, foreign->n_fields);
-
-	btr_pcur_open(check_index, entry, PAGE_CUR_GE,
-		      BTR_SEARCH_LEAF, &pcur, &mtr);
-
-	/* Scan index records and check if there is a matching record */
-
-	for (;;) {
-		rec = btr_pcur_get_rec(&pcur);
-
-		if (page_rec_is_infimum(rec)) {
-
-			goto next_rec;
-		}
-
-		offsets = rec_get_offsets(rec, check_index,
-					  offsets, ULINT_UNDEFINED, &heap);
-
-		if (page_rec_is_supremum(rec)) {
-
-			err = row_ins_set_shared_rec_lock(
-				LOCK_ORDINARY, rec, check_index, offsets, thr);
-			if (err != DB_SUCCESS) {
-
-				break;
-			}
-
-			goto next_rec;
-		}
-
-		cmp = cmp_dtuple_rec(entry, rec, offsets);
-
-		if (cmp == 0) {
-			if (rec_get_deleted_flag(rec,
-						 rec_offs_comp(offsets))) {
-				err = row_ins_set_shared_rec_lock(
-					LOCK_ORDINARY, rec, check_index,
-					offsets, thr);
-				if (err != DB_SUCCESS) {
-
-					break;
-				}
-			} else {
-				/* Found a matching record. Lock only
-				a record because we can allow inserts
-				into gaps */
-
-				err = row_ins_set_shared_rec_lock(
-					LOCK_REC_NOT_GAP, rec, check_index,
-					offsets, thr);
-
-				if (err != DB_SUCCESS) {
-
-					break;
-				}
-
-				if (check_ref) {
-					err = DB_SUCCESS;
-
-					break;
-				} else if (foreign->type != 0) {
-					/* There is an ON UPDATE or ON DELETE
-					condition: check them in a separate
-					function */
-
-					err = row_ins_foreign_check_on_constraint(
-						thr, foreign, &pcur, entry,
-						&mtr);
-					if (err != DB_SUCCESS) {
-						/* Since reporting a plain
-						"duplicate key" error
-						message to the user in
-						cases where a long CASCADE
-						operation would lead to a
-						duplicate key in some
-						other table is very
-						confusing, map duplicate
-						key errors resulting from
-						FK constraints to a
-						separate error code. */
-
-						if (err == DB_DUPLICATE_KEY) {
-							err = DB_FOREIGN_DUPLICATE_KEY;
-						}
-
-						break;
-					}
-				} else {
-					row_ins_foreign_report_err(
-						"Trying to delete or update",
-						thr, foreign, rec, entry);
-
-					err = DB_ROW_IS_REFERENCED;
-					break;
-				}
-			}
-		}
-
-		if (cmp < 0) {
-			err = row_ins_set_shared_rec_lock(
-				LOCK_GAP, rec, check_index, offsets, thr);
-			if (err != DB_SUCCESS) {
-
-				break;
-			}
-
-			if (check_ref) {
-				err = DB_NO_REFERENCED_ROW;
-				row_ins_foreign_report_add_err(
-					trx, foreign, rec, entry);
-			} else {
-				err = DB_SUCCESS;
-			}
-
-			break;
-		}
-
-		ut_a(cmp == 0);
-next_rec:
-		moved = btr_pcur_move_to_next(&pcur, &mtr);
-
-		if (!moved) {	/* the scan ended without a decision */
-			if (check_ref) {
-				rec = btr_pcur_get_rec(&pcur);
-				row_ins_foreign_report_add_err(
-					trx, foreign, rec, entry);
-				err = DB_NO_REFERENCED_ROW;
-			} else {
-				err = DB_SUCCESS;
-			}
-
-			break;
-		}
-	}
-
-	btr_pcur_close(&pcur);
-
-	mtr_commit(&mtr);
-
-	/* Restore old value */
-	dtuple_set_n_fields_cmp(entry, n_fields_cmp);
-
-do_possible_lock_wait:
-	if (err == DB_LOCK_WAIT) {
-		trx->error_state = err;
-
-		que_thr_stop_for_mysql(thr);
-
-		srv_suspend_mysql_thread(thr);
-
-		if (trx->error_state == DB_SUCCESS) {
-
-			goto run_again;
-		}
-
-		err = trx->error_state;
-	}
-
-exit_func:
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-	return(err);
-}
-
-/*******************************************************************
-Checks if foreign key constraints fail for an index entry. If index
-is not mentioned in any constraint, this function does nothing,
-Otherwise does searches to the indexes of referenced tables and
-sets shared locks which lock either the success or the failure of
-a constraint.
*/
-static
-ulint
-row_ins_check_foreign_constraints(
-/*==============================*/
-				/* out: DB_SUCCESS, or the error code of the
-				first row_ins_check_foreign_constraint() call
-				that did not return DB_SUCCESS */
-	dict_table_t*	table,	/* in: table whose foreign_list is scanned */
-	dict_index_t*	index,	/* in: index; only constraints whose
-				foreign_index is this index are checked */
-	dtuple_t*	entry,	/* in: index entry for index */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	dict_foreign_t*	foreign;
-	ulint		err;
-	trx_t*		trx;
-	ibool		got_s_lock	= FALSE;
-
-	trx = thr_get_trx(thr);
-
-	foreign = UT_LIST_GET_FIRST(table->foreign_list);
-
-	while (foreign) {
-		if (foreign->foreign_index == index) {
-
-			if (foreign->referenced_table == NULL) {
-				dict_table_get(foreign->referenced_table_name,
-					       FALSE);	/* NOTE(review): return value is ignored; presumably this call loads the table and sets foreign->referenced_table - confirm */
-			}
-
-			if (0 == trx->dict_operation_lock_mode) {
-				got_s_lock = TRUE;
-
-				row_mysql_freeze_data_dictionary(trx);
-			}
-
-			if (foreign->referenced_table) {
-				mutex_enter(&(dict_sys->mutex));
-
-				(foreign->referenced_table
-				 ->n_foreign_key_checks_running)++;
-
-				mutex_exit(&(dict_sys->mutex));
-			}
-
-			/* NOTE that if the thread ends up waiting for a lock
-			we will release dict_operation_lock temporarily!
-			But the counter on the table protects the referenced
-			table from being dropped while the check is running. */
-
-			err = row_ins_check_foreign_constraint(
-				TRUE, foreign, table, entry, thr);
-
-			if (foreign->referenced_table) {
-				mutex_enter(&(dict_sys->mutex));
-
-				ut_a(foreign->referenced_table
-				     ->n_foreign_key_checks_running > 0);
-				(foreign->referenced_table
-				 ->n_foreign_key_checks_running)--;
-
-				mutex_exit(&(dict_sys->mutex));
-			}
-
-			if (got_s_lock) {
-				row_mysql_unfreeze_data_dictionary(trx);
-			}
-
-			if (err != DB_SUCCESS) {
-				return(err);	/* remaining constraints are not checked */
-			}
-		}
-
-		foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
-	}
-
-	return(DB_SUCCESS);
-}
-
-#ifndef UNIV_HOTBACKUP
-/*******************************************************************
-Checks if a unique key violation to rec would occur at the index entry
-insert.
*/
-static
-ibool
-row_ins_dupl_error_with_rec(
-/*========================*/
-				/* out: TRUE if error, that is: entry matches
-				rec on at least the n_unique first fields,
-				rec is not delete marked and, for an index
-				that is not clustered, none of the n_unique
-				first fields of entry is SQL NULL */
-	rec_t*		rec,	/* in: user record; NOTE that we assume
-				that the caller already has a record lock on
-				the record! */
-	dtuple_t*	entry,	/* in: entry to insert */
-	dict_index_t*	index,	/* in: index */
-	const ulint*	offsets)/* in: rec_get_offsets(rec, index) */
-{
-	ulint	matched_fields;
-	ulint	matched_bytes;
-	ulint	n_unique;
-	ulint	i;
-
-	ut_ad(rec_offs_validate(rec, index, offsets));
-
-	n_unique = dict_index_get_n_unique(index);
-
-	matched_fields = 0;
-	matched_bytes = 0;
-
-	cmp_dtuple_rec_with_match(entry, rec, offsets,
-				  &matched_fields, &matched_bytes);	/* only matched_fields is examined below */
-
-	if (matched_fields < n_unique) {
-
-		return(FALSE);
-	}
-
-	/* In a unique secondary index we allow equal key values if they
-	contain SQL NULLs */
-
-	if (!(index->type & DICT_CLUSTERED)) {
-
-		for (i = 0; i < n_unique; i++) {
-			if (UNIV_SQL_NULL == dfield_get_len(
-				    dtuple_get_nth_field(entry, i))) {
-
-				return(FALSE);
-			}
-		}
-	}
-
-	return(!rec_get_deleted_flag(rec, rec_offs_comp(offsets)));	/* a delete marked record is not a duplicate */
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/*******************************************************************
-Scans a unique non-clustered index at a given index entry to determine
-whether a uniqueness violation has occurred for the key value of the entry.
-Set shared locks on possible duplicate records.
*/
-static
-ulint
-row_ins_scan_sec_index_for_duplicate(
-/*=================================*/
-				/* out: DB_SUCCESS, DB_DUPLICATE_KEY, or
-				DB_LOCK_WAIT; any other code returned by the
-				record lock call is passed on as well */
-	dict_index_t*	index,	/* in: non-clustered unique index */
-	dtuple_t*	entry,	/* in: index entry; its n_fields_cmp is
-				changed during the scan and restored
-				afterwards */
-	que_thr_t*	thr)	/* in: query thread */
-{
-#ifndef UNIV_HOTBACKUP
-	ulint		n_unique;
-	ulint		i;
-	int		cmp;
-	ulint		n_fields_cmp;
-	rec_t*		rec;
-	btr_pcur_t	pcur;
-	ulint		err		= DB_SUCCESS;
-	ibool		moved;
-	unsigned	allow_duplicates;
-	mtr_t		mtr;
-	mem_heap_t*	heap		= NULL;
-	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;	/* element count is stored in the first element */
-
-	n_unique = dict_index_get_n_unique(index);
-
-	/* If the secondary index is unique, but one of the fields in the
-	n_unique first fields is NULL, a unique key violation cannot occur,
-	since we define NULL != NULL in this case */
-
-	for (i = 0; i < n_unique; i++) {
-		if (UNIV_SQL_NULL == dfield_get_len(
-			    dtuple_get_nth_field(entry, i))) {
-
-			return(DB_SUCCESS);
-		}
-	}
-
-	mtr_start(&mtr);
-
-	/* Store old value on n_fields_cmp */
-
-	n_fields_cmp = dtuple_get_n_fields_cmp(entry);
-
-	dtuple_set_n_fields_cmp(entry, dict_index_get_n_unique(index));
-
-	btr_pcur_open(index, entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr);
-
-	allow_duplicates = thr_get_trx(thr)->duplicates & TRX_DUP_IGNORE;	/* nonzero: X-locks are taken below instead of S-locks */
-
-	/* Scan index records and check if there is a duplicate */
-
-	for (;;) {
-		rec = btr_pcur_get_rec(&pcur);
-
-		if (page_rec_is_infimum(rec)) {
-
-			goto next_rec;
-		}
-
-		offsets = rec_get_offsets(rec, index, offsets,
-					  ULINT_UNDEFINED, &heap);
-
-		if (allow_duplicates) {
-
-			/* If the SQL-query will update or replace
-			duplicate key we will take X-lock for
-			duplicates (REPLACE, LOAD DATA INFILE ... REPLACE,
-			INSERT ON DUPLICATE KEY UPDATE). */
-
-			err = row_ins_set_exclusive_rec_lock(
-				LOCK_ORDINARY, rec, index, offsets, thr);
-		} else {
-
-			err = row_ins_set_shared_rec_lock(
-				LOCK_ORDINARY, rec, index, offsets, thr);
-		}
-
-		if (err != DB_SUCCESS) {
-
-			break;
-		}
-
-		if (page_rec_is_supremum(rec)) {
-
-			goto next_rec;
-		}
-
-		cmp = cmp_dtuple_rec(entry, rec, offsets);
-
-		if (cmp == 0) {
-			if (row_ins_dupl_error_with_rec(rec, entry,
-							index, offsets)) {
-				err = DB_DUPLICATE_KEY;
-
-				thr_get_trx(thr)->error_info = index;
-
-				break;
-			}
-		}
-
-		if (cmp < 0) {
-			break;
-		}
-
-		ut_a(cmp == 0);
-next_rec:
-		moved = btr_pcur_move_to_next(&pcur, &mtr);
-
-		if (!moved) {
-			break;
-		}
-	}
-
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-	mtr_commit(&mtr);
-
-	/* Restore old value */
-	dtuple_set_n_fields_cmp(entry, n_fields_cmp);
-
-	return(err);
-#else /* UNIV_HOTBACKUP */
-	/* This function depends on MySQL code that is not included in
-	InnoDB Hot Backup builds.  Besides, this function should never
-	be called in InnoDB Hot Backup. */
-	ut_error;
-	return(DB_FAIL);
-#endif /* UNIV_HOTBACKUP */
-}
-
-/*******************************************************************
-Checks if a unique key violation error would occur at an index entry
-insert. Sets shared locks on possible duplicate records. Works only
-for a clustered index!
*/
-static
-ulint
-row_ins_duplicate_error_in_clust(
-/*=============================*/
-				/* out: DB_SUCCESS if no error,
-				DB_DUPLICATE_KEY if error, DB_LOCK_WAIT if we
-				have to wait for a lock on a possible
-				duplicate record; any other code returned by
-				the record lock call is passed on as well */
-	btr_cur_t*	cursor,	/* in: B-tree cursor; its low_match and
-				up_match are compared with n_unique */
-	dtuple_t*	entry,	/* in: entry to insert */
-	que_thr_t*	thr,	/* in: query thread */
-	mtr_t*		mtr)	/* in: mtr; not used by this function */
-{
-#ifndef UNIV_HOTBACKUP
-	ulint	err;
-	rec_t*	rec;
-	ulint	n_unique;
-	trx_t*	trx	= thr_get_trx(thr);
-	mem_heap_t*heap		= NULL;
-	ulint	offsets_[REC_OFFS_NORMAL_SIZE];
-	ulint*	offsets		= offsets_;
-	*offsets_ = (sizeof offsets_) / sizeof *offsets_;	/* element count is stored in the first element */
-
-	UT_NOT_USED(mtr);
-
-	ut_a(cursor->index->type & DICT_CLUSTERED);
-	ut_ad(cursor->index->type & DICT_UNIQUE);
-
-	/* NOTE: For unique non-clustered indexes there may be any number
-	of delete marked records with the same value for the non-clustered
-	index key (remember multiversioning), and which differ only in
-	the row reference part of the index record, containing the
-	clustered index key fields. For such a secondary index record,
-	to avoid race condition, we must FIRST do the insertion and after
-	that check that the uniqueness condition is not breached! */
-
-	/* NOTE: A problem is that in the B-tree node pointers on an
-	upper level may match more to the entry than the actual existing
-	user records on the leaf level. So, even if low_match would suggest
-	that a duplicate key violation may occur, this may not be the case. */
-
-	n_unique = dict_index_get_n_unique(cursor->index);
-
-	if (cursor->low_match >= n_unique) {
-
-		rec = btr_cur_get_rec(cursor);
-
-		if (!page_rec_is_infimum(rec)) {
-			offsets = rec_get_offsets(rec, cursor->index, offsets,
-						  ULINT_UNDEFINED, &heap);
-
-			/* We set a lock on the possible duplicate: this
-			is needed in logical logging of MySQL to make
-			sure that in roll-forward we get the same duplicate
-			errors as in original execution */
-
-			if (trx->duplicates & TRX_DUP_IGNORE) {
-
-				/* If the SQL-query will update or replace
-				duplicate key we will take X-lock for
-				duplicates (REPLACE, LOAD DATA INFILE ...
-				REPLACE, INSERT ON DUPLICATE KEY UPDATE). */
-
-				err = row_ins_set_exclusive_rec_lock(
-					LOCK_REC_NOT_GAP, rec,
-					cursor->index, offsets, thr);
-			} else {
-
-				err = row_ins_set_shared_rec_lock(
-					LOCK_REC_NOT_GAP, rec,
-					cursor->index, offsets, thr);
-			}
-
-			if (err != DB_SUCCESS) {
-				goto func_exit;
-			}
-
-			if (row_ins_dupl_error_with_rec(
-				    rec, entry, cursor->index, offsets)) {
-				trx->error_info = cursor->index;
-				err = DB_DUPLICATE_KEY;
-				goto func_exit;
-			}
-		}
-	}
-
-	if (cursor->up_match >= n_unique) {
-
-		rec = page_rec_get_next(btr_cur_get_rec(cursor));
-
-		if (!page_rec_is_supremum(rec)) {
-			offsets = rec_get_offsets(rec, cursor->index, offsets,
-						  ULINT_UNDEFINED, &heap);
-
-			if (trx->duplicates & TRX_DUP_IGNORE) {
-
-				/* If the SQL-query will update or replace
-				duplicate key we will take X-lock for
-				duplicates (REPLACE, LOAD DATA INFILE ...
-				REPLACE, INSERT ON DUPLICATE KEY UPDATE). */
-
-				err = row_ins_set_exclusive_rec_lock(
-					LOCK_REC_NOT_GAP, rec,
-					cursor->index, offsets, thr);
-			} else {
-
-				err = row_ins_set_shared_rec_lock(
-					LOCK_REC_NOT_GAP, rec,
-					cursor->index, offsets, thr);
-			}
-
-			if (err != DB_SUCCESS) {
-				goto func_exit;
-			}
-
-			if (row_ins_dupl_error_with_rec(
-				    rec, entry, cursor->index, offsets)) {
-				trx->error_info = cursor->index;
-				err = DB_DUPLICATE_KEY;
-				goto func_exit;
-			}
-		}
-
-		ut_a(!(cursor->index->type & DICT_CLUSTERED));
-		/* This should never happen: the index was asserted to be
-		clustered at the start of the function, so the assertion
-		above fails whenever this point is reached */
-	}
-
-	err = DB_SUCCESS;
-func_exit:
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
-	return(err);
-#else /* UNIV_HOTBACKUP */
-	/* This function depends on MySQL code that is not included in
-	InnoDB Hot Backup builds.  Besides, this function should never
-	be called in InnoDB Hot Backup. */
-	ut_error;
-	return(DB_FAIL);
-#endif /* UNIV_HOTBACKUP */
-}
-
-/*******************************************************************
-Checks if an index entry has long enough common prefix with an existing
-record so that the intended insert of the entry must be changed to a modify of
-the existing record. In the case of a clustered index, the prefix must be
-n_unique fields long, and in the case of a secondary index, all fields must be
-equal. The required number of matching fields is obtained with
-dict_index_get_n_unique_in_tree(). */
-UNIV_INLINE
-ulint
-row_ins_must_modify(
-/*================*/
-				/* out: 0 if no update, ROW_INS_PREV if
-				previous should be updated; currently we
-				do the search so that only the low_match
-				record can match enough to the search tuple,
-				not the next record */
-	btr_cur_t*	cursor)	/* in: B-tree cursor */
-{
-	ulint	enough_match;
-	rec_t*	rec;
-
-	/* NOTE: (compare to the note in row_ins_duplicate_error_in_clust)
-	Because node pointers on upper levels of the B-tree may match more
-	to entry than to actual user records on the leaf level, we have to
-	check if the candidate record is actually a user record. In a
-	clustered index node pointers contain index->n_unique first fields,
-	and in the case of a secondary index, all fields of the index. */
-
-	enough_match = dict_index_get_n_unique_in_tree(cursor->index);
-
-	if (cursor->low_match >= enough_match) {
-
-		rec = btr_cur_get_rec(cursor);
-
-		if (!page_rec_is_infimum(rec)) {
-
-			return(ROW_INS_PREV);
-		}
-	}
-
-	return(0);
-}
-
-/*******************************************************************
-Tries to insert an index entry to an index. If the index is clustered
-and a record with the same unique key is found, the other record is
-necessarily marked deleted by a committed transaction, or a unique key
-violation error occurs. The delete marked record is then updated to an
-existing record, and we must write an undo log record on the delete
-marked record. If the index is secondary, and a record with exactly the
-same fields is found, the other record is necessarily marked deleted.
-It is then unmarked. Otherwise, the entry is just inserted to the index.
*/ - -ulint -row_ins_index_entry_low( -/*====================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, DB_FAIL - if pessimistic retry needed, or error code */ - ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, - depending on whether we wish optimistic or - pessimistic descent down the index tree */ - dict_index_t* index, /* in: index */ - dtuple_t* entry, /* in: index entry to insert */ - ulint* ext_vec,/* in: array containing field numbers of - externally stored fields in entry, or NULL */ - ulint n_ext_vec,/* in: number of fields in ext_vec */ - que_thr_t* thr) /* in: query thread */ -{ - btr_cur_t cursor; - ulint ignore_sec_unique = 0; - ulint modify = 0; /* remove warning */ - rec_t* insert_rec; - rec_t* rec; - ulint err; - ulint n_unique; - big_rec_t* big_rec = NULL; - mtr_t mtr; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - log_free_check(); - - mtr_start(&mtr); - - cursor.thr = thr; - - /* Note that we use PAGE_CUR_LE as the search mode, because then - the function will return in both low_match and up_match of the - cursor sensible values */ - - if (!(thr_get_trx(thr)->check_unique_secondary)) { - ignore_sec_unique = BTR_IGNORE_SEC_UNIQUE; - } - - btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, - mode | BTR_INSERT | ignore_sec_unique, - &cursor, 0, &mtr); - - if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) { - /* The insertion was made to the insert buffer already during - the search: we are done */ - - err = DB_SUCCESS; - - goto function_exit; - } - -#ifdef UNIV_DEBUG - { - page_t* page = btr_cur_get_page(&cursor); - rec_t* first_rec = page_rec_get_next( - page_get_infimum_rec(page)); - - if (UNIV_LIKELY(first_rec != page_get_supremum_rec(page))) { - ut_a(rec_get_n_fields(first_rec, index) - == dtuple_get_n_fields(entry)); - } - } -#endif - - n_unique = dict_index_get_n_unique(index); - - if (index->type & DICT_UNIQUE && (cursor.up_match >= 
n_unique - || cursor.low_match >= n_unique)) { - - if (index->type & DICT_CLUSTERED) { - /* Note that the following may return also - DB_LOCK_WAIT */ - - err = row_ins_duplicate_error_in_clust( - &cursor, entry, thr, &mtr); - if (err != DB_SUCCESS) { - - goto function_exit; - } - } else { - mtr_commit(&mtr); - err = row_ins_scan_sec_index_for_duplicate( - index, entry, thr); - mtr_start(&mtr); - - if (err != DB_SUCCESS) { - - goto function_exit; - } - - /* We did not find a duplicate and we have now - locked with s-locks the necessary records to - prevent any insertion of a duplicate by another - transaction. Let us now reposition the cursor and - continue the insertion. */ - - btr_cur_search_to_nth_level(index, 0, entry, - PAGE_CUR_LE, - mode | BTR_INSERT, - &cursor, 0, &mtr); - } - } - - modify = row_ins_must_modify(&cursor); - - if (modify != 0) { - /* There is already an index entry with a long enough common - prefix, we must convert the insert into a modify of an - existing record */ - - if (modify == ROW_INS_NEXT) { - rec = page_rec_get_next(btr_cur_get_rec(&cursor)); - - btr_cur_position(index, rec, &cursor); - } - - if (index->type & DICT_CLUSTERED) { - err = row_ins_clust_index_entry_by_modify( - mode, &cursor, &big_rec, entry, - ext_vec, n_ext_vec, thr, &mtr); - } else { - err = row_ins_sec_index_entry_by_modify( - mode, &cursor, entry, thr, &mtr); - } - - } else { - if (mode == BTR_MODIFY_LEAF) { - err = btr_cur_optimistic_insert( - 0, &cursor, entry, &insert_rec, &big_rec, - thr, &mtr); - } else { - ut_a(mode == BTR_MODIFY_TREE); - if (buf_LRU_buf_pool_running_out()) { - - err = DB_LOCK_TABLE_FULL; - - goto function_exit; - } - err = btr_cur_pessimistic_insert( - 0, &cursor, entry, &insert_rec, &big_rec, - thr, &mtr); - } - - if (err == DB_SUCCESS) { - if (ext_vec) { - rec_set_field_extern_bits(insert_rec, index, - ext_vec, n_ext_vec, - &mtr); - } - } - } - -function_exit: - mtr_commit(&mtr); - - if (big_rec) { - rec_t* rec; - mtr_start(&mtr); - - 
btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, - BTR_MODIFY_TREE, &cursor, 0, &mtr); - rec = btr_cur_get_rec(&cursor); - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - - err = btr_store_big_rec_extern_fields(index, rec, - offsets, big_rec, &mtr); - - if (modify) { - dtuple_big_rec_free(big_rec); - } else { - dtuple_convert_back_big_rec(index, entry, big_rec); - } - - mtr_commit(&mtr); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(err); -} - -/******************************************************************* -Inserts an index entry to index. Tries first optimistic, then pessimistic -descent down the tree. If the entry matches enough to a delete marked record, -performs the insert by updating or delete unmarking the delete marked -record. */ - -ulint -row_ins_index_entry( -/*================*/ - /* out: DB_SUCCESS, DB_LOCK_WAIT, - DB_DUPLICATE_KEY, or some other error code */ - dict_index_t* index, /* in: index */ - dtuple_t* entry, /* in: index entry to insert */ - ulint* ext_vec,/* in: array containing field numbers of - externally stored fields in entry, or NULL */ - ulint n_ext_vec,/* in: number of fields in ext_vec */ - que_thr_t* thr) /* in: query thread */ -{ - ulint err; - - if (UT_LIST_GET_FIRST(index->table->foreign_list)) { - err = row_ins_check_foreign_constraints(index->table, index, - entry, thr); - if (err != DB_SUCCESS) { - - return(err); - } - } - - /* Try first optimistic descent to the B-tree */ - - err = row_ins_index_entry_low(BTR_MODIFY_LEAF, index, entry, - ext_vec, n_ext_vec, thr); - if (err != DB_FAIL) { - - return(err); - } - - /* Try then pessimistic descent to the B-tree */ - - err = row_ins_index_entry_low(BTR_MODIFY_TREE, index, entry, - ext_vec, n_ext_vec, thr); - return(err); -} - -/*************************************************************** -Sets the values of the dtuple fields in entry from the values of appropriate -columns in row. 
*/ -static -void -row_ins_index_entry_set_vals( -/*=========================*/ - dict_index_t* index, /* in: index */ - dtuple_t* entry, /* in: index entry to make */ - dtuple_t* row) /* in: row */ -{ - dict_field_t* ind_field; - dfield_t* field; - dfield_t* row_field; - ulint n_fields; - ulint i; - - ut_ad(entry && row); - - n_fields = dtuple_get_n_fields(entry); - - for (i = 0; i < n_fields; i++) { - field = dtuple_get_nth_field(entry, i); - ind_field = dict_index_get_nth_field(index, i); - - row_field = dtuple_get_nth_field(row, ind_field->col->ind); - - /* Check column prefix indexes */ - if (ind_field->prefix_len > 0 - && dfield_get_len(row_field) != UNIV_SQL_NULL) { - - const dict_col_t* col - = dict_field_get_col(ind_field); - - field->len = dtype_get_at_most_n_mbchars( - col->prtype, col->mbminlen, col->mbmaxlen, - ind_field->prefix_len, - row_field->len, row_field->data); - } else { - field->len = row_field->len; - } - - field->data = row_field->data; - } -} - -/*************************************************************** -Inserts a single index entry to the table. */ -static -ulint -row_ins_index_entry_step( -/*=====================*/ - /* out: DB_SUCCESS if operation successfully - completed, else error code or DB_LOCK_WAIT */ - ins_node_t* node, /* in: row insert node */ - que_thr_t* thr) /* in: query thread */ -{ - ulint err; - - ut_ad(dtuple_check_typed(node->row)); - - row_ins_index_entry_set_vals(node->index, node->entry, node->row); - - ut_ad(dtuple_check_typed(node->entry)); - - err = row_ins_index_entry(node->index, node->entry, NULL, 0, thr); - - return(err); -} - -/*************************************************************** -Allocates a row id for row and inits the node->index field. 
*/ -UNIV_INLINE -void -row_ins_alloc_row_id_step( -/*======================*/ - ins_node_t* node) /* in: row insert node */ -{ - dulint row_id; - - ut_ad(node->state == INS_NODE_ALLOC_ROW_ID); - - if (dict_table_get_first_index(node->table)->type & DICT_UNIQUE) { - - /* No row id is stored if the clustered index is unique */ - - return; - } - - /* Fill in row id value to row */ - - row_id = dict_sys_get_new_row_id(); - - dict_sys_write_row_id(node->row_id_buf, row_id); -} - -/*************************************************************** -Gets a row to insert from the values list. */ -UNIV_INLINE -void -row_ins_get_row_from_values( -/*========================*/ - ins_node_t* node) /* in: row insert node */ -{ - que_node_t* list_node; - dfield_t* dfield; - dtuple_t* row; - ulint i; - - /* The field values are copied in the buffers of the select node and - it is safe to use them until we fetch from select again: therefore - we can just copy the pointers */ - - row = node->row; - - i = 0; - list_node = node->values_list; - - while (list_node) { - eval_exp(list_node); - - dfield = dtuple_get_nth_field(row, i); - dfield_copy_data(dfield, que_node_get_val(list_node)); - - i++; - list_node = que_node_get_next(list_node); - } -} - -/*************************************************************** -Gets a row to insert from the select list. 
*/ -UNIV_INLINE -void -row_ins_get_row_from_select( -/*========================*/ - ins_node_t* node) /* in: row insert node */ -{ - que_node_t* list_node; - dfield_t* dfield; - dtuple_t* row; - ulint i; - - /* The field values are copied in the buffers of the select node and - it is safe to use them until we fetch from select again: therefore - we can just copy the pointers */ - - row = node->row; - - i = 0; - list_node = node->select->select_list; - - while (list_node) { - dfield = dtuple_get_nth_field(row, i); - dfield_copy_data(dfield, que_node_get_val(list_node)); - - i++; - list_node = que_node_get_next(list_node); - } -} - -/*************************************************************** -Inserts a row to a table. */ - -ulint -row_ins( -/*====*/ - /* out: DB_SUCCESS if operation successfully - completed, else error code or DB_LOCK_WAIT */ - ins_node_t* node, /* in: row insert node */ - que_thr_t* thr) /* in: query thread */ -{ - ulint err; - - ut_ad(node && thr); - - if (node->state == INS_NODE_ALLOC_ROW_ID) { - - row_ins_alloc_row_id_step(node); - - node->index = dict_table_get_first_index(node->table); - node->entry = UT_LIST_GET_FIRST(node->entry_list); - - if (node->ins_type == INS_SEARCHED) { - - row_ins_get_row_from_select(node); - - } else if (node->ins_type == INS_VALUES) { - - row_ins_get_row_from_values(node); - } - - node->state = INS_NODE_INSERT_ENTRIES; - } - - ut_ad(node->state == INS_NODE_INSERT_ENTRIES); - - while (node->index != NULL) { - err = row_ins_index_entry_step(node, thr); - - if (err != DB_SUCCESS) { - - return(err); - } - - node->index = dict_table_get_next_index(node->index); - node->entry = UT_LIST_GET_NEXT(tuple_list, node->entry); - } - - ut_ad(node->entry == NULL); - - node->state = INS_NODE_ALLOC_ROW_ID; - - return(DB_SUCCESS); -} - -/*************************************************************** -Inserts a row to a table. This is a high-level function used in SQL execution -graphs. 
*/ - -que_thr_t* -row_ins_step( -/*=========*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - ins_node_t* node; - que_node_t* parent; - sel_node_t* sel_node; - trx_t* trx; - ulint err; - - ut_ad(thr); - - trx = thr_get_trx(thr); - - trx_start_if_not_started(trx); - - node = thr->run_node; - - ut_ad(que_node_get_type(node) == QUE_NODE_INSERT); - - parent = que_node_get_parent(node); - sel_node = node->select; - - if (thr->prev_node == parent) { - node->state = INS_NODE_SET_IX_LOCK; - } - - /* If this is the first time this node is executed (or when - execution resumes after wait for the table IX lock), set an - IX lock on the table and reset the possible select node. MySQL's - partitioned table code may also call an insert within the same - SQL statement AFTER it has used this table handle to do a search. - This happens, for example, when a row update moves it to another - partition. In that case, we have already set the IX lock on the - table during the search operation, and there is no need to set - it again here. But we must write trx->id to node->trx_id_buf. 
*/ - - trx_write_trx_id(node->trx_id_buf, trx->id); - - if (node->state == INS_NODE_SET_IX_LOCK) { - - /* It may be that the current session has not yet started - its transaction, or it has been committed: */ - - if (UT_DULINT_EQ(trx->id, node->trx_id)) { - /* No need to do IX-locking */ - - goto same_trx; - } - - err = lock_table(0, node->table, LOCK_IX, thr); - - if (err != DB_SUCCESS) { - - goto error_handling; - } - - node->trx_id = trx->id; -same_trx: - node->state = INS_NODE_ALLOC_ROW_ID; - - if (node->ins_type == INS_SEARCHED) { - /* Reset the cursor */ - sel_node->state = SEL_NODE_OPEN; - - /* Fetch a row to insert */ - - thr->run_node = sel_node; - - return(thr); - } - } - - if ((node->ins_type == INS_SEARCHED) - && (sel_node->state != SEL_NODE_FETCH)) { - - ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS); - - /* No more rows to insert */ - thr->run_node = parent; - - return(thr); - } - - /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */ - - err = row_ins(node, thr); - -error_handling: - trx->error_state = err; - - if (err != DB_SUCCESS) { - /* err == DB_LOCK_WAIT or SQL error detected */ - return(NULL); - } - - /* DO THE TRIGGER ACTIONS HERE */ - - if (node->ins_type == INS_SEARCHED) { - /* Fetch a row to insert */ - - thr->run_node = sel_node; - } else { - thr->run_node = que_node_get_parent(node); - } - - return(thr); -} diff --git a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c deleted file mode 100644 index 2d9ed4fc944..00000000000 --- a/storage/innobase/row/row0mysql.c +++ /dev/null @@ -1,4199 +0,0 @@ -/****************************************************** -Interface between Innobase row operations and MySQL. -Contains also create table and other data dictionary operations. 
- -(c) 2000 Innobase Oy - -Created 9/17/2000 Heikki Tuuri -*******************************************************/ - -#include "row0mysql.h" - -#ifdef UNIV_NONINL -#include "row0mysql.ic" -#endif - -#include "row0ins.h" -#include "row0sel.h" -#include "row0upd.h" -#include "row0row.h" -#include "que0que.h" -#include "pars0pars.h" -#include "dict0dict.h" -#include "dict0crea.h" -#include "dict0load.h" -#include "dict0boot.h" -#include "trx0roll.h" -#include "trx0purge.h" -#include "lock0lock.h" -#include "rem0cmp.h" -#include "log0log.h" -#include "btr0sea.h" -#include "fil0fil.h" -#include "ibuf0ibuf.h" - -/* A dummy variable used to fool the compiler */ -ibool row_mysql_identically_false = FALSE; - -/* Provide optional 4.x backwards compatibility for 5.0 and above */ -ibool row_rollback_on_timeout = FALSE; - -/* List of tables we should drop in background. ALTER TABLE in MySQL requires -that the table handler can drop the table in background when there are no -queries to it any more. Protected by the kernel mutex. */ -typedef struct row_mysql_drop_struct row_mysql_drop_t; -struct row_mysql_drop_struct{ - char* table_name; - UT_LIST_NODE_T(row_mysql_drop_t) row_mysql_drop_list; -}; - -UT_LIST_BASE_NODE_T(row_mysql_drop_t) row_mysql_drop_list; -ibool row_mysql_drop_list_inited = FALSE; - -/* Magic table names for invoking various monitor threads */ -static const char S_innodb_monitor[] = "innodb_monitor"; -static const char S_innodb_lock_monitor[] = "innodb_lock_monitor"; -static const char S_innodb_tablespace_monitor[] = "innodb_tablespace_monitor"; -static const char S_innodb_table_monitor[] = "innodb_table_monitor"; -static const char S_innodb_mem_validate[] = "innodb_mem_validate"; - -/* Evaluates to true if str1 equals str2_onstack, used for comparing -the above strings. 
*/ -#define STR_EQ(str1, str1_len, str2_onstack) \ - ((str1_len) == sizeof(str2_onstack) \ - && memcmp(str1, str2_onstack, sizeof(str2_onstack)) == 0) - -/*********************************************************************** -Determine if the given name is a name reserved for MySQL system tables. */ -static -ibool -row_mysql_is_system_table( -/*======================*/ - /* out: TRUE if name is a MySQL - system table name */ - const char* name) -{ - if (strncmp(name, "mysql/", 6) != 0) { - - return(FALSE); - } - - return(0 == strcmp(name + 6, "host") - || 0 == strcmp(name + 6, "user") - || 0 == strcmp(name + 6, "db")); -} - -/*********************************************************************** -Delays an INSERT, DELETE or UPDATE operation if the purge is lagging. */ -static -void -row_mysql_delay_if_needed(void) -/*===========================*/ -{ - if (srv_dml_needed_delay) { - os_thread_sleep(srv_dml_needed_delay); - } -} - -/*********************************************************************** -Frees the blob heap in prebuilt when no longer needed. */ - -void -row_mysql_prebuilt_free_blob_heap( -/*==============================*/ - row_prebuilt_t* prebuilt) /* in: prebuilt struct of a - ha_innobase:: table handle */ -{ - mem_heap_free(prebuilt->blob_heap); - prebuilt->blob_heap = NULL; -} - -/*********************************************************************** -Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row -format. 
*/ - -byte* -row_mysql_store_true_var_len( -/*=========================*/ - /* out: pointer to the data, we skip the 1 or 2 bytes - at the start that are used to store the len */ - byte* dest, /* in: where to store */ - ulint len, /* in: length, must fit in two bytes */ - ulint lenlen) /* in: storage length of len: either 1 or 2 bytes */ -{ - if (lenlen == 2) { - ut_a(len < 256 * 256); - - mach_write_to_2_little_endian(dest, len); - - return(dest + 2); - } - - ut_a(lenlen == 1); - ut_a(len < 256); - - mach_write_to_1(dest, len); - - return(dest + 1); -} - -/*********************************************************************** -Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and -returns a pointer to the data. */ - -byte* -row_mysql_read_true_varchar( -/*========================*/ - /* out: pointer to the data, we skip the 1 or 2 bytes - at the start that are used to store the len */ - ulint* len, /* out: variable-length field length */ - byte* field, /* in: field in the MySQL format */ - ulint lenlen) /* in: storage length of len: either 1 or 2 bytes */ -{ - if (lenlen == 2) { - *len = mach_read_from_2_little_endian(field); - - return(field + 2); - } - - ut_a(lenlen == 1); - - *len = mach_read_from_1(field); - - return(field + 1); -} - -/*********************************************************************** -Stores a reference to a BLOB in the MySQL format. */ - -void -row_mysql_store_blob_ref( -/*=====================*/ - byte* dest, /* in: where to store */ - ulint col_len, /* in: dest buffer size: determines into - how many bytes the BLOB length is stored, - the space for the length may vary from 1 - to 4 bytes */ - byte* data, /* in: BLOB data; if the value to store - is SQL NULL this should be NULL pointer */ - ulint len) /* in: BLOB length; if the value to store - is SQL NULL this should be 0; remember - also to set the NULL bit in the MySQL record - header! 
*/ -{ - /* MySQL might assume the field is set to zero except the length and - the pointer fields */ - - memset(dest, '\0', col_len); - - /* In dest there are 1 - 4 bytes reserved for the BLOB length, - and after that 8 bytes reserved for the pointer to the data. - In 32-bit architectures we only use the first 4 bytes of the pointer - slot. */ - - ut_a(col_len - 8 > 1 || len < 256); - ut_a(col_len - 8 > 2 || len < 256 * 256); - ut_a(col_len - 8 > 3 || len < 256 * 256 * 256); - - mach_write_to_n_little_endian(dest, col_len - 8, len); - - ut_memcpy(dest + col_len - 8, &data, sizeof(byte*)); -} - -/*********************************************************************** -Reads a reference to a BLOB in the MySQL format. */ - -byte* -row_mysql_read_blob_ref( -/*====================*/ - /* out: pointer to BLOB data */ - ulint* len, /* out: BLOB length */ - byte* ref, /* in: BLOB reference in the MySQL format */ - ulint col_len) /* in: BLOB reference length (not BLOB - length) */ -{ - byte* data; - - *len = mach_read_from_n_little_endian(ref, col_len - 8); - - ut_memcpy(&data, ref + col_len - 8, sizeof(byte*)); - - return(data); -} - -/****************************************************************** -Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format. -The counterpart of this function is row_sel_field_store_in_mysql_format() in -row0sel.c. */ - -byte* -row_mysql_store_col_in_innobase_format( -/*===================================*/ - /* out: up to which byte we used - buf in the conversion */ - dfield_t* dfield, /* in/out: dfield where dtype - information must be already set when - this function is called! */ - byte* buf, /* in/out: buffer for a converted - integer value; this must be at least - col_len long then! 
*/ - ibool row_format_col, /* TRUE if the mysql_data is from - a MySQL row, FALSE if from a MySQL - key value; - in MySQL, a true VARCHAR storage - format differs in a row and in a - key value: in a key value the length - is always stored in 2 bytes! */ - byte* mysql_data, /* in: MySQL column value, not - SQL NULL; NOTE that dfield may also - get a pointer to mysql_data, - therefore do not discard this as long - as dfield is used! */ - ulint col_len, /* in: MySQL column length; NOTE that - this is the storage length of the - column in the MySQL format row, not - necessarily the length of the actual - payload data; if the column is a true - VARCHAR then this is irrelevant */ - ulint comp) /* in: nonzero=compact format */ -{ - byte* ptr = mysql_data; - dtype_t* dtype; - ulint type; - ulint lenlen; - - dtype = dfield_get_type(dfield); - - type = dtype->mtype; - - if (type == DATA_INT) { - /* Store integer data in Innobase in a big-endian format, - sign bit negated if the data is a signed integer. In MySQL, - integers are stored in a little-endian format. */ - - ptr = buf + col_len; - - for (;;) { - ptr--; - *ptr = *mysql_data; - if (ptr == buf) { - break; - } - mysql_data++; - } - - if (!(dtype->prtype & DATA_UNSIGNED)) { - - *ptr = (byte) (*ptr ^ 128); - } - - buf += col_len; - } else if ((type == DATA_VARCHAR - || type == DATA_VARMYSQL - || type == DATA_BINARY)) { - - if (dtype_get_mysql_type(dtype) == DATA_MYSQL_TRUE_VARCHAR) { - /* The length of the actual data is stored to 1 or 2 - bytes at the start of the field */ - - if (row_format_col) { - if (dtype->prtype & DATA_LONG_TRUE_VARCHAR) { - lenlen = 2; - } else { - lenlen = 1; - } - } else { - /* In a MySQL key value, lenlen is always 2 */ - lenlen = 2; - } - - ptr = row_mysql_read_true_varchar(&col_len, mysql_data, - lenlen); - } else { - /* Remove trailing spaces from old style VARCHAR - columns. */ - - /* Handle UCS2 strings differently. 
*/ - ulint mbminlen = dtype_get_mbminlen(dtype); - - ptr = mysql_data; - - if (mbminlen == 2) { - /* space=0x0020 */ - /* Trim "half-chars", just in case. */ - col_len &= ~1; - - while (col_len >= 2 && ptr[col_len - 2] == 0x00 - && ptr[col_len - 1] == 0x20) { - col_len -= 2; - } - } else { - ut_a(mbminlen == 1); - /* space=0x20 */ - while (col_len > 0 - && ptr[col_len - 1] == 0x20) { - col_len--; - } - } - } - } else if (comp && type == DATA_MYSQL - && dtype_get_mbminlen(dtype) == 1 - && dtype_get_mbmaxlen(dtype) > 1) { - /* In some cases we strip trailing spaces from UTF-8 and other - multibyte charsets, from FIXED-length CHAR columns, to save - space. UTF-8 would otherwise normally use 3 * the string length - bytes to store an ASCII string! */ - - /* We assume that this CHAR field is encoded in a - variable-length character set where spaces have - 1:1 correspondence to 0x20 bytes, such as UTF-8. - - Consider a CHAR(n) field, a field of n characters. - It will contain between n * mbminlen and n * mbmaxlen bytes. - We will try to truncate it to n bytes by stripping - space padding. If the field contains single-byte - characters only, it will be truncated to n characters. - Consider a CHAR(5) field containing the string ".a " - where "." denotes a 3-byte character represented by - the bytes "$%&". After our stripping, the string will - be stored as "$%&a " (5 bytes). The string ".abc " - will be stored as "$%&abc" (6 bytes). - - The space padding will be restored in row0sel.c, function - row_sel_field_store_in_mysql_format(). */ - - ulint n_chars; - - ut_a(!(dtype_get_len(dtype) % dtype_get_mbmaxlen(dtype))); - - n_chars = dtype_get_len(dtype) / dtype_get_mbmaxlen(dtype); - - /* Strip space padding. 
*/ - while (col_len > n_chars && ptr[col_len - 1] == 0x20) { - col_len--; - } - } else if (type == DATA_BLOB && row_format_col) { - - ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len); - } - - dfield_set_data(dfield, ptr, col_len); - - return(buf); -} - -/****************************************************************** -Convert a row in the MySQL format to a row in the Innobase format. Note that -the function to convert a MySQL format key value to an InnoDB dtuple is -row_sel_convert_mysql_key_to_innobase() in row0sel.c. */ -static -void -row_mysql_convert_row_to_innobase( -/*==============================*/ - dtuple_t* row, /* in/out: Innobase row where the - field type information is already - copied there! */ - row_prebuilt_t* prebuilt, /* in: prebuilt struct where template - must be of type ROW_MYSQL_WHOLE_ROW */ - byte* mysql_rec) /* in: row in the MySQL format; - NOTE: do not discard as long as - row is used, as row may contain - pointers to this record! */ -{ - mysql_row_templ_t* templ; - dfield_t* dfield; - ulint i; - - ut_ad(prebuilt->template_type == ROW_MYSQL_WHOLE_ROW); - ut_ad(prebuilt->mysql_template); - - for (i = 0; i < prebuilt->n_template; i++) { - - templ = prebuilt->mysql_template + i; - dfield = dtuple_get_nth_field(row, i); - - if (templ->mysql_null_bit_mask != 0) { - /* Column may be SQL NULL */ - - if (mysql_rec[templ->mysql_null_byte_offset] - & (byte) (templ->mysql_null_bit_mask)) { - - /* It is SQL NULL */ - - dfield_set_data(dfield, NULL, UNIV_SQL_NULL); - - goto next_column; - } - } - - row_mysql_store_col_in_innobase_format( - dfield, - prebuilt->ins_upd_rec_buff + templ->mysql_col_offset, - TRUE, /* MySQL row format data */ - mysql_rec + templ->mysql_col_offset, - templ->mysql_col_len, - dict_table_is_comp(prebuilt->table)); -next_column: - ; - } -} - -/******************************************************************** -Handles user errors and lock waits detected by the database engine. 
*/ - -ibool -row_mysql_handle_errors( -/*====================*/ - /* out: TRUE if it was a lock wait and - we should continue running the query thread */ - ulint* new_err,/* out: possible new error encountered in - lock wait, or if no new error, the value - of trx->error_state at the entry of this - function */ - trx_t* trx, /* in: transaction */ - que_thr_t* thr, /* in: query thread */ - trx_savept_t* savept) /* in: savepoint or NULL */ -{ -#ifndef UNIV_HOTBACKUP - ulint err; - -handle_new_error: - err = trx->error_state; - - ut_a(err != DB_SUCCESS); - - trx->error_state = DB_SUCCESS; - - if ((err == DB_DUPLICATE_KEY) - || (err == DB_FOREIGN_DUPLICATE_KEY)) { - if (savept) { - /* Roll back the latest, possibly incomplete - insertion or update */ - - trx_general_rollback_for_mysql(trx, TRUE, savept); - } - } else if (err == DB_TOO_BIG_RECORD) { - if (savept) { - /* Roll back the latest, possibly incomplete - insertion or update */ - - trx_general_rollback_for_mysql(trx, TRUE, savept); - } - /* MySQL will roll back the latest SQL statement */ - } else if (err == DB_ROW_IS_REFERENCED - || err == DB_NO_REFERENCED_ROW - || err == DB_CANNOT_ADD_CONSTRAINT - || err == DB_TOO_MANY_CONCURRENT_TRXS) { - if (savept) { - /* Roll back the latest, possibly incomplete - insertion or update */ - - trx_general_rollback_for_mysql(trx, TRUE, savept); - } - /* MySQL will roll back the latest SQL statement */ - } else if (err == DB_LOCK_WAIT) { - - srv_suspend_mysql_thread(thr); - - if (trx->error_state != DB_SUCCESS) { - que_thr_stop_for_mysql(thr); - - goto handle_new_error; - } - - *new_err = err; - - return(TRUE); - - } else if (err == DB_DEADLOCK - || err == DB_LOCK_TABLE_FULL - || (err == DB_LOCK_WAIT_TIMEOUT - && row_rollback_on_timeout)) { - /* Roll back the whole transaction; this resolution was added - to version 3.23.43 */ - - trx_general_rollback_for_mysql(trx, FALSE, NULL); - - } else if (err == DB_OUT_OF_FILE_SPACE - || err == DB_LOCK_WAIT_TIMEOUT) { - - ut_ad(!(err == 
DB_LOCK_WAIT_TIMEOUT - && row_rollback_on_timeout)); - - if (savept) { - /* Roll back the latest, possibly incomplete - insertion or update */ - - trx_general_rollback_for_mysql(trx, TRUE, savept); - } - /* MySQL will roll back the latest SQL statement */ - - } else if (err == DB_MUST_GET_MORE_FILE_SPACE) { - - fputs("InnoDB: The database cannot continue" - " operation because of\n" - "InnoDB: lack of space. You must add" - " a new data file to\n" - "InnoDB: my.cnf and restart the database.\n", stderr); - - exit(1); - } else if (err == DB_CORRUPTION) { - - fputs("InnoDB: We detected index corruption" - " in an InnoDB type table.\n" - "InnoDB: You have to dump + drop + reimport" - " the table or, in\n" - "InnoDB: a case of widespread corruption," - " dump all InnoDB\n" - "InnoDB: tables and recreate the" - " whole InnoDB tablespace.\n" - "InnoDB: If the mysqld server crashes" - " after the startup or when\n" - "InnoDB: you dump the tables, look at\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "forcing-recovery.html" - " for help.\n", stderr); - - } else { - fprintf(stderr, "InnoDB: unknown error code %lu\n", - (ulong) err); - ut_error; - } - - if (trx->error_state != DB_SUCCESS) { - *new_err = trx->error_state; - } else { - *new_err = err; - } - - trx->error_state = DB_SUCCESS; - - return(FALSE); -#else /* UNIV_HOTBACKUP */ - /* This function depends on MySQL code that is not included in - InnoDB Hot Backup builds. Besides, this function should never - be called in InnoDB Hot Backup. */ - ut_error; - return(FALSE); -#endif /* UNIV_HOTBACKUP */ -} - -/************************************************************************ -Create a prebuilt struct for a MySQL table handle. 
*/ - -row_prebuilt_t* -row_create_prebuilt( -/*================*/ - /* out, own: a prebuilt struct */ - dict_table_t* table) /* in: Innobase table handle */ -{ - row_prebuilt_t* prebuilt; - mem_heap_t* heap; - dict_index_t* clust_index; - dtuple_t* ref; - ulint ref_len; - ulint i; - - heap = mem_heap_create(128); - - prebuilt = mem_heap_alloc(heap, sizeof(row_prebuilt_t)); - - prebuilt->magic_n = ROW_PREBUILT_ALLOCATED; - prebuilt->magic_n2 = ROW_PREBUILT_ALLOCATED; - - prebuilt->table = table; - - prebuilt->trx = NULL; - - prebuilt->sql_stat_start = TRUE; - - prebuilt->mysql_has_locked = FALSE; - - prebuilt->index = NULL; - - prebuilt->used_in_HANDLER = FALSE; - - prebuilt->n_template = 0; - prebuilt->mysql_template = NULL; - - prebuilt->heap = heap; - prebuilt->ins_node = NULL; - - prebuilt->ins_upd_rec_buff = NULL; - prebuilt->default_rec = NULL; - - prebuilt->upd_node = NULL; - prebuilt->ins_graph = NULL; - prebuilt->upd_graph = NULL; - - prebuilt->pcur = btr_pcur_create_for_mysql(); - prebuilt->clust_pcur = btr_pcur_create_for_mysql(); - - prebuilt->select_lock_type = LOCK_NONE; - prebuilt->stored_select_lock_type = 99999999; - - prebuilt->row_read_type = ROW_READ_WITH_LOCKS; - - prebuilt->sel_graph = NULL; - - prebuilt->search_tuple = dtuple_create( - heap, 2 * dict_table_get_n_cols(table)); - - clust_index = dict_table_get_first_index(table); - - /* Make sure that search_tuple is long enough for clustered index */ - ut_a(2 * dict_table_get_n_cols(table) >= clust_index->n_fields); - - ref_len = dict_index_get_n_unique(clust_index); - - ref = dtuple_create(heap, ref_len); - - dict_index_copy_types(ref, clust_index, ref_len); - - prebuilt->clust_ref = ref; - - for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) { - prebuilt->fetch_cache[i] = NULL; - } - - prebuilt->n_fetch_cached = 0; - - prebuilt->blob_heap = NULL; - - prebuilt->old_vers_heap = NULL; - - prebuilt->autoinc_error = 0; - prebuilt->autoinc_offset = 0; - - /* Default to 1, we will set the actual value 
later in - ha_innobase::get_auto_increment(). */ - prebuilt->autoinc_increment = 1; - - prebuilt->autoinc_last_value = 0; - - return(prebuilt); -} - -/************************************************************************ -Free a prebuilt struct for a MySQL table handle. */ - -void -row_prebuilt_free( -/*==============*/ - row_prebuilt_t* prebuilt) /* in, own: prebuilt struct */ -{ - ulint i; - - if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED - || prebuilt->magic_n2 != ROW_PREBUILT_ALLOCATED) { - fprintf(stderr, - "InnoDB: Error: trying to free a corrupt\n" - "InnoDB: table handle. Magic n %lu," - " magic n2 %lu, table name", - (ulong) prebuilt->magic_n, - (ulong) prebuilt->magic_n2); - ut_print_name(stderr, NULL, TRUE, prebuilt->table->name); - putc('\n', stderr); - - mem_analyze_corruption(prebuilt); - - ut_error; - } - - prebuilt->magic_n = ROW_PREBUILT_FREED; - prebuilt->magic_n2 = ROW_PREBUILT_FREED; - - btr_pcur_free_for_mysql(prebuilt->pcur); - btr_pcur_free_for_mysql(prebuilt->clust_pcur); - - if (prebuilt->mysql_template) { - mem_free(prebuilt->mysql_template); - } - - if (prebuilt->ins_graph) { - que_graph_free_recursive(prebuilt->ins_graph); - } - - if (prebuilt->sel_graph) { - que_graph_free_recursive(prebuilt->sel_graph); - } - - if (prebuilt->upd_graph) { - que_graph_free_recursive(prebuilt->upd_graph); - } - - if (prebuilt->blob_heap) { - mem_heap_free(prebuilt->blob_heap); - } - - if (prebuilt->old_vers_heap) { - mem_heap_free(prebuilt->old_vers_heap); - } - - for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) { - if (prebuilt->fetch_cache[i] != NULL) { - - if ((ROW_PREBUILT_FETCH_MAGIC_N != mach_read_from_4( - (prebuilt->fetch_cache[i]) - 4)) - || (ROW_PREBUILT_FETCH_MAGIC_N != mach_read_from_4( - (prebuilt->fetch_cache[i]) - + prebuilt->mysql_row_len))) { - fputs("InnoDB: Error: trying to free" - " a corrupt fetch buffer.\n", stderr); - - mem_analyze_corruption( - prebuilt->fetch_cache[i]); - - ut_error; - } - - mem_free((prebuilt->fetch_cache[i]) - 
4); - } - } - - dict_table_decrement_handle_count(prebuilt->table); - - mem_heap_free(prebuilt->heap); -} - -/************************************************************************* -Updates the transaction pointers in query graphs stored in the prebuilt -struct. */ - -void -row_update_prebuilt_trx( -/*====================*/ - /* out: prebuilt dtuple */ - row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL - handle */ - trx_t* trx) /* in: transaction handle */ -{ - if (trx->magic_n != TRX_MAGIC_N) { - fprintf(stderr, - "InnoDB: Error: trying to use a corrupt\n" - "InnoDB: trx handle. Magic n %lu\n", - (ulong) trx->magic_n); - - mem_analyze_corruption(trx); - - ut_error; - } - - if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) { - fprintf(stderr, - "InnoDB: Error: trying to use a corrupt\n" - "InnoDB: table handle. Magic n %lu, table name", - (ulong) prebuilt->magic_n); - ut_print_name(stderr, NULL, TRUE, prebuilt->table->name); - putc('\n', stderr); - - mem_analyze_corruption(prebuilt); - - ut_error; - } - - prebuilt->trx = trx; - - if (prebuilt->ins_graph) { - prebuilt->ins_graph->trx = trx; - } - - if (prebuilt->upd_graph) { - prebuilt->upd_graph->trx = trx; - } - - if (prebuilt->sel_graph) { - prebuilt->sel_graph->trx = trx; - } -} - -/************************************************************************* -Gets pointer to a prebuilt dtuple used in insertions. If the insert graph -has not yet been built in the prebuilt struct, then this function first -builds it. 
*/ -static -dtuple_t* -row_get_prebuilt_insert_row( -/*========================*/ - /* out: prebuilt dtuple; the column - type information is also set in it */ - row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL - handle */ -{ - ins_node_t* node; - dtuple_t* row; - dict_table_t* table = prebuilt->table; - ulint i; - - ut_ad(prebuilt && table && prebuilt->trx); - - if (prebuilt->ins_node == NULL) { - - /* Not called before for this handle: create an insert node - and query graph to the prebuilt struct */ - - node = ins_node_create(INS_DIRECT, table, prebuilt->heap); - - prebuilt->ins_node = node; - - if (prebuilt->ins_upd_rec_buff == NULL) { - prebuilt->ins_upd_rec_buff = mem_heap_alloc( - prebuilt->heap, prebuilt->mysql_row_len); - } - - row = dtuple_create(prebuilt->heap, - dict_table_get_n_cols(table)); - - dict_table_copy_types(row, table); - - /* We init the value of every field to the SQL NULL to avoid - a debug assertion from failing */ - - for (i = 0; i < dtuple_get_n_fields(row); i++) { - - dtuple_get_nth_field(row, i)->len = UNIV_SQL_NULL; - } - - ins_node_set_new_row(node, row); - - prebuilt->ins_graph = que_node_get_parent( - pars_complete_graph_for_exec(node, - prebuilt->trx, - prebuilt->heap)); - prebuilt->ins_graph->state = QUE_FORK_ACTIVE; - } - - return(prebuilt->ins_node->row); -} - -/************************************************************************* -Updates the table modification counter and calculates new estimates -for table and index statistics if necessary. */ -UNIV_INLINE -void -row_update_statistics_if_needed( -/*============================*/ - dict_table_t* table) /* in: table */ -{ - ulint counter; - - counter = table->stat_modified_counter; - - table->stat_modified_counter = counter + 1; - - /* Calculate new statistics if 1 / 16 of table has been modified - since the last time a statistics batch was run, or if - stat_modified_counter > 2 000 000 000 (to avoid wrap-around). 
- We calculate statistics at most every 16th round, since we may have - a counter table which is very small and updated very often. */ - - if (counter > 2000000000 - || ((ib_longlong)counter > 16 + table->stat_n_rows / 16)) { - - dict_update_statistics(table); - } -} - -/************************************************************************* -Unlocks an AUTO_INC type lock possibly reserved by trx. */ - -void -row_unlock_table_autoinc_for_mysql( -/*===============================*/ - trx_t* trx) /* in: transaction */ -{ - if (!trx->auto_inc_lock) { - - return; - } - - lock_table_unlock_auto_inc(trx); -} - -/************************************************************************* -Sets an AUTO_INC type lock on the table mentioned in prebuilt. The -AUTO_INC lock gives exclusive access to the auto-inc counter of the -table. The lock is reserved only for the duration of an SQL statement. -It is not compatible with another AUTO_INC or exclusive lock on the -table. */ - -int -row_lock_table_autoinc_for_mysql( -/*=============================*/ - /* out: error code or DB_SUCCESS */ - row_prebuilt_t* prebuilt) /* in: prebuilt struct in the MySQL - table handle */ -{ - trx_t* trx = prebuilt->trx; - ins_node_t* node = prebuilt->ins_node; - que_thr_t* thr; - ulint err; - ibool was_lock_wait; - - ut_ad(trx); - ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); - - if (trx->auto_inc_lock) { - - return(DB_SUCCESS); - } - - trx->op_info = "setting auto-inc lock"; - - if (node == NULL) { - row_get_prebuilt_insert_row(prebuilt); - node = prebuilt->ins_node; - } - - /* We use the insert query graph as the dummy graph needed - in the lock module call */ - - thr = que_fork_get_first_thr(prebuilt->ins_graph); - - que_thr_move_to_run_state_for_mysql(thr, trx); - -run_again: - thr->run_node = node; - thr->prev_node = node; - - /* It may be that the current session has not yet started - its transaction, or it has been committed: */ - - trx_start_if_not_started(trx); - - err = 
lock_table(0, prebuilt->table, LOCK_AUTO_INC, thr); - - trx->error_state = err; - - if (err != DB_SUCCESS) { - que_thr_stop_for_mysql(thr); - - was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL); - - if (was_lock_wait) { - goto run_again; - } - - trx->op_info = ""; - - return((int) err); - } - - que_thr_stop_for_mysql_no_error(thr, trx); - - trx->op_info = ""; - - return((int) err); -} - -/************************************************************************* -Sets a table lock on the table mentioned in prebuilt. */ - -int -row_lock_table_for_mysql( -/*=====================*/ - /* out: error code or DB_SUCCESS */ - row_prebuilt_t* prebuilt, /* in: prebuilt struct in the MySQL - table handle */ - dict_table_t* table, /* in: table to lock, or NULL - if prebuilt->table should be - locked as - prebuilt->select_lock_type */ - ulint mode) /* in: lock mode of table - (ignored if table==NULL) */ -{ - trx_t* trx = prebuilt->trx; - que_thr_t* thr; - ulint err; - ibool was_lock_wait; - - ut_ad(trx); - ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); - - trx->op_info = "setting table lock"; - - if (prebuilt->sel_graph == NULL) { - /* Build a dummy select query graph */ - row_prebuild_sel_graph(prebuilt); - } - - /* We use the select query graph as the dummy graph needed - in the lock module call */ - - thr = que_fork_get_first_thr(prebuilt->sel_graph); - - que_thr_move_to_run_state_for_mysql(thr, trx); - -run_again: - thr->run_node = thr; - thr->prev_node = thr->common.parent; - - /* It may be that the current session has not yet started - its transaction, or it has been committed: */ - - trx_start_if_not_started(trx); - - if (table) { - err = lock_table(0, table, mode, thr); - } else { - err = lock_table(0, prebuilt->table, - prebuilt->select_lock_type, thr); - } - - trx->error_state = err; - - if (err != DB_SUCCESS) { - que_thr_stop_for_mysql(thr); - - was_lock_wait = row_mysql_handle_errors(&err, trx, thr, NULL); - - if (was_lock_wait) { - goto 
run_again; - } - - trx->op_info = ""; - - return((int) err); - } - - que_thr_stop_for_mysql_no_error(thr, trx); - - trx->op_info = ""; - - return((int) err); -} - -/************************************************************************* -Does an insert for MySQL. */ - -int -row_insert_for_mysql( -/*=================*/ - /* out: error code or DB_SUCCESS */ - byte* mysql_rec, /* in: row in the MySQL format */ - row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL - handle */ -{ - trx_savept_t savept; - que_thr_t* thr; - ulint err; - ibool was_lock_wait; - trx_t* trx = prebuilt->trx; - ins_node_t* node = prebuilt->ins_node; - - ut_ad(trx); - ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); - - if (prebuilt->table->ibd_file_missing) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error:\n" - "InnoDB: MySQL is trying to use a table handle" - " but the .ibd file for\n" - "InnoDB: table %s does not exist.\n" - "InnoDB: Have you deleted the .ibd file" - " from the database directory under\n" - "InnoDB: the MySQL datadir, or have you" - " used DISCARD TABLESPACE?\n" - "InnoDB: Look from\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "innodb-troubleshooting.html\n" - "InnoDB: how you can resolve the problem.\n", - prebuilt->table->name); - return(DB_ERROR); - } - - if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) { - fprintf(stderr, - "InnoDB: Error: trying to free a corrupt\n" - "InnoDB: table handle. Magic n %lu, table name", - (ulong) prebuilt->magic_n); - ut_print_name(stderr, prebuilt->trx, TRUE, - prebuilt->table->name); - putc('\n', stderr); - - mem_analyze_corruption(prebuilt); - - ut_error; - } - - if (srv_created_new_raw || srv_force_recovery) { - fputs("InnoDB: A new raw disk partition was initialized or\n" - "InnoDB: innodb_force_recovery is on: we do not allow\n" - "InnoDB: database modifications by the user. 
Shut down\n" - "InnoDB: mysqld and edit my.cnf so that" - " newraw is replaced\n" - "InnoDB: with raw, and innodb_force_... is removed.\n", - stderr); - - return(DB_ERROR); - } - - trx->op_info = "inserting"; - - row_mysql_delay_if_needed(); - - trx_start_if_not_started(trx); - - if (node == NULL) { - row_get_prebuilt_insert_row(prebuilt); - node = prebuilt->ins_node; - } - - row_mysql_convert_row_to_innobase(node->row, prebuilt, mysql_rec); - - savept = trx_savept_take(trx); - - thr = que_fork_get_first_thr(prebuilt->ins_graph); - - if (prebuilt->sql_stat_start) { - node->state = INS_NODE_SET_IX_LOCK; - prebuilt->sql_stat_start = FALSE; - } else { - node->state = INS_NODE_ALLOC_ROW_ID; - } - - que_thr_move_to_run_state_for_mysql(thr, trx); - -run_again: - thr->run_node = node; - thr->prev_node = node; - - row_ins_step(thr); - - err = trx->error_state; - - if (err != DB_SUCCESS) { - que_thr_stop_for_mysql(thr); - - /* TODO: what is this? */ thr->lock_state= QUE_THR_LOCK_ROW; - - was_lock_wait = row_mysql_handle_errors(&err, trx, thr, - &savept); - thr->lock_state= QUE_THR_LOCK_NOLOCK; - - if (was_lock_wait) { - goto run_again; - } - - trx->op_info = ""; - - return((int) err); - } - - que_thr_stop_for_mysql_no_error(thr, trx); - - prebuilt->table->stat_n_rows++; - - srv_n_rows_inserted++; - - if (prebuilt->table->stat_n_rows == 0) { - /* Avoid wrap-over */ - prebuilt->table->stat_n_rows--; - } - - row_update_statistics_if_needed(prebuilt->table); - trx->op_info = ""; - - return((int) err); -} - -/************************************************************************* -Builds a dummy query graph used in selects. 
*/ - -void -row_prebuild_sel_graph( -/*===================*/ - row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL - handle */ -{ - sel_node_t* node; - - ut_ad(prebuilt && prebuilt->trx); - - if (prebuilt->sel_graph == NULL) { - - node = sel_node_create(prebuilt->heap); - - prebuilt->sel_graph = que_node_get_parent( - pars_complete_graph_for_exec(node, - prebuilt->trx, - prebuilt->heap)); - - prebuilt->sel_graph->state = QUE_FORK_ACTIVE; - } -} - -/************************************************************************* -Creates an query graph node of 'update' type to be used in the MySQL -interface. */ - -upd_node_t* -row_create_update_node_for_mysql( -/*=============================*/ - /* out, own: update node */ - dict_table_t* table, /* in: table to update */ - mem_heap_t* heap) /* in: mem heap from which allocated */ -{ - upd_node_t* node; - - node = upd_node_create(heap); - - node->in_mysql_interface = TRUE; - node->is_delete = FALSE; - node->searched_update = FALSE; - node->select_will_do_update = FALSE; - node->select = NULL; - node->pcur = btr_pcur_create_for_mysql(); - node->table = table; - - node->update = upd_create(dict_table_get_n_cols(table), heap); - - node->update_n_fields = dict_table_get_n_cols(table); - - UT_LIST_INIT(node->columns); - node->has_clust_rec_x_lock = TRUE; - node->cmpl_info = 0; - - node->table_sym = NULL; - node->col_assign_list = NULL; - - return(node); -} - -/************************************************************************* -Gets pointer to a prebuilt update vector used in updates. If the update -graph has not yet been built in the prebuilt struct, then this function -first builds it. 
*/ - -upd_t* -row_get_prebuilt_update_vector( -/*===========================*/ - /* out: prebuilt update vector */ - row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL - handle */ -{ - dict_table_t* table = prebuilt->table; - upd_node_t* node; - - ut_ad(prebuilt && table && prebuilt->trx); - - if (prebuilt->upd_node == NULL) { - - /* Not called before for this handle: create an update node - and query graph to the prebuilt struct */ - - node = row_create_update_node_for_mysql(table, prebuilt->heap); - - prebuilt->upd_node = node; - - prebuilt->upd_graph = que_node_get_parent( - pars_complete_graph_for_exec(node, - prebuilt->trx, - prebuilt->heap)); - prebuilt->upd_graph->state = QUE_FORK_ACTIVE; - } - - return(prebuilt->upd_node->update); -} - -/************************************************************************* -Does an update or delete of a row for MySQL. */ - -int -row_update_for_mysql( -/*=================*/ - /* out: error code or DB_SUCCESS */ - byte* mysql_rec, /* in: the row to be updated, in - the MySQL format */ - row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL - handle */ -{ - trx_savept_t savept; - ulint err; - que_thr_t* thr; - ibool was_lock_wait; - dict_index_t* clust_index; - /* ulint ref_len; */ - upd_node_t* node; - dict_table_t* table = prebuilt->table; - trx_t* trx = prebuilt->trx; - - ut_ad(prebuilt && trx); - ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); - UT_NOT_USED(mysql_rec); - - if (prebuilt->table->ibd_file_missing) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error:\n" - "InnoDB: MySQL is trying to use a table handle" - " but the .ibd file for\n" - "InnoDB: table %s does not exist.\n" - "InnoDB: Have you deleted the .ibd file" - " from the database directory under\n" - "InnoDB: the MySQL datadir, or have you" - " used DISCARD TABLESPACE?\n" - "InnoDB: Look from\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "innodb-troubleshooting.html\n" - "InnoDB: how you can resolve the 
problem.\n", - prebuilt->table->name); - return(DB_ERROR); - } - - if (prebuilt->magic_n != ROW_PREBUILT_ALLOCATED) { - fprintf(stderr, - "InnoDB: Error: trying to free a corrupt\n" - "InnoDB: table handle. Magic n %lu, table name", - (ulong) prebuilt->magic_n); - ut_print_name(stderr, prebuilt->trx, TRUE, - prebuilt->table->name); - putc('\n', stderr); - - mem_analyze_corruption(prebuilt); - - ut_error; - } - - if (srv_created_new_raw || srv_force_recovery) { - fputs("InnoDB: A new raw disk partition was initialized or\n" - "InnoDB: innodb_force_recovery is on: we do not allow\n" - "InnoDB: database modifications by the user. Shut down\n" - "InnoDB: mysqld and edit my.cnf so that newraw" - " is replaced\n" - "InnoDB: with raw, and innodb_force_... is removed.\n", - stderr); - - return(DB_ERROR); - } - - trx->op_info = "updating or deleting"; - - row_mysql_delay_if_needed(); - - trx_start_if_not_started(trx); - - node = prebuilt->upd_node; - - clust_index = dict_table_get_first_index(table); - - if (prebuilt->pcur->btr_cur.index == clust_index) { - btr_pcur_copy_stored_position(node->pcur, prebuilt->pcur); - } else { - btr_pcur_copy_stored_position(node->pcur, - prebuilt->clust_pcur); - } - - ut_a(node->pcur->rel_pos == BTR_PCUR_ON); - - /* MySQL seems to call rnd_pos before updating each row it - has cached: we can get the correct cursor position from - prebuilt->pcur; NOTE that we cannot build the row reference - from mysql_rec if the clustered index was automatically - generated for the table: MySQL does not know anything about - the row id used as the clustered index key */ - - savept = trx_savept_take(trx); - - thr = que_fork_get_first_thr(prebuilt->upd_graph); - - node->state = UPD_NODE_UPDATE_CLUSTERED; - - ut_ad(!prebuilt->sql_stat_start); - - que_thr_move_to_run_state_for_mysql(thr, trx); - -run_again: - thr->run_node = node; - thr->prev_node = node; - - row_upd_step(thr); - - err = trx->error_state; - - if (err != DB_SUCCESS) { - 
que_thr_stop_for_mysql(thr); - - if (err == DB_RECORD_NOT_FOUND) { - trx->error_state = DB_SUCCESS; - trx->op_info = ""; - - return((int) err); - } - - thr->lock_state= QUE_THR_LOCK_ROW; - was_lock_wait = row_mysql_handle_errors(&err, trx, thr, - &savept); - thr->lock_state= QUE_THR_LOCK_NOLOCK; - - if (was_lock_wait) { - goto run_again; - } - - trx->op_info = ""; - - return((int) err); - } - - que_thr_stop_for_mysql_no_error(thr, trx); - - if (node->is_delete) { - if (prebuilt->table->stat_n_rows > 0) { - prebuilt->table->stat_n_rows--; - } - - srv_n_rows_deleted++; - } else { - srv_n_rows_updated++; - } - - row_update_statistics_if_needed(prebuilt->table); - - trx->op_info = ""; - - return((int) err); -} - -/************************************************************************* -This can only be used when srv_locks_unsafe_for_binlog is TRUE or -this session is using a READ COMMITTED isolation level. Before -calling this function we must use trx_reset_new_rec_lock_info() and -trx_register_new_rec_lock() to store the information which new record locks -really were set. This function removes a newly set lock under prebuilt->pcur, -and also under prebuilt->clust_pcur. Currently, this is only used and tested -in the case of an UPDATE or a DELETE statement, where the row lock is of the -LOCK_X type. -Thus, this implements a 'mini-rollback' that releases the latest record -locks we set. */ - -int -row_unlock_for_mysql( -/*=================*/ - /* out: error code or DB_SUCCESS */ - row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL - handle */ - ibool has_latches_on_recs)/* TRUE if called so that we have - the latches on the records under pcur - and clust_pcur, and we do not need to - reposition the cursors. 
*/ -{ - btr_pcur_t* pcur = prebuilt->pcur; - btr_pcur_t* clust_pcur = prebuilt->clust_pcur; - trx_t* trx = prebuilt->trx; - - ut_ad(prebuilt && trx); - ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); - - if (UNIV_UNLIKELY - (!srv_locks_unsafe_for_binlog - && trx->isolation_level != TRX_ISO_READ_COMMITTED)) { - - fprintf(stderr, - "InnoDB: Error: calling row_unlock_for_mysql though\n" - "InnoDB: innodb_locks_unsafe_for_binlog is FALSE and\n" - "InnoDB: this session is not using" - " READ COMMITTED isolation level.\n"); - - return(DB_SUCCESS); - } - - trx->op_info = "unlock_row"; - - if (prebuilt->new_rec_locks >= 1) { - - rec_t* rec; - dict_index_t* index; - dulint rec_trx_id; - mtr_t mtr; - - mtr_start(&mtr); - - /* Restore the cursor position and find the record */ - - if (!has_latches_on_recs) { - btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, &mtr); - } - - rec = btr_pcur_get_rec(pcur); - index = btr_pcur_get_btr_cur(pcur)->index; - - if (prebuilt->new_rec_locks >= 2) { - /* Restore the cursor position and find the record - in the clustered index. */ - - if (!has_latches_on_recs) { - btr_pcur_restore_position(BTR_SEARCH_LEAF, - clust_pcur, &mtr); - } - - rec = btr_pcur_get_rec(clust_pcur); - index = btr_pcur_get_btr_cur(clust_pcur)->index; - } - - /* If the record has been modified by this - transaction, do not unlock it. 
*/ - ut_a(index->type & DICT_CLUSTERED); - - if (index->trx_id_offset) { - rec_trx_id = trx_read_trx_id(rec - + index->trx_id_offset); - } else { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - - rec_trx_id = row_get_rec_trx_id(rec, index, offsets); - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } - - if (ut_dulint_cmp(rec_trx_id, trx->id) != 0) { - /* We did not update the record: unlock it */ - - rec = btr_pcur_get_rec(pcur); - index = btr_pcur_get_btr_cur(pcur)->index; - - lock_rec_unlock(trx, rec, prebuilt->select_lock_type); - - if (prebuilt->new_rec_locks >= 2) { - rec = btr_pcur_get_rec(clust_pcur); - index = btr_pcur_get_btr_cur(clust_pcur)->index; - - lock_rec_unlock(trx, rec, - prebuilt->select_lock_type); - } - } - - mtr_commit(&mtr); - } - - trx->op_info = ""; - - return(DB_SUCCESS); -} - -/************************************************************************** -Does a cascaded delete or set null in a foreign key operation. */ - -ulint -row_update_cascade_for_mysql( -/*=========================*/ - /* out: error code or DB_SUCCESS */ - que_thr_t* thr, /* in: query thread */ - upd_node_t* node, /* in: update node used in the cascade - or set null operation */ - dict_table_t* table) /* in: table where we do the operation */ -{ - ulint err; - trx_t* trx; - - trx = thr_get_trx(thr); -run_again: - thr->run_node = node; - thr->prev_node = node; - - row_upd_step(thr); - - err = trx->error_state; - - /* Note that the cascade node is a subnode of another InnoDB - query graph node. We do a normal lock wait in this node, but - all errors are handled by the parent node. 
*/ - - if (err == DB_LOCK_WAIT) { - /* Handle lock wait here */ - - que_thr_stop_for_mysql(thr); - - srv_suspend_mysql_thread(thr); - - /* Note that a lock wait may also end in a lock wait timeout, - or this transaction is picked as a victim in selective - deadlock resolution */ - - if (trx->error_state != DB_SUCCESS) { - - return(trx->error_state); - } - - /* Retry operation after a normal lock wait */ - - goto run_again; - } - - if (err != DB_SUCCESS) { - - return(err); - } - - if (node->is_delete) { - if (table->stat_n_rows > 0) { - table->stat_n_rows--; - } - - srv_n_rows_deleted++; - } else { - srv_n_rows_updated++; - } - - row_update_statistics_if_needed(table); - - return(err); -} - -/************************************************************************* -Checks if a table is such that we automatically created a clustered -index on it (on row id). */ - -ibool -row_table_got_default_clust_index( -/*==============================*/ - dict_table_t* table) -{ - const dict_index_t* clust_index; - - clust_index = dict_table_get_first_index(table); - - return(dict_index_get_nth_col(clust_index, 0)->mtype == DATA_SYS); -} - -/************************************************************************* -Calculates the key number used inside MySQL for an Innobase index. 
We have -to take into account if we generated a default clustered index for the table */ - -ulint -row_get_mysql_key_number_for_index( -/*===============================*/ - dict_index_t* index) -{ - dict_index_t* ind; - ulint i; - - ut_a(index); - - i = 0; - ind = dict_table_get_first_index(index->table); - - while (index != ind) { - ind = dict_table_get_next_index(ind); - i++; - } - - if (row_table_got_default_clust_index(index->table)) { - ut_a(i > 0); - i--; - } - - return(i); -} - -/************************************************************************* -Locks the data dictionary in shared mode from modifications, for performing -foreign key check, rollback, or other operation invisible to MySQL. */ - -void -row_mysql_freeze_data_dictionary( -/*=============================*/ - trx_t* trx) /* in: transaction */ -{ - ut_a(trx->dict_operation_lock_mode == 0); - - rw_lock_s_lock(&dict_operation_lock); - - trx->dict_operation_lock_mode = RW_S_LATCH; -} - -/************************************************************************* -Unlocks the data dictionary shared lock. */ - -void -row_mysql_unfreeze_data_dictionary( -/*===============================*/ - trx_t* trx) /* in: transaction */ -{ - ut_a(trx->dict_operation_lock_mode == RW_S_LATCH); - - rw_lock_s_unlock(&dict_operation_lock); - - trx->dict_operation_lock_mode = 0; -} - -/************************************************************************* -Locks the data dictionary exclusively for performing a table create or other -data dictionary modification operation. 
*/ - -void -row_mysql_lock_data_dictionary( -/*===========================*/ - trx_t* trx) /* in: transaction */ -{ - ut_a(trx->dict_operation_lock_mode == 0 - || trx->dict_operation_lock_mode == RW_X_LATCH); - - /* Serialize data dictionary operations with dictionary mutex: - no deadlocks or lock waits can occur then in these operations */ - - rw_lock_x_lock(&dict_operation_lock); - trx->dict_operation_lock_mode = RW_X_LATCH; - - mutex_enter(&(dict_sys->mutex)); -} - -/************************************************************************* -Unlocks the data dictionary exclusive lock. */ - -void -row_mysql_unlock_data_dictionary( -/*=============================*/ - trx_t* trx) /* in: transaction */ -{ - ut_a(trx->dict_operation_lock_mode == RW_X_LATCH); - - /* Serialize data dictionary operations with dictionary mutex: - no deadlocks can occur then in these operations */ - - mutex_exit(&(dict_sys->mutex)); - rw_lock_x_unlock(&dict_operation_lock); - - trx->dict_operation_lock_mode = 0; -} - -/************************************************************************* -Creates a table for MySQL. If the name of the table ends in -one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor", -"innodb_table_monitor", then this will also start the printing of monitor -output by the master thread. If the table name ends in "innodb_mem_validate", -InnoDB will try to invoke mem_validate(). 
*/ - -int -row_create_table_for_mysql( -/*=======================*/ - /* out: error code or DB_SUCCESS */ - dict_table_t* table, /* in: table definition */ - trx_t* trx) /* in: transaction handle */ -{ - tab_node_t* node; - mem_heap_t* heap; - que_thr_t* thr; - const char* table_name; - ulint table_name_len; - ulint err; - ulint i; - - ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_ad(trx->dict_operation_lock_mode == RW_X_LATCH); - - if (srv_created_new_raw) { - fputs("InnoDB: A new raw disk partition was initialized:\n" - "InnoDB: we do not allow database modifications" - " by the user.\n" - "InnoDB: Shut down mysqld and edit my.cnf so that newraw" - " is replaced with raw.\n", stderr); - - dict_mem_table_free(table); - trx_commit_for_mysql(trx); - - return(DB_ERROR); - } - - trx->op_info = "creating table"; - - if (row_mysql_is_system_table(table->name)) { - - fprintf(stderr, - "InnoDB: Error: trying to create a MySQL system" - " table %s of type InnoDB.\n" - "InnoDB: MySQL system tables must be" - " of the MyISAM type!\n", - table->name); - - dict_mem_table_free(table); - trx_commit_for_mysql(trx); - - return(DB_ERROR); - } - - /* Check that no reserved column names are used. */ - for (i = 0; i < dict_table_get_n_user_cols(table); i++) { - if (dict_col_name_is_reserved( - dict_table_get_col_name(table, i))) { - - dict_mem_table_free(table); - trx_commit_for_mysql(trx); - - return(DB_ERROR); - } - } - - trx_start_if_not_started(trx); - - /* The table name is prefixed with the database name and a '/'. - Certain table names starting with 'innodb_' have their special - meaning regardless of the database name. Thus, we need to - ignore the database name prefix in the comparisons. 
*/ - table_name = strchr(table->name, '/'); - ut_a(table_name); - table_name++; - table_name_len = strlen(table_name) + 1; - - if (STR_EQ(table_name, table_name_len, S_innodb_monitor)) { - - /* Table equals "innodb_monitor": - start monitor prints */ - - srv_print_innodb_monitor = TRUE; - - /* The lock timeout monitor thread also takes care - of InnoDB monitor prints */ - - os_event_set(srv_lock_timeout_thread_event); - } else if (STR_EQ(table_name, table_name_len, - S_innodb_lock_monitor)) { - - srv_print_innodb_monitor = TRUE; - srv_print_innodb_lock_monitor = TRUE; - os_event_set(srv_lock_timeout_thread_event); - } else if (STR_EQ(table_name, table_name_len, - S_innodb_tablespace_monitor)) { - - srv_print_innodb_tablespace_monitor = TRUE; - os_event_set(srv_lock_timeout_thread_event); - } else if (STR_EQ(table_name, table_name_len, - S_innodb_table_monitor)) { - - srv_print_innodb_table_monitor = TRUE; - os_event_set(srv_lock_timeout_thread_event); - } else if (STR_EQ(table_name, table_name_len, - S_innodb_mem_validate)) { - /* We define here a debugging feature intended for - developers */ - - fputs("Validating InnoDB memory:\n" - "to use this feature you must compile InnoDB with\n" - "UNIV_MEM_DEBUG defined in univ.i and" - " the server must be\n" - "quiet because allocation from a mem heap" - " is not protected\n" - "by any semaphore.\n", stderr); -#ifdef UNIV_MEM_DEBUG - ut_a(mem_validate()); - fputs("Memory validated\n", stderr); -#else /* UNIV_MEM_DEBUG */ - fputs("Memory NOT validated (recompile with UNIV_MEM_DEBUG)\n", - stderr); -#endif /* UNIV_MEM_DEBUG */ - } - - heap = mem_heap_create(512); - - trx->dict_operation = TRUE; - - node = tab_create_graph_create(table, heap); - - thr = pars_complete_graph_for_exec(node, trx, heap); - - ut_a(thr == que_fork_start_command(que_node_get_parent(thr))); - que_run_threads(thr); - - err = trx->error_state; - - if (err != DB_SUCCESS) { - /* We have special error handling here */ - - trx->error_state = DB_SUCCESS; - 
- trx_general_rollback_for_mysql(trx, FALSE, NULL); - - if (err == DB_OUT_OF_FILE_SPACE) { - ut_print_timestamp(stderr); - - fputs(" InnoDB: Warning: cannot create table ", - stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs(" because tablespace full\n", stderr); - - if (dict_table_get_low(table->name)) { - - row_drop_table_for_mysql(table->name, trx, - FALSE); - } - - } else if (err == DB_DUPLICATE_KEY) { - ut_print_timestamp(stderr); - - fputs(" InnoDB: Error: table ", stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs(" already exists in InnoDB internal\n" - "InnoDB: data dictionary. Have you deleted" - " the .frm file\n" - "InnoDB: and not used DROP TABLE?" - " Have you used DROP DATABASE\n" - "InnoDB: for InnoDB tables in" - " MySQL version <= 3.23.43?\n" - "InnoDB: See the Restrictions section" - " of the InnoDB manual.\n" - "InnoDB: You can drop the orphaned table" - " inside InnoDB by\n" - "InnoDB: creating an InnoDB table with" - " the same name in another\n" - "InnoDB: database and copying the .frm file" - " to the current database.\n" - "InnoDB: Then MySQL thinks the table exists," - " and DROP TABLE will\n" - "InnoDB: succeed.\n" - "InnoDB: You can look for further help from\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "innodb-troubleshooting.html\n", - stderr); - } - - /* We may also get err == DB_ERROR if the .ibd file for the - table already exists */ - - trx->error_state = DB_SUCCESS; - } - - que_graph_free((que_t*) que_node_get_parent(thr)); - - trx->op_info = ""; - - return((int) err); -} - -/************************************************************************* -Does an index creation operation for MySQL. TODO: currently failure -to create an index results in dropping the whole table! This is no problem -currently as all indexes must be created at the same time as the table. 
*/ - -int -row_create_index_for_mysql( -/*=======================*/ - /* out: error number or DB_SUCCESS */ - dict_index_t* index, /* in: index definition */ - trx_t* trx, /* in: transaction handle */ - const ulint* field_lengths) /* in: if not NULL, must contain - dict_index_get_n_fields(index) - actual field lengths for the - index columns, which are - then checked for not being too - large. */ -{ - ind_node_t* node; - mem_heap_t* heap; - que_thr_t* thr; - ulint err; - ulint i, j; - ulint len; - char* table_name; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mutex_own(&(dict_sys->mutex))); - ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); - - trx->op_info = "creating index"; - - /* Copy the table name because we may want to drop the - table later, after the index object is freed (inside - que_run_threads()) and thus index->table_name is not available. */ - table_name = mem_strdup(index->table_name); - - trx_start_if_not_started(trx); - - /* Check that the same column does not appear twice in the index. - Starting from 4.0.14, InnoDB should be able to cope with that, but - safer not to allow them. 
*/ - - for (i = 0; i < dict_index_get_n_fields(index); i++) { - for (j = 0; j < i; j++) { - if (0 == ut_strcmp( - dict_index_get_nth_field(index, j)->name, - dict_index_get_nth_field(index, i)->name)) { - - ut_print_timestamp(stderr); - - fputs(" InnoDB: Error: column ", stderr); - ut_print_name(stderr, trx, FALSE, - dict_index_get_nth_field( - index, i)->name); - fputs(" appears twice in ", stderr); - dict_index_name_print(stderr, trx, index); - fputs("\n" - "InnoDB: This is not allowed" - " in InnoDB.\n", stderr); - - err = DB_COL_APPEARS_TWICE_IN_INDEX; - - goto error_handling; - } - } - - /* Check also that prefix_len and actual length - < DICT_MAX_INDEX_COL_LEN */ - - len = dict_index_get_nth_field(index, i)->prefix_len; - - if (field_lengths) { - len = ut_max(len, field_lengths[i]); - } - - if (len >= DICT_MAX_INDEX_COL_LEN) { - err = DB_TOO_BIG_RECORD; - - goto error_handling; - } - } - - heap = mem_heap_create(512); - - trx->dict_operation = TRUE; - - /* Note that the space id where we store the index is inherited from - the table in dict_build_index_def_step() in dict0crea.c. */ - - node = ind_create_graph_create(index, heap); - - thr = pars_complete_graph_for_exec(node, trx, heap); - - ut_a(thr == que_fork_start_command(que_node_get_parent(thr))); - que_run_threads(thr); - - err = trx->error_state; - - que_graph_free((que_t*) que_node_get_parent(thr)); - -error_handling: - if (err != DB_SUCCESS) { - /* We have special error handling here */ - - trx->error_state = DB_SUCCESS; - - trx_general_rollback_for_mysql(trx, FALSE, NULL); - - row_drop_table_for_mysql(table_name, trx, FALSE); - - trx->error_state = DB_SUCCESS; - } - - trx->op_info = ""; - - mem_free(table_name); - - return((int) err); -} - -/************************************************************************* -Scans a table create SQL string and adds to the data dictionary -the foreign key constraints declared in the string. 
This function -should be called after the indexes for a table have been created. -Each foreign key constraint must be accompanied with indexes in -bot participating tables. The indexes are allowed to contain more -fields than mentioned in the constraint. Check also that foreign key -constraints which reference this table are ok. */ - -int -row_table_add_foreign_constraints( -/*==============================*/ - /* out: error code or DB_SUCCESS */ - trx_t* trx, /* in: transaction */ - const char* sql_string, /* in: table create statement where - foreign keys are declared like: - FOREIGN KEY (a, b) REFERENCES table2(c, d), - table2 can be written also with the - database name before it: test.table2 */ - const char* name, /* in: table full name in the - normalized form - database_name/table_name */ - ibool reject_fks) /* in: if TRUE, fail with error - code DB_CANNOT_ADD_CONSTRAINT if - any foreign keys are found. */ -{ - ulint err; - - ut_ad(mutex_own(&(dict_sys->mutex))); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - ut_a(sql_string); - - trx->op_info = "adding foreign keys"; - - trx_start_if_not_started(trx); - - trx->dict_operation = TRUE; - - err = dict_create_foreign_constraints(trx, sql_string, name, - reject_fks); - - if (err == DB_SUCCESS) { - /* Check that also referencing constraints are ok */ - err = dict_load_foreigns(name, TRUE); - } - - if (err != DB_SUCCESS) { - /* We have special error handling here */ - - trx->error_state = DB_SUCCESS; - - trx_general_rollback_for_mysql(trx, FALSE, NULL); - - row_drop_table_for_mysql(name, trx, FALSE); - - trx->error_state = DB_SUCCESS; - } - - return((int) err); -} - -/************************************************************************* -Drops a table for MySQL as a background operation. MySQL relies on Unix -in ALTER TABLE to the fact that the table handler does not remove the -table before all handles to it has been removed. 
Furhermore, the MySQL's -call to drop table must be non-blocking. Therefore we do the drop table -as a background operation, which is taken care of by the master thread -in srv0srv.c. */ -static -int -row_drop_table_for_mysql_in_background( -/*===================================*/ - /* out: error code or DB_SUCCESS */ - const char* name) /* in: table name */ -{ - ulint error; - trx_t* trx; - - trx = trx_allocate_for_background(); - - /* If the original transaction was dropping a table referenced by - foreign keys, we must set the following to be able to drop the - table: */ - - trx->check_foreigns = FALSE; - - /* fputs("InnoDB: Error: Dropping table ", stderr); - ut_print_name(stderr, trx, TRUE, name); - fputs(" in background drop list\n", stderr); */ - - /* Try to drop the table in InnoDB */ - - error = row_drop_table_for_mysql(name, trx, FALSE); - - /* Flush the log to reduce probability that the .frm files and - the InnoDB data dictionary get out-of-sync if the user runs - with innodb_flush_log_at_trx_commit = 0 */ - - log_buffer_flush_to_disk(); - - trx_commit_for_mysql(trx); - - trx_free_for_background(trx); - - return((int) error); -} - -/************************************************************************* -The master thread in srv0srv.c calls this regularly to drop tables which -we must drop in background after queries to them have ended. Such lazy -dropping of tables is needed in ALTER TABLE on Unix. 
*/ - -ulint -row_drop_tables_for_mysql_in_background(void) -/*=========================================*/ - /* out: how many tables dropped - + remaining tables in list */ -{ - row_mysql_drop_t* drop; - dict_table_t* table; - ulint n_tables; - ulint n_tables_dropped = 0; -loop: - mutex_enter(&kernel_mutex); - - if (!row_mysql_drop_list_inited) { - - UT_LIST_INIT(row_mysql_drop_list); - row_mysql_drop_list_inited = TRUE; - } - - drop = UT_LIST_GET_FIRST(row_mysql_drop_list); - - n_tables = UT_LIST_GET_LEN(row_mysql_drop_list); - - mutex_exit(&kernel_mutex); - - if (drop == NULL) { - /* All tables dropped */ - - return(n_tables + n_tables_dropped); - } - - mutex_enter(&(dict_sys->mutex)); - table = dict_table_get_low(drop->table_name); - mutex_exit(&(dict_sys->mutex)); - - if (table == NULL) { - /* If for some reason the table has already been dropped - through some other mechanism, do not try to drop it */ - - goto already_dropped; - } - - if (DB_SUCCESS != row_drop_table_for_mysql_in_background( - drop->table_name)) { - /* If the DROP fails for some table, we return, and let the - main thread retry later */ - - return(n_tables + n_tables_dropped); - } - - n_tables_dropped++; - -already_dropped: - mutex_enter(&kernel_mutex); - - UT_LIST_REMOVE(row_mysql_drop_list, row_mysql_drop_list, drop); - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Dropped table %s in background drop queue.\n", - drop->table_name); - - mem_free(drop->table_name); - - mem_free(drop); - - mutex_exit(&kernel_mutex); - - goto loop; -} - -/************************************************************************* -Get the background drop list length. NOTE: the caller must own the kernel -mutex! 
*/ - -ulint -row_get_background_drop_list_len_low(void) -/*======================================*/ - /* out: how many tables in list */ -{ - ut_ad(mutex_own(&kernel_mutex)); - - if (!row_mysql_drop_list_inited) { - - UT_LIST_INIT(row_mysql_drop_list); - row_mysql_drop_list_inited = TRUE; - } - - return(UT_LIST_GET_LEN(row_mysql_drop_list)); -} - -/************************************************************************* -If a table is not yet in the drop list, adds the table to the list of tables -which the master thread drops in background. We need this on Unix because in -ALTER TABLE MySQL may call drop table even if the table has running queries on -it. Also, if there are running foreign key checks on the table, we drop the -table lazily. */ -static -ibool -row_add_table_to_background_drop_list( -/*==================================*/ - /* out: TRUE if the table was not yet in the - drop list, and was added there */ - dict_table_t* table) /* in: table */ -{ - row_mysql_drop_t* drop; - - mutex_enter(&kernel_mutex); - - if (!row_mysql_drop_list_inited) { - - UT_LIST_INIT(row_mysql_drop_list); - row_mysql_drop_list_inited = TRUE; - } - - /* Look if the table already is in the drop list */ - drop = UT_LIST_GET_FIRST(row_mysql_drop_list); - - while (drop != NULL) { - if (strcmp(drop->table_name, table->name) == 0) { - /* Already in the list */ - - mutex_exit(&kernel_mutex); - - return(FALSE); - } - - drop = UT_LIST_GET_NEXT(row_mysql_drop_list, drop); - } - - drop = mem_alloc(sizeof(row_mysql_drop_t)); - - drop->table_name = mem_strdup(table->name); - - UT_LIST_ADD_LAST(row_mysql_drop_list, row_mysql_drop_list, drop); - - /* fputs("InnoDB: Adding table ", stderr); - ut_print_name(stderr, trx, TRUE, drop->table_name); - fputs(" to background drop list\n", stderr); */ - - mutex_exit(&kernel_mutex); - - return(TRUE); -} - -#ifndef UNIV_HOTBACKUP -/************************************************************************* -Discards the tablespace of a table which stored 
in an .ibd file. Discarding -means that this function deletes the .ibd file and assigns a new table id for -the table. Also the flag table->ibd_file_missing is set TRUE. */ - -int -row_discard_tablespace_for_mysql( -/*=============================*/ - /* out: error code or DB_SUCCESS */ - const char* name, /* in: table name */ - trx_t* trx) /* in: transaction handle */ -{ - dict_foreign_t* foreign; - dulint new_id; - dict_table_t* table; - ibool success; - ulint err; - pars_info_t* info = NULL; - - /* How do we prevent crashes caused by ongoing operations on - the table? Old operations could try to access non-existent - pages. - - 1) SQL queries, INSERT, SELECT, ...: we must get an exclusive - MySQL table lock on the table before we can do DISCARD - TABLESPACE. Then there are no running queries on the table. - - 2) Purge and rollback: we assign a new table id for the - table. Since purge and rollback look for the table based on - the table id, they see the table as 'dropped' and discard - their operations. - - 3) Insert buffer: we remove all entries for the tablespace in - the insert buffer tree; as long as the tablespace mem object - does not exist, ongoing insert buffer page merges are - discarded in buf0rea.c. If we recreate the tablespace mem - object with IMPORT TABLESPACE later, then the tablespace will - have the same id, but the tablespace_version field in the mem - object is different, and ongoing old insert buffer page merges - get discarded. - - 4) Linear readahead and random readahead: we use the same - method as in 3) to discard ongoing operations. - - 5) FOREIGN KEY operations: if - table->n_foreign_key_checks_running > 0, we do not allow the - discard. We also reserve the data dictionary latch. 
*/ - - ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); - - trx->op_info = "discarding tablespace"; - trx_start_if_not_started(trx); - - /* Serialize data dictionary operations with dictionary mutex: - no deadlocks can occur then in these operations */ - - row_mysql_lock_data_dictionary(trx); - - table = dict_table_get_low(name); - - if (!table) { - err = DB_TABLE_NOT_FOUND; - - goto funct_exit; - } - - if (table->space == 0) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: table ", stderr); - ut_print_name(stderr, trx, TRUE, name); - fputs("\n" - "InnoDB: is in the system tablespace 0" - " which cannot be discarded\n", stderr); - err = DB_ERROR; - - goto funct_exit; - } - - if (table->n_foreign_key_checks_running > 0) { - - ut_print_timestamp(stderr); - fputs(" InnoDB: You are trying to DISCARD table ", stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs("\n" - "InnoDB: though there is a foreign key check" - " running on it.\n" - "InnoDB: Cannot discard the table.\n", - stderr); - - err = DB_ERROR; - - goto funct_exit; - } - - /* Check if the table is referenced by foreign key constraints from - some other table (not the table itself) */ - - foreign = UT_LIST_GET_FIRST(table->referenced_list); - - while (foreign && foreign->foreign_table == table) { - foreign = UT_LIST_GET_NEXT(referenced_list, foreign); - } - - if (foreign && trx->check_foreigns) { - - FILE* ef = dict_foreign_err_file; - - /* We only allow discarding a referenced table if - FOREIGN_KEY_CHECKS is set to 0 */ - - err = DB_CANNOT_DROP_CONSTRAINT; - - mutex_enter(&dict_foreign_err_mutex); - rewind(ef); - ut_print_timestamp(ef); - - fputs(" Cannot DISCARD table ", ef); - ut_print_name(ef, trx, TRUE, name); - fputs("\n" - "because it is referenced by ", ef); - ut_print_name(ef, trx, TRUE, foreign->foreign_table_name); - putc('\n', ef); - mutex_exit(&dict_foreign_err_mutex); - - goto funct_exit; - } - - new_id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); - - /* Remove all locks 
except the table-level S and X locks. */ - lock_remove_all_on_table(table, FALSE); - - info = pars_info_create(); - - pars_info_add_str_literal(info, "table_name", name); - pars_info_add_dulint_literal(info, "new_id", new_id); - - err = que_eval_sql(info, - "PROCEDURE DISCARD_TABLESPACE_PROC () IS\n" - "old_id CHAR;\n" - "BEGIN\n" - "SELECT ID INTO old_id\n" - "FROM SYS_TABLES\n" - "WHERE NAME = :table_name\n" - "LOCK IN SHARE MODE;\n" - "IF (SQL % NOTFOUND) THEN\n" - " COMMIT WORK;\n" - " RETURN;\n" - "END IF;\n" - "UPDATE SYS_TABLES SET ID = :new_id\n" - " WHERE ID = old_id;\n" - "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n" - " WHERE TABLE_ID = old_id;\n" - "UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n" - " WHERE TABLE_ID = old_id;\n" - "COMMIT WORK;\n" - "END;\n" - , FALSE, trx); - - if (err != DB_SUCCESS) { - trx->error_state = DB_SUCCESS; - trx_general_rollback_for_mysql(trx, FALSE, NULL); - trx->error_state = DB_SUCCESS; - } else { - dict_table_change_id_in_cache(table, new_id); - - success = fil_discard_tablespace(table->space); - - if (!success) { - trx->error_state = DB_SUCCESS; - trx_general_rollback_for_mysql(trx, FALSE, NULL); - trx->error_state = DB_SUCCESS; - - err = DB_ERROR; - } else { - /* Set the flag which tells that now it is legal to - IMPORT a tablespace for this table */ - table->tablespace_discarded = TRUE; - table->ibd_file_missing = TRUE; - } - } - -funct_exit: - trx_commit_for_mysql(trx); - - row_mysql_unlock_data_dictionary(trx); - - trx->op_info = ""; - - return((int) err); -} - -/********************************************************************* -Imports a tablespace. The space id in the .ibd file must match the space id -of the table in the data dictionary. 
*/ - -int -row_import_tablespace_for_mysql( -/*============================*/ - /* out: error code or DB_SUCCESS */ - const char* name, /* in: table name */ - trx_t* trx) /* in: transaction handle */ -{ - dict_table_t* table; - ibool success; - dulint current_lsn; - ulint err = DB_SUCCESS; - - ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); - - trx_start_if_not_started(trx); - - trx->op_info = "importing tablespace"; - - current_lsn = log_get_lsn(); - - /* It is possible, though very improbable, that the lsn's in the - tablespace to be imported have risen above the current system lsn, if - a lengthy purge, ibuf merge, or rollback was performed on a backup - taken with ibbackup. If that is the case, reset page lsn's in the - file. We assume that mysqld was shut down after it performed these - cleanup operations on the .ibd file, so that it stamped the latest lsn - to the FIL_PAGE_FILE_FLUSH_LSN in the first page of the .ibd file. - - TODO: reset also the trx id's in clustered index records and write - a new space id to each data page. That would allow us to import clean - .ibd files from another MySQL installation. */ - - success = fil_reset_too_high_lsns(name, current_lsn); - - if (!success) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: cannot reset lsn's in table ", stderr); - ut_print_name(stderr, trx, TRUE, name); - fputs("\n" - "InnoDB: in ALTER TABLE ... IMPORT TABLESPACE\n", - stderr); - - err = DB_ERROR; - - row_mysql_lock_data_dictionary(trx); - - goto funct_exit; - } - - /* Serialize data dictionary operations with dictionary mutex: - no deadlocks can occur then in these operations */ - - row_mysql_lock_data_dictionary(trx); - - table = dict_table_get_low(name); - - if (!table) { - ut_print_timestamp(stderr); - fputs(" InnoDB: table ", stderr); - ut_print_name(stderr, trx, TRUE, name); - fputs("\n" - "InnoDB: does not exist in the InnoDB data dictionary\n" - "InnoDB: in ALTER TABLE ... 
IMPORT TABLESPACE\n", - stderr); - - err = DB_TABLE_NOT_FOUND; - - goto funct_exit; - } - - if (table->space == 0) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: table ", stderr); - ut_print_name(stderr, trx, TRUE, name); - fputs("\n" - "InnoDB: is in the system tablespace 0" - " which cannot be imported\n", stderr); - err = DB_ERROR; - - goto funct_exit; - } - - if (!table->tablespace_discarded) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: you are trying to" - " IMPORT a tablespace\n" - "InnoDB: ", stderr); - ut_print_name(stderr, trx, TRUE, name); - fputs(", though you have not called DISCARD on it yet\n" - "InnoDB: during the lifetime of the mysqld process!\n", - stderr); - - err = DB_ERROR; - - goto funct_exit; - } - - /* Play safe and remove all insert buffer entries, though we should - have removed them already when DISCARD TABLESPACE was called */ - - ibuf_delete_for_discarded_space(table->space); - - success = fil_open_single_table_tablespace(TRUE, table->space, - table->name); - if (success) { - table->ibd_file_missing = FALSE; - table->tablespace_discarded = FALSE; - } else { - if (table->ibd_file_missing) { - ut_print_timestamp(stderr); - fputs(" InnoDB: cannot find or open in the" - " database directory the .ibd file of\n" - "InnoDB: table ", stderr); - ut_print_name(stderr, trx, TRUE, name); - fputs("\n" - "InnoDB: in ALTER TABLE ... IMPORT TABLESPACE\n", - stderr); - } - - err = DB_ERROR; - } - -funct_exit: - trx_commit_for_mysql(trx); - - row_mysql_unlock_data_dictionary(trx); - - trx->op_info = ""; - - return((int) err); -} - -/************************************************************************* -Truncates a table for MySQL. 
*/ - -int -row_truncate_table_for_mysql( -/*=========================*/ - /* out: error code or DB_SUCCESS */ - dict_table_t* table, /* in: table handle */ - trx_t* trx) /* in: transaction handle */ -{ - dict_foreign_t* foreign; - ulint err; - mem_heap_t* heap; - byte* buf; - dtuple_t* tuple; - dfield_t* dfield; - dict_index_t* sys_index; - btr_pcur_t pcur; - mtr_t mtr; - dulint new_id; - pars_info_t* info = NULL; - - /* How do we prevent crashes caused by ongoing operations on - the table? Old operations could try to access non-existent - pages. - - 1) SQL queries, INSERT, SELECT, ...: we must get an exclusive - MySQL table lock on the table before we can do TRUNCATE - TABLE. Then there are no running queries on the table. This is - guaranteed, because in ha_innobase::store_lock(), we do not - weaken the TL_WRITE lock requested by MySQL when executing - SQLCOM_TRUNCATE. - - 2) Purge and rollback: we assign a new table id for the - table. Since purge and rollback look for the table based on - the table id, they see the table as 'dropped' and discard - their operations. - - 3) Insert buffer: TRUNCATE TABLE is analogous to DROP TABLE, - so we do not have to remove insert buffer records, as the - insert buffer works at a low level. If a freed page is later - reallocated, the allocator will remove the ibuf entries for - it. - - TODO: when we truncate *.ibd files (analogous to DISCARD - TABLESPACE), we will have to remove we remove all entries for - the table in the insert buffer tree! - - 4) Linear readahead and random readahead: we use the same - method as in 3) to discard ongoing operations. (This will only - be relevant for TRUNCATE TABLE by DISCARD TABLESPACE.) - - 5) FOREIGN KEY operations: if - table->n_foreign_key_checks_running > 0, we do not allow the - TRUNCATE. We also reserve the data dictionary latch. 
*/ - - ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); - ut_ad(table); - - if (srv_created_new_raw) { - fputs("InnoDB: A new raw disk partition was initialized:\n" - "InnoDB: we do not allow database modifications" - " by the user.\n" - "InnoDB: Shut down mysqld and edit my.cnf so that newraw" - " is replaced with raw.\n", stderr); - - return(DB_ERROR); - } - - trx->op_info = "truncating table"; - - trx_start_if_not_started(trx); - - /* Serialize data dictionary operations with dictionary mutex: - no deadlocks can occur then in these operations */ - - ut_a(trx->dict_operation_lock_mode == 0); - /* Prevent foreign key checks etc. while we are truncating the - table */ - - row_mysql_lock_data_dictionary(trx); - - ut_ad(mutex_own(&(dict_sys->mutex))); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - /* Check if the table is referenced by foreign key constraints from - some other table (not the table itself) */ - - foreign = UT_LIST_GET_FIRST(table->referenced_list); - - while (foreign && foreign->foreign_table == table) { - foreign = UT_LIST_GET_NEXT(referenced_list, foreign); - } - - if (foreign && trx->check_foreigns) { - FILE* ef = dict_foreign_err_file; - - /* We only allow truncating a referenced table if - FOREIGN_KEY_CHECKS is set to 0 */ - - mutex_enter(&dict_foreign_err_mutex); - rewind(ef); - ut_print_timestamp(ef); - - fputs(" Cannot truncate table ", ef); - ut_print_name(ef, trx, TRUE, table->name); - fputs(" by DROP+CREATE\n" - "InnoDB: because it is referenced by ", ef); - ut_print_name(ef, trx, TRUE, foreign->foreign_table_name); - putc('\n', ef); - mutex_exit(&dict_foreign_err_mutex); - - err = DB_ERROR; - goto funct_exit; - } - - /* TODO: could we replace the counter n_foreign_key_checks_running - with lock checks on the table? 
Acquire here an exclusive lock on the - table, and rewrite lock0lock.c and the lock wait in srv0srv.c so that - they can cope with the table having been truncated here? Foreign key - checks take an IS or IX lock on the table. */ - - if (table->n_foreign_key_checks_running > 0) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Cannot truncate table ", stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs(" by DROP+CREATE\n" - "InnoDB: because there is a foreign key check" - " running on it.\n", - stderr); - err = DB_ERROR; - - goto funct_exit; - } - - /* Remove all locks except the table-level S and X locks. */ - lock_remove_all_on_table(table, FALSE); - - trx->table_id = table->id; - - /* scan SYS_INDEXES for all indexes of the table */ - heap = mem_heap_create(800); - - tuple = dtuple_create(heap, 1); - dfield = dtuple_get_nth_field(tuple, 0); - - buf = mem_heap_alloc(heap, 8); - mach_write_to_8(buf, table->id); - - dfield_set_data(dfield, buf, 8); - sys_index = dict_table_get_first_index(dict_sys->sys_indexes); - dict_index_copy_types(tuple, sys_index, 1); - - mtr_start(&mtr); - btr_pcur_open_on_user_rec(sys_index, tuple, PAGE_CUR_GE, - BTR_MODIFY_LEAF, &pcur, &mtr); - for (;;) { - rec_t* rec; - const byte* field; - ulint len; - ulint root_page_no; - - if (!btr_pcur_is_on_user_rec(&pcur, &mtr)) { - /* The end of SYS_INDEXES has been reached. */ - break; - } - - rec = btr_pcur_get_rec(&pcur); - - field = rec_get_nth_field_old(rec, 0, &len); - ut_ad(len == 8); - - if (memcmp(buf, field, len) != 0) { - /* End of indexes for the table (TABLE_ID mismatch). */ - break; - } - - if (rec_get_deleted_flag(rec, FALSE)) { - /* The index has been dropped. */ - goto next_rec; - } - - /* This call may commit and restart mtr - and reposition pcur. 
*/ - root_page_no = dict_truncate_index_tree(table, &pcur, &mtr); - - rec = btr_pcur_get_rec(&pcur); - - if (root_page_no != FIL_NULL) { - page_rec_write_index_page_no( - rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, - root_page_no, &mtr); - /* We will need to commit and restart the - mini-transaction in order to avoid deadlocks. - The dict_truncate_index_tree() call has allocated - a page in this mini-transaction, and the rest of - this loop could latch another index page. */ - mtr_commit(&mtr); - mtr_start(&mtr); - btr_pcur_restore_position(BTR_MODIFY_LEAF, - &pcur, &mtr); - } - -next_rec: - btr_pcur_move_to_next_user_rec(&pcur, &mtr); - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - mem_heap_free(heap); - - new_id = dict_hdr_get_new_id(DICT_HDR_TABLE_ID); - - info = pars_info_create(); - - pars_info_add_dulint_literal(info, "old_id", table->id); - pars_info_add_dulint_literal(info, "new_id", new_id); - - err = que_eval_sql(info, - "PROCEDURE RENUMBER_TABLESPACE_PROC () IS\n" - "BEGIN\n" - "UPDATE SYS_TABLES SET ID = :new_id\n" - " WHERE ID = :old_id;\n" - "UPDATE SYS_COLUMNS SET TABLE_ID = :new_id\n" - " WHERE TABLE_ID = :old_id;\n" - "UPDATE SYS_INDEXES SET TABLE_ID = :new_id\n" - " WHERE TABLE_ID = :old_id;\n" - "COMMIT WORK;\n" - "END;\n" - , FALSE, trx); - - if (err != DB_SUCCESS) { - trx->error_state = DB_SUCCESS; - trx_general_rollback_for_mysql(trx, FALSE, NULL); - trx->error_state = DB_SUCCESS; - ut_print_timestamp(stderr); - fputs(" InnoDB: Unable to assign a new identifier to table ", - stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs("\n" - "InnoDB: after truncating it. Background processes" - " may corrupt the table!\n", stderr); - err = DB_ERROR; - } else { - dict_table_change_id_in_cache(table, new_id); - } - - /* MySQL calls ha_innobase::reset_auto_increment() which does - the same thing. 
*/ - dict_table_autoinc_lock(table); - dict_table_autoinc_initialize(table, 1); - dict_table_autoinc_unlock(table); - dict_update_statistics(table); - - trx_commit_for_mysql(trx); - -funct_exit: - - row_mysql_unlock_data_dictionary(trx); - - trx->op_info = ""; - - srv_wake_master_thread(); - - return((int) err); -} -#endif /* !UNIV_HOTBACKUP */ - -/************************************************************************* -Drops a table for MySQL. If the name of the dropped table ends in -one of "innodb_monitor", "innodb_lock_monitor", "innodb_tablespace_monitor", -"innodb_table_monitor", then this will also stop the printing of monitor -output by the master thread. */ - -int -row_drop_table_for_mysql( -/*=====================*/ - /* out: error code or DB_SUCCESS */ - const char* name, /* in: table name */ - trx_t* trx, /* in: transaction handle */ - ibool drop_db)/* in: TRUE=dropping whole database */ -{ - dict_foreign_t* foreign; - dict_table_t* table; - ulint space_id; - ulint err; - const char* table_name; - ulint namelen; - ibool locked_dictionary = FALSE; - pars_info_t* info = NULL; - - ut_a(name != NULL); - - if (srv_created_new_raw) { - fputs("InnoDB: A new raw disk partition was initialized:\n" - "InnoDB: we do not allow database modifications" - " by the user.\n" - "InnoDB: Shut down mysqld and edit my.cnf so that newraw" - " is replaced with raw.\n", stderr); - - return(DB_ERROR); - } - - trx->op_info = "dropping table"; - - trx_start_if_not_started(trx); - - /* The table name is prefixed with the database name and a '/'. - Certain table names starting with 'innodb_' have their special - meaning regardless of the database name. Thus, we need to - ignore the database name prefix in the comparisons. 
*/ - table_name = strchr(name, '/'); - ut_a(table_name); - table_name++; - namelen = strlen(table_name) + 1; - - if (namelen == sizeof S_innodb_monitor - && !memcmp(table_name, S_innodb_monitor, - sizeof S_innodb_monitor)) { - - /* Table name equals "innodb_monitor": - stop monitor prints */ - - srv_print_innodb_monitor = FALSE; - srv_print_innodb_lock_monitor = FALSE; - } else if (namelen == sizeof S_innodb_lock_monitor - && !memcmp(table_name, S_innodb_lock_monitor, - sizeof S_innodb_lock_monitor)) { - srv_print_innodb_monitor = FALSE; - srv_print_innodb_lock_monitor = FALSE; - } else if (namelen == sizeof S_innodb_tablespace_monitor - && !memcmp(table_name, S_innodb_tablespace_monitor, - sizeof S_innodb_tablespace_monitor)) { - - srv_print_innodb_tablespace_monitor = FALSE; - } else if (namelen == sizeof S_innodb_table_monitor - && !memcmp(table_name, S_innodb_table_monitor, - sizeof S_innodb_table_monitor)) { - - srv_print_innodb_table_monitor = FALSE; - } - - /* Serialize data dictionary operations with dictionary mutex: - no deadlocks can occur then in these operations */ - - if (trx->dict_operation_lock_mode != RW_X_LATCH) { - /* Prevent foreign key checks etc. 
while we are dropping the - table */ - - row_mysql_lock_data_dictionary(trx); - - locked_dictionary = TRUE; - } - - ut_ad(mutex_own(&(dict_sys->mutex))); -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&dict_operation_lock, RW_LOCK_EX)); -#endif /* UNIV_SYNC_DEBUG */ - - table = dict_table_get_low(name); - - if (!table) { - err = DB_TABLE_NOT_FOUND; - ut_print_timestamp(stderr); - - fputs(" InnoDB: Error: table ", stderr); - ut_print_name(stderr, trx, TRUE, name); - fputs(" does not exist in the InnoDB internal\n" - "InnoDB: data dictionary though MySQL is" - " trying to drop it.\n" - "InnoDB: Have you copied the .frm file" - " of the table to the\n" - "InnoDB: MySQL database directory" - " from another database?\n" - "InnoDB: You can look for further help from\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "innodb-troubleshooting.html\n", - stderr); - goto funct_exit; - } - - /* Check if the table is referenced by foreign key constraints from - some other table (not the table itself) */ - - foreign = UT_LIST_GET_FIRST(table->referenced_list); - - while (foreign && foreign->foreign_table == table) { -check_next_foreign: - foreign = UT_LIST_GET_NEXT(referenced_list, foreign); - } - - if (foreign && trx->check_foreigns - && !(drop_db && dict_tables_have_same_db( - name, foreign->foreign_table_name))) { - FILE* ef = dict_foreign_err_file; - - /* We only allow dropping a referenced table if - FOREIGN_KEY_CHECKS is set to 0 */ - - err = DB_CANNOT_DROP_CONSTRAINT; - - mutex_enter(&dict_foreign_err_mutex); - rewind(ef); - ut_print_timestamp(ef); - - fputs(" Cannot drop table ", ef); - ut_print_name(ef, trx, TRUE, name); - fputs("\n" - "because it is referenced by ", ef); - ut_print_name(ef, trx, TRUE, foreign->foreign_table_name); - putc('\n', ef); - mutex_exit(&dict_foreign_err_mutex); - - goto funct_exit; - } - - if (foreign && trx->check_foreigns) { - goto check_next_foreign; - } - - if (table->n_mysql_handles_opened > 0) { - ibool added; - - added = 
row_add_table_to_background_drop_list(table); - - if (added) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Warning: MySQL is" - " trying to drop table ", stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs("\n" - "InnoDB: though there are still" - " open handles to it.\n" - "InnoDB: Adding the table to the" - " background drop queue.\n", - stderr); - - /* We return DB_SUCCESS to MySQL though the drop will - happen lazily later */ - - err = DB_SUCCESS; - } else { - /* The table is already in the background drop list */ - err = DB_ERROR; - } - - goto funct_exit; - } - - /* TODO: could we replace the counter n_foreign_key_checks_running - with lock checks on the table? Acquire here an exclusive lock on the - table, and rewrite lock0lock.c and the lock wait in srv0srv.c so that - they can cope with the table having been dropped here? Foreign key - checks take an IS or IX lock on the table. */ - - if (table->n_foreign_key_checks_running > 0) { - - ibool added; - - added = row_add_table_to_background_drop_list(table); - - if (added) { - ut_print_timestamp(stderr); - fputs(" InnoDB: You are trying to drop table ", - stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs("\n" - "InnoDB: though there is a" - " foreign key check running on it.\n" - "InnoDB: Adding the table to" - " the background drop queue.\n", - stderr); - - /* We return DB_SUCCESS to MySQL though the drop will - happen lazily later */ - - err = DB_SUCCESS; - } else { - /* The table is already in the background drop list */ - err = DB_ERROR; - } - - goto funct_exit; - } - - /* Remove all locks there are on the table or its records */ - lock_remove_all_on_table(table, TRUE); - - trx->dict_operation = TRUE; - trx->table_id = table->id; - - /* We use the private SQL parser of Innobase to generate the - query graphs needed in deleting the dictionary data from system - tables in Innobase. 
Deleting a row from SYS_INDEXES table also - frees the file segments of the B-tree associated with the index. */ - - info = pars_info_create(); - - pars_info_add_str_literal(info, "table_name", name); - - err = que_eval_sql(info, - "PROCEDURE DROP_TABLE_PROC () IS\n" - "sys_foreign_id CHAR;\n" - "table_id CHAR;\n" - "index_id CHAR;\n" - "foreign_id CHAR;\n" - "found INT;\n" - "BEGIN\n" - "SELECT ID INTO table_id\n" - "FROM SYS_TABLES\n" - "WHERE NAME = :table_name\n" - "LOCK IN SHARE MODE;\n" - "IF (SQL % NOTFOUND) THEN\n" - " COMMIT WORK;\n" - " RETURN;\n" - "END IF;\n" - "found := 1;\n" - "SELECT ID INTO sys_foreign_id\n" - "FROM SYS_TABLES\n" - "WHERE NAME = 'SYS_FOREIGN'\n" - "LOCK IN SHARE MODE;\n" - "IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - "END IF;\n" - "IF (:table_name = 'SYS_FOREIGN') THEN\n" - " found := 0;\n" - "END IF;\n" - "IF (:table_name = 'SYS_FOREIGN_COLS') THEN\n" - " found := 0;\n" - "END IF;\n" - "WHILE found = 1 LOOP\n" - " SELECT ID INTO foreign_id\n" - " FROM SYS_FOREIGN\n" - " WHERE FOR_NAME = :table_name\n" - " AND TO_BINARY(FOR_NAME)\n" - " = TO_BINARY(:table_name)\n" - " LOCK IN SHARE MODE;\n" - " IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - " ELSE\n" - " DELETE FROM SYS_FOREIGN_COLS\n" - " WHERE ID = foreign_id;\n" - " DELETE FROM SYS_FOREIGN\n" - " WHERE ID = foreign_id;\n" - " END IF;\n" - "END LOOP;\n" - "found := 1;\n" - "WHILE found = 1 LOOP\n" - " SELECT ID INTO index_id\n" - " FROM SYS_INDEXES\n" - " WHERE TABLE_ID = table_id\n" - " LOCK IN SHARE MODE;\n" - " IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - " ELSE\n" - " DELETE FROM SYS_FIELDS\n" - " WHERE INDEX_ID = index_id;\n" - " DELETE FROM SYS_INDEXES\n" - " WHERE ID = index_id\n" - " AND TABLE_ID = table_id;\n" - " END IF;\n" - "END LOOP;\n" - "DELETE FROM SYS_COLUMNS\n" - "WHERE TABLE_ID = table_id;\n" - "DELETE FROM SYS_TABLES\n" - "WHERE ID = table_id;\n" - "COMMIT WORK;\n" - "END;\n" - , FALSE, trx); - - if (err != DB_SUCCESS) { - ut_a(err == 
DB_OUT_OF_FILE_SPACE); - - err = DB_MUST_GET_MORE_FILE_SPACE; - - row_mysql_handle_errors(&err, trx, NULL, NULL); - - ut_error; - } else { - ibool is_path; - const char* name_or_path; - mem_heap_t* heap; - - heap = mem_heap_create(200); - - /* Clone the name, in case it has been allocated - from table->heap, which will be freed by - dict_table_remove_from_cache(table) below. */ - name = mem_heap_strdup(heap, name); - space_id = table->space; - - if (table->dir_path_of_temp_table != NULL) { - is_path = TRUE; - name_or_path = mem_heap_strdup( - heap, table->dir_path_of_temp_table); - } else { - is_path = FALSE; - name_or_path = name; - } - - dict_table_remove_from_cache(table); - - if (dict_load_table(name) != NULL) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: not able to remove table ", - stderr); - ut_print_name(stderr, trx, TRUE, name); - fputs(" from the dictionary cache!\n", stderr); - err = DB_ERROR; - } - - /* Do not drop possible .ibd tablespace if something went - wrong: we do not want to delete valuable data of the user */ - - if (err == DB_SUCCESS && space_id > 0) { - if (!fil_space_for_table_exists_in_mem(space_id, - name_or_path, - is_path, - FALSE, TRUE)) { - err = DB_SUCCESS; - - fprintf(stderr, - "InnoDB: We removed now the InnoDB" - " internal data dictionary entry\n" - "InnoDB: of table "); - ut_print_name(stderr, trx, TRUE, name); - fprintf(stderr, ".\n"); - } else if (!fil_delete_tablespace(space_id)) { - fprintf(stderr, - "InnoDB: We removed now the InnoDB" - " internal data dictionary entry\n" - "InnoDB: of table "); - ut_print_name(stderr, trx, TRUE, name); - fprintf(stderr, ".\n"); - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: not able to" - " delete tablespace %lu of table ", - (ulong) space_id); - ut_print_name(stderr, trx, TRUE, name); - fputs("!\n", stderr); - err = DB_ERROR; - } - } - - mem_heap_free(heap); - } -funct_exit: - - trx_commit_for_mysql(trx); - - if (locked_dictionary) { - 
row_mysql_unlock_data_dictionary(trx); - } - - trx->op_info = ""; - -#ifndef UNIV_HOTBACKUP - srv_wake_master_thread(); -#endif /* !UNIV_HOTBACKUP */ - - return((int) err); -} - -/*********************************************************************** -Drop all foreign keys in a database, see Bug#18942. -Called at the end of row_drop_database_for_mysql(). */ -static -ulint -drop_all_foreign_keys_in_db( -/*========================*/ - /* out: error code or DB_SUCCESS */ - const char* name, /* in: database name which ends to '/' */ - trx_t* trx) /* in: transaction handle */ -{ - pars_info_t* pinfo; - ulint err; - - ut_a(name[strlen(name) - 1] == '/'); - - pinfo = pars_info_create(); - - pars_info_add_str_literal(pinfo, "dbname", name); - -/* true if for_name is not prefixed with dbname */ -#define TABLE_NOT_IN_THIS_DB \ -"SUBSTR(for_name, 0, LENGTH(:dbname)) <> :dbname" - - err = que_eval_sql(pinfo, - "PROCEDURE DROP_ALL_FOREIGN_KEYS_PROC () IS\n" - "foreign_id CHAR;\n" - "for_name CHAR;\n" - "found INT;\n" - "DECLARE CURSOR cur IS\n" - "SELECT ID, FOR_NAME FROM SYS_FOREIGN\n" - "WHERE FOR_NAME >= :dbname\n" - "LOCK IN SHARE MODE\n" - "ORDER BY FOR_NAME;\n" - "BEGIN\n" - "found := 1;\n" - "OPEN cur;\n" - "WHILE found = 1 LOOP\n" - " FETCH cur INTO foreign_id, for_name;\n" - " IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - " ELSIF (" TABLE_NOT_IN_THIS_DB ") THEN\n" - " found := 0;\n" - " ELSIF (1=1) THEN\n" - " DELETE FROM SYS_FOREIGN_COLS\n" - " WHERE ID = foreign_id;\n" - " DELETE FROM SYS_FOREIGN\n" - " WHERE ID = foreign_id;\n" - " END IF;\n" - "END LOOP;\n" - "CLOSE cur;\n" - "COMMIT WORK;\n" - "END;\n", - FALSE, /* do not reserve dict mutex, - we are already holding it */ - trx); - - return(err); -} - -/************************************************************************* -Drops a database for MySQL. 
*/ - -int -row_drop_database_for_mysql( -/*========================*/ - /* out: error code or DB_SUCCESS */ - const char* name, /* in: database name which ends to '/' */ - trx_t* trx) /* in: transaction handle */ -{ - dict_table_t* table; - char* table_name; - int err = DB_SUCCESS; - ulint namelen = strlen(name); - - ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); - ut_a(name != NULL); - ut_a(name[namelen - 1] == '/'); - - trx->op_info = "dropping database"; - - trx_start_if_not_started(trx); -loop: - row_mysql_lock_data_dictionary(trx); - - while ((table_name = dict_get_first_table_name_in_db(name))) { - ut_a(memcmp(table_name, name, namelen) == 0); - - table = dict_table_get_low(table_name); - - ut_a(table); - - /* Wait until MySQL does not have any queries running on - the table */ - - if (table->n_mysql_handles_opened > 0) { - row_mysql_unlock_data_dictionary(trx); - - ut_print_timestamp(stderr); - fputs(" InnoDB: Warning: MySQL is trying to" - " drop database ", stderr); - ut_print_name(stderr, trx, TRUE, name); - fputs("\n" - "InnoDB: though there are still" - " open handles to table ", stderr); - ut_print_name(stderr, trx, TRUE, table_name); - fputs(".\n", stderr); - - os_thread_sleep(1000000); - - mem_free(table_name); - - goto loop; - } - - err = row_drop_table_for_mysql(table_name, trx, TRUE); - - if (err != DB_SUCCESS) { - fputs("InnoDB: DROP DATABASE ", stderr); - ut_print_name(stderr, trx, TRUE, name); - fprintf(stderr, " failed with error %lu for table ", - (ulint) err); - ut_print_name(stderr, trx, TRUE, table_name); - putc('\n', stderr); - mem_free(table_name); - break; - } - - mem_free(table_name); - } - - if (err == DB_SUCCESS) { - /* after dropping all tables try to drop all leftover - foreign keys in case orphaned ones exist */ - err = (int) drop_all_foreign_keys_in_db(name, trx); - - if (err != DB_SUCCESS) { - fputs("InnoDB: DROP DATABASE ", stderr); - ut_print_name(stderr, trx, TRUE, name); - fprintf(stderr, " failed with error %d 
while " - "dropping all foreign keys", err); - } - } - - trx_commit_for_mysql(trx); - - row_mysql_unlock_data_dictionary(trx); - - trx->op_info = ""; - - return(err); -} - -/************************************************************************* -Checks if a table name contains the string "/#sql" which denotes temporary -tables in MySQL. */ -static -ibool -row_is_mysql_tmp_table_name( -/*========================*/ - /* out: TRUE if temporary table */ - const char* name) /* in: table name in the form - 'database/tablename' */ -{ - return(strstr(name, "/#sql") != NULL); - /* return(strstr(name, "/@0023sql") != NULL); */ -} - -/******************************************************************** -Delete a single constraint. */ -static -int -row_delete_constraint_low( -/*======================*/ - /* out: error code or DB_SUCCESS */ - const char* id, /* in: constraint id */ - trx_t* trx) /* in: transaction handle */ -{ - pars_info_t* info = pars_info_create(); - - pars_info_add_str_literal(info, "id", id); - - return((int) que_eval_sql(info, - "PROCEDURE DELETE_CONSTRAINT () IS\n" - "BEGIN\n" - "DELETE FROM SYS_FOREIGN_COLS WHERE ID = :id;\n" - "DELETE FROM SYS_FOREIGN WHERE ID = :id;\n" - "END;\n" - , FALSE, trx)); -} - -/******************************************************************** -Delete a single constraint. */ -static -int -row_delete_constraint( -/*==================*/ - /* out: error code or DB_SUCCESS */ - const char* id, /* in: constraint id */ - const char* database_name, /* in: database name, with the - trailing '/' */ - mem_heap_t* heap, /* in: memory heap */ - trx_t* trx) /* in: transaction handle */ -{ - ulint err; - - /* New format constraints have ids <databasename>/<constraintname>. */ - err = row_delete_constraint_low( - mem_heap_strcat(heap, database_name, id), trx); - - if ((err == DB_SUCCESS) && !strchr(id, '/')) { - /* Old format < 4.0.18 constraints have constraint ids - <number>_<number>. 
We only try deleting them if the - constraint name does not contain a '/' character, otherwise - deleting a new format constraint named 'foo/bar' from - database 'baz' would remove constraint 'bar' from database - 'foo', if it existed. */ - - err = row_delete_constraint_low(id, trx); - } - - return((int) err); -} - -/************************************************************************* -Renames a table for MySQL. */ - -int -row_rename_table_for_mysql( -/*=======================*/ - /* out: error code or DB_SUCCESS */ - const char* old_name, /* in: old table name */ - const char* new_name, /* in: new table name */ - trx_t* trx) /* in: transaction handle */ -{ - dict_table_t* table; - ulint err; - mem_heap_t* heap = NULL; - const char** constraints_to_drop = NULL; - ulint n_constraints_to_drop = 0; - ibool old_is_tmp, new_is_tmp; - pars_info_t* info = NULL; - - ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); - ut_a(old_name != NULL); - ut_a(new_name != NULL); - - if (srv_created_new_raw || srv_force_recovery) { - fputs("InnoDB: A new raw disk partition was initialized or\n" - "InnoDB: innodb_force_recovery is on: we do not allow\n" - "InnoDB: database modifications by the user. Shut down\n" - "InnoDB: mysqld and edit my.cnf so that newraw" - " is replaced\n" - "InnoDB: with raw, and innodb_force_... 
is removed.\n", - stderr); - - trx_commit_for_mysql(trx); - return(DB_ERROR); - } - - if (row_mysql_is_system_table(new_name)) { - - fprintf(stderr, - "InnoDB: Error: trying to create a MySQL" - " system table %s of type InnoDB.\n" - "InnoDB: MySQL system tables must be" - " of the MyISAM type!\n", - new_name); - - trx_commit_for_mysql(trx); - return(DB_ERROR); - } - - trx->op_info = "renaming table"; - trx_start_if_not_started(trx); - - old_is_tmp = row_is_mysql_tmp_table_name(old_name); - new_is_tmp = row_is_mysql_tmp_table_name(new_name); - - /* Serialize data dictionary operations with dictionary mutex: - no deadlocks can occur then in these operations */ - - row_mysql_lock_data_dictionary(trx); - - table = dict_table_get_low(old_name); - - if (!table) { - err = DB_TABLE_NOT_FOUND; - ut_print_timestamp(stderr); - - fputs(" InnoDB: Error: table ", stderr); - ut_print_name(stderr, trx, TRUE, old_name); - fputs(" does not exist in the InnoDB internal\n" - "InnoDB: data dictionary though MySQL is" - " trying to rename the table.\n" - "InnoDB: Have you copied the .frm file" - " of the table to the\n" - "InnoDB: MySQL database directory" - " from another database?\n" - "InnoDB: You can look for further help from\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "innodb-troubleshooting.html\n", - stderr); - goto funct_exit; - } - - if (table->ibd_file_missing) { - err = DB_TABLE_NOT_FOUND; - ut_print_timestamp(stderr); - - fputs(" InnoDB: Error: table ", stderr); - ut_print_name(stderr, trx, TRUE, old_name); - fputs(" does not have an .ibd file" - " in the database directory.\n" - "InnoDB: You can look for further help from\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "innodb-troubleshooting.html\n", - stderr); - goto funct_exit; - } - - if (new_is_tmp) { - /* MySQL is doing an ALTER TABLE command and it renames the - original table to a temporary table name. 
We want to preserve - the original foreign key constraint definitions despite the - name change. An exception is those constraints for which - the ALTER TABLE contained DROP FOREIGN KEY <foreign key id>.*/ - - heap = mem_heap_create(100); - - err = dict_foreign_parse_drop_constraints( - heap, trx, table, &n_constraints_to_drop, - &constraints_to_drop); - - if (err != DB_SUCCESS) { - - goto funct_exit; - } - } - - /* We use the private SQL parser of Innobase to generate the query - graphs needed in deleting the dictionary data from system tables in - Innobase. Deleting a row from SYS_INDEXES table also frees the file - segments of the B-tree associated with the index. */ - - info = pars_info_create(); - - pars_info_add_str_literal(info, "new_table_name", new_name); - pars_info_add_str_literal(info, "old_table_name", old_name); - - err = que_eval_sql(info, - "PROCEDURE RENAME_TABLE () IS\n" - "BEGIN\n" - "UPDATE SYS_TABLES SET NAME = :new_table_name\n" - " WHERE NAME = :old_table_name;\n" - "END;\n" - , FALSE, trx); - - if (err != DB_SUCCESS) { - - goto end; - } - - if (!new_is_tmp) { - /* Rename all constraints. 
*/ - - info = pars_info_create(); - - pars_info_add_str_literal(info, "new_table_name", new_name); - pars_info_add_str_literal(info, "old_table_name", old_name); - - err = que_eval_sql( - info, - "PROCEDURE RENAME_CONSTRAINT_IDS () IS\n" - "gen_constr_prefix CHAR;\n" - "new_db_name CHAR;\n" - "foreign_id CHAR;\n" - "new_foreign_id CHAR;\n" - "old_db_name_len INT;\n" - "old_t_name_len INT;\n" - "new_db_name_len INT;\n" - "id_len INT;\n" - "found INT;\n" - "BEGIN\n" - "found := 1;\n" - "old_db_name_len := INSTR(:old_table_name, '/')-1;\n" - "new_db_name_len := INSTR(:new_table_name, '/')-1;\n" - "new_db_name := SUBSTR(:new_table_name, 0,\n" - " new_db_name_len);\n" - "old_t_name_len := LENGTH(:old_table_name);\n" - "gen_constr_prefix := CONCAT(:old_table_name,\n" - " '_ibfk_');\n" - "WHILE found = 1 LOOP\n" - " SELECT ID INTO foreign_id\n" - " FROM SYS_FOREIGN\n" - " WHERE FOR_NAME = :old_table_name\n" - " AND TO_BINARY(FOR_NAME)\n" - " = TO_BINARY(:old_table_name)\n" - " LOCK IN SHARE MODE;\n" - " IF (SQL % NOTFOUND) THEN\n" - " found := 0;\n" - " ELSE\n" - " UPDATE SYS_FOREIGN\n" - " SET FOR_NAME = :new_table_name\n" - " WHERE ID = foreign_id;\n" - " id_len := LENGTH(foreign_id);\n" - " IF (INSTR(foreign_id, '/') > 0) THEN\n" - " IF (INSTR(foreign_id,\n" - " gen_constr_prefix) > 0)\n" - " THEN\n" - " new_foreign_id :=\n" - " CONCAT(:new_table_name,\n" - " SUBSTR(foreign_id, old_t_name_len,\n" - " id_len - old_t_name_len));\n" - " ELSE\n" - " new_foreign_id :=\n" - " CONCAT(new_db_name,\n" - " SUBSTR(foreign_id,\n" - " old_db_name_len,\n" - " id_len - old_db_name_len));\n" - " END IF;\n" - " UPDATE SYS_FOREIGN\n" - " SET ID = new_foreign_id\n" - " WHERE ID = foreign_id;\n" - " UPDATE SYS_FOREIGN_COLS\n" - " SET ID = new_foreign_id\n" - " WHERE ID = foreign_id;\n" - " END IF;\n" - " END IF;\n" - "END LOOP;\n" - "UPDATE SYS_FOREIGN SET REF_NAME = :new_table_name\n" - "WHERE REF_NAME = :old_table_name\n" - " AND TO_BINARY(REF_NAME)\n" - " = 
TO_BINARY(:old_table_name);\n" - "END;\n" - , FALSE, trx); - - } else if (n_constraints_to_drop > 0) { - /* Drop some constraints of tmp tables. */ - - ulint db_name_len = dict_get_db_name_len(old_name) + 1; - char* db_name = mem_heap_strdupl(heap, old_name, - db_name_len); - ulint i; - - for (i = 0; i < n_constraints_to_drop; i++) { - err = row_delete_constraint(constraints_to_drop[i], - db_name, heap, trx); - - if (err != DB_SUCCESS) { - break; - } - } - } - -end: - if (err != DB_SUCCESS) { - if (err == DB_DUPLICATE_KEY) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error; possible reasons:\n" - "InnoDB: 1) Table rename would cause" - " two FOREIGN KEY constraints\n" - "InnoDB: to have the same internal name" - " in case-insensitive comparison.\n" - "InnoDB: 2) table ", stderr); - ut_print_name(stderr, trx, TRUE, new_name); - fputs(" exists in the InnoDB internal data\n" - "InnoDB: dictionary though MySQL is" - " trying to rename table ", stderr); - ut_print_name(stderr, trx, TRUE, old_name); - fputs(" to it.\n" - "InnoDB: Have you deleted the .frm file" - " and not used DROP TABLE?\n" - "InnoDB: You can look for further help from\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "innodb-troubleshooting.html\n" - "InnoDB: If table ", stderr); - ut_print_name(stderr, trx, TRUE, new_name); - fputs(" is a temporary table #sql..., then" - " it can be that\n" - "InnoDB: there are still queries running" - " on the table, and it will be\n" - "InnoDB: dropped automatically when" - " the queries end.\n" - "InnoDB: You can drop the orphaned table" - " inside InnoDB by\n" - "InnoDB: creating an InnoDB table with" - " the same name in another\n" - "InnoDB: database and copying the .frm file" - " to the current database.\n" - "InnoDB: Then MySQL thinks the table exists," - " and DROP TABLE will\n" - "InnoDB: succeed.\n", stderr); - } - trx->error_state = DB_SUCCESS; - trx_general_rollback_for_mysql(trx, FALSE, NULL); - trx->error_state = DB_SUCCESS; - } else { - /* 
The following call will also rename the .ibd data file if - the table is stored in a single-table tablespace */ - - ibool success = dict_table_rename_in_cache(table, new_name, - !new_is_tmp); - - if (!success) { - trx->error_state = DB_SUCCESS; - trx_general_rollback_for_mysql(trx, FALSE, NULL); - trx->error_state = DB_SUCCESS; - ut_print_timestamp(stderr); - fputs(" InnoDB: Error in table rename," - " cannot rename ", stderr); - ut_print_name(stderr, trx, TRUE, old_name); - fputs(" to ", stderr); - ut_print_name(stderr, trx, TRUE, new_name); - putc('\n', stderr); - err = DB_ERROR; - - goto funct_exit; - } - - /* We only want to switch off some of the type checking in - an ALTER, not in a RENAME. */ - - err = dict_load_foreigns( - new_name, old_is_tmp ? trx->check_foreigns : TRUE); - - if (err != DB_SUCCESS) { - ut_print_timestamp(stderr); - - if (old_is_tmp) { - fputs(" InnoDB: Error: in ALTER TABLE ", - stderr); - ut_print_name(stderr, trx, TRUE, new_name); - fputs("\n" - "InnoDB: has or is referenced" - " in foreign key constraints\n" - "InnoDB: which are not compatible" - " with the new table definition.\n", - stderr); - } else { - fputs(" InnoDB: Error: in RENAME TABLE" - " table ", - stderr); - ut_print_name(stderr, trx, TRUE, new_name); - fputs("\n" - "InnoDB: is referenced in" - " foreign key constraints\n" - "InnoDB: which are not compatible" - " with the new table definition.\n", - stderr); - } - - ut_a(dict_table_rename_in_cache(table, - old_name, FALSE)); - trx->error_state = DB_SUCCESS; - trx_general_rollback_for_mysql(trx, FALSE, NULL); - trx->error_state = DB_SUCCESS; - } - } - -funct_exit: - trx_commit_for_mysql(trx); - row_mysql_unlock_data_dictionary(trx); - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - trx->op_info = ""; - - return((int) err); -} - -/************************************************************************* -Checks that the index contains entries in an ascending order, unique -constraint is not broken, and 
calculates the number of index entries -in the read view of the current transaction. */ -static -ibool -row_scan_and_check_index( -/*=====================*/ - /* out: TRUE if ok */ - row_prebuilt_t* prebuilt, /* in: prebuilt struct in MySQL */ - dict_index_t* index, /* in: index */ - ulint* n_rows) /* out: number of entries seen in the - current consistent read */ -{ - dtuple_t* prev_entry = NULL; - ulint matched_fields; - ulint matched_bytes; - byte* buf; - ulint ret; - rec_t* rec; - ibool is_ok = TRUE; - int cmp; - ibool contains_null; - ulint i; - ulint cnt; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - *n_rows = 0; - - buf = mem_alloc(UNIV_PAGE_SIZE); - heap = mem_heap_create(100); - - /* Make a dummy template in prebuilt, which we will use - in scanning the index entries */ - - prebuilt->index = index; - prebuilt->sql_stat_start = TRUE; - prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE; - prebuilt->n_template = 0; - prebuilt->need_to_access_clustered = FALSE; - - dtuple_set_n_fields(prebuilt->search_tuple, 0); - - prebuilt->select_lock_type = LOCK_NONE; - cnt = 1000; - - ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, 0); -loop: - /* Check thd->killed every 1,000 scanned rows */ - if (--cnt == 0) { - if (trx_is_interrupted(prebuilt->trx)) { - goto func_exit; - } - cnt = 1000; - } - if (ret != DB_SUCCESS) { -func_exit: - mem_free(buf); - mem_heap_free(heap); - - return(is_ok); - } - - *n_rows = *n_rows + 1; - - /* row_search... 
returns the index record in buf, record origin offset - within buf stored in the first 4 bytes, because we have built a dummy - template */ - - rec = buf + mach_read_from_4(buf); - - if (prev_entry != NULL) { - matched_fields = 0; - matched_bytes = 0; - - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - cmp = cmp_dtuple_rec_with_match(prev_entry, rec, offsets, - &matched_fields, - &matched_bytes); - contains_null = FALSE; - - /* In a unique secondary index we allow equal key values if - they contain SQL NULLs */ - - for (i = 0; - i < dict_index_get_n_ordering_defined_by_user(index); - i++) { - if (UNIV_SQL_NULL == dfield_get_len( - dtuple_get_nth_field(prev_entry, i))) { - - contains_null = TRUE; - } - } - - if (cmp > 0) { - fputs("InnoDB: index records in a wrong order in ", - stderr); -not_ok: - dict_index_name_print(stderr, - prebuilt->trx, index); - fputs("\n" - "InnoDB: prev record ", stderr); - dtuple_print(stderr, prev_entry); - fputs("\n" - "InnoDB: record ", stderr); - rec_print_new(stderr, rec, offsets); - putc('\n', stderr); - is_ok = FALSE; - } else if ((index->type & DICT_UNIQUE) - && !contains_null - && matched_fields - >= dict_index_get_n_ordering_defined_by_user( - index)) { - - fputs("InnoDB: duplicate key in ", stderr); - goto not_ok; - } - } - - mem_heap_empty(heap); - offsets = offsets_; - - prev_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); - - ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, ROW_SEL_NEXT); - - goto loop; -} - -/************************************************************************* -Checks a table for corruption. 
*/ - -ulint -row_check_table_for_mysql( -/*======================*/ - /* out: DB_ERROR or DB_SUCCESS */ - row_prebuilt_t* prebuilt) /* in: prebuilt struct in MySQL - handle */ -{ - dict_table_t* table = prebuilt->table; - dict_index_t* index; - ulint n_rows; - ulint n_rows_in_table = ULINT_UNDEFINED; - ulint ret = DB_SUCCESS; - ulint old_isolation_level; - - if (prebuilt->table->ibd_file_missing) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error:\n" - "InnoDB: MySQL is trying to use a table handle" - " but the .ibd file for\n" - "InnoDB: table %s does not exist.\n" - "InnoDB: Have you deleted the .ibd file" - " from the database directory under\n" - "InnoDB: the MySQL datadir, or have you" - " used DISCARD TABLESPACE?\n" - "InnoDB: Look from\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "innodb-troubleshooting.html\n" - "InnoDB: how you can resolve the problem.\n", - prebuilt->table->name); - return(DB_ERROR); - } - - prebuilt->trx->op_info = "checking table"; - - old_isolation_level = prebuilt->trx->isolation_level; - - /* We must run the index record counts at an isolation level - >= READ COMMITTED, because a dirty read can see a wrong number - of records in some index; to play safe, we use always - REPEATABLE READ here */ - - prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ; - - /* Enlarge the fatal lock wait timeout during CHECK TABLE. 
*/ - mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */ - mutex_exit(&kernel_mutex); - - index = dict_table_get_first_index(table); - - while (index != NULL) { - /* fputs("Validating index ", stderr); - ut_print_name(stderr, trx, FALSE, index->name); - putc('\n', stderr); */ - - if (!btr_validate_index(index, prebuilt->trx)) { - ret = DB_ERROR; - } else { - if (!row_scan_and_check_index(prebuilt, - index, &n_rows)) { - ret = DB_ERROR; - } - - if (trx_is_interrupted(prebuilt->trx)) { - break; - } - - /* fprintf(stderr, "%lu entries in index %s\n", n_rows, - index->name); */ - - if (index == dict_table_get_first_index(table)) { - n_rows_in_table = n_rows; - } else if (n_rows != n_rows_in_table) { - - ret = DB_ERROR; - - fputs("Error: ", stderr); - dict_index_name_print(stderr, - prebuilt->trx, index); - fprintf(stderr, - " contains %lu entries," - " should be %lu\n", - (ulong) n_rows, - (ulong) n_rows_in_table); - } - } - - index = dict_table_get_next_index(index); - } - - /* Restore the original isolation level */ - prebuilt->trx->isolation_level = old_isolation_level; - - /* We validate also the whole adaptive hash index for all tables - at every CHECK TABLE */ - - if (!btr_search_validate()) { - - ret = DB_ERROR; - } - - /* Restore the fatal lock wait timeout after CHECK TABLE. */ - mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */ - mutex_exit(&kernel_mutex); - - prebuilt->trx->op_info = ""; - - return(ret); -} - -/************************************************************************* -Determines if a table is a magic monitor table. 
*/ - -ibool -row_is_magic_monitor_table( -/*=======================*/ - /* out: TRUE if monitor table */ - const char* table_name) /* in: name of the table, in the - form database/table_name */ -{ - const char* name; /* table_name without database/ */ - ulint len; - - name = strchr(table_name, '/'); - ut_a(name != NULL); - name++; - len = strlen(name) + 1; - - if (STR_EQ(name, len, S_innodb_monitor) - || STR_EQ(name, len, S_innodb_lock_monitor) - || STR_EQ(name, len, S_innodb_tablespace_monitor) - || STR_EQ(name, len, S_innodb_table_monitor) - || STR_EQ(name, len, S_innodb_mem_validate)) { - - return(TRUE); - } - - return(FALSE); -} diff --git a/storage/innobase/row/row0purge.c b/storage/innobase/row/row0purge.c deleted file mode 100644 index 1fef47da13f..00000000000 --- a/storage/innobase/row/row0purge.c +++ /dev/null @@ -1,673 +0,0 @@ -/****************************************************** -Purge obsolete records - -(c) 1997 Innobase Oy - -Created 3/14/1997 Heikki Tuuri -*******************************************************/ - -#include "row0purge.h" - -#ifdef UNIV_NONINL -#include "row0purge.ic" -#endif - -#include "fsp0fsp.h" -#include "mach0data.h" -#include "trx0rseg.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "trx0undo.h" -#include "trx0purge.h" -#include "trx0rec.h" -#include "que0que.h" -#include "row0row.h" -#include "row0upd.h" -#include "row0vers.h" -#include "row0mysql.h" -#include "log0log.h" - -/************************************************************************ -Creates a purge node to a query graph. 
*/ - -purge_node_t* -row_purge_node_create( -/*==================*/ - /* out, own: purge node */ - que_thr_t* parent, /* in: parent node, i.e., a thr node */ - mem_heap_t* heap) /* in: memory heap where created */ -{ - purge_node_t* node; - - ut_ad(parent && heap); - - node = mem_heap_alloc(heap, sizeof(purge_node_t)); - - node->common.type = QUE_NODE_PURGE; - node->common.parent = parent; - - node->heap = mem_heap_create(256); - - return(node); -} - -/*************************************************************** -Repositions the pcur in the purge node on the clustered index record, -if found. */ -static -ibool -row_purge_reposition_pcur( -/*======================*/ - /* out: TRUE if the record was found */ - ulint mode, /* in: latching mode */ - purge_node_t* node, /* in: row purge node */ - mtr_t* mtr) /* in: mtr */ -{ - ibool found; - - if (node->found_clust) { - found = btr_pcur_restore_position(mode, &(node->pcur), mtr); - - return(found); - } - - found = row_search_on_row_ref(&(node->pcur), mode, node->table, - node->ref, mtr); - node->found_clust = found; - - if (found) { - btr_pcur_store_position(&(node->pcur), mtr); - } - - return(found); -} - -/*************************************************************** -Removes a delete marked clustered index record if possible. 
*/ -static -ibool -row_purge_remove_clust_if_poss_low( -/*===============================*/ - /* out: TRUE if success, or if not found, or - if modified after the delete marking */ - purge_node_t* node, /* in: row purge node */ - ulint mode) /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */ -{ - dict_index_t* index; - btr_pcur_t* pcur; - btr_cur_t* btr_cur; - ibool success; - ulint err; - mtr_t mtr; - rec_t* rec; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - index = dict_table_get_first_index(node->table); - - pcur = &(node->pcur); - btr_cur = btr_pcur_get_btr_cur(pcur); - - mtr_start(&mtr); - - success = row_purge_reposition_pcur(mode, node, &mtr); - - if (!success) { - /* The record is already removed */ - - btr_pcur_commit_specify_mtr(pcur, &mtr); - - return(TRUE); - } - - rec = btr_pcur_get_rec(pcur); - - if (0 != ut_dulint_cmp(node->roll_ptr, row_get_rec_roll_ptr( - rec, index, rec_get_offsets( - rec, index, offsets_, - ULINT_UNDEFINED, &heap)))) { - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - /* Someone else has modified the record later: do not remove */ - btr_pcur_commit_specify_mtr(pcur, &mtr); - - return(TRUE); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - if (mode == BTR_MODIFY_LEAF) { - success = btr_cur_optimistic_delete(btr_cur, &mtr); - } else { - ut_ad(mode == BTR_MODIFY_TREE); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, FALSE, &mtr); - - if (err == DB_SUCCESS) { - success = TRUE; - } else if (err == DB_OUT_OF_FILE_SPACE) { - success = FALSE; - } else { - ut_error; - } - } - - btr_pcur_commit_specify_mtr(pcur, &mtr); - - return(success); -} - -/*************************************************************** -Removes a clustered index record if it has not been modified after the delete -marking. 
*/ -static -void -row_purge_remove_clust_if_poss( -/*===========================*/ - purge_node_t* node) /* in: row purge node */ -{ - ibool success; - ulint n_tries = 0; - - /* fputs("Purge: Removing clustered record\n", stderr); */ - - success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_LEAF); - if (success) { - - return; - } -retry: - success = row_purge_remove_clust_if_poss_low(node, BTR_MODIFY_TREE); - /* The delete operation may fail if we have little - file space left: TODO: easiest to crash the database - and restart with more file space */ - - if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) { - n_tries++; - - os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME); - - goto retry; - } - - ut_a(success); -} - -/*************************************************************** -Removes a secondary index entry if possible. */ -static -ibool -row_purge_remove_sec_if_poss_low( -/*=============================*/ - /* out: TRUE if success or if not found */ - purge_node_t* node, /* in: row purge node */ - dict_index_t* index, /* in: index */ - dtuple_t* entry, /* in: index entry */ - ulint mode) /* in: latch mode BTR_MODIFY_LEAF or - BTR_MODIFY_TREE */ -{ - btr_pcur_t pcur; - btr_cur_t* btr_cur; - ibool success; - ibool old_has = 0; /* remove warning */ - ibool found; - ulint err; - mtr_t mtr; - mtr_t* mtr_vers; - - log_free_check(); - mtr_start(&mtr); - - found = row_search_index_entry(index, entry, mode, &pcur, &mtr); - - if (!found) { - /* Not found */ - - /* fputs("PURGE:........sec entry not found\n", stderr); */ - /* dtuple_print(entry); */ - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - return(TRUE); - } - - btr_cur = btr_pcur_get_btr_cur(&pcur); - - /* We should remove the index record if no later version of the row, - which cannot be purged yet, requires its existence. If some requires, - we should do nothing. 
*/ - - mtr_vers = mem_alloc(sizeof(mtr_t)); - - mtr_start(mtr_vers); - - success = row_purge_reposition_pcur(BTR_SEARCH_LEAF, node, mtr_vers); - - if (success) { - old_has = row_vers_old_has_index_entry( - TRUE, btr_pcur_get_rec(&(node->pcur)), - mtr_vers, index, entry); - } - - btr_pcur_commit_specify_mtr(&(node->pcur), mtr_vers); - - mem_free(mtr_vers); - - if (!success || !old_has) { - /* Remove the index record */ - - if (mode == BTR_MODIFY_LEAF) { - success = btr_cur_optimistic_delete(btr_cur, &mtr); - } else { - ut_ad(mode == BTR_MODIFY_TREE); - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, - FALSE, &mtr); - if (err == DB_SUCCESS) { - success = TRUE; - } else if (err == DB_OUT_OF_FILE_SPACE) { - success = FALSE; - } else { - ut_error; - } - } - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - return(success); -} - -/*************************************************************** -Removes a secondary index entry if possible. */ -UNIV_INLINE -void -row_purge_remove_sec_if_poss( -/*=========================*/ - purge_node_t* node, /* in: row purge node */ - dict_index_t* index, /* in: index */ - dtuple_t* entry) /* in: index entry */ -{ - ibool success; - ulint n_tries = 0; - - /* fputs("Purge: Removing secondary record\n", stderr); */ - - success = row_purge_remove_sec_if_poss_low(node, index, entry, - BTR_MODIFY_LEAF); - if (success) { - - return; - } -retry: - success = row_purge_remove_sec_if_poss_low(node, index, entry, - BTR_MODIFY_TREE); - /* The delete operation may fail if we have little - file space left: TODO: easiest to crash the database - and restart with more file space */ - - if (!success && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) { - - n_tries++; - - os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME); - - goto retry; - } - - ut_a(success); -} - -/*************************************************************** -Purges a delete marking of a record. 
*/ -static -void -row_purge_del_mark( -/*===============*/ - purge_node_t* node) /* in: row purge node */ -{ - mem_heap_t* heap; - dtuple_t* entry; - dict_index_t* index; - - ut_ad(node); - - heap = mem_heap_create(1024); - - while (node->index != NULL) { - index = node->index; - - /* Build the index entry */ - entry = row_build_index_entry(node->row, index, heap); - - row_purge_remove_sec_if_poss(node, index, entry); - - node->index = dict_table_get_next_index(node->index); - } - - mem_heap_free(heap); - - row_purge_remove_clust_if_poss(node); -} - -/*************************************************************** -Purges an update of an existing record. Also purges an update of a delete -marked record if that record contained an externally stored field. */ -static -void -row_purge_upd_exist_or_extern( -/*==========================*/ - purge_node_t* node) /* in: row purge node */ -{ - mem_heap_t* heap; - dtuple_t* entry; - dict_index_t* index; - upd_field_t* ufield; - ibool is_insert; - ulint rseg_id; - ulint page_no; - ulint offset; - ulint internal_offset; - byte* data_field; - ulint data_field_len; - ulint i; - mtr_t mtr; - - ut_ad(node); - - if (node->rec_type == TRX_UNDO_UPD_DEL_REC) { - - goto skip_secondaries; - } - - heap = mem_heap_create(1024); - - while (node->index != NULL) { - index = node->index; - - if (row_upd_changes_ord_field_binary(NULL, node->index, - node->update)) { - /* Build the older version of the index entry */ - entry = row_build_index_entry(node->row, index, heap); - - row_purge_remove_sec_if_poss(node, index, entry); - } - - node->index = dict_table_get_next_index(node->index); - } - - mem_heap_free(heap); - -skip_secondaries: - /* Free possible externally stored fields */ - for (i = 0; i < upd_get_n_fields(node->update); i++) { - - ufield = upd_get_nth_field(node->update, i); - - if (ufield->extern_storage) { - /* We use the fact that new_val points to - node->undo_rec and get thus the offset of - dfield data inside the unod record. 
Then we - can calculate from node->roll_ptr the file - address of the new_val data */ - - internal_offset = ((byte*)ufield->new_val.data) - - node->undo_rec; - - ut_a(internal_offset < UNIV_PAGE_SIZE); - - trx_undo_decode_roll_ptr(node->roll_ptr, - &is_insert, &rseg_id, - &page_no, &offset); - mtr_start(&mtr); - - /* We have to acquire an X-latch to the clustered - index tree */ - - index = dict_table_get_first_index(node->table); - - mtr_x_lock(dict_index_get_lock(index), &mtr); - - /* NOTE: we must also acquire an X-latch to the - root page of the tree. We will need it when we - free pages from the tree. If the tree is of height 1, - the tree X-latch does NOT protect the root page, - because it is also a leaf page. Since we will have a - latch on an undo log page, we would break the - latching order if we would only later latch the - root page of such a tree! */ - - btr_root_get(index, &mtr); - - /* We assume in purge of externally stored fields - that the space id of the undo log record is 0! */ - - data_field = buf_page_get(0, page_no, RW_X_LATCH, &mtr) - + offset + internal_offset; - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(buf_frame_align(data_field), - SYNC_TRX_UNDO_PAGE); -#endif /* UNIV_SYNC_DEBUG */ - - data_field_len = ufield->new_val.len; - - btr_free_externally_stored_field(index, data_field, - data_field_len, - FALSE, &mtr); - mtr_commit(&mtr); - } - } -} - -/*************************************************************** -Parses the row reference and other info in a modify undo log record. */ -static -ibool -row_purge_parse_undo_rec( -/*=====================*/ - /* out: TRUE if purge operation required: - NOTE that then the CALLER must unfreeze - data dictionary! 
*/ - purge_node_t* node, /* in: row undo node */ - ibool* updated_extern, - /* out: TRUE if an externally stored field - was updated */ - que_thr_t* thr) /* in: query thread */ -{ - dict_index_t* clust_index; - byte* ptr; - trx_t* trx; - dulint undo_no; - dulint table_id; - dulint trx_id; - dulint roll_ptr; - ulint info_bits; - ulint type; - ulint cmpl_info; - - ut_ad(node && thr); - - trx = thr_get_trx(thr); - - ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &cmpl_info, - updated_extern, &undo_no, &table_id); - node->rec_type = type; - - if (type == TRX_UNDO_UPD_DEL_REC && !(*updated_extern)) { - - return(FALSE); - } - - ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr, - &info_bits); - node->table = NULL; - - if (type == TRX_UNDO_UPD_EXIST_REC - && cmpl_info & UPD_NODE_NO_ORD_CHANGE && !(*updated_extern)) { - - /* Purge requires no changes to indexes: we may return */ - - return(FALSE); - } - - /* Prevent DROP TABLE etc. from running when we are doing the purge - for this row */ - - row_mysql_freeze_data_dictionary(trx); - - mutex_enter(&(dict_sys->mutex)); - - node->table = dict_table_get_on_id_low(table_id); - - mutex_exit(&(dict_sys->mutex)); - - if (node->table == NULL) { - /* The table has been dropped: no need to do purge */ - - row_mysql_unfreeze_data_dictionary(trx); - - return(FALSE); - } - - if (node->table->ibd_file_missing) { - /* We skip purge of missing .ibd files */ - - node->table = NULL; - - row_mysql_unfreeze_data_dictionary(trx); - - return(FALSE); - } - - clust_index = dict_table_get_first_index(node->table); - - if (clust_index == NULL) { - /* The table was corrupt in the data dictionary */ - - row_mysql_unfreeze_data_dictionary(trx); - - return(FALSE); - } - - ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref), - node->heap); - - ptr = trx_undo_update_rec_get_update(ptr, clust_index, type, trx_id, - roll_ptr, info_bits, trx, - node->heap, &(node->update)); - - /* Read to the partial row the fields that occur in 
indexes */ - - if (!(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { - ptr = trx_undo_rec_get_partial_row(ptr, clust_index, - &(node->row), node->heap); - } - - return(TRUE); -} - -/*************************************************************** -Fetches an undo log record and does the purge for the recorded operation. -If none left, or the current purge completed, returns the control to the -parent node, which is always a query thread node. */ -static -ulint -row_purge( -/*======*/ - /* out: DB_SUCCESS if operation successfully - completed, else error code */ - purge_node_t* node, /* in: row purge node */ - que_thr_t* thr) /* in: query thread */ -{ - dulint roll_ptr; - ibool purge_needed; - ibool updated_extern; - trx_t* trx; - - ut_ad(node && thr); - - trx = thr_get_trx(thr); - - node->undo_rec = trx_purge_fetch_next_rec(&roll_ptr, - &(node->reservation), - node->heap); - if (!node->undo_rec) { - /* Purge completed for this query thread */ - - thr->run_node = que_node_get_parent(node); - - return(DB_SUCCESS); - } - - node->roll_ptr = roll_ptr; - - if (node->undo_rec == &trx_purge_dummy_rec) { - purge_needed = FALSE; - } else { - purge_needed = row_purge_parse_undo_rec(node, &updated_extern, - thr); - /* If purge_needed == TRUE, we must also remember to unfreeze - data dictionary! 
*/ - } - - if (purge_needed) { - node->found_clust = FALSE; - - node->index = dict_table_get_next_index( - dict_table_get_first_index(node->table)); - - if (node->rec_type == TRX_UNDO_DEL_MARK_REC) { - row_purge_del_mark(node); - - } else if (updated_extern - || node->rec_type == TRX_UNDO_UPD_EXIST_REC) { - - row_purge_upd_exist_or_extern(node); - } - - if (node->found_clust) { - btr_pcur_close(&(node->pcur)); - } - - row_mysql_unfreeze_data_dictionary(trx); - } - - /* Do some cleanup */ - trx_purge_rec_release(node->reservation); - mem_heap_empty(node->heap); - - thr->run_node = node; - - return(DB_SUCCESS); -} - -/*************************************************************** -Does the purge operation for a single undo log record. This is a high-level -function used in an SQL execution graph. */ - -que_thr_t* -row_purge_step( -/*===========*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - purge_node_t* node; - ulint err; - - ut_ad(thr); - - node = thr->run_node; - - ut_ad(que_node_get_type(node) == QUE_NODE_PURGE); - - err = row_purge(node, thr); - - ut_ad(err == DB_SUCCESS); - - return(thr); -} diff --git a/storage/innobase/row/row0row.c b/storage/innobase/row/row0row.c deleted file mode 100644 index 08e50817db9..00000000000 --- a/storage/innobase/row/row0row.c +++ /dev/null @@ -1,726 +0,0 @@ -/****************************************************** -General row routines - -(c) 1996 Innobase Oy - -Created 4/20/1996 Heikki Tuuri -*******************************************************/ - -#include "row0row.h" - -#ifdef UNIV_NONINL -#include "row0row.ic" -#endif - -#include "dict0dict.h" -#include "btr0btr.h" -#include "mach0data.h" -#include "trx0rseg.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "trx0undo.h" -#include "trx0purge.h" -#include "trx0rec.h" -#include "que0que.h" -#include "row0row.h" -#include "row0upd.h" -#include "rem0cmp.h" -#include "read0read.h" - 
-/************************************************************************* -Reads the trx id or roll ptr field from a clustered index record: this function -is slower than the specialized inline functions. */ - -dulint -row_get_rec_sys_field( -/*==================*/ - /* out: value of the field */ - ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets)/* in: rec_get_offsets(rec, index) */ -{ - ulint pos; - byte* field; - ulint len; - - ut_ad(index->type & DICT_CLUSTERED); - - pos = dict_index_get_sys_col_pos(index, type); - - field = rec_get_nth_field(rec, offsets, pos, &len); - - if (type == DATA_TRX_ID) { - - return(trx_read_trx_id(field)); - } else { - ut_ad(type == DATA_ROLL_PTR); - - return(trx_read_roll_ptr(field)); - } -} - -/************************************************************************* -Sets the trx id or roll ptr field in a clustered index record: this function -is slower than the specialized inline functions. */ - -void -row_set_rec_sys_field( -/*==================*/ - /* out: value of the field */ - ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ - rec_t* rec, /* in: record */ - dict_index_t* index, /* in: clustered index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - dulint val) /* in: value to set */ -{ - ulint pos; - byte* field; - ulint len; - - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(rec_offs_validate(rec, index, offsets)); - - pos = dict_index_get_sys_col_pos(index, type); - - field = rec_get_nth_field(rec, offsets, pos, &len); - - if (type == DATA_TRX_ID) { - - trx_write_trx_id(field, val); - } else { - ut_ad(type == DATA_ROLL_PTR); - - trx_write_roll_ptr(field, val); - } -} - -/********************************************************************* -When an insert to a table is performed, this function builds the entry which -has to be inserted to an index on the table. 
*/ - -dtuple_t* -row_build_index_entry( -/*==================*/ - /* out: index entry which should be inserted */ - dtuple_t* row, /* in: row which should be inserted to the - table */ - dict_index_t* index, /* in: index on the table */ - mem_heap_t* heap) /* in: memory heap from which the memory for - the index entry is allocated */ -{ - dtuple_t* entry; - ulint entry_len; - dict_field_t* ind_field; - dfield_t* dfield; - dfield_t* dfield2; - ulint i; - ulint storage_len; - - ut_ad(row && index && heap); - ut_ad(dtuple_check_typed(row)); - - entry_len = dict_index_get_n_fields(index); - entry = dtuple_create(heap, entry_len); - - if (index->type & DICT_UNIVERSAL) { - dtuple_set_n_fields_cmp(entry, entry_len); - } else { - dtuple_set_n_fields_cmp( - entry, dict_index_get_n_unique_in_tree(index)); - } - - for (i = 0; i < entry_len; i++) { - const dict_col_t* col; - ind_field = dict_index_get_nth_field(index, i); - col = ind_field->col; - - dfield = dtuple_get_nth_field(entry, i); - - dfield2 = dtuple_get_nth_field(row, dict_col_get_no(col)); - - dfield_copy(dfield, dfield2); - - /* If a column prefix index, take only the prefix */ - if (ind_field->prefix_len > 0 - && dfield_get_len(dfield2) != UNIV_SQL_NULL) { - - storage_len = dtype_get_at_most_n_mbchars( - col->prtype, col->mbminlen, col->mbmaxlen, - ind_field->prefix_len, - dfield_get_len(dfield2), dfield2->data); - - dfield_set_len(dfield, storage_len); - } - } - - ut_ad(dtuple_check_typed(entry)); - - return(entry); -} - -/*********************************************************************** -An inverse function to dict_row_build_index_entry. Builds a row from a -record in a clustered index. */ - -dtuple_t* -row_build( -/*======*/ - /* out, own: row built; see the NOTE below! 
*/ - ulint type, /* in: ROW_COPY_POINTERS or ROW_COPY_DATA; - the latter copies also the data fields to - heap while the first only places pointers to - data fields on the index page, and thus is - more efficient */ - dict_index_t* index, /* in: clustered index */ - rec_t* rec, /* in: record in the clustered index; - NOTE: in the case ROW_COPY_POINTERS - the data fields in the row will point - directly into this record, therefore, - the buffer page of this record must be - at least s-latched and the latch held - as long as the row dtuple is used! */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) - or NULL, in which case this function - will invoke rec_get_offsets() */ - mem_heap_t* heap) /* in: memory heap from which the memory - needed is allocated */ -{ - dtuple_t* row; - dict_table_t* table; - dict_field_t* ind_field; - dfield_t* dfield; - ulint n_fields; - byte* field; - ulint len; - ulint row_len; - byte* buf; - ulint i; - mem_heap_t* tmp_heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_ad(index && rec && heap); - ut_ad(index->type & DICT_CLUSTERED); - - if (!offsets) { - offsets = rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &tmp_heap); - } else { - ut_ad(rec_offs_validate(rec, index, offsets)); - } - - if (type != ROW_COPY_POINTERS) { - /* Take a copy of rec to heap */ - buf = mem_heap_alloc(heap, rec_offs_size(offsets)); - rec = rec_copy(buf, rec, offsets); - /* Avoid a debug assertion in rec_offs_validate(). 
*/ - rec_offs_make_valid(rec, index, (ulint*) offsets); - } - - table = index->table; - row_len = dict_table_get_n_cols(table); - - row = dtuple_create(heap, row_len); - - dtuple_set_info_bits(row, rec_get_info_bits( - rec, dict_table_is_comp(table))); - - n_fields = rec_offs_n_fields(offsets); - - dict_table_copy_types(row, table); - - for (i = 0; i < n_fields; i++) { - ind_field = dict_index_get_nth_field(index, i); - - if (ind_field->prefix_len == 0) { - - const dict_col_t* col - = dict_field_get_col(ind_field); - - dfield = dtuple_get_nth_field(row, - dict_col_get_no(col)); - field = rec_get_nth_field(rec, offsets, i, &len); - - dfield_set_data(dfield, field, len); - } - } - - ut_ad(dtuple_check_typed(row)); - - if (tmp_heap) { - mem_heap_free(tmp_heap); - } - - return(row); -} - -/*********************************************************************** -Converts an index record to a typed data tuple. NOTE that externally -stored (often big) fields are NOT copied to heap. */ - -dtuple_t* -row_rec_to_index_entry( -/*===================*/ - /* out, own: index entry built; see the - NOTE below! */ - ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS: - the former copies also the data fields to - heap as the latter only places pointers to - data fields on the index page */ - dict_index_t* index, /* in: index */ - rec_t* rec, /* in: record in the index; - NOTE: in the case ROW_COPY_POINTERS - the data fields in the row will point - directly into this record, therefore, - the buffer page of this record must be - at least s-latched and the latch held - as long as the dtuple is used! 
*/ - mem_heap_t* heap) /* in: memory heap from which the memory - needed is allocated */ -{ - dtuple_t* entry; - dfield_t* dfield; - ulint i; - byte* field; - ulint len; - ulint rec_len; - byte* buf; - mem_heap_t* tmp_heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_ad(rec && heap && index); - - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &tmp_heap); - - if (type == ROW_COPY_DATA) { - /* Take a copy of rec to heap */ - buf = mem_heap_alloc(heap, rec_offs_size(offsets)); - rec = rec_copy(buf, rec, offsets); - /* Avoid a debug assertion in rec_offs_validate(). */ - rec_offs_make_valid(rec, index, offsets); - } - - rec_len = rec_offs_n_fields(offsets); - - entry = dtuple_create(heap, rec_len); - - dtuple_set_n_fields_cmp(entry, - dict_index_get_n_unique_in_tree(index)); - ut_ad(rec_len == dict_index_get_n_fields(index)); - - dict_index_copy_types(entry, index, rec_len); - - dtuple_set_info_bits(entry, - rec_get_info_bits(rec, rec_offs_comp(offsets))); - - for (i = 0; i < rec_len; i++) { - - dfield = dtuple_get_nth_field(entry, i); - field = rec_get_nth_field(rec, offsets, i, &len); - - dfield_set_data(dfield, field, len); - } - - ut_ad(dtuple_check_typed(entry)); - if (tmp_heap) { - mem_heap_free(tmp_heap); - } - - return(entry); -} - -/*********************************************************************** -Builds from a secondary index record a row reference with which we can -search the clustered index record. */ - -dtuple_t* -row_build_row_ref( -/*==============*/ - /* out, own: row reference built; see the - NOTE below! 
*/ - ulint type, /* in: ROW_COPY_DATA, or ROW_COPY_POINTERS: - the former copies also the data fields to - heap, whereas the latter only places pointers - to data fields on the index page */ - dict_index_t* index, /* in: index */ - rec_t* rec, /* in: record in the index; - NOTE: in the case ROW_COPY_POINTERS - the data fields in the row will point - directly into this record, therefore, - the buffer page of this record must be - at least s-latched and the latch held - as long as the row reference is used! */ - mem_heap_t* heap) /* in: memory heap from which the memory - needed is allocated */ -{ - dict_table_t* table; - dict_index_t* clust_index; - dfield_t* dfield; - dtuple_t* ref; - byte* field; - ulint len; - ulint ref_len; - ulint pos; - byte* buf; - ulint clust_col_prefix_len; - ulint i; - mem_heap_t* tmp_heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_ad(index && rec && heap); - - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &tmp_heap); - - if (type == ROW_COPY_DATA) { - /* Take a copy of rec to heap */ - - buf = mem_heap_alloc(heap, rec_offs_size(offsets)); - - rec = rec_copy(buf, rec, offsets); - /* Avoid a debug assertion in rec_offs_validate(). 
*/ - rec_offs_make_valid(rec, index, offsets); - } - - table = index->table; - - clust_index = dict_table_get_first_index(table); - - ref_len = dict_index_get_n_unique(clust_index); - - ref = dtuple_create(heap, ref_len); - - dict_index_copy_types(ref, clust_index, ref_len); - - for (i = 0; i < ref_len; i++) { - dfield = dtuple_get_nth_field(ref, i); - - pos = dict_index_get_nth_field_pos(index, clust_index, i); - - ut_a(pos != ULINT_UNDEFINED); - - field = rec_get_nth_field(rec, offsets, pos, &len); - - dfield_set_data(dfield, field, len); - - /* If the primary key contains a column prefix, then the - secondary index may contain a longer prefix of the same - column, or the full column, and we must adjust the length - accordingly. */ - - clust_col_prefix_len = dict_index_get_nth_field( - clust_index, i)->prefix_len; - - if (clust_col_prefix_len > 0) { - if (len != UNIV_SQL_NULL) { - - const dtype_t* dtype - = dfield_get_type(dfield); - - dfield_set_len(dfield, - dtype_get_at_most_n_mbchars( - dtype->prtype, - dtype->mbminlen, - dtype->mbmaxlen, - clust_col_prefix_len, - len, (char*) field)); - } - } - } - - ut_ad(dtuple_check_typed(ref)); - if (tmp_heap) { - mem_heap_free(tmp_heap); - } - - return(ref); -} - -/*********************************************************************** -Builds from a secondary index record a row reference with which we can -search the clustered index record. */ - -void -row_build_row_ref_in_tuple( -/*=======================*/ - dtuple_t* ref, /* in/out: row reference built; see the - NOTE below! */ - dict_index_t* index, /* in: index */ - rec_t* rec, /* in: record in the index; - NOTE: the data fields in ref will point - directly into this record, therefore, - the buffer page of this record must be - at least s-latched and the latch held - as long as the row reference is used! 
*/ - trx_t* trx) /* in: transaction */ -{ - dict_index_t* clust_index; - dfield_t* dfield; - byte* field; - ulint len; - ulint ref_len; - ulint pos; - ulint clust_col_prefix_len; - ulint i; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_a(ref); - ut_a(index); - ut_a(rec); - - if (UNIV_UNLIKELY(!index->table)) { - fputs("InnoDB: table ", stderr); -notfound: - ut_print_name(stderr, trx, TRUE, index->table_name); - fputs(" for index ", stderr); - ut_print_name(stderr, trx, FALSE, index->name); - fputs(" not found\n", stderr); - ut_error; - } - - clust_index = dict_table_get_first_index(index->table); - - if (!clust_index) { - fputs("InnoDB: clust index for table ", stderr); - goto notfound; - } - - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - - ref_len = dict_index_get_n_unique(clust_index); - - ut_ad(ref_len == dtuple_get_n_fields(ref)); - - dict_index_copy_types(ref, clust_index, ref_len); - - for (i = 0; i < ref_len; i++) { - dfield = dtuple_get_nth_field(ref, i); - - pos = dict_index_get_nth_field_pos(index, clust_index, i); - - ut_a(pos != ULINT_UNDEFINED); - - field = rec_get_nth_field(rec, offsets, pos, &len); - - dfield_set_data(dfield, field, len); - - /* If the primary key contains a column prefix, then the - secondary index may contain a longer prefix of the same - column, or the full column, and we must adjust the length - accordingly. 
*/ - - clust_col_prefix_len = dict_index_get_nth_field( - clust_index, i)->prefix_len; - - if (clust_col_prefix_len > 0) { - if (len != UNIV_SQL_NULL) { - - const dtype_t* dtype - = dfield_get_type(dfield); - - dfield_set_len(dfield, - dtype_get_at_most_n_mbchars( - dtype->prtype, - dtype->mbminlen, - dtype->mbmaxlen, - clust_col_prefix_len, - len, (char*) field)); - } - } - } - - ut_ad(dtuple_check_typed(ref)); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -} - -/*********************************************************************** -From a row build a row reference with which we can search the clustered -index record. */ - -void -row_build_row_ref_from_row( -/*=======================*/ - dtuple_t* ref, /* in/out: row reference built; see the - NOTE below! ref must have the right number - of fields! */ - dict_table_t* table, /* in: table */ - dtuple_t* row) /* in: row - NOTE: the data fields in ref will point - directly into data of this row */ -{ - dict_index_t* clust_index; - ulint ref_len; - ulint i; - - ut_ad(ref && table && row); - - clust_index = dict_table_get_first_index(table); - - ref_len = dict_index_get_n_unique(clust_index); - - ut_ad(ref_len == dtuple_get_n_fields(ref)); - - for (i = 0; i < ref_len; i++) { - const dict_col_t* col; - dict_field_t* field; - dfield_t* dfield; - dfield_t* dfield2; - - dfield = dtuple_get_nth_field(ref, i); - - field = dict_index_get_nth_field(clust_index, i); - - col = dict_field_get_col(field); - - dfield2 = dtuple_get_nth_field(row, dict_col_get_no(col)); - - dfield_copy(dfield, dfield2); - - if (field->prefix_len > 0 - && dfield->len != UNIV_SQL_NULL) { - - dfield->len = dtype_get_at_most_n_mbchars( - col->prtype, col->mbminlen, col->mbmaxlen, - field->prefix_len, dfield->len, dfield->data); - } - } - - ut_ad(dtuple_check_typed(ref)); -} - -/******************************************************************* -Searches the clustered index record for a row, if we have the row reference. 
*/ - -ibool -row_search_on_row_ref( -/*==================*/ - /* out: TRUE if found */ - btr_pcur_t* pcur, /* in/out: persistent cursor, which must - be closed by the caller */ - ulint mode, /* in: BTR_MODIFY_LEAF, ... */ - dict_table_t* table, /* in: table */ - dtuple_t* ref, /* in: row reference */ - mtr_t* mtr) /* in: mtr */ -{ - ulint low_match; - rec_t* rec; - dict_index_t* index; - - ut_ad(dtuple_check_typed(ref)); - - index = dict_table_get_first_index(table); - - ut_a(dtuple_get_n_fields(ref) == dict_index_get_n_unique(index)); - - btr_pcur_open(index, ref, PAGE_CUR_LE, mode, pcur, mtr); - - low_match = btr_pcur_get_low_match(pcur); - - rec = btr_pcur_get_rec(pcur); - - if (page_rec_is_infimum(rec)) { - - return(FALSE); - } - - if (low_match != dtuple_get_n_fields(ref)) { - - return(FALSE); - } - - return(TRUE); -} - -/************************************************************************* -Fetches the clustered index record for a secondary index record. The latches -on the secondary index record are preserved. */ - -rec_t* -row_get_clust_rec( -/*==============*/ - /* out: record or NULL, if no record found */ - ulint mode, /* in: BTR_MODIFY_LEAF, ... */ - rec_t* rec, /* in: record in a secondary index */ - dict_index_t* index, /* in: secondary index */ - dict_index_t** clust_index,/* out: clustered index */ - mtr_t* mtr) /* in: mtr */ -{ - mem_heap_t* heap; - dtuple_t* ref; - dict_table_t* table; - btr_pcur_t pcur; - ibool found; - rec_t* clust_rec; - - ut_ad((index->type & DICT_CLUSTERED) == 0); - - table = index->table; - - heap = mem_heap_create(256); - - ref = row_build_row_ref(ROW_COPY_POINTERS, index, rec, heap); - - found = row_search_on_row_ref(&pcur, mode, table, ref, mtr); - - clust_rec = found ? 
btr_pcur_get_rec(&pcur) : NULL; - - mem_heap_free(heap); - - btr_pcur_close(&pcur); - - *clust_index = dict_table_get_first_index(table); - - return(clust_rec); -} - -/******************************************************************* -Searches an index record. */ - -ibool -row_search_index_entry( -/*===================*/ - /* out: TRUE if found */ - dict_index_t* index, /* in: index */ - dtuple_t* entry, /* in: index entry */ - ulint mode, /* in: BTR_MODIFY_LEAF, ... */ - btr_pcur_t* pcur, /* in/out: persistent cursor, which must - be closed by the caller */ - mtr_t* mtr) /* in: mtr */ -{ - ulint n_fields; - ulint low_match; - rec_t* rec; - - ut_ad(dtuple_check_typed(entry)); - - btr_pcur_open(index, entry, PAGE_CUR_LE, mode, pcur, mtr); - low_match = btr_pcur_get_low_match(pcur); - - rec = btr_pcur_get_rec(pcur); - - n_fields = dtuple_get_n_fields(entry); - - if (page_rec_is_infimum(rec)) { - - return(FALSE); - } - - if (low_match != n_fields) { - /* Not found */ - - return(FALSE); - } - - return(TRUE); -} diff --git a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c deleted file mode 100644 index 29efb2861b7..00000000000 --- a/storage/innobase/row/row0sel.c +++ /dev/null @@ -1,4640 +0,0 @@ -/******************************************************* -Select - -(c) 1997 Innobase Oy - -Created 12/19/1997 Heikki Tuuri -*******************************************************/ - -#include "row0sel.h" - -#ifdef UNIV_NONINL -#include "row0sel.ic" -#endif - -#include "dict0dict.h" -#include "dict0boot.h" -#include "trx0undo.h" -#include "trx0trx.h" -#include "btr0btr.h" -#include "btr0cur.h" -#include "btr0sea.h" -#include "mach0data.h" -#include "que0que.h" -#include "row0upd.h" -#include "row0row.h" -#include "row0vers.h" -#include "rem0cmp.h" -#include "lock0lock.h" -#include "eval0eval.h" -#include "pars0sym.h" -#include "pars0pars.h" -#include "row0mysql.h" -#include "read0read.h" -#include "buf0lru.h" -#include "ha_prototypes.h" - -/* Maximum number 
of rows to prefetch; MySQL interface has another parameter */ -#define SEL_MAX_N_PREFETCH 16 - -/* Number of rows fetched, after which to start prefetching; MySQL interface -has another parameter */ -#define SEL_PREFETCH_LIMIT 1 - -/* When a select has accessed about this many pages, it returns control back -to que_run_threads: this is to allow canceling runaway queries */ - -#define SEL_COST_LIMIT 100 - -/* Flags for search shortcut */ -#define SEL_FOUND 0 -#define SEL_EXHAUSTED 1 -#define SEL_RETRY 2 - -/************************************************************************ -Returns TRUE if the user-defined column values in a secondary index record -are alphabetically the same as the corresponding columns in the clustered -index record. -NOTE: the comparison is NOT done as a binary comparison, but character -fields are compared with collation! */ -static -ibool -row_sel_sec_rec_is_for_clust_rec( -/*=============================*/ - /* out: TRUE if the secondary - record is equal to the corresponding - fields in the clustered record, - when compared with collation */ - rec_t* sec_rec, /* in: secondary index record */ - dict_index_t* sec_index, /* in: secondary index */ - rec_t* clust_rec, /* in: clustered index record */ - dict_index_t* clust_index) /* in: clustered index */ -{ - byte* sec_field; - ulint sec_len; - byte* clust_field; - ulint clust_len; - ulint n; - ulint i; - mem_heap_t* heap = NULL; - ulint clust_offsets_[REC_OFFS_NORMAL_SIZE]; - ulint sec_offsets_[REC_OFFS_SMALL_SIZE]; - ulint* clust_offs = clust_offsets_; - ulint* sec_offs = sec_offsets_; - ibool is_equal = TRUE; - - *clust_offsets_ = (sizeof clust_offsets_) / sizeof *clust_offsets_; - *sec_offsets_ = (sizeof sec_offsets_) / sizeof *sec_offsets_; - - clust_offs = rec_get_offsets(clust_rec, clust_index, clust_offs, - ULINT_UNDEFINED, &heap); - sec_offs = rec_get_offsets(sec_rec, sec_index, sec_offs, - ULINT_UNDEFINED, &heap); - - n = dict_index_get_n_ordering_defined_by_user(sec_index); - - 
for (i = 0; i < n; i++) { - const dict_field_t* ifield; - const dict_col_t* col; - - ifield = dict_index_get_nth_field(sec_index, i); - col = dict_field_get_col(ifield); - - clust_field = rec_get_nth_field( - clust_rec, clust_offs, - dict_col_get_clust_pos(col, clust_index), &clust_len); - sec_field = rec_get_nth_field(sec_rec, sec_offs, i, &sec_len); - - if (ifield->prefix_len > 0 && clust_len != UNIV_SQL_NULL) { - - clust_len = dtype_get_at_most_n_mbchars( - col->prtype, col->mbminlen, col->mbmaxlen, - ifield->prefix_len, - clust_len, (char*) clust_field); - } - - if (0 != cmp_data_data(col->mtype, col->prtype, - clust_field, clust_len, - sec_field, sec_len)) { - is_equal = FALSE; - goto func_exit; - } - } - -func_exit: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(is_equal); -} - -/************************************************************************* -Creates a select node struct. */ - -sel_node_t* -sel_node_create( -/*============*/ - /* out, own: select node struct */ - mem_heap_t* heap) /* in: memory heap where created */ -{ - sel_node_t* node; - - node = mem_heap_alloc(heap, sizeof(sel_node_t)); - node->common.type = QUE_NODE_SELECT; - node->state = SEL_NODE_OPEN; - - node->select_will_do_update = FALSE; - node->latch_mode = BTR_SEARCH_LEAF; - - node->plans = NULL; - - return(node); -} - -/************************************************************************* -Frees the memory private to a select node when a query graph is freed, -does not free the heap where the node was originally created. 
*/ - -void -sel_node_free_private( -/*==================*/ - sel_node_t* node) /* in: select node struct */ -{ - ulint i; - plan_t* plan; - - if (node->plans != NULL) { - for (i = 0; i < node->n_tables; i++) { - plan = sel_node_get_nth_plan(node, i); - - btr_pcur_close(&(plan->pcur)); - btr_pcur_close(&(plan->clust_pcur)); - - if (plan->old_vers_heap) { - mem_heap_free(plan->old_vers_heap); - } - } - } -} - -/************************************************************************* -Evaluates the values in a select list. If there are aggregate functions, -their argument value is added to the aggregate total. */ -UNIV_INLINE -void -sel_eval_select_list( -/*=================*/ - sel_node_t* node) /* in: select node */ -{ - que_node_t* exp; - - exp = node->select_list; - - while (exp) { - eval_exp(exp); - - exp = que_node_get_next(exp); - } -} - -/************************************************************************* -Assigns the values in the select list to the possible into-variables in -SELECT ... INTO ... */ -UNIV_INLINE -void -sel_assign_into_var_values( -/*=======================*/ - sym_node_t* var, /* in: first variable in a list of variables */ - sel_node_t* node) /* in: select node */ -{ - que_node_t* exp; - - if (var == NULL) { - - return; - } - - exp = node->select_list; - - while (var) { - ut_ad(exp); - - eval_node_copy_val(var->alias, exp); - - exp = que_node_get_next(exp); - var = que_node_get_next(var); - } -} - -/************************************************************************* -Resets the aggregate value totals in the select list of an aggregate type -query. 
*/ -UNIV_INLINE -void -sel_reset_aggregate_vals( -/*=====================*/ - sel_node_t* node) /* in: select node */ -{ - func_node_t* func_node; - - ut_ad(node->is_aggregate); - - func_node = node->select_list; - - while (func_node) { - eval_node_set_int_val(func_node, 0); - - func_node = que_node_get_next(func_node); - } - - node->aggregate_already_fetched = FALSE; -} - -/************************************************************************* -Copies the input variable values when an explicit cursor is opened. */ -UNIV_INLINE -void -row_sel_copy_input_variable_vals( -/*=============================*/ - sel_node_t* node) /* in: select node */ -{ - sym_node_t* var; - - var = UT_LIST_GET_FIRST(node->copy_variables); - - while (var) { - eval_node_copy_val(var, var->alias); - - var->indirection = NULL; - - var = UT_LIST_GET_NEXT(col_var_list, var); - } -} - -/************************************************************************* -Fetches the column values from a record. */ -static -void -row_sel_fetch_columns( -/*==================*/ - dict_index_t* index, /* in: record index */ - rec_t* rec, /* in: record in a clustered or non-clustered - index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - sym_node_t* column) /* in: first column in a column list, or - NULL */ -{ - dfield_t* val; - ulint index_type; - ulint field_no; - byte* data; - ulint len; - - ut_ad(rec_offs_validate(rec, index, offsets)); - - if (index->type & DICT_CLUSTERED) { - index_type = SYM_CLUST_FIELD_NO; - } else { - index_type = SYM_SEC_FIELD_NO; - } - - while (column) { - mem_heap_t* heap = NULL; - ibool needs_copy; - - field_no = column->field_nos[index_type]; - - if (field_no != ULINT_UNDEFINED) { - - if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets, - field_no))) { - - /* Copy an externally stored field to the - temporary heap */ - - heap = mem_heap_create(1); - - data = btr_rec_copy_externally_stored_field( - rec, offsets, field_no, &len, heap); - - ut_a(len != UNIV_SQL_NULL); 
- - needs_copy = TRUE; - } else { - data = rec_get_nth_field(rec, offsets, - field_no, &len); - - needs_copy = column->copy_val; - } - - if (needs_copy) { - eval_node_copy_and_alloc_val(column, data, - len); - } else { - val = que_node_get_val(column); - dfield_set_data(val, data, len); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - } - - column = UT_LIST_GET_NEXT(col_var_list, column); - } -} - -/************************************************************************* -Allocates a prefetch buffer for a column when prefetch is first time done. */ -static -void -sel_col_prefetch_buf_alloc( -/*=======================*/ - sym_node_t* column) /* in: symbol table node for a column */ -{ - sel_buf_t* sel_buf; - ulint i; - - ut_ad(que_node_get_type(column) == QUE_NODE_SYMBOL); - - column->prefetch_buf = mem_alloc(SEL_MAX_N_PREFETCH - * sizeof(sel_buf_t)); - for (i = 0; i < SEL_MAX_N_PREFETCH; i++) { - sel_buf = column->prefetch_buf + i; - - sel_buf->data = NULL; - - sel_buf->val_buf_size = 0; - } -} - -/************************************************************************* -Frees a prefetch buffer for a column, including the dynamically allocated -memory for data stored there. */ - -void -sel_col_prefetch_buf_free( -/*======================*/ - sel_buf_t* prefetch_buf) /* in, own: prefetch buffer */ -{ - sel_buf_t* sel_buf; - ulint i; - - for (i = 0; i < SEL_MAX_N_PREFETCH; i++) { - sel_buf = prefetch_buf + i; - - if (sel_buf->val_buf_size > 0) { - - mem_free(sel_buf->data); - } - } -} - -/************************************************************************* -Pops the column values for a prefetched, cached row from the column prefetch -buffers and places them to the val fields in the column nodes. 
*/ -static -void -sel_pop_prefetched_row( -/*===================*/ - plan_t* plan) /* in: plan node for a table */ -{ - sym_node_t* column; - sel_buf_t* sel_buf; - dfield_t* val; - byte* data; - ulint len; - ulint val_buf_size; - - ut_ad(plan->n_rows_prefetched > 0); - - column = UT_LIST_GET_FIRST(plan->columns); - - while (column) { - val = que_node_get_val(column); - - if (!column->copy_val) { - /* We did not really push any value for the - column */ - - ut_ad(!column->prefetch_buf); - ut_ad(que_node_get_val_buf_size(column) == 0); -#ifdef UNIV_DEBUG - dfield_set_data(val, NULL, 0); -#endif - goto next_col; - } - - ut_ad(column->prefetch_buf); - - sel_buf = column->prefetch_buf + plan->first_prefetched; - - data = sel_buf->data; - len = sel_buf->len; - val_buf_size = sel_buf->val_buf_size; - - /* We must keep track of the allocated memory for - column values to be able to free it later: therefore - we swap the values for sel_buf and val */ - - sel_buf->data = dfield_get_data(val); - sel_buf->len = dfield_get_len(val); - sel_buf->val_buf_size = que_node_get_val_buf_size(column); - - dfield_set_data(val, data, len); - que_node_set_val_buf_size(column, val_buf_size); -next_col: - column = UT_LIST_GET_NEXT(col_var_list, column); - } - - plan->n_rows_prefetched--; - - plan->first_prefetched++; -} - -/************************************************************************* -Pushes the column values for a prefetched, cached row to the column prefetch -buffers from the val fields in the column nodes. 
*/ -UNIV_INLINE -void -sel_push_prefetched_row( -/*====================*/ - plan_t* plan) /* in: plan node for a table */ -{ - sym_node_t* column; - sel_buf_t* sel_buf; - dfield_t* val; - byte* data; - ulint len; - ulint pos; - ulint val_buf_size; - - if (plan->n_rows_prefetched == 0) { - pos = 0; - plan->first_prefetched = 0; - } else { - pos = plan->n_rows_prefetched; - - /* We have the convention that pushing new rows starts only - after the prefetch stack has been emptied: */ - - ut_ad(plan->first_prefetched == 0); - } - - plan->n_rows_prefetched++; - - ut_ad(pos < SEL_MAX_N_PREFETCH); - - column = UT_LIST_GET_FIRST(plan->columns); - - while (column) { - if (!column->copy_val) { - /* There is no sense to push pointers to database - page fields when we do not keep latch on the page! */ - - goto next_col; - } - - if (!column->prefetch_buf) { - /* Allocate a new prefetch buffer */ - - sel_col_prefetch_buf_alloc(column); - } - - sel_buf = column->prefetch_buf + pos; - - val = que_node_get_val(column); - - data = dfield_get_data(val); - len = dfield_get_len(val); - val_buf_size = que_node_get_val_buf_size(column); - - /* We must keep track of the allocated memory for - column values to be able to free it later: therefore - we swap the values for sel_buf and val */ - - dfield_set_data(val, sel_buf->data, sel_buf->len); - que_node_set_val_buf_size(column, sel_buf->val_buf_size); - - sel_buf->data = data; - sel_buf->len = len; - sel_buf->val_buf_size = val_buf_size; -next_col: - column = UT_LIST_GET_NEXT(col_var_list, column); - } -} - -/************************************************************************* -Builds a previous version of a clustered index record for a consistent read */ -static -ulint -row_sel_build_prev_vers( -/*====================*/ - /* out: DB_SUCCESS or error code */ - read_view_t* read_view, /* in: read view */ - dict_index_t* index, /* in: plan node for table */ - rec_t* rec, /* in: record in a clustered index */ - ulint** offsets, /* in/out: 
offsets returned by - rec_get_offsets(rec, plan->index) */ - mem_heap_t** offset_heap, /* in/out: memory heap from which - the offsets are allocated */ - mem_heap_t** old_vers_heap, /* out: old version heap to use */ - rec_t** old_vers, /* out: old version, or NULL if the - record does not exist in the view: - i.e., it was freshly inserted - afterwards */ - mtr_t* mtr) /* in: mtr */ -{ - ulint err; - - if (*old_vers_heap) { - mem_heap_empty(*old_vers_heap); - } else { - *old_vers_heap = mem_heap_create(512); - } - - err = row_vers_build_for_consistent_read( - rec, mtr, index, offsets, read_view, offset_heap, - *old_vers_heap, old_vers); - return(err); -} - -/************************************************************************* -Builds the last committed version of a clustered index record for a -semi-consistent read. */ -static -ulint -row_sel_build_committed_vers_for_mysql( -/*===================================*/ - /* out: DB_SUCCESS or error code */ - dict_index_t* clust_index, /* in: clustered index */ - row_prebuilt_t* prebuilt, /* in: prebuilt struct */ - rec_t* rec, /* in: record in a clustered index */ - ulint** offsets, /* in/out: offsets returned by - rec_get_offsets(rec, clust_index) */ - mem_heap_t** offset_heap, /* in/out: memory heap from which - the offsets are allocated */ - rec_t** old_vers, /* out: old version, or NULL if the - record does not exist in the view: - i.e., it was freshly inserted - afterwards */ - mtr_t* mtr) /* in: mtr */ -{ - ulint err; - - if (prebuilt->old_vers_heap) { - mem_heap_empty(prebuilt->old_vers_heap); - } else { - prebuilt->old_vers_heap = mem_heap_create(200); - } - - err = row_vers_build_for_semi_consistent_read( - rec, mtr, clust_index, offsets, offset_heap, - prebuilt->old_vers_heap, old_vers); - return(err); -} - -/************************************************************************* -Tests the conditions which determine when the index segment we are searching -through has been exhausted. 
*/ -UNIV_INLINE -ibool -row_sel_test_end_conds( -/*===================*/ - /* out: TRUE if row passed the tests */ - plan_t* plan) /* in: plan for the table; the column values must - already have been retrieved and the right sides of - comparisons evaluated */ -{ - func_node_t* cond; - - /* All conditions in end_conds are comparisons of a column to an - expression */ - - cond = UT_LIST_GET_FIRST(plan->end_conds); - - while (cond) { - /* Evaluate the left side of the comparison, i.e., get the - column value if there is an indirection */ - - eval_sym(cond->args); - - /* Do the comparison */ - - if (!eval_cmp(cond)) { - - return(FALSE); - } - - cond = UT_LIST_GET_NEXT(cond_list, cond); - } - - return(TRUE); -} - -/************************************************************************* -Tests the other conditions. */ -UNIV_INLINE -ibool -row_sel_test_other_conds( -/*=====================*/ - /* out: TRUE if row passed the tests */ - plan_t* plan) /* in: plan for the table; the column values must - already have been retrieved */ -{ - func_node_t* cond; - - cond = UT_LIST_GET_FIRST(plan->other_conds); - - while (cond) { - eval_exp(cond); - - if (!eval_node_get_ibool_val(cond)) { - - return(FALSE); - } - - cond = UT_LIST_GET_NEXT(cond_list, cond); - } - - return(TRUE); -} - -/************************************************************************* -Retrieves the clustered index record corresponding to a record in a -non-clustered index. Does the necessary locking. 
*/
static
ulint
row_sel_get_clust_rec(
/*==================*/
			/* out: DB_SUCCESS or error code; on DB_SUCCESS
			*out_rec may still be NULL, see out_rec */
	sel_node_t*	node,	/* in: select_node */
	plan_t*		plan,	/* in: plan node for table */
	rec_t*		rec,	/* in: record in a non-clustered index */
	que_thr_t*	thr,	/* in: query thread */
	rec_t**		out_rec,/* out: clustered record or an old version of
				it, NULL if the old version did not exist
				in the read view, i.e., it was a fresh
				inserted version */
	mtr_t*		mtr)	/* in: mtr used to get access to the
				non-clustered record; the same mtr is used to
				access the clustered index */
{
	dict_index_t*	index;
	rec_t*		clust_rec;
	rec_t*		old_vers;
	ulint		err;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	/* Until a qualifying clustered record is found, report none */

	*out_rec = NULL;

	offsets = rec_get_offsets(rec,
				  btr_pcur_get_btr_cur(&plan->pcur)->index,
				  offsets, ULINT_UNDEFINED, &heap);

	/* Build the clustered index search tuple from the secondary
	index record and position clust_pcur with it */

	row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec, offsets);

	index = dict_table_get_first_index(plan->table);

	btr_pcur_open_with_no_init(index, plan->clust_ref, PAGE_CUR_LE,
				   node->latch_mode, &(plan->clust_pcur),
				   0, mtr);

	clust_rec = btr_pcur_get_rec(&(plan->clust_pcur));

	/* Note: only if the search ends up on a non-infimum record is the
	low_match value the real match to the search tuple */

	if (!page_rec_is_user_rec(clust_rec)
	    || btr_pcur_get_low_match(&(plan->clust_pcur))
	    < dict_index_get_n_unique(index)) {

		ut_a(rec_get_deleted_flag(rec,
					  dict_table_is_comp(plan->table)));
		ut_a(node->read_view);

		/* In a rare case it is possible that no clust rec is found
		for a delete-marked secondary index record: if in row0umod.c
		in row_undo_mod_remove_clust_low() we have already removed
		the clust rec, while purge is still cleaning and removing
		secondary index records associated with earlier versions of
		the clustered index record. In that case we know that the
		clustered index record did not exist in the read view of
		trx. */

		goto func_exit;
	}

	/* From here on offsets describe clust_rec, not rec */

	offsets = rec_get_offsets(clust_rec, index, offsets,
				  ULINT_UNDEFINED, &heap);

	if (!node->read_view) {
		/* Try to place a lock on the index record */

		/* If innodb_locks_unsafe_for_binlog option is used
		or this session is using READ COMMITTED isolation level
		we lock only the record, i.e., next-key locking is
		not used. */
		ulint	lock_type;
		trx_t*	trx;

		trx = thr_get_trx(thr);

		if (srv_locks_unsafe_for_binlog
		    || trx->isolation_level == TRX_ISO_READ_COMMITTED) {
			lock_type = LOCK_REC_NOT_GAP;
		} else {
			lock_type = LOCK_ORDINARY;
		}

		err = lock_clust_rec_read_check_and_lock(
			0, clust_rec, index, offsets,
			node->row_lock_mode, lock_type, thr);

		if (err != DB_SUCCESS) {

			goto err_exit;
		}
	} else {
		/* This is a non-locking consistent read: if necessary, fetch
		a previous version of the record */

		old_vers = NULL;

		if (!lock_clust_rec_cons_read_sees(clust_rec, index, offsets,
						   node->read_view)) {

			err = row_sel_build_prev_vers(
				node->read_view, index, clust_rec,
				&offsets, &heap, &plan->old_vers_heap,
				&old_vers, mtr);

			if (err != DB_SUCCESS) {

				goto err_exit;
			}

			clust_rec = old_vers;

			if (clust_rec == NULL) {
				/* No version of the record is visible in
				the read view: succeed with *out_rec NULL */

				goto func_exit;
			}
		}

		/* If we had to go to an earlier version of row or the
		secondary index record is delete marked, then it may be that
		the secondary index record corresponding to clust_rec
		(or old_vers) is not rec; in that case we must ignore
		such row because in our snapshot rec would not have existed.
		Remember that from rec we cannot see directly which transaction
		id corresponds to it: we have to go to the clustered index
		record. A query where we want to fetch all rows where
		the secondary index value is in some interval would return
		a wrong result if we would not drop rows which we come to
		visit through secondary index records that would not really
		exist in our snapshot. */

		if ((old_vers
		     || rec_get_deleted_flag(rec, dict_table_is_comp(
						     plan->table)))
		    && !row_sel_sec_rec_is_for_clust_rec(rec, plan->index,
							 clust_rec, index)) {
			goto func_exit;
		}
	}

	/* Fetch the columns needed in test conditions */

	row_sel_fetch_columns(index, clust_rec, offsets,
			      UT_LIST_GET_FIRST(plan->columns));
	*out_rec = clust_rec;
func_exit:
	/* Success path; *out_rec is NULL if we jumped here before a
	clustered record was accepted */
	err = DB_SUCCESS;
err_exit:
	/* Common cleanup: err is already set by whoever jumped here */
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	return(err);
}

/*************************************************************************
Sets a lock on a record. A clustered index record is locked with
lock_clust_rec_read_check_and_lock(), any other index record with
lock_sec_rec_read_check_and_lock(). */
UNIV_INLINE
ulint
sel_set_rec_lock(
/*=============*/
				/* out: DB_SUCCESS or error code;
				DB_LOCK_TABLE_FULL is returned without
				locking, see below */
	rec_t*		rec,	/* in: record */
	dict_index_t*	index,	/* in: index */
	const ulint*	offsets,/* in: rec_get_offsets(rec, index) */
	ulint		mode,	/* in: lock mode */
	ulint		type,	/* in: LOCK_ORDINARY, LOCK_GAP, or
				LOCK_REC_NOT_GAP */
	que_thr_t*	thr)	/* in: query thread */
{
	trx_t*	trx;
	ulint	err;

	trx = thr_get_trx(thr);

	/* Once the transaction holds more than 10000 locks, refuse to
	set new ones if buf_LRU_buf_pool_running_out() reports that the
	buffer pool is running out */

	if (UT_LIST_GET_LEN(trx->trx_locks) > 10000) {
		if (buf_LRU_buf_pool_running_out()) {

			return(DB_LOCK_TABLE_FULL);
		}
	}

	if (index->type & DICT_CLUSTERED) {
		err = lock_clust_rec_read_check_and_lock(
			0, rec, index, offsets, mode, type, thr);
	} else {
		err = lock_sec_rec_read_check_and_lock(
			0, rec, index, offsets, mode, type, thr);
	}

	return(err);
}

/*************************************************************************
Opens a pcur to a table index.
*/
static
void
row_sel_open_pcur(
/*==============*/
	sel_node_t*	node,		/* in: select node */
	plan_t*		plan,		/* in: table plan */
	ibool		search_latch_locked,
					/* in: TRUE if the thread currently
					has the search latch locked in
					s-mode */
	mtr_t*		mtr)		/* in: mtr */
{
	dict_index_t*	idx	= plan->index;
	ulint		s_latch	= search_latch_locked ? RW_S_LATCH : 0;
					/* RW_S_LATCH or 0 */
	func_node_t*	end_cond;

	/* Calculate the value of the search tuple: the exact match columns
	get their expressions evaluated when we evaluate the right sides of
	end_conds */

	for (end_cond = UT_LIST_GET_FIRST(plan->end_conds);
	     end_cond;
	     end_cond = UT_LIST_GET_NEXT(cond_list, end_cond)) {

		eval_exp(que_node_get_next(end_cond->args));
	}

	if (!plan->tuple) {
		/* No search tuple: open the cursor to the start or the
		end of the index (FALSE: no init) */

		btr_pcur_open_at_index_side(plan->asc, idx, node->latch_mode,
					    &(plan->pcur), FALSE, mtr);
	} else {
		ulint	n_flds = dtuple_get_n_fields(plan->tuple);
		ulint	fld;

		if (plan->n_exact_match < n_flds) {
			/* There is a non-exact match field which must be
			evaluated separately */

			eval_exp(plan->tuple_exps[n_flds - 1]);
		}

		/* Copy the expression values to the search tuple */

		for (fld = 0; fld < n_flds; fld++) {
			dfield_copy_data(
				dtuple_get_nth_field(plan->tuple, fld),
				que_node_get_val(plan->tuple_exps[fld]));
		}

		/* Open pcur to the index */

		btr_pcur_open_with_no_init(idx, plan->tuple, plan->mode,
					   node->latch_mode, &(plan->pcur),
					   s_latch, mtr);
	}

	ut_ad(plan->n_rows_prefetched == 0);
	ut_ad(plan->n_rows_fetched == 0);
	ut_ad(plan->cursor_at_end == FALSE);

	plan->pcur_is_open = TRUE;
}

/*************************************************************************
Restores a stored pcur position to a table index.
*/
static
ibool
row_sel_restore_pcur_pos(
/*=====================*/
				/* out: TRUE if the cursor should be moved to
				the next record after we return from this
				function (moved to the previous, in the case
				of a descending cursor) without processing
				again the current cursor record */
	sel_node_t*	node,	/* in: select node */
	plan_t*		plan,	/* in: table plan */
	mtr_t*		mtr)	/* in: mtr */
{
	ibool	equal_position;
	ulint	relative_position;

	ut_ad(!plan->cursor_at_end);

	/* Read the stored relative position before restoring the cursor;
	the decisions below are based on this value */

	relative_position = btr_pcur_get_rel_pos(&(plan->pcur));

	equal_position = btr_pcur_restore_position(node->latch_mode,
						   &(plan->pcur), mtr);

	/* If the cursor is traveling upwards, and relative_position is

	(1) BTR_PCUR_BEFORE: this is not allowed, as we did not have a lock
	yet on the successor of the page infimum;
	(2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
	first record GREATER than the predecessor of a page supremum; we have
	not yet processed the cursor record: no need to move the cursor to the
	next record;
	(3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
	last record LESS or EQUAL to the old stored user record; (a) if
	equal_position is FALSE, this means that the cursor is now on a record
	less than the old user record, and we must move to the next record;
	(b) if equal_position is TRUE, then if
	plan->stored_cursor_rec_processed is TRUE, we must move to the next
	record, else there is no need to move the cursor. */

	if (plan->asc) {
		if (relative_position == BTR_PCUR_ON) {

			if (equal_position) {
				/* Case (3b) */

				return(plan->stored_cursor_rec_processed);
			}

			/* Case (3a) */

			return(TRUE);
		}

		/* Case (2); case (1) is caught by the assertion in debug
		builds */

		ut_ad(relative_position == BTR_PCUR_AFTER
		      || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);

		return(FALSE);
	}

	/* If the cursor is traveling downwards, and relative_position is

	(1) BTR_PCUR_BEFORE: btr_pcur_restore_position placed the cursor on
	the last record LESS than the successor of a page infimum; we have not
	processed the cursor record: no need to move the cursor;
	(2) BTR_PCUR_AFTER: btr_pcur_restore_position placed the cursor on the
	first record GREATER than the predecessor of a page supremum; we have
	processed the cursor record: we should move the cursor to the previous
	record;
	(3) BTR_PCUR_ON: btr_pcur_restore_position placed the cursor on the
	last record LESS or EQUAL to the old stored user record; (a) if
	equal_position is FALSE, this means that the cursor is now on a record
	less than the old user record, and we need not move to the previous
	record; (b) if equal_position is TRUE, then if
	plan->stored_cursor_rec_processed is TRUE, we must move to the previous
	record, else there is no need to move the cursor. */

	if (relative_position == BTR_PCUR_BEFORE
	    || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE) {
		/* Case (1) */

		return(FALSE);
	}

	if (relative_position == BTR_PCUR_ON) {

		if (equal_position) {
			/* Case (3b) */

			return(plan->stored_cursor_rec_processed);
		}

		/* Case (3a) */

		return(FALSE);
	}

	/* Case (2) */

	ut_ad(relative_position == BTR_PCUR_AFTER
	      || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE);

	return(TRUE);
}

/*************************************************************************
Resets a plan cursor to a closed state.
*/
UNIV_INLINE
void
plan_reset_cursor(
/*==============*/
	plan_t*	plan)	/* in: plan */
{
	/* Mark the cursor closed and clear the fetch and prefetch
	counters */

	plan->pcur_is_open = FALSE;
	plan->cursor_at_end = FALSE;
	plan->n_rows_fetched = 0;
	plan->n_rows_prefetched = 0;
}

/*************************************************************************
Tries to do a shortcut to fetch a clustered index record with a unique key,
using the hash index if possible (not always). On SEL_FOUND the columns of
the record have been fetched and plan->n_rows_fetched has been incremented. */
static
ulint
row_sel_try_search_shortcut(
/*========================*/
				/* out: SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */
	sel_node_t*	node,	/* in: select node for a consistent read */
	plan_t*		plan,	/* in: plan for a unique search in clustered
				index */
	mtr_t*		mtr)	/* in: mtr */
{
	dict_index_t*	index;
	rec_t*		rec;
	mem_heap_t*	heap		= NULL;
	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
	ulint*		offsets		= offsets_;
	ulint		ret;
	*offsets_ = (sizeof offsets_) / sizeof *offsets_;

	index = plan->index;

	ut_ad(node->read_view);
	ut_ad(plan->unique_search);
	ut_ad(!plan->must_get_clust);
#ifdef UNIV_SYNC_DEBUG
	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_SHARED));
#endif /* UNIV_SYNC_DEBUG */

	/* TRUE: tell row_sel_open_pcur that the search latch is s-locked */

	row_sel_open_pcur(node, plan, TRUE, mtr);

	rec = btr_pcur_get_rec(&(plan->pcur));

	/* The two direct returns below happen before rec_get_offsets() is
	called, so heap is still NULL and needs no freeing */

	if (!page_rec_is_user_rec(rec)) {

		return(SEL_RETRY);
	}

	ut_ad(plan->mode == PAGE_CUR_GE);

	/* As the cursor is now placed on a user record after a search with
	the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
	fields in the user record matched to the search tuple */

	if (btr_pcur_get_up_match(&(plan->pcur)) < plan->n_exact_match) {

		return(SEL_EXHAUSTED);
	}

	/* This is a non-locking consistent read: if necessary, fetch
	a previous version of the record */

	offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap);

	/* If the read view does not see this version of the record, the
	shortcut is abandoned with SEL_RETRY */

	if (index->type & DICT_CLUSTERED) {
		if (!lock_clust_rec_cons_read_sees(rec, index, offsets,
						   node->read_view)) {
			ret = SEL_RETRY;
			goto func_exit;
		}
	} else if (!lock_sec_rec_cons_read_sees(rec, index, node->read_view)) {

		ret = SEL_RETRY;
		goto func_exit;
	}

	/* Test deleted flag. Fetch the columns needed in test conditions. */

	row_sel_fetch_columns(index, rec, offsets,
			      UT_LIST_GET_FIRST(plan->columns));

	if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table))) {

		ret = SEL_EXHAUSTED;
		goto func_exit;
	}

	/* Test the rest of search conditions */

	if (!row_sel_test_other_conds(plan)) {

		ret = SEL_EXHAUSTED;
		goto func_exit;
	}

	ut_ad(plan->pcur.latch_mode == node->latch_mode);

	plan->n_rows_fetched++;
	ret = SEL_FOUND;
func_exit:
	/* Common cleanup for every path taken after rec_get_offsets() */
	if (UNIV_LIKELY_NULL(heap)) {
		mem_heap_free(heap);
	}
	return(ret);
}

/*************************************************************************
Performs a select step. */
static
ulint
row_sel(
/*====*/
				/* out: DB_SUCCESS or error code */
	sel_node_t*	node,	/* in: select node */
	que_thr_t*	thr)	/* in: query thread */
{
	dict_index_t*	index;
	plan_t*		plan;
	mtr_t		mtr;
	ibool		moved;
	rec_t*		rec;
	rec_t*		old_vers;
	rec_t*		clust_rec;
	ibool		search_latch_locked;
	ibool		consistent_read;

	/* The following flag becomes TRUE when we are doing a
	consistent read from a non-clustered index and we must look
	at the clustered index to find out the previous delete mark
	state of the non-clustered record: */

	ibool		cons_read_requires_clust_rec	= FALSE;
	ulint		cost_counter			= 0;
	ibool		cursor_just_opened;
	ibool		must_go_to_next;
	ibool		leaf_contains_updates		= FALSE;
					/* TRUE if select_will_do_update is
					TRUE and the current clustered index
					leaf page has been updated during
					the current mtr: mtr must be committed
					at the same time as the leaf x-latch
					is released */
	ibool		mtr_has_extra_clust_latch	= FALSE;
					/* TRUE if the search was made using
					a non-clustered index, and we had to
					access the clustered record: now &mtr
					contains a clustered index latch, and
					&mtr must be committed before we move
					to the next
non-clustered record */ - ulint found_flag; - ulint err; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_ad(thr->run_node == node); - - search_latch_locked = FALSE; - - if (node->read_view) { - /* In consistent reads, we try to do with the hash index and - not to use the buffer page get. This is to reduce memory bus - load resulting from semaphore operations. The search latch - will be s-locked when we access an index with a unique search - condition, but not locked when we access an index with a - less selective search condition. */ - - consistent_read = TRUE; - } else { - consistent_read = FALSE; - } - -table_loop: - /* TABLE LOOP - ---------- - This is the outer major loop in calculating a join. We come here when - node->fetch_table changes, and after adding a row to aggregate totals - and, of course, when this function is called. */ - - ut_ad(leaf_contains_updates == FALSE); - ut_ad(mtr_has_extra_clust_latch == FALSE); - - plan = sel_node_get_nth_plan(node, node->fetch_table); - index = plan->index; - - if (plan->n_rows_prefetched > 0) { - sel_pop_prefetched_row(plan); - - goto next_table_no_mtr; - } - - if (plan->cursor_at_end) { - /* The cursor has already reached the result set end: no more - rows to process for this table cursor, as also the prefetch - stack was empty */ - - ut_ad(plan->pcur_is_open); - - goto table_exhausted_no_mtr; - } - - /* Open a cursor to index, or restore an open cursor position */ - - mtr_start(&mtr); - - if (consistent_read && plan->unique_search && !plan->pcur_is_open - && !plan->must_get_clust - && !plan->table->big_rows) { - if (!search_latch_locked) { - rw_lock_s_lock(&btr_search_latch); - - search_latch_locked = TRUE; - } else if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_WAIT_EX) { - - /* There is an x-latch request waiting: release the - s-latch for a moment; as an s-latch here is often - kept for some 10 
searches before being released, - a waiting x-latch request would block other threads - from acquiring an s-latch for a long time, lowering - performance significantly in multiprocessors. */ - - rw_lock_s_unlock(&btr_search_latch); - rw_lock_s_lock(&btr_search_latch); - } - - found_flag = row_sel_try_search_shortcut(node, plan, &mtr); - - if (found_flag == SEL_FOUND) { - - goto next_table; - - } else if (found_flag == SEL_EXHAUSTED) { - - goto table_exhausted; - } - - ut_ad(found_flag == SEL_RETRY); - - plan_reset_cursor(plan); - - mtr_commit(&mtr); - mtr_start(&mtr); - } - - if (search_latch_locked) { - rw_lock_s_unlock(&btr_search_latch); - - search_latch_locked = FALSE; - } - - if (!plan->pcur_is_open) { - /* Evaluate the expressions to build the search tuple and - open the cursor */ - - row_sel_open_pcur(node, plan, search_latch_locked, &mtr); - - cursor_just_opened = TRUE; - - /* A new search was made: increment the cost counter */ - cost_counter++; - } else { - /* Restore pcur position to the index */ - - must_go_to_next = row_sel_restore_pcur_pos(node, plan, &mtr); - - cursor_just_opened = FALSE; - - if (must_go_to_next) { - /* We have already processed the cursor record: move - to the next */ - - goto next_rec; - } - } - -rec_loop: - /* RECORD LOOP - ----------- - In this loop we use pcur and try to fetch a qualifying row, and - also fill the prefetch buffer for this table if n_rows_fetched has - exceeded a threshold. While we are inside this loop, the following - holds: - (1) &mtr is started, - (2) pcur is positioned and open. - - NOTE that if cursor_just_opened is TRUE here, it means that we came - to this point right after row_sel_open_pcur. 
*/ - - ut_ad(mtr_has_extra_clust_latch == FALSE); - - rec = btr_pcur_get_rec(&(plan->pcur)); - - /* PHASE 1: Set a lock if specified */ - - if (!node->asc && cursor_just_opened - && !page_rec_is_supremum(rec)) { - - /* When we open a cursor for a descending search, we must set - a next-key lock on the successor record: otherwise it would - be possible to insert new records next to the cursor position, - and it might be that these new records should appear in the - search result set, resulting in the phantom problem. */ - - if (!consistent_read) { - - /* If innodb_locks_unsafe_for_binlog option is used - or this session is using READ COMMITTED isolation - level, we lock only the record, i.e., next-key - locking is not used. */ - - rec_t* next_rec = page_rec_get_next(rec); - ulint lock_type; - trx_t* trx; - - trx = thr_get_trx(thr); - - offsets = rec_get_offsets(next_rec, index, offsets, - ULINT_UNDEFINED, &heap); - - if (srv_locks_unsafe_for_binlog - || trx->isolation_level - == TRX_ISO_READ_COMMITTED) { - - if (page_rec_is_supremum(next_rec)) { - - goto skip_lock; - } - - lock_type = LOCK_REC_NOT_GAP; - } else { - lock_type = LOCK_ORDINARY; - } - - err = sel_set_rec_lock(next_rec, index, offsets, - node->row_lock_mode, - lock_type, thr); - - if (err != DB_SUCCESS) { - /* Note that in this case we will store in pcur - the PREDECESSOR of the record we are waiting - the lock for */ - - goto lock_wait_or_error; - } - } - } - -skip_lock: - if (page_rec_is_infimum(rec)) { - - /* The infimum record on a page cannot be in the result set, - and neither can a record lock be placed on it: we skip such - a record. We also increment the cost counter as we may have - processed yet another page of index. 
*/ - - cost_counter++; - - goto next_rec; - } - - if (!consistent_read) { - /* Try to place a lock on the index record */ - - /* If innodb_locks_unsafe_for_binlog option is used - or this session is using READ COMMITTED isolation level, - we lock only the record, i.e., next-key locking is - not used. */ - - ulint lock_type; - trx_t* trx; - - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - - trx = thr_get_trx(thr); - - if (srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) { - - if (page_rec_is_supremum(rec)) { - - goto next_rec; - } - - lock_type = LOCK_REC_NOT_GAP; - } else { - lock_type = LOCK_ORDINARY; - } - - err = sel_set_rec_lock(rec, index, offsets, - node->row_lock_mode, lock_type, thr); - - if (err != DB_SUCCESS) { - - goto lock_wait_or_error; - } - } - - if (page_rec_is_supremum(rec)) { - - /* A page supremum record cannot be in the result set: skip - it now when we have placed a possible lock on it */ - - goto next_rec; - } - - ut_ad(page_rec_is_user_rec(rec)); - - if (cost_counter > SEL_COST_LIMIT) { - - /* Now that we have placed the necessary locks, we can stop - for a while and store the cursor position; NOTE that if we - would store the cursor position BEFORE placing a record lock, - it might happen that the cursor would jump over some records - that another transaction could meanwhile insert adjacent to - the cursor: this would result in the phantom problem. 
*/ - - goto stop_for_a_while; - } - - /* PHASE 2: Check a mixed index mix id if needed */ - - if (plan->unique_search && cursor_just_opened) { - - ut_ad(plan->mode == PAGE_CUR_GE); - - /* As the cursor is now placed on a user record after a search - with the mode PAGE_CUR_GE, the up_match field in the cursor - tells how many fields in the user record matched to the search - tuple */ - - if (btr_pcur_get_up_match(&(plan->pcur)) - < plan->n_exact_match) { - goto table_exhausted; - } - - /* Ok, no need to test end_conds or mix id */ - - } - - /* We are ready to look at a possible new index entry in the result - set: the cursor is now placed on a user record */ - - /* PHASE 3: Get previous version in a consistent read */ - - cons_read_requires_clust_rec = FALSE; - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - - if (consistent_read) { - /* This is a non-locking consistent read: if necessary, fetch - a previous version of the record */ - - if (index->type & DICT_CLUSTERED) { - - if (!lock_clust_rec_cons_read_sees(rec, index, offsets, - node->read_view)) { - - err = row_sel_build_prev_vers( - node->read_view, index, rec, - &offsets, &heap, &plan->old_vers_heap, - &old_vers, &mtr); - - if (err != DB_SUCCESS) { - - goto lock_wait_or_error; - } - - if (old_vers == NULL) { - offsets = rec_get_offsets( - rec, index, offsets, - ULINT_UNDEFINED, &heap); - row_sel_fetch_columns( - index, rec, offsets, - UT_LIST_GET_FIRST( - plan->columns)); - - if (!row_sel_test_end_conds(plan)) { - - goto table_exhausted; - } - - goto next_rec; - } - - rec = old_vers; - } - } else if (!lock_sec_rec_cons_read_sees(rec, index, - node->read_view)) { - cons_read_requires_clust_rec = TRUE; - } - } - - /* PHASE 4: Test search end conditions and deleted flag */ - - /* Fetch the columns needed in test conditions */ - - row_sel_fetch_columns(index, rec, offsets, - UT_LIST_GET_FIRST(plan->columns)); - - /* Test the selection end conditions: these can only contain columns - 
which already are found in the index, even though the index might be - non-clustered */ - - if (plan->unique_search && cursor_just_opened) { - - /* No test necessary: the test was already made above */ - - } else if (!row_sel_test_end_conds(plan)) { - - goto table_exhausted; - } - - if (rec_get_deleted_flag(rec, dict_table_is_comp(plan->table)) - && !cons_read_requires_clust_rec) { - - /* The record is delete marked: we can skip it if this is - not a consistent read which might see an earlier version - of a non-clustered index record */ - - if (plan->unique_search) { - - goto table_exhausted; - } - - goto next_rec; - } - - /* PHASE 5: Get the clustered index record, if needed and if we did - not do the search using the clustered index */ - - if (plan->must_get_clust || cons_read_requires_clust_rec) { - - /* It was a non-clustered index and we must fetch also the - clustered index record */ - - err = row_sel_get_clust_rec(node, plan, rec, thr, &clust_rec, - &mtr); - mtr_has_extra_clust_latch = TRUE; - - if (err != DB_SUCCESS) { - - goto lock_wait_or_error; - } - - /* Retrieving the clustered record required a search: - increment the cost counter */ - - cost_counter++; - - if (clust_rec == NULL) { - /* The record did not exist in the read view */ - ut_ad(consistent_read); - - goto next_rec; - } - - if (rec_get_deleted_flag(clust_rec, - dict_table_is_comp(plan->table))) { - - /* The record is delete marked: we can skip it */ - - goto next_rec; - } - - if (node->can_get_updated) { - - btr_pcur_store_position(&(plan->clust_pcur), &mtr); - } - } - - /* PHASE 6: Test the rest of search conditions */ - - if (!row_sel_test_other_conds(plan)) { - - if (plan->unique_search) { - - goto table_exhausted; - } - - goto next_rec; - } - - /* PHASE 7: We found a new qualifying row for the current table; push - the row if prefetch is on, or move to the next table in the join */ - - plan->n_rows_fetched++; - - ut_ad(plan->pcur.latch_mode == node->latch_mode); - - if 
(node->select_will_do_update) { - /* This is a searched update and we can do the update in-place, - saving CPU time */ - - row_upd_in_place_in_select(node, thr, &mtr); - - leaf_contains_updates = TRUE; - - /* When the database is in the online backup mode, the number - of log records for a single mtr should be small: increment the - cost counter to ensure it */ - - cost_counter += 1 + (SEL_COST_LIMIT / 8); - - if (plan->unique_search) { - - goto table_exhausted; - } - - goto next_rec; - } - - if ((plan->n_rows_fetched <= SEL_PREFETCH_LIMIT) - || plan->unique_search || plan->no_prefetch - || plan->table->big_rows) { - - /* No prefetch in operation: go to the next table */ - - goto next_table; - } - - sel_push_prefetched_row(plan); - - if (plan->n_rows_prefetched == SEL_MAX_N_PREFETCH) { - - /* The prefetch buffer is now full */ - - sel_pop_prefetched_row(plan); - - goto next_table; - } - -next_rec: - ut_ad(!search_latch_locked); - - if (mtr_has_extra_clust_latch) { - - /* We must commit &mtr if we are moving to the next - non-clustered index record, because we could break the - latching order if we would access a different clustered - index page right away without releasing the previous. 
*/ - - goto commit_mtr_for_a_while; - } - - if (leaf_contains_updates - && btr_pcur_is_after_last_on_page(&(plan->pcur), &mtr)) { - - /* We must commit &mtr if we are moving to a different page, - because we have done updates to the x-latched leaf page, and - the latch would be released in btr_pcur_move_to_next, without - &mtr getting committed there */ - - ut_ad(node->asc); - - goto commit_mtr_for_a_while; - } - - if (node->asc) { - moved = btr_pcur_move_to_next(&(plan->pcur), &mtr); - } else { - moved = btr_pcur_move_to_prev(&(plan->pcur), &mtr); - } - - if (!moved) { - - goto table_exhausted; - } - - cursor_just_opened = FALSE; - - /* END OF RECORD LOOP - ------------------ */ - goto rec_loop; - -next_table: - /* We found a record which satisfies the conditions: we can move to - the next table or return a row in the result set */ - - ut_ad(btr_pcur_is_on_user_rec(&(plan->pcur), &mtr)); - - if (plan->unique_search && !node->can_get_updated) { - - plan->cursor_at_end = TRUE; - } else { - ut_ad(!search_latch_locked); - - plan->stored_cursor_rec_processed = TRUE; - - btr_pcur_store_position(&(plan->pcur), &mtr); - } - - mtr_commit(&mtr); - - leaf_contains_updates = FALSE; - mtr_has_extra_clust_latch = FALSE; - -next_table_no_mtr: - /* If we use 'goto' to this label, it means that the row was popped - from the prefetched rows stack, and &mtr is already committed */ - - if (node->fetch_table + 1 == node->n_tables) { - - sel_eval_select_list(node); - - if (node->is_aggregate) { - - goto table_loop; - } - - sel_assign_into_var_values(node->into_list, node); - - thr->run_node = que_node_get_parent(node); - - if (search_latch_locked) { - rw_lock_s_unlock(&btr_search_latch); - } - - err = DB_SUCCESS; - goto func_exit; - } - - node->fetch_table++; - - /* When we move to the next table, we first reset the plan cursor: - we do not care about resetting it when we backtrack from a table */ - - plan_reset_cursor(sel_node_get_nth_plan(node, node->fetch_table)); - - goto 
table_loop; - -table_exhausted: - /* The table cursor pcur reached the result set end: backtrack to the - previous table in the join if we do not have cached prefetched rows */ - - plan->cursor_at_end = TRUE; - - mtr_commit(&mtr); - - leaf_contains_updates = FALSE; - mtr_has_extra_clust_latch = FALSE; - - if (plan->n_rows_prefetched > 0) { - /* The table became exhausted during a prefetch */ - - sel_pop_prefetched_row(plan); - - goto next_table_no_mtr; - } - -table_exhausted_no_mtr: - if (node->fetch_table == 0) { - err = DB_SUCCESS; - - if (node->is_aggregate && !node->aggregate_already_fetched) { - - node->aggregate_already_fetched = TRUE; - - sel_assign_into_var_values(node->into_list, node); - - thr->run_node = que_node_get_parent(node); - - if (search_latch_locked) { - rw_lock_s_unlock(&btr_search_latch); - } - - goto func_exit; - } - - node->state = SEL_NODE_NO_MORE_ROWS; - - thr->run_node = que_node_get_parent(node); - - if (search_latch_locked) { - rw_lock_s_unlock(&btr_search_latch); - } - - goto func_exit; - } - - node->fetch_table--; - - goto table_loop; - -stop_for_a_while: - /* Return control for a while to que_run_threads, so that runaway - queries can be canceled. NOTE that when we come here, we must, in a - locking read, have placed the necessary (possibly waiting request) - record lock on the cursor record or its successor: when we reposition - the cursor, this record lock guarantees that nobody can meanwhile have - inserted new records which should have appeared in the result set, - which would result in the phantom problem. 
*/ - - ut_ad(!search_latch_locked); - - plan->stored_cursor_rec_processed = FALSE; - btr_pcur_store_position(&(plan->pcur), &mtr); - - mtr_commit(&mtr); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(sync_thread_levels_empty_gen(TRUE)); -#endif /* UNIV_SYNC_DEBUG */ - err = DB_SUCCESS; - goto func_exit; - -commit_mtr_for_a_while: - /* Stores the cursor position and commits &mtr; this is used if - &mtr may contain latches which would break the latching order if - &mtr would not be committed and the latches released. */ - - plan->stored_cursor_rec_processed = TRUE; - - ut_ad(!search_latch_locked); - btr_pcur_store_position(&(plan->pcur), &mtr); - - mtr_commit(&mtr); - - leaf_contains_updates = FALSE; - mtr_has_extra_clust_latch = FALSE; - -#ifdef UNIV_SYNC_DEBUG - ut_ad(sync_thread_levels_empty_gen(TRUE)); -#endif /* UNIV_SYNC_DEBUG */ - - goto table_loop; - -lock_wait_or_error: - /* See the note at stop_for_a_while: the same holds for this case */ - - ut_ad(!btr_pcur_is_before_first_on_page(&(plan->pcur), &mtr) - || !node->asc); - ut_ad(!search_latch_locked); - - plan->stored_cursor_rec_processed = FALSE; - btr_pcur_store_position(&(plan->pcur), &mtr); - - mtr_commit(&mtr); - -#ifdef UNIV_SYNC_DEBUG - ut_ad(sync_thread_levels_empty_gen(TRUE)); -#endif /* UNIV_SYNC_DEBUG */ - -func_exit: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(err); -} - -/************************************************************************** -Performs a select step. This is a high-level function used in SQL execution -graphs. 
*/ - -que_thr_t* -row_sel_step( -/*=========*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - ulint i_lock_mode; - sym_node_t* table_node; - sel_node_t* node; - ulint err; - - ut_ad(thr); - - node = thr->run_node; - - ut_ad(que_node_get_type(node) == QUE_NODE_SELECT); - - /* If this is a new time this node is executed (or when execution - resumes after wait for a table intention lock), set intention locks - on the tables, or assign a read view */ - - if (node->into_list && (thr->prev_node == que_node_get_parent(node))) { - - node->state = SEL_NODE_OPEN; - } - - if (node->state == SEL_NODE_OPEN) { - - /* It may be that the current session has not yet started - its transaction, or it has been committed: */ - - trx_start_if_not_started(thr_get_trx(thr)); - - plan_reset_cursor(sel_node_get_nth_plan(node, 0)); - - if (node->consistent_read) { - /* Assign a read view for the query */ - node->read_view = trx_assign_read_view( - thr_get_trx(thr)); - } else { - if (node->set_x_locks) { - i_lock_mode = LOCK_IX; - } else { - i_lock_mode = LOCK_IS; - } - - table_node = node->table_list; - - while (table_node) { - err = lock_table(0, table_node->table, - i_lock_mode, thr); - if (err != DB_SUCCESS) { - thr_get_trx(thr)->error_state = err; - - return(NULL); - } - - table_node = que_node_get_next(table_node); - } - } - - /* If this is an explicit cursor, copy stored procedure - variable values, so that the values cannot change between - fetches (currently, we copy them also for non-explicit - cursors) */ - - if (node->explicit_cursor - && UT_LIST_GET_FIRST(node->copy_variables)) { - - row_sel_copy_input_variable_vals(node); - } - - node->state = SEL_NODE_FETCH; - node->fetch_table = 0; - - if (node->is_aggregate) { - /* Reset the aggregate total values */ - sel_reset_aggregate_vals(node); - } - } - - err = row_sel(node, thr); - - /* NOTE! 
if queries are parallelized, the following assignment may - have problems; the assignment should be made only if thr is the - only top-level thr in the graph: */ - - thr->graph->last_sel_node = node; - - if (err != DB_SUCCESS) { - thr_get_trx(thr)->error_state = err; - - return(NULL); - } - - return(thr); -} - -/************************************************************************** -Performs a fetch for a cursor. */ - -que_thr_t* -fetch_step( -/*=======*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - sel_node_t* sel_node; - fetch_node_t* node; - - ut_ad(thr); - - node = thr->run_node; - sel_node = node->cursor_def; - - ut_ad(que_node_get_type(node) == QUE_NODE_FETCH); - - if (thr->prev_node != que_node_get_parent(node)) { - - if (sel_node->state != SEL_NODE_NO_MORE_ROWS) { - - if (node->into_list) { - sel_assign_into_var_values(node->into_list, - sel_node); - } else { - void* ret = (*node->func->func)( - sel_node, node->func->arg); - - if (!ret) { - sel_node->state - = SEL_NODE_NO_MORE_ROWS; - } - } - } - - thr->run_node = que_node_get_parent(node); - - return(thr); - } - - /* Make the fetch node the parent of the cursor definition for - the time of the fetch, so that execution knows to return to this - fetch node after a row has been selected or we know that there is - no row left */ - - sel_node->common.parent = node; - - if (sel_node->state == SEL_NODE_CLOSED) { - fprintf(stderr, - "InnoDB: Error: fetch called on a closed cursor\n"); - - thr_get_trx(thr)->error_state = DB_ERROR; - - return(NULL); - } - - thr->run_node = sel_node; - - return(thr); -} - -/******************************************************************** -Sample callback function for fetch that prints each row.*/ - -void* -row_fetch_print( -/*============*/ - /* out: always returns non-NULL */ - void* row, /* in: sel_node_t* */ - void* user_arg) /* in: not used */ -{ - sel_node_t* node = row; - que_node_t* exp; - ulint i = 0; - - 
UT_NOT_USED(user_arg); - - fprintf(stderr, "row_fetch_print: row %p\n", row); - - exp = node->select_list; - - while (exp) { - dfield_t* dfield = que_node_get_val(exp); - dtype_t* type = dfield_get_type(dfield); - - fprintf(stderr, " column %lu:\n", (ulong)i); - - dtype_print(type); - fprintf(stderr, "\n"); - - if (dfield_get_len(dfield) != UNIV_SQL_NULL) { - ut_print_buf(stderr, dfield_get_data(dfield), - dfield_get_len(dfield)); - } else { - fprintf(stderr, " <NULL>;"); - } - - fprintf(stderr, "\n"); - - exp = que_node_get_next(exp); - i++; - } - - return((void*)42); -} - -/******************************************************************** -Callback function for fetch that stores an unsigned 4 byte integer to the -location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length -= 4. */ - -void* -row_fetch_store_uint4( -/*==================*/ - /* out: always returns NULL */ - void* row, /* in: sel_node_t* */ - void* user_arg) /* in: data pointer */ -{ - sel_node_t* node = row; - ib_uint32_t* val = user_arg; - ulint tmp; - - dfield_t* dfield = que_node_get_val(node->select_list); - dtype_t* type = dfield_get_type(dfield); - ulint len = dfield_get_len(dfield); - - ut_a(dtype_get_mtype(type) == DATA_INT); - ut_a(dtype_get_prtype(type) & DATA_UNSIGNED); - ut_a(len == 4); - - tmp = mach_read_from_4(dfield_get_data(dfield)); - *val = (ib_uint32_t) tmp; - - return(NULL); -} - -/*************************************************************** -Prints a row in a select result. 
*/ - -que_thr_t* -row_printf_step( -/*============*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - row_printf_node_t* node; - sel_node_t* sel_node; - que_node_t* arg; - - ut_ad(thr); - - node = thr->run_node; - - sel_node = node->sel_node; - - ut_ad(que_node_get_type(node) == QUE_NODE_ROW_PRINTF); - - if (thr->prev_node == que_node_get_parent(node)) { - - /* Reset the cursor */ - sel_node->state = SEL_NODE_OPEN; - - /* Fetch next row to print */ - - thr->run_node = sel_node; - - return(thr); - } - - if (sel_node->state != SEL_NODE_FETCH) { - - ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS); - - /* No more rows to print */ - - thr->run_node = que_node_get_parent(node); - - return(thr); - } - - arg = sel_node->select_list; - - while (arg) { - dfield_print_also_hex(que_node_get_val(arg)); - - fputs(" ::: ", stderr); - - arg = que_node_get_next(arg); - } - - putc('\n', stderr); - - /* Fetch next row to print */ - - thr->run_node = sel_node; - - return(thr); -} - -/******************************************************************** -Converts a key value stored in MySQL format to an Innobase dtuple. The last -field of the key value may be just a prefix of a fixed length field: hence -the parameter key_len. But currently we do not allow search keys where the -last field is only a prefix of the full key field len and print a warning if -such appears. A counterpart of this function is -ha_innobase::store_key_val_for_row() in ha_innodb.cc. */ - -void -row_sel_convert_mysql_key_to_innobase( -/*==================================*/ - dtuple_t* tuple, /* in: tuple where to build; - NOTE: we assume that the type info - in the tuple is already according - to index! 
*/ - byte* buf, /* in: buffer to use in field - conversions */ - ulint buf_len, /* in: buffer length */ - dict_index_t* index, /* in: index of the key value */ - byte* key_ptr, /* in: MySQL key value */ - ulint key_len, /* in: MySQL key value length */ - trx_t* trx) /* in: transaction */ -{ - byte* original_buf = buf; - byte* original_key_ptr = key_ptr; - dict_field_t* field; - dfield_t* dfield; - ulint data_offset; - ulint data_len; - ulint data_field_len; - ibool is_null; - byte* key_end; - ulint n_fields = 0; - ulint type; - - /* For documentation of the key value storage format in MySQL, see - ha_innobase::store_key_val_for_row() in ha_innodb.cc. */ - - key_end = key_ptr + key_len; - - /* Permit us to access any field in the tuple (ULINT_MAX): */ - - dtuple_set_n_fields(tuple, ULINT_MAX); - - dfield = dtuple_get_nth_field(tuple, 0); - field = dict_index_get_nth_field(index, 0); - - if (dfield_get_type(dfield)->mtype == DATA_SYS) { - /* A special case: we are looking for a position in the - generated clustered index which InnoDB automatically added - to a table with no primary key: the first and the only - ordering column is ROW_ID which InnoDB stored to the key_ptr - buffer. 
*/ - - ut_a(key_len == DATA_ROW_ID_LEN); - - dfield_set_data(dfield, key_ptr, DATA_ROW_ID_LEN); - - dtuple_set_n_fields(tuple, 1); - - return; - } - - while (key_ptr < key_end) { - - ut_a(field->col->mtype == dfield_get_type(dfield)->mtype); - - data_offset = 0; - is_null = FALSE; - - if (!(dfield_get_type(dfield)->prtype & DATA_NOT_NULL)) { - /* The first byte in the field tells if this is - an SQL NULL value */ - - data_offset = 1; - - if (*key_ptr != 0) { - dfield_set_data(dfield, NULL, UNIV_SQL_NULL); - - is_null = TRUE; - } - } - - type = dfield_get_type(dfield)->mtype; - - /* Calculate data length and data field total length */ - - if (type == DATA_BLOB) { - /* The key field is a column prefix of a BLOB or - TEXT */ - - ut_a(field->prefix_len > 0); - - /* MySQL stores the actual data length to the first 2 - bytes after the optional SQL NULL marker byte. The - storage format is little-endian, that is, the most - significant byte at a higher address. In UTF-8, MySQL - seems to reserve field->prefix_len bytes for - storing this field in the key value buffer, even - though the actual value only takes data_len bytes - from the start. */ - - data_len = key_ptr[data_offset] - + 256 * key_ptr[data_offset + 1]; - data_field_len = data_offset + 2 + field->prefix_len; - - data_offset += 2; - - /* Now that we know the length, we store the column - value like it would be a fixed char field */ - - } else if (field->prefix_len > 0) { - /* Looks like MySQL pads unused end bytes in the - prefix with space. Therefore, also in UTF-8, it is ok - to compare with a prefix containing full prefix_len - bytes, and no need to take at most prefix_len / 3 - UTF-8 characters from the start. - If the prefix is used as the upper end of a LIKE - 'abc%' query, then MySQL pads the end with chars - 0xff. TODO: in that case does it any harm to compare - with the full prefix_len bytes. How do characters - 0xff in UTF-8 behave? 
*/ - - data_len = field->prefix_len; - data_field_len = data_offset + data_len; - } else { - data_len = dfield_get_type(dfield)->len; - data_field_len = data_offset + data_len; - } - - if (dtype_get_mysql_type(dfield_get_type(dfield)) - == DATA_MYSQL_TRUE_VARCHAR - && dfield_get_type(dfield)->mtype != DATA_INT) { - /* In a MySQL key value format, a true VARCHAR is - always preceded by 2 bytes of a length field. - dfield_get_type(dfield)->len returns the maximum - 'payload' len in bytes. That does not include the - 2 bytes that tell the actual data length. - - We added the check != DATA_INT to make sure we do - not treat MySQL ENUM or SET as a true VARCHAR! */ - - data_len += 2; - data_field_len += 2; - } - - /* Storing may use at most data_len bytes of buf */ - - if (!is_null) { - row_mysql_store_col_in_innobase_format( - dfield, buf, - FALSE, /* MySQL key value format col */ - key_ptr + data_offset, data_len, - dict_table_is_comp(index->table)); - buf += data_len; - } - - key_ptr += data_field_len; - - if (key_ptr > key_end) { - /* The last field in key was not a complete key field - but a prefix of it. - - Print a warning about this! HA_READ_PREFIX_LAST does - not currently work in InnoDB with partial-field key - value prefixes. Since MySQL currently uses a padding - trick to calculate LIKE 'abc%' type queries there - should never be partial-field prefixes in searches. */ - - ut_print_timestamp(stderr); - - fputs(" InnoDB: Warning: using a partial-field" - " key prefix in search.\n" - "InnoDB: ", stderr); - dict_index_name_print(stderr, trx, index); - fprintf(stderr, ". 
Last data field length %lu bytes,\n" - "InnoDB: key ptr now exceeds" - " key end by %lu bytes.\n" - "InnoDB: Key value in the MySQL format:\n", - (ulong) data_field_len, - (ulong) (key_ptr - key_end)); - fflush(stderr); - ut_print_buf(stderr, original_key_ptr, key_len); - fprintf(stderr, "\n"); - - if (!is_null) { - dfield->len -= (ulint)(key_ptr - key_end); - } - } - - n_fields++; - field++; - dfield++; - } - - ut_a(buf <= original_buf + buf_len); - - /* We set the length of tuple to n_fields: we assume that the memory - area allocated for it is big enough (usually bigger than n_fields). */ - - dtuple_set_n_fields(tuple, n_fields); -} - -/****************************************************************** -Stores the row id to the prebuilt struct. */ -static -void -row_sel_store_row_id_to_prebuilt( -/*=============================*/ - row_prebuilt_t* prebuilt, /* in: prebuilt */ - rec_t* index_rec, /* in: record */ - dict_index_t* index, /* in: index of the record */ - const ulint* offsets) /* in: rec_get_offsets - (index_rec, index) */ -{ - byte* data; - ulint len; - - ut_ad(rec_offs_validate(index_rec, index, offsets)); - - data = rec_get_nth_field( - index_rec, offsets, - dict_index_get_sys_col_pos(index, DATA_ROW_ID), &len); - - if (len != DATA_ROW_ID_LEN) { - fprintf(stderr, - "InnoDB: Error: Row id field is" - " wrong length %lu in ", (ulong) len); - dict_index_name_print(stderr, prebuilt->trx, index); - fprintf(stderr, "\n" - "InnoDB: Field number %lu, record:\n", - (ulong) dict_index_get_sys_col_pos(index, - DATA_ROW_ID)); - rec_print_new(stderr, index_rec, offsets); - putc('\n', stderr); - ut_error; - } - - ut_memcpy(prebuilt->row_id, data, len); -} - -/****************************************************************** -Stores a non-SQL-NULL field in the MySQL format. The counterpart of this -function is row_mysql_store_col_in_innobase_format() in row0mysql.c. 
*/ -static -void -row_sel_field_store_in_mysql_format( -/*================================*/ - byte* dest, /* in/out: buffer where to store; NOTE that BLOBs - are not in themselves stored here: the caller must - allocate and copy the BLOB into buffer before, and pass - the pointer to the BLOB in 'data' */ - const mysql_row_templ_t* templ, /* in: MySQL column template. - Its following fields are referenced: - type, is_unsigned, mysql_col_len, mbminlen, mbmaxlen */ - byte* data, /* in: data to store */ - ulint len) /* in: length of the data */ -{ - byte* ptr; - byte* field_end; - byte* pad_ptr; - - ut_ad(len != UNIV_SQL_NULL); - - if (templ->type == DATA_INT) { - /* Convert integer data from Innobase to a little-endian - format, sign bit restored to normal */ - - ptr = dest + len; - - for (;;) { - ptr--; - *ptr = *data; - if (ptr == dest) { - break; - } - data++; - } - - if (!templ->is_unsigned) { - dest[len - 1] = (byte) (dest[len - 1] ^ 128); - } - - ut_ad(templ->mysql_col_len == len); - } else if (templ->type == DATA_VARCHAR - || templ->type == DATA_VARMYSQL - || templ->type == DATA_BINARY) { - - field_end = dest + templ->mysql_col_len; - - if (templ->mysql_type == DATA_MYSQL_TRUE_VARCHAR) { - /* This is a >= 5.0.3 type true VARCHAR. Store the - length of the data to the first byte or the first - two bytes of dest. */ - - dest = row_mysql_store_true_var_len( - dest, len, templ->mysql_length_bytes); - } - - /* Copy the actual data */ - ut_memcpy(dest, data, len); - - /* Pad with trailing spaces. We pad with spaces also the - unused end of a >= 5.0.3 true VARCHAR column, just in case - MySQL expects its contents to be deterministic. */ - - pad_ptr = dest + len; - - ut_ad(templ->mbminlen <= templ->mbmaxlen); - - /* We handle UCS2 charset strings differently. */ - if (templ->mbminlen == 2) { - /* A space char is two bytes, 0x0020 in UCS2 */ - - if (len & 1) { - /* A 0x20 has been stripped from the column. - Pad it back. 
*/ - - if (pad_ptr < field_end) { - *pad_ptr = 0x20; - pad_ptr++; - } - } - - /* Pad the rest of the string with 0x0020 */ - - while (pad_ptr < field_end) { - *pad_ptr = 0x00; - pad_ptr++; - *pad_ptr = 0x20; - pad_ptr++; - } - } else { - ut_ad(templ->mbminlen == 1); - /* space=0x20 */ - - memset(pad_ptr, 0x20, field_end - pad_ptr); - } - } else if (templ->type == DATA_BLOB) { - /* Store a pointer to the BLOB buffer to dest: the BLOB was - already copied to the buffer in row_sel_store_mysql_rec */ - - row_mysql_store_blob_ref(dest, templ->mysql_col_len, data, - len); - } else if (templ->type == DATA_MYSQL) { - memcpy(dest, data, len); - - ut_ad(templ->mysql_col_len >= len); - ut_ad(templ->mbmaxlen >= templ->mbminlen); - - ut_ad(templ->mbmaxlen > templ->mbminlen - || templ->mysql_col_len == len); - /* The following assertion would fail for old tables - containing UTF-8 ENUM columns due to Bug #9526. */ - ut_ad(!templ->mbmaxlen - || !(templ->mysql_col_len % templ->mbmaxlen)); - ut_ad(len * templ->mbmaxlen >= templ->mysql_col_len); - - if (templ->mbminlen != templ->mbmaxlen) { - /* Pad with spaces. This undoes the stripping - done in row0mysql.ic, function - row_mysql_store_col_in_innobase_format(). */ - - memset(dest + len, 0x20, templ->mysql_col_len - len); - } - } else { - ut_ad(templ->type == DATA_CHAR - || templ->type == DATA_FIXBINARY - /*|| templ->type == DATA_SYS_CHILD - || templ->type == DATA_SYS*/ - || templ->type == DATA_FLOAT - || templ->type == DATA_DOUBLE - || templ->type == DATA_DECIMAL); - ut_ad(templ->mysql_col_len == len); - - memcpy(dest, data, len); - } -} - -/****************************************************************** -Convert a row in the Innobase format to a row in the MySQL format. -Note that the template in prebuilt may advise us to copy only a few -columns to mysql_rec, other columns are left blank. All columns may not -be needed in the query. 
*/
-static
-ibool
-row_sel_store_mysql_rec(
-/*====================*/
-					/* out: TRUE if success, FALSE if
-					could not allocate memory for a BLOB
-					(though we may also assert in that
-					case); NOTE: the only return statement
-					in this body is return(TRUE), so FALSE
-					is never produced by the code as
-					written */
-	byte*		mysql_rec,	/* out: row in the MySQL format */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct; its blob_heap
-					is freed at entry and re-created on
-					demand while BLOB columns are
-					copied */
-	rec_t*		rec,		/* in: Innobase record in the index
-					which was described in prebuilt's
-					template */
-	const ulint*	offsets)	/* in: array returned by
-					rec_get_offsets() */
-{
-	mysql_row_templ_t*	templ;
-	mem_heap_t*		extern_field_heap	= NULL;
-	mem_heap_t*		heap;
-	byte*			data;
-	ulint			len;
-	ulint			i;
-
-	ut_ad(prebuilt->mysql_template);
-	ut_ad(prebuilt->default_rec);
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-
-	if (UNIV_LIKELY_NULL(prebuilt->blob_heap)) { /* discard BLOB copies
-						made by an earlier call */
-		mem_heap_free(prebuilt->blob_heap);
-		prebuilt->blob_heap = NULL;
-	}
-
-	for (i = 0; i < prebuilt->n_template; i++) {
-
-		templ = prebuilt->mysql_template + i;
-
-		if (UNIV_UNLIKELY(rec_offs_nth_extern(offsets,
-						      templ->rec_field_no))) {
-
-			/* Copy an externally stored field to the temporary
-			heap. Two heaps are in play: a DATA_BLOB column is
-			copied to prebuilt->blob_heap, which is not freed
-			before this function returns; any other column type
-			is copied to extern_field_heap, which is freed
-			further down as soon as the value has been stored
-			in mysql_rec. */
-
-			ut_a(!prebuilt->trx->has_search_latch);
-
-			if (UNIV_UNLIKELY(templ->type == DATA_BLOB)) {
-				if (prebuilt->blob_heap == NULL) {
-					prebuilt->blob_heap = mem_heap_create(
-						UNIV_PAGE_SIZE);
-				}
-
-				heap = prebuilt->blob_heap;
-			} else {
-				extern_field_heap
-					= mem_heap_create(UNIV_PAGE_SIZE);
-
-				heap = extern_field_heap;
-			}
-
-			/* NOTE: if we are retrieving a big BLOB, we may
-			already run out of memory in the next call, which
-			causes an assert */
-
-			data = btr_rec_copy_externally_stored_field(
-				rec, offsets, templ->rec_field_no,
-				&len, heap);
-
-			ut_a(len != UNIV_SQL_NULL);
-		} else {
-			/* Field is stored in the row. */
-
-			data = rec_get_nth_field(rec, offsets,
-						 templ->rec_field_no, &len);
-
-			if (UNIV_UNLIKELY(templ->type == DATA_BLOB)
-			    && len != UNIV_SQL_NULL) {
-
-				/* It is a BLOB field locally stored in the
-				InnoDB record: we MUST copy its contents to
-				prebuilt->blob_heap here because later code
-				assumes all BLOB values have been copied to a
-				safe place. */
-
-				if (prebuilt->blob_heap == NULL) {
-					prebuilt->blob_heap = mem_heap_create(
-						UNIV_PAGE_SIZE);
-				}
-
-				data = memcpy(mem_heap_alloc(
-						prebuilt->blob_heap, len),
-					      data, len);
-			}
-		}
-
-		if (len != UNIV_SQL_NULL) {
-			row_sel_field_store_in_mysql_format(
-				mysql_rec + templ->mysql_col_offset,
-				templ, data, len);
-
-			/* Cleanup: the non-BLOB external copy is no longer
-			needed once the value is in mysql_rec; resetting the
-			pointer to NULL keeps the next loop iteration from
-			freeing the same heap again */
-			if (extern_field_heap) {
-				mem_heap_free(extern_field_heap);
-				extern_field_heap = NULL;
-			}
-
-			if (templ->mysql_null_bit_mask) {
-				/* It is a nullable column with a non-NULL
-				value: clear its NULL bit in the row */
-				mysql_rec[templ->mysql_null_byte_offset]
-					&= ~(byte) templ->mysql_null_bit_mask;
-			}
-		} else {
-			/* MySQL assumes that the field for an SQL
-			NULL value is set to the default value. The NULL
-			bit is set and the column bytes are taken from
-			prebuilt->default_rec at the same offset. */
-
-			mysql_rec[templ->mysql_null_byte_offset]
-				|= (byte) templ->mysql_null_bit_mask;
-			memcpy(mysql_rec + templ->mysql_col_offset,
-			       prebuilt->default_rec + templ->mysql_col_offset,
-			       templ->mysql_col_len);
-		}
-	}
-
-	return(TRUE);
-}
-
-/*************************************************************************
-Builds a previous version of a clustered index record for a consistent read.
-The work is delegated to row_vers_build_for_consistent_read(); the old
-version is built in prebuilt->old_vers_heap, which is emptied at the start of
-every call (or created with an initial size of 200 on first use), so a version
-returned by one call does not survive the next call on the same prebuilt. */
-static
-ulint
-row_sel_build_prev_vers_for_mysql(
-/*==============================*/
-					/* out: DB_SUCCESS or error code,
-					passed through unchanged from
-					row_vers_build_for_consistent_read() */
-	read_view_t*	read_view,	/* in: read view */
-	dict_index_t*	clust_index,	/* in: clustered index */
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
-	rec_t*		rec,		/* in: record in a clustered index */
-	ulint**		offsets,	/* in/out: offsets returned by
-					rec_get_offsets(rec, clust_index) */
-	mem_heap_t**	offset_heap,	/* in/out: memory heap from which
-					the offsets are allocated */
-	rec_t**		old_vers,	/* out: old version, or NULL if the
-					record does not exist in the view:
-					i.e., it was freshly inserted
-					afterwards */
-	mtr_t*		mtr)		/* in: mtr */
-{
-	ulint	err;
-
-	if (prebuilt->old_vers_heap) {
-		mem_heap_empty(prebuilt->old_vers_heap);
-	} else {
-		prebuilt->old_vers_heap = mem_heap_create(200);
-	}
-
-	err = row_vers_build_for_consistent_read(
-		rec, mtr, clust_index, offsets, read_view, offset_heap,
-		prebuilt->old_vers_heap, old_vers);
-	return(err);
-}
-
-/*************************************************************************
-Retrieves the clustered index record corresponding to a record in a
-non-clustered index. Does the necessary locking. Used in the MySQL
-interface. 
*/
-static
-ulint
-row_sel_get_clust_rec_for_mysql(
-/*============================*/
-				/* out: DB_SUCCESS or error code; on an
-				error code *out_rec is left as NULL */
-	row_prebuilt_t*	prebuilt,/* in: prebuilt struct in the handle */
-	dict_index_t*	sec_index,/* in: secondary index where rec resides */
-	rec_t*		rec,	/* in: record in a non-clustered index; if
-				this is a locking read, then rec is not
-				allowed to be delete-marked, and that would
-				not make sense either */
-	que_thr_t*	thr,	/* in: query thread */
-	rec_t**		out_rec,/* out: clustered record or an old version of
-				it, NULL if the old version did not exist
-				in the read view, i.e., it was a fresh
-				inserted version */
-	ulint**		offsets,/* out: offsets returned by
-				rec_get_offsets(out_rec, clust_index) */
-	mem_heap_t**	offset_heap,/* in/out: memory heap from which
-				the offsets are allocated */
-	mtr_t*		mtr)	/* in: mtr used to get access to the
-				non-clustered record; the same mtr is used to
-				access the clustered index */
-{
-	dict_index_t*	clust_index;
-	rec_t*		clust_rec;
-	rec_t*		old_vers;
-	ulint		err;
-	trx_t*		trx;
-
-	*out_rec = NULL;	/* overwritten only at func_exit below */
-	trx = thr_get_trx(thr);
-
-	row_build_row_ref_in_tuple(prebuilt->clust_ref, sec_index, rec, trx);
-
-	clust_index = dict_table_get_first_index(sec_index->table);
-
-	btr_pcur_open_with_no_init(clust_index, prebuilt->clust_ref,
-				   PAGE_CUR_LE, BTR_SEARCH_LEAF,
-				   prebuilt->clust_pcur, 0, mtr);
-
-	clust_rec = btr_pcur_get_rec(prebuilt->clust_pcur);
-
-	prebuilt->clust_pcur->trx_if_known = trx;
-
-	/* Note: only if the search ends up on a non-infimum record is the
-	low_match value the real match to the search tuple */
-
-	if (!page_rec_is_user_rec(clust_rec)
-	    || btr_pcur_get_low_match(prebuilt->clust_pcur)
-	    < dict_index_get_n_unique(clust_index)) {
-
-		/* In a rare case it is possible that no clust rec is found
-		for a delete-marked secondary index record: if in row0umod.c
-		in row_undo_mod_remove_clust_low() we have already removed
-		the clust rec, while purge is still cleaning and removing
-		secondary index records associated with earlier versions of
-		the clustered index record. In that case we know that the
-		clustered index record did not exist in the read view of
-		trx. The diagnostic below is therefore printed only when
-		rec is not delete-marked or when this is a locking read;
-		in every case the function continues to func_exit with a
-		NULL clustered record. */
-
-		if (!rec_get_deleted_flag(rec,
-					  dict_table_is_comp(sec_index->table))
-		    || prebuilt->select_lock_type != LOCK_NONE) {
-			ut_print_timestamp(stderr);
-			fputs(" InnoDB: error clustered record"
-			      " for sec rec not found\n"
-			      "InnoDB: ", stderr);
-			dict_index_name_print(stderr, trx, sec_index);
-			fputs("\n"
-			      "InnoDB: sec index record ", stderr);
-			rec_print(stderr, rec, sec_index);
-			fputs("\n"
-			      "InnoDB: clust index record ", stderr);
-			rec_print(stderr, clust_rec, clust_index);
-			putc('\n', stderr);
-			trx_print(stderr, trx, 600);
-
-			fputs("\n"
-			      "InnoDB: Submit a detailed bug report"
-			      " to http://bugs.mysql.com\n", stderr);
-		}
-
-		clust_rec = NULL;
-
-		goto func_exit;
-	}
-
-	*offsets = rec_get_offsets(clust_rec, clust_index, *offsets,
-				   ULINT_UNDEFINED, offset_heap);
-
-	if (prebuilt->select_lock_type != LOCK_NONE) {
-		/* Try to place a lock on the index record; we are searching
-		the clust rec with a unique condition, hence
-		we set a LOCK_REC_NOT_GAP type lock */
-
-		err = lock_clust_rec_read_check_and_lock(
-			0, clust_rec, clust_index, *offsets,
-			prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr);
-		if (err != DB_SUCCESS) {
-
-			goto err_exit;
-		}
-	} else {
-		/* This is a non-locking consistent read: if necessary, fetch
-		a previous version of the record */
-
-		old_vers = NULL;
-
-		/* If the isolation level allows reading of uncommitted data,
-		then we never look for an earlier version */
-
-		if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED
-		    && !lock_clust_rec_cons_read_sees(
-			    clust_rec, clust_index, *offsets,
-			    trx->read_view)) {
-
-			/* The following call returns 'offsets' associated with
-			'old_vers' */
-			err = row_sel_build_prev_vers_for_mysql(
-				trx->read_view, clust_index, prebuilt,
-				clust_rec, offsets, offset_heap, &old_vers,
-				mtr);
-
-			if (err != DB_SUCCESS) {
-
-				goto err_exit;
-			}
-
-			clust_rec = old_vers;	/* may be NULL */
-		}
-
-		/* If we had to go to an earlier version of row or the
-		secondary index record is delete marked, then it may be that
-		the secondary index record corresponding to clust_rec
-		(or old_vers) is not rec; in that case we must ignore
-		such row because in our snapshot rec would not have existed.
-		Remember that from rec we cannot see directly which transaction
-		id corresponds to it: we have to go to the clustered index
-		record. A query where we want to fetch all rows where
-		the secondary index value is in some interval would return
-		a wrong result if we would not drop rows which we come to
-		visit through secondary index records that would not really
-		exist in our snapshot. */
-
-		if (clust_rec && (old_vers || rec_get_deleted_flag(
-					  rec,
-					  dict_table_is_comp(
-						  sec_index->table)))
-		    && !row_sel_sec_rec_is_for_clust_rec(
-			    rec, sec_index, clust_rec, clust_index)) {
-			clust_rec = NULL;
-		} else {
-#ifdef UNIV_SEARCH_DEBUG
-			ut_a(clust_rec == NULL
-			     || row_sel_sec_rec_is_for_clust_rec(
-				     rec, sec_index, clust_rec, clust_index));
-#endif
-		}
-	}
-
-func_exit:
-	*out_rec = clust_rec;
-
-	if (prebuilt->select_lock_type != LOCK_NONE) {
-		/* We may use the cursor in update or in unlock_row():
-		store its position */
-
-		btr_pcur_store_position(prebuilt->clust_pcur, mtr);
-	}
-
-	err = DB_SUCCESS;	/* error paths jump straight to err_exit, so they
-				skip both the *out_rec assignment and the
-				cursor position store above */
-err_exit:
-	return(err);
-}
-
-/************************************************************************
-Restores cursor position after it has been stored. We have to take into
-account that the record cursor was positioned on may have been deleted.
-Then we may have to move the cursor one step up or down. */
-static
-ibool
-sel_restore_position_for_mysql(
-/*===========================*/
-					/* out: TRUE if we may need to
-					process the record the cursor is
-					now positioned on (i.e. we should
-					not go to the next record yet) */
-	ibool*		same_user_rec,	/* out: TRUE if we were able to restore
-					the cursor on a user record with the
-					same ordering prefix in the
-					B-tree index; this is the value
-					returned by
-					btr_pcur_restore_position() */
-	ulint		latch_mode,	/* in: latch mode wished in
-					restoration */
-	btr_pcur_t*	pcur,		/* in: cursor whose position
-					has been stored */
-	ibool		moves_up,	/* in: TRUE if the cursor moves up
-					in the index */
-	mtr_t*		mtr)		/* in: mtr; CAUTION: may commit
-					mtr temporarily! */
-{
-	ibool	success;
-	ulint	relative_position;
-
-	relative_position = pcur->rel_pos;	/* read before the restore call;
-						NOTE(review): presumably because
-						restoring may change
-						pcur->rel_pos - confirm */
-
-	success = btr_pcur_restore_position(latch_mode, pcur, mtr);
-
-	*same_user_rec = success;
-
-	if (relative_position == BTR_PCUR_ON) {
-		if (success) {
-			return(FALSE);	/* the only FALSE return: restored on
-					the same user record */
-		}
-
-		if (moves_up) {
-			btr_pcur_move_to_next(pcur, mtr);
-		}
-
-		return(TRUE);
-	}
-
-	if (relative_position == BTR_PCUR_AFTER
-	    || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE) {
-
-		if (moves_up) {
-			return(TRUE);	/* cursor is left where the restore
-					put it */
-		}
-
-		if (btr_pcur_is_on_user_rec(pcur, mtr)) {
-			btr_pcur_move_to_prev(pcur, mtr);
-		}
-
-		return(TRUE);
-	}
-
-	ut_ad(relative_position == BTR_PCUR_BEFORE
-	      || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE);
-
-	if (moves_up && btr_pcur_is_on_user_rec(pcur, mtr)) {
-		btr_pcur_move_to_next(pcur, mtr);
-	}
-
-	return(TRUE);
-}
-
-/************************************************************************
-Pops a cached row for MySQL from the fetch cache. 
*/
-UNIV_INLINE
-void
-row_sel_pop_cached_row_for_mysql(
-/*=============================*/
-	byte*		buf,		/* in/out: buffer where to copy the
-					row; when
-					prebuilt->keep_other_fields_on_keyread
-					is set only the template columns and
-					their NULL bits are overwritten,
-					otherwise the first
-					prebuilt->mysql_prefix_len bytes */
-	row_prebuilt_t*	prebuilt)	/* in: prebuilt struct; its
-					n_fetch_cached and fetch_cache_first
-					counters are updated here */
-{
-	ulint			i;
-	mysql_row_templ_t*	templ;
-	byte*			cached_rec;
-	ut_ad(prebuilt->n_fetch_cached > 0);
-	ut_ad(prebuilt->mysql_prefix_len <= prebuilt->mysql_row_len);
-
-	if (UNIV_UNLIKELY(prebuilt->keep_other_fields_on_keyread)) {
-		/* Copy cache record field by field, don't touch fields that
-		are not covered by current key */
-		cached_rec = prebuilt->fetch_cache[
-			prebuilt->fetch_cache_first];
-
-		for (i = 0; i < prebuilt->n_template; i++) {
-			templ = prebuilt->mysql_template + i;
-			ut_memcpy(buf + templ->mysql_col_offset,
-				  cached_rec + templ->mysql_col_offset,
-				  templ->mysql_col_len);
-			/* Copy NULL bit of the current field from cached_rec
-			to buf. (buf ^ cached_rec) & mask has a bit set
-			exactly where the two bytes differ under the mask;
-			XORing that into buf makes the masked bits equal to
-			those of cached_rec and leaves every other bit of
-			the byte unchanged. */
-			if (templ->mysql_null_bit_mask) {
-				buf[templ->mysql_null_byte_offset]
-					^= (buf[templ->mysql_null_byte_offset]
-					    ^ cached_rec[templ->mysql_null_byte_offset])
-					& (byte)templ->mysql_null_bit_mask;
-			}
-		}
-	}
-	else {	/* plain case: one copy of the leading mysql_prefix_len bytes */
-		ut_memcpy(buf,
-			  prebuilt->fetch_cache[prebuilt->fetch_cache_first],
-			  prebuilt->mysql_prefix_len);
-	}
-	prebuilt->n_fetch_cached--;	/* one row fewer left in the cache */
-	prebuilt->fetch_cache_first++;	/* next pop reads the following slot */
-
-	if (prebuilt->n_fetch_cached == 0) {
-		prebuilt->fetch_cache_first = 0; /* cache drained: rewind */
-	}
-}
-
-/************************************************************************
-Pushes a row for MySQL to the fetch cache. 
*/
-UNIV_INLINE
-void
-row_sel_push_cache_row_for_mysql(
-/*=============================*/
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct; the row is
-					stored in slot n_fetch_cached of
-					fetch_cache, and n_fetch_cached is
-					then incremented */
-	rec_t*		rec,		/* in: record to push */
-	const ulint*	offsets)	/* in: rec_get_offsets() */
-{
-	byte*	buf;
-	ulint	i;
-
-	ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE);
-	ut_ad(rec_offs_validate(rec, NULL, offsets));
-	ut_a(!prebuilt->templ_contains_blob);	/* checked in all builds */
-
-	if (prebuilt->fetch_cache[0] == NULL) {
-		/* Allocate memory for the fetch cache: all
-		MYSQL_FETCH_CACHE_SIZE slots are allocated together, the
-		first time a row is pushed */
-
-		for (i = 0; i < MYSQL_FETCH_CACHE_SIZE; i++) {
-
-			/* A user has reported memory corruption in these
-			buffers in Linux. Put magic numbers there to help
-			to track a possible bug. Each allocation is laid
-			out as: 4 bytes of ROW_PREBUILT_FETCH_MAGIC_N,
-			then mysql_row_len bytes for the row, then another
-			4 bytes of ROW_PREBUILT_FETCH_MAGIC_N. The pointer
-			kept in fetch_cache[i] is buf + 4, i.e. the start
-			of the row area, not the start of the allocation. */
-
-			buf = mem_alloc(prebuilt->mysql_row_len + 8);
-
-			prebuilt->fetch_cache[i] = buf + 4;
-
-			mach_write_to_4(buf, ROW_PREBUILT_FETCH_MAGIC_N);
-			mach_write_to_4(buf + 4 + prebuilt->mysql_row_len,
-					ROW_PREBUILT_FETCH_MAGIC_N);
-		}
-	}
-
-	ut_ad(prebuilt->fetch_cache_first == 0);
-
-	if (UNIV_UNLIKELY(!row_sel_store_mysql_rec(
-				  prebuilt->fetch_cache[
-					  prebuilt->n_fetch_cached],
-				  prebuilt, rec, offsets))) {
-		ut_error;	/* a failed conversion is treated as fatal */
-	}
-
-	prebuilt->n_fetch_cached++;
-}
-
-/*************************************************************************
-Tries to do a shortcut to fetch a clustered index record with a unique key,
-using the hash index if possible (not always). We assume that the search
-mode is PAGE_CUR_GE, it is a consistent read, there is a read view in trx,
-btr search latch has been locked in S-mode. 
*/
-static
-ulint
-row_sel_try_search_shortcut_for_mysql(
-/*==================================*/
-				/* out: SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY:
-				SEL_FOUND when *out_rec has been set;
-				SEL_EXHAUSTED when the record under the
-				cursor matches fewer fields than the search
-				tuple has, or is delete-marked;
-				SEL_RETRY when the cursor is not on a user
-				record or the read view does not see the
-				record */
-	rec_t**		out_rec,/* out: record if found; written only
-				when SEL_FOUND is returned */
-	row_prebuilt_t*	prebuilt,/* in: prebuilt struct */
-	ulint**		offsets,/* in/out: for rec_get_offsets(*out_rec) */
-	mem_heap_t**	heap,	/* in/out: heap for rec_get_offsets() */
-	mtr_t*		mtr)	/* in: started mtr */
-{
-	dict_index_t*	index		= prebuilt->index;
-	dtuple_t*	search_tuple	= prebuilt->search_tuple;
-	btr_pcur_t*	pcur		= prebuilt->pcur;
-	trx_t*		trx		= prebuilt->trx;
-	rec_t*		rec;
-
-	ut_ad(index->type & DICT_CLUSTERED);
-	ut_ad(!prebuilt->templ_contains_blob);
-
-	btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
-				   BTR_SEARCH_LEAF, pcur,
-#ifndef UNIV_SEARCH_DEBUG
-				   RW_S_LATCH,
-#else
-				   0,
-#endif
-				   mtr);
-	rec = btr_pcur_get_rec(pcur);
-
-	if (!page_rec_is_user_rec(rec)) {
-
-		return(SEL_RETRY);
-	}
-
-	/* As the cursor is now placed on a user record after a search with
-	the mode PAGE_CUR_GE, the up_match field in the cursor tells how many
-	fields in the user record matched to the search tuple */
-
-	if (btr_pcur_get_up_match(pcur) < dtuple_get_n_fields(search_tuple)) {
-
-		return(SEL_EXHAUSTED);
-	}
-
-	/* This is a non-locking consistent read: if necessary, fetch
-	a previous version of the record. NOTE: no previous version is
-	built in this function; when the read view does not see rec,
-	SEL_RETRY is returned instead. In row_search_for_mysql() the
-	switch on this return value has no case for SEL_RETRY, so control
-	falls through to the ordinary cursor search. */
-
-	*offsets = rec_get_offsets(rec, index, *offsets,
-				   ULINT_UNDEFINED, heap);
-
-	if (!lock_clust_rec_cons_read_sees(rec, index,
-					   *offsets, trx->read_view)) {
-
-		return(SEL_RETRY);
-	}
-
-	if (rec_get_deleted_flag(rec, dict_table_is_comp(index->table))) {
-
-		return(SEL_EXHAUSTED);
-	}
-
-	*out_rec = rec;
-
-	return(SEL_FOUND);
-}
-
-/************************************************************************
-Searches for rows in the database. This is used in the interface to
-MySQL. This function opens a cursor, and also implements fetch next
-and fetch prev. 
NOTE that if we do a search with a full key value -from a unique index (ROW_SEL_EXACT), then we will not store the cursor -position and fetch next or fetch prev must not be tried to the cursor! */ - -ulint -row_search_for_mysql( -/*=================*/ - /* out: DB_SUCCESS, - DB_RECORD_NOT_FOUND, - DB_END_OF_INDEX, DB_DEADLOCK, - DB_LOCK_TABLE_FULL, DB_CORRUPTION, - or DB_TOO_BIG_RECORD */ - byte* buf, /* in/out: buffer for the fetched - row in the MySQL format */ - ulint mode, /* in: search mode PAGE_CUR_L, ... */ - row_prebuilt_t* prebuilt, /* in: prebuilt struct for the - table handle; this contains the info - of search_tuple, index; if search - tuple contains 0 fields then we - position the cursor at the start or - the end of the index, depending on - 'mode' */ - ulint match_mode, /* in: 0 or ROW_SEL_EXACT or - ROW_SEL_EXACT_PREFIX */ - ulint direction) /* in: 0 or ROW_SEL_NEXT or - ROW_SEL_PREV; NOTE: if this is != 0, - then prebuilt must have a pcur - with stored position! In opening of a - cursor 'direction' should be 0. 
*/ -{ - dict_index_t* index = prebuilt->index; - ibool comp = dict_table_is_comp(index->table); - dtuple_t* search_tuple = prebuilt->search_tuple; - btr_pcur_t* pcur = prebuilt->pcur; - trx_t* trx = prebuilt->trx; - dict_index_t* clust_index; - que_thr_t* thr; - rec_t* rec; - rec_t* result_rec; - rec_t* clust_rec; - ulint err = DB_SUCCESS; - ibool unique_search = FALSE; - ibool unique_search_from_clust_index = FALSE; - ibool mtr_has_extra_clust_latch = FALSE; - ibool moves_up = FALSE; - ibool set_also_gap_locks = TRUE; - /* if the query is a plain locking SELECT, and the isolation level - is <= TRX_ISO_READ_COMMITTED, then this is set to FALSE */ - ibool did_semi_consistent_read = FALSE; - /* if the returned record was locked and we did a semi-consistent - read (fetch the newest committed version), then this is set to - TRUE */ -#ifdef UNIV_SEARCH_DEBUG - ulint cnt = 0; -#endif /* UNIV_SEARCH_DEBUG */ - ulint next_offs; - ibool same_user_rec; - mtr_t mtr; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_ad(index && pcur && search_tuple); - ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); - - if (UNIV_UNLIKELY(prebuilt->table->ibd_file_missing)) { - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: Error:\n" - "InnoDB: MySQL is trying to use a table handle" - " but the .ibd file for\n" - "InnoDB: table %s does not exist.\n" - "InnoDB: Have you deleted the .ibd file" - " from the database directory under\n" - "InnoDB: the MySQL datadir, or have you used" - " DISCARD TABLESPACE?\n" - "InnoDB: Look from\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "innodb-troubleshooting.html\n" - "InnoDB: how you can resolve the problem.\n", - prebuilt->table->name); - - return(DB_ERROR); - } - - if (UNIV_UNLIKELY(prebuilt->magic_n != ROW_PREBUILT_ALLOCATED)) { - fprintf(stderr, - "InnoDB: Error: trying to free a corrupt\n" - "InnoDB: table handle. 
Magic n %lu, table name ", - (ulong) prebuilt->magic_n); - ut_print_name(stderr, trx, TRUE, prebuilt->table->name); - putc('\n', stderr); - - mem_analyze_corruption(prebuilt); - - ut_error; - } - -#if 0 - /* August 19, 2005 by Heikki: temporarily disable this error - print until the cursor lock count is done correctly. - See bugs #12263 and #12456!*/ - - if (trx->n_mysql_tables_in_use == 0 - && UNIV_UNLIKELY(prebuilt->select_lock_type == LOCK_NONE)) { - /* Note that if MySQL uses an InnoDB temp table that it - created inside LOCK TABLES, then n_mysql_tables_in_use can - be zero; in that case select_lock_type is set to LOCK_X in - ::start_stmt. */ - - fputs("InnoDB: Error: MySQL is trying to perform a SELECT\n" - "InnoDB: but it has not locked" - " any tables in ::external_lock()!\n", - stderr); - trx_print(stderr, trx, 600); - fputc('\n', stderr); - } -#endif - -#if 0 - fprintf(stderr, "Match mode %lu\n search tuple ", - (ulong) match_mode); - dtuple_print(search_tuple); - fprintf(stderr, "N tables locked %lu\n", - (ulong) trx->mysql_n_tables_locked); -#endif - /*-------------------------------------------------------------*/ - /* PHASE 0: Release a possible s-latch we are holding on the - adaptive hash index latch if there is someone waiting behind */ - - if (UNIV_UNLIKELY(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_NOT_LOCKED) - && trx->has_search_latch) { - - /* There is an x-latch request on the adaptive hash index: - release the s-latch to reduce starvation and wait for - BTR_SEA_TIMEOUT rounds before trying to keep it again over - calls from MySQL */ - - rw_lock_s_unlock(&btr_search_latch); - trx->has_search_latch = FALSE; - - trx->search_latch_timeout = BTR_SEA_TIMEOUT; - } - - /* Reset the new record lock info if srv_locks_unsafe_for_binlog - is set or session is using a READ COMMITED isolation level. Then - we are able to remove the record locks set here on an individual - row. 
*/ - prebuilt->new_rec_locks = 0; - - /*-------------------------------------------------------------*/ - /* PHASE 1: Try to pop the row from the prefetch cache */ - - if (UNIV_UNLIKELY(direction == 0)) { - trx->op_info = "starting index read"; - - prebuilt->n_rows_fetched = 0; - prebuilt->n_fetch_cached = 0; - prebuilt->fetch_cache_first = 0; - - if (prebuilt->sel_graph == NULL) { - /* Build a dummy select query graph */ - row_prebuild_sel_graph(prebuilt); - } - } else { - trx->op_info = "fetching rows"; - - if (prebuilt->n_rows_fetched == 0) { - prebuilt->fetch_direction = direction; - } - - if (UNIV_UNLIKELY(direction != prebuilt->fetch_direction)) { - if (UNIV_UNLIKELY(prebuilt->n_fetch_cached > 0)) { - ut_error; - /* TODO: scrollable cursor: restore cursor to - the place of the latest returned row, - or better: prevent caching for a scroll - cursor! */ - } - - prebuilt->n_rows_fetched = 0; - prebuilt->n_fetch_cached = 0; - prebuilt->fetch_cache_first = 0; - - } else if (UNIV_LIKELY(prebuilt->n_fetch_cached > 0)) { - row_sel_pop_cached_row_for_mysql(buf, prebuilt); - - prebuilt->n_rows_fetched++; - - srv_n_rows_read++; - err = DB_SUCCESS; - goto func_exit; - } - - if (prebuilt->fetch_cache_first > 0 - && prebuilt->fetch_cache_first < MYSQL_FETCH_CACHE_SIZE) { - - /* The previous returned row was popped from the fetch - cache, but the cache was not full at the time of the - popping: no more rows can exist in the result set */ - - err = DB_RECORD_NOT_FOUND; - goto func_exit; - } - - prebuilt->n_rows_fetched++; - - if (prebuilt->n_rows_fetched > 1000000000) { - /* Prevent wrap-over */ - prebuilt->n_rows_fetched = 500000000; - } - - mode = pcur->search_mode; - } - - /* In a search where at most one record in the index may match, we - can use a LOCK_REC_NOT_GAP type record lock when locking a - non-delete-marked matching record. 
- - Note that in a unique secondary index there may be different - delete-marked versions of a record where only the primary key - values differ: thus in a secondary index we must use next-key - locks when locking delete-marked records. */ - - if (match_mode == ROW_SEL_EXACT - && index->type & DICT_UNIQUE - && dtuple_get_n_fields(search_tuple) - == dict_index_get_n_unique(index) - && (index->type & DICT_CLUSTERED - || !dtuple_contains_null(search_tuple))) { - - /* Note above that a UNIQUE secondary index can contain many - rows with the same key value if one of the columns is the SQL - null. A clustered index under MySQL can never contain null - columns because we demand that all the columns in primary key - are non-null. */ - - unique_search = TRUE; - - /* Even if the condition is unique, MySQL seems to try to - retrieve also a second row if a primary key contains more than - 1 column. Return immediately if this is not a HANDLER - command. */ - - if (UNIV_UNLIKELY(direction != 0 - && !prebuilt->used_in_HANDLER)) { - - err = DB_RECORD_NOT_FOUND; - goto func_exit; - } - } - - mtr_start(&mtr); - - /*-------------------------------------------------------------*/ - /* PHASE 2: Try fast adaptive hash index search if possible */ - - /* Next test if this is the special case where we can use the fast - adaptive hash index to try the search. 
Since we must release the - search system latch when we retrieve an externally stored field, we - cannot use the adaptive hash index in a search in the case the row - may be long and there may be externally stored fields */ - - if (UNIV_UNLIKELY(direction == 0) - && unique_search - && index->type & DICT_CLUSTERED - && !prebuilt->templ_contains_blob - && !prebuilt->used_in_HANDLER - && (prebuilt->mysql_row_len < UNIV_PAGE_SIZE / 8)) { - - mode = PAGE_CUR_GE; - - unique_search_from_clust_index = TRUE; - - if (trx->mysql_n_tables_locked == 0 - && prebuilt->select_lock_type == LOCK_NONE - && trx->isolation_level > TRX_ISO_READ_UNCOMMITTED - && trx->read_view) { - - /* This is a SELECT query done as a consistent read, - and the read view has already been allocated: - let us try a search shortcut through the hash - index. - NOTE that we must also test that - mysql_n_tables_locked == 0, because this might - also be INSERT INTO ... SELECT ... or - CREATE TABLE ... SELECT ... . Our algorithm is - NOT prepared to inserts interleaved with the SELECT, - and if we try that, we can deadlock on the adaptive - hash index semaphore! 
*/ - -#ifndef UNIV_SEARCH_DEBUG - if (!trx->has_search_latch) { - rw_lock_s_lock(&btr_search_latch); - trx->has_search_latch = TRUE; - } -#endif - switch (row_sel_try_search_shortcut_for_mysql( - &rec, prebuilt, &offsets, &heap, - &mtr)) { - case SEL_FOUND: -#ifdef UNIV_SEARCH_DEBUG - ut_a(0 == cmp_dtuple_rec(search_tuple, - rec, offsets)); -#endif - if (!row_sel_store_mysql_rec(buf, prebuilt, - rec, offsets)) { - err = DB_TOO_BIG_RECORD; - - /* We let the main loop to do the - error handling */ - goto shortcut_fails_too_big_rec; - } - - mtr_commit(&mtr); - - /* ut_print_name(stderr, index->name); - fputs(" shortcut\n", stderr); */ - - srv_n_rows_read++; - - if (trx->search_latch_timeout > 0 - && trx->has_search_latch) { - - trx->search_latch_timeout--; - - rw_lock_s_unlock(&btr_search_latch); - trx->has_search_latch = FALSE; - } - - /* NOTE that we do NOT store the cursor - position */ - err = DB_SUCCESS; - goto func_exit; - - case SEL_EXHAUSTED: - mtr_commit(&mtr); - - /* ut_print_name(stderr, index->name); - fputs(" record not found 2\n", stderr); */ - - if (trx->search_latch_timeout > 0 - && trx->has_search_latch) { - - trx->search_latch_timeout--; - - rw_lock_s_unlock(&btr_search_latch); - trx->has_search_latch = FALSE; - } - - /* NOTE that we do NOT store the cursor - position */ - - err = DB_RECORD_NOT_FOUND; - goto func_exit; - } -shortcut_fails_too_big_rec: - mtr_commit(&mtr); - mtr_start(&mtr); - } - } - - /*-------------------------------------------------------------*/ - /* PHASE 3: Open or restore index cursor position */ - - if (trx->has_search_latch) { - rw_lock_s_unlock(&btr_search_latch); - trx->has_search_latch = FALSE; - } - - trx_start_if_not_started(trx); - - if (trx->isolation_level <= TRX_ISO_READ_COMMITTED - && prebuilt->select_lock_type != LOCK_NONE - && trx->mysql_thd != NULL - && thd_is_select(trx->mysql_thd)) { - /* It is a plain locking SELECT and the isolation - level is low: do not lock gaps */ - - set_also_gap_locks = FALSE; - } - - 
/* Note that if the search mode was GE or G, then the cursor - naturally moves upward (in fetch next) in alphabetical order, - otherwise downward */ - - if (UNIV_UNLIKELY(direction == 0)) { - if (mode == PAGE_CUR_GE || mode == PAGE_CUR_G) { - moves_up = TRUE; - } - } else if (direction == ROW_SEL_NEXT) { - moves_up = TRUE; - } - - thr = que_fork_get_first_thr(prebuilt->sel_graph); - - que_thr_move_to_run_state_for_mysql(thr, trx); - - clust_index = dict_table_get_first_index(index->table); - - if (UNIV_LIKELY(direction != 0)) { - ibool need_to_process = sel_restore_position_for_mysql( - &same_user_rec, BTR_SEARCH_LEAF, - pcur, moves_up, &mtr); - - if (UNIV_UNLIKELY(need_to_process)) { - if (UNIV_UNLIKELY(prebuilt->row_read_type - == ROW_READ_DID_SEMI_CONSISTENT)) { - /* We did a semi-consistent read, - but the record was removed in - the meantime. */ - prebuilt->row_read_type - = ROW_READ_TRY_SEMI_CONSISTENT; - } - } else if (UNIV_LIKELY(prebuilt->row_read_type - != ROW_READ_DID_SEMI_CONSISTENT)) { - - /* The cursor was positioned on the record - that we returned previously. If we need - to repeat a semi-consistent read as a - pessimistic locking read, the record - cannot be skipped. */ - - goto next_rec; - } - - } else if (dtuple_get_n_fields(search_tuple) > 0) { - - btr_pcur_open_with_no_init(index, search_tuple, mode, - BTR_SEARCH_LEAF, - pcur, 0, &mtr); - - pcur->trx_if_known = trx; - - rec = btr_pcur_get_rec(pcur); - - if (!moves_up - && !page_rec_is_supremum(rec) - && set_also_gap_locks - && !(srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - - /* Try to place a gap lock on the next index record - to prevent phantoms in ORDER BY ... 
DESC queries */ - - offsets = rec_get_offsets(page_rec_get_next(rec), - index, offsets, - ULINT_UNDEFINED, &heap); - err = sel_set_rec_lock(page_rec_get_next(rec), - index, offsets, - prebuilt->select_lock_type, - LOCK_GAP, thr); - - if (err != DB_SUCCESS) { - - goto lock_wait_or_error; - } - } - } else { - if (mode == PAGE_CUR_G) { - btr_pcur_open_at_index_side( - TRUE, index, BTR_SEARCH_LEAF, pcur, FALSE, - &mtr); - } else if (mode == PAGE_CUR_L) { - btr_pcur_open_at_index_side( - FALSE, index, BTR_SEARCH_LEAF, pcur, FALSE, - &mtr); - } - } - - if (!prebuilt->sql_stat_start) { - /* No need to set an intention lock or assign a read view */ - - if (trx->read_view == NULL - && prebuilt->select_lock_type == LOCK_NONE) { - - fputs("InnoDB: Error: MySQL is trying to" - " perform a consistent read\n" - "InnoDB: but the read view is not assigned!\n", - stderr); - trx_print(stderr, trx, 600); - fputc('\n', stderr); - ut_a(0); - } - } else if (prebuilt->select_lock_type == LOCK_NONE) { - /* This is a consistent read */ - /* Assign a read view for the query */ - - trx_assign_read_view(trx); - prebuilt->sql_stat_start = FALSE; - } else { - ulint lock_mode; - if (prebuilt->select_lock_type == LOCK_S) { - lock_mode = LOCK_IS; - } else { - lock_mode = LOCK_IX; - } - err = lock_table(0, index->table, lock_mode, thr); - - if (err != DB_SUCCESS) { - - goto lock_wait_or_error; - } - prebuilt->sql_stat_start = FALSE; - } - -rec_loop: - /*-------------------------------------------------------------*/ - /* PHASE 4: Look for matching records in a loop */ - - rec = btr_pcur_get_rec(pcur); - ut_ad(!!page_rec_is_comp(rec) == comp); -#ifdef UNIV_SEARCH_DEBUG - /* - fputs("Using ", stderr); - dict_index_name_print(stderr, index); - fprintf(stderr, " cnt %lu ; Page no %lu\n", cnt, - buf_frame_get_page_no(buf_frame_align(rec))); - rec_print(rec); - */ -#endif /* UNIV_SEARCH_DEBUG */ - - if (page_rec_is_infimum(rec)) { - - /* The infimum record on a page cannot be in the result set, - and 
neither can a record lock be placed on it: we skip such - a record. */ - - goto next_rec; - } - - if (page_rec_is_supremum(rec)) { - - if (set_also_gap_locks - && !(srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - - /* Try to place a lock on the index record */ - - /* If innodb_locks_unsafe_for_binlog option is used - or this session is using a READ COMMITTED isolation - level we do not lock gaps. Supremum record is really - a gap and therefore we do not set locks there. */ - - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - err = sel_set_rec_lock(rec, index, offsets, - prebuilt->select_lock_type, - LOCK_ORDINARY, thr); - - if (err != DB_SUCCESS) { - - goto lock_wait_or_error; - } - } - /* A page supremum record cannot be in the result set: skip - it now that we have placed a possible lock on it */ - - goto next_rec; - } - - /*-------------------------------------------------------------*/ - /* Do sanity checks in case our cursor has bumped into page - corruption */ - - if (comp) { - next_offs = rec_get_next_offs(rec, TRUE); - if (UNIV_UNLIKELY(next_offs < PAGE_NEW_SUPREMUM)) { - - goto wrong_offs; - } - } else { - next_offs = rec_get_next_offs(rec, FALSE); - if (UNIV_UNLIKELY(next_offs < PAGE_OLD_SUPREMUM)) { - - goto wrong_offs; - } - } - - if (UNIV_UNLIKELY(next_offs >= UNIV_PAGE_SIZE - PAGE_DIR)) { - -wrong_offs: - if (srv_force_recovery == 0 || moves_up == FALSE) { - ut_print_timestamp(stderr); - buf_page_print(buf_frame_align(rec)); - fprintf(stderr, - "\nInnoDB: rec address %p, first" - " buffer frame %p\n" - "InnoDB: buffer pool high end %p," - " buf block fix count %lu\n", - (void*) rec, (void*) buf_pool->frame_zero, - (void*) buf_pool->high_end, - (ulong)buf_block_align(rec)->buf_fix_count); - fprintf(stderr, - "InnoDB: Index corruption: rec offs %lu" - " next offs %lu, page no %lu,\n" - "InnoDB: ", - (ulong) page_offset(rec), - (ulong) next_offs, 
- (ulong) buf_frame_get_page_no(rec)); - dict_index_name_print(stderr, trx, index); - fputs(". Run CHECK TABLE. You may need to\n" - "InnoDB: restore from a backup, or" - " dump + drop + reimport the table.\n", - stderr); - - err = DB_CORRUPTION; - - goto lock_wait_or_error; - } else { - /* The user may be dumping a corrupt table. Jump - over the corruption to recover as much as possible. */ - - fprintf(stderr, - "InnoDB: Index corruption: rec offs %lu" - " next offs %lu, page no %lu,\n" - "InnoDB: ", - (ulong) page_offset(rec), - (ulong) next_offs, - (ulong) buf_frame_get_page_no(rec)); - dict_index_name_print(stderr, trx, index); - fputs(". We try to skip the rest of the page.\n", - stderr); - - btr_pcur_move_to_last_on_page(pcur, &mtr); - - goto next_rec; - } - } - /*-------------------------------------------------------------*/ - - /* Calculate the 'offsets' associated with 'rec' */ - - offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); - - if (UNIV_UNLIKELY(srv_force_recovery > 0)) { - if (!rec_validate(rec, offsets) - || !btr_index_rec_validate(rec, index, FALSE)) { - fprintf(stderr, - "InnoDB: Index corruption: rec offs %lu" - " next offs %lu, page no %lu,\n" - "InnoDB: ", - (ulong) page_offset(rec), - (ulong) next_offs, - (ulong) buf_frame_get_page_no(rec)); - dict_index_name_print(stderr, trx, index); - fputs(". We try to skip the record.\n", - stderr); - - goto next_rec; - } - } - - /* Note that we cannot trust the up_match value in the cursor at this - place because we can arrive here after moving the cursor! Thus - we have to recompare rec and search_tuple to determine if they - match enough. 
*/ - - if (match_mode == ROW_SEL_EXACT) { - /* Test if the index record matches completely to search_tuple - in prebuilt: if not, then we return with DB_RECORD_NOT_FOUND */ - - /* fputs("Comparing rec and search tuple\n", stderr); */ - - if (0 != cmp_dtuple_rec(search_tuple, rec, offsets)) { - - if (set_also_gap_locks - && !(srv_locks_unsafe_for_binlog - || trx->isolation_level - == TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - - /* Try to place a gap lock on the index - record only if innodb_locks_unsafe_for_binlog - option is not set or this session is not - using a READ COMMITTED isolation level. */ - - err = sel_set_rec_lock( - rec, index, offsets, - prebuilt->select_lock_type, LOCK_GAP, - thr); - - if (err != DB_SUCCESS) { - - goto lock_wait_or_error; - } - } - - btr_pcur_store_position(pcur, &mtr); - - err = DB_RECORD_NOT_FOUND; - /* ut_print_name(stderr, index->name); - fputs(" record not found 3\n", stderr); */ - - goto normal_return; - } - - } else if (match_mode == ROW_SEL_EXACT_PREFIX) { - - if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec, offsets)) { - - if (set_also_gap_locks - && !(srv_locks_unsafe_for_binlog - || trx->isolation_level - == TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - - /* Try to place a gap lock on the index - record only if innodb_locks_unsafe_for_binlog - option is not set or this session is not - using a READ COMMITTED isolation level. 
*/ - - err = sel_set_rec_lock( - rec, index, offsets, - prebuilt->select_lock_type, LOCK_GAP, - thr); - - if (err != DB_SUCCESS) { - - goto lock_wait_or_error; - } - } - - btr_pcur_store_position(pcur, &mtr); - - err = DB_RECORD_NOT_FOUND; - /* ut_print_name(stderr, index->name); - fputs(" record not found 4\n", stderr); */ - - goto normal_return; - } - } - - /* We are ready to look at a possible new index entry in the result - set: the cursor is now placed on a user record */ - - if (prebuilt->select_lock_type != LOCK_NONE) { - /* Try to place a lock on the index record; note that delete - marked records are a special case in a unique search. If there - is a non-delete marked record, then it is enough to lock its - existence with LOCK_REC_NOT_GAP. */ - - /* If innodb_locks_unsafe_for_binlog option is used - or this session is using a READ COMMITED isolation - level we lock only the record, i.e., next-key locking is - not used. */ - - ulint lock_type; - - if (!set_also_gap_locks - || srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED - || (unique_search - && !UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp)))) { - - goto no_gap_lock; - } else { - lock_type = LOCK_ORDINARY; - } - - /* If we are doing a 'greater or equal than a primary key - value' search from a clustered index, and we find a record - that has that exact primary key value, then there is no need - to lock the gap before the record, because no insert in the - gap can be in our search range. That is, no phantom row can - appear that way. - - An example: if col1 is the primary key, the search is WHERE - col1 >= 100, and we find a record where col1 = 100, then no - need to lock the gap before that record. 
*/ - - if (index == clust_index - && mode == PAGE_CUR_GE - && direction == 0 - && dtuple_get_n_fields_cmp(search_tuple) - == dict_index_get_n_unique(index) - && 0 == cmp_dtuple_rec(search_tuple, rec, offsets)) { -no_gap_lock: - lock_type = LOCK_REC_NOT_GAP; - } - - err = sel_set_rec_lock(rec, index, offsets, - prebuilt->select_lock_type, - lock_type, thr); - - switch (err) { - rec_t* old_vers; - case DB_SUCCESS: - if (srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) { - /* Note that a record of - prebuilt->index was locked. */ - prebuilt->new_rec_locks = 1; - } - break; - case DB_LOCK_WAIT: - if (UNIV_LIKELY(prebuilt->row_read_type - != ROW_READ_TRY_SEMI_CONSISTENT) - || index != clust_index) { - - goto lock_wait_or_error; - } - - /* The following call returns 'offsets' - associated with 'old_vers' */ - err = row_sel_build_committed_vers_for_mysql( - clust_index, prebuilt, rec, - &offsets, &heap, &old_vers, &mtr); - - if (err != DB_SUCCESS) { - - goto lock_wait_or_error; - } - - mutex_enter(&kernel_mutex); - if (trx->was_chosen_as_deadlock_victim) { - mutex_exit(&kernel_mutex); - err = DB_DEADLOCK; - - goto lock_wait_or_error; - } - if (UNIV_LIKELY(trx->wait_lock != NULL)) { - lock_cancel_waiting_and_release( - trx->wait_lock); - prebuilt->new_rec_locks = 0; - } else { - mutex_exit(&kernel_mutex); - - /* The lock was granted while we were - searching for the last committed version. - Do a normal locking read. */ - - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, - &heap); - err = DB_SUCCESS; - /* Note that a record of - prebuilt->index was locked. 
*/ - prebuilt->new_rec_locks = 1; - break; - } - mutex_exit(&kernel_mutex); - - if (old_vers == NULL) { - /* The row was not yet committed */ - - goto next_rec; - } - - did_semi_consistent_read = TRUE; - rec = old_vers; - break; - default: - - goto lock_wait_or_error; - } - } else { - /* This is a non-locking consistent read: if necessary, fetch - a previous version of the record */ - - if (trx->isolation_level == TRX_ISO_READ_UNCOMMITTED) { - - /* Do nothing: we let a non-locking SELECT read the - latest version of the record */ - - } else if (index == clust_index) { - - /* Fetch a previous version of the row if the current - one is not visible in the snapshot; if we have a very - high force recovery level set, we try to avoid crashes - by skipping this lookup */ - - if (UNIV_LIKELY(srv_force_recovery < 5) - && !lock_clust_rec_cons_read_sees( - rec, index, offsets, trx->read_view)) { - - rec_t* old_vers; - /* The following call returns 'offsets' - associated with 'old_vers' */ - err = row_sel_build_prev_vers_for_mysql( - trx->read_view, clust_index, - prebuilt, rec, &offsets, &heap, - &old_vers, &mtr); - - if (err != DB_SUCCESS) { - - goto lock_wait_or_error; - } - - if (old_vers == NULL) { - /* The row did not exist yet in - the read view */ - - goto next_rec; - } - - rec = old_vers; - } - } else if (!lock_sec_rec_cons_read_sees(rec, index, - trx->read_view)) { - /* We are looking into a non-clustered index, - and to get the right version of the record we - have to look also into the clustered index: this - is necessary, because we can only get the undo - information via the clustered index record. */ - - ut_ad(index != clust_index); - - goto requires_clust_rec; - } - } - - /* NOTE that at this point rec can be an old version of a clustered - index record built for a consistent read. We cannot assume after this - point that rec is on a buffer pool page. Functions like - page_rec_is_comp() cannot be used! 
*/ - - if (UNIV_UNLIKELY(rec_get_deleted_flag(rec, comp))) { - - /* The record is delete-marked: we can skip it */ - - if ((srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE - && !did_semi_consistent_read) { - - /* No need to keep a lock on a delete-marked record - if we do not want to use next-key locking. */ - - row_unlock_for_mysql(prebuilt, TRUE); - } - - /* This is an optimization to skip setting the next key lock - on the record that follows this delete-marked record. This - optimization works because of the unique search criteria - which precludes the presence of a range lock between this - delete marked record and the record following it. - - For now this is applicable only to clustered indexes while - doing a unique search. There is scope for further optimization - applicable to unique secondary indexes. Current behaviour is - to widen the scope of a lock on an already delete marked record - if the same record is deleted twice by the same transaction */ - if (index == clust_index && unique_search) { - err = DB_RECORD_NOT_FOUND; - - goto normal_return; - } - - goto next_rec; - } - - /* Get the clustered index record if needed, if we did not do the - search using the clustered index. */ - - if (index != clust_index && prebuilt->need_to_access_clustered) { - -requires_clust_rec: - /* We use a 'goto' to the preceding label if a consistent - read of a secondary index record requires us to look up old - versions of the associated clustered index record. */ - - ut_ad(rec_offs_validate(rec, index, offsets)); - - /* It was a non-clustered index and we must fetch also the - clustered index record */ - - mtr_has_extra_clust_latch = TRUE; - - /* The following call returns 'offsets' associated with - 'clust_rec'. Note that 'clust_rec' can be an old version - built for a consistent read. 
*/ - - err = row_sel_get_clust_rec_for_mysql(prebuilt, index, rec, - thr, &clust_rec, - &offsets, &heap, &mtr); - if (err != DB_SUCCESS) { - - goto lock_wait_or_error; - } - - if (clust_rec == NULL) { - /* The record did not exist in the read view */ - ut_ad(prebuilt->select_lock_type == LOCK_NONE); - - goto next_rec; - } - - if ((srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - /* Note that both the secondary index record - and the clustered index record were locked. */ - ut_ad(prebuilt->new_rec_locks == 1); - prebuilt->new_rec_locks = 2; - } - - if (UNIV_UNLIKELY(rec_get_deleted_flag(clust_rec, comp))) { - - /* The record is delete marked: we can skip it */ - - if ((srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) - && prebuilt->select_lock_type != LOCK_NONE) { - - /* No need to keep a lock on a delete-marked - record if we do not want to use next-key - locking. */ - - row_unlock_for_mysql(prebuilt, TRUE); - } - - goto next_rec; - } - - if (prebuilt->need_to_access_clustered) { - - result_rec = clust_rec; - - ut_ad(rec_offs_validate(result_rec, clust_index, - offsets)); - } else { - /* We used 'offsets' for the clust rec, recalculate - them for 'rec' */ - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - result_rec = rec; - } - } else { - result_rec = rec; - } - - /* We found a qualifying record 'result_rec'. At this point, - 'offsets' are associated with 'result_rec'. */ - - ut_ad(rec_offs_validate(result_rec, - result_rec != rec ? 
clust_index : index, - offsets)); - - if ((match_mode == ROW_SEL_EXACT - || prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD) - && prebuilt->select_lock_type == LOCK_NONE - && !prebuilt->templ_contains_blob - && !prebuilt->clust_index_was_generated - && !prebuilt->used_in_HANDLER - && prebuilt->template_type - != ROW_MYSQL_DUMMY_TEMPLATE) { - - /* Inside an update, for example, we do not cache rows, - since we may use the cursor position to do the actual - update, that is why we require ...lock_type == LOCK_NONE. - Since we keep space in prebuilt only for the BLOBs of - a single row, we cannot cache rows in the case there - are BLOBs in the fields to be fetched. In HANDLER we do - not cache rows because there the cursor is a scrollable - cursor. */ - - row_sel_push_cache_row_for_mysql(prebuilt, result_rec, - offsets); - if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) { - - goto got_row; - } - - goto next_rec; - } else { - if (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE) { - memcpy(buf + 4, result_rec - - rec_offs_extra_size(offsets), - rec_offs_size(offsets)); - mach_write_to_4(buf, - rec_offs_extra_size(offsets) + 4); - } else { - if (!row_sel_store_mysql_rec(buf, prebuilt, - result_rec, offsets)) { - err = DB_TOO_BIG_RECORD; - - goto lock_wait_or_error; - } - } - - if (prebuilt->clust_index_was_generated) { - if (result_rec != rec) { - offsets = rec_get_offsets( - rec, index, offsets, ULINT_UNDEFINED, - &heap); - } - row_sel_store_row_id_to_prebuilt(prebuilt, rec, - index, offsets); - } - } - - /* From this point on, 'offsets' are invalid. */ - -got_row: - /* We have an optimization to save CPU time: if this is a consistent - read on a unique condition on the clustered index, then we do not - store the pcur position, because any fetch next or prev will anyway - return 'end of file'. Exceptions are locking reads and the MySQL - HANDLER command where the user can move the cursor with PREV or NEXT - even after a unique search. 
*/ - - if (!unique_search_from_clust_index - || prebuilt->select_lock_type != LOCK_NONE - || prebuilt->used_in_HANDLER) { - - /* Inside an update always store the cursor position */ - - btr_pcur_store_position(pcur, &mtr); - } - - err = DB_SUCCESS; - - goto normal_return; - -next_rec: - /* Reset the old and new "did semi-consistent read" flags. */ - if (UNIV_UNLIKELY(prebuilt->row_read_type - == ROW_READ_DID_SEMI_CONSISTENT)) { - prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; - } - did_semi_consistent_read = FALSE; - prebuilt->new_rec_locks = 0; - - /*-------------------------------------------------------------*/ - /* PHASE 5: Move the cursor to the next index record */ - - if (UNIV_UNLIKELY(mtr_has_extra_clust_latch)) { - /* We must commit mtr if we are moving to the next - non-clustered index record, because we could break the - latching order if we would access a different clustered - index page right away without releasing the previous. */ - - btr_pcur_store_position(pcur, &mtr); - - mtr_commit(&mtr); - mtr_has_extra_clust_latch = FALSE; - - mtr_start(&mtr); - if (sel_restore_position_for_mysql(&same_user_rec, - BTR_SEARCH_LEAF, - pcur, moves_up, &mtr)) { -#ifdef UNIV_SEARCH_DEBUG - cnt++; -#endif /* UNIV_SEARCH_DEBUG */ - - goto rec_loop; - } - } - - if (moves_up) { - if (UNIV_UNLIKELY(!btr_pcur_move_to_next(pcur, &mtr))) { -not_moved: - btr_pcur_store_position(pcur, &mtr); - - if (match_mode != 0) { - err = DB_RECORD_NOT_FOUND; - } else { - err = DB_END_OF_INDEX; - } - - goto normal_return; - } - } else { - if (UNIV_UNLIKELY(!btr_pcur_move_to_prev(pcur, &mtr))) { - goto not_moved; - } - } - -#ifdef UNIV_SEARCH_DEBUG - cnt++; -#endif /* UNIV_SEARCH_DEBUG */ - - goto rec_loop; - -lock_wait_or_error: - /* Reset the old and new "did semi-consistent read" flags. 
*/ - if (UNIV_UNLIKELY(prebuilt->row_read_type - == ROW_READ_DID_SEMI_CONSISTENT)) { - prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; - } - did_semi_consistent_read = FALSE; - - /*-------------------------------------------------------------*/ - - btr_pcur_store_position(pcur, &mtr); - - mtr_commit(&mtr); - mtr_has_extra_clust_latch = FALSE; - - trx->error_state = err; - - /* The following is a patch for MySQL */ - - que_thr_stop_for_mysql(thr); - - thr->lock_state = QUE_THR_LOCK_ROW; - - if (row_mysql_handle_errors(&err, trx, thr, NULL)) { - /* It was a lock wait, and it ended */ - - thr->lock_state = QUE_THR_LOCK_NOLOCK; - mtr_start(&mtr); - - sel_restore_position_for_mysql(&same_user_rec, - BTR_SEARCH_LEAF, pcur, - moves_up, &mtr); - - if ((srv_locks_unsafe_for_binlog - || trx->isolation_level == TRX_ISO_READ_COMMITTED) - && !same_user_rec) { - - /* Since we were not able to restore the cursor - on the same user record, we cannot use - row_unlock_for_mysql() to unlock any records, and - we must thus reset the new rec lock info. Since - in lock0lock.c we have blocked the inheriting of gap - X-locks, we actually do not have any new record locks - set in this case. - - Note that if we were able to restore on the 'same' - user record, it is still possible that we were actually - waiting on a delete-marked record, and meanwhile - it was removed by purge and inserted again by some - other user. But that is no problem, because in - rec_loop we will again try to set a lock, and - new_rec_lock_info in trx will be right at the end. 
*/ - - prebuilt->new_rec_locks = 0; - } - - mode = pcur->search_mode; - - goto rec_loop; - } - - thr->lock_state = QUE_THR_LOCK_NOLOCK; - -#ifdef UNIV_SEARCH_DEBUG - /* fputs("Using ", stderr); - dict_index_name_print(stderr, index); - fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */ -#endif /* UNIV_SEARCH_DEBUG */ - goto func_exit; - -normal_return: - /*-------------------------------------------------------------*/ - que_thr_stop_for_mysql_no_error(thr, trx); - - mtr_commit(&mtr); - - if (prebuilt->n_fetch_cached > 0) { - row_sel_pop_cached_row_for_mysql(buf, prebuilt); - - err = DB_SUCCESS; - } - -#ifdef UNIV_SEARCH_DEBUG - /* fputs("Using ", stderr); - dict_index_name_print(stderr, index); - fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */ -#endif /* UNIV_SEARCH_DEBUG */ - if (err == DB_SUCCESS) { - srv_n_rows_read++; - } - -func_exit: - trx->op_info = ""; - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - /* Set or reset the "did semi-consistent read" flag on return. - The flag did_semi_consistent_read is set if and only if - the record being returned was fetched with a semi-consistent read. */ - ut_ad(prebuilt->row_read_type != ROW_READ_WITH_LOCKS - || !did_semi_consistent_read); - - if (UNIV_UNLIKELY(prebuilt->row_read_type != ROW_READ_WITH_LOCKS)) { - if (UNIV_UNLIKELY(did_semi_consistent_read)) { - prebuilt->row_read_type = ROW_READ_DID_SEMI_CONSISTENT; - } else { - prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT; - } - } - return(err); -} - -/*********************************************************************** -Checks if MySQL at the moment is allowed for this table to retrieve a -consistent read result, or store it to the query cache. 
*/

ibool
row_search_check_if_query_cache_permitted(
/*======================================*/
					/* out: TRUE when the query cache may
					be used (stored to or read from) for
					this table, FALSE otherwise */
	trx_t*		trx,		/* in: transaction object */
	const char*	norm_name)	/* in: table name in the form
					database name, '/' char, table name */
{
	ibool		permitted = FALSE;
	dict_table_t*	tbl = dict_table_get(norm_name, FALSE);

	if (tbl == NULL) {
		/* The table could not be looked up: nothing is permitted */

		return(FALSE);
	}

	mutex_enter(&kernel_mutex);

	/* The checks below read trx->id, so make sure the transaction
	has been started */

	trx_start_if_not_started_low(trx);

	/* The cache is usable only when nobody holds a lock on the table
	and no transaction has invalidated the cache past our trx id. The
	lock type is not examined, even though only IX type locks would
	really require a refusal. */

	if (UT_LIST_GET_LEN(tbl->locks) == 0) {
		if (ut_dulint_cmp(trx->id,
				  tbl->query_cache_inv_trx_id) >= 0) {

			permitted = TRUE;

			/* At a high isolation level the transaction needs
			a read view: open one now if it has none yet */

			if (!trx->read_view
			    && trx->isolation_level
			    >= TRX_ISO_REPEATABLE_READ) {

				trx->read_view = read_view_open_now(
					trx->id, trx->global_read_view_heap);
				trx->global_read_view = trx->read_view;
			}
		}
	}

	mutex_exit(&kernel_mutex);

	return(permitted);
}

/***********************************************************************
Read the AUTOINC column from the current row. If the value is less than
0 and the type is not unsigned then we reset the value to 0.
*/ -static -ib_ulonglong -row_search_autoinc_read_column( -/*===========================*/ - /* out: value read from the column */ - dict_index_t* index, /* in: index to read from */ - const rec_t* rec, /* in: current rec */ - ulint col_no, /* in: column number */ - ibool unsigned_type) /* in: signed or unsigned flag */ -{ - ulint len; - const byte* data; - ib_ulonglong value; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - - *offsets_ = sizeof offsets_ / sizeof *offsets_; - - /* TODO: We have to cast away the const of rec for now. This needs - to be fixed later.*/ - offsets = rec_get_offsets( - (rec_t*) rec, index, offsets, ULINT_UNDEFINED, &heap); - - /* TODO: We have to cast away the const of rec for now. This needs - to be fixed later.*/ - data = rec_get_nth_field((rec_t*)rec, offsets, col_no, &len); - - ut_a(len != UNIV_SQL_NULL); - ut_a(len <= sizeof value); - - value = mach_read_int_type(data, len, unsigned_type); - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - /* We assume that the autoinc counter can't be negative. */ - if (!unsigned_type && (ib_longlong) value < 0) { - value = 0; - } - - return(value); -} - -/*********************************************************************** -Get the last row. */ -static -const rec_t* -row_search_autoinc_get_rec( -/*=======================*/ - /* out: current rec or NULL */ - btr_pcur_t* pcur, /* in: the current cursor */ - mtr_t* mtr) /* in: mini transaction */ -{ - do { - const rec_t* rec = btr_pcur_get_rec(pcur); - - if (page_rec_is_user_rec(rec)) { - return(rec); - } - } while (btr_pcur_move_to_prev(pcur, mtr)); - - return(NULL); -} - -/*********************************************************************** -Read the max AUTOINC value from an index. 
*/ - -ulint -row_search_max_autoinc( -/*===================*/ - /* out: DB_SUCCESS if all OK else - error code, DB_RECORD_NOT_FOUND if - column name can't be found in index */ - dict_index_t* index, /* in: index to search */ - const char* col_name, /* in: name of autoinc column */ - ib_ulonglong* value) /* out: AUTOINC value read */ -{ - ulint i; - ulint n_cols; - dict_field_t* dfield = NULL; - ulint error = DB_SUCCESS; - - n_cols = dict_index_get_n_ordering_defined_by_user(index); - - /* Search the index for the AUTOINC column name */ - for (i = 0; i < n_cols; ++i) { - dfield = dict_index_get_nth_field(index, i); - - if (strcmp(col_name, dfield->name) == 0) { - break; - } - } - - *value = 0; - - /* Must find the AUTOINC column name */ - if (i < n_cols && dfield) { - mtr_t mtr; - btr_pcur_t pcur; - - mtr_start(&mtr); - - /* Open at the high/right end (FALSE), and INIT - cursor (TRUE) */ - btr_pcur_open_at_index_side( - FALSE, index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr); - - if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) { - const rec_t* rec; - - rec = row_search_autoinc_get_rec(&pcur, &mtr); - - if (rec != NULL) { - ibool unsigned_type = ( - dfield->col->prtype & DATA_UNSIGNED); - - *value = row_search_autoinc_read_column( - index, rec, i, unsigned_type); - } - } - - btr_pcur_close(&pcur); - - mtr_commit(&mtr); - } else { - error = DB_RECORD_NOT_FOUND; - } - - return(error); -} diff --git a/storage/innobase/row/row0uins.c b/storage/innobase/row/row0uins.c deleted file mode 100644 index ce9ab792204..00000000000 --- a/storage/innobase/row/row0uins.c +++ /dev/null @@ -1,308 +0,0 @@ -/****************************************************** -Fresh insert undo - -(c) 1996 Innobase Oy - -Created 2/25/1997 Heikki Tuuri -*******************************************************/ - -#include "row0uins.h" - -#ifdef UNIV_NONINL -#include "row0uins.ic" -#endif - -#include "dict0dict.h" -#include "dict0boot.h" -#include "dict0crea.h" -#include "trx0undo.h" -#include "trx0roll.h" 
-#include "btr0btr.h" -#include "mach0data.h" -#include "row0undo.h" -#include "row0vers.h" -#include "trx0trx.h" -#include "trx0rec.h" -#include "row0row.h" -#include "row0upd.h" -#include "que0que.h" -#include "ibuf0ibuf.h" -#include "log0log.h" - -/******************************************************************* -Removes a clustered index record. The pcur in node was positioned on the -record, now it is detached. */ -static -ulint -row_undo_ins_remove_clust_rec( -/*==========================*/ - /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ - undo_node_t* node) /* in: undo node */ -{ - btr_cur_t* btr_cur; - ibool success; - ulint err; - ulint n_tries = 0; - mtr_t mtr; - - mtr_start(&mtr); - - success = btr_pcur_restore_position(BTR_MODIFY_LEAF, &(node->pcur), - &mtr); - ut_a(success); - - if (ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) { - - /* Drop the index tree associated with the row in - SYS_INDEXES table: */ - - dict_drop_index_tree(btr_pcur_get_rec(&(node->pcur)), &mtr); - - mtr_commit(&mtr); - - mtr_start(&mtr); - - success = btr_pcur_restore_position(BTR_MODIFY_LEAF, - &(node->pcur), &mtr); - ut_a(success); - } - - btr_cur = btr_pcur_get_btr_cur(&(node->pcur)); - - success = btr_cur_optimistic_delete(btr_cur, &mtr); - - btr_pcur_commit_specify_mtr(&(node->pcur), &mtr); - - if (success) { - trx_undo_rec_release(node->trx, node->undo_no); - - return(DB_SUCCESS); - } -retry: - /* If did not succeed, try pessimistic descent to tree */ - mtr_start(&mtr); - - success = btr_pcur_restore_position(BTR_MODIFY_TREE, - &(node->pcur), &mtr); - ut_a(success); - - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr); - - /* The delete operation may fail if we have little - file space left: TODO: easiest to crash the database - and restart with more file space */ - - if (err == DB_OUT_OF_FILE_SPACE - && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) { - - btr_pcur_commit_specify_mtr(&(node->pcur), &mtr); - - n_tries++; - - 
os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME); - - goto retry; - } - - btr_pcur_commit_specify_mtr(&(node->pcur), &mtr); - - trx_undo_rec_release(node->trx, node->undo_no); - - return(err); -} - -/******************************************************************* -Removes a secondary index entry if found. */ -static -ulint -row_undo_ins_remove_sec_low( -/*========================*/ - /* out: DB_SUCCESS, DB_FAIL, or - DB_OUT_OF_FILE_SPACE */ - ulint mode, /* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE, - depending on whether we wish optimistic or - pessimistic descent down the index tree */ - dict_index_t* index, /* in: index */ - dtuple_t* entry) /* in: index entry to remove */ -{ - btr_pcur_t pcur; - btr_cur_t* btr_cur; - ibool found; - ibool success; - ulint err; - mtr_t mtr; - - log_free_check(); - mtr_start(&mtr); - - found = row_search_index_entry(index, entry, mode, &pcur, &mtr); - - btr_cur = btr_pcur_get_btr_cur(&pcur); - - if (!found) { - /* Not found */ - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - return(DB_SUCCESS); - } - - if (mode == BTR_MODIFY_LEAF) { - success = btr_cur_optimistic_delete(btr_cur, &mtr); - - if (success) { - err = DB_SUCCESS; - } else { - err = DB_FAIL; - } - } else { - ut_ad(mode == BTR_MODIFY_TREE); - - btr_cur_pessimistic_delete(&err, FALSE, btr_cur, TRUE, &mtr); - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - return(err); -} - -/******************************************************************* -Removes a secondary index entry from the index if found. Tries first -optimistic, then pessimistic descent down the tree. 
*/ -static -ulint -row_undo_ins_remove_sec( -/*====================*/ - /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ - dict_index_t* index, /* in: index */ - dtuple_t* entry) /* in: index entry to insert */ -{ - ulint err; - ulint n_tries = 0; - - /* Try first optimistic descent to the B-tree */ - - err = row_undo_ins_remove_sec_low(BTR_MODIFY_LEAF, index, entry); - - if (err == DB_SUCCESS) { - - return(err); - } - - /* Try then pessimistic descent to the B-tree */ -retry: - err = row_undo_ins_remove_sec_low(BTR_MODIFY_TREE, index, entry); - - /* The delete operation may fail if we have little - file space left: TODO: easiest to crash the database - and restart with more file space */ - - if (err != DB_SUCCESS && n_tries < BTR_CUR_RETRY_DELETE_N_TIMES) { - - n_tries++; - - os_thread_sleep(BTR_CUR_RETRY_SLEEP_TIME); - - goto retry; - } - - return(err); -} - -/*************************************************************** -Parses the row reference and other info in a fresh insert undo record. */ -static -void -row_undo_ins_parse_undo_rec( -/*========================*/ - undo_node_t* node) /* in: row undo node */ -{ - dict_index_t* clust_index; - byte* ptr; - dulint undo_no; - dulint table_id; - ulint type; - ulint dummy; - ibool dummy_extern; - - ut_ad(node); - - ptr = trx_undo_rec_get_pars(node->undo_rec, &type, &dummy, - &dummy_extern, &undo_no, &table_id); - ut_ad(type == TRX_UNDO_INSERT_REC); - node->rec_type = type; - - node->table = dict_table_get_on_id(table_id, node->trx); - - if (node->table == NULL) { - - return; - } - - if (node->table->ibd_file_missing) { - /* We skip undo operations to missing .ibd files */ - node->table = NULL; - - return; - } - - clust_index = dict_table_get_first_index(node->table); - - ptr = trx_undo_rec_get_row_ref(ptr, clust_index, &(node->ref), - node->heap); -} - -/*************************************************************** -Undoes a fresh insert of a row to a table. 
A fresh insert means that -the same clustered index unique key did not have any record, even delete -marked, at the time of the insert. */ - -ulint -row_undo_ins( -/*=========*/ - /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ - undo_node_t* node) /* in: row undo node */ -{ - dtuple_t* entry; - ibool found; - ulint err; - - ut_ad(node); - ut_ad(node->state == UNDO_NODE_INSERT); - - row_undo_ins_parse_undo_rec(node); - - if (node->table == NULL) { - found = FALSE; - } else { - found = row_undo_search_clust_to_pcur(node); - } - - if (!found) { - trx_undo_rec_release(node->trx, node->undo_no); - - return(DB_SUCCESS); - } - - node->index = dict_table_get_next_index( - dict_table_get_first_index(node->table)); - - while (node->index != NULL) { - entry = row_build_index_entry(node->row, node->index, - node->heap); - err = row_undo_ins_remove_sec(node->index, entry); - - if (err != DB_SUCCESS) { - - return(err); - } - - node->index = dict_table_get_next_index(node->index); - } - - err = row_undo_ins_remove_clust_rec(node); - - return(err); -} diff --git a/storage/innobase/row/row0umod.c b/storage/innobase/row/row0umod.c deleted file mode 100644 index 68139da116e..00000000000 --- a/storage/innobase/row/row0umod.c +++ /dev/null @@ -1,762 +0,0 @@ -/****************************************************** -Undo modify of a row - -(c) 1997 Innobase Oy - -Created 2/27/1997 Heikki Tuuri -*******************************************************/ - -#include "row0umod.h" - -#ifdef UNIV_NONINL -#include "row0umod.ic" -#endif - -#include "dict0dict.h" -#include "dict0boot.h" -#include "trx0undo.h" -#include "trx0roll.h" -#include "btr0btr.h" -#include "mach0data.h" -#include "row0undo.h" -#include "row0vers.h" -#include "trx0trx.h" -#include "trx0rec.h" -#include "row0row.h" -#include "row0upd.h" -#include "que0que.h" -#include "log0log.h" - -/* Considerations on undoing a modify operation. -(1) Undoing a delete marking: all index records should be found. 
Some of -them may have delete mark already FALSE, if the delete mark operation was -stopped underway, or if the undo operation ended prematurely because of a -system crash. -(2) Undoing an update of a delete unmarked record: the newer version of -an updated secondary index entry should be removed if no prior version -of the clustered index record requires its existence. Otherwise, it should -be delete marked. -(3) Undoing an update of a delete marked record. In this kind of update a -delete marked clustered index record was delete unmarked and possibly also -some of its fields were changed. Now, it is possible that the delete marked -version has become obsolete at the time the undo is started. */ - -/*************************************************************** -Checks if also the previous version of the clustered index record was -modified or inserted by the same transaction, and its undo number is such -that it should be undone in the same rollback. */ -UNIV_INLINE -ibool -row_undo_mod_undo_also_prev_vers( -/*=============================*/ - /* out: TRUE if also previous modify or - insert of this row should be undone */ - undo_node_t* node, /* in: row undo node */ - dulint* undo_no)/* out: the undo number */ -{ - trx_undo_rec_t* undo_rec; - trx_t* trx; - - trx = node->trx; - - if (0 != ut_dulint_cmp(node->new_trx_id, trx->id)) { - - *undo_no = ut_dulint_zero; - return(FALSE); - } - - undo_rec = trx_undo_get_undo_rec_low(node->new_roll_ptr, node->heap); - - *undo_no = trx_undo_rec_get_undo_no(undo_rec); - - return(ut_dulint_cmp(trx->roll_limit, *undo_no) <= 0); -} - -/*************************************************************** -Undoes a modify in a clustered index record. 
*/
static
ulint
row_undo_mod_clust_low(
/*===================*/
				/* out: DB_SUCCESS, DB_FAIL, or error code:
				we may run out of file space */
	undo_node_t*	node,	/* in: row undo node */
	que_thr_t*	thr,	/* in: query thread */
	mtr_t*		mtr,	/* in: mtr */
	ulint		mode)	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
{
	/* Both update variants are called with the same flag set */
	const ulint	upd_flags = BTR_NO_LOCKING_FLAG
		| BTR_NO_UNDO_LOG_FLAG
		| BTR_KEEP_SYS_FLAG;
	btr_pcur_t*	pcur = &(node->pcur);
	btr_cur_t*	btr_cur = btr_pcur_get_btr_cur(pcur);
	big_rec_t*	dummy_big_rec;
	ibool		success;

	success = btr_pcur_restore_position(mode, pcur, mtr);

	/* The restore result is only asserted in debug builds */
	ut_ad(success);

	if (mode != BTR_MODIFY_LEAF) {
		ut_ad(mode == BTR_MODIFY_TREE);

		return(btr_cur_pessimistic_update(
			       upd_flags, btr_cur, &dummy_big_rec,
			       node->update, node->cmpl_info, thr, mtr));
	}

	return(btr_cur_optimistic_update(upd_flags, btr_cur, node->update,
					 node->cmpl_info, thr, mtr));
}

/***************************************************************
Removes a clustered index record after undo if possible.
*/
-static
-ulint
-row_undo_mod_remove_clust_low(
-/*==========================*/
-				/* out: DB_SUCCESS, DB_FAIL, or error code:
-				we may run out of file space */
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr __attribute__((unused)), /* in: query thread */
-	mtr_t*		mtr,	/* in: mtr */
-	ulint		mode)	/* in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
-{
-	btr_pcur_t*	clust_pcur = &(node->pcur);
-	btr_cur_t*	clust_cur = btr_pcur_get_btr_cur(clust_pcur);
-	ulint		err;
-
-	/* If the cursor position cannot be restored there is nothing
-	to remove: report success */
-
-	if (!btr_pcur_restore_position(mode, clust_pcur, mtr)) {
-
-		return(DB_SUCCESS);
-	}
-
-	/* Find out if we can remove the whole clustered index record:
-	only for an UPD_DEL undo record, and only when the delete marked
-	version need not be preserved */
-
-	if (node->rec_type != TRX_UNDO_UPD_DEL_REC
-	    || row_vers_must_preserve_del_marked(node->new_trx_id, mtr)) {
-
-		return(DB_SUCCESS);
-	}
-
-	/* Ok, we can remove */
-
-	if (mode != BTR_MODIFY_LEAF) {
-		ut_ad(mode == BTR_MODIFY_TREE);
-
-		/* Note that since this operation is analogous to purge,
-		we can free also inherited externally stored fields:
-		hence the last FALSE in the call below */
-
-		btr_cur_pessimistic_delete(&err, FALSE, clust_cur, FALSE, mtr);
-
-		/* The delete operation may fail if we have little
-		file space left: TODO: easiest to crash the database
-		and restart with more file space */
-
-		return(err);
-	}
-
-	err = btr_cur_optimistic_delete(clust_cur, mtr)
-		? DB_SUCCESS
-		: DB_FAIL;
-
-	return(err);
-}
-
-/***************************************************************
-Undoes a modify in a clustered index record. Sets also the node state for the
-next round of undo.
*/
-static
-ulint
-row_undo_mod_clust(
-/*===============*/
-				/* out: DB_SUCCESS or error code: we may run
-				out of file space */
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	btr_pcur_t*	clust_pcur;
-	mtr_t		mtr;
-	ulint		err;
-	ibool		undo_prev_too;
-	dulint		prev_undo_no;
-
-	ut_ad(node && thr);
-
-	/* Check if also the previous version of the clustered index record
-	should be undone in this same rollback operation */
-
-	undo_prev_too = row_undo_mod_undo_also_prev_vers(node, &prev_undo_no);
-
-	clust_pcur = &(node->pcur);
-
-	/* Try optimistic processing of the record, keeping changes within
-	the index page */
-
-	mtr_start(&mtr);
-
-	err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_LEAF);
-
-	if (err != DB_SUCCESS) {
-		/* We may have to modify tree structure: commit the first
-		mtr and do a pessimistic descent down the index tree */
-
-		btr_pcur_commit_specify_mtr(clust_pcur, &mtr);
-
-		mtr_start(&mtr);
-
-		err = row_undo_mod_clust_low(node, thr, &mtr, BTR_MODIFY_TREE);
-	}
-
-	btr_pcur_commit_specify_mtr(clust_pcur, &mtr);
-
-	if (err == DB_SUCCESS && node->rec_type == TRX_UNDO_UPD_DEL_REC) {
-
-		/* Same two-step scheme for removing the record */
-
-		mtr_start(&mtr);
-
-		err = row_undo_mod_remove_clust_low(node, thr, &mtr,
-						    BTR_MODIFY_LEAF);
-		if (err != DB_SUCCESS) {
-			/* We may have to modify tree structure: do a
-			pessimistic descent down the index tree */
-
-			btr_pcur_commit_specify_mtr(clust_pcur, &mtr);
-
-			mtr_start(&mtr);
-
-			err = row_undo_mod_remove_clust_low(node, thr, &mtr,
-							    BTR_MODIFY_TREE);
-		}
-
-		btr_pcur_commit_specify_mtr(clust_pcur, &mtr);
-	}
-
-	node->state = UNDO_NODE_FETCH_NEXT;
-
-	trx_undo_rec_release(node->trx, node->undo_no);
-
-	/* Reserve the undo log record to the prior version after
-	committing &mtr: this is necessary to comply with the latching
-	order, as &mtr may contain the fsp latch which is lower in
-	the latch hierarchy than trx->undo_mutex. The reservation is
-	attempted only when the prior version is to be undone and no
-	error occurred. */
-
-	if (undo_prev_too
-	    && err == DB_SUCCESS
-	    && trx_undo_rec_reserve(node->trx, prev_undo_no)) {
-
-		node->state = UNDO_NODE_PREV_VERS;
-	}
-
-	return(err);
-}
-
-/***************************************************************
-Delete marks or removes a secondary index entry if found. */
-static
-ulint
-row_undo_mod_del_mark_or_remove_sec_low(
-/*====================================*/
-				/* out: DB_SUCCESS, DB_FAIL, or
-				DB_OUT_OF_FILE_SPACE */
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr,	/* in: query thread */
-	dict_index_t*	index,	/* in: index */
-	dtuple_t*	entry,	/* in: index entry */
-	ulint		mode)	/* in: latch mode BTR_MODIFY_LEAF or
-				BTR_MODIFY_TREE */
-{
-	btr_pcur_t	sec_pcur;
-	btr_cur_t*	sec_cur;
-	mtr_t		mtr;
-	mtr_t		mtr_vers;
-	ibool		found;
-	ibool		restored;
-	ulint		err;
-
-	log_free_check();
-	mtr_start(&mtr);
-
-	found = row_search_index_entry(index, entry, mode, &sec_pcur, &mtr);
-
-	sec_cur = btr_pcur_get_btr_cur(&sec_pcur);
-
-	if (!found) {
-		/* Not found: nothing to mark or remove */
-
-		btr_pcur_close(&sec_pcur);
-		mtr_commit(&mtr);
-
-		return(DB_SUCCESS);
-	}
-
-	/* We should remove the index record if no prior version of the row,
-	which cannot be purged yet, requires its existence. If some requires,
-	we should delete mark the record. The clustered index record is
-	looked at under a separate mini-transaction, mtr_vers. */
-
-	mtr_start(&mtr_vers);
-
-	restored = btr_pcur_restore_position(BTR_SEARCH_LEAF, &(node->pcur),
-					     &mtr_vers);
-	ut_a(restored);
-
-	if (row_vers_old_has_index_entry(FALSE,
-					 btr_pcur_get_rec(&(node->pcur)),
-					 &mtr_vers, index, entry)) {
-
-		err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG,
-						   sec_cur, TRUE, thr, &mtr);
-		ut_ad(err == DB_SUCCESS);
-
-	} else if (mode == BTR_MODIFY_LEAF) {
-		/* Remove the index record, optimistic variant */
-
-		err = btr_cur_optimistic_delete(sec_cur, &mtr)
-			? DB_SUCCESS
-			: DB_FAIL;
-	} else {
-		/* Remove the index record, pessimistic variant */
-
-		ut_ad(mode == BTR_MODIFY_TREE);
-
-		btr_cur_pessimistic_delete(&err, FALSE, sec_cur,
-					   TRUE, &mtr);
-
-		/* The delete operation may fail if we have little
-		file space left: TODO: easiest to crash the database
-		and restart with more file space */
-	}
-
-	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr_vers);
-	btr_pcur_close(&sec_pcur);
-	mtr_commit(&mtr);
-
-	return(err);
-}
-
-/***************************************************************
-Delete marks or removes a secondary index entry if found.
-NOTE that if we updated the fields of a delete-marked secondary index record
-so that alphabetically they stayed the same, e.g., 'abc' -> 'aBc', we cannot
-return to the original values because we do not know them. But this should
-not cause problems because in row0sel.c, in queries we always retrieve the
-clustered index record or an earlier version of it, if the secondary index
-record through which we do the search is delete-marked.
*/ -static -ulint -row_undo_mod_del_mark_or_remove_sec( -/*================================*/ - /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ - undo_node_t* node, /* in: row undo node */ - que_thr_t* thr, /* in: query thread */ - dict_index_t* index, /* in: index */ - dtuple_t* entry) /* in: index entry */ -{ - ulint err; - - err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index, - entry, BTR_MODIFY_LEAF); - if (err == DB_SUCCESS) { - - return(err); - } - - err = row_undo_mod_del_mark_or_remove_sec_low(node, thr, index, - entry, BTR_MODIFY_TREE); - return(err); -} - -/*************************************************************** -Delete unmarks a secondary index entry which must be found. It might not be -delete-marked at the moment, but it does not harm to unmark it anyway. We also -need to update the fields of the secondary index record if we updated its -fields but alphabetically they stayed the same, e.g., 'abc' -> 'aBc'. */ -static -ulint -row_undo_mod_del_unmark_sec_and_undo_update( -/*========================================*/ - /* out: DB_FAIL or DB_SUCCESS or - DB_OUT_OF_FILE_SPACE */ - ulint mode, /* in: search mode: BTR_MODIFY_LEAF or - BTR_MODIFY_TREE */ - que_thr_t* thr, /* in: query thread */ - dict_index_t* index, /* in: index */ - dtuple_t* entry) /* in: index entry */ -{ - mem_heap_t* heap; - btr_pcur_t pcur; - upd_t* update; - ulint err = DB_SUCCESS; - ibool found; - big_rec_t* dummy_big_rec; - mtr_t mtr; - trx_t* trx = thr_get_trx(thr); - - log_free_check(); - mtr_start(&mtr); - - found = row_search_index_entry(index, entry, mode, &pcur, &mtr); - - if (!found) { - fputs("InnoDB: error in sec index entry del undo in\n" - "InnoDB: ", stderr); - dict_index_name_print(stderr, trx, index); - fputs("\n" - "InnoDB: tuple ", stderr); - dtuple_print(stderr, entry); - fputs("\n" - "InnoDB: record ", stderr); - rec_print(stderr, btr_pcur_get_rec(&pcur), index); - putc('\n', stderr); - trx_print(stderr, trx, 0); - fputs("\n" - "InnoDB: Submit a 
detailed bug report" - " to http://bugs.mysql.com\n", stderr); - } else { - btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur); - - err = btr_cur_del_mark_set_sec_rec(BTR_NO_LOCKING_FLAG, - btr_cur, FALSE, thr, &mtr); - ut_a(err == DB_SUCCESS); - heap = mem_heap_create(100); - - update = row_upd_build_sec_rec_difference_binary( - index, entry, btr_cur_get_rec(btr_cur), trx, heap); - if (upd_get_n_fields(update) == 0) { - - /* Do nothing */ - - } else if (mode == BTR_MODIFY_LEAF) { - /* Try an optimistic updating of the record, keeping - changes within the page */ - - err = btr_cur_optimistic_update( - BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG, - btr_cur, update, 0, thr, &mtr); - if (err == DB_OVERFLOW || err == DB_UNDERFLOW) { - err = DB_FAIL; - } - } else { - ut_a(mode == BTR_MODIFY_TREE); - err = btr_cur_pessimistic_update( - BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG, - btr_cur, &dummy_big_rec, - update, 0, thr, &mtr); - } - - mem_heap_free(heap); - } - - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - return(err); -} - -/*************************************************************** -Undoes a modify in secondary indexes when undo record type is UPD_DEL. 
*/ -static -ulint -row_undo_mod_upd_del_sec( -/*=====================*/ - /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ - undo_node_t* node, /* in: row undo node */ - que_thr_t* thr) /* in: query thread */ -{ - mem_heap_t* heap; - dtuple_t* entry; - dict_index_t* index; - ulint err; - - heap = mem_heap_create(1024); - - while (node->index != NULL) { - index = node->index; - - entry = row_build_index_entry(node->row, index, heap); - - err = row_undo_mod_del_mark_or_remove_sec(node, thr, index, - entry); - if (err != DB_SUCCESS) { - - mem_heap_free(heap); - - return(err); - } - - node->index = dict_table_get_next_index(node->index); - } - - mem_heap_free(heap); - - return(DB_SUCCESS); -} - -/*************************************************************** -Undoes a modify in secondary indexes when undo record type is DEL_MARK. */ -static -ulint -row_undo_mod_del_mark_sec( -/*======================*/ - /* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */ - undo_node_t* node, /* in: row undo node */ - que_thr_t* thr) /* in: query thread */ -{ - mem_heap_t* heap; - dtuple_t* entry; - dict_index_t* index; - ulint err; - - heap = mem_heap_create(1024); - - while (node->index != NULL) { - index = node->index; - - entry = row_build_index_entry(node->row, index, heap); - - err = row_undo_mod_del_unmark_sec_and_undo_update( - BTR_MODIFY_LEAF, thr, index, entry); - if (err == DB_FAIL) { - err = row_undo_mod_del_unmark_sec_and_undo_update( - BTR_MODIFY_TREE, thr, index, entry); - } - - if (err != DB_SUCCESS) { - - mem_heap_free(heap); - - return(err); - } - - node->index = dict_table_get_next_index(node->index); - } - - mem_heap_free(heap); - - return(DB_SUCCESS); -} - -/*************************************************************** -Undoes a modify in secondary indexes when undo record type is UPD_EXIST. 
*/
-static
-ulint
-row_undo_mod_upd_exist_sec(
-/*=======================*/
-				/* out: DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	mem_heap_t*	entry_heap;
-	dtuple_t*	entry;
-	dict_index_t*	sec_index;
-	ulint		err = DB_SUCCESS;
-
-	if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) {
-		/* No change in secondary indexes */
-
-		return(DB_SUCCESS);
-	}
-
-	entry_heap = mem_heap_create(1024);
-
-	for (; node->index != NULL;
-	     node->index = dict_table_get_next_index(node->index)) {
-
-		sec_index = node->index;
-
-		if (!row_upd_changes_ord_field_binary(node->row, sec_index,
-						      node->update)) {
-			/* The update leaves this index alone */
-
-			continue;
-		}
-
-		/* Build the newest version of the index entry */
-		entry = row_build_index_entry(node->row, sec_index,
-					      entry_heap);
-
-		/* NOTE that if we updated the fields of a
-		delete-marked secondary index record so that
-		alphabetically they stayed the same, e.g.,
-		'abc' -> 'aBc', we cannot return to the original
-		values because we do not know them. But this should
-		not cause problems because in row0sel.c, in queries
-		we always retrieve the clustered index record or an
-		earlier version of it, if the secondary index record
-		through which we do the search is delete-marked. */
-
-		err = row_undo_mod_del_mark_or_remove_sec(node, thr,
-							  sec_index, entry);
-		if (err != DB_SUCCESS) {
-
-			break;
-		}
-
-		/* We may have to update the delete mark in the
-		secondary index record of the previous version of
-		the row. We also need to update the fields of
-		the secondary index record if we updated its fields
-		but alphabetically they stayed the same, e.g.,
-		'abc' -> 'aBc'. */
-
-		row_upd_index_replace_new_col_vals(entry, sec_index,
-						   node->update, NULL);
-
-		err = row_undo_mod_del_unmark_sec_and_undo_update(
-			BTR_MODIFY_LEAF, thr, sec_index, entry);
-
-		if (err == DB_FAIL) {
-			err = row_undo_mod_del_unmark_sec_and_undo_update(
-				BTR_MODIFY_TREE, thr, sec_index, entry);
-		}
-
-		if (err != DB_SUCCESS) {
-
-			break;
-		}
-	}
-
-	mem_heap_free(entry_heap);
-
-	return(err);
-}
-
-/***************************************************************
-Parses the row reference and other info in a modify undo log record. */
-static
-void
-row_undo_mod_parse_undo_rec(
-/*========================*/
-	undo_node_t*	node,	/* in: row undo node */
-	que_thr_t*	thr)	/* in: query thread */
-{
-	dict_index_t*	clust_index;
-	trx_t*		trx;
-	byte*		cursor;
-	dulint		rec_undo_no;
-	dulint		rec_table_id;
-	dulint		rec_trx_id;
-	dulint		rec_roll_ptr;
-	ulint		rec_info_bits;
-	ulint		rec_type;
-	ulint		rec_cmpl_info;
-	ibool		unused_extern;
-
-	ut_ad(node && thr);
-
-	trx = thr_get_trx(thr);
-
-	/* Read the general part of the undo record; cursor then points
-	past it */
-
-	cursor = trx_undo_rec_get_pars(node->undo_rec, &rec_type,
-				       &rec_cmpl_info, &unused_extern,
-				       &rec_undo_no, &rec_table_id);
-	node->rec_type = rec_type;
-
-	node->table = dict_table_get_on_id(rec_table_id, trx);
-
-	/* TODO: other fixes associated with DROP TABLE + rollback in the
-	same table by another user */
-
-	if (node->table == NULL) {
-		/* Table was dropped */
-		return;
-	}
-
-	if (node->table->ibd_file_missing) {
-		/* We skip undo operations to missing .ibd files */
-		node->table = NULL;
-
-		return;
-	}
-
-	clust_index = dict_table_get_first_index(node->table);
-
-	/* The remaining parts are read in sequence: system columns,
-	row reference, update vector */
-
-	cursor = trx_undo_update_rec_get_sys_cols(cursor, &rec_trx_id,
-						  &rec_roll_ptr,
-						  &rec_info_bits);
-
-	cursor = trx_undo_rec_get_row_ref(cursor, clust_index, &(node->ref),
-					  node->heap);
-
-	trx_undo_update_rec_get_update(cursor, clust_index, rec_type,
-				       rec_trx_id, rec_roll_ptr,
-				       rec_info_bits, trx,
-				       node->heap, &(node->update));
-
-	node->new_roll_ptr = rec_roll_ptr;
-	node->new_trx_id = rec_trx_id;
-	node->cmpl_info = rec_cmpl_info;
-}
-
-/*************************************************************** -Undoes a modify operation on a row of a table. */ - -ulint -row_undo_mod( -/*=========*/ - /* out: DB_SUCCESS or error code */ - undo_node_t* node, /* in: row undo node */ - que_thr_t* thr) /* in: query thread */ -{ - ibool found; - ulint err; - - ut_ad(node && thr); - ut_ad(node->state == UNDO_NODE_MODIFY); - - row_undo_mod_parse_undo_rec(node, thr); - - if (node->table == NULL) { - found = FALSE; - } else { - found = row_undo_search_clust_to_pcur(node); - } - - if (!found) { - /* It is already undone, or will be undone by another query - thread, or table was dropped */ - - trx_undo_rec_release(node->trx, node->undo_no); - node->state = UNDO_NODE_FETCH_NEXT; - - return(DB_SUCCESS); - } - - node->index = dict_table_get_next_index( - dict_table_get_first_index(node->table)); - - if (node->rec_type == TRX_UNDO_UPD_EXIST_REC) { - - err = row_undo_mod_upd_exist_sec(node, thr); - - } else if (node->rec_type == TRX_UNDO_DEL_MARK_REC) { - - err = row_undo_mod_del_mark_sec(node, thr); - } else { - ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC); - err = row_undo_mod_upd_del_sec(node, thr); - } - - if (err != DB_SUCCESS) { - - return(err); - } - - err = row_undo_mod_clust(node, thr); - - return(err); -} diff --git a/storage/innobase/row/row0undo.c b/storage/innobase/row/row0undo.c deleted file mode 100644 index f03f84ed1b0..00000000000 --- a/storage/innobase/row/row0undo.c +++ /dev/null @@ -1,352 +0,0 @@ -/****************************************************** -Row undo - -(c) 1997 Innobase Oy - -Created 1/8/1997 Heikki Tuuri -*******************************************************/ - -#include "row0undo.h" - -#ifdef UNIV_NONINL -#include "row0undo.ic" -#endif - -#include "fsp0fsp.h" -#include "mach0data.h" -#include "trx0rseg.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "trx0undo.h" -#include "trx0purge.h" -#include "trx0rec.h" -#include "que0que.h" -#include "row0row.h" -#include "row0uins.h" 
-#include "row0umod.h" -#include "row0mysql.h" -#include "srv0srv.h" - -/* How to undo row operations? -(1) For an insert, we have stored a prefix of the clustered index record -in the undo log. Using it, we look for the clustered record, and using -that we look for the records in the secondary indexes. The insert operation -may have been left incomplete, if the database crashed, for example. -We may have to look at the trx id and roll ptr to make sure the record in the -clustered index is really the one for which the undo log record was -written. We can use the framework we get from the original insert op. -(2) Delete marking: We can use the framework we get from the original -delete mark op. We only have to check the trx id. -(3) Update: This may be the most complicated. We have to use the framework -we get from the original update op. - -What if the same trx repeatedly deletes and inserts an identical row? -Then the row id changes and also roll ptr. What if the row id was not -part of the ordering fields in the clustered index? Maybe we have to write -it to undo log. Well, maybe not, because if we order the row id and trx id -in descending order, then the only undeleted copy is the first in the -index. Our searches in row operations always position the cursor before -the first record in the result set. But, if there is no key defined for -a table, then it would be desirable that row id is in ascending order. -So, let's store row id in descending order only if it is not an ordering -field in the clustered index. - -NOTE: Deletes and inserts may lead to a situation where there are identical -records in a secondary index. Is that a problem in the B-tree? Yes. -Also updates can lead to this, unless trx id and roll ptr are included in -ord fields. -(1) Fix in clustered indexes: include row id, trx id, and roll ptr -in node pointers of B-tree. 
-(2) Fix in secondary indexes: include all fields in node pointers, and -if an entry is inserted, check if it is equal to the right neighbor, -in which case update the right neighbor: the neighbor must be delete -marked, set it unmarked and write the trx id of the current transaction. - -What if the same trx repeatedly updates the same row, updating a secondary -index field or not? Updating a clustered index ordering field? - -(1) If it does not update the secondary index and not the clustered index -ord field. Then the secondary index record stays unchanged, but the -trx id in the secondary index record may be smaller than in the clustered -index record. This is no problem? -(2) If it updates secondary index ord field but not clustered: then in -secondary index there are delete marked records, which differ in an -ord field. No problem. -(3) Updates clustered ord field but not secondary, and secondary index -is unique. Then the record in secondary index is just updated at the -clustered ord field. -(4) - -Problem with duplicate records: -Fix 1: Add a trx op no field to all indexes. A problem: if a trx with a -bigger trx id has inserted and delete marked a similar row, our trx inserts -again a similar row, and a trx with an even bigger id delete marks it. Then -the position of the row should change in the index if the trx id affects -the alphabetical ordering. - -Fix 2: If an insert encounters a similar row marked deleted, we turn the -insert into an 'update' of the row marked deleted. Then we must write undo -info on the update. A problem: what if a purge operation tries to remove -the delete marked row? - -We can think of the database row versions as a linked list which starts -from the record in the clustered index, and is linked by roll ptrs -through undo logs. The secondary index records are references which tell -what kinds of records can be found in this linked list for a record -in the clustered index. - -How to do the purge? 
A record can be removed from the clustered index
-if its linked list becomes empty, i.e., the row has been marked deleted
-and its roll ptr points to the record in the undo log we are going through,
-doing the purge. Similarly, during a rollback, a record can be removed
-if the stored roll ptr in the undo log points to a trx already (being) purged,
-or if the roll ptr is NULL, i.e., it was a fresh insert. */
-
-/************************************************************************
-Creates a row undo node to a query graph. */
-
-undo_node_t*
-row_undo_node_create(
-/*=================*/
-				/* out, own: undo node */
-	trx_t*		trx,	/* in: transaction */
-	que_thr_t*	parent,	/* in: parent node, i.e., a thr node */
-	mem_heap_t*	heap)	/* in: memory heap where created */
-{
-	undo_node_t*	new_node;
-
-	ut_ad(trx && parent && heap);
-
-	/* The node itself lives in the caller's heap */
-
-	new_node = mem_heap_alloc(heap, sizeof(undo_node_t));
-
-	/* Query graph linkage */
-
-	new_node->common.type = QUE_NODE_UNDO;
-	new_node->common.parent = parent;
-
-	/* The first step of a fresh node is to fetch an undo record */
-
-	new_node->state = UNDO_NODE_FETCH_NEXT;
-	new_node->trx = trx;
-
-	btr_pcur_init(&(new_node->pcur));
-
-	/* The node gets a heap of its own in addition to the one
-	it was allocated from */
-
-	new_node->heap = mem_heap_create(256);
-
-	return(new_node);
-}
-
-/***************************************************************
-Looks for the clustered index record when node has the row reference.
-The pcur in node is used in the search. If found, stores the row to node,
-and stores the position of pcur, and detaches it. The pcur must be closed
-by the caller in any case.
*/
-
-ibool
-row_undo_search_clust_to_pcur(
-/*==========================*/
-				/* out: TRUE if found; NOTE the node->pcur
-				must be closed by the caller, regardless of
-				the return value */
-	undo_node_t*	node)	/* in: row undo node */
-{
-	dict_index_t*	clust_index;
-	rec_t*		clust_rec;
-	mtr_t		mtr;
-	ibool		located;
-	ibool		result;
-	mem_heap_t*	offsets_heap = NULL;
-	ulint		offsets_buf[REC_OFFS_NORMAL_SIZE];
-	ulint*		offsets = offsets_buf;
-
-	/* The first element of the offsets array holds its size
-	in elements */
-	*offsets_buf = (sizeof offsets_buf) / sizeof *offsets_buf;
-
-	mtr_start(&mtr);
-
-	clust_index = dict_table_get_first_index(node->table);
-
-	located = row_search_on_row_ref(&(node->pcur), BTR_MODIFY_LEAF,
-					node->table, node->ref, &mtr);
-
-	clust_rec = btr_pcur_get_rec(&(node->pcur));
-
-	offsets = rec_get_offsets(clust_rec, clust_index, offsets,
-				  ULINT_UNDEFINED, &offsets_heap);
-
-	if (located
-	    && 0 == ut_dulint_cmp(node->roll_ptr,
-				  row_get_rec_roll_ptr(clust_rec, clust_index,
-						       offsets))) {
-
-		/* The record is there and its roll ptr is the one this
-		node is undoing: copy the row into the node heap and
-		remember the cursor position */
-
-		node->row = row_build(ROW_COPY_DATA, clust_index, clust_rec,
-				      offsets, node->heap);
-		btr_pcur_store_position(&(node->pcur), &mtr);
-
-		result = TRUE;
-	} else {
-		/* We must remove the reservation on the undo log record
-		BEFORE releasing the latch on the clustered index page: this
-		is to make sure that some thread will eventually undo the
-		modification corresponding to node->roll_ptr. */
-
-		result = FALSE;
-	}
-
-	btr_pcur_commit_specify_mtr(&(node->pcur), &mtr);
-
-	/* rec_get_offsets() was given &offsets_heap; free the heap if
-	it was set */
-
-	if (UNIV_LIKELY_NULL(offsets_heap)) {
-		mem_heap_free(offsets_heap);
-	}
-
-	return(result);
-}
-
-/***************************************************************
-Fetches an undo log record and does the undo for the recorded operation.
-If none left, or a partial rollback completed, returns control to the
-parent node, which is always a query thread node.
*/ -static -ulint -row_undo( -/*=====*/ - /* out: DB_SUCCESS if operation successfully - completed, else error code */ - undo_node_t* node, /* in: row undo node */ - que_thr_t* thr) /* in: query thread */ -{ - ulint err; - trx_t* trx; - dulint roll_ptr; - ibool locked_data_dict; - - ut_ad(node && thr); - - trx = node->trx; - - if (node->state == UNDO_NODE_FETCH_NEXT) { - - node->undo_rec = trx_roll_pop_top_rec_of_trx(trx, - trx->roll_limit, - &roll_ptr, - node->heap); - if (!node->undo_rec) { - /* Rollback completed for this query thread */ - - thr->run_node = que_node_get_parent(node); - - return(DB_SUCCESS); - } - - node->roll_ptr = roll_ptr; - node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec); - - if (trx_undo_roll_ptr_is_insert(roll_ptr)) { - - node->state = UNDO_NODE_INSERT; - } else { - node->state = UNDO_NODE_MODIFY; - } - - } else if (node->state == UNDO_NODE_PREV_VERS) { - - /* Undo should be done to the same clustered index record - again in this same rollback, restoring the previous version */ - - roll_ptr = node->new_roll_ptr; - - node->undo_rec = trx_undo_get_undo_rec_low(roll_ptr, - node->heap); - node->roll_ptr = roll_ptr; - node->undo_no = trx_undo_rec_get_undo_no(node->undo_rec); - - if (trx_undo_roll_ptr_is_insert(roll_ptr)) { - - node->state = UNDO_NODE_INSERT; - } else { - node->state = UNDO_NODE_MODIFY; - } - } - - /* Prevent DROP TABLE etc. while we are rolling back this row. - If we are doing a TABLE CREATE or some other dictionary operation, - then we already have dict_operation_lock locked in x-mode. Do not - try to lock again, because that would cause a hang. 
*/ - - locked_data_dict = (trx->dict_operation_lock_mode == 0); - - if (locked_data_dict) { - - row_mysql_lock_data_dictionary(trx); - } - - if (node->state == UNDO_NODE_INSERT) { - - err = row_undo_ins(node); - - node->state = UNDO_NODE_FETCH_NEXT; - } else { - ut_ad(node->state == UNDO_NODE_MODIFY); - err = row_undo_mod(node, thr); - } - - if (locked_data_dict) { - - row_mysql_unlock_data_dictionary(trx); - } - - /* Do some cleanup */ - btr_pcur_close(&(node->pcur)); - - mem_heap_empty(node->heap); - - thr->run_node = node; - - return(err); -} - -/*************************************************************** -Undoes a row operation in a table. This is a high-level function used -in SQL execution graphs. */ - -que_thr_t* -row_undo_step( -/*==========*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - ulint err; - undo_node_t* node; - trx_t* trx; - - ut_ad(thr); - - srv_activity_count++; - - trx = thr_get_trx(thr); - - node = thr->run_node; - - ut_ad(que_node_get_type(node) == QUE_NODE_UNDO); - - err = row_undo(node, thr); - - trx->error_state = err; - - if (err != DB_SUCCESS) { - /* SQL error detected */ - - fprintf(stderr, "InnoDB: Fatal error %lu in rollback.\n", - (ulong) err); - - if (err == DB_OUT_OF_FILE_SPACE) { - fprintf(stderr, - "InnoDB: Error 13 means out of tablespace.\n" - "InnoDB: Consider increasing" - " your tablespace.\n"); - - exit(1); - } - - ut_error; - - return(NULL); - } - - return(thr); -} diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c deleted file mode 100644 index c91cc449b96..00000000000 --- a/storage/innobase/row/row0upd.c +++ /dev/null @@ -1,2081 +0,0 @@ -/****************************************************** -Update of a row - -(c) 1996 Innobase Oy - -Created 12/27/1996 Heikki Tuuri -*******************************************************/ - -#include "row0upd.h" - -#ifdef UNIV_NONINL -#include "row0upd.ic" -#endif - -#include "dict0dict.h" -#include 
"dict0boot.h" -#include "dict0crea.h" -#include "mach0data.h" -#include "trx0undo.h" -#include "btr0btr.h" -#include "btr0cur.h" -#include "que0que.h" -#include "row0ins.h" -#include "row0sel.h" -#include "row0row.h" -#include "rem0cmp.h" -#include "lock0lock.h" -#include "log0log.h" -#include "pars0sym.h" -#include "eval0eval.h" -#include "buf0lru.h" - - -/* What kind of latch and lock can we assume when the control comes to - ------------------------------------------------------------------- -an update node? --------------- -Efficiency of massive updates would require keeping an x-latch on a -clustered index page through many updates, and not setting an explicit -x-lock on clustered index records, as they anyway will get an implicit -x-lock when they are updated. A problem is that the read nodes in the -graph should know that they must keep the latch when passing the control -up to the update node, and not set any record lock on the record which -will be updated. Another problem occurs if the execution is stopped, -as the kernel switches to another query thread, or the transaction must -wait for a lock. Then we should be able to release the latch and, maybe, -acquire an explicit x-lock on the record. - Because this seems too complicated, we conclude that the less -efficient solution of releasing all the latches when the control is -transferred to another node, and acquiring explicit x-locks, is better. */ - -/* How is a delete performed? If there is a delete without an -explicit cursor, i.e., a searched delete, there are at least -two different situations: -the implicit select cursor may run on (1) the clustered index or -on (2) a secondary index. The delete is performed by setting -the delete bit in the record and substituting the id of the -deleting transaction for the original trx id, and substituting a -new roll ptr for previous roll ptr. The old trx id and roll ptr -are saved in the undo log record. 
Thus, no physical changes occur -in the index tree structure at the time of the delete. Only -when the undo log is purged, the index records will be physically -deleted from the index trees. - -The query graph executing a searched delete would consist of -a delete node which has as a subtree a select subgraph. -The select subgraph should return a (persistent) cursor -in the clustered index, placed on page which is x-latched. -The delete node should look for all secondary index records for -this clustered index entry and mark them as deleted. When is -the x-latch freed? The most efficient way for performing a -searched delete is obviously to keep the x-latch for several -steps of query graph execution. */ - -/*************************************************************** -Checks if an update vector changes some of the first ordering fields of an -index record. This is only used in foreign key checks and we can assume -that index does not contain column prefixes. */ -static -ibool -row_upd_changes_first_fields_binary( -/*================================*/ - /* out: TRUE if changes */ - dtuple_t* entry, /* in: old value of index entry */ - dict_index_t* index, /* in: index of entry */ - upd_t* update, /* in: update vector for the row */ - ulint n); /* in: how many first fields to check */ - - -/************************************************************************* -Checks if index currently is mentioned as a referenced index in a foreign -key constraint. */ -static -ibool -row_upd_index_is_referenced( -/*========================*/ - /* out: TRUE if referenced; NOTE that since - we do not hold dict_operation_lock - when leaving the function, it may be that - the referencing table has been dropped when - we leave this function: this function is only - for heuristic use! 
*/ - dict_index_t* index, /* in: index */ - trx_t* trx) /* in: transaction */ -{ - dict_table_t* table = index->table; - dict_foreign_t* foreign; - ibool froze_data_dict = FALSE; - - if (!UT_LIST_GET_FIRST(table->referenced_list)) { - - return(FALSE); - } - - if (trx->dict_operation_lock_mode == 0) { - row_mysql_freeze_data_dictionary(trx); - froze_data_dict = TRUE; - } - - foreign = UT_LIST_GET_FIRST(table->referenced_list); - - while (foreign) { - if (foreign->referenced_index == index) { - - if (froze_data_dict) { - row_mysql_unfreeze_data_dictionary(trx); - } - - return(TRUE); - } - - foreign = UT_LIST_GET_NEXT(referenced_list, foreign); - } - - if (froze_data_dict) { - row_mysql_unfreeze_data_dictionary(trx); - } - - return(FALSE); -} - -/************************************************************************* -Checks if possible foreign key constraints hold after a delete of the record -under pcur. NOTE that this function will temporarily commit mtr and lose the -pcur position! */ -static -ulint -row_upd_check_references_constraints( -/*=================================*/ - /* out: DB_SUCCESS or an error code */ - upd_node_t* node, /* in: row update node */ - btr_pcur_t* pcur, /* in: cursor positioned on a record; NOTE: the - cursor position is lost in this function! 
*/ - dict_table_t* table, /* in: table in question */ - dict_index_t* index, /* in: index of the cursor */ - que_thr_t* thr, /* in: query thread */ - mtr_t* mtr) /* in: mtr */ -{ - dict_foreign_t* foreign; - mem_heap_t* heap; - dtuple_t* entry; - trx_t* trx; - rec_t* rec; - ulint err; - ibool got_s_lock = FALSE; - - if (UT_LIST_GET_FIRST(table->referenced_list) == NULL) { - - return(DB_SUCCESS); - } - - trx = thr_get_trx(thr); - - rec = btr_pcur_get_rec(pcur); - - heap = mem_heap_create(500); - - entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); - - mtr_commit(mtr); - - mtr_start(mtr); - - if (trx->dict_operation_lock_mode == 0) { - got_s_lock = TRUE; - - row_mysql_freeze_data_dictionary(trx); - } - - foreign = UT_LIST_GET_FIRST(table->referenced_list); - - while (foreign) { - /* Note that we may have an update which updates the index - record, but does NOT update the first fields which are - referenced in a foreign key constraint. Then the update does - NOT break the constraint. */ - - if (foreign->referenced_index == index - && (node->is_delete - || row_upd_changes_first_fields_binary( - entry, index, node->update, - foreign->n_fields))) { - - if (foreign->foreign_table == NULL) { - dict_table_get(foreign->foreign_table_name, - FALSE); - } - - if (foreign->foreign_table) { - mutex_enter(&(dict_sys->mutex)); - - (foreign->foreign_table - ->n_foreign_key_checks_running)++; - - mutex_exit(&(dict_sys->mutex)); - } - - /* NOTE that if the thread ends up waiting for a lock - we will release dict_operation_lock temporarily! - But the counter on the table protects 'foreign' from - being dropped while the check is running. 
*/ - - err = row_ins_check_foreign_constraint( - FALSE, foreign, table, entry, thr); - - if (foreign->foreign_table) { - mutex_enter(&(dict_sys->mutex)); - - ut_a(foreign->foreign_table - ->n_foreign_key_checks_running > 0); - - (foreign->foreign_table - ->n_foreign_key_checks_running)--; - - mutex_exit(&(dict_sys->mutex)); - } - - if (err != DB_SUCCESS) { - if (got_s_lock) { - row_mysql_unfreeze_data_dictionary( - trx); - } - - mem_heap_free(heap); - - return(err); - } - } - - foreign = UT_LIST_GET_NEXT(referenced_list, foreign); - } - - if (got_s_lock) { - row_mysql_unfreeze_data_dictionary(trx); - } - - mem_heap_free(heap); - - return(DB_SUCCESS); -} - -/************************************************************************* -Creates an update node for a query graph. */ - -upd_node_t* -upd_node_create( -/*============*/ - /* out, own: update node */ - mem_heap_t* heap) /* in: mem heap where created */ -{ - upd_node_t* node; - - node = mem_heap_alloc(heap, sizeof(upd_node_t)); - node->common.type = QUE_NODE_UPDATE; - - node->state = UPD_NODE_UPDATE_CLUSTERED; - node->select_will_do_update = FALSE; - node->in_mysql_interface = FALSE; - - node->row = NULL; - node->ext_vec = NULL; - node->index = NULL; - node->update = NULL; - - node->foreign = NULL; - node->cascade_heap = NULL; - node->cascade_node = NULL; - - node->select = NULL; - - node->heap = mem_heap_create(128); - node->magic_n = UPD_NODE_MAGIC_N; - - node->cmpl_info = 0; - - return(node); -} - -/************************************************************************* -Updates the trx id and roll ptr field in a clustered index record in database -recovery. 
*/ - -void -row_upd_rec_sys_fields_in_recovery( -/*===============================*/ - rec_t* rec, /* in: record */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - ulint pos, /* in: TRX_ID position in rec */ - dulint trx_id, /* in: transaction id */ - dulint roll_ptr)/* in: roll ptr of the undo log record */ -{ - byte* field; - ulint len; - - field = rec_get_nth_field(rec, offsets, pos, &len); - ut_ad(len == DATA_TRX_ID_LEN); - trx_write_trx_id(field, trx_id); - - field = rec_get_nth_field(rec, offsets, pos + 1, &len); - ut_ad(len == DATA_ROLL_PTR_LEN); - trx_write_roll_ptr(field, roll_ptr); -} - -/************************************************************************* -Sets the trx id or roll ptr field of a clustered index entry. */ - -void -row_upd_index_entry_sys_field( -/*==========================*/ - dtuple_t* entry, /* in: index entry, where the memory buffers - for sys fields are already allocated: - the function just copies the new values to - them */ - dict_index_t* index, /* in: clustered index */ - ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ - dulint val) /* in: value to write */ -{ - dfield_t* dfield; - byte* field; - ulint pos; - - ut_ad(index->type & DICT_CLUSTERED); - - pos = dict_index_get_sys_col_pos(index, type); - - dfield = dtuple_get_nth_field(entry, pos); - field = dfield_get_data(dfield); - - if (type == DATA_TRX_ID) { - trx_write_trx_id(field, val); - } else { - ut_ad(type == DATA_ROLL_PTR); - trx_write_roll_ptr(field, val); - } -} - -/*************************************************************** -Returns TRUE if row update changes size of some field in index or if some -field to be updated is stored externally in rec or update. 
*/ - -ibool -row_upd_changes_field_size_or_external( -/*===================================*/ - /* out: TRUE if the update changes the size of - some field in index or the field is external - in rec or update */ - dict_index_t* index, /* in: index */ - const ulint* offsets,/* in: rec_get_offsets(rec, index) */ - upd_t* update) /* in: update vector */ -{ - upd_field_t* upd_field; - dfield_t* new_val; - ulint old_len; - ulint new_len; - ulint n_fields; - ulint i; - - ut_ad(rec_offs_validate(NULL, index, offsets)); - n_fields = upd_get_n_fields(update); - - for (i = 0; i < n_fields; i++) { - upd_field = upd_get_nth_field(update, i); - - new_val = &(upd_field->new_val); - new_len = new_val->len; - - if (new_len == UNIV_SQL_NULL && !rec_offs_comp(offsets)) { - /* A bug fixed on Dec 31st, 2004: we looked at the - SQL NULL size from the wrong field! We may backport - this fix also to 4.0. The merge to 5.0 will be made - manually immediately after we commit this to 4.1. */ - - new_len = dict_col_get_sql_null_size( - dict_index_get_nth_col(index, - upd_field->field_no)); - } - - old_len = rec_offs_nth_size(offsets, upd_field->field_no); - - if (rec_offs_comp(offsets) - && rec_offs_nth_sql_null(offsets, - upd_field->field_no)) { - /* Note that in the compact table format, for a - variable length field, an SQL NULL will use zero - bytes in the offset array at the start of the physical - record, but a zero-length value (empty string) will - use one byte! Thus, we cannot use update-in-place - if we update an SQL NULL varchar to an empty string! */ - - old_len = UNIV_SQL_NULL; - } - - if (old_len != new_len) { - - return(TRUE); - } - - if (rec_offs_nth_extern(offsets, upd_field->field_no)) { - - return(TRUE); - } - - if (upd_field->extern_storage) { - - return(TRUE); - } - } - - return(FALSE); -} - -/*************************************************************** -Replaces the new column values stored in the update vector to the record -given. No field size changes are allowed. 
This function is used only for -a clustered index */ - -void -row_upd_rec_in_place( -/*=================*/ - rec_t* rec, /* in/out: record where replaced */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - upd_t* update) /* in: update vector */ -{ - upd_field_t* upd_field; - dfield_t* new_val; - ulint n_fields; - ulint i; - - ut_ad(rec_offs_validate(rec, NULL, offsets)); - - rec_set_info_bits(rec, rec_offs_comp(offsets), update->info_bits); - - n_fields = upd_get_n_fields(update); - - for (i = 0; i < n_fields; i++) { - upd_field = upd_get_nth_field(update, i); - new_val = &(upd_field->new_val); - - rec_set_nth_field(rec, offsets, upd_field->field_no, - dfield_get_data(new_val), - dfield_get_len(new_val)); - } -} - -/************************************************************************* -Writes into the redo log the values of trx id and roll ptr and enough info -to determine their positions within a clustered index record. */ - -byte* -row_upd_write_sys_vals_to_log( -/*==========================*/ - /* out: new pointer to mlog */ - dict_index_t* index, /* in: clustered index */ - trx_t* trx, /* in: transaction */ - dulint roll_ptr,/* in: roll ptr of the undo log record */ - byte* log_ptr,/* pointer to a buffer of size > 20 opened - in mlog */ - mtr_t* mtr __attribute__((unused))) /* in: mtr */ -{ - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(mtr); - - log_ptr += mach_write_compressed(log_ptr, - dict_index_get_sys_col_pos( - index, DATA_TRX_ID)); - - trx_write_roll_ptr(log_ptr, roll_ptr); - log_ptr += DATA_ROLL_PTR_LEN; - - log_ptr += mach_dulint_write_compressed(log_ptr, trx->id); - - return(log_ptr); -} - -/************************************************************************* -Parses the log data of system field values. 
*/ - -byte* -row_upd_parse_sys_vals( -/*===================*/ - /* out: log data end or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - ulint* pos, /* out: TRX_ID position in record */ - dulint* trx_id, /* out: trx id */ - dulint* roll_ptr)/* out: roll ptr */ -{ - ptr = mach_parse_compressed(ptr, end_ptr, pos); - - if (ptr == NULL) { - - return(NULL); - } - - if (end_ptr < ptr + DATA_ROLL_PTR_LEN) { - - return(NULL); - } - - *roll_ptr = trx_read_roll_ptr(ptr); - ptr += DATA_ROLL_PTR_LEN; - - ptr = mach_dulint_parse_compressed(ptr, end_ptr, trx_id); - - return(ptr); -} - -/*************************************************************** -Writes to the redo log the new values of the fields occurring in the index. */ - -void -row_upd_index_write_log( -/*====================*/ - upd_t* update, /* in: update vector */ - byte* log_ptr,/* in: pointer to mlog buffer: must contain at least - MLOG_BUF_MARGIN bytes of free space; the buffer is - closed within this function */ - mtr_t* mtr) /* in: mtr into whose log to write */ -{ - upd_field_t* upd_field; - dfield_t* new_val; - ulint len; - ulint n_fields; - byte* buf_end; - ulint i; - - n_fields = upd_get_n_fields(update); - - buf_end = log_ptr + MLOG_BUF_MARGIN; - - mach_write_to_1(log_ptr, update->info_bits); - log_ptr++; - log_ptr += mach_write_compressed(log_ptr, n_fields); - - for (i = 0; i < n_fields; i++) { - -#if MLOG_BUF_MARGIN <= 30 -# error "MLOG_BUF_MARGIN <= 30" -#endif - - if (log_ptr + 30 > buf_end) { - mlog_close(mtr, log_ptr); - - log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN); - buf_end = log_ptr + MLOG_BUF_MARGIN; - } - - upd_field = upd_get_nth_field(update, i); - - new_val = &(upd_field->new_val); - - len = new_val->len; - - log_ptr += mach_write_compressed(log_ptr, upd_field->field_no); - log_ptr += mach_write_compressed(log_ptr, len); - - if (len != UNIV_SQL_NULL) { - if (log_ptr + len < buf_end) { - ut_memcpy(log_ptr, new_val->data, len); - - log_ptr += len; - } else { - 
mlog_close(mtr, log_ptr); - - mlog_catenate_string(mtr, new_val->data, len); - - log_ptr = mlog_open(mtr, MLOG_BUF_MARGIN); - buf_end = log_ptr + MLOG_BUF_MARGIN; - } - } - } - - mlog_close(mtr, log_ptr); -} - -/************************************************************************* -Parses the log data written by row_upd_index_write_log. */ - -byte* -row_upd_index_parse( -/*================*/ - /* out: log data end or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - mem_heap_t* heap, /* in: memory heap where update vector is - built */ - upd_t** update_out)/* out: update vector */ -{ - upd_t* update; - upd_field_t* upd_field; - dfield_t* new_val; - ulint len; - ulint n_fields; - byte* buf; - ulint info_bits; - ulint i; - - if (end_ptr < ptr + 1) { - - return(NULL); - } - - info_bits = mach_read_from_1(ptr); - ptr++; - ptr = mach_parse_compressed(ptr, end_ptr, &n_fields); - - if (ptr == NULL) { - - return(NULL); - } - - update = upd_create(n_fields, heap); - update->info_bits = info_bits; - - for (i = 0; i < n_fields; i++) { - upd_field = upd_get_nth_field(update, i); - new_val = &(upd_field->new_val); - - ptr = mach_parse_compressed(ptr, end_ptr, - &(upd_field->field_no)); - if (ptr == NULL) { - - return(NULL); - } - - ptr = mach_parse_compressed(ptr, end_ptr, &len); - - if (ptr == NULL) { - - return(NULL); - } - - new_val->len = len; - - if (len != UNIV_SQL_NULL) { - - if (end_ptr < ptr + len) { - - return(NULL); - } else { - buf = mem_heap_alloc(heap, len); - ut_memcpy(buf, ptr, len); - - ptr += len; - - new_val->data = buf; - } - } - } - - *update_out = update; - - return(ptr); -} - -/******************************************************************* -Returns TRUE if ext_vec contains i. 
*/ -static -ibool -upd_ext_vec_contains( -/*=================*/ - /* out: TRUE if i is in ext_vec */ - ulint* ext_vec, /* in: array of indexes or NULL */ - ulint n_ext_vec, /* in: number of numbers in ext_vec */ - ulint i) /* in: a number */ -{ - ulint j; - - if (ext_vec == NULL) { - - return(FALSE); - } - - for (j = 0; j < n_ext_vec; j++) { - if (ext_vec[j] == i) { - - return(TRUE); - } - } - - return(FALSE); -} - -/******************************************************************* -Builds an update vector from those fields which in a secondary index entry -differ from a record that has the equal ordering fields. NOTE: we compare -the fields as binary strings! */ - -upd_t* -row_upd_build_sec_rec_difference_binary( -/*====================================*/ - /* out, own: update vector of differing - fields */ - dict_index_t* index, /* in: index */ - dtuple_t* entry, /* in: entry to insert */ - rec_t* rec, /* in: secondary index record */ - trx_t* trx, /* in: transaction */ - mem_heap_t* heap) /* in: memory heap from which allocated */ -{ - upd_field_t* upd_field; - dfield_t* dfield; - byte* data; - ulint len; - upd_t* update; - ulint n_diff; - ulint i; - ulint offsets_[REC_OFFS_SMALL_SIZE]; - const ulint* offsets; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - /* This function is used only for a secondary index */ - ut_a(0 == (index->type & DICT_CLUSTERED)); - - update = upd_create(dtuple_get_n_fields(entry), heap); - - n_diff = 0; - offsets = rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap); - - for (i = 0; i < dtuple_get_n_fields(entry); i++) { - - data = rec_get_nth_field(rec, offsets, i, &len); - - dfield = dtuple_get_nth_field(entry, i); - - /* NOTE that it may be that len != dfield_get_len(dfield) if we - are updating in a character set and collation where strings of - different length can be equal in an alphabetical comparison, - and also in the case where we have a column prefix index - and the last characters in the index field are 
spaces; the - latter case probably caused the assertion failures reported at - row0upd.c line 713 in versions 4.0.14 - 4.0.16. */ - - /* NOTE: we compare the fields as binary strings! - (No collation) */ - - if (!dfield_data_is_binary_equal(dfield, len, data)) { - - upd_field = upd_get_nth_field(update, n_diff); - - dfield_copy(&(upd_field->new_val), dfield); - - upd_field_set_field_no(upd_field, i, index, trx); - - upd_field->extern_storage = FALSE; - - n_diff++; - } - } - - update->n_fields = n_diff; - - return(update); -} - -/******************************************************************* -Builds an update vector from those fields, excluding the roll ptr and -trx id fields, which in an index entry differ from a record that has -the equal ordering fields. NOTE: we compare the fields as binary strings! */ - -upd_t* -row_upd_build_difference_binary( -/*============================*/ - /* out, own: update vector of differing - fields, excluding roll ptr and trx id */ - dict_index_t* index, /* in: clustered index */ - dtuple_t* entry, /* in: entry to insert */ - ulint* ext_vec,/* in: array containing field numbers of - externally stored fields in entry, or NULL */ - ulint n_ext_vec,/* in: number of fields in ext_vec */ - rec_t* rec, /* in: clustered index record */ - trx_t* trx, /* in: transaction */ - mem_heap_t* heap) /* in: memory heap from which allocated */ -{ - upd_field_t* upd_field; - dfield_t* dfield; - byte* data; - ulint len; - upd_t* update; - ulint n_diff; - ulint roll_ptr_pos; - ulint trx_id_pos; - ibool extern_bit; - ulint i; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - const ulint* offsets; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - /* This function is used only for a clustered index */ - ut_a(index->type & DICT_CLUSTERED); - - update = upd_create(dtuple_get_n_fields(entry), heap); - - n_diff = 0; - - roll_ptr_pos = dict_index_get_sys_col_pos(index, DATA_ROLL_PTR); - trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID); - - 
offsets = rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap); - - for (i = 0; i < dtuple_get_n_fields(entry); i++) { - - data = rec_get_nth_field(rec, offsets, i, &len); - - dfield = dtuple_get_nth_field(entry, i); - - /* NOTE: we compare the fields as binary strings! - (No collation) */ - - if (i == trx_id_pos || i == roll_ptr_pos) { - - goto skip_compare; - } - - extern_bit = upd_ext_vec_contains(ext_vec, n_ext_vec, i); - - if (UNIV_UNLIKELY(extern_bit - == (ibool)!rec_offs_nth_extern(offsets, i)) - || !dfield_data_is_binary_equal(dfield, len, data)) { - - upd_field = upd_get_nth_field(update, n_diff); - - dfield_copy(&(upd_field->new_val), dfield); - - upd_field_set_field_no(upd_field, i, index, trx); - - upd_field->extern_storage = extern_bit; - - n_diff++; - } -skip_compare: - ; - } - - update->n_fields = n_diff; - - return(update); -} - -/*************************************************************** -Replaces the new column values stored in the update vector to the index entry -given. */ - -void -row_upd_index_replace_new_col_vals_index_pos( -/*=========================================*/ - dtuple_t* entry, /* in/out: index entry where replaced */ - dict_index_t* index, /* in: index; NOTE that this may also be a - non-clustered index */ - upd_t* update, /* in: an update vector built for the index so - that the field number in an upd_field is the - index position */ - ibool order_only, - /* in: if TRUE, limit the replacement to - ordering fields of index; note that this - does not work for non-clustered indexes. 
*/ - mem_heap_t* heap) /* in: memory heap to which we allocate and - copy the new values, set this as NULL if you - do not want allocation */ -{ - dict_field_t* field; - upd_field_t* upd_field; - dfield_t* dfield; - dfield_t* new_val; - ulint j; - ulint i; - ulint n_fields; - - ut_ad(index); - - dtuple_set_info_bits(entry, update->info_bits); - - if (order_only) { - n_fields = dict_index_get_n_unique(index); - } else { - n_fields = dict_index_get_n_fields(index); - } - - for (j = 0; j < n_fields; j++) { - - field = dict_index_get_nth_field(index, j); - - for (i = 0; i < upd_get_n_fields(update); i++) { - - upd_field = upd_get_nth_field(update, i); - - if (upd_field->field_no == j) { - - dfield = dtuple_get_nth_field(entry, j); - - new_val = &(upd_field->new_val); - - dfield_set_data(dfield, new_val->data, - new_val->len); - if (heap && new_val->len != UNIV_SQL_NULL) { - dfield->data = mem_heap_alloc( - heap, new_val->len); - ut_memcpy(dfield->data, new_val->data, - new_val->len); - } - - if (field->prefix_len > 0 - && new_val->len != UNIV_SQL_NULL) { - - const dict_col_t* col - = dict_field_get_col(field); - - dfield->len - = dtype_get_at_most_n_mbchars( - col->prtype, - col->mbminlen, - col->mbmaxlen, - field->prefix_len, - new_val->len, - new_val->data); - } - } - } - } -} - -/*************************************************************** -Replaces the new column values stored in the update vector to the index entry -given. 
*/ - -void -row_upd_index_replace_new_col_vals( -/*===============================*/ - dtuple_t* entry, /* in/out: index entry where replaced */ - dict_index_t* index, /* in: index; NOTE that this may also be a - non-clustered index */ - upd_t* update, /* in: an update vector built for the - CLUSTERED index so that the field number in - an upd_field is the clustered index position */ - mem_heap_t* heap) /* in: memory heap to which we allocate and - copy the new values, set this as NULL if you - do not want allocation */ -{ - upd_field_t* upd_field; - dfield_t* dfield; - dfield_t* new_val; - ulint j; - ulint i; - dict_index_t* clust_index; - - ut_ad(index); - - clust_index = dict_table_get_first_index(index->table); - - dtuple_set_info_bits(entry, update->info_bits); - - for (j = 0; j < dict_index_get_n_fields(index); j++) { - - ulint clust_pos; - dict_field_t* field = dict_index_get_nth_field(index, j); - - clust_pos = dict_col_get_clust_pos(field->col, clust_index); - - for (i = 0; i < upd_get_n_fields(update); i++) { - - upd_field = upd_get_nth_field(update, i); - - if (upd_field->field_no == clust_pos) { - - dfield = dtuple_get_nth_field(entry, j); - - new_val = &(upd_field->new_val); - - dfield_set_data(dfield, new_val->data, - new_val->len); - if (heap && new_val->len != UNIV_SQL_NULL) { - dfield->data = mem_heap_alloc( - heap, new_val->len); - ut_memcpy(dfield->data, new_val->data, - new_val->len); - } - - if (field->prefix_len > 0 - && new_val->len != UNIV_SQL_NULL) { - - const dict_col_t* col - = dict_field_get_col(field); - - dfield->len - = dtype_get_at_most_n_mbchars( - col->prtype, - col->mbminlen, - col->mbmaxlen, - field->prefix_len, - new_val->len, - new_val->data); - } - } - } - } -} - -/*************************************************************** -Checks if an update vector changes an ordering field of an index record. -This function is fast if the update vector is short or the number of ordering -fields in the index is small. 
Otherwise, this can be quadratic. -NOTE: we compare the fields as binary strings! */ - -ibool -row_upd_changes_ord_field_binary( -/*=============================*/ - /* out: TRUE if update vector changes - an ordering field in the index record; - NOTE: the fields are compared as binary - strings */ - dtuple_t* row, /* in: old value of row, or NULL if the - row and the data values in update are not - known when this function is called, e.g., at - compile time */ - dict_index_t* index, /* in: index of the record */ - upd_t* update) /* in: update vector for the row; NOTE: the - field numbers in this MUST be clustered index - positions! */ -{ - ulint n_unique; - ulint n_upd_fields; - ulint i, j; - dict_index_t* clust_index; - - ut_ad(update && index); - - n_unique = dict_index_get_n_unique(index); - n_upd_fields = upd_get_n_fields(update); - - clust_index = dict_table_get_first_index(index->table); - - for (i = 0; i < n_unique; i++) { - - const dict_field_t* ind_field; - const dict_col_t* col; - ulint col_pos; - ulint col_no; - - ind_field = dict_index_get_nth_field(index, i); - col = dict_field_get_col(ind_field); - col_pos = dict_col_get_clust_pos(col, clust_index); - col_no = dict_col_get_no(col); - - for (j = 0; j < n_upd_fields; j++) { - - upd_field_t* upd_field - = upd_get_nth_field(update, j); - - /* Note that if the index field is a column prefix - then it may be that row does not contain an externally - stored part of the column value, and we cannot compare - the datas */ - - if (col_pos == upd_field->field_no - && (row == NULL - || ind_field->prefix_len > 0 - || !dfield_datas_are_binary_equal( - dtuple_get_nth_field(row, col_no), - &(upd_field->new_val)))) { - - return(TRUE); - } - } - } - - return(FALSE); -} - -/*************************************************************** -Checks if an update vector changes an ordering field of an index record. -NOTE: we compare the fields as binary strings! 
*/ - -ibool -row_upd_changes_some_index_ord_field_binary( -/*========================================*/ - /* out: TRUE if update vector may change - an ordering field in an index record */ - dict_table_t* table, /* in: table */ - upd_t* update) /* in: update vector for the row */ -{ - upd_field_t* upd_field; - dict_index_t* index; - ulint i; - - index = dict_table_get_first_index(table); - - for (i = 0; i < upd_get_n_fields(update); i++) { - - upd_field = upd_get_nth_field(update, i); - - if (dict_field_get_col(dict_index_get_nth_field( - index, upd_field->field_no)) - ->ord_part) { - - return(TRUE); - } - } - - return(FALSE); -} - -/*************************************************************** -Checks if an update vector changes some of the first ordering fields of an -index record. This is only used in foreign key checks and we can assume -that index does not contain column prefixes. */ -static -ibool -row_upd_changes_first_fields_binary( -/*================================*/ - /* out: TRUE if changes */ - dtuple_t* entry, /* in: index entry */ - dict_index_t* index, /* in: index of entry */ - upd_t* update, /* in: update vector for the row */ - ulint n) /* in: how many first fields to check */ -{ - ulint n_upd_fields; - ulint i, j; - dict_index_t* clust_index; - - ut_ad(update && index); - ut_ad(n <= dict_index_get_n_fields(index)); - - n_upd_fields = upd_get_n_fields(update); - clust_index = dict_table_get_first_index(index->table); - - for (i = 0; i < n; i++) { - - const dict_field_t* ind_field; - const dict_col_t* col; - ulint col_pos; - - ind_field = dict_index_get_nth_field(index, i); - col = dict_field_get_col(ind_field); - col_pos = dict_col_get_clust_pos(col, clust_index); - - ut_a(ind_field->prefix_len == 0); - - for (j = 0; j < n_upd_fields; j++) { - - upd_field_t* upd_field - = upd_get_nth_field(update, j); - - if (col_pos == upd_field->field_no - && !dfield_datas_are_binary_equal( - dtuple_get_nth_field(entry, i), - &(upd_field->new_val))) { - - 
return(TRUE); - } - } - } - - return(FALSE); -} - -/************************************************************************* -Copies the column values from a record. */ -UNIV_INLINE -void -row_upd_copy_columns( -/*=================*/ - rec_t* rec, /* in: record in a clustered index */ - const ulint* offsets,/* in: array returned by rec_get_offsets() */ - sym_node_t* column) /* in: first column in a column list, or - NULL */ -{ - byte* data; - ulint len; - - while (column) { - data = rec_get_nth_field(rec, offsets, - column->field_nos[SYM_CLUST_FIELD_NO], - &len); - eval_node_copy_and_alloc_val(column, data, len); - - column = UT_LIST_GET_NEXT(col_var_list, column); - } -} - -/************************************************************************* -Calculates the new values for fields to update. Note that row_upd_copy_columns -must have been called first. */ -UNIV_INLINE -void -row_upd_eval_new_vals( -/*==================*/ - upd_t* update) /* in: update vector */ -{ - que_node_t* exp; - upd_field_t* upd_field; - ulint n_fields; - ulint i; - - n_fields = upd_get_n_fields(update); - - for (i = 0; i < n_fields; i++) { - upd_field = upd_get_nth_field(update, i); - - exp = upd_field->exp; - - eval_exp(exp); - - dfield_copy_data(&(upd_field->new_val), que_node_get_val(exp)); - } -} - -/*************************************************************** -Stores to the heap the row on which the node->pcur is positioned. 
*/ -static -void -row_upd_store_row( -/*==============*/ - upd_node_t* node) /* in: row update node */ -{ - dict_index_t* clust_index; - upd_t* update; - rec_t* rec; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - const ulint* offsets; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_ad(node->pcur->latch_mode != BTR_NO_LATCHES); - - if (node->row != NULL) { - mem_heap_empty(node->heap); - node->row = NULL; - } - - clust_index = dict_table_get_first_index(node->table); - - rec = btr_pcur_get_rec(node->pcur); - - offsets = rec_get_offsets(rec, clust_index, offsets_, - ULINT_UNDEFINED, &heap); - node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets, - node->heap); - node->ext_vec = mem_heap_alloc(node->heap, sizeof(ulint) - * rec_offs_n_fields(offsets)); - if (node->is_delete) { - update = NULL; - } else { - update = node->update; - } - - node->n_ext_vec = btr_push_update_extern_fields(node->ext_vec, - offsets, update); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } -} - -/*************************************************************** -Updates a secondary index entry of a row. 
*/ -static -ulint -row_upd_sec_index_entry( -/*====================*/ - /* out: DB_SUCCESS if operation successfully - completed, else error code or DB_LOCK_WAIT */ - upd_node_t* node, /* in: row update node */ - que_thr_t* thr) /* in: query thread */ -{ - ibool check_ref; - ibool found; - dict_index_t* index; - dtuple_t* entry; - btr_pcur_t pcur; - btr_cur_t* btr_cur; - mem_heap_t* heap; - rec_t* rec; - ulint err = DB_SUCCESS; - mtr_t mtr; - trx_t* trx = thr_get_trx(thr); - - index = node->index; - - check_ref = row_upd_index_is_referenced(index, trx); - - heap = mem_heap_create(1024); - - /* Build old index entry */ - entry = row_build_index_entry(node->row, index, heap); - - log_free_check(); - mtr_start(&mtr); - - found = row_search_index_entry(index, entry, BTR_MODIFY_LEAF, &pcur, - &mtr); - btr_cur = btr_pcur_get_btr_cur(&pcur); - - rec = btr_cur_get_rec(btr_cur); - - if (UNIV_UNLIKELY(!found)) { - fputs("InnoDB: error in sec index entry update in\n" - "InnoDB: ", stderr); - dict_index_name_print(stderr, trx, index); - fputs("\n" - "InnoDB: tuple ", stderr); - dtuple_print(stderr, entry); - fputs("\n" - "InnoDB: record ", stderr); - rec_print(stderr, rec, index); - putc('\n', stderr); - - trx_print(stderr, trx, 0); - - fputs("\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n", stderr); - } else { - /* Delete mark the old index record; it can already be - delete marked if we return after a lock wait in - row_ins_index_entry below */ - - if (!rec_get_deleted_flag(rec, - dict_table_is_comp(index->table))) { - err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE, - thr, &mtr); - if (err == DB_SUCCESS && check_ref) { - - /* NOTE that the following call loses - the position of pcur ! 
*/ - err = row_upd_check_references_constraints( - node, &pcur, index->table, - index, thr, &mtr); - if (err != DB_SUCCESS) { - - goto close_cur; - } - } - - } - } -close_cur: - btr_pcur_close(&pcur); - mtr_commit(&mtr); - - if (node->is_delete || err != DB_SUCCESS) { - - mem_heap_free(heap); - - return(err); - } - - /* Build a new index entry */ - row_upd_index_replace_new_col_vals(entry, index, node->update, NULL); - - /* Insert new index entry */ - err = row_ins_index_entry(index, entry, NULL, 0, thr); - - mem_heap_free(heap); - - return(err); -} - -/*************************************************************** -Updates the secondary index record if it is changed in the row update or -deletes it if this is a delete. */ -UNIV_INLINE -ulint -row_upd_sec_step( -/*=============*/ - /* out: DB_SUCCESS if operation successfully - completed, else error code or DB_LOCK_WAIT */ - upd_node_t* node, /* in: row update node */ - que_thr_t* thr) /* in: query thread */ -{ - ulint err; - - ut_ad((node->state == UPD_NODE_UPDATE_ALL_SEC) - || (node->state == UPD_NODE_UPDATE_SOME_SEC)); - ut_ad(!(node->index->type & DICT_CLUSTERED)); - - if (node->state == UPD_NODE_UPDATE_ALL_SEC - || row_upd_changes_ord_field_binary(node->row, node->index, - node->update)) { - err = row_upd_sec_index_entry(node, thr); - - return(err); - } - - return(DB_SUCCESS); -} - -/*************************************************************** -Marks the clustered index record deleted and inserts the updated version -of the record to the index. This function should be used when the ordering -fields of the clustered index record change. This should be quite rare in -database applications. 
*/ -static -ulint -row_upd_clust_rec_by_insert( -/*========================*/ - /* out: DB_SUCCESS if operation successfully - completed, else error code or DB_LOCK_WAIT */ - upd_node_t* node, /* in: row update node */ - dict_index_t* index, /* in: clustered index of the record */ - que_thr_t* thr, /* in: query thread */ - ibool check_ref,/* in: TRUE if index may be referenced in - a foreign key constraint */ - mtr_t* mtr) /* in: mtr; gets committed here */ -{ - mem_heap_t* heap = NULL; - btr_pcur_t* pcur; - btr_cur_t* btr_cur; - trx_t* trx; - dict_table_t* table; - dtuple_t* entry; - ulint err; - - ut_ad(node); - ut_ad(index->type & DICT_CLUSTERED); - - trx = thr_get_trx(thr); - table = node->table; - pcur = node->pcur; - btr_cur = btr_pcur_get_btr_cur(pcur); - - if (node->state != UPD_NODE_INSERT_CLUSTERED) { - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, - btr_cur, TRUE, thr, mtr); - if (err != DB_SUCCESS) { - mtr_commit(mtr); - return(err); - } - - /* Mark as not-owned the externally stored fields which the new - row inherits from the delete marked record: purge should not - free those externally stored fields even if the delete marked - record is removed from the index tree, or updated. */ - - btr_cur_mark_extern_inherited_fields( - btr_cur_get_rec(btr_cur), - rec_get_offsets(btr_cur_get_rec(btr_cur), - dict_table_get_first_index(table), - offsets_, ULINT_UNDEFINED, &heap), - node->update, mtr); - if (check_ref) { - /* NOTE that the following call loses - the position of pcur ! 
*/ - err = row_upd_check_references_constraints( - node, pcur, table, index, thr, mtr); - if (err != DB_SUCCESS) { - mtr_commit(mtr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(err); - } - } - - } - - mtr_commit(mtr); - - if (!heap) { - heap = mem_heap_create(500); - } - node->state = UPD_NODE_INSERT_CLUSTERED; - - entry = row_build_index_entry(node->row, index, heap); - - row_upd_index_replace_new_col_vals(entry, index, node->update, NULL); - - row_upd_index_entry_sys_field(entry, index, DATA_TRX_ID, trx->id); - - /* If we return from a lock wait, for example, we may have - extern fields marked as not-owned in entry (marked in the - if-branch above). We must unmark them. */ - - btr_cur_unmark_dtuple_extern_fields(entry, node->ext_vec, - node->n_ext_vec); - /* We must mark non-updated extern fields in entry as inherited, - so that a possible rollback will not free them */ - - btr_cur_mark_dtuple_inherited_extern(entry, node->ext_vec, - node->n_ext_vec, - node->update); - - err = row_ins_index_entry(index, entry, node->ext_vec, - node->n_ext_vec, thr); - mem_heap_free(heap); - - return(err); -} - -/*************************************************************** -Updates a clustered index record of a row when the ordering fields do -not change. 
*/ -static -ulint -row_upd_clust_rec( -/*==============*/ - /* out: DB_SUCCESS if operation successfully - completed, else error code or DB_LOCK_WAIT */ - upd_node_t* node, /* in: row update node */ - dict_index_t* index, /* in: clustered index */ - que_thr_t* thr, /* in: query thread */ - mtr_t* mtr) /* in: mtr; gets committed here */ -{ - big_rec_t* big_rec = NULL; - btr_pcur_t* pcur; - btr_cur_t* btr_cur; - ulint err; - - ut_ad(node); - ut_ad(index->type & DICT_CLUSTERED); - - pcur = node->pcur; - btr_cur = btr_pcur_get_btr_cur(pcur); - - ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), - dict_table_is_comp(index->table))); - - /* Try optimistic updating of the record, keeping changes within - the page; we do not check locks because we assume the x-lock on the - record to update */ - - if (node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE) { - err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG, - btr_cur, node->update, - node->cmpl_info, thr, mtr); - } else { - err = btr_cur_optimistic_update(BTR_NO_LOCKING_FLAG, - btr_cur, node->update, - node->cmpl_info, thr, mtr); - } - - mtr_commit(mtr); - - if (err == DB_SUCCESS) { - - return(err); - } - - if (buf_LRU_buf_pool_running_out()) { - - return(DB_LOCK_TABLE_FULL); - } - /* We may have to modify the tree structure: do a pessimistic descent - down the index tree */ - - mtr_start(mtr); - - /* NOTE: this transaction has an s-lock or x-lock on the record and - therefore other transactions cannot modify the record when we have no - latch on the page. In addition, we assume that other query threads of - the same transaction do not modify the record in the meantime. - Therefore we can assert that the restoration of the cursor succeeds. 
*/ - - ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); - - ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), - dict_table_is_comp(index->table))); - - err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur, - &big_rec, node->update, - node->cmpl_info, thr, mtr); - mtr_commit(mtr); - - if (err == DB_SUCCESS && big_rec) { - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - rec_t* rec; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - mtr_start(mtr); - - ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); - rec = btr_cur_get_rec(btr_cur); - err = btr_store_big_rec_extern_fields( - index, rec, - rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap), - big_rec, mtr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - mtr_commit(mtr); - } - - if (big_rec) { - dtuple_big_rec_free(big_rec); - } - - return(err); -} - -/*************************************************************** -Delete marks a clustered index record. */ -static -ulint -row_upd_del_mark_clust_rec( -/*=======================*/ - /* out: DB_SUCCESS if operation successfully - completed, else error code */ - upd_node_t* node, /* in: row update node */ - dict_index_t* index, /* in: clustered index */ - que_thr_t* thr, /* in: query thread */ - ibool check_ref,/* in: TRUE if index may be referenced in - a foreign key constraint */ - mtr_t* mtr) /* in: mtr; gets committed here */ -{ - btr_pcur_t* pcur; - btr_cur_t* btr_cur; - ulint err; - - ut_ad(node); - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(node->is_delete); - - pcur = node->pcur; - btr_cur = btr_pcur_get_btr_cur(pcur); - - /* Store row because we have to build also the secondary index - entries */ - - row_upd_store_row(node); - - /* Mark the clustered index record deleted; we do not have to check - locks, because we assume that we have an x-lock on the record */ - - err = btr_cur_del_mark_set_clust_rec(BTR_NO_LOCKING_FLAG, - btr_cur, TRUE, thr, mtr); - if (err == DB_SUCCESS && 
check_ref) { - /* NOTE that the following call loses the position of pcur ! */ - - err = row_upd_check_references_constraints(node, - pcur, index->table, - index, thr, mtr); - if (err != DB_SUCCESS) { - mtr_commit(mtr); - - return(err); - } - } - - mtr_commit(mtr); - - return(err); -} - -/*************************************************************** -Updates the clustered index record. */ -static -ulint -row_upd_clust_step( -/*===============*/ - /* out: DB_SUCCESS if operation successfully - completed, DB_LOCK_WAIT in case of a lock wait, - else error code */ - upd_node_t* node, /* in: row update node */ - que_thr_t* thr) /* in: query thread */ -{ - dict_index_t* index; - btr_pcur_t* pcur; - ibool success; - ibool check_ref; - ulint err; - mtr_t* mtr; - mtr_t mtr_buf; - rec_t* rec; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - const ulint* offsets; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - index = dict_table_get_first_index(node->table); - - check_ref = row_upd_index_is_referenced(index, thr_get_trx(thr)); - - pcur = node->pcur; - - /* We have to restore the cursor to its position */ - mtr = &mtr_buf; - - mtr_start(mtr); - - /* If the restoration does not succeed, then the same - transaction has deleted the record on which the cursor was, - and that is an SQL error. If the restoration succeeds, it may - still be that the same transaction has successively deleted - and inserted a record with the same ordering fields, but in - that case we know that the transaction has at least an - implicit x-lock on the record. 
*/ - - ut_a(pcur->rel_pos == BTR_PCUR_ON); - - success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, mtr); - - if (!success) { - err = DB_RECORD_NOT_FOUND; - - mtr_commit(mtr); - - return(err); - } - - /* If this is a row in SYS_INDEXES table of the data dictionary, - then we have to free the file segments of the index tree associated - with the index */ - - if (node->is_delete - && ut_dulint_cmp(node->table->id, DICT_INDEXES_ID) == 0) { - - dict_drop_index_tree(btr_pcur_get_rec(pcur), mtr); - - mtr_commit(mtr); - - mtr_start(mtr); - - success = btr_pcur_restore_position(BTR_MODIFY_LEAF, pcur, - mtr); - if (!success) { - err = DB_ERROR; - - mtr_commit(mtr); - - return(err); - } - } - - rec = btr_pcur_get_rec(pcur); - offsets = rec_get_offsets(rec, index, offsets_, - ULINT_UNDEFINED, &heap); - - if (!node->has_clust_rec_x_lock) { - err = lock_clust_rec_modify_check_and_lock( - 0, rec, index, offsets, thr); - if (err != DB_SUCCESS) { - mtr_commit(mtr); - goto exit_func; - } - } - - /* NOTE: the following function calls will also commit mtr */ - - if (node->is_delete) { - err = row_upd_del_mark_clust_rec(node, index, thr, check_ref, - mtr); - if (err == DB_SUCCESS) { - node->state = UPD_NODE_UPDATE_ALL_SEC; - node->index = dict_table_get_next_index(index); - } -exit_func: - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(err); - } - - /* If the update is made for MySQL, we already have the update vector - ready, else we have to do some evaluation: */ - - if (!node->in_mysql_interface) { - /* Copy the necessary columns from clust_rec and calculate the - new values to set */ - row_upd_copy_columns(rec, offsets, - UT_LIST_GET_FIRST(node->columns)); - row_upd_eval_new_vals(node->update); - } - - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - - if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) { - - err = row_upd_clust_rec(node, index, thr, mtr); - return(err); - } - - row_upd_store_row(node); - - if 
(row_upd_changes_ord_field_binary(node->row, index, node->update)) { - - /* Update causes an ordering field (ordering fields within - the B-tree) of the clustered index record to change: perform - the update by delete marking and inserting. - - TODO! What to do to the 'Halloween problem', where an update - moves the record forward in index so that it is again - updated when the cursor arrives there? Solution: the - read operation must check the undo record undo number when - choosing records to update. MySQL solves now the problem - externally! */ - - err = row_upd_clust_rec_by_insert(node, index, thr, check_ref, - mtr); - if (err != DB_SUCCESS) { - - return(err); - } - - node->state = UPD_NODE_UPDATE_ALL_SEC; - } else { - err = row_upd_clust_rec(node, index, thr, mtr); - - if (err != DB_SUCCESS) { - - return(err); - } - - node->state = UPD_NODE_UPDATE_SOME_SEC; - } - - node->index = dict_table_get_next_index(index); - - return(err); -} - -/*************************************************************** -Updates the affected index records of a row. When the control is transferred -to this node, we assume that we have a persistent cursor which was on a -record, and the position of the cursor is stored in the cursor. 
*/ -static -ulint -row_upd( -/*====*/ - /* out: DB_SUCCESS if operation successfully - completed, else error code or DB_LOCK_WAIT */ - upd_node_t* node, /* in: row update node */ - que_thr_t* thr) /* in: query thread */ -{ - ulint err = DB_SUCCESS; - - ut_ad(node && thr); - - if (UNIV_LIKELY(node->in_mysql_interface)) { - - /* We do not get the cmpl_info value from the MySQL - interpreter: we must calculate it on the fly: */ - - if (node->is_delete - || row_upd_changes_some_index_ord_field_binary( - node->table, node->update)) { - node->cmpl_info = 0; - } else { - node->cmpl_info = UPD_NODE_NO_ORD_CHANGE; - } - } - - if (node->state == UPD_NODE_UPDATE_CLUSTERED - || node->state == UPD_NODE_INSERT_CLUSTERED) { - - err = row_upd_clust_step(node, thr); - - if (err != DB_SUCCESS) { - - goto function_exit; - } - } - - if (!node->is_delete && (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { - - goto function_exit; - } - - while (node->index != NULL) { - err = row_upd_sec_step(node, thr); - - if (err != DB_SUCCESS) { - - goto function_exit; - } - - node->index = dict_table_get_next_index(node->index); - } - -function_exit: - if (err == DB_SUCCESS) { - /* Do some cleanup */ - - if (node->row != NULL) { - node->row = NULL; - node->n_ext_vec = 0; - mem_heap_empty(node->heap); - } - - node->state = UPD_NODE_UPDATE_CLUSTERED; - } - - return(err); -} - -/*************************************************************** -Updates a row in a table. This is a high-level function used in SQL execution -graphs. 
*/ - -que_thr_t* -row_upd_step( -/*=========*/ - /* out: query thread to run next or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - upd_node_t* node; - sel_node_t* sel_node; - que_node_t* parent; - ulint err = DB_SUCCESS; - trx_t* trx; - - ut_ad(thr); - - trx = thr_get_trx(thr); - - trx_start_if_not_started(trx); - - node = thr->run_node; - - sel_node = node->select; - - parent = que_node_get_parent(node); - - ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE); - - if (thr->prev_node == parent) { - node->state = UPD_NODE_SET_IX_LOCK; - } - - if (node->state == UPD_NODE_SET_IX_LOCK) { - - if (!node->has_clust_rec_x_lock) { - /* It may be that the current session has not yet - started its transaction, or it has been committed: */ - - err = lock_table(0, node->table, LOCK_IX, thr); - - if (err != DB_SUCCESS) { - - goto error_handling; - } - } - - node->state = UPD_NODE_UPDATE_CLUSTERED; - - if (node->searched_update) { - /* Reset the cursor */ - sel_node->state = SEL_NODE_OPEN; - - /* Fetch a row to update */ - - thr->run_node = sel_node; - - return(thr); - } - } - - /* sel_node is NULL if we are in the MySQL interface */ - - if (sel_node && (sel_node->state != SEL_NODE_FETCH)) { - - if (!node->searched_update) { - /* An explicit cursor should be positioned on a row - to update */ - - ut_error; - - err = DB_ERROR; - - goto error_handling; - } - - ut_ad(sel_node->state == SEL_NODE_NO_MORE_ROWS); - - /* No more rows to update, or the select node performed the - updates directly in-place */ - - thr->run_node = parent; - - return(thr); - } - - /* DO THE CHECKS OF THE CONSISTENCY CONSTRAINTS HERE */ - - err = row_upd(node, thr); - -error_handling: - trx->error_state = err; - - if (err != DB_SUCCESS) { - return(NULL); - } - - /* DO THE TRIGGER ACTIONS HERE */ - - if (node->searched_update) { - /* Fetch next row to update */ - - thr->run_node = sel_node; - } else { - /* It was an explicit cursor update */ - - thr->run_node = parent; - } - - node->state = 
UPD_NODE_UPDATE_CLUSTERED; - - return(thr); -} - -/************************************************************************* -Performs an in-place update for the current clustered index record in -select. */ - -void -row_upd_in_place_in_select( -/*=======================*/ - sel_node_t* sel_node, /* in: select node */ - que_thr_t* thr, /* in: query thread */ - mtr_t* mtr) /* in: mtr */ -{ - upd_node_t* node; - btr_pcur_t* pcur; - btr_cur_t* btr_cur; - ulint err; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_ad(sel_node->select_will_do_update); - ut_ad(sel_node->latch_mode == BTR_MODIFY_LEAF); - ut_ad(sel_node->asc); - - node = que_node_get_parent(sel_node); - - ut_ad(que_node_get_type(node) == QUE_NODE_UPDATE); - - pcur = node->pcur; - btr_cur = btr_pcur_get_btr_cur(pcur); - - /* Copy the necessary columns from clust_rec and calculate the new - values to set */ - - row_upd_copy_columns(btr_pcur_get_rec(pcur), - rec_get_offsets(btr_pcur_get_rec(pcur), - btr_cur->index, offsets_, - ULINT_UNDEFINED, &heap), - UT_LIST_GET_FIRST(node->columns)); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - row_upd_eval_new_vals(node->update); - - ut_ad(!rec_get_deleted_flag( - btr_pcur_get_rec(pcur), - dict_table_is_comp(btr_cur->index->table))); - - ut_ad(node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE); - ut_ad(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE); - ut_ad(node->select_will_do_update); - - err = btr_cur_update_in_place(BTR_NO_LOCKING_FLAG, btr_cur, - node->update, node->cmpl_info, - thr, mtr); - ut_ad(err == DB_SUCCESS); -} diff --git a/storage/innobase/row/row0vers.c b/storage/innobase/row/row0vers.c deleted file mode 100644 index 03d9a2f1203..00000000000 --- a/storage/innobase/row/row0vers.c +++ /dev/null @@ -1,665 +0,0 @@ -/****************************************************** -Row versions - -(c) 1997 Innobase Oy - -Created 2/6/1997 Heikki Tuuri 
-*******************************************************/ - -#include "row0vers.h" - -#ifdef UNIV_NONINL -#include "row0vers.ic" -#endif - -#include "dict0dict.h" -#include "dict0boot.h" -#include "btr0btr.h" -#include "mach0data.h" -#include "trx0rseg.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "trx0undo.h" -#include "trx0purge.h" -#include "trx0rec.h" -#include "que0que.h" -#include "row0row.h" -#include "row0upd.h" -#include "rem0cmp.h" -#include "read0read.h" -#include "lock0lock.h" - -/********************************************************************* -Finds out if an active transaction has inserted or modified a secondary -index record. NOTE: the kernel mutex is temporarily released in this -function! */ - -trx_t* -row_vers_impl_x_locked_off_kernel( -/*==============================*/ - /* out: NULL if committed, else the active - transaction; NOTE that the kernel mutex is - temporarily released! */ - rec_t* rec, /* in: record in a secondary index */ - dict_index_t* index, /* in: the secondary index */ - const ulint* offsets)/* in: rec_get_offsets(rec, index) */ -{ - dict_index_t* clust_index; - rec_t* clust_rec; - ulint* clust_offsets; - rec_t* version; - rec_t* prev_version; - dulint trx_id; - dulint prev_trx_id; - mem_heap_t* heap; - mem_heap_t* heap2; - dtuple_t* row; - dtuple_t* entry = NULL; /* assignment to eliminate compiler - warning */ - trx_t* trx; - ulint vers_del; - ulint rec_del; - ulint err; - mtr_t mtr; - ulint comp; - - ut_ad(mutex_own(&kernel_mutex)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - mutex_exit(&kernel_mutex); - - mtr_start(&mtr); - - /* Search for the clustered index record: this is a time-consuming - operation: therefore we release the kernel mutex; also, the release - is required by the latching order convention. The latch on the - clustered index locks the top of the stack of versions. 
We also - reserve purge_latch to lock the bottom of the version stack. */ - - clust_rec = row_get_clust_rec(BTR_SEARCH_LEAF, rec, index, - &clust_index, &mtr); - if (!clust_rec) { - /* In a rare case it is possible that no clust rec is found - for a secondary index record: if in row0umod.c - row_undo_mod_remove_clust_low() we have already removed the - clust rec, while purge is still cleaning and removing - secondary index records associated with earlier versions of - the clustered index record. In that case there cannot be - any implicit lock on the secondary index record, because - an active transaction which has modified the secondary index - record has also modified the clustered index record. And in - a rollback we always undo the modifications to secondary index - records before the clustered index record. */ - - mutex_enter(&kernel_mutex); - mtr_commit(&mtr); - - return(NULL); - } - - heap = mem_heap_create(1024); - clust_offsets = rec_get_offsets(clust_rec, clust_index, NULL, - ULINT_UNDEFINED, &heap); - trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets); - - mtr_s_lock(&(purge_sys->latch), &mtr); - - mutex_enter(&kernel_mutex); - - trx = NULL; - if (!trx_is_active(trx_id)) { - /* The transaction that modified or inserted clust_rec is no - longer active: no implicit lock on rec */ - goto exit_func; - } - - if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index, - clust_offsets, TRUE)) { - /* Corruption noticed: try to avoid a crash by returning */ - goto exit_func; - } - - comp = page_rec_is_comp(rec); - ut_ad(index->table == clust_index->table); - ut_ad(!!comp == dict_table_is_comp(index->table)); - ut_ad(!comp == !page_rec_is_comp(clust_rec)); - - /* We look up if some earlier version, which was modified by the trx_id - transaction, of the clustered index record would require rec to be in - a different state (delete marked or unmarked, or have different field - values, or not existing). 
If there is such a version, then rec was - modified by the trx_id transaction, and it has an implicit x-lock on - rec. Note that if clust_rec itself would require rec to be in a - different state, then the trx_id transaction has not yet had time to - modify rec, and does not necessarily have an implicit x-lock on rec. */ - - rec_del = rec_get_deleted_flag(rec, comp); - trx = NULL; - - version = clust_rec; - - for (;;) { - mutex_exit(&kernel_mutex); - - /* While we retrieve an earlier version of clust_rec, we - release the kernel mutex, because it may take time to access - the disk. After the release, we have to check if the trx_id - transaction is still active. We keep the semaphore in mtr on - the clust_rec page, so that no other transaction can update - it and get an implicit x-lock on rec. */ - - heap2 = heap; - heap = mem_heap_create(1024); - err = trx_undo_prev_version_build(clust_rec, &mtr, version, - clust_index, clust_offsets, - heap, &prev_version); - mem_heap_free(heap2); /* free version and clust_offsets */ - - if (prev_version) { - clust_offsets = rec_get_offsets( - prev_version, clust_index, NULL, - ULINT_UNDEFINED, &heap); - row = row_build(ROW_COPY_POINTERS, clust_index, - prev_version, clust_offsets, heap); - entry = row_build_index_entry(row, index, heap); - } - - mutex_enter(&kernel_mutex); - - if (!trx_is_active(trx_id)) { - /* Transaction no longer active: no implicit x-lock */ - - break; - } - - /* If the transaction is still active, the previous version - of clust_rec must be accessible if not a fresh insert; we - may assert the following: */ - - ut_ad(err == DB_SUCCESS); - - if (prev_version == NULL) { - /* It was a freshly inserted version: there is an - implicit x-lock on rec */ - - trx = trx_get_on_id(trx_id); - - break; - } - - /* If we get here, we know that the trx_id transaction is - still active and it has modified prev_version. Let us check - if prev_version would require rec to be in a different - state. 
*/ - - vers_del = rec_get_deleted_flag(prev_version, comp); - - /* We check if entry and rec are identified in the alphabetical - ordering */ - if (0 == cmp_dtuple_rec(entry, rec, offsets)) { - /* The delete marks of rec and prev_version should be - equal for rec to be in the state required by - prev_version */ - - if (rec_del != vers_del) { - trx = trx_get_on_id(trx_id); - - break; - } - - /* It is possible that the row was updated so that the - secondary index record remained the same in - alphabetical ordering, but the field values changed - still. For example, 'abc' -> 'ABC'. Check also that. */ - - dtuple_set_types_binary(entry, - dtuple_get_n_fields(entry)); - if (0 != cmp_dtuple_rec(entry, rec, offsets)) { - - trx = trx_get_on_id(trx_id); - - break; - } - } else if (!rec_del) { - /* The delete mark should be set in rec for it to be - in the state required by prev_version */ - - trx = trx_get_on_id(trx_id); - - break; - } - - prev_trx_id = row_get_rec_trx_id(prev_version, clust_index, - clust_offsets); - - if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) { - /* The versions modified by the trx_id transaction end - to prev_version: no implicit x-lock */ - - break; - } - - version = prev_version; - }/* for (;;) */ - -exit_func: - mtr_commit(&mtr); - mem_heap_free(heap); - - return(trx); -} - -/********************************************************************* -Finds out if we must preserve a delete marked earlier version of a clustered -index record, because it is >= the purge view. 
*/ - -ibool -row_vers_must_preserve_del_marked( -/*==============================*/ - /* out: TRUE if earlier version should be preserved */ - dulint trx_id, /* in: transaction id in the version */ - mtr_t* mtr) /* in: mtr holding the latch on the clustered index - record; it will also hold the latch on purge_view */ -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - mtr_s_lock(&(purge_sys->latch), mtr); - - if (trx_purge_update_undo_must_exist(trx_id)) { - - /* A purge operation is not yet allowed to remove this - delete marked record */ - - return(TRUE); - } - - return(FALSE); -} - -/********************************************************************* -Finds out if a version of the record, where the version >= the current -purge view, should have ientry as its secondary index entry. We check -if there is any not delete marked version of the record where the trx -id >= purge view, and the secondary index entry and ientry are identified in -the alphabetical ordering; exactly in this case we return TRUE. 
*/ - -ibool -row_vers_old_has_index_entry( -/*=========================*/ - /* out: TRUE if earlier version should have */ - ibool also_curr,/* in: TRUE if also rec is included in the - versions to search; otherwise only versions - prior to it are searched */ - rec_t* rec, /* in: record in the clustered index; the - caller must have a latch on the page */ - mtr_t* mtr, /* in: mtr holding the latch on rec; it will - also hold the latch on purge_view */ - dict_index_t* index, /* in: the secondary index */ - dtuple_t* ientry) /* in: the secondary index entry */ -{ - rec_t* version; - rec_t* prev_version; - dict_index_t* clust_index; - ulint* clust_offsets; - mem_heap_t* heap; - mem_heap_t* heap2; - dtuple_t* row; - dtuple_t* entry; - ulint err; - ulint comp; - - ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains(mtr, buf_block_align(rec), - MTR_MEMO_PAGE_S_FIX)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - mtr_s_lock(&(purge_sys->latch), mtr); - - clust_index = dict_table_get_first_index(index->table); - - comp = page_rec_is_comp(rec); - ut_ad(!dict_table_is_comp(index->table) == !comp); - heap = mem_heap_create(1024); - clust_offsets = rec_get_offsets(rec, clust_index, NULL, - ULINT_UNDEFINED, &heap); - - if (also_curr && !rec_get_deleted_flag(rec, comp)) { - row = row_build(ROW_COPY_POINTERS, clust_index, - rec, clust_offsets, heap); - entry = row_build_index_entry(row, index, heap); - - /* NOTE that we cannot do the comparison as binary - fields because the row is maybe being modified so that - the clustered index record has already been updated - to a different binary value in a char field, but the - collation identifies the old and new value anyway! 
*/ - - if (dtuple_datas_are_ordering_equal(ientry, entry)) { - - mem_heap_free(heap); - - return(TRUE); - } - } - - version = rec; - - for (;;) { - heap2 = heap; - heap = mem_heap_create(1024); - err = trx_undo_prev_version_build(rec, mtr, version, - clust_index, clust_offsets, - heap, &prev_version); - mem_heap_free(heap2); /* free version and clust_offsets */ - - if (err != DB_SUCCESS || !prev_version) { - /* Versions end here */ - - mem_heap_free(heap); - - return(FALSE); - } - - clust_offsets = rec_get_offsets(prev_version, clust_index, - NULL, ULINT_UNDEFINED, &heap); - - if (!rec_get_deleted_flag(prev_version, comp)) { - row = row_build(ROW_COPY_POINTERS, clust_index, - prev_version, clust_offsets, heap); - entry = row_build_index_entry(row, index, heap); - - /* NOTE that we cannot do the comparison as binary - fields because maybe the secondary index record has - already been updated to a different binary value in - a char field, but the collation identifies the old - and new value anyway! */ - - if (dtuple_datas_are_ordering_equal(ientry, entry)) { - - mem_heap_free(heap); - - return(TRUE); - } - } - - version = prev_version; - } -} - -/********************************************************************* -Constructs the version of a clustered index record which a consistent -read should see. We assume that the trx id stored in rec is such that -the consistent read should not see rec in its present version. 
*/ - -ulint -row_vers_build_for_consistent_read( -/*===============================*/ - /* out: DB_SUCCESS or DB_MISSING_HISTORY */ - rec_t* rec, /* in: record in a clustered index; the - caller must have a latch on the page; this - latch locks the top of the stack of versions - of this records */ - mtr_t* mtr, /* in: mtr holding the latch on rec */ - dict_index_t* index, /* in: the clustered index */ - ulint** offsets,/* in/out: offsets returned by - rec_get_offsets(rec, index) */ - read_view_t* view, /* in: the consistent read view */ - mem_heap_t** offset_heap,/* in/out: memory heap from which - the offsets are allocated */ - mem_heap_t* in_heap,/* in: memory heap from which the memory for - old_vers is allocated; memory for possible - intermediate versions is allocated and freed - locally within the function */ - rec_t** old_vers)/* out, own: old version, or NULL if the - record does not exist in the view, that is, - it was freshly inserted afterwards */ -{ - rec_t* version; - rec_t* prev_version; - dulint trx_id; - mem_heap_t* heap = NULL; - byte* buf; - ulint err; - - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains(mtr, buf_block_align(rec), - MTR_MEMO_PAGE_S_FIX)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - ut_ad(rec_offs_validate(rec, index, *offsets)); - - trx_id = row_get_rec_trx_id(rec, index, *offsets); - - ut_ad(!read_view_sees_trx_id(view, trx_id)); - - rw_lock_s_lock(&(purge_sys->latch)); - version = rec; - - for (;;) { - mem_heap_t* heap2 = heap; - trx_undo_rec_t* undo_rec; - dulint roll_ptr; - dulint undo_no; - heap = mem_heap_create(1024); - - /* If we have high-granularity consistent read view and - creating transaction of the view is the same as trx_id in - the record we see this record only in the case when - undo_no of the record is < undo_no in the view. 
*/ - - if (view->type == VIEW_HIGH_GRANULARITY - && ut_dulint_cmp(view->creator_trx_id, trx_id) == 0) { - - roll_ptr = row_get_rec_roll_ptr(version, index, - *offsets); - undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap); - undo_no = trx_undo_rec_get_undo_no(undo_rec); - mem_heap_empty(heap); - - if (ut_dulint_cmp(view->undo_no, undo_no) > 0) { - /* The view already sees this version: we can - copy it to in_heap and return */ - - buf = mem_heap_alloc(in_heap, - rec_offs_size(*offsets)); - *old_vers = rec_copy(buf, version, *offsets); - rec_offs_make_valid(*old_vers, index, - *offsets); - err = DB_SUCCESS; - - break; - } - } - - err = trx_undo_prev_version_build(rec, mtr, version, index, - *offsets, heap, - &prev_version); - if (heap2) { - mem_heap_free(heap2); /* free version */ - } - - if (err != DB_SUCCESS) { - break; - } - - if (prev_version == NULL) { - /* It was a freshly inserted version */ - *old_vers = NULL; - err = DB_SUCCESS; - - break; - } - - *offsets = rec_get_offsets(prev_version, index, *offsets, - ULINT_UNDEFINED, offset_heap); - - trx_id = row_get_rec_trx_id(prev_version, index, *offsets); - - if (read_view_sees_trx_id(view, trx_id)) { - - /* The view already sees this version: we can copy - it to in_heap and return */ - - buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets)); - *old_vers = rec_copy(buf, prev_version, *offsets); - rec_offs_make_valid(*old_vers, index, *offsets); - err = DB_SUCCESS; - - break; - } - - version = prev_version; - }/* for (;;) */ - - mem_heap_free(heap); - rw_lock_s_unlock(&(purge_sys->latch)); - - return(err); -} - -/********************************************************************* -Constructs the last committed version of a clustered index record, -which should be seen by a semi-consistent read. 
*/ - -ulint -row_vers_build_for_semi_consistent_read( -/*====================================*/ - /* out: DB_SUCCESS or DB_MISSING_HISTORY */ - rec_t* rec, /* in: record in a clustered index; the - caller must have a latch on the page; this - latch locks the top of the stack of versions - of this records */ - mtr_t* mtr, /* in: mtr holding the latch on rec */ - dict_index_t* index, /* in: the clustered index */ - ulint** offsets,/* in/out: offsets returned by - rec_get_offsets(rec, index) */ - mem_heap_t** offset_heap,/* in/out: memory heap from which - the offsets are allocated */ - mem_heap_t* in_heap,/* in: memory heap from which the memory for - old_vers is allocated; memory for possible - intermediate versions is allocated and freed - locally within the function */ - rec_t** old_vers)/* out, own: rec, old version, or NULL if the - record does not exist in the view, that is, - it was freshly inserted afterwards */ -{ - rec_t* version; - mem_heap_t* heap = NULL; - byte* buf; - ulint err; - dulint rec_trx_id = ut_dulint_create(0, 0); - - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX) - || mtr_memo_contains(mtr, buf_block_align(rec), - MTR_MEMO_PAGE_S_FIX)); -#ifdef UNIV_SYNC_DEBUG - ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - ut_ad(rec_offs_validate(rec, index, *offsets)); - - rw_lock_s_lock(&(purge_sys->latch)); - /* The S-latch on purge_sys prevents the purge view from - changing. Thus, if we have an uncommitted transaction at - this point, then purge cannot remove its undo log even if - the transaction could commit now. 
*/ - - version = rec; - - for (;;) { - trx_t* version_trx; - mem_heap_t* heap2; - rec_t* prev_version; - dulint version_trx_id; - - version_trx_id = row_get_rec_trx_id(version, index, *offsets); - if (rec == version) { - rec_trx_id = version_trx_id; - } - - mutex_enter(&kernel_mutex); - version_trx = trx_get_on_id(version_trx_id); - mutex_exit(&kernel_mutex); - - if (!version_trx - || version_trx->conc_state == TRX_NOT_STARTED - || version_trx->conc_state == TRX_COMMITTED_IN_MEMORY) { - - /* We found a version that belongs to a - committed transaction: return it. */ - - if (rec == version) { - *old_vers = rec; - err = DB_SUCCESS; - break; - } - - /* We assume that a rolled-back transaction stays in - TRX_ACTIVE state until all the changes have been - rolled back and the transaction is removed from - the global list of transactions. */ - - if (!ut_dulint_cmp(rec_trx_id, version_trx_id)) { - /* The transaction was committed while - we searched for earlier versions. - Return the current version as a - semi-consistent read. 
*/ - - version = rec; - *offsets = rec_get_offsets(version, - index, *offsets, - ULINT_UNDEFINED, - offset_heap); - } - - buf = mem_heap_alloc(in_heap, rec_offs_size(*offsets)); - *old_vers = rec_copy(buf, version, *offsets); - rec_offs_make_valid(*old_vers, index, *offsets); - err = DB_SUCCESS; - - break; - } - - heap2 = heap; - heap = mem_heap_create(1024); - - err = trx_undo_prev_version_build(rec, mtr, version, index, - *offsets, heap, - &prev_version); - if (heap2) { - mem_heap_free(heap2); /* free version */ - } - - if (UNIV_UNLIKELY(err != DB_SUCCESS)) { - break; - } - - if (prev_version == NULL) { - /* It was a freshly inserted version */ - *old_vers = NULL; - err = DB_SUCCESS; - - break; - } - - version = prev_version; - *offsets = rec_get_offsets(version, index, *offsets, - ULINT_UNDEFINED, offset_heap); - }/* for (;;) */ - - if (heap) { - mem_heap_free(heap); - } - rw_lock_s_unlock(&(purge_sys->latch)); - - return(err); -} diff --git a/storage/innobase/srv/srv0que.c b/storage/innobase/srv/srv0que.c deleted file mode 100644 index e2b4e217980..00000000000 --- a/storage/innobase/srv/srv0que.c +++ /dev/null @@ -1,110 +0,0 @@ -/****************************************************** -Server query execution - -(c) 1996 Innobase Oy - -Created 6/5/1996 Heikki Tuuri -*******************************************************/ - -#include "srv0que.h" - -#include "srv0srv.h" -#include "sync0sync.h" -#include "os0thread.h" -#include "usr0sess.h" -#include "que0que.h" - -/************************************************************************** -Checks if there is work to do in the server task queue. If there is, the -thread starts processing a task. Before leaving, it again checks the task -queue and picks a new task if any exists. This is called by a SRV_WORKER -thread. 
*/ - -void -srv_que_task_queue_check(void) -/*==========================*/ -{ - que_thr_t* thr; - - for (;;) { /* loop until srv_sys->tasks is found empty */ - mutex_enter(&kernel_mutex); - - thr = UT_LIST_GET_FIRST(srv_sys->tasks); - - if (thr == NULL) { /* no queued task left: release the mutex and return */ - mutex_exit(&kernel_mutex); - - return; - } - - UT_LIST_REMOVE(queue, srv_sys->tasks, thr); - - mutex_exit(&kernel_mutex); - - que_run_threads(thr); /* the dequeued task is run after kernel_mutex has been released */ - } -} - -/************************************************************************** -Performs round-robin on the server tasks. This is called by a SRV_WORKER -thread every second or so. While holding kernel_mutex, appends thr to the end of srv_sys->tasks and returns the element that is then first in that list; thr itself is returned if the list was empty before the append. */ - -que_thr_t* -srv_que_round_robin( -/*================*/ - /* out: the new (may be == thr) query thread - to run */ - que_thr_t* thr) /* in: query thread */ -{ - que_thr_t* new_thr; - - ut_ad(thr); - ut_ad(thr->state == QUE_THR_RUNNING); - - mutex_enter(&kernel_mutex); - - UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr); - - new_thr = UT_LIST_GET_FIRST(srv_sys->tasks); - - mutex_exit(&kernel_mutex); - - return(new_thr); -} - -/************************************************************************** -Enqueues a task to server task queue and releases a worker thread, if there -is a suspended one. The caller must own kernel_mutex (this is checked with ut_ad); the function itself does not acquire it. Appends thr to the end of srv_sys->tasks and then calls srv_release_threads(SRV_WORKER, 1). */ - -void -srv_que_task_enqueue_low( -/*=====================*/ - que_thr_t* thr) /* in: query thread */ -{ - ut_ad(thr); - ut_ad(mutex_own(&kernel_mutex)); - - UT_LIST_ADD_LAST(queue, srv_sys->tasks, thr); - - srv_release_threads(SRV_WORKER, 1); -} - -/************************************************************************** -Enqueues a task to server task queue and releases a worker thread, if there -is a suspended one. 
*/ - -void -srv_que_task_enqueue( -/*=================*/ - que_thr_t* thr) /* in: query thread */ -{ - ut_ad(thr); - - ut_a(0); /* Under MySQL this is never called */ - - mutex_enter(&kernel_mutex); - - srv_que_task_enqueue_low(thr); - - mutex_exit(&kernel_mutex); -} diff --git a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c deleted file mode 100644 index 36c3d450aae..00000000000 --- a/storage/innobase/srv/srv0srv.c +++ /dev/null @@ -1,2885 +0,0 @@ -/****************************************************** -The database server main program - -NOTE: SQL Server 7 uses something which the documentation -calls user mode scheduled threads (UMS threads). One such -thread is usually allocated per processor. Win32 -documentation does not know any UMS threads, which suggests -that the concept is internal to SQL Server 7. It may mean that -SQL Server 7 does all the scheduling of threads itself, even -in i/o waits. We should maybe modify InnoDB to use the same -technique, because thread switches within NT may be too slow. - -SQL Server 7 also mentions fibers, which are cooperatively -scheduled threads. They can boost performance by 5 %, -according to the Delaney and Soukup's book. - -Windows 2000 will have something called thread pooling -(see msdn website), which we could possibly use. - -Another possibility could be to use some very fast user space -thread library. This might confuse NT though. 
- -(c) 1995 Innobase Oy - -Created 10/8/1995 Heikki Tuuri -*******************************************************/ -/* Dummy comment */ -#include "srv0srv.h" - -#include "ut0mem.h" -#include "os0proc.h" -#include "mem0mem.h" -#include "mem0pool.h" -#include "sync0sync.h" -#include "thr0loc.h" -#include "que0que.h" -#include "srv0que.h" -#include "log0recv.h" -#include "pars0pars.h" -#include "usr0sess.h" -#include "lock0lock.h" -#include "trx0purge.h" -#include "ibuf0ibuf.h" -#include "buf0flu.h" -#include "btr0sea.h" -#include "dict0load.h" -#include "dict0boot.h" -#include "srv0start.h" -#include "row0mysql.h" -#include "ha_prototypes.h" - -/* This is set to TRUE if the MySQL user has set it in MySQL; currently -affects only FOREIGN KEY definition parsing */ -ibool srv_lower_case_table_names = FALSE; - -/* The following counter is incremented whenever there is some user activity -in the server */ -ulint srv_activity_count = 0; - -/* The following is the maximum allowed duration of a lock wait. */ -ulint srv_fatal_semaphore_wait_threshold = 600; - -/* How much data manipulation language (DML) statements need to be delayed, -in microseconds, in order to reduce the lagging of the purge thread. 
*/ -ulint srv_dml_needed_delay = 0; - -ibool srv_lock_timeout_and_monitor_active = FALSE; -ibool srv_error_monitor_active = FALSE; - -const char* srv_main_thread_op_info = ""; - -/* Prefix used by MySQL to indicate pre-5.1 table name encoding */ -const char srv_mysql50_table_name_prefix[9] = "#mysql50#"; - -/* Server parameters which are read from the initfile */ - -/* The following three are dir paths which are catenated before file -names, where the file name itself may also contain a path */ - -char* srv_data_home = NULL; -#ifdef UNIV_LOG_ARCHIVE -char* srv_arch_dir = NULL; -#endif /* UNIV_LOG_ARCHIVE */ - -ibool srv_file_per_table = FALSE; /* store to its own file each table - created by an user; data dictionary - tables are in the system tablespace - 0 */ -ibool srv_locks_unsafe_for_binlog = FALSE; /* Place locks to - records only i.e. do - not use next-key - locking except on - duplicate key checking - and foreign key - checking */ -ulint srv_n_data_files = 0; -char** srv_data_file_names = NULL; -ulint* srv_data_file_sizes = NULL; /* size in database pages */ - -ibool srv_auto_extend_last_data_file = FALSE; /* if TRUE, then we - auto-extend the last data - file */ -ulint srv_last_file_size_max = 0; /* if != 0, this tells - the max size auto-extending - may increase the last data - file size */ -ulong srv_auto_extend_increment = 8; /* If the last data file is - auto-extended, we add this - many pages to it at a time */ -ulint* srv_data_file_is_raw_partition = NULL; - -/* If the following is TRUE we do not allow inserts etc. 
This protects -the user from forgetting the 'newraw' keyword to my.cnf */ - -ibool srv_created_new_raw = FALSE; - -char** srv_log_group_home_dirs = NULL; - -ulint srv_n_log_groups = ULINT_MAX; -ulint srv_n_log_files = ULINT_MAX; -ulint srv_log_file_size = ULINT_MAX; /* size in database pages */ -ulint srv_log_buffer_size = ULINT_MAX; /* size in database pages */ -ulong srv_flush_log_at_trx_commit = 1; - -byte srv_latin1_ordering[256] /* The sort order table of the latin1 - character set. The following table is - the MySQL order as of Feb 10th, 2002 */ -= { - 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07 -, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F -, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17 -, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F -, 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27 -, 0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F -, 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37 -, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F -, 0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47 -, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F -, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57 -, 0x58, 0x59, 0x5A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F -, 0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47 -, 0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F -, 0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57 -, 0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F -, 0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87 -, 0x88, 0x89, 0x8A, 0x8B, 0x8C, 0x8D, 0x8E, 0x8F -, 0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97 -, 0x98, 0x99, 0x9A, 0x9B, 0x9C, 0x9D, 0x9E, 0x9F -, 0xA0, 0xA1, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7 -, 0xA8, 0xA9, 0xAA, 0xAB, 0xAC, 0xAD, 0xAE, 0xAF -, 0xB0, 0xB1, 0xB2, 0xB3, 0xB4, 0xB5, 0xB6, 0xB7 -, 0xB8, 0xB9, 0xBA, 0xBB, 0xBC, 0xBD, 0xBE, 0xBF -, 0x41, 0x41, 0x41, 0x41, 0x5C, 0x5B, 0x5C, 0x43 -, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49 -, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xD7 -, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xDF -, 0x41, 0x41, 0x41, 0x41, 0x5C, 
0x5B, 0x5C, 0x43 -, 0x45, 0x45, 0x45, 0x45, 0x49, 0x49, 0x49, 0x49 -, 0x44, 0x4E, 0x4F, 0x4F, 0x4F, 0x4F, 0x5D, 0xF7 -, 0xD8, 0x55, 0x55, 0x55, 0x59, 0x59, 0xDE, 0xFF -}; - -ulint srv_pool_size = ULINT_MAX; /* size in pages; MySQL inits - this to size in kilobytes but - we normalize this to pages in - srv_boot() */ -ulint srv_awe_window_size = 0; /* size in pages; MySQL inits - this to bytes, but we - normalize it to pages in - srv_boot() */ -ulint srv_mem_pool_size = ULINT_MAX; /* size in bytes */ -ulint srv_lock_table_size = ULINT_MAX; - - -ulint srv_io_capacity = ULINT_MAX; /* Number of IO operations per - second the server can do */ - -ibool srv_extra_dirty_writes = TRUE; /* Write dirty pages to disk when pct - dirty < max dirty pct */ - -ulint srv_n_read_io_threads = ULINT_MAX; -ulint srv_n_write_io_threads = ULINT_MAX; -ulint srv_max_merged_io = 64; - -#ifdef UNIV_LOG_ARCHIVE -ibool srv_log_archive_on = FALSE; -ibool srv_archive_recovery = 0; -dulint srv_archive_recovery_limit_lsn; -#endif /* UNIV_LOG_ARCHIVE */ - -ulint srv_lock_wait_timeout = 1024 * 1024 * 1024; - -/* This parameter is used to throttle the number of insert buffers that are -merged in a batch. By increasing this parameter on a faster disk you can -possibly reduce the number of I/O operations performed to complete the -merge operation. The value of this parameter is used as is by the -background loop when the system is idle (low load), on a busy system -the parameter is scaled down by a factor of 4, this is to avoid putting -a heavier load on the I/O sub system. */ - -ulong srv_insert_buffer_batch_size = 20; - -char* srv_file_flush_method_str = NULL; -ulint srv_unix_file_flush_method = SRV_UNIX_FSYNC; -ulint srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; - -ulint srv_max_n_open_files = 300; - -/* The InnoDB main thread tries to keep the ratio of modified pages -in the buffer pool to all database pages in the buffer pool smaller than -the following number. 
But it is not guaranteed that the value stays below -that during a time of heavy update/insert activity. */ - -ulong srv_max_buf_pool_modified_pct = 90; - -/* variable counts amount of data read in total (in bytes) */ -ulint srv_data_read = 0; - -/* here we count the amount of data written in total (in bytes) */ -ulint srv_data_written = 0; - -/* the number of the log write requests done */ -ulint srv_log_write_requests = 0; - -/* the number of physical writes to the log performed */ -ulint srv_log_writes = 0; - -/* amount of data written to the log files in bytes */ -ulint srv_os_log_written = 0; - -/* amount of writes being done to the log files */ -ulint srv_os_log_pending_writes = 0; - -/* we increase this counter, when there we don't have enough space in the -log buffer and have to flush it */ -ulint srv_log_waits = 0; - -/* this variable counts the amount of times, when the doublewrite buffer -was flushed */ -ulint srv_dblwr_writes = 0; - -/* here we store the number of pages that have been flushed to the -doublewrite buffer */ -ulint srv_dblwr_pages_written = 0; - -/* in this variable we store the number of write requests issued */ -ulint srv_buf_pool_write_requests = 0; - -/* here we store the number of times when we had to wait for a free page -in the buffer pool. It happens when the buffer pool is full and we need -to make a flush, in order to be able to read or create a page. 
*/ -ulint srv_buf_pool_wait_free = 0; - -/* variable to count the number of pages that were written from buffer -pool to the disk */ -ulint srv_buf_pool_flushed = 0; - -/* variable to count the number of buffer pool reads that led to the -reading of a disk page */ -ulint srv_buf_pool_reads = 0; - -/* variable to count the number of sequential read-aheads */ -ulint srv_read_ahead_seq = 0; - -/* variable to count the number of random read-aheads */ -ulint srv_read_ahead_rnd = 0; - -/* An option to enable the fix for "Bug#43660 SHOW INDEXES/ANALYZE does -NOT update cardinality for indexes of InnoDB table". By default we are -running with the fix disabled because MySQL 5.1 is frozen for such -behavioral changes. */ -char srv_use_legacy_cardinality_algorithm = TRUE; - -/* structure to pass status variables to MySQL */ -export_struc export_vars; - -/* If the following is != 0 we do not allow inserts etc. This protects -the user from forgetting the innodb_force_recovery keyword to my.cnf */ - -ulint srv_force_recovery = 0; -/*-----------------------*/ -/* We are prepared for a situation that we have this many threads waiting for -a semaphore inside InnoDB. innobase_start_or_create_for_mysql() sets the -value. */ - -ulint srv_max_n_threads = 0; - -/* The following controls how many threads we let inside InnoDB concurrently: -threads waiting for locks are not counted into the number because otherwise -we could get a deadlock. MySQL creates a thread for each user session, and -semaphore contention and convoy problems can occur withput this restriction. -Value 10 should be good if there are less than 4 processors + 4 disks in the -computer. Bigger computers need bigger values. Value 0 will disable the -concurrency check. 
*/ - -ibool srv_thread_concurrency_timer_based = TRUE; -ulong srv_thread_concurrency = 0; -ulong srv_commit_concurrency = 0; - -os_fast_mutex_t srv_conc_mutex; /* this mutex protects srv_conc data - structures */ -lint srv_conc_n_threads = 0; /* number of transactions that - have declared_to_be_inside_innodb - set. It used to be a non-error - for this value to drop below - zero temporarily. This is no - longer true. We'll, however, - keep the lint datatype to add - assertions to catch any corner - cases that we may have - missed. */ -ulint srv_conc_n_waiting_threads = 0; /* number of OS threads waiting in the - FIFO for a permission to enter InnoDB - */ - -typedef struct srv_conc_slot_struct srv_conc_slot_t; -struct srv_conc_slot_struct{ - os_event_t event; /* event to wait */ - ibool reserved; /* TRUE if slot - reserved */ - ibool wait_ended; /* TRUE when another - thread has already set - the event and the - thread in this slot is - free to proceed; but - reserved may still be - TRUE at that point */ - UT_LIST_NODE_T(srv_conc_slot_t) srv_conc_queue; /* queue node */ -}; - -UT_LIST_BASE_NODE_T(srv_conc_slot_t) srv_conc_queue; /* queue of threads - waiting to get in */ -srv_conc_slot_t* srv_conc_slots; /* array of wait - slots */ - -/* Number of times a thread is allowed to enter InnoDB within the same -SQL query after it has once got the ticket at srv_conc_enter_innodb */ -#define SRV_FREE_TICKETS_TO_ENTER srv_n_free_tickets_to_enter -#define SRV_THREAD_SLEEP_DELAY srv_thread_sleep_delay -/*-----------------------*/ -/* If the following is set to 1 then we do not run purge and insert buffer -merge to completion before shutdown. If it is set to 2, do not even flush the -buffer pool to data files at the shutdown: we effectively 'crash' -InnoDB (but lose no committed transactions). 
*/ -ulint srv_fast_shutdown = 0; - -/* Generate a innodb_status.<pid> file */ -ibool srv_innodb_status = FALSE; - -ibool srv_use_doublewrite_buf = TRUE; -ibool srv_use_checksums = TRUE; - -ibool srv_set_thread_priorities = TRUE; -int srv_query_thread_priority = 0; - -/* TRUE if the Address Windowing Extensions of Windows are used; then we must -disable adaptive hash indexes */ -ibool srv_use_awe = FALSE; -ibool srv_use_adaptive_hash_indexes = TRUE; - -/*-------------------------------------------*/ -ulong srv_n_spin_wait_rounds = 30; -ulong srv_n_free_tickets_to_enter = 500; -ulong srv_thread_sleep_delay = 10000; -ulint srv_spin_wait_delay = 6; -ibool srv_priority_boost = TRUE; - -ibool srv_print_thread_releases = FALSE; -ibool srv_print_lock_waits = FALSE; -ibool srv_print_buf_io = FALSE; -ibool srv_print_log_io = FALSE; -ibool srv_print_latch_waits = FALSE; - -ulint srv_n_rows_inserted = 0; -ulint srv_n_rows_updated = 0; -ulint srv_n_rows_deleted = 0; -ulint srv_n_rows_read = 0; -#ifndef UNIV_HOTBACKUP -static ulint srv_n_rows_inserted_old = 0; -static ulint srv_n_rows_updated_old = 0; -static ulint srv_n_rows_deleted_old = 0; -static ulint srv_n_rows_read_old = 0; -#endif /* !UNIV_HOTBACKUP */ - -ulint srv_n_lock_wait_count = 0; -ulint srv_n_lock_wait_current_count = 0; -ib_longlong srv_n_lock_wait_time = 0; -ulint srv_n_lock_max_wait_time = 0; - - -/* - Set the following to 0 if you want InnoDB to write messages on - stderr on startup/shutdown -*/ -ibool srv_print_verbose_log = TRUE; -ibool srv_print_innodb_monitor = FALSE; -ibool srv_print_innodb_lock_monitor = FALSE; -ibool srv_print_innodb_tablespace_monitor = FALSE; -ibool srv_print_innodb_table_monitor = FALSE; - -/* Array of English strings describing the current state of an -i/o handler thread */ - -const char* srv_io_thread_op_info[SRV_MAX_N_IO_THREADS]; -const char* srv_io_thread_function[SRV_MAX_N_IO_THREADS]; - -time_t srv_last_monitor_time; - -mutex_t srv_innodb_monitor_mutex; - -/* Mutex for 
locking srv_monitor_file */ -mutex_t srv_monitor_file_mutex; -/* Temporary file for innodb monitor output */ -FILE* srv_monitor_file; -/* Mutex for locking srv_dict_tmpfile. -This mutex has a very high rank; threads reserving it should not -be holding any InnoDB latches. */ -mutex_t srv_dict_tmpfile_mutex; -/* Temporary file for output from the data dictionary */ -FILE* srv_dict_tmpfile; -/* Mutex for locking srv_misc_tmpfile. -This mutex has a very low rank; threads reserving it should not -acquire any further latches or sleep before releasing this one. */ -mutex_t srv_misc_tmpfile_mutex; -/* Temporary file for miscellaneous diagnostic output */ -FILE* srv_misc_tmpfile; - -ulint srv_main_thread_process_no = 0; -ulint srv_main_thread_id = 0; - -/* The following count work done by srv_master_thread. */ - -/* Iterations by the 'once per second' loop */ -ulint srv_main_1_second_loops = 0; -/* Calls to sleep by the 'once per second' loop */ -ulint srv_main_sleeps = 0; -/* Iterations by the 'once per 10 seconds' loop */ -ulint srv_main_10_second_loops = 0; -/* Iterations of the loop bounded by the 'background_loop' label */ -ulint srv_main_background_loops = 0; -/* Iterations of the loop bounded by the 'flush_loop' label */ -ulint srv_main_flush_loops = 0; -/* Calls to log_buffer_flush_to_disk */ -ulint srv_sync_flush = 0; -/* Calls to log_buffer_flush_maybe_sync */ -ulint srv_async_flush = 0; - -/* Number of microseconds threads wait because of -innodb_thread_concurrency */ -static ib_longlong srv_thread_wait_mics = 0; - -/* Number of microseconds for spinlock delay: the elapsed time of one run of SYNC_SPIN_ROUNDS calls to ut_delay(), as measured and stored by time_spin_delay() */ -static ib_longlong srv_timed_spin_delay = 0; - -/* - IMPLEMENTATION OF THE SERVER MAIN PROGRAM - ========================================= - -There is the following analogue between this database -server and an operating system kernel: - -DB concept equivalent OS concept ----------- --------------------- -transaction -- process; - -query thread -- thread; - -lock -- semaphore; - -transaction set to -the 
rollback state -- kill signal delivered to a process; - -kernel -- kernel; - -query thread execution: -(a) without kernel mutex -reserved -- process executing in user mode; -(b) with kernel mutex reserved - -- process executing in kernel mode; - -The server is controlled by a master thread which runs at -a priority higher than normal, that is, higher than user threads. -It sleeps most of the time, and wakes up, say, every 300 milliseconds, -to check whether there is anything happening in the server which -requires intervention of the master thread. Such situations may be, -for example, when flushing of dirty blocks is needed in the buffer -pool or old versions of database rows have to be cleaned away. - -The threads which we call user threads serve the queries of -the clients and input from the console of the server. -They run at normal priority. The server may have several -communications endpoints. A dedicated set of user threads waits -at each of these endpoints ready to receive a client request. -Each request is taken by a single user thread, which then starts -processing and, when the result is ready, sends it to the client -and returns to wait at the same endpoint the thread started from. - -So, we do not have dedicated communication threads listening at -the endpoints and dealing the jobs to dedicated worker threads. -Our architecture saves one thread switch per request, compared -to the solution with dedicated communication threads -which amounts to 15 microseconds on 100 MHz Pentium -running NT. If the client -is communicating over a network, this saving is negligible, but -if the client resides in the same machine, maybe in an SMP machine -on a different processor from the server thread, the saving -can be important as the threads can communicate over shared -memory with an overhead of a few microseconds. - -We may later implement a dedicated communication thread solution -for those endpoints which communicate over a network. 
- -Our solution with user threads has two problems: for each endpoint -there has to be a number of listening threads. If there are many -communication endpoints, it may be difficult to set the right number -of concurrent threads in the system, as many of the threads -may always be waiting at less busy endpoints. Another problem -is queuing of the messages, as the server internally does not -offer any queue for jobs. - -Another group of user threads is intended for splitting the -queries and processing them in parallel. Let us call these -parallel communication threads. These threads are waiting for -parallelized tasks, suspended on event semaphores. - -A single user thread waits for input from the console, -like a command to shut down the database. - -Utility threads are a different group of threads which takes -care of the buffer pool flushing and other, mainly background -operations, in the server. -Some of these utility threads always run at a lower than normal -priority, so that they are always in background. Some of them -may dynamically boost their priority by the pri_adjust function, -even to higher than normal priority, if their task becomes urgent. -The running of utilities is controlled by high- and low-water marks -of urgency. The urgency may be measured by the number of dirty blocks -in the buffer pool, in the case of the flush thread, for example. -When the high-water mark is exceeded, a utility starts running, until -the urgency drops under the low-water mark. Then the utility thread -suspends itself to wait for an event. The master thread is -responsible for signaling this event when the utility thread is -again needed. - -For each individual type of utility, some threads always remain -at lower than normal priority. This is because pri_adjust is implemented -so that the threads at normal or higher priority control their -share of running time by calling sleep. 
Thus, if the load of the -system suddenly drops, these threads cannot necessarily utilize -the system fully. The background priority threads make up for this, -starting to run when the load drops. - -When there is no activity in the system, also the master thread -suspends itself to wait for an event making -the server totally silent. The responsibility to signal this -event is on the user thread which again receives a message -from a client. - -There is still one complication in our server design. If a -background utility thread obtains a resource (e.g., mutex) needed by a user -thread, and there is also some other user activity in the system, -the user thread may have to wait indefinitely long for the -resource, as the OS does not schedule a background thread if -there is some other runnable user thread. This problem is called -priority inversion in real-time programming. - -One solution to the priority inversion problem would be to -keep record of which thread owns which resource and -in the above case boost the priority of the background thread -so that it will be scheduled and it can release the resource. -This solution is called priority inheritance in real-time programming. -A drawback of this solution is that the overhead of acquiring a mutex -increases slightly, maybe 0.2 microseconds on a 100 MHz Pentium, because -the thread has to call os_thread_get_curr_id. -This may be compared to 0.5 microsecond overhead for a mutex lock-unlock -pair. Note that the thread -cannot store the information in the resource, say mutex, itself, -because competing threads could wipe out the information if it is -stored before acquiring the mutex, and if it is stored afterwards, -the information is outdated for the time of one machine instruction, -at least. (To be precise, the information could be stored to -lock_word in mutex if the machine supports atomic swap.) 
- -The above solution with priority inheritance may become actual in the -future, but at the moment we plan to implement a more coarse solution, -which could be called a global priority inheritance. If a thread -has to wait for a long time, say 300 milliseconds, for a resource, -we just guess that it may be waiting for a resource owned by a background -thread, and boost the priority of all runnable background threads -to the normal level. The background threads then themselves adjust -their fixed priority back to background after releasing all resources -they had (or, at some fixed points in their program code). - -What is the performance of the global priority inheritance solution? -We may weigh the length of the wait time 300 milliseconds, during -which the system processes some other thread -against the cost of boosting the priority of each runnable background -thread, rescheduling it, and lowering the priority again. -On 100 MHz Pentium + NT this overhead may be of the order 100 -microseconds per thread. So, if the number of runnable background -threads is not very big, say < 100, the cost is tolerable. -Utility threads probably will access resources used by -user threads not very often, so collisions of user threads -to preempted utility threads should not happen very often. - -The thread table contains -information of the current status of each thread existing in the system, -and also the event semaphores used in suspending the master thread -and utility and parallel communication threads when they have nothing to do. -The thread table can be seen as an analogue to the process table -in a traditional Unix implementation. - -The thread table is also used in the global priority inheritance -scheme. 
This brings in one additional complication: threads accessing -the thread table must have at least normal fixed priority, -because the priority inheritance solution does not work if a background -thread is preempted while possessing the mutex protecting the thread table. -So, if a thread accesses the thread table, its priority has to be -boosted at least to normal. This priority requirement can be seen similar to -the privileged mode used when processing the kernel calls in traditional -Unix.*/ - -/* Thread slot in the thread table */ -struct srv_slot_struct{ - os_thread_id_t id; /* thread id */ - os_thread_t handle; /* thread handle */ - ulint type; /* thread type: user, utility etc. */ - ibool in_use; /* TRUE if this slot is in use */ - ibool suspended; /* TRUE if the thread is waiting - for the event of this slot */ - ib_time_t suspend_time; /* time when the thread was - suspended */ - os_event_t event; /* event used in suspending the - thread when it has nothing to do */ - que_thr_t* thr; /* suspended query thread (only - used for MySQL threads) */ -}; - -/* Table for MySQL threads where they will be suspended to wait for locks */ -srv_slot_t* srv_mysql_table = NULL; - -os_event_t srv_lock_timeout_thread_event; - -srv_sys_t* srv_sys = NULL; - -byte srv_pad1[64]; /* padding to prevent other memory update - hotspots from residing on the same memory - cache line */ -mutex_t* kernel_mutex_temp;/* mutex protecting the server, trx structs, - query threads, and lock table */ -byte srv_pad2[64]; /* padding to prevent other memory update - hotspots from residing on the same memory - cache line */ - -/* The following three values measure the urgency of the jobs of -buffer, version, and insert threads. They may vary from 0 - 1000. -The server mutex protects all these variables. The low-water values -tell that the server can acquiesce the utility when the value -drops below this low-water mark. 
*/ - -ulint srv_meter[SRV_MASTER + 1]; -ulint srv_meter_low_water[SRV_MASTER + 1]; -ulint srv_meter_high_water[SRV_MASTER + 1]; -ulint srv_meter_high_water2[SRV_MASTER + 1]; -ulint srv_meter_foreground[SRV_MASTER + 1]; - -/* The following values give info about the activity going on in -the database. They are protected by the server mutex. The arrays -are indexed by the type of the thread. */ - -ulint srv_n_threads_active[SRV_MASTER + 1]; -ulint srv_n_threads[SRV_MASTER + 1]; - -/* Measures how long SYNC_SPIN_ROUNDS calls to ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)) take and stores the result of ut_usecdiff() for the end and start timestamps in srv_timed_spin_delay. srv_timed_spin_delay is first reset to 0 and stays 0 if either ut_usectime() call returns nonzero. */ static void time_spin_delay() -{ - ulint start_sec, end_sec; - ulint start_usec, end_usec; - int i; - - srv_timed_spin_delay = 0; - - if (ut_usectime(&start_sec, &start_usec)) /* nonzero return (presumably an error): give up, result stays 0 */ - return; - - for (i = 0; i < (int)SYNC_SPIN_ROUNDS; ++i) - ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); - - if (ut_usectime(&end_sec, &end_usec)) /* nonzero return (presumably an error): give up, result stays 0 */ - return; - - srv_timed_spin_delay =ut_usecdiff(end_sec, end_usec, - start_sec, start_usec); -} - -/************************************************************************* -Prints counters for work done by srv_master_thread. Writes four lines to file: the master thread loop counters, the sync/async log flush counters, srv_thread_wait_mics (in microseconds and in seconds), and srv_timed_spin_delay together with srv_spin_wait_delay and SYNC_SPIN_ROUNDS. */ - -static -void -srv_print_extra( -/*===================*/ - FILE *file) /* in: output stream */ -{ - fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, " - "%lu 10_second, %lu background, %lu flush\n", - srv_main_1_second_loops, srv_main_sleeps, - srv_main_10_second_loops, srv_main_background_loops, - srv_main_flush_loops); - fprintf(file, "srv_master_thread log flush: %lu sync, %lu async\n", - srv_sync_flush, srv_async_flush); - fprintf(file, "srv_wait_thread_mics %lld microseconds, %.1f seconds\n", - srv_thread_wait_mics, - (double) srv_thread_wait_mics / 1000000.0); - fprintf(file, - "spinlock delay for %d delay %d rounds is %lld mics\n", - (int)srv_spin_wait_delay, - (int)SYNC_SPIN_ROUNDS, - srv_timed_spin_delay); -} - -/************************************************************************* -Sets the info describing an i/o thread current state. 
*/ - -void -srv_set_io_thread_op_info( -/*======================*/ - ulint i, /* in: the 'segment' of the i/o thread */ - const char* str) /* in: constant char string describing the - state */ -{ - ut_a(i < SRV_MAX_N_IO_THREADS); - - srv_io_thread_op_info[i] = str; -} - -/************************************************************************* -Accessor function to get pointer to n'th slot in the server thread -table. */ -static -srv_slot_t* -srv_table_get_nth_slot( -/*===================*/ - /* out: pointer to the slot */ - ulint index) /* in: index of the slot */ -{ - ut_a(index < OS_THREAD_MAX_N); - - return(srv_sys->threads + index); -} - -#ifndef UNIV_HOTBACKUP -/************************************************************************* -Gets the number of threads in the system. */ - -ulint -srv_get_n_threads(void) -/*===================*/ -{ - ulint i; - ulint n_threads = 0; - - mutex_enter(&kernel_mutex); - - for (i = SRV_COM; i < SRV_MASTER + 1; i++) { - - n_threads += srv_n_threads[i]; - } - - mutex_exit(&kernel_mutex); - - return(n_threads); -} - -/************************************************************************* -Reserves a slot in the thread table for the current thread. Also creates the -thread local storage struct for the current thread. NOTE! The server mutex -has to be reserved by the caller! */ -static -ulint -srv_table_reserve_slot( -/*===================*/ - /* out: reserved slot index */ - ulint type) /* in: type of the thread: one of SRV_COM, ... 
*/ -{ - srv_slot_t* slot; - ulint i; - - ut_a(type > 0); - ut_a(type <= SRV_MASTER); - - i = 0; - slot = srv_table_get_nth_slot(i); - - while (slot->in_use) { - i++; - slot = srv_table_get_nth_slot(i); - } - - ut_a(slot->in_use == FALSE); - - slot->in_use = TRUE; - slot->suspended = FALSE; - slot->id = os_thread_get_curr_id(); - slot->handle = os_thread_get_curr(); - slot->type = type; - - thr_local_create(); - - thr_local_set_slot_no(os_thread_get_curr_id(), i); - - return(i); -} - -/************************************************************************* -Suspends the calling thread to wait for the event in its thread slot. -NOTE! The server mutex has to be reserved by the caller! */ -static -os_event_t -srv_suspend_thread(void) -/*====================*/ - /* out: event for the calling thread to wait */ -{ - srv_slot_t* slot; - os_event_t event; - ulint slot_no; - ulint type; - - ut_ad(mutex_own(&kernel_mutex)); - - slot_no = thr_local_get_slot_no(os_thread_get_curr_id()); - - if (srv_print_thread_releases) { - fprintf(stderr, - "Suspending thread %lu to slot %lu meter %lu\n", - (ulong) os_thread_get_curr_id(), (ulong) slot_no, - (ulong) srv_meter[SRV_RECOVERY]); - } - - slot = srv_table_get_nth_slot(slot_no); - - type = slot->type; - - ut_ad(type >= SRV_WORKER); - ut_ad(type <= SRV_MASTER); - - event = slot->event; - - slot->suspended = TRUE; - - ut_ad(srv_n_threads_active[type] > 0); - - srv_n_threads_active[type]--; - - os_event_reset(event); - - return(event); -} -#endif /* !UNIV_HOTBACKUP */ - -/************************************************************************* -Releases threads of the type given from suspension in the thread table. -NOTE! The server mutex has to be reserved by the caller! 
*/ - -ulint -srv_release_threads( -/*================*/ - /* out: number of threads released: this may be - < n if not enough threads were suspended at the - moment */ - ulint type, /* in: thread type */ - ulint n) /* in: number of threads to release */ -{ - srv_slot_t* slot; - ulint i; - ulint count = 0; - - ut_ad(type >= SRV_WORKER); - ut_ad(type <= SRV_MASTER); - ut_ad(n > 0); - ut_ad(mutex_own(&kernel_mutex)); - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - - slot = srv_table_get_nth_slot(i); - - if (slot->in_use && slot->type == type && slot->suspended) { - - slot->suspended = FALSE; - - srv_n_threads_active[type]++; - - os_event_set(slot->event); - - if (srv_print_thread_releases) { - fprintf(stderr, - "Releasing thread %lu type %lu" - " from slot %lu meter %lu\n", - (ulong) slot->id, (ulong) type, - (ulong) i, - (ulong) srv_meter[SRV_RECOVERY]); - } - - count++; - - if (count == n) { - break; - } - } - } - - return(count); -} - -/************************************************************************* -Returns the calling thread type. */ - -ulint -srv_get_thread_type(void) -/*=====================*/ - /* out: SRV_COM, ... */ -{ - ulint slot_no; - srv_slot_t* slot; - ulint type; - - mutex_enter(&kernel_mutex); - - slot_no = thr_local_get_slot_no(os_thread_get_curr_id()); - - slot = srv_table_get_nth_slot(slot_no); - - type = slot->type; - - ut_ad(type >= SRV_WORKER); - ut_ad(type <= SRV_MASTER); - - mutex_exit(&kernel_mutex); - - return(type); -} - -/************************************************************************* -Initializes the server. 
*/ - -void -srv_init(void) -/*==========*/ -{ - srv_conc_slot_t* conc_slot; - srv_slot_t* slot; - dict_table_t* table; - ulint i; - - time_spin_delay(); - - srv_sys = mem_alloc(sizeof(srv_sys_t)); - - kernel_mutex_temp = mem_alloc(sizeof(mutex_t)); - mutex_create(&kernel_mutex, SYNC_KERNEL); - - mutex_create(&srv_innodb_monitor_mutex, SYNC_NO_ORDER_CHECK); - - srv_sys->threads = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)); - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - slot = srv_table_get_nth_slot(i); - slot->in_use = FALSE; - slot->type=0; /* Avoid purify errors */ - slot->event = os_event_create(NULL); - ut_a(slot->event); - } - - srv_mysql_table = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_slot_t)); - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - slot = srv_mysql_table + i; - slot->in_use = FALSE; - slot->type = 0; - slot->event = os_event_create(NULL); - ut_a(slot->event); - } - - srv_lock_timeout_thread_event = os_event_create(NULL); - - for (i = 0; i < SRV_MASTER + 1; i++) { - srv_n_threads_active[i] = 0; - srv_n_threads[i] = 0; - srv_meter[i] = 30; - srv_meter_low_water[i] = 50; - srv_meter_high_water[i] = 100; - srv_meter_high_water2[i] = 200; - srv_meter_foreground[i] = 250; - } - - UT_LIST_INIT(srv_sys->tasks); - - /* create dummy table and index for old-style infimum and supremum */ - table = dict_mem_table_create("SYS_DUMMY1", - DICT_HDR_SPACE, 1, 0); - dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR, - DATA_ENGLISH | DATA_NOT_NULL, 8); - - srv_sys->dummy_ind1 = dict_mem_index_create( - "SYS_DUMMY1", "SYS_DUMMY1", DICT_HDR_SPACE, 0, 1); - dict_index_add_col(srv_sys->dummy_ind1, table, (dict_col_t*) - dict_table_get_nth_col(table, 0), 0); - srv_sys->dummy_ind1->table = table; - /* create dummy table and index for new-style infimum and supremum */ - table = dict_mem_table_create("SYS_DUMMY2", - DICT_HDR_SPACE, 1, DICT_TF_COMPACT); - dict_mem_table_add_col(table, NULL, NULL, DATA_CHAR, - DATA_ENGLISH | DATA_NOT_NULL, 8); - srv_sys->dummy_ind2 = 
dict_mem_index_create( - "SYS_DUMMY2", "SYS_DUMMY2", DICT_HDR_SPACE, 0, 1); - dict_index_add_col(srv_sys->dummy_ind2, table, (dict_col_t*) - dict_table_get_nth_col(table, 0), 0); - srv_sys->dummy_ind2->table = table; - - /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ - srv_sys->dummy_ind1->cached = srv_sys->dummy_ind2->cached = TRUE; - - /* Init the server concurrency restriction data structures */ - - os_fast_mutex_init(&srv_conc_mutex); - - UT_LIST_INIT(srv_conc_queue); - - srv_conc_slots = mem_alloc(OS_THREAD_MAX_N * sizeof(srv_conc_slot_t)); - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - conc_slot = srv_conc_slots + i; - conc_slot->reserved = FALSE; - conc_slot->event = os_event_create(NULL); - ut_a(conc_slot->event); - } -} - -/************************************************************************* -Frees the OS fast mutex created in srv_init(). */ - -void -srv_free(void) -/*==========*/ -{ - os_fast_mutex_free(&srv_conc_mutex); -} - -/************************************************************************* -Initializes the synchronization primitives, memory system, and the thread -local storage. */ - -void -srv_general_init(void) -/*==================*/ -{ - os_sync_init(); - sync_init(); - mem_init(srv_mem_pool_size); - thr_local_init(); -} - -/*======================= InnoDB Server FIFO queue =======================*/ - -/* Maximum allowable purge history length. <=0 means 'infinite'. */ -ulong srv_max_purge_lag = 0; - -/************************************************************************* -Puts an OS thread to wait if there are too many concurrent threads -(>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. 
*/ - -#ifdef UNIV_SYNC_ATOMIC -static void -inc_srv_conc_n_threads(lint *n_threads) -{ - *n_threads = os_atomic_increment(&srv_conc_n_threads, 1); -} - -static void -dec_srv_conc_n_threads() -{ - os_atomic_increment(&srv_conc_n_threads, -1); -} -#endif - -static void -print_already_in_error(trx_t* trx) -{ - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: trying to declare trx" - " to enter InnoDB, but\n" - "InnoDB: it already is declared.\n", stderr); - trx_print(stderr, trx, 0); - putc('\n', stderr); - return; -} - -#ifdef UNIV_SYNC_ATOMIC -static void -enter_innodb_with_tickets(trx_t* trx) -{ - trx->declared_to_be_inside_innodb = TRUE; - trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER; - return; -} - -static void -srv_conc_enter_innodb_timer_based(trx_t* trx) -{ - lint conc_n_threads; - ibool has_yielded = FALSE; - ulint has_slept = 0; - - if (trx->declared_to_be_inside_innodb) { - print_already_in_error(trx); - } -retry: - if (srv_conc_n_threads < (lint) srv_thread_concurrency) { - inc_srv_conc_n_threads(&conc_n_threads); - if (conc_n_threads <= (lint) srv_thread_concurrency) { - enter_innodb_with_tickets(trx); - return; - } - dec_srv_conc_n_threads(&conc_n_threads); - } - if (!has_yielded) - { - has_yielded = TRUE; - os_thread_yield(); - goto retry; - } - if (trx->has_search_latch - || NULL != UT_LIST_GET_FIRST(trx->trx_locks)) { - - inc_srv_conc_n_threads(&conc_n_threads); - enter_innodb_with_tickets(trx); - return; - } - if (has_slept < 2) - { - trx->op_info = "sleeping before entering InnoDB"; - os_thread_sleep(10000); - trx->op_info = ""; - has_slept++; - } - inc_srv_conc_n_threads(&conc_n_threads); - enter_innodb_with_tickets(trx); - return; -} - -static void -srv_conc_exit_innodb_timer_based(trx_t* trx) -{ - dec_srv_conc_n_threads(); - trx->declared_to_be_inside_innodb = FALSE; - trx->n_tickets_to_enter_innodb = 0; - return; -} -#endif - -void -srv_conc_enter_innodb( -/*==================*/ - trx_t* trx) /* in: transaction object 
associated with the - thread */ -{ - ibool has_slept = FALSE; - srv_conc_slot_t* slot = NULL; - ulint i; - - if (trx->mysql_thd != NULL - && thd_is_replication_slave_thread(trx->mysql_thd)) { - - /* TODO Do something more interesting (based on a config - parameter). Some users what to give the replication - thread very low priority, see http://bugs.mysql.com/25078 - This can be done by introducing - innodb_replication_delay(ms) config parameter */ - return; - } - - /* If trx has 'free tickets' to enter the engine left, then use one - such ticket */ - - if (trx->n_tickets_to_enter_innodb > 0) { - trx->n_tickets_to_enter_innodb--; - - return; - } - -#ifdef UNIV_SYNC_ATOMIC - if (srv_thread_concurrency_timer_based) { - srv_conc_enter_innodb_timer_based(trx); - return; - } -#endif - - os_fast_mutex_lock(&srv_conc_mutex); -retry: - if (trx->declared_to_be_inside_innodb) { - print_already_in_error(trx); - os_fast_mutex_unlock(&srv_conc_mutex); - - return; - } - - ut_ad(srv_conc_n_threads >= 0); - - if (srv_conc_n_threads < (lint)srv_thread_concurrency) { - - srv_conc_n_threads++; - trx->declared_to_be_inside_innodb = TRUE; - trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER; - - os_fast_mutex_unlock(&srv_conc_mutex); - - return; - } - - /* If the transaction is not holding resources, let it sleep - for SRV_THREAD_SLEEP_DELAY microseconds, and try again then */ - - if (!has_slept && !trx->has_search_latch - && NULL == UT_LIST_GET_FIRST(trx->trx_locks)) { - - has_slept = TRUE; /* We let it sleep only once to avoid - starvation */ - - srv_conc_n_waiting_threads++; - - os_fast_mutex_unlock(&srv_conc_mutex); - - trx->op_info = "sleeping before joining InnoDB queue"; - - /* Peter Zaitsev suggested that we take the sleep away - altogether. But the sleep may be good in pathological - situations of lots of thread switches. Simply put some - threads aside for a while to reduce the number of thread - switches. 
*/ - if (SRV_THREAD_SLEEP_DELAY > 0) { - os_thread_sleep(SRV_THREAD_SLEEP_DELAY); - } - - trx->op_info = ""; - - os_fast_mutex_lock(&srv_conc_mutex); - - srv_conc_n_waiting_threads--; - - goto retry; - } - - /* Too many threads inside: put the current thread to a queue */ - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - slot = srv_conc_slots + i; - - if (!slot->reserved) { - - break; - } - } - - if (i == OS_THREAD_MAX_N) { - /* Could not find a free wait slot, we must let the - thread enter */ - - srv_conc_n_threads++; - trx->declared_to_be_inside_innodb = TRUE; - trx->n_tickets_to_enter_innodb = 0; - - os_fast_mutex_unlock(&srv_conc_mutex); - - return; - } - - /* Release possible search system latch this thread has */ - if (trx->has_search_latch) { - trx_search_latch_release_if_reserved(trx); - } - - /* Add to the queue */ - slot->reserved = TRUE; - slot->wait_ended = FALSE; - - UT_LIST_ADD_LAST(srv_conc_queue, srv_conc_queue, slot); - - os_event_reset(slot->event); - - srv_conc_n_waiting_threads++; - - os_fast_mutex_unlock(&srv_conc_mutex); - - /* Go to wait for the event; when a thread leaves InnoDB it will - release this thread */ - - trx->op_info = "waiting in InnoDB queue"; - - os_event_wait(slot->event); - - trx->op_info = ""; - - os_fast_mutex_lock(&srv_conc_mutex); - - srv_conc_n_waiting_threads--; - - /* NOTE that the thread which released this thread already - incremented the thread counter on behalf of this thread */ - - slot->reserved = FALSE; - - UT_LIST_REMOVE(srv_conc_queue, srv_conc_queue, slot); - - trx->declared_to_be_inside_innodb = TRUE; - trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER; - - os_fast_mutex_unlock(&srv_conc_mutex); -} - -/************************************************************************* -This lets a thread enter InnoDB regardless of the number of threads inside -InnoDB. This must be called when a thread ends a lock wait. 
*/ - -void -srv_conc_force_enter_innodb( -/*========================*/ - trx_t* trx) /* in: transaction object associated with the - thread */ -{ - - if (UNIV_LIKELY(!srv_thread_concurrency)) { - - return; - } - - ut_ad(srv_conc_n_threads >= 0); -#ifdef UNIV_SYNC_ATOMIC - if (srv_thread_concurrency_timer_based) { - lint conc_n_threads; - - inc_srv_conc_n_threads(&conc_n_threads); - trx->declared_to_be_inside_innodb = TRUE; - trx->n_tickets_to_enter_innodb = 1; - return; - } -#endif - os_fast_mutex_lock(&srv_conc_mutex); - srv_conc_n_threads++; - trx->declared_to_be_inside_innodb = TRUE; - trx->n_tickets_to_enter_innodb = 1; - os_fast_mutex_unlock(&srv_conc_mutex); -} - -/************************************************************************* -This must be called when a thread exits InnoDB in a lock wait or at the -end of an SQL statement. */ - -void -srv_conc_force_exit_innodb( -/*=======================*/ - trx_t* trx) /* in: transaction object associated with the - thread */ -{ - srv_conc_slot_t* slot = NULL; - - if (trx->mysql_thd != NULL - && thd_is_replication_slave_thread(trx->mysql_thd)) { - - return; - } - - if (trx->declared_to_be_inside_innodb == FALSE) { - - return; - } - -#ifdef UNIV_SYNC_ATOMIC - if (srv_thread_concurrency_timer_based) - { - srv_conc_exit_innodb_timer_based(trx); - return; - } -#endif - - os_fast_mutex_lock(&srv_conc_mutex); - - ut_ad(srv_conc_n_threads > 0); - srv_conc_n_threads--; - trx->declared_to_be_inside_innodb = FALSE; - trx->n_tickets_to_enter_innodb = 0; - - if (srv_conc_n_threads < (lint)srv_thread_concurrency) { - /* Look for a slot where a thread is waiting and no other - thread has yet released the thread */ - - slot = UT_LIST_GET_FIRST(srv_conc_queue); - - while (slot && slot->wait_ended == TRUE) { - slot = UT_LIST_GET_NEXT(srv_conc_queue, slot); - } - - if (slot != NULL) { - slot->wait_ended = TRUE; - - /* We increment the count on behalf of the released - thread */ - - srv_conc_n_threads++; - } - } - - 
os_fast_mutex_unlock(&srv_conc_mutex); - - if (slot != NULL) { - os_event_set(slot->event); - } -} - -/************************************************************************* -This must be called when a thread exits InnoDB. */ - -void -srv_conc_exit_innodb( -/*=================*/ - trx_t* trx) /* in: transaction object associated with the - thread */ -{ - if (trx->n_tickets_to_enter_innodb > 0) { - /* We will pretend the thread is still inside InnoDB though it - now leaves the InnoDB engine. In this way we save - a lot of semaphore operations. srv_conc_force_exit_innodb is - used to declare the thread definitely outside InnoDB. It - should be called when there is a lock wait or an SQL statement - ends. */ - - return; - } - - srv_conc_force_exit_innodb(trx); -} - -/*========================================================================*/ - -/************************************************************************* -Normalizes init parameter values to use units we use inside InnoDB. */ -static -ulint -srv_normalize_init_values(void) -/*===========================*/ - /* out: DB_SUCCESS or error code */ -{ - ulint n; - ulint i; - - n = srv_n_data_files; - - for (i = 0; i < n; i++) { - srv_data_file_sizes[i] = srv_data_file_sizes[i] - * ((1024 * 1024) / UNIV_PAGE_SIZE); - } - - srv_last_file_size_max = srv_last_file_size_max - * ((1024 * 1024) / UNIV_PAGE_SIZE); - - srv_log_file_size = srv_log_file_size / UNIV_PAGE_SIZE; - - srv_log_buffer_size = srv_log_buffer_size / UNIV_PAGE_SIZE; - - srv_pool_size = srv_pool_size / (UNIV_PAGE_SIZE / 1024); - - srv_awe_window_size = srv_awe_window_size / UNIV_PAGE_SIZE; - - if (srv_use_awe) { - /* If we are using AWE we must save memory in the 32-bit - address space of the process, and cannot bind the lock - table size to the real buffer pool size. 
*/ - - srv_lock_table_size = 20 * srv_awe_window_size; - } else { - srv_lock_table_size = 5 * srv_pool_size; - } - - return(DB_SUCCESS); -} - -/************************************************************************* -Boots the InnoDB server. */ - -ulint -srv_boot(void) -/*==========*/ - /* out: DB_SUCCESS or error code */ -{ - ulint err; - - /* Transform the init parameter values given by MySQL to - use units we use inside InnoDB: */ - - err = srv_normalize_init_values(); - - if (err != DB_SUCCESS) { - return(err); - } - - /* Initialize synchronization primitives, memory management, and thread - local storage */ - - srv_general_init(); - - /* Initialize this module */ - - srv_init(); - - return(DB_SUCCESS); -} - -#ifndef UNIV_HOTBACKUP -/************************************************************************* -Reserves a slot in the thread table for the current MySQL OS thread. -NOTE! The kernel mutex has to be reserved by the caller! */ -static -srv_slot_t* -srv_table_reserve_slot_for_mysql(void) -/*==================================*/ - /* out: reserved slot */ -{ - srv_slot_t* slot; - ulint i; - - ut_ad(mutex_own(&kernel_mutex)); - - i = 0; - slot = srv_mysql_table + i; - - while (slot->in_use) { - i++; - - if (i >= OS_THREAD_MAX_N) { - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: There appear to be %lu MySQL" - " threads currently waiting\n" - "InnoDB: inside InnoDB, which is the" - " upper limit. Cannot continue operation.\n" - "InnoDB: We intentionally generate" - " a seg fault to print a stack trace\n" - "InnoDB: on Linux. 
But first we print" - " a list of waiting threads.\n", (ulong) i); - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - - slot = srv_mysql_table + i; - - fprintf(stderr, - "Slot %lu: thread id %lu, type %lu," - " in use %lu, susp %lu, time %lu\n", - (ulong) i, - (ulong) os_thread_pf(slot->id), - (ulong) slot->type, - (ulong) slot->in_use, - (ulong) slot->suspended, - (ulong) difftime(ut_time(), - slot->suspend_time)); - } - - ut_error; - } - - slot = srv_mysql_table + i; - } - - ut_a(slot->in_use == FALSE); - - slot->in_use = TRUE; - slot->id = os_thread_get_curr_id(); - slot->handle = os_thread_get_curr(); - - return(slot); -} -#endif /* !UNIV_HOTBACKUP */ - -/******************************************************************* -Puts a MySQL OS thread to wait for a lock to be released. If an error -occurs during the wait trx->error_state associated with thr is -!= DB_SUCCESS when we return. DB_LOCK_WAIT_TIMEOUT and DB_DEADLOCK -are possible errors. DB_DEADLOCK is returned if selective deadlock -resolution chose this transaction as a victim. 
*/ - -void -srv_suspend_mysql_thread( -/*=====================*/ - que_thr_t* thr) /* in: query thread associated with the MySQL - OS thread */ -{ -#ifndef UNIV_HOTBACKUP - srv_slot_t* slot; - os_event_t event; - double wait_time; - trx_t* trx; - ibool had_dict_lock = FALSE; - ibool was_declared_inside_innodb = FALSE; - ib_longlong start_time = 0; - ib_longlong finish_time; - ulint diff_time; - ulint sec; - ulint ms; - - ut_ad(!mutex_own(&kernel_mutex)); - - trx = thr_get_trx(thr); - - os_event_set(srv_lock_timeout_thread_event); - - mutex_enter(&kernel_mutex); - - trx->error_state = DB_SUCCESS; - - if (thr->state == QUE_THR_RUNNING) { - - ut_ad(thr->is_active == TRUE); - - /* The lock has already been released or this transaction - was chosen as a deadlock victim: no need to suspend */ - - if (trx->was_chosen_as_deadlock_victim) { - - trx->error_state = DB_DEADLOCK; - trx->was_chosen_as_deadlock_victim = FALSE; - } - - mutex_exit(&kernel_mutex); - - return; - } - - ut_ad(thr->is_active == FALSE); - - slot = srv_table_reserve_slot_for_mysql(); - - event = slot->event; - - slot->thr = thr; - - os_event_reset(event); - - slot->suspend_time = ut_time(); - - if (thr->lock_state == QUE_THR_LOCK_ROW) { - srv_n_lock_wait_count++; - srv_n_lock_wait_current_count++; - - if (ut_usectime(&sec, &ms) == -1) { - start_time = -1; - } else { - start_time = (ib_longlong)sec * 1000000 + ms; - } - } - /* Wake the lock timeout monitor thread, if it is suspended */ - - os_event_set(srv_lock_timeout_thread_event); - - mutex_exit(&kernel_mutex); - - if (trx->declared_to_be_inside_innodb) { - - was_declared_inside_innodb = TRUE; - - /* We must declare this OS thread to exit InnoDB, since a - possible other thread holding a lock which this thread waits - for must be allowed to enter, sooner or later */ - - srv_conc_force_exit_innodb(trx); - } - - /* Release possible foreign key check latch */ - if (trx->dict_operation_lock_mode == RW_S_LATCH) { - - had_dict_lock = TRUE; - - 
row_mysql_unfreeze_data_dictionary(trx); - } - - ut_a(trx->dict_operation_lock_mode == 0); - - /* Wait for the release */ - - os_event_wait(event); - - if (had_dict_lock) { - - row_mysql_freeze_data_dictionary(trx); - } - - if (was_declared_inside_innodb) { - - /* Return back inside InnoDB */ - - srv_conc_force_enter_innodb(trx); - } - - mutex_enter(&kernel_mutex); - - /* Release the slot for others to use */ - - slot->in_use = FALSE; - - wait_time = ut_difftime(ut_time(), slot->suspend_time); - - if (thr->lock_state == QUE_THR_LOCK_ROW) { - if (ut_usectime(&sec, &ms) == -1) { - finish_time = -1; - } else { - finish_time = (ib_longlong)sec * 1000000 + ms; - } - - diff_time = (ulint) (finish_time - start_time); - - srv_n_lock_wait_current_count--; - srv_n_lock_wait_time = srv_n_lock_wait_time + diff_time; - if (diff_time > srv_n_lock_max_wait_time && - /* only update the variable if we successfully - retrieved the start and finish times. See Bug#36819. */ - start_time != -1 && finish_time != -1) { - srv_n_lock_max_wait_time = diff_time; - } - } - - if (trx->was_chosen_as_deadlock_victim) { - - trx->error_state = DB_DEADLOCK; - trx->was_chosen_as_deadlock_victim = FALSE; - } - - mutex_exit(&kernel_mutex); - - if (srv_lock_wait_timeout < 100000000 - && wait_time > (double)srv_lock_wait_timeout) { - - trx->error_state = DB_LOCK_WAIT_TIMEOUT; - } -#else /* UNIV_HOTBACKUP */ - /* This function depends on MySQL code that is not included in - InnoDB Hot Backup builds. Besides, this function should never - be called in InnoDB Hot Backup. */ - ut_error; -#endif /* UNIV_HOTBACKUP */ -} - -/************************************************************************ -Releases a MySQL OS thread waiting for a lock to be released, if the -thread is already suspended. 
*/ - -void -srv_release_mysql_thread_if_suspended( -/*==================================*/ - que_thr_t* thr) /* in: query thread associated with the - MySQL OS thread */ -{ -#ifndef UNIV_HOTBACKUP - srv_slot_t* slot; - ulint i; - - ut_ad(mutex_own(&kernel_mutex)); - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - - slot = srv_mysql_table + i; - - if (slot->in_use && slot->thr == thr) { - /* Found */ - - os_event_set(slot->event); - - return; - } - } - - /* not found */ -#else /* UNIV_HOTBACKUP */ - /* This function depends on MySQL code that is not included in - InnoDB Hot Backup builds. Besides, this function should never - be called in InnoDB Hot Backup. */ - ut_error; -#endif /* UNIV_HOTBACKUP */ -} - -#ifndef UNIV_HOTBACKUP -/********************************************************************** -Refreshes the values used to calculate per-second averages. */ -static -ibool -srv_refresh_innodb_monitor_stats(void) -/*==================================*/ -{ - /* Sometimes we will skip stats update to avoid deadlock, since - since this function is called by the background wake-up thread */ - if (mutex_enter_nowait(&srv_innodb_monitor_mutex)) { - /* mutex_enter_nowait returns 1 on failure */ - return FALSE; - } - - srv_last_monitor_time = time(NULL); - - os_aio_refresh_stats(); - - btr_cur_n_sea_old = btr_cur_n_sea; - btr_cur_n_non_sea_old = btr_cur_n_non_sea; - - log_refresh_stats(); - - buf_refresh_io_stats(); - - srv_n_rows_inserted_old = srv_n_rows_inserted; - srv_n_rows_updated_old = srv_n_rows_updated; - srv_n_rows_deleted_old = srv_n_rows_deleted; - srv_n_rows_read_old = srv_n_rows_read; - - mutex_exit(&srv_innodb_monitor_mutex); - return TRUE; -} - -/********************************************************************** -Outputs to a file the output of the InnoDB Monitor. 
*/ - -void -srv_printf_innodb_monitor( -/*======================*/ - FILE* file) /* in: output stream */ -{ - double time_elapsed; - time_t current_time; - ulint n_reserved; - - mutex_enter(&srv_innodb_monitor_mutex); - - current_time = time(NULL); - - /* We add 0.001 seconds to time_elapsed to prevent division - by zero if two users happen to call SHOW INNODB STATUS at the same - time */ - - time_elapsed = difftime(current_time, srv_last_monitor_time) - + 0.001; - - srv_last_monitor_time = time(NULL); - - fputs("\n=====================================\n", file); - - ut_print_timestamp(file); - fprintf(file, - " INNODB MONITOR OUTPUT\n" - "=====================================\n" - "Per second averages calculated from the last %lu seconds\n", - (ulong)time_elapsed); - - fputs("----------\n" - "BACKGROUND THREAD\n" - "----------\n", file); - srv_print_extra(file); - - fputs("----------\n" - "SEMAPHORES\n" - "----------\n", file); - sync_print(file); - - /* Conceptually, srv_innodb_monitor_mutex has a very high latching - order level in sync0sync.h, while dict_foreign_err_mutex has a very - low level 135. Therefore we can reserve the latter mutex here without - a danger of a deadlock of threads. 
*/ - - mutex_enter(&dict_foreign_err_mutex); - - if (ftell(dict_foreign_err_file) != 0L) { - fputs("------------------------\n" - "LATEST FOREIGN KEY ERROR\n" - "------------------------\n", file); - ut_copy_file(file, dict_foreign_err_file); - } - - mutex_exit(&dict_foreign_err_mutex); - - /* Print open transaction details */ - lock_print_info_summary(file); - - if (trx_start) { - long t = ftell(file); - if (t < 0) { - *trx_start = ULINT_UNDEFINED; - } else { - *trx_start = (ulint) t; - } - } - lock_print_info_all_transactions(file); - if (trx_end) { - long t = ftell(file); - if (t < 0) { - *trx_end = ULINT_UNDEFINED; - } else { - *trx_end = (ulint) t; - } - } - - fputs("--------\n" - "FILE I/O\n" - "--------\n", file); - os_aio_print(file); - - fputs("-------------------------------------\n" - "INSERT BUFFER AND ADAPTIVE HASH INDEX\n" - "-------------------------------------\n", file); - ibuf_print(file); - - ha_print_info(file, btr_search_sys->hash_index); - - fprintf(file, - "%.2f hash searches/s, %.2f non-hash searches/s\n", - (btr_cur_n_sea - btr_cur_n_sea_old) - / time_elapsed, - (btr_cur_n_non_sea - btr_cur_n_non_sea_old) - / time_elapsed); - btr_cur_n_sea_old = btr_cur_n_sea; - btr_cur_n_non_sea_old = btr_cur_n_non_sea; - - fputs("---\n" - "LOG\n" - "---\n", file); - log_print(file); - - fputs("----------------------\n" - "BUFFER POOL AND MEMORY\n" - "----------------------\n", file); - fprintf(file, - "Total memory allocated " ULINTPF - "; in additional pool allocated " ULINTPF "\n", - ut_total_allocated_memory, - mem_pool_get_reserved(mem_comm_pool)); - fprintf(file, "Dictionary memory allocated " ULINTPF "\n", - dict_sys->size); - - if (srv_use_awe) { - fprintf(file, - "In addition to that %lu MB of AWE memory allocated\n", - (ulong) (srv_pool_size - / ((1024 * 1024) / UNIV_PAGE_SIZE))); - } - - buf_print_io(file); - - fputs("--------------\n" - "ROW OPERATIONS\n" - "--------------\n", file); - fprintf(file, "%ld queries inside InnoDB, %lu queries in 
queue\n", - (long) srv_conc_n_threads, - (ulong) srv_conc_n_waiting_threads); - - fprintf(file, "%lu read views open inside InnoDB\n", - UT_LIST_GET_LEN(trx_sys->view_list)); - - n_reserved = fil_space_get_n_reserved_extents(0); - if (n_reserved > 0) { - fprintf(file, - "%lu tablespace extents now reserved for" - " B-tree split operations\n", - (ulong) n_reserved); - } - -#ifdef UNIV_LINUX - fprintf(file, "Main thread process no. %lu, id %lu, state: %s\n", - (ulong) srv_main_thread_process_no, - (ulong) srv_main_thread_id, - srv_main_thread_op_info); -#else - fprintf(file, "Main thread id %lu, state: %s\n", - (ulong) srv_main_thread_id, - srv_main_thread_op_info); -#endif - fprintf(file, - "Number of rows inserted " ULINTPF - ", updated " ULINTPF ", deleted " ULINTPF - ", read " ULINTPF "\n", - srv_n_rows_inserted, - srv_n_rows_updated, - srv_n_rows_deleted, - srv_n_rows_read); - fprintf(file, - "%.2f inserts/s, %.2f updates/s," - " %.2f deletes/s, %.2f reads/s\n", - (srv_n_rows_inserted - srv_n_rows_inserted_old) - / time_elapsed, - (srv_n_rows_updated - srv_n_rows_updated_old) - / time_elapsed, - (srv_n_rows_deleted - srv_n_rows_deleted_old) - / time_elapsed, - (srv_n_rows_read - srv_n_rows_read_old) - / time_elapsed); - - srv_n_rows_inserted_old = srv_n_rows_inserted; - srv_n_rows_updated_old = srv_n_rows_updated; - srv_n_rows_deleted_old = srv_n_rows_deleted; - srv_n_rows_read_old = srv_n_rows_read; - - fputs("----------------------------\n" - "END OF INNODB MONITOR OUTPUT\n" - "============================\n", file); - mutex_exit(&srv_innodb_monitor_mutex); - fflush(file); -} - -/********************************************************************** -Function to pass InnoDB status variables to MySQL */ - -void -srv_export_innodb_status(void) -{ - mutex_enter(&srv_innodb_monitor_mutex); - - export_vars.innodb_data_pending_reads - = os_n_pending_reads; - export_vars.innodb_data_pending_writes - = os_n_pending_writes; - export_vars.innodb_data_pending_fsyncs - = 
fil_n_pending_log_flushes - + fil_n_pending_tablespace_flushes; - export_vars.innodb_data_fsyncs = os_n_fsyncs; - export_vars.innodb_data_read = srv_data_read; - export_vars.innodb_data_reads = os_n_file_reads; - export_vars.innodb_data_writes = os_n_file_writes; - export_vars.innodb_data_written = srv_data_written; - export_vars.innodb_buffer_pool_read_requests = buf_pool->n_page_gets; - export_vars.innodb_buffer_pool_write_requests - = srv_buf_pool_write_requests; - export_vars.innodb_buffer_pool_wait_free = srv_buf_pool_wait_free; - export_vars.innodb_buffer_pool_pages_flushed = srv_buf_pool_flushed; - export_vars.innodb_buffer_pool_reads = srv_buf_pool_reads; - export_vars.innodb_buffer_pool_read_ahead_rnd = srv_read_ahead_rnd; - export_vars.innodb_buffer_pool_read_ahead_seq = srv_read_ahead_seq; - export_vars.innodb_buffer_pool_pages_data - = UT_LIST_GET_LEN(buf_pool->LRU); - export_vars.innodb_buffer_pool_pages_dirty - = UT_LIST_GET_LEN(buf_pool->flush_list); - export_vars.innodb_buffer_pool_pages_free - = UT_LIST_GET_LEN(buf_pool->free); -#ifdef UNIV_DEBUG - export_vars.innodb_buffer_pool_pages_latched - = buf_get_latched_pages_number(); -#endif /* UNIV_DEBUG */ - export_vars.innodb_buffer_pool_pages_total = buf_pool->curr_size; - - export_vars.innodb_buffer_pool_pages_misc = buf_pool->max_size - - UT_LIST_GET_LEN(buf_pool->LRU) - - UT_LIST_GET_LEN(buf_pool->free); -#ifdef UNIV_SYNC_ATOMIC - export_vars.innodb_have_sync_atomic = 1; -#else - export_vars.innodb_have_sync_atomic = 0; -#endif -#ifdef UNIV_DISABLE_MEM_POOL - export_vars.innodb_heap_enabled = 0; -#else - export_vars.innodb_heap_enabled = 1; -#endif - export_vars.innodb_page_size = UNIV_PAGE_SIZE; - export_vars.innodb_log_waits = srv_log_waits; - export_vars.innodb_os_log_written = srv_os_log_written; - export_vars.innodb_os_log_fsyncs = fil_n_log_flushes; - export_vars.innodb_os_log_pending_fsyncs = fil_n_pending_log_flushes; - export_vars.innodb_os_log_pending_writes = srv_os_log_pending_writes; 
- export_vars.innodb_log_write_requests = srv_log_write_requests; - export_vars.innodb_log_writes = srv_log_writes; - export_vars.innodb_dblwr_pages_written = srv_dblwr_pages_written; - export_vars.innodb_dblwr_writes = srv_dblwr_writes; - export_vars.innodb_pages_created = buf_pool->n_pages_created; - export_vars.innodb_pages_read = buf_pool->n_pages_read; - export_vars.innodb_pages_written = buf_pool->n_pages_written; - export_vars.innodb_row_lock_waits = srv_n_lock_wait_count; - export_vars.innodb_row_lock_current_waits - = srv_n_lock_wait_current_count; - export_vars.innodb_row_lock_time = srv_n_lock_wait_time / 1000; - if (srv_n_lock_wait_count > 0) { - export_vars.innodb_row_lock_time_avg = (ulint) - (srv_n_lock_wait_time / 1000 / srv_n_lock_wait_count); - } else { - export_vars.innodb_row_lock_time_avg = 0; - } - export_vars.innodb_row_lock_time_max - = srv_n_lock_max_wait_time / 1000; - export_vars.innodb_rows_read = srv_n_rows_read; - export_vars.innodb_rows_inserted = srv_n_rows_inserted; - export_vars.innodb_rows_updated = srv_n_rows_updated; - export_vars.innodb_rows_deleted = srv_n_rows_deleted; - export_vars.innodb_wake_ups = sync_wake_ups; - - mutex_exit(&srv_innodb_monitor_mutex); -} - -/************************************************************************* -A thread which wakes up threads whose lock wait may have lasted too long. -This also prints the info output by various InnoDB monitors. 
*/ - -os_thread_ret_t -srv_lock_timeout_and_monitor_thread( -/*================================*/ - /* out: a dummy parameter */ - void* arg __attribute__((unused))) - /* in: a dummy parameter required by - os_thread_create */ -{ - srv_slot_t* slot; - double time_elapsed; - time_t current_time; - time_t last_table_monitor_time; - time_t last_tablespace_monitor_time; - time_t last_monitor_time; - ibool some_waits; - double wait_time; - ulint i; - -#ifdef UNIV_DEBUG_THREAD_CREATION - fprintf(stderr, "Lock timeout thread starts, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); -#endif - UT_NOT_USED(arg); - srv_last_monitor_time = time(NULL); - last_table_monitor_time = time(NULL); - last_tablespace_monitor_time = time(NULL); - last_monitor_time = time(NULL); -loop: - srv_lock_timeout_and_monitor_active = TRUE; - - /* When someone is waiting for a lock, we wake up every second - and check if a timeout has passed for a lock wait */ - - os_thread_sleep(1000000); - - current_time = time(NULL); - - time_elapsed = difftime(current_time, last_monitor_time); - - if (time_elapsed > 15) { - last_monitor_time = time(NULL); - - if (srv_print_innodb_monitor) { - srv_printf_innodb_monitor(stderr); - } - - if (srv_innodb_status) { - mutex_enter(&srv_monitor_file_mutex); - rewind(srv_monitor_file); - srv_printf_innodb_monitor(srv_monitor_file); - os_file_set_eof(srv_monitor_file); - mutex_exit(&srv_monitor_file_mutex); - } - - if (srv_print_innodb_tablespace_monitor - && difftime(current_time, - last_tablespace_monitor_time) > 60) { - last_tablespace_monitor_time = time(NULL); - - fputs("========================" - "========================\n", - stderr); - - ut_print_timestamp(stderr); - - fputs(" INNODB TABLESPACE MONITOR OUTPUT\n" - "========================" - "========================\n", - stderr); - - fsp_print(0); - fputs("Validating tablespace\n", stderr); - fsp_validate(0); - fputs("Validation ok\n" - "---------------------------------------\n" - "END OF INNODB 
TABLESPACE MONITOR OUTPUT\n" - "=======================================\n", - stderr); - } - - if (srv_print_innodb_table_monitor - && difftime(current_time, last_table_monitor_time) > 60) { - - last_table_monitor_time = time(NULL); - - fputs("===========================================\n", - stderr); - - ut_print_timestamp(stderr); - - fputs(" INNODB TABLE MONITOR OUTPUT\n" - "===========================================\n", - stderr); - dict_print(); - - fputs("-----------------------------------\n" - "END OF INNODB TABLE MONITOR OUTPUT\n" - "==================================\n", - stderr); - } - } - - mutex_enter(&kernel_mutex); - - some_waits = FALSE; - - /* Check of all slots if a thread is waiting there, and if it - has exceeded the time limit */ - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - - slot = srv_mysql_table + i; - - if (slot->in_use) { - some_waits = TRUE; - - wait_time = ut_difftime(ut_time(), slot->suspend_time); - - if (srv_lock_wait_timeout < 100000000 - && (wait_time > (double) srv_lock_wait_timeout - || wait_time < 0)) { - - /* Timeout exceeded or a wrap-around in system - time counter: cancel the lock request queued - by the transaction and release possible - other transactions waiting behind; it is - possible that the lock has already been - granted: in that case do nothing */ - - if (thr_get_trx(slot->thr)->wait_lock) { - lock_cancel_waiting_and_release( - thr_get_trx(slot->thr) - ->wait_lock); - } - } - } - } - - os_event_reset(srv_lock_timeout_thread_event); - - mutex_exit(&kernel_mutex); - - if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) { - goto exit_func; - } - - if (some_waits || srv_print_innodb_monitor - || srv_print_innodb_lock_monitor - || srv_print_innodb_tablespace_monitor - || srv_print_innodb_table_monitor) { - goto loop; - } - - /* No one was waiting for a lock and no monitor was active: - suspend this thread */ - - srv_lock_timeout_and_monitor_active = FALSE; - -#if 0 - /* The following synchronisation is disabled, since - 
the InnoDB monitor output is to be updated every 15 seconds. */ - os_event_wait(srv_lock_timeout_thread_event); -#endif - goto loop; - -exit_func: - srv_lock_timeout_and_monitor_active = FALSE; - - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. */ - - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} - -/************************************************************************* -A thread which prints warnings about semaphore waits which have lasted -too long. These can be used to track bugs which cause hangs. -NOTE: This thread should not wait for any innodb mutexes or rw_locks. -A deadlock could arise where the thread holding that lock requires waking -by this background thread while this thread is blocked on that lock. */ - -os_thread_ret_t -srv_error_monitor_thread( -/*=====================*/ - /* out: a dummy parameter */ - void* arg __attribute__((unused))) - /* in: a dummy parameter required by - os_thread_create */ -{ - /* number of successive fatal timeouts observed */ - ulint fatal_cnt = 0; - -#ifdef UNIV_DEBUG_THREAD_CREATION - fprintf(stderr, "Error monitor thread starts, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); -#endif -loop: - srv_error_monitor_active = TRUE; - - if (difftime(time(NULL), srv_last_monitor_time) > 60) { - /* We refresh InnoDB Monitor values so that averages are - printed from at most 60 last seconds */ - - srv_refresh_innodb_monitor_stats(); - } - - /* In case mutex_exit is not a memory barrier, it is - theoretically possible some threads are left waiting though - the semaphore is already released. 
Wake up those threads: */ - - sync_arr_wake_threads_if_sema_free(); - - if (sync_array_print_long_waits()) { - fatal_cnt++; - if (fatal_cnt > 10) { - - fprintf(stderr, - "InnoDB: Error: semaphore wait has lasted" - " > %lu seconds\n" - "InnoDB: We intentionally crash the server," - " because it appears to be hung.\n", - (ulong) srv_fatal_semaphore_wait_threshold); - - ut_error; - } - } else { - fatal_cnt = 0; - } - - /* Flush stderr so that a database user gets the output - to possible MySQL error file */ - - fflush(stderr); - - os_thread_sleep(1000000); - - if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) { - - goto loop; - } - - srv_error_monitor_active = FALSE; - - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. */ - - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} - -/*********************************************************************** -Tells the InnoDB server that there has been activity in the database -and wakes up the master thread if it is suspended (not sleeping). Used -in the MySQL interface. Note that there is a small chance that the master -thread stays suspended (we do not protect our operation with the kernel -mutex, for performace reasons). */ - -void -srv_active_wake_master_thread(void) -/*===============================*/ -{ - srv_activity_count++; - - if (srv_n_threads_active[SRV_MASTER] == 0) { - - mutex_enter(&kernel_mutex); - - srv_release_threads(SRV_MASTER, 1); - - mutex_exit(&kernel_mutex); - } -} - -/*********************************************************************** -Wakes up the master thread if it is suspended or being suspended. 
*/ - -void -srv_wake_master_thread(void) -/*========================*/ -{ - srv_activity_count++; - - mutex_enter(&kernel_mutex); - - srv_release_threads(SRV_MASTER, 1); - - mutex_exit(&kernel_mutex); -} - -/************************************************************************* -Returns the number of IO operations that is X percent of the capacity. - -PCT_IO(5) -> returns the number of IO operations that is 5% of the max -where max is srv_io_capacity. -*/ -#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) pct / 100.0))) - -/************************************************************************* -The master thread controlling the server. */ - -os_thread_ret_t -srv_master_thread( -/*==============*/ - /* out: a dummy parameter */ - void* arg __attribute__((unused))) - /* in: a dummy parameter required by - os_thread_create */ -{ - os_event_t event; - time_t last_flush_time; - time_t current_time; - ulint old_activity_count; - ulint n_pages_purged; - ulint n_bytes_merged; - ulint n_pages_flushed; - ulint n_bytes_archived; - ulint n_tables_to_drop; - ulint n_ios; - ulint n_ios_old; - ulint n_ios_very_old; - ulint n_pend_ios; - ibool skip_sleep = FALSE; - ulint i; - -#ifdef UNIV_DEBUG_THREAD_CREATION - fprintf(stderr, "Master thread starts, id %lu\n", - os_thread_pf(os_thread_get_curr_id())); -#endif - fprintf(stderr, "InnoDB master thread running with io_capacity %lu\n", - srv_io_capacity); - - srv_main_thread_process_no = os_proc_get_number(); - srv_main_thread_id = os_thread_pf(os_thread_get_curr_id()); - - srv_table_reserve_slot(SRV_MASTER); - - mutex_enter(&kernel_mutex); - - srv_n_threads_active[SRV_MASTER]++; - - mutex_exit(&kernel_mutex); - -loop: - /*****************************************************************/ - /* ---- When there is database activity by users, we cycle in this - loop */ - - srv_main_thread_op_info = "reserving kernel mutex"; - - n_ios_very_old = log_sys->n_log_ios + buf_pool->n_pages_read - + buf_pool->n_pages_written; - 
mutex_enter(&kernel_mutex); - - /* Store the user activity counter at the start of this loop */ - old_activity_count = srv_activity_count; - - mutex_exit(&kernel_mutex); - - if (srv_force_recovery >= SRV_FORCE_NO_BACKGROUND) { - - goto suspend_thread; - } - - /* ---- We run the following loop approximately once per second - when there is database activity */ - - skip_sleep = FALSE; - - for (i = 0; i < 10; i++) { - n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read - + buf_pool->n_pages_written; - srv_main_thread_op_info = "sleeping"; - srv_main_1_second_loops++; - - if (!skip_sleep) { - - os_thread_sleep(1000000); - srv_main_sleeps++; - } - - skip_sleep = FALSE; - - /* ALTER TABLE in MySQL requires on Unix that the table handler - can drop tables lazily after there no longer are SELECT - queries to them. */ - - srv_main_thread_op_info = "doing background drop tables"; - - row_drop_tables_for_mysql_in_background(); - - srv_main_thread_op_info = ""; - - if (srv_fast_shutdown && srv_shutdown_state > 0) { - - goto background_loop; - } - - /* We flush the log once in a second even if no commit - is issued or the we have specified in my.cnf no flush - at transaction commit */ - - srv_main_thread_op_info = "flushing log"; - log_buffer_flush_to_disk(); - srv_sync_flush++; - - srv_main_thread_op_info = "making checkpoint"; - log_free_check(); - - /* If i/os during one second sleep were less than 5% of - capacity, we assume that there is free disk i/o capacity - available, and it makes sense to do an insert buffer merge. 
*/ - - n_pend_ios = buf_get_n_pending_ios() - + log_sys->n_pending_writes; - n_ios = log_sys->n_log_ios + buf_pool->n_pages_read - + buf_pool->n_pages_written; - if (n_pend_ios < PCT_IO(3) && (n_ios - n_ios_old < PCT_IO(5))) { - srv_main_thread_op_info = "doing insert buffer merge"; - ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4); - - srv_main_thread_op_info = "flushing log"; - - /* No fsync when srv_flush_log_at_trx_commit != 1 */ - log_buffer_flush_maybe_sync(); - srv_async_flush++; - } - - if (UNIV_UNLIKELY(buf_get_modified_ratio_pct() - > srv_max_buf_pool_modified_pct)) { - - /* Try to keep the number of modified pages in the - buffer pool under the limit wished by the user */ - - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, - PCT_IO(100), - ut_dulint_max); - - /* If we had to do the flush, it may have taken - even more than 1 second, and also, there may be more - to flush. Do not sleep 1 second during the next - iteration of this loop. */ - - skip_sleep = TRUE; - } - - if (srv_activity_count == old_activity_count) { - - /* There is no user activity at the moment, go to - the background loop */ - - goto background_loop; - } - } - - /* ---- We perform the following code approximately once per - 10 seconds when there is database activity */ - -#ifdef MEM_PERIODIC_CHECK - /* Check magic numbers of every allocated mem block once in 10 - seconds */ - mem_validate_all_blocks(); -#endif - /* If i/os during the 10 second period were less than 200% of - capacity, we assume that there is free disk i/o capacity - available, and it makes sense to flush srv_io_capacity pages. - - Note that this is done regardless of the fraction of dirty - pages relative to the max requested by the user. The one second - loop above requests writes for that case. The writes done here - are not required, and may be disabled. 
*/ - - n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes; - n_ios = log_sys->n_log_ios + buf_pool->n_pages_read - + buf_pool->n_pages_written; - if (srv_extra_dirty_writes && - n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) { - - srv_main_thread_op_info = "flushing buffer pool pages"; - buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max); - - srv_main_thread_op_info = "flushing log"; - /* No fsync when srv_flush_log_at_trx_commit != 1 */ - log_buffer_flush_maybe_sync(); - srv_async_flush++; - } - - /* We run a batch of insert buffer merge every 10 seconds, - even if the server were active */ - - srv_main_thread_op_info = "doing insert buffer merge"; - ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4); - - srv_main_thread_op_info = "flushing log"; - /* No fsync when srv_flush_log_at_trx_commit != 1 */ - log_buffer_flush_maybe_sync(); - srv_async_flush++; - - /* We run a full purge every 10 seconds, even if the server - were active */ - - n_pages_purged = 1; - - last_flush_time = time(NULL); - - while (n_pages_purged) { - - if (srv_fast_shutdown && srv_shutdown_state > 0) { - - goto background_loop; - } - - srv_main_thread_op_info = "purging"; - n_pages_purged = trx_purge(); - - current_time = time(NULL); - - if (difftime(current_time, last_flush_time) > 1) { - srv_main_thread_op_info = "flushing log"; - - log_buffer_flush_to_disk(); - last_flush_time = current_time; - srv_sync_flush++; - } - } - - srv_main_thread_op_info = "flushing buffer pool pages"; - - /* Flush a few oldest pages to make a new checkpoint younger */ - - if (buf_get_modified_ratio_pct() > 70) { - - /* If there are lots of modified pages in the buffer pool - (> 70 %), we assume we can afford reserving the disk(s) for - the time it requires to flush 100 pages */ - - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, - PCT_IO(100), - ut_dulint_max); - } else { - /* Otherwise, we only flush a small number of pages so that - we do not unnecessarily use much disk i/o 
capacity from - other work */ - - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, - PCT_IO(10), - ut_dulint_max); - } - - srv_main_thread_op_info = "making checkpoint"; - - /* Make a new checkpoint about once in 10 seconds */ - - log_checkpoint(TRUE, FALSE); - - srv_main_thread_op_info = "reserving kernel mutex"; - - mutex_enter(&kernel_mutex); - - /* ---- When there is database activity, we jump from here back to - the start of loop */ - - if (srv_activity_count != old_activity_count) { - mutex_exit(&kernel_mutex); - goto loop; - } - - mutex_exit(&kernel_mutex); - - /* If the database is quiet, we enter the background loop */ - - /*****************************************************************/ -background_loop: - /* ---- In this loop we run background operations when the server - is quiet from user activity. Also in the case of a shutdown, we - loop here, flushing the buffer pool to the data files. */ - - /* The server has been quiet for a while: start running background - operations */ - srv_main_background_loops++; - srv_main_thread_op_info = "doing background drop tables"; - - n_tables_to_drop = row_drop_tables_for_mysql_in_background(); - - if (n_tables_to_drop > 0) { - /* Do not monopolize the CPU even if there are tables waiting - in the background drop queue. (It is essentially a bug if - MySQL tries to drop a table while there are still open handles - to it and we had to put it to the background drop queue.) 
*/ - - os_thread_sleep(100000); - } - - srv_main_thread_op_info = "purging"; - - /* Run a full purge */ - - n_pages_purged = 1; - - last_flush_time = time(NULL); - - while (n_pages_purged) { - if (srv_fast_shutdown && srv_shutdown_state > 0) { - - break; - } - - srv_main_thread_op_info = "purging"; - n_pages_purged = trx_purge(); - - current_time = time(NULL); - - if (difftime(current_time, last_flush_time) > 1) { - srv_main_thread_op_info = "flushing log"; - - log_buffer_flush_to_disk(); - last_flush_time = current_time; - srv_sync_flush++; - } - } - - srv_main_thread_op_info = "reserving kernel mutex"; - - mutex_enter(&kernel_mutex); - if (srv_activity_count != old_activity_count) { - mutex_exit(&kernel_mutex); - goto loop; - } - mutex_exit(&kernel_mutex); - - srv_main_thread_op_info = "doing insert buffer merge"; - - if (srv_fast_shutdown && srv_shutdown_state > 0) { - n_bytes_merged = 0; - } else { - /* This should do an amount of IO similar to the number of - * dirty pages that will be flushed in the call to - * buf_flush_batch below. Otherwise, the system favors - * clean pages over cleanup throughput. */ - n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IO(100)); - } - - srv_main_thread_op_info = "reserving kernel mutex"; - - mutex_enter(&kernel_mutex); - if (srv_activity_count != old_activity_count) { - mutex_exit(&kernel_mutex); - goto loop; - } - mutex_exit(&kernel_mutex); - -flush_loop: - srv_main_thread_op_info = "flushing buffer pool pages"; - srv_main_flush_loops++; - if (srv_fast_shutdown < 2) { - n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, - PCT_IO(100), - ut_dulint_max); - } else { - /* In the fastest shutdown we do not flush the buffer pool - to data files: we set n_pages_flushed to 0 artificially. 
*/ - - n_pages_flushed = 0; - } - - srv_main_thread_op_info = "reserving kernel mutex"; - - mutex_enter(&kernel_mutex); - if (srv_activity_count != old_activity_count) { - mutex_exit(&kernel_mutex); - goto loop; - } - mutex_exit(&kernel_mutex); - - srv_main_thread_op_info = "waiting for buffer pool flush to end"; - buf_flush_wait_batch_end(BUF_FLUSH_LIST); - - srv_main_thread_op_info = "flushing log"; - - current_time = time(NULL); - if (difftime(current_time, last_flush_time) > 1) { - srv_main_thread_op_info = (char*) "flushing log"; - log_buffer_flush_to_disk(); - last_flush_time = current_time; - srv_sync_flush++; - } else { - /* No fsync when srv_flush_log_at_trx_commit != 1 */ - log_buffer_flush_maybe_sync(); - srv_async_flush++; - } - - srv_main_thread_op_info = "making checkpoint"; - - log_checkpoint(TRUE, FALSE); - - if (buf_get_modified_ratio_pct() > srv_max_buf_pool_modified_pct) { - - /* Try to keep the number of modified pages in the - buffer pool under the limit wished by the user */ - - goto flush_loop; - } - - srv_main_thread_op_info = "reserving kernel mutex"; - - mutex_enter(&kernel_mutex); - if (srv_activity_count != old_activity_count) { - mutex_exit(&kernel_mutex); - goto loop; - } - mutex_exit(&kernel_mutex); - /* - srv_main_thread_op_info = "archiving log (if log archive is on)"; - - log_archive_do(FALSE, &n_bytes_archived); - */ - n_bytes_archived = 0; - - /* Keep looping in the background loop if still work to do */ - - if (srv_fast_shutdown && srv_shutdown_state > 0) { - if (n_tables_to_drop + n_pages_flushed - + n_bytes_archived != 0) { - - /* If we are doing a fast shutdown (= the default) - we do not do purge or insert buffer merge. But we - flush the buffer pool completely to disk. - In a 'very fast' shutdown we do not flush the buffer - pool to data files: we have set n_pages_flushed to - 0 artificially. 
*/ - - goto background_loop; - } - } else if (n_tables_to_drop - + n_pages_purged + n_bytes_merged + n_pages_flushed - + n_bytes_archived != 0) { - /* In a 'slow' shutdown we run purge and the insert buffer - merge to completion */ - - goto background_loop; - } - - /* There is no work for background operations either: suspend - master thread to wait for more server activity */ - -suspend_thread: - srv_main_thread_op_info = "suspending"; - - mutex_enter(&kernel_mutex); - - if (row_get_background_drop_list_len_low() > 0) { - mutex_exit(&kernel_mutex); - - goto loop; - } - - event = srv_suspend_thread(); - - mutex_exit(&kernel_mutex); - - /* DO NOT CHANGE THIS STRING. innobase_start_or_create_for_mysql() - waits for database activity to die down when converting < 4.1.x - databases, and relies on this string being exactly as it is. InnoDB - manual also mentions this string in several places. */ - srv_main_thread_op_info = "waiting for server activity"; - - os_event_wait(event); - - if (srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS) { - /* This is only extra safety, the thread should exit - already when the event wait ends */ - - os_thread_exit(NULL); - } - - /* When there is user activity, InnoDB will set the event and the - main thread goes back to loop. 
*/ - - goto loop; - - OS_THREAD_DUMMY_RETURN; /* Not reached, avoid compiler warning */ -} -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c deleted file mode 100644 index 0b63d1a0b86..00000000000 --- a/storage/innobase/srv/srv0start.c +++ /dev/null @@ -1,2027 +0,0 @@ -/************************************************************************ -Starts the InnoDB database server - -(c) 1996-2000 Innobase Oy - -Created 2/16/1996 Heikki Tuuri -*************************************************************************/ - -#include "os0proc.h" -#include "sync0sync.h" -#include "ut0mem.h" -#include "mem0mem.h" -#include "mem0pool.h" -#include "data0data.h" -#include "data0type.h" -#include "dict0dict.h" -#include "buf0buf.h" -#include "buf0flu.h" -#include "buf0rea.h" -#include "os0file.h" -#include "os0thread.h" -#include "fil0fil.h" -#include "fsp0fsp.h" -#include "rem0rec.h" -#include "rem0cmp.h" -#include "mtr0mtr.h" -#include "log0log.h" -#include "log0recv.h" -#include "page0page.h" -#include "page0cur.h" -#include "trx0trx.h" -#include "dict0boot.h" -#include "dict0load.h" -#include "trx0sys.h" -#include "dict0crea.h" -#include "btr0btr.h" -#include "btr0pcur.h" -#include "btr0cur.h" -#include "btr0sea.h" -#include "rem0rec.h" -#include "srv0srv.h" -#include "que0que.h" -#include "usr0sess.h" -#include "lock0lock.h" -#include "trx0roll.h" -#include "trx0purge.h" -#include "row0ins.h" -#include "row0sel.h" -#include "row0upd.h" -#include "row0row.h" -#include "row0mysql.h" -#include "lock0lock.h" -#include "ibuf0ibuf.h" -#include "pars0pars.h" -#include "btr0sea.h" -#include "srv0start.h" -#include "que0que.h" - -/* Log sequence number immediately after startup */ -dulint srv_start_lsn; -/* Log sequence number at shutdown */ -dulint srv_shutdown_lsn; - -#ifdef HAVE_DARWIN_THREADS -# include <sys/utsname.h> -ibool srv_have_fullfsync = FALSE; -#endif - -ibool srv_start_raw_disk_in_use = FALSE; - -ulint 
srv_sizeof_trx_t_in_ha_innodb_cc; - -ibool srv_startup_is_before_trx_rollback_phase = FALSE; -ibool srv_is_being_started = FALSE; -#ifndef UNIV_HOTBACKUP -static ibool srv_start_has_been_called = FALSE; -static ibool srv_was_started = FALSE; -#endif /* !UNIV_HOTBACKUP */ - -/* At a shutdown the value first climbs to SRV_SHUTDOWN_CLEANUP -and then to SRV_SHUTDOWN_LAST_PHASE */ -ulint srv_shutdown_state = 0; - -#ifndef UNIV_HOTBACKUP -static os_file_t files[1000]; - -static mutex_t ios_mutex; -static ulint ios; - -static ulint n[SRV_MAX_N_IO_THREADS + 5]; -static os_thread_id_t thread_ids[SRV_MAX_N_IO_THREADS + 5]; - -/* We use this mutex to test the return value of pthread_mutex_trylock - on successful locking. HP-UX does NOT return 0, though Linux et al do. */ -static os_fast_mutex_t srv_os_test_mutex; - -/* Name of srv_monitor_file */ -static char* srv_monitor_file_name; -#endif /* !UNIV_HOTBACKUP */ - -#define SRV_N_PENDING_IOS_PER_THREAD OS_AIO_N_PENDING_IOS_PER_THREAD -#define SRV_MAX_N_PENDING_SYNC_IOS 100 - - -/* Avoid warnings when using purify */ - -#ifdef HAVE_purify -static int inno_bcmp(register const char *s1, register const char *s2, - register uint len) -{ - while ((len-- != 0) && (*s1++ == *s2++)) - ; - - return(len + 1); -} -#define memcmp(A,B,C) inno_bcmp((A),(B),(C)) -#endif - -static -char* -srv_parse_megabytes( -/*================*/ - /* out: next character in string */ - char* str, /* in: string containing a quantity in bytes */ - ulint* megs) /* out: the number in megabytes */ -{ - char* endp; - ulint size; - - size = strtoul(str, &endp, 10); - - str = endp; - - switch (*str) { - case 'G': case 'g': - size *= 1024; - /* fall through */ - case 'M': case 'm': - str++; - break; - default: - size /= 1024 * 1024; - break; - } - - *megs = size; - return(str); -} - -/************************************************************************* -Reads the data files and their sizes from a character string given in -the .cnf file. 
*/ - -ibool -srv_parse_data_file_paths_and_sizes( -/*================================*/ - /* out: TRUE if ok, FALSE if parsing - error */ - char* str, /* in: the data file path string */ - char*** data_file_names, /* out, own: array of data file - names */ - ulint** data_file_sizes, /* out, own: array of data file sizes - in megabytes */ - ulint** data_file_is_raw_partition,/* out, own: array of flags - showing which data files are raw - partitions */ - ulint* n_data_files, /* out: number of data files */ - ibool* is_auto_extending, /* out: TRUE if the last data file is - auto-extending */ - ulint* max_auto_extend_size) /* out: max auto extend size for the - last file if specified, 0 if not */ -{ - char* input_str; - char* path; - ulint size; - ulint i = 0; - - *is_auto_extending = FALSE; - *max_auto_extend_size = 0; - - input_str = str; - - /* First calculate the number of data files and check syntax: - path:size[M | G];path:size[M | G]... . Note that a Windows path may - contain a drive name and a ':'. 
*/ - - while (*str != '\0') { - path = str; - - while ((*str != ':' && *str != '\0') - || (*str == ':' - && (*(str + 1) == '\\' || *(str + 1) == '/' - || *(str + 1) == ':'))) { - str++; - } - - if (*str == '\0') { - return(FALSE); - } - - str++; - - str = srv_parse_megabytes(str, &size); - - if (0 == strncmp(str, ":autoextend", - (sizeof ":autoextend") - 1)) { - - str += (sizeof ":autoextend") - 1; - - if (0 == strncmp(str, ":max:", - (sizeof ":max:") - 1)) { - - str += (sizeof ":max:") - 1; - - str = srv_parse_megabytes(str, &size); - } - - if (*str != '\0') { - - return(FALSE); - } - } - - if (strlen(str) >= 6 - && *str == 'n' - && *(str + 1) == 'e' - && *(str + 2) == 'w') { - str += 3; - } - - if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') { - str += 3; - } - - if (size == 0) { - return(FALSE); - } - - i++; - - if (*str == ';') { - str++; - } else if (*str != '\0') { - - return(FALSE); - } - } - - if (i == 0) { - /* If innodb_data_file_path was defined it must contain - at least one data file definition */ - - return(FALSE); - } - - *data_file_names = (char**)ut_malloc(i * sizeof(void*)); - *data_file_sizes = (ulint*)ut_malloc(i * sizeof(ulint)); - *data_file_is_raw_partition = (ulint*)ut_malloc(i * sizeof(ulint)); - - *n_data_files = i; - - /* Then store the actual values to our arrays */ - - str = input_str; - i = 0; - - while (*str != '\0') { - path = str; - - /* Note that we must step over the ':' in a Windows path; - a Windows path normally looks like C:\ibdata\ibdata1:1G, but - a Windows raw partition may have a specification like - \\.\C::1Gnewraw or \\.\PHYSICALDRIVE2:1Gnewraw */ - - while ((*str != ':' && *str != '\0') - || (*str == ':' - && (*(str + 1) == '\\' || *(str + 1) == '/' - || *(str + 1) == ':'))) { - str++; - } - - if (*str == ':') { - /* Make path a null-terminated string */ - *str = '\0'; - str++; - } - - str = srv_parse_megabytes(str, &size); - - (*data_file_names)[i] = path; - (*data_file_sizes)[i] = size; - - if (0 == 
strncmp(str, ":autoextend", - (sizeof ":autoextend") - 1)) { - - *is_auto_extending = TRUE; - - str += (sizeof ":autoextend") - 1; - - if (0 == strncmp(str, ":max:", - (sizeof ":max:") - 1)) { - - str += (sizeof ":max:") - 1; - - str = srv_parse_megabytes( - str, max_auto_extend_size); - } - - if (*str != '\0') { - - return(FALSE); - } - } - - (*data_file_is_raw_partition)[i] = 0; - - if (strlen(str) >= 6 - && *str == 'n' - && *(str + 1) == 'e' - && *(str + 2) == 'w') { - str += 3; - (*data_file_is_raw_partition)[i] = SRV_NEW_RAW; - } - - if (*str == 'r' && *(str + 1) == 'a' && *(str + 2) == 'w') { - str += 3; - - if ((*data_file_is_raw_partition)[i] == 0) { - (*data_file_is_raw_partition)[i] = SRV_OLD_RAW; - } - } - - i++; - - if (*str == ';') { - str++; - } - } - - return(TRUE); -} - -/************************************************************************* -Reads log group home directories from a character string given in -the .cnf file. */ - -ibool -srv_parse_log_group_home_dirs( -/*==========================*/ - /* out: TRUE if ok, FALSE if parsing - error */ - char* str, /* in: character string */ - char*** log_group_home_dirs) /* out, own: log group home dirs */ -{ - char* input_str; - char* path; - ulint i = 0; - - input_str = str; - - /* First calculate the number of directories and check syntax: - path;path;... 
*/ - - while (*str != '\0') { - path = str; - - while (*str != ';' && *str != '\0') { - str++; - } - - i++; - - if (*str == ';') { - str++; - } else if (*str != '\0') { - - return(FALSE); - } - } - - if (i != 1) { - /* If innodb_log_group_home_dir was defined it must - contain exactly one path definition under current MySQL */ - - return(FALSE); - } - - *log_group_home_dirs = (char**) ut_malloc(i * sizeof(void*)); - - /* Then store the actual values to our array */ - - str = input_str; - i = 0; - - while (*str != '\0') { - path = str; - - while (*str != ';' && *str != '\0') { - str++; - } - - if (*str == ';') { - *str = '\0'; - str++; - } - - (*log_group_home_dirs)[i] = path; - - i++; - } - - return(TRUE); -} - -#ifndef UNIV_HOTBACKUP -/************************************************************************ -I/o-handler thread function. */ -static - -os_thread_ret_t -io_handler_thread( -/*==============*/ - void* arg) -{ - ulint segment; - ulint i; - - segment = *((ulint*)arg); - -#ifdef UNIV_DEBUG_THREAD_CREATION - fprintf(stderr, "Io handler thread %lu starts, id %lu\n", segment, - os_thread_pf(os_thread_get_curr_id())); -#endif - for (i = 0;; i++) { - fil_aio_wait(segment); - - mutex_enter(&ios_mutex); - ios++; - mutex_exit(&ios_mutex); - } - - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. - The thread actually never comes here because it is exited in an - os_event_wait(). */ - - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} -#endif /* !UNIV_HOTBACKUP */ - -#ifdef __WIN__ -#define SRV_PATH_SEPARATOR '\\' -#else -#define SRV_PATH_SEPARATOR '/' -#endif - -/************************************************************************* -Normalizes a directory path for Windows: converts slashes to backslashes. 
*/ - -void -srv_normalize_path_for_win( -/*=======================*/ - char* str __attribute__((unused))) /* in/out: null-terminated - character string */ -{ -#ifdef __WIN__ - for (; *str; str++) { - - if (*str == '/') { - *str = '\\'; - } - } -#endif -} - -/************************************************************************* -Adds a slash or a backslash to the end of a string if it is missing -and the string is not empty. */ - -char* -srv_add_path_separator_if_needed( -/*=============================*/ - /* out: string which has the separator if the - string is not empty */ - char* str) /* in: null-terminated character string */ -{ - char* out_str; - ulint len = ut_strlen(str); - - if (len == 0 || str[len - 1] == SRV_PATH_SEPARATOR) { - - return(str); - } - - out_str = ut_malloc(len + 2); - memcpy(out_str, str, len); - out_str[len] = SRV_PATH_SEPARATOR; - out_str[len + 1] = 0; - - return(out_str); -} - -#ifndef UNIV_HOTBACKUP -/************************************************************************* -Calculates the low 32 bits when a file size which is given as a number -database pages is converted to the number of bytes. */ -static -ulint -srv_calc_low32( -/*===========*/ - /* out: low 32 bytes of file size when - expressed in bytes */ - ulint file_size) /* in: file size in database pages */ -{ - return(0xFFFFFFFFUL & (file_size << UNIV_PAGE_SIZE_SHIFT)); -} - -/************************************************************************* -Calculates the high 32 bits when a file size which is given as a number -database pages is converted to the number of bytes. */ -static -ulint -srv_calc_high32( -/*============*/ - /* out: high 32 bytes of file size when - expressed in bytes */ - ulint file_size) /* in: file size in database pages */ -{ - return(file_size >> (32 - UNIV_PAGE_SIZE_SHIFT)); -} - -/************************************************************************* -Creates or opens the log files and closes them. 
*/ -static -ulint -open_or_create_log_file( -/*====================*/ - /* out: DB_SUCCESS or error code */ - ibool create_new_db, /* in: TRUE if we should create a - new database */ - ibool* log_file_created, /* out: TRUE if new log file - created */ - ibool log_file_has_been_opened,/* in: TRUE if a log file has been - opened before: then it is an error - to try to create another log file */ - ulint k, /* in: log group number */ - ulint i) /* in: log file number in group */ -{ - ibool ret; - ulint size; - ulint size_high; - char name[10000]; - - UT_NOT_USED(create_new_db); - - *log_file_created = FALSE; - - srv_normalize_path_for_win(srv_log_group_home_dirs[k]); - srv_log_group_home_dirs[k] = srv_add_path_separator_if_needed( - srv_log_group_home_dirs[k]); - - ut_a(strlen(srv_log_group_home_dirs[k]) - < (sizeof name) - 10 - sizeof "ib_logfile"); - sprintf(name, "%s%s%lu", srv_log_group_home_dirs[k], - "ib_logfile", (ulong) i); - - files[i] = os_file_create(name, OS_FILE_CREATE, OS_FILE_NORMAL, - OS_LOG_FILE, &ret); - if (ret == FALSE) { - if (os_file_get_last_error(FALSE) != OS_FILE_ALREADY_EXISTS -#ifdef UNIV_AIX - /* AIX 5.1 after security patch ML7 may have errno set - to 0 here, which causes our function to return 100; - work around that AIX problem */ - && os_file_get_last_error(FALSE) != 100 -#endif - ) { - fprintf(stderr, - "InnoDB: Error in creating" - " or opening %s\n", name); - - return(DB_ERROR); - } - - files[i] = os_file_create(name, OS_FILE_OPEN, OS_FILE_AIO, - OS_LOG_FILE, &ret); - if (!ret) { - fprintf(stderr, - "InnoDB: Error in opening %s\n", name); - - return(DB_ERROR); - } - - ret = os_file_get_size(files[i], &size, &size_high); - ut_a(ret); - - if (size != srv_calc_low32(srv_log_file_size) - || size_high != srv_calc_high32(srv_log_file_size)) { - - fprintf(stderr, - "InnoDB: Error: log file %s is" - " of different size %lu %lu bytes\n" - "InnoDB: than specified in the .cnf" - " file %lu %lu bytes!\n", - name, (ulong) size_high, (ulong) size, 
- (ulong) srv_calc_high32(srv_log_file_size), - (ulong) srv_calc_low32(srv_log_file_size)); - - return(DB_ERROR); - } - } else { - *log_file_created = TRUE; - - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Log file %s did not exist:" - " new to be created\n", - name); - if (log_file_has_been_opened) { - - return(DB_ERROR); - } - - fprintf(stderr, "InnoDB: Setting log file %s size to %lu MB\n", - name, (ulong) srv_log_file_size - >> (20 - UNIV_PAGE_SIZE_SHIFT)); - - fprintf(stderr, - "InnoDB: Database physically writes the file" - " full: wait...\n"); - - ret = os_file_set_size(name, files[i], - srv_calc_low32(srv_log_file_size), - srv_calc_high32(srv_log_file_size)); - if (!ret) { - fprintf(stderr, - "InnoDB: Error in creating %s:" - " probably out of disk space\n", - name); - - return(DB_ERROR); - } - } - - ret = os_file_close(files[i]); - ut_a(ret); - - if (i == 0) { - /* Create in memory the file space object - which is for this log group */ - - fil_space_create(name, - 2 * k + SRV_LOG_SPACE_FIRST_ID, FIL_LOG); - } - - ut_a(fil_validate()); - - fil_node_create(name, srv_log_file_size, - 2 * k + SRV_LOG_SPACE_FIRST_ID, FALSE); -#ifdef UNIV_LOG_ARCHIVE - /* If this is the first log group, create the file space object - for archived logs. - Under MySQL, no archiving ever done. */ - - if (k == 0 && i == 0) { - arch_space_id = 2 * k + 1 + SRV_LOG_SPACE_FIRST_ID; - - fil_space_create("arch_log_space", arch_space_id, FIL_LOG); - } else { - arch_space_id = ULINT_UNDEFINED; - } -#endif /* UNIV_LOG_ARCHIVE */ - if (i == 0) { - log_group_init(k, srv_n_log_files, - srv_log_file_size * UNIV_PAGE_SIZE, - 2 * k + SRV_LOG_SPACE_FIRST_ID, - SRV_LOG_SPACE_FIRST_ID + 1); /* dummy arch - space id */ - } - - return(DB_SUCCESS); -} - -/************************************************************************* -Creates or opens database data files and closes them. 
*/ -static -ulint -open_or_create_data_files( -/*======================*/ - /* out: DB_SUCCESS or error code */ - ibool* create_new_db, /* out: TRUE if new database should be - created; set when the first data file (i == 0) had to be created */ -#ifdef UNIV_LOG_ARCHIVE - ulint* min_arch_log_no,/* out: min of archived log numbers in data - files */ - ulint* max_arch_log_no,/* out: presumably the max counterpart of min_arch_log_no; here it is only passed on to fil_read_flushed_lsn_and_arch_log_no() */ -#endif /* UNIV_LOG_ARCHIVE */ - dulint* min_flushed_lsn,/* out: min of flushed lsn values in data - files */ - dulint* max_flushed_lsn,/* out: presumably the max counterpart of min_flushed_lsn; here it is only passed on to fil_read_flushed_lsn_and_arch_log_no() */ - ulint* sum_of_new_sizes)/* out: sum of sizes of the new files added, in the same units as srv_data_file_sizes[] */ -{ - /* Overview: for each of the srv_n_data_files entries the full path is built in name[]. A file that could be created (or a new raw partition that could be opened) is written full to its configured size and counted in *sum_of_new_sizes. A file that already existed (or an old raw partition) is opened, size-checked unless it is an old raw partition, and handed to fil_read_flushed_lsn_and_arch_log_no(). Every file is then closed and added to space 0 with fil_node_create(). */ ibool ret; - ulint i; - ibool one_opened = FALSE; - ibool one_created = FALSE; - ulint size; - ulint size_high; - ulint rounded_size_pages; - char name[10000]; - - if (srv_n_data_files >= 1000) { - fprintf(stderr, "InnoDB: can only have < 1000 data files\n" - "InnoDB: you have defined %lu\n", - (ulong) srv_n_data_files); - return(DB_ERROR); - } - - *sum_of_new_sizes = 0; - - *create_new_db = FALSE; - - srv_normalize_path_for_win(srv_data_home); - srv_data_home = srv_add_path_separator_if_needed(srv_data_home); - - for (i = 0; i < srv_n_data_files; i++) { - srv_normalize_path_for_win(srv_data_file_names[i]); - - /* The concatenated path must fit in name[] before it is formatted */ ut_a(strlen(srv_data_home) + strlen(srv_data_file_names[i]) - < (sizeof name) - 1); - sprintf(name, "%s%s", srv_data_home, srv_data_file_names[i]); - - if (srv_data_file_is_raw_partition[i] == 0) { - - /* First we try to create the file: if it already - exists, ret will get value FALSE */ - - files[i] = os_file_create(name, OS_FILE_CREATE, - OS_FILE_NORMAL, - OS_DATA_FILE, &ret); - - if (ret == FALSE && os_file_get_last_error(FALSE) - != OS_FILE_ALREADY_EXISTS -#ifdef UNIV_AIX - /* AIX 5.1 after security patch ML7 may have - errno set to 0 here, which causes our function - to return 100; work around that AIX problem */ - && os_file_get_last_error(FALSE) != 100 -#endif - ) { - fprintf(stderr, - "InnoDB: Error in creating" - " or opening %s\n", - name); - - return(DB_ERROR); - } - } else if
(srv_data_file_is_raw_partition[i] == SRV_NEW_RAW) { - /* The partition is opened, not created; then it is - written over (ret is TRUE after a successful open, so the "created" branch below runs) */ - - srv_start_raw_disk_in_use = TRUE; - srv_created_new_raw = TRUE; - - files[i] = os_file_create(name, OS_FILE_OPEN_RAW, - OS_FILE_NORMAL, - OS_DATA_FILE, &ret); - if (!ret) { - fprintf(stderr, - "InnoDB: Error in opening %s\n", name); - - return(DB_ERROR); - } - } else if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) { - srv_start_raw_disk_in_use = TRUE; - - /* Nothing was created: force the "open existing file" branch below */ ret = FALSE; - } else { - ut_a(0); - } - - if (ret == FALSE) { - /* We open the data file */ - - if (one_created) { - fprintf(stderr, - "InnoDB: Error: data files can only" - " be added at the end\n"); - fprintf(stderr, - "InnoDB: of a tablespace, but" - " data file %s existed beforehand.\n", - name); - return(DB_ERROR); - } - - if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) { - files[i] = os_file_create( - name, OS_FILE_OPEN_RAW, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); - } else if (i == 0) { - /* Only the first data file is opened with OS_FILE_OPEN_RETRY; NOTE(review): the reason is not visible here, confirm in os_file_create() */ files[i] = os_file_create( - name, OS_FILE_OPEN_RETRY, - OS_FILE_NORMAL, OS_DATA_FILE, &ret); - } else { - files[i] = os_file_create( - name, OS_FILE_OPEN, OS_FILE_NORMAL, - OS_DATA_FILE, &ret); - } - - if (!ret) { - fprintf(stderr, - "InnoDB: Error in opening %s\n", name); - os_file_get_last_error(TRUE); - - return(DB_ERROR); - } - - if (srv_data_file_is_raw_partition[i] == SRV_OLD_RAW) { - - goto skip_size_check; - } - - ret = os_file_get_size(files[i], &size, &size_high); - ut_a(ret); - /* Round size downward to megabytes: size / 2^20 is the low word in MB, each unit of size_high adds 4096 MB, and the left shift converts megabytes to pages */ - - rounded_size_pages - = (size / (1024 * 1024) + 4096 * size_high) - << (20 - UNIV_PAGE_SIZE_SHIFT); - - if (i == srv_n_data_files - 1 - && srv_auto_extend_last_data_file) { - - if (srv_data_file_sizes[i] > rounded_size_pages - || (srv_last_file_size_max > 0 - && srv_last_file_size_max - < rounded_size_pages)) { - - fprintf(stderr, - "InnoDB: Error: auto-extending" - " data file %s is" - " of a different size\n" - "InnoDB: %lu pages (rounded" - " down to MB)
than specified" - " in the .cnf file:\n" - "InnoDB: initial %lu pages," - " max %lu (relevant if" - " non-zero) pages!\n", - name, - (ulong) rounded_size_pages, - (ulong) srv_data_file_sizes[i], - (ulong) - srv_last_file_size_max); - - return(DB_ERROR); - } - - /* Adopt the size found on disk for the auto-extending last file, so the equality check below passes for it */ srv_data_file_sizes[i] = rounded_size_pages; - } - - if (rounded_size_pages != srv_data_file_sizes[i]) { - - fprintf(stderr, - "InnoDB: Error: data file %s" - " is of a different size\n" - "InnoDB: %lu pages" - " (rounded down to MB)\n" - "InnoDB: than specified" - " in the .cnf file %lu pages!\n", - name, - (ulong) rounded_size_pages, - (ulong) srv_data_file_sizes[i]); - - return(DB_ERROR); - } -skip_size_check: - fil_read_flushed_lsn_and_arch_log_no( - files[i], one_opened, -#ifdef UNIV_LOG_ARCHIVE - min_arch_log_no, max_arch_log_no, -#endif /* UNIV_LOG_ARCHIVE */ - min_flushed_lsn, max_flushed_lsn); - one_opened = TRUE; - } else { - /* We created the data file and now write it full of - zeros */ - - one_created = TRUE; - - if (i > 0) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Data file %s did not" - " exist: new to be created\n", - name); - } else { - fprintf(stderr, - "InnoDB: The first specified" - " data file %s did not exist:\n" - "InnoDB: a new database" - " to be created!\n", name); - *create_new_db = TRUE; - } - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Setting file %s size to %lu MB\n", - name, - (ulong) (srv_data_file_sizes[i] - >> (20 - UNIV_PAGE_SIZE_SHIFT))); - - fprintf(stderr, - "InnoDB: Database physically writes the" - " file full: wait...\n"); - - ret = os_file_set_size( - name, files[i], - srv_calc_low32(srv_data_file_sizes[i]), - srv_calc_high32(srv_data_file_sizes[i])); - - if (!ret) { - fprintf(stderr, - "InnoDB: Error in creating %s:" - " probably out of disk space\n", name); - - return(DB_ERROR); - } - - *sum_of_new_sizes = *sum_of_new_sizes - + srv_data_file_sizes[i]; - } - - ret = os_file_close(files[i]); - ut_a(ret); - - /* The space object for space id 0 is created once, using the first data file's name */ if (i == 0) { -
fil_space_create(name, 0, FIL_TABLESPACE); - } - - ut_a(fil_validate()); - - if (srv_data_file_is_raw_partition[i]) { - - fil_node_create(name, srv_data_file_sizes[i], 0, TRUE); - } else { - fil_node_create(name, srv_data_file_sizes[i], 0, - FALSE); - } - } - - /* Reset the ios counter and create ios_mutex, the pair that io_handler_thread() uses */ ios = 0; - - mutex_create(&ios_mutex, SYNC_NO_ORDER_CHECK); - - return(DB_SUCCESS); -} - -/******************************************************************** -Starts InnoDB and creates a new database if database files -are not found and the user wants. Server parameters are -read from a file of name "srv_init" in the ib_home directory. */ - -int -innobase_start_or_create_for_mysql(void) -/*====================================*/ - /* out: DB_SUCCESS or error code */ -{ - buf_pool_t* ret; - ibool create_new_db; - ibool log_file_created; - ibool log_created = FALSE; - ibool log_opened = FALSE; - dulint min_flushed_lsn; - dulint max_flushed_lsn; -#ifdef UNIV_LOG_ARCHIVE - ulint min_arch_log_no; - ulint max_arch_log_no; -#endif /* UNIV_LOG_ARCHIVE */ - ulint sum_of_new_sizes; - ulint sum_of_data_file_sizes; - ulint tablespace_size_in_header; - ulint err; - ulint i; - ibool srv_file_per_table_original_value = srv_file_per_table; - mtr_t mtr; - ulint n_threads; -#ifdef HAVE_DARWIN_THREADS -# ifdef F_FULLFSYNC - /* This executable has been compiled on Mac OS X 10.3 or later. - Assume that F_FULLFSYNC is available at run-time. */ - srv_have_fullfsync = TRUE; -# else /* F_FULLFSYNC */ - /* This executable has been compiled on Mac OS X 10.2 - or earlier. Determine if the executable is running - on Mac OS X 10.3 or later. NOTE(review): the test that follows compares utsname.release with "7." using strcmp(), i.e. character by character, so a release string beginning with "10" would compare lower than "7." - confirm whether a numeric comparison of the major version was intended.
*/ - struct utsname utsname; - if (uname(&utsname)) { - fputs("InnoDB: cannot determine Mac OS X version!\n", stderr); - } else { - srv_have_fullfsync = strcmp(utsname.release, "7.") >= 0; - } - if (!srv_have_fullfsync) { - fputs("InnoDB: On Mac OS X, fsync() may be" - " broken on internal drives,\n" - "InnoDB: making transactions unsafe!\n", stderr); - } -# endif /* F_FULLFSYNC */ -#endif /* HAVE_DARWIN_THREADS */ - - if (sizeof(ulint) != sizeof(void*)) { - fprintf(stderr, - "InnoDB: Error: size of InnoDB's ulint is %lu," - " but size of void* is %lu.\n" - "InnoDB: The sizes should be the same" - " so that on a 64-bit platform you can\n" - "InnoDB: allocate more than 4 GB of memory.", - (ulong)sizeof(ulint), (ulong)sizeof(void*)); - } - - srv_file_per_table = FALSE; /* system tables are created in tablespace - 0 */ -#ifdef UNIV_DEBUG - fprintf(stderr, - "InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!\n"); -#endif - -#ifdef UNIV_IBUF_DEBUG - fprintf(stderr, - "InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n" - "InnoDB: Crash recovery will fail with UNIV_IBUF_DEBUG\n"); -#endif - -#ifdef UNIV_SYNC_DEBUG - fprintf(stderr, - "InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!\n"); -#endif - -#ifdef UNIV_SEARCH_DEBUG - fprintf(stderr, - "InnoDB: !!!!!!!! UNIV_SEARCH_DEBUG switched on !!!!!!!!!\n"); -#endif - -#ifdef UNIV_MEM_DEBUG - fprintf(stderr, - "InnoDB: !!!!!!!! UNIV_MEM_DEBUG switched on !!!!!!!!!\n"); -#endif - -#ifdef UNIV_SIMULATE_AWE - fprintf(stderr, - "InnoDB: !!!!!!!! UNIV_SIMULATE_AWE switched on !!!!!!!!!\n"); -#endif - if (srv_sizeof_trx_t_in_ha_innodb_cc != (ulint)sizeof(trx_t)) { - fprintf(stderr, - "InnoDB: Error: trx_t size is %lu in ha_innodb.cc" - " but %lu in srv0start.c\n" - "InnoDB: Check that pthread_mutex_t is defined" - " in the same way in these\n" - "InnoDB: compilation modules. 
Cannot continue.\n", - (ulong) srv_sizeof_trx_t_in_ha_innodb_cc, - (ulong) sizeof(trx_t)); - return(DB_ERROR); - } - -#ifdef UNIV_DISABLE_MEM_POOL - fprintf(stderr, - "InnoDB: The InnoDB memory heap has been disabled.\n"); -#endif - -#ifdef UNIV_SYNC_ATOMIC - fprintf(stderr, - "InnoDB: Mutex and rw_lock use atomics.\n"); -#endif - - /* Since InnoDB does not currently clean up all its internal data - structures in MySQL Embedded Server Library server_end(), we - print an error message if someone tries to start up InnoDB a - second time during the process lifetime. */ - - if (srv_start_has_been_called) { - fprintf(stderr, - "InnoDB: Error:startup called second time" - " during the process lifetime.\n" - "InnoDB: In the MySQL Embedded Server Library" - " you cannot call server_init()\n" - "InnoDB: more than once during" - " the process lifetime.\n"); - } - - srv_start_has_been_called = TRUE; - -#ifdef UNIV_DEBUG - log_do_write = TRUE; -#endif /* UNIV_DEBUG */ - /* yydebug = TRUE; */ - - srv_is_being_started = TRUE; - srv_startup_is_before_trx_rollback_phase = TRUE; - os_aio_use_native_aio = FALSE; - -#if !defined(__WIN2000__) && !defined(UNIV_SIMULATE_AWE) - if (srv_use_awe) { - - fprintf(stderr, - "InnoDB: Error: You have specified" - " innodb_buffer_pool_awe_mem_mb\n" - "InnoDB: in my.cnf, but AWE can only" - " be used in Windows 2000 and later.\n" - "InnoDB: To use AWE, InnoDB must" - " be compiled with __WIN2000__ defined.\n"); - - return(DB_ERROR); - } -#endif - -#ifdef __WIN__ - if (os_get_os_version() == OS_WIN95 - || os_get_os_version() == OS_WIN31 - || os_get_os_version() == OS_WINNT) { - - /* On Win 95, 98, ME, Win32 subsystem for Windows 3.1, - and NT use simulated aio. In NT Windows provides async i/o, - but when run in conjunction with InnoDB Hot Backup, it seemed - to corrupt the data files. 
*/ - - os_aio_use_native_aio = FALSE; - } else { - /* On Win 2000 and XP use async i/o */ - os_aio_use_native_aio = TRUE; - } -#endif - if (srv_file_flush_method_str == NULL) { - /* These are the default options */ - - srv_unix_file_flush_method = SRV_UNIX_FSYNC; - - srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; -#ifndef __WIN__ - } else if (0 == ut_strcmp(srv_file_flush_method_str, "fsync")) { - srv_unix_file_flush_method = SRV_UNIX_FSYNC; - - } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DSYNC")) { - srv_unix_file_flush_method = SRV_UNIX_O_DSYNC; - - } else if (0 == ut_strcmp(srv_file_flush_method_str, "O_DIRECT")) { - srv_unix_file_flush_method = SRV_UNIX_O_DIRECT; - - } else if (0 == ut_strcmp(srv_file_flush_method_str, "littlesync")) { - srv_unix_file_flush_method = SRV_UNIX_LITTLESYNC; - - } else if (0 == ut_strcmp(srv_file_flush_method_str, "nosync")) { - srv_unix_file_flush_method = SRV_UNIX_NOSYNC; -#else - } else if (0 == ut_strcmp(srv_file_flush_method_str, "normal")) { - srv_win_file_flush_method = SRV_WIN_IO_NORMAL; - os_aio_use_native_aio = FALSE; - - } else if (0 == ut_strcmp(srv_file_flush_method_str, "unbuffered")) { - srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; - os_aio_use_native_aio = FALSE; - - } else if (0 == ut_strcmp(srv_file_flush_method_str, - "async_unbuffered")) { - srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED; -#endif - } else { - fprintf(stderr, - "InnoDB: Unrecognized value %s for" - " innodb_flush_method\n", - srv_file_flush_method_str); - return(DB_ERROR); - } - - /* Note that the call srv_boot() also changes the values of - srv_pool_size etc. to the units used by InnoDB internally */ - - /* Set the maximum number of threads which can wait for a semaphore - inside InnoDB: this is the 'sync wait array' size, as well as the - maximum number of threads that can wait in the 'srv_conc array' for - their time to enter InnoDB. 
*/ - -#if defined(__NETWARE__) - - /* Create less event semaphores because Win 98/ME had - difficulty creating 40000 event semaphores. Comment from - Novell, Inc.: also, these just take a lot of memory on - NetWare. */ - srv_max_n_threads = 1000; -#else - if (srv_pool_size >= 1000 * 1024) { - /* Here we still have srv_pool_size counted - in kilobytes (in 4.0 this was in bytes) - srv_boot() converts the value to - pages; if buffer pool is less than 1000 MB, - assume fewer threads. */ - srv_max_n_threads = 50000; - - } else if (srv_pool_size >= 8 * 1024) { - - srv_max_n_threads = 10000; - } else { - srv_max_n_threads = 1000; /* saves several MB of memory, - especially in 64-bit - computers */ - } -#endif - err = srv_boot(); /* This changes srv_pool_size to units of a page */ - - if (err != DB_SUCCESS) { - - return((int) err); - } - - mutex_create(&srv_monitor_file_mutex, SYNC_NO_ORDER_CHECK); - - if (srv_innodb_status) { - srv_monitor_file_name = mem_alloc( - strlen(fil_path_to_mysql_datadir) - + 20 + sizeof "/innodb_status."); - sprintf(srv_monitor_file_name, "%s/innodb_status.%lu", - fil_path_to_mysql_datadir, os_proc_get_number()); - srv_monitor_file = fopen(srv_monitor_file_name, "w+"); - if (!srv_monitor_file) { - fprintf(stderr, "InnoDB: unable to create %s: %s\n", - srv_monitor_file_name, strerror(errno)); - return(DB_ERROR); - } - } else { - srv_monitor_file_name = NULL; - srv_monitor_file = os_file_create_tmpfile(); - if (!srv_monitor_file) { - return(DB_ERROR); - } - } - - mutex_create(&srv_dict_tmpfile_mutex, SYNC_DICT_OPERATION); - - srv_dict_tmpfile = os_file_create_tmpfile(); - if (!srv_dict_tmpfile) { - return(DB_ERROR); - } - - mutex_create(&srv_misc_tmpfile_mutex, SYNC_ANY_LATCH); - - srv_misc_tmpfile = os_file_create_tmpfile(); - if (!srv_misc_tmpfile) { - return(DB_ERROR); - } - -#ifdef __WIN__ - /* - Need to hardcode this to 1 read and 1 write on Windows - while searching for problem causing this to crash when - higher number of threads are 
supported. - */ - srv_n_read_io_threads = srv_n_write_io_threads = 1; -#endif - /* Restrict the maximum number of file i/o threads */ - if ((srv_n_read_io_threads + srv_n_write_io_threads) > SRV_MAX_N_IO_THREADS) { - fprintf(stderr, - "InnoDB: requested too many read(%d) or write(%d) IO threads, max is %d\n", - (int)srv_n_read_io_threads, - (int)srv_n_write_io_threads, - SRV_MAX_N_IO_THREADS); - return(DB_ERROR); - } - - if (!os_aio_use_native_aio) { - /* More than 4 threads are now supported. */ - n_threads = os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD, - srv_n_read_io_threads, - srv_n_write_io_threads, - SRV_MAX_N_PENDING_SYNC_IOS); - } else { - /* Might need more slots here. Alas, I don't do windows. */ - n_threads = os_aio_init(SRV_N_PENDING_IOS_PER_THREAD, - srv_n_read_io_threads, - srv_n_write_io_threads, - SRV_MAX_N_PENDING_SYNC_IOS); - } - - if (n_threads > SRV_MAX_N_IO_THREADS) { - fprintf(stderr, - "InnoDB: requested too many IO threads(%d), max is %d\n", - (int)n_threads, SRV_MAX_N_IO_THREADS); - return(DB_ERROR); - } - - fil_init(srv_max_n_open_files); - - if (srv_use_awe) { - fprintf(stderr, - "InnoDB: Using AWE: Memory window is %lu MB" - " and AWE memory is %lu MB\n", - (ulong) (srv_awe_window_size / ((1024 * 1024) - / UNIV_PAGE_SIZE)), - (ulong) (srv_pool_size / ((1024 * 1024) - / UNIV_PAGE_SIZE))); - - /* We must disable adaptive hash indexes because they do not - tolerate remapping of pages in AWE */ - - srv_use_adaptive_hash_indexes = FALSE; - ret = buf_pool_init(srv_pool_size, srv_pool_size, - srv_awe_window_size); - } else { - ret = buf_pool_init(srv_pool_size, srv_pool_size, - srv_pool_size); - } - - if (ret == NULL) { - fprintf(stderr, - "InnoDB: Fatal error: cannot allocate the memory" - " for the buffer pool\n"); - - return(DB_ERROR); - } - - fsp_init(); - log_init(); - - lock_sys_create(srv_lock_table_size); - - /* Create i/o-handler threads: */ - - for (i = 0; i < n_threads; i++) { - n[i] = i; - - os_thread_create(io_handler_thread, n + 
i, thread_ids + i); - } - -#ifdef UNIV_LOG_ARCHIVE - if (0 != ut_strcmp(srv_log_group_home_dirs[0], srv_arch_dir)) { - fprintf(stderr, - "InnoDB: Error: you must set the log group" - " home dir in my.cnf the\n" - "InnoDB: same as log arch dir.\n"); - - return(DB_ERROR); - } -#endif /* UNIV_LOG_ARCHIVE */ - - if (srv_n_log_files * srv_log_file_size >= 262144) { - fprintf(stderr, - "InnoDB: Error: combined size of log files" - " must be < 4 GB\n"); - - return(DB_ERROR); - } - - sum_of_new_sizes = 0; - - for (i = 0; i < srv_n_data_files; i++) { -#ifndef __WIN__ - if (sizeof(off_t) < 5 && srv_data_file_sizes[i] >= 262144) { - fprintf(stderr, - "InnoDB: Error: file size must be < 4 GB" - " with this MySQL binary\n" - "InnoDB: and operating system combination," - " in some OS's < 2 GB\n"); - - return(DB_ERROR); - } -#endif - sum_of_new_sizes += srv_data_file_sizes[i]; - } - - if (sum_of_new_sizes < 640) { - fprintf(stderr, - "InnoDB: Error: tablespace size must be" - " at least 10 MB\n"); - - return(DB_ERROR); - } - - err = open_or_create_data_files(&create_new_db, -#ifdef UNIV_LOG_ARCHIVE - &min_arch_log_no, &max_arch_log_no, -#endif /* UNIV_LOG_ARCHIVE */ - &min_flushed_lsn, &max_flushed_lsn, - &sum_of_new_sizes); - if (err != DB_SUCCESS) { - fprintf(stderr, - "InnoDB: Could not open or create data files.\n" - "InnoDB: If you tried to add new data files," - " and it failed here,\n" - "InnoDB: you should now edit innodb_data_file_path" - " in my.cnf back\n" - "InnoDB: to what it was, and remove the" - " new ibdata files InnoDB created\n" - "InnoDB: in this failed attempt. InnoDB only wrote" - " those files full of\n" - "InnoDB: zeros, but did not yet use them in any way." 
- " But be careful: do not\n" - "InnoDB: remove old data files" - " which contain your precious data!\n"); - - return((int) err); - } - -#ifdef UNIV_LOG_ARCHIVE - srv_normalize_path_for_win(srv_arch_dir); - srv_arch_dir = srv_add_path_separator_if_needed(srv_arch_dir); -#endif /* UNIV_LOG_ARCHIVE */ - - for (i = 0; i < srv_n_log_files; i++) { - err = open_or_create_log_file(create_new_db, &log_file_created, - log_opened, 0, i); - if (err != DB_SUCCESS) { - - return((int) err); - } - - if (log_file_created) { - log_created = TRUE; - } else { - log_opened = TRUE; - } - if ((log_opened && create_new_db) - || (log_opened && log_created)) { - fprintf(stderr, - "InnoDB: Error: all log files must be" - " created at the same time.\n" - "InnoDB: All log files must be" - " created also in database creation.\n" - "InnoDB: If you want bigger or smaller" - " log files, shut down the\n" - "InnoDB: database and make sure there" - " were no errors in shutdown.\n" - "InnoDB: Then delete the existing log files." 
- " Edit the .cnf file\n" - "InnoDB: and start the database again.\n"); - - return(DB_ERROR); - } - } - - /* Open all log files and data files in the system tablespace: we - keep them open until database shutdown */ - - fil_open_log_and_system_tablespace_files(); - - if (log_created && !create_new_db -#ifdef UNIV_LOG_ARCHIVE - && !srv_archive_recovery -#endif /* UNIV_LOG_ARCHIVE */ - ) { - if (ut_dulint_cmp(max_flushed_lsn, min_flushed_lsn) != 0 -#ifdef UNIV_LOG_ARCHIVE - || max_arch_log_no != min_arch_log_no -#endif /* UNIV_LOG_ARCHIVE */ - ) { - fprintf(stderr, - "InnoDB: Cannot initialize created" - " log files because\n" - "InnoDB: data files were not in sync" - " with each other\n" - "InnoDB: or the data files are corrupt.\n"); - - return(DB_ERROR); - } - - if (ut_dulint_cmp(max_flushed_lsn, ut_dulint_create(0, 1000)) - < 0) { - fprintf(stderr, - "InnoDB: Cannot initialize created" - " log files because\n" - "InnoDB: data files are corrupt," - " or new data files were\n" - "InnoDB: created when the database" - " was started previous\n" - "InnoDB: time but the database" - " was not shut down\n" - "InnoDB: normally after that.\n"); - - return(DB_ERROR); - } - - mutex_enter(&(log_sys->mutex)); - -#ifdef UNIV_LOG_ARCHIVE - /* Do not + 1 arch_log_no because we do not use log - archiving */ - recv_reset_logs(max_flushed_lsn, max_arch_log_no, TRUE); -#else - recv_reset_logs(max_flushed_lsn, TRUE); -#endif /* UNIV_LOG_ARCHIVE */ - - mutex_exit(&(log_sys->mutex)); - } - - if (create_new_db) { - mtr_start(&mtr); - - fsp_header_init(0, sum_of_new_sizes, &mtr); - - mtr_commit(&mtr); - - trx_sys_create(); - dict_create(); - srv_startup_is_before_trx_rollback_phase = FALSE; - -#ifdef UNIV_LOG_ARCHIVE - } else if (srv_archive_recovery) { - fprintf(stderr, - "InnoDB: Starting archive" - " recovery from a backup...\n"); - err = recv_recovery_from_archive_start( - min_flushed_lsn, srv_archive_recovery_limit_lsn, - min_arch_log_no); - if (err != DB_SUCCESS) { - - 
return(DB_ERROR); - } - /* Since ibuf init is in dict_boot, and ibuf is needed - in any disk i/o, first call dict_boot */ - - dict_boot(); - trx_sys_init_at_db_start(); - srv_startup_is_before_trx_rollback_phase = FALSE; - - /* Initialize the fsp free limit global variable in the log - system */ - fsp_header_get_free_limit(0); - - recv_recovery_from_archive_finish(); -#endif /* UNIV_LOG_ARCHIVE */ - } else { - /* We always try to do a recovery, even if the database had - been shut down normally: this is the normal startup path */ - - err = recv_recovery_from_checkpoint_start(LOG_CHECKPOINT, - ut_dulint_max, - min_flushed_lsn, - max_flushed_lsn); - if (err != DB_SUCCESS) { - - return(DB_ERROR); - } - - /* Since the insert buffer init is in dict_boot, and the - insert buffer is needed in any disk i/o, first we call - dict_boot(). Note that trx_sys_init_at_db_start() only needs - to access space 0, and the insert buffer at this stage already - works for space 0. */ - - dict_boot(); - trx_sys_init_at_db_start(); - - if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) { - /* The following call is necessary for the insert - buffer to work with multiple tablespaces. We must - know the mapping between space id's and .ibd file - names. - - In a crash recovery, we check that the info in data - dictionary is consistent with what we already know - about space id's from the call of - fil_load_single_table_tablespaces(). - - In a normal startup, we create the space objects for - every table in the InnoDB data dictionary that has - an .ibd file. - - We also determine the maximum tablespace id used. - - TODO: We may have incomplete transactions in the - data dictionary tables. Does that harm the scanning of - the data dictionary below? 
*/ - - dict_check_tablespaces_and_store_max_id( - recv_needed_recovery); - } - - srv_startup_is_before_trx_rollback_phase = FALSE; - - /* Initialize the fsp free limit global variable in the log - system */ - fsp_header_get_free_limit(0); - - /* recv_recovery_from_checkpoint_finish needs trx lists which - are initialized in trx_sys_init_at_db_start(). */ - - recv_recovery_from_checkpoint_finish(); - } - - if (!create_new_db && sum_of_new_sizes > 0) { - /* New data file(s) were added */ - mtr_start(&mtr); - - fsp_header_inc_size(0, sum_of_new_sizes, &mtr); - - mtr_commit(&mtr); - - /* Immediately write the log record about increased tablespace - size to disk, so that it is durable even if mysqld would crash - quickly */ - - log_buffer_flush_to_disk(); - } - -#ifdef UNIV_LOG_ARCHIVE - /* Archiving is always off under MySQL */ - if (!srv_log_archive_on) { - ut_a(DB_SUCCESS == log_archive_noarchivelog()); - } else { - mutex_enter(&(log_sys->mutex)); - - start_archive = FALSE; - - if (log_sys->archiving_state == LOG_ARCH_OFF) { - start_archive = TRUE; - } - - mutex_exit(&(log_sys->mutex)); - - if (start_archive) { - ut_a(DB_SUCCESS == log_archive_archivelog()); - } - } -#endif /* UNIV_LOG_ARCHIVE */ - - /* fprintf(stderr, "Max allowed record size %lu\n", - page_get_free_space_of_empty() / 2); */ - - /* Create the thread which watches the timeouts for lock waits - and prints InnoDB monitor info */ - - os_thread_create(&srv_lock_timeout_and_monitor_thread, NULL, - thread_ids + 2 + SRV_MAX_N_IO_THREADS); - - /* Create the thread which warns of long semaphore waits */ - os_thread_create(&srv_error_monitor_thread, NULL, - thread_ids + 3 + SRV_MAX_N_IO_THREADS); - srv_was_started = TRUE; - srv_is_being_started = FALSE; - - if (trx_doublewrite == NULL) { - /* Create the doublewrite buffer to a new tablespace */ - - trx_sys_create_doublewrite_buf(); - } - - err = dict_create_or_check_foreign_constraint_tables(); - - if (err != DB_SUCCESS) { - return((int)DB_ERROR); - } - - /* 
Create the master thread which does purge and other utility - operations */ - - os_thread_create(&srv_master_thread, NULL, thread_ids - + (1 + SRV_MAX_N_IO_THREADS)); -#ifdef UNIV_DEBUG - /* buf_debug_prints = TRUE; */ -#endif /* UNIV_DEBUG */ - sum_of_data_file_sizes = 0; - - for (i = 0; i < srv_n_data_files; i++) { - sum_of_data_file_sizes += srv_data_file_sizes[i]; - } - - tablespace_size_in_header = fsp_header_get_tablespace_size(0); - - if (!srv_auto_extend_last_data_file - && sum_of_data_file_sizes != tablespace_size_in_header) { - - fprintf(stderr, - "InnoDB: Error: tablespace size" - " stored in header is %lu pages, but\n" - "InnoDB: the sum of data file sizes is %lu pages\n", - (ulong) tablespace_size_in_header, - (ulong) sum_of_data_file_sizes); - - if (srv_force_recovery == 0 - && sum_of_data_file_sizes < tablespace_size_in_header) { - /* This is a fatal error, the tail of a tablespace is - missing */ - - fprintf(stderr, - "InnoDB: Cannot start InnoDB." - " The tail of the system tablespace is\n" - "InnoDB: missing. Have you edited" - " innodb_data_file_path in my.cnf in an\n" - "InnoDB: inappropriate way, removing" - " ibdata files from there?\n" - "InnoDB: You can set innodb_force_recovery=1" - " in my.cnf to force\n" - "InnoDB: a startup if you are trying" - " to recover a badly corrupt database.\n"); - - return(DB_ERROR); - } - } - - if (srv_auto_extend_last_data_file - && sum_of_data_file_sizes < tablespace_size_in_header) { - - fprintf(stderr, - "InnoDB: Error: tablespace size stored in header" - " is %lu pages, but\n" - "InnoDB: the sum of data file sizes" - " is only %lu pages\n", - (ulong) tablespace_size_in_header, - (ulong) sum_of_data_file_sizes); - - if (srv_force_recovery == 0) { - - fprintf(stderr, - "InnoDB: Cannot start InnoDB. The tail of" - " the system tablespace is\n" - "InnoDB: missing. 
Have you edited" - " innodb_data_file_path in my.cnf in an\n" - "InnoDB: inappropriate way, removing" - " ibdata files from there?\n" - "InnoDB: You can set innodb_force_recovery=1" - " in my.cnf to force\n" - "InnoDB: a startup if you are trying to" - " recover a badly corrupt database.\n"); - - return(DB_ERROR); - } - } - - /* Check that os_fast_mutexes work as expected */ - os_fast_mutex_init(&srv_os_test_mutex); - - if (0 != os_fast_mutex_trylock(&srv_os_test_mutex)) { - fprintf(stderr, - "InnoDB: Error: pthread_mutex_trylock returns" - " an unexpected value on\n" - "InnoDB: success! Cannot continue.\n"); - exit(1); - } - - os_fast_mutex_unlock(&srv_os_test_mutex); - - os_fast_mutex_lock(&srv_os_test_mutex); - - os_fast_mutex_unlock(&srv_os_test_mutex); - - os_fast_mutex_free(&srv_os_test_mutex); - - if (srv_print_verbose_log) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Started; log sequence number %lu %lu\n", - (ulong) ut_dulint_get_high(srv_start_lsn), - (ulong) ut_dulint_get_low(srv_start_lsn)); - } - - if (srv_force_recovery > 0) { - fprintf(stderr, - "InnoDB: !!! innodb_force_recovery" - " is set to %lu !!!\n", - (ulong) srv_force_recovery); - } - - fflush(stderr); - - if (trx_doublewrite_must_reset_space_ids) { - /* Actually, we did not change the undo log format between - 4.0 and 4.1.1, and we would not need to run purge to - completion. Note also that the purge algorithm in 4.1.1 - can process the the history list again even after a full - purge, because our algorithm does not cut the end of the - history list in all cases so that it would become empty - after a full purge. That mean that we may purge 4.0 type - undo log even after this phase. - - The insert buffer record format changed between 4.0 and - 4.1.1. It is essential that the insert buffer is emptied - here! */ - - fprintf(stderr, - "InnoDB: You are upgrading to an" - " InnoDB version which allows multiple\n" - "InnoDB: tablespaces. 
Wait that purge" - " and insert buffer merge run to\n" - "InnoDB: completion...\n"); - for (;;) { - os_thread_sleep(1000000); - - if (0 == strcmp(srv_main_thread_op_info, - "waiting for server activity")) { - - ut_a(ibuf_is_empty()); - - break; - } - } - fprintf(stderr, - "InnoDB: Full purge and insert buffer merge" - " completed.\n"); - - trx_sys_mark_upgraded_to_multiple_tablespaces(); - - fprintf(stderr, - "InnoDB: You have now successfully upgraded" - " to the multiple tablespaces\n" - "InnoDB: format. You should NOT DOWNGRADE" - " to an earlier version of\n" - "InnoDB: InnoDB! But if you absolutely need to" - " downgrade, see\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "multiple-tablespaces.html\n" - "InnoDB: for instructions.\n"); - } - - if (srv_force_recovery == 0) { - /* In the insert buffer we may have even bigger tablespace - id's, because we may have dropped those tablespaces, but - insert buffer merge has not had time to clean the records from - the ibuf tree. */ - - ibuf_update_max_tablespace_id(); - } - - srv_file_per_table = srv_file_per_table_original_value; - - return((int) DB_SUCCESS); -} - -/******************************************************************** -Shuts down the InnoDB database. */ - -int -innobase_shutdown_for_mysql(void) -/*=============================*/ - /* out: DB_SUCCESS or error code */ -{ - ulint i; -#ifdef __NETWARE__ - extern ibool panic_shutdown; -#endif - if (!srv_was_started) { - if (srv_is_being_started) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: shutting down" - " a not properly started\n" - "InnoDB: or created database!\n"); - } - - return(DB_SUCCESS); - } - - /* 1. Flush the buffer pool to disk, write the current lsn to - the tablespace header(s), and copy all log data to archive. - The step 1 is the real InnoDB shutdown. The remaining steps 2 - ... - just free data structures after the shutdown. 
*/ - - - if (srv_fast_shutdown == 2) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: MySQL has requested a very fast shutdown" - " without flushing " - "the InnoDB buffer pool to data files." - " At the next mysqld startup " - "InnoDB will do a crash recovery!\n"); - } - -#ifdef __NETWARE__ - if(!panic_shutdown) -#endif - logs_empty_and_mark_files_at_shutdown(); - - if (srv_conc_n_threads != 0) { - fprintf(stderr, - "InnoDB: Warning: query counter shows %ld queries" - " still\n" - "InnoDB: inside InnoDB at shutdown\n", - srv_conc_n_threads); - } - - /* 2. Make all threads created by InnoDB to exit */ - - srv_shutdown_state = SRV_SHUTDOWN_EXIT_THREADS; - - /* In a 'very fast' shutdown, we do not need to wait for these threads - to die; all which counts is that we flushed the log; a 'very fast' - shutdown is essentially a crash. */ - - if (srv_fast_shutdown == 2) { - return(DB_SUCCESS); - } - - /* All threads end up waiting for certain events. Put those events - to the signaled state. Then the threads will exit themselves in - os_thread_event_wait(). */ - - for (i = 0; i < 1000; i++) { - /* NOTE: IF YOU CREATE THREADS IN INNODB, YOU MUST EXIT THEM - HERE OR EARLIER */ - - /* a. Let the lock timeout thread exit */ - os_event_set(srv_lock_timeout_thread_event); - - /* b. srv error monitor thread exits automatically, no need - to do anything here */ - - /* c. We wake the master thread so that it exits */ - srv_wake_master_thread(); - - /* d. Exit the i/o threads */ - - os_aio_wake_all_threads_at_shutdown(); - - os_mutex_enter(os_sync_mutex); - - if (os_thread_count == 0) { - /* All the threads have exited or are just exiting; - NOTE that the threads may not have completed their - exit yet. Should we use pthread_join() to make sure - they have exited? Now we just sleep 0.1 seconds and - hope that is enough! 
*/ - - os_mutex_exit(os_sync_mutex); - - os_thread_sleep(100000); - - break; - } - - os_mutex_exit(os_sync_mutex); - - os_thread_sleep(100000); - } - - if (i == 1000) { - fprintf(stderr, - "InnoDB: Warning: %lu threads created by InnoDB" - " had not exited at shutdown!\n", - (ulong) os_thread_count); - } - - if (srv_monitor_file) { - fclose(srv_monitor_file); - srv_monitor_file = 0; - if (srv_monitor_file_name) { - unlink(srv_monitor_file_name); - mem_free(srv_monitor_file_name); - } - } - if (srv_dict_tmpfile) { - fclose(srv_dict_tmpfile); - srv_dict_tmpfile = 0; - } - - if (srv_misc_tmpfile) { - fclose(srv_misc_tmpfile); - srv_misc_tmpfile = 0; - } - - mutex_free(&srv_monitor_file_mutex); - mutex_free(&srv_dict_tmpfile_mutex); - mutex_free(&srv_misc_tmpfile_mutex); - - /* 3. Free all InnoDB's own mutexes and the os_fast_mutexes inside - them */ - sync_close(); - - /* 4. Free the os_conc_mutex and all os_events and os_mutexes */ - - srv_free(); - os_sync_free(); - - /* Check that all read views are closed except read view owned - by a purge. */ - - if (UT_LIST_GET_LEN(trx_sys->view_list) > 1) { - fprintf(stderr, - "InnoDB: Error: all read views were not closed" - " before shutdown:\n" - "InnoDB: %lu read views open \n", - UT_LIST_GET_LEN(trx_sys->view_list) - 1); - } - - /* 5. 
Free all allocated memory and the os_fast_mutex created in - ut0mem.c */ - - ut_free_all_mem(); - - if (os_thread_count != 0 - || os_event_count != 0 - || os_mutex_count != 0 - || os_fast_mutex_count != 0) { - fprintf(stderr, - "InnoDB: Warning: some resources were not" - " cleaned up in shutdown:\n" - "InnoDB: threads %lu, events %lu," - " os_mutexes %lu, os_fast_mutexes %lu\n", - (ulong) os_thread_count, (ulong) os_event_count, - (ulong) os_mutex_count, (ulong) os_fast_mutex_count); - } - - if (dict_foreign_err_file) { - fclose(dict_foreign_err_file); - } - if (lock_latest_err_file) { - fclose(lock_latest_err_file); - } - - if (srv_print_verbose_log) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Shutdown completed;" - " log sequence number %lu %lu\n", - (ulong) ut_dulint_get_high(srv_shutdown_lsn), - (ulong) ut_dulint_get_low(srv_shutdown_lsn)); - } - - return((int) DB_SUCCESS); -} - -#ifdef __NETWARE__ -void set_panic_flag_for_netware() -{ - extern ibool panic_shutdown; - panic_shutdown = TRUE; -} -#endif /* __NETWARE__ */ -#endif /* !UNIV_HOTBACKUP */ diff --git a/storage/innobase/sync/sync0arr.c b/storage/innobase/sync/sync0arr.c deleted file mode 100644 index bb64ac07342..00000000000 --- a/storage/innobase/sync/sync0arr.c +++ /dev/null @@ -1,1021 +0,0 @@ -/****************************************************** -The wait array used in synchronization primitives - -(c) 1995 Innobase Oy - -Created 9/5/1995 Heikki Tuuri -*******************************************************/ - -#include "sync0arr.h" -#ifdef UNIV_NONINL -#include "sync0arr.ic" -#endif - -#include "sync0sync.h" -#include "sync0rw.h" -#include "os0sync.h" -#include "os0file.h" -#include "srv0srv.h" - -/* - WAIT ARRAY - ========== - -The wait array consists of cells each of which has an -an operating system event object created for it. The threads -waiting for a mutex, for example, can reserve a cell -in the array and suspend themselves to wait for the event -to become signaled. 
When using the wait array, remember to make -sure that some thread holding the synchronization object -will eventually know that there is a waiter in the array and -signal the object, to prevent infinite wait. -Why we chose to implement a wait array? First, to make -mutexes fast, we had to code our own implementation of them, -which only in usually uncommon cases resorts to using -slow operating system primitives. Then we had the choice of -assigning a unique OS event for each mutex, which would -be simpler, or using a global wait array. In some operating systems, -the global wait array solution is more efficient and flexible, -because we can do with a very small number of OS events, -say 200. In NT 3.51, allocating events seems to be a quadratic -algorithm, because 10 000 events are created fast, but -100 000 events takes a couple of minutes to create. - -As of 5.0.30 the above mentioned design is changed. Since now -OS can handle millions of wait events efficiently, we no longer -have this concept of each cell of wait array having one event. -Instead, now the event that a thread wants to wait on is embedded -in the wait object (mutex or rw_lock). We still keep the global -wait array for the sake of diagnostics and also to avoid infinite -wait The error_monitor thread scans the global wait array to signal -any waiting threads who have missed the signal. */ - -/* A cell where an individual thread may wait suspended -until a resource is released. The suspending is implemented -using an operating system event semaphore. 
*/ -struct sync_cell_struct { - void* wait_object; /* pointer to the object the - thread is waiting for; if NULL - the cell is free for use */ - mutex_t* old_wait_mutex; /* the latest wait mutex in cell */ - rw_lock_t* old_wait_rw_lock;/* the latest wait rw-lock in cell */ - ulint request_type; /* lock type requested on the - object */ - const char* file; /* in debug version file where - requested */ - ulint line; /* in debug version line where - requested */ - os_thread_id_t thread; /* thread id of this waiting - thread */ - ibool waiting; /* TRUE if the thread has already - called sync_array_event_wait - on this cell */ - ib_longlong signal_count; /* We capture the signal_count - of the wait_object when we - reset the event. This value is - then passed on to os_event_wait - and we wait only if the event - has not been signalled in the - period between the reset and - wait call. */ - time_t reservation_time;/* time when the thread reserved - the wait cell */ -}; - -/* NOTE: It is allowed for a thread to wait -for an event allocated for the array without owning the -protecting mutex (depending on the case: OS or database mutex), but -all changes (set or reset) to the state of the event must be made -while owning the mutex. */ -struct sync_array_struct { - ulint n_reserved; /* number of currently reserved - cells in the wait array */ - ulint n_cells; /* number of cells in the - wait array */ - sync_cell_t* array; /* pointer to wait array */ - ulint protection; /* this flag tells which - mutex protects the data */ - mutex_t mutex; /* possible database mutex - protecting this data structure */ - os_mutex_t os_mutex; /* Possible operating system mutex - protecting the data structure. - As this data structure is used in - constructing the database mutex, - to prevent infinite recursion - in implementation, we fall back to - an OS mutex. 
*/ - ulint sg_count; /* count of how many times an - object has been signalled */ - ulint res_count; /* count of cell reservations - since creation of the array */ -}; - -/* Counts the number of times that sync_arr_wake_threads_if_sema_free has - * found a thread that can run because it may have missed a wakeup signal. */ -ulint sync_wake_ups = 0; - -#ifdef UNIV_SYNC_DEBUG -/********************************************************************** -This function is called only in the debug version. Detects a deadlock -of one or more threads because of waits of semaphores. */ -static -ibool -sync_array_detect_deadlock( -/*=======================*/ - /* out: TRUE if deadlock detected */ - sync_array_t* arr, /* in: wait array; NOTE! the caller must - own the mutex to array */ - sync_cell_t* start, /* in: cell where recursive search started */ - sync_cell_t* cell, /* in: cell to search */ - ulint depth); /* in: recursion depth */ -#endif /* UNIV_SYNC_DEBUG */ - -/********************************************************************* -Gets the nth cell in array. */ -static -sync_cell_t* -sync_array_get_nth_cell( -/*====================*/ - /* out: cell */ - sync_array_t* arr, /* in: sync array */ - ulint n) /* in: index */ -{ - ut_a(arr); - ut_a(n < arr->n_cells); - - return(arr->array + n); -} - -/********************************************************************** -Reserves the mutex semaphore protecting a sync array. */ -static -void -sync_array_enter( -/*=============*/ - sync_array_t* arr) /* in: sync wait array */ -{ - ulint protection; - - protection = arr->protection; - - if (protection == SYNC_ARRAY_OS_MUTEX) { - os_mutex_enter(arr->os_mutex); - } else if (protection == SYNC_ARRAY_MUTEX) { - mutex_enter(&(arr->mutex)); - } else { - ut_error; - } -} - -/********************************************************************** -Releases the mutex semaphore protecting a sync array. 
*/ -static -void -sync_array_exit( -/*============*/ - sync_array_t* arr) /* in: sync wait array */ -{ - ulint protection; - - protection = arr->protection; - - if (protection == SYNC_ARRAY_OS_MUTEX) { - os_mutex_exit(arr->os_mutex); - } else if (protection == SYNC_ARRAY_MUTEX) { - mutex_exit(&(arr->mutex)); - } else { - ut_error; - } -} - -/*********************************************************************** -Creates a synchronization wait array. It is protected by a mutex -which is automatically reserved when the functions operating on it -are called. */ - -sync_array_t* -sync_array_create( -/*==============*/ - /* out, own: created wait array */ - ulint n_cells, /* in: number of cells in the array - to create */ - ulint protection) /* in: either SYNC_ARRAY_OS_MUTEX or - SYNC_ARRAY_MUTEX: determines the type - of mutex protecting the data structure */ -{ - sync_array_t* arr; - sync_cell_t* cell_array; - sync_cell_t* cell; - ulint i; - - ut_a(n_cells > 0); - - /* Allocate memory for the data structures */ - arr = ut_malloc(sizeof(sync_array_t)); - - cell_array = ut_malloc(sizeof(sync_cell_t) * n_cells); - - arr->n_cells = n_cells; - arr->n_reserved = 0; - arr->array = cell_array; - arr->protection = protection; - arr->sg_count = 0; - arr->res_count = 0; - - /* Then create the mutex to protect the wait array complex */ - if (protection == SYNC_ARRAY_OS_MUTEX) { - arr->os_mutex = os_mutex_create(NULL); - } else if (protection == SYNC_ARRAY_MUTEX) { - mutex_create(&arr->mutex, SYNC_NO_ORDER_CHECK); - } else { - ut_error; - } - - for (i = 0; i < n_cells; i++) { - cell = sync_array_get_nth_cell(arr, i); - cell->wait_object = NULL; - cell->waiting = FALSE; - cell->signal_count = 0; - } - - return(arr); -} - -/********************************************************************** -Frees the resources in a wait array. 
*/ - -void -sync_array_free( -/*============*/ - sync_array_t* arr) /* in, own: sync wait array */ -{ - ulint protection; - - ut_a(arr->n_reserved == 0); - - sync_array_validate(arr); - - protection = arr->protection; - - /* Release the mutex protecting the wait array complex */ - - if (protection == SYNC_ARRAY_OS_MUTEX) { - os_mutex_free(arr->os_mutex); - } else if (protection == SYNC_ARRAY_MUTEX) { - mutex_free(&(arr->mutex)); - } else { - ut_error; - } - - ut_free(arr->array); - ut_free(arr); -} - -/************************************************************************ -Validates the integrity of the wait array. Checks -that the number of reserved cells equals the count variable. */ - -void -sync_array_validate( -/*================*/ - sync_array_t* arr) /* in: sync wait array */ -{ - ulint i; - sync_cell_t* cell; - ulint count = 0; - - sync_array_enter(arr); - - for (i = 0; i < arr->n_cells; i++) { - cell = sync_array_get_nth_cell(arr, i); - if (cell->wait_object != NULL) { - count++; - } - } - - ut_a(count == arr->n_reserved); - - sync_array_exit(arr); -} - -/*********************************************************************** -Returns the event that the thread owning the cell waits for. */ -static -os_event_t -sync_cell_get_event( -/*================*/ - sync_cell_t* cell) /* in: non-empty sync array cell */ -{ - ulint type = cell->request_type; - - if (type == SYNC_MUTEX) { - return(((mutex_t *) cell->wait_object)->event); - } else if (type == RW_LOCK_WAIT_EX) { - return(((rw_lock_t *) cell->wait_object)->wait_ex_event); - } else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */ - return(((rw_lock_t *) cell->wait_object)->event); - } -} - - -/********************************************************************** -Reserves a wait array cell for waiting for an object. -The event of the cell is reset to nonsignalled state. 
*/ - -void -sync_array_reserve_cell( -/*====================*/ - sync_array_t* arr, /* in: wait array */ - void* object, /* in: pointer to the object to wait for */ - ulint type, /* in: lock request type */ - const char* file, /* in: file where requested */ - ulint line, /* in: line where requested */ - ulint* index) /* out: index of the reserved cell */ -{ - sync_cell_t* cell; - os_event_t event; - ulint i; - - ut_a(object); - ut_a(index); - - sync_array_enter(arr); - - arr->res_count++; - - /* Reserve a new cell. */ - for (i = 0; i < arr->n_cells; i++) { - cell = sync_array_get_nth_cell(arr, i); - - if (cell->wait_object == NULL) { - - cell->waiting = FALSE; - cell->wait_object = object; - - if (type == SYNC_MUTEX) { - cell->old_wait_mutex = object; - } else { - cell->old_wait_rw_lock = object; - } - - cell->request_type = type; - - cell->file = file; - cell->line = line; - - arr->n_reserved++; - - *index = i; - - sync_array_exit(arr); - - /* Make sure the event is reset and also store - the value of signal_count at which the event - was reset. */ - event = sync_cell_get_event(cell); - cell->signal_count = os_event_reset(event); - - cell->reservation_time = time(NULL); - - cell->thread = os_thread_get_curr_id(); - - return; - } - } - - ut_error; /* No free cell found */ - - return; -} - -/********************************************************************** -This function should be called when a thread starts to wait on -a wait array cell. In the debug version this function checks -if the wait for a semaphore will result in a deadlock, in which -case prints info and asserts. 
*/ - -void -sync_array_wait_event( -/*==================*/ - sync_array_t* arr, /* in: wait array */ - ulint index) /* in: index of the reserved cell */ -{ - sync_cell_t* cell; - os_event_t event; - - ut_a(arr); - - sync_array_enter(arr); - - cell = sync_array_get_nth_cell(arr, index); - - ut_a(cell->wait_object); - ut_a(!cell->waiting); - ut_ad(os_thread_get_curr_id() == cell->thread); - - event = sync_cell_get_event(cell); - cell->waiting = TRUE; - -#ifdef UNIV_SYNC_DEBUG - - /* We use simple enter to the mutex below, because if - we cannot acquire it at once, mutex_enter would call - recursively sync_array routines, leading to trouble. - rw_lock_debug_mutex freezes the debug lists. */ - - rw_lock_debug_mutex_enter(); - - if (TRUE == sync_array_detect_deadlock(arr, cell, cell, 0)) { - - fputs("########################################\n", stderr); - ut_error; - } - - rw_lock_debug_mutex_exit(); -#endif - sync_array_exit(arr); - - os_event_wait_low(event, cell->signal_count); - - sync_array_free_cell(arr, index); -} - -/********************************************************************** -Reports info of a wait array cell. 
*/ -static -void -sync_array_cell_print( -/*==================*/ - FILE* file, /* in: file where to print */ - sync_cell_t* cell) /* in: sync cell */ -{ - mutex_t* mutex; - rw_lock_t* rwlock; - ulint type; - ulint writer; - - type = cell->request_type; - - fprintf(file, - "--Thread %lu has waited at %s line %lu" - " for %.2f seconds the semaphore:\n", - (ulong) os_thread_pf(cell->thread), cell->file, - (ulong) cell->line, - difftime(time(NULL), cell->reservation_time)); - - if (type == SYNC_MUTEX) { - /* We use old_wait_mutex in case the cell has already - been freed meanwhile */ - mutex = cell->old_wait_mutex; - - fprintf(file, - "Mutex at %p created file %s line %lu, lock var %lu\n" -#ifdef UNIV_SYNC_DEBUG - "Last time reserved in file %s line %lu, " -#endif /* UNIV_SYNC_DEBUG */ - "waiters flag %lu\n", - (void*) mutex, mutex->cfile_name, (ulong) mutex->cline, - (ulong) mutex->lock_word, -#ifdef UNIV_SYNC_DEBUG - mutex->file_name, (ulong) mutex->line, -#endif /* UNIV_SYNC_DEBUG */ - (ulong) mutex->waiters); - - } else if (type == RW_LOCK_EX - || type == RW_LOCK_WAIT_EX - || type == RW_LOCK_SHARED) { - - switch(type) { - case RW_LOCK_EX: fputs("X-lock on", file); break; - case RW_LOCK_WAIT_EX: fputs("wait-X-lock on", file); break; - default: fputs("S-lock on", file); break; - } - - rwlock = cell->old_wait_rw_lock; - - fprintf(file, - " RW-latch at %p created in file %s line %lu\n", - (void*) rwlock, rwlock->cfile_name, - (ulong) rwlock->cline); - writer = rw_lock_get_writer(rwlock); - if (writer != RW_LOCK_NOT_LOCKED) { - fprintf(file, - "a writer (thread id %lu) has" - " reserved it in mode %s", - (ulong) os_thread_pf(rwlock->writer_thread), - writer == RW_LOCK_EX - ? 
" exclusive\n" - : " wait exclusive\n"); - } - - fprintf(file, - "number of readers %lu, waiters flag %lu, " - "lock_word: %ld\n" - "Last time read locked in file %s line %lu\n" - "Last time write locked in file %s line %lu\n", - (ulong) rw_lock_get_reader_count(rwlock), - (ulong) rwlock->waiters, - rwlock->lock_word, - rwlock->last_s_file_name, - (ulong) rwlock->last_s_line, - rwlock->last_x_file_name, - (ulong) rwlock->last_x_line); - } else { - ut_error; - } - - if (!cell->waiting) { - fputs("wait has ended\n", file); - } -} - -#ifdef UNIV_SYNC_DEBUG -/********************************************************************** -Looks for a cell with the given thread id. */ -static -sync_cell_t* -sync_array_find_thread( -/*===================*/ - /* out: pointer to cell or NULL - if not found */ - sync_array_t* arr, /* in: wait array */ - os_thread_id_t thread) /* in: thread id */ -{ - ulint i; - sync_cell_t* cell; - - for (i = 0; i < arr->n_cells; i++) { - - cell = sync_array_get_nth_cell(arr, i); - - if (cell->wait_object != NULL - && os_thread_eq(cell->thread, thread) - && cell->waiting)) { - - return(cell); /* Found */ - } - } - - return(NULL); /* Not found */ -} - -/********************************************************************** -Recursion step for deadlock detection. */ -static -ibool -sync_array_deadlock_step( -/*=====================*/ - /* out: TRUE if deadlock detected */ - sync_array_t* arr, /* in: wait array; NOTE! the caller must - own the mutex to array */ - sync_cell_t* start, /* in: cell where recursive search - started */ - os_thread_id_t thread, /* in: thread to look at */ - ulint pass, /* in: pass value */ - ulint depth) /* in: recursion depth */ -{ - sync_cell_t* new; - ibool ret; - - depth++; - - if (pass != 0) { - /* If pass != 0, then we do not know which threads are - responsible of releasing the lock, and no deadlock can - be detected. 
*/ - - return(FALSE); - } - - new = sync_array_find_thread(arr, thread); - - if (new == start) { - /* Stop running of other threads */ - - ut_dbg_stop_threads = TRUE; - - /* Deadlock */ - fputs("########################################\n" - "DEADLOCK of threads detected!\n", stderr); - - return(TRUE); - - } else if (new) { - ret = sync_array_detect_deadlock(arr, start, new, depth); - - if (ret) { - return(TRUE); - } - } - return(FALSE); -} - -/********************************************************************** -This function is called only in the debug version. Detects a deadlock -of one or more threads because of waits of semaphores. */ -static -ibool -sync_array_detect_deadlock( -/*=======================*/ - /* out: TRUE if deadlock detected */ - sync_array_t* arr, /* in: wait array; NOTE! the caller must - own the mutex to array */ - sync_cell_t* start, /* in: cell where recursive search started */ - sync_cell_t* cell, /* in: cell to search */ - ulint depth) /* in: recursion depth */ -{ - mutex_t* mutex; - rw_lock_t* lock; - os_thread_id_t thread; - ibool ret; - rw_lock_debug_t*debug; - - ut_a(arr); - ut_a(start); - ut_a(cell); - ut_ad(cell->wait_object); - ut_ad(os_thread_get_curr_id() == start->thread); - ut_ad(depth < 100); - - depth++; - - if (!cell->waiting) { - - return(FALSE); /* No deadlock here */ - } - - if (cell->request_type == SYNC_MUTEX) { - - mutex = cell->wait_object; - - if (mutex_get_lock_word(mutex) != 0) { - - thread = mutex->thread_id; - - /* Note that mutex->thread_id above may be - also OS_THREAD_ID_UNDEFINED, because the - thread which held the mutex maybe has not - yet updated the value, or it has already - released the mutex: in this case no deadlock - can occur, as the wait array cannot contain - a thread with ID_UNDEFINED value. 
*/ - - ret = sync_array_deadlock_step(arr, start, thread, 0, - depth); - if (ret) { - fprintf(stderr, - "Mutex %p owned by thread %lu file %s line %lu\n", - mutex, (ulong) os_thread_pf(mutex->thread_id), - mutex->file_name, (ulong) mutex->line); - sync_array_cell_print(stderr, cell); - - return(TRUE); - } - } - - return(FALSE); /* No deadlock */ - - } else if (cell->request_type == RW_LOCK_EX - || cell->request_type == RW_LOCK_WAIT_EX) { - - lock = cell->wait_object; - - debug = UT_LIST_GET_FIRST(lock->debug_list); - - while (debug != NULL) { - - thread = debug->thread_id; - - if (((debug->lock_type == RW_LOCK_EX) - && !os_thread_eq(thread, cell->thread)) - || ((debug->lock_type == RW_LOCK_WAIT_EX) - && !os_thread_eq(thread, cell->thread)) - || (debug->lock_type == RW_LOCK_SHARED)) { - - /* The (wait) x-lock request can block - infinitely only if someone (can be also cell - thread) is holding s-lock, or someone - (cannot be cell thread) (wait) x-lock, and - he is blocked by start thread */ - - ret = sync_array_deadlock_step( - arr, start, thread, debug->pass, - depth); - if (ret) { -print: - fprintf(stderr, "rw-lock %p ", - (void*) lock); - sync_array_cell_print(stderr, cell); - rw_lock_debug_print(debug); - return(TRUE); - } - } - - debug = UT_LIST_GET_NEXT(list, debug); - } - - return(FALSE); - - } else if (cell->request_type == RW_LOCK_SHARED) { - - lock = cell->wait_object; - debug = UT_LIST_GET_FIRST(lock->debug_list); - - while (debug != NULL) { - - thread = debug->thread_id; - - if ((debug->lock_type == RW_LOCK_EX) - || (debug->lock_type == RW_LOCK_WAIT_EX)) { - - /* The s-lock request can block infinitely - only if someone (can also be cell thread) is - holding (wait) x-lock, and he is blocked by - start thread */ - - ret = sync_array_deadlock_step( - arr, start, thread, debug->pass, - depth); - if (ret) { - goto print; - } - } - - debug = UT_LIST_GET_NEXT(list, debug); - } - - return(FALSE); - - } else { - ut_error; - } - - return(TRUE); /* Execution never 
reaches this line: for compiler - fooling only */ -} -#endif /* UNIV_SYNC_DEBUG */ - -/********************************************************************** -Determines if we can wake up the thread waiting for a sempahore. */ -static -ibool -sync_arr_cell_can_wake_up( -/*======================*/ - sync_cell_t* cell) /* in: cell to search */ -{ - mutex_t* mutex; - rw_lock_t* lock; - - if (cell->request_type == SYNC_MUTEX) { - - mutex = cell->wait_object; - - if (mutex_get_lock_word(mutex) == 0) { - - return(TRUE); - } - - } else if (cell->request_type == RW_LOCK_EX) { - - lock = cell->wait_object; - - /* X_LOCK_DECR is the unlocked state */ - if (lock->lock_word == X_LOCK_DECR) { - - return(TRUE); - } - - } else if (cell->request_type == RW_LOCK_WAIT_EX) { - - lock = cell->wait_object; - - /* lock_word == 0 means all readers have left */ - if (lock->lock_word == 0) { - - return(TRUE); - } - } else if (cell->request_type == RW_LOCK_SHARED) { - lock = cell->wait_object; - - /* lock_word > 0 means no writer or reserved writer */ - if (lock->lock_word > 0) { - - return(TRUE); - } - } - - return(FALSE); -} - -/********************************************************************** -Frees the cell. NOTE! sync_array_wait_event frees the cell -automatically! */ - -void -sync_array_free_cell( -/*=================*/ - sync_array_t* arr, /* in: wait array */ - ulint index) /* in: index of the cell in array */ -{ - sync_cell_t* cell; - - sync_array_enter(arr); - - cell = sync_array_get_nth_cell(arr, index); - - ut_a(cell->wait_object != NULL); - - cell->waiting = FALSE; - cell->wait_object = NULL; - cell->signal_count = 0; - - ut_a(arr->n_reserved > 0); - arr->n_reserved--; - - sync_array_exit(arr); -} - -/************************************************************************** -Increments the signalled count. 
*/ - -void -sync_array_object_signalled( -/*========================*/ - sync_array_t* arr) /* in: wait array */ -{ -#ifdef UNIV_SYNC_ATOMIC - (void)os_atomic_increment((volatile lint *)&(arr->sg_count), 1); -#else - sync_array_enter(arr); - - arr->sg_count++; - - sync_array_exit(arr); -#endif -} - -/************************************************************************** -If the wakeup algorithm does not work perfectly at semaphore relases, -this function will do the waking (see the comment in mutex_exit). This -function should be called about every 1 second in the server. - -Note that there's a race condition between this thread and mutex_exit -changing the lock_word and calling signal_object, so sometimes this finds -threads to wake up even when nothing has gone wrong. */ - -void -sync_arr_wake_threads_if_sema_free(void) -/*====================================*/ -{ - sync_array_t* arr = sync_primary_wait_array; - sync_cell_t* cell; - ulint count; - ulint i; - os_event_t event; - - sync_array_enter(arr); - - i = 0; - count = 0; - - while (count < arr->n_reserved) { - - cell = sync_array_get_nth_cell(arr, i); - i++; - - if (cell->wait_object == NULL) { - continue; - } - count++; - - if (!cell->waiting) { - continue; - } - - if (sync_arr_cell_can_wake_up(cell)) { - - event = sync_cell_get_event(cell); - - os_event_set(event); - sync_wake_ups++; - } - - } - - sync_array_exit(arr); -} - -/************************************************************************** -Prints warnings of long semaphore waits to stderr. 
*/ - -ibool -sync_array_print_long_waits(void) -/*=============================*/ - /* out: TRUE if fatal semaphore wait threshold - was exceeded */ -{ - sync_cell_t* cell; - ibool old_val; - ibool noticed = FALSE; - ulint i; - ulint fatal_timeout = srv_fatal_semaphore_wait_threshold; - ibool fatal = FALSE; - - for (i = 0; i < sync_primary_wait_array->n_cells; i++) { - - cell = sync_array_get_nth_cell(sync_primary_wait_array, i); - - if (cell->wait_object != NULL && cell->waiting - && difftime(time(NULL), cell->reservation_time) > 240) { - fputs("InnoDB: Warning: a long semaphore wait:\n", - stderr); - sync_array_cell_print(stderr, cell); - noticed = TRUE; - } - - if (cell->wait_object != NULL && cell->waiting - && difftime(time(NULL), cell->reservation_time) - > fatal_timeout) { - fatal = TRUE; - } - } - - if (noticed) { - fprintf(stderr, - "InnoDB: ###### Starts InnoDB Monitor" - " for 30 secs to print diagnostic info:\n"); - old_val = srv_print_innodb_monitor; - - /* If some crucial semaphore is reserved, then also the InnoDB - Monitor can hang, and we do not get diagnostics. Since in - many cases an InnoDB hang is caused by a pwrite() or a pread() - call hanging inside the operating system, let us print right - now the values of pending calls of these. */ - - fprintf(stderr, - "InnoDB: Pending preads %lu, pwrites %lu\n", - (ulong)os_file_n_pending_preads, - (ulong)os_file_n_pending_pwrites); - - srv_print_innodb_monitor = TRUE; - os_event_set(srv_lock_timeout_thread_event); - - os_thread_sleep(30000000); - - srv_print_innodb_monitor = old_val; - fprintf(stderr, - "InnoDB: ###### Diagnostic info printed" - " to the standard error stream\n"); - } - - return(fatal); -} - -/************************************************************************** -Prints info of the wait array. */ -static -void -sync_array_output_info( -/*===================*/ - FILE* file, /* in: file where to print */ - sync_array_t* arr) /* in: wait array; NOTE! 
caller must own the - mutex */ -{ - sync_cell_t* cell; - ulint count; - ulint i; - - fprintf(file, - "OS WAIT ARRAY INFO: reservation count %ld, signal count %ld\n", - (long) arr->res_count, (long) arr->sg_count); - i = 0; - count = 0; - - while (count < arr->n_reserved) { - - cell = sync_array_get_nth_cell(arr, i); - - if (cell->wait_object != NULL) { - count++; - sync_array_cell_print(file, cell); - } - - i++; - } -} - -/************************************************************************** -Prints info of the wait array. */ - -void -sync_array_print_info( -/*==================*/ - FILE* file, /* in: file where to print */ - sync_array_t* arr) /* in: wait array */ -{ - sync_array_enter(arr); - - sync_array_output_info(file, arr); - - sync_array_exit(arr); -} diff --git a/storage/innobase/sync/sync0rw.c b/storage/innobase/sync/sync0rw.c deleted file mode 100644 index 2fcf75009a6..00000000000 --- a/storage/innobase/sync/sync0rw.c +++ /dev/null @@ -1,997 +0,0 @@ -/****************************************************** -The read-write lock (for thread synchronization) - -(c) 1995 Innobase Oy - -Created 9/11/1995 Heikki Tuuri -*******************************************************/ - -#include "sync0rw.h" -#ifdef UNIV_NONINL -#include "sync0rw.ic" -#endif - -#include "os0thread.h" -#include "mem0mem.h" -#include "srv0srv.h" - -/* - IMPLEMENTATION OF THE RW_LOCK - ============================= -The status of a rw_lock is held in lock_word. The initial value of lock_word is -X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR -for each x-lock. This describes the lock state for each value of lock_word: - -lock_word == X_LOCK_DECR: Unlocked. -0 < lock_word < X_LOCK_DECR: Read locked, no waiting writers. - (X_LOCK_DECR - lock_word) is the - number of readers that hold the lock. -lock_word == 0: Write locked --X_LOCK_DECR < lock_word < 0: Read locked, with a waiting writer. - (-lock_word) is the number of readers - that hold the lock. 
-lock_word <= -X_LOCK_DECR: Recursively write locked. lock_word has been - decremented by X_LOCK_DECR once for each lock, - so the number of locks is: - ((-lock_word) / X_LOCK_DECR) + 1 -When lock_word <= -X_LOCK_DECR, we also know that lock_word % X_LOCK_DECR == 0: -other values of lock_word are invalid. - -The lock_word is always read and updated atomically and consistently, so that -it always represents the state of the lock, and the state of the lock changes -with a single atomic operation. This lock_word holds all of the information -that a thread needs in order to determine if it is eligible to gain the lock -or if it must spin or sleep. The one exception to this is that writer_thread -must be verified before recursive write locks: to solve this scenario, we make -writer_thread readable by all threads, but only writeable by the x-lock holder. - -The other members of the lock obey the following rules to remain consistent: - -pass: This is only set to 1 to prevent recursive x-locks. It must - be set as specified by x_lock caller after the lock_word - indicates that the thread holds the lock, but before that - thread resumes execution. It must also be set to 1 during the - final x_unlock, but before the lock_word status is updated. - When an x_lock or move_ownership call wishes to change - pass, it must first update the writer_thread appropriately. -writer_thread: Must be set to the writers thread_id after the lock_word - indicates that the thread holds the lock, but before that - thread resumes execution. writer_thread may be invalid and - should not be read when pass == 1. A thread trying to become - writer never reads its own stale writer_thread, since it sets - pass during its previous unlock call. -waiters: May be set to 1 anytime, but to avoid unnecessary wake-up - signals, it should only be set to 1 when there are threads - waiting on event. 
Must be 1 when a writer starts waiting to - ensure the current x-locking thread sends a wake-up signal - during unlock. May only be reset to 0 immediately before - a wake-up signal is sent to event. -event: Threads wait on event for read or write lock when another - thread has an x-lock or an x-lock reservation (wait_ex). A - thread may only wait on event after performing the following - actions in order: - (1) Record the counter value of event (with os_event_reset). - (2) Set waiters to 1. - (3) Verify lock_word <= 0. - (1) must come before (2) to ensure signal is not missed. - (2) must come before (3) to ensure a signal is sent. - These restrictions force the above ordering. - Immediately before sending the wake-up signal, we should: - (1) Verify lock_word == X_LOCK_DECR (unlocked) - (2) Reset waiters to 0. -wait_ex_event: A thread may only wait on the wait_ex_event after it has - performed the following actions in order: - (1) Decrement lock_word by X_LOCK_DECR. - (2) Record counter value of wait_ex_event (os_event_reset, - called from sync_array_reserve_cell). - (3) Verify that lock_word < 0. - (1) must come first to ensure no other threads become reader - or next writer, and to notify the unlocker that a signal must be sent. - (2) must come before (3) to ensure the signal is not missed. - These restrictions force the above ordering. 
- Immediately before sending the wake-up signal, we should: - Verify lock_word == 0 (waiting thread holds x_lock) -*/ - - -/* number of spin waits on rw-latches, -resulted during shared (read) locks */ -ib_longlong rw_s_spin_wait_count = 0; -ib_longlong rw_s_spin_round_count = 0; - -/* number of OS waits on rw-latches, -resulted during shared (read) locks */ -ib_longlong rw_s_os_wait_count = 0; - -/* number of unlocks (that unlock shared locks), -set only when UNIV_SYNC_PERF_STAT is defined */ -ib_longlong rw_s_exit_count = 0; - -/* number of spin waits on rw-latches, -resulted during exclusive (write) locks */ -ib_longlong rw_x_spin_wait_count = 0; -ib_longlong rw_x_spin_round_count = 0; - -/* number of OS waits on rw-latches, -resulted during exclusive (write) locks */ -ib_longlong rw_x_os_wait_count = 0; - -/* number of unlocks (that unlock exclusive locks), -set only when UNIV_SYNC_PERF_STAT is defined */ -ib_longlong rw_x_exit_count = 0; - -/* The global list of rw-locks */ -rw_lock_list_t rw_lock_list; -mutex_t rw_lock_list_mutex; - -#ifdef UNIV_SYNC_DEBUG -/* The global mutex which protects debug info lists of all rw-locks. -To modify the debug info list of an rw-lock, this mutex has to be -acquired in addition to the mutex protecting the lock. */ - -mutex_t rw_lock_debug_mutex; -os_event_t rw_lock_debug_event; /* If deadlock detection does not - get immediately the mutex, it may - wait for this event */ -ibool rw_lock_debug_waiters; /* This is set to TRUE, if there may - be waiters for the event */ - -/********************************************************************** -Creates a debug info struct. */ -static -rw_lock_debug_t* -rw_lock_debug_create(void); -/*======================*/ -/********************************************************************** -Frees a debug info struct. 
*/ -static -void -rw_lock_debug_free( -/*===============*/ - rw_lock_debug_t* info); - -/********************************************************************** -Creates a debug info struct. */ -static -rw_lock_debug_t* -rw_lock_debug_create(void) -/*======================*/ -{ - return((rw_lock_debug_t*) mem_alloc(sizeof(rw_lock_debug_t))); -} - -/********************************************************************** -Frees a debug info struct. */ -static -void -rw_lock_debug_free( -/*===============*/ - rw_lock_debug_t* info) -{ - mem_free(info); -} -#endif /* UNIV_SYNC_DEBUG */ - -/********************************************************************** -Creates, or rather, initializes an rw-lock object in a specified memory -location (which must be appropriately aligned). The rw-lock is initialized -to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free -is necessary only if the memory block containing it is freed. */ - -void -rw_lock_create_func( -/*================*/ - rw_lock_t* lock, /* in: pointer to memory */ -#ifdef UNIV_DEBUG -# ifdef UNIV_SYNC_DEBUG - ulint level, /* in: level */ -# endif /* UNIV_SYNC_DEBUG */ - const char* cmutex_name, /* in: mutex name */ -#endif /* UNIV_DEBUG */ - const char* cfile_name, /* in: file name where created */ - ulint cline) /* in: file line where created */ -{ - /* If this is the very first time a synchronization object is - created, then the following call initializes the sync system. 
*/ - -#ifndef UNIV_SYNC_ATOMIC - mutex_create(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK); - - lock->mutex.cfile_name = cfile_name; - lock->mutex.cline = cline; - -#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP - lock->mutex.cmutex_name = cmutex_name; - lock->mutex.mutex_type = 1; -#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ - -#endif /* UNIV_SYNC_ATOMIC */ - - lock->lock_word = X_LOCK_DECR; - lock->waiters = 0; - lock->pass = 1; - /* We do not have to initialize writer_thread until pass == 0 */ - -#ifdef UNIV_SYNC_DEBUG - UT_LIST_INIT(lock->debug_list); - - lock->level = level; -#endif /* UNIV_SYNC_DEBUG */ - - lock->magic_n = RW_LOCK_MAGIC_N; - - lock->cfile_name = cfile_name; - lock->cline = (unsigned int) cline; - - lock->count_os_wait = 0; - lock->last_s_file_name = "not yet reserved"; - lock->last_x_file_name = "not yet reserved"; - lock->last_s_line = 0; - lock->last_x_line = 0; - lock->event = os_event_create(NULL); - lock->wait_ex_event = os_event_create(NULL); - - mutex_enter(&rw_lock_list_mutex); - - if (UT_LIST_GET_LEN(rw_lock_list) > 0) { - ut_a(UT_LIST_GET_FIRST(rw_lock_list)->magic_n - == RW_LOCK_MAGIC_N); - } - - UT_LIST_ADD_FIRST(list, rw_lock_list, lock); - - mutex_exit(&rw_lock_list_mutex); -} - -/********************************************************************** -Calling this function is obligatory only if the memory buffer containing -the rw-lock is freed. Removes an rw-lock object from the global list. The -rw-lock is checked to be in the non-locked state. 
*/ - -void -rw_lock_free( -/*=========*/ - rw_lock_t* lock) /* in: rw-lock */ -{ - ut_ad(rw_lock_validate(lock)); - ut_a(lock->lock_word == X_LOCK_DECR); - - lock->magic_n = 0; - -#ifndef UNIV_SYNC_ATOMIC - mutex_free(rw_lock_get_mutex(lock)); -#endif /* UNIV_SYNC_ATOMIC */ - - mutex_enter(&rw_lock_list_mutex); - os_event_free(lock->event); - - os_event_free(lock->wait_ex_event); - - if (UT_LIST_GET_PREV(list, lock)) { - ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N); - } - if (UT_LIST_GET_NEXT(list, lock)) { - ut_a(UT_LIST_GET_NEXT(list, lock)->magic_n == RW_LOCK_MAGIC_N); - } - - UT_LIST_REMOVE(list, rw_lock_list, lock); - - mutex_exit(&rw_lock_list_mutex); -} - -#ifdef UNIV_DEBUG -/********************************************************************** -Checks that the rw-lock has been initialized and that there are no -simultaneous shared and exclusive locks. */ - -ibool -rw_lock_validate( -/*=============*/ - rw_lock_t* lock) -{ - ut_a(lock); - - ulint waiters = rw_lock_get_waiters(lock); - lint lock_word = lock->lock_word; - - ut_a(lock->magic_n == RW_LOCK_MAGIC_N); - ut_a(waiters == 0 || waiters == 1); - ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0); - - return(TRUE); -} -#endif /* UNIV_DEBUG */ - -/********************************************************************** -Lock an rw-lock in shared mode for the current thread. If the rw-lock is -locked in exclusive mode, or there is an exclusive lock request waiting, -the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting -for the lock, before suspending the thread. 
*/ - -void -rw_lock_s_lock_spin( -/*================*/ - rw_lock_t* lock, /* in: pointer to rw-lock */ - ulint pass, /* in: pass value; != 0, if the lock - will be passed to another thread to unlock */ - const char* file_name, /* in: file name where lock requested */ - ulint line) /* in: line where requested */ -{ - ulint index; /* index of the reserved wait cell */ - ulint i = 0; /* spin round count */ - - ut_ad(rw_lock_validate(lock)); - - rw_s_spin_wait_count++; /* Count calls to this function */ -lock_loop: - - /* Spin waiting for the writer field to become free */ - while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) { - if (srv_spin_wait_delay) { - ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); - } - - i++; - } - - if (i == SYNC_SPIN_ROUNDS) { - os_thread_yield(); - } - - if (srv_print_latch_waits) { - fprintf(stderr, - "Thread %lu spin wait rw-s-lock at %p" - " cfile %s cline %lu rnds %lu\n", - (ulong) os_thread_pf(os_thread_get_curr_id()), - (void*) lock, - lock->cfile_name, (ulong) lock->cline, (ulong) i); - } - - /* We try once again to obtain the lock */ - if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) { - rw_s_spin_round_count += i; - - return; /* Success */ - } else { - - if (i < SYNC_SPIN_ROUNDS) { - goto lock_loop; - } - - rw_s_spin_round_count += i; - - sync_array_reserve_cell(sync_primary_wait_array, - lock, RW_LOCK_SHARED, - file_name, line, - &index); - - /* Set waiters before checking lock_word to ensure wake-up - signal is sent. This may lead to some unnecessary signals. 
*/ - rw_lock_set_waiters(lock); - - if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) { - sync_array_free_cell(sync_primary_wait_array, index); - return; /* Success */ - } - - if (srv_print_latch_waits) { - fprintf(stderr, - "Thread %lu OS wait rw-s-lock at %p" - " cfile %s cline %lu\n", - os_thread_pf(os_thread_get_curr_id()), - (void*) lock, lock->cfile_name, - (ulong) lock->cline); - } - - /* these stats may not be accurate */ - lock->count_os_wait++; - rw_s_os_wait_count++; - - sync_array_wait_event(sync_primary_wait_array, index); - - i = 0; - goto lock_loop; - } -} - -/********************************************************************** -This function is used in the insert buffer to move the ownership of an -x-latch on a buffer frame to the current thread. The x-latch was set by -the buffer read operation and it protected the buffer frame while the -read was done. The ownership is moved because we want that the current -thread is able to acquire a second x-latch which is stored in an mtr. -This, in turn, is needed to pass the debug checks of index page -operations. */ - -void -rw_lock_x_lock_move_ownership( -/*==========================*/ - rw_lock_t* lock) /* in: lock which was x-locked in the - buffer read */ -{ - ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX)); - -#ifdef UNIV_SYNC_ATOMIC - lock->writer_thread = os_thread_get_curr_id(); - os_memory_barrier_store(); - lock->pass = 0; -#else /* UNIV_SYNC_ATOMIC */ - mutex_enter(&(lock->mutex)); - lock->writer_thread = os_thread_get_curr_id(); - lock->pass = 0; - mutex_exit(&(lock->mutex)); -#endif /* UNIV_SYNC_ATOMIC */ -} - -/********************************************************************** -Function for the next writer to call. Waits for readers to exit. 
-The caller must have already decremented lock_word by X_LOCK_DECR.*/ -UNIV_INLINE -void -rw_lock_x_lock_wait( -/*================*/ - rw_lock_t* lock, /* in: pointer to rw-lock */ -#ifdef UNIV_SYNC_DEBUG - ulint pass, /* in: pass value; != 0, if the lock will - be passed to another thread to unlock */ -#endif - const char* file_name,/* in: file name where lock requested */ - ulint line) /* in: line where requested */ -{ - ulint index; - ulint i = 0; - - ut_ad(lock->lock_word <= 0); - - while (lock->lock_word < 0) { - if (srv_spin_wait_delay) { - ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); - } - if(i < SYNC_SPIN_ROUNDS) { - i++; - continue; - } - - /* If there is still a reader, then go to sleep.*/ - rw_x_spin_round_count += i; - i = 0; - sync_array_reserve_cell(sync_primary_wait_array, - lock, - RW_LOCK_WAIT_EX, - file_name, line, - &index); - /* Check lock_word to ensure wake-up isn't missed.*/ - if(lock->lock_word < 0) { - - /* these stats may not be accurate */ - lock->count_os_wait++; - rw_x_os_wait_count++; - - /* Add debug info as it is needed to detect possible - deadlock. We must add info for WAIT_EX thread for - deadlock detection to work properly. */ -#ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX, - file_name, line); -#endif - - sync_array_wait_event(sync_primary_wait_array, - index); -#ifdef UNIV_SYNC_DEBUG - rw_lock_remove_debug_info(lock, pass, - RW_LOCK_WAIT_EX); -#endif - /* It is possible to wake when lock_word < 0. - We must pass the while-loop check to proceed.*/ - } else { - sync_array_free_cell(sync_primary_wait_array, - index); - } - } - rw_x_spin_round_count += i; -} - -/********************************************************************** -Low-level function for acquiring an exclusive lock. */ -UNIV_INLINE -ibool -rw_lock_x_lock_low( -/*===============*/ - /* out: RW_LOCK_NOT_LOCKED if did - not succeed, RW_LOCK_EX if success. 
*/ - rw_lock_t* lock, /* in: pointer to rw-lock */ - ulint pass, /* in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/* in: file name where lock requested */ - ulint line) /* in: line where requested */ -{ - os_thread_id_t curr_thread = os_thread_get_curr_id(); - - if(rw_lock_lock_word_decr(lock, X_LOCK_DECR)) { - ut_ad(lock->pass); - - /* Decrement occurred: we are writer or next-writer. */ - lock->writer_thread = curr_thread; - lock->pass = pass; - rw_lock_x_lock_wait(lock, -#ifdef UNIV_SYNC_DEBUG - pass, -#endif - file_name, line); - - } else { - /* Decrement failed: relock or failed lock */ - /* Must verify pass first: otherwise another thread can - call move_ownership suddenly allowing recursive locks. - and after we have verified our thread_id matches - (though move_ownership has since changed it).*/ - if(!pass && !(lock->pass) && - os_thread_eq(lock->writer_thread, curr_thread)) { - /* Relock */ - lock->lock_word -= X_LOCK_DECR; - } else { - /* Another thread locked before us */ - return(FALSE); - } - } -#ifdef UNIV_SYNC_DEBUG - rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, - file_name, line); -#endif - lock->last_x_file_name = file_name; - lock->last_x_line = (unsigned int) line; - - return(TRUE); -} - -/********************************************************************** -NOTE! Use the corresponding macro, not directly this function! Lock an -rw-lock in exclusive mode for the current thread. If the rw-lock is locked -in shared or exclusive mode, or there is an exclusive lock request waiting, -the function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting -for the lock before suspending the thread. If the same thread has an x-lock -on the rw-lock, locking succeed, with the following exception: if pass != 0, -only a single x-lock may be taken on the lock. NOTE: If the same thread has -an s-lock, locking does not succeed! 
*/ - -void -rw_lock_x_lock_func( -/*================*/ - rw_lock_t* lock, /* in: pointer to rw-lock */ - ulint pass, /* in: pass value; != 0, if the lock will - be passed to another thread to unlock */ - const char* file_name,/* in: file name where lock requested */ - ulint line) /* in: line where requested */ -{ - ulint index; /* index of the reserved wait cell */ - ulint i; /* spin round count */ - ibool spinning = FALSE; - - ut_ad(rw_lock_validate(lock)); - - i = 0; - -lock_loop: - - if (rw_lock_x_lock_low(lock, pass, file_name, line)) { - rw_x_spin_round_count += i; - - return; /* Locking succeeded */ - - } else { - - if (!spinning) { - spinning = TRUE; - rw_x_spin_wait_count++; - } - - /* Spin waiting for the lock_word to become free */ - while (i < SYNC_SPIN_ROUNDS - && lock->lock_word <= 0) { - if (srv_spin_wait_delay) { - ut_delay(ut_rnd_interval(0, - srv_spin_wait_delay)); - } - - i++; - } - if (i == SYNC_SPIN_ROUNDS) { - os_thread_yield(); - } else { - goto lock_loop; - } - } - - rw_x_spin_round_count += i; - - if (srv_print_latch_waits) { - fprintf(stderr, - "Thread %lu spin wait rw-x-lock at %p" - " cfile %s cline %lu rnds %lu\n", - os_thread_pf(os_thread_get_curr_id()), (void*) lock, - lock->cfile_name, (ulong) lock->cline, (ulong) i); - } - - sync_array_reserve_cell(sync_primary_wait_array, - lock, - RW_LOCK_EX, - file_name, line, - &index); - - /* Waiters must be set before checking lock_word, to ensure signal - is sent. This could lead to a few unnecessary wake-up signals. 
*/ - rw_lock_set_waiters(lock); - - if (rw_lock_x_lock_low(lock, pass, file_name, line)) { - sync_array_free_cell(sync_primary_wait_array, index); - return; /* Locking succeeded */ - } - - if (srv_print_latch_waits) { - fprintf(stderr, - "Thread %lu OS wait for rw-x-lock at %p" - " cfile %s cline %lu\n", - os_thread_pf(os_thread_get_curr_id()), (void*) lock, - lock->cfile_name, (ulong) lock->cline); - } - - /* these stats may not be accurate */ - lock->count_os_wait++; - rw_x_os_wait_count++; - - sync_array_wait_event(sync_primary_wait_array, index); - - i = 0; - goto lock_loop; -} - -#ifdef UNIV_SYNC_DEBUG -/********************************************************************** -Acquires the debug mutex. We cannot use the mutex defined in sync0sync, -because the debug mutex is also acquired in sync0arr while holding the OS -mutex protecting the sync array, and the ordinary mutex_enter might -recursively call routines in sync0arr, leading to a deadlock on the OS -mutex. */ - -void -rw_lock_debug_mutex_enter(void) -/*==========================*/ -{ -loop: - if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) { - return; - } - - os_event_reset(rw_lock_debug_event); - - rw_lock_debug_waiters = TRUE; - - if (0 == mutex_enter_nowait(&rw_lock_debug_mutex)) { - return; - } - - os_event_wait(rw_lock_debug_event); - - goto loop; -} - -/********************************************************************** -Releases the debug mutex. */ - -void -rw_lock_debug_mutex_exit(void) -/*==========================*/ -{ - mutex_exit(&rw_lock_debug_mutex); - - if (rw_lock_debug_waiters) { - rw_lock_debug_waiters = FALSE; - os_event_set(rw_lock_debug_event); - } -} - -/********************************************************************** -Inserts the debug information for an rw-lock. 
*/ - -void -rw_lock_add_debug_info( -/*===================*/ - rw_lock_t* lock, /* in: rw-lock */ - ulint pass, /* in: pass value */ - ulint lock_type, /* in: lock type */ - const char* file_name, /* in: file where requested */ - ulint line) /* in: line where requested */ -{ - rw_lock_debug_t* info; - - ut_ad(lock); - ut_ad(file_name); - - info = rw_lock_debug_create(); - - rw_lock_debug_mutex_enter(); - - info->file_name = file_name; - info->line = line; - info->lock_type = lock_type; - info->thread_id = os_thread_get_curr_id(); - info->pass = pass; - - UT_LIST_ADD_FIRST(list, lock->debug_list, info); - - rw_lock_debug_mutex_exit(); - - if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) { - sync_thread_add_level(lock, lock->level); - } -} - -/********************************************************************** -Removes a debug information struct for an rw-lock. */ - -void -rw_lock_remove_debug_info( -/*======================*/ - rw_lock_t* lock, /* in: rw-lock */ - ulint pass, /* in: pass value */ - ulint lock_type) /* in: lock type */ -{ - rw_lock_debug_t* info; - - ut_ad(lock); - - if ((pass == 0) && (lock_type != RW_LOCK_WAIT_EX)) { - sync_thread_reset_level(lock); - } - - rw_lock_debug_mutex_enter(); - - info = UT_LIST_GET_FIRST(lock->debug_list); - - while (info != NULL) { - if ((pass == info->pass) - && ((pass != 0) - || os_thread_eq(info->thread_id, - os_thread_get_curr_id())) - && (info->lock_type == lock_type)) { - - /* Found! */ - UT_LIST_REMOVE(list, lock->debug_list, info); - rw_lock_debug_mutex_exit(); - - rw_lock_debug_free(info); - - return; - } - - info = UT_LIST_GET_NEXT(list, info); - } - - ut_error; -} -#endif /* UNIV_SYNC_DEBUG */ - -#ifdef UNIV_SYNC_DEBUG -/********************************************************************** -Checks if the thread has locked the rw-lock in the specified mode, with -the pass value == 0. 
*/ - -ibool -rw_lock_own( -/*========*/ - /* out: TRUE if locked */ - rw_lock_t* lock, /* in: rw-lock */ - ulint lock_type) /* in: lock type: RW_LOCK_SHARED, - RW_LOCK_EX */ -{ - rw_lock_debug_t* info; - - ut_ad(lock); - ut_ad(rw_lock_validate(lock)); - - rw_lock_debug_mutex_enter(); - - info = UT_LIST_GET_FIRST(lock->debug_list); - - while (info != NULL) { - - if (os_thread_eq(info->thread_id, os_thread_get_curr_id()) - && (info->pass == 0) - && (info->lock_type == lock_type)) { - - rw_lock_debug_mutex_exit(); - /* Found! */ - - return(TRUE); - } - - info = UT_LIST_GET_NEXT(list, info); - } - rw_lock_debug_mutex_exit(); - - return(FALSE); -} -#endif /* UNIV_SYNC_DEBUG */ - -/********************************************************************** -Checks if somebody has locked the rw-lock in the specified mode. */ - -ibool -rw_lock_is_locked( -/*==============*/ - /* out: TRUE if locked */ - rw_lock_t* lock, /* in: rw-lock */ - ulint lock_type) /* in: lock type: RW_LOCK_SHARED, - RW_LOCK_EX */ -{ - ibool ret = FALSE; - - ut_ad(lock); - ut_ad(rw_lock_validate(lock)); - - if (lock_type == RW_LOCK_SHARED) { - if (rw_lock_get_reader_count(lock) > 0) { - ret = TRUE; - } - } else if (lock_type == RW_LOCK_EX) { - if (rw_lock_get_writer(lock) == RW_LOCK_EX) { - ret = TRUE; - } - } else { - ut_error; - } - - return(ret); -} - -#ifdef UNIV_SYNC_DEBUG -/******************************************************************* -Prints debug info of currently locked rw-locks. 
*/ - -void -rw_lock_list_print_info( -/*====================*/ - FILE* file) /* in: file where to print */ -{ - rw_lock_t* lock; - ulint count = 0; - rw_lock_debug_t* info; - - mutex_enter(&rw_lock_list_mutex); - - fputs("-------------\n" - "RW-LATCH INFO\n" - "-------------\n", file); - - lock = UT_LIST_GET_FIRST(rw_lock_list); - - while (lock != NULL) { - - count++; - -#ifndef UNIV_SYNC_ATOMIC - mutex_enter(&(lock->mutex)); -#endif - if (lock->lock_word != X_LOCK_DECR) { - - fprintf(file, "RW-LOCK: %p ", (void*) lock); - - if (rw_lock_get_waiters(lock)) { - fputs(" Waiters for the lock exist\n", file); - } else { - putc('\n', file); - } - - info = UT_LIST_GET_FIRST(lock->debug_list); - while (info != NULL) { - rw_lock_debug_print(info); - info = UT_LIST_GET_NEXT(list, info); - } - } -#ifndef UNIV_SYNC_ATOMIC - mutex_exit(&(lock->mutex)); -#endif - - lock = UT_LIST_GET_NEXT(list, lock); - } - - fprintf(file, "Total number of rw-locks %ld\n", count); - mutex_exit(&rw_lock_list_mutex); -} - -/******************************************************************* -Prints debug info of an rw-lock. */ - -void -rw_lock_print( -/*==========*/ - rw_lock_t* lock) /* in: rw-lock */ -{ - rw_lock_debug_t* info; - - fprintf(stderr, - "-------------\n" - "RW-LATCH INFO\n" - "RW-LATCH: %p ", (void*) lock); - -#ifndef UNIV_SYNC_ATOMIC - mutex_enter(&(lock->mutex)); -#endif - if (lock->lock_word != X_LOCK_DECR) { - - if (rw_lock_get_waiters(lock)) { - fputs(" Waiters for the lock exist\n", stderr); - } else { - putc('\n', stderr); - } - - info = UT_LIST_GET_FIRST(lock->debug_list); - while (info != NULL) { - rw_lock_debug_print(info); - info = UT_LIST_GET_NEXT(list, info); - } - } -#ifndef UNIV_SYNC_ATOMIC - mutex_exit(&(lock->mutex)); -#endif -} - -/************************************************************************* -Prints info of a debug struct. 
*/ - -void -rw_lock_debug_print( -/*================*/ - rw_lock_debug_t* info) /* in: debug struct */ -{ - ulint rwt; - - rwt = info->lock_type; - - fprintf(stderr, "Locked: thread %ld file %s line %ld ", - (ulong) os_thread_pf(info->thread_id), info->file_name, - (ulong) info->line); - if (rwt == RW_LOCK_SHARED) { - fputs("S-LOCK", stderr); - } else if (rwt == RW_LOCK_EX) { - fputs("X-LOCK", stderr); - } else if (rwt == RW_LOCK_WAIT_EX) { - fputs("WAIT X-LOCK", stderr); - } else { - ut_error; - } - if (info->pass != 0) { - fprintf(stderr, " pass value %lu", (ulong) info->pass); - } - putc('\n', stderr); -} - -/******************************************************************* -Returns the number of currently locked rw-locks. Works only in the debug -version. */ - -ulint -rw_lock_n_locked(void) -/*==================*/ -{ - rw_lock_t* lock; - ulint count = 0; - - mutex_enter(&rw_lock_list_mutex); - - lock = UT_LIST_GET_FIRST(rw_lock_list); - - while (lock != NULL) { - - if (lock->lock_word != X_LOCK_DECR) { - count++; - } - - lock = UT_LIST_GET_NEXT(list, lock); - } - - mutex_exit(&rw_lock_list_mutex); - - return(count); -} -#endif /* UNIV_SYNC_DEBUG */ diff --git a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c deleted file mode 100644 index a8b1ac4926e..00000000000 --- a/storage/innobase/sync/sync0sync.c +++ /dev/null @@ -1,1425 +0,0 @@ -/****************************************************** -Mutex, the basic synchronization primitive - -(c) 1995 Innobase Oy - -Created 9/5/1995 Heikki Tuuri -*******************************************************/ - -#include "sync0sync.h" -#ifdef UNIV_NONINL -#include "sync0sync.ic" -#endif - -#include "sync0rw.h" -#include "buf0buf.h" -#include "srv0srv.h" -#include "buf0types.h" - -/* - REASONS FOR IMPLEMENTING THE SPIN LOCK MUTEX - ============================================ - -Semaphore operations in operating systems are slow: Solaris on a 1993 Sparc -takes 3 microseconds (us) for a lock-unlock 
pair and Windows NT on a 1995 -Pentium takes 20 microseconds for a lock-unlock pair. Therefore, we have to -implement our own efficient spin lock mutex. Future operating systems may -provide efficient spin locks, but we cannot count on that. - -Another reason for implementing a spin lock is that on multiprocessor systems -it can be more efficient for a processor to run a loop waiting for the -semaphore to be released than to switch to a different thread. A thread switch -takes 25 us on both platforms mentioned above. See Gray and Reuter's book -Transaction processing for background. - -How long should the spin loop last before suspending the thread? On a -uniprocessor, spinning does not help at all, because if the thread owning the -mutex is not executing, it cannot be released. Spinning actually wastes -resources. - -On a multiprocessor, we do not know if the thread owning the mutex is -executing or not. Thus it would make sense to spin as long as the operation -guarded by the mutex would typically last assuming that the thread is -executing. If the mutex is not released by that time, we may assume that the -thread owning the mutex is not executing and suspend the waiting thread. - -A typical operation (where no i/o is involved) guarded by a mutex or a read-write -lock may last 1 - 20 us on the current Pentium platform. The longest -operations are the binary searches on an index node. - -We conclude that the best choice is to set the spin time at 20 us. Then the -system should work well on a multiprocessor. On a uniprocessor we have to -make sure that thread switches due to mutex collisions are not frequent, -i.e., they do not happen every 100 us or so, because that wastes too many -resources. If the thread switches are not frequent, the 20 us wasted in spin -loop is not too much. - -Empirical studies on the effect of spin time should be done for different -platforms. 
- - - IMPLEMENTATION OF THE MUTEX - =========================== - -For background, see Curt Schimmel's book on Unix implementation on modern -architectures. The key points in the implementation are atomicity and -serialization of memory accesses. The test-and-set instruction (XCHG in -Pentium) must be atomic. As new processors may have weak memory models, also -serialization of memory references may be necessary. The successor of Pentium, -P6, has at least one mode where the memory model is weak. As far as we know, -in Pentium all memory accesses are serialized in the program order and we do -not have to worry about the memory model. On other processors there are -special machine instructions called a fence, memory barrier, or storage -barrier (STBAR in Sparc), which can be used to serialize the memory accesses -to happen in program order relative to the fence instruction. - -Leslie Lamport has devised a "bakery algorithm" to implement a mutex without -the atomic test-and-set, but his algorithm should be modified for weak memory -models. We do not use Lamport's algorithm, because we guess it is slower than -the atomic test-and-set. - -Our mutex implementation works as follows: First we perform the atomic -test-and-set instruction on the memory word. If the test returns zero, we -know we got the lock first. If the test returns nonzero, some other thread -was quicker and got the lock: then we spin in a loop reading the memory word, -waiting for it to become zero. It is wise to just read the word in the loop, not -perform numerous test-and-set instructions, because they generate memory -traffic between the cache and the main memory. The read loop can just access -the cache, saving bus bandwidth. - -If we cannot acquire the mutex lock in the specified time, we reserve a cell -in the wait array and set the waiters byte in the mutex to 1. 
To avoid a race -condition, after setting the waiters byte and before suspending the waiting -thread, we still have to check that the mutex is reserved, because it may -have happened that the thread which was holding the mutex has just released -it and did not see the waiters byte set to 1, a case which would lead the -other thread to an infinite wait. - -LEMMA 1: After a thread resets the event of a mutex (or rw_lock), some -======= -thread will eventually call os_event_set() on that particular event. -Thus no infinite wait is possible in this case. - -Proof: After making the reservation the thread sets the waiters field in the -mutex to 1. Then it checks that the mutex is still reserved by some thread, -or it reserves the mutex for itself. In any case, some thread (which may be -also some earlier thread, not necessarily the one currently holding the mutex) -will set the waiters field to 0 in mutex_exit, and then call -os_event_set() with the mutex as an argument. -Q.E.D. - -LEMMA 2: If an os_event_set() call is made after some thread has called -======= -the os_event_reset() and before it starts wait on that event, the call -will not be lost to the second thread. This is true even if there is an -intervening call to os_event_reset() by another thread. -Thus no infinite wait is possible in this case. - -Proof (non-windows platforms): os_event_reset() returns a monotonically -increasing value of signal_count. This value is increased at every -call of os_event_set() If thread A has called os_event_reset() followed -by thread B calling os_event_set() and then some other thread C calling -os_event_reset(), the is_set flag of the event will be set to FALSE; -but now if thread A calls os_event_wait_low() with the signal_count -value returned from the earlier call of os_event_reset(), it will -return immediately without waiting. -Q.E.D. 
- -Proof (windows): If there is a writer thread which is forced to wait for -the lock, it may be able to set the state of rw_lock to RW_LOCK_WAIT_EX -The design of rw_lock ensures that there is one and only one thread -that is able to change the state to RW_LOCK_WAIT_EX and this thread is -guaranteed to acquire the lock after it is released by the current -holders and before any other waiter gets the lock. -On windows this thread waits on a separate event i.e.: wait_ex_event. -Since only one thread can wait on this event there is no chance -of this event getting reset before the writer starts wait on it. -Therefore, this thread is guaranteed to catch the os_set_event() -signalled unconditionally at the release of the lock. -Q.E.D. */ - -/* Number of spin waits on mutexes: for performance monitoring */ - -/* round=one iteration of a spin loop */ -ib_longlong mutex_spin_round_count = 0; -ib_longlong mutex_spin_wait_count = 0; -ib_longlong mutex_os_wait_count = 0; -ib_longlong mutex_exit_count = 0; - -/* The global array of wait cells for implementation of the database's own -mutexes and read-write locks */ -sync_array_t* sync_primary_wait_array; - -/* This variable is set to TRUE when sync_init is called */ -ibool sync_initialized = FALSE; - - -typedef struct sync_level_struct sync_level_t; -typedef struct sync_thread_struct sync_thread_t; - -#ifdef UNIV_SYNC_DEBUG -/* The latch levels currently owned by threads are stored in this data -structure; the size of this array is OS_THREAD_MAX_N */ - -sync_thread_t* sync_thread_level_arrays; - -/* Mutex protecting sync_thread_level_arrays */ -mutex_t sync_thread_mutex; -#endif /* UNIV_SYNC_DEBUG */ - -/* Global list of database mutexes (not OS mutexes) created. 
*/ -ut_list_base_node_t mutex_list; - -/* Mutex protecting the mutex_list variable */ -mutex_t mutex_list_mutex; - -#ifdef UNIV_SYNC_DEBUG -/* Latching order checks start when this is set TRUE */ -ibool sync_order_checks_on = FALSE; -#endif /* UNIV_SYNC_DEBUG */ - -struct sync_thread_struct{ - os_thread_id_t id; /* OS thread id */ - sync_level_t* levels; /* level array for this thread; if this is NULL - this slot is unused */ -}; - -/* Number of slots reserved for each OS thread in the sync level array */ -#define SYNC_THREAD_N_LEVELS 10000 - -struct sync_level_struct{ - void* latch; /* pointer to a mutex or an rw-lock; NULL means that - the slot is empty */ - ulint level; /* level of the latch in the latching order */ -}; - -/********************************************************************** -A noninlined function that reserves a mutex. In ha_innodb.cc we have disabled -inlining of InnoDB functions, and no inlined functions should be called from -there. That is why we need to duplicate the inlined function here. */ - -void -mutex_enter_noninline( -/*==================*/ - mutex_t* mutex) /* in: mutex */ -{ - mutex_enter(mutex); -} - -/********************************************************************** -Releases a mutex. */ - -void -mutex_exit_noninline( -/*=================*/ - mutex_t* mutex) /* in: mutex */ -{ - mutex_exit(mutex); -} - -/********************************************************************** -Creates, or rather, initializes a mutex object in a specified memory -location (which must be appropriately aligned). The mutex is initialized -in the reset state. Explicit freeing of the mutex with mutex_free is -necessary only if the memory block containing it is freed. 
*/ - -void -mutex_create_func( -/*==============*/ - mutex_t* mutex, /* in: pointer to memory */ -#ifdef UNIV_DEBUG - const char* cmutex_name, /* in: mutex name */ -# ifdef UNIV_SYNC_DEBUG - ulint level, /* in: level */ -# endif /* UNIV_SYNC_DEBUG */ -#endif /* UNIV_DEBUG */ - const char* cfile_name, /* in: file name where created */ - ulint cline) /* in: file line where created */ -{ -#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER) - mutex_reset_lock_word(mutex); -#elif defined(MY_ATOMIC_NOLOCK) - mutex_reset_lock_word(mutex); -#else - os_fast_mutex_init(&(mutex->os_fast_mutex)); - mutex->lock_word = 0; -#endif - mutex->event = os_event_create(NULL); - mutex_set_waiters(mutex, 0); -#ifdef UNIV_DEBUG - mutex->magic_n = MUTEX_MAGIC_N; -#endif /* UNIV_DEBUG */ -#ifdef UNIV_SYNC_DEBUG - mutex->line = 0; - mutex->file_name = "not yet reserved"; - mutex->level = level; -#endif /* UNIV_SYNC_DEBUG */ - mutex->cfile_name = cfile_name; - mutex->cline = cline; -#ifndef UNIV_HOTBACKUP - mutex->count_os_wait = 0; -# ifdef UNIV_DEBUG - mutex->cmutex_name= cmutex_name; - mutex->count_using= 0; - mutex->mutex_type= 0; - mutex->lspent_time= 0; - mutex->lmax_spent_time= 0; - mutex->count_spin_loop= 0; - mutex->count_spin_rounds= 0; - mutex->count_os_yield= 0; -# endif /* UNIV_DEBUG */ -#endif /* !UNIV_HOTBACKUP */ - - /* Check that lock_word is aligned; this is important on Intel */ - ut_ad(((ulint)(&(mutex->lock_word))) % 4 == 0); - - /* NOTE! 
The very first mutexes are not put to the mutex list */ - - if ((mutex == &mutex_list_mutex) -#ifdef UNIV_SYNC_DEBUG - || (mutex == &sync_thread_mutex) -#endif /* UNIV_SYNC_DEBUG */ - ) { - - return; - } - - mutex_enter(&mutex_list_mutex); - - ut_ad(UT_LIST_GET_LEN(mutex_list) == 0 - || UT_LIST_GET_FIRST(mutex_list)->magic_n == MUTEX_MAGIC_N); - - UT_LIST_ADD_FIRST(list, mutex_list, mutex); - - mutex_exit(&mutex_list_mutex); -} - -/********************************************************************** -Calling this function is obligatory only if the memory buffer containing -the mutex is freed. Removes a mutex object from the mutex list. The mutex -is checked to be in the reset state. */ - -void -mutex_free( -/*=======*/ - mutex_t* mutex) /* in: mutex */ -{ - ut_ad(mutex_validate(mutex)); - ut_a(mutex_get_lock_word(mutex) == 0); - ut_a(mutex_get_waiters(mutex) == 0); - - if (mutex != &mutex_list_mutex -#ifdef UNIV_SYNC_DEBUG - && mutex != &sync_thread_mutex -#endif /* UNIV_SYNC_DEBUG */ - ) { - - mutex_enter(&mutex_list_mutex); - - ut_ad(!UT_LIST_GET_PREV(list, mutex) - || UT_LIST_GET_PREV(list, mutex)->magic_n - == MUTEX_MAGIC_N); - ut_ad(!UT_LIST_GET_NEXT(list, mutex) - || UT_LIST_GET_NEXT(list, mutex)->magic_n - == MUTEX_MAGIC_N); - - UT_LIST_REMOVE(list, mutex_list, mutex); - - mutex_exit(&mutex_list_mutex); - } - - os_event_free(mutex->event); - -#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER) -#elif defined(MY_ATOMIC_NOLOCK) -#else - os_fast_mutex_free(&(mutex->os_fast_mutex)); -#endif - /* If we free the mutex protecting the mutex list (freeing is - not necessary), we have to reset the magic number AFTER removing - it from the list. */ -#ifdef UNIV_DEBUG - mutex->magic_n = 0; -#endif /* UNIV_DEBUG */ -} - -/************************************************************************ -NOTE! Use the corresponding macro in the header file, not this function -directly. Tries to lock the mutex for the current thread. 
If the lock is not -acquired immediately, returns with return value 1. */ - -ulint -mutex_enter_nowait_func( -/*====================*/ - /* out: 0 if succeed, 1 if not */ - mutex_t* mutex, /* in: pointer to mutex */ - const char* file_name __attribute__((unused)), - /* in: file name where mutex - requested */ - ulint line __attribute__((unused))) - /* in: line where requested */ -{ - ut_ad(mutex_validate(mutex)); - - if (!mutex_test_and_set(mutex)) { - - ut_d(mutex->thread_id = os_thread_get_curr_id()); -#ifdef UNIV_SYNC_DEBUG - mutex_set_debug_info(mutex, file_name, line); -#endif - - return(0); /* Succeeded! */ - } - - return(1); -} - -#ifdef UNIV_DEBUG -/********************************************************************** -Checks that the mutex has been initialized. */ - -ibool -mutex_validate( -/*===========*/ - const mutex_t* mutex) -{ - ut_a(mutex); - ut_a(mutex->magic_n == MUTEX_MAGIC_N); - - return(TRUE); -} - -/********************************************************************** -Checks that the current thread owns the mutex. Works only in the debug -version. */ - -ibool -mutex_own( -/*======*/ - /* out: TRUE if owns */ - const mutex_t* mutex) /* in: mutex */ -{ - ut_ad(mutex_validate(mutex)); - - return(mutex_get_lock_word(mutex) == 1 - && os_thread_eq(mutex->thread_id, os_thread_get_curr_id())); -} -#endif /* UNIV_DEBUG */ - -/********************************************************************** -Sets the waiters field in a mutex. */ - -void -mutex_set_waiters( -/*==============*/ - mutex_t* mutex, /* in: mutex */ - ulint n) /* in: value to set */ -{ - volatile ulint* ptr; /* declared volatile to ensure that - the value is stored to memory */ - ut_ad(mutex); - - ptr = &(mutex->waiters); - - *ptr = n; /* Here we assume that the write of a single - word in memory is atomic */ -} - -/********************************************************************** -Reserves a mutex for the current thread. 
If the mutex is reserved, the -function spins a preset time (controlled by SYNC_SPIN_ROUNDS), waiting -for the mutex before suspending the thread. */ - -void -mutex_spin_wait( -/*============*/ - mutex_t* mutex, /* in: pointer to mutex */ - const char* file_name, /* in: file name where mutex - requested */ - ulint line) /* in: line where requested */ -{ - ulint index; /* index of the reserved wait cell */ - ulint i; /* spin round count */ -#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP - ib_longlong lstart_time = 0, lfinish_time; /* for timing os_wait */ - ulint ltime_diff; - ulint sec; - ulint ms; - uint timer_started = 0; -#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ - ut_ad(mutex); - - /* This update is not thread safe, but we don't mind if the count - isn't exact. Moved out of ifdef that follows because we are willing - to sacrifice the cost of counting this as the data is valuable. - Count the number of calls to mutex_spin_wait. */ - mutex_spin_wait_count++; - -mutex_loop: - - i = 0; - - /* Spin waiting for the lock word to become zero. Note that we do - not have to assume that the read access to the lock word is atomic, - as the actual locking is always committed with atomic test-and-set. - In reality, however, all processors probably have an atomic read of - a memory word. 
*/ - -spin_loop: -#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP - mutex->count_spin_loop++; -#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ - - while (mutex_get_lock_word(mutex) != 0 && i < SYNC_SPIN_ROUNDS) { - if (srv_spin_wait_delay) { - ut_delay(ut_rnd_interval(0, srv_spin_wait_delay)); - } - - i++; - } - - if (i == SYNC_SPIN_ROUNDS) { -#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP - mutex->count_os_yield++; - if (timed_mutexes == 1 && timer_started==0) { - ut_usectime(&sec, &ms); - lstart_time= (ib_longlong)sec * 1000000 + ms; - timer_started = 1; - } -#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ - os_thread_yield(); - } - -#ifdef UNIV_SRV_PRINT_LATCH_WAITS - fprintf(stderr, - "Thread %lu spin wait mutex at %p" - " cfile %s cline %lu rnds %lu\n", - (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex, - mutex->cfile_name, (ulong) mutex->cline, (ulong) i); -#endif - - mutex_spin_round_count += i; - -#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP - mutex->count_spin_rounds += i; -#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ - - if (mutex_test_and_set(mutex) == 0) { - /* Succeeded! */ - - ut_d(mutex->thread_id = os_thread_get_curr_id()); -#ifdef UNIV_SYNC_DEBUG - mutex_set_debug_info(mutex, file_name, line); -#endif - - goto finish_timing; - } - - /* We may end up with a situation where lock_word is 0 but the OS - fast mutex is still reserved. On FreeBSD the OS does not seem to - schedule a thread which is constantly calling pthread_mutex_trylock - (in mutex_test_and_set implementation). Then we could end up - spinning here indefinitely. The following 'i++' stops this infinite - spin. */ - - i++; - - if (i < SYNC_SPIN_ROUNDS) { - goto spin_loop; - } - - sync_array_reserve_cell(sync_primary_wait_array, mutex, - SYNC_MUTEX, file_name, line, &index); - - /* The memory order of the array reservation and the change in the - waiters field is important: when we suspend a thread, we first - reserve the cell and then set waiters field to 1. 
When threads are - released in mutex_exit, the waiters field is first set to zero and - then the event is set to the signaled state. */ - - mutex_set_waiters(mutex, 1); - - /* Try to reserve still a few times */ - for (i = 0; i < 4; i++) { - if (mutex_test_and_set(mutex) == 0) { - /* Succeeded! Free the reserved wait cell */ - - sync_array_free_cell(sync_primary_wait_array, index); - - ut_d(mutex->thread_id = os_thread_get_curr_id()); -#ifdef UNIV_SYNC_DEBUG - mutex_set_debug_info(mutex, file_name, line); -#endif - -#ifdef UNIV_SRV_PRINT_LATCH_WAITS - fprintf(stderr, "Thread %lu spin wait succeeds at 2:" - " mutex at %p\n", - (ulong) os_thread_pf(os_thread_get_curr_id()), - (void*) mutex); -#endif - - goto finish_timing; - - /* Note that in this case we leave the waiters field - set to 1. We cannot reset it to zero, as we do not - know if there are other waiters. */ - } - } - - /* Now we know that there has been some thread holding the mutex - after the change in the wait array and the waiters field was made. - Now there is no risk of infinite wait on the event. */ - -#ifdef UNIV_SRV_PRINT_LATCH_WAITS - fprintf(stderr, - "Thread %lu OS wait mutex at %p cfile %s cline %lu rnds %lu\n", - (ulong) os_thread_pf(os_thread_get_curr_id()), (void*) mutex, - mutex->cfile_name, (ulong) mutex->cline, (ulong) i); -#endif - - mutex_os_wait_count++; - -#ifndef UNIV_HOTBACKUP - mutex->count_os_wait++; -# ifdef UNIV_DEBUG - /* !!!!! 
Sometimes os_wait can be called without os_thread_yield */ - - if (timed_mutexes == 1 && timer_started==0) { - ut_usectime(&sec, &ms); - lstart_time= (ib_longlong)sec * 1000000 + ms; - timer_started = 1; - } -# endif /* UNIV_DEBUG */ -#endif /* !UNIV_HOTBACKUP */ - - sync_array_wait_event(sync_primary_wait_array, index); - goto mutex_loop; - -finish_timing: -#if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP - if (timed_mutexes == 1 && timer_started==1) { - ut_usectime(&sec, &ms); - lfinish_time= (ib_longlong)sec * 1000000 + ms; - - ltime_diff= (ulint) (lfinish_time - lstart_time); - mutex->lspent_time += ltime_diff; - - if (mutex->lmax_spent_time < ltime_diff) { - mutex->lmax_spent_time= ltime_diff; - } - } -#endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */ - return; -} - -/********************************************************************** -Releases the threads waiting in the primary wait array for this mutex. */ - -void -mutex_signal_object( -/*================*/ - mutex_t* mutex) /* in: mutex */ -{ - mutex_set_waiters(mutex, 0); - - /* The memory order of resetting the waiters field and - signaling the object is important. See LEMMA 1 above. */ - os_event_set(mutex->event); - sync_array_object_signalled(sync_primary_wait_array); -} - -#ifdef UNIV_SYNC_DEBUG -/********************************************************************** -Sets the debug information for a reserved mutex. */ - -void -mutex_set_debug_info( -/*=================*/ - mutex_t* mutex, /* in: mutex */ - const char* file_name, /* in: file where requested */ - ulint line) /* in: line where requested */ -{ - ut_ad(mutex); - ut_ad(file_name); - - sync_thread_add_level(mutex, mutex->level); - - mutex->file_name = file_name; - mutex->line = line; -} - -/********************************************************************** -Gets the debug information for a reserved mutex. 
*/ - -void -mutex_get_debug_info( -/*=================*/ - mutex_t* mutex, /* in: mutex */ - const char** file_name, /* out: file where requested */ - ulint* line, /* out: line where requested */ - os_thread_id_t* thread_id) /* out: id of the thread which owns - the mutex */ -{ - ut_ad(mutex); - - *file_name = mutex->file_name; - *line = mutex->line; - *thread_id = mutex->thread_id; -} - -/********************************************************************** -Prints debug info of currently reserved mutexes. */ -static -void -mutex_list_print_info( -/*==================*/ - FILE* file) /* in: file where to print */ -{ - mutex_t* mutex; - const char* file_name; - ulint line; - os_thread_id_t thread_id; - ulint count = 0; - - fputs("----------\n" - "MUTEX INFO\n" - "----------\n", file); - - mutex_enter(&mutex_list_mutex); - - mutex = UT_LIST_GET_FIRST(mutex_list); - - while (mutex != NULL) { - count++; - - if (mutex_get_lock_word(mutex) != 0) { - mutex_get_debug_info(mutex, &file_name, &line, - &thread_id); - fprintf(file, - "Locked mutex: addr %p thread %ld" - " file %s line %ld\n", - (void*) mutex, os_thread_pf(thread_id), - file_name, line); - } - - mutex = UT_LIST_GET_NEXT(list, mutex); - } - - fprintf(file, "Total number of mutexes %ld\n", count); - - mutex_exit(&mutex_list_mutex); -} - -/********************************************************************** -Counts currently reserved mutexes. Works only in the debug version. 
*/ - -ulint -mutex_n_reserved(void) -/*==================*/ -{ - mutex_t* mutex; - ulint count = 0; - - mutex_enter(&mutex_list_mutex); - - mutex = UT_LIST_GET_FIRST(mutex_list); - - while (mutex != NULL) { - if (mutex_get_lock_word(mutex) != 0) { - - count++; - } - - mutex = UT_LIST_GET_NEXT(list, mutex); - } - - mutex_exit(&mutex_list_mutex); - - ut_a(count >= 1); - - return(count - 1); /* Subtract one, because this function itself - was holding one mutex (mutex_list_mutex) */ -} - -/********************************************************************** -Returns TRUE if no mutex or rw-lock is currently locked. Works only in -the debug version. */ - -ibool -sync_all_freed(void) -/*================*/ -{ - return(mutex_n_reserved() + rw_lock_n_locked() == 0); -} - -/********************************************************************** -Gets the value in the nth slot in the thread level arrays. */ -static -sync_thread_t* -sync_thread_level_arrays_get_nth( -/*=============================*/ - /* out: pointer to thread slot */ - ulint n) /* in: slot number */ -{ - ut_ad(n < OS_THREAD_MAX_N); - - return(sync_thread_level_arrays + n); -} - -/********************************************************************** -Looks for the thread slot for the calling thread. */ -static -sync_thread_t* -sync_thread_level_arrays_find_slot(void) -/*====================================*/ - /* out: pointer to thread slot, NULL if not found */ - -{ - sync_thread_t* slot; - os_thread_id_t id; - ulint i; - - id = os_thread_get_curr_id(); - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - - slot = sync_thread_level_arrays_get_nth(i); - - if (slot->levels && os_thread_eq(slot->id, id)) { - - return(slot); - } - } - - return(NULL); -} - -/********************************************************************** -Looks for an unused thread slot. 
*/ -static -sync_thread_t* -sync_thread_level_arrays_find_free(void) -/*====================================*/ - /* out: pointer to thread slot */ - -{ - sync_thread_t* slot; - ulint i; - - for (i = 0; i < OS_THREAD_MAX_N; i++) { - - slot = sync_thread_level_arrays_get_nth(i); - - if (slot->levels == NULL) { - - return(slot); - } - } - - return(NULL); -} - -/********************************************************************** -Gets the value in the nth slot in the thread level array. */ -static -sync_level_t* -sync_thread_levels_get_nth( -/*=======================*/ - /* out: pointer to level slot */ - sync_level_t* arr, /* in: pointer to level array for an OS - thread */ - ulint n) /* in: slot number */ -{ - ut_ad(n < SYNC_THREAD_N_LEVELS); - - return(arr + n); -} - -/********************************************************************** -Checks if all the level values stored in the level array are greater than -the given limit. */ -static -ibool -sync_thread_levels_g( -/*=================*/ - /* out: TRUE if all greater */ - sync_level_t* arr, /* in: pointer to level array for an OS - thread */ - ulint limit) /* in: level limit */ -{ - sync_level_t* slot; - rw_lock_t* lock; - mutex_t* mutex; - ulint i; - - for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) { - - slot = sync_thread_levels_get_nth(arr, i); - - if (slot->latch != NULL) { - if (slot->level <= limit) { - - lock = slot->latch; - mutex = slot->latch; - - fprintf(stderr, - "InnoDB: sync levels should be" - " > %lu but a level is %lu\n", - (ulong) limit, (ulong) slot->level); - - if (mutex->magic_n == MUTEX_MAGIC_N) { - fprintf(stderr, - "Mutex created at %s %lu\n", - mutex->cfile_name, - (ulong) mutex->cline); - - if (mutex_get_lock_word(mutex) != 0) { - const char* file_name; - ulint line; - os_thread_id_t thread_id; - - mutex_get_debug_info( - mutex, &file_name, - &line, &thread_id); - - fprintf(stderr, - "InnoDB: Locked mutex:" - " addr %p thread %ld" - " file %s line %ld\n", - (void*) mutex, - 
os_thread_pf( - thread_id), - file_name, - (ulong) line); - } else { - fputs("Not locked\n", stderr); - } - } else { - rw_lock_print(lock); - } - - return(FALSE); - } - } - } - - return(TRUE); -} - -/********************************************************************** -Checks if the level value is stored in the level array. */ -static -ibool -sync_thread_levels_contain( -/*=======================*/ - /* out: TRUE if stored */ - sync_level_t* arr, /* in: pointer to level array for an OS - thread */ - ulint level) /* in: level */ -{ - sync_level_t* slot; - ulint i; - - for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) { - - slot = sync_thread_levels_get_nth(arr, i); - - if (slot->latch != NULL) { - if (slot->level == level) { - - return(TRUE); - } - } - } - - return(FALSE); -} - -/********************************************************************** -Checks that the level array for the current thread is empty. */ - -ibool -sync_thread_levels_empty_gen( -/*=========================*/ - /* out: TRUE if empty except the - exceptions specified below */ - ibool dict_mutex_allowed) /* in: TRUE if dictionary mutex is - allowed to be owned by the thread, - also purge_is_running mutex is - allowed */ -{ - sync_level_t* arr; - sync_thread_t* thread_slot; - sync_level_t* slot; - ulint i; - - if (!sync_order_checks_on) { - - return(TRUE); - } - - mutex_enter(&sync_thread_mutex); - - thread_slot = sync_thread_level_arrays_find_slot(); - - if (thread_slot == NULL) { - - mutex_exit(&sync_thread_mutex); - - return(TRUE); - } - - arr = thread_slot->levels; - - for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) { - - slot = sync_thread_levels_get_nth(arr, i); - - if (slot->latch != NULL - && (!dict_mutex_allowed - || (slot->level != SYNC_DICT - && slot->level != SYNC_DICT_OPERATION))) { - - mutex_exit(&sync_thread_mutex); - ut_error; - - return(FALSE); - } - } - - mutex_exit(&sync_thread_mutex); - - return(TRUE); -} - -/********************************************************************** 
-Checks that the level array for the current thread is empty. */ - -ibool -sync_thread_levels_empty(void) -/*==========================*/ - /* out: TRUE if empty */ -{ - return(sync_thread_levels_empty_gen(FALSE)); -} - -/********************************************************************** -Adds a latch and its level in the thread level array. Allocates the memory -for the array if called first time for this OS thread. Makes the checks -against other latch levels stored in the array for this thread. */ - -void -sync_thread_add_level( -/*==================*/ - void* latch, /* in: pointer to a mutex or an rw-lock */ - ulint level) /* in: level in the latching order; if - SYNC_LEVEL_VARYING, nothing is done */ -{ - sync_level_t* array; - sync_level_t* slot; - sync_thread_t* thread_slot; - ulint i; - - if (!sync_order_checks_on) { - - return; - } - - if ((latch == (void*)&sync_thread_mutex) - || (latch == (void*)&mutex_list_mutex) - || (latch == (void*)&rw_lock_debug_mutex) - || (latch == (void*)&rw_lock_list_mutex)) { - - return; - } - - if (level == SYNC_LEVEL_VARYING) { - - return; - } - - mutex_enter(&sync_thread_mutex); - - thread_slot = sync_thread_level_arrays_find_slot(); - - if (thread_slot == NULL) { - /* We have to allocate the level array for a new thread */ - array = ut_malloc(sizeof(sync_level_t) * SYNC_THREAD_N_LEVELS); - - thread_slot = sync_thread_level_arrays_find_free(); - - thread_slot->id = os_thread_get_curr_id(); - thread_slot->levels = array; - - for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) { - - slot = sync_thread_levels_get_nth(array, i); - - slot->latch = NULL; - } - } - - array = thread_slot->levels; - - /* NOTE that there is a problem with _NODE and _LEAF levels: if the - B-tree height changes, then a leaf can change to an internal node - or the other way around. We do not know at present if this can cause - unnecessary assertion failures below. 
*/ - - switch (level) { - case SYNC_NO_ORDER_CHECK: - case SYNC_EXTERN_STORAGE: - case SYNC_TREE_NODE_FROM_HASH: - /* Do no order checking */ - break; - case SYNC_MEM_POOL: - ut_a(sync_thread_levels_g(array, SYNC_MEM_POOL)); - break; - case SYNC_MEM_HASH: - ut_a(sync_thread_levels_g(array, SYNC_MEM_HASH)); - break; - case SYNC_RECV: - ut_a(sync_thread_levels_g(array, SYNC_RECV)); - break; - case SYNC_WORK_QUEUE: - ut_a(sync_thread_levels_g(array, SYNC_WORK_QUEUE)); - break; - case SYNC_LOG: - ut_a(sync_thread_levels_g(array, SYNC_LOG)); - break; - case SYNC_THR_LOCAL: - ut_a(sync_thread_levels_g(array, SYNC_THR_LOCAL)); - break; - case SYNC_ANY_LATCH: - ut_a(sync_thread_levels_g(array, SYNC_ANY_LATCH)); - break; - case SYNC_TRX_SYS_HEADER: - ut_a(sync_thread_levels_g(array, SYNC_TRX_SYS_HEADER)); - break; - case SYNC_DOUBLEWRITE: - ut_a(sync_thread_levels_g(array, SYNC_DOUBLEWRITE)); - break; - case SYNC_BUF_BLOCK: - ut_a((sync_thread_levels_contain(array, SYNC_BUF_POOL) - && sync_thread_levels_g(array, SYNC_BUF_BLOCK - 1)) - || sync_thread_levels_g(array, SYNC_BUF_BLOCK)); - break; - case SYNC_BUF_POOL: - ut_a(sync_thread_levels_g(array, SYNC_BUF_POOL)); - break; - case SYNC_SEARCH_SYS: - ut_a(sync_thread_levels_g(array, SYNC_SEARCH_SYS)); - break; - case SYNC_TRX_LOCK_HEAP: - ut_a(sync_thread_levels_g(array, SYNC_TRX_LOCK_HEAP)); - break; - case SYNC_REC_LOCK: - ut_a((sync_thread_levels_contain(array, SYNC_KERNEL) - && sync_thread_levels_g(array, SYNC_REC_LOCK - 1)) - || sync_thread_levels_g(array, SYNC_REC_LOCK)); - break; - case SYNC_KERNEL: - ut_a(sync_thread_levels_g(array, SYNC_KERNEL)); - break; - case SYNC_IBUF_BITMAP: - ut_a((sync_thread_levels_contain(array, SYNC_IBUF_BITMAP_MUTEX) - && sync_thread_levels_g(array, SYNC_IBUF_BITMAP - 1)) - || sync_thread_levels_g(array, SYNC_IBUF_BITMAP)); - break; - case SYNC_IBUF_BITMAP_MUTEX: - ut_a(sync_thread_levels_g(array, SYNC_IBUF_BITMAP_MUTEX)); - break; - case SYNC_FSP_PAGE: - 
ut_a(sync_thread_levels_contain(array, SYNC_FSP)); - break; - case SYNC_FSP: - ut_a(sync_thread_levels_contain(array, SYNC_FSP) - || sync_thread_levels_g(array, SYNC_FSP)); - break; - case SYNC_TRX_UNDO_PAGE: - ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO) - || sync_thread_levels_contain(array, SYNC_RSEG) - || sync_thread_levels_contain(array, SYNC_PURGE_SYS) - || sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE)); - break; - case SYNC_RSEG_HEADER: - ut_a(sync_thread_levels_contain(array, SYNC_RSEG)); - break; - case SYNC_RSEG_HEADER_NEW: - ut_a(sync_thread_levels_contain(array, SYNC_KERNEL) - && sync_thread_levels_contain(array, SYNC_FSP_PAGE)); - break; - case SYNC_RSEG: - ut_a(sync_thread_levels_g(array, SYNC_RSEG)); - break; - case SYNC_TRX_UNDO: - ut_a(sync_thread_levels_g(array, SYNC_TRX_UNDO)); - break; - case SYNC_PURGE_LATCH: - ut_a(sync_thread_levels_g(array, SYNC_PURGE_LATCH)); - break; - case SYNC_PURGE_SYS: - ut_a(sync_thread_levels_g(array, SYNC_PURGE_SYS)); - break; - case SYNC_TREE_NODE: - ut_a(sync_thread_levels_contain(array, SYNC_INDEX_TREE) - || sync_thread_levels_contain(array, SYNC_DICT_OPERATION) - || sync_thread_levels_g(array, SYNC_TREE_NODE - 1)); - break; - case SYNC_TREE_NODE_NEW: - ut_a(sync_thread_levels_contain(array, SYNC_FSP_PAGE) - || sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)); - break; - case SYNC_INDEX_TREE: - ut_a((sync_thread_levels_contain(array, SYNC_IBUF_MUTEX) - && sync_thread_levels_contain(array, SYNC_FSP) - && sync_thread_levels_g(array, SYNC_FSP_PAGE - 1)) - || sync_thread_levels_g(array, SYNC_TREE_NODE - 1)); - break; - case SYNC_IBUF_MUTEX: - ut_a(sync_thread_levels_g(array, SYNC_FSP_PAGE - 1)); - break; - case SYNC_IBUF_PESS_INSERT_MUTEX: - ut_a(sync_thread_levels_g(array, SYNC_FSP - 1) - && !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX)); - break; - case SYNC_IBUF_HEADER: - ut_a(sync_thread_levels_g(array, SYNC_FSP - 1) - && !sync_thread_levels_contain(array, SYNC_IBUF_MUTEX) - && 
!sync_thread_levels_contain( - array, SYNC_IBUF_PESS_INSERT_MUTEX)); - break; - case SYNC_DICT_AUTOINC_MUTEX: - ut_a(sync_thread_levels_g(array, SYNC_DICT_AUTOINC_MUTEX)); - break; - case SYNC_DICT_OPERATION: - ut_a(sync_thread_levels_g(array, SYNC_DICT_OPERATION)); - break; - case SYNC_DICT_HEADER: - ut_a(sync_thread_levels_g(array, SYNC_DICT_HEADER)); - break; - case SYNC_DICT: -#ifdef UNIV_DEBUG - ut_a(buf_debug_prints - || sync_thread_levels_g(array, SYNC_DICT)); -#else /* UNIV_DEBUG */ - ut_a(sync_thread_levels_g(array, SYNC_DICT)); -#endif /* UNIV_DEBUG */ - break; - default: - ut_error; - } - - for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) { - - slot = sync_thread_levels_get_nth(array, i); - - if (slot->latch == NULL) { - slot->latch = latch; - slot->level = level; - - break; - } - } - - ut_a(i < SYNC_THREAD_N_LEVELS); - - mutex_exit(&sync_thread_mutex); -} - -/********************************************************************** -Removes a latch from the thread level array if it is found there. 
*/ - -ibool -sync_thread_reset_level( -/*====================*/ - /* out: TRUE if found from the array; it is an error - if the latch is not found */ - void* latch) /* in: pointer to a mutex or an rw-lock */ -{ - sync_level_t* array; - sync_level_t* slot; - sync_thread_t* thread_slot; - ulint i; - - if (!sync_order_checks_on) { - - return(FALSE); - } - - if ((latch == (void*)&sync_thread_mutex) - || (latch == (void*)&mutex_list_mutex) - || (latch == (void*)&rw_lock_debug_mutex) - || (latch == (void*)&rw_lock_list_mutex)) { - - return(FALSE); - } - - mutex_enter(&sync_thread_mutex); - - thread_slot = sync_thread_level_arrays_find_slot(); - - if (thread_slot == NULL) { - - ut_error; - - mutex_exit(&sync_thread_mutex); - return(FALSE); - } - - array = thread_slot->levels; - - for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) { - - slot = sync_thread_levels_get_nth(array, i); - - if (slot->latch == latch) { - slot->latch = NULL; - - mutex_exit(&sync_thread_mutex); - - return(TRUE); - } - } - - ut_error; - - mutex_exit(&sync_thread_mutex); - - return(FALSE); -} -#endif /* UNIV_SYNC_DEBUG */ - -/********************************************************************** -Initializes the synchronization data structures. 
*/ - -void -sync_init(void) -/*===========*/ -{ -#ifdef UNIV_SYNC_DEBUG - sync_thread_t* thread_slot; - ulint i; -#endif /* UNIV_SYNC_DEBUG */ - - ut_a(sync_initialized == FALSE); - - sync_initialized = TRUE; - - /* Create the primary system wait array which is protected by an OS - mutex */ - - sync_primary_wait_array = sync_array_create(OS_THREAD_MAX_N, - SYNC_ARRAY_OS_MUTEX); -#ifdef UNIV_SYNC_DEBUG - /* Create the thread latch level array where the latch levels - are stored for each OS thread */ - - sync_thread_level_arrays = ut_malloc(OS_THREAD_MAX_N - * sizeof(sync_thread_t)); - for (i = 0; i < OS_THREAD_MAX_N; i++) { - - thread_slot = sync_thread_level_arrays_get_nth(i); - thread_slot->levels = NULL; - } -#endif /* UNIV_SYNC_DEBUG */ - /* Init the mutex list and create the mutex to protect it. */ - - UT_LIST_INIT(mutex_list); - mutex_create(&mutex_list_mutex, SYNC_NO_ORDER_CHECK); -#ifdef UNIV_SYNC_DEBUG - mutex_create(&sync_thread_mutex, SYNC_NO_ORDER_CHECK); -#endif /* UNIV_SYNC_DEBUG */ - - /* Init the rw-lock list and create the mutex to protect it. */ - - UT_LIST_INIT(rw_lock_list); - mutex_create(&rw_lock_list_mutex, SYNC_NO_ORDER_CHECK); - -#ifdef UNIV_SYNC_DEBUG - mutex_create(&rw_lock_debug_mutex, SYNC_NO_ORDER_CHECK); - - rw_lock_debug_event = os_event_create(NULL); - rw_lock_debug_waiters = FALSE; -#endif /* UNIV_SYNC_DEBUG */ -} - -/********************************************************************** -Frees the resources in InnoDB's own synchronization data structures. Use -os_sync_free() after calling this. 
*/ - -void -sync_close(void) -/*===========*/ -{ - mutex_t* mutex; - - sync_array_free(sync_primary_wait_array); - - mutex = UT_LIST_GET_FIRST(mutex_list); - - while (mutex) { - mutex_free(mutex); - mutex = UT_LIST_GET_FIRST(mutex_list); - } - - mutex_free(&mutex_list_mutex); -#ifdef UNIV_SYNC_DEBUG - mutex_free(&sync_thread_mutex); -#endif /* UNIV_SYNC_DEBUG */ -} - -/*********************************************************************** -Prints wait info of the sync system. */ - -void -sync_print_wait_info( -/*=================*/ - FILE* file) /* in: file where to print */ -{ -#ifdef UNIV_SYNC_DEBUG - fprintf(file, "Mutex exits %llu, rws exits %llu, rwx exits %llu\n", - mutex_exit_count, rw_s_exit_count, rw_x_exit_count); -#endif - - fprintf(file, - "Mutex spin waits %llu, rounds %llu, OS waits %llu\n" - "RW-shared spins %llu, OS waits %llu;" - " RW-excl spins %llu, OS waits %llu\n", - mutex_spin_wait_count, - mutex_spin_round_count, - mutex_os_wait_count, - rw_s_spin_wait_count, - rw_s_os_wait_count, - rw_x_spin_wait_count, - rw_x_os_wait_count); - - fprintf(file, - "Spin rounds per wait: %.2f mutex, %.2f RW-shared, " - "%.2f RW-excl\n", - (double) mutex_spin_round_count / - (mutex_spin_wait_count ? mutex_spin_wait_count : 1), - (double) rw_s_spin_round_count / - (rw_s_spin_wait_count ? rw_s_spin_wait_count : 1), - (double) rw_x_spin_round_count / - (rw_x_spin_wait_count ? rw_x_spin_wait_count : 1)); -} - -/*********************************************************************** -Prints info of the sync system. 
In UNIV_SYNC_DEBUG builds mutex_list_print_info() and rw_lock_list_print_info() are called first; sync_array_print_info() for the primary wait array and sync_print_wait_info() are called in every build. */ - -void -sync_print( -/*=======*/ - FILE* file) /* in: file where to print */ -{ -#ifdef UNIV_SYNC_DEBUG - mutex_list_print_info(file); - - rw_lock_list_print_info(file); -#endif /* UNIV_SYNC_DEBUG */ - - sync_array_print_info(file, sync_primary_wait_array); - - sync_print_wait_info(file); -} diff --git a/storage/innobase/thr/thr0loc.c b/storage/innobase/thr/thr0loc.c deleted file mode 100644 index b803bd53101..00000000000 --- a/storage/innobase/thr/thr0loc.c +++ /dev/null @@ -1,228 +0,0 @@ -/****************************************************** -The thread local storage - -(c) 1995 Innobase Oy - -Created 10/5/1995 Heikki Tuuri -*******************************************************/ - -#include "thr0loc.h" -#ifdef UNIV_NONINL -#include "thr0loc.ic" -#endif - -#include "sync0sync.h" -#include "hash0hash.h" -#include "mem0mem.h" -#include "srv0srv.h" - -/* - IMPLEMENTATION OF THREAD LOCAL STORAGE - ====================================== - -The threads sometimes need private data which depends on the thread id. -This is implemented as a hash table, where the hash value is calculated -from the thread id, to prepare for a large number of threads. The hash table -is protected by a mutex. If you need to modify the program and put new data to -the thread local storage, just add it to struct thr_local_struct, which is -defined below in this file. */ - -/* Mutex protecting the local storage hash table */ -mutex_t thr_local_mutex; - -/* The hash table. The module is not yet initialized when it is NULL. */ -hash_table_t* thr_local_hash = NULL; - -/* The private data for each thread should be put to -the structure below and the accessor functions written -for the field.
*/ -typedef struct thr_local_struct thr_local_t; - -struct thr_local_struct{ - os_thread_id_t id; /* id of the thread which owns this struct */ - os_thread_t handle; /* operating system handle to the thread */ - ulint slot_no;/* the index of the slot in the thread table - for this thread */ - ibool in_ibuf;/* TRUE if the the thread is doing an ibuf - operation */ - hash_node_t hash; /* hash chain node */ - ulint magic_n; -}; - -#define THR_LOCAL_MAGIC_N 1231234 - -/*********************************************************************** -Returns the local storage struct for a thread. */ -static -thr_local_t* -thr_local_get( -/*==========*/ - /* out: local storage */ - os_thread_id_t id) /* in: thread id of the thread */ -{ - thr_local_t* local; - -try_again: - ut_ad(thr_local_hash); - ut_ad(mutex_own(&thr_local_mutex)); - - /* Look for the local struct in the hash table */ - - local = NULL; - - HASH_SEARCH(hash, thr_local_hash, os_thread_pf(id), - local, os_thread_eq(local->id, id)); - if (local == NULL) { - mutex_exit(&thr_local_mutex); - - thr_local_create(); - - mutex_enter(&thr_local_mutex); - - goto try_again; - } - - ut_ad(local->magic_n == THR_LOCAL_MAGIC_N); - - return(local); -} - -/*********************************************************************** -Gets the slot number in the thread table of a thread. */ - -ulint -thr_local_get_slot_no( -/*==================*/ - /* out: slot number */ - os_thread_id_t id) /* in: thread id of the thread */ -{ - ulint slot_no; - thr_local_t* local; - - mutex_enter(&thr_local_mutex); - - local = thr_local_get(id); - - slot_no = local->slot_no; - - mutex_exit(&thr_local_mutex); - - return(slot_no); -} - -/*********************************************************************** -Sets the slot number in the thread table of a thread. 
*/ - -void -thr_local_set_slot_no( -/*==================*/ - os_thread_id_t id, /* in: thread id of the thread */ - ulint slot_no)/* in: slot number */ -{ - thr_local_t* local; - - mutex_enter(&thr_local_mutex); - - local = thr_local_get(id); - - local->slot_no = slot_no; - - mutex_exit(&thr_local_mutex); -} - -/*********************************************************************** -Returns pointer to the 'in_ibuf' field within the current thread local -storage. */ - -ibool* -thr_local_get_in_ibuf_field(void) -/*=============================*/ - /* out: pointer to the in_ibuf field */ -{ - thr_local_t* local; - - mutex_enter(&thr_local_mutex); - - local = thr_local_get(os_thread_get_curr_id()); - - mutex_exit(&thr_local_mutex); - - return(&(local->in_ibuf)); -} - -/*********************************************************************** -Creates a local storage struct for the calling new thread. */ - -void -thr_local_create(void) -/*==================*/ -{ - thr_local_t* local; - - if (thr_local_hash == NULL) { - thr_local_init(); - } - - local = mem_alloc(sizeof(thr_local_t)); - - local->id = os_thread_get_curr_id(); - local->handle = os_thread_get_curr(); - local->magic_n = THR_LOCAL_MAGIC_N; - - local->in_ibuf = FALSE; - - mutex_enter(&thr_local_mutex); - - HASH_INSERT(thr_local_t, hash, thr_local_hash, - os_thread_pf(os_thread_get_curr_id()), - local); - - mutex_exit(&thr_local_mutex); -} - -/*********************************************************************** -Frees the local storage struct for the specified thread. 
Does nothing if no struct is found in the hash table for the given thread id. */ - -void -thr_local_free( -/*===========*/ - os_thread_id_t id) /* in: thread id */ -{ - thr_local_t* local; - - mutex_enter(&thr_local_mutex); - - /* Look for the local struct in the hash table */ - - HASH_SEARCH(hash, thr_local_hash, os_thread_pf(id), - local, os_thread_eq(local->id, id)); - if (local == NULL) { /* nothing is stored for this thread id */ - mutex_exit(&thr_local_mutex); - - return; - } - - HASH_DELETE(thr_local_t, hash, thr_local_hash, - os_thread_pf(id), local); - - mutex_exit(&thr_local_mutex); - - /* The struct is checked and freed after the mutex has been released */ ut_a(local->magic_n == THR_LOCAL_MAGIC_N); - - mem_free(local); -} - -/******************************************************************** -Initializes the thread local storage module: creates the hash table and the mutex protecting it. Asserts that the hash table does not exist yet. */ - -void -thr_local_init(void) -/*================*/ -{ - - ut_a(thr_local_hash == NULL); - - thr_local_hash = hash_create(OS_THREAD_MAX_N + 100); - - mutex_create(&thr_local_mutex, SYNC_THR_LOCAL); -} diff --git a/storage/innobase/trx/trx0purge.c b/storage/innobase/trx/trx0purge.c deleted file mode 100644 index f0e85ef1604..00000000000 --- a/storage/innobase/trx/trx0purge.c +++ /dev/null @@ -1,1148 +0,0 @@ -/****************************************************** -Purge old versions - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "trx0purge.h" - -#ifdef UNIV_NONINL -#include "trx0purge.ic" -#endif - -#include "fsp0fsp.h" -#include "mach0data.h" -#include "trx0rseg.h" -#include "trx0trx.h" -#include "trx0roll.h" -#include "read0read.h" -#include "fut0fut.h" -#include "que0que.h" -#include "row0purge.h" -#include "row0upd.h" -#include "trx0rec.h" -#include "srv0que.h" -#include "os0thread.h" - -/* The global data structure coordinating a purge */ -trx_purge_t* purge_sys = NULL; - -/* A dummy undo record used as a return value when we have a whole undo log -which needs no purge */ -trx_undo_rec_t trx_purge_dummy_rec; - -/********************************************************************* -Checks if trx_id is >= purge_view: then
it is guaranteed that its update -undo log still exists in the system. */ - -ibool -trx_purge_update_undo_must_exist( -/*=============================*/ - /* out: TRUE if is sure that it is preserved, also - if the function returns FALSE, it is possible that - the undo log still exists in the system */ - dulint trx_id) /* in: transaction id */ -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - if (!read_view_sees_trx_id(purge_sys->view, trx_id)) { - - return(TRUE); - } - - return(FALSE); -} - -/*=================== PURGE RECORD ARRAY =============================*/ - -/*********************************************************************** -Stores info of an undo log record during a purge. */ -static -trx_undo_inf_t* -trx_purge_arr_store_info( -/*=====================*/ - /* out: pointer to the storage cell */ - dulint trx_no, /* in: transaction number */ - dulint undo_no)/* in: undo number */ -{ - trx_undo_inf_t* cell; - trx_undo_arr_t* arr; - ulint i; - - arr = purge_sys->arr; - - for (i = 0;; i++) { - cell = trx_undo_arr_get_nth_info(arr, i); - - if (!(cell->in_use)) { - /* Not in use, we may store here */ - cell->undo_no = undo_no; - cell->trx_no = trx_no; - cell->in_use = TRUE; - - arr->n_used++; - - return(cell); - } - } -} - -/*********************************************************************** -Removes info of an undo log record during a purge. */ -UNIV_INLINE -void -trx_purge_arr_remove_info( -/*======================*/ - trx_undo_inf_t* cell) /* in: pointer to the storage cell */ -{ - trx_undo_arr_t* arr; - - arr = purge_sys->arr; - - cell->in_use = FALSE; - - ut_ad(arr->n_used > 0); - - arr->n_used--; -} - -/*********************************************************************** -Gets the biggest pair of a trx number and an undo number in a purge array. 
*/ -static -void -trx_purge_arr_get_biggest( -/*======================*/ - trx_undo_arr_t* arr, /* in: purge array */ - dulint* trx_no, /* out: transaction number: ut_dulint_zero - if array is empty */ - dulint* undo_no)/* out: undo number */ -{ - trx_undo_inf_t* cell; - dulint pair_trx_no; - dulint pair_undo_no; - int trx_cmp; - ulint n_used; - ulint i; - ulint n; - - n = 0; - n_used = arr->n_used; - pair_trx_no = ut_dulint_zero; - pair_undo_no = ut_dulint_zero; - - for (i = 0;; i++) { - cell = trx_undo_arr_get_nth_info(arr, i); - - if (cell->in_use) { - n++; - trx_cmp = ut_dulint_cmp(cell->trx_no, pair_trx_no); - - if ((trx_cmp > 0) - || ((trx_cmp == 0) - && (ut_dulint_cmp(cell->undo_no, - pair_undo_no) >= 0))) { - - pair_trx_no = cell->trx_no; - pair_undo_no = cell->undo_no; - } - } - - if (n == n_used) { - *trx_no = pair_trx_no; - *undo_no = pair_undo_no; - - return; - } - } -} - -/******************************************************************** -Builds a purge 'query' graph. The actual purge is performed by executing -this query graph. */ -static -que_t* -trx_purge_graph_build(void) -/*=======================*/ - /* out, own: the query graph */ -{ - mem_heap_t* heap; - que_fork_t* fork; - que_thr_t* thr; - /* que_thr_t* thr2; */ - - heap = mem_heap_create(512); - fork = que_fork_create(NULL, NULL, QUE_FORK_PURGE, heap); - fork->trx = purge_sys->trx; - - thr = que_thr_create(fork, heap); - - thr->child = row_purge_node_create(thr, heap); - - /* thr2 = que_thr_create(fork, fork, heap); - - thr2->child = row_purge_node_create(fork, thr2, heap); */ - - return(fork); -} - -/************************************************************************ -Creates the global purge system control structure and inits the history -mutex. 
*/

void
trx_purge_sys_create(void)
/*======================*/
{
	ut_ad(mutex_own(&kernel_mutex));

	purge_sys = mem_alloc(sizeof(trx_purge_t));

	/* Purge starts out stopped, with nothing handled and no next
	record chosen */
	purge_sys->state = TRX_STOP_PURGE;
	purge_sys->n_pages_handled = 0;
	purge_sys->purge_trx_no = ut_dulint_zero;
	purge_sys->purge_undo_no = ut_dulint_zero;
	purge_sys->next_stored = FALSE;

	/* Synchronization objects of the purge system */
	rw_lock_create(&purge_sys->latch, SYNC_PURGE_LATCH);
	mutex_create(&purge_sys->mutex, SYNC_PURGE_SYS);

	/* Memory heap and the array of undo records being purged */
	purge_sys->heap = mem_heap_create(256);
	purge_sys->arr = trx_undo_arr_create();

	/* Purge runs in a session and a transaction of its own; the
	transaction is flagged as the purge transaction and must start
	successfully */
	purge_sys->sess = sess_open();
	purge_sys->trx = purge_sys->sess->trx;
	purge_sys->trx->is_purge = 1;

	ut_a(trx_start_low(purge_sys->trx, ULINT_UNDEFINED));

	/* The query graph reads purge_sys->trx, so it is built only after
	the transaction has been set */
	purge_sys->query = trx_purge_graph_build();

	purge_sys->view = read_view_oldest_copy_or_open_new(ut_dulint_zero,
							    purge_sys->heap);
}

/*================ UNDO LOG HISTORY LIST =============================*/

/************************************************************************
Adds the update undo log as the first log in the history list. Removes the
update undo log segment from the rseg slot if it is too big for reuse.
*/ - -void -trx_purge_add_update_undo_to_history( -/*=================================*/ - trx_t* trx, /* in: transaction */ - page_t* undo_page, /* in: update undo log header page, - x-latched */ - mtr_t* mtr) /* in: mtr */ -{ - trx_undo_t* undo; - trx_rseg_t* rseg; - trx_rsegf_t* rseg_header; - trx_usegf_t* seg_header; - trx_ulogf_t* undo_header; - trx_upagef_t* page_header; - ulint hist_size; - - undo = trx->update_undo; - - ut_ad(undo); - - rseg = undo->rseg; - - ut_ad(mutex_own(&(rseg->mutex))); - - rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr); - - undo_header = undo_page + undo->hdr_offset; - seg_header = undo_page + TRX_UNDO_SEG_HDR; - page_header = undo_page + TRX_UNDO_PAGE_HDR; - - if (undo->state != TRX_UNDO_CACHED) { - /* The undo log segment will not be reused */ - - if (undo->id >= TRX_RSEG_N_SLOTS) { - fprintf(stderr, - "InnoDB: Error: undo->id is %lu\n", - (ulong) undo->id); - ut_error; - } - - trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, mtr); - - hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, - MLOG_4BYTES, mtr); - ut_ad(undo->size == flst_get_len( - seg_header + TRX_UNDO_PAGE_LIST, mtr)); - - mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, - hist_size + undo->size, MLOG_4BYTES, mtr); - } - - /* Add the log as the first in the history list */ - flst_add_first(rseg_header + TRX_RSEG_HISTORY, - undo_header + TRX_UNDO_HISTORY_NODE, mtr); - mutex_enter(&kernel_mutex); - trx_sys->rseg_history_len++; - mutex_exit(&kernel_mutex); - - /* Write the trx number to the undo log header */ - mlog_write_dulint(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr); - /* Write information about delete markings to the undo log header */ - - if (!undo->del_marks) { - mlog_write_ulint(undo_header + TRX_UNDO_DEL_MARKS, FALSE, - MLOG_2BYTES, mtr); - } - - if (rseg->last_page_no == FIL_NULL) { - - rseg->last_page_no = undo->hdr_page_no; - rseg->last_offset = undo->hdr_offset; - rseg->last_trx_no = trx->no; - rseg->last_del_marks = 
undo->del_marks; - } -} - -/************************************************************************** -Frees an undo log segment which is in the history list. Cuts the end of the -history list at the youngest undo log in this segment. */ -static -void -trx_purge_free_segment( -/*===================*/ - trx_rseg_t* rseg, /* in: rollback segment */ - fil_addr_t hdr_addr, /* in: the file address of log_hdr */ - ulint n_removed_logs) /* in: count of how many undo logs we - will cut off from the end of the - history list */ -{ - page_t* undo_page; - trx_rsegf_t* rseg_hdr; - trx_ulogf_t* log_hdr; - trx_usegf_t* seg_hdr; - ibool freed; - ulint seg_size; - ulint hist_size; - ibool marked = FALSE; - mtr_t mtr; - - /* fputs("Freeing an update undo log segment\n", stderr); */ - - ut_ad(mutex_own(&(purge_sys->mutex))); -loop: - mtr_start(&mtr); - mutex_enter(&(rseg->mutex)); - - rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr); - - undo_page = trx_undo_page_get(rseg->space, hdr_addr.page, &mtr); - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - log_hdr = undo_page + hdr_addr.boffset; - - /* Mark the last undo log totally purged, so that if the system - crashes, the tail of the undo log will not get accessed again. The - list of pages in the undo log tail gets inconsistent during the - freeing of the segment, and therefore purge should not try to access - them again. */ - - if (!marked) { - mlog_write_ulint(log_hdr + TRX_UNDO_DEL_MARKS, FALSE, - MLOG_2BYTES, &mtr); - marked = TRUE; - } - - freed = fseg_free_step_not_header(seg_hdr + TRX_UNDO_FSEG_HEADER, - &mtr); - if (!freed) { - mutex_exit(&(rseg->mutex)); - mtr_commit(&mtr); - - goto loop; - } - - /* The page list may now be inconsistent, but the length field - stored in the list base node tells us how big it was before we - started the freeing. 
*/ - - seg_size = flst_get_len(seg_hdr + TRX_UNDO_PAGE_LIST, &mtr); - - /* We may free the undo log segment header page; it must be freed - within the same mtr as the undo log header is removed from the - history list: otherwise, in case of a database crash, the segment - could become inaccessible garbage in the file space. */ - - flst_cut_end(rseg_hdr + TRX_RSEG_HISTORY, - log_hdr + TRX_UNDO_HISTORY_NODE, n_removed_logs, &mtr); - - mutex_enter(&kernel_mutex); - ut_ad(trx_sys->rseg_history_len >= n_removed_logs); - trx_sys->rseg_history_len -= n_removed_logs; - mutex_exit(&kernel_mutex); - - freed = FALSE; - - while (!freed) { - /* Here we assume that a file segment with just the header - page can be freed in a few steps, so that the buffer pool - is not flooded with bufferfixed pages: see the note in - fsp0fsp.c. */ - - freed = fseg_free_step(seg_hdr + TRX_UNDO_FSEG_HEADER, - &mtr); - } - - hist_size = mtr_read_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE, - MLOG_4BYTES, &mtr); - ut_ad(hist_size >= seg_size); - - mlog_write_ulint(rseg_hdr + TRX_RSEG_HISTORY_SIZE, - hist_size - seg_size, MLOG_4BYTES, &mtr); - - ut_ad(rseg->curr_size >= seg_size); - - rseg->curr_size -= seg_size; - - mutex_exit(&(rseg->mutex)); - - mtr_commit(&mtr); -} - -/************************************************************************ -Removes unnecessary history data from a rollback segment. 
*/ -static -void -trx_purge_truncate_rseg_history( -/*============================*/ - trx_rseg_t* rseg, /* in: rollback segment */ - dulint limit_trx_no, /* in: remove update undo logs whose - trx number is < limit_trx_no */ - dulint limit_undo_no) /* in: if transaction number is equal - to limit_trx_no, truncate undo records - with undo number < limit_undo_no */ -{ - fil_addr_t hdr_addr; - fil_addr_t prev_hdr_addr; - trx_rsegf_t* rseg_hdr; - page_t* undo_page; - trx_ulogf_t* log_hdr; - trx_usegf_t* seg_hdr; - int cmp; - ulint n_removed_logs = 0; - mtr_t mtr; - - ut_ad(mutex_own(&(purge_sys->mutex))); - - mtr_start(&mtr); - mutex_enter(&(rseg->mutex)); - - rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr); - - hdr_addr = trx_purge_get_log_from_hist( - flst_get_last(rseg_hdr + TRX_RSEG_HISTORY, &mtr)); -loop: - if (hdr_addr.page == FIL_NULL) { - - mutex_exit(&(rseg->mutex)); - - mtr_commit(&mtr); - - return; - } - - undo_page = trx_undo_page_get(rseg->space, hdr_addr.page, &mtr); - - log_hdr = undo_page + hdr_addr.boffset; - - cmp = ut_dulint_cmp(mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO), - limit_trx_no); - if (cmp == 0) { - trx_undo_truncate_start(rseg, rseg->space, hdr_addr.page, - hdr_addr.boffset, limit_undo_no); - } - - if (cmp >= 0) { - mutex_enter(&kernel_mutex); - ut_a(trx_sys->rseg_history_len >= n_removed_logs); - trx_sys->rseg_history_len -= n_removed_logs; - mutex_exit(&kernel_mutex); - - flst_truncate_end(rseg_hdr + TRX_RSEG_HISTORY, - log_hdr + TRX_UNDO_HISTORY_NODE, - n_removed_logs, &mtr); - - mutex_exit(&(rseg->mutex)); - mtr_commit(&mtr); - - return; - } - - prev_hdr_addr = trx_purge_get_log_from_hist( - flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr)); - n_removed_logs++; - - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - - if ((mach_read_from_2(seg_hdr + TRX_UNDO_STATE) == TRX_UNDO_TO_PURGE) - && (mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG) == 0)) { - - /* We can free the whole log segment */ - - mutex_exit(&(rseg->mutex)); - 
mtr_commit(&mtr); - - trx_purge_free_segment(rseg, hdr_addr, n_removed_logs); - - n_removed_logs = 0; - } else { - mutex_exit(&(rseg->mutex)); - mtr_commit(&mtr); - } - - mtr_start(&mtr); - mutex_enter(&(rseg->mutex)); - - rseg_hdr = trx_rsegf_get(rseg->space, rseg->page_no, &mtr); - - hdr_addr = prev_hdr_addr; - - goto loop; -} - -/************************************************************************ -Removes unnecessary history data from rollback segments. NOTE that when this -function is called, the caller must not have any latches on undo log pages! */ -static -void -trx_purge_truncate_history(void) -/*============================*/ -{ - trx_rseg_t* rseg; - dulint limit_trx_no; - dulint limit_undo_no; - - ut_ad(mutex_own(&(purge_sys->mutex))); - - trx_purge_arr_get_biggest(purge_sys->arr, &limit_trx_no, - &limit_undo_no); - - if (ut_dulint_cmp(limit_trx_no, ut_dulint_zero) == 0) { - - limit_trx_no = purge_sys->purge_trx_no; - limit_undo_no = purge_sys->purge_undo_no; - } - - /* We play safe and set the truncate limit at most to the purge view - low_limit number, though this is not necessary */ - - if (ut_dulint_cmp(limit_trx_no, purge_sys->view->low_limit_no) >= 0) { - limit_trx_no = purge_sys->view->low_limit_no; - limit_undo_no = ut_dulint_zero; - } - - ut_ad((ut_dulint_cmp(limit_trx_no, - purge_sys->view->low_limit_no) <= 0)); - - rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); - - while (rseg) { - trx_purge_truncate_rseg_history(rseg, limit_trx_no, - limit_undo_no); - rseg = UT_LIST_GET_NEXT(rseg_list, rseg); - } -} - -/************************************************************************ -Does a truncate if the purge array is empty. NOTE that when this function is -called, the caller must not have any latches on undo log pages! 
*/ -UNIV_INLINE -ibool -trx_purge_truncate_if_arr_empty(void) -/*=================================*/ - /* out: TRUE if array empty */ -{ - ut_ad(mutex_own(&(purge_sys->mutex))); - - if (purge_sys->arr->n_used == 0) { - - trx_purge_truncate_history(); - - return(TRUE); - } - - return(FALSE); -} - -/*************************************************************************** -Updates the last not yet purged history log info in rseg when we have purged -a whole undo log. Advances also purge_sys->purge_trx_no past the purged log. */ -static -void -trx_purge_rseg_get_next_history_log( -/*================================*/ - trx_rseg_t* rseg) /* in: rollback segment */ -{ - page_t* undo_page; - trx_ulogf_t* log_hdr; - trx_usegf_t* seg_hdr; - fil_addr_t prev_log_addr; - dulint trx_no; - ibool del_marks; - mtr_t mtr; - - ut_ad(mutex_own(&(purge_sys->mutex))); - - mutex_enter(&(rseg->mutex)); - - ut_a(rseg->last_page_no != FIL_NULL); - - purge_sys->purge_trx_no = ut_dulint_add(rseg->last_trx_no, 1); - purge_sys->purge_undo_no = ut_dulint_zero; - purge_sys->next_stored = FALSE; - - mtr_start(&mtr); - - undo_page = trx_undo_page_get_s_latched(rseg->space, - rseg->last_page_no, &mtr); - log_hdr = undo_page + rseg->last_offset; - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - - /* Increase the purge page count by one for every handled log */ - - purge_sys->n_pages_handled++; - - prev_log_addr = trx_purge_get_log_from_hist( - flst_get_prev_addr(log_hdr + TRX_UNDO_HISTORY_NODE, &mtr)); - if (prev_log_addr.page == FIL_NULL) { - /* No logs left in the history list */ - - rseg->last_page_no = FIL_NULL; - - mutex_exit(&(rseg->mutex)); - mtr_commit(&mtr); - - mutex_enter(&kernel_mutex); - - /* Add debug code to track history list corruption reported - on the MySQL mailing list on Nov 9, 2004. The fut0lst.c - file-based list was corrupt. The prev node pointer was - FIL_NULL, even though the list length was over 8 million nodes! 
- We assume that purge truncates the history list in moderate - size pieces, and if we here reach the head of the list, the - list cannot be longer than 20 000 undo logs now. */ - - if (trx_sys->rseg_history_len > 20000) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: purge reached the" - " head of the history list,\n" - "InnoDB: but its length is still" - " reported as %lu! Make a detailed bug\n" - "InnoDB: report, and submit it" - " to http://bugs.mysql.com\n", - (ulong) trx_sys->rseg_history_len); - } - - mutex_exit(&kernel_mutex); - - return; - } - - mutex_exit(&(rseg->mutex)); - mtr_commit(&mtr); - - /* Read the trx number and del marks from the previous log header */ - mtr_start(&mtr); - - log_hdr = trx_undo_page_get_s_latched(rseg->space, - prev_log_addr.page, &mtr) - + prev_log_addr.boffset; - - trx_no = mach_read_from_8(log_hdr + TRX_UNDO_TRX_NO); - - del_marks = mach_read_from_2(log_hdr + TRX_UNDO_DEL_MARKS); - - mtr_commit(&mtr); - - mutex_enter(&(rseg->mutex)); - - rseg->last_page_no = prev_log_addr.page; - rseg->last_offset = prev_log_addr.boffset; - rseg->last_trx_no = trx_no; - rseg->last_del_marks = del_marks; - - mutex_exit(&(rseg->mutex)); -} - -/*************************************************************************** -Chooses the next undo log to purge and updates the info in purge_sys. This -function is used to initialize purge_sys when the next record to purge is -not known, and also to update the purge system info on the next record when -purge has handled the whole undo log for a transaction. */ -static -void -trx_purge_choose_next_log(void) -/*===========================*/ -{ - trx_undo_rec_t* rec; - trx_rseg_t* rseg; - trx_rseg_t* min_rseg; - dulint min_trx_no; - ulint space = 0; /* remove warning (??? bug ???) */ - ulint page_no = 0; /* remove warning (??? bug ???) */ - ulint offset = 0; /* remove warning (??? bug ???) 
*/ - mtr_t mtr; - - ut_ad(mutex_own(&(purge_sys->mutex))); - ut_ad(purge_sys->next_stored == FALSE); - - rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); - - min_trx_no = ut_dulint_max; - - min_rseg = NULL; - - while (rseg) { - mutex_enter(&(rseg->mutex)); - - if (rseg->last_page_no != FIL_NULL) { - - if ((min_rseg == NULL) - || (ut_dulint_cmp(min_trx_no, - rseg->last_trx_no) > 0)) { - - min_rseg = rseg; - min_trx_no = rseg->last_trx_no; - space = rseg->space; - ut_a(space == 0); /* We assume in purge of - externally stored fields - that space id == 0 */ - page_no = rseg->last_page_no; - offset = rseg->last_offset; - } - } - - mutex_exit(&(rseg->mutex)); - - rseg = UT_LIST_GET_NEXT(rseg_list, rseg); - } - - if (min_rseg == NULL) { - - return; - } - - mtr_start(&mtr); - - if (!min_rseg->last_del_marks) { - /* No need to purge this log */ - - rec = &trx_purge_dummy_rec; - } else { - rec = trx_undo_get_first_rec(space, page_no, offset, - RW_S_LATCH, &mtr); - if (rec == NULL) { - /* Undo log empty */ - - rec = &trx_purge_dummy_rec; - } - } - - purge_sys->next_stored = TRUE; - purge_sys->rseg = min_rseg; - - purge_sys->hdr_page_no = page_no; - purge_sys->hdr_offset = offset; - - purge_sys->purge_trx_no = min_trx_no; - - if (rec == &trx_purge_dummy_rec) { - - purge_sys->purge_undo_no = ut_dulint_zero; - purge_sys->page_no = page_no; - purge_sys->offset = 0; - } else { - purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec); - - purge_sys->page_no = buf_frame_get_page_no(rec); - purge_sys->offset = rec - buf_frame_align(rec); - } - - mtr_commit(&mtr); -} - -/*************************************************************************** -Gets the next record to purge and updates the info in the purge system. 
*/ -static -trx_undo_rec_t* -trx_purge_get_next_rec( -/*===================*/ - /* out: copy of an undo log record or - pointer to the dummy undo log record */ - mem_heap_t* heap) /* in: memory heap where copied */ -{ - trx_undo_rec_t* rec; - trx_undo_rec_t* rec_copy; - trx_undo_rec_t* rec2; - trx_undo_rec_t* next_rec; - page_t* undo_page; - page_t* page; - ulint offset; - ulint page_no; - ulint space; - ulint type; - ulint cmpl_info; - mtr_t mtr; - - ut_ad(mutex_own(&(purge_sys->mutex))); - ut_ad(purge_sys->next_stored); - - space = purge_sys->rseg->space; - page_no = purge_sys->page_no; - offset = purge_sys->offset; - - if (offset == 0) { - /* It is the dummy undo log record, which means that there is - no need to purge this undo log */ - - trx_purge_rseg_get_next_history_log(purge_sys->rseg); - - /* Look for the next undo log and record to purge */ - - trx_purge_choose_next_log(); - - return(&trx_purge_dummy_rec); - } - - mtr_start(&mtr); - - undo_page = trx_undo_page_get_s_latched(space, page_no, &mtr); - rec = undo_page + offset; - - rec2 = rec; - - for (;;) { - /* Try first to find the next record which requires a purge - operation from the same page of the same undo log */ - - next_rec = trx_undo_page_get_next_rec(rec2, - purge_sys->hdr_page_no, - purge_sys->hdr_offset); - if (next_rec == NULL) { - rec2 = trx_undo_get_next_rec( - rec2, purge_sys->hdr_page_no, - purge_sys->hdr_offset, &mtr); - break; - } - - rec2 = next_rec; - - type = trx_undo_rec_get_type(rec2); - - if (type == TRX_UNDO_DEL_MARK_REC) { - - break; - } - - cmpl_info = trx_undo_rec_get_cmpl_info(rec2); - - if (trx_undo_rec_get_extern_storage(rec2)) { - break; - } - - if ((type == TRX_UNDO_UPD_EXIST_REC) - && !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) { - break; - } - } - - if (rec2 == NULL) { - mtr_commit(&mtr); - - trx_purge_rseg_get_next_history_log(purge_sys->rseg); - - /* Look for the next undo log and record to purge */ - - trx_purge_choose_next_log(); - - mtr_start(&mtr); - - undo_page = 
trx_undo_page_get_s_latched(space, page_no, &mtr); - - rec = undo_page + offset; - } else { - page = buf_frame_align(rec2); - - purge_sys->purge_undo_no = trx_undo_rec_get_undo_no(rec2); - purge_sys->page_no = buf_frame_get_page_no(page); - purge_sys->offset = rec2 - page; - - if (undo_page != page) { - /* We advance to a new page of the undo log: */ - purge_sys->n_pages_handled++; - } - } - - rec_copy = trx_undo_rec_copy(rec, heap); - - mtr_commit(&mtr); - - return(rec_copy); -} - -/************************************************************************ -Fetches the next undo log record from the history list to purge. It must be -released with the corresponding release function. */ - -trx_undo_rec_t* -trx_purge_fetch_next_rec( -/*=====================*/ - /* out: copy of an undo log record or - pointer to the dummy undo log record - &trx_purge_dummy_rec, if the whole undo log - can skipped in purge; NULL if none left */ - dulint* roll_ptr,/* out: roll pointer to undo record */ - trx_undo_inf_t** cell, /* out: storage cell for the record in the - purge array */ - mem_heap_t* heap) /* in: memory heap where copied */ -{ - trx_undo_rec_t* undo_rec; - - mutex_enter(&(purge_sys->mutex)); - - if (purge_sys->state == TRX_STOP_PURGE) { - trx_purge_truncate_if_arr_empty(); - - mutex_exit(&(purge_sys->mutex)); - - return(NULL); - } - - if (!purge_sys->next_stored) { - trx_purge_choose_next_log(); - - if (!purge_sys->next_stored) { - purge_sys->state = TRX_STOP_PURGE; - - trx_purge_truncate_if_arr_empty(); - - if (srv_print_thread_releases) { - fprintf(stderr, - "Purge: No logs left in the" - " history list; pages handled %lu\n", - (ulong) purge_sys->n_pages_handled); - } - - mutex_exit(&(purge_sys->mutex)); - - return(NULL); - } - } - - if (purge_sys->n_pages_handled >= purge_sys->handle_limit) { - - purge_sys->state = TRX_STOP_PURGE; - - trx_purge_truncate_if_arr_empty(); - - mutex_exit(&(purge_sys->mutex)); - - return(NULL); - } - - if 
(ut_dulint_cmp(purge_sys->purge_trx_no, - purge_sys->view->low_limit_no) >= 0) { - purge_sys->state = TRX_STOP_PURGE; - - trx_purge_truncate_if_arr_empty(); - - mutex_exit(&(purge_sys->mutex)); - - return(NULL); - } - - /* fprintf(stderr, "Thread %lu purging trx %lu undo record %lu\n", - os_thread_get_curr_id(), - ut_dulint_get_low(purge_sys->purge_trx_no), - ut_dulint_get_low(purge_sys->purge_undo_no)); */ - - *roll_ptr = trx_undo_build_roll_ptr(FALSE, (purge_sys->rseg)->id, - purge_sys->page_no, - purge_sys->offset); - - *cell = trx_purge_arr_store_info(purge_sys->purge_trx_no, - purge_sys->purge_undo_no); - - ut_ad(ut_dulint_cmp(purge_sys->purge_trx_no, - (purge_sys->view)->low_limit_no) < 0); - - /* The following call will advance the stored values of purge_trx_no - and purge_undo_no, therefore we had to store them first */ - - undo_rec = trx_purge_get_next_rec(heap); - - mutex_exit(&(purge_sys->mutex)); - - return(undo_rec); -} - -/*********************************************************************** -Releases a reserved purge undo record. */ - -void -trx_purge_rec_release( -/*==================*/ - trx_undo_inf_t* cell) /* in: storage cell */ -{ - trx_undo_arr_t* arr; - - mutex_enter(&(purge_sys->mutex)); - - arr = purge_sys->arr; - - trx_purge_arr_remove_info(cell); - - mutex_exit(&(purge_sys->mutex)); -} - -/*********************************************************************** -This function runs a purge batch. 
*/

ulint
trx_purge(void)
/*===========*/
				/* out: number of undo log pages handled in
				the batch */
{
	que_thr_t*	purge_thr;
	ulint		n_handled_at_start;

	mutex_enter(&(purge_sys->mutex));

	if (purge_sys->trx->n_active_thrs > 0) {

		mutex_exit(&(purge_sys->mutex));

		/* Should not happen */

		ut_error;

		return(0);
	}

	rw_lock_x_lock(&(purge_sys->latch));

	mutex_enter(&kernel_mutex);

	/* Close the old purge view and empty the heap passed to the view
	constructor below */

	read_view_close(purge_sys->view);
	purge_sys->view = NULL;
	mem_heap_empty(purge_sys->heap);

	/* Determine how much data manipulation language (DML) statements
	need to be delayed in order to reduce the lagging of the purge
	thread. The value is in microseconds; the default is no delay. */
	srv_dml_needed_delay = 0;

	/* If we cannot advance the 'purge view' because of an old
	'consistent read view', then the DML statements cannot be delayed.
	Also, srv_max_purge_lag <= 0 means 'infinity'. */
	if (srv_max_purge_lag > 0
	    && !UT_LIST_GET_LAST(trx_sys->view_list)) {
		float	lag_ratio = (float) trx_sys->rseg_history_len
			/ srv_max_purge_lag;

		if (lag_ratio > ULINT_MAX / 10000) {
			/* Avoid overflow: maximum delay is 4295 seconds */
			srv_dml_needed_delay = ULINT_MAX;
		} else if (lag_ratio > 1) {
			/* If the history list length exceeds the
			innodb_max_purge_lag, the
			data manipulation statements are delayed
			by at least 5000 microseconds. */
			srv_dml_needed_delay
				= (ulint) ((lag_ratio - .5) * 10000);
		}
	}

	purge_sys->view = read_view_oldest_copy_or_open_new(ut_dulint_zero,
							    purge_sys->heap);
	mutex_exit(&kernel_mutex);

	rw_lock_x_unlock(&(purge_sys->latch));

	purge_sys->state = TRX_PURGE_ON;

	/* Handle at most 20 undo log pages in one purge batch */

	n_handled_at_start = purge_sys->n_pages_handled;

	purge_sys->handle_limit = n_handled_at_start + 20;

	mutex_exit(&(purge_sys->mutex));

	mutex_enter(&kernel_mutex);

	purge_thr = que_fork_start_command(purge_sys->query);

	ut_ad(purge_thr);

	mutex_exit(&kernel_mutex);

	if (srv_print_thread_releases) {

		fputs("Starting purge\n", stderr);
	}

	que_run_threads(purge_thr);

	if (srv_print_thread_releases) {

		fprintf(stderr,
			"Purge ends; pages handled %lu\n",
			(ulong) purge_sys->n_pages_handled);
	}

	return(purge_sys->n_pages_handled - n_handled_at_start);
}

/**********************************************************************
Prints information of the purge system to stderr.
*/ - -void -trx_purge_sys_print(void) -/*=====================*/ -{ - fprintf(stderr, "InnoDB: Purge system view:\n"); - read_view_print(purge_sys->view); - - fprintf(stderr, "InnoDB: Purge trx n:o %lu %lu, undo n_o %lu %lu\n", - (ulong) ut_dulint_get_high(purge_sys->purge_trx_no), - (ulong) ut_dulint_get_low(purge_sys->purge_trx_no), - (ulong) ut_dulint_get_high(purge_sys->purge_undo_no), - (ulong) ut_dulint_get_low(purge_sys->purge_undo_no)); - fprintf(stderr, - "InnoDB: Purge next stored %lu, page_no %lu, offset %lu,\n" - "InnoDB: Purge hdr_page_no %lu, hdr_offset %lu\n", - (ulong) purge_sys->next_stored, - (ulong) purge_sys->page_no, - (ulong) purge_sys->offset, - (ulong) purge_sys->hdr_page_no, - (ulong) purge_sys->hdr_offset); -} diff --git a/storage/innobase/trx/trx0rec.c b/storage/innobase/trx/trx0rec.c deleted file mode 100644 index 50f8b011463..00000000000 --- a/storage/innobase/trx/trx0rec.c +++ /dev/null @@ -1,1434 +0,0 @@ -/****************************************************** -Transaction undo log record - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "trx0rec.h" - -#ifdef UNIV_NONINL -#include "trx0rec.ic" -#endif - -#include "fsp0fsp.h" -#include "mach0data.h" -#include "trx0rseg.h" -#include "trx0trx.h" -#include "trx0undo.h" -#include "dict0dict.h" -#include "ut0mem.h" -#include "row0upd.h" -#include "que0que.h" -#include "trx0purge.h" -#include "row0row.h" - -/*=========== UNDO LOG RECORD CREATION AND DECODING ====================*/ - -/************************************************************************** -Writes the mtr log entry of the inserted undo log record on the undo log -page. 
*/ -UNIV_INLINE -void -trx_undof_page_add_undo_rec_log( -/*============================*/ - page_t* undo_page, /* in: undo log page */ - ulint old_free, /* in: start offset of the inserted entry */ - ulint new_free, /* in: end offset of the entry */ - mtr_t* mtr) /* in: mtr */ -{ - byte* log_ptr; - const byte* log_end; - ulint len; - - log_ptr = mlog_open(mtr, 11 + 13 + MLOG_BUF_MARGIN); - - if (log_ptr == NULL) { - - return; - } - - log_end = &log_ptr[11 + 13 + MLOG_BUF_MARGIN]; - log_ptr = mlog_write_initial_log_record_fast( - undo_page, MLOG_UNDO_INSERT, log_ptr, mtr); - len = new_free - old_free - 4; - - mach_write_to_2(log_ptr, len); - log_ptr += 2; - - if (log_ptr + len <= log_end) { - memcpy(log_ptr, undo_page + old_free + 2, len); - mlog_close(mtr, log_ptr + len); - } else { - mlog_close(mtr, log_ptr); - mlog_catenate_string(mtr, undo_page + old_free + 2, len); - } -} - -/*************************************************************** -Parses a redo log record of adding an undo log record. */ - -byte* -trx_undo_parse_add_undo_rec( -/*========================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page) /* in: page or NULL */ -{ - ulint len; - byte* rec; - ulint first_free; - - if (end_ptr < ptr + 2) { - - return(NULL); - } - - len = mach_read_from_2(ptr); - ptr += 2; - - if (end_ptr < ptr + len) { - - return(NULL); - } - - if (page == NULL) { - - return(ptr + len); - } - - first_free = mach_read_from_2(page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_FREE); - rec = page + first_free; - - mach_write_to_2(rec, first_free + 4 + len); - mach_write_to_2(rec + 2 + len, first_free); - - mach_write_to_2(page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE, - first_free + 4 + len); - ut_memcpy(rec + 2, ptr, len); - - return(ptr + len); -} - -/************************************************************************** -Calculates the free space left for extending an undo log record. 
*/ -UNIV_INLINE -ulint -trx_undo_left( -/*==========*/ - /* out: bytes left */ - page_t* page, /* in: undo log page */ - byte* ptr) /* in: pointer to page */ -{ - /* The '- 10' is a safety margin, in case we have some small - calculation error below */ - - return(UNIV_PAGE_SIZE - (ptr - page) - 10 - FIL_PAGE_DATA_END); -} - -/************************************************************************** -Reports in the undo log of an insert of a clustered index record. */ -static -ulint -trx_undo_page_report_insert( -/*========================*/ - /* out: offset of the inserted entry - on the page if succeed, 0 if fail */ - page_t* undo_page, /* in: undo log page */ - trx_t* trx, /* in: transaction */ - dict_index_t* index, /* in: clustered index */ - dtuple_t* clust_entry, /* in: index entry which will be - inserted to the clustered index */ - mtr_t* mtr) /* in: mtr */ -{ - ulint first_free; - byte* ptr; - ulint len; - dfield_t* field; - ulint flen; - ulint i; - - ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_INSERT); - - first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_FREE); - ptr = undo_page + first_free; - - ut_ad(first_free <= UNIV_PAGE_SIZE); - - if (trx_undo_left(undo_page, ptr) < 30) { - - /* NOTE: the value 30 must be big enough such that the general - fields written below fit on the undo log page */ - - return(0); - } - - /* Reserve 2 bytes for the pointer to the next undo log record */ - ptr += 2; - - /* Store first some general parameters to the undo log */ - mach_write_to_1(ptr, TRX_UNDO_INSERT_REC); - ptr++; - - len = mach_dulint_write_much_compressed(ptr, trx->undo_no); - ptr += len; - - len = mach_dulint_write_much_compressed(ptr, (index->table)->id); - ptr += len; - /*----------------------------------------*/ - /* Store then the fields required to uniquely determine the record - to be inserted in the clustered index */ - - for (i = 0; i < dict_index_get_n_unique(index); i++) { - 
- field = dtuple_get_nth_field(clust_entry, i); - - flen = dfield_get_len(field); - - if (trx_undo_left(undo_page, ptr) < 5) { - - return(0); - } - - len = mach_write_compressed(ptr, flen); - ptr += len; - - if (flen != UNIV_SQL_NULL) { - if (trx_undo_left(undo_page, ptr) < flen) { - - return(0); - } - - ut_memcpy(ptr, dfield_get_data(field), flen); - ptr += flen; - } - } - - if (trx_undo_left(undo_page, ptr) < 2) { - - return(0); - } - - /*----------------------------------------*/ - /* Write pointers to the previous and the next undo log records */ - - if (trx_undo_left(undo_page, ptr) < 2) { - - return(0); - } - - mach_write_to_2(ptr, first_free); - ptr += 2; - - mach_write_to_2(undo_page + first_free, ptr - undo_page); - - mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE, - ptr - undo_page); - - /* Write the log entry to the REDO log of this change in the UNDO - log */ - trx_undof_page_add_undo_rec_log(undo_page, first_free, - ptr - undo_page, mtr); - return(first_free); -} - -/************************************************************************** -Reads from an undo log record the general parameters. */ - -byte* -trx_undo_rec_get_pars( -/*==================*/ - /* out: remaining part of undo log - record after reading these values */ - trx_undo_rec_t* undo_rec, /* in: undo log record */ - ulint* type, /* out: undo record type: - TRX_UNDO_INSERT_REC, ... 
*/ - ulint* cmpl_info, /* out: compiler info, relevant only - for update type records */ - ibool* updated_extern, /* out: TRUE if we updated an - externally stored fild */ - dulint* undo_no, /* out: undo log record number */ - dulint* table_id) /* out: table id */ -{ - byte* ptr; - ulint len; - ulint type_cmpl; - - ptr = undo_rec + 2; - - type_cmpl = mach_read_from_1(ptr); - ptr++; - - if (type_cmpl & TRX_UNDO_UPD_EXTERN) { - *updated_extern = TRUE; - type_cmpl -= TRX_UNDO_UPD_EXTERN; - } else { - *updated_extern = FALSE; - } - - *type = type_cmpl & (TRX_UNDO_CMPL_INFO_MULT - 1); - *cmpl_info = type_cmpl / TRX_UNDO_CMPL_INFO_MULT; - - *undo_no = mach_dulint_read_much_compressed(ptr); - len = mach_dulint_get_much_compressed_size(*undo_no); - ptr += len; - - *table_id = mach_dulint_read_much_compressed(ptr); - len = mach_dulint_get_much_compressed_size(*table_id); - ptr += len; - - return(ptr); -} - -/************************************************************************** -Reads from an undo log record a stored column value. */ -static -byte* -trx_undo_rec_get_col_val( -/*=====================*/ - /* out: remaining part of undo log record after - reading these values */ - byte* ptr, /* in: pointer to remaining part of undo log record */ - byte** field, /* out: pointer to stored field */ - ulint* len) /* out: length of the field, or UNIV_SQL_NULL */ -{ - *len = mach_read_compressed(ptr); - ptr += mach_get_compressed_size(*len); - - *field = ptr; - - if (*len != UNIV_SQL_NULL) { - if (*len >= UNIV_EXTERN_STORAGE_FIELD) { - ptr += (*len - UNIV_EXTERN_STORAGE_FIELD); - } else { - ptr += *len; - } - } - - return(ptr); -} - -/*********************************************************************** -Builds a row reference from an undo log record. 
*/ - -byte* -trx_undo_rec_get_row_ref( -/*=====================*/ - /* out: pointer to remaining part of undo - record */ - byte* ptr, /* in: remaining part of a copy of an undo log - record, at the start of the row reference; - NOTE that this copy of the undo log record must - be preserved as long as the row reference is - used, as we do NOT copy the data in the - record! */ - dict_index_t* index, /* in: clustered index */ - dtuple_t** ref, /* out, own: row reference */ - mem_heap_t* heap) /* in: memory heap from which the memory - needed is allocated */ -{ - dfield_t* dfield; - byte* field; - ulint len; - ulint ref_len; - ulint i; - - ut_ad(index && ptr && ref && heap); - ut_a(index->type & DICT_CLUSTERED); - - ref_len = dict_index_get_n_unique(index); - - *ref = dtuple_create(heap, ref_len); - - dict_index_copy_types(*ref, index, ref_len); - - for (i = 0; i < ref_len; i++) { - dfield = dtuple_get_nth_field(*ref, i); - - ptr = trx_undo_rec_get_col_val(ptr, &field, &len); - - dfield_set_data(dfield, field, len); - } - - return(ptr); -} - -/*********************************************************************** -Skips a row reference from an undo log record. */ - -byte* -trx_undo_rec_skip_row_ref( -/*======================*/ - /* out: pointer to remaining part of undo - record */ - byte* ptr, /* in: remaining part in update undo log - record, at the start of the row reference */ - dict_index_t* index) /* in: clustered index */ -{ - byte* field; - ulint len; - ulint ref_len; - ulint i; - - ut_ad(index && ptr); - ut_a(index->type & DICT_CLUSTERED); - - ref_len = dict_index_get_n_unique(index); - - for (i = 0; i < ref_len; i++) { - ptr = trx_undo_rec_get_col_val(ptr, &field, &len); - } - - return(ptr); -} - -/************************************************************************** -Reports in the undo log of an update or delete marking of a clustered index -record. 
*/
static
ulint
trx_undo_page_report_modify(
/*========================*/
					/* out: byte offset of the inserted
					undo log entry on the page if succeed,
					0 if fail */
	page_t*		undo_page,	/* in: undo log page */
	trx_t*		trx,		/* in: transaction */
	dict_index_t*	index,		/* in: clustered index where update or
					delete marking is done */
	rec_t*		rec,		/* in: clustered index record which
					has NOT yet been modified */
	const ulint*	offsets,	/* in: rec_get_offsets(rec, index) */
	upd_t*		update,		/* in: update vector which tells the
					columns to be updated; in the case of
					a delete, this should be set to NULL */
	ulint		cmpl_info,	/* in: compiler info on secondary
					index updates */
	mtr_t*		mtr)		/* in: mtr */
{
	dict_table_t*	table;
	upd_field_t*	upd_field;
	ulint		first_free;
	byte*		ptr;
	ulint		len;
	byte*		field;
	ulint		flen;
	ulint		pos;
	dulint		roll_ptr;
	dulint		trx_id;
	ulint		bits;
	ulint		col_no;
	byte*		old_ptr;
	ulint		type_cmpl;
	byte*		type_cmpl_ptr;
	ulint		i;

	ut_a(index->type & DICT_CLUSTERED);
	ut_ad(rec_offs_validate(rec, index, offsets));
	ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
			       + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE);
	table = index->table;

	first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR
				      + TRX_UNDO_PAGE_FREE);
	ptr = undo_page + first_free;

	ut_ad(first_free <= UNIV_PAGE_SIZE);

	if (trx_undo_left(undo_page, ptr) < 50) {

		/* NOTE: the value 50 must be big enough so that the general
		fields written below fit on the undo log page */

		return(0);
	}

	/* Reserve 2 bytes for the pointer to the next undo log record */
	ptr += 2;

	/* Store first some general parameters to the undo log */

	if (update) {
		if (rec_get_deleted_flag(rec, dict_table_is_comp(table))) {
			type_cmpl = TRX_UNDO_UPD_DEL_REC;
		} else {
			type_cmpl = TRX_UNDO_UPD_EXIST_REC;
		}
	} else {
		type_cmpl = TRX_UNDO_DEL_MARK_REC;
	}

	type_cmpl = type_cmpl | (cmpl_info * TRX_UNDO_CMPL_INFO_MULT);

	mach_write_to_1(ptr, type_cmpl);

	/* Remember where the type byte is: TRX_UNDO_UPD_EXTERN may have to
	be set in it later */
	type_cmpl_ptr = ptr;

	ptr++;
	len = mach_dulint_write_much_compressed(ptr, trx->undo_no);
	ptr += len;

	len = mach_dulint_write_much_compressed(ptr, table->id);
	ptr += len;

	/*----------------------------------------*/
	/* Store the state of the info bits */

	bits = rec_get_info_bits(rec, dict_table_is_comp(table));
	mach_write_to_1(ptr, bits);
	ptr += 1;

	/* Store the values of the system columns */
	field = rec_get_nth_field(rec, offsets,
				  dict_index_get_sys_col_pos(
					  index, DATA_TRX_ID), &len);
	ut_ad(len == DATA_TRX_ID_LEN);
	trx_id = trx_read_trx_id(field);
	field = rec_get_nth_field(rec, offsets,
				  dict_index_get_sys_col_pos(
					  index, DATA_ROLL_PTR), &len);
	ut_ad(len == DATA_ROLL_PTR_LEN);
	roll_ptr = trx_read_roll_ptr(field);

	len = mach_dulint_write_compressed(ptr, trx_id);
	ptr += len;

	len = mach_dulint_write_compressed(ptr, roll_ptr);
	ptr += len;

	/*----------------------------------------*/
	/* Store then the fields required to uniquely determine the
	record which will be modified in the clustered index */

	for (i = 0; i < dict_index_get_n_unique(index); i++) {

		field = rec_get_nth_field(rec, offsets, i, &flen);

		/* Reserve 5 bytes for the compressed length, like every
		other mach_write_compressed() call site in this file */

		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		len = mach_write_compressed(ptr, flen);
		ptr += len;

		if (flen != UNIV_SQL_NULL) {
			if (trx_undo_left(undo_page, ptr) < flen) {

				return(0);
			}

			ut_memcpy(ptr, field, flen);
			ptr += flen;
		}
	}

	/*----------------------------------------*/
	/* Save to the undo log the old values of the columns to be updated. */

	if (update) {
		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		len = mach_write_compressed(ptr, upd_get_n_fields(update));
		ptr += len;

		for (i = 0; i < upd_get_n_fields(update); i++) {

			upd_field = upd_get_nth_field(update, i);
			pos = upd_field->field_no;

			/* Write field number to undo log */
			if (trx_undo_left(undo_page, ptr) < 5) {

				return(0);
			}

			len = mach_write_compressed(ptr, pos);
			ptr += len;

			/* Save the old value of field */
			field = rec_get_nth_field(rec, offsets, pos, &flen);

			if (trx_undo_left(undo_page, ptr) < 5) {

				return(0);
			}

			if (rec_offs_nth_extern(offsets, pos)) {
				/* If a field has external storage, we add
				to flen the flag */

				len = mach_write_compressed(
					ptr,
					UNIV_EXTERN_STORAGE_FIELD + flen);

				/* Notify purge that it eventually has to
				free the old externally stored field */

				trx->update_undo->del_marks = TRUE;

				*type_cmpl_ptr = *type_cmpl_ptr
					| TRX_UNDO_UPD_EXTERN;
			} else {
				len = mach_write_compressed(ptr, flen);
			}

			ptr += len;

			if (flen != UNIV_SQL_NULL) {
				if (trx_undo_left(undo_page, ptr) < flen) {

					return(0);
				}

				ut_memcpy(ptr, field, flen);
				ptr += flen;
			}
		}
	}

	/*----------------------------------------*/
	/* In the case of a delete marking, and also in the case of an update
	where any ordering field of any index changes, store the values of all
	columns which occur as ordering fields in any index. This info is used
	in the purge of old versions where we use it to build and search the
	delete marked index records, to look if we can remove them from the
	index tree. Note that starting from 4.0.14 also externally stored
	fields can be ordering in some index. But we always store at least
	384 first bytes locally to the clustered index record, which means
	we can construct the column prefix fields in the index from the
	stored data. */

	if (!update || !(cmpl_info & UPD_NODE_NO_ORD_CHANGE)) {

		trx->update_undo->del_marks = TRUE;

		if (trx_undo_left(undo_page, ptr) < 5) {

			return(0);
		}

		old_ptr = ptr;

		/* Reserve 2 bytes to write the number of bytes the stored
		fields take in this undo record */

		ptr += 2;

		for (col_no = 0; col_no < dict_table_get_n_cols(table);
		     col_no++) {

			const dict_col_t*	col
				= dict_table_get_nth_col(table, col_no);

			if (col->ord_part > 0) {

				pos = dict_index_get_nth_col_pos(index,
								 col_no);

				/* Write field number to undo log */
				if (trx_undo_left(undo_page, ptr) < 5) {

					return(0);
				}

				len = mach_write_compressed(ptr, pos);
				ptr += len;

				/* Save the old value of field */
				field = rec_get_nth_field(rec, offsets, pos,
							  &flen);

				if (trx_undo_left(undo_page, ptr) < 5) {

					return(0);
				}

				len = mach_write_compressed(ptr, flen);
				ptr += len;

				if (flen != UNIV_SQL_NULL) {
					if (trx_undo_left(undo_page, ptr)
					    < flen) {

						return(0);
					}

					ut_memcpy(ptr, field, flen);
					ptr += flen;
				}
			}
		}

		/* The length written includes the 2 reserved bytes */
		mach_write_to_2(old_ptr, ptr - old_ptr);
	}

	/*----------------------------------------*/
	/* Write pointers to the previous and the next undo log records */
	if (trx_undo_left(undo_page, ptr) < 2) {

		return(0);
	}

	mach_write_to_2(ptr, first_free);
	ptr += 2;
	mach_write_to_2(undo_page + first_free, ptr - undo_page);

	mach_write_to_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_FREE,
			ptr - undo_page);

	/* Write to the REDO log about this change in the UNDO log */

	trx_undof_page_add_undo_rec_log(undo_page, first_free,
					ptr - undo_page, mtr);
	return(first_free);
}

/**************************************************************************
Reads from an undo log update record the system field values of the old
version.
*/ - -byte* -trx_undo_update_rec_get_sys_cols( -/*=============================*/ - /* out: remaining part of undo log - record after reading these values */ - byte* ptr, /* in: remaining part of undo log - record after reading general - parameters */ - dulint* trx_id, /* out: trx id */ - dulint* roll_ptr, /* out: roll ptr */ - ulint* info_bits) /* out: info bits state */ -{ - ulint len; - - /* Read the state of the info bits */ - *info_bits = mach_read_from_1(ptr); - ptr += 1; - - /* Read the values of the system columns */ - - *trx_id = mach_dulint_read_compressed(ptr); - len = mach_dulint_get_compressed_size(*trx_id); - ptr += len; - - *roll_ptr = mach_dulint_read_compressed(ptr); - len = mach_dulint_get_compressed_size(*roll_ptr); - ptr += len; - - return(ptr); -} - -/************************************************************************** -Reads from an update undo log record the number of updated fields. */ -UNIV_INLINE -byte* -trx_undo_update_rec_get_n_upd_fields( -/*=================================*/ - /* out: remaining part of undo log record after - reading this value */ - byte* ptr, /* in: pointer to remaining part of undo log record */ - ulint* n) /* out: number of fields */ -{ - *n = mach_read_compressed(ptr); - ptr += mach_get_compressed_size(*n); - - return(ptr); -} - -/************************************************************************** -Reads from an update undo log record a stored field number. */ -UNIV_INLINE -byte* -trx_undo_update_rec_get_field_no( -/*=============================*/ - /* out: remaining part of undo log record after - reading this value */ - byte* ptr, /* in: pointer to remaining part of undo log record */ - ulint* field_no)/* out: field number */ -{ - *field_no = mach_read_compressed(ptr); - ptr += mach_get_compressed_size(*field_no); - - return(ptr); -} - -/*********************************************************************** -Builds an update vector based on a remaining part of an undo log record. 
*/ - -byte* -trx_undo_update_rec_get_update( -/*===========================*/ - /* out: remaining part of the record, - NULL if an error detected, which means that - the record is corrupted */ - byte* ptr, /* in: remaining part in update undo log - record, after reading the row reference - NOTE that this copy of the undo log record must - be preserved as long as the update vector is - used, as we do NOT copy the data in the - record! */ - dict_index_t* index, /* in: clustered index */ - ulint type, /* in: TRX_UNDO_UPD_EXIST_REC, - TRX_UNDO_UPD_DEL_REC, or - TRX_UNDO_DEL_MARK_REC; in the last case, - only trx id and roll ptr fields are added to - the update vector */ - dulint trx_id, /* in: transaction id from this undo record */ - dulint roll_ptr,/* in: roll pointer from this undo record */ - ulint info_bits,/* in: info bits from this undo record */ - trx_t* trx, /* in: transaction */ - mem_heap_t* heap, /* in: memory heap from which the memory - needed is allocated */ - upd_t** upd) /* out, own: update vector */ -{ - upd_field_t* upd_field; - upd_t* update; - ulint n_fields; - byte* buf; - byte* field; - ulint len; - ulint field_no; - ulint i; - - ut_a(index->type & DICT_CLUSTERED); - - if (type != TRX_UNDO_DEL_MARK_REC) { - ptr = trx_undo_update_rec_get_n_upd_fields(ptr, &n_fields); - } else { - n_fields = 0; - } - - update = upd_create(n_fields + 2, heap); - - update->info_bits = info_bits; - - /* Store first trx id and roll ptr to update vector */ - - upd_field = upd_get_nth_field(update, n_fields); - buf = mem_heap_alloc(heap, DATA_TRX_ID_LEN); - trx_write_trx_id(buf, trx_id); - - upd_field_set_field_no(upd_field, - dict_index_get_sys_col_pos(index, DATA_TRX_ID), - index, trx); - dfield_set_data(&(upd_field->new_val), buf, DATA_TRX_ID_LEN); - - upd_field = upd_get_nth_field(update, n_fields + 1); - buf = mem_heap_alloc(heap, DATA_ROLL_PTR_LEN); - trx_write_roll_ptr(buf, roll_ptr); - - upd_field_set_field_no( - upd_field, dict_index_get_sys_col_pos(index, 
DATA_ROLL_PTR), - index, trx); - dfield_set_data(&(upd_field->new_val), buf, DATA_ROLL_PTR_LEN); - - /* Store then the updated ordinary columns to the update vector */ - - for (i = 0; i < n_fields; i++) { - - ptr = trx_undo_update_rec_get_field_no(ptr, &field_no); - - if (field_no >= dict_index_get_n_fields(index)) { - fprintf(stderr, - "InnoDB: Error: trying to access" - " update undo rec field %lu in ", - (ulong) field_no); - dict_index_name_print(stderr, trx, index); - fprintf(stderr, "\n" - "InnoDB: but index has only %lu fields\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n" - "InnoDB: Run also CHECK TABLE ", - (ulong) dict_index_get_n_fields(index)); - ut_print_name(stderr, trx, TRUE, index->table_name); - fprintf(stderr, "\n" - "InnoDB: n_fields = %lu, i = %lu, ptr %p\n", - (ulong) n_fields, (ulong) i, ptr); - return(NULL); - } - - ptr = trx_undo_rec_get_col_val(ptr, &field, &len); - - upd_field = upd_get_nth_field(update, i); - - upd_field_set_field_no(upd_field, field_no, index, trx); - - if (len != UNIV_SQL_NULL && len >= UNIV_EXTERN_STORAGE_FIELD) { - - upd_field->extern_storage = TRUE; - - len -= UNIV_EXTERN_STORAGE_FIELD; - } - - dfield_set_data(&(upd_field->new_val), field, len); - } - - *upd = update; - - return(ptr); -} - -/*********************************************************************** -Builds a partial row from an update undo log record. It contains the -columns which occur as ordering in any index of the table. */ - -byte* -trx_undo_rec_get_partial_row( -/*=========================*/ - /* out: pointer to remaining part of undo - record */ - byte* ptr, /* in: remaining part in update undo log - record of a suitable type, at the start of - the stored index columns; - NOTE that this copy of the undo log record must - be preserved as long as the partial row is - used, as we do NOT copy the data in the - record! 
*/ - dict_index_t* index, /* in: clustered index */ - dtuple_t** row, /* out, own: partial row */ - mem_heap_t* heap) /* in: memory heap from which the memory - needed is allocated */ -{ - dfield_t* dfield; - byte* field; - ulint len; - ulint field_no; - ulint col_no; - ulint row_len; - ulint total_len; - byte* start_ptr; - ulint i; - - ut_ad(index && ptr && row && heap); - - row_len = dict_table_get_n_cols(index->table); - - *row = dtuple_create(heap, row_len); - - dict_table_copy_types(*row, index->table); - - start_ptr = ptr; - - total_len = mach_read_from_2(ptr); - ptr += 2; - - for (i = 0;; i++) { - - if (ptr == start_ptr + total_len) { - - break; - } - - ptr = trx_undo_update_rec_get_field_no(ptr, &field_no); - - col_no = dict_index_get_nth_col_no(index, field_no); - - ptr = trx_undo_rec_get_col_val(ptr, &field, &len); - - dfield = dtuple_get_nth_field(*row, col_no); - - dfield_set_data(dfield, field, len); - } - - return(ptr); -} - -/*************************************************************************** -Erases the unused undo log page end. */ -static -void -trx_undo_erase_page_end( -/*====================*/ - page_t* undo_page, /* in: undo page whose end to erase */ - mtr_t* mtr) /* in: mtr */ -{ - ulint first_free; - - first_free = mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_FREE); - memset(undo_page + first_free, 0xff, - (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END) - first_free); - - mlog_write_initial_log_record(undo_page, MLOG_UNDO_ERASE_END, mtr); -} - -/*************************************************************** -Parses a redo log record of erasing of an undo page end. 
*/ - -byte* -trx_undo_parse_erase_page_end( -/*==========================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr __attribute__((unused)), /* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ -{ - ut_ad(ptr && end_ptr); - - if (page == NULL) { - - return(ptr); - } - - trx_undo_erase_page_end(page, mtr); - - return(ptr); -} - -/*************************************************************************** -Writes information to an undo log about an insert, update, or a delete marking -of a clustered index record. This information is used in a rollback of the -transaction and in consistent reads that must look to the history of this -transaction. */ - -ulint -trx_undo_report_row_operation( -/*==========================*/ - /* out: DB_SUCCESS or error code */ - ulint flags, /* in: if BTR_NO_UNDO_LOG_FLAG bit is - set, does nothing */ - ulint op_type, /* in: TRX_UNDO_INSERT_OP or - TRX_UNDO_MODIFY_OP */ - que_thr_t* thr, /* in: query thread */ - dict_index_t* index, /* in: clustered index */ - dtuple_t* clust_entry, /* in: in the case of an insert, - index entry to insert into the - clustered index, otherwise NULL */ - upd_t* update, /* in: in the case of an update, - the update vector, otherwise NULL */ - ulint cmpl_info, /* in: compiler info on secondary - index updates */ - rec_t* rec, /* in: in case of an update or delete - marking, the record in the clustered - index, otherwise NULL */ - dulint* roll_ptr) /* out: rollback pointer to the - inserted undo log record, - ut_dulint_zero if BTR_NO_UNDO_LOG - flag was specified */ -{ - trx_t* trx; - trx_undo_t* undo; - page_t* undo_page; - ulint offset; - ulint page_no; - ibool is_insert; - trx_rseg_t* rseg; - mtr_t mtr; - ulint err = DB_SUCCESS; - mem_heap_t* heap = NULL; - ulint offsets_[REC_OFFS_NORMAL_SIZE]; - ulint* offsets = offsets_; - *offsets_ = (sizeof offsets_) / sizeof *offsets_; - - ut_a(index->type & DICT_CLUSTERED); - - 
if (flags & BTR_NO_UNDO_LOG_FLAG) { - - *roll_ptr = ut_dulint_zero; - - return(err); - } - - ut_ad(thr); - ut_ad((op_type != TRX_UNDO_INSERT_OP) - || (clust_entry && !update && !rec)); - - trx = thr_get_trx(thr); - rseg = trx->rseg; - - mutex_enter(&(trx->undo_mutex)); - - /* If the undo log is not assigned yet, assign one */ - - if (op_type == TRX_UNDO_INSERT_OP) { - - if (trx->insert_undo == NULL) { - - err = trx_undo_assign_undo(trx, TRX_UNDO_INSERT); - } - - undo = trx->insert_undo; - is_insert = TRUE; - } else { - ut_ad(op_type == TRX_UNDO_MODIFY_OP); - - if (trx->update_undo == NULL) { - - err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE); - - } - - undo = trx->update_undo; - is_insert = FALSE; - } - - if (err != DB_SUCCESS) { - /* Did not succeed: return the error encountered */ - mutex_exit(&(trx->undo_mutex)); - - return(err); - } - - page_no = undo->last_page_no; - - mtr_start(&mtr); - - for (;;) { - undo_page = buf_page_get_gen(undo->space, page_no, - RW_X_LATCH, undo->guess_page, - BUF_GET, - __FILE__, __LINE__, - &mtr); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(undo_page, SYNC_TRX_UNDO_PAGE); -#endif /* UNIV_SYNC_DEBUG */ - - if (op_type == TRX_UNDO_INSERT_OP) { - offset = trx_undo_page_report_insert( - undo_page, trx, index, clust_entry, &mtr); - } else { - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, &heap); - offset = trx_undo_page_report_modify( - undo_page, trx, index, rec, offsets, update, - cmpl_info, &mtr); - } - - if (offset == 0) { - /* The record did not fit on the page. 
We erase the - end segment of the undo log page and write a log - record of it: this is to ensure that in the debug - version the replicate page constructed using the log - records stays identical to the original page */ - - trx_undo_erase_page_end(undo_page, &mtr); - } - - mtr_commit(&mtr); - - if (offset != 0) { - /* Success */ - - break; - } - - ut_ad(page_no == undo->last_page_no); - - /* We have to extend the undo log by one page */ - - mtr_start(&mtr); - - /* When we add a page to an undo log, this is analogous to - a pessimistic insert in a B-tree, and we must reserve the - counterpart of the tree latch, which is the rseg mutex. */ - - mutex_enter(&(rseg->mutex)); - - page_no = trx_undo_add_page(trx, undo, &mtr); - - mutex_exit(&(rseg->mutex)); - - if (page_no == FIL_NULL) { - /* Did not succeed: out of space */ - - mutex_exit(&(trx->undo_mutex)); - mtr_commit(&mtr); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(DB_OUT_OF_FILE_SPACE); - } - } - - undo->empty = FALSE; - undo->top_page_no = page_no; - undo->top_offset = offset; - undo->top_undo_no = trx->undo_no; - undo->guess_page = undo_page; - - UT_DULINT_INC(trx->undo_no); - - mutex_exit(&(trx->undo_mutex)); - - *roll_ptr = trx_undo_build_roll_ptr(is_insert, rseg->id, page_no, - offset); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); - } - return(err); -} - -/*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/ - -/********************************************************************** -Copies an undo record to heap. This function can be called if we know that -the undo log record exists. 
*/ - -trx_undo_rec_t* -trx_undo_get_undo_rec_low( -/*======================*/ - /* out, own: copy of the record */ - dulint roll_ptr, /* in: roll pointer to record */ - mem_heap_t* heap) /* in: memory heap where copied */ -{ - trx_undo_rec_t* undo_rec; - ulint rseg_id; - ulint page_no; - ulint offset; - page_t* undo_page; - trx_rseg_t* rseg; - ibool is_insert; - mtr_t mtr; - - trx_undo_decode_roll_ptr(roll_ptr, &is_insert, &rseg_id, &page_no, - &offset); - rseg = trx_rseg_get_on_id(rseg_id); - - mtr_start(&mtr); - - undo_page = trx_undo_page_get_s_latched(rseg->space, page_no, &mtr); - - undo_rec = trx_undo_rec_copy(undo_page + offset, heap); - - mtr_commit(&mtr); - - return(undo_rec); -} - -/********************************************************************** -Copies an undo record to heap. */ - -ulint -trx_undo_get_undo_rec( -/*==================*/ - /* out: DB_SUCCESS, or - DB_MISSING_HISTORY if the undo log - has been truncated and we cannot - fetch the old version; NOTE: the - caller must have latches on the - clustered index page and purge_view */ - dulint roll_ptr, /* in: roll pointer to record */ - dulint trx_id, /* in: id of the trx that generated - the roll pointer: it points to an - undo log of this transaction */ - trx_undo_rec_t** undo_rec, /* out, own: copy of the record */ - mem_heap_t* heap) /* in: memory heap where copied */ -{ -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - - if (!trx_purge_update_undo_must_exist(trx_id)) { - - /* It may be that the necessary undo log has already been - deleted */ - - return(DB_MISSING_HISTORY); - } - - *undo_rec = trx_undo_get_undo_rec_low(roll_ptr, heap); - - return(DB_SUCCESS); -} - -/*********************************************************************** -Build a previous version of a clustered index record. This function checks -that the caller has a latch on the index page of the clustered index record -and an s-latch on the purge_view. 
This guarantees that the stack of versions -is locked. */ - -ulint -trx_undo_prev_version_build( -/*========================*/ - /* out: DB_SUCCESS, or DB_MISSING_HISTORY if - the previous version is not >= purge_view, - which means that it may have been removed, - DB_ERROR if corrupted record */ - rec_t* index_rec,/* in: clustered index record in the - index tree */ - mtr_t* index_mtr __attribute__((unused)), - /* in: mtr which contains the latch to - index_rec page and purge_view */ - rec_t* rec, /* in: version of a clustered index record */ - dict_index_t* index, /* in: clustered index */ - ulint* offsets,/* in: rec_get_offsets(rec, index) */ - mem_heap_t* heap, /* in: memory heap from which the memory - needed is allocated */ - rec_t** old_vers)/* out, own: previous version, or NULL if - rec is the first inserted version, or if - history data has been deleted */ -{ - trx_undo_rec_t* undo_rec; - dtuple_t* entry; - dulint rec_trx_id; - ulint type; - dulint undo_no; - dulint table_id; - dulint trx_id; - dulint roll_ptr; - dulint old_roll_ptr; - upd_t* update; - byte* ptr; - ulint info_bits; - ulint cmpl_info; - ibool dummy_extern; - byte* buf; - ulint err; -#ifdef UNIV_SYNC_DEBUG - ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); -#endif /* UNIV_SYNC_DEBUG */ - ut_ad(mtr_memo_contains(index_mtr, buf_block_align(index_rec), - MTR_MEMO_PAGE_S_FIX) - || mtr_memo_contains(index_mtr, buf_block_align(index_rec), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(rec_offs_validate(rec, index, offsets)); - - if (!(index->type & DICT_CLUSTERED)) { - fprintf(stderr, "InnoDB: Error: trying to access" - " update undo rec for non-clustered index %s\n" - "InnoDB: Submit a detailed bug report to" - " http://bugs.mysql.com\n" - "InnoDB: index record ", index->name); - rec_print(stderr, index_rec, index); - fputs("\n" - "InnoDB: record version ", stderr); - rec_print_new(stderr, rec, offsets); - putc('\n', stderr); - return(DB_ERROR); - } - - roll_ptr = row_get_rec_roll_ptr(rec, index, 
offsets); - old_roll_ptr = roll_ptr; - - *old_vers = NULL; - - if (trx_undo_roll_ptr_is_insert(roll_ptr)) { - - /* The record rec is the first inserted version */ - - return(DB_SUCCESS); - } - - rec_trx_id = row_get_rec_trx_id(rec, index, offsets); - - err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap); - - if (err != DB_SUCCESS) { - - return(err); - } - - ptr = trx_undo_rec_get_pars(undo_rec, &type, &cmpl_info, - &dummy_extern, &undo_no, &table_id); - - ptr = trx_undo_update_rec_get_sys_cols(ptr, &trx_id, &roll_ptr, - &info_bits); - ptr = trx_undo_rec_skip_row_ref(ptr, index); - - ptr = trx_undo_update_rec_get_update(ptr, index, type, trx_id, - roll_ptr, info_bits, - NULL, heap, &update); - - if (ut_dulint_cmp(table_id, index->table->id) != 0) { - ptr = NULL; - - fprintf(stderr, - "InnoDB: Error: trying to access update undo rec" - " for table %s\n" - "InnoDB: but the table id in the" - " undo record is wrong\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com\n" - "InnoDB: Run also CHECK TABLE %s\n", - index->table_name, index->table_name); - } - - if (ptr == NULL) { - /* The record was corrupted, return an error; these printfs - should catch an elusive bug in row_vers_old_has_index_entry */ - - fprintf(stderr, - "InnoDB: table %s, index %s, n_uniq %lu\n" - "InnoDB: undo rec address %p, type %lu cmpl_info %lu\n" - "InnoDB: undo rec table id %lu %lu," - " index table id %lu %lu\n" - "InnoDB: dump of 150 bytes in undo rec: ", - index->table_name, index->name, - (ulong) dict_index_get_n_unique(index), - undo_rec, (ulong) type, (ulong) cmpl_info, - (ulong) ut_dulint_get_high(table_id), - (ulong) ut_dulint_get_low(table_id), - (ulong) ut_dulint_get_high(index->table->id), - (ulong) ut_dulint_get_low(index->table->id)); - ut_print_buf(stderr, undo_rec, 150); - fputs("\n" - "InnoDB: index record ", stderr); - rec_print(stderr, index_rec, index); - fputs("\n" - "InnoDB: record version ", stderr); - rec_print_new(stderr, rec, offsets); 
- fprintf(stderr, "\n" - "InnoDB: Record trx id %lu %lu, update rec" - " trx id %lu %lu\n" - "InnoDB: Roll ptr in rec %lu %lu, in update rec" - " %lu %lu\n", - (ulong) ut_dulint_get_high(rec_trx_id), - (ulong) ut_dulint_get_low(rec_trx_id), - (ulong) ut_dulint_get_high(trx_id), - (ulong) ut_dulint_get_low(trx_id), - (ulong) ut_dulint_get_high(old_roll_ptr), - (ulong) ut_dulint_get_low(old_roll_ptr), - (ulong) ut_dulint_get_high(roll_ptr), - (ulong) ut_dulint_get_low(roll_ptr)); - - trx_purge_sys_print(); - return(DB_ERROR); - } - - if (row_upd_changes_field_size_or_external(index, offsets, update)) { - ulint* ext_vect; - ulint n_ext_vect; - - /* We have to set the appropriate extern storage bits in the - old version of the record: the extern bits in rec for those - fields that update does NOT update, as well as the the bits for - those fields that update updates to become externally stored - fields. Store the info to ext_vect: */ - - ext_vect = mem_alloc(sizeof(ulint) - * rec_offs_n_fields(offsets)); - n_ext_vect = btr_push_update_extern_fields(ext_vect, offsets, - update); - entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, - heap); - row_upd_index_replace_new_col_vals(entry, index, update, heap); - - buf = mem_heap_alloc(heap, - rec_get_converted_size(index, entry)); - - *old_vers = rec_convert_dtuple_to_rec(buf, index, entry); - - /* Now set the extern bits in the old version of the record */ - rec_set_field_extern_bits(*old_vers, index, - ext_vect, n_ext_vect, NULL); - mem_free(ext_vect); - } else { - buf = mem_heap_alloc(heap, rec_offs_size(offsets)); - *old_vers = rec_copy(buf, rec, offsets); - rec_offs_make_valid(*old_vers, index, offsets); - row_upd_rec_in_place(*old_vers, offsets, update); - } - - return(DB_SUCCESS); -} diff --git a/storage/innobase/trx/trx0roll.c b/storage/innobase/trx/trx0roll.c deleted file mode 100644 index 8934fe87c7e..00000000000 --- a/storage/innobase/trx/trx0roll.c +++ /dev/null @@ -1,1341 +0,0 @@ 
-/****************************************************** -Transaction rollback - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "trx0roll.h" - -#ifdef UNIV_NONINL -#include "trx0roll.ic" -#endif - -#include "fsp0fsp.h" -#include "mach0data.h" -#include "trx0rseg.h" -#include "trx0trx.h" -#include "trx0undo.h" -#include "trx0rec.h" -#include "que0que.h" -#include "usr0sess.h" -#include "srv0que.h" -#include "srv0start.h" -#include "row0undo.h" -#include "row0mysql.h" -#include "lock0lock.h" -#include "pars0pars.h" - -/* This many pages must be undone before a truncate is tried within rollback */ -#define TRX_ROLL_TRUNC_THRESHOLD 1 - -/* In crash recovery, the current trx to be rolled back */ -trx_t* trx_roll_crash_recv_trx = NULL; - -/* In crash recovery we set this to the undo n:o of the current trx to be -rolled back. Then we can print how many % the rollback has progressed. */ -ib_longlong trx_roll_max_undo_no; - -/* Auxiliary variable which tells the previous progress % we printed */ -ulint trx_roll_progress_printed_pct; - -/*********************************************************************** -Rollback a transaction used in MySQL. 
*/ - -int -trx_general_rollback_for_mysql( -/*===========================*/ - /* out: error code or DB_SUCCESS */ - trx_t* trx, /* in: transaction handle */ - ibool partial,/* in: TRUE if partial rollback requested */ - trx_savept_t* savept) /* in: pointer to savepoint undo number, if - partial rollback requested */ -{ -#ifndef UNIV_HOTBACKUP - mem_heap_t* heap; - que_thr_t* thr; - roll_node_t* roll_node; - - /* Tell Innobase server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - trx_start_if_not_started(trx); - - heap = mem_heap_create(512); - - roll_node = roll_node_create(heap); - - roll_node->partial = partial; - - if (partial) { - roll_node->savept = *savept; - } - - trx->error_state = DB_SUCCESS; - - thr = pars_complete_graph_for_exec(roll_node, trx, heap); - - ut_a(thr == que_fork_start_command(que_node_get_parent(thr))); - que_run_threads(thr); - - mutex_enter(&kernel_mutex); - - while (trx->que_state != TRX_QUE_RUNNING) { - - mutex_exit(&kernel_mutex); - - os_thread_sleep(100000); - - mutex_enter(&kernel_mutex); - } - - mutex_exit(&kernel_mutex); - - mem_heap_free(heap); - - ut_a(trx->error_state == DB_SUCCESS); - - /* Tell Innobase server that there might be work for - utility threads: */ - - srv_active_wake_master_thread(); - - return((int) trx->error_state); -#else /* UNIV_HOTBACKUP */ - /* This function depends on MySQL code that is not included in - InnoDB Hot Backup builds. Besides, this function should never - be called in InnoDB Hot Backup. */ - ut_error; - return(DB_FAIL); -#endif /* UNIV_HOTBACKUP */ -} - -/*********************************************************************** -Rollback a transaction used in MySQL. 
*/ - -int -trx_rollback_for_mysql( -/*===================*/ - /* out: error code or DB_SUCCESS */ - trx_t* trx) /* in: transaction handle */ -{ - int err; - - if (trx->conc_state == TRX_NOT_STARTED) { - - return(DB_SUCCESS); - } - - trx->op_info = "rollback"; - - /* If we are doing the XA recovery of prepared transactions, then - the transaction object does not have an InnoDB session object, and we - set a dummy session that we use for all MySQL transactions. */ - - mutex_enter(&kernel_mutex); - - if (trx->sess == NULL) { - /* Open a dummy session */ - - if (!trx_dummy_sess) { - trx_dummy_sess = sess_open(); - } - - trx->sess = trx_dummy_sess; - } - - mutex_exit(&kernel_mutex); - - err = trx_general_rollback_for_mysql(trx, FALSE, NULL); - - trx->op_info = ""; - - return(err); -} - -/*********************************************************************** -Rollback the latest SQL statement for MySQL. */ - -int -trx_rollback_last_sql_stat_for_mysql( -/*=================================*/ - /* out: error code or DB_SUCCESS */ - trx_t* trx) /* in: transaction handle */ -{ - int err; - - if (trx->conc_state == TRX_NOT_STARTED) { - - return(DB_SUCCESS); - } - - trx->op_info = "rollback of SQL statement"; - - err = trx_general_rollback_for_mysql(trx, TRUE, - &(trx->last_sql_stat_start)); - /* The following call should not be needed, but we play safe: */ - trx_mark_sql_stat_end(trx); - - trx->op_info = ""; - - return(err); -} - -/*********************************************************************** -Frees a single savepoint struct. 
*/ - -void -trx_roll_savepoint_free( -/*=====================*/ - trx_t* trx, /* in: transaction handle */ - trx_named_savept_t* savep) /* in: savepoint to free */ -{ - ut_a(savep != NULL); - ut_a(UT_LIST_GET_LEN(trx->trx_savepoints) > 0); - - UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep); - mem_free(savep->name); - mem_free(savep); -} - -/*********************************************************************** -Frees savepoint structs starting from savep, if savep == NULL then -free all savepoints. */ - -void -trx_roll_savepoints_free( -/*=====================*/ - trx_t* trx, /* in: transaction handle */ - trx_named_savept_t* savep) /* in: free all savepoints > this one; - if this is NULL, free all savepoints - of trx */ -{ - trx_named_savept_t* next_savep; - - if (savep == NULL) { - savep = UT_LIST_GET_FIRST(trx->trx_savepoints); - } else { - savep = UT_LIST_GET_NEXT(trx_savepoints, savep); - } - - while (savep != NULL) { - next_savep = UT_LIST_GET_NEXT(trx_savepoints, savep); - - trx_roll_savepoint_free(trx, savep); - - savep = next_savep; - } -} - -/*********************************************************************** -Rolls back a transaction back to a named savepoint. Modifications after the -savepoint are undone but InnoDB does NOT release the corresponding locks -which are stored in memory. If a lock is 'implicit', that is, a new inserted -row holds a lock where the lock information is carried by the trx id stored in -the row, these locks are naturally released in the rollback. Savepoints which -were set after this savepoint are deleted. 
*/ - -ulint -trx_rollback_to_savepoint_for_mysql( -/*================================*/ - /* out: if no savepoint - of the name found then - DB_NO_SAVEPOINT, - otherwise DB_SUCCESS */ - trx_t* trx, /* in: transaction handle */ - const char* savepoint_name, /* in: savepoint name */ - ib_longlong* mysql_binlog_cache_pos) /* out: the MySQL binlog cache - position corresponding to this - savepoint; MySQL needs this - information to remove the - binlog entries of the queries - executed after the savepoint */ -{ - trx_named_savept_t* savep; - ulint err; - - savep = UT_LIST_GET_FIRST(trx->trx_savepoints); - - while (savep != NULL) { - if (0 == ut_strcmp(savep->name, savepoint_name)) { - /* Found */ - break; - } - savep = UT_LIST_GET_NEXT(trx_savepoints, savep); - } - - if (savep == NULL) { - - return(DB_NO_SAVEPOINT); - } - - if (trx->conc_state == TRX_NOT_STARTED) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: transaction has a savepoint ", stderr); - ut_print_name(stderr, trx, FALSE, savep->name); - fputs(" though it is not started\n", stderr); - return(DB_ERROR); - } - - /* We can now free all savepoints strictly later than this one */ - - trx_roll_savepoints_free(trx, savep); - - *mysql_binlog_cache_pos = savep->mysql_binlog_cache_pos; - - trx->op_info = "rollback to a savepoint"; - - err = trx_general_rollback_for_mysql(trx, TRUE, &(savep->savept)); - - /* Store the current undo_no of the transaction so that we know where - to roll back if we have to roll back the next SQL statement: */ - - trx_mark_sql_stat_end(trx); - - trx->op_info = ""; - - return(err); -} - -/*********************************************************************** -Creates a named savepoint. If the transaction is not yet started, starts it. -If there is already a savepoint of the same name, this call erases that old -savepoint and replaces it with a new. Savepoints are deleted in a transaction -commit or rollback. 
*/ - -ulint -trx_savepoint_for_mysql( -/*====================*/ - /* out: always DB_SUCCESS */ - trx_t* trx, /* in: transaction handle */ - const char* savepoint_name, /* in: savepoint name */ - ib_longlong binlog_cache_pos) /* in: MySQL binlog cache - position corresponding to this - connection at the time of the - savepoint */ -{ - trx_named_savept_t* savep; - - ut_a(trx); - ut_a(savepoint_name); - - trx_start_if_not_started(trx); - - savep = UT_LIST_GET_FIRST(trx->trx_savepoints); - - while (savep != NULL) { - if (0 == ut_strcmp(savep->name, savepoint_name)) { - /* Found */ - break; - } - savep = UT_LIST_GET_NEXT(trx_savepoints, savep); - } - - if (savep) { - /* There is a savepoint with the same name: free that */ - - UT_LIST_REMOVE(trx_savepoints, trx->trx_savepoints, savep); - - mem_free(savep->name); - mem_free(savep); - } - - /* Create a new savepoint and add it as the last in the list */ - - savep = mem_alloc(sizeof(trx_named_savept_t)); - - savep->name = mem_strdup(savepoint_name); - - savep->savept = trx_savept_take(trx); - - savep->mysql_binlog_cache_pos = binlog_cache_pos; - - UT_LIST_ADD_LAST(trx_savepoints, trx->trx_savepoints, savep); - - return(DB_SUCCESS); -} - -/*********************************************************************** -Releases only the named savepoint. Savepoints which were set after this -savepoint are left as is. */ - -ulint -trx_release_savepoint_for_mysql( -/*============================*/ - /* out: if no savepoint - of the name found then - DB_NO_SAVEPOINT, - otherwise DB_SUCCESS */ - trx_t* trx, /* in: transaction handle */ - const char* savepoint_name) /* in: savepoint name */ -{ - trx_named_savept_t* savep; - - savep = UT_LIST_GET_FIRST(trx->trx_savepoints); - - /* Search for the savepoint by name and free if found. 
*/ - while (savep != NULL) { - if (0 == ut_strcmp(savep->name, savepoint_name)) { - trx_roll_savepoint_free(trx, savep); - return(DB_SUCCESS); - } - savep = UT_LIST_GET_NEXT(trx_savepoints, savep); - } - - return(DB_NO_SAVEPOINT); -} - -/*********************************************************************** -Returns a transaction savepoint taken at this point in time. */ - -trx_savept_t -trx_savept_take( -/*============*/ - /* out: savepoint */ - trx_t* trx) /* in: transaction */ -{ - trx_savept_t savept; - - savept.least_undo_no = trx->undo_no; - - return(savept); -} - -/*********************************************************************** -Rollback or clean up transactions which have no user session. If the -transaction already was committed, then we clean up a possible insert -undo log. If the transaction was not yet committed, then we roll it back. -Note: this is done in a background thread. */ - -os_thread_ret_t -trx_rollback_or_clean_all_without_sess( -/*===================================*/ - /* out: a dummy parameter */ - void* arg __attribute__((unused))) - /* in: a dummy parameter required by - os_thread_create */ -{ - mem_heap_t* heap; - que_fork_t* fork; - que_thr_t* thr; - roll_node_t* roll_node; - trx_t* trx; - dict_table_t* table; - ib_longlong rows_to_undo; - const char* unit = ""; - int err; - - mutex_enter(&kernel_mutex); - - /* Open a dummy session */ - - if (!trx_dummy_sess) { - trx_dummy_sess = sess_open(); - } - - mutex_exit(&kernel_mutex); - - if (UT_LIST_GET_FIRST(trx_sys->trx_list)) { - - fprintf(stderr, - "InnoDB: Starting in background the rollback" - " of uncommitted transactions\n"); - } else { - goto leave_function; - } -loop: - heap = mem_heap_create(512); - - mutex_enter(&kernel_mutex); - - trx = UT_LIST_GET_FIRST(trx_sys->trx_list); - - while (trx) { - if ((trx->sess || (trx->conc_state == TRX_NOT_STARTED))) { - trx = UT_LIST_GET_NEXT(trx_list, trx); - } else if (trx->conc_state == TRX_PREPARED) { - - trx->sess = trx_dummy_sess; 
- trx = UT_LIST_GET_NEXT(trx_list, trx); - } else { - break; - } - } - - mutex_exit(&kernel_mutex); - - if (trx == NULL) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Rollback of non-prepared transactions" - " completed\n"); - - mem_heap_free(heap); - - goto leave_function; - } - - trx->sess = trx_dummy_sess; - - if (trx->conc_state == TRX_COMMITTED_IN_MEMORY) { - fprintf(stderr, "InnoDB: Cleaning up trx with id %lu %lu\n", - (ulong) ut_dulint_get_high(trx->id), - (ulong) ut_dulint_get_low(trx->id)); - - trx_cleanup_at_db_startup(trx); - - mem_heap_free(heap); - - goto loop; - } - - fork = que_fork_create(NULL, NULL, QUE_FORK_RECOVERY, heap); - fork->trx = trx; - - thr = que_thr_create(fork, heap); - - roll_node = roll_node_create(heap); - - thr->child = roll_node; - roll_node->common.parent = thr; - - mutex_enter(&kernel_mutex); - - trx->graph = fork; - - ut_a(thr == que_fork_start_command(fork)); - - trx_roll_crash_recv_trx = trx; - trx_roll_max_undo_no = ut_conv_dulint_to_longlong(trx->undo_no); - trx_roll_progress_printed_pct = 0; - rows_to_undo = trx_roll_max_undo_no; - - if (rows_to_undo > 1000000000) { - rows_to_undo = rows_to_undo / 1000000; - unit = "M"; - } - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Rolling back trx with id %lu %lu, %lu%s" - " rows to undo\n", - (ulong) ut_dulint_get_high(trx->id), - (ulong) ut_dulint_get_low(trx->id), - (ulong) rows_to_undo, unit); - mutex_exit(&kernel_mutex); - - trx->mysql_thread_id = os_thread_get_curr_id(); - - trx->mysql_process_no = os_proc_get_number(); - - if (trx->dict_operation) { - row_mysql_lock_data_dictionary(trx); - } - - que_run_threads(thr); - - mutex_enter(&kernel_mutex); - - while (trx->que_state != TRX_QUE_RUNNING) { - - mutex_exit(&kernel_mutex); - - fprintf(stderr, - "InnoDB: Waiting for rollback of trx id %lu to end\n", - (ulong) ut_dulint_get_low(trx->id)); - os_thread_sleep(100000); - - mutex_enter(&kernel_mutex); - } - - mutex_exit(&kernel_mutex); - - if 
(trx->dict_operation) { - /* If the transaction was for a dictionary operation, we - drop the relevant table, if it still exists */ - - fprintf(stderr, - "InnoDB: Dropping table with id %lu %lu" - " in recovery if it exists\n", - (ulong) ut_dulint_get_high(trx->table_id), - (ulong) ut_dulint_get_low(trx->table_id)); - - table = dict_table_get_on_id_low(trx->table_id); - - if (table) { - fputs("InnoDB: Table found: dropping table ", stderr); - ut_print_name(stderr, trx, TRUE, table->name); - fputs(" in recovery\n", stderr); - - err = row_drop_table_for_mysql(table->name, trx, TRUE); - - ut_a(err == (int) DB_SUCCESS); - } - } - - if (trx->dict_operation) { - row_mysql_unlock_data_dictionary(trx); - } - - fprintf(stderr, "\nInnoDB: Rolling back of trx id %lu %lu completed\n", - (ulong) ut_dulint_get_high(trx->id), - (ulong) ut_dulint_get_low(trx->id)); - mem_heap_free(heap); - - trx_roll_crash_recv_trx = NULL; - - goto loop; - -leave_function: - /* We count the number of threads in os_thread_exit(). A created - thread should always use that to exit and not use return() to exit. */ - - os_thread_exit(NULL); - - OS_THREAD_DUMMY_RETURN; -} - -/*********************************************************************** -Creates an undo number array. */ - -trx_undo_arr_t* -trx_undo_arr_create(void) -/*=====================*/ -{ - trx_undo_arr_t* arr; - mem_heap_t* heap; - ulint i; - - heap = mem_heap_create(1024); - - arr = mem_heap_alloc(heap, sizeof(trx_undo_arr_t)); - - arr->infos = mem_heap_alloc(heap, sizeof(trx_undo_inf_t) - * UNIV_MAX_PARALLELISM); - arr->n_cells = UNIV_MAX_PARALLELISM; - arr->n_used = 0; - - arr->heap = heap; - - for (i = 0; i < UNIV_MAX_PARALLELISM; i++) { - - (trx_undo_arr_get_nth_info(arr, i))->in_use = FALSE; - } - - return(arr); -} - -/*********************************************************************** -Frees an undo number array. 
*/ - -void -trx_undo_arr_free( -/*==============*/ - trx_undo_arr_t* arr) /* in: undo number array */ -{ - ut_ad(arr->n_used == 0); - - mem_heap_free(arr->heap); -} - -/*********************************************************************** -Stores info of an undo log record to the array if it is not stored yet. */ -static -ibool -trx_undo_arr_store_info( -/*====================*/ - /* out: FALSE if the record already existed in the - array */ - trx_t* trx, /* in: transaction */ - dulint undo_no)/* in: undo number */ -{ - trx_undo_inf_t* cell; - trx_undo_inf_t* stored_here; - trx_undo_arr_t* arr; - ulint n_used; - ulint n; - ulint i; - - n = 0; - arr = trx->undo_no_arr; - n_used = arr->n_used; - stored_here = NULL; - - for (i = 0;; i++) { - cell = trx_undo_arr_get_nth_info(arr, i); - - if (!cell->in_use) { - if (!stored_here) { - /* Not in use, we may store here */ - cell->undo_no = undo_no; - cell->in_use = TRUE; - - arr->n_used++; - - stored_here = cell; - } - } else { - n++; - - if (0 == ut_dulint_cmp(cell->undo_no, undo_no)) { - - if (stored_here) { - stored_here->in_use = FALSE; - ut_ad(arr->n_used > 0); - arr->n_used--; - } - - ut_ad(arr->n_used == n_used); - - return(FALSE); - } - } - - if (n == n_used && stored_here) { - - ut_ad(arr->n_used == 1 + n_used); - - return(TRUE); - } - } -} - -/*********************************************************************** -Removes an undo number from the array. 
*/ -static -void -trx_undo_arr_remove_info( -/*=====================*/ - trx_undo_arr_t* arr, /* in: undo number array */ - dulint undo_no)/* in: undo number */ -{ - trx_undo_inf_t* cell; - ulint n_used; - ulint n; - ulint i; - - n_used = arr->n_used; - n = 0; - - for (i = 0;; i++) { - cell = trx_undo_arr_get_nth_info(arr, i); - - if (cell->in_use - && 0 == ut_dulint_cmp(cell->undo_no, undo_no)) { - - cell->in_use = FALSE; - - ut_ad(arr->n_used > 0); - - arr->n_used--; - - return; - } - } -} - -/*********************************************************************** -Gets the biggest undo number in an array. */ -static -dulint -trx_undo_arr_get_biggest( -/*=====================*/ - /* out: biggest value, ut_dulint_zero if - the array is empty */ - trx_undo_arr_t* arr) /* in: undo number array */ -{ - trx_undo_inf_t* cell; - ulint n_used; - dulint biggest; - ulint n; - ulint i; - - n = 0; - n_used = arr->n_used; - biggest = ut_dulint_zero; - - for (i = 0;; i++) { - cell = trx_undo_arr_get_nth_info(arr, i); - - if (cell->in_use) { - n++; - if (ut_dulint_cmp(cell->undo_no, biggest) > 0) { - - biggest = cell->undo_no; - } - } - - if (n == n_used) { - return(biggest); - } - } -} - -/*************************************************************************** -Tries truncate the undo logs. 
*/ - -void -trx_roll_try_truncate( -/*==================*/ - trx_t* trx) /* in: transaction */ -{ - trx_undo_arr_t* arr; - dulint limit; - dulint biggest; - - ut_ad(mutex_own(&(trx->undo_mutex))); - ut_ad(mutex_own(&((trx->rseg)->mutex))); - - trx->pages_undone = 0; - - arr = trx->undo_no_arr; - - limit = trx->undo_no; - - if (arr->n_used > 0) { - biggest = trx_undo_arr_get_biggest(arr); - - if (ut_dulint_cmp(biggest, limit) >= 0) { - - limit = ut_dulint_add(biggest, 1); - } - } - - if (trx->insert_undo) { - trx_undo_truncate_end(trx, trx->insert_undo, limit); - } - - if (trx->update_undo) { - trx_undo_truncate_end(trx, trx->update_undo, limit); - } -} - -/*************************************************************************** -Pops the topmost undo log record in a single undo log and updates the info -about the topmost record in the undo log memory struct. */ -static -trx_undo_rec_t* -trx_roll_pop_top_rec( -/*=================*/ - /* out: undo log record, the page s-latched */ - trx_t* trx, /* in: transaction */ - trx_undo_t* undo, /* in: undo log */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* undo_page; - ulint offset; - trx_undo_rec_t* prev_rec; - page_t* prev_rec_page; - - ut_ad(mutex_own(&(trx->undo_mutex))); - - undo_page = trx_undo_page_get_s_latched(undo->space, - undo->top_page_no, mtr); - offset = undo->top_offset; - - /* fprintf(stderr, "Thread %lu undoing trx %lu undo record %lu\n", - os_thread_get_curr_id(), ut_dulint_get_low(trx->id), - ut_dulint_get_low(undo->top_undo_no)); */ - - prev_rec = trx_undo_get_prev_rec(undo_page + offset, - undo->hdr_page_no, undo->hdr_offset, - mtr); - if (prev_rec == NULL) { - - undo->empty = TRUE; - } else { - prev_rec_page = buf_frame_align(prev_rec); - - if (prev_rec_page != undo_page) { - - trx->pages_undone++; - } - - undo->top_page_no = buf_frame_get_page_no(prev_rec_page); - undo->top_offset = prev_rec - prev_rec_page; - undo->top_undo_no = trx_undo_rec_get_undo_no(prev_rec); - } - - return(undo_page + offset); 
-} - -/************************************************************************ -Pops the topmost record when the two undo logs of a transaction are seen -as a single stack of records ordered by their undo numbers. Inserts the -undo number of the popped undo record to the array of currently processed -undo numbers in the transaction. When the query thread finishes processing -of this undo record, it must be released with trx_undo_rec_release. */ - -trx_undo_rec_t* -trx_roll_pop_top_rec_of_trx( -/*========================*/ - /* out: undo log record copied to heap, NULL - if none left, or if the undo number of the - top record would be less than the limit */ - trx_t* trx, /* in: transaction */ - dulint limit, /* in: least undo number we need */ - dulint* roll_ptr,/* out: roll pointer to undo record */ - mem_heap_t* heap) /* in: memory heap where copied */ -{ - trx_undo_t* undo; - trx_undo_t* ins_undo; - trx_undo_t* upd_undo; - trx_undo_rec_t* undo_rec; - trx_undo_rec_t* undo_rec_copy; - dulint undo_no; - ibool is_insert; - trx_rseg_t* rseg; - ulint progress_pct; - mtr_t mtr; - - rseg = trx->rseg; -try_again: - mutex_enter(&(trx->undo_mutex)); - - if (trx->pages_undone >= TRX_ROLL_TRUNC_THRESHOLD) { - mutex_enter(&(rseg->mutex)); - - trx_roll_try_truncate(trx); - - mutex_exit(&(rseg->mutex)); - } - - ins_undo = trx->insert_undo; - upd_undo = trx->update_undo; - - if (!ins_undo || ins_undo->empty) { - undo = upd_undo; - } else if (!upd_undo || upd_undo->empty) { - undo = ins_undo; - } else if (ut_dulint_cmp(upd_undo->top_undo_no, - ins_undo->top_undo_no) > 0) { - undo = upd_undo; - } else { - undo = ins_undo; - } - - if (!undo || undo->empty - || (ut_dulint_cmp(limit, undo->top_undo_no) > 0)) { - - if ((trx->undo_no_arr)->n_used == 0) { - /* Rollback is ending */ - - mutex_enter(&(rseg->mutex)); - - trx_roll_try_truncate(trx); - - mutex_exit(&(rseg->mutex)); - } - - mutex_exit(&(trx->undo_mutex)); - - return(NULL); - } - - if (undo == ins_undo) { - is_insert = TRUE; - 
} else { - is_insert = FALSE; - } - - *roll_ptr = trx_undo_build_roll_ptr(is_insert, (undo->rseg)->id, - undo->top_page_no, - undo->top_offset); - mtr_start(&mtr); - - undo_rec = trx_roll_pop_top_rec(trx, undo, &mtr); - - undo_no = trx_undo_rec_get_undo_no(undo_rec); - - ut_ad(ut_dulint_cmp(ut_dulint_add(undo_no, 1), trx->undo_no) == 0); - - /* We print rollback progress info if we are in a crash recovery - and the transaction has at least 1000 row operations to undo. */ - - if (trx == trx_roll_crash_recv_trx && trx_roll_max_undo_no > 1000) { - - progress_pct = 100 - (ulint) - ((ut_conv_dulint_to_longlong(undo_no) * 100) - / trx_roll_max_undo_no); - if (progress_pct != trx_roll_progress_printed_pct) { - if (trx_roll_progress_printed_pct == 0) { - fprintf(stderr, - "\nInnoDB: Progress in percents:" - " %lu", (ulong) progress_pct); - } else { - fprintf(stderr, - " %lu", (ulong) progress_pct); - } - fflush(stderr); - trx_roll_progress_printed_pct = progress_pct; - } - } - - trx->undo_no = undo_no; - - if (!trx_undo_arr_store_info(trx, undo_no)) { - /* A query thread is already processing this undo log record */ - - mutex_exit(&(trx->undo_mutex)); - - mtr_commit(&mtr); - - goto try_again; - } - - undo_rec_copy = trx_undo_rec_copy(undo_rec, heap); - - mutex_exit(&(trx->undo_mutex)); - - mtr_commit(&mtr); - - return(undo_rec_copy); -} - -/************************************************************************ -Reserves an undo log record for a query thread to undo. This should be -called if the query thread gets the undo log record not using the pop -function above. 
*/ - -ibool -trx_undo_rec_reserve( -/*=================*/ - /* out: TRUE if succeeded */ - trx_t* trx, /* in: transaction */ - dulint undo_no)/* in: undo number of the record */ -{ - ibool ret; - - mutex_enter(&(trx->undo_mutex)); - - ret = trx_undo_arr_store_info(trx, undo_no); - - mutex_exit(&(trx->undo_mutex)); - - return(ret); -} - -/*********************************************************************** -Releases a reserved undo record. */ - -void -trx_undo_rec_release( -/*=================*/ - trx_t* trx, /* in: transaction */ - dulint undo_no)/* in: undo number */ -{ - trx_undo_arr_t* arr; - - mutex_enter(&(trx->undo_mutex)); - - arr = trx->undo_no_arr; - - trx_undo_arr_remove_info(arr, undo_no); - - mutex_exit(&(trx->undo_mutex)); -} - -/************************************************************************* -Starts a rollback operation. */ - -void -trx_rollback( -/*=========*/ - trx_t* trx, /* in: transaction */ - trx_sig_t* sig, /* in: signal starting the rollback */ - que_thr_t** next_thr)/* in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread; if the passed value is - NULL, the parameter is ignored */ -{ - que_t* roll_graph; - que_thr_t* thr; - /* que_thr_t* thr2; */ - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad((trx->undo_no_arr == NULL) || ((trx->undo_no_arr)->n_used == 0)); - - /* Initialize the rollback field in the transaction */ - - if (sig->type == TRX_SIG_TOTAL_ROLLBACK) { - - trx->roll_limit = ut_dulint_zero; - - } else if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) { - - trx->roll_limit = (sig->savept).least_undo_no; - - } else if (sig->type == TRX_SIG_ERROR_OCCURRED) { - - trx->roll_limit = trx->last_sql_stat_start.least_undo_no; - } else { - ut_error; - } - - ut_a(ut_dulint_cmp(trx->roll_limit, trx->undo_no) <= 0); - - trx->pages_undone = 0; - - if (trx->undo_no_arr == NULL) { - trx->undo_no_arr = trx_undo_arr_create(); - } - - 
/* Build a 'query' graph which will perform the undo operations */ - - roll_graph = trx_roll_graph_build(trx); - - trx->graph = roll_graph; - trx->que_state = TRX_QUE_ROLLING_BACK; - - thr = que_fork_start_command(roll_graph); - - ut_ad(thr); - - /* thr2 = que_fork_start_command(roll_graph); - - ut_ad(thr2); */ - - if (next_thr && (*next_thr == NULL)) { - *next_thr = thr; - /* srv_que_task_enqueue_low(thr2); */ - } else { - srv_que_task_enqueue_low(thr); - /* srv_que_task_enqueue_low(thr2); */ - } -} - -/******************************************************************** -Builds an undo 'query' graph for a transaction. The actual rollback is -performed by executing this query graph like a query subprocedure call. -The reply about the completion of the rollback will be sent by this -graph. */ - -que_t* -trx_roll_graph_build( -/*=================*/ - /* out, own: the query graph */ - trx_t* trx) /* in: trx handle */ -{ - mem_heap_t* heap; - que_fork_t* fork; - que_thr_t* thr; - /* que_thr_t* thr2; */ - - ut_ad(mutex_own(&kernel_mutex)); - - heap = mem_heap_create(512); - fork = que_fork_create(NULL, NULL, QUE_FORK_ROLLBACK, heap); - fork->trx = trx; - - thr = que_thr_create(fork, heap); - /* thr2 = que_thr_create(fork, heap); */ - - thr->child = row_undo_node_create(trx, thr, heap); - /* thr2->child = row_undo_node_create(trx, thr2, heap); */ - - return(fork); -} - -/************************************************************************* -Finishes error processing after the necessary partial rollback has been -done. 
*/ -static -void -trx_finish_error_processing( -/*========================*/ - trx_t* trx) /* in: transaction */ -{ - trx_sig_t* sig; - trx_sig_t* next_sig; - - ut_ad(mutex_own(&kernel_mutex)); - - sig = UT_LIST_GET_FIRST(trx->signals); - - while (sig != NULL) { - next_sig = UT_LIST_GET_NEXT(signals, sig); - - if (sig->type == TRX_SIG_ERROR_OCCURRED) { - - trx_sig_remove(trx, sig); - } - - sig = next_sig; - } - - trx->que_state = TRX_QUE_RUNNING; -} - -/************************************************************************* -Finishes a partial rollback operation. */ -static -void -trx_finish_partial_rollback_off_kernel( -/*===================================*/ - trx_t* trx, /* in: transaction */ - que_thr_t** next_thr)/* in/out: next query thread to run; - if the value which is passed in is a pointer - to a NULL pointer, then the calling function - can start running a new query thread; if this - parameter is NULL, it is ignored */ -{ - trx_sig_t* sig; - - ut_ad(mutex_own(&kernel_mutex)); - - sig = UT_LIST_GET_FIRST(trx->signals); - - /* Remove the signal from the signal queue and send reply message - to it */ - - trx_sig_reply(sig, next_thr); - trx_sig_remove(trx, sig); - - trx->que_state = TRX_QUE_RUNNING; -} - -/******************************************************************** -Finishes a transaction rollback. 
*/ - -void -trx_finish_rollback_off_kernel( -/*===========================*/ - que_t* graph, /* in: undo graph which can now be freed */ - trx_t* trx, /* in: transaction */ - que_thr_t** next_thr)/* in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread; if this parameter is - NULL, it is ignored */ -{ - trx_sig_t* sig; - trx_sig_t* next_sig; - - ut_ad(mutex_own(&kernel_mutex)); - - ut_a(trx->undo_no_arr == NULL || trx->undo_no_arr->n_used == 0); - - /* Free the memory reserved by the undo graph */ - que_graph_free(graph); - - sig = UT_LIST_GET_FIRST(trx->signals); - - if (sig->type == TRX_SIG_ROLLBACK_TO_SAVEPT) { - - trx_finish_partial_rollback_off_kernel(trx, next_thr); - - return; - - } else if (sig->type == TRX_SIG_ERROR_OCCURRED) { - - trx_finish_error_processing(trx); - - return; - } - -#ifdef UNIV_DEBUG - if (lock_print_waits) { - fprintf(stderr, "Trx %lu rollback finished\n", - (ulong) ut_dulint_get_low(trx->id)); - } -#endif /* UNIV_DEBUG */ - - trx_commit_off_kernel(trx); - - /* Remove all TRX_SIG_TOTAL_ROLLBACK signals from the signal queue and - send reply messages to them */ - - trx->que_state = TRX_QUE_RUNNING; - - while (sig != NULL) { - next_sig = UT_LIST_GET_NEXT(signals, sig); - - if (sig->type == TRX_SIG_TOTAL_ROLLBACK) { - - trx_sig_reply(sig, next_thr); - - trx_sig_remove(trx, sig); - } - - sig = next_sig; - } -} - -/************************************************************************* -Creates a rollback command node struct. 
*/ - -roll_node_t* -roll_node_create( -/*=============*/ - /* out, own: rollback node struct */ - mem_heap_t* heap) /* in: mem heap where created */ -{ - roll_node_t* node; - - node = mem_heap_alloc(heap, sizeof(roll_node_t)); - node->common.type = QUE_NODE_ROLLBACK; - node->state = ROLL_NODE_SEND; - - node->partial = FALSE; - - return(node); -} - -/*************************************************************** -Performs an execution step for a rollback command node in a query graph. */ - -que_thr_t* -trx_rollback_step( -/*==============*/ - /* out: query thread to run next, or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - roll_node_t* node; - ulint sig_no; - trx_savept_t* savept; - - node = thr->run_node; - - ut_ad(que_node_get_type(node) == QUE_NODE_ROLLBACK); - - if (thr->prev_node == que_node_get_parent(node)) { - node->state = ROLL_NODE_SEND; - } - - if (node->state == ROLL_NODE_SEND) { - mutex_enter(&kernel_mutex); - - node->state = ROLL_NODE_WAIT; - - if (node->partial) { - sig_no = TRX_SIG_ROLLBACK_TO_SAVEPT; - savept = &(node->savept); - } else { - sig_no = TRX_SIG_TOTAL_ROLLBACK; - savept = NULL; - } - - /* Send a rollback signal to the transaction */ - - trx_sig_send(thr_get_trx(thr), sig_no, TRX_SIG_SELF, thr, - savept, NULL); - - thr->state = QUE_THR_SIG_REPLY_WAIT; - - mutex_exit(&kernel_mutex); - - return(NULL); - } - - ut_ad(node->state == ROLL_NODE_WAIT); - - thr->run_node = que_node_get_parent(node); - - return(thr); -} diff --git a/storage/innobase/trx/trx0rseg.c b/storage/innobase/trx/trx0rseg.c deleted file mode 100644 index 020f217c90b..00000000000 --- a/storage/innobase/trx/trx0rseg.c +++ /dev/null @@ -1,254 +0,0 @@ -/****************************************************** -Rollback segment - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "trx0rseg.h" - -#ifdef UNIV_NONINL -#include "trx0rseg.ic" -#endif - -#include "trx0undo.h" -#include "fut0lst.h" 
-#include "srv0srv.h" -#include "trx0purge.h" - -/********************************************************************** -Looks for a rollback segment, based on the rollback segment id. */ - -trx_rseg_t* -trx_rseg_get_on_id( -/*===============*/ - /* out: rollback segment */ - ulint id) /* in: rollback segment id */ -{ - trx_rseg_t* rseg; - - rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); - ut_ad(rseg); - - while (rseg->id != id) { - rseg = UT_LIST_GET_NEXT(rseg_list, rseg); - ut_ad(rseg); - } - - return(rseg); -} - -/******************************************************************** -Creates a rollback segment header. This function is called only when -a new rollback segment is created in the database. */ - -ulint -trx_rseg_header_create( -/*===================*/ - /* out: page number of the created segment, - FIL_NULL if fail */ - ulint space, /* in: space id */ - ulint max_size, /* in: max size in pages */ - ulint* slot_no, /* out: rseg id == slot number in trx sys */ - mtr_t* mtr) /* in: mtr */ -{ - ulint page_no; - trx_rsegf_t* rsegf; - trx_sysf_t* sys_header; - ulint i; - page_t* page; - - ut_ad(mtr); - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(mtr_memo_contains(mtr, fil_space_get_latch(space), - MTR_MEMO_X_LOCK)); - sys_header = trx_sysf_get(mtr); - - *slot_no = trx_sysf_rseg_find_free(mtr); - - if (*slot_no == ULINT_UNDEFINED) { - - return(FIL_NULL); - } - - /* Allocate a new file segment for the rollback segment */ - page = fseg_create(space, 0, TRX_RSEG + TRX_RSEG_FSEG_HEADER, mtr); - - if (page == NULL) { - /* No space left */ - - return(FIL_NULL); - } - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_RSEG_HEADER_NEW); -#endif /* UNIV_SYNC_DEBUG */ - - page_no = buf_frame_get_page_no(page); - - /* Get the rollback segment file page */ - rsegf = trx_rsegf_get_new(space, page_no, mtr); - - /* Initialize max size field */ - mlog_write_ulint(rsegf + TRX_RSEG_MAX_SIZE, max_size, - MLOG_4BYTES, mtr); - - /* Initialize the history list */ - - 
mlog_write_ulint(rsegf + TRX_RSEG_HISTORY_SIZE, 0, MLOG_4BYTES, mtr); - flst_init(rsegf + TRX_RSEG_HISTORY, mtr); - - /* Reset the undo log slots */ - for (i = 0; i < TRX_RSEG_N_SLOTS; i++) { - - trx_rsegf_set_nth_undo(rsegf, i, FIL_NULL, mtr); - } - - /* Add the rollback segment info to the free slot in the trx system - header */ - - trx_sysf_rseg_set_space(sys_header, *slot_no, space, mtr); - trx_sysf_rseg_set_page_no(sys_header, *slot_no, page_no, mtr); - - return(page_no); -} - -/*************************************************************************** -Creates and initializes a rollback segment object. The values for the -fields are read from the header. The object is inserted to the rseg -list of the trx system object and a pointer is inserted in the rseg -array in the trx system object. */ -static -trx_rseg_t* -trx_rseg_mem_create( -/*================*/ - /* out, own: rollback segment object */ - ulint id, /* in: rollback segment id */ - ulint space, /* in: space where the segment placed */ - ulint page_no, /* in: page number of the segment header */ - mtr_t* mtr) /* in: mtr */ -{ - trx_rsegf_t* rseg_header; - trx_rseg_t* rseg; - trx_ulogf_t* undo_log_hdr; - fil_addr_t node_addr; - ulint sum_of_undo_sizes; - ulint len; - - ut_ad(mutex_own(&kernel_mutex)); - - rseg = mem_alloc(sizeof(trx_rseg_t)); - - rseg->id = id; - rseg->space = space; - rseg->page_no = page_no; - - mutex_create(&rseg->mutex, SYNC_RSEG); - - UT_LIST_ADD_LAST(rseg_list, trx_sys->rseg_list, rseg); - - trx_sys_set_nth_rseg(trx_sys, id, rseg); - - rseg_header = trx_rsegf_get_new(space, page_no, mtr); - - rseg->max_size = mtr_read_ulint(rseg_header + TRX_RSEG_MAX_SIZE, - MLOG_4BYTES, mtr); - - /* Initialize the undo log lists according to the rseg header */ - - sum_of_undo_sizes = trx_undo_lists_init(rseg); - - rseg->curr_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, - MLOG_4BYTES, mtr) - + 1 + sum_of_undo_sizes; - - len = flst_get_len(rseg_header + TRX_RSEG_HISTORY, mtr); - if 
(len > 0) { - trx_sys->rseg_history_len += len; - - node_addr = trx_purge_get_log_from_hist( - flst_get_last(rseg_header + TRX_RSEG_HISTORY, mtr)); - rseg->last_page_no = node_addr.page; - rseg->last_offset = node_addr.boffset; - - undo_log_hdr = trx_undo_page_get(rseg->space, node_addr.page, - mtr) + node_addr.boffset; - - rseg->last_trx_no = mtr_read_dulint( - undo_log_hdr + TRX_UNDO_TRX_NO, mtr); - rseg->last_del_marks = mtr_read_ulint( - undo_log_hdr + TRX_UNDO_DEL_MARKS, MLOG_2BYTES, mtr); - } else { - rseg->last_page_no = FIL_NULL; - } - - return(rseg); -} - -/************************************************************************* -Creates the memory copies for rollback segments and initializes the -rseg list and array in trx_sys at a database startup. */ - -void -trx_rseg_list_and_array_init( -/*=========================*/ - trx_sysf_t* sys_header, /* in: trx system header */ - mtr_t* mtr) /* in: mtr */ -{ - ulint i; - ulint page_no; - ulint space; - - UT_LIST_INIT(trx_sys->rseg_list); - - trx_sys->rseg_history_len = 0; - - for (i = 0; i < TRX_SYS_N_RSEGS; i++) { - - page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr); - - if (page_no == FIL_NULL) { - - trx_sys_set_nth_rseg(trx_sys, i, NULL); - } else { - space = trx_sysf_rseg_get_space(sys_header, i, mtr); - - trx_rseg_mem_create(i, space, page_no, mtr); - } - } -} - -/******************************************************************** -Creates a new rollback segment to the database. 
*/ - -trx_rseg_t* -trx_rseg_create( -/*============*/ - /* out: the created segment object, NULL if - fail */ - ulint space, /* in: space id */ - ulint max_size, /* in: max size in pages */ - ulint* id, /* out: rseg id */ - mtr_t* mtr) /* in: mtr */ -{ - ulint page_no; - trx_rseg_t* rseg; - - mtr_x_lock(fil_space_get_latch(space), mtr); - mutex_enter(&kernel_mutex); - - page_no = trx_rseg_header_create(space, max_size, id, mtr); - - if (page_no == FIL_NULL) { - - mutex_exit(&kernel_mutex); - return(NULL); - } - - rseg = trx_rseg_mem_create(*id, space, page_no, mtr); - - mutex_exit(&kernel_mutex); - - return(rseg); -} diff --git a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c deleted file mode 100644 index 40348dd4199..00000000000 --- a/storage/innobase/trx/trx0sys.c +++ /dev/null @@ -1,997 +0,0 @@ -/****************************************************** -Transaction system - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "trx0sys.h" - -#ifdef UNIV_NONINL -#include "trx0sys.ic" -#endif - -#include "fsp0fsp.h" -#include "mtr0mtr.h" -#include "trx0trx.h" -#include "trx0rseg.h" -#include "trx0undo.h" -#include "srv0srv.h" -#include "trx0purge.h" -#include "log0log.h" -#include "os0file.h" - -/* The transaction system */ -trx_sys_t* trx_sys = NULL; -trx_doublewrite_t* trx_doublewrite = NULL; - -/* The following is set to TRUE when we are upgrading from the old format data -files to the new >= 4.1.x format multiple tablespaces format data files */ - -ibool trx_doublewrite_must_reset_space_ids = FALSE; - -/* The following is TRUE when we are using the database in the new format, -i.e., we have successfully upgraded, or have created a new database -installation */ - -ibool trx_sys_multiple_tablespace_format = FALSE; - -/* In a MySQL replication slave, in crash recovery we store the master log -file name and position here. 
We have successfully got the updates to InnoDB -up to this position. If .._pos is -1, it means no crash recovery was needed, -or there was no master log position info inside InnoDB. */ - -char trx_sys_mysql_master_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN]; -ib_longlong trx_sys_mysql_master_log_pos = -1; - -/* If this MySQL server uses binary logging, after InnoDB has been inited -and if it has done a crash recovery, we store the binlog file name and position -here. If .._pos is -1, it means there was no binlog position info inside -InnoDB. */ - -char trx_sys_mysql_bin_log_name[TRX_SYS_MYSQL_LOG_NAME_LEN]; -ib_longlong trx_sys_mysql_bin_log_pos = -1; - - -/******************************************************************** -Determines if a page number is located inside the doublewrite buffer. */ - -ibool -trx_doublewrite_page_inside( -/*========================*/ - /* out: TRUE if the location is inside - the two blocks of the doublewrite buffer */ - ulint page_no) /* in: page number */ -{ - if (trx_doublewrite == NULL) { - - return(FALSE); - } - - if (page_no >= trx_doublewrite->block1 - && page_no < trx_doublewrite->block1 - + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { - return(TRUE); - } - - if (page_no >= trx_doublewrite->block2 - && page_no < trx_doublewrite->block2 - + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { - return(TRUE); - } - - return(FALSE); -} - -/******************************************************************** -Creates or initialializes the doublewrite buffer at a database start. 
*/ -static -void -trx_doublewrite_init( -/*=================*/ - byte* doublewrite) /* in: pointer to the doublewrite buf - header on trx sys page */ -{ - trx_doublewrite = mem_alloc(sizeof(trx_doublewrite_t)); - - /* Since we now start to use the doublewrite buffer, no need to call - fsync() after every write to a data file */ -#ifdef UNIV_DO_FLUSH - os_do_not_call_flush_at_each_write = TRUE; -#endif /* UNIV_DO_FLUSH */ - - mutex_create(&trx_doublewrite->mutex, SYNC_DOUBLEWRITE); - - trx_doublewrite->first_free = 0; - - trx_doublewrite->block1 = mach_read_from_4( - doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1); - trx_doublewrite->block2 = mach_read_from_4( - doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2); - trx_doublewrite->write_buf_unaligned = ut_malloc( - (1 + 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) * UNIV_PAGE_SIZE); - - trx_doublewrite->write_buf = ut_align( - trx_doublewrite->write_buf_unaligned, UNIV_PAGE_SIZE); - trx_doublewrite->buf_block_arr = mem_alloc( - 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * sizeof(void*)); -} - -/******************************************************************** -Marks the trx sys header when we have successfully upgraded to the >= 4.1.x -multiple tablespace format. */ - -void -trx_sys_mark_upgraded_to_multiple_tablespaces(void) -/*===============================================*/ -{ - page_t* page; - byte* doublewrite; - mtr_t mtr; - - /* We upgraded to 4.1.x and reset the space id fields in the - doublewrite buffer. Let us mark to the trx_sys header that the upgrade - has been done. 
*/ - - mtr_start(&mtr); - - page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK); -#endif /* UNIV_SYNC_DEBUG */ - - doublewrite = page + TRX_SYS_DOUBLEWRITE; - - mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED, - TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N, - MLOG_4BYTES, &mtr); - mtr_commit(&mtr); - - /* Flush the modified pages to disk and make a checkpoint */ - log_make_checkpoint_at(ut_dulint_max, TRUE); - - trx_sys_multiple_tablespace_format = TRUE; -} - -/******************************************************************** -Creates the doublewrite buffer to a new InnoDB installation. The header of the -doublewrite buffer is placed on the trx system header page. */ - -void -trx_sys_create_doublewrite_buf(void) -/*================================*/ -{ - page_t* page; - page_t* page2; - page_t* new_page; - byte* doublewrite; - byte* fseg_header; - ulint page_no; - ulint prev_page_no; - ulint i; - mtr_t mtr; - - if (trx_doublewrite) { - /* Already inited */ - - return; - } - -start_again: - mtr_start(&mtr); - - page = buf_page_get(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, RW_X_LATCH, &mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_NO_ORDER_CHECK); -#endif /* UNIV_SYNC_DEBUG */ - - doublewrite = page + TRX_SYS_DOUBLEWRITE; - - if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC) - == TRX_SYS_DOUBLEWRITE_MAGIC_N) { - /* The doublewrite buffer has already been created: - just read in some numbers */ - - trx_doublewrite_init(doublewrite); - - mtr_commit(&mtr); - } else { - fprintf(stderr, - "InnoDB: Doublewrite buffer not found:" - " creating new\n"); - - if (buf_pool_get_curr_size() - < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE - + FSP_EXTENT_SIZE / 2 + 100) - * UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Cannot create doublewrite buffer:" - " you must\n" - "InnoDB: increase your buffer pool size.\n" - "InnoDB: Cannot continue operation.\n"); - 
- exit(1); - } - - page2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO, - TRX_SYS_DOUBLEWRITE - + TRX_SYS_DOUBLEWRITE_FSEG, &mtr); - - /* fseg_create acquires a second latch on the page, - therefore we must declare it: */ - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page2, SYNC_NO_ORDER_CHECK); -#endif /* UNIV_SYNC_DEBUG */ - - if (page2 == NULL) { - fprintf(stderr, - "InnoDB: Cannot create doublewrite buffer:" - " you must\n" - "InnoDB: increase your tablespace size.\n" - "InnoDB: Cannot continue operation.\n"); - - /* We exit without committing the mtr to prevent - its modifications to the database getting to disk */ - - exit(1); - } - - fseg_header = page + TRX_SYS_DOUBLEWRITE - + TRX_SYS_DOUBLEWRITE_FSEG; - prev_page_no = 0; - - for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE - + FSP_EXTENT_SIZE / 2; i++) { - page_no = fseg_alloc_free_page(fseg_header, - prev_page_no + 1, - FSP_UP, &mtr); - if (page_no == FIL_NULL) { - fprintf(stderr, - "InnoDB: Cannot create doublewrite" - " buffer: you must\n" - "InnoDB: increase your" - " tablespace size.\n" - "InnoDB: Cannot continue operation.\n" - ); - - exit(1); - } - - /* We read the allocated pages to the buffer pool; - when they are written to disk in a flush, the space - id and page number fields are also written to the - pages. When we at database startup read pages - from the doublewrite buffer, we know that if the - space id and page number in them are the same as - the page position in the tablespace, then the page - has not been written to in doublewrite. 
*/ - - new_page = buf_page_get(TRX_SYS_SPACE, page_no, - RW_X_LATCH, &mtr); -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(new_page, SYNC_NO_ORDER_CHECK); -#endif /* UNIV_SYNC_DEBUG */ - - /* Make a dummy change to the page to ensure it will - be written to disk in a flush */ - - mlog_write_ulint(new_page + FIL_PAGE_DATA, - TRX_SYS_DOUBLEWRITE_MAGIC_N, - MLOG_4BYTES, &mtr); - - if (i == FSP_EXTENT_SIZE / 2) { - mlog_write_ulint(doublewrite - + TRX_SYS_DOUBLEWRITE_BLOCK1, - page_no, MLOG_4BYTES, &mtr); - mlog_write_ulint(doublewrite - + TRX_SYS_DOUBLEWRITE_REPEAT - + TRX_SYS_DOUBLEWRITE_BLOCK1, - page_no, MLOG_4BYTES, &mtr); - } else if (i == FSP_EXTENT_SIZE / 2 - + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { - mlog_write_ulint(doublewrite - + TRX_SYS_DOUBLEWRITE_BLOCK2, - page_no, MLOG_4BYTES, &mtr); - mlog_write_ulint(doublewrite - + TRX_SYS_DOUBLEWRITE_REPEAT - + TRX_SYS_DOUBLEWRITE_BLOCK2, - page_no, MLOG_4BYTES, &mtr); - } else if (i > FSP_EXTENT_SIZE / 2) { - ut_a(page_no == prev_page_no + 1); - } - - prev_page_no = page_no; - } - - mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC, - TRX_SYS_DOUBLEWRITE_MAGIC_N, - MLOG_4BYTES, &mtr); - mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC - + TRX_SYS_DOUBLEWRITE_REPEAT, - TRX_SYS_DOUBLEWRITE_MAGIC_N, - MLOG_4BYTES, &mtr); - - mlog_write_ulint(doublewrite - + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED, - TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N, - MLOG_4BYTES, &mtr); - mtr_commit(&mtr); - - /* Flush the modified pages to disk and make a checkpoint */ - log_make_checkpoint_at(ut_dulint_max, TRUE); - - fprintf(stderr, "InnoDB: Doublewrite buffer created\n"); - - trx_sys_multiple_tablespace_format = TRUE; - - goto start_again; - } -} - -/******************************************************************** -At a database startup initializes the doublewrite buffer memory structure if -we already have a doublewrite buffer created in the data files. 
If we are -upgrading to an InnoDB version which supports multiple tablespaces, then this -function performs the necessary update operations. If we are in a crash -recovery, this function uses a possible doublewrite buffer to restore -half-written pages in the data files. */ - -void -trx_sys_doublewrite_init_or_restore_pages( -/*======================================*/ - ibool restore_corrupt_pages) -{ - byte* buf; - byte* read_buf; - byte* unaligned_read_buf; - ulint block1; - ulint block2; - ulint source_page_no; - byte* page; - byte* doublewrite; - ulint space_id; - ulint page_no; - ulint i; - - /* We do the file i/o past the buffer pool */ - - unaligned_read_buf = ut_malloc(2 * UNIV_PAGE_SIZE); - read_buf = ut_align(unaligned_read_buf, UNIV_PAGE_SIZE); - - /* Read the trx sys header to check if we are using the doublewrite - buffer */ - - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, TRX_SYS_PAGE_NO, 0, - UNIV_PAGE_SIZE, read_buf, NULL); - doublewrite = read_buf + TRX_SYS_DOUBLEWRITE; - - if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC) - == TRX_SYS_DOUBLEWRITE_MAGIC_N) { - /* The doublewrite buffer has been created */ - - trx_doublewrite_init(doublewrite); - - block1 = trx_doublewrite->block1; - block2 = trx_doublewrite->block2; - - buf = trx_doublewrite->write_buf; - } else { - goto leave_func; - } - - if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED) - != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) { - - /* We are upgrading from a version < 4.1.x to a version where - multiple tablespaces are supported. We must reset the space id - field in the pages in the doublewrite buffer because starting - from this version the space id is stored to - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. 
*/ - - trx_doublewrite_must_reset_space_ids = TRUE; - - fprintf(stderr, - "InnoDB: Resetting space id's in the" - " doublewrite buffer\n"); - } else { - trx_sys_multiple_tablespace_format = TRUE; - } - - /* Read the pages from the doublewrite buffer to memory */ - - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block1, 0, - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, - buf, NULL); - fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, block2, 0, - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, - buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE, - NULL); - /* Check if any of these pages is half-written in data files, in the - intended position */ - - page = buf; - - for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) { - - page_no = mach_read_from_4(page + FIL_PAGE_OFFSET); - - if (trx_doublewrite_must_reset_space_ids) { - - space_id = 0; - mach_write_to_4(page - + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0); - /* We do not need to calculate new checksums for the - pages because the field .._SPACE_ID does not affect - them. Write the page back to where we read it from. 
*/ - - if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) { - source_page_no = block1 + i; - } else { - source_page_no = block2 - + i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE; - } - - fil_io(OS_FILE_WRITE, TRUE, 0, source_page_no, 0, - UNIV_PAGE_SIZE, page, NULL); - /* printf("Resetting space id in page %lu\n", - source_page_no); */ - } else { - space_id = mach_read_from_4( - page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - } - - if (!restore_corrupt_pages) { - /* The database was shut down gracefully: no need to - restore pages */ - - } else if (!fil_tablespace_exists_in_mem(space_id)) { - /* Maybe we have dropped the single-table tablespace - and this page once belonged to it: do nothing */ - - } else if (!fil_check_adress_in_tablespace(space_id, - page_no)) { - fprintf(stderr, - "InnoDB: Warning: a page in the" - " doublewrite buffer is not within space\n" - "InnoDB: bounds; space id %lu" - " page number %lu, page %lu in" - " doublewrite buf.\n", - (ulong) space_id, (ulong) page_no, (ulong) i); - - } else if (space_id == TRX_SYS_SPACE - && ((page_no >= block1 - && page_no - < block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) - || (page_no >= block2 - && page_no - < (block2 - + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) { - - /* It is an unwritten doublewrite buffer page: - do nothing */ - } else { - /* Read in the actual page from the data files */ - - fil_io(OS_FILE_READ, TRUE, space_id, page_no, 0, - UNIV_PAGE_SIZE, read_buf, NULL); - /* Check if the page is corrupt */ - - if (buf_page_is_corrupted(read_buf)) { - - fprintf(stderr, - "InnoDB: Warning: database page" - " corruption or a failed\n" - "InnoDB: file read of page %lu.\n", - (ulong) page_no); - fprintf(stderr, - "InnoDB: Trying to recover it from" - " the doublewrite buffer.\n"); - - if (buf_page_is_corrupted(page)) { - fprintf(stderr, - "InnoDB: Dump of the page:\n"); - buf_page_print(read_buf); - fprintf(stderr, - "InnoDB: Dump of" - " corresponding page" - " in doublewrite buffer:\n"); - buf_page_print(page); - - fprintf(stderr, - 
"InnoDB: Also the page in the" - " doublewrite buffer" - " is corrupt.\n" - "InnoDB: Cannot continue" - " operation.\n" - "InnoDB: You can try to" - " recover the database" - " with the my.cnf\n" - "InnoDB: option:\n" - "InnoDB: set-variable=" - "innodb_force_recovery=6\n"); - exit(1); - } - - /* Write the good page from the - doublewrite buffer to the intended - position */ - - fil_io(OS_FILE_WRITE, TRUE, space_id, - page_no, 0, - UNIV_PAGE_SIZE, page, NULL); - fprintf(stderr, - "InnoDB: Recovered the page from" - " the doublewrite buffer.\n"); - } - } - - page += UNIV_PAGE_SIZE; - } - - fil_flush_file_spaces(FIL_TABLESPACE); - -leave_func: - ut_free(unaligned_read_buf); -} - -/******************************************************************** -Checks that trx is in the trx list. */ - -ibool -trx_in_trx_list( -/*============*/ - /* out: TRUE if is in */ - trx_t* in_trx) /* in: trx */ -{ - trx_t* trx; - - ut_ad(mutex_own(&(kernel_mutex))); - - trx = UT_LIST_GET_FIRST(trx_sys->trx_list); - - while (trx != NULL) { - - if (trx == in_trx) { - - return(TRUE); - } - - trx = UT_LIST_GET_NEXT(trx_list, trx); - } - - return(FALSE); -} - -/********************************************************************* -Writes the value of max_trx_id to the file based trx system header. */ - -void -trx_sys_flush_max_trx_id(void) -/*==========================*/ -{ - trx_sysf_t* sys_header; - mtr_t mtr; - - ut_ad(mutex_own(&kernel_mutex)); - - mtr_start(&mtr); - - sys_header = trx_sysf_get(&mtr); - - mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE, - trx_sys->max_trx_id, &mtr); - mtr_commit(&mtr); -} - -/********************************************************************* -Updates the offset information about the end of the MySQL binlog entry -which corresponds to the transaction just being committed. In a MySQL -replication slave updates the latest master binlog position up to which -replication has proceeded. 
*/ - -void -trx_sys_update_mysql_binlog_offset( -/*===============================*/ - const char* file_name,/* in: MySQL log file name */ - ib_longlong offset, /* in: position in that log file */ - ulint field, /* in: offset of the MySQL log info field in - the trx sys header */ - mtr_t* mtr) /* in: mtr */ -{ - trx_sysf_t* sys_header; - - if (ut_strlen(file_name) >= TRX_SYS_MYSQL_LOG_NAME_LEN) { - - /* We cannot fit the name to the 512 bytes we have reserved */ - - return; - } - - sys_header = trx_sysf_get(mtr); - - if (mach_read_from_4(sys_header + field - + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD) - != TRX_SYS_MYSQL_LOG_MAGIC_N) { - - mlog_write_ulint(sys_header + field - + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD, - TRX_SYS_MYSQL_LOG_MAGIC_N, - MLOG_4BYTES, mtr); - } - - if (0 != strcmp((char*) (sys_header + field + TRX_SYS_MYSQL_LOG_NAME), - file_name)) { - - mlog_write_string(sys_header + field - + TRX_SYS_MYSQL_LOG_NAME, - (byte*) file_name, 1 + ut_strlen(file_name), - mtr); - } - - if (mach_read_from_4(sys_header + field - + TRX_SYS_MYSQL_LOG_OFFSET_HIGH) > 0 - || (offset >> 32) > 0) { - - mlog_write_ulint(sys_header + field - + TRX_SYS_MYSQL_LOG_OFFSET_HIGH, - (ulint)(offset >> 32), - MLOG_4BYTES, mtr); - } - - mlog_write_ulint(sys_header + field - + TRX_SYS_MYSQL_LOG_OFFSET_LOW, - (ulint)(offset & 0xFFFFFFFFUL), - MLOG_4BYTES, mtr); -} - -#ifdef UNIV_HOTBACKUP -/********************************************************************* -Prints to stderr the MySQL binlog info in the system header if the -magic number shows it valid. 
*/ - -void -trx_sys_print_mysql_binlog_offset_from_page( -/*========================================*/ - byte* page) /* in: buffer containing the trx system header page, - i.e., page number TRX_SYS_PAGE_NO in the tablespace */ -{ - trx_sysf_t* sys_header; - - sys_header = page + TRX_SYS; - - if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD) - == TRX_SYS_MYSQL_LOG_MAGIC_N) { - - fprintf(stderr, - "ibbackup: Last MySQL binlog file position %lu %lu," - " file name %s\n", - (ulong) mach_read_from_4( - sys_header + TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET_HIGH), - (ulong) mach_read_from_4( - sys_header + TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET_LOW), - sys_header + TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_NAME); - } -} -#endif /* UNIV_HOTBACKUP */ - -/********************************************************************* -Stores the MySQL binlog offset info in the trx system header if -the magic number shows it valid, and print the info to stderr */ - -void -trx_sys_print_mysql_binlog_offset(void) -/*===================================*/ -{ - trx_sysf_t* sys_header; - mtr_t mtr; - ulint trx_sys_mysql_bin_log_pos_high; - ulint trx_sys_mysql_bin_log_pos_low; - - mtr_start(&mtr); - - sys_header = trx_sysf_get(&mtr); - - if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD) - != TRX_SYS_MYSQL_LOG_MAGIC_N) { - - mtr_commit(&mtr); - - return; - } - - trx_sys_mysql_bin_log_pos_high = mach_read_from_4( - sys_header + TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET_HIGH); - trx_sys_mysql_bin_log_pos_low = mach_read_from_4( - sys_header + TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET_LOW); - - trx_sys_mysql_bin_log_pos - = (((ib_longlong)trx_sys_mysql_bin_log_pos_high) << 32) - + (ib_longlong)trx_sys_mysql_bin_log_pos_low; - - ut_memcpy(trx_sys_mysql_bin_log_name, - sys_header + TRX_SYS_MYSQL_LOG_INFO - + TRX_SYS_MYSQL_LOG_NAME, TRX_SYS_MYSQL_LOG_NAME_LEN); - - 
fprintf(stderr, - "InnoDB: Last MySQL binlog file position %lu %lu," - " file name %s\n", - trx_sys_mysql_bin_log_pos_high, trx_sys_mysql_bin_log_pos_low, - trx_sys_mysql_bin_log_name); - - mtr_commit(&mtr); -} - -/********************************************************************* -Prints to stderr the MySQL master log offset info in the trx system header if -the magic number shows it valid. */ - -void -trx_sys_print_mysql_master_log_pos(void) -/*====================================*/ -{ - trx_sysf_t* sys_header; - mtr_t mtr; - - mtr_start(&mtr); - - sys_header = trx_sysf_get(&mtr); - - if (mach_read_from_4(sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO - + TRX_SYS_MYSQL_LOG_MAGIC_N_FLD) - != TRX_SYS_MYSQL_LOG_MAGIC_N) { - - mtr_commit(&mtr); - - return; - } - - fprintf(stderr, - "InnoDB: In a MySQL replication slave the last" - " master binlog file\n" - "InnoDB: position %lu %lu, file name %s\n", - (ulong) mach_read_from_4(sys_header - + TRX_SYS_MYSQL_MASTER_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET_HIGH), - (ulong) mach_read_from_4(sys_header - + TRX_SYS_MYSQL_MASTER_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET_LOW), - sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO - + TRX_SYS_MYSQL_LOG_NAME); - /* Copy the master log position info to global variables we can - use in ha_innobase.cc to initialize glob_mi to right values */ - - ut_memcpy(trx_sys_mysql_master_log_name, - sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO - + TRX_SYS_MYSQL_LOG_NAME, - TRX_SYS_MYSQL_LOG_NAME_LEN); - - trx_sys_mysql_master_log_pos - = (((ib_longlong) mach_read_from_4( - sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET_HIGH)) << 32) - + ((ib_longlong) mach_read_from_4( - sys_header + TRX_SYS_MYSQL_MASTER_LOG_INFO - + TRX_SYS_MYSQL_LOG_OFFSET_LOW)); - mtr_commit(&mtr); -} - -/******************************************************************** -Looks for a free slot for a rollback segment in the trx system file copy. 
*/ - -ulint -trx_sysf_rseg_find_free( -/*====================*/ - /* out: slot index or ULINT_UNDEFINED if not found */ - mtr_t* mtr) /* in: mtr */ -{ - trx_sysf_t* sys_header; - ulint page_no; - ulint i; - - ut_ad(mutex_own(&(kernel_mutex))); - - sys_header = trx_sysf_get(mtr); - - for (i = 0; i < TRX_SYS_N_RSEGS; i++) { - - page_no = trx_sysf_rseg_get_page_no(sys_header, i, mtr); - - if (page_no == FIL_NULL) { - - return(i); - } - } - - return(ULINT_UNDEFINED); -} - -/********************************************************************* -Creates the file page for the transaction system. This function is called only -at the database creation, before trx_sys_init. */ -static -void -trx_sysf_create( -/*============*/ - mtr_t* mtr) /* in: mtr */ -{ - trx_sysf_t* sys_header; - ulint slot_no; - page_t* page; - ulint page_no; - ulint i; - - ut_ad(mtr); - - /* Note that below we first reserve the file space x-latch, and - then enter the kernel: we must do it in this order to conform - to the latching order rules. 
*/ - - mtr_x_lock(fil_space_get_latch(TRX_SYS_SPACE), mtr); - mutex_enter(&kernel_mutex); - - /* Create the trx sys file block in a new allocated file segment */ - page = fseg_create(TRX_SYS_SPACE, 0, TRX_SYS + TRX_SYS_FSEG_HEADER, - mtr); - ut_a(buf_frame_get_page_no(page) == TRX_SYS_PAGE_NO); - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(page, SYNC_TRX_SYS_HEADER); -#endif /* UNIV_SYNC_DEBUG */ - - mlog_write_ulint(page + FIL_PAGE_TYPE, FIL_PAGE_TYPE_TRX_SYS, - MLOG_2BYTES, mtr); - - /* Reset the doublewrite buffer magic number to zero so that we - know that the doublewrite buffer has not yet been created (this - suppresses a Valgrind warning) */ - - mlog_write_ulint(page + TRX_SYS_DOUBLEWRITE - + TRX_SYS_DOUBLEWRITE_MAGIC, 0, MLOG_4BYTES, mtr); - - sys_header = trx_sysf_get(mtr); - - /* Start counting transaction ids from number 1 up */ - mlog_write_dulint(sys_header + TRX_SYS_TRX_ID_STORE, - ut_dulint_create(0, 1), mtr); - - /* Reset the rollback segment slots */ - for (i = 0; i < TRX_SYS_N_RSEGS; i++) { - - trx_sysf_rseg_set_space(sys_header, i, ULINT_UNDEFINED, mtr); - trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr); - } - - /* The remaining area (up to the page trailer) is uninitialized. - Silence Valgrind warnings about it. */ - UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS - + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE - + TRX_SYS_RSEG_SPACE), - (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END - - (TRX_SYS_RSEGS - + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE - + TRX_SYS_RSEG_SPACE)) - + page - sys_header); - - /* Create the first rollback segment in the SYSTEM tablespace */ - page_no = trx_rseg_header_create(TRX_SYS_SPACE, ULINT_MAX, &slot_no, - mtr); - ut_a(slot_no == TRX_SYS_SYSTEM_RSEG_ID); - ut_a(page_no != FIL_NULL); - - mutex_exit(&kernel_mutex); -} - -/********************************************************************* -Creates and initializes the central memory structures for the transaction -system. This is called when the database is started. 
*/ - -void -trx_sys_init_at_db_start(void) -/*==========================*/ -{ - trx_sysf_t* sys_header; - ib_longlong rows_to_undo = 0; - const char* unit = ""; - trx_t* trx; - mtr_t mtr; - - mtr_start(&mtr); - - ut_ad(trx_sys == NULL); - - mutex_enter(&kernel_mutex); - - trx_sys = mem_alloc(sizeof(trx_sys_t)); - - sys_header = trx_sysf_get(&mtr); - - trx_rseg_list_and_array_init(sys_header, &mtr); - - trx_sys->latest_rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); - - /* VERY important: after the database is started, max_trx_id value is - divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in - trx_sys_get_new_trx_id will evaluate to TRUE when the function - is first time called, and the value for trx id will be written - to the disk-based header! Thus trx id values will not overlap when - the database is repeatedly started! */ - - trx_sys->max_trx_id = ut_dulint_add( - ut_dulint_align_up(mtr_read_dulint( - sys_header - + TRX_SYS_TRX_ID_STORE, &mtr), - TRX_SYS_TRX_ID_WRITE_MARGIN), - 2 * TRX_SYS_TRX_ID_WRITE_MARGIN); - - UT_LIST_INIT(trx_sys->mysql_trx_list); - trx_lists_init_at_db_start(); - - if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) { - trx = UT_LIST_GET_FIRST(trx_sys->trx_list); - - for (;;) { - - if ( trx->conc_state != TRX_PREPARED) { - rows_to_undo += ut_conv_dulint_to_longlong( - trx->undo_no); - } - - trx = UT_LIST_GET_NEXT(trx_list, trx); - - if (!trx) { - break; - } - } - - if (rows_to_undo > 1000000000) { - unit = "M"; - rows_to_undo = rows_to_undo / 1000000; - } - - fprintf(stderr, - "InnoDB: %lu transaction(s) which must be" - " rolled back or cleaned up\n" - "InnoDB: in total %lu%s row operations to undo\n", - (ulong) UT_LIST_GET_LEN(trx_sys->trx_list), - (ulong) rows_to_undo, unit); - - fprintf(stderr, "InnoDB: Trx id counter is %lu %lu\n", - (ulong) ut_dulint_get_high(trx_sys->max_trx_id), - (ulong) ut_dulint_get_low(trx_sys->max_trx_id)); - } - - UT_LIST_INIT(trx_sys->view_list); - - trx_purge_sys_create(); - - mutex_exit(&kernel_mutex); - - 
mtr_commit(&mtr); -} - -/********************************************************************* -Creates and initializes the transaction system at the database creation. */ - -void -trx_sys_create(void) -/*================*/ -{ - mtr_t mtr; - - mtr_start(&mtr); - - trx_sysf_create(&mtr); - - mtr_commit(&mtr); - - trx_sys_init_at_db_start(); -} diff --git a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c deleted file mode 100644 index 8ada38845c5..00000000000 --- a/storage/innobase/trx/trx0trx.c +++ /dev/null @@ -1,2086 +0,0 @@ -/****************************************************** -The transaction - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "trx0trx.h" - -#ifdef UNIV_NONINL -#include "trx0trx.ic" -#endif - -#include "trx0undo.h" -#include "trx0rseg.h" -#include "log0log.h" -#include "que0que.h" -#include "lock0lock.h" -#include "trx0roll.h" -#include "usr0sess.h" -#include "read0read.h" -#include "srv0srv.h" -#include "thr0loc.h" -#include "btr0sea.h" -#include "os0proc.h" -#include "trx0xa.h" -#include "ha_prototypes.h" - -/* Copy of the prototype for innobase_mysql_print_thd: this -copy MUST be equal to the one in mysql/sql/ha_innodb.cc ! */ - -void innobase_mysql_print_thd( - FILE* f, - void* thd, - ulint max_query_len); - -/* Dummy session used currently in MySQL interface */ -sess_t* trx_dummy_sess = NULL; - -/* Number of transactions currently allocated for MySQL: protected by -the kernel mutex */ -ulint trx_n_mysql_transactions = 0; - -/***************************************************************** -Starts the transaction if it is not yet started. */ - -void -trx_start_if_not_started_noninline( -/*===============================*/ - trx_t* trx) /* in: transaction */ -{ - trx_start_if_not_started(trx); -} - -/***************************************************************** -Set detailed error message for the transaction. 
*/ - -void -trx_set_detailed_error( -/*===================*/ - trx_t* trx, /* in: transaction struct */ - const char* msg) /* in: detailed error message */ -{ - ut_strlcpy(trx->detailed_error, msg, sizeof(trx->detailed_error)); -} - -/***************************************************************** -Set detailed error message for the transaction from a file. Note that the -file is rewinded before reading from it. */ - -void -trx_set_detailed_error_from_file( -/*=============================*/ - trx_t* trx, /* in: transaction struct */ - FILE* file) /* in: file to read message from */ -{ - os_file_read_string(file, trx->detailed_error, - sizeof(trx->detailed_error)); -} - -/******************************************************************** -Retrieves the error_info field from a trx. */ - -void* -trx_get_error_info( -/*===============*/ - /* out: the error info */ - trx_t* trx) /* in: trx object */ -{ - return(trx->error_info); -} - -/******************************************************************** -Creates and initializes a transaction object. 
*/ - -trx_t* -trx_create( -/*=======*/ - /* out, own: the transaction */ - sess_t* sess) /* in: session or NULL */ -{ - trx_t* trx; - - ut_ad(mutex_own(&kernel_mutex)); - - trx = mem_alloc(sizeof(trx_t)); - - trx->magic_n = TRX_MAGIC_N; - - trx->op_info = ""; - - trx->is_purge = 0; - trx->conc_state = TRX_NOT_STARTED; - trx->start_time = time(NULL); - - trx->isolation_level = TRX_ISO_REPEATABLE_READ; - - trx->id = ut_dulint_zero; - trx->no = ut_dulint_max; - - trx->support_xa = TRUE; - - trx->check_foreigns = TRUE; - trx->check_unique_secondary = TRUE; - - trx->flush_log_later = FALSE; - trx->must_flush_log_later = FALSE; - - trx->dict_operation = FALSE; - - trx->mysql_thd = NULL; - trx->mysql_query_str = NULL; - trx->active_trans = 0; - trx->duplicates = 0; - - trx->n_mysql_tables_in_use = 0; - trx->mysql_n_tables_locked = 0; - - trx->mysql_log_file_name = NULL; - trx->mysql_log_offset = 0; - - mutex_create(&trx->undo_mutex, SYNC_TRX_UNDO); - - trx->rseg = NULL; - - trx->undo_no = ut_dulint_zero; - trx->last_sql_stat_start.least_undo_no = ut_dulint_zero; - trx->insert_undo = NULL; - trx->update_undo = NULL; - trx->undo_no_arr = NULL; - - trx->error_state = DB_SUCCESS; - trx->detailed_error[0] = '\0'; - - trx->sess = sess; - trx->que_state = TRX_QUE_RUNNING; - trx->n_active_thrs = 0; - - trx->handling_signals = FALSE; - - UT_LIST_INIT(trx->signals); - UT_LIST_INIT(trx->reply_signals); - - trx->graph = NULL; - - trx->wait_lock = NULL; - trx->was_chosen_as_deadlock_victim = FALSE; - UT_LIST_INIT(trx->wait_thrs); - - trx->lock_heap = mem_heap_create_in_buffer(256); - UT_LIST_INIT(trx->trx_locks); - - UT_LIST_INIT(trx->trx_savepoints); - - trx->dict_operation_lock_mode = 0; - trx->has_search_latch = FALSE; - trx->search_latch_timeout = BTR_SEA_TIMEOUT; - - trx->declared_to_be_inside_innodb = FALSE; - trx->n_tickets_to_enter_innodb = 0; - - trx->auto_inc_lock = NULL; - - trx->global_read_view_heap = mem_heap_create(256); - trx->global_read_view = NULL; - trx->read_view 
= NULL; - - /* Set X/Open XA transaction identification to NULL */ - memset(&trx->xid, 0, sizeof(trx->xid)); - trx->xid.formatID = -1; - - trx->n_autoinc_rows = 0; - - return(trx); -} - -/************************************************************************ -Creates a transaction object for MySQL. */ - -trx_t* -trx_allocate_for_mysql(void) -/*========================*/ - /* out, own: transaction object */ -{ - trx_t* trx; - - mutex_enter(&kernel_mutex); - - /* Open a dummy session */ - - if (!trx_dummy_sess) { - trx_dummy_sess = sess_open(); - } - - trx = trx_create(trx_dummy_sess); - - trx_n_mysql_transactions++; - - UT_LIST_ADD_FIRST(mysql_trx_list, trx_sys->mysql_trx_list, trx); - - mutex_exit(&kernel_mutex); - - trx->mysql_thread_id = os_thread_get_curr_id(); - - trx->mysql_process_no = os_proc_get_number(); - - return(trx); -} - -/************************************************************************ -Creates a transaction object for background operations by the master thread. */ - -trx_t* -trx_allocate_for_background(void) -/*=============================*/ - /* out, own: transaction object */ -{ - trx_t* trx; - - mutex_enter(&kernel_mutex); - - /* Open a dummy session */ - - if (!trx_dummy_sess) { - trx_dummy_sess = sess_open(); - } - - trx = trx_create(trx_dummy_sess); - - mutex_exit(&kernel_mutex); - - return(trx); -} - -/************************************************************************ -Releases the search latch if trx has reserved it. */ - -void -trx_search_latch_release_if_reserved( -/*=================================*/ - trx_t* trx) /* in: transaction */ -{ - if (trx->has_search_latch) { - rw_lock_s_unlock(&btr_search_latch); - - trx->has_search_latch = FALSE; - } -} - -/************************************************************************ -Frees a transaction object. 
*/ - -void -trx_free( -/*=====*/ - trx_t* trx) /* in, own: trx object */ -{ - ut_ad(mutex_own(&kernel_mutex)); - - if (trx->declared_to_be_inside_innodb) { - ut_print_timestamp(stderr); - fputs(" InnoDB: Error: Freeing a trx which is declared" - " to be processing\n" - "InnoDB: inside InnoDB.\n", stderr); - trx_print(stderr, trx, 600); - putc('\n', stderr); - - /* This is an error but not a fatal error. We must keep - the counters like srv_conc_n_threads accurate. */ - srv_conc_force_exit_innodb(trx); - } - - if (trx->n_mysql_tables_in_use != 0 - || trx->mysql_n_tables_locked != 0) { - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: MySQL is freeing a thd\n" - "InnoDB: though trx->n_mysql_tables_in_use is %lu\n" - "InnoDB: and trx->mysql_n_tables_locked is %lu.\n", - (ulong)trx->n_mysql_tables_in_use, - (ulong)trx->mysql_n_tables_locked); - - trx_print(stderr, trx, 600); - - ut_print_buf(stderr, trx, sizeof(trx_t)); - } - - ut_a(trx->magic_n == TRX_MAGIC_N); - - trx->magic_n = 11112222; - - ut_a(trx->conc_state == TRX_NOT_STARTED); - - mutex_free(&(trx->undo_mutex)); - - ut_a(trx->insert_undo == NULL); - ut_a(trx->update_undo == NULL); - - if (trx->undo_no_arr) { - trx_undo_arr_free(trx->undo_no_arr); - } - - ut_a(UT_LIST_GET_LEN(trx->signals) == 0); - ut_a(UT_LIST_GET_LEN(trx->reply_signals) == 0); - - ut_a(trx->wait_lock == NULL); - ut_a(UT_LIST_GET_LEN(trx->wait_thrs) == 0); - - ut_a(!trx->has_search_latch); - ut_a(!trx->auto_inc_lock); - - ut_a(trx->dict_operation_lock_mode == 0); - - if (trx->lock_heap) { - mem_heap_free(trx->lock_heap); - } - - ut_a(UT_LIST_GET_LEN(trx->trx_locks) == 0); - - if (trx->global_read_view_heap) { - mem_heap_free(trx->global_read_view_heap); - } - - trx->global_read_view = NULL; - - ut_a(trx->read_view == NULL); - - mem_free(trx); -} - -/************************************************************************ -Frees a transaction object for MySQL. 
*/ - -void -trx_free_for_mysql( -/*===============*/ - trx_t* trx) /* in, own: trx object */ -{ - mutex_enter(&kernel_mutex); - - UT_LIST_REMOVE(mysql_trx_list, trx_sys->mysql_trx_list, trx); - - trx_free(trx); - - ut_a(trx_n_mysql_transactions > 0); - - trx_n_mysql_transactions--; - - mutex_exit(&kernel_mutex); -} - -/************************************************************************ -Frees a transaction object of a background operation of the master thread. */ - -void -trx_free_for_background( -/*====================*/ - trx_t* trx) /* in, own: trx object */ -{ - mutex_enter(&kernel_mutex); - - trx_free(trx); - - mutex_exit(&kernel_mutex); -} - -/******************************************************************** -Inserts the trx handle in the trx system trx list in the right position. -The list is sorted on the trx id so that the biggest id is at the list -start. This function is used at the database startup to insert incomplete -transactions to the list. */ -static -void -trx_list_insert_ordered( -/*====================*/ - trx_t* trx) /* in: trx handle */ -{ - trx_t* trx2; - - ut_ad(mutex_own(&kernel_mutex)); - - trx2 = UT_LIST_GET_FIRST(trx_sys->trx_list); - - while (trx2 != NULL) { - if (ut_dulint_cmp(trx->id, trx2->id) >= 0) { - - ut_ad(ut_dulint_cmp(trx->id, trx2->id) == 1); - break; - } - trx2 = UT_LIST_GET_NEXT(trx_list, trx2); - } - - if (trx2 != NULL) { - trx2 = UT_LIST_GET_PREV(trx_list, trx2); - - if (trx2 == NULL) { - UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx); - } else { - UT_LIST_INSERT_AFTER(trx_list, trx_sys->trx_list, - trx2, trx); - } - } else { - UT_LIST_ADD_LAST(trx_list, trx_sys->trx_list, trx); - } -} - -/******************************************************************** -Creates trx objects for transactions and initializes the trx list of -trx_sys at database start. 
Rollback segment and undo log lists must -already exist when this function is called, because the lists of -transactions to be rolled back or cleaned up are built based on the -undo log lists. */ - -void -trx_lists_init_at_db_start(void) -/*============================*/ -{ - trx_rseg_t* rseg; - trx_undo_t* undo; - trx_t* trx; - - UT_LIST_INIT(trx_sys->trx_list); - - /* Look from the rollback segments if there exist undo logs for - transactions */ - - rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); - - while (rseg != NULL) { - undo = UT_LIST_GET_FIRST(rseg->insert_undo_list); - - while (undo != NULL) { - - trx = trx_create(NULL); - - trx->id = undo->trx_id; - trx->xid = undo->xid; - trx->insert_undo = undo; - trx->rseg = rseg; - - if (undo->state != TRX_UNDO_ACTIVE) { - - /* Prepared transactions are left in - the prepared state waiting for a - commit or abort decision from MySQL */ - - if (undo->state == TRX_UNDO_PREPARED) { - - fprintf(stderr, - "InnoDB: Transaction %lu %lu" - " was in the" - " XA prepared state.\n", - ut_dulint_get_high(trx->id), - ut_dulint_get_low(trx->id)); - - if (srv_force_recovery == 0) { - - trx->conc_state = TRX_PREPARED; - } else { - fprintf(stderr, - "InnoDB: Since" - " innodb_force_recovery" - " > 0, we will" - " rollback it" - " anyway.\n"); - - trx->conc_state = TRX_ACTIVE; - } - } else { - trx->conc_state - = TRX_COMMITTED_IN_MEMORY; - } - - /* We give a dummy value for the trx no; - this should have no relevance since purge - is not interested in committed transaction - numbers, unless they are in the history - list, in which case it looks the number - from the disk based undo log structure */ - - trx->no = trx->id; - } else { - trx->conc_state = TRX_ACTIVE; - - /* A running transaction always has the number - field inited to ut_dulint_max */ - - trx->no = ut_dulint_max; - } - - if (undo->dict_operation) { - trx->dict_operation = undo->dict_operation; - trx->table_id = undo->table_id; - } - - if (!undo->empty) { - trx->undo_no = 
ut_dulint_add(undo->top_undo_no, - 1); - } - - trx_list_insert_ordered(trx); - - undo = UT_LIST_GET_NEXT(undo_list, undo); - } - - undo = UT_LIST_GET_FIRST(rseg->update_undo_list); - - while (undo != NULL) { - trx = trx_get_on_id(undo->trx_id); - - if (NULL == trx) { - trx = trx_create(NULL); - - trx->id = undo->trx_id; - trx->xid = undo->xid; - - if (undo->state != TRX_UNDO_ACTIVE) { - - /* Prepared transactions are left in - the prepared state waiting for a - commit or abort decision from MySQL */ - - if (undo->state == TRX_UNDO_PREPARED) { - fprintf(stderr, - "InnoDB: Transaction" - " %lu %lu was in the" - " XA prepared state.\n", - ut_dulint_get_high( - trx->id), - ut_dulint_get_low( - trx->id)); - - if (srv_force_recovery == 0) { - - trx->conc_state - = TRX_PREPARED; - } else { - fprintf(stderr, - "InnoDB: Since" - " innodb_force_recovery" - " > 0, we will" - " rollback it" - " anyway.\n"); - - trx->conc_state - = TRX_ACTIVE; - } - } else { - trx->conc_state - = TRX_COMMITTED_IN_MEMORY; - } - - /* We give a dummy value for the trx - number */ - - trx->no = trx->id; - } else { - trx->conc_state = TRX_ACTIVE; - - /* A running transaction always has - the number field inited to - ut_dulint_max */ - - trx->no = ut_dulint_max; - } - - trx->rseg = rseg; - trx_list_insert_ordered(trx); - - if (undo->dict_operation) { - trx->dict_operation - = undo->dict_operation; - trx->table_id = undo->table_id; - } - } - - trx->update_undo = undo; - - if ((!undo->empty) - && (ut_dulint_cmp(undo->top_undo_no, - trx->undo_no) >= 0)) { - - trx->undo_no = ut_dulint_add(undo->top_undo_no, - 1); - } - - undo = UT_LIST_GET_NEXT(undo_list, undo); - } - - rseg = UT_LIST_GET_NEXT(rseg_list, rseg); - } -} - -/********************************************************************** -Assigns a rollback segment to a transaction in a round-robin fashion. -Skips the SYSTEM rollback segment if another is available. 
*/ -UNIV_INLINE -ulint -trx_assign_rseg(void) -/*=================*/ - /* out: assigned rollback segment id */ -{ - trx_rseg_t* rseg = trx_sys->latest_rseg; - - ut_ad(mutex_own(&kernel_mutex)); -loop: - /* Get next rseg in a round-robin fashion */ - - rseg = UT_LIST_GET_NEXT(rseg_list, rseg); - - if (rseg == NULL) { - rseg = UT_LIST_GET_FIRST(trx_sys->rseg_list); - } - - /* If it is the SYSTEM rollback segment, and there exist others, skip - it */ - - if ((rseg->id == TRX_SYS_SYSTEM_RSEG_ID) - && (UT_LIST_GET_LEN(trx_sys->rseg_list) > 1)) { - goto loop; - } - - trx_sys->latest_rseg = rseg; - - return(rseg->id); -} - -/******************************************************************** -Starts a new transaction. */ - -ibool -trx_start_low( -/*==========*/ - /* out: TRUE */ - trx_t* trx, /* in: transaction */ - ulint rseg_id)/* in: rollback segment id; if ULINT_UNDEFINED - is passed, the system chooses the rollback segment - automatically in a round-robin fashion */ -{ - trx_rseg_t* rseg; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(trx->rseg == NULL); - - if (trx->is_purge) { - trx->id = ut_dulint_zero; - trx->conc_state = TRX_ACTIVE; - trx->start_time = time(NULL); - - return(TRUE); - } - - ut_ad(trx->conc_state != TRX_ACTIVE); - - if (rseg_id == ULINT_UNDEFINED) { - - rseg_id = trx_assign_rseg(); - } - - rseg = trx_sys_get_nth_rseg(trx_sys, rseg_id); - - trx->id = trx_sys_get_new_trx_id(); - - /* The initial value for trx->no: ut_dulint_max is used in - read_view_open_now: */ - - trx->no = ut_dulint_max; - - trx->rseg = rseg; - - trx->conc_state = TRX_ACTIVE; - trx->start_time = time(NULL); - - UT_LIST_ADD_FIRST(trx_list, trx_sys->trx_list, trx); - - return(TRUE); -} - -/******************************************************************** -Starts a new transaction. 
*/ - -ibool -trx_start( -/*======*/ - /* out: TRUE */ - trx_t* trx, /* in: transaction */ - ulint rseg_id)/* in: rollback segment id; if ULINT_UNDEFINED - is passed, the system chooses the rollback segment - automatically in a round-robin fashion */ -{ - ibool ret; - - mutex_enter(&kernel_mutex); - - ret = trx_start_low(trx, rseg_id); - - mutex_exit(&kernel_mutex); - - return(ret); -} - -/******************************************************************** -Commits a transaction. */ - -void -trx_commit_off_kernel( -/*==================*/ - trx_t* trx) /* in: transaction */ -{ - page_t* update_hdr_page; - dulint lsn; - trx_rseg_t* rseg; - trx_undo_t* undo; - ibool must_flush_log = FALSE; - mtr_t mtr; - - ut_ad(mutex_own(&kernel_mutex)); - - trx->must_flush_log_later = FALSE; - - rseg = trx->rseg; - - if (trx->insert_undo != NULL || trx->update_undo != NULL) { - - mutex_exit(&kernel_mutex); - - mtr_start(&mtr); - - must_flush_log = TRUE; - - /* Change the undo log segment states from TRX_UNDO_ACTIVE - to some other state: these modifications to the file data - structure define the transaction as committed in the file - based world, at the serialization point of the log sequence - number lsn obtained below. */ - - mutex_enter(&(rseg->mutex)); - - if (trx->insert_undo != NULL) { - trx_undo_set_state_at_finish( - rseg, trx, trx->insert_undo, &mtr); - } - - undo = trx->update_undo; - - if (undo) { - mutex_enter(&kernel_mutex); - trx->no = trx_sys_get_new_trx_no(); - - mutex_exit(&kernel_mutex); - - /* It is not necessary to obtain trx->undo_mutex here - because only a single OS thread is allowed to do the - transaction commit for this transaction. */ - - update_hdr_page = trx_undo_set_state_at_finish( - rseg, trx, undo, &mtr); - - /* We have to do the cleanup for the update log while - holding the rseg mutex because update log headers - have to be put to the history list in the order of - the trx number. 
*/ - - trx_undo_update_cleanup(trx, update_hdr_page, &mtr); - } - - mutex_exit(&(rseg->mutex)); - - /* Update the latest MySQL binlog name and offset info - in trx sys header if MySQL binlogging is on or the database - server is a MySQL replication slave */ - - if (trx->mysql_log_file_name - && trx->mysql_log_file_name[0] != '\0') { - trx_sys_update_mysql_binlog_offset( - trx->mysql_log_file_name, - trx->mysql_log_offset, - TRX_SYS_MYSQL_LOG_INFO, &mtr); - trx->mysql_log_file_name = NULL; - } - - /* The following call commits the mini-transaction, making the - whole transaction committed in the file-based world, at this - log sequence number. The transaction becomes 'durable' when - we write the log to disk, but in the logical sense the commit - in the file-based data structures (undo logs etc.) happens - here. - - NOTE that transaction numbers, which are assigned only to - transactions with an update undo log, do not necessarily come - in exactly the same order as commit lsn's, if the transactions - have different rollback segments. To get exactly the same - order we should hold the kernel mutex up to this point, - adding to to the contention of the kernel mutex. However, if - a transaction T2 is able to see modifications made by - a transaction T1, T2 will always get a bigger transaction - number and a bigger commit lsn than T1. */ - - /*--------------*/ - mtr_commit(&mtr); - /*--------------*/ - lsn = mtr.end_lsn; - - mutex_enter(&kernel_mutex); - } - - ut_ad(trx->conc_state == TRX_ACTIVE - || trx->conc_state == TRX_PREPARED); - ut_ad(mutex_own(&kernel_mutex)); - - /* The following assignment makes the transaction committed in memory - and makes its changes to data visible to other transactions. - NOTE that there is a small discrepancy from the strict formal - visibility rules here: a human user of the database can see - modifications made by another transaction T even before the necessary - log segment has been flushed to the disk. 
If the database happens to - crash before the flush, the user has seen modifications from T which - will never be a committed transaction. However, any transaction T2 - which sees the modifications of the committing transaction T, and - which also itself makes modifications to the database, will get an lsn - larger than the committing transaction T. In the case where the log - flush fails, and T never gets committed, also T2 will never get - committed. */ - - /*--------------------------------------*/ - trx->conc_state = TRX_COMMITTED_IN_MEMORY; - /*--------------------------------------*/ - - lock_release_off_kernel(trx); - - if (trx->global_read_view) { - read_view_close(trx->global_read_view); - mem_heap_empty(trx->global_read_view_heap); - trx->global_read_view = NULL; - } - - trx->read_view = NULL; - - if (must_flush_log) { - - mutex_exit(&kernel_mutex); - - if (trx->insert_undo != NULL) { - - trx_undo_insert_cleanup(trx); - } - - /* NOTE that we could possibly make a group commit more - efficient here: call os_thread_yield here to allow also other - trxs to come to commit! */ - - /*-------------------------------------*/ - - /* Depending on the my.cnf options, we may now write the log - buffer to the log files, making the transaction durable if - the OS does not crash. We may also flush the log files to - disk, making the transaction durable also at an OS crash or a - power outage. - - The idea in InnoDB's group commit is that a group of - transactions gather behind a trx doing a physical disk write - to log files, and when that physical write has been completed, - one of those transactions does a write which commits the whole - group. Note that this group commit will only bring benefit if - there are > 2 users in the database. Then at least 2 users can - gather behind one doing the physical log write to disk. 
- - If we are calling trx_commit() under MySQL's binlog mutex, we - will delay possible log write and flush to a separate function - trx_commit_complete_for_mysql(), which is only called when the - thread has released the binlog mutex. This is to make the - group commit algorithm to work. Otherwise, the MySQL binlog - mutex would serialize all commits and prevent a group of - transactions from gathering. */ - - if (trx->flush_log_later) { - /* Do nothing yet */ - trx->must_flush_log_later = TRUE; - } else if (srv_flush_log_at_trx_commit == 0) { - /* Do nothing */ - } else if (srv_flush_log_at_trx_commit == 1) { - if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { - /* Write the log but do not flush it to disk */ - - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, - FALSE); - } else { - /* Write the log to the log files AND flush - them to disk */ - - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); - } - } else if (srv_flush_log_at_trx_commit == 2) { - - /* Write the log but do not flush it to disk */ - - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); - } else { - ut_error; - } - - trx->commit_lsn = lsn; - - /*-------------------------------------*/ - - mutex_enter(&kernel_mutex); - } - - /* Free all savepoints */ - trx_roll_free_all_savepoints(trx); - - trx->conc_state = TRX_NOT_STARTED; - trx->rseg = NULL; - trx->undo_no = ut_dulint_zero; - trx->last_sql_stat_start.least_undo_no = ut_dulint_zero; - trx->mysql_query_str = NULL; - - ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); - ut_ad(UT_LIST_GET_LEN(trx->trx_locks) == 0); - - UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx); -} - -/******************************************************************** -Cleans up a transaction at database startup. The cleanup is needed if -the transaction already got to the middle of a commit when the database -crashed, andf we cannot roll it back. 
*/ - -void -trx_cleanup_at_db_startup( -/*======================*/ - trx_t* trx) /* in: transaction */ -{ - if (trx->insert_undo != NULL) { - - trx_undo_insert_cleanup(trx); - } - - trx->conc_state = TRX_NOT_STARTED; - trx->rseg = NULL; - trx->undo_no = ut_dulint_zero; - trx->last_sql_stat_start.least_undo_no = ut_dulint_zero; - - UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx); -} - -/************************************************************************ -Assigns a read view for a consistent read query. All the consistent reads -within the same transaction will get the same read view, which is created -when this function is first called for a new started transaction. */ - -read_view_t* -trx_assign_read_view( -/*=================*/ - /* out: consistent read view */ - trx_t* trx) /* in: active transaction */ -{ - ut_ad(trx->conc_state == TRX_ACTIVE); - - if (trx->read_view) { - return(trx->read_view); - } - - mutex_enter(&kernel_mutex); - - if (!trx->read_view) { - trx->read_view = read_view_open_now( - trx->id, trx->global_read_view_heap); - trx->global_read_view = trx->read_view; - } - - mutex_exit(&kernel_mutex); - - return(trx->read_view); -} - -/******************************************************************** -Commits a transaction. NOTE that the kernel mutex is temporarily released. 
*/ -static -void -trx_handle_commit_sig_off_kernel( -/*=============================*/ - trx_t* trx, /* in: transaction */ - que_thr_t** next_thr) /* in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread */ -{ - trx_sig_t* sig; - trx_sig_t* next_sig; - - ut_ad(mutex_own(&kernel_mutex)); - - trx->que_state = TRX_QUE_COMMITTING; - - trx_commit_off_kernel(trx); - - ut_ad(UT_LIST_GET_LEN(trx->wait_thrs) == 0); - - /* Remove all TRX_SIG_COMMIT signals from the signal queue and send - reply messages to them */ - - sig = UT_LIST_GET_FIRST(trx->signals); - - while (sig != NULL) { - next_sig = UT_LIST_GET_NEXT(signals, sig); - - if (sig->type == TRX_SIG_COMMIT) { - - trx_sig_reply(sig, next_thr); - trx_sig_remove(trx, sig); - } - - sig = next_sig; - } - - trx->que_state = TRX_QUE_RUNNING; -} - -/*************************************************************** -The transaction must be in the TRX_QUE_LOCK_WAIT state. Puts it to -the TRX_QUE_RUNNING state and releases query threads which were -waiting for a lock in the wait_thrs list. */ - -void -trx_end_lock_wait( -/*==============*/ - trx_t* trx) /* in: transaction */ -{ - que_thr_t* thr; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT); - - thr = UT_LIST_GET_FIRST(trx->wait_thrs); - - while (thr != NULL) { - que_thr_end_wait_no_next_thr(thr); - - UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr); - - thr = UT_LIST_GET_FIRST(trx->wait_thrs); - } - - trx->que_state = TRX_QUE_RUNNING; -} - -/*************************************************************** -Moves the query threads in the lock wait list to the SUSPENDED state and puts -the transaction to the TRX_QUE_RUNNING state. 
*/ -static -void -trx_lock_wait_to_suspended( -/*=======================*/ - trx_t* trx) /* in: transaction in the TRX_QUE_LOCK_WAIT state */ -{ - que_thr_t* thr; - - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(trx->que_state == TRX_QUE_LOCK_WAIT); - - thr = UT_LIST_GET_FIRST(trx->wait_thrs); - - while (thr != NULL) { - thr->state = QUE_THR_SUSPENDED; - - UT_LIST_REMOVE(trx_thrs, trx->wait_thrs, thr); - - thr = UT_LIST_GET_FIRST(trx->wait_thrs); - } - - trx->que_state = TRX_QUE_RUNNING; -} - -/*************************************************************** -Moves the query threads in the sig reply wait list of trx to the SUSPENDED -state. */ -static -void -trx_sig_reply_wait_to_suspended( -/*============================*/ - trx_t* trx) /* in: transaction */ -{ - trx_sig_t* sig; - que_thr_t* thr; - - ut_ad(mutex_own(&kernel_mutex)); - - sig = UT_LIST_GET_FIRST(trx->reply_signals); - - while (sig != NULL) { - thr = sig->receiver; - - ut_ad(thr->state == QUE_THR_SIG_REPLY_WAIT); - - thr->state = QUE_THR_SUSPENDED; - - sig->receiver = NULL; - - UT_LIST_REMOVE(reply_signals, trx->reply_signals, sig); - - sig = UT_LIST_GET_FIRST(trx->reply_signals); - } -} - -/********************************************************************* -Checks the compatibility of a new signal with the other signals in the -queue. 
*/ -static -ibool -trx_sig_is_compatible( -/*==================*/ - /* out: TRUE if the signal can be queued */ - trx_t* trx, /* in: trx handle */ - ulint type, /* in: signal type */ - ulint sender) /* in: TRX_SIG_SELF or TRX_SIG_OTHER_SESS */ -{ - trx_sig_t* sig; - - ut_ad(mutex_own(&kernel_mutex)); - - if (UT_LIST_GET_LEN(trx->signals) == 0) { - - return(TRUE); - } - - if (sender == TRX_SIG_SELF) { - if (type == TRX_SIG_ERROR_OCCURRED) { - - return(TRUE); - - } else if (type == TRX_SIG_BREAK_EXECUTION) { - - return(TRUE); - } else { - return(FALSE); - } - } - - ut_ad(sender == TRX_SIG_OTHER_SESS); - - sig = UT_LIST_GET_FIRST(trx->signals); - - if (type == TRX_SIG_COMMIT) { - while (sig != NULL) { - - if (sig->type == TRX_SIG_TOTAL_ROLLBACK) { - - return(FALSE); - } - - sig = UT_LIST_GET_NEXT(signals, sig); - } - - return(TRUE); - - } else if (type == TRX_SIG_TOTAL_ROLLBACK) { - while (sig != NULL) { - - if (sig->type == TRX_SIG_COMMIT) { - - return(FALSE); - } - - sig = UT_LIST_GET_NEXT(signals, sig); - } - - return(TRUE); - - } else if (type == TRX_SIG_BREAK_EXECUTION) { - - return(TRUE); - } else { - ut_error; - - return(FALSE); - } -} - -/******************************************************************** -Sends a signal to a trx object. 
*/ - -void -trx_sig_send( -/*=========*/ - trx_t* trx, /* in: trx handle */ - ulint type, /* in: signal type */ - ulint sender, /* in: TRX_SIG_SELF or - TRX_SIG_OTHER_SESS */ - que_thr_t* receiver_thr, /* in: query thread which wants the - reply, or NULL; if type is - TRX_SIG_END_WAIT, this must be NULL */ - trx_savept_t* savept, /* in: possible rollback savepoint, or - NULL */ - que_thr_t** next_thr) /* in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread; if the parameter - is NULL, it is ignored */ -{ - trx_sig_t* sig; - trx_t* receiver_trx; - - ut_ad(trx); - ut_ad(mutex_own(&kernel_mutex)); - - if (!trx_sig_is_compatible(trx, type, sender)) { - /* The signal is not compatible with the other signals in - the queue: die */ - - ut_error; - } - - /* Queue the signal object */ - - if (UT_LIST_GET_LEN(trx->signals) == 0) { - - /* The signal list is empty: the 'sig' slot must be unused - (we improve performance a bit by avoiding mem_alloc) */ - sig = &(trx->sig); - } else { - /* It might be that the 'sig' slot is unused also in this - case, but we choose the easy way of using mem_alloc */ - - sig = mem_alloc(sizeof(trx_sig_t)); - } - - UT_LIST_ADD_LAST(signals, trx->signals, sig); - - sig->type = type; - sig->sender = sender; - sig->receiver = receiver_thr; - - if (savept) { - sig->savept = *savept; - } - - if (receiver_thr) { - receiver_trx = thr_get_trx(receiver_thr); - - UT_LIST_ADD_LAST(reply_signals, receiver_trx->reply_signals, - sig); - } - - if (trx->sess->state == SESS_ERROR) { - - trx_sig_reply_wait_to_suspended(trx); - } - - if ((sender != TRX_SIG_SELF) || (type == TRX_SIG_BREAK_EXECUTION)) { - ut_error; - } - - /* If there were no other signals ahead in the queue, try to start - handling of the signal */ - - if (UT_LIST_GET_FIRST(trx->signals) == sig) { - - trx_sig_start_handle(trx, next_thr); - } -} - 
-/******************************************************************** -Ends signal handling. If the session is in the error state, and -trx->graph_before_signal_handling != NULL, then returns control to the error -handling routine of the graph (currently just returns the control to the -graph root which then will send an error message to the client). */ - -void -trx_end_signal_handling( -/*====================*/ - trx_t* trx) /* in: trx */ -{ - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(trx->handling_signals == TRUE); - - trx->handling_signals = FALSE; - - trx->graph = trx->graph_before_signal_handling; - - if (trx->graph && (trx->sess->state == SESS_ERROR)) { - - que_fork_error_handle(trx, trx->graph); - } -} - -/******************************************************************** -Starts handling of a trx signal. */ - -void -trx_sig_start_handle( -/*=================*/ - trx_t* trx, /* in: trx handle */ - que_thr_t** next_thr) /* in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread; if the parameter - is NULL, it is ignored */ -{ - trx_sig_t* sig; - ulint type; -loop: - /* We loop in this function body as long as there are queued signals - we can process immediately */ - - ut_ad(trx); - ut_ad(mutex_own(&kernel_mutex)); - - if (trx->handling_signals && (UT_LIST_GET_LEN(trx->signals) == 0)) { - - trx_end_signal_handling(trx); - - return; - } - - if (trx->conc_state == TRX_NOT_STARTED) { - - trx_start_low(trx, ULINT_UNDEFINED); - } - - /* If the trx is in a lock wait state, moves the waiting query threads - to the suspended state */ - - if (trx->que_state == TRX_QUE_LOCK_WAIT) { - - trx_lock_wait_to_suspended(trx); - } - - /* If the session is in the error state and this trx has threads - waiting for reply from signals, moves these threads to the suspended - state, canceling wait reservations; note that if the transaction has - sent a commit or rollback 
signal to itself, and its session is not in - the error state, then nothing is done here. */ - - if (trx->sess->state == SESS_ERROR) { - trx_sig_reply_wait_to_suspended(trx); - } - - /* If there are no running query threads, we can start processing of a - signal, otherwise we have to wait until all query threads of this - transaction are aware of the arrival of the signal. */ - - if (trx->n_active_thrs > 0) { - - return; - } - - if (trx->handling_signals == FALSE) { - trx->graph_before_signal_handling = trx->graph; - - trx->handling_signals = TRUE; - } - - sig = UT_LIST_GET_FIRST(trx->signals); - type = sig->type; - - if (type == TRX_SIG_COMMIT) { - - trx_handle_commit_sig_off_kernel(trx, next_thr); - - } else if ((type == TRX_SIG_TOTAL_ROLLBACK) - || (type == TRX_SIG_ROLLBACK_TO_SAVEPT)) { - - trx_rollback(trx, sig, next_thr); - - /* No further signals can be handled until the rollback - completes, therefore we return */ - - return; - - } else if (type == TRX_SIG_ERROR_OCCURRED) { - - trx_rollback(trx, sig, next_thr); - - /* No further signals can be handled until the rollback - completes, therefore we return */ - - return; - - } else if (type == TRX_SIG_BREAK_EXECUTION) { - - trx_sig_reply(sig, next_thr); - trx_sig_remove(trx, sig); - } else { - ut_error; - } - - goto loop; -} - -/******************************************************************** -Send the reply message when a signal in the queue of the trx has been -handled. 
*/ - -void -trx_sig_reply( -/*==========*/ - trx_sig_t* sig, /* in: signal */ - que_thr_t** next_thr) /* in/out: next query thread to run; - if the value which is passed in is - a pointer to a NULL pointer, then the - calling function can start running - a new query thread */ -{ - trx_t* receiver_trx; - - ut_ad(sig); - ut_ad(mutex_own(&kernel_mutex)); - - if (sig->receiver != NULL) { - ut_ad((sig->receiver)->state == QUE_THR_SIG_REPLY_WAIT); - - receiver_trx = thr_get_trx(sig->receiver); - - UT_LIST_REMOVE(reply_signals, receiver_trx->reply_signals, - sig); - ut_ad(receiver_trx->sess->state != SESS_ERROR); - - que_thr_end_wait(sig->receiver, next_thr); - - sig->receiver = NULL; - - } -} - -/******************************************************************** -Removes a signal object from the trx signal queue. */ - -void -trx_sig_remove( -/*===========*/ - trx_t* trx, /* in: trx handle */ - trx_sig_t* sig) /* in, own: signal */ -{ - ut_ad(trx && sig); - ut_ad(mutex_own(&kernel_mutex)); - - ut_ad(sig->receiver == NULL); - - UT_LIST_REMOVE(signals, trx->signals, sig); - sig->type = 0; /* reset the field to catch possible bugs */ - - if (sig != &(trx->sig)) { - mem_free(sig); - } -} - -/************************************************************************* -Creates a commit command node struct. */ - -commit_node_t* -commit_node_create( -/*===============*/ - /* out, own: commit node struct */ - mem_heap_t* heap) /* in: mem heap where created */ -{ - commit_node_t* node; - - node = mem_heap_alloc(heap, sizeof(commit_node_t)); - node->common.type = QUE_NODE_COMMIT; - node->state = COMMIT_NODE_SEND; - - return(node); -} - -/*************************************************************** -Performs an execution step for a commit type node in a query graph. 
*/ - -que_thr_t* -trx_commit_step( -/*============*/ - /* out: query thread to run next, or NULL */ - que_thr_t* thr) /* in: query thread */ -{ - commit_node_t* node; - que_thr_t* next_thr; - - node = thr->run_node; - - ut_ad(que_node_get_type(node) == QUE_NODE_COMMIT); - - if (thr->prev_node == que_node_get_parent(node)) { - node->state = COMMIT_NODE_SEND; - } - - if (node->state == COMMIT_NODE_SEND) { - mutex_enter(&kernel_mutex); - - node->state = COMMIT_NODE_WAIT; - - next_thr = NULL; - - thr->state = QUE_THR_SIG_REPLY_WAIT; - - /* Send the commit signal to the transaction */ - - trx_sig_send(thr_get_trx(thr), TRX_SIG_COMMIT, TRX_SIG_SELF, - thr, NULL, &next_thr); - - mutex_exit(&kernel_mutex); - - return(next_thr); - } - - ut_ad(node->state == COMMIT_NODE_WAIT); - - node->state = COMMIT_NODE_SEND; - - thr->run_node = que_node_get_parent(node); - - return(thr); -} - -/************************************************************************** -Does the transaction commit for MySQL. */ - -ulint -trx_commit_for_mysql( -/*=================*/ - /* out: 0 or error number */ - trx_t* trx) /* in: trx handle */ -{ - /* Because we do not do the commit by sending an Innobase - sig to the transaction, we must here make sure that trx has been - started. */ - - ut_a(trx); - - trx->op_info = "committing"; - - /* If we are doing the XA recovery of prepared transactions, then - the transaction object does not have an InnoDB session object, and we - set the dummy session that we use for all MySQL transactions. 
*/ - - if (trx->sess == NULL) { - /* Open a dummy session */ - - if (!trx_dummy_sess) { - mutex_enter(&kernel_mutex); - - if (!trx_dummy_sess) { - trx_dummy_sess = sess_open(); - } - - mutex_exit(&kernel_mutex); - } - - trx->sess = trx_dummy_sess; - } - - trx_start_if_not_started(trx); - - mutex_enter(&kernel_mutex); - - trx_commit_off_kernel(trx); - - mutex_exit(&kernel_mutex); - - trx->op_info = ""; - - return(0); -} - -/************************************************************************** -If required, flushes the log to disk if we called trx_commit_for_mysql() -with trx->flush_log_later == TRUE. */ - -ulint -trx_commit_complete_for_mysql( -/*==========================*/ - /* out: 0 or error number */ - trx_t* trx) /* in: trx handle */ -{ - dulint lsn = trx->commit_lsn; - - ut_a(trx); - - trx->op_info = "flushing log"; - - if (!trx->must_flush_log_later) { - /* Do nothing */ - } else if (srv_flush_log_at_trx_commit == 0) { - /* Do nothing */ - } else if (srv_flush_log_at_trx_commit == 1) { - if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { - /* Write the log but do not flush it to disk */ - - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); - } else { - /* Write the log to the log files AND flush them to - disk */ - - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); - } - } else if (srv_flush_log_at_trx_commit == 2) { - - /* Write the log but do not flush it to disk */ - - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); - } else { - ut_error; - } - - trx->must_flush_log_later = FALSE; - - trx->op_info = ""; - - return(0); -} - -/************************************************************************** -Marks the latest SQL statement ended. 
*/ - -void -trx_mark_sql_stat_end( -/*==================*/ - trx_t* trx) /* in: trx handle */ -{ - ut_a(trx); - - if (trx->conc_state == TRX_NOT_STARTED) { - trx->undo_no = ut_dulint_zero; - } - - trx->last_sql_stat_start.least_undo_no = trx->undo_no; -} - -/************************************************************************** -Prints info about a transaction to the given file. The caller must own the -kernel mutex and must have called -innobase_mysql_prepare_print_arbitrary_thd(), unless he knows that MySQL -or InnoDB cannot meanwhile change the info printed here. */ - -void -trx_print( -/*======*/ - FILE* f, /* in: output stream */ - trx_t* trx, /* in: transaction */ - ulint max_query_len) /* in: max query length to print, or 0 to - use the default max length */ -{ - ibool newline; - - fprintf(f, "TRANSACTION %lu %lu", - (ulong) ut_dulint_get_high(trx->id), - (ulong) ut_dulint_get_low(trx->id)); - - switch (trx->conc_state) { - case TRX_NOT_STARTED: - fputs(", not started", f); - break; - case TRX_ACTIVE: - fprintf(f, ", ACTIVE %lu sec", - (ulong)difftime(time(NULL), trx->start_time)); - break; - case TRX_PREPARED: - fprintf(f, ", ACTIVE (PREPARED) %lu sec", - (ulong)difftime(time(NULL), trx->start_time)); - break; - case TRX_COMMITTED_IN_MEMORY: - fputs(", COMMITTED IN MEMORY", f); - break; - default: - fprintf(f, " state %lu", (ulong) trx->conc_state); - } - -#ifdef UNIV_LINUX - fprintf(f, ", process no %lu", trx->mysql_process_no); -#endif - fprintf(f, ", OS thread id %lu", - (ulong) os_thread_pf(trx->mysql_thread_id)); - - if (*trx->op_info) { - putc(' ', f); - fputs(trx->op_info, f); - } - - if (trx->is_purge) { - fputs(" purge trx", f); - } - - if (trx->declared_to_be_inside_innodb) { - fprintf(f, ", thread declared inside InnoDB %lu", - (ulong) trx->n_tickets_to_enter_innodb); - } - - putc('\n', f); - - if (trx->n_mysql_tables_in_use > 0 || trx->mysql_n_tables_locked > 0) { - fprintf(f, "mysql tables in use %lu, locked %lu\n", - (ulong) 
trx->n_mysql_tables_in_use, - (ulong) trx->mysql_n_tables_locked); - } - - newline = TRUE; - - switch (trx->que_state) { - case TRX_QUE_RUNNING: - newline = FALSE; break; - case TRX_QUE_LOCK_WAIT: - fputs("LOCK WAIT ", f); break; - case TRX_QUE_ROLLING_BACK: - fputs("ROLLING BACK ", f); break; - case TRX_QUE_COMMITTING: - fputs("COMMITTING ", f); break; - default: - fprintf(f, "que state %lu ", (ulong) trx->que_state); - } - - if (0 < UT_LIST_GET_LEN(trx->trx_locks) - || mem_heap_get_size(trx->lock_heap) > 400) { - newline = TRUE; - - fprintf(f, "%lu lock struct(s), heap size %lu," - " %lu row lock(s)", - (ulong) UT_LIST_GET_LEN(trx->trx_locks), - (ulong) mem_heap_get_size(trx->lock_heap), - (ulong) lock_number_of_rows_locked(trx)); - } - - if (trx->has_search_latch) { - newline = TRUE; - fputs(", holds adaptive hash latch", f); - } - - if (ut_dulint_cmp(trx->undo_no, ut_dulint_zero) != 0) { - newline = TRUE; - fprintf(f, ", undo log entries %lu", - (ulong) ut_dulint_get_low(trx->undo_no)); - } - - if (newline) { - putc('\n', f); - } - - if (trx->mysql_thd != NULL) { - innobase_mysql_print_thd(f, trx->mysql_thd, max_query_len); - } -} - -/*********************************************************************** -Compares the "weight" (or size) of two transactions. The weight of one -transaction is estimated as the number of altered rows + the number of -locked rows. Transactions that have edited non-transactional tables are -considered heavier than ones that have not. */ - -int -trx_weight_cmp( -/*===========*/ - /* out: <0, 0 or >0; similar to strcmp(3) */ - trx_t* a, /* in: the first transaction to be compared */ - trx_t* b) /* in: the second transaction to be compared */ -{ - ibool a_notrans_edit; - ibool b_notrans_edit; - - /* If mysql_thd is NULL for a transaction we assume that it has - not edited non-transactional tables. 
*/ - - a_notrans_edit = a->mysql_thd != NULL - && thd_has_edited_nontrans_tables(a->mysql_thd); - - b_notrans_edit = b->mysql_thd != NULL - && thd_has_edited_nontrans_tables(b->mysql_thd); - - if (a_notrans_edit && !b_notrans_edit) { - - return(1); - } - - if (!a_notrans_edit && b_notrans_edit) { - - return(-1); - } - - /* Either both had edited non-transactional tables or both had - not, we fall back to comparing the number of altered/locked - rows. */ - -#if 0 - fprintf(stderr, - "%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n", - __func__, - ut_conv_dulint_to_longlong(a->undo_no), - UT_LIST_GET_LEN(a->trx_locks), - ut_conv_dulint_to_longlong(b->undo_no), - UT_LIST_GET_LEN(b->trx_locks)); -#endif - -#define TRX_WEIGHT(t) \ - ut_dulint_add((t)->undo_no, UT_LIST_GET_LEN((t)->trx_locks)) - - return(ut_dulint_cmp(TRX_WEIGHT(a), TRX_WEIGHT(b))); -} - -/******************************************************************** -Prepares a transaction. */ - -void -trx_prepare_off_kernel( -/*===================*/ - trx_t* trx) /* in: transaction */ -{ - page_t* update_hdr_page; - trx_rseg_t* rseg; - ibool must_flush_log = FALSE; - dulint lsn; - mtr_t mtr; - - ut_ad(mutex_own(&kernel_mutex)); - - rseg = trx->rseg; - - if (trx->insert_undo != NULL || trx->update_undo != NULL) { - - mutex_exit(&kernel_mutex); - - mtr_start(&mtr); - - must_flush_log = TRUE; - - /* Change the undo log segment states from TRX_UNDO_ACTIVE - to TRX_UNDO_PREPARED: these modifications to the file data - structure define the transaction as prepared in the - file-based world, at the serialization point of lsn. */ - - mutex_enter(&(rseg->mutex)); - - if (trx->insert_undo != NULL) { - - /* It is not necessary to obtain trx->undo_mutex here - because only a single OS thread is allowed to do the - transaction prepare for this transaction. 
*/ - - trx_undo_set_state_at_prepare(trx, trx->insert_undo, - &mtr); - } - - if (trx->update_undo) { - update_hdr_page = trx_undo_set_state_at_prepare( - trx, trx->update_undo, &mtr); - } - - mutex_exit(&(rseg->mutex)); - - /*--------------*/ - mtr_commit(&mtr); /* This mtr commit makes the - transaction prepared in the file-based - world */ - /*--------------*/ - lsn = mtr.end_lsn; - - mutex_enter(&kernel_mutex); - } - - ut_ad(mutex_own(&kernel_mutex)); - - /*--------------------------------------*/ - trx->conc_state = TRX_PREPARED; - /*--------------------------------------*/ - - if (must_flush_log) { - /* Depending on the my.cnf options, we may now write the log - buffer to the log files, making the prepared state of the - transaction durable if the OS does not crash. We may also - flush the log files to disk, making the prepared state of the - transaction durable also at an OS crash or a power outage. - - The idea in InnoDB's group prepare is that a group of - transactions gather behind a trx doing a physical disk write - to log files, and when that physical write has been completed, - one of those transactions does a write which prepares the whole - group. Note that this group prepare will only bring benefit if - there are > 2 users in the database. Then at least 2 users can - gather behind one doing the physical log write to disk. - - TODO: find out if MySQL holds some mutex when calling this. - That would spoil our group prepare algorithm. 
*/ - - mutex_exit(&kernel_mutex); - - if (srv_flush_log_at_trx_commit == 0) { - /* Do nothing */ - } else if (srv_flush_log_at_trx_commit == 1) { - if (srv_unix_file_flush_method == SRV_UNIX_NOSYNC) { - /* Write the log but do not flush it to disk */ - - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, - FALSE); - } else { - /* Write the log to the log files AND flush - them to disk */ - - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, TRUE); - } - } else if (srv_flush_log_at_trx_commit == 2) { - - /* Write the log but do not flush it to disk */ - - log_write_up_to(lsn, LOG_WAIT_ONE_GROUP, FALSE); - } else { - ut_error; - } - - mutex_enter(&kernel_mutex); - } -} - -/************************************************************************** -Does the transaction prepare for MySQL. */ - -ulint -trx_prepare_for_mysql( -/*==================*/ - /* out: 0 or error number */ - trx_t* trx) /* in: trx handle */ -{ - /* Because we do not do the prepare by sending an Innobase - sig to the transaction, we must here make sure that trx has been - started. */ - - ut_a(trx); - - trx->op_info = "preparing"; - - trx_start_if_not_started(trx); - - mutex_enter(&kernel_mutex); - - trx_prepare_off_kernel(trx); - - mutex_exit(&kernel_mutex); - - trx->op_info = ""; - - return(0); -} - -/************************************************************************** -This function is used to find number of prepared transactions and -their transaction objects for a recovery. 
*/ - -int -trx_recover_for_mysql( -/*==================*/ - /* out: number of prepared transactions - stored in xid_list */ - XID* xid_list, /* in/out: prepared transactions */ - ulint len) /* in: number of slots in xid_list */ -{ - trx_t* trx; - ulint count = 0; - - ut_ad(xid_list); - ut_ad(len); - - /* We should set those transactions which are in the prepared state - to the xid_list */ - - mutex_enter(&kernel_mutex); - - trx = UT_LIST_GET_FIRST(trx_sys->trx_list); - - while (trx) { - if (trx->conc_state == TRX_PREPARED) { - xid_list[count] = trx->xid; - - if (count == 0) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Starting recovery for" - " XA transactions...\n"); - } - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Transaction %lu %lu in" - " prepared state after recovery\n", - (ulong) ut_dulint_get_high(trx->id), - (ulong) ut_dulint_get_low(trx->id)); - - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Transaction contains changes" - " to %lu rows\n", - (ulong) ut_conv_dulint_to_longlong( - trx->undo_no)); - - count++; - - if (count == len) { - break; - } - } - - trx = UT_LIST_GET_NEXT(trx_list, trx); - } - - mutex_exit(&kernel_mutex); - - if (count > 0){ - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: %lu transactions in prepared state" - " after recovery\n", - (ulong) count); - } - - return ((int) count); -} - -/*********************************************************************** -This function is used to find one X/Open XA distributed transaction -which is in the prepared state */ - -trx_t* -trx_get_trx_by_xid( -/*===============*/ - /* out: trx or NULL */ - XID* xid) /* in: X/Open XA transaction identification */ -{ - trx_t* trx; - - if (xid == NULL) { - - return (NULL); - } - - mutex_enter(&kernel_mutex); - - trx = UT_LIST_GET_FIRST(trx_sys->trx_list); - - while (trx) { - /* Compare two X/Open XA transaction id's: their - length should be the same and binary comparison - of gtrid_lenght+bqual_length 
bytes should be - the same */ - - if (xid->gtrid_length == trx->xid.gtrid_length - && xid->bqual_length == trx->xid.bqual_length - && memcmp(xid->data, trx->xid.data, - xid->gtrid_length + xid->bqual_length) == 0) { - break; - } - - trx = UT_LIST_GET_NEXT(trx_list, trx); - } - - mutex_exit(&kernel_mutex); - - if (trx) { - if (trx->conc_state != TRX_PREPARED) { - - return(NULL); - } - - return(trx); - } else { - return(NULL); - } -} diff --git a/storage/innobase/trx/trx0undo.c b/storage/innobase/trx/trx0undo.c deleted file mode 100644 index b31580d0ce0..00000000000 --- a/storage/innobase/trx/trx0undo.c +++ /dev/null @@ -1,1920 +0,0 @@ -/****************************************************** -Transaction undo log - -(c) 1996 Innobase Oy - -Created 3/26/1996 Heikki Tuuri -*******************************************************/ - -#include "trx0undo.h" - -#ifdef UNIV_NONINL -#include "trx0undo.ic" -#endif - -#include "fsp0fsp.h" -#include "mach0data.h" -#include "trx0rseg.h" -#include "trx0trx.h" -#include "srv0srv.h" -#include "trx0rec.h" -#include "trx0purge.h" -#include "trx0xa.h" - -/* How should the old versions in the history list be managed? - ---------------------------------------------------------- -If each transaction is given a whole page for its update undo log, file -space consumption can be 10 times higher than necessary. Therefore, -partly filled update undo log pages should be reusable. But then there -is no way individual pages can be ordered so that the ordering agrees -with the serialization numbers of the transactions on the pages. Thus, -the history list must be formed of undo logs, not their header pages as -it was in the old implementation. - However, on a single header page the transactions are placed in -the order of their serialization numbers. As old versions are purged, we -may free the page when the last transaction on the page has been purged. - A problem is that the purge has to go through the transactions -in the serialization order. 
This means that we have to look through all -rollback segments for the one that has the smallest transaction number -in its history list. - When should we do a purge? A purge is necessary when space is -running out in any of the rollback segments. Then we may have to purge -also old version which might be needed by some consistent read. How do -we trigger the start of a purge? When a transaction writes to an undo log, -it may notice that the space is running out. When a read view is closed, -it may make some history superfluous. The server can have an utility which -periodically checks if it can purge some history. - In a parallellized purge we have the problem that a query thread -can remove a delete marked clustered index record before another query -thread has processed an earlier version of the record, which cannot then -be done because the row cannot be constructed from the clustered index -record. To avoid this problem, we will store in the update and delete mark -undo record also the columns necessary to construct the secondary index -entries which are modified. - We can latch the stack of versions of a single clustered index record -by taking a latch on the clustered index page. As long as the latch is held, -no new versions can be added and no versions removed by undo. But, a purge -can still remove old versions from the bottom of the stack. */ - -/* How to protect rollback segments, undo logs, and history lists with - ------------------------------------------------------------------- -latches? -------- -The contention of the kernel mutex should be minimized. When a transaction -does its first insert or modify in an index, an undo log is assigned for it. -Then we must have an x-latch to the rollback segment header. - When the transaction does more modifys or rolls back, the undo log is -protected with undo_mutex in the transaction. - When the transaction commits, its insert undo log is either reset and -cached for a fast reuse, or freed. 
In these cases we must have an x-latch on -the rollback segment page. The update undo log is put to the history list. If -it is not suitable for reuse, its slot in the rollback segment is reset. In -both cases, an x-latch must be acquired on the rollback segment. - The purge operation steps through the history list without modifying -it until a truncate operation occurs, which can remove undo logs from the end -of the list and release undo log segments. In stepping through the list, -s-latches on the undo log pages are enough, but in a truncate, x-latches must -be obtained on the rollback segment and individual pages. */ - -/************************************************************************ -Initializes the fields in an undo log segment page. */ -static -void -trx_undo_page_init( -/*===============*/ - page_t* undo_page, /* in: undo log segment page */ - ulint type, /* in: undo log segment type */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************ -Creates and initializes an undo log memory object. */ -static -trx_undo_t* -trx_undo_mem_create( -/*================*/ - /* out, own: the undo log memory object */ - trx_rseg_t* rseg, /* in: rollback segment memory object */ - ulint id, /* in: slot index within rseg */ - ulint type, /* in: type of the log: TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ - dulint trx_id, /* in: id of the trx for which the undo log - is created */ - XID* xid, /* in: X/Open XA transaction identification*/ - ulint page_no,/* in: undo log header page number */ - ulint offset);/* in: undo log header byte offset on page */ -/******************************************************************* -Initializes a cached insert undo log header page for new use. NOTE that this -function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change -the operation of this function! 
*/ -static -ulint -trx_undo_insert_header_reuse( -/*=========================*/ - /* out: undo log header byte offset on page */ - page_t* undo_page, /* in: insert undo log segment header page, - x-latched */ - dulint trx_id, /* in: transaction id */ - mtr_t* mtr); /* in: mtr */ -/************************************************************************** -If an update undo log can be discarded immediately, this function frees the -space, resetting the page to the proper state for caching. */ -static -void -trx_undo_discard_latest_update_undo( -/*================================*/ - page_t* undo_page, /* in: header page of an undo log of size 1 */ - mtr_t* mtr); /* in: mtr */ - - -/*************************************************************************** -Gets the previous record in an undo log from the previous page. */ -static -trx_undo_rec_t* -trx_undo_get_prev_rec_from_prev_page( -/*=================================*/ - /* out: undo log record, the page s-latched, - NULL if none */ - trx_undo_rec_t* rec, /* in: undo record */ - ulint page_no,/* in: undo log header page number */ - ulint offset, /* in: undo log header offset on page */ - mtr_t* mtr) /* in: mtr */ -{ - ulint prev_page_no; - page_t* prev_page; - page_t* undo_page; - - undo_page = buf_frame_align(rec); - - prev_page_no = flst_get_prev_addr(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_NODE, mtr) - .page; - - if (prev_page_no == FIL_NULL) { - - return(NULL); - } - - prev_page = trx_undo_page_get_s_latched( - buf_frame_get_space_id(undo_page), prev_page_no, mtr); - - return(trx_undo_page_get_last_rec(prev_page, page_no, offset)); -} - -/*************************************************************************** -Gets the previous record in an undo log. 
*/ - -trx_undo_rec_t* -trx_undo_get_prev_rec( -/*==================*/ - /* out: undo log record, the page s-latched, - NULL if none */ - trx_undo_rec_t* rec, /* in: undo record */ - ulint page_no,/* in: undo log header page number */ - ulint offset, /* in: undo log header offset on page */ - mtr_t* mtr) /* in: mtr */ -{ - trx_undo_rec_t* prev_rec; - - prev_rec = trx_undo_page_get_prev_rec(rec, page_no, offset); - - if (prev_rec) { - - return(prev_rec); - } - - /* We have to go to the previous undo log page to look for the - previous record */ - - return(trx_undo_get_prev_rec_from_prev_page(rec, page_no, offset, - mtr)); -} - -/*************************************************************************** -Gets the next record in an undo log from the next page. */ -static -trx_undo_rec_t* -trx_undo_get_next_rec_from_next_page( -/*=================================*/ - /* out: undo log record, the page latched, NULL if - none */ - page_t* undo_page, /* in: undo log page */ - ulint page_no,/* in: undo log header page number */ - ulint offset, /* in: undo log header offset on page */ - ulint mode, /* in: latch mode: RW_S_LATCH or RW_X_LATCH */ - mtr_t* mtr) /* in: mtr */ -{ - trx_ulogf_t* log_hdr; - ulint next_page_no; - page_t* next_page; - ulint space; - ulint next; - - if (page_no == buf_frame_get_page_no(undo_page)) { - - log_hdr = undo_page + offset; - next = mach_read_from_2(log_hdr + TRX_UNDO_NEXT_LOG); - - if (next != 0) { - - return(NULL); - } - } - - space = buf_frame_get_space_id(undo_page); - - next_page_no = flst_get_next_addr(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_NODE, mtr) - .page; - if (next_page_no == FIL_NULL) { - - return(NULL); - } - - if (mode == RW_S_LATCH) { - next_page = trx_undo_page_get_s_latched(space, next_page_no, - mtr); - } else { - ut_ad(mode == RW_X_LATCH); - next_page = trx_undo_page_get(space, next_page_no, mtr); - } - - return(trx_undo_page_get_first_rec(next_page, page_no, offset)); -} - 
-/*************************************************************************** -Gets the next record in an undo log. */ - -trx_undo_rec_t* -trx_undo_get_next_rec( -/*==================*/ - /* out: undo log record, the page s-latched, - NULL if none */ - trx_undo_rec_t* rec, /* in: undo record */ - ulint page_no,/* in: undo log header page number */ - ulint offset, /* in: undo log header offset on page */ - mtr_t* mtr) /* in: mtr */ -{ - trx_undo_rec_t* next_rec; - - next_rec = trx_undo_page_get_next_rec(rec, page_no, offset); - - if (next_rec) { - return(next_rec); - } - - return(trx_undo_get_next_rec_from_next_page(buf_frame_align(rec), - page_no, offset, - RW_S_LATCH, mtr)); -} - -/*************************************************************************** -Gets the first record in an undo log. */ - -trx_undo_rec_t* -trx_undo_get_first_rec( -/*===================*/ - /* out: undo log record, the page latched, NULL if - none */ - ulint space, /* in: undo log header space */ - ulint page_no,/* in: undo log header page number */ - ulint offset, /* in: undo log header offset on page */ - ulint mode, /* in: latching mode: RW_S_LATCH or RW_X_LATCH */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* undo_page; - trx_undo_rec_t* rec; - - if (mode == RW_S_LATCH) { - undo_page = trx_undo_page_get_s_latched(space, page_no, mtr); - } else { - undo_page = trx_undo_page_get(space, page_no, mtr); - } - - rec = trx_undo_page_get_first_rec(undo_page, page_no, offset); - - if (rec) { - return(rec); - } - - return(trx_undo_get_next_rec_from_next_page(undo_page, page_no, offset, - mode, mtr)); -} - -/*============== UNDO LOG FILE COPY CREATION AND FREEING ==================*/ - -/************************************************************************** -Writes the mtr log entry of an undo log page initialization. 
*/ -UNIV_INLINE -void -trx_undo_page_init_log( -/*===================*/ - page_t* undo_page, /* in: undo log page */ - ulint type, /* in: undo log type */ - mtr_t* mtr) /* in: mtr */ -{ - mlog_write_initial_log_record(undo_page, MLOG_UNDO_INIT, mtr); - - mlog_catenate_ulint_compressed(mtr, type); -} - -/*************************************************************** -Parses the redo log entry of an undo log page initialization. */ - -byte* -trx_undo_parse_page_init( -/*=====================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ -{ - ulint type; - - ptr = mach_parse_compressed(ptr, end_ptr, &type); - - if (ptr == NULL) { - - return(NULL); - } - - if (page) { - trx_undo_page_init(page, type, mtr); - } - - return(ptr); -} - -/************************************************************************ -Initializes the fields in an undo log segment page. */ -static -void -trx_undo_page_init( -/*===============*/ - page_t* undo_page, /* in: undo log segment page */ - ulint type, /* in: undo log segment type */ - mtr_t* mtr) /* in: mtr */ -{ - trx_upagef_t* page_hdr; - - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_TYPE, type); - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, - TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE); - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, - TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_HDR_SIZE); - - fil_page_set_type(undo_page, FIL_PAGE_UNDO_LOG); - - trx_undo_page_init_log(undo_page, type, mtr); -} - -/******************************************************************* -Creates a new undo log segment in file. 
*/ -static -ulint -trx_undo_seg_create( -/*================*/ - /* out: DB_SUCCESS if page creation OK - possible error codes are: - DB_TOO_MANY_CONCURRENT_TRXS - DB_OUT_OF_FILE_SPACE */ - trx_rseg_t* rseg __attribute__((unused)),/* in: rollback segment */ - trx_rsegf_t* rseg_hdr,/* in: rollback segment header, page - x-latched */ - ulint type, /* in: type of the segment: TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ - ulint* id, /* out: slot index within rseg header */ - page_t** undo_page, - /* out: segment header page x-latched, NULL - if there was an error */ - mtr_t* mtr) /* in: mtr */ -{ - ulint slot_no; - ulint space; - trx_upagef_t* page_hdr; - trx_usegf_t* seg_hdr; - ulint n_reserved; - ibool success; - ulint err = DB_SUCCESS; - - ut_ad(mtr && id && rseg_hdr); - ut_ad(mutex_own(&(rseg->mutex))); - - /* fputs(type == TRX_UNDO_INSERT - ? "Creating insert undo log segment\n" - : "Creating update undo log segment\n", stderr); */ - slot_no = trx_rsegf_undo_find_free(rseg_hdr, mtr); - - if (slot_no == ULINT_UNDEFINED) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Warning: cannot find a free slot for" - " an undo log. 
Do you have too\n" - "InnoDB: many active transactions" - " running concurrently?\n"); - - return(DB_TOO_MANY_CONCURRENT_TRXS); - } - - space = buf_frame_get_space_id(rseg_hdr); - - success = fsp_reserve_free_extents(&n_reserved, space, 2, FSP_UNDO, - mtr); - if (!success) { - - return(DB_OUT_OF_FILE_SPACE); - } - - /* Allocate a new file segment for the undo log */ - *undo_page = fseg_create_general(space, 0, - TRX_UNDO_SEG_HDR - + TRX_UNDO_FSEG_HEADER, TRUE, mtr); - - fil_space_release_free_extents(space, n_reserved); - - if (*undo_page == NULL) { - /* No space left */ - - return(DB_OUT_OF_FILE_SPACE); - } - -#ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(*undo_page, SYNC_TRX_UNDO_PAGE); -#endif /* UNIV_SYNC_DEBUG */ - - page_hdr = *undo_page + TRX_UNDO_PAGE_HDR; - seg_hdr = *undo_page + TRX_UNDO_SEG_HDR; - - trx_undo_page_init(*undo_page, type, mtr); - - mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE, - TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE, - MLOG_2BYTES, mtr); - - mlog_write_ulint(seg_hdr + TRX_UNDO_LAST_LOG, 0, MLOG_2BYTES, mtr); - - flst_init(seg_hdr + TRX_UNDO_PAGE_LIST, mtr); - - flst_add_last(seg_hdr + TRX_UNDO_PAGE_LIST, - page_hdr + TRX_UNDO_PAGE_NODE, mtr); - - trx_rsegf_set_nth_undo(rseg_hdr, slot_no, - buf_frame_get_page_no(*undo_page), mtr); - - *id = slot_no; - - return(err); -} - -/************************************************************************** -Writes the mtr log entry of an undo log header initialization. */ -UNIV_INLINE -void -trx_undo_header_create_log( -/*=======================*/ - page_t* undo_page, /* in: undo log header page */ - dulint trx_id, /* in: transaction id */ - mtr_t* mtr) /* in: mtr */ -{ - mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_CREATE, mtr); - - mlog_catenate_dulint_compressed(mtr, trx_id); -} - -/******************************************************************* -Creates a new undo log header in file. NOTE that this function has its own -log record type MLOG_UNDO_HDR_CREATE. 
You must NOT change the operation of -this function! */ -static -ulint -trx_undo_header_create( -/*===================*/ - /* out: header byte offset on page */ - page_t* undo_page, /* in: undo log segment header page, - x-latched; it is assumed that there are - TRX_UNDO_LOG_XA_HDR_SIZE bytes free space - on it */ - dulint trx_id, /* in: transaction id */ - mtr_t* mtr) /* in: mtr */ -{ - trx_upagef_t* page_hdr; - trx_usegf_t* seg_hdr; - trx_ulogf_t* log_hdr; - trx_ulogf_t* prev_log_hdr; - ulint prev_log; - ulint free; - ulint new_free; - - ut_ad(mtr && undo_page); - - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - - free = mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE); - - log_hdr = undo_page + free; - - new_free = free + TRX_UNDO_LOG_OLD_HDR_SIZE; - - ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < UNIV_PAGE_SIZE - 100); - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free); - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free); - - mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_ACTIVE); - - prev_log = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG); - - if (prev_log != 0) { - prev_log_hdr = undo_page + prev_log; - - mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, free); - } - - mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, free); - - log_hdr = undo_page + free; - - mach_write_to_2(log_hdr + TRX_UNDO_DEL_MARKS, TRUE); - - mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id); - mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free); - - mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE); - mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE); - - mach_write_to_2(log_hdr + TRX_UNDO_NEXT_LOG, 0); - mach_write_to_2(log_hdr + TRX_UNDO_PREV_LOG, prev_log); - - /* Write the log record about the header creation */ - trx_undo_header_create_log(undo_page, trx_id, mtr); - - return(free); -} - -/************************************************************************ -Write X/Open XA Transaction Identification (XID) to undo 
log header */ -static -void -trx_undo_write_xid( -/*===============*/ - trx_ulogf_t* log_hdr,/* in: undo log header */ - const XID* xid, /* in: X/Open XA Transaction Identification */ - mtr_t* mtr) /* in: mtr */ -{ - mlog_write_ulint(log_hdr + TRX_UNDO_XA_FORMAT, - (ulint)xid->formatID, MLOG_4BYTES, mtr); - - mlog_write_ulint(log_hdr + TRX_UNDO_XA_TRID_LEN, - (ulint)xid->gtrid_length, MLOG_4BYTES, mtr); - - mlog_write_ulint(log_hdr + TRX_UNDO_XA_BQUAL_LEN, - (ulint)xid->bqual_length, MLOG_4BYTES, mtr); - - mlog_write_string(log_hdr + TRX_UNDO_XA_XID, (const byte*) xid->data, - XIDDATASIZE, mtr); -} - -/************************************************************************ -Read X/Open XA Transaction Identification (XID) from undo log header */ -static -void -trx_undo_read_xid( -/*==============*/ - trx_ulogf_t* log_hdr,/* in: undo log header */ - XID* xid) /* out: X/Open XA Transaction Identification */ -{ - xid->formatID = (long)mach_read_from_4(log_hdr + TRX_UNDO_XA_FORMAT); - - xid->gtrid_length - = (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_TRID_LEN); - xid->bqual_length - = (long) mach_read_from_4(log_hdr + TRX_UNDO_XA_BQUAL_LEN); - - memcpy(xid->data, log_hdr + TRX_UNDO_XA_XID, XIDDATASIZE); -} - -/******************************************************************* -Adds space for the XA XID after an undo log old-style header. 
*/ -static -void -trx_undo_header_add_space_for_xid( -/*==============================*/ - page_t* undo_page,/* in: undo log segment header page */ - trx_ulogf_t* log_hdr,/* in: undo log header */ - mtr_t* mtr) /* in: mtr */ -{ - trx_upagef_t* page_hdr; - ulint free; - ulint new_free; - - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - - free = mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE); - - /* free is now the end offset of the old style undo log header */ - - ut_a(free == (ulint)(log_hdr - undo_page) + TRX_UNDO_LOG_OLD_HDR_SIZE); - - new_free = free + (TRX_UNDO_LOG_XA_HDR_SIZE - - TRX_UNDO_LOG_OLD_HDR_SIZE); - - /* Add space for a XID after the header, update the free offset - fields on the undo log page and in the undo log header */ - - mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_START, new_free, - MLOG_2BYTES, mtr); - - mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE, new_free, - MLOG_2BYTES, mtr); - - mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, new_free, - MLOG_2BYTES, mtr); -} - -/************************************************************************** -Writes the mtr log entry of an undo log header reuse. */ -UNIV_INLINE -void -trx_undo_insert_header_reuse_log( -/*=============================*/ - page_t* undo_page, /* in: undo log header page */ - dulint trx_id, /* in: transaction id */ - mtr_t* mtr) /* in: mtr */ -{ - mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_REUSE, mtr); - - mlog_catenate_dulint_compressed(mtr, trx_id); -} - -/*************************************************************** -Parses the redo log entry of an undo log page header create or reuse. 
*/ - -byte* -trx_undo_parse_page_header( -/*=======================*/ - /* out: end of log record or NULL */ - ulint type, /* in: MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ -{ - dulint trx_id; - - ptr = mach_dulint_parse_compressed(ptr, end_ptr, &trx_id); - - if (ptr == NULL) { - - return(NULL); - } - - if (page) { - if (type == MLOG_UNDO_HDR_CREATE) { - trx_undo_header_create(page, trx_id, mtr); - } else { - ut_ad(type == MLOG_UNDO_HDR_REUSE); - trx_undo_insert_header_reuse(page, trx_id, mtr); - } - } - - return(ptr); -} - -/******************************************************************* -Initializes a cached insert undo log header page for new use. NOTE that this -function has its own log record type MLOG_UNDO_HDR_REUSE. You must NOT change -the operation of this function! */ -static -ulint -trx_undo_insert_header_reuse( -/*=========================*/ - /* out: undo log header byte offset on page */ - page_t* undo_page, /* in: insert undo log segment header page, - x-latched */ - dulint trx_id, /* in: transaction id */ - mtr_t* mtr) /* in: mtr */ -{ - trx_upagef_t* page_hdr; - trx_usegf_t* seg_hdr; - trx_ulogf_t* log_hdr; - ulint free; - ulint new_free; - - ut_ad(mtr && undo_page); - - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - - free = TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE; - - ut_a(free + TRX_UNDO_LOG_XA_HDR_SIZE < UNIV_PAGE_SIZE - 100); - - log_hdr = undo_page + free; - - new_free = free + TRX_UNDO_LOG_OLD_HDR_SIZE; - - /* Insert undo data is not needed after commit: we may free all - the space on the page */ - - ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_TYPE) - == TRX_UNDO_INSERT); - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, new_free); - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, new_free); - - mach_write_to_2(seg_hdr + 
TRX_UNDO_STATE, TRX_UNDO_ACTIVE); - - log_hdr = undo_page + free; - - mach_write_to_8(log_hdr + TRX_UNDO_TRX_ID, trx_id); - mach_write_to_2(log_hdr + TRX_UNDO_LOG_START, new_free); - - mach_write_to_1(log_hdr + TRX_UNDO_XID_EXISTS, FALSE); - mach_write_to_1(log_hdr + TRX_UNDO_DICT_TRANS, FALSE); - - /* Write the log record MLOG_UNDO_HDR_REUSE */ - trx_undo_insert_header_reuse_log(undo_page, trx_id, mtr); - - return(free); -} - -/************************************************************************** -Writes the redo log entry of an update undo log header discard. */ -UNIV_INLINE -void -trx_undo_discard_latest_log( -/*========================*/ - page_t* undo_page, /* in: undo log header page */ - mtr_t* mtr) /* in: mtr */ -{ - mlog_write_initial_log_record(undo_page, MLOG_UNDO_HDR_DISCARD, mtr); -} - -/*************************************************************** -Parses the redo log entry of an undo log page header discard. */ - -byte* -trx_undo_parse_discard_latest( -/*==========================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr __attribute__((unused)), /* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ -{ - ut_ad(end_ptr); - - if (page) { - trx_undo_discard_latest_update_undo(page, mtr); - } - - return(ptr); -} - -/************************************************************************** -If an update undo log can be discarded immediately, this function frees the -space, resetting the page to the proper state for caching. 
*/ -static -void -trx_undo_discard_latest_update_undo( -/*================================*/ - page_t* undo_page, /* in: header page of an undo log of size 1 */ - mtr_t* mtr) /* in: mtr */ -{ - trx_usegf_t* seg_hdr; - trx_upagef_t* page_hdr; - trx_ulogf_t* log_hdr; - trx_ulogf_t* prev_log_hdr; - ulint free; - ulint prev_hdr_offset; - - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - - free = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG); - log_hdr = undo_page + free; - - prev_hdr_offset = mach_read_from_2(log_hdr + TRX_UNDO_PREV_LOG); - - if (prev_hdr_offset != 0) { - prev_log_hdr = undo_page + prev_hdr_offset; - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_START, - mach_read_from_2(prev_log_hdr - + TRX_UNDO_LOG_START)); - mach_write_to_2(prev_log_hdr + TRX_UNDO_NEXT_LOG, 0); - } - - mach_write_to_2(page_hdr + TRX_UNDO_PAGE_FREE, free); - - mach_write_to_2(seg_hdr + TRX_UNDO_STATE, TRX_UNDO_CACHED); - mach_write_to_2(seg_hdr + TRX_UNDO_LAST_LOG, prev_hdr_offset); - - trx_undo_discard_latest_log(undo_page, mtr); -} - -/************************************************************************ -Tries to add a page to the undo log segment where the undo log is placed. 
*/ - -ulint -trx_undo_add_page( -/*==============*/ - /* out: page number if success, else - FIL_NULL */ - trx_t* trx, /* in: transaction */ - trx_undo_t* undo, /* in: undo log memory object */ - mtr_t* mtr) /* in: mtr which does not have a latch to any - undo log page; the caller must have reserved - the rollback segment mutex */ -{ - page_t* header_page; - page_t* new_page; - trx_rseg_t* rseg; - ulint page_no; - ulint n_reserved; - ibool success; - - ut_ad(mutex_own(&(trx->undo_mutex))); - ut_ad(!mutex_own(&kernel_mutex)); - ut_ad(mutex_own(&(trx->rseg->mutex))); - - rseg = trx->rseg; - - if (rseg->curr_size == rseg->max_size) { - - return(FIL_NULL); - } - - header_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr); - - success = fsp_reserve_free_extents(&n_reserved, undo->space, 1, - FSP_UNDO, mtr); - if (!success) { - - return(FIL_NULL); - } - - page_no = fseg_alloc_free_page_general(header_page + TRX_UNDO_SEG_HDR - + TRX_UNDO_FSEG_HEADER, - undo->top_page_no + 1, FSP_UP, - TRUE, mtr); - - fil_space_release_free_extents(undo->space, n_reserved); - - if (page_no == FIL_NULL) { - - /* No space left */ - - return(FIL_NULL); - } - - undo->last_page_no = page_no; - - new_page = trx_undo_page_get(undo->space, page_no, mtr); - - trx_undo_page_init(new_page, undo->type, mtr); - - flst_add_last(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST, - new_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr); - undo->size++; - rseg->curr_size++; - - return(page_no); -} - -/************************************************************************ -Frees an undo log page that is not the header page. 
*/ -static -ulint -trx_undo_free_page( -/*===============*/ - /* out: last page number in remaining log */ - trx_rseg_t* rseg, /* in: rollback segment */ - ibool in_history, /* in: TRUE if the undo log is in the history - list */ - ulint space, /* in: space */ - ulint hdr_page_no, /* in: header page number */ - ulint page_no, /* in: page number to free: must not be the - header page */ - mtr_t* mtr) /* in: mtr which does not have a latch to any - undo log page; the caller must have reserved - the rollback segment mutex */ -{ - page_t* header_page; - page_t* undo_page; - fil_addr_t last_addr; - trx_rsegf_t* rseg_header; - ulint hist_size; - - ut_a(hdr_page_no != page_no); - ut_ad(!mutex_own(&kernel_mutex)); - ut_ad(mutex_own(&(rseg->mutex))); - - undo_page = trx_undo_page_get(space, page_no, mtr); - - header_page = trx_undo_page_get(space, hdr_page_no, mtr); - - flst_remove(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_PAGE_LIST, - undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_NODE, mtr); - - fseg_free_page(header_page + TRX_UNDO_SEG_HDR + TRX_UNDO_FSEG_HEADER, - space, page_no, mtr); - - last_addr = flst_get_last(header_page + TRX_UNDO_SEG_HDR - + TRX_UNDO_PAGE_LIST, mtr); - rseg->curr_size--; - - if (in_history) { - rseg_header = trx_rsegf_get(space, rseg->page_no, mtr); - - hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, - MLOG_4BYTES, mtr); - ut_ad(hist_size > 0); - mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, - hist_size - 1, MLOG_4BYTES, mtr); - } - - return(last_addr.page); -} - -/************************************************************************ -Frees an undo log page when there is also the memory object for the undo -log. 
*/ -static -void -trx_undo_free_page_in_rollback( -/*===========================*/ - trx_t* trx __attribute__((unused)), /* in: transaction */ - trx_undo_t* undo, /* in: undo log memory copy */ - ulint page_no,/* in: page number to free: must not be the - header page */ - mtr_t* mtr) /* in: mtr which does not have a latch to any - undo log page; the caller must have reserved - the rollback segment mutex */ -{ - ulint last_page_no; - - ut_ad(undo->hdr_page_no != page_no); - ut_ad(mutex_own(&(trx->undo_mutex))); - - last_page_no = trx_undo_free_page(undo->rseg, FALSE, undo->space, - undo->hdr_page_no, page_no, mtr); - - undo->last_page_no = last_page_no; - undo->size--; -} - -/************************************************************************ -Empties an undo log header page of undo records for that undo log. Other -undo logs may still have records on that page, if it is an update undo log. */ -static -void -trx_undo_empty_header_page( -/*=======================*/ - ulint space, /* in: space */ - ulint hdr_page_no, /* in: header page number */ - ulint hdr_offset, /* in: header offset */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* header_page; - trx_ulogf_t* log_hdr; - ulint end; - - header_page = trx_undo_page_get(space, hdr_page_no, mtr); - - log_hdr = header_page + hdr_offset; - - end = trx_undo_page_get_end(header_page, hdr_page_no, hdr_offset); - - mlog_write_ulint(log_hdr + TRX_UNDO_LOG_START, end, MLOG_2BYTES, mtr); -} - -/*************************************************************************** -Truncates an undo log from the end. This function is used during a rollback -to free space from an undo log. 
*/ - -void -trx_undo_truncate_end( -/*==================*/ - trx_t* trx, /* in: transaction whose undo log it is */ - trx_undo_t* undo, /* in: undo log */ - dulint limit) /* in: all undo records with undo number - >= this value should be truncated */ -{ - page_t* undo_page; - ulint last_page_no; - trx_undo_rec_t* rec; - trx_undo_rec_t* trunc_here; - trx_rseg_t* rseg; - mtr_t mtr; - - ut_ad(mutex_own(&(trx->undo_mutex))); - ut_ad(mutex_own(&(trx->rseg->mutex))); - - rseg = trx->rseg; - - for (;;) { - mtr_start(&mtr); - - trunc_here = NULL; - - last_page_no = undo->last_page_no; - - undo_page = trx_undo_page_get(undo->space, last_page_no, &mtr); - - rec = trx_undo_page_get_last_rec(undo_page, undo->hdr_page_no, - undo->hdr_offset); - for (;;) { - if (rec == NULL) { - if (last_page_no == undo->hdr_page_no) { - - goto function_exit; - } - - trx_undo_free_page_in_rollback( - trx, undo, last_page_no, &mtr); - break; - } - - if (ut_dulint_cmp(trx_undo_rec_get_undo_no(rec), limit) - >= 0) { - /* Truncate at least this record off, maybe - more */ - trunc_here = rec; - } else { - goto function_exit; - } - - rec = trx_undo_page_get_prev_rec(rec, - undo->hdr_page_no, - undo->hdr_offset); - } - - mtr_commit(&mtr); - } - -function_exit: - if (trunc_here) { - mlog_write_ulint(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_FREE, - trunc_here - undo_page, MLOG_2BYTES, &mtr); - } - - mtr_commit(&mtr); -} - -/*************************************************************************** -Truncates an undo log from the start. This function is used during a purge -operation. 
*/ - -void -trx_undo_truncate_start( -/*====================*/ - trx_rseg_t* rseg, /* in: rollback segment */ - ulint space, /* in: space id of the log */ - ulint hdr_page_no, /* in: header page number */ - ulint hdr_offset, /* in: header offset on the page */ - dulint limit) /* in: all undo pages with undo numbers < - this value should be truncated; NOTE that - the function only frees whole pages; the - header page is not freed, but emptied, if - all the records there are < limit */ -{ - page_t* undo_page; - trx_undo_rec_t* rec; - trx_undo_rec_t* last_rec; - ulint page_no; - mtr_t mtr; - - ut_ad(mutex_own(&(rseg->mutex))); - - if (0 == ut_dulint_cmp(limit, ut_dulint_zero)) { - - return; - } -loop: - mtr_start(&mtr); - - rec = trx_undo_get_first_rec(space, hdr_page_no, hdr_offset, - RW_X_LATCH, &mtr); - if (rec == NULL) { - /* Already empty */ - - mtr_commit(&mtr); - - return; - } - - undo_page = buf_frame_align(rec); - - last_rec = trx_undo_page_get_last_rec(undo_page, hdr_page_no, - hdr_offset); - if (ut_dulint_cmp(trx_undo_rec_get_undo_no(last_rec), limit) >= 0) { - - mtr_commit(&mtr); - - return; - } - - page_no = buf_frame_get_page_no(undo_page); - - if (page_no == hdr_page_no) { - trx_undo_empty_header_page(space, hdr_page_no, hdr_offset, - &mtr); - } else { - trx_undo_free_page(rseg, TRUE, space, hdr_page_no, - page_no, &mtr); - } - - mtr_commit(&mtr); - - goto loop; -} - -/************************************************************************** -Frees an undo log segment which is not in the history list. 
*/ -static -void -trx_undo_seg_free( -/*==============*/ - trx_undo_t* undo) /* in: undo log */ -{ - trx_rseg_t* rseg; - fseg_header_t* file_seg; - trx_rsegf_t* rseg_header; - trx_usegf_t* seg_header; - ibool finished; - mtr_t mtr; - - finished = FALSE; - rseg = undo->rseg; - - while (!finished) { - - mtr_start(&mtr); - - ut_ad(!mutex_own(&kernel_mutex)); - - mutex_enter(&(rseg->mutex)); - - seg_header = trx_undo_page_get(undo->space, undo->hdr_page_no, - &mtr) + TRX_UNDO_SEG_HDR; - - file_seg = seg_header + TRX_UNDO_FSEG_HEADER; - - finished = fseg_free_step(file_seg, &mtr); - - if (finished) { - /* Update the rseg header */ - rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, - &mtr); - trx_rsegf_set_nth_undo(rseg_header, undo->id, FIL_NULL, - &mtr); - } - - mutex_exit(&(rseg->mutex)); - mtr_commit(&mtr); - } -} - -/*========== UNDO LOG MEMORY COPY INITIALIZATION =====================*/ - -/************************************************************************ -Creates and initializes an undo log memory object according to the values -in the header in file, when the database is started. The memory object is -inserted in the appropriate list of rseg. 
*/ -static -trx_undo_t* -trx_undo_mem_create_at_db_start( -/*============================*/ - /* out, own: the undo log memory object */ - trx_rseg_t* rseg, /* in: rollback segment memory object */ - ulint id, /* in: slot index within rseg */ - ulint page_no,/* in: undo log segment page number */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* undo_page; - trx_upagef_t* page_header; - trx_usegf_t* seg_header; - trx_ulogf_t* undo_header; - trx_undo_t* undo; - ulint type; - ulint state; - dulint trx_id; - ulint offset; - fil_addr_t last_addr; - page_t* last_page; - trx_undo_rec_t* rec; - XID xid; - ibool xid_exists = FALSE; - - if (id >= TRX_RSEG_N_SLOTS) { - fprintf(stderr, - "InnoDB: Error: undo->id is %lu\n", (ulong) id); - ut_error; - } - - undo_page = trx_undo_page_get(rseg->space, page_no, mtr); - - page_header = undo_page + TRX_UNDO_PAGE_HDR; - - type = mtr_read_ulint(page_header + TRX_UNDO_PAGE_TYPE, MLOG_2BYTES, - mtr); - seg_header = undo_page + TRX_UNDO_SEG_HDR; - - state = mach_read_from_2(seg_header + TRX_UNDO_STATE); - - offset = mach_read_from_2(seg_header + TRX_UNDO_LAST_LOG); - - undo_header = undo_page + offset; - - trx_id = mtr_read_dulint(undo_header + TRX_UNDO_TRX_ID, mtr); - - xid_exists = mtr_read_ulint(undo_header + TRX_UNDO_XID_EXISTS, - MLOG_1BYTE, mtr); - - /* Read X/Open XA transaction identification if it exists, or - set it to NULL. */ - - memset(&xid, 0, sizeof(xid)); - xid.formatID = -1; - - if (xid_exists == TRUE) { - trx_undo_read_xid(undo_header, &xid); - } - - mutex_enter(&(rseg->mutex)); - - undo = trx_undo_mem_create(rseg, id, type, trx_id, &xid, - page_no, offset); - mutex_exit(&(rseg->mutex)); - - undo->dict_operation = mtr_read_ulint( - undo_header + TRX_UNDO_DICT_TRANS, MLOG_1BYTE, mtr); - - undo->table_id = mtr_read_dulint(undo_header + TRX_UNDO_TABLE_ID, mtr); - undo->state = state; - undo->size = flst_get_len(seg_header + TRX_UNDO_PAGE_LIST, mtr); - - /* If the log segment is being freed, the page list is inconsistent! 
*/ - if (state == TRX_UNDO_TO_FREE) { - - goto add_to_list; - } - - last_addr = flst_get_last(seg_header + TRX_UNDO_PAGE_LIST, mtr); - - undo->last_page_no = last_addr.page; - undo->top_page_no = last_addr.page; - - last_page = trx_undo_page_get(rseg->space, undo->last_page_no, mtr); - - rec = trx_undo_page_get_last_rec(last_page, page_no, offset); - - if (rec == NULL) { - undo->empty = TRUE; - } else { - undo->empty = FALSE; - undo->top_offset = rec - last_page; - undo->top_undo_no = trx_undo_rec_get_undo_no(rec); - } -add_to_list: - if (type == TRX_UNDO_INSERT) { - if (state != TRX_UNDO_CACHED) { - UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_list, - undo); - } else { - UT_LIST_ADD_LAST(undo_list, rseg->insert_undo_cached, - undo); - } - } else { - ut_ad(type == TRX_UNDO_UPDATE); - if (state != TRX_UNDO_CACHED) { - UT_LIST_ADD_LAST(undo_list, rseg->update_undo_list, - undo); - } else { - UT_LIST_ADD_LAST(undo_list, rseg->update_undo_cached, - undo); - } - } - - return(undo); -} - -/************************************************************************ -Initializes the undo log lists for a rollback segment memory copy. This -function is only called when the database is started or a new rollback -segment is created. 
*/ - -ulint -trx_undo_lists_init( -/*================*/ - /* out: the combined size of undo log segments - in pages */ - trx_rseg_t* rseg) /* in: rollback segment memory object */ -{ - ulint page_no; - trx_undo_t* undo; - ulint size = 0; - trx_rsegf_t* rseg_header; - ulint i; - mtr_t mtr; - - UT_LIST_INIT(rseg->update_undo_list); - UT_LIST_INIT(rseg->update_undo_cached); - UT_LIST_INIT(rseg->insert_undo_list); - UT_LIST_INIT(rseg->insert_undo_cached); - - mtr_start(&mtr); - - rseg_header = trx_rsegf_get_new(rseg->space, rseg->page_no, &mtr); - - for (i = 0; i < TRX_RSEG_N_SLOTS; i++) { - page_no = trx_rsegf_get_nth_undo(rseg_header, i, &mtr); - - /* In forced recovery: try to avoid operations which look - at database pages; undo logs are rapidly changing data, and - the probability that they are in an inconsistent state is - high */ - - if (page_no != FIL_NULL - && srv_force_recovery < SRV_FORCE_NO_UNDO_LOG_SCAN) { - - undo = trx_undo_mem_create_at_db_start(rseg, i, - page_no, &mtr); - size += undo->size; - - mtr_commit(&mtr); - - mtr_start(&mtr); - - rseg_header = trx_rsegf_get(rseg->space, - rseg->page_no, &mtr); - } - } - - mtr_commit(&mtr); - - return(size); -} - -/************************************************************************ -Creates and initializes an undo log memory object. 
*/ -static -trx_undo_t* -trx_undo_mem_create( -/*================*/ - /* out, own: the undo log memory object */ - trx_rseg_t* rseg, /* in: rollback segment memory object */ - ulint id, /* in: slot index within rseg */ - ulint type, /* in: type of the log: TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ - dulint trx_id, /* in: id of the trx for which the undo log - is created */ - XID* xid, /* in: X/Open transaction identification */ - ulint page_no,/* in: undo log header page number */ - ulint offset) /* in: undo log header byte offset on page */ -{ - trx_undo_t* undo; - - ut_ad(mutex_own(&(rseg->mutex))); - - if (id >= TRX_RSEG_N_SLOTS) { - fprintf(stderr, - "InnoDB: Error: undo->id is %lu\n", (ulong) id); - ut_error; - } - - undo = mem_alloc(sizeof(trx_undo_t)); - - if (undo == NULL) { - - return NULL; - } - - undo->id = id; - undo->type = type; - undo->state = TRX_UNDO_ACTIVE; - undo->del_marks = FALSE; - undo->trx_id = trx_id; - undo->xid = *xid; - - undo->dict_operation = FALSE; - - undo->rseg = rseg; - - undo->space = rseg->space; - undo->hdr_page_no = page_no; - undo->hdr_offset = offset; - undo->last_page_no = page_no; - undo->size = 1; - - undo->empty = TRUE; - undo->top_page_no = page_no; - undo->guess_page = NULL; - - return(undo); -} - -/************************************************************************ -Initializes a cached undo log object for new use. 
*/ -static -void -trx_undo_mem_init_for_reuse( -/*========================*/ - trx_undo_t* undo, /* in: undo log to init */ - dulint trx_id, /* in: id of the trx for which the undo log - is created */ - XID* xid, /* in: X/Open XA transaction identification*/ - ulint offset) /* in: undo log header byte offset on page */ -{ - ut_ad(mutex_own(&((undo->rseg)->mutex))); - - if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) { - fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", - (ulong) undo->id); - - mem_analyze_corruption(undo); - ut_error; - } - - undo->state = TRX_UNDO_ACTIVE; - undo->del_marks = FALSE; - undo->trx_id = trx_id; - undo->xid = *xid; - - undo->dict_operation = FALSE; - - undo->hdr_offset = offset; - undo->empty = TRUE; -} - -/************************************************************************ -Frees an undo log memory copy. */ -static -void -trx_undo_mem_free( -/*==============*/ - trx_undo_t* undo) /* in: the undo object to be freed */ -{ - if (undo->id >= TRX_RSEG_N_SLOTS) { - fprintf(stderr, - "InnoDB: Error: undo->id is %lu\n", (ulong) undo->id); - ut_error; - } - - mem_free(undo); -} - -/************************************************************************** -Creates a new undo log. 
*/ -static -ulint -trx_undo_create( -/*============*/ - /* out: DB_SUCCESS if successful in creating - the new undo lob object, possible error - codes are: - DB_TOO_MANY_CONCURRENT_TRXS - DB_OUT_OF_FILE_SPACE - DB_OUT_OF_MEMORY*/ - trx_t* trx, /* in: transaction */ - trx_rseg_t* rseg, /* in: rollback segment memory copy */ - ulint type, /* in: type of the log: TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ - dulint trx_id, /* in: id of the trx for which the undo log - is created */ - XID* xid, /* in: X/Open transaction identification*/ - trx_undo_t** undo, /* out: the new undo log object, undefined - * if did not succeed */ - mtr_t* mtr) /* in: mtr */ -{ - trx_rsegf_t* rseg_header; - ulint page_no; - ulint offset; - ulint id; - page_t* undo_page; - ulint err; - - ut_ad(mutex_own(&(rseg->mutex))); - - if (rseg->curr_size == rseg->max_size) { - - return(DB_OUT_OF_FILE_SPACE); - } - - rseg->curr_size++; - - rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr); - - err = trx_undo_seg_create(rseg, rseg_header, type, &id, - &undo_page, mtr); - - if (err != DB_SUCCESS) { - /* Did not succeed */ - - rseg->curr_size--; - - return(err); - } - - page_no = buf_frame_get_page_no(undo_page); - - offset = trx_undo_header_create(undo_page, trx_id, mtr); - - if (trx->support_xa) { - trx_undo_header_add_space_for_xid(undo_page, - undo_page + offset, mtr); - } - - *undo = trx_undo_mem_create(rseg, id, type, trx_id, xid, - page_no, offset); - if (*undo == NULL) { - - err = DB_OUT_OF_MEMORY; - } - - return(err); -} - -/*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/ - -/************************************************************************ -Reuses a cached undo log. 
*/ -static -trx_undo_t* -trx_undo_reuse_cached( -/*==================*/ - /* out: the undo log memory object, NULL if - none cached */ - trx_t* trx, /* in: transaction */ - trx_rseg_t* rseg, /* in: rollback segment memory object */ - ulint type, /* in: type of the log: TRX_UNDO_INSERT or - TRX_UNDO_UPDATE */ - dulint trx_id, /* in: id of the trx for which the undo log - is used */ - XID* xid, /* in: X/Open XA transaction identification */ - mtr_t* mtr) /* in: mtr */ -{ - trx_undo_t* undo; - page_t* undo_page; - ulint offset; - - ut_ad(mutex_own(&(rseg->mutex))); - - if (type == TRX_UNDO_INSERT) { - - undo = UT_LIST_GET_FIRST(rseg->insert_undo_cached); - if (undo == NULL) { - - return(NULL); - } - - UT_LIST_REMOVE(undo_list, rseg->insert_undo_cached, undo); - } else { - ut_ad(type == TRX_UNDO_UPDATE); - - undo = UT_LIST_GET_FIRST(rseg->update_undo_cached); - if (undo == NULL) { - - return(NULL); - } - - UT_LIST_REMOVE(undo_list, rseg->update_undo_cached, undo); - } - - ut_ad(undo->size == 1); - - if (undo->id >= TRX_RSEG_N_SLOTS) { - fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", - (ulong) undo->id); - mem_analyze_corruption(undo); - ut_error; - } - - undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr); - - if (type == TRX_UNDO_INSERT) { - offset = trx_undo_insert_header_reuse(undo_page, trx_id, mtr); - - if (trx->support_xa) { - trx_undo_header_add_space_for_xid( - undo_page, undo_page + offset, mtr); - } - } else { - ut_a(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR - + TRX_UNDO_PAGE_TYPE) - == TRX_UNDO_UPDATE); - - offset = trx_undo_header_create(undo_page, trx_id, mtr); - - if (trx->support_xa) { - trx_undo_header_add_space_for_xid( - undo_page, undo_page + offset, mtr); - } - } - - trx_undo_mem_init_for_reuse(undo, trx_id, xid, offset); - - return(undo); -} - -/************************************************************************** -Marks an undo log header as a header of a data dictionary operation -transaction. 
*/ -static -void -trx_undo_mark_as_dict_operation( -/*============================*/ - trx_t* trx, /* in: dict op transaction */ - trx_undo_t* undo, /* in: assigned undo log */ - mtr_t* mtr) /* in: mtr */ -{ - page_t* hdr_page; - - ut_a(trx->dict_operation); - - hdr_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr); - - mlog_write_ulint(hdr_page + undo->hdr_offset - + TRX_UNDO_DICT_TRANS, - trx->dict_operation, MLOG_1BYTE, mtr); - - mlog_write_dulint(hdr_page + undo->hdr_offset + TRX_UNDO_TABLE_ID, - trx->table_id, mtr); - - undo->dict_operation = trx->dict_operation; - undo->table_id = trx->table_id; -} - -/************************************************************************** -Assigns an undo log for a transaction. A new undo log is created or a cached -undo log reused. */ - -ulint -trx_undo_assign_undo( -/*=================*/ - /* out: DB_SUCCESS if undo log assign - successful, possible error codes are: - DD_TOO_MANY_CONCURRENT_TRXS - DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY*/ - trx_t* trx, /* in: transaction */ - ulint type) /* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */ -{ - trx_rseg_t* rseg; - trx_undo_t* undo; - mtr_t mtr; - ulint err = DB_SUCCESS; - - ut_ad(trx); - ut_ad(trx->rseg); - - rseg = trx->rseg; - - ut_ad(mutex_own(&(trx->undo_mutex))); - - mtr_start(&mtr); - - ut_ad(!mutex_own(&kernel_mutex)); - - mutex_enter(&(rseg->mutex)); - - undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, &trx->xid, - &mtr); - if (undo == NULL) { - err = trx_undo_create(trx, rseg, type, trx->id, &trx->xid, - &undo, &mtr); - if (err != DB_SUCCESS) { - - goto func_exit; - } - } - - if (type == TRX_UNDO_INSERT) { - UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_list, undo); - ut_ad(trx->insert_undo == NULL); - trx->insert_undo = undo; - } else { - UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_list, undo); - ut_ad(trx->update_undo == NULL); - trx->update_undo = undo; - } - - if (trx->dict_operation) { - trx_undo_mark_as_dict_operation(trx, undo, &mtr); - } - 
-func_exit: - mutex_exit(&(rseg->mutex)); - mtr_commit(&mtr); - - return err; -} - -/********************************************************************** -Sets the state of the undo log segment at a transaction finish. */ - -page_t* -trx_undo_set_state_at_finish( -/*=========================*/ - /* out: undo log segment header page, - x-latched */ - trx_rseg_t* rseg, /* in: rollback segment memory object */ - trx_t* trx __attribute__((unused)), /* in: transaction */ - trx_undo_t* undo, /* in: undo log memory copy */ - mtr_t* mtr) /* in: mtr */ -{ - trx_usegf_t* seg_hdr; - trx_upagef_t* page_hdr; - page_t* undo_page; - ulint state; - - ut_ad(trx); - ut_ad(undo); - ut_ad(mtr); - ut_ad(mutex_own(&rseg->mutex)); - - if (undo->id >= TRX_RSEG_N_SLOTS) { - fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", - (ulong) undo->id); - mem_analyze_corruption(undo); - ut_error; - } - - undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr); - - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - - if (undo->size == 1 - && mach_read_from_2(page_hdr + TRX_UNDO_PAGE_FREE) - < TRX_UNDO_PAGE_REUSE_LIMIT) { - - /* This is a heuristic to avoid the problem of all UNDO - slots ending up in one of the UNDO lists. Previously if - the server crashed with all the slots in one of the lists, - transactions that required the slots of a different type - would fail for lack of slots. */ - - if (UT_LIST_GET_LEN(rseg->update_undo_list) < 500 - && UT_LIST_GET_LEN(rseg->insert_undo_list) < 500) { - - state = TRX_UNDO_CACHED; - } else { - state = TRX_UNDO_TO_FREE; - } - - } else if (undo->type == TRX_UNDO_INSERT) { - - state = TRX_UNDO_TO_FREE; - } else { - state = TRX_UNDO_TO_PURGE; - } - - undo->state = state; - - mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, state, MLOG_2BYTES, mtr); - - return(undo_page); -} - -/********************************************************************** -Sets the state of the undo log segment at a transaction prepare. 
*/ - -page_t* -trx_undo_set_state_at_prepare( -/*==========================*/ - /* out: undo log segment header page, - x-latched */ - trx_t* trx, /* in: transaction */ - trx_undo_t* undo, /* in: undo log memory copy */ - mtr_t* mtr) /* in: mtr */ -{ - trx_usegf_t* seg_hdr; - trx_upagef_t* page_hdr; - trx_ulogf_t* undo_header; - page_t* undo_page; - ulint offset; - - ut_ad(trx && undo && mtr); - - if (undo->id >= TRX_RSEG_N_SLOTS) { - fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", - (ulong) undo->id); - mem_analyze_corruption(undo); - ut_error; - } - - undo_page = trx_undo_page_get(undo->space, undo->hdr_page_no, mtr); - - seg_hdr = undo_page + TRX_UNDO_SEG_HDR; - page_hdr = undo_page + TRX_UNDO_PAGE_HDR; - - /*------------------------------*/ - undo->state = TRX_UNDO_PREPARED; - undo->xid = trx->xid; - /*------------------------------*/ - - mlog_write_ulint(seg_hdr + TRX_UNDO_STATE, undo->state, - MLOG_2BYTES, mtr); - - offset = mach_read_from_2(seg_hdr + TRX_UNDO_LAST_LOG); - undo_header = undo_page + offset; - - mlog_write_ulint(undo_header + TRX_UNDO_XID_EXISTS, - TRUE, MLOG_1BYTE, mtr); - - trx_undo_write_xid(undo_header, &undo->xid, mtr); - - return(undo_page); -} - -/************************************************************************** -Adds the update undo log header as the first in the history list, and -frees the memory object, or puts it to the list of cached update undo log -segments. 
*/ - -void -trx_undo_update_cleanup( -/*====================*/ - trx_t* trx, /* in: trx owning the update undo log */ - page_t* undo_page, /* in: update undo log header page, - x-latched */ - mtr_t* mtr) /* in: mtr */ -{ - trx_rseg_t* rseg; - trx_undo_t* undo; - - undo = trx->update_undo; - rseg = trx->rseg; - - ut_ad(mutex_own(&(rseg->mutex))); - - trx_purge_add_update_undo_to_history(trx, undo_page, mtr); - - UT_LIST_REMOVE(undo_list, rseg->update_undo_list, undo); - - trx->update_undo = NULL; - - if (undo->state == TRX_UNDO_CACHED) { - - UT_LIST_ADD_FIRST(undo_list, rseg->update_undo_cached, undo); - } else { - ut_ad(undo->state == TRX_UNDO_TO_PURGE); - - trx_undo_mem_free(undo); - } -} - -/********************************************************************** -Frees or caches an insert undo log after a transaction commit or rollback. -Knowledge of inserts is not needed after a commit or rollback, therefore -the data can be discarded. */ - -void -trx_undo_insert_cleanup( -/*====================*/ - trx_t* trx) /* in: transaction handle */ -{ - trx_undo_t* undo; - trx_rseg_t* rseg; - - undo = trx->insert_undo; - ut_ad(undo); - - rseg = trx->rseg; - - mutex_enter(&(rseg->mutex)); - - UT_LIST_REMOVE(undo_list, rseg->insert_undo_list, undo); - trx->insert_undo = NULL; - - if (undo->state == TRX_UNDO_CACHED) { - - UT_LIST_ADD_FIRST(undo_list, rseg->insert_undo_cached, undo); - } else { - ut_ad(undo->state == TRX_UNDO_TO_FREE); - - /* Delete first the undo log segment in the file */ - - mutex_exit(&(rseg->mutex)); - - trx_undo_seg_free(undo); - - mutex_enter(&(rseg->mutex)); - - ut_ad(rseg->curr_size > undo->size); - - rseg->curr_size -= undo->size; - - trx_undo_mem_free(undo); - } - - mutex_exit(&(rseg->mutex)); -} diff --git a/storage/innobase/usr/usr0sess.c b/storage/innobase/usr/usr0sess.c deleted file mode 100644 index 3740c05eaab..00000000000 --- a/storage/innobase/usr/usr0sess.c +++ /dev/null @@ -1,81 +0,0 @@ 
-/****************************************************** -Sessions - -(c) 1996 Innobase Oy - -Created 6/25/1996 Heikki Tuuri -*******************************************************/ - -#include "usr0sess.h" - -#ifdef UNIV_NONINL -#include "usr0sess.ic" -#endif - -#include "trx0trx.h" - -/************************************************************************* -Closes a session, freeing the memory occupied by it. */ -static -void -sess_close( -/*=======*/ - sess_t* sess); /* in, own: session object */ - -/************************************************************************* -Opens a session. */ - -sess_t* -sess_open(void) -/*===========*/ - /* out, own: session object */ -{ - sess_t* sess; - - ut_ad(mutex_own(&kernel_mutex)); - - sess = mem_alloc(sizeof(sess_t)); - - sess->state = SESS_ACTIVE; - - sess->trx = trx_create(sess); - - UT_LIST_INIT(sess->graphs); - - return(sess); -} - -/************************************************************************* -Closes a session, freeing the memory occupied by it. */ -static -void -sess_close( -/*=======*/ - sess_t* sess) /* in, own: session object */ -{ - ut_ad(mutex_own(&kernel_mutex)); - ut_ad(sess->trx == NULL); - - mem_free(sess); -} - -/************************************************************************* -Closes a session, freeing the memory occupied by it, if it is in a state -where it should be closed. 
*/ - -ibool -sess_try_close( -/*===========*/ - /* out: TRUE if closed */ - sess_t* sess) /* in, own: session object */ -{ - ut_ad(mutex_own(&kernel_mutex)); - - if (UT_LIST_GET_LEN(sess->graphs) == 0) { - sess_close(sess); - - return(TRUE); - } - - return(FALSE); -} diff --git a/storage/innobase/ut/ut0byte.c b/storage/innobase/ut/ut0byte.c deleted file mode 100644 index b5467fde601..00000000000 --- a/storage/innobase/ut/ut0byte.c +++ /dev/null @@ -1,31 +0,0 @@ -/******************************************************************* -Byte utilities - -(c) 1994, 1995 Innobase Oy - -Created 5/11/1994 Heikki Tuuri -********************************************************************/ - -#include "ut0byte.h" - -#ifdef UNIV_NONINL -#include "ut0byte.ic" -#endif - -#include "ut0sort.h" - -/* Zero value for a dulint */ -dulint ut_dulint_zero = {0, 0}; - -/* Maximum value for a dulint */ -dulint ut_dulint_max = {0xFFFFFFFFUL, 0xFFFFFFFFUL}; - -/**************************************************************** -Sort function for dulint arrays. */ -void -ut_dulint_sort(dulint* arr, dulint* aux_arr, ulint low, ulint high) -/*===============================================================*/ -{ - UT_SORT_FUNCTION_BODY(ut_dulint_sort, arr, aux_arr, low, high, - ut_dulint_cmp); -} diff --git a/storage/innobase/ut/ut0dbg.c b/storage/innobase/ut/ut0dbg.c deleted file mode 100644 index 8c4be190d77..00000000000 --- a/storage/innobase/ut/ut0dbg.c +++ /dev/null @@ -1,98 +0,0 @@ -/********************************************************************* -Debug utilities for Innobase. 
- -(c) 1994, 1995 Innobase Oy - -Created 1/30/1994 Heikki Tuuri -**********************************************************************/ - -#include "univ.i" - -#if defined(__GNUC__) && (__GNUC__ > 2) -#else -/* This is used to eliminate compiler warnings */ -ulint ut_dbg_zero = 0; -#endif - -#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT) -/* If this is set to TRUE all threads will stop into the next assertion -and assert */ -ibool ut_dbg_stop_threads = FALSE; -#endif -#ifdef __NETWARE__ -ibool panic_shutdown = FALSE; /* This is set to TRUE when on NetWare there - happens an InnoDB assertion failure or other - fatal error condition that requires an - immediate shutdown. */ -#elif !defined(UT_DBG_USE_ABORT) -/* Null pointer used to generate memory trap */ - -ulint* ut_dbg_null_ptr = NULL; -#endif - -/***************************************************************** -Report a failed assertion. */ - -void -ut_dbg_assertion_failed( -/*====================*/ - const char* expr, /* in: the failed assertion (optional) */ - const char* file, /* in: source file containing the assertion */ - ulint line) /* in: line number of the assertion */ -{ - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Assertion failure in thread %lu" - " in file %s line %lu\n", - os_thread_pf(os_thread_get_curr_id()), file, line); - if (expr) { - fprintf(stderr, - "InnoDB: Failing assertion: %s\n", expr); - } - - fputs("InnoDB: We intentionally generate a memory trap.\n" - "InnoDB: Submit a detailed bug report" - " to http://bugs.mysql.com.\n" - "InnoDB: If you get repeated assertion failures" - " or crashes, even\n" - "InnoDB: immediately after the mysqld startup, there may be\n" - "InnoDB: corruption in the InnoDB tablespace. 
Please refer to\n" - "InnoDB: http://dev.mysql.com/doc/refman/5.1/en/" - "forcing-recovery.html\n" - "InnoDB: about forcing recovery.\n", stderr); -#if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT) - ut_dbg_stop_threads = TRUE; -#endif -} - -#ifdef __NETWARE__ -/***************************************************************** -Shut down MySQL/InnoDB after assertion failure. */ - -void -ut_dbg_panic(void) -/*==============*/ -{ - if (!panic_shutdown) { - panic_shutdown = TRUE; - innobase_shutdown_for_mysql(); - } - exit(1); -} -#else /* __NETWARE__ */ -# if defined(UNIV_SYNC_DEBUG) || !defined(UT_DBG_USE_ABORT) -/***************************************************************** -Stop a thread after assertion failure. */ - -void -ut_dbg_stop_thread( -/*===============*/ - const char* file, - ulint line) -{ - fprintf(stderr, "InnoDB: Thread %lu stopped in file %s line %lu\n", - os_thread_pf(os_thread_get_curr_id()), file, line); - os_thread_sleep(1000000000); -} -# endif -#endif /* __NETWARE__ */ diff --git a/storage/innobase/ut/ut0list.c b/storage/innobase/ut/ut0list.c deleted file mode 100644 index a0db7ff7b55..00000000000 --- a/storage/innobase/ut/ut0list.c +++ /dev/null @@ -1,169 +0,0 @@ -#include "ut0list.h" -#ifdef UNIV_NONINL -#include "ut0list.ic" -#endif - -/******************************************************************** -Create a new list. */ - -ib_list_t* -ib_list_create(void) -/*=================*/ - /* out: list */ -{ - ib_list_t* list = mem_alloc(sizeof(ib_list_t)); - - list->first = NULL; - list->last = NULL; - list->is_heap_list = FALSE; - - return(list); -} - -/******************************************************************** -Create a new list using the given heap. ib_list_free MUST NOT BE CALLED for -lists created with this function. 
*/ - -ib_list_t* -ib_list_create_heap( -/*================*/ - /* out: list */ - mem_heap_t* heap) /* in: memory heap to use */ -{ - ib_list_t* list = mem_heap_alloc(heap, sizeof(ib_list_t)); - - list->first = NULL; - list->last = NULL; - list->is_heap_list = TRUE; - - return(list); -} - -/******************************************************************** -Free a list. */ - -void -ib_list_free( -/*=========*/ - ib_list_t* list) /* in: list */ -{ - ut_a(!list->is_heap_list); - - /* We don't check that the list is empty because it's entirely valid - to e.g. have all the nodes allocated from a single heap that is then - freed after the list itself is freed. */ - - mem_free(list); -} - -/******************************************************************** -Add the data to the start of the list. */ - -ib_list_node_t* -ib_list_add_first( -/*==============*/ - /* out: new list node*/ - ib_list_t* list, /* in: list */ - void* data, /* in: data */ - mem_heap_t* heap) /* in: memory heap to use */ -{ - return(ib_list_add_after(list, ib_list_get_first(list), data, heap)); -} - -/******************************************************************** -Add the data to the end of the list. */ - -ib_list_node_t* -ib_list_add_last( -/*=============*/ - /* out: new list node*/ - ib_list_t* list, /* in: list */ - void* data, /* in: data */ - mem_heap_t* heap) /* in: memory heap to use */ -{ - return(ib_list_add_after(list, ib_list_get_last(list), data, heap)); -} - -/******************************************************************** -Add the data after the indicated node. */ - -ib_list_node_t* -ib_list_add_after( -/*==============*/ - /* out: new list node*/ - ib_list_t* list, /* in: list */ - ib_list_node_t* prev_node, /* in: node preceding new node (can - be NULL) */ - void* data, /* in: data */ - mem_heap_t* heap) /* in: memory heap to use */ -{ - ib_list_node_t* node = mem_heap_alloc(heap, sizeof(ib_list_node_t)); - - node->data = data; - - if (!list->first) { - /* Empty list. 
*/ - - ut_a(!prev_node); - - node->prev = NULL; - node->next = NULL; - - list->first = node; - list->last = node; - } else if (!prev_node) { - /* Start of list. */ - - node->prev = NULL; - node->next = list->first; - - list->first->prev = node; - - list->first = node; - } else { - /* Middle or end of list. */ - - node->prev = prev_node; - node->next = prev_node->next; - - prev_node->next = node; - - if (node->next) { - node->next->prev = node; - } else { - list->last = node; - } - } - - return(node); -} - -/******************************************************************** -Remove the node from the list. */ - -void -ib_list_remove( -/*===========*/ - ib_list_t* list, /* in: list */ - ib_list_node_t* node) /* in: node to remove */ -{ - if (node->prev) { - node->prev->next = node->next; - } else { - /* First item in list. */ - - ut_ad(list->first == node); - - list->first = node->next; - } - - if (node->next) { - node->next->prev = node->prev; - } else { - /* Last item in list. */ - - ut_ad(list->last == node); - - list->last = node->prev; - } -} diff --git a/storage/innobase/ut/ut0mem.c b/storage/innobase/ut/ut0mem.c deleted file mode 100644 index b466a5f6872..00000000000 --- a/storage/innobase/ut/ut0mem.c +++ /dev/null @@ -1,548 +0,0 @@ -/************************************************************************ -Memory primitives - -(c) 1994, 1995 Innobase Oy - -Created 5/11/1994 Heikki Tuuri -*************************************************************************/ - -#include "ut0mem.h" - -#ifdef UNIV_NONINL -#include "ut0mem.ic" -#endif - -#include "mem0mem.h" -#include "os0sync.h" -#include "os0thread.h" - -/* This struct is placed first in every allocated memory block */ -typedef struct ut_mem_block_struct ut_mem_block_t; - -/* The total amount of memory currently allocated from the OS with malloc */ -ulint ut_total_allocated_memory = 0; - -struct ut_mem_block_struct{ - UT_LIST_NODE_T(ut_mem_block_t) mem_block_list; - /* mem block list node */ - ulint size; 
/* size of allocated memory */ - ulint magic_n; -}; - -#define UT_MEM_MAGIC_N 1601650166 - -/* List of all memory blocks allocated from the operating system -with malloc */ -UT_LIST_BASE_NODE_T(ut_mem_block_t) ut_mem_block_list; - -os_fast_mutex_t ut_list_mutex; /* this protects the list */ - -ibool ut_mem_block_list_inited = FALSE; - -ulint* ut_mem_null_ptr = NULL; - -/************************************************************************** -Initializes the mem block list at database startup. */ -static -void -ut_mem_block_list_init(void) -/*========================*/ -{ - os_fast_mutex_init(&ut_list_mutex); - UT_LIST_INIT(ut_mem_block_list); - ut_mem_block_list_inited = TRUE; -} - -/************************************************************************** -Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is -defined and set_to_zero is TRUE. */ - -void* -ut_malloc_low( -/*==========*/ - /* out, own: allocated memory */ - ulint n, /* in: number of bytes to allocate */ - ibool set_to_zero, /* in: TRUE if allocated memory should be - set to zero if UNIV_SET_MEM_TO_ZERO is - defined */ - ibool assert_on_error)/* in: if TRUE, we crash mysqld if the - memory cannot be allocated */ -{ - ulint retry_count = 0; - void* ret; - - ut_ad((sizeof(ut_mem_block_t) % 8) == 0); /* check alignment ok */ - - if (!ut_mem_block_list_inited) { - ut_mem_block_list_init(); - } -retry: - os_fast_mutex_lock(&ut_list_mutex); - - ret = malloc(n + sizeof(ut_mem_block_t)); - - if (ret == NULL && retry_count < 60) { - if (retry_count == 0) { - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: Error: cannot allocate" - " %lu bytes of\n" - "InnoDB: memory with malloc!" - " Total allocated memory\n" - "InnoDB: by InnoDB %lu bytes." 
- " Operating system errno: %lu\n" - "InnoDB: Check if you should" - " increase the swap file or\n" - "InnoDB: ulimits of your operating system.\n" - "InnoDB: On FreeBSD check you" - " have compiled the OS with\n" - "InnoDB: a big enough maximum process size.\n" - "InnoDB: Note that in most 32-bit" - " computers the process\n" - "InnoDB: memory space is limited" - " to 2 GB or 4 GB.\n" - "InnoDB: We keep retrying" - " the allocation for 60 seconds...\n", - (ulong) n, (ulong) ut_total_allocated_memory, -#ifdef __WIN__ - (ulong) GetLastError() -#else - (ulong) errno -#endif - ); - } - - os_fast_mutex_unlock(&ut_list_mutex); - - /* Sleep for a second and retry the allocation; maybe this is - just a temporary shortage of memory */ - - os_thread_sleep(1000000); - - retry_count++; - - goto retry; - } - - if (ret == NULL) { - /* Flush stderr to make more probable that the error - message gets in the error file before we generate a seg - fault */ - - fflush(stderr); - - os_fast_mutex_unlock(&ut_list_mutex); - - /* Make an intentional seg fault so that we get a stack - trace */ - /* Intentional segfault on NetWare causes an abend. Avoid this - by graceful exit handling in ut_a(). 
*/ -#if (!defined __NETWARE__) - if (assert_on_error) { - ut_print_timestamp(stderr); - - fprintf(stderr, - " InnoDB: We now intentionally" - " generate a seg fault so that\n" - "InnoDB: on Linux we get a stack trace.\n"); - - if (*ut_mem_null_ptr) ut_mem_null_ptr = 0; - } else { - return(NULL); - } -#else - ut_a(0); -#endif - } - - if (set_to_zero) { -#ifdef UNIV_SET_MEM_TO_ZERO - memset(ret, '\0', n + sizeof(ut_mem_block_t)); -#endif - } - - UNIV_MEM_ALLOC(ret, n + sizeof(ut_mem_block_t)); - - ((ut_mem_block_t*)ret)->size = n + sizeof(ut_mem_block_t); - ((ut_mem_block_t*)ret)->magic_n = UT_MEM_MAGIC_N; - - ut_total_allocated_memory += n + sizeof(ut_mem_block_t); - - UT_LIST_ADD_FIRST(mem_block_list, ut_mem_block_list, - ((ut_mem_block_t*)ret)); - os_fast_mutex_unlock(&ut_list_mutex); - - return((void*)((byte*)ret + sizeof(ut_mem_block_t))); -} - -/************************************************************************** -Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is -defined. */ - -void* -ut_malloc( -/*======*/ - /* out, own: allocated memory */ - ulint n) /* in: number of bytes to allocate */ -{ - return(ut_malloc_low(n, TRUE, TRUE)); -} - -/************************************************************************** -Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs -out. It cannot be used if we want to return an error message. Prints to -stderr a message if fails. */ - -ibool -ut_test_malloc( -/*===========*/ - /* out: TRUE if succeeded */ - ulint n) /* in: try to allocate this many bytes */ -{ - void* ret; - - ret = malloc(n); - - if (ret == NULL) { - ut_print_timestamp(stderr); - fprintf(stderr, - " InnoDB: Error: cannot allocate" - " %lu bytes of memory for\n" - "InnoDB: a BLOB with malloc! Total allocated memory\n" - "InnoDB: by InnoDB %lu bytes." 
- " Operating system errno: %d\n" - "InnoDB: Check if you should increase" - " the swap file or\n" - "InnoDB: ulimits of your operating system.\n" - "InnoDB: On FreeBSD check you have" - " compiled the OS with\n" - "InnoDB: a big enough maximum process size.\n", - (ulong) n, - (ulong) ut_total_allocated_memory, - (int) errno); - return(FALSE); - } - - free(ret); - - return(TRUE); -} - -/************************************************************************** -Frees a memory block allocated with ut_malloc. */ - -void -ut_free( -/*====*/ - void* ptr) /* in, own: memory block */ -{ - ut_mem_block_t* block; - - block = (ut_mem_block_t*)((byte*)ptr - sizeof(ut_mem_block_t)); - - os_fast_mutex_lock(&ut_list_mutex); - - ut_a(block->magic_n == UT_MEM_MAGIC_N); - ut_a(ut_total_allocated_memory >= block->size); - - ut_total_allocated_memory -= block->size; - - UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block); - free(block); - - os_fast_mutex_unlock(&ut_list_mutex); -} - -/************************************************************************** -Implements realloc. This is needed by /pars/lexyy.c. Otherwise, you should not -use this function because the allocation functions in mem0mem.h are the -recommended ones in InnoDB. - -man realloc in Linux, 2004: - - realloc() changes the size of the memory block pointed to - by ptr to size bytes. The contents will be unchanged to - the minimum of the old and new sizes; newly allocated mem - ory will be uninitialized. If ptr is NULL, the call is - equivalent to malloc(size); if size is equal to zero, the - call is equivalent to free(ptr). Unless ptr is NULL, it - must have been returned by an earlier call to malloc(), - calloc() or realloc(). - -RETURN VALUE - realloc() returns a pointer to the newly allocated memory, - which is suitably aligned for any kind of variable and may - be different from ptr, or NULL if the request fails. 
If - size was equal to 0, either NULL or a pointer suitable to - be passed to free() is returned. If realloc() fails the - original block is left untouched - it is not freed or - moved. */ - -void* -ut_realloc( -/*=======*/ - /* out, own: pointer to new mem block or NULL */ - void* ptr, /* in: pointer to old block or NULL */ - ulint size) /* in: desired size */ -{ - ut_mem_block_t* block; - ulint old_size; - ulint min_size; - void* new_ptr; - - if (ptr == NULL) { - - return(ut_malloc(size)); - } - - if (size == 0) { - ut_free(ptr); - - return(NULL); - } - - block = (ut_mem_block_t*)((byte*)ptr - sizeof(ut_mem_block_t)); - - ut_a(block->magic_n == UT_MEM_MAGIC_N); - - old_size = block->size - sizeof(ut_mem_block_t); - - if (size < old_size) { - min_size = size; - } else { - min_size = old_size; - } - - new_ptr = ut_malloc(size); - - if (new_ptr == NULL) { - - return(NULL); - } - - /* Copy the old data from ptr */ - ut_memcpy(new_ptr, ptr, min_size); - - ut_free(ptr); - - return(new_ptr); -} - -/************************************************************************** -Frees in shutdown all allocated memory not freed yet. */ - -void -ut_free_all_mem(void) -/*=================*/ -{ - ut_mem_block_t* block; - - os_fast_mutex_free(&ut_list_mutex); - - while ((block = UT_LIST_GET_FIRST(ut_mem_block_list))) { - - ut_a(block->magic_n == UT_MEM_MAGIC_N); - ut_a(ut_total_allocated_memory >= block->size); - - ut_total_allocated_memory -= block->size; - - UT_LIST_REMOVE(mem_block_list, ut_mem_block_list, block); - free(block); - } - - if (ut_total_allocated_memory != 0) { - fprintf(stderr, - "InnoDB: Warning: after shutdown" - " total allocated memory is %lu\n", - (ulong) ut_total_allocated_memory); - } -} - -/************************************************************************** -Copies up to size - 1 characters from the NUL-terminated string src to -dst, NUL-terminating the result. Returns strlen(src), so truncation -occurred if the return value >= size. 
*/ - -ulint -ut_strlcpy( -/*=======*/ - /* out: strlen(src) */ - char* dst, /* in: destination buffer */ - const char* src, /* in: source buffer */ - ulint size) /* in: size of destination buffer */ -{ - ulint src_size = strlen(src); - - if (size != 0) { - ulint n = ut_min(src_size, size - 1); - - memcpy(dst, src, n); - dst[n] = '\0'; - } - - return(src_size); -} - -/************************************************************************** -Like ut_strlcpy, but if src doesn't fit in dst completely, copies the last -(size - 1) bytes of src, not the first. */ - -ulint -ut_strlcpy_rev( -/*===========*/ - /* out: strlen(src) */ - char* dst, /* in: destination buffer */ - const char* src, /* in: source buffer */ - ulint size) /* in: size of destination buffer */ -{ - ulint src_size = strlen(src); - - if (size != 0) { - ulint n = ut_min(src_size, size - 1); - - memcpy(dst, src + src_size - n, n + 1); - } - - return(src_size); -} - -/************************************************************************** -Make a quoted copy of a NUL-terminated string. Leading and trailing -quotes will not be included; only embedded quotes will be escaped. -See also ut_strlenq() and ut_memcpyq(). */ - -char* -ut_strcpyq( -/*=======*/ - /* out: pointer to end of dest */ - char* dest, /* in: output buffer */ - char q, /* in: the quote character */ - const char* src) /* in: null-terminated string */ -{ - while (*src) { - if ((*dest++ = *src++) == q) { - *dest++ = q; - } - } - - return(dest); -} - -/************************************************************************** -Make a quoted copy of a fixed-length string. Leading and trailing -quotes will not be included; only embedded quotes will be escaped. -See also ut_strlenq() and ut_strcpyq(). 
*/ - -char* -ut_memcpyq( -/*=======*/ - /* out: pointer to end of dest */ - char* dest, /* in: output buffer */ - char q, /* in: the quote character */ - const char* src, /* in: string to be quoted */ - ulint len) /* in: length of src */ -{ - const char* srcend = src + len; - - while (src < srcend) { - if ((*dest++ = *src++) == q) { - *dest++ = q; - } - } - - return(dest); -} - -/************************************************************************** -Return the number of times s2 occurs in s1. Overlapping instances of s2 -are only counted once. */ - -ulint -ut_strcount( -/*========*/ - /* out: the number of times s2 occurs in s1 */ - const char* s1, /* in: string to search in */ - const char* s2) /* in: string to search for */ -{ - ulint count = 0; - ulint len = strlen(s2); - - if (len == 0) { - - return(0); - } - - for (;;) { - s1 = strstr(s1, s2); - - if (!s1) { - - break; - } - - count++; - s1 += len; - } - - return(count); -} - -/************************************************************************** -Replace every occurrence of s1 in str with s2. Overlapping instances of s1 -are only replaced once. 
*/ - -char * -ut_strreplace( -/*==========*/ - /* out, own: modified string, must be - freed with mem_free() */ - const char* str, /* in: string to operate on */ - const char* s1, /* in: string to replace */ - const char* s2) /* in: string to replace s1 with */ -{ - char* new_str; - char* ptr; - const char* str_end; - ulint str_len = strlen(str); - ulint s1_len = strlen(s1); - ulint s2_len = strlen(s2); - ulint count = 0; - int len_delta = (int)s2_len - (int)s1_len; - - str_end = str + str_len; - - if (len_delta <= 0) { - len_delta = 0; - } else { - count = ut_strcount(str, s1); - } - - new_str = mem_alloc(str_len + count * len_delta + 1); - ptr = new_str; - - while (str) { - const char* next = strstr(str, s1); - - if (!next) { - next = str_end; - } - - memcpy(ptr, str, next - str); - ptr += next - str; - - if (next == str_end) { - - break; - } - - memcpy(ptr, s2, s2_len); - ptr += s2_len; - - str = next + s1_len; - } - - *ptr = '\0'; - - return(new_str); -} diff --git a/storage/innobase/ut/ut0rnd.c b/storage/innobase/ut/ut0rnd.c deleted file mode 100644 index 016809e0474..00000000000 --- a/storage/innobase/ut/ut0rnd.c +++ /dev/null @@ -1,78 +0,0 @@ -/******************************************************************* -Random numbers and hashing - -(c) 1994, 1995 Innobase Oy - -Created 5/11/1994 Heikki Tuuri -********************************************************************/ - -#include "ut0rnd.h" - -#ifdef UNIV_NONINL -#include "ut0rnd.ic" -#endif - -/* These random numbers are used in ut_find_prime */ -#define UT_RANDOM_1 1.0412321 -#define UT_RANDOM_2 1.1131347 -#define UT_RANDOM_3 1.0132677 - - -ulint ut_rnd_ulint_counter = 65654363; - -/*************************************************************** -Looks for a prime number slightly greater than the given argument. -The prime is chosen so that it is not near any power of 2. 
*/ - -ulint -ut_find_prime( -/*==========*/ - /* out: prime */ - ulint n) /* in: positive number > 100 */ -{ - ulint pow2; - ulint i; - - n += 100; - - pow2 = 1; - while (pow2 * 2 < n) { - pow2 = 2 * pow2; - } - - if ((double)n < 1.05 * (double)pow2) { - n = (ulint) ((double)n * UT_RANDOM_1); - } - - pow2 = 2 * pow2; - - if ((double)n > 0.95 * (double)pow2) { - n = (ulint) ((double)n * UT_RANDOM_2); - } - - if (n > pow2 - 20) { - n += 30; - } - - /* Now we have n far enough from powers of 2. To make - n more random (especially, if it was not near - a power of 2), we then multiply it by a random number. */ - - n = (ulint) ((double)n * UT_RANDOM_3); - - for (;; n++) { - i = 2; - while (i * i <= n) { - if (n % i == 0) { - goto next_n; - } - i++; - } - - /* Found a prime */ - break; -next_n: ; - } - - return(n); -} diff --git a/storage/innobase/ut/ut0ut.c b/storage/innobase/ut/ut0ut.c deleted file mode 100644 index 1ae43172894..00000000000 --- a/storage/innobase/ut/ut0ut.c +++ /dev/null @@ -1,592 +0,0 @@ -/******************************************************************* -Various utilities for Innobase. - -(c) 1994, 1995 Innobase Oy - -Created 5/11/1994 Heikki Tuuri -********************************************************************/ - -#include "ut0ut.h" - -#ifdef UNIV_NONINL -#include "ut0ut.ic" -#endif - -#include <stdarg.h> -#include <string.h> -#include <ctype.h> - -#include "ut0sort.h" -#include "trx0trx.h" -#include "ha_prototypes.h" - -ibool ut_always_false = FALSE; - -#ifdef __WIN__ -/********************************************************************* -NOTE: The Windows epoch starts from 1601/01/01 whereas the Unix -epoch starts from 1970/1/1. 
For selection of constant see: -http://support.microsoft.com/kb/167296/ */ -#define WIN_TO_UNIX_DELTA_USEC ((ib_longlong) 11644473600000000ULL) - - -/********************************************************************* -This is the Windows version of gettimeofday(2).*/ -static -int -ut_gettimeofday( -/*============*/ - /* out: 0 if all OK else -1 */ - struct timeval* tv, /* out: Values are relative to Unix epoch */ - void* tz) /* in: not used */ -{ - FILETIME ft; - ib_longlong tm; - - if (!tv) { - errno = EINVAL; - return(-1); - } - - GetSystemTimeAsFileTime(&ft); - - tm = (ib_longlong) ft.dwHighDateTime << 32; - tm |= ft.dwLowDateTime; - - ut_a(tm >= 0); /* If tm wraps over to negative, the quotient / 10 - does not work */ - - tm /= 10; /* Convert from 100 nsec periods to usec */ - - /* If we don't convert to the Unix epoch the value for - struct timeval::tv_sec will overflow.*/ - tm -= WIN_TO_UNIX_DELTA_USEC; - - tv->tv_sec = (long) (tm / 1000000L); - tv->tv_usec = (long) (tm % 1000000L); - - return(0); -} -#else -#define ut_gettimeofday gettimeofday -#endif - -/************************************************************ -Gets the high 32 bits in a ulint. That is makes a shift >> 32, -but since there seem to be compiler bugs in both gcc and Visual C++, -we do this by a special conversion. */ - -ulint -ut_get_high32( -/*==========*/ - /* out: a >> 32 */ - ulint a) /* in: ulint */ -{ - ib_longlong i; - - i = (ib_longlong)a; - - i = i >> 32; - - return((ulint)i); -} - -/************************************************************ -The following function returns elapsed CPU time in milliseconds. */ - -ulint -ut_clock(void) -{ - return((clock() * 1000) / CLOCKS_PER_SEC); -} - -/************************************************************** -Returns system time. We do not specify the format of the time returned: -the only way to manipulate it is to use the function ut_difftime. 
*/ - -ib_time_t -ut_time(void) -/*=========*/ -{ - return(time(NULL)); -} - -/************************************************************** -Returns system time. -Upon successful completion, the value 0 is returned; otherwise the -value -1 is returned and the global variable errno is set to indicate the -error. */ - -int -ut_usectime( -/*========*/ - /* out: 0 on success, -1 otherwise */ - ulint* sec, /* out: seconds since the Epoch */ - ulint* ms) /* out: microseconds since the Epoch+*sec */ -{ - struct timeval tv; - int ret; - int errno_gettimeofday; - int i; - - for (i = 0; i < 10; i++) { - - ret = ut_gettimeofday(&tv, NULL); - - if (ret == -1) { - errno_gettimeofday = errno; - ut_print_timestamp(stderr); - fprintf(stderr, " InnoDB: gettimeofday(): %s\n", - strerror(errno_gettimeofday)); - os_thread_sleep(100000); /* 0.1 sec */ - errno = errno_gettimeofday; - } else { - break; - } - } - - if (ret != -1) { - *sec = (ulint) tv.tv_sec; - *ms = (ulint) tv.tv_usec; - } - - return(ret); -} - -/************************************************************** -Returns diff in microseconds (end_sec,end_ms) - (start_sec,start_ms) */ - -ib_longlong -ut_usecdiff( -/*========*/ - ulint end_sec, /* in: seconds since the Epoch */ - ulint end_ms, /* in: microseconds since the Epoch+*sec1 */ - ulint start_sec, /* in: seconds since the Epoch */ - ulint start_ms) /* in: microseconds since the Epoch+*sec2 */ -{ - ib_longlong end_mics = end_sec * 1000000LL + end_ms; - ib_longlong start_mics = start_sec * 1000000LL + start_ms; - - return end_mics - start_mics; -} - -/************************************************************** -Returns the difference of two times in seconds. */ - -double -ut_difftime( -/*========*/ - /* out: time2 - time1 expressed in seconds */ - ib_time_t time2, /* in: time */ - ib_time_t time1) /* in: time */ -{ - return(difftime(time2, time1)); -} - -/************************************************************** -Prints a timestamp to a file. 
*/ - -void -ut_print_timestamp( -/*===============*/ - FILE* file) /* in: file where to print */ -{ -#ifdef __WIN__ - SYSTEMTIME cal_tm; - - GetLocalTime(&cal_tm); - - fprintf(file,"%02d%02d%02d %2d:%02d:%02d", - (int)cal_tm.wYear % 100, - (int)cal_tm.wMonth, - (int)cal_tm.wDay, - (int)cal_tm.wHour, - (int)cal_tm.wMinute, - (int)cal_tm.wSecond); -#else - struct tm cal_tm; - struct tm* cal_tm_ptr; - time_t tm; - - time(&tm); - -#ifdef HAVE_LOCALTIME_R - localtime_r(&tm, &cal_tm); - cal_tm_ptr = &cal_tm; -#else - cal_tm_ptr = localtime(&tm); -#endif - fprintf(file,"%02d%02d%02d %2d:%02d:%02d", - cal_tm_ptr->tm_year % 100, - cal_tm_ptr->tm_mon + 1, - cal_tm_ptr->tm_mday, - cal_tm_ptr->tm_hour, - cal_tm_ptr->tm_min, - cal_tm_ptr->tm_sec); -#endif -} - -/************************************************************** -Sprintfs a timestamp to a buffer, 13..14 chars plus terminating NUL. */ - -void -ut_sprintf_timestamp( -/*=================*/ - char* buf) /* in: buffer where to sprintf */ -{ -#ifdef __WIN__ - SYSTEMTIME cal_tm; - - GetLocalTime(&cal_tm); - - sprintf(buf, "%02d%02d%02d %2d:%02d:%02d", - (int)cal_tm.wYear % 100, - (int)cal_tm.wMonth, - (int)cal_tm.wDay, - (int)cal_tm.wHour, - (int)cal_tm.wMinute, - (int)cal_tm.wSecond); -#else - struct tm cal_tm; - struct tm* cal_tm_ptr; - time_t tm; - - time(&tm); - -#ifdef HAVE_LOCALTIME_R - localtime_r(&tm, &cal_tm); - cal_tm_ptr = &cal_tm; -#else - cal_tm_ptr = localtime(&tm); -#endif - sprintf(buf, "%02d%02d%02d %2d:%02d:%02d", - cal_tm_ptr->tm_year % 100, - cal_tm_ptr->tm_mon + 1, - cal_tm_ptr->tm_mday, - cal_tm_ptr->tm_hour, - cal_tm_ptr->tm_min, - cal_tm_ptr->tm_sec); -#endif -} - -/************************************************************** -Sprintfs a timestamp to a buffer with no spaces and with ':' characters -replaced by '_'. 
*/ - -void -ut_sprintf_timestamp_without_extra_chars( -/*=====================================*/ - char* buf) /* in: buffer where to sprintf */ -{ -#ifdef __WIN__ - SYSTEMTIME cal_tm; - - GetLocalTime(&cal_tm); - - sprintf(buf, "%02d%02d%02d_%2d_%02d_%02d", - (int)cal_tm.wYear % 100, - (int)cal_tm.wMonth, - (int)cal_tm.wDay, - (int)cal_tm.wHour, - (int)cal_tm.wMinute, - (int)cal_tm.wSecond); -#else - struct tm cal_tm; - struct tm* cal_tm_ptr; - time_t tm; - - time(&tm); - -#ifdef HAVE_LOCALTIME_R - localtime_r(&tm, &cal_tm); - cal_tm_ptr = &cal_tm; -#else - cal_tm_ptr = localtime(&tm); -#endif - sprintf(buf, "%02d%02d%02d_%2d_%02d_%02d", - cal_tm_ptr->tm_year % 100, - cal_tm_ptr->tm_mon + 1, - cal_tm_ptr->tm_mday, - cal_tm_ptr->tm_hour, - cal_tm_ptr->tm_min, - cal_tm_ptr->tm_sec); -#endif -} - -/************************************************************** -Returns current year, month, day. */ - -void -ut_get_year_month_day( -/*==================*/ - ulint* year, /* out: current year */ - ulint* month, /* out: month */ - ulint* day) /* out: day */ -{ -#ifdef __WIN__ - SYSTEMTIME cal_tm; - - GetLocalTime(&cal_tm); - - *year = (ulint)cal_tm.wYear; - *month = (ulint)cal_tm.wMonth; - *day = (ulint)cal_tm.wDay; -#else - struct tm cal_tm; - struct tm* cal_tm_ptr; - time_t tm; - - time(&tm); - -#ifdef HAVE_LOCALTIME_R - localtime_r(&tm, &cal_tm); - cal_tm_ptr = &cal_tm; -#else - cal_tm_ptr = localtime(&tm); -#endif - *year = (ulint)cal_tm_ptr->tm_year + 1900; - *month = (ulint)cal_tm_ptr->tm_mon + 1; - *day = (ulint)cal_tm_ptr->tm_mday; -#endif -} - -/***************************************************************** -Runs an idle loop on CPU. The argument gives the desired delay -in microseconds on 100 MHz Pentium + Visual C++. 
*/ - -ulint -ut_delay( -/*=====*/ - /* out: dummy value */ - ulint delay) /* in: delay in microseconds on 100 MHz Pentium */ -{ - ulint i, j; - - j = 0; - - for (i = 0; i < delay * 50; i++) { - PAUSE_INSTRUCTION(); - j += i; - } - - if (ut_always_false) { - ut_always_false = (ibool) j; - } - - return(j); -} - -/***************************************************************** -Prints the contents of a memory buffer in hex and ascii. */ - -void -ut_print_buf( -/*=========*/ - FILE* file, /* in: file where to print */ - const void* buf, /* in: memory buffer */ - ulint len) /* in: length of the buffer */ -{ - const byte* data; - ulint i; - - UNIV_MEM_ASSERT_RW(buf, len); - - fprintf(file, " len %lu; hex ", len); - - for (data = (const byte*)buf, i = 0; i < len; i++) { - fprintf(file, "%02lx", (ulong)*data++); - } - - fputs("; asc ", file); - - data = (const byte*)buf; - - for (i = 0; i < len; i++) { - int c = (int) *data++; - putc(isprint(c) ? c : ' ', file); - } - - putc(';', file); -} - -/**************************************************************** -Sort function for ulint arrays. */ - -void -ut_ulint_sort(ulint* arr, ulint* aux_arr, ulint low, ulint high) -/*============================================================*/ -{ - UT_SORT_FUNCTION_BODY(ut_ulint_sort, arr, aux_arr, low, high, - ut_ulint_cmp); -} - -/***************************************************************** -Calculates fast the number rounded up to the nearest power of 2. */ - -ulint -ut_2_power_up( -/*==========*/ - /* out: first power of 2 which is >= n */ - ulint n) /* in: number != 0 */ -{ - ulint res; - - res = 1; - - ut_ad(n > 0); - - while (res < n) { - res = res * 2; - } - - return(res); -} - -/************************************************************************** -Outputs a NUL-terminated file name, quoted with apostrophes. 
*/ - -void -ut_print_filename( -/*==============*/ - FILE* f, /* in: output stream */ - const char* name) /* in: name to print */ -{ - putc('\'', f); - for (;;) { - int c = *name++; - switch (c) { - case 0: - goto done; - case '\'': - putc(c, f); - /* fall through */ - default: - putc(c, f); - } - } -done: - putc('\'', f); -} - -/************************************************************************** -Outputs a fixed-length string, quoted as an SQL identifier. -If the string contains a slash '/', the string will be -output as two identifiers separated by a period (.), -as in SQL database_name.identifier. */ - -void -ut_print_name( -/*==========*/ - FILE* f, /* in: output stream */ - trx_t* trx, /* in: transaction */ - ibool table_id,/* in: TRUE=print a table name, - FALSE=print other identifier */ - const char* name) /* in: name to print */ -{ - ut_print_namel(f, trx, table_id, name, strlen(name)); -} - -/************************************************************************** -Outputs a fixed-length string, quoted as an SQL identifier. -If the string contains a slash '/', the string will be -output as two identifiers separated by a period (.), -as in SQL database_name.identifier. */ - -void -ut_print_namel( -/*===========*/ - FILE* f, /* in: output stream */ - trx_t* trx, /* in: transaction (NULL=no quotes) */ - ibool table_id,/* in: TRUE=print a table name, - FALSE=print other identifier */ - const char* name, /* in: name to print */ - ulint namelen)/* in: length of name */ -{ -#ifdef UNIV_HOTBACKUP - fwrite(name, 1, namelen, f); -#else - if (table_id) { - char* slash = memchr(name, '/', namelen); - if (!slash) { - - goto no_db_name; - } - - /* Print the database name and table name separately. 
*/ - innobase_print_identifier(f, trx, TRUE, name, slash - name); - putc('.', f); - innobase_print_identifier(f, trx, TRUE, slash + 1, - namelen - (slash - name) - 1); - } else { -no_db_name: - innobase_print_identifier(f, trx, table_id, name, namelen); - } -#endif -} - -/************************************************************************** -Catenate files. */ - -void -ut_copy_file( -/*=========*/ - FILE* dest, /* in: output file */ - FILE* src) /* in: input file to be appended to output */ -{ - long len = ftell(src); - char buf[4096]; - - rewind(src); - do { - size_t maxs = len < (long) sizeof buf - ? (size_t) len - : sizeof buf; - size_t size = fread(buf, 1, maxs, src); - fwrite(buf, 1, size, dest); - len -= (long) size; - if (size < maxs) { - break; - } - } while (len > 0); -} - -/************************************************************************** -snprintf(). */ - -#ifdef __WIN__ -#include <stdarg.h> -int -ut_snprintf( - /* out: number of characters that would - have been printed if the size were - unlimited, not including the terminating - '\0'. */ - char* str, /* out: string */ - size_t size, /* in: str size */ - const char* fmt, /* in: format */ - ...) /* in: format values */ -{ - int res; - va_list ap1; - va_list ap2; - - va_start(ap1, fmt); - va_start(ap2, fmt); - - res = _vscprintf(fmt, ap1); - ut_a(res != -1); - - if (size > 0) { - _vsnprintf(str, size, fmt, ap2); - - if ((size_t) res >= size) { - str[size - 1] = '\0'; - } - } - - va_end(ap1); - va_end(ap2); - - return(res); -} -#endif /* __WIN__ */ diff --git a/storage/innobase/ut/ut0vec.c b/storage/innobase/ut/ut0vec.c deleted file mode 100644 index e0d3e84d4a2..00000000000 --- a/storage/innobase/ut/ut0vec.c +++ /dev/null @@ -1,54 +0,0 @@ -#include "ut0vec.h" -#ifdef UNIV_NONINL -#include "ut0vec.ic" -#endif -#include <string.h> - -/******************************************************************** -Create a new vector with the given initial size. 
*/ - -ib_vector_t* -ib_vector_create( -/*=============*/ - /* out: vector */ - mem_heap_t* heap, /* in: heap */ - ulint size) /* in: initial size */ -{ - ib_vector_t* vec; - - ut_a(size > 0); - - vec = mem_heap_alloc(heap, sizeof(*vec)); - - vec->heap = heap; - vec->data = mem_heap_alloc(heap, sizeof(void*) * size); - vec->used = 0; - vec->total = size; - - return(vec); -} - -/******************************************************************** -Push a new element to the vector, increasing its size if necessary. */ - -void -ib_vector_push( -/*===========*/ - ib_vector_t* vec, /* in: vector */ - void* elem) /* in: data element */ -{ - if (vec->used >= vec->total) { - void** new_data; - ulint new_total = vec->total * 2; - - new_data = mem_heap_alloc(vec->heap, - sizeof(void*) * new_total); - memcpy(new_data, vec->data, sizeof(void*) * vec->total); - - vec->data = new_data; - vec->total = new_total; - } - - vec->data[vec->used] = elem; - vec->used++; -} diff --git a/storage/innobase/ut/ut0wqueue.c b/storage/innobase/ut/ut0wqueue.c deleted file mode 100644 index 7e090e89a4f..00000000000 --- a/storage/innobase/ut/ut0wqueue.c +++ /dev/null @@ -1,92 +0,0 @@ -#include "ut0wqueue.h" - -/******************************************************************** -Create a new work queue. */ - -ib_wqueue_t* -ib_wqueue_create(void) -/*===================*/ - /* out: work queue */ -{ - ib_wqueue_t* wq = mem_alloc(sizeof(ib_wqueue_t)); - - mutex_create(&wq->mutex, SYNC_WORK_QUEUE); - - wq->items = ib_list_create(); - wq->event = os_event_create(NULL); - - return(wq); -} - -/******************************************************************** -Free a work queue. 
*/ - -void -ib_wqueue_free( -/*===========*/ - ib_wqueue_t* wq) /* in: work queue */ -{ - ut_a(!ib_list_get_first(wq->items)); - - mutex_free(&wq->mutex); - ib_list_free(wq->items); - os_event_free(wq->event); - - mem_free(wq); -} - -/******************************************************************** -Add a work item to the queue. */ - -void -ib_wqueue_add( -/*==========*/ - ib_wqueue_t* wq, /* in: work queue */ - void* item, /* in: work item */ - mem_heap_t* heap) /* in: memory heap to use for allocating the - list node */ -{ - mutex_enter(&wq->mutex); - - ib_list_add_last(wq->items, item, heap); - os_event_set(wq->event); - - mutex_exit(&wq->mutex); -} - -/******************************************************************** -Wait for a work item to appear in the queue. */ - -void* -ib_wqueue_wait( - /* out: work item */ - ib_wqueue_t* wq) /* in: work queue */ -{ - ib_list_node_t* node; - - for (;;) { - os_event_wait(wq->event); - - mutex_enter(&wq->mutex); - - node = ib_list_get_first(wq->items); - - if (node) { - ib_list_remove(wq->items, node); - - if (!ib_list_get_first(wq->items)) { - /* We must reset the event when the list - gets emptied. */ - os_event_reset(wq->event); - } - - break; - } - - mutex_exit(&wq->mutex); - } - - mutex_exit(&wq->mutex); - - return(node->data); -} diff --git a/storage/innobase/win_atomics32_test.c b/storage/innobase/win_atomics32_test.c deleted file mode 100644 index fcb88d6b54e..00000000000 --- a/storage/innobase/win_atomics32_test.c +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright (C) 2009 Sun Microsystems AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ - -#include <windows.h> - -int main() -{ - volatile long var32 = 0; - long add32 = 1; - long old32 = 0; - long exch32 = 1; - long ret_value; - - ret_value = InterlockedExchangeAdd(&var32, add32); - ret_value = InterlockedCompareExchange(&var32, exch32, old32); - MemoryBarrier(); - return EXIT_SUCCESS; -} diff --git a/storage/innobase/win_atomics64_test.c b/storage/innobase/win_atomics64_test.c deleted file mode 100644 index 123cb6d98cf..00000000000 --- a/storage/innobase/win_atomics64_test.c +++ /dev/null @@ -1,30 +0,0 @@ -/* Copyright (C) 2009 Sun Microsystems AB - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; version 2 of the License. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ - -#include <windows.h> - -int main() -{ - volatile long long var64 = 0; - long long add64 = 1; - long long old64 = 0; - long long exch64 = 1; - long long ret_value; - - ret_value = InterlockedExchangeAdd64(&var64, add64); - ret_value = InterlockedCompareExchange64(&var64, exch64, old64); - MemoryBarrier(); - return EXIT_SUCCESS; -} |