diff options
Diffstat (limited to 'innobase/include')
49 files changed, 2177 insertions, 840 deletions
diff --git a/innobase/include/Makefile.am b/innobase/include/Makefile.am index 102d25566da..eb1e3b72877 100644 --- a/innobase/include/Makefile.am +++ b/innobase/include/Makefile.am @@ -49,7 +49,7 @@ noinst_HEADERS = btr0btr.h btr0btr.ic btr0cur.h btr0cur.ic \ thr0loc.h thr0loc.ic trx0purge.h trx0purge.ic trx0rec.h \ trx0rec.ic trx0roll.h trx0roll.ic trx0rseg.h trx0rseg.ic \ trx0sys.h trx0sys.ic trx0trx.h trx0trx.ic trx0types.h \ - trx0undo.h trx0undo.ic univ.i \ + trx0undo.h trx0undo.ic trx0xa.h univ.i \ usr0sess.h usr0sess.ic usr0types.h ut0byte.h ut0byte.ic \ ut0dbg.h ut0lst.h ut0mem.h ut0mem.ic ut0rnd.h ut0rnd.ic \ ut0sort.h ut0ut.h ut0ut.ic diff --git a/innobase/include/btr0btr.h b/innobase/include/btr0btr.h index 8606fcd2a5c..0b19e64d4e0 100644 --- a/innobase/include/btr0btr.h +++ b/innobase/include/btr0btr.h @@ -155,7 +155,8 @@ ulint btr_node_ptr_get_child_page_no( /*===========================*/ /* out: child node address */ - rec_t* rec); /* in: node pointer record */ + rec_t* rec, /* in: node pointer record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /**************************************************************** Creates the root node for a new index tree. */ @@ -167,6 +168,7 @@ btr_create( ulint type, /* in: type of the index */ ulint space, /* in: space where created */ dulint index_id,/* in: index id */ + ibool comp, /* in: TRUE=compact page format */ mtr_t* mtr); /* in: mini-transaction handle */ /**************************************************************** Frees a B-tree except the root page, which MUST be freed after this @@ -210,8 +212,9 @@ Reorganizes an index page. */ void btr_page_reorganize( /*================*/ - page_t* page, /* in: page to be reorganized */ - mtr_t* mtr); /* in: mtr */ + page_t* page, /* in: page to be reorganized */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Decides if the page should be split at the convergence point of inserts converging to left. */ @@ -273,6 +276,7 @@ void btr_set_min_rec_mark( /*=================*/ rec_t* rec, /* in: record */ + ibool comp, /* in: TRUE=compact page format */ mtr_t* mtr); /* in: mtr */ /***************************************************************** Deletes on the upper level the node pointer to a page. */ @@ -332,6 +336,7 @@ btr_parse_set_min_rec_mark( /* out: end of log record or NULL */ byte* ptr, /* in: buffer */ byte* end_ptr,/* in: buffer end */ + ibool comp, /* in: TRUE=compact page format */ page_t* page, /* in: page or NULL */ mtr_t* mtr); /* in: mtr or NULL */ /*************************************************************** @@ -340,11 +345,12 @@ Parses a redo log record of reorganizing a page. */ byte* btr_parse_page_reorganize( /*======================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ /****************************************************************** Gets the number of pages in a B-tree. */ diff --git a/innobase/include/btr0btr.ic b/innobase/include/btr0btr.ic index b0aa0756307..1d1f97d3668 100644 --- a/innobase/include/btr0btr.ic +++ b/innobase/include/btr0btr.ic @@ -183,17 +183,18 @@ ulint btr_node_ptr_get_child_page_no( /*===========================*/ /* out: child node address */ - rec_t* rec) /* in: node pointer record */ + rec_t* rec, /* in: node pointer record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - ulint n_fields; byte* field; ulint len; ulint page_no; - n_fields = rec_get_n_fields(rec); + ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec)); /* The child address is in the last field */ - field = rec_get_nth_field(rec, n_fields - 1, &len); + field = rec_get_nth_field(rec, offsets, + rec_offs_n_fields(offsets) - 1, &len); ut_ad(len == 4); diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h index f1334656d53..0a8d8ceaeb7 100644 --- a/innobase/include/btr0cur.h +++ b/innobase/include/btr0cur.h @@ -34,7 +34,7 @@ page_cur_t* btr_cur_get_page_cur( /*=================*/ /* out: pointer to page cursor component */ - btr_cur_t* cursor); /* in: tree cursor */ + btr_cur_t* cursor);/* in: tree cursor */ /************************************************************* Returns the record pointer of a tree cursor. */ UNIV_INLINE @@ -42,14 +42,14 @@ rec_t* btr_cur_get_rec( /*============*/ /* out: pointer to record */ - btr_cur_t* cursor); /* in: tree cursor */ + btr_cur_t* cursor);/* in: tree cursor */ /************************************************************* Invalidates a tree cursor by setting record pointer to NULL. */ UNIV_INLINE void btr_cur_invalidate( /*===============*/ - btr_cur_t* cursor); /* in: tree cursor */ + btr_cur_t* cursor);/* in: tree cursor */ /************************************************************* Returns the page of a tree cursor. */ UNIV_INLINE @@ -57,7 +57,7 @@ page_t* btr_cur_get_page( /*=============*/ /* out: pointer to page */ - btr_cur_t* cursor); /* in: tree cursor */ + btr_cur_t* cursor);/* in: tree cursor */ /************************************************************* Returns the tree of a cursor. */ UNIV_INLINE @@ -65,7 +65,7 @@ dict_tree_t* btr_cur_get_tree( /*=============*/ /* out: tree */ - btr_cur_t* cursor); /* in: tree cursor */ + btr_cur_t* cursor);/* in: tree cursor */ /************************************************************* Positions a tree cursor at a given record. */ UNIV_INLINE @@ -283,8 +283,9 @@ only used by the insert buffer insert merge mechanism. */ void btr_cur_del_unmark_for_ibuf( /*========================*/ - rec_t* rec, /* in: record to delete unmark */ - mtr_t* mtr); /* in: mtr */ + rec_t* rec, /* in: record to delete unmark */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Tries to compress a page of the tree on the leaf level. It is assumed that mtr holds an x-latch on the tree and on the cursor page. To avoid @@ -361,10 +362,11 @@ Parses a redo log record of updating a record in-place. */ byte* btr_cur_parse_update_in_place( /*==========================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page); /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + dict_index_t* index); /* in: index corresponding to page */ /******************************************************************** Parses the redo log record for delete marking or unmarking of a clustered index record. */ @@ -372,10 +374,11 @@ index record. */ byte* btr_cur_parse_del_mark_set_clust_rec( /*=================================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page); /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: index corresponding to page */ + page_t* page); /* in: page or NULL */ /******************************************************************** Parses the redo log record for delete marking or unmarking of a secondary index record. */ @@ -383,10 +386,11 @@ index record. */ byte* btr_cur_parse_del_mark_set_sec_rec( /*===============================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page); /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: index corresponding to page */ + page_t* page); /* in: page or NULL */ /*********************************************************************** Estimates the number of rows in a given index range. */ @@ -417,9 +421,10 @@ to free the field. */ void btr_cur_mark_extern_inherited_fields( /*=================================*/ - rec_t* rec, /* in: record in a clustered index */ - upd_t* update, /* in: update vector */ - mtr_t* mtr); /* in: mtr */ + rec_t* rec, /* in: record in a clustered index */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update, /* in: update vector */ + mtr_t* mtr); /* in: mtr */ /*********************************************************************** The complement of the previous function: in an update entry may inherit some externally stored fields from a record. We must mark them as inherited @@ -456,6 +461,7 @@ btr_store_big_rec_extern_fields( dict_index_t* index, /* in: index of rec; the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets, /* in: rec_get_offsets(rec, index) */ big_rec_t* big_rec_vec, /* in: vector containing fields to be stored externally */ mtr_t* local_mtr); /* in: mtr containing the latch to @@ -496,6 +502,7 @@ btr_rec_free_externally_stored_fields( dict_index_t* index, /* in: index of the data, the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ibool do_not_free_inherited,/* in: TRUE if called in a rollback and we do not want to free inherited fields */ @@ -510,6 +517,7 @@ btr_rec_copy_externally_stored_field( /*=================================*/ /* out: the field copied to heap */ rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint no, /* in: field number */ ulint* len, /* out: length of the field */ mem_heap_t* heap); /* in: mem heap */ @@ -540,10 +548,10 @@ ulint btr_push_update_extern_fields( /*==========================*/ /* out: number of values stored in ext_vect */ - ulint* ext_vect, /* in: array of ulints, must be preallocated - to have place for all fields in rec */ - rec_t* rec, /* in: record */ - upd_t* update); /* in: update vector */ + ulint* ext_vect,/* in: array of ulints, must be preallocated + to have space for all fields in rec */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update);/* in: update vector or NULL */ /*######################################################################*/ diff --git a/innobase/include/btr0cur.ic b/innobase/include/btr0cur.ic index a3a04b60c45..dcad3e9e14d 100644 --- a/innobase/include/btr0cur.ic +++ b/innobase/include/btr0cur.ic @@ -134,17 +134,15 @@ btr_cur_can_delete_without_compress( /* out: TRUE if can be deleted without recommended compression */ btr_cur_t* cursor, /* in: btr cursor */ + ulint rec_size,/* in: rec_get_size(btr_cur_get_rec(cursor))*/ mtr_t* mtr) /* in: mtr */ { - ulint rec_size; page_t* page; ut_ad(mtr_memo_contains(mtr, buf_block_align( btr_cur_get_page(cursor)), MTR_MEMO_PAGE_X_FIX)); - rec_size = rec_get_size(btr_cur_get_rec(cursor)); - page = btr_cur_get_page(cursor); if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT) diff --git a/innobase/include/btr0pcur.h b/innobase/include/btr0pcur.h index 81f19af4d40..6384222be51 100644 --- a/innobase/include/btr0pcur.h +++ b/innobase/include/btr0pcur.h @@ -462,6 +462,7 @@ struct btr_pcur_struct{ contains an initial segment of the latest record cursor was positioned either on, before, or after */ + ulint old_n_fields; /* number of fields in old_rec */ ulint rel_pos; /* BTR_PCUR_ON, BTR_PCUR_BEFORE, or BTR_PCUR_AFTER, depending on whether cursor was on, before, or after the diff --git a/innobase/include/btr0sea.h b/innobase/include/btr0sea.h index ce4140ecf92..78e88a24083 100644 --- a/innobase/include/btr0sea.h +++ b/innobase/include/btr0sea.h @@ -77,8 +77,10 @@ parameters as page (this often happens when a page is split). */ void btr_search_move_or_delete_hash_entries( /*===================================*/ - page_t* new_page, /* in: records are copied to this page */ - page_t* page); /* in: index page */ + page_t* new_page, /* in: records are copied + to this page */ + page_t* page, /* in: index page */ + dict_index_t* index); /* in: record descriptor */ /************************************************************************ Drops a page hash index. */ @@ -129,8 +131,8 @@ Validates the search system. */ ibool btr_search_validate(void); -/*=====================*/ - +/*======================*/ + /* out: TRUE if ok */ /* Search info directions */ #define BTR_SEA_NO_DIRECTION 1 diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h index 53599d03c73..5ee323f1b1e 100644 --- a/innobase/include/buf0buf.h +++ b/innobase/include/buf0buf.h @@ -52,11 +52,15 @@ Created 11/5/1995 Heikki Tuuri /* Modes for buf_page_get_known_nowait */ #define BUF_MAKE_YOUNG 51 #define BUF_KEEP_OLD 52 +/* Magic value to use instead of checksums when they are disabled */ +#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL extern buf_pool_t* buf_pool; /* The buffer pool of the database */ extern ibool buf_debug_prints;/* If this is set TRUE, the program prints info whenever read or flush occurs */ +extern ulint srv_buf_pool_write_requests; /* variable to count write request + issued */ /************************************************************************ Creates the buffer pool. */ @@ -496,6 +500,12 @@ void buf_print(void); /*============*/ /************************************************************************* +Returns the number of latched pages in the buffer pool. */ + +ulint +buf_get_latched_pages_number(void); +/*==============================*/ +/************************************************************************* Returns the number of pending buf pool ios. */ ulint @@ -731,6 +741,8 @@ struct buf_block_struct{ buffer pool which are index pages, but this flag is not set because we do not keep track of all pages */ + dict_index_t* index; /* index for which the adaptive + hash index has been created */ /* 2. Page flushing fields */ UT_LIST_NODE_T(buf_block_t) flush_list; diff --git a/innobase/include/buf0flu.ic b/innobase/include/buf0flu.ic index d6dbdcc0865..9a8a021e029 100644 --- a/innobase/include/buf0flu.ic +++ b/innobase/include/buf0flu.ic @@ -61,6 +61,8 @@ buf_flush_note_modification( ut_ad(ut_dulint_cmp(block->oldest_modification, mtr->start_lsn) <= 0); } + + ++srv_buf_pool_write_requests; } /************************************************************************ diff --git a/innobase/include/data0type.ic b/innobase/include/data0type.ic index 946b646ffbf..0b92ffbe7f1 100644 --- a/innobase/include/data0type.ic +++ b/innobase/include/data0type.ic @@ -8,6 +8,17 @@ Created 1/16/1996 Heikki Tuuri #include "mach0data.h" +/********************************************************************** +Determines whether the given character set is of variable length. + +NOTE: the prototype of this function is copied from ha_innodb.cc! If you change +this function, you MUST change also the prototype here! */ +extern +ibool +innobase_is_mb_cset( +/*================*/ + ulint cset); /* in: MySQL charset-collation code */ + /************************************************************************* Sets a data type structure. */ UNIV_INLINE @@ -149,8 +160,10 @@ dtype_new_store_for_order_and_null_size( bytes where we store the info */ dtype_t* type) /* in: type struct */ { - ut_ad(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); - +#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE +#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE" +#endif + buf[0] = (byte)(type->mtype & 0xFFUL); if (type->prtype & DATA_BINARY_TYPE) { @@ -166,10 +179,12 @@ dtype_new_store_for_order_and_null_size( mach_write_to_2(buf + 2, type->len & 0xFFFFUL); + ut_ad(dtype_get_charset_coll(type->prtype) < 256); mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype)); - /* Note that the second last byte is left unused, because the - charset-collation code is always < 256 */ + if (type->prtype & DATA_NOT_NULL) { + buf[4] |= 128; + } } /************************************************************************** @@ -211,20 +226,26 @@ dtype_new_read_for_order_and_null_size( { ulint charset_coll; - ut_ad(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); +#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE +#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE" +#endif type->mtype = buf[0] & 63; type->prtype = buf[1]; if (buf[0] & 128) { - type->prtype = type->prtype | DATA_BINARY_TYPE; + type->prtype |= DATA_BINARY_TYPE; + } + + if (buf[4] & 128) { + type->prtype |= DATA_NOT_NULL; } type->len = mach_read_from_2(buf + 2); mach_read_from_2(buf + 4); - charset_coll = mach_read_from_2(buf + 4); + charset_coll = mach_read_from_2(buf + 4) & 0x7fff; if (dtype_is_string_type(type->mtype)) { ut_a(charset_coll < 256); @@ -257,23 +278,39 @@ dtype_get_fixed_size( mtype = dtype_get_mtype(type); switch (mtype) { + case DATA_SYS: +#ifdef UNIV_DEBUG + switch (type->prtype & DATA_MYSQL_TYPE_MASK) { + default: + ut_ad(0); + return(0); + case DATA_ROW_ID: + ut_ad(type->len == DATA_ROW_ID_LEN); + break; + case DATA_TRX_ID: + ut_ad(type->len == DATA_TRX_ID_LEN); + break; + case DATA_ROLL_PTR: + ut_ad(type->len == DATA_ROLL_PTR_LEN); + break; + case DATA_MIX_ID: + ut_ad(type->len == DATA_MIX_ID_LEN); + break; + } +#endif /* UNIV_DEBUG */ case DATA_CHAR: case DATA_FIXBINARY: case DATA_INT: case DATA_FLOAT: case DATA_DOUBLE: case DATA_MYSQL: - return(dtype_get_len(type)); - - case DATA_SYS: if (type->prtype == DATA_ROW_ID) { - return(DATA_ROW_ID_LEN); - } else if (type->prtype == DATA_TRX_ID) { - return(DATA_TRX_ID_LEN); - } else if (type->prtype == DATA_ROLL_PTR) { - return(DATA_ROLL_PTR_LEN); - } else { - return(0); + if ((type->prtype & DATA_BINARY_TYPE) + || !innobase_is_mb_cset( + dtype_get_charset_coll( + type->prtype))) { + return(dtype_get_len(type)); } + /* fall through for variable-length charsets */ case DATA_VARCHAR: case DATA_BINARY: case DATA_DECIMAL: diff --git a/innobase/include/dict0dict.h b/innobase/include/dict0dict.h index ca632691450..eaf5b06b2a9 100644 --- a/innobase/include/dict0dict.h +++ b/innobase/include/dict0dict.h @@ -639,6 +639,16 @@ dict_index_get_sys_col_pos( dict_index_t* index, /* in: index */ ulint type); /* in: DATA_ROW_ID, ... */ /*********************************************************************** +Adds a column to index. */ + +void +dict_index_add_col( +/*===============*/ + dict_index_t* index, /* in: index */ + dict_col_t* col, /* in: column */ + ulint order, /* in: order criterion */ + ulint prefix_len); /* in: column prefix length */ +/*********************************************************************** Copies types of fields contained in index to tuple. */ void @@ -647,18 +657,6 @@ dict_index_copy_types( dtuple_t* tuple, /* in: data tuple */ dict_index_t* index, /* in: index */ ulint n_fields); /* in: number of field types to copy */ -/************************************************************************ -Gets the value of a system column in a clustered index record. The clustered -index must contain the system column: if the index is unique, row id is -not contained there! */ -UNIV_INLINE -dulint -dict_index_rec_get_sys_col( -/*=======================*/ - /* out: system column value */ - dict_index_t* index, /* in: clustered index describing the record */ - ulint type, /* in: column type: DATA_ROLL_PTR, ... */ - rec_t* rec); /* in: record */ /************************************************************************* Gets the index tree where the index is stored. */ UNIV_INLINE @@ -720,7 +718,7 @@ dict_tree_find_index_for_tuple( dtuple_t* tuple); /* in: tuple for which to find index */ /*********************************************************************** Checks if a table which is a mixed cluster member owns a record. */ -UNIV_INLINE + ibool dict_is_mixed_table_rec( /*====================*/ @@ -770,6 +768,7 @@ dict_tree_copy_rec_order_prefix( /* out: pointer to the prefix record */ dict_tree_t* tree, /* in: index tree */ rec_t* rec, /* in: record for which to copy prefix */ + ulint* n_fields,/* out: number of fields copied */ byte** buf, /* in/out: memory buffer for the copied prefix, or NULL */ ulint* buf_size);/* in/out: buffer size */ @@ -782,6 +781,7 @@ dict_tree_build_data_tuple( /* out, own: data tuple */ dict_tree_t* tree, /* in: index tree */ rec_t* rec, /* in: record for which to build data tuple */ + ulint n_fields,/* in: number of data fields */ mem_heap_t* heap); /* in: memory heap where tuple created */ /************************************************************************* Gets the space id of the root of the index tree. */ diff --git a/innobase/include/dict0dict.ic b/innobase/include/dict0dict.ic index 85e4aaf1a05..928a693f860 100644 --- a/innobase/include/dict0dict.ic +++ b/innobase/include/dict0dict.ic @@ -9,7 +9,6 @@ Created 1/8/1996 Heikki Tuuri #include "dict0load.h" #include "trx0undo.h" #include "trx0sys.h" -#include "rem0rec.h" /************************************************************************* Gets the column data type. */ @@ -168,7 +167,7 @@ dict_table_get_sys_col( col = dict_table_get_nth_col(table, table->n_cols - DATA_N_SYS_COLS + sys); ut_ad(col->type.mtype == DATA_SYS); - ut_ad(col->type.prtype == sys); + ut_ad(col->type.prtype == (sys | DATA_NOT_NULL)); return(col); } @@ -312,49 +311,6 @@ dict_index_get_sys_col_pos( dict_table_get_sys_col_no(index->table, type))); } -/************************************************************************ -Gets the value of a system column in a clustered index record. The clustered -index must contain the system column: if the index is unique, row id is -not contained there! */ -UNIV_INLINE -dulint -dict_index_rec_get_sys_col( -/*=======================*/ - /* out: system column value */ - dict_index_t* index, /* in: clustered index describing the record */ - ulint type, /* in: column type: DATA_ROLL_PTR, ... */ - rec_t* rec) /* in: record */ -{ - ulint pos; - byte* field; - ulint len; - - ut_ad(index); - ut_ad(index->type & DICT_CLUSTERED); - - pos = dict_index_get_sys_col_pos(index, type); - - ut_ad(pos != ULINT_UNDEFINED); - - field = rec_get_nth_field(rec, pos, &len); - - if (type == DATA_ROLL_PTR) { - ut_ad(len == 7); - - return(trx_read_roll_ptr(field)); - } else if (type == DATA_TRX_ID) { - - return(trx_read_trx_id(field)); - } else if (type == DATA_MIX_ID) { - - return(mach_dulint_read_compressed(field)); - } else { - ut_a(type == DATA_ROW_ID); - - return(mach_read_from_6(field)); - } -} - /************************************************************************* Gets the index tree where the index is stored. */ UNIV_INLINE @@ -662,28 +618,3 @@ dict_table_get_index( return(index); } - -/*********************************************************************** -Checks if a table which is a mixed cluster member owns a record. */ -UNIV_INLINE -ibool -dict_is_mixed_table_rec( -/*====================*/ - /* out: TRUE if the record belongs to this - table */ - dict_table_t* table, /* in: table in a mixed cluster */ - rec_t* rec) /* in: user record in the clustered index */ -{ - byte* mix_id_field; - ulint len; - - mix_id_field = rec_get_nth_field(rec, table->mix_len, &len); - - if ((len != table->mix_id_len) - || (0 != ut_memcmp(table->mix_id_buf, mix_id_field, len))) { - - return(FALSE); - } - - return(TRUE); -} diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h index 1e496a25477..670b3445a55 100644 --- a/innobase/include/dict0mem.h +++ b/innobase/include/dict0mem.h @@ -54,7 +54,8 @@ dict_mem_table_create( of the table is placed; this parameter is ignored if the table is made a member of a cluster */ - ulint n_cols); /* in: number of columns */ + ulint n_cols, /* in: number of columns */ + ibool comp); /* in: TRUE=compact page format */ /************************************************************************** Creates a cluster memory object. */ @@ -171,6 +172,13 @@ struct dict_field_struct{ DICT_MAX_COL_PREFIX_LEN; NOTE that in the UTF-8 charset, MySQL sets this to 3 * the prefix len in UTF-8 chars */ + ulint fixed_len; /* 0 or the fixed length of the + column if smaller than + DICT_MAX_COL_PREFIX_LEN */ + ulint fixed_offs; /* offset to the field, or + ULINT_UNDEFINED if it is not fixed + within the record (due to preceding + variable-length fields) */ }; /* Data structure for an index tree */ @@ -225,6 +233,7 @@ struct dict_index_struct{ ulint n_def; /* number of fields defined so far */ ulint n_fields;/* number of fields in the index */ dict_field_t* fields; /* array of field descriptions */ + ulint n_nullable;/* number of nullable fields */ UT_LIST_NODE_T(dict_index_t) indexes;/* list of indexes of the table */ dict_tree_t* tree; /* index tree struct */ @@ -320,6 +329,7 @@ struct dict_table_struct{ ibool tablespace_discarded;/* this flag is set TRUE when the user calls DISCARD TABLESPACE on this table, and reset to FALSE in IMPORT TABLESPACE */ + ibool comp; /* flag: TRUE=compact page format */ hash_node_t name_hash; /* hash chain node */ hash_node_t id_hash; /* hash chain node */ ulint n_def; /* number of columns defined so far */ diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h index c1a127aadca..aa1ec5c25a5 100644 --- a/innobase/include/fil0fil.h +++ b/innobase/include/fil0fil.h @@ -89,6 +89,8 @@ extern fil_addr_t fil_addr_null; #define FIL_TABLESPACE 501 #define FIL_LOG 502 +extern ulint fil_n_log_flushes; + extern ulint fil_n_pending_log_flushes; extern ulint fil_n_pending_tablespace_flushes; diff --git a/innobase/include/lock0lock.h b/innobase/include/lock0lock.h index 1fd7492d517..710c945375c 100644 --- a/innobase/include/lock0lock.h +++ b/innobase/include/lock0lock.h @@ -47,7 +47,8 @@ lock_sec_rec_some_has_impl_off_kernel( /* out: transaction which has the x-lock, or NULL */ rec_t* rec, /* in: user record */ - dict_index_t* index); /* in: secondary index */ + dict_index_t* index, /* in: secondary index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /************************************************************************* Checks if some transaction has an implicit x-lock on a record in a clustered index. */ @@ -58,7 +59,8 @@ lock_clust_rec_some_has_impl( /* out: transaction which has the x-lock, or NULL */ rec_t* rec, /* in: user record */ - dict_index_t* index); /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /***************************************************************** Resets the lock bits for a single record. Releases transactions waiting for lock requests here. */ @@ -275,6 +277,7 @@ lock_clust_rec_modify_check_and_lock( does nothing */ rec_t* rec, /* in: record which should be modified */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ que_thr_t* thr); /* in: query thread */ /************************************************************************* Checks if locks of other transactions prevent an immediate modify @@ -308,6 +311,7 @@ lock_sec_rec_read_check_and_lock( which should be read or passed over by a read cursor */ dict_index_t* index, /* in: secondary index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ulint mode, /* in: mode of the lock which the read cursor should set on records: LOCK_S or LOCK_X; the latter is possible in SELECT FOR UPDATE */ @@ -333,6 +337,34 @@ lock_clust_rec_read_check_and_lock( which should be read or passed over by a read cursor */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ + ulint mode, /* in: mode of the lock which the read cursor + should set on records: LOCK_S or LOCK_X; the + latter is possible in SELECT FOR UPDATE */ + ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or + LOCK_REC_NOT_GAP */ + que_thr_t* thr); /* in: query thread */ +/************************************************************************* +Checks if locks of other transactions prevent an immediate read, or passing +over by a read cursor, of a clustered index record. If they do, first tests +if the query thread should anyway be suspended for some reason; if not, then +puts the transaction and the query thread to the lock wait state and inserts a +waiting request for a record lock to the lock queue. Sets the requested mode +lock on the record. This is an alternative version of +lock_clust_rec_read_check_and_lock() that does not require the parameter +"offsets". */ + +ulint +lock_clust_rec_read_check_and_lock_alt( +/*===================================*/ + /* out: DB_SUCCESS, DB_LOCK_WAIT, + DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */ + ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set, + does nothing */ + rec_t* rec, /* in: user record or page supremum record + which should be read or passed over by a read + cursor */ + dict_index_t* index, /* in: clustered index */ ulint mode, /* in: mode of the lock which the read cursor should set on records: LOCK_S or LOCK_X; the latter is possible in SELECT FOR UPDATE */ @@ -350,6 +382,7 @@ lock_clust_rec_cons_read_sees( rec_t* rec, /* in: user record which should be read or passed over by a read cursor */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ read_view_t* view); /* in: consistent read view */ /************************************************************************* Checks that a non-clustered index record is seen in a consistent read. */ @@ -499,6 +532,7 @@ lock_check_trx_id_sanity( dulint trx_id, /* in: trx id */ rec_t* rec, /* in: user record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets, /* in: rec_get_offsets(rec, index) */ ibool has_kernel_mutex);/* in: TRUE if the caller owns the kernel mutex */ /************************************************************************* @@ -509,7 +543,8 @@ lock_rec_queue_validate( /*====================*/ /* out: TRUE if ok */ rec_t* rec, /* in: record to look at */ - dict_index_t* index); /* in: index, or NULL if not known */ + dict_index_t* index, /* in: index, or NULL if not known */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /************************************************************************* Prints info of a table lock. */ @@ -577,6 +612,8 @@ extern lock_sys_t* lock_sys; #define LOCK_TABLE 16 /* these type values should be so high that */ #define LOCK_REC 32 /* they can be ORed to the lock mode */ #define LOCK_TABLE_EXP 80 /* explicit table lock (80 = 16 + 64) */ +#define LOCK_TABLE_TRANSACTIONAL 144 + /* transactional table lock (144 = 16 + 128)*/ #define LOCK_TYPE_MASK 0xF0UL /* mask used to extract lock type from the type_mode field in a lock */ /* Waiting lock flag */ diff --git a/innobase/include/lock0lock.ic b/innobase/include/lock0lock.ic index fabc9256401..c7a71bb45d8 100644 --- a/innobase/include/lock0lock.ic +++ b/innobase/include/lock0lock.ic @@ -60,7 +60,8 @@ lock_clust_rec_some_has_impl( /* out: transaction which has the x-lock, or NULL */ rec_t* rec, /* in: user record */ - dict_index_t* index) /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { dulint trx_id; @@ -70,7 +71,7 @@ lock_clust_rec_some_has_impl( ut_ad(index->type & DICT_CLUSTERED); ut_ad(page_rec_is_user_rec(rec)); - trx_id = row_get_rec_trx_id(rec, index); + trx_id = row_get_rec_trx_id(rec, index, offsets); if (trx_is_active(trx_id)) { /* The modifying or inserting transaction is active */ diff --git a/innobase/include/mtr0log.h b/innobase/include/mtr0log.h index 9c9c6f696e8..c0636ea1e1e 100644 --- a/innobase/include/mtr0log.h +++ b/innobase/include/mtr0log.h @@ -11,6 +11,7 @@ Created 12/7/1995 Heikki Tuuri #include "univ.i" #include "mtr0mtr.h" +#include "dict0types.h" /************************************************************ Writes 1 - 4 bytes to a file page buffered in the buffer pool. @@ -173,6 +174,38 @@ mlog_parse_string( byte* page); /* in: page where to apply the log record, or NULL */ +/************************************************************ +Opens a buffer for mlog, writes the initial log record and, +if needed, the field lengths of an index. Reserves space +for further log entries. The log entry must be closed with +mtr_close(). */ + +byte* +mlog_open_and_write_index( +/*======================*/ + /* out: buffer, NULL if log mode + MTR_LOG_NONE */ + mtr_t* mtr, /* in: mtr */ + byte* rec, /* in: index record or page */ + dict_index_t* index, /* in: record descriptor */ + byte type, /* in: log item type */ + ulint size); /* in: requested buffer size in bytes + (if 0, calls mlog_close() and returns NULL) */ + +/************************************************************ +Parses a log record written by mlog_open_and_write_index. */ + +byte* +mlog_parse_index( +/*=============*/ + /* out: parsed record end, + NULL if not a complete record */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + /* out: new value of log_ptr */ + ibool comp, /* in: TRUE=compact record format */ + dict_index_t** index); /* out, own: dummy index */ + /* Insert, update, and maybe other functions may use this value to define an extra mlog buffer size for variable size data */ #define MLOG_BUF_MARGIN 256 diff --git a/innobase/include/mtr0mtr.h b/innobase/include/mtr0mtr.h index e8c68a91dad..071279d5259 100644 --- a/innobase/include/mtr0mtr.h +++ b/innobase/include/mtr0mtr.h @@ -102,7 +102,31 @@ flag value must give the length also! */ file rename */ #define MLOG_FILE_DELETE ((byte)35) /* log record about an .ibd file deletion */ -#define MLOG_BIGGEST_TYPE ((byte)35) /* biggest value (used in +#define MLOG_COMP_REC_MIN_MARK ((byte)36) /* mark a compact index record + as the predefined minimum + record */ +#define MLOG_COMP_PAGE_CREATE ((byte)37) /* create a compact + index page */ +#define MLOG_COMP_REC_INSERT ((byte)38) /* compact record insert */ +#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39) + /* mark compact clustered index + record deleted */ +#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/* mark compact secondary index + record deleted */ +#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/* update of a compact record, + preserves record field sizes */ +#define MLOG_COMP_REC_DELETE ((byte)42) /* delete a compact record + from a page */ +#define MLOG_COMP_LIST_END_DELETE ((byte)43) /* delete compact record list + end on index page */ +#define MLOG_COMP_LIST_START_DELETE ((byte)44) /* delete compact record list + start on index page */ +#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45) + /* copy compact record list end + to a new created index page */ +#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /* reorganize an index page */ + +#define MLOG_BIGGEST_TYPE ((byte)46) /* biggest value (used in asserts) */ /******************************************************************* diff --git a/innobase/include/os0file.h b/innobase/include/os0file.h index d1439faf29f..599e78bab48 100644 --- a/innobase/include/os0file.h +++ b/innobase/include/os0file.h @@ -24,6 +24,9 @@ extern ibool os_aio_print_debug; extern ulint os_file_n_pending_preads; extern ulint os_file_n_pending_pwrites; +extern ulint os_n_pending_reads; +extern ulint os_n_pending_writes; + #ifdef __WIN__ /* We define always WIN_ASYNC_IO, and check at run-time whether diff --git a/innobase/include/os0proc.h b/innobase/include/os0proc.h index d0d3cf82e38..b0b72e18675 100644 --- a/innobase/include/os0proc.h +++ b/innobase/include/os0proc.h @@ -12,6 +12,11 @@ Created 9/30/1995 Heikki Tuuri #include "univ.i" +#ifdef UNIV_LINUX +#include <sys/ipc.h> +#include <sys/shm.h> +#endif + typedef void* os_process_t; typedef unsigned long int os_process_id_t; @@ -27,6 +32,10 @@ page size of an Intel x86 processor. We cannot use AWE with 2 MB or 4 MB pages. */ #define OS_AWE_X86_PAGE_SIZE 4096 +extern ibool os_use_large_pages; +/* Large page size. This may be a boot-time option on some platforms */ +extern ulint os_large_page_size; + /******************************************************************** Windows AWE support. Tries to enable the "lock pages in memory" privilege for the current process so that the current process can allocate memory-locked @@ -103,6 +112,25 @@ os_mem_alloc_nocache( /* out: allocated memory */ ulint n); /* in: number of bytes */ /******************************************************************** +Allocates large pages memory. */ + +void* +os_mem_alloc_large( +/*=================*/ + /* out: allocated memory */ + ulint n, /* in: number of bytes */ + ibool set_to_zero, /* in: TRUE if allocated memory should be set + to zero if UNIV_SET_MEM_TO_ZERO is defined */ + ibool assert_on_error); /* in: if TRUE, we crash mysqld if the memory + cannot be allocated */ +/******************************************************************** +Frees large pages memory. */ + +void +os_mem_free_large( +/*=================*/ +void *ptr); /* in: number of bytes */ +/******************************************************************** Sets the priority boost for threads released from waiting within the current process. */ diff --git a/innobase/include/page0cur.h b/innobase/include/page0cur.h index c85669ed4df..a693931968e 100644 --- a/innobase/include/page0cur.h +++ b/innobase/include/page0cur.h @@ -128,7 +128,8 @@ page_cur_tuple_insert( /* out: pointer to record if succeed, NULL otherwise */ page_cur_t* cursor, /* in: a page cursor */ - dtuple_t* tuple, /* in: pointer to a data tuple */ + dtuple_t* tuple, /* in: pointer to a data tuple */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr); /* in: mini-transaction handle */ /*************************************************************** Inserts a record next to page cursor. Returns pointer to inserted record if @@ -142,6 +143,7 @@ page_cur_rec_insert( otherwise */ page_cur_t* cursor, /* in: a page cursor */ rec_t* rec, /* in: record to insert */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr); /* in: mini-transaction handle */ /*************************************************************** Inserts a record next to page cursor. Returns pointer to inserted record if @@ -155,9 +157,9 @@ page_cur_insert_rec_low( /* out: pointer to record if succeed, NULL otherwise */ page_cur_t* cursor, /* in: a page cursor */ - dtuple_t* tuple, /* in: pointer to a data tuple or NULL */ - ulint data_size,/* in: data size of tuple */ - rec_t* rec, /* in: pointer to a physical record or NULL */ + dtuple_t* tuple, /* in: pointer to a data tuple or NULL */ + dict_index_t* index, /* in: record descriptor */ + rec_t* rec, /* in: pointer to a physical record or NULL */ mtr_t* mtr); /* in: mini-transaction handle */ /***************************************************************** Copies records from page to a newly created page, from a given record onward, @@ -166,10 +168,11 @@ including that record. Infimum and supremum records are not copied. */ void page_copy_rec_list_end_to_created_page( /*===================================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: first record to copy */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: first record to copy */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /*************************************************************** Deletes a record at the page cursor. The cursor is moved to the next record after the deleted one. */ @@ -178,6 +181,7 @@ void page_cur_delete_rec( /*================*/ page_cur_t* cursor, /* in: a page cursor */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr); /* in: mini-transaction handle */ /******************************************************************** Searches the right position for a page cursor. */ @@ -187,6 +191,7 @@ page_cur_search( /*============*/ /* out: number of matched fields on the left */ page_t* page, /* in: index page */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* tuple, /* in: data tuple */ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, or PAGE_CUR_GE */ @@ -198,6 +203,7 @@ void page_cur_search_with_match( /*=======================*/ page_t* page, /* in: index page */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* tuple, /* in: data tuple */ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, or PAGE_CUR_GE */ @@ -229,34 +235,37 @@ Parses a log record of a record insert on a page. */ byte* page_cur_parse_insert_rec( /*======================*/ - /* out: end of log record or NULL */ - ibool is_short,/* in: TRUE if short inserts */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ + /* out: end of log record or NULL */ + ibool is_short,/* in: TRUE if short inserts */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ /************************************************************** Parses a log record of copying a record list end to a new created page. */ byte* page_parse_copy_rec_list_to_created_page( /*=====================================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ /*************************************************************** Parses log record of a record delete on a page. */ byte* page_cur_parse_delete_rec( /*======================*/ - /* out: pointer to record end or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ + /* out: pointer to record end or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ /* Index page cursor */ diff --git a/innobase/include/page0cur.ic b/innobase/include/page0cur.ic index 39f8ab11513..03010fbd766 100644 --- a/innobase/include/page0cur.ic +++ b/innobase/include/page0cur.ic @@ -143,7 +143,7 @@ UNIV_INLINE void page_cur_move_to_prev( /*==================*/ - page_cur_t* cur) /* in: cursor; must not before first */ + page_cur_t* cur) /* in: page cursor, not before first */ { ut_ad(!page_cur_is_before_first(cur)); @@ -158,6 +158,7 @@ page_cur_search( /*============*/ /* out: number of matched fields on the left */ page_t* page, /* in: index page */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* tuple, /* in: data tuple */ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, or PAGE_CUR_GE */ @@ -170,7 +171,7 @@ page_cur_search( ut_ad(dtuple_check_typed(tuple)); - page_cur_search_with_match(page, tuple, mode, + page_cur_search_with_match(page, index, tuple, mode, &up_matched_fields, &up_matched_bytes, &low_matched_fields, @@ -190,16 +191,11 @@ page_cur_tuple_insert( /* out: pointer to record if succeed, NULL otherwise */ page_cur_t* cursor, /* in: a page cursor */ - dtuple_t* tuple, /* in: pointer to a data tuple */ + dtuple_t* tuple, /* in: pointer to a data tuple */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr) /* in: mini-transaction handle */ { - ulint data_size; - - ut_ad(dtuple_check_typed(tuple)); - - data_size = dtuple_get_data_size(tuple); - - return(page_cur_insert_rec_low(cursor, tuple, data_size, NULL, mtr)); + return(page_cur_insert_rec_low(cursor, tuple, index, NULL, mtr)); } /*************************************************************** @@ -214,8 +210,9 @@ page_cur_rec_insert( otherwise */ page_cur_t* cursor, /* in: a page cursor */ rec_t* rec, /* in: record to insert */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr) /* in: mini-transaction handle */ { - return(page_cur_insert_rec_low(cursor, NULL, 0, rec, mtr)); + return(page_cur_insert_rec_low(cursor, NULL, index, rec, mtr)); } diff --git a/innobase/include/page0page.h b/innobase/include/page0page.h index 969313614e3..d3ef8214eb6 100644 --- a/innobase/include/page0page.h +++ b/innobase/include/page0page.h @@ -37,7 +37,8 @@ typedef byte page_header_t; /*-----------------------------*/ #define PAGE_N_DIR_SLOTS 0 /* number of slots in page directory */ #define PAGE_HEAP_TOP 2 /* pointer to record heap top */ -#define PAGE_N_HEAP 4 /* number of records in the heap */ +#define PAGE_N_HEAP 4 /* number of records in the heap, + bit 15=flag: new-style compact page format */ #define PAGE_FREE 6 /* pointer to start of page free record list */ #define PAGE_GARBAGE 8 /* number of bytes in deleted records */ #define PAGE_LAST_INSERT 10 /* pointer to the last inserted record, or @@ -79,15 +80,24 @@ typedef byte page_header_t; #define PAGE_DATA (PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE) /* start of data on the page */ -#define PAGE_INFIMUM (PAGE_DATA + 1 + REC_N_EXTRA_BYTES) - /* offset of the page infimum record on the - page */ -#define PAGE_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_EXTRA_BYTES + 8) - /* offset of the page supremum record on the - page */ -#define PAGE_SUPREMUM_END (PAGE_SUPREMUM + 9) +#define PAGE_OLD_INFIMUM (PAGE_DATA + 1 + REC_N_OLD_EXTRA_BYTES) + /* offset of the page infimum record on an + old-style page */ +#define PAGE_OLD_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_OLD_EXTRA_BYTES + 8) + /* offset of the page supremum record on an + old-style page */ +#define PAGE_OLD_SUPREMUM_END (PAGE_OLD_SUPREMUM + 9) /* offset of the page supremum record end on - the page */ + an old-style page */ +#define PAGE_NEW_INFIMUM (PAGE_DATA + REC_N_NEW_EXTRA_BYTES) + /* offset of the page infimum record on a + new-style compact page */ +#define PAGE_NEW_SUPREMUM (PAGE_DATA + 2 * REC_N_NEW_EXTRA_BYTES + 8) + /* offset of the page supremum record on a + new-style compact page */ +#define PAGE_NEW_SUPREMUM_END (PAGE_NEW_SUPREMUM + 8) + /* offset of the page supremum record end on + a new-style compact page */ /*-----------------------------*/ /* Directions of cursor movement */ @@ -233,6 +243,7 @@ page_cmp_dtuple_rec_with_match( be page infimum or supremum, in which case matched-parameter values below are not affected */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint* matched_fields, /* in/out: number of already completely matched fields; when function returns contains the value for current comparison */ @@ -259,6 +270,22 @@ page_rec_get_n_recs_before( /* out: number of records */ rec_t* rec); /* in: the physical record */ /***************************************************************** +Gets the number of records in the heap. */ +UNIV_INLINE +ulint +page_dir_get_n_heap( +/*================*/ + /* out: number of user records */ + page_t* page); /* in: index page */ +/***************************************************************** +Sets the number of records in the heap. */ +UNIV_INLINE +void +page_dir_set_n_heap( +/*================*/ + page_t* page, /* in: index page */ + ulint n_heap);/* in: number of records */ +/***************************************************************** Gets the number of dir slots in directory. */ UNIV_INLINE ulint @@ -267,6 +294,15 @@ page_dir_get_n_slots( /* out: number of slots */ page_t* page); /* in: index page */ /***************************************************************** +Sets the number of dir slots in directory. */ +UNIV_INLINE +void +page_dir_set_n_slots( +/*=================*/ + /* out: number of slots */ + page_t* page, /* in: index page */ + ulint n_slots);/* in: number of slots */ +/***************************************************************** Gets pointer to nth directory slot. */ UNIV_INLINE page_dir_slot_t* @@ -333,7 +369,16 @@ ulint page_dir_find_owner_slot( /*=====================*/ /* out: the directory slot number */ - rec_t* rec); /* in: the physical record */ + rec_t* rec); /* in: the physical record */ +/**************************************************************** +Determine whether the page is in new-style compact format. */ +UNIV_INLINE +ibool +page_is_comp( +/*=========*/ + /* out: TRUE if the page is in compact format + FALSE if it is in old-style format */ + page_t* page); /* in: index page */ /**************************************************************** Gets the pointer to the next record on the page. */ UNIV_INLINE @@ -359,9 +404,10 @@ UNIV_INLINE rec_t* page_rec_get_prev( /*==============*/ - /* out: pointer to previous record */ - rec_t* rec); /* in: pointer to record, must not be page - infimum */ + /* out: pointer to previous record */ + rec_t* rec); /* in: pointer to record, + must not be page infimum */ + /**************************************************************** TRUE if the record is a user record on the page. */ UNIV_INLINE @@ -446,9 +492,11 @@ page_get_max_insert_size_after_reorganize( Calculates free space if a page is emptied. */ UNIV_INLINE ulint -page_get_free_space_of_empty(void); -/*==============================*/ - /* out: free space */ +page_get_free_space_of_empty( +/*=========================*/ + /* out: free space */ + ibool comp) /* in: TRUE=compact page format */ + __attribute__((const)); /**************************************************************** Returns the sum of the sizes of the records in the record list excluding the infimum and supremum records. */ @@ -464,20 +512,23 @@ Allocates a block of memory from an index page. */ byte* page_mem_alloc( /*===========*/ - /* out: pointer to start of allocated - buffer, or NULL if allocation fails */ - page_t* page, /* in: index page */ - ulint need, /* in: number of bytes needed */ - ulint* heap_no);/* out: this contains the heap number - of the allocated record if allocation succeeds */ + /* out: pointer to start of allocated + buffer, or NULL if allocation fails */ + page_t* page, /* in: index page */ + ulint need, /* in: number of bytes needed */ + dict_index_t* index, /* in: record descriptor */ + ulint* heap_no);/* out: this contains the heap number + of the allocated record + if allocation succeeds */ /**************************************************************** Puts a record to free list. */ UNIV_INLINE void page_mem_free( /*==========*/ - page_t* page, /* in: index page */ - rec_t* rec); /* in: pointer to the (origin of) record */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: pointer to the (origin of) record */ + dict_index_t* index); /* in: record descriptor */ /************************************************************** The index page creation function. */ @@ -487,7 +538,8 @@ page_create( /* out: pointer to the page */ buf_frame_t* frame, /* in: a buffer frame where the page is created */ - mtr_t* mtr); /* in: mini-transaction handle */ + mtr_t* mtr, /* in: mini-transaction handle */ + ibool comp); /* in: TRUE=compact page format */ /***************************************************************** Differs from page_copy_rec_list_end, because this function does not touch the lock table and max trx id on page. */ @@ -495,10 +547,11 @@ touch the lock table and max trx id on page. */ void page_copy_rec_list_end_no_locks( /*============================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Copies records from page to new_page, from the given record onward, including that record. Infimum and supremum records are not copied. @@ -507,10 +560,11 @@ The records are copied to the start of the record list on new_page. */ void page_copy_rec_list_end( /*===================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Copies records from page to new_page, up to the given record, NOT including that record. Infimum and supremum records are not copied. @@ -519,10 +573,11 @@ The records are copied to the end of the record list on new_page. */ void page_copy_rec_list_start( /*=====================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Deletes records from a page from a given record onward, including that record. The infimum and supremum records are not deleted. */ @@ -530,14 +585,15 @@ The infimum and supremum records are not deleted. */ void page_delete_rec_list_end( /*=====================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - ulint n_recs, /* in: number of records to delete, or ULINT_UNDEFINED - if not known */ - ulint size, /* in: the sum of the sizes of the records in the end - of the chain to delete, or ULINT_UNDEFINED if not - known */ - mtr_t* mtr); /* in: mtr */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + ulint n_recs, /* in: number of records to delete, + or ULINT_UNDEFINED if not known */ + ulint size, /* in: the sum of the sizes of the + records in the end of the chain to + delete, or ULINT_UNDEFINED if not known */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Deletes records from page, up to the given record, NOT including that record. Infimum and supremum records are not deleted. */ @@ -545,9 +601,10 @@ that record. Infimum and supremum records are not deleted. */ void page_delete_rec_list_start( /*=======================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr); /* in: mtr */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Moves record list end to another page. Moved records include split_rec. */ @@ -555,10 +612,11 @@ split_rec. */ void page_move_rec_list_end( /*===================*/ - page_t* new_page, /* in: index page where to move */ - page_t* page, /* in: index page */ - rec_t* split_rec, /* in: first record to move */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page where to move */ + page_t* page, /* in: index page */ + rec_t* split_rec, /* in: first record to move */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Moves record list start to another page. Moved records do not include split_rec. */ @@ -566,10 +624,11 @@ split_rec. */ void page_move_rec_list_start( /*=====================*/ - page_t* new_page, /* in: index page where to move */ - page_t* page, /* in: index page */ - rec_t* split_rec, /* in: first record not to move */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page where to move */ + page_t* page, /* in: index page */ + rec_t* split_rec, /* in: first record not to move */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /******************************************************************** Splits a directory slot which owns too many records. */ @@ -595,13 +654,16 @@ Parses a log record of a record list end or start deletion. */ byte* page_parse_delete_rec_list( /*=======================*/ - /* out: end of log record or NULL */ - byte type, /* in: MLOG_LIST_END_DELETE or - MLOG_LIST_START_DELETE */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ + /* out: end of log record or NULL */ + byte type, /* in: MLOG_LIST_END_DELETE, + MLOG_LIST_START_DELETE, + MLOG_COMP_LIST_END_DELETE or + MLOG_COMP_LIST_START_DELETE */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ /*************************************************************** Parses a redo log record of creating a page. */ @@ -611,6 +673,7 @@ page_parse_create( /* out: end of log record or NULL */ byte* ptr, /* in: buffer */ byte* end_ptr,/* in: buffer end */ + ibool comp, /* in: TRUE=compact page format */ page_t* page, /* in: page or NULL */ mtr_t* mtr); /* in: mtr or NULL */ /**************************************************************** @@ -620,7 +683,8 @@ the index page context. */ void page_rec_print( /*===========*/ - rec_t* rec); + rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: record descriptor */ /******************************************************************* This is used to print the contents of the directory for debugging purposes. */ @@ -637,8 +701,9 @@ debugging purposes. */ void page_print_list( /*============*/ - page_t* page, /* in: index page */ - ulint pr_n); /* in: print n first and n last entries */ + page_t* page, /* in: index page */ + dict_index_t* index, /* in: dictionary index of the page */ + ulint pr_n); /* in: print n first and n last entries */ /******************************************************************* Prints the info in a page header. */ @@ -653,9 +718,12 @@ debugging purposes. */ void page_print( /*======*/ - page_t* page, /* in: index page */ - ulint dn, /* in: print dn first and last entries in directory */ - ulint rn); /* in: print rn first and last records on page */ + page_t* page, /* in: index page */ + dict_index_t* index, /* in: dictionary index of the page */ + ulint dn, /* in: print dn first and last entries + in directory */ + ulint rn); /* in: print rn first and last records + in directory */ /******************************************************************* The following is used to validate a record on a page. This function differs from rec_validate as it can also check the n_owned field and @@ -664,8 +732,9 @@ the heap_no field. */ ibool page_rec_validate( /*==============*/ - /* out: TRUE if ok */ - rec_t* rec); /* in: record on the page */ + /* out: TRUE if ok */ + rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /******************************************************************* Checks that the first directory slot points to the infimum record and the last to the supremum. This function is intended to track if the diff --git a/innobase/include/page0page.ic b/innobase/include/page0page.ic index 3d2bf3b090e..a63b5ca4238 100644 --- a/innobase/include/page0page.ic +++ b/innobase/include/page0page.ic @@ -73,7 +73,8 @@ page_header_set_field( { ut_ad(page); ut_ad(field <= PAGE_N_RECS); - ut_ad(val < UNIV_PAGE_SIZE); + ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE); + ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE); mach_write_to_2(page + PAGE_HEADER + field, val); } @@ -152,6 +153,19 @@ page_header_reset_last_insert( } /**************************************************************** +Determine whether the page is in new-style compact format. */ +UNIV_INLINE +ibool +page_is_comp( +/*=========*/ + /* out: TRUE if the page is in compact format + FALSE if it is in old-style format */ + page_t* page) /* in: index page */ +{ + return(!!(page_header_get_field(page, PAGE_N_HEAP) & 0x8000)); +} + +/**************************************************************** Gets the first record on the page. */ UNIV_INLINE rec_t* @@ -162,7 +176,11 @@ page_get_infimum_rec( { ut_ad(page); - return(page + PAGE_INFIMUM); + if (page_is_comp(page)) { + return(page + PAGE_NEW_INFIMUM); + } else { + return(page + PAGE_OLD_INFIMUM); + } } /**************************************************************** @@ -176,7 +194,11 @@ page_get_supremum_rec( { ut_ad(page); - return(page + PAGE_SUPREMUM); + if (page_is_comp(page)) { + return(page + PAGE_NEW_SUPREMUM); + } else { + return(page + PAGE_OLD_SUPREMUM); + } } /**************************************************************** @@ -309,6 +331,7 @@ page_cmp_dtuple_rec_with_match( be page infimum or supremum, in which case matched-parameter values below are not affected */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint* matched_fields, /* in/out: number of already completely matched fields; when function returns contains the value for current comparison */ @@ -320,6 +343,7 @@ page_cmp_dtuple_rec_with_match( page_t* page; ut_ad(dtuple_check_typed(dtuple)); + ut_ad(rec_offs_validate(rec, NULL, offsets)); page = buf_frame_align(rec); @@ -328,7 +352,7 @@ page_cmp_dtuple_rec_with_match( } else if (rec == page_get_supremum_rec(page)) { return(-1); } else { - return(cmp_dtuple_rec_with_match(dtuple, rec, + return(cmp_dtuple_rec_with_match(dtuple, rec, offsets, matched_fields, matched_bytes)); } @@ -358,6 +382,45 @@ page_dir_get_n_slots( { return(page_header_get_field(page, PAGE_N_DIR_SLOTS)); } +/***************************************************************** +Sets the number of dir slots in directory. */ +UNIV_INLINE +void +page_dir_set_n_slots( +/*=================*/ + /* out: number of slots */ + page_t* page, /* in: index page */ + ulint n_slots)/* in: number of slots */ +{ + page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots); +} + +/***************************************************************** +Gets the number of records in the heap. */ +UNIV_INLINE +ulint +page_dir_get_n_heap( +/*================*/ + /* out: number of user records */ + page_t* page) /* in: index page */ +{ + return(page_header_get_field(page, PAGE_N_HEAP) & 0x7fff); +} + +/***************************************************************** +Sets the number of records in the heap. */ +UNIV_INLINE +void +page_dir_set_n_heap( +/*================*/ + page_t* page, /* in: index page */ + ulint n_heap) /* in: number of records */ +{ + ut_ad(n_heap < 0x8000); + + page_header_set_field(page, PAGE_N_HEAP, n_heap | (0x8000 & + page_header_get_field(page, PAGE_N_HEAP))); +} /***************************************************************** Gets pointer to nth directory slot. */ @@ -369,7 +432,7 @@ page_dir_get_nth_slot( page_t* page, /* in: index page */ ulint n) /* in: position */ { - ut_ad(page_header_get_field(page, PAGE_N_DIR_SLOTS) > n); + ut_ad(page_dir_get_n_slots(page) > n); return(page + UNIV_PAGE_SIZE - PAGE_DIR - (n + 1) * PAGE_DIR_SLOT_SIZE); @@ -431,7 +494,8 @@ page_dir_slot_get_n_owned( /* out: number of records */ page_dir_slot_t* slot) /* in: page directory slot */ { - return(rec_get_n_owned(page_dir_slot_get_rec(slot))); + return(rec_get_n_owned(page_dir_slot_get_rec(slot), + page_is_comp(buf_frame_align(slot)))); } /******************************************************************* @@ -444,7 +508,8 @@ page_dir_slot_set_n_owned( ulint n) /* in: number of records owned by the slot */ { - rec_set_n_owned(page_dir_slot_get_rec(slot), n); + rec_set_n_owned(page_dir_slot_get_rec(slot), + page_is_comp(buf_frame_align(slot)), n); } /**************************************************************** @@ -477,7 +542,7 @@ page_rec_get_next( page = buf_frame_align(rec); - offs = rec_get_next_offs(rec); + offs = rec_get_next_offs(rec, page_is_comp(page)); if (offs >= UNIV_PAGE_SIZE) { fprintf(stderr, @@ -513,6 +578,7 @@ page_rec_set_next( infimum */ { page_t* page; + ulint offs; ut_ad(page_rec_check(rec)); ut_a((next == NULL) @@ -523,11 +589,13 @@ page_rec_set_next( ut_ad(rec != page_get_supremum_rec(page)); ut_ad(next != page_get_infimum_rec(page)); - if (next == NULL) { - rec_set_next_offs(rec, 0); + if (next) { + offs = (ulint) (next - page); } else { - rec_set_next_offs(rec, (ulint)(next - page)); + offs = 0; } + + rec_set_next_offs(rec, page_is_comp(page), offs); } /**************************************************************** @@ -545,6 +613,7 @@ page_rec_get_prev( rec_t* rec2; rec_t* prev_rec = NULL; page_t* page; + ibool comp; ut_ad(page_rec_check(rec)); @@ -559,6 +628,7 @@ page_rec_get_prev( slot = page_dir_get_nth_slot(page, slot_no - 1); rec2 = page_dir_slot_get_rec(slot); + comp = page_is_comp(page); while (rec != rec2) { prev_rec = rec2; @@ -579,9 +649,12 @@ page_rec_find_owner_rec( /* out: the owner record */ rec_t* rec) /* in: the physical record */ { + ibool comp; + ut_ad(page_rec_check(rec)); + comp = page_is_comp(buf_frame_align(rec)); - while (rec_get_n_owned(rec) == 0) { + while (rec_get_n_owned(rec, comp) == 0) { rec = page_rec_get_next(rec); } @@ -601,7 +674,9 @@ page_get_data_size( ulint ret; ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP) - - PAGE_SUPREMUM_END + - (page_is_comp(page) + ? PAGE_NEW_SUPREMUM_END + : PAGE_OLD_SUPREMUM_END) - page_header_get_field(page, PAGE_GARBAGE)); ut_ad(ret < UNIV_PAGE_SIZE); @@ -613,12 +688,13 @@ page_get_data_size( Calculates free space if a page is emptied. */ UNIV_INLINE ulint -page_get_free_space_of_empty(void) -/*==============================*/ +page_get_free_space_of_empty( +/*=========================*/ /* out: free space */ + ibool comp) /* in: TRUE=compact page layout */ { return((ulint)(UNIV_PAGE_SIZE - - PAGE_SUPREMUM_END + - (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END) - PAGE_DIR - 2 * PAGE_DIR_SLOT_SIZE)); } @@ -640,13 +716,16 @@ page_get_max_insert_size( { ulint occupied; ulint free_space; + ibool comp; + + comp = page_is_comp(page); occupied = page_header_get_field(page, PAGE_HEAP_TOP) - - PAGE_SUPREMUM_END + - (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END) + page_dir_calc_reserved_space( - n_recs + (page_header_get_field(page, PAGE_N_HEAP) - 2)); + n_recs + page_dir_get_n_heap(page) - 2); - free_space = page_get_free_space_of_empty(); + free_space = page_get_free_space_of_empty(comp); /* Above the 'n_recs +' part reserves directory space for the new inserted records; the '- 2' excludes page infimum and supremum @@ -673,11 +752,14 @@ page_get_max_insert_size_after_reorganize( { ulint occupied; ulint free_space; + ibool comp; + + comp = page_is_comp(page); occupied = page_get_data_size(page) + page_dir_calc_reserved_space(n_recs + page_get_n_recs(page)); - free_space = page_get_free_space_of_empty(); + free_space = page_get_free_space_of_empty(comp); if (occupied > free_space) { @@ -693,11 +775,12 @@ UNIV_INLINE void page_mem_free( /*==========*/ - page_t* page, /* in: index page */ - rec_t* rec) /* in: pointer to the (origin of) record */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: pointer to the (origin of) record */ + dict_index_t* index) /* in: record descriptor */ { - rec_t* free; - ulint garbage; + rec_t* free; + ulint garbage; free = page_header_get_ptr(page, PAGE_FREE); @@ -707,7 +790,7 @@ page_mem_free( garbage = page_header_get_field(page, PAGE_GARBAGE); page_header_set_field(page, PAGE_GARBAGE, - garbage + rec_get_size(rec)); + garbage + rec_get_size(rec, index)); } #ifdef UNIV_MATERIALIZE diff --git a/innobase/include/que0que.h b/innobase/include/que0que.h index e1874edcaf2..298ec494750 100644 --- a/innobase/include/que0que.h +++ b/innobase/include/que0que.h @@ -359,6 +359,7 @@ struct que_thr_struct{ the control came */ ulint resource; /* resource usage of the query thread thus far */ + ulint lock_state; /* lock state of thread (table or row) */ }; #define QUE_THR_MAGIC_N 8476583 @@ -482,6 +483,11 @@ struct que_fork_struct{ #define QUE_THR_SUSPENDED 7 #define QUE_THR_ERROR 8 +/* Query thread lock states */ +#define QUE_THR_LOCK_NOLOCK 0 +#define QUE_THR_LOCK_ROW 1 +#define QUE_THR_LOCK_TABLE 2 + /* From where the cursor position is counted */ #define QUE_CUR_NOT_DEFINED 1 #define QUE_CUR_START 2 diff --git a/innobase/include/rem0cmp.h b/innobase/include/rem0cmp.h index 712e263350e..1b1ee26b809 100644 --- a/innobase/include/rem0cmp.h +++ b/innobase/include/rem0cmp.h @@ -90,6 +90,7 @@ cmp_dtuple_rec_with_match( dtuple in some of the common fields, or which has an equal number or more fields than dtuple */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint* matched_fields, /* in/out: number of already completely matched fields; when function returns, contains the value for current comparison */ @@ -107,7 +108,8 @@ cmp_dtuple_rec( less than rec, respectively; see the comments for cmp_dtuple_rec_with_match */ dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /****************************************************************** Checks if a dtuple is a prefix of a record. The last field in dtuple is allowed to be a prefix of the corresponding field in the record. */ @@ -116,23 +118,9 @@ ibool cmp_dtuple_is_prefix_of_rec( /*========================*/ /* out: TRUE if prefix */ - dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec); /* in: physical record */ -/****************************************************************** -Compares a prefix of a data tuple to a prefix of a physical record for -equality. If there are less fields in rec than parameter n_fields, FALSE -is returned. NOTE that n_fields_cmp of dtuple does not affect this -comparison. */ - -ibool -cmp_dtuple_rec_prefix_equal( -/*========================*/ - /* out: TRUE if equal */ dtuple_t* dtuple, /* in: data tuple */ rec_t* rec, /* in: physical record */ - ulint n_fields); /* in: number of fields which should be - compared; must not exceed the number of - fields in dtuple */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /***************************************************************** This function is used to compare two physical records. Only the common first fields are compared, and if an externally stored field is @@ -146,6 +134,8 @@ cmp_rec_rec_with_match( first fields are compared */ rec_t* rec1, /* in: physical record */ rec_t* rec2, /* in: physical record */ + const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ + const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ dict_index_t* index, /* in: data dictionary index */ ulint* matched_fields, /* in/out: number of already completely matched fields; when the function returns, @@ -167,6 +157,8 @@ cmp_rec_rec( first fields are compared */ rec_t* rec1, /* in: physical record */ rec_t* rec2, /* in: physical record */ + const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ + const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ dict_index_t* index); /* in: data dictionary index */ diff --git a/innobase/include/rem0cmp.ic b/innobase/include/rem0cmp.ic index 75cb3ef04e8..b86534e0a6a 100644 --- a/innobase/include/rem0cmp.ic +++ b/innobase/include/rem0cmp.ic @@ -57,10 +57,13 @@ cmp_rec_rec( first fields are compared */ rec_t* rec1, /* in: physical record */ rec_t* rec2, /* in: physical record */ + const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ + const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ dict_index_t* index) /* in: data dictionary index */ { ulint match_f = 0; ulint match_b = 0; - return(cmp_rec_rec_with_match(rec1, rec2, index, &match_f, &match_b)); + return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index, + &match_f, &match_b)); } diff --git a/innobase/include/rem0rec.h b/innobase/include/rem0rec.h index 86bf263170f..ab89b912523 100644 --- a/innobase/include/rem0rec.h +++ b/innobase/include/rem0rec.h @@ -23,9 +23,18 @@ Created 5/30/1994 Heikki Tuuri info bits of a record */ #define REC_INFO_MIN_REC_FLAG 0x10UL -/* Number of extra bytes in a record, in addition to the data and the -offsets */ -#define REC_N_EXTRA_BYTES 6 +/* Number of extra bytes in an old-style record, +in addition to the data and the offsets */ +#define REC_N_OLD_EXTRA_BYTES 6 +/* Number of extra bytes in a new-style record, +in addition to the data and the offsets */ +#define REC_N_NEW_EXTRA_BYTES 5 + +/* Record status values */ +#define REC_STATUS_ORDINARY 0 +#define REC_STATUS_NODE_PTR 1 +#define REC_STATUS_INFIMUM 2 +#define REC_STATUS_SUPREMUM 3 /********************************************************** The following function is used to get the offset of the @@ -36,7 +45,8 @@ rec_get_next_offs( /*==============*/ /* out: the page offset of the next chained record */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp); /* in: TRUE=compact page format */ /********************************************************** The following function is used to set the next record offset field of the record. */ @@ -45,17 +55,28 @@ void rec_set_next_offs( /*==============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint next); /* in: offset of the next record */ /********************************************************** The following function is used to get the number of fields -in the record. */ +in an old-style record. */ UNIV_INLINE ulint -rec_get_n_fields( -/*=============*/ +rec_get_n_fields_old( +/*=================*/ /* out: number of data fields */ rec_t* rec); /* in: physical record */ /********************************************************** +The following function is used to get the number of fields +in a record. */ +UNIV_INLINE +ulint +rec_get_n_fields( +/*=============*/ + /* out: number of data fields */ + rec_t* rec, /* in: physical record */ + dict_index_t* index); /* in: record descriptor */ +/********************************************************** The following function is used to get the number of records owned by the previous directory record. */ UNIV_INLINE @@ -63,7 +84,8 @@ ulint rec_get_n_owned( /*============*/ /* out: number of owned records */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp); /* in: TRUE=compact page format */ /********************************************************** The following function is used to set the number of owned records. */ @@ -72,6 +94,7 @@ void rec_set_n_owned( /*============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint n_owned); /* in: the number of owned */ /********************************************************** The following function is used to retrieve the info bits of @@ -81,7 +104,8 @@ ulint rec_get_info_bits( /*==============*/ /* out: info bits */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp); /* in: TRUE=compact page format */ /********************************************************** The following function is used to set the info bits of a record. */ UNIV_INLINE @@ -89,15 +113,26 @@ void rec_set_info_bits( /*==============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint bits); /* in: info bits */ /********************************************************** -Gets the value of the deleted falg in info bits. */ +The following function retrieves the status bits of a new-style record. */ UNIV_INLINE -ibool -rec_info_bits_get_deleted_flag( -/*===========================*/ - /* out: TRUE if deleted flag set */ - ulint info_bits); /* in: info bits from a record */ +ulint +rec_get_status( +/*===========*/ + /* out: status bits */ + rec_t* rec); /* in: physical record */ + +/********************************************************** +The following function is used to set the status bits of a new-style record. */ +UNIV_INLINE +void +rec_set_status( +/*===========*/ + rec_t* rec, /* in: physical record */ + ulint bits); /* in: info bits */ + /********************************************************** The following function tells if record is delete marked. */ UNIV_INLINE @@ -105,7 +140,8 @@ ibool rec_get_deleted_flag( /*=================*/ /* out: TRUE if delete marked */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp); /* in: TRUE=compact page format */ /********************************************************** The following function is used to set the deleted bit. */ UNIV_INLINE @@ -113,8 +149,25 @@ void rec_set_deleted_flag( /*=================*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ibool flag); /* in: TRUE if delete marked */ /********************************************************** +The following function tells if a new-style record is a node pointer. */ +UNIV_INLINE +ibool +rec_get_node_ptr_flag( +/*=================*/ + /* out: TRUE if node pointer */ + rec_t* rec); /* in: physical record */ +/********************************************************** +The following function is used to flag a record as a node pointer. */ +UNIV_INLINE +void +rec_set_node_ptr_flag( +/*=================*/ + rec_t* rec, /* in: physical record */ + ibool flag); /* in: TRUE if the record is a node pointer */ +/********************************************************** The following function is used to get the order number of the record in the heap of the index page. */ UNIV_INLINE @@ -122,7 +175,8 @@ ulint rec_get_heap_no( /*=============*/ /* out: heap order number */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp); /* in: TRUE=compact page format */ /********************************************************** The following function is used to set the heap number field in the record. */ @@ -131,6 +185,7 @@ void rec_set_heap_no( /*=============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint heap_no);/* in: the heap number */ /********************************************************** The following function is used to test whether the data offsets @@ -141,31 +196,65 @@ rec_get_1byte_offs_flag( /*====================*/ /* out: TRUE if 1-byte form */ rec_t* rec); /* in: physical record */ +/********************************************************** +The following function determines the offsets to each field +in the record. It can reuse a previously allocated array. */ + +ulint* +rec_get_offsets_func( +/*=================*/ + /* out: the new offsets */ + rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint* offsets,/* in: array consisting of offsets[0] + allocated elements, or an array from + rec_get_offsets(), or NULL */ + ulint n_fields,/* in: maximum number of initialized fields + (ULINT_UNDEFINED if all fields) */ + mem_heap_t** heap, /* in/out: memory heap */ + const char* file, /* in: file name where called */ + ulint line); /* in: line number where called */ + +#define rec_get_offsets(rec,index,offsets,n,heap) \ + rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__) + +/**************************************************************** +Validates offsets returned by rec_get_offsets(). */ +UNIV_INLINE +ibool +rec_offs_validate( +/*==============*/ + /* out: TRUE if valid */ + rec_t* rec, /* in: record or NULL */ + dict_index_t* index, /* in: record descriptor or NULL */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/**************************************************************** +Updates debug data in offsets, in order to avoid bogus +rec_offs_validate() failures. */ +UNIV_INLINE +void +rec_offs_make_valid( +/*================*/ + rec_t* rec, /* in: record */ + dict_index_t* index,/* in: record descriptor */ + ulint* offsets);/* in: array returned by rec_get_offsets() */ + /**************************************************************** The following function is used to get a pointer to the nth -data field in the record. */ +data field in an old-style record. */ byte* -rec_get_nth_field( -/*==============*/ +rec_get_nth_field_old( +/*==================*/ /* out: pointer to the field */ rec_t* rec, /* in: record */ ulint n, /* in: index of the field */ ulint* len); /* out: length of the field; UNIV_SQL_NULL if SQL null */ /**************************************************************** -Return field length or UNIV_SQL_NULL. */ -UNIV_INLINE -ulint -rec_get_nth_field_len( -/*==================*/ - /* out: length of the field; UNIV_SQL_NULL if SQL - null */ - rec_t* rec, /* in: record */ - ulint n); /* in: index of the field */ -/**************************************************************** -Gets the physical size of a field. Also an SQL null may have a field of -size > 0, if the data type is of a fixed size. */ +Gets the physical size of an old-style field. +Also an SQL null may have a field of size > 0, +if the data type is of a fixed size. */ UNIV_INLINE ulint rec_get_nth_field_size( @@ -173,131 +262,194 @@ rec_get_nth_field_size( /* out: field size in bytes */ rec_t* rec, /* in: record */ ulint n); /* in: index of the field */ -/*************************************************************** -Gets the value of the ith field extern storage bit. If it is TRUE -it means that the field is stored on another page. */ +/**************************************************************** +The following function is used to get a pointer to the nth +data field in an old-style record. */ +UNIV_INLINE +byte* +rec_get_nth_field( +/*==============*/ + /* out: pointer to the field */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n, /* in: index of the field */ + ulint* len); /* out: length of the field; UNIV_SQL_NULL + if SQL null */ +/********************************************************** +Determine if the offsets are for a record in the new +compact format. */ UNIV_INLINE ibool -rec_get_nth_field_extern_bit( -/*=========================*/ - /* in: TRUE or FALSE */ - rec_t* rec, /* in: record */ - ulint i); /* in: ith field */ +rec_offs_comp( +/*==========*/ + /* out: TRUE if compact format */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/********************************************************** +Returns TRUE if the nth field of rec is SQL NULL. */ +UNIV_INLINE +ibool +rec_offs_nth_null( +/*==============*/ + /* out: TRUE if SQL NULL */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n); /* in: nth field */ +/********************************************************** +Returns TRUE if the extern bit is set in nth field of rec. */ +UNIV_INLINE +ibool +rec_offs_nth_extern( +/*================*/ + /* out: TRUE if externally stored */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n); /* in: nth field */ +/********************************************************** +Gets the physical size of a field. */ +UNIV_INLINE +ulint +rec_offs_nth_size( +/*==============*/ + /* out: length of field */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n); /* in: nth field */ + /********************************************************** Returns TRUE if the extern bit is set in any of the fields of rec. */ UNIV_INLINE ibool -rec_contains_externally_stored_field( -/*=================================*/ - /* out: TRUE if a field is stored externally */ - rec_t* rec); /* in: record */ +rec_offs_any_extern( +/*================*/ + /* out: TRUE if a field is stored externally */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /*************************************************************** Sets the value of the ith field extern storage bit. */ - +UNIV_INLINE void rec_set_nth_field_extern_bit( /*=========================*/ - rec_t* rec, /* in: record */ - ulint i, /* in: ith field */ - ibool val, /* in: value to set */ - mtr_t* mtr); /* in: mtr holding an X-latch to the page where - rec is, or NULL; in the NULL case we do not - write to log about the change */ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + ulint i, /* in: ith field */ + ibool val, /* in: value to set */ + mtr_t* mtr); /* in: mtr holding an X-latch to the page + where rec is, or NULL; in the NULL case + we do not write to log about the change */ /*************************************************************** Sets TRUE the extern storage bits of fields mentioned in an array. */ void rec_set_field_extern_bits( /*======================*/ - rec_t* rec, /* in: record */ - ulint* vec, /* in: array of field numbers */ - ulint n_fields, /* in: number of fields numbers */ - mtr_t* mtr); /* in: mtr holding an X-latch to the page - where rec is, or NULL; in the NULL case we - do not write to log about the change */ -/**************************************************************** -The following function is used to get a copy of the nth -data field in the record to a buffer. */ -UNIV_INLINE -void -rec_copy_nth_field( -/*===============*/ - void* buf, /* in: pointer to the buffer */ - rec_t* rec, /* in: record */ - ulint n, /* in: index of the field */ - ulint* len); /* out: length of the field; UNIV_SQL_NULL if SQL - null */ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + const ulint* vec, /* in: array of field numbers */ + ulint n_fields,/* in: number of fields numbers */ + mtr_t* mtr); /* in: mtr holding an X-latch to the page + where rec is, or NULL; in the NULL case + we do not write to log about the change */ /*************************************************************** -This is used to modify the value of an already existing field in -a physical record. The previous value must have exactly the same -size as the new value. If len is UNIV_SQL_NULL then the field is -treated as SQL null. */ +This is used to modify the value of an already existing field in a record. +The previous value must have exactly the same size as the new value. If len +is UNIV_SQL_NULL then the field is treated as an SQL null for old-style +records. For new-style records, len must not be UNIV_SQL_NULL. */ UNIV_INLINE void rec_set_nth_field( /*==============*/ - rec_t* rec, /* in: record */ - ulint n, /* in: index of the field */ - void* data, /* in: pointer to the data if not SQL null */ - ulint len); /* in: length of the data or UNIV_SQL_NULL. - If not SQL null, must have the same length as the - previous value. If SQL null, previous value must be - SQL null. */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n, /* in: index number of the field */ + const void* data, /* in: pointer to the data if not SQL null */ + ulint len); /* in: length of the data or UNIV_SQL_NULL. + If not SQL null, must have the same + length as the previous value. + If SQL null, previous value must be + SQL null. */ /************************************************************** -The following function returns the data size of a physical +The following function returns the data size of an old-style physical record, that is the sum of field lengths. SQL null fields are counted as length 0 fields. The value returned by the function is the distance from record origin to record end in bytes. */ UNIV_INLINE ulint -rec_get_data_size( -/*==============*/ - /* out: size */ +rec_get_data_size_old( +/*==================*/ + /* out: size */ rec_t* rec); /* in: physical record */ /************************************************************** +The following function returns the number of fields in a record. */ +UNIV_INLINE +ulint +rec_offs_n_fields( +/*===============*/ + /* out: number of fields */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** +The following function returns the data size of a physical +record, that is the sum of field lengths. SQL null fields +are counted as length 0 fields. The value returned by the function +is the distance from record origin to record end in bytes. */ +UNIV_INLINE +ulint +rec_offs_data_size( +/*===============*/ + /* out: size */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** Returns the total size of record minus data size of record. The value returned by the function is the distance from record start to record origin in bytes. */ UNIV_INLINE ulint -rec_get_extra_size( -/*===============*/ - /* out: size */ - rec_t* rec); /* in: physical record */ -/************************************************************** +rec_offs_extra_size( +/*================*/ + /* out: size */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** Returns the total size of a physical record. */ UNIV_INLINE ulint +rec_offs_size( +/*==========*/ + /* out: size */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** +Returns the total size of a physical record. */ + +ulint rec_get_size( /*=========*/ - /* out: size */ - rec_t* rec); /* in: physical record */ + /* out: size */ + rec_t* rec, /* in: physical record */ + dict_index_t* index); /* in: record descriptor */ /************************************************************** Returns a pointer to the start of the record. */ UNIV_INLINE byte* rec_get_start( /*==========*/ - /* out: pointer to start */ - rec_t* rec); /* in: pointer to record */ + /* out: pointer to start */ + rec_t* rec, /* in: pointer to record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /************************************************************** Returns a pointer to the end of the record. */ UNIV_INLINE byte* rec_get_end( /*========*/ - /* out: pointer to end */ - rec_t* rec); /* in: pointer to record */ + /* out: pointer to end */ + rec_t* rec, /* in: pointer to record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /******************************************************************* Copies a physical record to a buffer. */ UNIV_INLINE rec_t* rec_copy( /*=====*/ - /* out: pointer to the origin of the copied record */ - void* buf, /* in: buffer */ - rec_t* rec); /* in: physical record */ + /* out: pointer to the origin of the copy */ + void* buf, /* in: buffer */ + const rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /****************************************************************** Copies the first n fields of a physical record to a new physical record in a buffer. */ @@ -305,49 +457,43 @@ a buffer. */ rec_t* rec_copy_prefix_to_buf( /*===================*/ - /* out, own: copied record */ - rec_t* rec, /* in: physical record */ - ulint n_fields, /* in: number of fields to copy */ - byte** buf, /* in/out: memory buffer for the copied prefix, - or NULL */ - ulint* buf_size); /* in/out: buffer size */ + /* out, own: copied record */ + rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint n_fields, /* in: number of fields to copy */ + byte** buf, /* in/out: memory buffer + for the copied prefix, or NULL */ + ulint* buf_size); /* in/out: buffer size */ /**************************************************************** Folds a prefix of a physical record to a ulint. */ UNIV_INLINE ulint rec_fold( /*=====*/ - /* out: the folded value */ - rec_t* rec, /* in: the physical record */ - ulint n_fields, /* in: number of complete fields to fold */ - ulint n_bytes, /* in: number of bytes to fold in an - incomplete last field */ - dulint tree_id); /* in: index tree id */ + /* out: the folded value */ + rec_t* rec, /* in: the physical record */ + const ulint* offsets, /* in: array returned by + rec_get_offsets() */ + ulint n_fields, /* in: number of complete + fields to fold */ + ulint n_bytes, /* in: number of bytes to fold + in an incomplete last field */ + dulint tree_id); /* in: index tree id */ /************************************************************* Builds a physical record out of a data tuple and stores it beginning from address destination. */ -UNIV_INLINE + rec_t* rec_convert_dtuple_to_rec( /*======================*/ - /* out: pointer to the origin of physical - record */ - byte* destination, /* in: start address of the physical record */ - dtuple_t* dtuple); /* in: data tuple */ -/************************************************************* -Builds a physical record out of a data tuple and stores it beginning from -address destination. */ - -rec_t* -rec_convert_dtuple_to_rec_low( -/*==========================*/ - /* out: pointer to the origin of physical - record */ - byte* destination, /* in: start address of the physical record */ - dtuple_t* dtuple, /* in: data tuple */ - ulint data_size); /* in: data size of dtuple */ + /* out: pointer to the origin + of physical record */ + byte* buf, /* in: start address of the + physical record */ + dict_index_t* index, /* in: record descriptor */ + dtuple_t* dtuple);/* in: data tuple */ /************************************************************** -Returns the extra size of a physical record if we know its +Returns the extra size of an old-style physical record if we know its data size and number of fields. */ UNIV_INLINE ulint @@ -355,7 +501,8 @@ rec_get_converted_extra_size( /*=========================*/ /* out: extra size */ ulint data_size, /* in: data size */ - ulint n_fields); /* in: number of fields */ + ulint n_fields) /* in: number of fields */ + __attribute__((const)); /************************************************************** The following function returns the size of a data tuple when converted to a physical record. */ @@ -364,6 +511,7 @@ ulint rec_get_converted_size( /*===================*/ /* out: size */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* dtuple);/* in: data tuple */ /****************************************************************** Copies the first n fields of a physical record to a data tuple. @@ -374,6 +522,7 @@ rec_copy_prefix_to_dtuple( /*======================*/ dtuple_t* tuple, /* in: data tuple */ rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ ulint n_fields, /* in: number of fields to copy */ mem_heap_t* heap); /* in: memory heap */ /******************************************************************* @@ -382,16 +531,35 @@ Validates the consistency of a physical record. */ ibool rec_validate( /*=========*/ - /* out: TRUE if ok */ - rec_t* rec); /* in: physical record */ + /* out: TRUE if ok */ + rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/******************************************************************* +Prints an old-style physical record. */ + +void +rec_print_old( +/*==========*/ + FILE* file, /* in: file where to print */ + rec_t* rec); /* in: physical record */ +/******************************************************************* +Prints a physical record. */ + +void +rec_print_new( +/*==========*/ + FILE* file, /* in: file where to print */ + rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /******************************************************************* Prints a physical record. */ void rec_print( /*======*/ - FILE* file, /* in: file where to print */ - rec_t* rec); /* in: physical record */ + FILE* file, /* in: file where to print */ + rec_t* rec, /* in: physical record */ + dict_index_t* index); /* in: record descriptor */ #define REC_INFO_BITS 6 /* This is single byte bit-field */ diff --git a/innobase/include/rem0rec.ic b/innobase/include/rem0rec.ic index c36bf8f6d6e..7d35e8e4110 100644 --- a/innobase/include/rem0rec.ic +++ b/innobase/include/rem0rec.ic @@ -8,9 +8,19 @@ Created 5/30/1994 Heikki Tuuri #include "mach0data.h" #include "ut0byte.h" +#include "dict0dict.h" -/* Offsets of the bit-fields in the record. NOTE! In the table the most -significant bytes and bits are written below less significant. +/* Compact flag ORed to the extra size returned by rec_get_offsets() */ +#define REC_OFFS_COMPACT ((ulint) 1 << 31) +/* SQL NULL flag in offsets returned by rec_get_offsets() */ +#define REC_OFFS_SQL_NULL ((ulint) 1 << 31) +/* External flag in offsets returned by rec_get_offsets() */ +#define REC_OFFS_EXTERNAL ((ulint) 1 << 30) +/* Mask for offsets returned by rec_get_offsets() */ +#define REC_OFFS_MASK (REC_OFFS_EXTERNAL - 1) + +/* Offsets of the bit-fields in an old-style record. NOTE! In the table the +most significant bytes and bits are written below less significant. (1) byte offset (2) bit usage within byte downward from @@ -25,6 +35,35 @@ significant bytes and bits are written below less significant. 4 bits info bits */ +/* Offsets of the bit-fields in a new-style record. NOTE! In the table the +most significant bytes and bits are written below less significant. + + (1) byte offset (2) bit usage within byte + downward from + origin -> 1 8 bits relative offset of next record + 2 8 bits relative offset of next record + the relative offset is an unsigned 16-bit + integer: + (offset_of_next_record + - offset_of_this_record) mod 64Ki, + where mod is the modulo as a non-negative + number; + we can calculate the the offset of the next + record with the formula: + relative_offset + offset_of_this_record + mod UNIV_PAGE_SIZE + 3 3 bits status: + 000=conventional record + 001=node pointer record (inside B-tree) + 010=infimum record + 011=supremum record + 1xx=reserved + 5 bits heap number + 4 8 bits heap number + 5 4 bits n_owned + 4 bits info bits +*/ + /* We list the byte offsets from the origin of the record, the mask, and the shift needed to obtain each bit-field of the record. */ @@ -32,22 +71,30 @@ and the shift needed to obtain each bit-field of the record. */ #define REC_NEXT_MASK 0xFFFFUL #define REC_NEXT_SHIFT 0 -#define REC_SHORT 3 /* This is single byte bit-field */ -#define REC_SHORT_MASK 0x1UL -#define REC_SHORT_SHIFT 0 +#define REC_OLD_SHORT 3 /* This is single byte bit-field */ +#define REC_OLD_SHORT_MASK 0x1UL +#define REC_OLD_SHORT_SHIFT 0 -#define REC_N_FIELDS 4 -#define REC_N_FIELDS_MASK 0x7FEUL -#define REC_N_FIELDS_SHIFT 1 +#define REC_OLD_N_FIELDS 4 +#define REC_OLD_N_FIELDS_MASK 0x7FEUL +#define REC_OLD_N_FIELDS_SHIFT 1 -#define REC_HEAP_NO 5 +#define REC_NEW_STATUS 3 /* This is single byte bit-field */ +#define REC_NEW_STATUS_MASK 0x7UL +#define REC_NEW_STATUS_SHIFT 0 + +#define REC_OLD_HEAP_NO 5 +#define REC_NEW_HEAP_NO 4 #define REC_HEAP_NO_MASK 0xFFF8UL #define REC_HEAP_NO_SHIFT 3 -#define REC_N_OWNED 6 /* This is single byte bit-field */ +#define REC_OLD_N_OWNED 6 /* This is single byte bit-field */ +#define REC_NEW_N_OWNED 5 /* This is single byte bit-field */ #define REC_N_OWNED_MASK 0xFUL #define REC_N_OWNED_SHIFT 0 +#define REC_OLD_INFO_BITS 6 /* This is single byte bit-field */ +#define REC_NEW_INFO_BITS 5 /* This is single byte bit-field */ #define REC_INFO_BITS_MASK 0xF0UL #define REC_INFO_BITS_SHIFT 0 @@ -65,26 +112,24 @@ a field stored to another page: */ #define REC_2BYTE_EXTERN_MASK 0x4000UL -/**************************************************************** -Return field length or UNIV_SQL_NULL. */ -UNIV_INLINE -ulint -rec_get_nth_field_len( -/*==================*/ - /* out: length of the field; UNIV_SQL_NULL if SQL - null */ - rec_t* rec, /* in: record */ - ulint n) /* in: index of the field */ -{ - ulint len; - - rec_get_nth_field(rec, n, &len); - - return(len); -} +#if REC_OLD_SHORT_MASK << (8 * (REC_OLD_SHORT - 3)) \ + ^ REC_OLD_N_FIELDS_MASK << (8 * (REC_OLD_N_FIELDS - 4)) \ + ^ REC_HEAP_NO_MASK << (8 * (REC_OLD_HEAP_NO - 4)) \ + ^ REC_N_OWNED_MASK << (8 * (REC_OLD_N_OWNED - 3)) \ + ^ REC_INFO_BITS_MASK << (8 * (REC_OLD_INFO_BITS - 3)) \ + ^ 0xFFFFFFFFUL +# error "sum of old-style masks != 0xFFFFFFFFUL" +#endif +#if REC_NEW_STATUS_MASK << (8 * (REC_NEW_STATUS - 3)) \ + ^ REC_HEAP_NO_MASK << (8 * (REC_NEW_HEAP_NO - 4)) \ + ^ REC_N_OWNED_MASK << (8 * (REC_NEW_N_OWNED - 3)) \ + ^ REC_INFO_BITS_MASK << (8 * (REC_NEW_INFO_BITS - 3)) \ + ^ 0xFFFFFFUL +# error "sum of new-style masks != 0xFFFFFFUL" +#endif /*************************************************************** -Sets the value of the ith field SQL null bit. */ +Sets the value of the ith field SQL null bit of an old-style record. */ void rec_set_nth_field_null_bit( @@ -93,8 +138,8 @@ rec_set_nth_field_null_bit( ulint i, /* in: ith field */ ibool val); /* in: value to set */ /*************************************************************** -Sets a record field to SQL null. The physical size of the field is not -changed. */ +Sets an old-style record field to SQL null. +The physical size of the field is not changed. */ void rec_set_nth_field_sql_null( @@ -102,6 +147,32 @@ rec_set_nth_field_sql_null( rec_t* rec, /* in: record */ ulint n); /* in: index of the field */ +/*************************************************************** +Sets the value of the ith field extern storage bit of an old-style record. */ + +void +rec_set_nth_field_extern_bit_old( +/*=============================*/ + rec_t* rec, /* in: old-style record */ + ulint i, /* in: ith field */ + ibool val, /* in: value to set */ + mtr_t* mtr); /* in: mtr holding an X-latch to the page where + rec is, or NULL; in the NULL case we do not + write to log about the change */ +/*************************************************************** +Sets the value of the ith field extern storage bit of a new-style record. */ + +void +rec_set_nth_field_extern_bit_new( +/*=============================*/ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + ulint ith, /* in: ith field */ + ibool val, /* in: value to set */ + mtr_t* mtr); /* in: mtr holding an X-latch to the page + where rec is, or NULL; in the NULL case + we do not write to log about the change */ + /********************************************************** Gets a bit field from within 1 byte. */ UNIV_INLINE @@ -131,7 +202,7 @@ rec_set_bit_field_1( ulint shift) /* in: shift right applied after masking */ { ut_ad(rec); - ut_ad(offs <= REC_N_EXTRA_BYTES); + ut_ad(offs <= REC_N_OLD_EXTRA_BYTES); ut_ad(mask); ut_ad(mask <= 0xFFUL); ut_ad(((mask >> shift) << shift) == mask); @@ -171,30 +242,14 @@ rec_set_bit_field_2( ulint shift) /* in: shift right applied after masking */ { ut_ad(rec); - ut_ad(offs <= REC_N_EXTRA_BYTES); + ut_ad(offs <= REC_N_OLD_EXTRA_BYTES); ut_ad(mask > 0xFFUL); ut_ad(mask <= 0xFFFFUL); ut_ad((mask >> shift) & 1); ut_ad(0 == ((mask >> shift) & ((mask >> shift) + 1))); ut_ad(((mask >> shift) << shift) == mask); ut_ad(((val << shift) & mask) == (val << shift)); -#ifdef UNIV_DEBUG - { - ulint m; - - /* The following assertion checks that the masks of currently - defined bit-fields in bytes 3-6 do not overlap. */ - m = (ulint)((REC_SHORT_MASK << (8 * (REC_SHORT - 3))) - + (REC_N_FIELDS_MASK << (8 * (REC_N_FIELDS - 4))) - + (REC_HEAP_NO_MASK << (8 * (REC_HEAP_NO - 4))) - + (REC_N_OWNED_MASK << (8 * (REC_N_OWNED - 3))) - + (REC_INFO_BITS_MASK << (8 * (REC_INFO_BITS - 3)))); - if (m != ut_dbg_zero + 0xFFFFFFFFUL) { - fprintf(stderr, "Sum of masks %lx\n", m); - ut_error; - } - } -#endif + mach_write_to_2(rec - offs, (mach_read_from_2(rec - offs) & ~mask) | (val << shift)); @@ -207,18 +262,38 @@ UNIV_INLINE ulint rec_get_next_offs( /*==============*/ - /* out: the page offset of the next chained record */ - rec_t* rec) /* in: physical record */ + /* out: the page offset of the next chained record, or + 0 if none */ + rec_t* rec, /* in: physical record */ + ibool comp) /* in: TRUE=compact page format */ { - ulint ret; + ulint field_value; + + ut_ad(REC_NEXT_MASK == 0xFFFFUL); + ut_ad(REC_NEXT_SHIFT == 0); - ut_ad(rec); + field_value = mach_read_from_2(rec - REC_NEXT); - ret = rec_get_bit_field_2(rec, REC_NEXT, REC_NEXT_MASK, - REC_NEXT_SHIFT); - ut_ad(ret < UNIV_PAGE_SIZE); + if (comp) { +#if UNIV_PAGE_SIZE <= 32768 + /* Note that for 64 KiB pages, field_value can 'wrap around' + and the debug assertion is not valid */ - return(ret); + ut_ad((int16_t)field_value + + ut_align_offset(rec, UNIV_PAGE_SIZE) + < UNIV_PAGE_SIZE); +#endif + if (field_value == 0) { + + return(0); + } + + return(ut_align_offset(rec + field_value, UNIV_PAGE_SIZE)); + } else { + ut_ad(field_value < UNIV_PAGE_SIZE); + + return(field_value); + } } /********************************************************** @@ -229,21 +304,42 @@ void rec_set_next_offs( /*==============*/ rec_t* rec, /* in: physical record */ - ulint next) /* in: offset of the next record */ + ibool comp, /* in: TRUE=compact page format */ + ulint next) /* in: offset of the next record, or 0 if none */ { ut_ad(rec); ut_ad(UNIV_PAGE_SIZE > next); + ut_ad(REC_NEXT_MASK == 0xFFFFUL); + ut_ad(REC_NEXT_SHIFT == 0); + + if (comp) { + ulint field_value; - rec_set_bit_field_2(rec, next, REC_NEXT, REC_NEXT_MASK, - REC_NEXT_SHIFT); + if (next) { + /* The following two statements calculate + next - offset_of_rec mod 64Ki, where mod is the modulo + as a non-negative number */ + + field_value = (ulint)((lint)next + - (lint)ut_align_offset(rec, UNIV_PAGE_SIZE)); + field_value &= REC_NEXT_MASK; + } else { + field_value = 0; + } + + mach_write_to_2(rec - REC_NEXT, field_value); + } else { + mach_write_to_2(rec - REC_NEXT, next); + } } /********************************************************** -The following function is used to get the number of fields in the record. */ +The following function is used to get the number of fields +in an old-style record. */ UNIV_INLINE ulint -rec_get_n_fields( -/*=============*/ +rec_get_n_fields_old( +/*=================*/ /* out: number of data fields */ rec_t* rec) /* in: physical record */ { @@ -251,8 +347,8 @@ rec_get_n_fields( ut_ad(rec); - ret = rec_get_bit_field_2(rec, REC_N_FIELDS, REC_N_FIELDS_MASK, - REC_N_FIELDS_SHIFT); + ret = rec_get_bit_field_2(rec, REC_OLD_N_FIELDS, + REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT); ut_ad(ret <= REC_MAX_N_FIELDS); ut_ad(ret > 0); @@ -260,12 +356,12 @@ rec_get_n_fields( } /********************************************************** -The following function is used to set the number of fields field in the -record. */ +The following function is used to set the number of fields +in an old-style record. */ UNIV_INLINE void -rec_set_n_fields( -/*=============*/ +rec_set_n_fields_old( +/*=================*/ rec_t* rec, /* in: physical record */ ulint n_fields) /* in: the number of fields */ { @@ -273,8 +369,58 @@ rec_set_n_fields( ut_ad(n_fields <= REC_MAX_N_FIELDS); ut_ad(n_fields > 0); - rec_set_bit_field_2(rec, n_fields, REC_N_FIELDS, REC_N_FIELDS_MASK, - REC_N_FIELDS_SHIFT); + rec_set_bit_field_2(rec, n_fields, REC_OLD_N_FIELDS, + REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT); +} + +/********************************************************** +The following function retrieves the status bits of a new-style record. */ +UNIV_INLINE +ulint +rec_get_status( +/*===========*/ + /* out: status bits */ + rec_t* rec) /* in: physical record */ +{ + ulint ret; + + ut_ad(rec); + + ret = rec_get_bit_field_1(rec, REC_NEW_STATUS, + REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT); + ut_ad((ret & ~REC_NEW_STATUS_MASK) == 0); + + return(ret); +} + +/********************************************************** +The following function is used to get the number of fields +in a record. */ +UNIV_INLINE +ulint +rec_get_n_fields( +/*=============*/ + /* out: number of data fields */ + rec_t* rec, /* in: physical record */ + dict_index_t* index) /* in: record descriptor */ +{ + ut_ad(rec); + ut_ad(index); + if (!index->table->comp) { + return(rec_get_n_fields_old(rec)); + } + switch (rec_get_status(rec)) { + case REC_STATUS_ORDINARY: + return(dict_index_get_n_fields(index)); + case REC_STATUS_NODE_PTR: + return(dict_index_get_n_unique_in_tree(index) + 1); + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + return(1); + default: + ut_error; + return(ULINT_UNDEFINED); + } } /********************************************************** @@ -285,14 +431,16 @@ ulint rec_get_n_owned( /*============*/ /* out: number of owned records */ - rec_t* rec) /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp) /* in: TRUE=compact page format */ { ulint ret; ut_ad(rec); - ret = rec_get_bit_field_1(rec, REC_N_OWNED, REC_N_OWNED_MASK, - REC_N_OWNED_SHIFT); + ret = rec_get_bit_field_1(rec, + comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED, + REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); ut_ad(ret <= REC_MAX_N_OWNED); return(ret); @@ -305,13 +453,15 @@ void rec_set_n_owned( /*============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint n_owned) /* in: the number of owned */ { ut_ad(rec); ut_ad(n_owned <= REC_MAX_N_OWNED); - rec_set_bit_field_1(rec, n_owned, REC_N_OWNED, REC_N_OWNED_MASK, - REC_N_OWNED_SHIFT); + rec_set_bit_field_1(rec, n_owned, + comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED, + REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); } /********************************************************** @@ -321,14 +471,16 @@ ulint rec_get_info_bits( /*==============*/ /* out: info bits */ - rec_t* rec) /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp) /* in: TRUE=compact page format */ { ulint ret; ut_ad(rec); - ret = rec_get_bit_field_1(rec, REC_INFO_BITS, REC_INFO_BITS_MASK, - REC_INFO_BITS_SHIFT); + ret = rec_get_bit_field_1(rec, + comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS, + REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); ut_ad((ret & ~REC_INFO_BITS_MASK) == 0); return(ret); @@ -341,30 +493,31 @@ void rec_set_info_bits( /*==============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint bits) /* in: info bits */ { ut_ad(rec); ut_ad((bits & ~REC_INFO_BITS_MASK) == 0); - rec_set_bit_field_1(rec, bits, REC_INFO_BITS, REC_INFO_BITS_MASK, - REC_INFO_BITS_SHIFT); + rec_set_bit_field_1(rec, bits, + comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS, + REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); } /********************************************************** -Gets the value of the deleted flag in info bits. */ +The following function is used to set the status bits of a new-style record. */ UNIV_INLINE -ibool -rec_info_bits_get_deleted_flag( -/*===========================*/ - /* out: TRUE if deleted flag set */ - ulint info_bits) /* in: info bits from a record */ +void +rec_set_status( +/*===========*/ + rec_t* rec, /* in: physical record */ + ulint bits) /* in: info bits */ { - if (info_bits & REC_INFO_DELETED_FLAG) { - - return(TRUE); - } + ut_ad(rec); + ut_ad((bits & ~REC_NEW_STATUS_MASK) == 0); - return(FALSE); + rec_set_bit_field_1(rec, bits, REC_NEW_STATUS, + REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT); } /********************************************************** @@ -374,9 +527,10 @@ ibool rec_get_deleted_flag( /*=================*/ /* out: TRUE if delete marked */ - rec_t* rec) /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp) /* in: TRUE=compact page format */ { - if (REC_INFO_DELETED_FLAG & rec_get_info_bits(rec)) { + if (REC_INFO_DELETED_FLAG & rec_get_info_bits(rec, comp)) { return(TRUE); } @@ -391,6 +545,7 @@ void rec_set_deleted_flag( /*=================*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ibool flag) /* in: TRUE if delete marked */ { ulint old_val; @@ -399,7 +554,7 @@ rec_set_deleted_flag( ut_ad(TRUE == 1); ut_ad(flag <= TRUE); - old_val = rec_get_info_bits(rec); + old_val = rec_get_info_bits(rec, comp); if (flag) { new_val = REC_INFO_DELETED_FLAG | old_val; @@ -407,7 +562,39 @@ rec_set_deleted_flag( new_val = ~REC_INFO_DELETED_FLAG & old_val; } - rec_set_info_bits(rec, new_val); + rec_set_info_bits(rec, comp, new_val); +} + +/********************************************************** +The following function tells if a new-style record is a node pointer. */ +UNIV_INLINE +ibool +rec_get_node_ptr_flag( +/*=================*/ + /* out: TRUE if node pointer */ + rec_t* rec) /* in: physical record */ +{ + return(REC_STATUS_NODE_PTR == rec_get_status(rec)); +} + +/********************************************************** +The following function is used to flag a record as a node pointer. */ +UNIV_INLINE +void +rec_set_node_ptr_flag( +/*=================*/ + rec_t* rec, /* in: physical record */ + ibool flag) /* in: TRUE if the record is a node pointer */ +{ + ulint status; + ut_ad(flag <= TRUE); + ut_ad(REC_STATUS_NODE_PTR >= rec_get_status(rec)); + if (flag) { + status = REC_STATUS_NODE_PTR; + } else { + status = REC_STATUS_ORDINARY; + } + rec_set_status(rec, status); } /********************************************************** @@ -418,14 +605,16 @@ ulint rec_get_heap_no( /*=============*/ /* out: heap order number */ - rec_t* rec) /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp) /* in: TRUE=compact page format */ { ulint ret; ut_ad(rec); - ret = rec_get_bit_field_2(rec, REC_HEAP_NO, REC_HEAP_NO_MASK, - REC_HEAP_NO_SHIFT); + ret = rec_get_bit_field_2(rec, + comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO, + REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT); ut_ad(ret <= REC_MAX_HEAP_NO); return(ret); @@ -438,12 +627,14 @@ void rec_set_heap_no( /*=============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint heap_no)/* in: the heap number */ { ut_ad(heap_no <= REC_MAX_HEAP_NO); - rec_set_bit_field_2(rec, heap_no, REC_HEAP_NO, REC_HEAP_NO_MASK, - REC_HEAP_NO_SHIFT); + rec_set_bit_field_2(rec, heap_no, + comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO, + REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT); } /********************************************************** @@ -456,10 +647,12 @@ rec_get_1byte_offs_flag( /* out: TRUE if 1-byte form */ rec_t* rec) /* in: physical record */ { - ut_ad(TRUE == 1); +#if TRUE != 1 +#error "TRUE != 1" +#endif - return(rec_get_bit_field_1(rec, REC_SHORT, REC_SHORT_MASK, - REC_SHORT_SHIFT)); + return(rec_get_bit_field_1(rec, REC_OLD_SHORT, REC_OLD_SHORT_MASK, + REC_OLD_SHORT_SHIFT)); } /********************************************************** @@ -471,11 +664,13 @@ rec_set_1byte_offs_flag( rec_t* rec, /* in: physical record */ ibool flag) /* in: TRUE if 1byte form */ { - ut_ad(TRUE == 1); +#if TRUE != 1 +#error "TRUE != 1" +#endif ut_ad(flag <= TRUE); - rec_set_bit_field_1(rec, flag, REC_SHORT, REC_SHORT_MASK, - REC_SHORT_SHIFT); + rec_set_bit_field_1(rec, flag, REC_OLD_SHORT, REC_OLD_SHORT_MASK, + REC_OLD_SHORT_SHIFT); } /********************************************************** @@ -492,9 +687,9 @@ rec_1_get_field_end_info( ulint n) /* in: field index */ { ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); - return(mach_read_from_1(rec - (REC_N_EXTRA_BYTES + n + 1))); + return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1))); } /********************************************************** @@ -511,68 +706,289 @@ rec_2_get_field_end_info( ulint n) /* in: field index */ { ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); - return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2))); + return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2))); } -/*************************************************************** -Gets the value of the ith field extern storage bit. If it is TRUE -it means that the field is stored on another page. */ +#ifdef UNIV_DEBUG +/* Length of the rec_get_offsets() header */ +# define REC_OFFS_HEADER_SIZE 4 +#else /* UNIV_DEBUG */ +/* Length of the rec_get_offsets() header */ +# define REC_OFFS_HEADER_SIZE 2 +#endif /* UNIV_DEBUG */ + +/* Get the base address of offsets. The extra_size is stored at +this position, and following positions hold the end offsets of +the fields. */ +#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE) + +/************************************************************** +The following function returns the number of allocated elements +for an array of offsets. */ +UNIV_INLINE +ulint +rec_offs_get_n_alloc( +/*=================*/ + /* out: number of elements */ + const ulint* offsets)/* in: array for rec_get_offsets() */ +{ + ulint n_alloc; + ut_ad(offsets); + n_alloc = offsets[0]; + ut_ad(n_alloc > 0); + return(n_alloc); +} + +/************************************************************** +The following function sets the number of allocated elements +for an array of offsets. */ +UNIV_INLINE +void +rec_offs_set_n_alloc( +/*=================*/ + ulint* offsets, /* in: array for rec_get_offsets() */ + ulint n_alloc) /* in: number of elements */ +{ + ut_ad(offsets); + ut_ad(n_alloc > 0); + offsets[0] = n_alloc; +} + +/************************************************************** +The following function returns the number of fields in a record. */ +UNIV_INLINE +ulint +rec_offs_n_fields( +/*===============*/ + /* out: number of fields */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ulint n_fields; + ut_ad(offsets); + n_fields = offsets[1]; + ut_ad(n_fields > 0); + ut_ad(n_fields <= REC_MAX_N_FIELDS); + ut_ad(n_fields + REC_OFFS_HEADER_SIZE + <= rec_offs_get_n_alloc(offsets)); + return(n_fields); +} + +/**************************************************************** +Validates offsets returned by rec_get_offsets(). */ UNIV_INLINE ibool -rec_get_nth_field_extern_bit( -/*=========================*/ - /* in: TRUE or FALSE */ - rec_t* rec, /* in: record */ - ulint i) /* in: ith field */ +rec_offs_validate( +/*==============*/ + /* out: TRUE if valid */ + rec_t* rec, /* in: record or NULL */ + dict_index_t* index, /* in: record descriptor or NULL */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ulint i = rec_offs_n_fields(offsets); + ulint last = ULINT_MAX; + ibool comp = (*rec_offs_base(offsets) & REC_OFFS_COMPACT) != 0; + + if (rec) { + ut_ad((ulint) rec == offsets[2]); + if (!comp) { + ut_a(rec_get_n_fields_old(rec) >= i); + } + } + if (index) { + ulint max_n_fields; + ut_ad((ulint) index == offsets[3]); + max_n_fields = ut_max( + dict_index_get_n_fields(index), + dict_index_get_n_unique_in_tree(index) + 1); + if (comp && rec) { + switch (rec_get_status(rec)) { + case REC_STATUS_ORDINARY: + break; + case REC_STATUS_NODE_PTR: + max_n_fields = + dict_index_get_n_unique_in_tree(index) + 1; + break; + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + max_n_fields = 1; + break; + default: + ut_error; + } + } + /* index->n_def == 0 for dummy indexes if !comp */ + ut_a(!comp || index->n_def); + ut_a(!index->n_def || i <= max_n_fields); + } + while (i--) { + ulint curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK; + ut_a(curr <= last); + last = curr; + } + return(TRUE); +} +/**************************************************************** +Updates debug data in offsets, in order to avoid bogus +rec_offs_validate() failures. */ +UNIV_INLINE +void +rec_offs_make_valid( +/*================*/ + rec_t* rec __attribute__((unused)), + /* in: record */ + dict_index_t* index __attribute__((unused)), + /* in: record descriptor */ + ulint* offsets __attribute__((unused))) + /* in: array returned by rec_get_offsets() */ { - ulint info; +#ifdef UNIV_DEBUG + ut_ad(rec_get_n_fields(rec, index) >= rec_offs_n_fields(offsets)); + offsets[2] = (ulint) rec; + offsets[3] = (ulint) index; +#endif /* UNIV_DEBUG */ +} - if (rec_get_1byte_offs_flag(rec)) { +/**************************************************************** +The following function is used to get a pointer to the nth +data field in an old-style record. */ +UNIV_INLINE +byte* +rec_get_nth_field( +/*==============*/ + /* out: pointer to the field */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n, /* in: index of the field */ + ulint* len) /* out: length of the field; UNIV_SQL_NULL + if SQL null */ +{ + byte* field; + ulint length; + ut_ad(rec); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(n < rec_offs_n_fields(offsets)); + ut_ad(len); - return(FALSE); + if (n == 0) { + field = rec; + } else { + field = rec + (rec_offs_base(offsets)[n] & REC_OFFS_MASK); } - info = rec_2_get_field_end_info(rec, i); + length = rec_offs_base(offsets)[1 + n]; - if (info & REC_2BYTE_EXTERN_MASK) { - return(TRUE); + if (length & REC_OFFS_SQL_NULL) { + length = UNIV_SQL_NULL; + } else { + length &= REC_OFFS_MASK; + length -= field - rec; } - return(FALSE); + *len = length; + return(field); } /********************************************************** -Returns TRUE if the extern bit is set in any of the fields -of rec. */ +Determine if the offsets are for a record in the new +compact format. */ UNIV_INLINE ibool -rec_contains_externally_stored_field( -/*=================================*/ - /* out: TRUE if a field is stored externally */ - rec_t* rec) /* in: record */ +rec_offs_comp( +/*==========*/ + /* out: TRUE if compact format */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - ulint n; - ulint i; - - if (rec_get_1byte_offs_flag(rec)) { - - return(FALSE); - } + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + return((*rec_offs_base(offsets) & REC_OFFS_COMPACT) != 0); +} - n = rec_get_n_fields(rec); +/********************************************************** +Returns TRUE if the nth field of rec is SQL NULL. */ +UNIV_INLINE +ibool +rec_offs_nth_null( +/*==============*/ + /* out: TRUE if SQL NULL */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n) /* in: nth field */ +{ + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + ut_ad(n < rec_offs_n_fields(offsets)); + return((rec_offs_base(offsets)[1 + n] & REC_OFFS_SQL_NULL) != 0); +} +/********************************************************** +Returns TRUE if the extern bit is set in nth field of rec. */ +UNIV_INLINE +ibool +rec_offs_nth_extern( +/*================*/ + /* out: TRUE if externally stored */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n) /* in: nth field */ +{ + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + ut_ad(n < rec_offs_n_fields(offsets)); + return((rec_offs_base(offsets)[1 + n] & REC_OFFS_EXTERNAL) != 0); +} - for (i = 0; i < n; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { +/********************************************************** +Gets the physical size of a field. */ +UNIV_INLINE +ulint +rec_offs_nth_size( +/*==============*/ + /* out: length of field */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n) /* in: nth field */ +{ + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + ut_ad(n < rec_offs_n_fields(offsets)); + return((rec_offs_base(offsets)[1 + n] - rec_offs_base(offsets)[n]) + & REC_OFFS_MASK); +} +/********************************************************** +Returns TRUE if the extern bit is set in any of the fields +of an old-style record. */ +UNIV_INLINE +ibool +rec_offs_any_extern( +/*================*/ + /* out: TRUE if a field is stored externally */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ulint i; + for (i = rec_offs_n_fields(offsets); i--; ) { + if (rec_offs_nth_extern(offsets, i)) { return(TRUE); } } - return(FALSE); } +/*************************************************************** +Sets the value of the ith field extern storage bit. */ +UNIV_INLINE +void +rec_set_nth_field_extern_bit( +/*=========================*/ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + ulint i, /* in: ith field */ + ibool val, /* in: value to set */ + mtr_t* mtr) /* in: mtr holding an X-latch to the page + where rec is, or NULL; in the NULL case + we do not write to log about the change */ +{ + if (index->table->comp) { + rec_set_nth_field_extern_bit_new(rec, index, i, val, mtr); + } else { + rec_set_nth_field_extern_bit_old(rec, i, val, mtr); + } +} + /********************************************************** Returns the offset of n - 1th field end if the record is stored in the 1-byte offsets form. If the field is SQL null, the flag is ORed in the returned @@ -589,9 +1005,9 @@ rec_1_get_prev_field_end_info( ulint n) /* in: field index */ { ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); - return(mach_read_from_1(rec - (REC_N_EXTRA_BYTES + n))); + return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n))); } /********************************************************** @@ -608,9 +1024,9 @@ rec_2_get_prev_field_end_info( ulint n) /* in: field index */ { ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); - return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n))); + return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n))); } /********************************************************** @@ -625,9 +1041,9 @@ rec_1_set_field_end_info( ulint info) /* in: value to set */ { ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); - mach_write_to_1(rec - (REC_N_EXTRA_BYTES + n + 1), info); + mach_write_to_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1), info); } /********************************************************** @@ -642,9 +1058,9 @@ rec_2_set_field_end_info( ulint info) /* in: value to set */ { ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); - mach_write_to_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2), info); + mach_write_to_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2), info); } /********************************************************** @@ -659,7 +1075,7 @@ rec_1_get_field_start_offs( ulint n) /* in: field index */ { ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); if (n == 0) { @@ -682,7 +1098,7 @@ rec_2_get_field_start_offs( ulint n) /* in: field index */ { ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); if (n == 0) { @@ -707,7 +1123,7 @@ rec_get_field_start_offs( ulint n) /* in: field index */ { ut_ad(rec); - ut_ad(n <= rec_get_n_fields(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); if (n == 0) { @@ -723,8 +1139,9 @@ rec_get_field_start_offs( } /**************************************************************** -Gets the physical size of a field. Also an SQL null may have a field of -size > 0, if the data type is of a fixed size. */ +Gets the physical size of an old-style field. +Also an SQL null may have a field of size > 0, +if the data type is of a fixed size. */ UNIV_INLINE ulint rec_get_nth_field_size( @@ -744,133 +1161,134 @@ rec_get_nth_field_size( return(next_os - os); } -/**************************************************************** -The following function is used to get a copy of the nth data field in a -record to a buffer. */ -UNIV_INLINE -void -rec_copy_nth_field( -/*===============*/ - void* buf, /* in: pointer to the buffer */ - rec_t* rec, /* in: record */ - ulint n, /* in: index of the field */ - ulint* len) /* out: length of the field; UNIV_SQL_NULL if SQL - null */ -{ - byte* ptr; - - ut_ad(buf && rec && len); - - ptr = rec_get_nth_field(rec, n, len); - - if (*len == UNIV_SQL_NULL) { - - return; - } - - ut_memcpy(buf, ptr, *len); -} - /*************************************************************** This is used to modify the value of an already existing field in a record. The previous value must have exactly the same size as the new value. If len -is UNIV_SQL_NULL then the field is treated as an SQL null. */ +is UNIV_SQL_NULL then the field is treated as an SQL null for old-style +records. For new-style records, len must not be UNIV_SQL_NULL. */ UNIV_INLINE void rec_set_nth_field( /*==============*/ - rec_t* rec, /* in: record */ - ulint n, /* in: index of the field */ - void* data, /* in: pointer to the data if not SQL null */ - ulint len) /* in: length of the data or UNIV_SQL_NULL */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n, /* in: index number of the field */ + const void* data, /* in: pointer to the data + if not SQL null */ + ulint len) /* in: length of the data or UNIV_SQL_NULL. + If not SQL null, must have the same + length as the previous value. + If SQL null, previous value must be + SQL null. */ { byte* data2; ulint len2; - ut_ad((len == UNIV_SQL_NULL) - || (rec_get_nth_field_size(rec, n) == len)); - + ut_ad(rec); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + if (len == UNIV_SQL_NULL) { + ut_ad(!rec_offs_comp(offsets)); rec_set_nth_field_sql_null(rec, n); return; } - data2 = rec_get_nth_field(rec, n, &len2); - - ut_memcpy(data2, data, len); - + data2 = rec_get_nth_field(rec, offsets, n, &len2); if (len2 == UNIV_SQL_NULL) { - + ut_ad(!rec_offs_comp(offsets)); rec_set_nth_field_null_bit(rec, n, FALSE); + ut_ad(len == rec_get_nth_field_size(rec, n)); + } else { + ut_ad(len2 == len); } + + ut_memcpy(data2, data, len); } /************************************************************** -The following function returns the data size of a physical +The following function returns the data size of an old-style physical record, that is the sum of field lengths. SQL null fields are counted as length 0 fields. The value returned by the function is the distance from record origin to record end in bytes. */ UNIV_INLINE ulint -rec_get_data_size( -/*==============*/ - /* out: size */ +rec_get_data_size_old( +/*==================*/ + /* out: size */ rec_t* rec) /* in: physical record */ { ut_ad(rec); - return(rec_get_field_start_offs(rec, rec_get_n_fields(rec))); + return(rec_get_field_start_offs(rec, rec_get_n_fields_old(rec))); } /************************************************************** -Returns the total size of record minus data size of record. The value -returned by the function is the distance from record start to record origin -in bytes. */ +The following function sets the number of fields in offsets. */ +UNIV_INLINE +void +rec_offs_set_n_fields( +/*==================*/ + ulint* offsets, /* in: array returned by rec_get_offsets() */ + ulint n_fields) /* in: number of fields */ +{ + ut_ad(offsets); + ut_ad(n_fields > 0); + ut_ad(n_fields <= REC_MAX_N_FIELDS); + ut_ad(n_fields + REC_OFFS_HEADER_SIZE + <= rec_offs_get_n_alloc(offsets)); + offsets[1] = n_fields; +} + +/************************************************************** +The following function returns the data size of a physical +record, that is the sum of field lengths. SQL null fields +are counted as length 0 fields. The value returned by the function +is the distance from record origin to record end in bytes. */ UNIV_INLINE ulint -rec_get_extra_size( +rec_offs_data_size( /*===============*/ - /* out: size */ - rec_t* rec) /* in: physical record */ + /* out: size */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - ulint n_fields; - - ut_ad(rec); - - n_fields = rec_get_n_fields(rec); - - if (rec_get_1byte_offs_flag(rec)) { + ulint size; - return(REC_N_EXTRA_BYTES + n_fields); - } + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + size = rec_offs_base(offsets)[rec_offs_n_fields(offsets)] + & REC_OFFS_MASK; + ut_ad(size < UNIV_PAGE_SIZE); + return(size); +} - return(REC_N_EXTRA_BYTES + 2 * n_fields); +/************************************************************** +Returns the total size of record minus data size of record. The value +returned by the function is the distance from record start to record origin +in bytes. */ +UNIV_INLINE +ulint +rec_offs_extra_size( +/*================*/ + /* out: size */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ulint size; + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + size = *rec_offs_base(offsets) & ~REC_OFFS_COMPACT; + ut_ad(size < UNIV_PAGE_SIZE); + return(size); } -/************************************************************** +/************************************************************** Returns the total size of a physical record. */ UNIV_INLINE ulint -rec_get_size( -/*=========*/ - /* out: size */ - rec_t* rec) /* in: physical record */ +rec_offs_size( +/*==========*/ + /* out: size */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - ulint n_fields; - - ut_ad(rec); - - n_fields = rec_get_n_fields(rec); - - if (rec_get_1byte_offs_flag(rec)) { - - return(REC_N_EXTRA_BYTES + n_fields - + rec_1_get_field_start_offs(rec, n_fields)); - } - - return(REC_N_EXTRA_BYTES + 2 * n_fields - + rec_2_get_field_start_offs(rec, n_fields)); + return(rec_offs_data_size(offsets) + rec_offs_extra_size(offsets)); } /************************************************************** @@ -879,10 +1297,11 @@ UNIV_INLINE byte* rec_get_end( /*========*/ - /* out: pointer to end */ - rec_t* rec) /* in: pointer to record */ + /* out: pointer to end */ + rec_t* rec, /* in: pointer to record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - return(rec + rec_get_data_size(rec)); + return(rec + rec_offs_data_size(offsets)); } /************************************************************** @@ -891,10 +1310,11 @@ UNIV_INLINE byte* rec_get_start( /*==========*/ - /* out: pointer to start */ - rec_t* rec) /* in: pointer to record */ + /* out: pointer to start */ + rec_t* rec, /* in: pointer to record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - return(rec - rec_get_extra_size(rec)); + return(rec - rec_offs_extra_size(offsets)); } /******************************************************************* @@ -903,18 +1323,20 @@ UNIV_INLINE rec_t* rec_copy( /*=====*/ - /* out: pointer to the origin of the copied record */ - void* buf, /* in: buffer */ - rec_t* rec) /* in: physical record */ + /* out: pointer to the origin of the copy */ + void* buf, /* in: buffer */ + const rec_t* rec, /* in: physical record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { ulint extra_len; ulint data_len; ut_ad(rec && buf); - ut_ad(rec_validate(rec)); + ut_ad(rec_offs_validate((rec_t*) rec, NULL, offsets)); + ut_ad(rec_validate((rec_t*) rec, offsets)); - extra_len = rec_get_extra_size(rec); - data_len = rec_get_data_size(rec); + extra_len = rec_offs_extra_size(offsets); + data_len = rec_offs_data_size(offsets); ut_memcpy(buf, rec - extra_len, extra_len + data_len); @@ -922,8 +1344,8 @@ rec_copy( } /************************************************************** -Returns the extra size of a physical record if we know its data size and -the number of fields. */ +Returns the extra size of an old-style physical record if we know its +data size and number of fields. */ UNIV_INLINE ulint rec_get_converted_extra_size( @@ -934,28 +1356,51 @@ rec_get_converted_extra_size( { if (data_size <= REC_1BYTE_OFFS_LIMIT) { - return(REC_N_EXTRA_BYTES + n_fields); + return(REC_N_OLD_EXTRA_BYTES + n_fields); } - return(REC_N_EXTRA_BYTES + 2 * n_fields); + return(REC_N_OLD_EXTRA_BYTES + 2 * n_fields); } /************************************************************** The following function returns the size of a data tuple when converted to +a new-style physical record. */ + +ulint +rec_get_converted_size_new( +/*=======================*/ + /* out: size */ + dict_index_t* index, /* in: record descriptor */ + dtuple_t* dtuple);/* in: data tuple */ +/************************************************************** +The following function returns the size of a data tuple when converted to a physical record. */ UNIV_INLINE ulint rec_get_converted_size( /*===================*/ /* out: size */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* dtuple) /* in: data tuple */ { ulint data_size; ulint extra_size; - + + ut_ad(index); ut_ad(dtuple); ut_ad(dtuple_check_typed(dtuple)); + ut_ad(index->type & DICT_UNIVERSAL + || dtuple_get_n_fields(dtuple) == + (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK) + == REC_STATUS_NODE_PTR) + ? dict_index_get_n_unique_in_tree(index) + 1 + : dict_index_get_n_fields(index))); + + if (index->table->comp) { + return(rec_get_converted_size_new(index, dtuple)); + } + data_size = dtuple_get_data_size(dtuple); extra_size = rec_get_converted_extra_size( @@ -971,12 +1416,15 @@ UNIV_INLINE ulint rec_fold( /*=====*/ - /* out: the folded value */ - rec_t* rec, /* in: the physical record */ - ulint n_fields, /* in: number of complete fields to fold */ - ulint n_bytes, /* in: number of bytes to fold in an - incomplete last field */ - dulint tree_id) /* in: index tree id */ + /* out: the folded value */ + rec_t* rec, /* in: the physical record */ + const ulint* offsets, /* in: array returned by + rec_get_offsets() */ + ulint n_fields, /* in: number of complete + fields to fold */ + ulint n_bytes, /* in: number of bytes to fold + in an incomplete last field */ + dulint tree_id) /* in: index tree id */ { ulint i; byte* data; @@ -984,12 +1432,13 @@ rec_fold( ulint fold; ulint n_fields_rec; - ut_ad(rec_validate(rec)); - ut_ad(n_fields <= rec_get_n_fields(rec)); - ut_ad((n_fields < rec_get_n_fields(rec)) || (n_bytes == 0)); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(rec_validate((rec_t*) rec, offsets)); ut_ad(n_fields + n_bytes > 0); - - n_fields_rec = rec_get_n_fields(rec); + + n_fields_rec = rec_offs_n_fields(offsets); + ut_ad(n_fields <= n_fields_rec); + ut_ad(n_fields < n_fields_rec || n_bytes == 0); if (n_fields > n_fields_rec) { n_fields = n_fields_rec; @@ -1002,7 +1451,7 @@ rec_fold( fold = ut_fold_dulint(tree_id); for (i = 0; i < n_fields; i++) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); if (len != UNIV_SQL_NULL) { fold = ut_fold_ulint_pair(fold, @@ -1011,7 +1460,7 @@ rec_fold( } if (n_bytes > 0) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); if (len != UNIV_SQL_NULL) { if (len > n_bytes) { @@ -1025,19 +1474,3 @@ rec_fold( return(fold); } - -/************************************************************* -Builds a physical record out of a data tuple and stores it beginning from -the address destination. */ -UNIV_INLINE -rec_t* -rec_convert_dtuple_to_rec( -/*======================*/ - /* out: pointer to the origin of physical - record */ - byte* destination, /* in: start address of the physical record */ - dtuple_t* dtuple) /* in: data tuple */ -{ - return(rec_convert_dtuple_to_rec_low(destination, dtuple, - dtuple_get_data_size(dtuple))); -} diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h index 13773ed380d..f0dc4630475 100644 --- a/innobase/include/row0mysql.h +++ b/innobase/include/row0mysql.h @@ -239,6 +239,17 @@ row_update_for_mysql( the MySQL format */ row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL handle */ + +/************************************************************************* +Does an unlock of a row for MySQL. */ + +int +row_unlock_for_mysql( +/*=================*/ + /* out: error code or DB_SUCCESS */ + row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL + handle */ + /************************************************************************* Creates an query graph node of 'update' type to be used in the MySQL interface. */ @@ -569,6 +580,10 @@ struct row_prebuilt_struct { allocated mem buf start, because there is a 4 byte magic number at the start and at the end */ + ibool keep_other_fields_on_keyread; /* when using fetch + cache with HA_EXTRA_KEYREAD, don't + overwrite other fields in mysql row + row buffer.*/ ulint fetch_cache_first;/* position of the first not yet fetched row in fetch_cache */ ulint n_fetch_cached; /* number of not yet fetched rows diff --git a/innobase/include/row0row.h b/innobase/include/row0row.h index 951e211fb37..782973d8f5d 100644 --- a/innobase/include/row0row.h +++ b/innobase/include/row0row.h @@ -27,7 +27,8 @@ row_get_rec_trx_id( /*===============*/ /* out: value of the field */ rec_t* rec, /* in: record */ - dict_index_t* index); /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /************************************************************************* Reads the roll pointer field from a clustered index record. */ UNIV_INLINE @@ -36,7 +37,8 @@ row_get_rec_roll_ptr( /*=================*/ /* out: value of the field */ rec_t* rec, /* in: record */ - dict_index_t* index); /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /************************************************************************* Writes the trx id field to a clustered index record. */ UNIV_INLINE @@ -45,7 +47,8 @@ row_set_rec_trx_id( /*===============*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ - dulint trx_id); /* in: value of the field */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ + dulint trx_id);/* in: value of the field */ /************************************************************************* Sets the roll pointer field in a clustered index record. */ UNIV_INLINE @@ -54,6 +57,7 @@ row_set_rec_roll_ptr( /*=================*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ dulint roll_ptr);/* in: value of the field */ /********************************************************************* When an insert to a table is performed, this function builds the entry which @@ -90,6 +94,9 @@ row_build( the buffer page of this record must be at least s-latched and the latch held as long as the row dtuple is used! */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) + or NULL, in which case this function + will invoke rec_get_offsets() */ mem_heap_t* heap); /* in: memory heap from which the memory needed is allocated */ /*********************************************************************** @@ -175,14 +182,15 @@ UNIV_INLINE void row_build_row_ref_fast( /*===================*/ - dtuple_t* ref, /* in: typed data tuple where the reference - is built */ - ulint* map, /* in: array of field numbers in rec telling - how ref should be built from the fields of - rec */ - rec_t* rec); /* in: record in the index; must be preserved - while ref is used, as we do not copy field - values to heap */ + dtuple_t* ref, /* in: typed data tuple where the + reference is built */ + const ulint* map, /* in: array of field numbers in rec + telling how ref should be built from + the fields of rec */ + rec_t* rec, /* in: record in the index; must be + preserved while ref is used, as we do + not copy field values to heap */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /******************************************************************* Searches the clustered index record for a row, if we have the row reference. */ diff --git a/innobase/include/row0row.ic b/innobase/include/row0row.ic index 8e5121f5a96..85410beacf0 100644 --- a/innobase/include/row0row.ic +++ b/innobase/include/row0row.ic @@ -20,7 +20,8 @@ row_get_rec_sys_field( /* out: value of the field */ ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ rec_t* rec, /* in: record */ - dict_index_t* index); /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /************************************************************************* Sets the trx id or roll ptr field in a clustered index record: this function is slower than the specialized inline functions. */ @@ -32,6 +33,7 @@ row_set_rec_sys_field( ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ dulint val); /* in: value to set */ /************************************************************************* @@ -42,18 +44,21 @@ row_get_rec_trx_id( /*===============*/ /* out: value of the field */ rec_t* rec, /* in: record */ - dict_index_t* index) /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { ulint offset; ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); offset = index->trx_id_offset; if (offset) { return(trx_read_trx_id(rec + offset)); } else { - return(row_get_rec_sys_field(DATA_TRX_ID, rec, index)); + return(row_get_rec_sys_field(DATA_TRX_ID, + rec, index, offsets)); } } @@ -65,18 +70,21 @@ row_get_rec_roll_ptr( /*=================*/ /* out: value of the field */ rec_t* rec, /* in: record */ - dict_index_t* index) /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { ulint offset; ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); offset = index->trx_id_offset; if (offset) { return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN)); } else { - return(row_get_rec_sys_field(DATA_ROLL_PTR, rec, index)); + return(row_get_rec_sys_field(DATA_ROLL_PTR, + rec, index, offsets)); } } @@ -88,18 +96,21 @@ row_set_rec_trx_id( /*===============*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ dulint trx_id) /* in: value of the field */ { ulint offset; ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); offset = index->trx_id_offset; if (offset) { trx_write_trx_id(rec + offset, trx_id); } else { - row_set_rec_sys_field(DATA_TRX_ID, rec, index, trx_id); + row_set_rec_sys_field(DATA_TRX_ID, + rec, index, offsets, trx_id); } } @@ -111,18 +122,21 @@ row_set_rec_roll_ptr( /*=================*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ dulint roll_ptr)/* in: value of the field */ { ulint offset; ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); offset = index->trx_id_offset; if (offset) { trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr); } else { - row_set_rec_sys_field(DATA_ROLL_PTR, rec, index, roll_ptr); + row_set_rec_sys_field(DATA_ROLL_PTR, + rec, index, offsets, roll_ptr); } } @@ -133,14 +147,15 @@ UNIV_INLINE void row_build_row_ref_fast( /*===================*/ - dtuple_t* ref, /* in: typed data tuple where the reference - is built */ - ulint* map, /* in: array of field numbers in rec telling - how ref should be built from the fields of - rec */ - rec_t* rec) /* in: record in the index; must be preserved - while ref is used, as we do not copy field - values to heap */ + dtuple_t* ref, /* in: typed data tuple where the + reference is built */ + const ulint* map, /* in: array of field numbers in rec + telling how ref should be built from + the fields of rec */ + rec_t* rec, /* in: record in the index; must be + preserved while ref is used, as we do + not copy field values to heap */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { dfield_t* dfield; byte* field; @@ -149,6 +164,7 @@ row_build_row_ref_fast( ulint field_no; ulint i; + ut_ad(rec_offs_validate(rec, NULL, offsets)); ref_len = dtuple_get_n_fields(ref); for (i = 0; i < ref_len; i++) { @@ -158,7 +174,8 @@ row_build_row_ref_fast( if (field_no != ULINT_UNDEFINED) { - field = rec_get_nth_field(rec, field_no, &len); + field = rec_get_nth_field(rec, offsets, + field_no, &len); dfield_set_data(dfield, field, len); } } diff --git a/innobase/include/row0upd.h b/innobase/include/row0upd.h index 28210364833..673e0511153 100644 --- a/innobase/include/row0upd.h +++ b/innobase/include/row0upd.h @@ -80,6 +80,7 @@ row_upd_rec_sys_fields( /*===================*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ trx_t* trx, /* in: transaction */ dulint roll_ptr);/* in: roll ptr of the undo log record */ /************************************************************************* @@ -124,8 +125,8 @@ row_upd_changes_field_size_or_external( /* out: TRUE if the update changes the size of some field in index or the field is external in rec or update */ - rec_t* rec, /* in: record in index */ dict_index_t* index, /* in: index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ upd_t* update);/* in: update vector */ /*************************************************************** Replaces the new column values stored in the update vector to the record @@ -135,8 +136,9 @@ a clustered index */ void row_upd_rec_in_place( /*=================*/ - rec_t* rec, /* in/out: record where replaced */ - upd_t* update);/* in: update vector */ + rec_t* rec, /* in/out: record where replaced */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update);/* in: update vector */ /******************************************************************* Builds an update vector from those fields which in a secondary index entry differ from a record that has the equal ordering fields. NOTE: we compare @@ -274,10 +276,11 @@ recovery. */ void row_upd_rec_sys_fields_in_recovery( /*===============================*/ - rec_t* rec, /* in: record */ - ulint pos, /* in: TRX_ID position in rec */ - dulint trx_id, /* in: transaction id */ - dulint roll_ptr);/* in: roll ptr of the undo log record */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint pos, /* in: TRX_ID position in rec */ + dulint trx_id, /* in: transaction id */ + dulint roll_ptr);/* in: roll ptr of the undo log record */ /************************************************************************* Parses the log data written by row_upd_index_write_log. */ diff --git a/innobase/include/row0upd.ic b/innobase/include/row0upd.ic index a124228a0de..e2d81a39cfa 100644 --- a/innobase/include/row0upd.ic +++ b/innobase/include/row0upd.ic @@ -106,15 +106,17 @@ row_upd_rec_sys_fields( /*===================*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ trx_t* trx, /* in: transaction */ dulint roll_ptr)/* in: roll ptr of the undo log record */ { ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); #ifdef UNIV_SYNC_DEBUG ut_ad(!buf_block_align(rec)->is_hashed || rw_lock_own(&btr_search_latch, RW_LOCK_EX)); #endif /* UNIV_SYNC_DEBUG */ - row_set_rec_trx_id(rec, index, trx->id); - row_set_rec_roll_ptr(rec, index, roll_ptr); + row_set_rec_trx_id(rec, index, offsets, trx->id); + row_set_rec_roll_ptr(rec, index, offsets, roll_ptr); } diff --git a/innobase/include/row0vers.h b/innobase/include/row0vers.h index 30cf82144e9..0dd40fda65f 100644 --- a/innobase/include/row0vers.h +++ b/innobase/include/row0vers.h @@ -30,7 +30,8 @@ row_vers_impl_x_locked_off_kernel( transaction; NOTE that the kernel mutex is temporarily released! */ rec_t* rec, /* in: record in a secondary index */ - dict_index_t* index); /* in: the secondary index */ + dict_index_t* index, /* in: the secondary index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /********************************************************************* Finds out if we must preserve a delete marked earlier version of a clustered index record, because it is >= the purge view. */ diff --git a/innobase/include/row0vers.ic b/innobase/include/row0vers.ic index 5ece47c35d1..ab1e264635b 100644 --- a/innobase/include/row0vers.ic +++ b/innobase/include/row0vers.ic @@ -11,73 +11,3 @@ Created 2/6/1997 Heikki Tuuri #include "read0read.h" #include "page0page.h" #include "log0recv.h" - -/************************************************************************* -Fetches the trx id of a clustered index record or version. */ -UNIV_INLINE -dulint -row_vers_get_trx_id( -/*================*/ - /* out: trx id or ut_dulint_zero if the - clustered index record not found */ - rec_t* rec, /* in: clustered index record, or an old - version of it */ - dict_table_t* table) /* in: table */ -{ - return(row_get_rec_trx_id(rec, dict_table_get_first_index(table))); -} - -/************************************************************************* -Checks if a consistent read can be performed immediately on the index -record, or if an older version is needed. */ -UNIV_INLINE -ibool -row_vers_clust_rec_sees_older( -/*==========================*/ - /* out: FALSE if can read immediately */ - rec_t* rec, /* in: record which should be read or passed - over by a read cursor */ - dict_index_t* index, /* in: clustered index */ - read_view_t* view) /* in: read view */ -{ - ut_ad(index->type & DICT_CLUSTERED); - - if (read_view_sees_trx_id(view, row_get_rec_trx_id(rec, index))) { - - return(FALSE); - } - - return(TRUE); -} - -/************************************************************************* -Checks if a secondary index record can be read immediately by a consistent -read, or if an older version may be needed. To be sure, we will have to -look in the clustered index. */ -UNIV_INLINE -ibool -row_vers_sec_rec_may_see_older( -/*===========================*/ - /* out: FALSE if can be read immediately */ - rec_t* rec, /* in: record which should be read or passed */ - dict_index_t* index __attribute__((unused)),/* in: secondary index */ - read_view_t* view) /* in: read view */ -{ - page_t* page; - - ut_ad(!(index->type & DICT_CLUSTERED)); - - page = buf_frame_align(rec); - - if ((ut_dulint_cmp(page_get_max_trx_id(page), view->up_limit_id) >= 0) - || recv_recovery_is_on()) { - - /* It may be that the record was inserted or modified by a - transaction the view should not see: we have to look in the - clustered index */ - - return(TRUE); - } - - return(FALSE); -} diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h index 6cfe9cef927..c5374fd00fa 100644 --- a/innobase/include/srv0srv.h +++ b/innobase/include/srv0srv.h @@ -107,6 +107,7 @@ extern ibool srv_very_fast_shutdown; /* if this TRUE, do not flush the extern ibool srv_innodb_status; extern ibool srv_use_doublewrite_buf; +extern ibool srv_use_checksums; extern ibool srv_set_thread_priorities; extern int srv_query_thread_priority; @@ -133,6 +134,8 @@ extern ibool srv_lock_timeout_and_monitor_active; extern ibool srv_error_monitor_active; extern ulint srv_n_spin_wait_rounds; +extern ulint srv_n_free_tickets_to_enter; +extern ulint srv_thread_sleep_delay; extern ulint srv_spin_wait_delay; extern ibool srv_priority_boost; @@ -184,6 +187,63 @@ i/o handler thread */ extern const char* srv_io_thread_op_info[]; extern const char* srv_io_thread_function[]; +/* the number of the log write requests done */ +extern ulint srv_log_write_requests; + +/* the number of physical writes to the log performed */ +extern ulint srv_log_writes; + +/* amount of data written to the log files in bytes */ +extern ulint srv_os_log_written; + +/* amount of writes being done to the log files */ +extern ulint srv_os_log_pending_writes; + +/* we increase this counter, when there we don't have enough space in the +log buffer and have to flush it */ +extern ulint srv_log_waits; + +/* variable that counts amount of data read in total (in bytes) */ +extern ulint srv_data_read; + +/* here we count the amount of data written in total (in bytes) */ +extern ulint srv_data_written; + +/* this variable counts the amount of times, when the doublewrite buffer +was flushed */ +extern ulint srv_dblwr_writes; + +/* here we store the number of pages that have been flushed to the +doublewrite buffer */ +extern ulint srv_dblwr_pages_written; + +/* in this variable we store the number of write requests issued */ +extern ulint srv_buf_pool_write_requests; + +/* here we store the number of times when we had to wait for a free page +in the buffer pool. It happens when the buffer pool is full and we need +to make a flush, in order to be able to read or create a page. */ +extern ulint srv_buf_pool_wait_free; + +/* variable to count the number of pages that were written from the +buffer pool to disk */ +extern ulint srv_buf_pool_flushed; + +/* variable to count the number of buffer pool reads that led to the +reading of a disk page */ +extern ulint srv_buf_pool_reads; + +/* variable to count the number of sequential read-aheads were done */ +extern ulint srv_read_ahead_seq; + +/* variable to count the number of random read-aheads were done */ +extern ulint srv_read_ahead_rnd; + +/* In this structure we store status variables to be passed to MySQL */ +typedef struct export_var_struct export_struc; + +extern export_struc export_vars; + typedef struct srv_sys_struct srv_sys_t; /* The server system */ @@ -400,7 +460,12 @@ void srv_printf_innodb_monitor( /*======================*/ FILE* file); /* in: output stream */ +/************************************************************************ +Function to pass InnoDB status variables to MySQL */ +void +srv_export_innodb_status(void); +/*=====================*/ /* Types for the threads existing in the system. Threads of types 4 - 9 are called utility threads. Note that utility threads are mainly disk @@ -426,6 +491,53 @@ typedef struct srv_slot_struct srv_slot_t; /* Thread table is an array of slots */ typedef srv_slot_t srv_table_t; +/* In this structure we store status variables to be passed to MySQL */ +struct export_var_struct{ + ulint innodb_data_pending_reads; + ulint innodb_data_pending_writes; + ulint innodb_data_pending_fsyncs; + ulint innodb_data_fsyncs; + ulint innodb_data_read; + ulint innodb_data_writes; + ulint innodb_data_written; + ulint innodb_data_reads; + ulint innodb_buffer_pool_pages_total; + ulint innodb_buffer_pool_pages_data; + ulint innodb_buffer_pool_pages_dirty; + ulint innodb_buffer_pool_pages_misc; + ulint innodb_buffer_pool_pages_free; + ulint innodb_buffer_pool_pages_latched; + ulint innodb_buffer_pool_read_requests; + ulint innodb_buffer_pool_reads; + ulint innodb_buffer_pool_wait_free; + ulint innodb_buffer_pool_pages_flushed; + ulint innodb_buffer_pool_write_requests; + ulint innodb_buffer_pool_read_ahead_seq; + ulint innodb_buffer_pool_read_ahead_rnd; + ulint innodb_dblwr_pages_written; + ulint innodb_dblwr_writes; + ulint innodb_log_waits; + ulint innodb_log_write_requests; + ulint innodb_log_writes; + ulint innodb_os_log_written; + ulint innodb_os_log_fsyncs; + ulint innodb_os_log_pending_writes; + ulint innodb_os_log_pending_fsyncs; + ulint innodb_page_size; + ulint innodb_pages_created; + ulint innodb_pages_read; + ulint innodb_pages_written; + ulint innodb_row_lock_waits; + ulint innodb_row_lock_current_waits; + ib_longlong innodb_row_lock_time; + ulint innodb_row_lock_time_avg; + ulint innodb_row_lock_time_max; + ulint innodb_rows_read; + ulint innodb_rows_inserted; + ulint innodb_rows_updated; + ulint innodb_rows_deleted; +}; + /* The server system struct */ struct srv_sys_struct{ os_event_t operational; /* created threads must wait for the @@ -434,6 +546,10 @@ struct srv_sys_struct{ srv_table_t* threads; /* server thread table */ UT_LIST_BASE_NODE_T(que_thr_t) tasks; /* task queue */ + dict_index_t* dummy_ind1; /* dummy index for old-style + supremum and infimum records */ + dict_index_t* dummy_ind2; /* dummy index for new-style + supremum and infimum records */ }; extern ulint srv_n_threads_active[]; diff --git a/innobase/include/sync0rw.h b/innobase/include/sync0rw.h index 9a988a03e92..911c8ac3f4a 100644 --- a/innobase/include/sync0rw.h +++ b/innobase/include/sync0rw.h @@ -61,8 +61,8 @@ Creates, or rather, initializes an rw-lock object in a specified memory location (which must be appropriately aligned). The rw-lock is initialized to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free is necessary only if the memory block containing it is freed. */ - -#define rw_lock_create(L) rw_lock_create_func((L), __FILE__, __LINE__) +#define rw_lock_create(L) rw_lock_create_func((L), __FILE__, __LINE__, #L) + /*=====================*/ /********************************************************************** Creates, or rather, initializes an rw-lock object in a specified memory @@ -75,7 +75,8 @@ rw_lock_create_func( /*================*/ rw_lock_t* lock, /* in: pointer to memory */ const char* cfile_name, /* in: file name where created */ - ulint cline); /* in: file line where created */ + ulint cline, /* in: file line where created */ + const char* cmutex_name); /* in: mutex name */ /********************************************************************** Calling this function is obligatory only if the memory buffer containing the rw-lock is freed. Removes an rw-lock object from the global list. The diff --git a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h index 8e0ec715b12..5046a960bcf 100644 --- a/innobase/include/sync0sync.h +++ b/innobase/include/sync0sync.h @@ -17,6 +17,8 @@ Created 9/5/1995 Heikki Tuuri #include "os0sync.h" #include "sync0arr.h" +extern my_bool timed_mutexes; + /********************************************************************** Initializes the synchronization data structures. */ @@ -35,8 +37,7 @@ location (which must be appropriately aligned). The mutex is initialized in the reset state. Explicit freeing of the mutex with mutex_free is necessary only if the memory block containing it is freed. */ - -#define mutex_create(M) mutex_create_func((M), __FILE__, __LINE__) +#define mutex_create(M) mutex_create_func((M), __FILE__, __LINE__, #M) /*===================*/ /********************************************************************** Creates, or rather, initializes a mutex object in a specified memory @@ -49,7 +50,8 @@ mutex_create_func( /*==============*/ mutex_t* mutex, /* in: pointer to memory */ const char* cfile_name, /* in: file name where created */ - ulint cline); /* in: file line where created */ + ulint cline, /* in: file line where created */ + const char* cmutex_name); /* in: mutex name */ /********************************************************************** Calling this function is obligatory only if the memory buffer containing the mutex is freed. Removes a mutex object from the mutex list. The mutex @@ -413,6 +415,8 @@ or row lock! */ /*------------------------------------- Insert buffer tree */ #define SYNC_IBUF_BITMAP_MUTEX 351 #define SYNC_IBUF_BITMAP 350 +/*------------------------------------- MySQL query cache mutex */ +/*------------------------------------- MySQL binlog mutex */ /*-------------------------------*/ #define SYNC_KERNEL 300 #define SYNC_REC_LOCK 299 @@ -471,6 +475,15 @@ struct mutex_struct { const char* cfile_name;/* File name where mutex created */ ulint cline; /* Line where created */ ulint magic_n; + ulong count_using; /* count of times mutex used */ + ulong count_spin_loop; /* count of spin loops */ + ulong count_spin_rounds; /* count of spin rounds */ + ulong count_os_wait; /* count of os_wait */ + ulong count_os_yield; /* count of os_wait */ + ulonglong lspent_time; /* mutex os_wait timer msec */ + ulonglong lmax_spent_time; /* mutex os_wait timer msec */ + const char* cmutex_name;/* mutex name */ + ulint mutex_type;/* 0 - usual mutex 1 - rw_lock mutex */ }; #define MUTEX_MAGIC_N (ulint)979585 @@ -504,6 +517,13 @@ extern ibool sync_order_checks_on; /* This variable is set to TRUE when sync_init is called */ extern ibool sync_initialized; +/* Global list of database mutexes (not OS mutexes) created. */ +UT_LIST_BASE_NODE_T(mutex_t) mutex_list; + +/* Mutex protecting the mutex_list variable */ +mutex_t mutex_list_mutex; + + #ifndef UNIV_NONINL #include "sync0sync.ic" #endif diff --git a/innobase/include/sync0sync.ic b/innobase/include/sync0sync.ic index aaf5e1fd9e9..f26f3788dc3 100644 --- a/innobase/include/sync0sync.ic +++ b/innobase/include/sync0sync.ic @@ -249,8 +249,11 @@ mutex_enter_func( /* Note that we do not peek at the value of lock_word before trying the atomic test_and_set; we could peek, and possibly save time. */ + + mutex->count_using++; - if (!mutex_test_and_set(mutex)) { + if (!mutex_test_and_set(mutex)) + { #ifdef UNIV_SYNC_DEBUG mutex_set_debug_info(mutex, file_name, line); #endif @@ -258,4 +261,5 @@ mutex_enter_func( } mutex_spin_wait(mutex, file_name, line); + } diff --git a/innobase/include/trx0rec.h b/innobase/include/trx0rec.h index 9d7f41cd94e..4387ce1a61e 100644 --- a/innobase/include/trx0rec.h +++ b/innobase/include/trx0rec.h @@ -246,6 +246,7 @@ trx_undo_prev_version_build( index_rec page and purge_view */ rec_t* rec, /* in: version of a clustered index record */ dict_index_t* index, /* in: clustered index */ + ulint* offsets,/* in: rec_get_offsets(rec, index) */ mem_heap_t* heap, /* in: memory heap from which the memory needed is allocated */ rec_t** old_vers);/* out, own: previous version, or NULL if diff --git a/innobase/include/trx0roll.h b/innobase/include/trx0roll.h index 6004551f456..9d025da4a5f 100644 --- a/innobase/include/trx0roll.h +++ b/innobase/include/trx0roll.h @@ -104,11 +104,20 @@ trx_rollback( /*********************************************************************** Rollback or clean up transactions which have no user session. If the transaction already was committed, then we clean up a possible insert -undo log. If the transaction was not yet committed, then we roll it back. */ +undo log. If the transaction was not yet committed, then we roll it back. +Note: this is done in a background thread. */ -void -trx_rollback_or_clean_all_without_sess(void); -/*========================================*/ +#ifndef __WIN__ +void* +#else +ulint +#endif +trx_rollback_or_clean_all_without_sess( +/*===================================*/ + /* out: a dummy parameter */ + void* arg __attribute__((unused))); + /* in: a dummy parameter required by + os_thread_create */ /******************************************************************** Finishes a transaction rollback. */ diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h index 8336e05bdb0..76b051105de 100644 --- a/innobase/include/trx0trx.h +++ b/innobase/include/trx0trx.h @@ -16,6 +16,7 @@ Created 3/26/1996 Heikki Tuuri #include "que0types.h" #include "mem0mem.h" #include "read0types.h" +#include "trx0xa.h" extern ulint trx_n_mysql_transactions; @@ -156,6 +157,36 @@ trx_commit_for_mysql( /*=================*/ /* out: 0 or error number */ trx_t* trx); /* in: trx handle */ + +/************************************************************************** +Does the transaction prepare for MySQL. */ + +ulint +trx_prepare_for_mysql( +/*=================*/ + /* out: 0 or error number */ + trx_t* trx); /* in: trx handle */ + +/************************************************************************** +This function is used to find number of prepared transactions and +their transaction objects for a recovery. */ + +int +trx_recover_for_mysql( +/*=================*/ + /* out: number of prepared transactions */ + XID* xid_list, /* in/out: prepared transactions */ + uint len); /* in: number of slots in xid_list */ + +/*********************************************************************** +This function is used to commit one X/Open XA distributed transaction +which is in the prepared state */ +trx_t * +trx_get_trx_by_xid( +/*===============*/ + /* out: trx or NULL */ + XID* xid); /* in: X/Open XA Transaction Idenfication */ + /************************************************************************** If required, flushes the log to disk if we called trx_commit_for_mysql() with trx->flush_log_later == TRUE. */ @@ -339,6 +370,9 @@ struct trx_struct{ if we can use the insert buffer for them, we set this FALSE */ dulint id; /* transaction id */ + XID xid; /* X/Open XA transaction + identification to identify a + transaction branch */ dulint no; /* transaction serialization number == max trx id when the transaction is moved to COMMITTED_IN_MEMORY state */ @@ -353,8 +387,10 @@ struct trx_struct{ dulint table_id; /* table id if the preceding field is TRUE */ /*------------------------------*/ - void* mysql_thd; /* MySQL thread handle corresponding - to this trx, or NULL */ + int active_trans; /* whether a transaction in MySQL + is active */ + void* mysql_thd; /* MySQL thread handle corresponding + to this trx, or NULL */ char** mysql_query_str;/* pointer to the field in mysqld_thd which contains the pointer to the current SQL query string */ @@ -436,9 +472,15 @@ struct trx_struct{ lock_t* auto_inc_lock; /* possible auto-inc lock reserved by the transaction; note that it is also in the lock list trx_locks */ + ibool trx_create_lock;/* this is TRUE if we have created a + new lock for a record accessed */ ulint n_lock_table_exp;/* number of explicit table locks (LOCK TABLES) reserved by the transaction, stored in trx_locks */ + ulint n_lock_table_transactional; + /* number of transactional table locks + (LOCK TABLES..WHERE ENGINE) reserved by + the transaction, stored in trx_locks */ UT_LIST_NODE_T(trx_t) trx_list; /* list of transactions */ UT_LIST_NODE_T(trx_t) @@ -554,6 +596,7 @@ struct trx_struct{ #define TRX_NOT_STARTED 1 #define TRX_ACTIVE 2 #define TRX_COMMITTED_IN_MEMORY 3 +#define TRX_PREPARED 4 /* Support for 2PC/XA */ /* Transaction execution states when trx state is TRX_ACTIVE */ #define TRX_QUE_RUNNING 1 /* transaction is running */ diff --git a/innobase/include/trx0undo.h b/innobase/include/trx0undo.h index 20002076cc3..fce62e46046 100644 --- a/innobase/include/trx0undo.h +++ b/innobase/include/trx0undo.h @@ -14,6 +14,7 @@ Created 3/26/1996 Heikki Tuuri #include "mtr0mtr.h" #include "trx0sys.h" #include "page0types.h" +#include "trx0xa.h" /*************************************************************************** Builds a roll pointer dulint. */ @@ -36,7 +37,7 @@ trx_undo_decode_roll_ptr( ibool* is_insert, /* out: TRUE if insert undo log */ ulint* rseg_id, /* out: rollback segment id */ ulint* page_no, /* out: page number */ - ulint* offset); /* out: offset of the undo entry within page */ + ulint* offset); /* out: offset of the undo entry within page */ /*************************************************************************** Returns TRUE if the roll pointer is of the insert type. */ UNIV_INLINE @@ -239,6 +240,18 @@ trx_undo_set_state_at_finish( trx_t* trx, /* in: transaction */ trx_undo_t* undo, /* in: undo log memory copy */ mtr_t* mtr); /* in: mtr */ +/********************************************************************** +Sets the state of the undo log segment at a transaction prepare. */ + +page_t* +trx_undo_set_state_at_prepare( +/*==========================*/ + /* out: undo log segment header page, + x-latched */ + trx_t* trx, /* in: transaction */ + trx_undo_t* undo, /* in: undo log memory copy */ + mtr_t* mtr); /* in: mtr */ + /************************************************************************** Adds the update undo log header as the first in the history list, and frees the memory object, or puts it to the list of cached update undo log @@ -294,7 +307,23 @@ trx_undo_parse_discard_latest( byte* end_ptr,/* in: buffer end */ page_t* page, /* in: page or NULL */ mtr_t* mtr); /* in: mtr or NULL */ +/************************************************************************ +Write X/Open XA Transaction Identification (XID) to undo log header */ +void +trx_undo_write_xid( +/*===============*/ + trx_ulogf_t* log_hdr,/* in: undo log header */ + XID* xid); /* in: X/Open XA Transaction Identification */ + +/************************************************************************ +Read X/Open XA Transaction Identification (XID) from undo log header */ + +void +trx_undo_read_xid( +/*==============*/ + trx_ulogf_t* log_hdr,/* in: undo log header */ + XID* xid); /* out: X/Open XA Transaction Identification */ /* Types of an undo log segment */ #define TRX_UNDO_INSERT 1 /* contains undo entries for inserts */ @@ -310,6 +339,8 @@ trx_undo_parse_discard_latest( #define TRX_UNDO_TO_PURGE 4 /* update undo segment will not be reused: it can be freed in purge when all undo data in it is removed */ +#define TRX_UNDO_PREPARED 5 /* contains an undo log of an + prepared transaction */ /* Transaction undo log memory object; this is protected by the undo_mutex in the corresponding transaction object */ @@ -332,6 +363,8 @@ struct trx_undo_struct{ field */ dulint trx_id; /* id of the trx assigned to the undo log */ + XID xid; /* X/Open XA transaction + identification */ ibool dict_operation; /* TRUE if a dict operation trx */ dulint table_id; /* if a dict operation, then the table id */ @@ -436,7 +469,10 @@ page of an update undo log segment. */ log start, and therefore this is not necessarily the same as this log header end offset */ -#define TRX_UNDO_DICT_OPERATION 20 /* TRUE if the transaction is a table +#define TRX_UNDO_XID_EXISTS 20 /* TRUE if undo log header includes + X/Open XA transaction identification + XID */ +#define TRX_UNDO_DICT_TRANS 21 /* TRUE if the transaction is a table create, index create, or drop transaction: in recovery the transaction cannot be rolled back @@ -452,7 +488,17 @@ page of an update undo log segment. */ #define TRX_UNDO_HISTORY_NODE 34 /* If the log is put to the history list, the file list node is here */ /*-------------------------------------------------------------*/ -#define TRX_UNDO_LOG_HDR_SIZE (34 + FLST_NODE_SIZE) +/* X/Open XA Transaction Identification (XID) */ + +#define TRX_UNDO_XA_FORMAT (34 + FLST_NODE_SIZE) +#define TRX_UNDO_XA_TRID_LEN (TRX_UNDO_XA_FORMAT + 4) +#define TRX_UNDO_XA_BQUAL_LEN (TRX_UNDO_XA_TRID_LEN + 4) +#define TRX_UNDO_XA_XID (TRX_UNDO_XA_BQUAL_LEN + 4) +#define TRX_UNDO_XA_LEN (TRX_UNDO_XA_XID + XIDDATASIZE) + +/*-------------------------------------------------------------*/ +#define TRX_UNDO_LOG_HDR_SIZE (TRX_UNDO_XA_LEN) +/*-------------------------------------------------------------*/ #ifndef UNIV_NONINL #include "trx0undo.ic" diff --git a/innobase/include/trx0xa.h b/innobase/include/trx0xa.h new file mode 100644 index 00000000000..34b7a2f95a8 --- /dev/null +++ b/innobase/include/trx0xa.h @@ -0,0 +1,182 @@ +/* + * Start of xa.h header + * + * Define a symbol to prevent multiple inclusions of this header file + */ +#ifndef XA_H +#define XA_H + +/* + * Transaction branch identification: XID and NULLXID: + */ +#ifndef XIDDATASIZE + +#define XIDDATASIZE 128 /* size in bytes */ +#define MAXGTRIDSIZE 64 /* maximum size in bytes of gtrid */ +#define MAXBQUALSIZE 64 /* maximum size in bytes of bqual */ + +struct xid_t { + long formatID; /* format identifier */ + long gtrid_length; /* value from 1 through 64 */ + long bqual_length; /* value from 1 through 64 */ + char data[XIDDATASIZE]; +}; +typedef struct xid_t XID; +#endif +/* + * A value of -1 in formatID means that the XID is null. + */ + + +#ifdef NOTDEFINED +/* Let us comment this out to remove compiler errors!!!!!!!!!!!! */ + +/* + * Declarations of routines by which RMs call TMs: + */ +extern int ax_reg __P((int, XID *, long)); +extern int ax_unreg __P((int, long)); + +/* + * XA Switch Data Structure + */ +#define RMNAMESZ 32 /* length of resource manager name, */ + /* including the null terminator */ +#define MAXINFOSIZE 256 /* maximum size in bytes of xa_info */ + /* strings, including the null + terminator */ + + +struct xa_switch_t { + char name[RMNAMESZ]; /* name of resource manager */ + long flags; /* resource manager specific options */ + long version; /* must be 0 */ + int (*xa_open_entry) /* xa_open function pointer */ + __P((char *, int, long)); + int (*xa_close_entry) /* xa_close function pointer */ + __P((char *, int, long)); + int (*xa_start_entry) /* xa_start function pointer */ + __P((XID *, int, long)); + int (*xa_end_entry) /* xa_end function pointer */ + __P((XID *, int, long)); + int (*xa_rollback_entry) /* xa_rollback function pointer */ + __P((XID *, int, long)); + int (*xa_prepare_entry) /* xa_prepare function pointer */ + __P((XID *, int, long)); + int (*xa_commit_entry) /* xa_commit function pointer */ + __P((XID *, int, long)); + int (*xa_recover_entry) /* xa_recover function pointer */ + __P((XID *, long, int, long)); + int (*xa_forget_entry) /* xa_forget function pointer */ + __P((XID *, int, long)); + int (*xa_complete_entry) /* xa_complete function pointer */ + __P((int *, int *, int, long)); +}; +#endif /* NOTDEFINED */ + + +/* + * Flag definitions for the RM switch + */ +#define TMNOFLAGS 0x00000000L /* no resource manager features + selected */ +#define TMREGISTER 0x00000001L /* resource manager dynamically + registers */ +#define TMNOMIGRATE 0x00000002L /* resource manager does not support + association migration */ +#define TMUSEASYNC 0x00000004L /* resource manager supports + asynchronous operations */ +/* + * Flag definitions for xa_ and ax_ routines + */ +/* use TMNOFLAGGS, defined above, when not specifying other flags */ +#define TMASYNC 0x80000000L /* perform routine asynchronously */ +#define TMONEPHASE 0x40000000L /* caller is using one-phase commit + optimisation */ +#define TMFAIL 0x20000000L /* dissociates caller and marks + transaction branch rollback-only */ +#define TMNOWAIT 0x10000000L /* return if blocking condition + exists */ +#define TMRESUME 0x08000000L /* caller is resuming association with + suspended transaction branch */ +#define TMSUCCESS 0x04000000L /* dissociate caller from transaction + branch */ +#define TMSUSPEND 0x02000000L /* caller is suspending, not ending, + association */ +#define TMSTARTRSCAN 0x01000000L /* start a recovery scan */ +#define TMENDRSCAN 0x00800000L /* end a recovery scan */ +#define TMMULTIPLE 0x00400000L /* wait for any asynchronous + operation */ +#define TMJOIN 0x00200000L /* caller is joining existing + transaction branch */ +#define TMMIGRATE 0x00100000L /* caller intends to perform + migration */ + +/* + * ax_() return codes (transaction manager reports to resource manager) + */ +#define TM_JOIN 2 /* caller is joining existing + transaction branch */ +#define TM_RESUME 1 /* caller is resuming association with + suspended transaction branch */ +#define TM_OK 0 /* normal execution */ +#define TMER_TMERR -1 /* an error occurred in the transaction + manager */ +#define TMER_INVAL -2 /* invalid arguments were given */ +#define TMER_PROTO -3 /* routine invoked in an improper + context */ + +/* + * xa_() return codes (resource manager reports to transaction manager) + */ +#define XA_RBBASE 100 /* The inclusive lower bound of the + rollback codes */ +#define XA_RBROLLBACK XA_RBBASE /* The rollback was caused by an + unspecified reason */ +#define XA_RBCOMMFAIL XA_RBBASE+1 /* The rollback was caused by a + communication failure */ +#define XA_RBDEADLOCK XA_RBBASE+2 /* A deadlock was detected */ +#define XA_RBINTEGRITY XA_RBBASE+3 /* A condition that violates the + integrity of the resources was + detected */ +#define XA_RBOTHER XA_RBBASE+4 /* The resource manager rolled back the + transaction branch for a reason not + on this list */ +#define XA_RBPROTO XA_RBBASE+5 /* A protocol error occurred in the + resource manager */ +#define XA_RBTIMEOUT XA_RBBASE+6 /* A transaction branch took too long */ +#define XA_RBTRANSIENT XA_RBBASE+7 /* May retry the transaction branch */ +#define XA_RBEND XA_RBTRANSIENT /* The inclusive upper bound of the + rollback codes */ +#define XA_NOMIGRATE 9 /* resumption must occur where + suspension occurred */ +#define XA_HEURHAZ 8 /* the transaction branch may have + been heuristically completed */ +#define XA_HEURCOM 7 /* the transaction branch has been + heuristically committed */ +#define XA_HEURRB 6 /* the transaction branch has been + heuristically rolled back */ +#define XA_HEURMIX 5 /* the transaction branch has been + heuristically committed and rolled + back */ +#define XA_RETRY 4 /* routine returned with no effect and + may be re-issued */ +#define XA_RDONLY 3 /* the transaction branch was read-only + and has been committed */ +#define XA_OK 0 /* normal execution */ +#define XAER_ASYNC -2 /* asynchronous operation already + outstanding */ +#define XAER_RMERR -3 /* a resource manager error occurred in + the transaction branch */ +#define XAER_NOTA -4 /* the XID is not valid */ +#define XAER_INVAL -5 /* invalid arguments were given */ +#define XAER_PROTO -6 /* routine invoked in an improper + context */ +#define XAER_RMFAIL -7 /* resource manager unavailable */ +#define XAER_DUPID -8 /* the XID already exists */ +#define XAER_OUTSIDE -9 /* resource manager doing work outside + transaction */ +#endif /* ifndef XA_H */ +/* + * End of xa.h header + */ diff --git a/innobase/include/univ.i b/innobase/include/univ.i index be71d4211b3..80024f71992 100644 --- a/innobase/include/univ.i +++ b/innobase/include/univ.i @@ -88,6 +88,7 @@ memory is read outside the allocated blocks. */ #define UNIV_SEARCH_DEBUG #define UNIV_SYNC_PERF_STAT #define UNIV_SEARCH_PERF_STAT +#define UNIV_SRV_PRINT_LATCH_WAITS; */ #define UNIV_LIGHT_MEM_DEBUG diff --git a/innobase/include/ut0byte.h b/innobase/include/ut0byte.h index a62c2e2e318..22d488abeaf 100644 --- a/innobase/include/ut0byte.h +++ b/innobase/include/ut0byte.h @@ -208,7 +208,20 @@ ut_align_down( /*==========*/ /* out: aligned pointer */ void* ptr, /* in: pointer */ - ulint align_no); /* in: align by this number */ + ulint align_no) /* in: align by this number */ + __attribute__((const)); +/************************************************************* +The following function computes the offset of a pointer from the nearest +aligned address. */ +UNIV_INLINE +ulint +ut_align_offset( +/*==========*/ + /* out: distance from aligned + pointer */ + const void* ptr, /* in: pointer */ + ulint align_no) /* in: align by this number */ + __attribute__((const)); /********************************************************************* Gets the nth bit of a ulint. */ UNIV_INLINE diff --git a/innobase/include/ut0byte.ic b/innobase/include/ut0byte.ic index 5a70dcf12a8..e141de3aa3f 100644 --- a/innobase/include/ut0byte.ic +++ b/innobase/include/ut0byte.ic @@ -335,6 +335,27 @@ ut_align_down( return((void*)((((ulint)ptr)) & ~(align_no - 1))); } +/************************************************************* +The following function computes the offset of a pointer from the nearest +aligned address. */ +UNIV_INLINE +ulint +ut_align_offset( +/*============*/ + /* out: distance from + aligned pointer */ + const void* ptr, /* in: pointer */ + ulint align_no) /* in: align by this number */ +{ + ut_ad(align_no > 0); + ut_ad(((align_no - 1) & align_no) == 0); + ut_ad(ptr); + + ut_ad(sizeof(void*) == sizeof(ulint)); + + return(((ulint)ptr) & (align_no - 1)); +} + /********************************************************************* Gets the nth bit of a ulint. */ UNIV_INLINE diff --git a/innobase/include/ut0ut.h b/innobase/include/ut0ut.h index dee8785c9e7..8938957cd12 100644 --- a/innobase/include/ut0ut.h +++ b/innobase/include/ut0ut.h @@ -139,6 +139,14 @@ ib_time_t ut_time(void); /*=========*/ /************************************************************** +Returns system time. */ + +void +ut_usectime( +/*========*/ + ulint* sec, /* out: seconds since the Epoch */ + ulint* ms); /* out: microseconds since the Epoch+*sec */ +/************************************************************** Returns the difference of two times in seconds. */ double |