summary refs log tree commit diff
path: root/innobase/include
diff options
context:
space:
mode:
Diffstat (limited to 'innobase/include')
-rw-r--r-- innobase/include/Makefile.am 2
-rw-r--r-- innobase/include/btr0btr.h 22
-rw-r--r-- innobase/include/btr0btr.ic 9
-rw-r--r-- innobase/include/btr0cur.h 60
-rw-r--r-- innobase/include/btr0cur.ic 4
-rw-r--r-- innobase/include/btr0pcur.h 1
-rw-r--r-- innobase/include/btr0sea.h 10
-rw-r--r-- innobase/include/buf0buf.h 12
-rw-r--r-- innobase/include/buf0flu.ic 2
-rw-r--r-- innobase/include/data0type.ic 71
-rw-r--r-- innobase/include/dict0dict.h 26
-rw-r--r-- innobase/include/dict0dict.ic 71
-rw-r--r-- innobase/include/dict0mem.h 12
-rw-r--r-- innobase/include/fil0fil.h 2
-rw-r--r-- innobase/include/lock0lock.h 43
-rw-r--r-- innobase/include/lock0lock.ic 5
-rw-r--r-- innobase/include/mtr0log.h 33
-rw-r--r-- innobase/include/mtr0mtr.h 26
-rw-r--r-- innobase/include/os0file.h 3
-rw-r--r-- innobase/include/os0proc.h 28
-rw-r--r-- innobase/include/page0cur.h 57
-rw-r--r-- innobase/include/page0cur.ic 19
-rw-r--r-- innobase/include/page0page.h 211
-rw-r--r-- innobase/include/page0page.ic 133
-rw-r--r-- innobase/include/que0que.h 6
-rw-r--r-- innobase/include/rem0cmp.h 24
-rw-r--r-- innobase/include/rem0cmp.ic 5
-rw-r--r-- innobase/include/rem0rec.h 434
-rw-r--r-- innobase/include/rem0rec.ic 997
-rw-r--r-- innobase/include/row0mysql.h 15
-rw-r--r-- innobase/include/row0row.h 30
-rw-r--r-- innobase/include/row0row.ic 49
-rw-r--r-- innobase/include/row0upd.h 17
-rw-r--r-- innobase/include/row0upd.ic 6
-rw-r--r-- innobase/include/row0vers.h 3
-rw-r--r-- innobase/include/row0vers.ic 70
-rw-r--r-- innobase/include/srv0srv.h 116
-rw-r--r-- innobase/include/sync0rw.h 7
-rw-r--r-- innobase/include/sync0sync.h 26
-rw-r--r-- innobase/include/sync0sync.ic 6
-rw-r--r-- innobase/include/trx0rec.h 1
-rw-r--r-- innobase/include/trx0roll.h 17
-rw-r--r-- innobase/include/trx0trx.h 47
-rw-r--r-- innobase/include/trx0undo.h 52
-rw-r--r-- innobase/include/trx0xa.h 182
-rw-r--r-- innobase/include/univ.i 1
-rw-r--r-- innobase/include/ut0byte.h 15
-rw-r--r-- innobase/include/ut0byte.ic 21
-rw-r--r-- innobase/include/ut0ut.h 8
49 files changed, 2177 insertions, 840 deletions
diff --git a/innobase/include/Makefile.am b/innobase/include/Makefile.am
index 102d25566da..eb1e3b72877 100644
--- a/innobase/include/Makefile.am
+++ b/innobase/include/Makefile.am
@@ -49,7 +49,7 @@ noinst_HEADERS = btr0btr.h btr0btr.ic btr0cur.h btr0cur.ic \
thr0loc.h thr0loc.ic trx0purge.h trx0purge.ic trx0rec.h \
trx0rec.ic trx0roll.h trx0roll.ic trx0rseg.h trx0rseg.ic \
trx0sys.h trx0sys.ic trx0trx.h trx0trx.ic trx0types.h \
- trx0undo.h trx0undo.ic univ.i \
+ trx0undo.h trx0undo.ic trx0xa.h univ.i \
usr0sess.h usr0sess.ic usr0types.h ut0byte.h ut0byte.ic \
ut0dbg.h ut0lst.h ut0mem.h ut0mem.ic ut0rnd.h ut0rnd.ic \
ut0sort.h ut0ut.h ut0ut.ic
diff --git a/innobase/include/btr0btr.h b/innobase/include/btr0btr.h
index 8606fcd2a5c..0b19e64d4e0 100644
--- a/innobase/include/btr0btr.h
+++ b/innobase/include/btr0btr.h
@@ -155,7 +155,8 @@ ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
/* out: child node address */
- rec_t* rec); /* in: node pointer record */
+ rec_t* rec, /* in: node pointer record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/****************************************************************
Creates the root node for a new index tree. */
@@ -167,6 +168,7 @@ btr_create(
ulint type, /* in: type of the index */
ulint space, /* in: space where created */
dulint index_id,/* in: index id */
+ ibool comp, /* in: TRUE=compact page format */
mtr_t* mtr); /* in: mini-transaction handle */
/****************************************************************
Frees a B-tree except the root page, which MUST be freed after this
@@ -210,8 +212,9 @@ Reorganizes an index page. */
void
btr_page_reorganize(
/*================*/
- page_t* page, /* in: page to be reorganized */
- mtr_t* mtr); /* in: mtr */
+ page_t* page, /* in: page to be reorganized */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Decides if the page should be split at the convergence point of
inserts converging to left. */
@@ -273,6 +276,7 @@ void
btr_set_min_rec_mark(
/*=================*/
rec_t* rec, /* in: record */
+ ibool comp, /* in: TRUE=compact page format */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Deletes on the upper level the node pointer to a page. */
@@ -332,6 +336,7 @@ btr_parse_set_min_rec_mark(
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
+ ibool comp, /* in: TRUE=compact page format */
page_t* page, /* in: page or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/***************************************************************
@@ -340,11 +345,12 @@ Parses a redo log record of reorganizing a page. */
byte*
btr_parse_page_reorganize(
/*======================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ dict_index_t* index, /* in: record descriptor */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
/******************************************************************
Gets the number of pages in a B-tree. */
diff --git a/innobase/include/btr0btr.ic b/innobase/include/btr0btr.ic
index b0aa0756307..1d1f97d3668 100644
--- a/innobase/include/btr0btr.ic
+++ b/innobase/include/btr0btr.ic
@@ -183,17 +183,18 @@ ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
/* out: child node address */
- rec_t* rec) /* in: node pointer record */
+ rec_t* rec, /* in: node pointer record */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
- ulint n_fields;
byte* field;
ulint len;
ulint page_no;
- n_fields = rec_get_n_fields(rec);
+ ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
/* The child address is in the last field */
- field = rec_get_nth_field(rec, n_fields - 1, &len);
+ field = rec_get_nth_field(rec, offsets,
+ rec_offs_n_fields(offsets) - 1, &len);
ut_ad(len == 4);
diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h
index f1334656d53..0a8d8ceaeb7 100644
--- a/innobase/include/btr0cur.h
+++ b/innobase/include/btr0cur.h
@@ -34,7 +34,7 @@ page_cur_t*
btr_cur_get_page_cur(
/*=================*/
/* out: pointer to page cursor component */
- btr_cur_t* cursor); /* in: tree cursor */
+ btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Returns the record pointer of a tree cursor. */
UNIV_INLINE
@@ -42,14 +42,14 @@ rec_t*
btr_cur_get_rec(
/*============*/
/* out: pointer to record */
- btr_cur_t* cursor); /* in: tree cursor */
+ btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Invalidates a tree cursor by setting record pointer to NULL. */
UNIV_INLINE
void
btr_cur_invalidate(
/*===============*/
- btr_cur_t* cursor); /* in: tree cursor */
+ btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Returns the page of a tree cursor. */
UNIV_INLINE
@@ -57,7 +57,7 @@ page_t*
btr_cur_get_page(
/*=============*/
/* out: pointer to page */
- btr_cur_t* cursor); /* in: tree cursor */
+ btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Returns the tree of a cursor. */
UNIV_INLINE
@@ -65,7 +65,7 @@ dict_tree_t*
btr_cur_get_tree(
/*=============*/
/* out: tree */
- btr_cur_t* cursor); /* in: tree cursor */
+ btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Positions a tree cursor at a given record. */
UNIV_INLINE
@@ -283,8 +283,9 @@ only used by the insert buffer insert merge mechanism. */
void
btr_cur_del_unmark_for_ibuf(
/*========================*/
- rec_t* rec, /* in: record to delete unmark */
- mtr_t* mtr); /* in: mtr */
+ rec_t* rec, /* in: record to delete unmark */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Tries to compress a page of the tree on the leaf level. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
@@ -361,10 +362,11 @@ Parses a redo log record of updating a record in-place. */
byte*
btr_cur_parse_update_in_place(
/*==========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page); /* in: page or NULL */
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ dict_index_t* index); /* in: index corresponding to page */
/********************************************************************
Parses the redo log record for delete marking or unmarking of a clustered
index record. */
@@ -372,10 +374,11 @@ index record. */
byte*
btr_cur_parse_del_mark_set_clust_rec(
/*=================================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page); /* in: page or NULL */
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ dict_index_t* index, /* in: index corresponding to page */
+ page_t* page); /* in: page or NULL */
/********************************************************************
Parses the redo log record for delete marking or unmarking of a secondary
index record. */
@@ -383,10 +386,11 @@ index record. */
byte*
btr_cur_parse_del_mark_set_sec_rec(
/*===============================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page); /* in: page or NULL */
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ dict_index_t* index, /* in: index corresponding to page */
+ page_t* page); /* in: page or NULL */
/***********************************************************************
Estimates the number of rows in a given index range. */
@@ -417,9 +421,10 @@ to free the field. */
void
btr_cur_mark_extern_inherited_fields(
/*=================================*/
- rec_t* rec, /* in: record in a clustered index */
- upd_t* update, /* in: update vector */
- mtr_t* mtr); /* in: mtr */
+ rec_t* rec, /* in: record in a clustered index */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ upd_t* update, /* in: update vector */
+ mtr_t* mtr); /* in: mtr */
/***********************************************************************
The complement of the previous function: in an update entry may inherit
some externally stored fields from a record. We must mark them as inherited
@@ -456,6 +461,7 @@ btr_store_big_rec_extern_fields(
dict_index_t* index, /* in: index of rec; the index tree
MUST be X-latched */
rec_t* rec, /* in: record */
+ const ulint* offsets, /* in: rec_get_offsets(rec, index) */
big_rec_t* big_rec_vec, /* in: vector containing fields
to be stored externally */
mtr_t* local_mtr); /* in: mtr containing the latch to
@@ -496,6 +502,7 @@ btr_rec_free_externally_stored_fields(
dict_index_t* index, /* in: index of the data, the index
tree MUST be X-latched */
rec_t* rec, /* in: record */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
ibool do_not_free_inherited,/* in: TRUE if called in a
rollback and we do not want to free
inherited fields */
@@ -510,6 +517,7 @@ btr_rec_copy_externally_stored_field(
/*=================================*/
/* out: the field copied to heap */
rec_t* rec, /* in: record */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint no, /* in: field number */
ulint* len, /* out: length of the field */
mem_heap_t* heap); /* in: mem heap */
@@ -540,10 +548,10 @@ ulint
btr_push_update_extern_fields(
/*==========================*/
/* out: number of values stored in ext_vect */
- ulint* ext_vect, /* in: array of ulints, must be preallocated
- to have place for all fields in rec */
- rec_t* rec, /* in: record */
- upd_t* update); /* in: update vector */
+ ulint* ext_vect,/* in: array of ulints, must be preallocated
+ to have space for all fields in rec */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ upd_t* update);/* in: update vector or NULL */
/*######################################################################*/
diff --git a/innobase/include/btr0cur.ic b/innobase/include/btr0cur.ic
index a3a04b60c45..dcad3e9e14d 100644
--- a/innobase/include/btr0cur.ic
+++ b/innobase/include/btr0cur.ic
@@ -134,17 +134,15 @@ btr_cur_can_delete_without_compress(
/* out: TRUE if can be deleted without
recommended compression */
btr_cur_t* cursor, /* in: btr cursor */
+ ulint rec_size,/* in: rec_get_size(btr_cur_get_rec(cursor))*/
mtr_t* mtr) /* in: mtr */
{
- ulint rec_size;
page_t* page;
ut_ad(mtr_memo_contains(mtr, buf_block_align(
btr_cur_get_page(cursor)),
MTR_MEMO_PAGE_X_FIX));
- rec_size = rec_get_size(btr_cur_get_rec(cursor));
-
page = btr_cur_get_page(cursor);
if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT)
diff --git a/innobase/include/btr0pcur.h b/innobase/include/btr0pcur.h
index 81f19af4d40..6384222be51 100644
--- a/innobase/include/btr0pcur.h
+++ b/innobase/include/btr0pcur.h
@@ -462,6 +462,7 @@ struct btr_pcur_struct{
contains an initial segment of the
latest record cursor was positioned
either on, before, or after */
+ ulint old_n_fields; /* number of fields in old_rec */
ulint rel_pos; /* BTR_PCUR_ON, BTR_PCUR_BEFORE, or
BTR_PCUR_AFTER, depending on whether
cursor was on, before, or after the
diff --git a/innobase/include/btr0sea.h b/innobase/include/btr0sea.h
index ce4140ecf92..78e88a24083 100644
--- a/innobase/include/btr0sea.h
+++ b/innobase/include/btr0sea.h
@@ -77,8 +77,10 @@ parameters as page (this often happens when a page is split). */
void
btr_search_move_or_delete_hash_entries(
/*===================================*/
- page_t* new_page, /* in: records are copied to this page */
- page_t* page); /* in: index page */
+ page_t* new_page, /* in: records are copied
+ to this page */
+ page_t* page, /* in: index page */
+ dict_index_t* index); /* in: record descriptor */
/************************************************************************
Drops a page hash index. */
@@ -129,8 +131,8 @@ Validates the search system. */
ibool
btr_search_validate(void);
-/*=====================*/
-
+/*======================*/
+ /* out: TRUE if ok */
/* Search info directions */
#define BTR_SEA_NO_DIRECTION 1
diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h
index 53599d03c73..5ee323f1b1e 100644
--- a/innobase/include/buf0buf.h
+++ b/innobase/include/buf0buf.h
@@ -52,11 +52,15 @@ Created 11/5/1995 Heikki Tuuri
/* Modes for buf_page_get_known_nowait */
#define BUF_MAKE_YOUNG 51
#define BUF_KEEP_OLD 52
+/* Magic value to use instead of checksums when they are disabled */
+#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
extern buf_pool_t* buf_pool; /* The buffer pool of the database */
extern ibool buf_debug_prints;/* If this is set TRUE, the program
prints info whenever read or flush
occurs */
+extern ulint srv_buf_pool_write_requests; /* variable to count write request
+ issued */
/************************************************************************
Creates the buffer pool. */
@@ -496,6 +500,12 @@ void
buf_print(void);
/*============*/
/*************************************************************************
+Returns the number of latched pages in the buffer pool. */
+
+ulint
+buf_get_latched_pages_number(void);
+/*==============================*/
+/*************************************************************************
Returns the number of pending buf pool ios. */
ulint
@@ -731,6 +741,8 @@ struct buf_block_struct{
buffer pool which are index pages,
but this flag is not set because
we do not keep track of all pages */
+ dict_index_t* index; /* index for which the adaptive
+ hash index has been created */
/* 2. Page flushing fields */
UT_LIST_NODE_T(buf_block_t) flush_list;
diff --git a/innobase/include/buf0flu.ic b/innobase/include/buf0flu.ic
index d6dbdcc0865..9a8a021e029 100644
--- a/innobase/include/buf0flu.ic
+++ b/innobase/include/buf0flu.ic
@@ -61,6 +61,8 @@ buf_flush_note_modification(
ut_ad(ut_dulint_cmp(block->oldest_modification,
mtr->start_lsn) <= 0);
}
+
+ ++srv_buf_pool_write_requests;
}
/************************************************************************
diff --git a/innobase/include/data0type.ic b/innobase/include/data0type.ic
index 946b646ffbf..0b92ffbe7f1 100644
--- a/innobase/include/data0type.ic
+++ b/innobase/include/data0type.ic
@@ -8,6 +8,17 @@ Created 1/16/1996 Heikki Tuuri
#include "mach0data.h"
+/**********************************************************************
+Determines whether the given character set is of variable length.
+
+NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
+this function, you MUST change also the prototype here! */
+extern
+ibool
+innobase_is_mb_cset(
+/*================*/
+ ulint cset); /* in: MySQL charset-collation code */
+
/*************************************************************************
Sets a data type structure. */
UNIV_INLINE
@@ -149,8 +160,10 @@ dtype_new_store_for_order_and_null_size(
bytes where we store the info */
dtype_t* type) /* in: type struct */
{
- ut_ad(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
-
+#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
+#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
+#endif
+
buf[0] = (byte)(type->mtype & 0xFFUL);
if (type->prtype & DATA_BINARY_TYPE) {
@@ -166,10 +179,12 @@ dtype_new_store_for_order_and_null_size(
mach_write_to_2(buf + 2, type->len & 0xFFFFUL);
+ ut_ad(dtype_get_charset_coll(type->prtype) < 256);
mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype));
- /* Note that the second last byte is left unused, because the
- charset-collation code is always < 256 */
+ if (type->prtype & DATA_NOT_NULL) {
+ buf[4] |= 128;
+ }
}
/**************************************************************************
@@ -211,20 +226,26 @@ dtype_new_read_for_order_and_null_size(
{
ulint charset_coll;
- ut_ad(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
+#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
+#endif
type->mtype = buf[0] & 63;
type->prtype = buf[1];
if (buf[0] & 128) {
- type->prtype = type->prtype | DATA_BINARY_TYPE;
+ type->prtype |= DATA_BINARY_TYPE;
+ }
+
+ if (buf[4] & 128) {
+ type->prtype |= DATA_NOT_NULL;
}
type->len = mach_read_from_2(buf + 2);
mach_read_from_2(buf + 4);
- charset_coll = mach_read_from_2(buf + 4);
+ charset_coll = mach_read_from_2(buf + 4) & 0x7fff;
if (dtype_is_string_type(type->mtype)) {
ut_a(charset_coll < 256);
@@ -257,23 +278,39 @@ dtype_get_fixed_size(
mtype = dtype_get_mtype(type);
switch (mtype) {
+ case DATA_SYS:
+#ifdef UNIV_DEBUG
+ switch (type->prtype & DATA_MYSQL_TYPE_MASK) {
+ default:
+ ut_ad(0);
+ return(0);
+ case DATA_ROW_ID:
+ ut_ad(type->len == DATA_ROW_ID_LEN);
+ break;
+ case DATA_TRX_ID:
+ ut_ad(type->len == DATA_TRX_ID_LEN);
+ break;
+ case DATA_ROLL_PTR:
+ ut_ad(type->len == DATA_ROLL_PTR_LEN);
+ break;
+ case DATA_MIX_ID:
+ ut_ad(type->len == DATA_MIX_ID_LEN);
+ break;
+ }
+#endif /* UNIV_DEBUG */
case DATA_CHAR:
case DATA_FIXBINARY:
case DATA_INT:
case DATA_FLOAT:
case DATA_DOUBLE:
case DATA_MYSQL:
- return(dtype_get_len(type));
-
- case DATA_SYS: if (type->prtype == DATA_ROW_ID) {
- return(DATA_ROW_ID_LEN);
- } else if (type->prtype == DATA_TRX_ID) {
- return(DATA_TRX_ID_LEN);
- } else if (type->prtype == DATA_ROLL_PTR) {
- return(DATA_ROLL_PTR_LEN);
- } else {
- return(0);
+ if ((type->prtype & DATA_BINARY_TYPE)
+ || !innobase_is_mb_cset(
+ dtype_get_charset_coll(
+ type->prtype))) {
+ return(dtype_get_len(type));
}
+ /* fall through for variable-length charsets */
case DATA_VARCHAR:
case DATA_BINARY:
case DATA_DECIMAL:
diff --git a/innobase/include/dict0dict.h b/innobase/include/dict0dict.h
index ca632691450..eaf5b06b2a9 100644
--- a/innobase/include/dict0dict.h
+++ b/innobase/include/dict0dict.h
@@ -639,6 +639,16 @@ dict_index_get_sys_col_pos(
dict_index_t* index, /* in: index */
ulint type); /* in: DATA_ROW_ID, ... */
/***********************************************************************
+Adds a column to index. */
+
+void
+dict_index_add_col(
+/*===============*/
+ dict_index_t* index, /* in: index */
+ dict_col_t* col, /* in: column */
+ ulint order, /* in: order criterion */
+ ulint prefix_len); /* in: column prefix length */
+/***********************************************************************
Copies types of fields contained in index to tuple. */
void
@@ -647,18 +657,6 @@ dict_index_copy_types(
dtuple_t* tuple, /* in: data tuple */
dict_index_t* index, /* in: index */
ulint n_fields); /* in: number of field types to copy */
-/************************************************************************
-Gets the value of a system column in a clustered index record. The clustered
-index must contain the system column: if the index is unique, row id is
-not contained there! */
-UNIV_INLINE
-dulint
-dict_index_rec_get_sys_col(
-/*=======================*/
- /* out: system column value */
- dict_index_t* index, /* in: clustered index describing the record */
- ulint type, /* in: column type: DATA_ROLL_PTR, ... */
- rec_t* rec); /* in: record */
/*************************************************************************
Gets the index tree where the index is stored. */
UNIV_INLINE
@@ -720,7 +718,7 @@ dict_tree_find_index_for_tuple(
dtuple_t* tuple); /* in: tuple for which to find index */
/***********************************************************************
Checks if a table which is a mixed cluster member owns a record. */
-UNIV_INLINE
+
ibool
dict_is_mixed_table_rec(
/*====================*/
@@ -770,6 +768,7 @@ dict_tree_copy_rec_order_prefix(
/* out: pointer to the prefix record */
dict_tree_t* tree, /* in: index tree */
rec_t* rec, /* in: record for which to copy prefix */
+ ulint* n_fields,/* out: number of fields copied */
byte** buf, /* in/out: memory buffer for the copied prefix,
or NULL */
ulint* buf_size);/* in/out: buffer size */
@@ -782,6 +781,7 @@ dict_tree_build_data_tuple(
/* out, own: data tuple */
dict_tree_t* tree, /* in: index tree */
rec_t* rec, /* in: record for which to build data tuple */
+ ulint n_fields,/* in: number of data fields */
mem_heap_t* heap); /* in: memory heap where tuple created */
/*************************************************************************
Gets the space id of the root of the index tree. */
diff --git a/innobase/include/dict0dict.ic b/innobase/include/dict0dict.ic
index 85e4aaf1a05..928a693f860 100644
--- a/innobase/include/dict0dict.ic
+++ b/innobase/include/dict0dict.ic
@@ -9,7 +9,6 @@ Created 1/8/1996 Heikki Tuuri
#include "dict0load.h"
#include "trx0undo.h"
#include "trx0sys.h"
-#include "rem0rec.h"
/*************************************************************************
Gets the column data type. */
@@ -168,7 +167,7 @@ dict_table_get_sys_col(
col = dict_table_get_nth_col(table, table->n_cols
- DATA_N_SYS_COLS + sys);
ut_ad(col->type.mtype == DATA_SYS);
- ut_ad(col->type.prtype == sys);
+ ut_ad(col->type.prtype == (sys | DATA_NOT_NULL));
return(col);
}
@@ -312,49 +311,6 @@ dict_index_get_sys_col_pos(
dict_table_get_sys_col_no(index->table, type)));
}
-/************************************************************************
-Gets the value of a system column in a clustered index record. The clustered
-index must contain the system column: if the index is unique, row id is
-not contained there! */
-UNIV_INLINE
-dulint
-dict_index_rec_get_sys_col(
-/*=======================*/
- /* out: system column value */
- dict_index_t* index, /* in: clustered index describing the record */
- ulint type, /* in: column type: DATA_ROLL_PTR, ... */
- rec_t* rec) /* in: record */
-{
- ulint pos;
- byte* field;
- ulint len;
-
- ut_ad(index);
- ut_ad(index->type & DICT_CLUSTERED);
-
- pos = dict_index_get_sys_col_pos(index, type);
-
- ut_ad(pos != ULINT_UNDEFINED);
-
- field = rec_get_nth_field(rec, pos, &len);
-
- if (type == DATA_ROLL_PTR) {
- ut_ad(len == 7);
-
- return(trx_read_roll_ptr(field));
- } else if (type == DATA_TRX_ID) {
-
- return(trx_read_trx_id(field));
- } else if (type == DATA_MIX_ID) {
-
- return(mach_dulint_read_compressed(field));
- } else {
- ut_a(type == DATA_ROW_ID);
-
- return(mach_read_from_6(field));
- }
-}
-
/*************************************************************************
Gets the index tree where the index is stored. */
UNIV_INLINE
@@ -662,28 +618,3 @@ dict_table_get_index(
return(index);
}
-
-/***********************************************************************
-Checks if a table which is a mixed cluster member owns a record. */
-UNIV_INLINE
-ibool
-dict_is_mixed_table_rec(
-/*====================*/
- /* out: TRUE if the record belongs to this
- table */
- dict_table_t* table, /* in: table in a mixed cluster */
- rec_t* rec) /* in: user record in the clustered index */
-{
- byte* mix_id_field;
- ulint len;
-
- mix_id_field = rec_get_nth_field(rec, table->mix_len, &len);
-
- if ((len != table->mix_id_len)
- || (0 != ut_memcmp(table->mix_id_buf, mix_id_field, len))) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h
index 1e496a25477..670b3445a55 100644
--- a/innobase/include/dict0mem.h
+++ b/innobase/include/dict0mem.h
@@ -54,7 +54,8 @@ dict_mem_table_create(
of the table is placed; this parameter
is ignored if the table is made
a member of a cluster */
- ulint n_cols); /* in: number of columns */
+ ulint n_cols, /* in: number of columns */
+ ibool comp); /* in: TRUE=compact page format */
/**************************************************************************
Creates a cluster memory object. */
@@ -171,6 +172,13 @@ struct dict_field_struct{
DICT_MAX_COL_PREFIX_LEN; NOTE that
in the UTF-8 charset, MySQL sets this
to 3 * the prefix len in UTF-8 chars */
+ ulint fixed_len; /* 0 or the fixed length of the
+ column if smaller than
+ DICT_MAX_COL_PREFIX_LEN */
+ ulint fixed_offs; /* offset to the field, or
+ ULINT_UNDEFINED if it is not fixed
+ within the record (due to preceding
+ variable-length fields) */
};
/* Data structure for an index tree */
@@ -225,6 +233,7 @@ struct dict_index_struct{
ulint n_def; /* number of fields defined so far */
ulint n_fields;/* number of fields in the index */
dict_field_t* fields; /* array of field descriptions */
+ ulint n_nullable;/* number of nullable fields */
UT_LIST_NODE_T(dict_index_t)
indexes;/* list of indexes of the table */
dict_tree_t* tree; /* index tree struct */
@@ -320,6 +329,7 @@ struct dict_table_struct{
ibool tablespace_discarded;/* this flag is set TRUE when the
user calls DISCARD TABLESPACE on this table,
and reset to FALSE in IMPORT TABLESPACE */
+ ibool comp; /* flag: TRUE=compact page format */
hash_node_t name_hash; /* hash chain node */
hash_node_t id_hash; /* hash chain node */
ulint n_def; /* number of columns defined so far */
diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h
index c1a127aadca..aa1ec5c25a5 100644
--- a/innobase/include/fil0fil.h
+++ b/innobase/include/fil0fil.h
@@ -89,6 +89,8 @@ extern fil_addr_t fil_addr_null;
#define FIL_TABLESPACE 501
#define FIL_LOG 502
+extern ulint fil_n_log_flushes;
+
extern ulint fil_n_pending_log_flushes;
extern ulint fil_n_pending_tablespace_flushes;
diff --git a/innobase/include/lock0lock.h b/innobase/include/lock0lock.h
index 1fd7492d517..710c945375c 100644
--- a/innobase/include/lock0lock.h
+++ b/innobase/include/lock0lock.h
@@ -47,7 +47,8 @@ lock_sec_rec_some_has_impl_off_kernel(
/* out: transaction which has the x-lock, or
NULL */
rec_t* rec, /* in: user record */
- dict_index_t* index); /* in: secondary index */
+ dict_index_t* index, /* in: secondary index */
+ const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Checks if some transaction has an implicit x-lock on a record in a clustered
index. */
@@ -58,7 +59,8 @@ lock_clust_rec_some_has_impl(
/* out: transaction which has the x-lock, or
NULL */
rec_t* rec, /* in: user record */
- dict_index_t* index); /* in: clustered index */
+ dict_index_t* index, /* in: clustered index */
+ const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*****************************************************************
Resets the lock bits for a single record. Releases transactions
waiting for lock requests here. */
@@ -275,6 +277,7 @@ lock_clust_rec_modify_check_and_lock(
does nothing */
rec_t* rec, /* in: record which should be modified */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
Checks if locks of other transactions prevent an immediate modify
@@ -308,6 +311,7 @@ lock_sec_rec_read_check_and_lock(
which should be read or passed over by a read
cursor */
dict_index_t* index, /* in: secondary index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
ulint mode, /* in: mode of the lock which the read cursor
should set on records: LOCK_S or LOCK_X; the
latter is possible in SELECT FOR UPDATE */
@@ -333,6 +337,34 @@ lock_clust_rec_read_check_and_lock(
which should be read or passed over by a read
cursor */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
+ ulint mode, /* in: mode of the lock which the read cursor
+ should set on records: LOCK_S or LOCK_X; the
+ latter is possible in SELECT FOR UPDATE */
+ ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
+ LOCK_REC_NOT_GAP */
+ que_thr_t* thr); /* in: query thread */
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate read, or passing
+over by a read cursor, of a clustered index record. If they do, first tests
+if the query thread should anyway be suspended for some reason; if not, then
+puts the transaction and the query thread to the lock wait state and inserts a
+waiting request for a record lock to the lock queue. Sets the requested mode
+lock on the record. This is an alternative version of
+lock_clust_rec_read_check_and_lock() that does not require the parameter
+"offsets". */
+
+ulint
+lock_clust_rec_read_check_and_lock_alt(
+/*===================================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
+ does nothing */
+ rec_t* rec, /* in: user record or page supremum record
+ which should be read or passed over by a read
+ cursor */
+ dict_index_t* index, /* in: clustered index */
ulint mode, /* in: mode of the lock which the read cursor
should set on records: LOCK_S or LOCK_X; the
latter is possible in SELECT FOR UPDATE */
@@ -350,6 +382,7 @@ lock_clust_rec_cons_read_sees(
rec_t* rec, /* in: user record which should be read or
passed over by a read cursor */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
read_view_t* view); /* in: consistent read view */
/*************************************************************************
Checks that a non-clustered index record is seen in a consistent read. */
@@ -499,6 +532,7 @@ lock_check_trx_id_sanity(
dulint trx_id, /* in: trx id */
rec_t* rec, /* in: user record */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets, /* in: rec_get_offsets(rec, index) */
ibool has_kernel_mutex);/* in: TRUE if the caller owns the
kernel mutex */
/*************************************************************************
@@ -509,7 +543,8 @@ lock_rec_queue_validate(
/*====================*/
/* out: TRUE if ok */
rec_t* rec, /* in: record to look at */
- dict_index_t* index); /* in: index, or NULL if not known */
+ dict_index_t* index, /* in: index, or NULL if not known */
+ const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Prints info of a table lock. */
@@ -577,6 +612,8 @@ extern lock_sys_t* lock_sys;
#define LOCK_TABLE 16 /* these type values should be so high that */
#define LOCK_REC 32 /* they can be ORed to the lock mode */
#define LOCK_TABLE_EXP 80 /* explicit table lock (80 = 16 + 64) */
+#define LOCK_TABLE_TRANSACTIONAL 144
+ /* transactional table lock (144 = 16 + 128)*/
#define LOCK_TYPE_MASK 0xF0UL /* mask used to extract lock type from the
type_mode field in a lock */
/* Waiting lock flag */
diff --git a/innobase/include/lock0lock.ic b/innobase/include/lock0lock.ic
index fabc9256401..c7a71bb45d8 100644
--- a/innobase/include/lock0lock.ic
+++ b/innobase/include/lock0lock.ic
@@ -60,7 +60,8 @@ lock_clust_rec_some_has_impl(
/* out: transaction which has the x-lock, or
NULL */
rec_t* rec, /* in: user record */
- dict_index_t* index) /* in: clustered index */
+ dict_index_t* index, /* in: clustered index */
+ const ulint* offsets)/* in: rec_get_offsets(rec, index) */
{
dulint trx_id;
@@ -70,7 +71,7 @@ lock_clust_rec_some_has_impl(
ut_ad(index->type & DICT_CLUSTERED);
ut_ad(page_rec_is_user_rec(rec));
- trx_id = row_get_rec_trx_id(rec, index);
+ trx_id = row_get_rec_trx_id(rec, index, offsets);
if (trx_is_active(trx_id)) {
/* The modifying or inserting transaction is active */
diff --git a/innobase/include/mtr0log.h b/innobase/include/mtr0log.h
index 9c9c6f696e8..c0636ea1e1e 100644
--- a/innobase/include/mtr0log.h
+++ b/innobase/include/mtr0log.h
@@ -11,6 +11,7 @@ Created 12/7/1995 Heikki Tuuri
#include "univ.i"
#include "mtr0mtr.h"
+#include "dict0types.h"
/************************************************************
Writes 1 - 4 bytes to a file page buffered in the buffer pool.
@@ -173,6 +174,38 @@ mlog_parse_string(
byte* page); /* in: page where to apply the log record, or NULL */
+/************************************************************
+Opens a buffer for mlog, writes the initial log record and,
+if needed, the field lengths of an index. Reserves space
+for further log entries. The log entry must be closed with
+mlog_close(). */
+
+byte*
+mlog_open_and_write_index(
+/*======================*/
+ /* out: buffer, NULL if log mode
+ MTR_LOG_NONE */
+ mtr_t* mtr, /* in: mtr */
+ byte* rec, /* in: index record or page */
+ dict_index_t* index, /* in: record descriptor */
+ byte type, /* in: log item type */
+ ulint size); /* in: requested buffer size in bytes
+ (if 0, calls mlog_close() and returns NULL) */
+
+/************************************************************
+Parses a log record written by mlog_open_and_write_index. */
+
+byte*
+mlog_parse_index(
+/*=============*/
+ /* out: parsed record end,
+ NULL if not a complete record */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ /* out: new value of log_ptr */
+ ibool comp, /* in: TRUE=compact record format */
+ dict_index_t** index); /* out, own: dummy index */
+
/* Insert, update, and maybe other functions may use this value to define an
extra mlog buffer size for variable size data */
#define MLOG_BUF_MARGIN 256
diff --git a/innobase/include/mtr0mtr.h b/innobase/include/mtr0mtr.h
index e8c68a91dad..071279d5259 100644
--- a/innobase/include/mtr0mtr.h
+++ b/innobase/include/mtr0mtr.h
@@ -102,7 +102,31 @@ flag value must give the length also! */
file rename */
#define MLOG_FILE_DELETE ((byte)35) /* log record about an .ibd
file deletion */
-#define MLOG_BIGGEST_TYPE ((byte)35) /* biggest value (used in
+#define MLOG_COMP_REC_MIN_MARK ((byte)36) /* mark a compact index record
+ as the predefined minimum
+ record */
+#define MLOG_COMP_PAGE_CREATE ((byte)37) /* create a compact
+ index page */
+#define MLOG_COMP_REC_INSERT ((byte)38) /* compact record insert */
+#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39)
+ /* mark compact clustered index
+ record deleted */
+#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/* mark compact secondary index
+ record deleted */
+#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/* update of a compact record,
+ preserves record field sizes */
+#define MLOG_COMP_REC_DELETE ((byte)42) /* delete a compact record
+ from a page */
+#define MLOG_COMP_LIST_END_DELETE ((byte)43) /* delete compact record list
+ end on index page */
+#define MLOG_COMP_LIST_START_DELETE ((byte)44) /* delete compact record list
+ start on index page */
+#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45)
+ /* copy compact record list end
+ to a new created index page */
+#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /* reorganize an index page */
+
+#define MLOG_BIGGEST_TYPE ((byte)46) /* biggest value (used in
asserts) */
/*******************************************************************
diff --git a/innobase/include/os0file.h b/innobase/include/os0file.h
index d1439faf29f..599e78bab48 100644
--- a/innobase/include/os0file.h
+++ b/innobase/include/os0file.h
@@ -24,6 +24,9 @@ extern ibool os_aio_print_debug;
extern ulint os_file_n_pending_preads;
extern ulint os_file_n_pending_pwrites;
+extern ulint os_n_pending_reads;
+extern ulint os_n_pending_writes;
+
#ifdef __WIN__
/* We define always WIN_ASYNC_IO, and check at run-time whether
diff --git a/innobase/include/os0proc.h b/innobase/include/os0proc.h
index d0d3cf82e38..b0b72e18675 100644
--- a/innobase/include/os0proc.h
+++ b/innobase/include/os0proc.h
@@ -12,6 +12,11 @@ Created 9/30/1995 Heikki Tuuri
#include "univ.i"
+#ifdef UNIV_LINUX
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#endif
+
typedef void* os_process_t;
typedef unsigned long int os_process_id_t;
@@ -27,6 +32,10 @@ page size of an Intel x86 processor. We cannot use AWE with 2 MB or 4 MB
pages. */
#define OS_AWE_X86_PAGE_SIZE 4096
+extern ibool os_use_large_pages;
+/* Large page size. This may be a boot-time option on some platforms */
+extern ulint os_large_page_size;
+
/********************************************************************
Windows AWE support. Tries to enable the "lock pages in memory" privilege for
the current process so that the current process can allocate memory-locked
@@ -103,6 +112,25 @@ os_mem_alloc_nocache(
/* out: allocated memory */
ulint n); /* in: number of bytes */
/********************************************************************
+Allocates large pages memory. */
+
+void*
+os_mem_alloc_large(
+/*=================*/
+ /* out: allocated memory */
+ ulint n, /* in: number of bytes */
+ ibool set_to_zero, /* in: TRUE if allocated memory should be set
+ to zero if UNIV_SET_MEM_TO_ZERO is defined */
+ ibool assert_on_error); /* in: if TRUE, we crash mysqld if the memory
+ cannot be allocated */
+/********************************************************************
+Frees large pages memory. */
+
+void
+os_mem_free_large(
+/*=================*/
+void *ptr); /* in: pointer to the memory to free */
+/********************************************************************
Sets the priority boost for threads released from waiting within the current
process. */
diff --git a/innobase/include/page0cur.h b/innobase/include/page0cur.h
index c85669ed4df..a693931968e 100644
--- a/innobase/include/page0cur.h
+++ b/innobase/include/page0cur.h
@@ -128,7 +128,8 @@ page_cur_tuple_insert(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
- dtuple_t* tuple, /* in: pointer to a data tuple */
+ dtuple_t* tuple, /* in: pointer to a data tuple */
+ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mini-transaction handle */
/***************************************************************
Inserts a record next to page cursor. Returns pointer to inserted record if
@@ -142,6 +143,7 @@ page_cur_rec_insert(
otherwise */
page_cur_t* cursor, /* in: a page cursor */
rec_t* rec, /* in: record to insert */
+ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mini-transaction handle */
/***************************************************************
Inserts a record next to page cursor. Returns pointer to inserted record if
@@ -155,9 +157,9 @@ page_cur_insert_rec_low(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
- dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
- ulint data_size,/* in: data size of tuple */
- rec_t* rec, /* in: pointer to a physical record or NULL */
+ dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
+ dict_index_t* index, /* in: record descriptor */
+ rec_t* rec, /* in: pointer to a physical record or NULL */
mtr_t* mtr); /* in: mini-transaction handle */
/*****************************************************************
Copies records from page to a newly created page, from a given record onward,
@@ -166,10 +168,11 @@ including that record. Infimum and supremum records are not copied. */
void
page_copy_rec_list_end_to_created_page(
/*===================================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: first record to copy */
- mtr_t* mtr); /* in: mtr */
+ page_t* new_page, /* in: index page to copy to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: first record to copy */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/***************************************************************
Deletes a record at the page cursor. The cursor is moved to the
next record after the deleted one. */
@@ -178,6 +181,7 @@ void
page_cur_delete_rec(
/*================*/
page_cur_t* cursor, /* in: a page cursor */
+ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mini-transaction handle */
/********************************************************************
Searches the right position for a page cursor. */
@@ -187,6 +191,7 @@ page_cur_search(
/*============*/
/* out: number of matched fields on the left */
page_t* page, /* in: index page */
+ dict_index_t* index, /* in: record descriptor */
dtuple_t* tuple, /* in: data tuple */
ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
or PAGE_CUR_GE */
@@ -198,6 +203,7 @@ void
page_cur_search_with_match(
/*=======================*/
page_t* page, /* in: index page */
+ dict_index_t* index, /* in: record descriptor */
dtuple_t* tuple, /* in: data tuple */
ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
or PAGE_CUR_GE */
@@ -229,34 +235,37 @@ Parses a log record of a record insert on a page. */
byte*
page_cur_parse_insert_rec(
/*======================*/
- /* out: end of log record or NULL */
- ibool is_short,/* in: TRUE if short inserts */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
+ /* out: end of log record or NULL */
+ ibool is_short,/* in: TRUE if short inserts */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ dict_index_t* index, /* in: record descriptor */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
/**************************************************************
Parses a log record of copying a record list end to a new created page. */
byte*
page_parse_copy_rec_list_to_created_page(
/*=====================================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ dict_index_t* index, /* in: record descriptor */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
/***************************************************************
Parses log record of a record delete on a page. */
byte*
page_cur_parse_delete_rec(
/*======================*/
- /* out: pointer to record end or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
+ /* out: pointer to record end or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ dict_index_t* index, /* in: record descriptor */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
/* Index page cursor */
diff --git a/innobase/include/page0cur.ic b/innobase/include/page0cur.ic
index 39f8ab11513..03010fbd766 100644
--- a/innobase/include/page0cur.ic
+++ b/innobase/include/page0cur.ic
@@ -143,7 +143,7 @@ UNIV_INLINE
void
page_cur_move_to_prev(
/*==================*/
- page_cur_t* cur) /* in: cursor; must not before first */
+ page_cur_t* cur) /* in: page cursor, not before first */
{
ut_ad(!page_cur_is_before_first(cur));
@@ -158,6 +158,7 @@ page_cur_search(
/*============*/
/* out: number of matched fields on the left */
page_t* page, /* in: index page */
+ dict_index_t* index, /* in: record descriptor */
dtuple_t* tuple, /* in: data tuple */
ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
or PAGE_CUR_GE */
@@ -170,7 +171,7 @@ page_cur_search(
ut_ad(dtuple_check_typed(tuple));
- page_cur_search_with_match(page, tuple, mode,
+ page_cur_search_with_match(page, index, tuple, mode,
&up_matched_fields,
&up_matched_bytes,
&low_matched_fields,
@@ -190,16 +191,11 @@ page_cur_tuple_insert(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
- dtuple_t* tuple, /* in: pointer to a data tuple */
+ dtuple_t* tuple, /* in: pointer to a data tuple */
+ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr) /* in: mini-transaction handle */
{
- ulint data_size;
-
- ut_ad(dtuple_check_typed(tuple));
-
- data_size = dtuple_get_data_size(tuple);
-
- return(page_cur_insert_rec_low(cursor, tuple, data_size, NULL, mtr));
+ return(page_cur_insert_rec_low(cursor, tuple, index, NULL, mtr));
}
/***************************************************************
@@ -214,8 +210,9 @@ page_cur_rec_insert(
otherwise */
page_cur_t* cursor, /* in: a page cursor */
rec_t* rec, /* in: record to insert */
+ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr) /* in: mini-transaction handle */
{
- return(page_cur_insert_rec_low(cursor, NULL, 0, rec, mtr));
+ return(page_cur_insert_rec_low(cursor, NULL, index, rec, mtr));
}
diff --git a/innobase/include/page0page.h b/innobase/include/page0page.h
index 969313614e3..d3ef8214eb6 100644
--- a/innobase/include/page0page.h
+++ b/innobase/include/page0page.h
@@ -37,7 +37,8 @@ typedef byte page_header_t;
/*-----------------------------*/
#define PAGE_N_DIR_SLOTS 0 /* number of slots in page directory */
#define PAGE_HEAP_TOP 2 /* pointer to record heap top */
-#define PAGE_N_HEAP 4 /* number of records in the heap */
+#define PAGE_N_HEAP 4 /* number of records in the heap,
+ bit 15=flag: new-style compact page format */
#define PAGE_FREE 6 /* pointer to start of page free record list */
#define PAGE_GARBAGE 8 /* number of bytes in deleted records */
#define PAGE_LAST_INSERT 10 /* pointer to the last inserted record, or
@@ -79,15 +80,24 @@ typedef byte page_header_t;
#define PAGE_DATA (PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE)
/* start of data on the page */
-#define PAGE_INFIMUM (PAGE_DATA + 1 + REC_N_EXTRA_BYTES)
- /* offset of the page infimum record on the
- page */
-#define PAGE_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_EXTRA_BYTES + 8)
- /* offset of the page supremum record on the
- page */
-#define PAGE_SUPREMUM_END (PAGE_SUPREMUM + 9)
+#define PAGE_OLD_INFIMUM (PAGE_DATA + 1 + REC_N_OLD_EXTRA_BYTES)
+ /* offset of the page infimum record on an
+ old-style page */
+#define PAGE_OLD_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_OLD_EXTRA_BYTES + 8)
+ /* offset of the page supremum record on an
+ old-style page */
+#define PAGE_OLD_SUPREMUM_END (PAGE_OLD_SUPREMUM + 9)
/* offset of the page supremum record end on
- the page */
+ an old-style page */
+#define PAGE_NEW_INFIMUM (PAGE_DATA + REC_N_NEW_EXTRA_BYTES)
+ /* offset of the page infimum record on a
+ new-style compact page */
+#define PAGE_NEW_SUPREMUM (PAGE_DATA + 2 * REC_N_NEW_EXTRA_BYTES + 8)
+ /* offset of the page supremum record on a
+ new-style compact page */
+#define PAGE_NEW_SUPREMUM_END (PAGE_NEW_SUPREMUM + 8)
+ /* offset of the page supremum record end on
+ a new-style compact page */
/*-----------------------------*/
/* Directions of cursor movement */
@@ -233,6 +243,7 @@ page_cmp_dtuple_rec_with_match(
be page infimum or supremum, in which case
matched-parameter values below are not
affected */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint* matched_fields, /* in/out: number of already completely
matched fields; when function returns
contains the value for current comparison */
@@ -259,6 +270,22 @@ page_rec_get_n_recs_before(
/* out: number of records */
rec_t* rec); /* in: the physical record */
/*****************************************************************
+Gets the number of records in the heap. */
+UNIV_INLINE
+ulint
+page_dir_get_n_heap(
+/*================*/
+ /* out: number of records in the heap */
+ page_t* page); /* in: index page */
+/*****************************************************************
+Sets the number of records in the heap. */
+UNIV_INLINE
+void
+page_dir_set_n_heap(
+/*================*/
+ page_t* page, /* in: index page */
+ ulint n_heap);/* in: number of records */
+/*****************************************************************
Gets the number of dir slots in directory. */
UNIV_INLINE
ulint
@@ -267,6 +294,15 @@ page_dir_get_n_slots(
/* out: number of slots */
page_t* page); /* in: index page */
/*****************************************************************
+Sets the number of dir slots in directory. */
+UNIV_INLINE
+void
+page_dir_set_n_slots(
+/*=================*/
+ /* out: number of slots */
+ page_t* page, /* in: index page */
+ ulint n_slots);/* in: number of slots */
+/*****************************************************************
Gets pointer to nth directory slot. */
UNIV_INLINE
page_dir_slot_t*
@@ -333,7 +369,16 @@ ulint
page_dir_find_owner_slot(
/*=====================*/
/* out: the directory slot number */
- rec_t* rec); /* in: the physical record */
+ rec_t* rec); /* in: the physical record */
+/****************************************************************
+Determine whether the page is in new-style compact format. */
+UNIV_INLINE
+ibool
+page_is_comp(
+/*=========*/
+ /* out: TRUE if the page is in compact format,
+ FALSE if it is in old-style format */
+ page_t* page); /* in: index page */
/****************************************************************
Gets the pointer to the next record on the page. */
UNIV_INLINE
@@ -359,9 +404,10 @@ UNIV_INLINE
rec_t*
page_rec_get_prev(
/*==============*/
- /* out: pointer to previous record */
- rec_t* rec); /* in: pointer to record, must not be page
- infimum */
+ /* out: pointer to previous record */
+ rec_t* rec); /* in: pointer to record,
+ must not be page infimum */
+
/****************************************************************
TRUE if the record is a user record on the page. */
UNIV_INLINE
@@ -446,9 +492,11 @@ page_get_max_insert_size_after_reorganize(
Calculates free space if a page is emptied. */
UNIV_INLINE
ulint
-page_get_free_space_of_empty(void);
-/*==============================*/
- /* out: free space */
+page_get_free_space_of_empty(
+/*=========================*/
+ /* out: free space */
+ ibool comp) /* in: TRUE=compact page format */
+ __attribute__((const));
/****************************************************************
Returns the sum of the sizes of the records in the record list
excluding the infimum and supremum records. */
@@ -464,20 +512,23 @@ Allocates a block of memory from an index page. */
byte*
page_mem_alloc(
/*===========*/
- /* out: pointer to start of allocated
- buffer, or NULL if allocation fails */
- page_t* page, /* in: index page */
- ulint need, /* in: number of bytes needed */
- ulint* heap_no);/* out: this contains the heap number
- of the allocated record if allocation succeeds */
+ /* out: pointer to start of allocated
+ buffer, or NULL if allocation fails */
+ page_t* page, /* in: index page */
+ ulint need, /* in: number of bytes needed */
+ dict_index_t* index, /* in: record descriptor */
+ ulint* heap_no);/* out: this contains the heap number
+ of the allocated record
+ if allocation succeeds */
/****************************************************************
Puts a record to free list. */
UNIV_INLINE
void
page_mem_free(
/*==========*/
- page_t* page, /* in: index page */
- rec_t* rec); /* in: pointer to the (origin of) record */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: pointer to the (origin of) record */
+ dict_index_t* index); /* in: record descriptor */
/**************************************************************
The index page creation function. */
@@ -487,7 +538,8 @@ page_create(
/* out: pointer to the page */
buf_frame_t* frame, /* in: a buffer frame where the page is
created */
- mtr_t* mtr); /* in: mini-transaction handle */
+ mtr_t* mtr, /* in: mini-transaction handle */
+ ibool comp); /* in: TRUE=compact page format */
/*****************************************************************
Differs from page_copy_rec_list_end, because this function does not
touch the lock table and max trx id on page. */
@@ -495,10 +547,11 @@ touch the lock table and max trx id on page. */
void
page_copy_rec_list_end_no_locks(
/*============================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- mtr_t* mtr); /* in: mtr */
+ page_t* new_page, /* in: index page to copy to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Copies records from page to new_page, from the given record onward,
including that record. Infimum and supremum records are not copied.
@@ -507,10 +560,11 @@ The records are copied to the start of the record list on new_page. */
void
page_copy_rec_list_end(
/*===================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- mtr_t* mtr); /* in: mtr */
+ page_t* new_page, /* in: index page to copy to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Copies records from page to new_page, up to the given record, NOT
including that record. Infimum and supremum records are not copied.
@@ -519,10 +573,11 @@ The records are copied to the end of the record list on new_page. */
void
page_copy_rec_list_start(
/*=====================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- mtr_t* mtr); /* in: mtr */
+ page_t* new_page, /* in: index page to copy to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Deletes records from a page from a given record onward, including that record.
The infimum and supremum records are not deleted. */
@@ -530,14 +585,15 @@ The infimum and supremum records are not deleted. */
void
page_delete_rec_list_end(
/*=====================*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- ulint n_recs, /* in: number of records to delete, or ULINT_UNDEFINED
- if not known */
- ulint size, /* in: the sum of the sizes of the records in the end
- of the chain to delete, or ULINT_UNDEFINED if not
- known */
- mtr_t* mtr); /* in: mtr */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ dict_index_t* index, /* in: record descriptor */
+ ulint n_recs, /* in: number of records to delete,
+ or ULINT_UNDEFINED if not known */
+ ulint size, /* in: the sum of the sizes of the
+ records in the end of the chain to
+ delete, or ULINT_UNDEFINED if not known */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Deletes records from page, up to the given record, NOT including
that record. Infimum and supremum records are not deleted. */
@@ -545,9 +601,10 @@ that record. Infimum and supremum records are not deleted. */
void
page_delete_rec_list_start(
/*=======================*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- mtr_t* mtr); /* in: mtr */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Moves record list end to another page. Moved records include
split_rec. */
@@ -555,10 +612,11 @@ split_rec. */
void
page_move_rec_list_end(
/*===================*/
- page_t* new_page, /* in: index page where to move */
- page_t* page, /* in: index page */
- rec_t* split_rec, /* in: first record to move */
- mtr_t* mtr); /* in: mtr */
+ page_t* new_page, /* in: index page where to move */
+ page_t* page, /* in: index page */
+ rec_t* split_rec, /* in: first record to move */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Moves record list start to another page. Moved records do not include
split_rec. */
@@ -566,10 +624,11 @@ split_rec. */
void
page_move_rec_list_start(
/*=====================*/
- page_t* new_page, /* in: index page where to move */
- page_t* page, /* in: index page */
- rec_t* split_rec, /* in: first record not to move */
- mtr_t* mtr); /* in: mtr */
+ page_t* new_page, /* in: index page where to move */
+ page_t* page, /* in: index page */
+ rec_t* split_rec, /* in: first record not to move */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/********************************************************************
Splits a directory slot which owns too many records. */
@@ -595,13 +654,16 @@ Parses a log record of a record list end or start deletion. */
byte*
page_parse_delete_rec_list(
/*=======================*/
- /* out: end of log record or NULL */
- byte type, /* in: MLOG_LIST_END_DELETE or
- MLOG_LIST_START_DELETE */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
+ /* out: end of log record or NULL */
+ byte type, /* in: MLOG_LIST_END_DELETE,
+ MLOG_LIST_START_DELETE,
+ MLOG_COMP_LIST_END_DELETE or
+ MLOG_COMP_LIST_START_DELETE */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ dict_index_t* index, /* in: record descriptor */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
/***************************************************************
Parses a redo log record of creating a page. */
@@ -611,6 +673,7 @@ page_parse_create(
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
+ ibool comp, /* in: TRUE=compact page format */
page_t* page, /* in: page or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/****************************************************************
@@ -620,7 +683,8 @@ the index page context. */
void
page_rec_print(
/*===========*/
- rec_t* rec);
+ rec_t* rec, /* in: physical record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/*******************************************************************
This is used to print the contents of the directory for
debugging purposes. */
@@ -637,8 +701,9 @@ debugging purposes. */
void
page_print_list(
/*============*/
- page_t* page, /* in: index page */
- ulint pr_n); /* in: print n first and n last entries */
+ page_t* page, /* in: index page */
+ dict_index_t* index, /* in: dictionary index of the page */
+ ulint pr_n); /* in: print n first and n last entries */
/*******************************************************************
Prints the info in a page header. */
@@ -653,9 +718,12 @@ debugging purposes. */
void
page_print(
/*======*/
- page_t* page, /* in: index page */
- ulint dn, /* in: print dn first and last entries in directory */
- ulint rn); /* in: print rn first and last records on page */
+ page_t* page, /* in: index page */
+ dict_index_t* index, /* in: dictionary index of the page */
+ ulint dn, /* in: print dn first and last entries
+ in directory */
+ ulint rn); /* in: print rn first and last records
+ on page */
/*******************************************************************
The following is used to validate a record on a page. This function
differs from rec_validate as it can also check the n_owned field and
@@ -664,8 +732,9 @@ the heap_no field. */
ibool
page_rec_validate(
/*==============*/
- /* out: TRUE if ok */
- rec_t* rec); /* in: record on the page */
+ /* out: TRUE if ok */
+ rec_t* rec, /* in: physical record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/*******************************************************************
Checks that the first directory slot points to the infimum record and
the last to the supremum. This function is intended to track if the
diff --git a/innobase/include/page0page.ic b/innobase/include/page0page.ic
index 3d2bf3b090e..a63b5ca4238 100644
--- a/innobase/include/page0page.ic
+++ b/innobase/include/page0page.ic
@@ -73,7 +73,8 @@ page_header_set_field(
{
ut_ad(page);
ut_ad(field <= PAGE_N_RECS);
- ut_ad(val < UNIV_PAGE_SIZE);
+ ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE);
+ ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE);
mach_write_to_2(page + PAGE_HEADER + field, val);
}
@@ -152,6 +153,19 @@ page_header_reset_last_insert(
}
/****************************************************************
+Determine whether the page is in new-style compact format. */
+UNIV_INLINE
+ibool
+page_is_comp(
+/*=========*/
+ /* out: TRUE if the page is in compact format,
+ FALSE if it is in old-style format */
+ page_t* page) /* in: index page */
+{
+ return(!!(page_header_get_field(page, PAGE_N_HEAP) & 0x8000));
+}
+
+/****************************************************************
Gets the first record on the page. */
UNIV_INLINE
rec_t*
@@ -162,7 +176,11 @@ page_get_infimum_rec(
{
ut_ad(page);
- return(page + PAGE_INFIMUM);
+ if (page_is_comp(page)) {
+ return(page + PAGE_NEW_INFIMUM);
+ } else {
+ return(page + PAGE_OLD_INFIMUM);
+ }
}
/****************************************************************
@@ -176,7 +194,11 @@ page_get_supremum_rec(
{
ut_ad(page);
- return(page + PAGE_SUPREMUM);
+ if (page_is_comp(page)) {
+ return(page + PAGE_NEW_SUPREMUM);
+ } else {
+ return(page + PAGE_OLD_SUPREMUM);
+ }
}
/****************************************************************
@@ -309,6 +331,7 @@ page_cmp_dtuple_rec_with_match(
be page infimum or supremum, in which case
matched-parameter values below are not
affected */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint* matched_fields, /* in/out: number of already completely
matched fields; when function returns
contains the value for current comparison */
@@ -320,6 +343,7 @@ page_cmp_dtuple_rec_with_match(
page_t* page;
ut_ad(dtuple_check_typed(dtuple));
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
page = buf_frame_align(rec);
@@ -328,7 +352,7 @@ page_cmp_dtuple_rec_with_match(
} else if (rec == page_get_supremum_rec(page)) {
return(-1);
} else {
- return(cmp_dtuple_rec_with_match(dtuple, rec,
+ return(cmp_dtuple_rec_with_match(dtuple, rec, offsets,
matched_fields,
matched_bytes));
}
@@ -358,6 +382,45 @@ page_dir_get_n_slots(
{
return(page_header_get_field(page, PAGE_N_DIR_SLOTS));
}
+/*****************************************************************
+Sets the number of dir slots in directory (header field PAGE_N_DIR_SLOTS). */
+UNIV_INLINE
+void
+page_dir_set_n_slots(
+/*=================*/
+ /* no return value */
+ page_t* page, /* in: index page */
+ ulint n_slots)/* in: number of slots */
+{
+ page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots);
+}
+
+/*****************************************************************
+Gets the number of records in the heap (low 15 bits of PAGE_N_HEAP). */
+UNIV_INLINE
+ulint
+page_dir_get_n_heap(
+/*================*/
+ /* out: number of records in the heap */
+ page_t* page) /* in: index page */
+{
+ return(page_header_get_field(page, PAGE_N_HEAP) & 0x7fff);
+}
+
+/*****************************************************************
+Sets the number of records in the heap; bit 0x8000 of PAGE_N_HEAP is kept. */
+UNIV_INLINE
+void
+page_dir_set_n_heap(
+/*================*/
+ page_t* page, /* in: index page */
+ ulint n_heap) /* in: number of records; must be < 0x8000 */
+{
+ ut_ad(n_heap < 0x8000);
+
+ page_header_set_field(page, PAGE_N_HEAP, n_heap | (0x8000 &
+ page_header_get_field(page, PAGE_N_HEAP)));
+}
/*****************************************************************
Gets pointer to nth directory slot. */
@@ -369,7 +432,7 @@ page_dir_get_nth_slot(
page_t* page, /* in: index page */
ulint n) /* in: position */
{
- ut_ad(page_header_get_field(page, PAGE_N_DIR_SLOTS) > n);
+ ut_ad(page_dir_get_n_slots(page) > n);
return(page + UNIV_PAGE_SIZE - PAGE_DIR
- (n + 1) * PAGE_DIR_SLOT_SIZE);
@@ -431,7 +494,8 @@ page_dir_slot_get_n_owned(
/* out: number of records */
page_dir_slot_t* slot) /* in: page directory slot */
{
- return(rec_get_n_owned(page_dir_slot_get_rec(slot)));
+ return(rec_get_n_owned(page_dir_slot_get_rec(slot),
+ page_is_comp(buf_frame_align(slot))));
}
/*******************************************************************
@@ -444,7 +508,8 @@ page_dir_slot_set_n_owned(
ulint n) /* in: number of records owned
by the slot */
{
- rec_set_n_owned(page_dir_slot_get_rec(slot), n);
+ rec_set_n_owned(page_dir_slot_get_rec(slot),
+ page_is_comp(buf_frame_align(slot)), n);
}
/****************************************************************
@@ -477,7 +542,7 @@ page_rec_get_next(
page = buf_frame_align(rec);
- offs = rec_get_next_offs(rec);
+ offs = rec_get_next_offs(rec, page_is_comp(page));
if (offs >= UNIV_PAGE_SIZE) {
fprintf(stderr,
@@ -513,6 +578,7 @@ page_rec_set_next(
infimum */
{
page_t* page;
+ ulint offs;
ut_ad(page_rec_check(rec));
ut_a((next == NULL)
@@ -523,11 +589,13 @@ page_rec_set_next(
ut_ad(rec != page_get_supremum_rec(page));
ut_ad(next != page_get_infimum_rec(page));
- if (next == NULL) {
- rec_set_next_offs(rec, 0);
+ if (next) {
+ offs = (ulint) (next - page);
} else {
- rec_set_next_offs(rec, (ulint)(next - page));
+ offs = 0;
}
+
+ rec_set_next_offs(rec, page_is_comp(page), offs);
}
/****************************************************************
@@ -545,6 +613,7 @@ page_rec_get_prev(
rec_t* rec2;
rec_t* prev_rec = NULL;
page_t* page;
+ ibool comp;
ut_ad(page_rec_check(rec));
@@ -559,6 +628,7 @@ page_rec_get_prev(
slot = page_dir_get_nth_slot(page, slot_no - 1);
rec2 = page_dir_slot_get_rec(slot);
+ comp = page_is_comp(page);
while (rec != rec2) {
prev_rec = rec2;
@@ -579,9 +649,12 @@ page_rec_find_owner_rec(
/* out: the owner record */
rec_t* rec) /* in: the physical record */
{
+ ibool comp;
+
ut_ad(page_rec_check(rec));
+ comp = page_is_comp(buf_frame_align(rec));
- while (rec_get_n_owned(rec) == 0) {
+ while (rec_get_n_owned(rec, comp) == 0) {
rec = page_rec_get_next(rec);
}
@@ -601,7 +674,9 @@ page_get_data_size(
ulint ret;
ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP)
- - PAGE_SUPREMUM_END
+ - (page_is_comp(page)
+ ? PAGE_NEW_SUPREMUM_END
+ : PAGE_OLD_SUPREMUM_END)
- page_header_get_field(page, PAGE_GARBAGE));
ut_ad(ret < UNIV_PAGE_SIZE);
@@ -613,12 +688,13 @@ page_get_data_size(
Calculates free space if a page is emptied. */
UNIV_INLINE
ulint
-page_get_free_space_of_empty(void)
-/*==============================*/
+page_get_free_space_of_empty(
+/*=========================*/
/* out: free space */
+ ibool comp) /* in: TRUE=compact page layout */
{
return((ulint)(UNIV_PAGE_SIZE
- - PAGE_SUPREMUM_END
+ - (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END)
- PAGE_DIR
- 2 * PAGE_DIR_SLOT_SIZE));
}
@@ -640,13 +716,16 @@ page_get_max_insert_size(
{
ulint occupied;
ulint free_space;
+ ibool comp;
+
+ comp = page_is_comp(page);
occupied = page_header_get_field(page, PAGE_HEAP_TOP)
- - PAGE_SUPREMUM_END
+ - (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END)
+ page_dir_calc_reserved_space(
- n_recs + (page_header_get_field(page, PAGE_N_HEAP) - 2));
+ n_recs + page_dir_get_n_heap(page) - 2);
- free_space = page_get_free_space_of_empty();
+ free_space = page_get_free_space_of_empty(comp);
/* Above the 'n_recs +' part reserves directory space for the new
inserted records; the '- 2' excludes page infimum and supremum
@@ -673,11 +752,14 @@ page_get_max_insert_size_after_reorganize(
{
ulint occupied;
ulint free_space;
+ ibool comp;
+
+ comp = page_is_comp(page);
occupied = page_get_data_size(page)
+ page_dir_calc_reserved_space(n_recs + page_get_n_recs(page));
- free_space = page_get_free_space_of_empty();
+ free_space = page_get_free_space_of_empty(comp);
if (occupied > free_space) {
@@ -693,11 +775,12 @@ UNIV_INLINE
void
page_mem_free(
/*==========*/
- page_t* page, /* in: index page */
- rec_t* rec) /* in: pointer to the (origin of) record */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: pointer to the (origin of) record */
+ dict_index_t* index) /* in: record descriptor */
{
- rec_t* free;
- ulint garbage;
+ rec_t* free;
+ ulint garbage;
free = page_header_get_ptr(page, PAGE_FREE);
@@ -707,7 +790,7 @@ page_mem_free(
garbage = page_header_get_field(page, PAGE_GARBAGE);
page_header_set_field(page, PAGE_GARBAGE,
- garbage + rec_get_size(rec));
+ garbage + rec_get_size(rec, index));
}
#ifdef UNIV_MATERIALIZE
diff --git a/innobase/include/que0que.h b/innobase/include/que0que.h
index e1874edcaf2..298ec494750 100644
--- a/innobase/include/que0que.h
+++ b/innobase/include/que0que.h
@@ -359,6 +359,7 @@ struct que_thr_struct{
the control came */
ulint resource; /* resource usage of the query thread
thus far */
+ ulint lock_state; /* lock state of thread (table or row) */
};
#define QUE_THR_MAGIC_N 8476583
@@ -482,6 +483,11 @@ struct que_fork_struct{
#define QUE_THR_SUSPENDED 7
#define QUE_THR_ERROR 8
+/* Query thread lock states */
+#define QUE_THR_LOCK_NOLOCK 0
+#define QUE_THR_LOCK_ROW 1
+#define QUE_THR_LOCK_TABLE 2
+
/* From where the cursor position is counted */
#define QUE_CUR_NOT_DEFINED 1
#define QUE_CUR_START 2
diff --git a/innobase/include/rem0cmp.h b/innobase/include/rem0cmp.h
index 712e263350e..1b1ee26b809 100644
--- a/innobase/include/rem0cmp.h
+++ b/innobase/include/rem0cmp.h
@@ -90,6 +90,7 @@ cmp_dtuple_rec_with_match(
dtuple in some of the common fields, or which
has an equal number or more fields than
dtuple */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint* matched_fields, /* in/out: number of already completely
matched fields; when function returns,
contains the value for current comparison */
@@ -107,7 +108,8 @@ cmp_dtuple_rec(
less than rec, respectively; see the comments
for cmp_dtuple_rec_with_match */
dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec); /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/******************************************************************
Checks if a dtuple is a prefix of a record. The last field in dtuple
is allowed to be a prefix of the corresponding field in the record. */
@@ -116,23 +118,9 @@ ibool
cmp_dtuple_is_prefix_of_rec(
/*========================*/
/* out: TRUE if prefix */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec); /* in: physical record */
-/******************************************************************
-Compares a prefix of a data tuple to a prefix of a physical record for
-equality. If there are less fields in rec than parameter n_fields, FALSE
-is returned. NOTE that n_fields_cmp of dtuple does not affect this
-comparison. */
-
-ibool
-cmp_dtuple_rec_prefix_equal(
-/*========================*/
- /* out: TRUE if equal */
dtuple_t* dtuple, /* in: data tuple */
rec_t* rec, /* in: physical record */
- ulint n_fields); /* in: number of fields which should be
- compared; must not exceed the number of
- fields in dtuple */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/*****************************************************************
This function is used to compare two physical records. Only the common
first fields are compared, and if an externally stored field is
@@ -146,6 +134,8 @@ cmp_rec_rec_with_match(
first fields are compared */
rec_t* rec1, /* in: physical record */
rec_t* rec2, /* in: physical record */
+ const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
+ const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
dict_index_t* index, /* in: data dictionary index */
ulint* matched_fields, /* in/out: number of already completely
matched fields; when the function returns,
@@ -167,6 +157,8 @@ cmp_rec_rec(
first fields are compared */
rec_t* rec1, /* in: physical record */
rec_t* rec2, /* in: physical record */
+ const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
+ const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
dict_index_t* index); /* in: data dictionary index */
diff --git a/innobase/include/rem0cmp.ic b/innobase/include/rem0cmp.ic
index 75cb3ef04e8..b86534e0a6a 100644
--- a/innobase/include/rem0cmp.ic
+++ b/innobase/include/rem0cmp.ic
@@ -57,10 +57,13 @@ cmp_rec_rec(
first fields are compared */
rec_t* rec1, /* in: physical record */
rec_t* rec2, /* in: physical record */
+ const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
+ const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
dict_index_t* index) /* in: data dictionary index */
{
ulint match_f = 0;
ulint match_b = 0;
- return(cmp_rec_rec_with_match(rec1, rec2, index, &match_f, &match_b));
+ return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index,
+ &match_f, &match_b));
}
diff --git a/innobase/include/rem0rec.h b/innobase/include/rem0rec.h
index 86bf263170f..ab89b912523 100644
--- a/innobase/include/rem0rec.h
+++ b/innobase/include/rem0rec.h
@@ -23,9 +23,18 @@ Created 5/30/1994 Heikki Tuuri
info bits of a record */
#define REC_INFO_MIN_REC_FLAG 0x10UL
-/* Number of extra bytes in a record, in addition to the data and the
-offsets */
-#define REC_N_EXTRA_BYTES 6
+/* Number of extra bytes in an old-style record,
+in addition to the data and the offsets */
+#define REC_N_OLD_EXTRA_BYTES 6
+/* Number of extra bytes in a new-style record,
+in addition to the data and the offsets */
+#define REC_N_NEW_EXTRA_BYTES 5
+
+/* Record status values */
+#define REC_STATUS_ORDINARY 0
+#define REC_STATUS_NODE_PTR 1
+#define REC_STATUS_INFIMUM 2
+#define REC_STATUS_SUPREMUM 3
/**********************************************************
The following function is used to get the offset of the
@@ -36,7 +45,8 @@ rec_get_next_offs(
/*==============*/
/* out: the page offset of the next
chained record */
- rec_t* rec); /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp); /* in: TRUE=compact page format */
/**********************************************************
The following function is used to set the next record offset field
of the record. */
@@ -45,17 +55,28 @@ void
rec_set_next_offs(
/*==============*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ulint next); /* in: offset of the next record */
/**********************************************************
The following function is used to get the number of fields
-in the record. */
+in an old-style record. */
UNIV_INLINE
ulint
-rec_get_n_fields(
-/*=============*/
+rec_get_n_fields_old(
+/*=================*/
/* out: number of data fields */
rec_t* rec); /* in: physical record */
/**********************************************************
+The following function is used to get the number of fields
+in a record. */
+UNIV_INLINE
+ulint
+rec_get_n_fields(
+/*=============*/
+ /* out: number of data fields */
+ rec_t* rec, /* in: physical record */
+ dict_index_t* index); /* in: record descriptor */
+/**********************************************************
The following function is used to get the number of records
owned by the previous directory record. */
UNIV_INLINE
@@ -63,7 +84,8 @@ ulint
rec_get_n_owned(
/*============*/
/* out: number of owned records */
- rec_t* rec); /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp); /* in: TRUE=compact page format */
/**********************************************************
The following function is used to set the number of owned
records. */
@@ -72,6 +94,7 @@ void
rec_set_n_owned(
/*============*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ulint n_owned); /* in: the number of owned */
/**********************************************************
The following function is used to retrieve the info bits of
@@ -81,7 +104,8 @@ ulint
rec_get_info_bits(
/*==============*/
/* out: info bits */
- rec_t* rec); /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp); /* in: TRUE=compact page format */
/**********************************************************
The following function is used to set the info bits of a record. */
UNIV_INLINE
@@ -89,15 +113,26 @@ void
rec_set_info_bits(
/*==============*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ulint bits); /* in: info bits */
/**********************************************************
-Gets the value of the deleted falg in info bits. */
+The following function retrieves the status bits of a new-style record. */
UNIV_INLINE
-ibool
-rec_info_bits_get_deleted_flag(
-/*===========================*/
- /* out: TRUE if deleted flag set */
- ulint info_bits); /* in: info bits from a record */
+ulint
+rec_get_status(
+/*===========*/
+ /* out: status bits */
+ rec_t* rec); /* in: physical record */
+
+/**********************************************************
+The following function is used to set the status bits of a new-style record. */
+UNIV_INLINE
+void
+rec_set_status(
+/*===========*/
+ rec_t* rec, /* in: physical record */
+ ulint bits); /* in: status bits */
+
/**********************************************************
The following function tells if record is delete marked. */
UNIV_INLINE
@@ -105,7 +140,8 @@ ibool
rec_get_deleted_flag(
/*=================*/
/* out: TRUE if delete marked */
- rec_t* rec); /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp); /* in: TRUE=compact page format */
/**********************************************************
The following function is used to set the deleted bit. */
UNIV_INLINE
@@ -113,8 +149,25 @@ void
rec_set_deleted_flag(
/*=================*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ibool flag); /* in: TRUE if delete marked */
/**********************************************************
+The following function tells if a new-style record is a node pointer. */
+UNIV_INLINE
+ibool
+rec_get_node_ptr_flag(
+/*=================*/
+ /* out: TRUE if node pointer */
+ rec_t* rec); /* in: physical record */
+/**********************************************************
+The following function is used to flag a record as a node pointer. */
+UNIV_INLINE
+void
+rec_set_node_ptr_flag(
+/*=================*/
+ rec_t* rec, /* in: physical record */
+ ibool flag); /* in: TRUE if the record is a node pointer */
+/**********************************************************
The following function is used to get the order number
of the record in the heap of the index page. */
UNIV_INLINE
@@ -122,7 +175,8 @@ ulint
rec_get_heap_no(
/*=============*/
/* out: heap order number */
- rec_t* rec); /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp); /* in: TRUE=compact page format */
/**********************************************************
The following function is used to set the heap number
field in the record. */
@@ -131,6 +185,7 @@ void
rec_set_heap_no(
/*=============*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ulint heap_no);/* in: the heap number */
/**********************************************************
The following function is used to test whether the data offsets
@@ -141,31 +196,65 @@ rec_get_1byte_offs_flag(
/*====================*/
/* out: TRUE if 1-byte form */
rec_t* rec); /* in: physical record */
+/**********************************************************
+The following function determines the offsets to each field
+in the record. It can reuse a previously allocated array. */
+
+ulint*
+rec_get_offsets_func(
+/*=================*/
+ /* out: the new offsets */
+ rec_t* rec, /* in: physical record */
+ dict_index_t* index, /* in: record descriptor */
+ ulint* offsets,/* in: array consisting of offsets[0]
+ allocated elements, or an array from
+ rec_get_offsets(), or NULL */
+ ulint n_fields,/* in: maximum number of initialized fields
+ (ULINT_UNDEFINED if all fields) */
+ mem_heap_t** heap, /* in/out: memory heap */
+ const char* file, /* in: file name where called */
+ ulint line); /* in: line number where called */
+
+#define rec_get_offsets(rec,index,offsets,n,heap) \
+ rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__)
+
+/****************************************************************
+Validates offsets returned by rec_get_offsets(). */
+UNIV_INLINE
+ibool
+rec_offs_validate(
+/*==============*/
+ /* out: TRUE if valid */
+ rec_t* rec, /* in: record or NULL */
+ dict_index_t* index, /* in: record descriptor or NULL */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
+/****************************************************************
+Updates debug data in offsets, in order to avoid bogus
+rec_offs_validate() failures. */
+UNIV_INLINE
+void
+rec_offs_make_valid(
+/*================*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index,/* in: record descriptor */
+ ulint* offsets);/* in: array returned by rec_get_offsets() */
+
/****************************************************************
The following function is used to get a pointer to the nth
-data field in the record. */
+data field in an old-style record. */
byte*
-rec_get_nth_field(
-/*==============*/
+rec_get_nth_field_old(
+/*==================*/
/* out: pointer to the field */
rec_t* rec, /* in: record */
ulint n, /* in: index of the field */
ulint* len); /* out: length of the field; UNIV_SQL_NULL
if SQL null */
/****************************************************************
-Return field length or UNIV_SQL_NULL. */
-UNIV_INLINE
-ulint
-rec_get_nth_field_len(
-/*==================*/
- /* out: length of the field; UNIV_SQL_NULL if SQL
- null */
- rec_t* rec, /* in: record */
- ulint n); /* in: index of the field */
-/****************************************************************
-Gets the physical size of a field. Also an SQL null may have a field of
-size > 0, if the data type is of a fixed size. */
+Gets the physical size of an old-style field.
+Also an SQL null may have a field of size > 0,
+if the data type is of a fixed size. */
UNIV_INLINE
ulint
rec_get_nth_field_size(
@@ -173,131 +262,194 @@ rec_get_nth_field_size(
/* out: field size in bytes */
rec_t* rec, /* in: record */
ulint n); /* in: index of the field */
-/***************************************************************
-Gets the value of the ith field extern storage bit. If it is TRUE
-it means that the field is stored on another page. */
+/****************************************************************
+The following function is used to get a pointer to the nth
+data field in a record. */
+UNIV_INLINE
+byte*
+rec_get_nth_field(
+/*==============*/
+ /* out: pointer to the field */
+ rec_t* rec, /* in: record */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n, /* in: index of the field */
+ ulint* len); /* out: length of the field; UNIV_SQL_NULL
+ if SQL null */
+/**********************************************************
+Determine if the offsets are for a record in the new
+compact format. */
UNIV_INLINE
ibool
-rec_get_nth_field_extern_bit(
-/*=========================*/
- /* in: TRUE or FALSE */
- rec_t* rec, /* in: record */
- ulint i); /* in: ith field */
+rec_offs_comp(
+/*==========*/
+ /* out: TRUE if compact format */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
+/**********************************************************
+Returns TRUE if the nth field of rec is SQL NULL. */
+UNIV_INLINE
+ibool
+rec_offs_nth_null(
+/*==============*/
+ /* out: TRUE if SQL NULL */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n); /* in: nth field */
+/**********************************************************
+Returns TRUE if the extern bit is set in nth field of rec. */
+UNIV_INLINE
+ibool
+rec_offs_nth_extern(
+/*================*/
+ /* out: TRUE if externally stored */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n); /* in: nth field */
+/**********************************************************
+Gets the physical size of a field. */
+UNIV_INLINE
+ulint
+rec_offs_nth_size(
+/*==============*/
+ /* out: length of field */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n); /* in: nth field */
+
/**********************************************************
Returns TRUE if the extern bit is set in any of the fields
of rec. */
UNIV_INLINE
ibool
-rec_contains_externally_stored_field(
-/*=================================*/
- /* out: TRUE if a field is stored externally */
- rec_t* rec); /* in: record */
+rec_offs_any_extern(
+/*================*/
+ /* out: TRUE if a field is stored externally */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/***************************************************************
Sets the value of the ith field extern storage bit. */
-
+UNIV_INLINE
void
rec_set_nth_field_extern_bit(
/*=========================*/
- rec_t* rec, /* in: record */
- ulint i, /* in: ith field */
- ibool val, /* in: value to set */
- mtr_t* mtr); /* in: mtr holding an X-latch to the page where
- rec is, or NULL; in the NULL case we do not
- write to log about the change */
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: record descriptor */
+ ulint i, /* in: ith field */
+ ibool val, /* in: value to set */
+ mtr_t* mtr); /* in: mtr holding an X-latch to the page
+ where rec is, or NULL; in the NULL case
+ we do not write to log about the change */
/***************************************************************
Sets TRUE the extern storage bits of fields mentioned in an array. */
void
rec_set_field_extern_bits(
/*======================*/
- rec_t* rec, /* in: record */
- ulint* vec, /* in: array of field numbers */
- ulint n_fields, /* in: number of fields numbers */
- mtr_t* mtr); /* in: mtr holding an X-latch to the page
- where rec is, or NULL; in the NULL case we
- do not write to log about the change */
-/****************************************************************
-The following function is used to get a copy of the nth
-data field in the record to a buffer. */
-UNIV_INLINE
-void
-rec_copy_nth_field(
-/*===============*/
- void* buf, /* in: pointer to the buffer */
- rec_t* rec, /* in: record */
- ulint n, /* in: index of the field */
- ulint* len); /* out: length of the field; UNIV_SQL_NULL if SQL
- null */
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: record descriptor */
+ const ulint* vec, /* in: array of field numbers */
+ ulint n_fields,/* in: number of fields numbers */
+ mtr_t* mtr); /* in: mtr holding an X-latch to the page
+ where rec is, or NULL; in the NULL case
+ we do not write to log about the change */
/***************************************************************
-This is used to modify the value of an already existing field in
-a physical record. The previous value must have exactly the same
-size as the new value. If len is UNIV_SQL_NULL then the field is
-treated as SQL null. */
+This is used to modify the value of an already existing field in a record.
+The previous value must have exactly the same size as the new value. If len
+is UNIV_SQL_NULL then the field is treated as an SQL null for old-style
+records. For new-style records, len must not be UNIV_SQL_NULL. */
UNIV_INLINE
void
rec_set_nth_field(
/*==============*/
- rec_t* rec, /* in: record */
- ulint n, /* in: index of the field */
- void* data, /* in: pointer to the data if not SQL null */
- ulint len); /* in: length of the data or UNIV_SQL_NULL.
- If not SQL null, must have the same length as the
- previous value. If SQL null, previous value must be
- SQL null. */
+ rec_t* rec, /* in: record */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n, /* in: index number of the field */
+ const void* data, /* in: pointer to the data if not SQL null */
+ ulint len); /* in: length of the data or UNIV_SQL_NULL.
+ If not SQL null, must have the same
+ length as the previous value.
+ If SQL null, previous value must be
+ SQL null. */
/**************************************************************
-The following function returns the data size of a physical
+The following function returns the data size of an old-style physical
record, that is the sum of field lengths. SQL null fields
are counted as length 0 fields. The value returned by the function
is the distance from record origin to record end in bytes. */
UNIV_INLINE
ulint
-rec_get_data_size(
-/*==============*/
- /* out: size */
+rec_get_data_size_old(
+/*==================*/
+ /* out: size */
rec_t* rec); /* in: physical record */
/**************************************************************
+The following function returns the number of fields in a record. */
+UNIV_INLINE
+ulint
+rec_offs_n_fields(
+/*===============*/
+ /* out: number of fields */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
+/**************************************************************
+The following function returns the data size of a physical
+record, that is the sum of field lengths. SQL null fields
+are counted as length 0 fields. The value returned by the function
+is the distance from record origin to record end in bytes. */
+UNIV_INLINE
+ulint
+rec_offs_data_size(
+/*===============*/
+ /* out: size */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
+/**************************************************************
Returns the total size of record minus data size of record.
The value returned by the function is the distance from record
start to record origin in bytes. */
UNIV_INLINE
ulint
-rec_get_extra_size(
-/*===============*/
- /* out: size */
- rec_t* rec); /* in: physical record */
-/**************************************************************
+rec_offs_extra_size(
+/*================*/
+ /* out: size */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
+/**************************************************************
Returns the total size of a physical record. */
UNIV_INLINE
ulint
+rec_offs_size(
+/*==========*/
+ /* out: size */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
+/**************************************************************
+Returns the total size of a physical record. */
+
+ulint
rec_get_size(
/*=========*/
- /* out: size */
- rec_t* rec); /* in: physical record */
+ /* out: size */
+ rec_t* rec, /* in: physical record */
+ dict_index_t* index); /* in: record descriptor */
/**************************************************************
Returns a pointer to the start of the record. */
UNIV_INLINE
byte*
rec_get_start(
/*==========*/
- /* out: pointer to start */
- rec_t* rec); /* in: pointer to record */
+ /* out: pointer to start */
+ rec_t* rec, /* in: pointer to record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/**************************************************************
Returns a pointer to the end of the record. */
UNIV_INLINE
byte*
rec_get_end(
/*========*/
- /* out: pointer to end */
- rec_t* rec); /* in: pointer to record */
+ /* out: pointer to end */
+ rec_t* rec, /* in: pointer to record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/*******************************************************************
Copies a physical record to a buffer. */
UNIV_INLINE
rec_t*
rec_copy(
/*=====*/
- /* out: pointer to the origin of the copied record */
- void* buf, /* in: buffer */
- rec_t* rec); /* in: physical record */
+ /* out: pointer to the origin of the copy */
+ void* buf, /* in: buffer */
+ const rec_t* rec, /* in: physical record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/******************************************************************
Copies the first n fields of a physical record to a new physical record in
a buffer. */
@@ -305,49 +457,43 @@ a buffer. */
rec_t*
rec_copy_prefix_to_buf(
/*===================*/
- /* out, own: copied record */
- rec_t* rec, /* in: physical record */
- ulint n_fields, /* in: number of fields to copy */
- byte** buf, /* in/out: memory buffer for the copied prefix,
- or NULL */
- ulint* buf_size); /* in/out: buffer size */
+ /* out, own: copied record */
+ rec_t* rec, /* in: physical record */
+ dict_index_t* index, /* in: record descriptor */
+ ulint n_fields, /* in: number of fields to copy */
+ byte** buf, /* in/out: memory buffer
+ for the copied prefix, or NULL */
+ ulint* buf_size); /* in/out: buffer size */
/****************************************************************
Folds a prefix of a physical record to a ulint. */
UNIV_INLINE
ulint
rec_fold(
/*=====*/
- /* out: the folded value */
- rec_t* rec, /* in: the physical record */
- ulint n_fields, /* in: number of complete fields to fold */
- ulint n_bytes, /* in: number of bytes to fold in an
- incomplete last field */
- dulint tree_id); /* in: index tree id */
+ /* out: the folded value */
+ rec_t* rec, /* in: the physical record */
+ const ulint* offsets, /* in: array returned by
+ rec_get_offsets() */
+ ulint n_fields, /* in: number of complete
+ fields to fold */
+ ulint n_bytes, /* in: number of bytes to fold
+ in an incomplete last field */
+ dulint tree_id); /* in: index tree id */
/*************************************************************
Builds a physical record out of a data tuple and stores it beginning from
address destination. */
-UNIV_INLINE
+
rec_t*
rec_convert_dtuple_to_rec(
/*======================*/
- /* out: pointer to the origin of physical
- record */
- byte* destination, /* in: start address of the physical record */
- dtuple_t* dtuple); /* in: data tuple */
-/*************************************************************
-Builds a physical record out of a data tuple and stores it beginning from
-address destination. */
-
-rec_t*
-rec_convert_dtuple_to_rec_low(
-/*==========================*/
- /* out: pointer to the origin of physical
- record */
- byte* destination, /* in: start address of the physical record */
- dtuple_t* dtuple, /* in: data tuple */
- ulint data_size); /* in: data size of dtuple */
+ /* out: pointer to the origin
+ of physical record */
+ byte* buf, /* in: start address of the
+ physical record */
+ dict_index_t* index, /* in: record descriptor */
+ dtuple_t* dtuple);/* in: data tuple */
/**************************************************************
-Returns the extra size of a physical record if we know its
+Returns the extra size of an old-style physical record if we know its
data size and number of fields. */
UNIV_INLINE
ulint
@@ -355,7 +501,8 @@ rec_get_converted_extra_size(
/*=========================*/
/* out: extra size */
ulint data_size, /* in: data size */
- ulint n_fields); /* in: number of fields */
+ ulint n_fields) /* in: number of fields */
+ __attribute__((const));
/**************************************************************
The following function returns the size of a data tuple when converted to
a physical record. */
@@ -364,6 +511,7 @@ ulint
rec_get_converted_size(
/*===================*/
/* out: size */
+ dict_index_t* index, /* in: record descriptor */
dtuple_t* dtuple);/* in: data tuple */
/******************************************************************
Copies the first n fields of a physical record to a data tuple.
@@ -374,6 +522,7 @@ rec_copy_prefix_to_dtuple(
/*======================*/
dtuple_t* tuple, /* in: data tuple */
rec_t* rec, /* in: physical record */
+ dict_index_t* index, /* in: record descriptor */
ulint n_fields, /* in: number of fields to copy */
mem_heap_t* heap); /* in: memory heap */
/*******************************************************************
@@ -382,16 +531,35 @@ Validates the consistency of a physical record. */
ibool
rec_validate(
/*=========*/
- /* out: TRUE if ok */
- rec_t* rec); /* in: physical record */
+ /* out: TRUE if ok */
+ rec_t* rec, /* in: physical record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
+/*******************************************************************
+Prints an old-style physical record. */
+
+void
+rec_print_old(
+/*==========*/
+ FILE* file, /* in: file where to print */
+ rec_t* rec); /* in: physical record */
+/*******************************************************************
+Prints a physical record. */
+
+void
+rec_print_new(
+/*==========*/
+ FILE* file, /* in: file where to print */
+ rec_t* rec, /* in: physical record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/*******************************************************************
Prints a physical record. */
void
rec_print(
/*======*/
- FILE* file, /* in: file where to print */
- rec_t* rec); /* in: physical record */
+ FILE* file, /* in: file where to print */
+ rec_t* rec, /* in: physical record */
+ dict_index_t* index); /* in: record descriptor */
#define REC_INFO_BITS 6 /* This is single byte bit-field */
diff --git a/innobase/include/rem0rec.ic b/innobase/include/rem0rec.ic
index c36bf8f6d6e..7d35e8e4110 100644
--- a/innobase/include/rem0rec.ic
+++ b/innobase/include/rem0rec.ic
@@ -8,9 +8,19 @@ Created 5/30/1994 Heikki Tuuri
#include "mach0data.h"
#include "ut0byte.h"
+#include "dict0dict.h"
-/* Offsets of the bit-fields in the record. NOTE! In the table the most
-significant bytes and bits are written below less significant.
+/* Compact flag ORed to the extra size returned by rec_get_offsets() */
+#define REC_OFFS_COMPACT ((ulint) 1 << 31)
+/* SQL NULL flag in offsets returned by rec_get_offsets() */
+#define REC_OFFS_SQL_NULL ((ulint) 1 << 31)
+/* External flag in offsets returned by rec_get_offsets() */
+#define REC_OFFS_EXTERNAL ((ulint) 1 << 30)
+/* Mask for offsets returned by rec_get_offsets() */
+#define REC_OFFS_MASK (REC_OFFS_EXTERNAL - 1)
+
+/* Offsets of the bit-fields in an old-style record. NOTE! In the table the
+most significant bytes and bits are written below less significant.
(1) byte offset (2) bit usage within byte
downward from
@@ -25,6 +35,35 @@ significant bytes and bits are written below less significant.
4 bits info bits
*/
+/* Offsets of the bit-fields in a new-style record. NOTE! In the table the
+most significant bytes and bits are written below less significant.
+
+ (1) byte offset (2) bit usage within byte
+ downward from
+ origin -> 1 8 bits relative offset of next record
+ 2 8 bits relative offset of next record
+ the relative offset is an unsigned 16-bit
+ integer:
+ (offset_of_next_record
+ - offset_of_this_record) mod 64Ki,
+ where mod is the modulo as a non-negative
+ number;
+ we can calculate the offset of the next
+ record with the formula:
+ relative_offset + offset_of_this_record
+ mod UNIV_PAGE_SIZE
+ 3 3 bits status:
+ 000=conventional record
+ 001=node pointer record (inside B-tree)
+ 010=infimum record
+ 011=supremum record
+ 1xx=reserved
+ 5 bits heap number
+ 4 8 bits heap number
+ 5 4 bits n_owned
+ 4 bits info bits
+*/
+
/* We list the byte offsets from the origin of the record, the mask,
and the shift needed to obtain each bit-field of the record. */
@@ -32,22 +71,30 @@ and the shift needed to obtain each bit-field of the record. */
#define REC_NEXT_MASK 0xFFFFUL
#define REC_NEXT_SHIFT 0
-#define REC_SHORT 3 /* This is single byte bit-field */
-#define REC_SHORT_MASK 0x1UL
-#define REC_SHORT_SHIFT 0
+#define REC_OLD_SHORT 3 /* This is single byte bit-field */
+#define REC_OLD_SHORT_MASK 0x1UL
+#define REC_OLD_SHORT_SHIFT 0
-#define REC_N_FIELDS 4
-#define REC_N_FIELDS_MASK 0x7FEUL
-#define REC_N_FIELDS_SHIFT 1
+#define REC_OLD_N_FIELDS 4
+#define REC_OLD_N_FIELDS_MASK 0x7FEUL
+#define REC_OLD_N_FIELDS_SHIFT 1
-#define REC_HEAP_NO 5
+#define REC_NEW_STATUS 3 /* This is single byte bit-field */
+#define REC_NEW_STATUS_MASK 0x7UL
+#define REC_NEW_STATUS_SHIFT 0
+
+#define REC_OLD_HEAP_NO 5
+#define REC_NEW_HEAP_NO 4
#define REC_HEAP_NO_MASK 0xFFF8UL
#define REC_HEAP_NO_SHIFT 3
-#define REC_N_OWNED 6 /* This is single byte bit-field */
+#define REC_OLD_N_OWNED 6 /* This is single byte bit-field */
+#define REC_NEW_N_OWNED 5 /* This is single byte bit-field */
#define REC_N_OWNED_MASK 0xFUL
#define REC_N_OWNED_SHIFT 0
+#define REC_OLD_INFO_BITS 6 /* This is single byte bit-field */
+#define REC_NEW_INFO_BITS 5 /* This is single byte bit-field */
#define REC_INFO_BITS_MASK 0xF0UL
#define REC_INFO_BITS_SHIFT 0
@@ -65,26 +112,24 @@ a field stored to another page: */
#define REC_2BYTE_EXTERN_MASK 0x4000UL
-/****************************************************************
-Return field length or UNIV_SQL_NULL. */
-UNIV_INLINE
-ulint
-rec_get_nth_field_len(
-/*==================*/
- /* out: length of the field; UNIV_SQL_NULL if SQL
- null */
- rec_t* rec, /* in: record */
- ulint n) /* in: index of the field */
-{
- ulint len;
-
- rec_get_nth_field(rec, n, &len);
-
- return(len);
-}
+#if REC_OLD_SHORT_MASK << (8 * (REC_OLD_SHORT - 3)) \
+ ^ REC_OLD_N_FIELDS_MASK << (8 * (REC_OLD_N_FIELDS - 4)) \
+ ^ REC_HEAP_NO_MASK << (8 * (REC_OLD_HEAP_NO - 4)) \
+ ^ REC_N_OWNED_MASK << (8 * (REC_OLD_N_OWNED - 3)) \
+ ^ REC_INFO_BITS_MASK << (8 * (REC_OLD_INFO_BITS - 3)) \
+ ^ 0xFFFFFFFFUL
+# error "sum of old-style masks != 0xFFFFFFFFUL"
+#endif
+#if REC_NEW_STATUS_MASK << (8 * (REC_NEW_STATUS - 3)) \
+ ^ REC_HEAP_NO_MASK << (8 * (REC_NEW_HEAP_NO - 4)) \
+ ^ REC_N_OWNED_MASK << (8 * (REC_NEW_N_OWNED - 3)) \
+ ^ REC_INFO_BITS_MASK << (8 * (REC_NEW_INFO_BITS - 3)) \
+ ^ 0xFFFFFFUL
+# error "sum of new-style masks != 0xFFFFFFUL"
+#endif
/***************************************************************
-Sets the value of the ith field SQL null bit. */
+Sets the value of the ith field SQL null bit of an old-style record. */
void
rec_set_nth_field_null_bit(
@@ -93,8 +138,8 @@ rec_set_nth_field_null_bit(
ulint i, /* in: ith field */
ibool val); /* in: value to set */
/***************************************************************
-Sets a record field to SQL null. The physical size of the field is not
-changed. */
+Sets an old-style record field to SQL null.
+The physical size of the field is not changed. */
void
rec_set_nth_field_sql_null(
@@ -102,6 +147,32 @@ rec_set_nth_field_sql_null(
rec_t* rec, /* in: record */
ulint n); /* in: index of the field */
+/***************************************************************
+Sets the value of the ith field extern storage bit of an old-style record. */
+
+void
+rec_set_nth_field_extern_bit_old(
+/*=============================*/
+ rec_t* rec, /* in: old-style record */
+ ulint i, /* in: ith field */
+ ibool val, /* in: value to set */
+ mtr_t* mtr); /* in: mtr holding an X-latch to the page where
+ rec is, or NULL; in the NULL case we do not
+ write to log about the change */
+/***************************************************************
+Sets the value of the ith field extern storage bit of a new-style record. */
+
+void
+rec_set_nth_field_extern_bit_new(
+/*=============================*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: record descriptor */
+ ulint ith, /* in: ith field */
+ ibool val, /* in: value to set */
+ mtr_t* mtr); /* in: mtr holding an X-latch to the page
+ where rec is, or NULL; in the NULL case
+ we do not write to log about the change */
+
/**********************************************************
Gets a bit field from within 1 byte. */
UNIV_INLINE
@@ -131,7 +202,7 @@ rec_set_bit_field_1(
ulint shift) /* in: shift right applied after masking */
{
ut_ad(rec);
- ut_ad(offs <= REC_N_EXTRA_BYTES);
+ ut_ad(offs <= REC_N_OLD_EXTRA_BYTES);
ut_ad(mask);
ut_ad(mask <= 0xFFUL);
ut_ad(((mask >> shift) << shift) == mask);
@@ -171,30 +242,14 @@ rec_set_bit_field_2(
ulint shift) /* in: shift right applied after masking */
{
ut_ad(rec);
- ut_ad(offs <= REC_N_EXTRA_BYTES);
+ ut_ad(offs <= REC_N_OLD_EXTRA_BYTES);
ut_ad(mask > 0xFFUL);
ut_ad(mask <= 0xFFFFUL);
ut_ad((mask >> shift) & 1);
ut_ad(0 == ((mask >> shift) & ((mask >> shift) + 1)));
ut_ad(((mask >> shift) << shift) == mask);
ut_ad(((val << shift) & mask) == (val << shift));
-#ifdef UNIV_DEBUG
- {
- ulint m;
-
- /* The following assertion checks that the masks of currently
- defined bit-fields in bytes 3-6 do not overlap. */
- m = (ulint)((REC_SHORT_MASK << (8 * (REC_SHORT - 3)))
- + (REC_N_FIELDS_MASK << (8 * (REC_N_FIELDS - 4)))
- + (REC_HEAP_NO_MASK << (8 * (REC_HEAP_NO - 4)))
- + (REC_N_OWNED_MASK << (8 * (REC_N_OWNED - 3)))
- + (REC_INFO_BITS_MASK << (8 * (REC_INFO_BITS - 3))));
- if (m != ut_dbg_zero + 0xFFFFFFFFUL) {
- fprintf(stderr, "Sum of masks %lx\n", m);
- ut_error;
- }
- }
-#endif
+
mach_write_to_2(rec - offs,
(mach_read_from_2(rec - offs) & ~mask)
| (val << shift));
@@ -207,18 +262,38 @@ UNIV_INLINE
ulint
rec_get_next_offs(
/*==============*/
- /* out: the page offset of the next chained record */
- rec_t* rec) /* in: physical record */
+ /* out: the page offset of the next chained record, or
+ 0 if none */
+ rec_t* rec, /* in: physical record */
+ ibool comp) /* in: TRUE=compact page format */
{
- ulint ret;
+ ulint field_value;
+
+ ut_ad(REC_NEXT_MASK == 0xFFFFUL);
+ ut_ad(REC_NEXT_SHIFT == 0);
- ut_ad(rec);
+ field_value = mach_read_from_2(rec - REC_NEXT);
- ret = rec_get_bit_field_2(rec, REC_NEXT, REC_NEXT_MASK,
- REC_NEXT_SHIFT);
- ut_ad(ret < UNIV_PAGE_SIZE);
+ if (comp) {
+#if UNIV_PAGE_SIZE <= 32768
+ /* Note that for 64 KiB pages, field_value can 'wrap around'
+ and the debug assertion is not valid */
- return(ret);
+ ut_ad((int16_t)field_value
+ + ut_align_offset(rec, UNIV_PAGE_SIZE)
+ < UNIV_PAGE_SIZE);
+#endif
+ if (field_value == 0) {
+
+ return(0);
+ }
+
+ return(ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
+ } else {
+ ut_ad(field_value < UNIV_PAGE_SIZE);
+
+ return(field_value);
+ }
}
/**********************************************************
@@ -229,21 +304,42 @@ void
rec_set_next_offs(
/*==============*/
rec_t* rec, /* in: physical record */
- ulint next) /* in: offset of the next record */
+ ibool comp, /* in: TRUE=compact page format */
+ ulint next) /* in: offset of the next record, or 0 if none */
{
ut_ad(rec);
ut_ad(UNIV_PAGE_SIZE > next);
+ ut_ad(REC_NEXT_MASK == 0xFFFFUL);
+ ut_ad(REC_NEXT_SHIFT == 0);
+
+ if (comp) {
+ ulint field_value;
- rec_set_bit_field_2(rec, next, REC_NEXT, REC_NEXT_MASK,
- REC_NEXT_SHIFT);
+ if (next) {
+ /* The following two statements calculate
+ next - offset_of_rec mod 64Ki, where mod is the modulo
+ as a non-negative number */
+
+ field_value = (ulint)((lint)next
+ - (lint)ut_align_offset(rec, UNIV_PAGE_SIZE));
+ field_value &= REC_NEXT_MASK;
+ } else {
+ field_value = 0;
+ }
+
+ mach_write_to_2(rec - REC_NEXT, field_value);
+ } else {
+ mach_write_to_2(rec - REC_NEXT, next);
+ }
}
/**********************************************************
-The following function is used to get the number of fields in the record. */
+The following function is used to get the number of fields
+in an old-style record. */
UNIV_INLINE
ulint
-rec_get_n_fields(
-/*=============*/
+rec_get_n_fields_old(
+/*=================*/
/* out: number of data fields */
rec_t* rec) /* in: physical record */
{
@@ -251,8 +347,8 @@ rec_get_n_fields(
ut_ad(rec);
- ret = rec_get_bit_field_2(rec, REC_N_FIELDS, REC_N_FIELDS_MASK,
- REC_N_FIELDS_SHIFT);
+ ret = rec_get_bit_field_2(rec, REC_OLD_N_FIELDS,
+ REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT);
ut_ad(ret <= REC_MAX_N_FIELDS);
ut_ad(ret > 0);
@@ -260,12 +356,12 @@ rec_get_n_fields(
}
/**********************************************************
-The following function is used to set the number of fields field in the
-record. */
+The following function is used to set the number of fields
+in an old-style record. */
UNIV_INLINE
void
-rec_set_n_fields(
-/*=============*/
+rec_set_n_fields_old(
+/*=================*/
rec_t* rec, /* in: physical record */
ulint n_fields) /* in: the number of fields */
{
@@ -273,8 +369,58 @@ rec_set_n_fields(
ut_ad(n_fields <= REC_MAX_N_FIELDS);
ut_ad(n_fields > 0);
- rec_set_bit_field_2(rec, n_fields, REC_N_FIELDS, REC_N_FIELDS_MASK,
- REC_N_FIELDS_SHIFT);
+ rec_set_bit_field_2(rec, n_fields, REC_OLD_N_FIELDS,
+ REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT);
+}
+
+/**********************************************************
+The following function retrieves the status bits of a new-style record. */
+UNIV_INLINE
+ulint
+rec_get_status(
+/*===========*/
+ /* out: status bits */
+ rec_t* rec) /* in: physical record */
+{
+ ulint ret;
+
+ ut_ad(rec);
+
+ ret = rec_get_bit_field_1(rec, REC_NEW_STATUS,
+ REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
+ ut_ad((ret & ~REC_NEW_STATUS_MASK) == 0);
+
+ return(ret);
+}
+
+/**********************************************************
+The following function is used to get the number of fields
+in a record. */
+UNIV_INLINE
+ulint
+rec_get_n_fields(
+/*=============*/
+ /* out: number of data fields */
+ rec_t* rec, /* in: physical record */
+ dict_index_t* index) /* in: record descriptor */
+{
+ ut_ad(rec);
+ ut_ad(index);
+ if (!index->table->comp) {
+ return(rec_get_n_fields_old(rec));
+ }
+ switch (rec_get_status(rec)) {
+ case REC_STATUS_ORDINARY:
+ return(dict_index_get_n_fields(index));
+ case REC_STATUS_NODE_PTR:
+ return(dict_index_get_n_unique_in_tree(index) + 1);
+ case REC_STATUS_INFIMUM:
+ case REC_STATUS_SUPREMUM:
+ return(1);
+ default:
+ ut_error;
+ return(ULINT_UNDEFINED);
+ }
}
/**********************************************************
@@ -285,14 +431,16 @@ ulint
rec_get_n_owned(
/*============*/
/* out: number of owned records */
- rec_t* rec) /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp) /* in: TRUE=compact page format */
{
ulint ret;
ut_ad(rec);
- ret = rec_get_bit_field_1(rec, REC_N_OWNED, REC_N_OWNED_MASK,
- REC_N_OWNED_SHIFT);
+ ret = rec_get_bit_field_1(rec,
+ comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED,
+ REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
ut_ad(ret <= REC_MAX_N_OWNED);
return(ret);
@@ -305,13 +453,15 @@ void
rec_set_n_owned(
/*============*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ulint n_owned) /* in: the number of owned records */
{
ut_ad(rec);
ut_ad(n_owned <= REC_MAX_N_OWNED);
- rec_set_bit_field_1(rec, n_owned, REC_N_OWNED, REC_N_OWNED_MASK,
- REC_N_OWNED_SHIFT);
+ rec_set_bit_field_1(rec, n_owned,
+ comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED,
+ REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
}
/**********************************************************
@@ -321,14 +471,16 @@ ulint
rec_get_info_bits(
/*==============*/
/* out: info bits */
- rec_t* rec) /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp) /* in: TRUE=compact page format */
{
ulint ret;
ut_ad(rec);
- ret = rec_get_bit_field_1(rec, REC_INFO_BITS, REC_INFO_BITS_MASK,
- REC_INFO_BITS_SHIFT);
+ ret = rec_get_bit_field_1(rec,
+ comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
+ REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
ut_ad((ret & ~REC_INFO_BITS_MASK) == 0);
return(ret);
@@ -341,30 +493,31 @@ void
rec_set_info_bits(
/*==============*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ulint bits) /* in: info bits */
{
ut_ad(rec);
ut_ad((bits & ~REC_INFO_BITS_MASK) == 0);
- rec_set_bit_field_1(rec, bits, REC_INFO_BITS, REC_INFO_BITS_MASK,
- REC_INFO_BITS_SHIFT);
+ rec_set_bit_field_1(rec, bits,
+ comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
+ REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
}
/**********************************************************
-Gets the value of the deleted flag in info bits. */
+The following function is used to set the status bits of a new-style record. */
UNIV_INLINE
-ibool
-rec_info_bits_get_deleted_flag(
-/*===========================*/
- /* out: TRUE if deleted flag set */
- ulint info_bits) /* in: info bits from a record */
+void
+rec_set_status(
+/*===========*/
+ rec_t* rec, /* in: physical record */
+ ulint bits) /* in: status bits */
{
- if (info_bits & REC_INFO_DELETED_FLAG) {
-
- return(TRUE);
- }
+ ut_ad(rec);
+ ut_ad((bits & ~REC_NEW_STATUS_MASK) == 0);
- return(FALSE);
+ rec_set_bit_field_1(rec, bits, REC_NEW_STATUS,
+ REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
}
/**********************************************************
@@ -374,9 +527,10 @@ ibool
rec_get_deleted_flag(
/*=================*/
/* out: TRUE if delete marked */
- rec_t* rec) /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp) /* in: TRUE=compact page format */
{
- if (REC_INFO_DELETED_FLAG & rec_get_info_bits(rec)) {
+ if (REC_INFO_DELETED_FLAG & rec_get_info_bits(rec, comp)) {
return(TRUE);
}
@@ -391,6 +545,7 @@ void
rec_set_deleted_flag(
/*=================*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ibool flag) /* in: TRUE if delete marked */
{
ulint old_val;
@@ -399,7 +554,7 @@ rec_set_deleted_flag(
ut_ad(TRUE == 1);
ut_ad(flag <= TRUE);
- old_val = rec_get_info_bits(rec);
+ old_val = rec_get_info_bits(rec, comp);
if (flag) {
new_val = REC_INFO_DELETED_FLAG | old_val;
@@ -407,7 +562,39 @@ rec_set_deleted_flag(
new_val = ~REC_INFO_DELETED_FLAG & old_val;
}
- rec_set_info_bits(rec, new_val);
+ rec_set_info_bits(rec, comp, new_val);
+}
+
+/**********************************************************
+The following function tells if a new-style record is a node pointer. */
+UNIV_INLINE
+ibool
+rec_get_node_ptr_flag(
+/*=================*/
+ /* out: TRUE if node pointer */
+ rec_t* rec) /* in: physical record */
+{
+ return(REC_STATUS_NODE_PTR == rec_get_status(rec));
+}
+
+/**********************************************************
+The following function is used to flag a record as a node pointer. */
+UNIV_INLINE
+void
+rec_set_node_ptr_flag(
+/*=================*/
+ rec_t* rec, /* in: physical record */
+ ibool flag) /* in: TRUE if the record is a node pointer */
+{
+ ulint status;
+ ut_ad(flag <= TRUE);
+ ut_ad(REC_STATUS_NODE_PTR >= rec_get_status(rec));
+ if (flag) {
+ status = REC_STATUS_NODE_PTR;
+ } else {
+ status = REC_STATUS_ORDINARY;
+ }
+ rec_set_status(rec, status);
}
/**********************************************************
@@ -418,14 +605,16 @@ ulint
rec_get_heap_no(
/*=============*/
/* out: heap order number */
- rec_t* rec) /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp) /* in: TRUE=compact page format */
{
ulint ret;
ut_ad(rec);
- ret = rec_get_bit_field_2(rec, REC_HEAP_NO, REC_HEAP_NO_MASK,
- REC_HEAP_NO_SHIFT);
+ ret = rec_get_bit_field_2(rec,
+ comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO,
+ REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
ut_ad(ret <= REC_MAX_HEAP_NO);
return(ret);
@@ -438,12 +627,14 @@ void
rec_set_heap_no(
/*=============*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ulint heap_no)/* in: the heap number */
{
ut_ad(heap_no <= REC_MAX_HEAP_NO);
- rec_set_bit_field_2(rec, heap_no, REC_HEAP_NO, REC_HEAP_NO_MASK,
- REC_HEAP_NO_SHIFT);
+ rec_set_bit_field_2(rec, heap_no,
+ comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO,
+ REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
}
/**********************************************************
@@ -456,10 +647,12 @@ rec_get_1byte_offs_flag(
/* out: TRUE if 1-byte form */
rec_t* rec) /* in: physical record */
{
- ut_ad(TRUE == 1);
+#if TRUE != 1
+#error "TRUE != 1"
+#endif
- return(rec_get_bit_field_1(rec, REC_SHORT, REC_SHORT_MASK,
- REC_SHORT_SHIFT));
+ return(rec_get_bit_field_1(rec, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
+ REC_OLD_SHORT_SHIFT));
}
/**********************************************************
@@ -471,11 +664,13 @@ rec_set_1byte_offs_flag(
rec_t* rec, /* in: physical record */
ibool flag) /* in: TRUE if 1byte form */
{
- ut_ad(TRUE == 1);
+#if TRUE != 1
+#error "TRUE != 1"
+#endif
ut_ad(flag <= TRUE);
- rec_set_bit_field_1(rec, flag, REC_SHORT, REC_SHORT_MASK,
- REC_SHORT_SHIFT);
+ rec_set_bit_field_1(rec, flag, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
+ REC_OLD_SHORT_SHIFT);
}
/**********************************************************
@@ -492,9 +687,9 @@ rec_1_get_field_end_info(
ulint n) /* in: field index */
{
ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields(rec));
+ ut_ad(n < rec_get_n_fields_old(rec));
- return(mach_read_from_1(rec - (REC_N_EXTRA_BYTES + n + 1)));
+ return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1)));
}
/**********************************************************
@@ -511,68 +706,289 @@ rec_2_get_field_end_info(
ulint n) /* in: field index */
{
ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields(rec));
+ ut_ad(n < rec_get_n_fields_old(rec));
- return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2)));
+ return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2)));
}
-/***************************************************************
-Gets the value of the ith field extern storage bit. If it is TRUE
-it means that the field is stored on another page. */
+#ifdef UNIV_DEBUG
+/* Length of the rec_get_offsets() header */
+# define REC_OFFS_HEADER_SIZE 4
+#else /* UNIV_DEBUG */
+/* Length of the rec_get_offsets() header */
+# define REC_OFFS_HEADER_SIZE 2
+#endif /* UNIV_DEBUG */
+
+/* Get the base address of offsets. The extra_size is stored at
+this position, and following positions hold the end offsets of
+the fields. */
+#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE)
+
+/**************************************************************
+The following function returns the number of allocated elements
+for an array of offsets. */
+UNIV_INLINE
+ulint
+rec_offs_get_n_alloc(
+/*=================*/
+ /* out: number of elements */
+ const ulint* offsets)/* in: array for rec_get_offsets() */
+{
+ ulint n_alloc;
+ ut_ad(offsets);
+ n_alloc = offsets[0];
+ ut_ad(n_alloc > 0);
+ return(n_alloc);
+}
+
+/**************************************************************
+The following function sets the number of allocated elements
+for an array of offsets. */
+UNIV_INLINE
+void
+rec_offs_set_n_alloc(
+/*=================*/
+ ulint* offsets, /* in: array for rec_get_offsets() */
+ ulint n_alloc) /* in: number of elements */
+{
+ ut_ad(offsets);
+ ut_ad(n_alloc > 0);
+ offsets[0] = n_alloc;
+}
+
+/**************************************************************
+The following function returns the number of fields in a record. */
+UNIV_INLINE
+ulint
+rec_offs_n_fields(
+/*===============*/
+ /* out: number of fields */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
+{
+ ulint n_fields;
+ ut_ad(offsets);
+ n_fields = offsets[1];
+ ut_ad(n_fields > 0);
+ ut_ad(n_fields <= REC_MAX_N_FIELDS);
+ ut_ad(n_fields + REC_OFFS_HEADER_SIZE
+ <= rec_offs_get_n_alloc(offsets));
+ return(n_fields);
+}
+
+/****************************************************************
+Validates offsets returned by rec_get_offsets(). */
UNIV_INLINE
ibool
-rec_get_nth_field_extern_bit(
-/*=========================*/
- /* in: TRUE or FALSE */
- rec_t* rec, /* in: record */
- ulint i) /* in: ith field */
+rec_offs_validate(
+/*==============*/
+ /* out: TRUE if valid */
+ rec_t* rec, /* in: record or NULL */
+ dict_index_t* index, /* in: record descriptor or NULL */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
+{
+ ulint i = rec_offs_n_fields(offsets);
+ ulint last = ULINT_MAX;
+ ibool comp = (*rec_offs_base(offsets) & REC_OFFS_COMPACT) != 0;
+
+ if (rec) {
+ ut_ad((ulint) rec == offsets[2]);
+ if (!comp) {
+ ut_a(rec_get_n_fields_old(rec) >= i);
+ }
+ }
+ if (index) {
+ ulint max_n_fields;
+ ut_ad((ulint) index == offsets[3]);
+ max_n_fields = ut_max(
+ dict_index_get_n_fields(index),
+ dict_index_get_n_unique_in_tree(index) + 1);
+ if (comp && rec) {
+ switch (rec_get_status(rec)) {
+ case REC_STATUS_ORDINARY:
+ break;
+ case REC_STATUS_NODE_PTR:
+ max_n_fields =
+ dict_index_get_n_unique_in_tree(index) + 1;
+ break;
+ case REC_STATUS_INFIMUM:
+ case REC_STATUS_SUPREMUM:
+ max_n_fields = 1;
+ break;
+ default:
+ ut_error;
+ }
+ }
+ /* index->n_def == 0 for dummy indexes if !comp */
+ ut_a(!comp || index->n_def);
+ ut_a(!index->n_def || i <= max_n_fields);
+ }
+ while (i--) {
+ ulint curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK;
+ ut_a(curr <= last);
+ last = curr;
+ }
+ return(TRUE);
+}
+/****************************************************************
+Updates debug data in offsets, in order to avoid bogus
+rec_offs_validate() failures. */
+UNIV_INLINE
+void
+rec_offs_make_valid(
+/*================*/
+ rec_t* rec __attribute__((unused)),
+ /* in: record */
+ dict_index_t* index __attribute__((unused)),
+ /* in: record descriptor */
+ ulint* offsets __attribute__((unused)))
+ /* in: array returned by rec_get_offsets() */
{
- ulint info;
+#ifdef UNIV_DEBUG
+ ut_ad(rec_get_n_fields(rec, index) >= rec_offs_n_fields(offsets));
+ offsets[2] = (ulint) rec;
+ offsets[3] = (ulint) index;
+#endif /* UNIV_DEBUG */
+}
- if (rec_get_1byte_offs_flag(rec)) {
+/****************************************************************
+The following function is used to get a pointer to the nth
+data field in a record, located via the rec_get_offsets() array. */
+UNIV_INLINE
+byte*
+rec_get_nth_field(
+/*==============*/
+ /* out: pointer to the field */
+ rec_t* rec, /* in: record */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n, /* in: index of the field */
+ ulint* len) /* out: length of the field; UNIV_SQL_NULL
+ if SQL null */
+{
+ byte* field;
+ ulint length;
+ ut_ad(rec);
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(n < rec_offs_n_fields(offsets));
+ ut_ad(len);
- return(FALSE);
+ if (n == 0) {
+ field = rec;
+ } else {
+ field = rec + (rec_offs_base(offsets)[n] & REC_OFFS_MASK);
}
- info = rec_2_get_field_end_info(rec, i);
+ length = rec_offs_base(offsets)[1 + n];
- if (info & REC_2BYTE_EXTERN_MASK) {
- return(TRUE);
+ if (length & REC_OFFS_SQL_NULL) {
+ length = UNIV_SQL_NULL;
+ } else {
+ length &= REC_OFFS_MASK;
+ length -= field - rec;
}
- return(FALSE);
+ *len = length;
+ return(field);
}
/**********************************************************
-Returns TRUE if the extern bit is set in any of the fields
-of rec. */
+Determine if the offsets are for a record in the new
+compact format. */
UNIV_INLINE
ibool
-rec_contains_externally_stored_field(
-/*=================================*/
- /* out: TRUE if a field is stored externally */
- rec_t* rec) /* in: record */
+rec_offs_comp(
+/*==========*/
+ /* out: TRUE if compact format */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
- ulint n;
- ulint i;
-
- if (rec_get_1byte_offs_flag(rec)) {
-
- return(FALSE);
- }
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ return((*rec_offs_base(offsets) & REC_OFFS_COMPACT) != 0);
+}
- n = rec_get_n_fields(rec);
+/**********************************************************
+Returns TRUE if the nth field of rec is SQL NULL. */
+UNIV_INLINE
+ibool
+rec_offs_nth_null(
+/*==============*/
+ /* out: TRUE if SQL NULL */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n) /* in: nth field */
+{
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ ut_ad(n < rec_offs_n_fields(offsets));
+ return((rec_offs_base(offsets)[1 + n] & REC_OFFS_SQL_NULL) != 0);
+}
+/**********************************************************
+Returns TRUE if the extern bit is set in nth field of rec. */
+UNIV_INLINE
+ibool
+rec_offs_nth_extern(
+/*================*/
+ /* out: TRUE if externally stored */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n) /* in: nth field */
+{
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ ut_ad(n < rec_offs_n_fields(offsets));
+ return((rec_offs_base(offsets)[1 + n] & REC_OFFS_EXTERNAL) != 0);
+}
- for (i = 0; i < n; i++) {
- if (rec_get_nth_field_extern_bit(rec, i)) {
+/**********************************************************
+Gets the physical size of a field. */
+UNIV_INLINE
+ulint
+rec_offs_nth_size(
+/*==============*/
+ /* out: length of field */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n) /* in: nth field */
+{
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ ut_ad(n < rec_offs_n_fields(offsets));
+ return((rec_offs_base(offsets)[1 + n] - rec_offs_base(offsets)[n])
+ & REC_OFFS_MASK);
+}
+/**********************************************************
+Returns TRUE if the extern bit is set in any of the fields
+described by the offsets array. */
+UNIV_INLINE
+ibool
+rec_offs_any_extern(
+/*================*/
+ /* out: TRUE if a field is stored externally */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
+{
+ ulint i;
+ for (i = rec_offs_n_fields(offsets); i--; ) {
+ if (rec_offs_nth_extern(offsets, i)) {
return(TRUE);
}
}
-
return(FALSE);
}
+/***************************************************************
+Sets the value of the ith field extern storage bit. */
+UNIV_INLINE
+void
+rec_set_nth_field_extern_bit(
+/*=========================*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: record descriptor */
+ ulint i, /* in: ith field */
+ ibool val, /* in: value to set */
+ mtr_t* mtr) /* in: mtr holding an X-latch to the page
+ where rec is, or NULL; in the NULL case
+ we do not write to log about the change */
+{
+ if (index->table->comp) {
+ rec_set_nth_field_extern_bit_new(rec, index, i, val, mtr);
+ } else {
+ rec_set_nth_field_extern_bit_old(rec, i, val, mtr);
+ }
+}
+
/**********************************************************
Returns the offset of n - 1th field end if the record is stored in the 1-byte
offsets form. If the field is SQL null, the flag is ORed in the returned
@@ -589,9 +1005,9 @@ rec_1_get_prev_field_end_info(
ulint n) /* in: field index */
{
ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields(rec));
+ ut_ad(n <= rec_get_n_fields_old(rec));
- return(mach_read_from_1(rec - (REC_N_EXTRA_BYTES + n)));
+ return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n)));
}
/**********************************************************
@@ -608,9 +1024,9 @@ rec_2_get_prev_field_end_info(
ulint n) /* in: field index */
{
ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields(rec));
+ ut_ad(n <= rec_get_n_fields_old(rec));
- return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n)));
+ return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n)));
}
/**********************************************************
@@ -625,9 +1041,9 @@ rec_1_set_field_end_info(
ulint info) /* in: value to set */
{
ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields(rec));
+ ut_ad(n < rec_get_n_fields_old(rec));
- mach_write_to_1(rec - (REC_N_EXTRA_BYTES + n + 1), info);
+ mach_write_to_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1), info);
}
/**********************************************************
@@ -642,9 +1058,9 @@ rec_2_set_field_end_info(
ulint info) /* in: value to set */
{
ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields(rec));
+ ut_ad(n < rec_get_n_fields_old(rec));
- mach_write_to_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2), info);
+ mach_write_to_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2), info);
}
/**********************************************************
@@ -659,7 +1075,7 @@ rec_1_get_field_start_offs(
ulint n) /* in: field index */
{
ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields(rec));
+ ut_ad(n <= rec_get_n_fields_old(rec));
if (n == 0) {
@@ -682,7 +1098,7 @@ rec_2_get_field_start_offs(
ulint n) /* in: field index */
{
ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields(rec));
+ ut_ad(n <= rec_get_n_fields_old(rec));
if (n == 0) {
@@ -707,7 +1123,7 @@ rec_get_field_start_offs(
ulint n) /* in: field index */
{
ut_ad(rec);
- ut_ad(n <= rec_get_n_fields(rec));
+ ut_ad(n <= rec_get_n_fields_old(rec));
if (n == 0) {
@@ -723,8 +1139,9 @@ rec_get_field_start_offs(
}
/****************************************************************
-Gets the physical size of a field. Also an SQL null may have a field of
-size > 0, if the data type is of a fixed size. */
+Gets the physical size of an old-style field.
+Also an SQL null may have a field of size > 0,
+if the data type is of a fixed size. */
UNIV_INLINE
ulint
rec_get_nth_field_size(
@@ -744,133 +1161,134 @@ rec_get_nth_field_size(
return(next_os - os);
}
-/****************************************************************
-The following function is used to get a copy of the nth data field in a
-record to a buffer. */
-UNIV_INLINE
-void
-rec_copy_nth_field(
-/*===============*/
- void* buf, /* in: pointer to the buffer */
- rec_t* rec, /* in: record */
- ulint n, /* in: index of the field */
- ulint* len) /* out: length of the field; UNIV_SQL_NULL if SQL
- null */
-{
- byte* ptr;
-
- ut_ad(buf && rec && len);
-
- ptr = rec_get_nth_field(rec, n, len);
-
- if (*len == UNIV_SQL_NULL) {
-
- return;
- }
-
- ut_memcpy(buf, ptr, *len);
-}
-
/***************************************************************
This is used to modify the value of an already existing field in a record.
The previous value must have exactly the same size as the new value. If len
-is UNIV_SQL_NULL then the field is treated as an SQL null. */
+is UNIV_SQL_NULL then the field is treated as an SQL null for old-style
+records. For new-style records, len must not be UNIV_SQL_NULL. */
UNIV_INLINE
void
rec_set_nth_field(
/*==============*/
- rec_t* rec, /* in: record */
- ulint n, /* in: index of the field */
- void* data, /* in: pointer to the data if not SQL null */
- ulint len) /* in: length of the data or UNIV_SQL_NULL */
+ rec_t* rec, /* in: record */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n, /* in: index number of the field */
+ const void* data, /* in: pointer to the data
+ if not SQL null */
+ ulint len) /* in: length of the data or UNIV_SQL_NULL.
+ If not SQL null, must have the same
+ length as the previous value.
+ If SQL null, previous value must be
+ SQL null. */
{
byte* data2;
ulint len2;
- ut_ad((len == UNIV_SQL_NULL)
- || (rec_get_nth_field_size(rec, n) == len));
-
+ ut_ad(rec);
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+
if (len == UNIV_SQL_NULL) {
+ ut_ad(!rec_offs_comp(offsets));
rec_set_nth_field_sql_null(rec, n);
return;
}
- data2 = rec_get_nth_field(rec, n, &len2);
-
- ut_memcpy(data2, data, len);
-
+ data2 = rec_get_nth_field(rec, offsets, n, &len2);
if (len2 == UNIV_SQL_NULL) {
-
+ ut_ad(!rec_offs_comp(offsets));
rec_set_nth_field_null_bit(rec, n, FALSE);
+ ut_ad(len == rec_get_nth_field_size(rec, n));
+ } else {
+ ut_ad(len2 == len);
}
+
+ ut_memcpy(data2, data, len);
}
/**************************************************************
-The following function returns the data size of a physical
+The following function returns the data size of an old-style physical
record, that is the sum of field lengths. SQL null fields
are counted as length 0 fields. The value returned by the function
is the distance from record origin to record end in bytes. */
UNIV_INLINE
ulint
-rec_get_data_size(
-/*==============*/
- /* out: size */
+rec_get_data_size_old(
+/*==================*/
+ /* out: size */
rec_t* rec) /* in: physical record */
{
ut_ad(rec);
- return(rec_get_field_start_offs(rec, rec_get_n_fields(rec)));
+ return(rec_get_field_start_offs(rec, rec_get_n_fields_old(rec)));
}
/**************************************************************
-Returns the total size of record minus data size of record. The value
-returned by the function is the distance from record start to record origin
-in bytes. */
+The following function sets the number of fields in offsets. */
+UNIV_INLINE
+void
+rec_offs_set_n_fields(
+/*==================*/
+ ulint* offsets, /* in: array returned by rec_get_offsets() */
+ ulint n_fields) /* in: number of fields */
+{
+ ut_ad(offsets);
+ ut_ad(n_fields > 0);
+ ut_ad(n_fields <= REC_MAX_N_FIELDS);
+ ut_ad(n_fields + REC_OFFS_HEADER_SIZE
+ <= rec_offs_get_n_alloc(offsets));
+ offsets[1] = n_fields;
+}
+
+/**************************************************************
+The following function returns the data size of a physical
+record, that is the sum of field lengths. SQL null fields
+are counted as length 0 fields. The value returned by the function
+is the distance from record origin to record end in bytes. */
UNIV_INLINE
ulint
-rec_get_extra_size(
+rec_offs_data_size(
/*===============*/
- /* out: size */
- rec_t* rec) /* in: physical record */
+ /* out: size */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
- ulint n_fields;
-
- ut_ad(rec);
-
- n_fields = rec_get_n_fields(rec);
-
- if (rec_get_1byte_offs_flag(rec)) {
+ ulint size;
- return(REC_N_EXTRA_BYTES + n_fields);
- }
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ size = rec_offs_base(offsets)[rec_offs_n_fields(offsets)]
+ & REC_OFFS_MASK;
+ ut_ad(size < UNIV_PAGE_SIZE);
+ return(size);
+}
- return(REC_N_EXTRA_BYTES + 2 * n_fields);
+/**************************************************************
+Returns the total size of record minus data size of record, i.e. the
+distance from record start to record origin in bytes. The value is the first
+element of rec_offs_base(offsets) with the REC_OFFS_COMPACT bits masked off. */
+UNIV_INLINE
+ulint
+rec_offs_extra_size(
+/*================*/
+ /* out: size */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
+{
+ ulint size;
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ size = *rec_offs_base(offsets) & ~REC_OFFS_COMPACT;
+ ut_ad(size < UNIV_PAGE_SIZE);
+ return(size);
+}
-/**************************************************************
+/**************************************************************
Returns the total size of a physical record. */
UNIV_INLINE
ulint
-rec_get_size(
-/*=========*/
- /* out: size */
- rec_t* rec) /* in: physical record */
+rec_offs_size(
+/*==========*/
+ /* out: size */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
- ulint n_fields;
-
- ut_ad(rec);
-
- n_fields = rec_get_n_fields(rec);
-
- if (rec_get_1byte_offs_flag(rec)) {
-
- return(REC_N_EXTRA_BYTES + n_fields
- + rec_1_get_field_start_offs(rec, n_fields));
- }
-
- return(REC_N_EXTRA_BYTES + 2 * n_fields
- + rec_2_get_field_start_offs(rec, n_fields));
+ return(rec_offs_data_size(offsets) + rec_offs_extra_size(offsets));
}
/**************************************************************
@@ -879,10 +1297,11 @@ UNIV_INLINE
byte*
rec_get_end(
/*========*/
- /* out: pointer to end */
- rec_t* rec) /* in: pointer to record */
+ /* out: pointer to end */
+ rec_t* rec, /* in: pointer to record */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
- return(rec + rec_get_data_size(rec));
+ return(rec + rec_offs_data_size(offsets));
}
/**************************************************************
@@ -891,10 +1310,11 @@ UNIV_INLINE
byte*
rec_get_start(
/*==========*/
- /* out: pointer to start */
- rec_t* rec) /* in: pointer to record */
+ /* out: pointer to start */
+ rec_t* rec, /* in: pointer to record */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
- return(rec - rec_get_extra_size(rec));
+ return(rec - rec_offs_extra_size(offsets));
}
/*******************************************************************
@@ -903,18 +1323,20 @@ UNIV_INLINE
rec_t*
rec_copy(
/*=====*/
- /* out: pointer to the origin of the copied record */
- void* buf, /* in: buffer */
- rec_t* rec) /* in: physical record */
+ /* out: pointer to the origin of the copy */
+ void* buf, /* in: buffer */
+ const rec_t* rec, /* in: physical record */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
ulint extra_len;
ulint data_len;
ut_ad(rec && buf);
- ut_ad(rec_validate(rec));
+ ut_ad(rec_offs_validate((rec_t*) rec, NULL, offsets));
+ ut_ad(rec_validate((rec_t*) rec, offsets));
- extra_len = rec_get_extra_size(rec);
- data_len = rec_get_data_size(rec);
+ extra_len = rec_offs_extra_size(offsets);
+ data_len = rec_offs_data_size(offsets);
ut_memcpy(buf, rec - extra_len, extra_len + data_len);
@@ -922,8 +1344,8 @@ rec_copy(
}
/**************************************************************
-Returns the extra size of a physical record if we know its data size and
-the number of fields. */
+Returns the extra size of an old-style physical record if we know its
+data size and number of fields. */
UNIV_INLINE
ulint
rec_get_converted_extra_size(
@@ -934,28 +1356,51 @@ rec_get_converted_extra_size(
{
if (data_size <= REC_1BYTE_OFFS_LIMIT) {
- return(REC_N_EXTRA_BYTES + n_fields);
+ return(REC_N_OLD_EXTRA_BYTES + n_fields);
}
- return(REC_N_EXTRA_BYTES + 2 * n_fields);
+ return(REC_N_OLD_EXTRA_BYTES + 2 * n_fields);
}
/**************************************************************
The following function returns the size of a data tuple when converted to
+a new-style physical record. */
+
+ulint
+rec_get_converted_size_new(
+/*=======================*/
+ /* out: size */
+ dict_index_t* index, /* in: record descriptor */
+ dtuple_t* dtuple);/* in: data tuple */
+/**************************************************************
+The following function returns the size of a data tuple when converted to
a physical record. */
UNIV_INLINE
ulint
rec_get_converted_size(
/*===================*/
/* out: size */
+ dict_index_t* index, /* in: record descriptor */
dtuple_t* dtuple) /* in: data tuple */
{
ulint data_size;
ulint extra_size;
-
+
+ ut_ad(index);
ut_ad(dtuple);
ut_ad(dtuple_check_typed(dtuple));
+ ut_ad(index->type & DICT_UNIVERSAL
+ || dtuple_get_n_fields(dtuple) ==
+ (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
+ == REC_STATUS_NODE_PTR)
+ ? dict_index_get_n_unique_in_tree(index) + 1
+ : dict_index_get_n_fields(index)));
+
+ if (index->table->comp) {
+ return(rec_get_converted_size_new(index, dtuple));
+ }
+
data_size = dtuple_get_data_size(dtuple);
extra_size = rec_get_converted_extra_size(
@@ -971,12 +1416,15 @@ UNIV_INLINE
ulint
rec_fold(
/*=====*/
- /* out: the folded value */
- rec_t* rec, /* in: the physical record */
- ulint n_fields, /* in: number of complete fields to fold */
- ulint n_bytes, /* in: number of bytes to fold in an
- incomplete last field */
- dulint tree_id) /* in: index tree id */
+ /* out: the folded value */
+ rec_t* rec, /* in: the physical record */
+ const ulint* offsets, /* in: array returned by
+ rec_get_offsets() */
+ ulint n_fields, /* in: number of complete
+ fields to fold */
+ ulint n_bytes, /* in: number of bytes to fold
+ in an incomplete last field */
+ dulint tree_id) /* in: index tree id */
{
ulint i;
byte* data;
@@ -984,12 +1432,13 @@ rec_fold(
ulint fold;
ulint n_fields_rec;
- ut_ad(rec_validate(rec));
- ut_ad(n_fields <= rec_get_n_fields(rec));
- ut_ad((n_fields < rec_get_n_fields(rec)) || (n_bytes == 0));
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(rec_validate((rec_t*) rec, offsets));
ut_ad(n_fields + n_bytes > 0);
-
- n_fields_rec = rec_get_n_fields(rec);
+
+ n_fields_rec = rec_offs_n_fields(offsets);
+ ut_ad(n_fields <= n_fields_rec);
+ ut_ad(n_fields < n_fields_rec || n_bytes == 0);
if (n_fields > n_fields_rec) {
n_fields = n_fields_rec;
@@ -1002,7 +1451,7 @@ rec_fold(
fold = ut_fold_dulint(tree_id);
for (i = 0; i < n_fields; i++) {
- data = rec_get_nth_field(rec, i, &len);
+ data = rec_get_nth_field(rec, offsets, i, &len);
if (len != UNIV_SQL_NULL) {
fold = ut_fold_ulint_pair(fold,
@@ -1011,7 +1460,7 @@ rec_fold(
}
if (n_bytes > 0) {
- data = rec_get_nth_field(rec, i, &len);
+ data = rec_get_nth_field(rec, offsets, i, &len);
if (len != UNIV_SQL_NULL) {
if (len > n_bytes) {
@@ -1025,19 +1474,3 @@ rec_fold(
return(fold);
}
-
-/*************************************************************
-Builds a physical record out of a data tuple and stores it beginning from
-the address destination. */
-UNIV_INLINE
-rec_t*
-rec_convert_dtuple_to_rec(
-/*======================*/
- /* out: pointer to the origin of physical
- record */
- byte* destination, /* in: start address of the physical record */
- dtuple_t* dtuple) /* in: data tuple */
-{
- return(rec_convert_dtuple_to_rec_low(destination, dtuple,
- dtuple_get_data_size(dtuple)));
-}
diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h
index 13773ed380d..f0dc4630475 100644
--- a/innobase/include/row0mysql.h
+++ b/innobase/include/row0mysql.h
@@ -239,6 +239,17 @@ row_update_for_mysql(
the MySQL format */
row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
handle */
+
+/*************************************************************************
+Does an unlock of a row for MySQL. */
+
+int
+row_unlock_for_mysql(
+/*=================*/
+ /* out: error code or DB_SUCCESS */
+ row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
+ handle */
+
/*************************************************************************
Creates an query graph node of 'update' type to be used in the MySQL
interface. */
@@ -569,6 +580,10 @@ struct row_prebuilt_struct {
allocated mem buf start, because
there is a 4 byte magic number at the
start and at the end */
+ ibool keep_other_fields_on_keyread; /* when using fetch
+ cache with HA_EXTRA_KEYREAD, don't
+ overwrite other fields in the mysql
+ row buffer.*/
ulint fetch_cache_first;/* position of the first not yet
fetched row in fetch_cache */
ulint n_fetch_cached; /* number of not yet fetched rows
diff --git a/innobase/include/row0row.h b/innobase/include/row0row.h
index 951e211fb37..782973d8f5d 100644
--- a/innobase/include/row0row.h
+++ b/innobase/include/row0row.h
@@ -27,7 +27,8 @@ row_get_rec_trx_id(
/*===============*/
/* out: value of the field */
rec_t* rec, /* in: record */
- dict_index_t* index); /* in: clustered index */
+ dict_index_t* index, /* in: clustered index */
+ const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Reads the roll pointer field from a clustered index record. */
UNIV_INLINE
@@ -36,7 +37,8 @@ row_get_rec_roll_ptr(
/*=================*/
/* out: value of the field */
rec_t* rec, /* in: record */
- dict_index_t* index); /* in: clustered index */
+ dict_index_t* index, /* in: clustered index */
+ const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Writes the trx id field to a clustered index record. */
UNIV_INLINE
@@ -45,7 +47,8 @@ row_set_rec_trx_id(
/*===============*/
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
- dulint trx_id); /* in: value of the field */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
+ dulint trx_id);/* in: value of the field */
/*************************************************************************
Sets the roll pointer field in a clustered index record. */
UNIV_INLINE
@@ -54,6 +57,7 @@ row_set_rec_roll_ptr(
/*=================*/
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint roll_ptr);/* in: value of the field */
/*********************************************************************
When an insert to a table is performed, this function builds the entry which
@@ -90,6 +94,9 @@ row_build(
the buffer page of this record must be
at least s-latched and the latch held
as long as the row dtuple is used! */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index)
+ or NULL, in which case this function
+ will invoke rec_get_offsets() */
mem_heap_t* heap); /* in: memory heap from which the memory
needed is allocated */
/***********************************************************************
@@ -175,14 +182,15 @@ UNIV_INLINE
void
row_build_row_ref_fast(
/*===================*/
- dtuple_t* ref, /* in: typed data tuple where the reference
- is built */
- ulint* map, /* in: array of field numbers in rec telling
- how ref should be built from the fields of
- rec */
- rec_t* rec); /* in: record in the index; must be preserved
- while ref is used, as we do not copy field
- values to heap */
+ dtuple_t* ref, /* in: typed data tuple where the
+ reference is built */
+ const ulint* map, /* in: array of field numbers in rec
+ telling how ref should be built from
+ the fields of rec */
+ rec_t* rec, /* in: record in the index; must be
+ preserved while ref is used, as we do
+ not copy field values to heap */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/*******************************************************************
Searches the clustered index record for a row, if we have the row
reference. */
diff --git a/innobase/include/row0row.ic b/innobase/include/row0row.ic
index 8e5121f5a96..85410beacf0 100644
--- a/innobase/include/row0row.ic
+++ b/innobase/include/row0row.ic
@@ -20,7 +20,8 @@ row_get_rec_sys_field(
/* out: value of the field */
ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
rec_t* rec, /* in: record */
- dict_index_t* index); /* in: clustered index */
+ dict_index_t* index, /* in: clustered index */
+ const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Sets the trx id or roll ptr field in a clustered index record: this function
is slower than the specialized inline functions. */
@@ -32,6 +33,7 @@ row_set_rec_sys_field(
ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint val); /* in: value to set */
/*************************************************************************
@@ -42,18 +44,21 @@ row_get_rec_trx_id(
/*===============*/
/* out: value of the field */
rec_t* rec, /* in: record */
- dict_index_t* index) /* in: clustered index */
+ dict_index_t* index, /* in: clustered index */
+ const ulint* offsets)/* in: rec_get_offsets(rec, index) */
{
ulint offset;
ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(rec_offs_validate(rec, index, offsets));
offset = index->trx_id_offset;
if (offset) {
return(trx_read_trx_id(rec + offset));
} else {
- return(row_get_rec_sys_field(DATA_TRX_ID, rec, index));
+ return(row_get_rec_sys_field(DATA_TRX_ID,
+ rec, index, offsets));
}
}
@@ -65,18 +70,21 @@ row_get_rec_roll_ptr(
/*=================*/
/* out: value of the field */
rec_t* rec, /* in: record */
- dict_index_t* index) /* in: clustered index */
+ dict_index_t* index, /* in: clustered index */
+ const ulint* offsets)/* in: rec_get_offsets(rec, index) */
{
ulint offset;
ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(rec_offs_validate(rec, index, offsets));
offset = index->trx_id_offset;
if (offset) {
return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
} else {
- return(row_get_rec_sys_field(DATA_ROLL_PTR, rec, index));
+ return(row_get_rec_sys_field(DATA_ROLL_PTR,
+ rec, index, offsets));
}
}
@@ -88,18 +96,21 @@ row_set_rec_trx_id(
/*===============*/
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint trx_id) /* in: value of the field */
{
ulint offset;
ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(rec_offs_validate(rec, index, offsets));
offset = index->trx_id_offset;
if (offset) {
trx_write_trx_id(rec + offset, trx_id);
} else {
- row_set_rec_sys_field(DATA_TRX_ID, rec, index, trx_id);
+ row_set_rec_sys_field(DATA_TRX_ID,
+ rec, index, offsets, trx_id);
}
}
@@ -111,18 +122,21 @@ row_set_rec_roll_ptr(
/*=================*/
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint roll_ptr)/* in: value of the field */
{
ulint offset;
ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(rec_offs_validate(rec, index, offsets));
offset = index->trx_id_offset;
if (offset) {
trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
} else {
- row_set_rec_sys_field(DATA_ROLL_PTR, rec, index, roll_ptr);
+ row_set_rec_sys_field(DATA_ROLL_PTR,
+ rec, index, offsets, roll_ptr);
}
}
@@ -133,14 +147,15 @@ UNIV_INLINE
void
row_build_row_ref_fast(
/*===================*/
- dtuple_t* ref, /* in: typed data tuple where the reference
- is built */
- ulint* map, /* in: array of field numbers in rec telling
- how ref should be built from the fields of
- rec */
- rec_t* rec) /* in: record in the index; must be preserved
- while ref is used, as we do not copy field
- values to heap */
+ dtuple_t* ref, /* in: typed data tuple where the
+ reference is built */
+ const ulint* map, /* in: array of field numbers in rec
+ telling how ref should be built from
+ the fields of rec */
+ rec_t* rec, /* in: record in the index; must be
+ preserved while ref is used, as we do
+ not copy field values to heap */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
dfield_t* dfield;
byte* field;
@@ -149,6 +164,7 @@ row_build_row_ref_fast(
ulint field_no;
ulint i;
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
ref_len = dtuple_get_n_fields(ref);
for (i = 0; i < ref_len; i++) {
@@ -158,7 +174,8 @@ row_build_row_ref_fast(
if (field_no != ULINT_UNDEFINED) {
- field = rec_get_nth_field(rec, field_no, &len);
+ field = rec_get_nth_field(rec, offsets,
+ field_no, &len);
dfield_set_data(dfield, field, len);
}
}
diff --git a/innobase/include/row0upd.h b/innobase/include/row0upd.h
index 28210364833..673e0511153 100644
--- a/innobase/include/row0upd.h
+++ b/innobase/include/row0upd.h
@@ -80,6 +80,7 @@ row_upd_rec_sys_fields(
/*===================*/
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
trx_t* trx, /* in: transaction */
dulint roll_ptr);/* in: roll ptr of the undo log record */
/*************************************************************************
@@ -124,8 +125,8 @@ row_upd_changes_field_size_or_external(
/* out: TRUE if the update changes the size of
some field in index or the field is external
in rec or update */
- rec_t* rec, /* in: record in index */
dict_index_t* index, /* in: index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
upd_t* update);/* in: update vector */
/***************************************************************
Replaces the new column values stored in the update vector to the record
@@ -135,8 +136,9 @@ a clustered index */
void
row_upd_rec_in_place(
/*=================*/
- rec_t* rec, /* in/out: record where replaced */
- upd_t* update);/* in: update vector */
+ rec_t* rec, /* in/out: record where replaced */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ upd_t* update);/* in: update vector */
/*******************************************************************
Builds an update vector from those fields which in a secondary index entry
differ from a record that has the equal ordering fields. NOTE: we compare
@@ -274,10 +276,11 @@ recovery. */
void
row_upd_rec_sys_fields_in_recovery(
/*===============================*/
- rec_t* rec, /* in: record */
- ulint pos, /* in: TRX_ID position in rec */
- dulint trx_id, /* in: transaction id */
- dulint roll_ptr);/* in: roll ptr of the undo log record */
+ rec_t* rec, /* in: record */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint pos, /* in: TRX_ID position in rec */
+ dulint trx_id, /* in: transaction id */
+ dulint roll_ptr);/* in: roll ptr of the undo log record */
/*************************************************************************
Parses the log data written by row_upd_index_write_log. */
diff --git a/innobase/include/row0upd.ic b/innobase/include/row0upd.ic
index a124228a0de..e2d81a39cfa 100644
--- a/innobase/include/row0upd.ic
+++ b/innobase/include/row0upd.ic
@@ -106,15 +106,17 @@ row_upd_rec_sys_fields(
/*===================*/
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
trx_t* trx, /* in: transaction */
dulint roll_ptr)/* in: roll ptr of the undo log record */
{
ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(rec_offs_validate(rec, index, offsets));
#ifdef UNIV_SYNC_DEBUG
ut_ad(!buf_block_align(rec)->is_hashed
|| rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- row_set_rec_trx_id(rec, index, trx->id);
- row_set_rec_roll_ptr(rec, index, roll_ptr);
+ row_set_rec_trx_id(rec, index, offsets, trx->id);
+ row_set_rec_roll_ptr(rec, index, offsets, roll_ptr);
}
diff --git a/innobase/include/row0vers.h b/innobase/include/row0vers.h
index 30cf82144e9..0dd40fda65f 100644
--- a/innobase/include/row0vers.h
+++ b/innobase/include/row0vers.h
@@ -30,7 +30,8 @@ row_vers_impl_x_locked_off_kernel(
transaction; NOTE that the kernel mutex is
temporarily released! */
rec_t* rec, /* in: record in a secondary index */
- dict_index_t* index); /* in: the secondary index */
+ dict_index_t* index, /* in: the secondary index */
+ const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*********************************************************************
Finds out if we must preserve a delete marked earlier version of a clustered
index record, because it is >= the purge view. */
diff --git a/innobase/include/row0vers.ic b/innobase/include/row0vers.ic
index 5ece47c35d1..ab1e264635b 100644
--- a/innobase/include/row0vers.ic
+++ b/innobase/include/row0vers.ic
@@ -11,73 +11,3 @@ Created 2/6/1997 Heikki Tuuri
#include "read0read.h"
#include "page0page.h"
#include "log0recv.h"
-
-/*************************************************************************
-Fetches the trx id of a clustered index record or version. */
-UNIV_INLINE
-dulint
-row_vers_get_trx_id(
-/*================*/
- /* out: trx id or ut_dulint_zero if the
- clustered index record not found */
- rec_t* rec, /* in: clustered index record, or an old
- version of it */
- dict_table_t* table) /* in: table */
-{
- return(row_get_rec_trx_id(rec, dict_table_get_first_index(table)));
-}
-
-/*************************************************************************
-Checks if a consistent read can be performed immediately on the index
-record, or if an older version is needed. */
-UNIV_INLINE
-ibool
-row_vers_clust_rec_sees_older(
-/*==========================*/
- /* out: FALSE if can read immediately */
- rec_t* rec, /* in: record which should be read or passed
- over by a read cursor */
- dict_index_t* index, /* in: clustered index */
- read_view_t* view) /* in: read view */
-{
- ut_ad(index->type & DICT_CLUSTERED);
-
- if (read_view_sees_trx_id(view, row_get_rec_trx_id(rec, index))) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/*************************************************************************
-Checks if a secondary index record can be read immediately by a consistent
-read, or if an older version may be needed. To be sure, we will have to
-look in the clustered index. */
-UNIV_INLINE
-ibool
-row_vers_sec_rec_may_see_older(
-/*===========================*/
- /* out: FALSE if can be read immediately */
- rec_t* rec, /* in: record which should be read or passed */
- dict_index_t* index __attribute__((unused)),/* in: secondary index */
- read_view_t* view) /* in: read view */
-{
- page_t* page;
-
- ut_ad(!(index->type & DICT_CLUSTERED));
-
- page = buf_frame_align(rec);
-
- if ((ut_dulint_cmp(page_get_max_trx_id(page), view->up_limit_id) >= 0)
- || recv_recovery_is_on()) {
-
- /* It may be that the record was inserted or modified by a
- transaction the view should not see: we have to look in the
- clustered index */
-
- return(TRUE);
- }
-
- return(FALSE);
-}
diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h
index 6cfe9cef927..c5374fd00fa 100644
--- a/innobase/include/srv0srv.h
+++ b/innobase/include/srv0srv.h
@@ -107,6 +107,7 @@ extern ibool srv_very_fast_shutdown; /* if this TRUE, do not flush the
extern ibool srv_innodb_status;
extern ibool srv_use_doublewrite_buf;
+extern ibool srv_use_checksums;
extern ibool srv_set_thread_priorities;
extern int srv_query_thread_priority;
@@ -133,6 +134,8 @@ extern ibool srv_lock_timeout_and_monitor_active;
extern ibool srv_error_monitor_active;
extern ulint srv_n_spin_wait_rounds;
+extern ulint srv_n_free_tickets_to_enter;
+extern ulint srv_thread_sleep_delay;
extern ulint srv_spin_wait_delay;
extern ibool srv_priority_boost;
@@ -184,6 +187,63 @@ i/o handler thread */
extern const char* srv_io_thread_op_info[];
extern const char* srv_io_thread_function[];
+/* the number of the log write requests done */
+extern ulint srv_log_write_requests;
+
+/* the number of physical writes to the log performed */
+extern ulint srv_log_writes;
+
+/* amount of data written to the log files in bytes */
+extern ulint srv_os_log_written;
+
+/* number of writes being done to the log files */
+extern ulint srv_os_log_pending_writes;
+
+/* we increase this counter when we don't have enough space in the
+log buffer and have to flush it */
+extern ulint srv_log_waits;
+
+/* variable that counts amount of data read in total (in bytes) */
+extern ulint srv_data_read;
+
+/* here we count the amount of data written in total (in bytes) */
+extern ulint srv_data_written;
+
+/* this variable counts the number of times the doublewrite buffer
+was flushed */
+extern ulint srv_dblwr_writes;
+
+/* here we store the number of pages that have been flushed to the
+doublewrite buffer */
+extern ulint srv_dblwr_pages_written;
+
+/* in this variable we store the number of write requests issued */
+extern ulint srv_buf_pool_write_requests;
+
+/* here we store the number of times when we had to wait for a free page
+in the buffer pool. It happens when the buffer pool is full and we need
+to make a flush, in order to be able to read or create a page. */
+extern ulint srv_buf_pool_wait_free;
+
+/* variable to count the number of pages that were written from the
+buffer pool to disk */
+extern ulint srv_buf_pool_flushed;
+
+/* variable to count the number of buffer pool reads that led to the
+reading of a disk page */
+extern ulint srv_buf_pool_reads;
+
+/* variable to count the number of sequential read-aheads that were done */
+extern ulint srv_read_ahead_seq;
+
+/* variable to count the number of random read-aheads that were done */
+extern ulint srv_read_ahead_rnd;
+
+/* In this structure we store status variables to be passed to MySQL */
+typedef struct export_var_struct export_struc;
+
+extern export_struc export_vars;
+
typedef struct srv_sys_struct srv_sys_t;
/* The server system */
@@ -400,7 +460,12 @@ void
srv_printf_innodb_monitor(
/*======================*/
FILE* file); /* in: output stream */
+/************************************************************************
+Function to pass InnoDB status variables to MySQL */
+void
+srv_export_innodb_status(void);
+/*=====================*/
/* Types for the threads existing in the system. Threads of types 4 - 9
are called utility threads. Note that utility threads are mainly disk
@@ -426,6 +491,53 @@ typedef struct srv_slot_struct srv_slot_t;
/* Thread table is an array of slots */
typedef srv_slot_t srv_table_t;
+/* In this structure we store status variables to be passed to MySQL.
+A single global instance, export_vars, is declared in this header.
+NOTE(review): presumably srv_export_innodb_status() fills it in from the
+srv_* counters and other InnoDB state -- confirm against the definition.
+All members are ulint except innodb_row_lock_time, which is ib_longlong. */
+struct export_var_struct{
+ ulint innodb_data_pending_reads;
+ ulint innodb_data_pending_writes;
+ ulint innodb_data_pending_fsyncs;
+ ulint innodb_data_fsyncs;
+ ulint innodb_data_read;
+ ulint innodb_data_writes;
+ ulint innodb_data_written;
+ ulint innodb_data_reads;
+ ulint innodb_buffer_pool_pages_total;
+ ulint innodb_buffer_pool_pages_data;
+ ulint innodb_buffer_pool_pages_dirty;
+ ulint innodb_buffer_pool_pages_misc;
+ ulint innodb_buffer_pool_pages_free;
+ ulint innodb_buffer_pool_pages_latched;
+ ulint innodb_buffer_pool_read_requests;
+ ulint innodb_buffer_pool_reads;
+ ulint innodb_buffer_pool_wait_free;
+ ulint innodb_buffer_pool_pages_flushed;
+ ulint innodb_buffer_pool_write_requests;
+ ulint innodb_buffer_pool_read_ahead_seq;
+ ulint innodb_buffer_pool_read_ahead_rnd;
+ ulint innodb_dblwr_pages_written;
+ ulint innodb_dblwr_writes;
+ ulint innodb_log_waits;
+ ulint innodb_log_write_requests;
+ ulint innodb_log_writes;
+ ulint innodb_os_log_written;
+ ulint innodb_os_log_fsyncs;
+ ulint innodb_os_log_pending_writes;
+ ulint innodb_os_log_pending_fsyncs;
+ ulint innodb_page_size;
+ ulint innodb_pages_created;
+ ulint innodb_pages_read;
+ ulint innodb_pages_written;
+ ulint innodb_row_lock_waits;
+ ulint innodb_row_lock_current_waits;
+ ib_longlong innodb_row_lock_time; /* the only non-ulint member */
+ ulint innodb_row_lock_time_avg;
+ ulint innodb_row_lock_time_max;
+ ulint innodb_rows_read;
+ ulint innodb_rows_inserted;
+ ulint innodb_rows_updated;
+ ulint innodb_rows_deleted;
+};
+
/* The server system struct */
struct srv_sys_struct{
os_event_t operational; /* created threads must wait for the
@@ -434,6 +546,10 @@ struct srv_sys_struct{
srv_table_t* threads; /* server thread table */
UT_LIST_BASE_NODE_T(que_thr_t)
tasks; /* task queue */
+ dict_index_t* dummy_ind1; /* dummy index for old-style
+ supremum and infimum records */
+ dict_index_t* dummy_ind2; /* dummy index for new-style
+ supremum and infimum records */
};
extern ulint srv_n_threads_active[];
diff --git a/innobase/include/sync0rw.h b/innobase/include/sync0rw.h
index 9a988a03e92..911c8ac3f4a 100644
--- a/innobase/include/sync0rw.h
+++ b/innobase/include/sync0rw.h
@@ -61,8 +61,8 @@ Creates, or rather, initializes an rw-lock object in a specified memory
location (which must be appropriately aligned). The rw-lock is initialized
to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
is necessary only if the memory block containing it is freed. */
-
-#define rw_lock_create(L) rw_lock_create_func((L), __FILE__, __LINE__)
+#define rw_lock_create(L) rw_lock_create_func((L), __FILE__, __LINE__, #L)
+
/*=====================*/
/**********************************************************************
Creates, or rather, initializes an rw-lock object in a specified memory
@@ -75,7 +75,8 @@ rw_lock_create_func(
/*================*/
rw_lock_t* lock, /* in: pointer to memory */
const char* cfile_name, /* in: file name where created */
- ulint cline); /* in: file line where created */
+ ulint cline, /* in: file line where created */
+ const char* cmutex_name); /* in: mutex name */
/**********************************************************************
Calling this function is obligatory only if the memory buffer containing
the rw-lock is freed. Removes an rw-lock object from the global list. The
diff --git a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h
index 8e0ec715b12..5046a960bcf 100644
--- a/innobase/include/sync0sync.h
+++ b/innobase/include/sync0sync.h
@@ -17,6 +17,8 @@ Created 9/5/1995 Heikki Tuuri
#include "os0sync.h"
#include "sync0arr.h"
+extern my_bool timed_mutexes;
+
/**********************************************************************
Initializes the synchronization data structures. */
@@ -35,8 +37,7 @@ location (which must be appropriately aligned). The mutex is initialized
in the reset state. Explicit freeing of the mutex with mutex_free is
necessary only if the memory block containing it is freed. */
-
-#define mutex_create(M) mutex_create_func((M), __FILE__, __LINE__)
+#define mutex_create(M) mutex_create_func((M), __FILE__, __LINE__, #M)
/*===================*/
/**********************************************************************
Creates, or rather, initializes a mutex object in a specified memory
@@ -49,7 +50,8 @@ mutex_create_func(
/*==============*/
mutex_t* mutex, /* in: pointer to memory */
const char* cfile_name, /* in: file name where created */
- ulint cline); /* in: file line where created */
+ ulint cline, /* in: file line where created */
+ const char* cmutex_name); /* in: mutex name */
/**********************************************************************
Calling this function is obligatory only if the memory buffer containing
the mutex is freed. Removes a mutex object from the mutex list. The mutex
@@ -413,6 +415,8 @@ or row lock! */
/*------------------------------------- Insert buffer tree */
#define SYNC_IBUF_BITMAP_MUTEX 351
#define SYNC_IBUF_BITMAP 350
+/*------------------------------------- MySQL query cache mutex */
+/*------------------------------------- MySQL binlog mutex */
/*-------------------------------*/
#define SYNC_KERNEL 300
#define SYNC_REC_LOCK 299
@@ -471,6 +475,15 @@ struct mutex_struct {
const char* cfile_name;/* File name where mutex created */
ulint cline; /* Line where created */
ulint magic_n;
+ ulong count_using; /* count of times mutex used */
+ ulong count_spin_loop; /* count of spin loops */
+ ulong count_spin_rounds; /* count of spin rounds */
+ ulong count_os_wait; /* count of os_wait */
+ ulong count_os_yield; /* count of os_yield */
+ ulonglong lspent_time; /* mutex os_wait timer msec */
+ ulonglong lmax_spent_time; /* mutex os_wait timer msec */
+ const char* cmutex_name;/* mutex name */
+ ulint mutex_type;/* 0 - usual mutex 1 - rw_lock mutex */
};
#define MUTEX_MAGIC_N (ulint)979585
@@ -504,6 +517,13 @@ extern ibool sync_order_checks_on;
/* This variable is set to TRUE when sync_init is called */
extern ibool sync_initialized;
+/* Global list of database mutexes (not OS mutexes) created.
+NOTE(review): unlike the neighbouring declarations in this header, this
+one has no 'extern' and no initializer, so it is a tentative definition
+that is repeated in every file including the header; linking then
+typically relies on the toolchain merging the copies. Consider declaring
+it 'extern' here and defining it once in a .c file -- confirm how the
+list is defined elsewhere before changing. */
+UT_LIST_BASE_NODE_T(mutex_t) mutex_list;
+
+/* Mutex protecting the mutex_list variable.
+NOTE(review): like mutex_list, this is declared without 'extern' in a
+header file. */
+mutex_t mutex_list_mutex;
+
+
#ifndef UNIV_NONINL
#include "sync0sync.ic"
#endif
diff --git a/innobase/include/sync0sync.ic b/innobase/include/sync0sync.ic
index aaf5e1fd9e9..f26f3788dc3 100644
--- a/innobase/include/sync0sync.ic
+++ b/innobase/include/sync0sync.ic
@@ -249,8 +249,11 @@ mutex_enter_func(
/* Note that we do not peek at the value of lock_word before trying
the atomic test_and_set; we could peek, and possibly save time. */
+
+ mutex->count_using++;
- if (!mutex_test_and_set(mutex)) {
+ if (!mutex_test_and_set(mutex))
+ {
#ifdef UNIV_SYNC_DEBUG
mutex_set_debug_info(mutex, file_name, line);
#endif
@@ -258,4 +261,5 @@ mutex_enter_func(
}
mutex_spin_wait(mutex, file_name, line);
+
}
diff --git a/innobase/include/trx0rec.h b/innobase/include/trx0rec.h
index 9d7f41cd94e..4387ce1a61e 100644
--- a/innobase/include/trx0rec.h
+++ b/innobase/include/trx0rec.h
@@ -246,6 +246,7 @@ trx_undo_prev_version_build(
index_rec page and purge_view */
rec_t* rec, /* in: version of a clustered index record */
dict_index_t* index, /* in: clustered index */
+ ulint* offsets,/* in: rec_get_offsets(rec, index) */
mem_heap_t* heap, /* in: memory heap from which the memory
needed is allocated */
rec_t** old_vers);/* out, own: previous version, or NULL if
diff --git a/innobase/include/trx0roll.h b/innobase/include/trx0roll.h
index 6004551f456..9d025da4a5f 100644
--- a/innobase/include/trx0roll.h
+++ b/innobase/include/trx0roll.h
@@ -104,11 +104,20 @@ trx_rollback(
/***********************************************************************
Rollback or clean up transactions which have no user session. If the
transaction already was committed, then we clean up a possible insert
-undo log. If the transaction was not yet committed, then we roll it back. */
+undo log. If the transaction was not yet committed, then we roll it back.
+Note: this is done in a background thread. */
-void
-trx_rollback_or_clean_all_without_sess(void);
-/*========================================*/
+#ifndef __WIN__
+void*
+#else
+ulint
+#endif
+trx_rollback_or_clean_all_without_sess(
+/*===================================*/
+ /* out: a dummy parameter */
+ void* arg __attribute__((unused)));
+ /* in: a dummy parameter required by
+ os_thread_create */
/********************************************************************
Finishes a transaction rollback. */
diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h
index 8336e05bdb0..76b051105de 100644
--- a/innobase/include/trx0trx.h
+++ b/innobase/include/trx0trx.h
@@ -16,6 +16,7 @@ Created 3/26/1996 Heikki Tuuri
#include "que0types.h"
#include "mem0mem.h"
#include "read0types.h"
+#include "trx0xa.h"
extern ulint trx_n_mysql_transactions;
@@ -156,6 +157,36 @@ trx_commit_for_mysql(
/*=================*/
/* out: 0 or error number */
trx_t* trx); /* in: trx handle */
+
+/**************************************************************************
+Does the transaction prepare for MySQL. */
+
+ulint
+trx_prepare_for_mysql(
+/*=================*/
+ /* out: 0 or error number */
+ trx_t* trx); /* in: trx handle */
+
+/**************************************************************************
+This function is used to find number of prepared transactions and
+their transaction objects for a recovery. */
+
+int
+trx_recover_for_mysql(
+/*=================*/
+ /* out: number of prepared transactions */
+ XID* xid_list, /* in/out: prepared transactions */
+ uint len); /* in: number of slots in xid_list */
+
+/***********************************************************************
+This function is used to commit one X/Open XA distributed transaction
+which is in the prepared state.
+NOTE(review): the name and the "trx or NULL" return value suggest that
+this function looks up the transaction for the given XID rather than
+committing it itself -- confirm against the definition and reword if so. */
+trx_t *
+trx_get_trx_by_xid(
+/*===============*/
+ /* out: trx or NULL */
+ XID* xid); /* in: X/Open XA Transaction Identification */
+
/**************************************************************************
If required, flushes the log to disk if we called trx_commit_for_mysql()
with trx->flush_log_later == TRUE. */
@@ -339,6 +370,9 @@ struct trx_struct{
if we can use the insert buffer for
them, we set this FALSE */
dulint id; /* transaction id */
+ XID xid; /* X/Open XA transaction
+ identification to identify a
+ transaction branch */
dulint no; /* transaction serialization number ==
max trx id when the transaction is
moved to COMMITTED_IN_MEMORY state */
@@ -353,8 +387,10 @@ struct trx_struct{
dulint table_id; /* table id if the preceding field is
TRUE */
/*------------------------------*/
- void* mysql_thd; /* MySQL thread handle corresponding
- to this trx, or NULL */
+ int active_trans; /* whether a transaction in MySQL
+ is active */
+ void* mysql_thd; /* MySQL thread handle corresponding
+ to this trx, or NULL */
char** mysql_query_str;/* pointer to the field in mysqld_thd
which contains the pointer to the
current SQL query string */
@@ -436,9 +472,15 @@ struct trx_struct{
lock_t* auto_inc_lock; /* possible auto-inc lock reserved by
the transaction; note that it is also
in the lock list trx_locks */
+ ibool trx_create_lock;/* this is TRUE if we have created a
+ new lock for a record accessed */
ulint n_lock_table_exp;/* number of explicit table locks
(LOCK TABLES) reserved by the
transaction, stored in trx_locks */
+ ulint n_lock_table_transactional;
+ /* number of transactional table locks
+ (LOCK TABLES..WHERE ENGINE) reserved by
+ the transaction, stored in trx_locks */
UT_LIST_NODE_T(trx_t)
trx_list; /* list of transactions */
UT_LIST_NODE_T(trx_t)
@@ -554,6 +596,7 @@ struct trx_struct{
#define TRX_NOT_STARTED 1
#define TRX_ACTIVE 2
#define TRX_COMMITTED_IN_MEMORY 3
+#define TRX_PREPARED 4 /* Support for 2PC/XA */
/* Transaction execution states when trx state is TRX_ACTIVE */
#define TRX_QUE_RUNNING 1 /* transaction is running */
diff --git a/innobase/include/trx0undo.h b/innobase/include/trx0undo.h
index 20002076cc3..fce62e46046 100644
--- a/innobase/include/trx0undo.h
+++ b/innobase/include/trx0undo.h
@@ -14,6 +14,7 @@ Created 3/26/1996 Heikki Tuuri
#include "mtr0mtr.h"
#include "trx0sys.h"
#include "page0types.h"
+#include "trx0xa.h"
/***************************************************************************
Builds a roll pointer dulint. */
@@ -36,7 +37,7 @@ trx_undo_decode_roll_ptr(
ibool* is_insert, /* out: TRUE if insert undo log */
ulint* rseg_id, /* out: rollback segment id */
ulint* page_no, /* out: page number */
- ulint* offset); /* out: offset of the undo entry within page */
+ ulint* offset); /* out: offset of the undo entry within page */
/***************************************************************************
Returns TRUE if the roll pointer is of the insert type. */
UNIV_INLINE
@@ -239,6 +240,18 @@ trx_undo_set_state_at_finish(
trx_t* trx, /* in: transaction */
trx_undo_t* undo, /* in: undo log memory copy */
mtr_t* mtr); /* in: mtr */
+/**********************************************************************
+Sets the state of the undo log segment at a transaction prepare. */
+
+page_t*
+trx_undo_set_state_at_prepare(
+/*==========================*/
+ /* out: undo log segment header page,
+ x-latched */
+ trx_t* trx, /* in: transaction */
+ trx_undo_t* undo, /* in: undo log memory copy */
+ mtr_t* mtr); /* in: mtr */
+
/**************************************************************************
Adds the update undo log header as the first in the history list, and
frees the memory object, or puts it to the list of cached update undo log
@@ -294,7 +307,23 @@ trx_undo_parse_discard_latest(
byte* end_ptr,/* in: buffer end */
page_t* page, /* in: page or NULL */
mtr_t* mtr); /* in: mtr or NULL */
+/************************************************************************
+Write X/Open XA Transaction Identification (XID) to undo log header */
+void
+trx_undo_write_xid(
+/*===============*/
+ trx_ulogf_t* log_hdr,/* in: undo log header */
+ XID* xid); /* in: X/Open XA Transaction Identification */
+
+/************************************************************************
+Read X/Open XA Transaction Identification (XID) from undo log header */
+
+void
+trx_undo_read_xid(
+/*==============*/
+ trx_ulogf_t* log_hdr,/* in: undo log header */
+ XID* xid); /* out: X/Open XA Transaction Identification */
/* Types of an undo log segment */
#define TRX_UNDO_INSERT 1 /* contains undo entries for inserts */
@@ -310,6 +339,8 @@ trx_undo_parse_discard_latest(
#define TRX_UNDO_TO_PURGE 4 /* update undo segment will not be
reused: it can be freed in purge when
all undo data in it is removed */
+#define TRX_UNDO_PREPARED 5 /* contains an undo log of a
+ prepared transaction */
/* Transaction undo log memory object; this is protected by the undo_mutex
in the corresponding transaction object */
@@ -332,6 +363,8 @@ struct trx_undo_struct{
field */
dulint trx_id; /* id of the trx assigned to the undo
log */
+ XID xid; /* X/Open XA transaction
+ identification */
ibool dict_operation; /* TRUE if a dict operation trx */
dulint table_id; /* if a dict operation, then the table
id */
@@ -436,7 +469,10 @@ page of an update undo log segment. */
log start, and therefore this is not
necessarily the same as this log
header end offset */
-#define TRX_UNDO_DICT_OPERATION 20 /* TRUE if the transaction is a table
+#define TRX_UNDO_XID_EXISTS 20 /* TRUE if undo log header includes
+ X/Open XA transaction identification
+ XID */
+#define TRX_UNDO_DICT_TRANS 21 /* TRUE if the transaction is a table
create, index create, or drop
transaction: in recovery
the transaction cannot be rolled back
@@ -452,7 +488,17 @@ page of an update undo log segment. */
#define TRX_UNDO_HISTORY_NODE 34 /* If the log is put to the history
list, the file list node is here */
/*-------------------------------------------------------------*/
-#define TRX_UNDO_LOG_HDR_SIZE (34 + FLST_NODE_SIZE)
+/* X/Open XA Transaction Identification (XID).
+Each constant below is the previous one plus the space reserved for it:
+4 bytes each after TRX_UNDO_XA_FORMAT, TRX_UNDO_XA_TRID_LEN and
+TRX_UNDO_XA_BQUAL_LEN, then XIDDATASIZE bytes after TRX_UNDO_XA_XID.
+NOTE(review): these presumably hold the formatID, gtrid_length,
+bqual_length and data members of XID; those members are declared 'long'
+in trx0xa.h while only 4 bytes are reserved here -- confirm that the
+read/write code stores them as 32-bit values. */
+
+#define TRX_UNDO_XA_FORMAT (34 + FLST_NODE_SIZE)
+#define TRX_UNDO_XA_TRID_LEN (TRX_UNDO_XA_FORMAT + 4)
+#define TRX_UNDO_XA_BQUAL_LEN (TRX_UNDO_XA_TRID_LEN + 4)
+#define TRX_UNDO_XA_XID (TRX_UNDO_XA_BQUAL_LEN + 4)
+#define TRX_UNDO_XA_LEN (TRX_UNDO_XA_XID + XIDDATASIZE)
+
+/*-------------------------------------------------------------*/
+#define TRX_UNDO_LOG_HDR_SIZE (TRX_UNDO_XA_LEN)
+/*-------------------------------------------------------------*/
#ifndef UNIV_NONINL
#include "trx0undo.ic"
diff --git a/innobase/include/trx0xa.h b/innobase/include/trx0xa.h
new file mode 100644
index 00000000000..34b7a2f95a8
--- /dev/null
+++ b/innobase/include/trx0xa.h
@@ -0,0 +1,182 @@
+/*
+ * Start of xa.h header
+ *
+ * Define a symbol to prevent multiple inclusions of this header file
+ */
+#ifndef XA_H
+#define XA_H
+
+/*
+ * Transaction branch identification: XID and NULLXID:
+ */
+#ifndef XIDDATASIZE
+
+#define XIDDATASIZE 128 /* size in bytes */
+#define MAXGTRIDSIZE 64 /* maximum size in bytes of gtrid */
+#define MAXBQUALSIZE 64 /* maximum size in bytes of bqual */
+
+/* Transaction branch identifier (XID). Only defined when XIDDATASIZE has
+not already been defined, i.e. when no other header supplied the type. */
+struct xid_t {
+ long formatID; /* format identifier; -1 means that
+ the XID is null */
+ long gtrid_length; /* value from 1 through 64 */
+ long bqual_length; /* value from 1 through 64 */
+ char data[XIDDATASIZE]; /* XIDDATASIZE bytes, which equals
+ MAXGTRIDSIZE + MAXBQUALSIZE;
+ NOTE(review): presumably the gtrid
+ bytes followed by the bqual bytes --
+ confirm against the XA spec */
+};
+typedef struct xid_t XID;
+#endif
+/*
+ * A value of -1 in formatID means that the XID is null.
+ */
+
+
+#ifdef NOTDEFINED
+/* Let us comment this out to remove compiler errors!!!!!!!!!!!! */
+
+/*
+ * Declarations of routines by which RMs call TMs:
+ */
+extern int ax_reg __P((int, XID *, long));
+extern int ax_unreg __P((int, long));
+
+/*
+ * XA Switch Data Structure
+ */
+#define RMNAMESZ 32 /* length of resource manager name, */
+ /* including the null terminator */
+#define MAXINFOSIZE 256 /* maximum size in bytes of xa_info */
+ /* strings, including the null
+ terminator */
+
+
+struct xa_switch_t {
+ char name[RMNAMESZ]; /* name of resource manager */
+ long flags; /* resource manager specific options */
+ long version; /* must be 0 */
+ int (*xa_open_entry) /* xa_open function pointer */
+ __P((char *, int, long));
+ int (*xa_close_entry) /* xa_close function pointer */
+ __P((char *, int, long));
+ int (*xa_start_entry) /* xa_start function pointer */
+ __P((XID *, int, long));
+ int (*xa_end_entry) /* xa_end function pointer */
+ __P((XID *, int, long));
+ int (*xa_rollback_entry) /* xa_rollback function pointer */
+ __P((XID *, int, long));
+ int (*xa_prepare_entry) /* xa_prepare function pointer */
+ __P((XID *, int, long));
+ int (*xa_commit_entry) /* xa_commit function pointer */
+ __P((XID *, int, long));
+ int (*xa_recover_entry) /* xa_recover function pointer */
+ __P((XID *, long, int, long));
+ int (*xa_forget_entry) /* xa_forget function pointer */
+ __P((XID *, int, long));
+ int (*xa_complete_entry) /* xa_complete function pointer */
+ __P((int *, int *, int, long));
+};
+#endif /* NOTDEFINED */
+
+
+/*
+ * Flag definitions for the RM switch
+ */
+#define TMNOFLAGS 0x00000000L /* no resource manager features
+ selected */
+#define TMREGISTER 0x00000001L /* resource manager dynamically
+ registers */
+#define TMNOMIGRATE 0x00000002L /* resource manager does not support
+ association migration */
+#define TMUSEASYNC 0x00000004L /* resource manager supports
+ asynchronous operations */
+/*
+ * Flag definitions for xa_ and ax_ routines
+ */
+/* use TMNOFLAGS, defined above, when not specifying other flags */
+#define TMASYNC 0x80000000L /* perform routine asynchronously */
+#define TMONEPHASE 0x40000000L /* caller is using one-phase commit
+ optimisation */
+#define TMFAIL 0x20000000L /* dissociates caller and marks
+ transaction branch rollback-only */
+#define TMNOWAIT 0x10000000L /* return if blocking condition
+ exists */
+#define TMRESUME 0x08000000L /* caller is resuming association with
+ suspended transaction branch */
+#define TMSUCCESS 0x04000000L /* dissociate caller from transaction
+ branch */
+#define TMSUSPEND 0x02000000L /* caller is suspending, not ending,
+ association */
+#define TMSTARTRSCAN 0x01000000L /* start a recovery scan */
+#define TMENDRSCAN 0x00800000L /* end a recovery scan */
+#define TMMULTIPLE 0x00400000L /* wait for any asynchronous
+ operation */
+#define TMJOIN 0x00200000L /* caller is joining existing
+ transaction branch */
+#define TMMIGRATE 0x00100000L /* caller intends to perform
+ migration */
+
+/*
+ * ax_() return codes (transaction manager reports to resource manager)
+ */
+#define TM_JOIN 2 /* caller is joining existing
+ transaction branch */
+#define TM_RESUME 1 /* caller is resuming association with
+ suspended transaction branch */
+#define TM_OK 0 /* normal execution */
+#define TMER_TMERR -1 /* an error occurred in the transaction
+ manager */
+#define TMER_INVAL -2 /* invalid arguments were given */
+#define TMER_PROTO -3 /* routine invoked in an improper
+ context */
+
+/*
+ * xa_() return codes (resource manager reports to transaction manager)
+ */
+/* Rollback codes, XA_RBBASE through XA_RBEND inclusive. Every replacement
+list that is an expression is parenthesized so that the macros keep their
+value when used inside a larger expression (for example after a
+multiplication or a unary minus). */
+#define XA_RBBASE 100 /* The inclusive lower bound of the
+ rollback codes */
+#define XA_RBROLLBACK XA_RBBASE /* The rollback was caused by an
+ unspecified reason */
+#define XA_RBCOMMFAIL (XA_RBBASE+1) /* The rollback was caused by a
+ communication failure */
+#define XA_RBDEADLOCK (XA_RBBASE+2) /* A deadlock was detected */
+#define XA_RBINTEGRITY (XA_RBBASE+3) /* A condition that violates the
+ integrity of the resources was
+ detected */
+#define XA_RBOTHER (XA_RBBASE+4) /* The resource manager rolled back the
+ transaction branch for a reason not
+ on this list */
+#define XA_RBPROTO (XA_RBBASE+5) /* A protocol error occurred in the
+ resource manager */
+#define XA_RBTIMEOUT (XA_RBBASE+6) /* A transaction branch took too long */
+#define XA_RBTRANSIENT (XA_RBBASE+7) /* May retry the transaction branch */
+#define XA_RBEND XA_RBTRANSIENT /* The inclusive upper bound of the
+ rollback codes */
+#define XA_NOMIGRATE 9 /* resumption must occur where
+ suspension occurred */
+#define XA_HEURHAZ 8 /* the transaction branch may have
+ been heuristically completed */
+#define XA_HEURCOM 7 /* the transaction branch has been
+ heuristically committed */
+#define XA_HEURRB 6 /* the transaction branch has been
+ heuristically rolled back */
+#define XA_HEURMIX 5 /* the transaction branch has been
+ heuristically committed and rolled
+ back */
+#define XA_RETRY 4 /* routine returned with no effect and
+ may be re-issued */
+#define XA_RDONLY 3 /* the transaction branch was read-only
+ and has been committed */
+#define XA_OK 0 /* normal execution */
+#define XAER_ASYNC -2 /* asynchronous operation already
+ outstanding */
+#define XAER_RMERR -3 /* a resource manager error occurred in
+ the transaction branch */
+#define XAER_NOTA -4 /* the XID is not valid */
+#define XAER_INVAL -5 /* invalid arguments were given */
+#define XAER_PROTO -6 /* routine invoked in an improper
+ context */
+#define XAER_RMFAIL -7 /* resource manager unavailable */
+#define XAER_DUPID -8 /* the XID already exists */
+#define XAER_OUTSIDE -9 /* resource manager doing work outside
+ transaction */
+#endif /* ifndef XA_H */
+/*
+ * End of xa.h header
+ */
diff --git a/innobase/include/univ.i b/innobase/include/univ.i
index be71d4211b3..80024f71992 100644
--- a/innobase/include/univ.i
+++ b/innobase/include/univ.i
@@ -88,6 +88,7 @@ memory is read outside the allocated blocks. */
#define UNIV_SEARCH_DEBUG
#define UNIV_SYNC_PERF_STAT
#define UNIV_SEARCH_PERF_STAT
+#define UNIV_SRV_PRINT_LATCH_WAITS
*/
#define UNIV_LIGHT_MEM_DEBUG
diff --git a/innobase/include/ut0byte.h b/innobase/include/ut0byte.h
index a62c2e2e318..22d488abeaf 100644
--- a/innobase/include/ut0byte.h
+++ b/innobase/include/ut0byte.h
@@ -208,7 +208,20 @@ ut_align_down(
/*==========*/
/* out: aligned pointer */
void* ptr, /* in: pointer */
- ulint align_no); /* in: align by this number */
+ ulint align_no) /* in: align by this number */
+ __attribute__((const));
+/*************************************************************
+The following function computes the offset of a pointer from the nearest
+aligned address at or below it, that is, the pointer value masked with
+(align_no - 1). */
+UNIV_INLINE
+ulint
+ut_align_offset(
+/*============*/
+ /* out: distance from aligned
+ pointer */
+ const void* ptr, /* in: pointer; the definition
+ asserts with ut_ad that it is
+ not NULL */
+ ulint align_no) /* in: align by this number; the
+ definition asserts with ut_ad
+ that it is a nonzero power of 2 */
+ __attribute__((const));
/*********************************************************************
Gets the nth bit of a ulint. */
UNIV_INLINE
diff --git a/innobase/include/ut0byte.ic b/innobase/include/ut0byte.ic
index 5a70dcf12a8..e141de3aa3f 100644
--- a/innobase/include/ut0byte.ic
+++ b/innobase/include/ut0byte.ic
@@ -335,6 +335,27 @@ ut_align_down(
return((void*)((((ulint)ptr)) & ~(align_no - 1)));
}
+/*************************************************************
+The following function computes the offset of a pointer from the nearest
+aligned address at or below it, that is, the pointer value masked with
+(align_no - 1). */
+UNIV_INLINE
+ulint
+ut_align_offset(
+/*============*/
+ /* out: distance from
+ aligned pointer */
+ const void* ptr, /* in: pointer */
+ ulint align_no) /* in: align by this number */
+{
+ /* align_no must be nonzero and a power of 2, so that
+ (align_no - 1) is a mask of the low-order bits */
+ ut_ad(align_no > 0);
+ ut_ad(((align_no - 1) & align_no) == 0);
+ ut_ad(ptr);
+
+ /* the pointer is cast to ulint below, so the two types must
+ have the same size */
+ ut_ad(sizeof(void*) == sizeof(ulint));
+
+ return(((ulint)ptr) & (align_no - 1));
+}
+
/*********************************************************************
Gets the nth bit of a ulint. */
UNIV_INLINE
diff --git a/innobase/include/ut0ut.h b/innobase/include/ut0ut.h
index dee8785c9e7..8938957cd12 100644
--- a/innobase/include/ut0ut.h
+++ b/innobase/include/ut0ut.h
@@ -139,6 +139,14 @@ ib_time_t
ut_time(void);
/*=========*/
/**************************************************************
+Returns system time, split into whole seconds since the Epoch and the
+microseconds elapsed after that second; both values are passed back
+through the out parameters. */
+
+void
+ut_usectime(
+/*========*/
+ ulint* sec, /* out: seconds since the Epoch */
+ ulint* ms); /* out: microseconds since the Epoch+*sec
+ (microseconds, despite the name 'ms') */
+/**************************************************************
Returns the difference of two times in seconds. */
double