summary refs log tree commit diff
path: root/innobase/include
diff options
context:
space:
mode:
Diffstat (limited to 'innobase/include')
-rw-r--r-- innobase/include/Makefile.am 2
-rw-r--r-- innobase/include/btr0btr.h 22
-rw-r--r-- innobase/include/btr0btr.ic 9
-rw-r--r-- innobase/include/btr0cur.h 60
-rw-r--r-- innobase/include/btr0cur.ic 4
-rw-r--r-- innobase/include/btr0pcur.h 1
-rw-r--r-- innobase/include/btr0sea.h 10
-rw-r--r-- innobase/include/buf0buf.h 12
-rw-r--r-- innobase/include/buf0flu.ic 2
-rw-r--r-- innobase/include/data0type.h 109
-rw-r--r-- innobase/include/data0type.ic 248
-rw-r--r-- innobase/include/dict0boot.h 1
-rw-r--r-- innobase/include/dict0crea.h 15
-rw-r--r-- innobase/include/dict0dict.h 32
-rw-r--r-- innobase/include/dict0dict.ic 71
-rw-r--r-- innobase/include/dict0mem.h 13
-rw-r--r-- innobase/include/fil0fil.h 2
-rw-r--r-- innobase/include/lock0lock.h 43
-rw-r--r-- innobase/include/lock0lock.ic 5
-rw-r--r-- innobase/include/mtr0log.h 33
-rw-r--r-- innobase/include/mtr0mtr.h 26
-rw-r--r-- innobase/include/os0file.h 3
-rw-r--r-- innobase/include/os0proc.h 28
-rw-r--r-- innobase/include/page0cur.h 64
-rw-r--r-- innobase/include/page0cur.ic 21
-rw-r--r-- innobase/include/page0page.h 211
-rw-r--r-- innobase/include/page0page.ic 144
-rw-r--r-- innobase/include/que0que.h 7
-rw-r--r-- innobase/include/rem0cmp.h 24
-rw-r--r-- innobase/include/rem0cmp.ic 5
-rw-r--r-- innobase/include/rem0rec.h 455
-rw-r--r-- innobase/include/rem0rec.ic 1054
-rw-r--r-- innobase/include/row0mysql.h 129
-rw-r--r-- innobase/include/row0mysql.ic 119
-rw-r--r-- innobase/include/row0row.h 30
-rw-r--r-- innobase/include/row0row.ic 49
-rw-r--r-- innobase/include/row0upd.h 17
-rw-r--r-- innobase/include/row0upd.ic 6
-rw-r--r-- innobase/include/row0vers.h 7
-rw-r--r-- innobase/include/row0vers.ic 70
-rw-r--r-- innobase/include/srv0srv.h 137
-rw-r--r-- innobase/include/srv0start.h 10
-rw-r--r-- innobase/include/sync0rw.h 7
-rw-r--r-- innobase/include/sync0sync.h 30
-rw-r--r-- innobase/include/sync0sync.ic 8
-rw-r--r-- innobase/include/trx0rec.h 1
-rw-r--r-- innobase/include/trx0roll.h 32
-rw-r--r-- innobase/include/trx0sys.ic 7
-rw-r--r-- innobase/include/trx0trx.h 53
-rw-r--r-- innobase/include/trx0undo.h 48
-rw-r--r-- innobase/include/trx0xa.h 182
-rw-r--r-- innobase/include/univ.i 1
-rw-r--r-- innobase/include/ut0byte.h 15
-rw-r--r-- innobase/include/ut0byte.ic 21
-rw-r--r-- innobase/include/ut0ut.h 8
55 files changed, 2668 insertions, 1055 deletions
diff --git a/innobase/include/Makefile.am b/innobase/include/Makefile.am
index 102d25566da..eb1e3b72877 100644
--- a/innobase/include/Makefile.am
+++ b/innobase/include/Makefile.am
@@ -49,7 +49,7 @@ noinst_HEADERS = btr0btr.h btr0btr.ic btr0cur.h btr0cur.ic \
thr0loc.h thr0loc.ic trx0purge.h trx0purge.ic trx0rec.h \
trx0rec.ic trx0roll.h trx0roll.ic trx0rseg.h trx0rseg.ic \
trx0sys.h trx0sys.ic trx0trx.h trx0trx.ic trx0types.h \
- trx0undo.h trx0undo.ic univ.i \
+ trx0undo.h trx0undo.ic trx0xa.h univ.i \
usr0sess.h usr0sess.ic usr0types.h ut0byte.h ut0byte.ic \
ut0dbg.h ut0lst.h ut0mem.h ut0mem.ic ut0rnd.h ut0rnd.ic \
ut0sort.h ut0ut.h ut0ut.ic
diff --git a/innobase/include/btr0btr.h b/innobase/include/btr0btr.h
index 8606fcd2a5c..0b19e64d4e0 100644
--- a/innobase/include/btr0btr.h
+++ b/innobase/include/btr0btr.h
@@ -155,7 +155,8 @@ ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
/* out: child node address */
- rec_t* rec); /* in: node pointer record */
+ rec_t* rec, /* in: node pointer record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/****************************************************************
Creates the root node for a new index tree. */
@@ -167,6 +168,7 @@ btr_create(
ulint type, /* in: type of the index */
ulint space, /* in: space where created */
dulint index_id,/* in: index id */
+ ibool comp, /* in: TRUE=compact page format */
mtr_t* mtr); /* in: mini-transaction handle */
/****************************************************************
Frees a B-tree except the root page, which MUST be freed after this
@@ -210,8 +212,9 @@ Reorganizes an index page. */
void
btr_page_reorganize(
/*================*/
- page_t* page, /* in: page to be reorganized */
- mtr_t* mtr); /* in: mtr */
+ page_t* page, /* in: page to be reorganized */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Decides if the page should be split at the convergence point of
inserts converging to left. */
@@ -273,6 +276,7 @@ void
btr_set_min_rec_mark(
/*=================*/
rec_t* rec, /* in: record */
+ ibool comp, /* in: TRUE=compact page format */
mtr_t* mtr); /* in: mtr */
/*****************************************************************
Deletes on the upper level the node pointer to a page. */
@@ -332,6 +336,7 @@ btr_parse_set_min_rec_mark(
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
+ ibool comp, /* in: TRUE=compact page format */
page_t* page, /* in: page or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/***************************************************************
@@ -340,11 +345,12 @@ Parses a redo log record of reorganizing a page. */
byte*
btr_parse_page_reorganize(
/*======================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ dict_index_t* index, /* in: record descriptor */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
/******************************************************************
Gets the number of pages in a B-tree. */
diff --git a/innobase/include/btr0btr.ic b/innobase/include/btr0btr.ic
index b0aa0756307..1d1f97d3668 100644
--- a/innobase/include/btr0btr.ic
+++ b/innobase/include/btr0btr.ic
@@ -183,17 +183,18 @@ ulint
btr_node_ptr_get_child_page_no(
/*===========================*/
/* out: child node address */
- rec_t* rec) /* in: node pointer record */
+ rec_t* rec, /* in: node pointer record */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
- ulint n_fields;
byte* field;
ulint len;
ulint page_no;
- n_fields = rec_get_n_fields(rec);
+ ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec));
/* The child address is in the last field */
- field = rec_get_nth_field(rec, n_fields - 1, &len);
+ field = rec_get_nth_field(rec, offsets,
+ rec_offs_n_fields(offsets) - 1, &len);
ut_ad(len == 4);
diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h
index f1334656d53..0a8d8ceaeb7 100644
--- a/innobase/include/btr0cur.h
+++ b/innobase/include/btr0cur.h
@@ -34,7 +34,7 @@ page_cur_t*
btr_cur_get_page_cur(
/*=================*/
/* out: pointer to page cursor component */
- btr_cur_t* cursor); /* in: tree cursor */
+ btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Returns the record pointer of a tree cursor. */
UNIV_INLINE
@@ -42,14 +42,14 @@ rec_t*
btr_cur_get_rec(
/*============*/
/* out: pointer to record */
- btr_cur_t* cursor); /* in: tree cursor */
+ btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Invalidates a tree cursor by setting record pointer to NULL. */
UNIV_INLINE
void
btr_cur_invalidate(
/*===============*/
- btr_cur_t* cursor); /* in: tree cursor */
+ btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Returns the page of a tree cursor. */
UNIV_INLINE
@@ -57,7 +57,7 @@ page_t*
btr_cur_get_page(
/*=============*/
/* out: pointer to page */
- btr_cur_t* cursor); /* in: tree cursor */
+ btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Returns the tree of a cursor. */
UNIV_INLINE
@@ -65,7 +65,7 @@ dict_tree_t*
btr_cur_get_tree(
/*=============*/
/* out: tree */
- btr_cur_t* cursor); /* in: tree cursor */
+ btr_cur_t* cursor);/* in: tree cursor */
/*************************************************************
Positions a tree cursor at a given record. */
UNIV_INLINE
@@ -283,8 +283,9 @@ only used by the insert buffer insert merge mechanism. */
void
btr_cur_del_unmark_for_ibuf(
/*========================*/
- rec_t* rec, /* in: record to delete unmark */
- mtr_t* mtr); /* in: mtr */
+ rec_t* rec, /* in: record to delete unmark */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Tries to compress a page of the tree on the leaf level. It is assumed
that mtr holds an x-latch on the tree and on the cursor page. To avoid
@@ -361,10 +362,11 @@ Parses a redo log record of updating a record in-place. */
byte*
btr_cur_parse_update_in_place(
/*==========================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page); /* in: page or NULL */
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ page_t* page, /* in: page or NULL */
+ dict_index_t* index); /* in: index corresponding to page */
/********************************************************************
Parses the redo log record for delete marking or unmarking of a clustered
index record. */
@@ -372,10 +374,11 @@ index record. */
byte*
btr_cur_parse_del_mark_set_clust_rec(
/*=================================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page); /* in: page or NULL */
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ dict_index_t* index, /* in: index corresponding to page */
+ page_t* page); /* in: page or NULL */
/********************************************************************
Parses the redo log record for delete marking or unmarking of a secondary
index record. */
@@ -383,10 +386,11 @@ index record. */
byte*
btr_cur_parse_del_mark_set_sec_rec(
/*===============================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page); /* in: page or NULL */
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ dict_index_t* index, /* in: index corresponding to page */
+ page_t* page); /* in: page or NULL */
/***********************************************************************
Estimates the number of rows in a given index range. */
@@ -417,9 +421,10 @@ to free the field. */
void
btr_cur_mark_extern_inherited_fields(
/*=================================*/
- rec_t* rec, /* in: record in a clustered index */
- upd_t* update, /* in: update vector */
- mtr_t* mtr); /* in: mtr */
+ rec_t* rec, /* in: record in a clustered index */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ upd_t* update, /* in: update vector */
+ mtr_t* mtr); /* in: mtr */
/***********************************************************************
The complement of the previous function: in an update entry may inherit
some externally stored fields from a record. We must mark them as inherited
@@ -456,6 +461,7 @@ btr_store_big_rec_extern_fields(
dict_index_t* index, /* in: index of rec; the index tree
MUST be X-latched */
rec_t* rec, /* in: record */
+ const ulint* offsets, /* in: rec_get_offsets(rec, index) */
big_rec_t* big_rec_vec, /* in: vector containing fields
to be stored externally */
mtr_t* local_mtr); /* in: mtr containing the latch to
@@ -496,6 +502,7 @@ btr_rec_free_externally_stored_fields(
dict_index_t* index, /* in: index of the data, the index
tree MUST be X-latched */
rec_t* rec, /* in: record */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
ibool do_not_free_inherited,/* in: TRUE if called in a
rollback and we do not want to free
inherited fields */
@@ -510,6 +517,7 @@ btr_rec_copy_externally_stored_field(
/*=================================*/
/* out: the field copied to heap */
rec_t* rec, /* in: record */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint no, /* in: field number */
ulint* len, /* out: length of the field */
mem_heap_t* heap); /* in: mem heap */
@@ -540,10 +548,10 @@ ulint
btr_push_update_extern_fields(
/*==========================*/
/* out: number of values stored in ext_vect */
- ulint* ext_vect, /* in: array of ulints, must be preallocated
- to have place for all fields in rec */
- rec_t* rec, /* in: record */
- upd_t* update); /* in: update vector */
+ ulint* ext_vect,/* in: array of ulints, must be preallocated
+ to have space for all fields in rec */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ upd_t* update);/* in: update vector or NULL */
/*######################################################################*/
diff --git a/innobase/include/btr0cur.ic b/innobase/include/btr0cur.ic
index a3a04b60c45..dcad3e9e14d 100644
--- a/innobase/include/btr0cur.ic
+++ b/innobase/include/btr0cur.ic
@@ -134,17 +134,15 @@ btr_cur_can_delete_without_compress(
/* out: TRUE if can be deleted without
recommended compression */
btr_cur_t* cursor, /* in: btr cursor */
+ ulint rec_size,/* in: rec_get_size(btr_cur_get_rec(cursor))*/
mtr_t* mtr) /* in: mtr */
{
- ulint rec_size;
page_t* page;
ut_ad(mtr_memo_contains(mtr, buf_block_align(
btr_cur_get_page(cursor)),
MTR_MEMO_PAGE_X_FIX));
- rec_size = rec_get_size(btr_cur_get_rec(cursor));
-
page = btr_cur_get_page(cursor);
if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT)
diff --git a/innobase/include/btr0pcur.h b/innobase/include/btr0pcur.h
index 9339eb5d0ee..eb3822aab7a 100644
--- a/innobase/include/btr0pcur.h
+++ b/innobase/include/btr0pcur.h
@@ -462,6 +462,7 @@ struct btr_pcur_struct{
contains an initial segment of the
latest record cursor was positioned
either on, before, or after */
+ ulint old_n_fields; /* number of fields in old_rec */
ulint rel_pos; /* BTR_PCUR_ON, BTR_PCUR_BEFORE, or
BTR_PCUR_AFTER, depending on whether
cursor was on, before, or after the
diff --git a/innobase/include/btr0sea.h b/innobase/include/btr0sea.h
index ce4140ecf92..78e88a24083 100644
--- a/innobase/include/btr0sea.h
+++ b/innobase/include/btr0sea.h
@@ -77,8 +77,10 @@ parameters as page (this often happens when a page is split). */
void
btr_search_move_or_delete_hash_entries(
/*===================================*/
- page_t* new_page, /* in: records are copied to this page */
- page_t* page); /* in: index page */
+ page_t* new_page, /* in: records are copied
+ to this page */
+ page_t* page, /* in: index page */
+ dict_index_t* index); /* in: record descriptor */
/************************************************************************
Drops a page hash index. */
@@ -129,8 +131,8 @@ Validates the search system. */
ibool
btr_search_validate(void);
-/*=====================*/
-
+/*======================*/
+ /* out: TRUE if ok */
/* Search info directions */
#define BTR_SEA_NO_DIRECTION 1
diff --git a/innobase/include/buf0buf.h b/innobase/include/buf0buf.h
index 53599d03c73..5ee323f1b1e 100644
--- a/innobase/include/buf0buf.h
+++ b/innobase/include/buf0buf.h
@@ -52,11 +52,15 @@ Created 11/5/1995 Heikki Tuuri
/* Modes for buf_page_get_known_nowait */
#define BUF_MAKE_YOUNG 51
#define BUF_KEEP_OLD 52
+/* Magic value to use instead of checksums when they are disabled */
+#define BUF_NO_CHECKSUM_MAGIC 0xDEADBEEFUL
extern buf_pool_t* buf_pool; /* The buffer pool of the database */
extern ibool buf_debug_prints;/* If this is set TRUE, the program
prints info whenever read or flush
occurs */
+extern ulint srv_buf_pool_write_requests; /* variable to count write requests
+ issued */
/************************************************************************
Creates the buffer pool. */
@@ -496,6 +500,12 @@ void
buf_print(void);
/*============*/
/*************************************************************************
+Returns the number of latched pages in the buffer pool. */
+
+ulint
+buf_get_latched_pages_number(void);
+/*==============================*/
+/*************************************************************************
Returns the number of pending buf pool ios. */
ulint
@@ -731,6 +741,8 @@ struct buf_block_struct{
buffer pool which are index pages,
but this flag is not set because
we do not keep track of all pages */
+ dict_index_t* index; /* index for which the adaptive
+ hash index has been created */
/* 2. Page flushing fields */
UT_LIST_NODE_T(buf_block_t) flush_list;
diff --git a/innobase/include/buf0flu.ic b/innobase/include/buf0flu.ic
index d6dbdcc0865..9a8a021e029 100644
--- a/innobase/include/buf0flu.ic
+++ b/innobase/include/buf0flu.ic
@@ -61,6 +61,8 @@ buf_flush_note_modification(
ut_ad(ut_dulint_cmp(block->oldest_modification,
mtr->start_lsn) <= 0);
}
+
+ ++srv_buf_pool_write_requests;
}
/************************************************************************
diff --git a/innobase/include/data0type.h b/innobase/include/data0type.h
index 02c874836fd..a4d2c1a2e1d 100644
--- a/innobase/include/data0type.h
+++ b/innobase/include/data0type.h
@@ -24,7 +24,11 @@ extern dtype_t* dtype_binary;
/*-------------------------------------------*/
/* The 'MAIN TYPE' of a column */
#define DATA_VARCHAR 1 /* character varying of the
- latin1_swedish_ci charset-collation */
+ latin1_swedish_ci charset-collation; note
+ that the MySQL format for this, as well as for
+ DATA_BINARY and DATA_VARMYSQL, is also affected
+ by whether the 'precise type' contains
+ DATA_MYSQL_TRUE_VARCHAR */
#define DATA_CHAR 2 /* fixed length character of the
latin1_swedish_ci charset-collation */
#define DATA_FIXBINARY 3 /* binary string of fixed length */
@@ -32,7 +36,9 @@ extern dtype_t* dtype_binary;
#define DATA_BLOB 5 /* binary large object, or a TEXT type;
if prtype & DATA_BINARY_TYPE == 0, then this is
actually a TEXT column (or a BLOB created
- with < 4.0.14) */
+ with < 4.0.14; since column prefix indexes
+ came only in 4.0.14, the missing flag in BLOBs
+ created before that does not cause any harm) */
#define DATA_INT 6 /* integer: can be any size 1 - 8 bytes */
#define DATA_SYS_CHILD 7 /* address of the child page in node pointer */
#define DATA_SYS 8 /* system column */
@@ -102,6 +108,8 @@ columns, and for them the precise type is usually not used at all.
#define DATA_MYSQL_TYPE_MASK 255 /* AND with this mask to extract the MySQL
type from the precise type */
+#define DATA_MYSQL_TRUE_VARCHAR 15 /* MySQL type code for the >= 5.0.3
+ format true VARCHAR */
/* Precise data types for system columns and the length of those columns;
NOTE: the values must run from 0 up in the order given! All codes must
@@ -134,6 +142,10 @@ be less than 256 */
In earlier versions this was set for some
BLOB columns.
*/
+#define DATA_LONG_TRUE_VARCHAR 4096 /* this is ORed to the precise data
+ type when the column is true VARCHAR where
+ MySQL uses 2 bytes to store the data len;
+ for shorter VARCHARs MySQL uses only 1 byte */
/*-------------------------------------------*/
/* This many bytes we need to store the type information affecting the
@@ -145,28 +157,31 @@ store the charset-collation number; one byte is left unused, though */
#define DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE 6
/*************************************************************************
-Checks if a string type has to be compared by the MySQL comparison functions.
-InnoDB internally only handles binary byte string comparisons, as well as
-latin1_swedish_ci strings. For example, UTF-8 strings have to be compared
-by MySQL. */
-
-ibool
-dtype_str_needs_mysql_cmp(
-/*======================*/
- /* out: TRUE if a string type that requires
- comparison with MySQL functions */
- dtype_t* dtype); /* in: type struct */
+Gets the MySQL type code from a dtype. */
+UNIV_INLINE
+ulint
+dtype_get_mysql_type(
+/*=================*/
+ /* out: MySQL type code; this is NOT an InnoDB
+ type code! */
+ dtype_t* type); /* in: type struct */
/*************************************************************************
-For the documentation of this function, see innobase_get_at_most_n_mbchars()
-in ha_innodb.cc. */
+Determine how many bytes the first n characters of the given string occupy.
+If the string is shorter than n characters, returns the number of bytes
+the characters in the string occupy. */
ulint
dtype_get_at_most_n_mbchars(
/*========================*/
- dtype_t* dtype,
- ulint prefix_len,
- ulint data_len,
- const char* str);
+ /* out: length of the prefix,
+ in bytes */
+ const dtype_t* dtype, /* in: data type */
+ ulint prefix_len, /* in: length of the requested
+ prefix, in characters, multiplied by
+ dtype_get_mbmaxlen(dtype) */
+ ulint data_len, /* in: length of str (in bytes) */
+ const char* str); /* in: the string whose prefix
+ length is being determined */
/*************************************************************************
Checks if a data main type is a string type. Also a BLOB is considered a
string type. */
@@ -271,6 +286,24 @@ dtype_get_prec(
/*===========*/
dtype_t* type);
/*************************************************************************
+Gets the minimum length of a character, in bytes. */
+UNIV_INLINE
+ulint
+dtype_get_mbminlen(
+/*===============*/
+ /* out: minimum length of a char, in bytes,
+ or 0 if this is not a character type */
+ const dtype_t* type); /* in: type */
+/*************************************************************************
+Gets the maximum length of a character, in bytes. */
+UNIV_INLINE
+ulint
+dtype_get_mbmaxlen(
+/*===============*/
+ /* out: maximum length of a char, in bytes,
+ or 0 if this is not a character type */
+ const dtype_t* type); /* in: type */
+/*************************************************************************
Gets the padding character code for the type. */
UNIV_INLINE
ulint
@@ -288,6 +321,14 @@ dtype_get_fixed_size(
/* out: fixed size, or 0 */
dtype_t* type); /* in: type */
/***************************************************************************
+Returns the minimum size of a data type. */
+UNIV_INLINE
+ulint
+dtype_get_min_size(
+/*===============*/
+ /* out: minimum size */
+ const dtype_t* type); /* in: type */
+/***************************************************************************
Returns a stored SQL NULL size for a type. For fixed length types it is
the fixed length of the type, otherwise 0. */
UNIV_INLINE
@@ -352,16 +393,34 @@ dtype_print(
/*========*/
dtype_t* type); /* in: type */
-/* Structure for an SQL data type */
+/* Structure for an SQL data type.
+If you add fields to this structure, be sure to initialize them everywhere.
+This structure is initialized in the following functions:
+dtype_set()
+dtype_read_for_order_and_null_size()
+dtype_new_read_for_order_and_null_size()
+sym_tab_add_null_lit() */
struct dtype_struct{
ulint mtype; /* main data type */
- ulint prtype; /* precise type; MySQL data type */
-
- /* the remaining two fields do not affect alphabetical ordering: */
-
- ulint len; /* length */
+ ulint prtype; /* precise type; MySQL data type, charset code,
+ flags to indicate nullability, signedness,
+ whether this is a binary string, whether this
+ is a true VARCHAR where MySQL uses 2 bytes to
+ store the length */
+
+ /* the remaining fields do not affect alphabetical ordering: */
+
+ ulint len; /* length; for MySQL data this is
+ field->pack_length(), except that for a
+ >= 5.0.3 type true VARCHAR this is the
+ maximum byte length of the string data
+ (in addition to the string, MySQL uses 1 or
+ 2 bytes to store the string length) */
ulint prec; /* precision */
+
+ ulint mbminlen; /* minimum length of a character, in bytes */
+ ulint mbmaxlen; /* maximum length of a character, in bytes */
};
#ifndef UNIV_NONINL
diff --git a/innobase/include/data0type.ic b/innobase/include/data0type.ic
index 946b646ffbf..a87a08ca582 100644
--- a/innobase/include/data0type.ic
+++ b/innobase/include/data0type.ic
@@ -8,6 +8,61 @@ Created 1/16/1996 Heikki Tuuri
#include "mach0data.h"
+/**********************************************************************
+Get the variable length bounds of the given character set.
+
+NOTE: the prototype of this function is copied from ha_innodb.cc! If you change
+this function, you MUST change also the prototype here! */
+extern
+void
+innobase_get_cset_width(
+/*====================*/
+ ulint cset, /* in: MySQL charset-collation code */
+ ulint* mbminlen, /* out: minimum length of a char (in bytes) */
+ ulint* mbmaxlen); /* out: maximum length of a char (in bytes) */
+
+/*************************************************************************
+Gets the MySQL charset-collation code for MySQL string types. */
+UNIV_INLINE
+ulint
+dtype_get_charset_coll(
+/*===================*/
+ ulint prtype) /* in: precise data type */
+{
+ return((prtype >> 16) & 0xFFUL);
+}
+
+/*************************************************************************
+Gets the MySQL type code from a dtype. */
+UNIV_INLINE
+ulint
+dtype_get_mysql_type(
+/*=================*/
+ /* out: MySQL type code; this is NOT an InnoDB
+ type code! */
+ dtype_t* type) /* in: type struct */
+{
+ return(type->prtype & 0xFFUL);
+}
+
+/*************************************************************************
+Sets the mbminlen and mbmaxlen members of a data type structure. */
+UNIV_INLINE
+void
+dtype_set_mblen(
+/*============*/
+ dtype_t* type) /* in/out: type struct */
+{
+ ut_ad(type);
+ if (dtype_is_string_type(type->mtype)) {
+ innobase_get_cset_width(dtype_get_charset_coll(type->prtype),
+ &type->mbminlen, &type->mbmaxlen);
+ ut_ad(type->mbminlen <= type->mbmaxlen);
+ } else {
+ type->mbminlen = type->mbmaxlen = 0;
+ }
+}
+
/*************************************************************************
Sets a data type structure. */
UNIV_INLINE
@@ -28,6 +83,7 @@ dtype_set(
type->len = len;
type->prec = prec;
+ dtype_set_mblen(type);
ut_ad(dtype_validate(type));
}
@@ -72,17 +128,6 @@ dtype_get_prtype(
}
/*************************************************************************
-Gets the MySQL charset-collation code for MySQL string types. */
-UNIV_INLINE
-ulint
-dtype_get_charset_coll(
-/*===================*/
- ulint prtype) /* in: precise data type */
-{
- return((prtype >> 16) & 0xFFUL);
-}
-
-/*************************************************************************
Gets the type length. */
UNIV_INLINE
ulint
@@ -109,6 +154,33 @@ dtype_get_prec(
}
/*************************************************************************
+Gets the minimum length of a character, in bytes. */
+UNIV_INLINE
+ulint
+dtype_get_mbminlen(
+/*===============*/
+ /* out: minimum length of a char, in bytes,
+ or 0 if this is not a character type */
+ const dtype_t* type) /* in: type */
+{
+ ut_ad(type);
+ return(type->mbminlen);
+}
+/*************************************************************************
+Gets the maximum length of a character, in bytes. */
+UNIV_INLINE
+ulint
+dtype_get_mbmaxlen(
+/*===============*/
+ /* out: maximum length of a char, in bytes,
+ or 0 if this is not a character type */
+ const dtype_t* type) /* in: type */
+{
+ ut_ad(type);
+ return(type->mbmaxlen);
+}
+
+/*************************************************************************
Gets the padding character code for the type. */
UNIV_INLINE
ulint
@@ -123,10 +195,12 @@ dtype_get_pad_char(
|| type->mtype == DATA_BINARY
|| type->mtype == DATA_FIXBINARY
|| type->mtype == DATA_MYSQL
- || type->mtype == DATA_VARMYSQL) {
+ || type->mtype == DATA_VARMYSQL
+ || (type->mtype == DATA_BLOB
+ && (type->prtype & DATA_BINARY_TYPE) == 0)) {
/* Space is the padding character for all char and binary
- strings */
+ strings, and starting from 5.0.3, also for TEXT strings. */
return((ulint)' ');
}
@@ -149,8 +223,10 @@ dtype_new_store_for_order_and_null_size(
bytes where we store the info */
dtype_t* type) /* in: type struct */
{
- ut_ad(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
-
+#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
+#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
+#endif
+
buf[0] = (byte)(type->mtype & 0xFFUL);
if (type->prtype & DATA_BINARY_TYPE) {
@@ -166,10 +242,12 @@ dtype_new_store_for_order_and_null_size(
mach_write_to_2(buf + 2, type->len & 0xFFFFUL);
+ ut_ad(dtype_get_charset_coll(type->prtype) < 256);
mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype));
- /* Note that the second last byte is left unused, because the
- charset-collation code is always < 256 */
+ if (type->prtype & DATA_NOT_NULL) {
+ buf[4] |= 128;
+ }
}
/**************************************************************************
@@ -196,6 +274,7 @@ dtype_read_for_order_and_null_size(
type->prtype = dtype_form_prtype(type->prtype,
data_mysql_default_charset_coll);
+ dtype_set_mblen(type);
}
/**************************************************************************
@@ -211,20 +290,26 @@ dtype_new_read_for_order_and_null_size(
{
ulint charset_coll;
- ut_ad(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE);
+#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE
+#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE"
+#endif
type->mtype = buf[0] & 63;
type->prtype = buf[1];
if (buf[0] & 128) {
- type->prtype = type->prtype | DATA_BINARY_TYPE;
+ type->prtype |= DATA_BINARY_TYPE;
+ }
+
+ if (buf[4] & 128) {
+ type->prtype |= DATA_NOT_NULL;
}
type->len = mach_read_from_2(buf + 2);
mach_read_from_2(buf + 4);
- charset_coll = mach_read_from_2(buf + 4);
+ charset_coll = mach_read_from_2(buf + 4) & 0x7fff;
if (dtype_is_string_type(type->mtype)) {
ut_a(charset_coll < 256);
@@ -241,8 +326,10 @@ dtype_new_read_for_order_and_null_size(
type->prtype = dtype_form_prtype(type->prtype, charset_coll);
}
+ dtype_set_mblen(type);
}
+#ifndef UNIV_HOTBACKUP
/***************************************************************************
Returns the size of a fixed size data type, 0 if not a fixed size type. */
UNIV_INLINE
@@ -257,23 +344,127 @@ dtype_get_fixed_size(
mtype = dtype_get_mtype(type);
switch (mtype) {
+ case DATA_SYS:
+#ifdef UNIV_DEBUG
+ switch (type->prtype & DATA_MYSQL_TYPE_MASK) {
+ default:
+ ut_ad(0);
+ return(0);
+ case DATA_ROW_ID:
+ ut_ad(type->len == DATA_ROW_ID_LEN);
+ break;
+ case DATA_TRX_ID:
+ ut_ad(type->len == DATA_TRX_ID_LEN);
+ break;
+ case DATA_ROLL_PTR:
+ ut_ad(type->len == DATA_ROLL_PTR_LEN);
+ break;
+ case DATA_MIX_ID:
+ ut_ad(type->len == DATA_MIX_ID_LEN);
+ break;
+ }
+#endif /* UNIV_DEBUG */
case DATA_CHAR:
case DATA_FIXBINARY:
case DATA_INT:
case DATA_FLOAT:
case DATA_DOUBLE:
- case DATA_MYSQL:
return(dtype_get_len(type));
-
- case DATA_SYS: if (type->prtype == DATA_ROW_ID) {
- return(DATA_ROW_ID_LEN);
- } else if (type->prtype == DATA_TRX_ID) {
- return(DATA_TRX_ID_LEN);
- } else if (type->prtype == DATA_ROLL_PTR) {
- return(DATA_ROLL_PTR_LEN);
+ case DATA_MYSQL:
+ if (type->prtype & DATA_BINARY_TYPE) {
+ return(dtype_get_len(type));
} else {
+ /* We play it safe here and ask MySQL for
+ mbminlen and mbmaxlen. Although
+ type->mbminlen and type->mbmaxlen are
+ initialized if and only if type->prtype
+ is (in one of the 3 functions in this file),
+ it could be that none of these functions
+ has been called. */
+
+ ulint mbminlen, mbmaxlen;
+
+ innobase_get_cset_width(
+ dtype_get_charset_coll(type->prtype),
+ &mbminlen, &mbmaxlen);
+
+ if (type->mbminlen != mbminlen
+ || type->mbmaxlen != mbmaxlen) {
+
+ ut_print_timestamp(stderr);
+ fprintf(stderr, " InnoDB: "
+ "mbminlen=%lu, "
+ "mbmaxlen=%lu, "
+ "type->mbminlen=%lu, "
+ "type->mbmaxlen=%lu\n",
+ (ulong) mbminlen,
+ (ulong) mbmaxlen,
+ (ulong) type->mbminlen,
+ (ulong) type->mbmaxlen);
+ }
+ if (mbminlen == mbmaxlen) {
+ return(dtype_get_len(type));
+ }
+ }
+ /* fall through for variable-length charsets */
+ case DATA_VARCHAR:
+ case DATA_BINARY:
+ case DATA_DECIMAL:
+ case DATA_VARMYSQL:
+ case DATA_BLOB:
+ return(0);
+ default: ut_error;
+ }
+
+ return(0);
+}
+
+/***************************************************************************
+Returns the minimum size of a data type. */
+UNIV_INLINE
+ulint
+dtype_get_min_size(
+/*===============*/
+ /* out: minimum size */
+ const dtype_t* type) /* in: type */
+{
+ switch (type->mtype) {
+ case DATA_SYS:
+#ifdef UNIV_DEBUG
+ switch (type->prtype & DATA_MYSQL_TYPE_MASK) {
+ default:
+ ut_ad(0);
return(0);
+ case DATA_ROW_ID:
+ ut_ad(type->len == DATA_ROW_ID_LEN);
+ break;
+ case DATA_TRX_ID:
+ ut_ad(type->len == DATA_TRX_ID_LEN);
+ break;
+ case DATA_ROLL_PTR:
+ ut_ad(type->len == DATA_ROLL_PTR_LEN);
+ break;
+ case DATA_MIX_ID:
+ ut_ad(type->len == DATA_MIX_ID_LEN);
+ break;
+ }
+#endif /* UNIV_DEBUG */
+ case DATA_CHAR:
+ case DATA_FIXBINARY:
+ case DATA_INT:
+ case DATA_FLOAT:
+ case DATA_DOUBLE:
+ return(type->len);
+ case DATA_MYSQL:
+ if ((type->prtype & DATA_BINARY_TYPE)
+ || type->mbminlen == type->mbmaxlen) {
+ return(type->len);
}
+ /* this is a variable-length character set */
+ ut_a(type->mbminlen > 0);
+ ut_a(type->mbmaxlen > type->mbminlen);
+ ut_a(type->len % type->mbmaxlen == 0);
+ return(type->len * type->mbminlen / type->mbmaxlen);
case DATA_VARCHAR:
case DATA_BINARY:
case DATA_DECIMAL:
@@ -285,6 +476,7 @@ dtype_get_fixed_size(
return(0);
}
+#endif /* !UNIV_HOTBACKUP */
/***************************************************************************
Returns a stored SQL NULL size for a type. For fixed length types it is
diff --git a/innobase/include/dict0boot.h b/innobase/include/dict0boot.h
index 35eff5af29a..86702cbca05 100644
--- a/innobase/include/dict0boot.h
+++ b/innobase/include/dict0boot.h
@@ -119,6 +119,7 @@ dict_create(void);
clustered index */
#define DICT_SYS_INDEXES_PAGE_NO_FIELD 8
#define DICT_SYS_INDEXES_SPACE_NO_FIELD 7
+#define DICT_SYS_INDEXES_TYPE_FIELD 6
/* When a row id which is zero modulo this number (which must be a power of
two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is
diff --git a/innobase/include/dict0crea.h b/innobase/include/dict0crea.h
index 8b6944fc605..5dd571be59c 100644
--- a/innobase/include/dict0crea.h
+++ b/innobase/include/dict0crea.h
@@ -54,6 +54,20 @@ dict_create_index_step(
/* out: query thread to run next or NULL */
que_thr_t* thr); /* in: query thread */
/***********************************************************************
+Truncates the index tree associated with a row in SYS_INDEXES table. */
+
+ulint
+dict_truncate_index_tree(
+/*=====================*/
+ /* out: new root page number, or
+ FIL_NULL on failure */
+ dict_table_t* table, /* in: the table the index belongs to */
+ rec_t* rec, /* in: record in the clustered index of
+ SYS_INDEXES table */
+ mtr_t* mtr); /* in: mtr having the latch
+ on the record page. The mtr may be
+ committed and restarted in this call. */
+/***********************************************************************
Drops the index tree associated with a row in SYS_INDEXES table. */
void
@@ -142,6 +156,7 @@ struct ind_node_struct{
/*----------------------*/
/* Local storage for this graph node */
ulint state; /* node execution state */
+ ulint page_no;/* root page number of the index */
dict_table_t* table; /* table which owns the index */
dtuple_t* ind_row;/* index definition row built */
ulint field_no;/* next field definition to insert */
diff --git a/innobase/include/dict0dict.h b/innobase/include/dict0dict.h
index 3333385ec56..d9cda402bac 100644
--- a/innobase/include/dict0dict.h
+++ b/innobase/include/dict0dict.h
@@ -516,8 +516,9 @@ dict_index_add_to_cache(
/*====================*/
/* out: TRUE if success */
dict_table_t* table, /* in: table on which the index is */
- dict_index_t* index); /* in, own: index; NOTE! The index memory
+ dict_index_t* index, /* in, own: index; NOTE! The index memory
object is freed in this function! */
+ ulint page_no);/* in: root page number of the index */
/************************************************************************
Gets the number of fields in the internal representation of an index,
including fields added by the dictionary system. */
@@ -647,6 +648,16 @@ dict_index_get_sys_col_pos(
dict_index_t* index, /* in: index */
ulint type); /* in: DATA_ROW_ID, ... */
/***********************************************************************
+Adds a column to index. */
+
+void
+dict_index_add_col(
+/*===============*/
+ dict_index_t* index, /* in: index */
+ dict_col_t* col, /* in: column */
+ ulint order, /* in: order criterion */
+ ulint prefix_len); /* in: column prefix length */
+/***********************************************************************
Copies types of fields contained in index to tuple. */
void
@@ -655,18 +666,6 @@ dict_index_copy_types(
dtuple_t* tuple, /* in: data tuple */
dict_index_t* index, /* in: index */
ulint n_fields); /* in: number of field types to copy */
-/************************************************************************
-Gets the value of a system column in a clustered index record. The clustered
-index must contain the system column: if the index is unique, row id is
-not contained there! */
-UNIV_INLINE
-dulint
-dict_index_rec_get_sys_col(
-/*=======================*/
- /* out: system column value */
- dict_index_t* index, /* in: clustered index describing the record */
- ulint type, /* in: column type: DATA_ROLL_PTR, ... */
- rec_t* rec); /* in: record */
/*************************************************************************
Gets the index tree where the index is stored. */
UNIV_INLINE
@@ -696,9 +695,10 @@ dict_tree_t*
dict_tree_create(
/*=============*/
/* out, own: created tree */
- dict_index_t* index); /* in: the index for which to create: in the
+ dict_index_t* index, /* in: the index for which to create: in the
case of a mixed tree, this should be the
index of the cluster object */
+ ulint page_no);/* in: root page number of the index */
/**************************************************************************
Frees an index tree struct. */
@@ -728,7 +728,7 @@ dict_tree_find_index_for_tuple(
dtuple_t* tuple); /* in: tuple for which to find index */
/***********************************************************************
Checks if a table which is a mixed cluster member owns a record. */
-UNIV_INLINE
+
ibool
dict_is_mixed_table_rec(
/*====================*/
@@ -778,6 +778,7 @@ dict_tree_copy_rec_order_prefix(
/* out: pointer to the prefix record */
dict_tree_t* tree, /* in: index tree */
rec_t* rec, /* in: record for which to copy prefix */
+ ulint* n_fields,/* out: number of fields copied */
byte** buf, /* in/out: memory buffer for the copied prefix,
or NULL */
ulint* buf_size);/* in/out: buffer size */
@@ -790,6 +791,7 @@ dict_tree_build_data_tuple(
/* out, own: data tuple */
dict_tree_t* tree, /* in: index tree */
rec_t* rec, /* in: record for which to build data tuple */
+ ulint n_fields,/* in: number of data fields */
mem_heap_t* heap); /* in: memory heap where tuple created */
/*************************************************************************
Gets the space id of the root of the index tree. */
diff --git a/innobase/include/dict0dict.ic b/innobase/include/dict0dict.ic
index 85e4aaf1a05..928a693f860 100644
--- a/innobase/include/dict0dict.ic
+++ b/innobase/include/dict0dict.ic
@@ -9,7 +9,6 @@ Created 1/8/1996 Heikki Tuuri
#include "dict0load.h"
#include "trx0undo.h"
#include "trx0sys.h"
-#include "rem0rec.h"
/*************************************************************************
Gets the column data type. */
@@ -168,7 +167,7 @@ dict_table_get_sys_col(
col = dict_table_get_nth_col(table, table->n_cols
- DATA_N_SYS_COLS + sys);
ut_ad(col->type.mtype == DATA_SYS);
- ut_ad(col->type.prtype == sys);
+ ut_ad(col->type.prtype == (sys | DATA_NOT_NULL));
return(col);
}
@@ -312,49 +311,6 @@ dict_index_get_sys_col_pos(
dict_table_get_sys_col_no(index->table, type)));
}
-/************************************************************************
-Gets the value of a system column in a clustered index record. The clustered
-index must contain the system column: if the index is unique, row id is
-not contained there! */
-UNIV_INLINE
-dulint
-dict_index_rec_get_sys_col(
-/*=======================*/
- /* out: system column value */
- dict_index_t* index, /* in: clustered index describing the record */
- ulint type, /* in: column type: DATA_ROLL_PTR, ... */
- rec_t* rec) /* in: record */
-{
- ulint pos;
- byte* field;
- ulint len;
-
- ut_ad(index);
- ut_ad(index->type & DICT_CLUSTERED);
-
- pos = dict_index_get_sys_col_pos(index, type);
-
- ut_ad(pos != ULINT_UNDEFINED);
-
- field = rec_get_nth_field(rec, pos, &len);
-
- if (type == DATA_ROLL_PTR) {
- ut_ad(len == 7);
-
- return(trx_read_roll_ptr(field));
- } else if (type == DATA_TRX_ID) {
-
- return(trx_read_trx_id(field));
- } else if (type == DATA_MIX_ID) {
-
- return(mach_dulint_read_compressed(field));
- } else {
- ut_a(type == DATA_ROW_ID);
-
- return(mach_read_from_6(field));
- }
-}
-
/*************************************************************************
Gets the index tree where the index is stored. */
UNIV_INLINE
@@ -662,28 +618,3 @@ dict_table_get_index(
return(index);
}
-
-/***********************************************************************
-Checks if a table which is a mixed cluster member owns a record. */
-UNIV_INLINE
-ibool
-dict_is_mixed_table_rec(
-/*====================*/
- /* out: TRUE if the record belongs to this
- table */
- dict_table_t* table, /* in: table in a mixed cluster */
- rec_t* rec) /* in: user record in the clustered index */
-{
- byte* mix_id_field;
- ulint len;
-
- mix_id_field = rec_get_nth_field(rec, table->mix_len, &len);
-
- if ((len != table->mix_id_len)
- || (0 != ut_memcmp(table->mix_id_buf, mix_id_field, len))) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h
index 1e496a25477..ff6c4ec9b28 100644
--- a/innobase/include/dict0mem.h
+++ b/innobase/include/dict0mem.h
@@ -54,7 +54,8 @@ dict_mem_table_create(
of the table is placed; this parameter
is ignored if the table is made
a member of a cluster */
- ulint n_cols); /* in: number of columns */
+ ulint n_cols, /* in: number of columns */
+ ibool comp); /* in: TRUE=compact page format */
/**************************************************************************
Creates a cluster memory object. */
@@ -171,6 +172,13 @@ struct dict_field_struct{
DICT_MAX_COL_PREFIX_LEN; NOTE that
in the UTF-8 charset, MySQL sets this
to 3 * the prefix len in UTF-8 chars */
+ ulint fixed_len; /* 0 or the fixed length of the
+ column if smaller than
+ DICT_MAX_COL_PREFIX_LEN */
+ ulint fixed_offs; /* offset to the field, or
+ ULINT_UNDEFINED if it is not fixed
+ within the record (due to preceding
+ variable-length fields) */
};
/* Data structure for an index tree */
@@ -210,7 +218,6 @@ struct dict_index_struct{
const char* table_name; /* table name */
dict_table_t* table; /* back pointer to table */
ulint space; /* space where the index tree is placed */
- ulint page_no;/* page number of the index tree root */
ulint trx_id_offset;/* position of the trx id column
in a clustered index record, if the fields
before it are known to be of a fixed size,
@@ -225,6 +232,7 @@ struct dict_index_struct{
ulint n_def; /* number of fields defined so far */
ulint n_fields;/* number of fields in the index */
dict_field_t* fields; /* array of field descriptions */
+ ulint n_nullable;/* number of nullable fields */
UT_LIST_NODE_T(dict_index_t)
indexes;/* list of indexes of the table */
dict_tree_t* tree; /* index tree struct */
@@ -320,6 +328,7 @@ struct dict_table_struct{
ibool tablespace_discarded;/* this flag is set TRUE when the
user calls DISCARD TABLESPACE on this table,
and reset to FALSE in IMPORT TABLESPACE */
+ ibool comp; /* flag: TRUE=compact page format */
hash_node_t name_hash; /* hash chain node */
hash_node_t id_hash; /* hash chain node */
ulint n_def; /* number of columns defined so far */
diff --git a/innobase/include/fil0fil.h b/innobase/include/fil0fil.h
index c1a127aadca..aa1ec5c25a5 100644
--- a/innobase/include/fil0fil.h
+++ b/innobase/include/fil0fil.h
@@ -89,6 +89,8 @@ extern fil_addr_t fil_addr_null;
#define FIL_TABLESPACE 501
#define FIL_LOG 502
+extern ulint fil_n_log_flushes;
+
extern ulint fil_n_pending_log_flushes;
extern ulint fil_n_pending_tablespace_flushes;
diff --git a/innobase/include/lock0lock.h b/innobase/include/lock0lock.h
index 74288d57285..45a81a4ac77 100644
--- a/innobase/include/lock0lock.h
+++ b/innobase/include/lock0lock.h
@@ -47,7 +47,8 @@ lock_sec_rec_some_has_impl_off_kernel(
/* out: transaction which has the x-lock, or
NULL */
rec_t* rec, /* in: user record */
- dict_index_t* index); /* in: secondary index */
+ dict_index_t* index, /* in: secondary index */
+ const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Checks if some transaction has an implicit x-lock on a record in a clustered
index. */
@@ -58,7 +59,8 @@ lock_clust_rec_some_has_impl(
/* out: transaction which has the x-lock, or
NULL */
rec_t* rec, /* in: user record */
- dict_index_t* index); /* in: clustered index */
+ dict_index_t* index, /* in: clustered index */
+ const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*****************************************************************
Resets the lock bits for a single record. Releases transactions
waiting for lock requests here. */
@@ -275,6 +277,7 @@ lock_clust_rec_modify_check_and_lock(
does nothing */
rec_t* rec, /* in: record which should be modified */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
que_thr_t* thr); /* in: query thread */
/*************************************************************************
Checks if locks of other transactions prevent an immediate modify
@@ -308,6 +311,7 @@ lock_sec_rec_read_check_and_lock(
which should be read or passed over by a read
cursor */
dict_index_t* index, /* in: secondary index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
ulint mode, /* in: mode of the lock which the read cursor
should set on records: LOCK_S or LOCK_X; the
latter is possible in SELECT FOR UPDATE */
@@ -333,6 +337,34 @@ lock_clust_rec_read_check_and_lock(
which should be read or passed over by a read
cursor */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
+ ulint mode, /* in: mode of the lock which the read cursor
+ should set on records: LOCK_S or LOCK_X; the
+ latter is possible in SELECT FOR UPDATE */
+ ulint gap_mode,/* in: LOCK_ORDINARY, LOCK_GAP, or
+ LOCK_REC_NOT_GAP */
+ que_thr_t* thr); /* in: query thread */
+/*************************************************************************
+Checks if locks of other transactions prevent an immediate read, or passing
+over by a read cursor, of a clustered index record. If they do, first tests
+if the query thread should anyway be suspended for some reason; if not, then
+puts the transaction and the query thread to the lock wait state and inserts a
+waiting request for a record lock to the lock queue. Sets the requested mode
+lock on the record. This is an alternative version of
+lock_clust_rec_read_check_and_lock() that does not require the parameter
+"offsets". */
+
+ulint
+lock_clust_rec_read_check_and_lock_alt(
+/*===================================*/
+ /* out: DB_SUCCESS, DB_LOCK_WAIT,
+ DB_DEADLOCK, or DB_QUE_THR_SUSPENDED */
+ ulint flags, /* in: if BTR_NO_LOCKING_FLAG bit is set,
+ does nothing */
+ rec_t* rec, /* in: user record or page supremum record
+ which should be read or passed over by a read
+ cursor */
+ dict_index_t* index, /* in: clustered index */
ulint mode, /* in: mode of the lock which the read cursor
should set on records: LOCK_S or LOCK_X; the
latter is possible in SELECT FOR UPDATE */
@@ -350,6 +382,7 @@ lock_clust_rec_cons_read_sees(
rec_t* rec, /* in: user record which should be read or
passed over by a read cursor */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
read_view_t* view); /* in: consistent read view */
/*************************************************************************
Checks that a non-clustered index record is seen in a consistent read. */
@@ -499,6 +532,7 @@ lock_check_trx_id_sanity(
dulint trx_id, /* in: trx id */
rec_t* rec, /* in: user record */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets, /* in: rec_get_offsets(rec, index) */
ibool has_kernel_mutex);/* in: TRUE if the caller owns the
kernel mutex */
/*************************************************************************
@@ -509,7 +543,8 @@ lock_rec_queue_validate(
/*====================*/
/* out: TRUE if ok */
rec_t* rec, /* in: record to look at */
- dict_index_t* index); /* in: index, or NULL if not known */
+ dict_index_t* index, /* in: index, or NULL if not known */
+ const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Prints info of a table lock. */
@@ -584,6 +619,8 @@ extern lock_sys_t* lock_sys;
#define LOCK_TABLE 16 /* these type values should be so high that */
#define LOCK_REC 32 /* they can be ORed to the lock mode */
#define LOCK_TABLE_EXP 80 /* explicit table lock (80 = 16 + 64) */
+#define LOCK_TABLE_TRANSACTIONAL 144
+ /* transactional table lock (144 = 16 + 128)*/
#define LOCK_TYPE_MASK 0xF0UL /* mask used to extract lock type from the
type_mode field in a lock */
/* Waiting lock flag */
diff --git a/innobase/include/lock0lock.ic b/innobase/include/lock0lock.ic
index fabc9256401..c7a71bb45d8 100644
--- a/innobase/include/lock0lock.ic
+++ b/innobase/include/lock0lock.ic
@@ -60,7 +60,8 @@ lock_clust_rec_some_has_impl(
/* out: transaction which has the x-lock, or
NULL */
rec_t* rec, /* in: user record */
- dict_index_t* index) /* in: clustered index */
+ dict_index_t* index, /* in: clustered index */
+ const ulint* offsets)/* in: rec_get_offsets(rec, index) */
{
dulint trx_id;
@@ -70,7 +71,7 @@ lock_clust_rec_some_has_impl(
ut_ad(index->type & DICT_CLUSTERED);
ut_ad(page_rec_is_user_rec(rec));
- trx_id = row_get_rec_trx_id(rec, index);
+ trx_id = row_get_rec_trx_id(rec, index, offsets);
if (trx_is_active(trx_id)) {
/* The modifying or inserting transaction is active */
diff --git a/innobase/include/mtr0log.h b/innobase/include/mtr0log.h
index 9c9c6f696e8..c0636ea1e1e 100644
--- a/innobase/include/mtr0log.h
+++ b/innobase/include/mtr0log.h
@@ -11,6 +11,7 @@ Created 12/7/1995 Heikki Tuuri
#include "univ.i"
#include "mtr0mtr.h"
+#include "dict0types.h"
/************************************************************
Writes 1 - 4 bytes to a file page buffered in the buffer pool.
@@ -173,6 +174,38 @@ mlog_parse_string(
byte* page); /* in: page where to apply the log record, or NULL */
+/************************************************************
+Opens a buffer for mlog, writes the initial log record and,
+if needed, the field lengths of an index. Reserves space
+for further log entries. The log entry must be closed with
+mlog_close(). */
+
+byte*
+mlog_open_and_write_index(
+/*======================*/
+ /* out: buffer, NULL if log mode
+ MTR_LOG_NONE */
+ mtr_t* mtr, /* in: mtr */
+ byte* rec, /* in: index record or page */
+ dict_index_t* index, /* in: record descriptor */
+ byte type, /* in: log item type */
+ ulint size); /* in: requested buffer size in bytes
+ (if 0, calls mlog_close() and returns NULL) */
+
+/************************************************************
+Parses a log record written by mlog_open_and_write_index. */
+
+byte*
+mlog_parse_index(
+/*=============*/
+ /* out: parsed record end,
+ NULL if not a complete record */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ /* out: new value of log_ptr */
+ ibool comp, /* in: TRUE=compact record format */
+ dict_index_t** index); /* out, own: dummy index */
+
/* Insert, update, and maybe other functions may use this value to define an
extra mlog buffer size for variable size data */
#define MLOG_BUF_MARGIN 256
diff --git a/innobase/include/mtr0mtr.h b/innobase/include/mtr0mtr.h
index e8c68a91dad..071279d5259 100644
--- a/innobase/include/mtr0mtr.h
+++ b/innobase/include/mtr0mtr.h
@@ -102,7 +102,31 @@ flag value must give the length also! */
file rename */
#define MLOG_FILE_DELETE ((byte)35) /* log record about an .ibd
file deletion */
-#define MLOG_BIGGEST_TYPE ((byte)35) /* biggest value (used in
+#define MLOG_COMP_REC_MIN_MARK ((byte)36) /* mark a compact index record
+ as the predefined minimum
+ record */
+#define MLOG_COMP_PAGE_CREATE ((byte)37) /* create a compact
+ index page */
+#define MLOG_COMP_REC_INSERT ((byte)38) /* compact record insert */
+#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39)
+ /* mark compact clustered index
+ record deleted */
+#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/* mark compact secondary index
+ record deleted */
+#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/* update of a compact record,
+ preserves record field sizes */
+#define MLOG_COMP_REC_DELETE ((byte)42) /* delete a compact record
+ from a page */
+#define MLOG_COMP_LIST_END_DELETE ((byte)43) /* delete compact record list
+ end on index page */
+#define MLOG_COMP_LIST_START_DELETE ((byte)44) /* delete compact record list
+ start on index page */
+#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45)
+ /* copy compact record list end
+ to a new created index page */
+#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /* reorganize an index page */
+
+#define MLOG_BIGGEST_TYPE ((byte)46) /* biggest value (used in
asserts) */
/*******************************************************************
diff --git a/innobase/include/os0file.h b/innobase/include/os0file.h
index ebc014df9fd..f55c345537e 100644
--- a/innobase/include/os0file.h
+++ b/innobase/include/os0file.h
@@ -24,6 +24,9 @@ extern ibool os_aio_print_debug;
extern ulint os_file_n_pending_preads;
extern ulint os_file_n_pending_pwrites;
+extern ulint os_n_pending_reads;
+extern ulint os_n_pending_writes;
+
#ifdef __WIN__
/* We define always WIN_ASYNC_IO, and check at run-time whether
diff --git a/innobase/include/os0proc.h b/innobase/include/os0proc.h
index d0d3cf82e38..b0b72e18675 100644
--- a/innobase/include/os0proc.h
+++ b/innobase/include/os0proc.h
@@ -12,6 +12,11 @@ Created 9/30/1995 Heikki Tuuri
#include "univ.i"
+#ifdef UNIV_LINUX
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#endif
+
typedef void* os_process_t;
typedef unsigned long int os_process_id_t;
@@ -27,6 +32,10 @@ page size of an Intel x86 processor. We cannot use AWE with 2 MB or 4 MB
pages. */
#define OS_AWE_X86_PAGE_SIZE 4096
+extern ibool os_use_large_pages;
+/* Large page size. This may be a boot-time option on some platforms */
+extern ulint os_large_page_size;
+
/********************************************************************
Windows AWE support. Tries to enable the "lock pages in memory" privilege for
the current process so that the current process can allocate memory-locked
@@ -103,6 +112,25 @@ os_mem_alloc_nocache(
/* out: allocated memory */
ulint n); /* in: number of bytes */
/********************************************************************
+Allocates large pages memory. */
+
+void*
+os_mem_alloc_large(
+/*=================*/
+ /* out: allocated memory */
+ ulint n, /* in: number of bytes */
+ ibool set_to_zero, /* in: TRUE if allocated memory should be set
+ to zero if UNIV_SET_MEM_TO_ZERO is defined */
+ ibool assert_on_error); /* in: if TRUE, we crash mysqld if the memory
+ cannot be allocated */
+/********************************************************************
+Frees large pages memory. */
+
+void
+os_mem_free_large(
+/*=================*/
+void *ptr); /* in: large pages memory to free */
+/********************************************************************
Sets the priority boost for threads released from waiting within the current
process. */
diff --git a/innobase/include/page0cur.h b/innobase/include/page0cur.h
index c85669ed4df..4fc62f37db7 100644
--- a/innobase/include/page0cur.h
+++ b/innobase/include/page0cur.h
@@ -128,7 +128,8 @@ page_cur_tuple_insert(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
- dtuple_t* tuple, /* in: pointer to a data tuple */
+ dtuple_t* tuple, /* in: pointer to a data tuple */
+ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr); /* in: mini-transaction handle */
/***************************************************************
Inserts a record next to page cursor. Returns pointer to inserted record if
@@ -142,6 +143,8 @@ page_cur_rec_insert(
otherwise */
page_cur_t* cursor, /* in: a page cursor */
rec_t* rec, /* in: record to insert */
+ dict_index_t* index, /* in: record descriptor */
+ ulint* offsets,/* in: rec_get_offsets(rec, index) */
mtr_t* mtr); /* in: mini-transaction handle */
/***************************************************************
Inserts a record next to page cursor. Returns pointer to inserted record if
@@ -155,9 +158,10 @@ page_cur_insert_rec_low(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
- dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
- ulint data_size,/* in: data size of tuple */
- rec_t* rec, /* in: pointer to a physical record or NULL */
+ dtuple_t* tuple, /* in: pointer to a data tuple or NULL */
+ dict_index_t* index, /* in: record descriptor */
+ rec_t* rec, /* in: pointer to a physical record or NULL */
+ ulint* offsets,/* in: rec_get_offsets(rec, index) or NULL */
mtr_t* mtr); /* in: mini-transaction handle */
/*****************************************************************
Copies records from page to a newly created page, from a given record onward,
@@ -166,10 +170,11 @@ including that record. Infimum and supremum records are not copied. */
void
page_copy_rec_list_end_to_created_page(
/*===================================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: first record to copy */
- mtr_t* mtr); /* in: mtr */
+ page_t* new_page, /* in: index page to copy to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: first record to copy */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/***************************************************************
Deletes a record at the page cursor. The cursor is moved to the
next record after the deleted one. */
@@ -177,8 +182,10 @@ next record after the deleted one. */
void
page_cur_delete_rec(
/*================*/
- page_cur_t* cursor, /* in: a page cursor */
- mtr_t* mtr); /* in: mini-transaction handle */
+ page_cur_t* cursor, /* in: a page cursor */
+ dict_index_t* index, /* in: record descriptor */
+ const ulint* offsets,/* in: rec_get_offsets(cursor->rec, index) */
+ mtr_t* mtr); /* in: mini-transaction handle */
/********************************************************************
Searches the right position for a page cursor. */
UNIV_INLINE
@@ -187,6 +194,7 @@ page_cur_search(
/*============*/
/* out: number of matched fields on the left */
page_t* page, /* in: index page */
+ dict_index_t* index, /* in: record descriptor */
dtuple_t* tuple, /* in: data tuple */
ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
or PAGE_CUR_GE */
@@ -198,6 +206,7 @@ void
page_cur_search_with_match(
/*=======================*/
page_t* page, /* in: index page */
+ dict_index_t* index, /* in: record descriptor */
dtuple_t* tuple, /* in: data tuple */
ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
or PAGE_CUR_GE */
@@ -229,34 +238,37 @@ Parses a log record of a record insert on a page. */
byte*
page_cur_parse_insert_rec(
/*======================*/
- /* out: end of log record or NULL */
- ibool is_short,/* in: TRUE if short inserts */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
+ /* out: end of log record or NULL */
+ ibool is_short,/* in: TRUE if short inserts */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ dict_index_t* index, /* in: record descriptor */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
/**************************************************************
Parses a log record of copying a record list end to a new created page. */
byte*
page_parse_copy_rec_list_to_created_page(
/*=====================================*/
- /* out: end of log record or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
+ /* out: end of log record or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ dict_index_t* index, /* in: record descriptor */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
/***************************************************************
Parses log record of a record delete on a page. */
byte*
page_cur_parse_delete_rec(
/*======================*/
- /* out: pointer to record end or NULL */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
+ /* out: pointer to record end or NULL */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ dict_index_t* index, /* in: record descriptor */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
/* Index page cursor */
diff --git a/innobase/include/page0cur.ic b/innobase/include/page0cur.ic
index 39f8ab11513..e99d799b372 100644
--- a/innobase/include/page0cur.ic
+++ b/innobase/include/page0cur.ic
@@ -143,7 +143,7 @@ UNIV_INLINE
void
page_cur_move_to_prev(
/*==================*/
- page_cur_t* cur) /* in: cursor; must not before first */
+ page_cur_t* cur) /* in: page cursor, not before first */
{
ut_ad(!page_cur_is_before_first(cur));
@@ -158,6 +158,7 @@ page_cur_search(
/*============*/
/* out: number of matched fields on the left */
page_t* page, /* in: index page */
+ dict_index_t* index, /* in: record descriptor */
dtuple_t* tuple, /* in: data tuple */
ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G,
or PAGE_CUR_GE */
@@ -170,7 +171,7 @@ page_cur_search(
ut_ad(dtuple_check_typed(tuple));
- page_cur_search_with_match(page, tuple, mode,
+ page_cur_search_with_match(page, index, tuple, mode,
&up_matched_fields,
&up_matched_bytes,
&low_matched_fields,
@@ -190,16 +191,11 @@ page_cur_tuple_insert(
/* out: pointer to record if succeed, NULL
otherwise */
page_cur_t* cursor, /* in: a page cursor */
- dtuple_t* tuple, /* in: pointer to a data tuple */
+ dtuple_t* tuple, /* in: pointer to a data tuple */
+ dict_index_t* index, /* in: record descriptor */
mtr_t* mtr) /* in: mini-transaction handle */
{
- ulint data_size;
-
- ut_ad(dtuple_check_typed(tuple));
-
- data_size = dtuple_get_data_size(tuple);
-
- return(page_cur_insert_rec_low(cursor, tuple, data_size, NULL, mtr));
+ return(page_cur_insert_rec_low(cursor, tuple, index, NULL, NULL, mtr));
}
/***************************************************************
@@ -214,8 +210,11 @@ page_cur_rec_insert(
otherwise */
page_cur_t* cursor, /* in: a page cursor */
rec_t* rec, /* in: record to insert */
+ dict_index_t* index, /* in: record descriptor */
+ ulint* offsets,/* in: rec_get_offsets(rec, index) */
mtr_t* mtr) /* in: mini-transaction handle */
{
- return(page_cur_insert_rec_low(cursor, NULL, 0, rec, mtr));
+ return(page_cur_insert_rec_low(cursor, NULL, index, rec,
+ offsets, mtr));
}
diff --git a/innobase/include/page0page.h b/innobase/include/page0page.h
index 969313614e3..144c297b811 100644
--- a/innobase/include/page0page.h
+++ b/innobase/include/page0page.h
@@ -37,7 +37,8 @@ typedef byte page_header_t;
/*-----------------------------*/
#define PAGE_N_DIR_SLOTS 0 /* number of slots in page directory */
#define PAGE_HEAP_TOP 2 /* pointer to record heap top */
-#define PAGE_N_HEAP 4 /* number of records in the heap */
+#define PAGE_N_HEAP 4 /* number of records in the heap,
+ bit 15=flag: new-style compact page format */
#define PAGE_FREE 6 /* pointer to start of page free record list */
#define PAGE_GARBAGE 8 /* number of bytes in deleted records */
#define PAGE_LAST_INSERT 10 /* pointer to the last inserted record, or
@@ -79,15 +80,24 @@ typedef byte page_header_t;
#define PAGE_DATA (PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE)
/* start of data on the page */
-#define PAGE_INFIMUM (PAGE_DATA + 1 + REC_N_EXTRA_BYTES)
- /* offset of the page infimum record on the
- page */
-#define PAGE_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_EXTRA_BYTES + 8)
- /* offset of the page supremum record on the
- page */
-#define PAGE_SUPREMUM_END (PAGE_SUPREMUM + 9)
+#define PAGE_OLD_INFIMUM (PAGE_DATA + 1 + REC_N_OLD_EXTRA_BYTES)
+ /* offset of the page infimum record on an
+ old-style page */
+#define PAGE_OLD_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_OLD_EXTRA_BYTES + 8)
+ /* offset of the page supremum record on an
+ old-style page */
+#define PAGE_OLD_SUPREMUM_END (PAGE_OLD_SUPREMUM + 9)
/* offset of the page supremum record end on
- the page */
+ an old-style page */
+#define PAGE_NEW_INFIMUM (PAGE_DATA + REC_N_NEW_EXTRA_BYTES)
+ /* offset of the page infimum record on a
+ new-style compact page */
+#define PAGE_NEW_SUPREMUM (PAGE_DATA + 2 * REC_N_NEW_EXTRA_BYTES + 8)
+ /* offset of the page supremum record on a
+ new-style compact page */
+#define PAGE_NEW_SUPREMUM_END (PAGE_NEW_SUPREMUM + 8)
+ /* offset of the page supremum record end on
+ a new-style compact page */
/*-----------------------------*/
/* Directions of cursor movement */
@@ -233,6 +243,7 @@ page_cmp_dtuple_rec_with_match(
be page infimum or supremum, in which case
matched-parameter values below are not
affected */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint* matched_fields, /* in/out: number of already completely
matched fields; when function returns
contains the value for current comparison */
@@ -259,6 +270,22 @@ page_rec_get_n_recs_before(
/* out: number of records */
rec_t* rec); /* in: the physical record */
/*****************************************************************
+Gets the number of records in the heap. */
+UNIV_INLINE
+ulint
+page_dir_get_n_heap(
+/*================*/
+ /* out: number of heap records, including infimum and supremum */
+ page_t* page); /* in: index page */
+/*****************************************************************
+Sets the number of records in the heap, preserving the flag in bit 15. */
+UNIV_INLINE
+void
+page_dir_set_n_heap(
+/*================*/
+ page_t* page, /* in: index page */
+ ulint n_heap);/* in: number of records, must be < 0x8000 */
+/*****************************************************************
Gets the number of dir slots in directory. */
UNIV_INLINE
ulint
@@ -267,6 +294,15 @@ page_dir_get_n_slots(
/* out: number of slots */
page_t* page); /* in: index page */
/*****************************************************************
+Sets the number of dir slots in directory. */
+UNIV_INLINE
+void
+page_dir_set_n_slots(
+/*=================*/
+ /* no return value */
+ page_t* page, /* in: index page */
+ ulint n_slots);/* in: number of slots */
+/*****************************************************************
Gets pointer to nth directory slot. */
UNIV_INLINE
page_dir_slot_t*
@@ -333,7 +369,16 @@ ulint
page_dir_find_owner_slot(
/*=====================*/
/* out: the directory slot number */
- rec_t* rec); /* in: the physical record */
+ rec_t* rec); /* in: the physical record */
+/****************************************************************
+Determine whether the page is in new-style compact format. */
+UNIV_INLINE
+ibool
+page_is_comp(
+/*=========*/
+ /* out: TRUE if the page is in compact format
+ FALSE if it is in old-style format */
+ page_t* page); /* in: index page */
/****************************************************************
Gets the pointer to the next record on the page. */
UNIV_INLINE
@@ -359,9 +404,10 @@ UNIV_INLINE
rec_t*
page_rec_get_prev(
/*==============*/
- /* out: pointer to previous record */
- rec_t* rec); /* in: pointer to record, must not be page
- infimum */
+ /* out: pointer to previous record */
+ rec_t* rec); /* in: pointer to record,
+ must not be page infimum */
+
/****************************************************************
TRUE if the record is a user record on the page. */
UNIV_INLINE
@@ -446,9 +492,11 @@ page_get_max_insert_size_after_reorganize(
Calculates free space if a page is emptied. */
UNIV_INLINE
ulint
-page_get_free_space_of_empty(void);
-/*==============================*/
- /* out: free space */
+page_get_free_space_of_empty(
+/*=========================*/
+ /* out: free space */
+ ibool comp) /* in: TRUE=compact page format */
+ __attribute__((const));
/****************************************************************
Returns the sum of the sizes of the records in the record list
excluding the infimum and supremum records. */
@@ -464,20 +512,23 @@ Allocates a block of memory from an index page. */
byte*
page_mem_alloc(
/*===========*/
- /* out: pointer to start of allocated
- buffer, or NULL if allocation fails */
- page_t* page, /* in: index page */
- ulint need, /* in: number of bytes needed */
- ulint* heap_no);/* out: this contains the heap number
- of the allocated record if allocation succeeds */
+ /* out: pointer to start of allocated
+ buffer, or NULL if allocation fails */
+ page_t* page, /* in: index page */
+ ulint need, /* in: number of bytes needed */
+ dict_index_t* index, /* in: record descriptor */
+ ulint* heap_no);/* out: this contains the heap number
+ of the allocated record
+ if allocation succeeds */
/****************************************************************
Puts a record to free list. */
UNIV_INLINE
void
page_mem_free(
/*==========*/
- page_t* page, /* in: index page */
- rec_t* rec); /* in: pointer to the (origin of) record */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: pointer to the (origin of) record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/**************************************************************
The index page creation function. */
@@ -487,7 +538,8 @@ page_create(
/* out: pointer to the page */
buf_frame_t* frame, /* in: a buffer frame where the page is
created */
- mtr_t* mtr); /* in: mini-transaction handle */
+ mtr_t* mtr, /* in: mini-transaction handle */
+ ibool comp); /* in: TRUE=compact page format */
/*****************************************************************
Differs from page_copy_rec_list_end, because this function does not
touch the lock table and max trx id on page. */
@@ -495,10 +547,11 @@ touch the lock table and max trx id on page. */
void
page_copy_rec_list_end_no_locks(
/*============================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- mtr_t* mtr); /* in: mtr */
+ page_t* new_page, /* in: index page to copy to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Copies records from page to new_page, from the given record onward,
including that record. Infimum and supremum records are not copied.
@@ -507,10 +560,11 @@ The records are copied to the start of the record list on new_page. */
void
page_copy_rec_list_end(
/*===================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- mtr_t* mtr); /* in: mtr */
+ page_t* new_page, /* in: index page to copy to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Copies records from page to new_page, up to the given record, NOT
including that record. Infimum and supremum records are not copied.
@@ -519,10 +573,11 @@ The records are copied to the end of the record list on new_page. */
void
page_copy_rec_list_start(
/*=====================*/
- page_t* new_page, /* in: index page to copy to */
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- mtr_t* mtr); /* in: mtr */
+ page_t* new_page, /* in: index page to copy to */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Deletes records from a page from a given record onward, including that record.
The infimum and supremum records are not deleted. */
@@ -530,14 +585,15 @@ The infimum and supremum records are not deleted. */
void
page_delete_rec_list_end(
/*=====================*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- ulint n_recs, /* in: number of records to delete, or ULINT_UNDEFINED
- if not known */
- ulint size, /* in: the sum of the sizes of the records in the end
- of the chain to delete, or ULINT_UNDEFINED if not
- known */
- mtr_t* mtr); /* in: mtr */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ dict_index_t* index, /* in: record descriptor */
+ ulint n_recs, /* in: number of records to delete,
+ or ULINT_UNDEFINED if not known */
+ ulint size, /* in: the sum of the sizes of the
+ records in the end of the chain to
+ delete, or ULINT_UNDEFINED if not known */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Deletes records from page, up to the given record, NOT including
that record. Infimum and supremum records are not deleted. */
@@ -545,9 +601,10 @@ that record. Infimum and supremum records are not deleted. */
void
page_delete_rec_list_start(
/*=======================*/
- page_t* page, /* in: index page */
- rec_t* rec, /* in: record on page */
- mtr_t* mtr); /* in: mtr */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: record on page */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Moves record list end to another page. Moved records include
split_rec. */
@@ -555,10 +612,11 @@ split_rec. */
void
page_move_rec_list_end(
/*===================*/
- page_t* new_page, /* in: index page where to move */
- page_t* page, /* in: index page */
- rec_t* split_rec, /* in: first record to move */
- mtr_t* mtr); /* in: mtr */
+ page_t* new_page, /* in: index page where to move */
+ page_t* page, /* in: index page */
+ rec_t* split_rec, /* in: first record to move */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/*****************************************************************
Moves record list start to another page. Moved records do not include
split_rec. */
@@ -566,10 +624,11 @@ split_rec. */
void
page_move_rec_list_start(
/*=====================*/
- page_t* new_page, /* in: index page where to move */
- page_t* page, /* in: index page */
- rec_t* split_rec, /* in: first record not to move */
- mtr_t* mtr); /* in: mtr */
+ page_t* new_page, /* in: index page where to move */
+ page_t* page, /* in: index page */
+ rec_t* split_rec, /* in: first record not to move */
+ dict_index_t* index, /* in: record descriptor */
+ mtr_t* mtr); /* in: mtr */
/********************************************************************
Splits a directory slot which owns too many records. */
@@ -595,13 +654,16 @@ Parses a log record of a record list end or start deletion. */
byte*
page_parse_delete_rec_list(
/*=======================*/
- /* out: end of log record or NULL */
- byte type, /* in: MLOG_LIST_END_DELETE or
- MLOG_LIST_START_DELETE */
- byte* ptr, /* in: buffer */
- byte* end_ptr,/* in: buffer end */
- page_t* page, /* in: page or NULL */
- mtr_t* mtr); /* in: mtr or NULL */
+ /* out: end of log record or NULL */
+ byte type, /* in: MLOG_LIST_END_DELETE,
+ MLOG_LIST_START_DELETE,
+ MLOG_COMP_LIST_END_DELETE or
+ MLOG_COMP_LIST_START_DELETE */
+ byte* ptr, /* in: buffer */
+ byte* end_ptr,/* in: buffer end */
+ dict_index_t* index, /* in: record descriptor */
+ page_t* page, /* in: page or NULL */
+ mtr_t* mtr); /* in: mtr or NULL */
/***************************************************************
Parses a redo log record of creating a page. */
@@ -611,6 +673,7 @@ page_parse_create(
/* out: end of log record or NULL */
byte* ptr, /* in: buffer */
byte* end_ptr,/* in: buffer end */
+ ibool comp, /* in: TRUE=compact page format */
page_t* page, /* in: page or NULL */
mtr_t* mtr); /* in: mtr or NULL */
/****************************************************************
@@ -620,7 +683,8 @@ the index page context. */
void
page_rec_print(
/*===========*/
- rec_t* rec);
+ rec_t* rec, /* in: physical record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/*******************************************************************
This is used to print the contents of the directory for
debugging purposes. */
@@ -637,8 +701,9 @@ debugging purposes. */
void
page_print_list(
/*============*/
- page_t* page, /* in: index page */
- ulint pr_n); /* in: print n first and n last entries */
+ page_t* page, /* in: index page */
+ dict_index_t* index, /* in: dictionary index of the page */
+ ulint pr_n); /* in: print n first and n last entries */
/*******************************************************************
Prints the info in a page header. */
@@ -653,9 +718,12 @@ debugging purposes. */
void
page_print(
/*======*/
- page_t* page, /* in: index page */
- ulint dn, /* in: print dn first and last entries in directory */
- ulint rn); /* in: print rn first and last records on page */
+ page_t* page, /* in: index page */
+ dict_index_t* index, /* in: dictionary index of the page */
+ ulint dn, /* in: print dn first and last entries
+ in directory */
+ ulint rn); /* in: print rn first and last records
+ on page */
/*******************************************************************
The following is used to validate a record on a page. This function
differs from rec_validate as it can also check the n_owned field and
@@ -664,8 +732,9 @@ the heap_no field. */
ibool
page_rec_validate(
/*==============*/
- /* out: TRUE if ok */
- rec_t* rec); /* in: record on the page */
+ /* out: TRUE if ok */
+ rec_t* rec, /* in: physical record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/*******************************************************************
Checks that the first directory slot points to the infimum record and
the last to the supremum. This function is intended to track if the
diff --git a/innobase/include/page0page.ic b/innobase/include/page0page.ic
index c7bf78040e9..bc0805ca30c 100644
--- a/innobase/include/page0page.ic
+++ b/innobase/include/page0page.ic
@@ -73,7 +73,8 @@ page_header_set_field(
{
ut_ad(page);
ut_ad(field <= PAGE_N_RECS);
- ut_ad(val < UNIV_PAGE_SIZE);
+ ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE);
+ ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE);
mach_write_to_2(page + PAGE_HEADER + field, val);
}
@@ -152,6 +153,19 @@ page_header_reset_last_insert(
}
/****************************************************************
+Determine whether the page is in new-style compact format. */
+UNIV_INLINE
+ibool
+page_is_comp(
+/*=========*/
+ /* out: TRUE if the page is in compact format
+ FALSE if it is in old-style format */
+ page_t* page) /* in: index page */
+{
+ return(!!(page_header_get_field(page, PAGE_N_HEAP) & 0x8000));
+}
+
+/****************************************************************
Gets the first record on the page. */
UNIV_INLINE
rec_t*
@@ -162,7 +176,11 @@ page_get_infimum_rec(
{
ut_ad(page);
- return(page + PAGE_INFIMUM);
+ if (page_is_comp(page)) {
+ return(page + PAGE_NEW_INFIMUM);
+ } else {
+ return(page + PAGE_OLD_INFIMUM);
+ }
}
/****************************************************************
@@ -176,7 +194,11 @@ page_get_supremum_rec(
{
ut_ad(page);
- return(page + PAGE_SUPREMUM);
+ if (page_is_comp(page)) {
+ return(page + PAGE_NEW_SUPREMUM);
+ } else {
+ return(page + PAGE_OLD_SUPREMUM);
+ }
}
/****************************************************************
@@ -309,6 +331,7 @@ page_cmp_dtuple_rec_with_match(
be page infimum or supremum, in which case
matched-parameter values below are not
affected */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint* matched_fields, /* in/out: number of already completely
matched fields; when function returns
contains the value for current comparison */
@@ -320,6 +343,7 @@ page_cmp_dtuple_rec_with_match(
page_t* page;
ut_ad(dtuple_check_typed(dtuple));
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
page = buf_frame_align(rec);
@@ -328,7 +352,7 @@ page_cmp_dtuple_rec_with_match(
} else if (rec == page_get_supremum_rec(page)) {
return(-1);
} else {
- return(cmp_dtuple_rec_with_match(dtuple, rec,
+ return(cmp_dtuple_rec_with_match(dtuple, rec, offsets,
matched_fields,
matched_bytes));
}
@@ -358,6 +382,45 @@ page_dir_get_n_slots(
{
return(page_header_get_field(page, PAGE_N_DIR_SLOTS));
}
+/*****************************************************************
+Sets the number of dir slots in directory. */
+UNIV_INLINE
+void
+page_dir_set_n_slots(
+/*=================*/
+ /* no return value */
+ page_t* page, /* in: index page */
+ ulint n_slots)/* in: number of slots */
+{
+ page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots);
+}
+
+/*****************************************************************
+Gets the number of records in the heap. */
+UNIV_INLINE
+ulint
+page_dir_get_n_heap(
+/*================*/
+ /* out: number of heap records, flag bit 15 masked off */
+ page_t* page) /* in: index page */
+{
+ return(page_header_get_field(page, PAGE_N_HEAP) & 0x7fff);
+}
+
+/*****************************************************************
+Sets the number of records in the heap, preserving the flag in bit 15. */
+UNIV_INLINE
+void
+page_dir_set_n_heap(
+/*================*/
+ page_t* page, /* in: index page */
+ ulint n_heap) /* in: number of records, must be < 0x8000 */
+{
+ ut_ad(n_heap < 0x8000);
+
+ page_header_set_field(page, PAGE_N_HEAP, n_heap | (0x8000 &
+ page_header_get_field(page, PAGE_N_HEAP)));
+}
/*****************************************************************
Gets pointer to nth directory slot. */
@@ -369,7 +432,7 @@ page_dir_get_nth_slot(
page_t* page, /* in: index page */
ulint n) /* in: position */
{
- ut_ad(page_header_get_field(page, PAGE_N_DIR_SLOTS) > n);
+ ut_ad(page_dir_get_n_slots(page) > n);
return(page + UNIV_PAGE_SIZE - PAGE_DIR
- (n + 1) * PAGE_DIR_SLOT_SIZE);
@@ -431,7 +494,8 @@ page_dir_slot_get_n_owned(
/* out: number of records */
page_dir_slot_t* slot) /* in: page directory slot */
{
- return(rec_get_n_owned(page_dir_slot_get_rec(slot)));
+ return(rec_get_n_owned(page_dir_slot_get_rec(slot),
+ page_is_comp(buf_frame_align(slot))));
}
/*******************************************************************
@@ -444,7 +508,8 @@ page_dir_slot_set_n_owned(
ulint n) /* in: number of records owned
by the slot */
{
- rec_set_n_owned(page_dir_slot_get_rec(slot), n);
+ rec_set_n_owned(page_dir_slot_get_rec(slot),
+ page_is_comp(buf_frame_align(slot)), n);
}
/****************************************************************
@@ -477,7 +542,7 @@ page_rec_get_next(
page = buf_frame_align(rec);
- offs = rec_get_next_offs(rec);
+ offs = rec_get_next_offs(rec, page_is_comp(page));
if (offs >= UNIV_PAGE_SIZE) {
fprintf(stderr,
@@ -513,6 +578,7 @@ page_rec_set_next(
infimum */
{
page_t* page;
+ ulint offs;
ut_ad(page_rec_check(rec));
ut_a((next == NULL)
@@ -523,11 +589,13 @@ page_rec_set_next(
ut_ad(rec != page_get_supremum_rec(page));
ut_ad(next != page_get_infimum_rec(page));
- if (next == NULL) {
- rec_set_next_offs(rec, 0);
+ if (next) {
+ offs = (ulint) (next - page);
} else {
- rec_set_next_offs(rec, (ulint)(next - page));
+ offs = 0;
}
+
+ rec_set_next_offs(rec, page_is_comp(page), offs);
}
/****************************************************************
@@ -545,6 +613,7 @@ page_rec_get_prev(
rec_t* rec2;
rec_t* prev_rec = NULL;
page_t* page;
+ ibool comp;
ut_ad(page_rec_check(rec));
@@ -559,6 +628,7 @@ page_rec_get_prev(
slot = page_dir_get_nth_slot(page, slot_no - 1);
rec2 = page_dir_slot_get_rec(slot);
+ comp = page_is_comp(page);
while (rec != rec2) {
prev_rec = rec2;
@@ -579,9 +649,12 @@ page_rec_find_owner_rec(
/* out: the owner record */
rec_t* rec) /* in: the physical record */
{
+ ibool comp;
+
ut_ad(page_rec_check(rec));
+ comp = page_is_comp(buf_frame_align(rec));
- while (rec_get_n_owned(rec) == 0) {
+ while (rec_get_n_owned(rec, comp) == 0) {
rec = page_rec_get_next(rec);
}
@@ -601,7 +674,9 @@ page_get_data_size(
ulint ret;
ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP)
- - PAGE_SUPREMUM_END
+ - (page_is_comp(page)
+ ? PAGE_NEW_SUPREMUM_END
+ : PAGE_OLD_SUPREMUM_END)
- page_header_get_field(page, PAGE_GARBAGE));
ut_ad(ret < UNIV_PAGE_SIZE);
@@ -613,12 +688,13 @@ page_get_data_size(
Calculates free space if a page is emptied. */
UNIV_INLINE
ulint
-page_get_free_space_of_empty(void)
-/*==============================*/
+page_get_free_space_of_empty(
+/*=========================*/
/* out: free space */
+ ibool comp) /* in: TRUE=compact page layout */
{
return((ulint)(UNIV_PAGE_SIZE
- - PAGE_SUPREMUM_END
+ - (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END)
- PAGE_DIR
- 2 * PAGE_DIR_SLOT_SIZE));
}
@@ -640,13 +716,16 @@ page_get_max_insert_size(
{
ulint occupied;
ulint free_space;
+ ibool comp;
+
+ comp = page_is_comp(page);
occupied = page_header_get_field(page, PAGE_HEAP_TOP)
- - PAGE_SUPREMUM_END
+ - (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END)
+ page_dir_calc_reserved_space(
- n_recs + (page_header_get_field(page, PAGE_N_HEAP) - 2));
+ n_recs + page_dir_get_n_heap(page) - 2);
- free_space = page_get_free_space_of_empty();
+ free_space = page_get_free_space_of_empty(comp);
/* Above the 'n_recs +' part reserves directory space for the new
inserted records; the '- 2' excludes page infimum and supremum
@@ -673,11 +752,14 @@ page_get_max_insert_size_after_reorganize(
{
ulint occupied;
ulint free_space;
+ ibool comp;
+
+ comp = page_is_comp(page);
occupied = page_get_data_size(page)
+ page_dir_calc_reserved_space(n_recs + page_get_n_recs(page));
- free_space = page_get_free_space_of_empty();
+ free_space = page_get_free_space_of_empty(comp);
if (occupied > free_space) {
@@ -693,21 +775,33 @@ UNIV_INLINE
void
page_mem_free(
/*==========*/
- page_t* page, /* in: index page */
- rec_t* rec) /* in: pointer to the (origin of) record */
+ page_t* page, /* in: index page */
+ rec_t* rec, /* in: pointer to the (origin of) record */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
- rec_t* free;
- ulint garbage;
+ rec_t* free;
+ ulint garbage;
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
free = page_header_get_ptr(page, PAGE_FREE);
page_rec_set_next(rec, free);
page_header_set_ptr(page, PAGE_FREE, rec);
+#if 0 /* It's better not to destroy the user's data. */
+
+ /* Clear the data bytes of the deleted record in order to improve
+ the compression ratio of the page and to make it easier to read
+ page dumps in corruption reports. The extra bytes of the record
+ cannot be cleared, because page_mem_alloc() needs them in order
+ to determine the size of the deleted record. */
+ memset(rec, 0, rec_offs_data_size(offsets));
+#endif
+
garbage = page_header_get_field(page, PAGE_GARBAGE);
page_header_set_field(page, PAGE_GARBAGE,
- garbage + rec_get_size(rec));
+ garbage + rec_offs_size(offsets));
}
#ifdef UNIV_MATERIALIZE
diff --git a/innobase/include/que0que.h b/innobase/include/que0que.h
index e1874edcaf2..4113e52d425 100644
--- a/innobase/include/que0que.h
+++ b/innobase/include/que0que.h
@@ -359,6 +359,8 @@ struct que_thr_struct{
the control came */
ulint resource; /* resource usage of the query thread
thus far */
+ ulint lock_state; /* lock state of thread (table or
+ row) */
};
#define QUE_THR_MAGIC_N 8476583
@@ -482,6 +484,11 @@ struct que_fork_struct{
#define QUE_THR_SUSPENDED 7
#define QUE_THR_ERROR 8
+/* Query thread lock states */
+#define QUE_THR_LOCK_NOLOCK 0
+#define QUE_THR_LOCK_ROW 1
+#define QUE_THR_LOCK_TABLE 2
+
/* From where the cursor position is counted */
#define QUE_CUR_NOT_DEFINED 1
#define QUE_CUR_START 2
diff --git a/innobase/include/rem0cmp.h b/innobase/include/rem0cmp.h
index 712e263350e..1b1ee26b809 100644
--- a/innobase/include/rem0cmp.h
+++ b/innobase/include/rem0cmp.h
@@ -90,6 +90,7 @@ cmp_dtuple_rec_with_match(
dtuple in some of the common fields, or which
has an equal number or more fields than
dtuple */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
ulint* matched_fields, /* in/out: number of already completely
matched fields; when function returns,
contains the value for current comparison */
@@ -107,7 +108,8 @@ cmp_dtuple_rec(
less than rec, respectively; see the comments
for cmp_dtuple_rec_with_match */
dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec); /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/******************************************************************
Checks if a dtuple is a prefix of a record. The last field in dtuple
is allowed to be a prefix of the corresponding field in the record. */
@@ -116,23 +118,9 @@ ibool
cmp_dtuple_is_prefix_of_rec(
/*========================*/
/* out: TRUE if prefix */
- dtuple_t* dtuple, /* in: data tuple */
- rec_t* rec); /* in: physical record */
-/******************************************************************
-Compares a prefix of a data tuple to a prefix of a physical record for
-equality. If there are less fields in rec than parameter n_fields, FALSE
-is returned. NOTE that n_fields_cmp of dtuple does not affect this
-comparison. */
-
-ibool
-cmp_dtuple_rec_prefix_equal(
-/*========================*/
- /* out: TRUE if equal */
dtuple_t* dtuple, /* in: data tuple */
rec_t* rec, /* in: physical record */
- ulint n_fields); /* in: number of fields which should be
- compared; must not exceed the number of
- fields in dtuple */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/*****************************************************************
This function is used to compare two physical records. Only the common
first fields are compared, and if an externally stored field is
@@ -146,6 +134,8 @@ cmp_rec_rec_with_match(
first fields are compared */
rec_t* rec1, /* in: physical record */
rec_t* rec2, /* in: physical record */
+ const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
+ const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
dict_index_t* index, /* in: data dictionary index */
ulint* matched_fields, /* in/out: number of already completely
matched fields; when the function returns,
@@ -167,6 +157,8 @@ cmp_rec_rec(
first fields are compared */
rec_t* rec1, /* in: physical record */
rec_t* rec2, /* in: physical record */
+ const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
+ const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
dict_index_t* index); /* in: data dictionary index */
diff --git a/innobase/include/rem0cmp.ic b/innobase/include/rem0cmp.ic
index 75cb3ef04e8..b86534e0a6a 100644
--- a/innobase/include/rem0cmp.ic
+++ b/innobase/include/rem0cmp.ic
@@ -57,10 +57,13 @@ cmp_rec_rec(
first fields are compared */
rec_t* rec1, /* in: physical record */
rec_t* rec2, /* in: physical record */
+ const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */
+ const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */
dict_index_t* index) /* in: data dictionary index */
{
ulint match_f = 0;
ulint match_b = 0;
- return(cmp_rec_rec_with_match(rec1, rec2, index, &match_f, &match_b));
+ return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index,
+ &match_f, &match_b));
}
diff --git a/innobase/include/rem0rec.h b/innobase/include/rem0rec.h
index 86bf263170f..134c37c8030 100644
--- a/innobase/include/rem0rec.h
+++ b/innobase/include/rem0rec.h
@@ -23,9 +23,23 @@ Created 5/30/1994 Heikki Tuuri
info bits of a record */
#define REC_INFO_MIN_REC_FLAG 0x10UL
-/* Number of extra bytes in a record, in addition to the data and the
-offsets */
-#define REC_N_EXTRA_BYTES 6
+/* Number of extra bytes in an old-style record,
+in addition to the data and the offsets */
+#define REC_N_OLD_EXTRA_BYTES 6
+/* Number of extra bytes in a new-style record,
+in addition to the data and the offsets */
+#define REC_N_NEW_EXTRA_BYTES 5
+
+/* Record status values */
+#define REC_STATUS_ORDINARY 0
+#define REC_STATUS_NODE_PTR 1
+#define REC_STATUS_INFIMUM 2
+#define REC_STATUS_SUPREMUM 3
+
+/* Number of elements that should be initially allocated for the
+offsets[] array, first passed to rec_get_offsets() */
+#define REC_OFFS_NORMAL_SIZE 100
+#define REC_OFFS_SMALL_SIZE 10
/**********************************************************
The following function is used to get the offset of the
@@ -36,7 +50,8 @@ rec_get_next_offs(
/*==============*/
/* out: the page offset of the next
chained record */
- rec_t* rec); /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp); /* in: TRUE=compact page format */
/**********************************************************
The following function is used to set the next record offset field
of the record. */
@@ -45,17 +60,28 @@ void
rec_set_next_offs(
/*==============*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ulint next); /* in: offset of the next record */
/**********************************************************
The following function is used to get the number of fields
-in the record. */
+in an old-style record. */
UNIV_INLINE
ulint
-rec_get_n_fields(
-/*=============*/
+rec_get_n_fields_old(
+/*=================*/
/* out: number of data fields */
rec_t* rec); /* in: physical record */
/**********************************************************
+The following function is used to get the number of fields
+in a record. */
+UNIV_INLINE
+ulint
+rec_get_n_fields(
+/*=============*/
+ /* out: number of data fields */
+ rec_t* rec, /* in: physical record */
+ dict_index_t* index); /* in: record descriptor */
+/**********************************************************
The following function is used to get the number of records
owned by the previous directory record. */
UNIV_INLINE
@@ -63,7 +89,8 @@ ulint
rec_get_n_owned(
/*============*/
/* out: number of owned records */
- rec_t* rec); /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp); /* in: TRUE=compact page format */
/**********************************************************
The following function is used to set the number of owned
records. */
@@ -72,6 +99,7 @@ void
rec_set_n_owned(
/*============*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ulint n_owned); /* in: the number of owned */
/**********************************************************
The following function is used to retrieve the info bits of
@@ -81,7 +109,8 @@ ulint
rec_get_info_bits(
/*==============*/
/* out: info bits */
- rec_t* rec); /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp); /* in: TRUE=compact page format */
/**********************************************************
The following function is used to set the info bits of a record. */
UNIV_INLINE
@@ -89,15 +118,47 @@ void
rec_set_info_bits(
/*==============*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ulint bits); /* in: info bits */
/**********************************************************
-Gets the value of the deleted falg in info bits. */
+The following function retrieves the status bits of a new-style record. */
UNIV_INLINE
-ibool
-rec_info_bits_get_deleted_flag(
-/*===========================*/
- /* out: TRUE if deleted flag set */
- ulint info_bits); /* in: info bits from a record */
+ulint
+rec_get_status(
+/*===========*/
+ /* out: status bits */
+ rec_t* rec); /* in: physical record */
+
+/**********************************************************
+The following function is used to set the status bits of a new-style record. */
+UNIV_INLINE
+void
+rec_set_status(
+/*===========*/
+ rec_t* rec, /* in: physical record */
+ ulint bits); /* in: status bits */
+
+/**********************************************************
+The following function is used to retrieve the info and status
+bits of a record. (Only compact records have status bits.) */
+UNIV_INLINE
+ulint
+rec_get_info_and_status_bits(
+/*=========================*/
+ /* out: info and status bits */
+ rec_t* rec, /* in: physical record */
+ ibool comp); /* in: TRUE=compact page format */
+/**********************************************************
+The following function is used to set the info and status
+bits of a record. (Only compact records have status bits.) */
+UNIV_INLINE
+void
+rec_set_info_and_status_bits(
+/*=========================*/
+ rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
+ ulint bits); /* in: info and status bits */
+
/**********************************************************
The following function tells if record is delete marked. */
UNIV_INLINE
@@ -105,7 +166,8 @@ ibool
rec_get_deleted_flag(
/*=================*/
/* out: TRUE if delete marked */
- rec_t* rec); /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp); /* in: TRUE=compact page format */
/**********************************************************
The following function is used to set the deleted bit. */
UNIV_INLINE
@@ -113,8 +175,25 @@ void
rec_set_deleted_flag(
/*=================*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ibool flag); /* in: TRUE if delete marked */
/**********************************************************
+The following function tells if a new-style record is a node pointer. */
+UNIV_INLINE
+ibool
+rec_get_node_ptr_flag(
+/*=================*/
+ /* out: TRUE if node pointer */
+ rec_t* rec); /* in: physical record */
+/**********************************************************
+The following function is used to flag a record as a node pointer. */
+UNIV_INLINE
+void
+rec_set_node_ptr_flag(
+/*=================*/
+ rec_t* rec, /* in: physical record */
+ ibool flag); /* in: TRUE if the record is a node pointer */
+/**********************************************************
The following function is used to get the order number
of the record in the heap of the index page. */
UNIV_INLINE
@@ -122,7 +201,8 @@ ulint
rec_get_heap_no(
/*=============*/
/* out: heap order number */
- rec_t* rec); /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp); /* in: TRUE=compact page format */
/**********************************************************
The following function is used to set the heap number
field in the record. */
@@ -131,6 +211,7 @@ void
rec_set_heap_no(
/*=============*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ulint heap_no);/* in: the heap number */
/**********************************************************
The following function is used to test whether the data offsets
@@ -141,31 +222,65 @@ rec_get_1byte_offs_flag(
/*====================*/
/* out: TRUE if 1-byte form */
rec_t* rec); /* in: physical record */
+/**********************************************************
+The following function determines the offsets to each field
+in the record. It can reuse a previously allocated array. */
+
+ulint*
+rec_get_offsets_func(
+/*=================*/
+ /* out: the new offsets */
+ rec_t* rec, /* in: physical record */
+ dict_index_t* index, /* in: record descriptor */
+ ulint* offsets,/* in: array consisting of offsets[0]
+ allocated elements, or an array from
+ rec_get_offsets(), or NULL */
+ ulint n_fields,/* in: maximum number of initialized fields
+ (ULINT_UNDEFINED if all fields) */
+ mem_heap_t** heap, /* in/out: memory heap */
+ const char* file, /* in: file name where called */
+ ulint line); /* in: line number where called */
+
+#define rec_get_offsets(rec,index,offsets,n,heap) \
+ rec_get_offsets_func(rec,index,offsets,n,heap,__FILE__,__LINE__)
+
+/****************************************************************
+Validates offsets returned by rec_get_offsets(). */
+UNIV_INLINE
+ibool
+rec_offs_validate(
+/*==============*/
+ /* out: TRUE if valid */
+ rec_t* rec, /* in: record or NULL */
+ dict_index_t* index, /* in: record descriptor or NULL */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
+/****************************************************************
+Updates debug data in offsets, in order to avoid bogus
+rec_offs_validate() failures. */
+UNIV_INLINE
+void
+rec_offs_make_valid(
+/*================*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index,/* in: record descriptor */
+ ulint* offsets);/* in: array returned by rec_get_offsets() */
+
/****************************************************************
The following function is used to get a pointer to the nth
-data field in the record. */
+data field in an old-style record. */
byte*
-rec_get_nth_field(
-/*==============*/
+rec_get_nth_field_old(
+/*==================*/
/* out: pointer to the field */
rec_t* rec, /* in: record */
ulint n, /* in: index of the field */
ulint* len); /* out: length of the field; UNIV_SQL_NULL
if SQL null */
/****************************************************************
-Return field length or UNIV_SQL_NULL. */
-UNIV_INLINE
-ulint
-rec_get_nth_field_len(
-/*==================*/
- /* out: length of the field; UNIV_SQL_NULL if SQL
- null */
- rec_t* rec, /* in: record */
- ulint n); /* in: index of the field */
-/****************************************************************
-Gets the physical size of a field. Also an SQL null may have a field of
-size > 0, if the data type is of a fixed size. */
+Gets the physical size of an old-style field.
+Also an SQL null may have a field of size > 0,
+if the data type is of a fixed size. */
UNIV_INLINE
ulint
rec_get_nth_field_size(
@@ -173,131 +288,185 @@ rec_get_nth_field_size(
/* out: field size in bytes */
rec_t* rec, /* in: record */
ulint n); /* in: index of the field */
-/***************************************************************
-Gets the value of the ith field extern storage bit. If it is TRUE
-it means that the field is stored on another page. */
+/****************************************************************
+The following function is used to get a pointer to the nth
+data field in a record. */
+UNIV_INLINE
+byte*
+rec_get_nth_field(
+/*==============*/
+ /* out: pointer to the field */
+ rec_t* rec, /* in: record */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n, /* in: index of the field */
+ ulint* len); /* out: length of the field; UNIV_SQL_NULL
+ if SQL null */
+/**********************************************************
+Determine if the offsets are for a record in the new
+compact format. */
UNIV_INLINE
ibool
-rec_get_nth_field_extern_bit(
-/*=========================*/
- /* in: TRUE or FALSE */
- rec_t* rec, /* in: record */
- ulint i); /* in: ith field */
+rec_offs_comp(
+/*==========*/
+ /* out: TRUE if compact format */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
+/**********************************************************
+Returns TRUE if the nth field of rec is SQL NULL. */
+UNIV_INLINE
+ibool
+rec_offs_nth_null(
+/*==============*/
+ /* out: TRUE if SQL NULL */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n); /* in: nth field */
+/**********************************************************
+Returns TRUE if the extern bit is set in nth field of rec. */
+UNIV_INLINE
+ibool
+rec_offs_nth_extern(
+/*================*/
+ /* out: TRUE if externally stored */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n); /* in: nth field */
+/**********************************************************
+Gets the physical size of a field. */
+UNIV_INLINE
+ulint
+rec_offs_nth_size(
+/*==============*/
+ /* out: length of field */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n); /* in: nth field */
+
/**********************************************************
Returns TRUE if the extern bit is set in any of the fields
of rec. */
UNIV_INLINE
ibool
-rec_contains_externally_stored_field(
-/*=================================*/
- /* out: TRUE if a field is stored externally */
- rec_t* rec); /* in: record */
+rec_offs_any_extern(
+/*================*/
+ /* out: TRUE if a field is stored externally */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/***************************************************************
Sets the value of the ith field extern storage bit. */
-
+UNIV_INLINE
void
rec_set_nth_field_extern_bit(
/*=========================*/
- rec_t* rec, /* in: record */
- ulint i, /* in: ith field */
- ibool val, /* in: value to set */
- mtr_t* mtr); /* in: mtr holding an X-latch to the page where
- rec is, or NULL; in the NULL case we do not
- write to log about the change */
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: record descriptor */
+ ulint i, /* in: ith field */
+ ibool val, /* in: value to set */
+ mtr_t* mtr); /* in: mtr holding an X-latch to the page
+ where rec is, or NULL; in the NULL case
+ we do not write to log about the change */
/***************************************************************
Sets TRUE the extern storage bits of fields mentioned in an array. */
void
rec_set_field_extern_bits(
/*======================*/
- rec_t* rec, /* in: record */
- ulint* vec, /* in: array of field numbers */
- ulint n_fields, /* in: number of fields numbers */
- mtr_t* mtr); /* in: mtr holding an X-latch to the page
- where rec is, or NULL; in the NULL case we
- do not write to log about the change */
-/****************************************************************
-The following function is used to get a copy of the nth
-data field in the record to a buffer. */
-UNIV_INLINE
-void
-rec_copy_nth_field(
-/*===============*/
- void* buf, /* in: pointer to the buffer */
- rec_t* rec, /* in: record */
- ulint n, /* in: index of the field */
- ulint* len); /* out: length of the field; UNIV_SQL_NULL if SQL
- null */
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: record descriptor */
+ const ulint* vec, /* in: array of field numbers */
+ ulint n_fields,/* in: number of field numbers */
+ mtr_t* mtr); /* in: mtr holding an X-latch to the page
+ where rec is, or NULL; in the NULL case
+ we do not write to log about the change */
/***************************************************************
-This is used to modify the value of an already existing field in
-a physical record. The previous value must have exactly the same
-size as the new value. If len is UNIV_SQL_NULL then the field is
-treated as SQL null. */
+This is used to modify the value of an already existing field in a record.
+The previous value must have exactly the same size as the new value. If len
+is UNIV_SQL_NULL then the field is treated as an SQL null for old-style
+records. For new-style records, len must not be UNIV_SQL_NULL. */
UNIV_INLINE
void
rec_set_nth_field(
/*==============*/
- rec_t* rec, /* in: record */
- ulint n, /* in: index of the field */
- void* data, /* in: pointer to the data if not SQL null */
- ulint len); /* in: length of the data or UNIV_SQL_NULL.
- If not SQL null, must have the same length as the
- previous value. If SQL null, previous value must be
- SQL null. */
+ rec_t* rec, /* in: record */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n, /* in: index number of the field */
+ const void* data, /* in: pointer to the data if not SQL null */
+ ulint len); /* in: length of the data or UNIV_SQL_NULL.
+ If not SQL null, must have the same
+ length as the previous value.
+ If SQL null, previous value must be
+ SQL null. */
/**************************************************************
-The following function returns the data size of a physical
+The following function returns the data size of an old-style physical
record, that is the sum of field lengths. SQL null fields
are counted as length 0 fields. The value returned by the function
is the distance from record origin to record end in bytes. */
UNIV_INLINE
ulint
-rec_get_data_size(
-/*==============*/
- /* out: size */
+rec_get_data_size_old(
+/*==================*/
+ /* out: size */
rec_t* rec); /* in: physical record */
/**************************************************************
+The following function returns the number of fields in a record. */
+UNIV_INLINE
+ulint
+rec_offs_n_fields(
+/*===============*/
+ /* out: number of fields */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
+/**************************************************************
+The following function returns the data size of a physical
+record, that is the sum of field lengths. SQL null fields
+are counted as length 0 fields. The value returned by the function
+is the distance from record origin to record end in bytes. */
+UNIV_INLINE
+ulint
+rec_offs_data_size(
+/*===============*/
+ /* out: size */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
+/**************************************************************
Returns the total size of record minus data size of record.
The value returned by the function is the distance from record
start to record origin in bytes. */
UNIV_INLINE
ulint
-rec_get_extra_size(
-/*===============*/
- /* out: size */
- rec_t* rec); /* in: physical record */
-/**************************************************************
+rec_offs_extra_size(
+/*================*/
+ /* out: size */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
+/**************************************************************
Returns the total size of a physical record. */
UNIV_INLINE
ulint
-rec_get_size(
-/*=========*/
- /* out: size */
- rec_t* rec); /* in: physical record */
+rec_offs_size(
+/*==========*/
+ /* out: size */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/**************************************************************
Returns a pointer to the start of the record. */
UNIV_INLINE
byte*
rec_get_start(
/*==========*/
- /* out: pointer to start */
- rec_t* rec); /* in: pointer to record */
+ /* out: pointer to start */
+ rec_t* rec, /* in: pointer to record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/**************************************************************
Returns a pointer to the end of the record. */
UNIV_INLINE
byte*
rec_get_end(
/*========*/
- /* out: pointer to end */
- rec_t* rec); /* in: pointer to record */
+ /* out: pointer to end */
+ rec_t* rec, /* in: pointer to record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/*******************************************************************
Copies a physical record to a buffer. */
UNIV_INLINE
rec_t*
rec_copy(
/*=====*/
- /* out: pointer to the origin of the copied record */
- void* buf, /* in: buffer */
- rec_t* rec); /* in: physical record */
+ /* out: pointer to the origin of the copy */
+ void* buf, /* in: buffer */
+ const rec_t* rec, /* in: physical record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/******************************************************************
Copies the first n fields of a physical record to a new physical record in
a buffer. */
@@ -305,49 +474,43 @@ a buffer. */
rec_t*
rec_copy_prefix_to_buf(
/*===================*/
- /* out, own: copied record */
- rec_t* rec, /* in: physical record */
- ulint n_fields, /* in: number of fields to copy */
- byte** buf, /* in/out: memory buffer for the copied prefix,
- or NULL */
- ulint* buf_size); /* in/out: buffer size */
+ /* out, own: copied record */
+ rec_t* rec, /* in: physical record */
+ dict_index_t* index, /* in: record descriptor */
+ ulint n_fields, /* in: number of fields to copy */
+ byte** buf, /* in/out: memory buffer
+ for the copied prefix, or NULL */
+ ulint* buf_size); /* in/out: buffer size */
/****************************************************************
Folds a prefix of a physical record to a ulint. */
UNIV_INLINE
ulint
rec_fold(
/*=====*/
- /* out: the folded value */
- rec_t* rec, /* in: the physical record */
- ulint n_fields, /* in: number of complete fields to fold */
- ulint n_bytes, /* in: number of bytes to fold in an
- incomplete last field */
- dulint tree_id); /* in: index tree id */
+ /* out: the folded value */
+ rec_t* rec, /* in: the physical record */
+ const ulint* offsets, /* in: array returned by
+ rec_get_offsets() */
+ ulint n_fields, /* in: number of complete
+ fields to fold */
+ ulint n_bytes, /* in: number of bytes to fold
+ in an incomplete last field */
+ dulint tree_id); /* in: index tree id */
/*************************************************************
Builds a physical record out of a data tuple and stores it beginning from
address destination. */
-UNIV_INLINE
+
rec_t*
rec_convert_dtuple_to_rec(
/*======================*/
- /* out: pointer to the origin of physical
- record */
- byte* destination, /* in: start address of the physical record */
- dtuple_t* dtuple); /* in: data tuple */
-/*************************************************************
-Builds a physical record out of a data tuple and stores it beginning from
-address destination. */
-
-rec_t*
-rec_convert_dtuple_to_rec_low(
-/*==========================*/
- /* out: pointer to the origin of physical
- record */
- byte* destination, /* in: start address of the physical record */
- dtuple_t* dtuple, /* in: data tuple */
- ulint data_size); /* in: data size of dtuple */
+ /* out: pointer to the origin
+ of physical record */
+ byte* buf, /* in: start address of the
+ physical record */
+ dict_index_t* index, /* in: record descriptor */
+ dtuple_t* dtuple);/* in: data tuple */
/**************************************************************
-Returns the extra size of a physical record if we know its
+Returns the extra size of an old-style physical record if we know its
data size and number of fields. */
UNIV_INLINE
ulint
@@ -355,7 +518,8 @@ rec_get_converted_extra_size(
/*=========================*/
/* out: extra size */
ulint data_size, /* in: data size */
- ulint n_fields); /* in: number of fields */
+ ulint n_fields) /* in: number of fields */
+ __attribute__((const));
/**************************************************************
The following function returns the size of a data tuple when converted to
a physical record. */
@@ -364,6 +528,7 @@ ulint
rec_get_converted_size(
/*===================*/
/* out: size */
+ dict_index_t* index, /* in: record descriptor */
dtuple_t* dtuple);/* in: data tuple */
/******************************************************************
Copies the first n fields of a physical record to a data tuple.
@@ -374,6 +539,7 @@ rec_copy_prefix_to_dtuple(
/*======================*/
dtuple_t* tuple, /* in: data tuple */
rec_t* rec, /* in: physical record */
+ dict_index_t* index, /* in: record descriptor */
ulint n_fields, /* in: number of fields to copy */
mem_heap_t* heap); /* in: memory heap */
/*******************************************************************
@@ -382,16 +548,35 @@ Validates the consistency of a physical record. */
ibool
rec_validate(
/*=========*/
- /* out: TRUE if ok */
- rec_t* rec); /* in: physical record */
+ /* out: TRUE if ok */
+ rec_t* rec, /* in: physical record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
+/*******************************************************************
+Prints an old-style physical record. */
+
+void
+rec_print_old(
+/*==========*/
+ FILE* file, /* in: file where to print */
+ rec_t* rec); /* in: physical record */
+/*******************************************************************
+Prints a physical record. */
+
+void
+rec_print_new(
+/*==========*/
+ FILE* file, /* in: file where to print */
+ rec_t* rec, /* in: physical record */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/*******************************************************************
Prints a physical record. */
void
rec_print(
/*======*/
- FILE* file, /* in: file where to print */
- rec_t* rec); /* in: physical record */
+ FILE* file, /* in: file where to print */
+ rec_t* rec, /* in: physical record */
+ dict_index_t* index); /* in: record descriptor */
#define REC_INFO_BITS 6 /* This is single byte bit-field */
diff --git a/innobase/include/rem0rec.ic b/innobase/include/rem0rec.ic
index c36bf8f6d6e..2593fb8edeb 100644
--- a/innobase/include/rem0rec.ic
+++ b/innobase/include/rem0rec.ic
@@ -8,9 +8,19 @@ Created 5/30/1994 Heikki Tuuri
#include "mach0data.h"
#include "ut0byte.h"
+#include "dict0dict.h"
-/* Offsets of the bit-fields in the record. NOTE! In the table the most
-significant bytes and bits are written below less significant.
+/* Compact flag ORed to the extra size returned by rec_get_offsets() */
+#define REC_OFFS_COMPACT ((ulint) 1 << 31)
+/* SQL NULL flag in offsets returned by rec_get_offsets() */
+#define REC_OFFS_SQL_NULL ((ulint) 1 << 31)
+/* External flag in offsets returned by rec_get_offsets() */
+#define REC_OFFS_EXTERNAL ((ulint) 1 << 30)
+/* Mask for offsets returned by rec_get_offsets() */
+#define REC_OFFS_MASK (REC_OFFS_EXTERNAL - 1)
+
+/* Offsets of the bit-fields in an old-style record. NOTE! In the table the
+most significant bytes and bits are written below less significant.
(1) byte offset (2) bit usage within byte
downward from
@@ -25,6 +35,35 @@ significant bytes and bits are written below less significant.
4 bits info bits
*/
+/* Offsets of the bit-fields in a new-style record. NOTE! In the table the
+most significant bytes and bits are written below less significant.
+
+ (1) byte offset (2) bit usage within byte
+ downward from
+ origin -> 1 8 bits relative offset of next record
+ 2 8 bits relative offset of next record
+ the relative offset is an unsigned 16-bit
+ integer:
+ (offset_of_next_record
+ - offset_of_this_record) mod 64Ki,
+ where mod is the modulo as a non-negative
+ number;
+ we can calculate the offset of the next
+ record with the formula:
+ relative_offset + offset_of_this_record
+ mod UNIV_PAGE_SIZE
+ 3 3 bits status:
+ 000=conventional record
+ 001=node pointer record (inside B-tree)
+ 010=infimum record
+ 011=supremum record
+ 1xx=reserved
+ 5 bits heap number
+ 4 8 bits heap number
+ 5 4 bits n_owned
+ 4 bits info bits
+*/
+
/* We list the byte offsets from the origin of the record, the mask,
and the shift needed to obtain each bit-field of the record. */
@@ -32,22 +71,30 @@ and the shift needed to obtain each bit-field of the record. */
#define REC_NEXT_MASK 0xFFFFUL
#define REC_NEXT_SHIFT 0
-#define REC_SHORT 3 /* This is single byte bit-field */
-#define REC_SHORT_MASK 0x1UL
-#define REC_SHORT_SHIFT 0
+#define REC_OLD_SHORT 3 /* This is single byte bit-field */
+#define REC_OLD_SHORT_MASK 0x1UL
+#define REC_OLD_SHORT_SHIFT 0
+
+#define REC_OLD_N_FIELDS 4
+#define REC_OLD_N_FIELDS_MASK 0x7FEUL
+#define REC_OLD_N_FIELDS_SHIFT 1
-#define REC_N_FIELDS 4
-#define REC_N_FIELDS_MASK 0x7FEUL
-#define REC_N_FIELDS_SHIFT 1
+#define REC_NEW_STATUS 3 /* This is single byte bit-field */
+#define REC_NEW_STATUS_MASK 0x7UL
+#define REC_NEW_STATUS_SHIFT 0
-#define REC_HEAP_NO 5
+#define REC_OLD_HEAP_NO 5
+#define REC_NEW_HEAP_NO 4
#define REC_HEAP_NO_MASK 0xFFF8UL
#define REC_HEAP_NO_SHIFT 3
-#define REC_N_OWNED 6 /* This is single byte bit-field */
+#define REC_OLD_N_OWNED 6 /* This is single byte bit-field */
+#define REC_NEW_N_OWNED 5 /* This is single byte bit-field */
#define REC_N_OWNED_MASK 0xFUL
#define REC_N_OWNED_SHIFT 0
+#define REC_OLD_INFO_BITS 6 /* This is single byte bit-field */
+#define REC_NEW_INFO_BITS 5 /* This is single byte bit-field */
#define REC_INFO_BITS_MASK 0xF0UL
#define REC_INFO_BITS_SHIFT 0
@@ -65,26 +112,24 @@ a field stored to another page: */
#define REC_2BYTE_EXTERN_MASK 0x4000UL
-/****************************************************************
-Return field length or UNIV_SQL_NULL. */
-UNIV_INLINE
-ulint
-rec_get_nth_field_len(
-/*==================*/
- /* out: length of the field; UNIV_SQL_NULL if SQL
- null */
- rec_t* rec, /* in: record */
- ulint n) /* in: index of the field */
-{
- ulint len;
-
- rec_get_nth_field(rec, n, &len);
-
- return(len);
-}
+#if REC_OLD_SHORT_MASK << (8 * (REC_OLD_SHORT - 3)) \
+ ^ REC_OLD_N_FIELDS_MASK << (8 * (REC_OLD_N_FIELDS - 4)) \
+ ^ REC_HEAP_NO_MASK << (8 * (REC_OLD_HEAP_NO - 4)) \
+ ^ REC_N_OWNED_MASK << (8 * (REC_OLD_N_OWNED - 3)) \
+ ^ REC_INFO_BITS_MASK << (8 * (REC_OLD_INFO_BITS - 3)) \
+ ^ 0xFFFFFFFFUL
+# error "sum of old-style masks != 0xFFFFFFFFUL"
+#endif
+#if REC_NEW_STATUS_MASK << (8 * (REC_NEW_STATUS - 3)) \
+ ^ REC_HEAP_NO_MASK << (8 * (REC_NEW_HEAP_NO - 4)) \
+ ^ REC_N_OWNED_MASK << (8 * (REC_NEW_N_OWNED - 3)) \
+ ^ REC_INFO_BITS_MASK << (8 * (REC_NEW_INFO_BITS - 3)) \
+ ^ 0xFFFFFFUL
+# error "sum of new-style masks != 0xFFFFFFUL"
+#endif
/***************************************************************
-Sets the value of the ith field SQL null bit. */
+Sets the value of the ith field SQL null bit of an old-style record. */
void
rec_set_nth_field_null_bit(
@@ -93,8 +138,8 @@ rec_set_nth_field_null_bit(
ulint i, /* in: ith field */
ibool val); /* in: value to set */
/***************************************************************
-Sets a record field to SQL null. The physical size of the field is not
-changed. */
+Sets an old-style record field to SQL null.
+The physical size of the field is not changed. */
void
rec_set_nth_field_sql_null(
@@ -102,6 +147,32 @@ rec_set_nth_field_sql_null(
rec_t* rec, /* in: record */
ulint n); /* in: index of the field */
+/***************************************************************
+Sets the value of the ith field extern storage bit of an old-style record. */
+
+void
+rec_set_nth_field_extern_bit_old(
+/*=============================*/
+ rec_t* rec, /* in: old-style record */
+ ulint i, /* in: ith field */
+ ibool val, /* in: value to set */
+ mtr_t* mtr); /* in: mtr holding an X-latch to the page where
+ rec is, or NULL; in the NULL case we do not
+ write to log about the change */
+/***************************************************************
+Sets the value of the ith field extern storage bit of a new-style record. */
+
+void
+rec_set_nth_field_extern_bit_new(
+/*=============================*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: record descriptor */
+ ulint ith, /* in: ith field */
+ ibool val, /* in: value to set */
+ mtr_t* mtr); /* in: mtr holding an X-latch to the page
+ where rec is, or NULL; in the NULL case
+ we do not write to log about the change */
+
/**********************************************************
Gets a bit field from within 1 byte. */
UNIV_INLINE
@@ -131,7 +202,7 @@ rec_set_bit_field_1(
ulint shift) /* in: shift right applied after masking */
{
ut_ad(rec);
- ut_ad(offs <= REC_N_EXTRA_BYTES);
+ ut_ad(offs <= REC_N_OLD_EXTRA_BYTES);
ut_ad(mask);
ut_ad(mask <= 0xFFUL);
ut_ad(((mask >> shift) << shift) == mask);
@@ -171,30 +242,14 @@ rec_set_bit_field_2(
ulint shift) /* in: shift right applied after masking */
{
ut_ad(rec);
- ut_ad(offs <= REC_N_EXTRA_BYTES);
+ ut_ad(offs <= REC_N_OLD_EXTRA_BYTES);
ut_ad(mask > 0xFFUL);
ut_ad(mask <= 0xFFFFUL);
ut_ad((mask >> shift) & 1);
ut_ad(0 == ((mask >> shift) & ((mask >> shift) + 1)));
ut_ad(((mask >> shift) << shift) == mask);
ut_ad(((val << shift) & mask) == (val << shift));
-#ifdef UNIV_DEBUG
- {
- ulint m;
-
- /* The following assertion checks that the masks of currently
- defined bit-fields in bytes 3-6 do not overlap. */
- m = (ulint)((REC_SHORT_MASK << (8 * (REC_SHORT - 3)))
- + (REC_N_FIELDS_MASK << (8 * (REC_N_FIELDS - 4)))
- + (REC_HEAP_NO_MASK << (8 * (REC_HEAP_NO - 4)))
- + (REC_N_OWNED_MASK << (8 * (REC_N_OWNED - 3)))
- + (REC_INFO_BITS_MASK << (8 * (REC_INFO_BITS - 3))));
- if (m != ut_dbg_zero + 0xFFFFFFFFUL) {
- fprintf(stderr, "Sum of masks %lx\n", m);
- ut_error;
- }
- }
-#endif
+
mach_write_to_2(rec - offs,
(mach_read_from_2(rec - offs) & ~mask)
| (val << shift));
@@ -207,18 +262,46 @@ UNIV_INLINE
ulint
rec_get_next_offs(
/*==============*/
- /* out: the page offset of the next chained record */
- rec_t* rec) /* in: physical record */
+ /* out: the page offset of the next chained record, or
+ 0 if none */
+ rec_t* rec, /* in: physical record */
+ ibool comp) /* in: TRUE=compact page format */
{
- ulint ret;
-
- ut_ad(rec);
-
- ret = rec_get_bit_field_2(rec, REC_NEXT, REC_NEXT_MASK,
- REC_NEXT_SHIFT);
- ut_ad(ret < UNIV_PAGE_SIZE);
+ ulint field_value;
+
+ ut_ad(REC_NEXT_MASK == 0xFFFFUL);
+ ut_ad(REC_NEXT_SHIFT == 0);
+
+ field_value = mach_read_from_2(rec - REC_NEXT);
+
+ if (comp) {
+#if UNIV_PAGE_SIZE <= 32768
+ /* Note that for 64 KiB pages, field_value can 'wrap around'
+ and the debug assertion is not valid */
+
+ /* In the following assertion, field_value is interpreted
+ as a signed 16-bit integer in two's complement arithmetic.
+ If all platforms defined int16_t in the standard headers,
+ the expression could be written simpler as
+ (int16_t) field_value + ut_align_offset(...) < UNIV_PAGE_SIZE
+ */
+ ut_ad((field_value >= 32768
+ ? field_value - 65536
+ : field_value)
+ + ut_align_offset(rec, UNIV_PAGE_SIZE)
+ < UNIV_PAGE_SIZE);
+#endif
+ if (field_value == 0) {
+
+ return(0);
+ }
+
+ return(ut_align_offset(rec + field_value, UNIV_PAGE_SIZE));
+ } else {
+ ut_ad(field_value < UNIV_PAGE_SIZE);
- return(ret);
+ return(field_value);
+ }
}
/**********************************************************
@@ -229,21 +312,42 @@ void
rec_set_next_offs(
/*==============*/
rec_t* rec, /* in: physical record */
- ulint next) /* in: offset of the next record */
+ ibool comp, /* in: TRUE=compact page format */
+ ulint next) /* in: offset of the next record, or 0 if none */
{
ut_ad(rec);
ut_ad(UNIV_PAGE_SIZE > next);
+ ut_ad(REC_NEXT_MASK == 0xFFFFUL);
+ ut_ad(REC_NEXT_SHIFT == 0);
+
+ if (comp) {
+ ulint field_value;
+
+ if (next) {
+ /* The following two statements calculate
+ next - offset_of_rec mod 64Ki, where mod is the modulo
+ as a non-negative number */
+
+ field_value = (ulint)((lint)next
+ - (lint)ut_align_offset(rec, UNIV_PAGE_SIZE));
+ field_value &= REC_NEXT_MASK;
+ } else {
+ field_value = 0;
+ }
- rec_set_bit_field_2(rec, next, REC_NEXT, REC_NEXT_MASK,
- REC_NEXT_SHIFT);
+ mach_write_to_2(rec - REC_NEXT, field_value);
+ } else {
+ mach_write_to_2(rec - REC_NEXT, next);
+ }
}
/**********************************************************
-The following function is used to get the number of fields in the record. */
+The following function is used to get the number of fields
+in an old-style record. */
UNIV_INLINE
ulint
-rec_get_n_fields(
-/*=============*/
+rec_get_n_fields_old(
+/*=================*/
/* out: number of data fields */
rec_t* rec) /* in: physical record */
{
@@ -251,8 +355,8 @@ rec_get_n_fields(
ut_ad(rec);
- ret = rec_get_bit_field_2(rec, REC_N_FIELDS, REC_N_FIELDS_MASK,
- REC_N_FIELDS_SHIFT);
+ ret = rec_get_bit_field_2(rec, REC_OLD_N_FIELDS,
+ REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT);
ut_ad(ret <= REC_MAX_N_FIELDS);
ut_ad(ret > 0);
@@ -260,12 +364,12 @@ rec_get_n_fields(
}
/**********************************************************
-The following function is used to set the number of fields field in the
-record. */
+The following function is used to set the number of fields
+in an old-style record. */
UNIV_INLINE
void
-rec_set_n_fields(
-/*=============*/
+rec_set_n_fields_old(
+/*=================*/
rec_t* rec, /* in: physical record */
ulint n_fields) /* in: the number of fields */
{
@@ -273,8 +377,58 @@ rec_set_n_fields(
ut_ad(n_fields <= REC_MAX_N_FIELDS);
ut_ad(n_fields > 0);
- rec_set_bit_field_2(rec, n_fields, REC_N_FIELDS, REC_N_FIELDS_MASK,
- REC_N_FIELDS_SHIFT);
+ rec_set_bit_field_2(rec, n_fields, REC_OLD_N_FIELDS,
+ REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT);
+}
+
+/**********************************************************
+The following function retrieves the status bits of a new-style record. */
+UNIV_INLINE
+ulint
+rec_get_status(
+/*===========*/
+ /* out: status bits */
+ rec_t* rec) /* in: physical record */
+{
+ ulint ret;
+
+ ut_ad(rec);
+
+ ret = rec_get_bit_field_1(rec, REC_NEW_STATUS,
+ REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
+ ut_ad((ret & ~REC_NEW_STATUS_MASK) == 0);
+
+ return(ret);
+}
+
+/**********************************************************
+The following function is used to get the number of fields
+in a record. */
+UNIV_INLINE
+ulint
+rec_get_n_fields(
+/*=============*/
+ /* out: number of data fields */
+ rec_t* rec, /* in: physical record */
+ dict_index_t* index) /* in: record descriptor */
+{
+ ut_ad(rec);
+ ut_ad(index);
+ if (!index->table->comp) {
+ return(rec_get_n_fields_old(rec));
+ }
+ switch (rec_get_status(rec)) {
+ case REC_STATUS_ORDINARY:
+ return(dict_index_get_n_fields(index));
+ case REC_STATUS_NODE_PTR:
+ return(dict_index_get_n_unique_in_tree(index) + 1);
+ case REC_STATUS_INFIMUM:
+ case REC_STATUS_SUPREMUM:
+ return(1);
+ default:
+ ut_error;
+ return(ULINT_UNDEFINED);
+ }
}
/**********************************************************
@@ -285,14 +439,16 @@ ulint
rec_get_n_owned(
/*============*/
/* out: number of owned records */
- rec_t* rec) /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp) /* in: TRUE=compact page format */
{
ulint ret;
ut_ad(rec);
- ret = rec_get_bit_field_1(rec, REC_N_OWNED, REC_N_OWNED_MASK,
- REC_N_OWNED_SHIFT);
+ ret = rec_get_bit_field_1(rec,
+ comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED,
+ REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
ut_ad(ret <= REC_MAX_N_OWNED);
return(ret);
@@ -305,13 +461,15 @@ void
rec_set_n_owned(
/*============*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ulint n_owned) /* in: the number of owned */
{
ut_ad(rec);
ut_ad(n_owned <= REC_MAX_N_OWNED);
- rec_set_bit_field_1(rec, n_owned, REC_N_OWNED, REC_N_OWNED_MASK,
- REC_N_OWNED_SHIFT);
+ rec_set_bit_field_1(rec, n_owned,
+ comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED,
+ REC_N_OWNED_MASK, REC_N_OWNED_SHIFT);
}
/**********************************************************
@@ -321,14 +479,16 @@ ulint
rec_get_info_bits(
/*==============*/
/* out: info bits */
- rec_t* rec) /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp) /* in: TRUE=compact page format */
{
ulint ret;
ut_ad(rec);
- ret = rec_get_bit_field_1(rec, REC_INFO_BITS, REC_INFO_BITS_MASK,
- REC_INFO_BITS_SHIFT);
+ ret = rec_get_bit_field_1(rec,
+ comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
+ REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
ut_ad((ret & ~REC_INFO_BITS_MASK) == 0);
return(ret);
@@ -341,30 +501,78 @@ void
rec_set_info_bits(
/*==============*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ulint bits) /* in: info bits */
{
ut_ad(rec);
ut_ad((bits & ~REC_INFO_BITS_MASK) == 0);
- rec_set_bit_field_1(rec, bits, REC_INFO_BITS, REC_INFO_BITS_MASK,
- REC_INFO_BITS_SHIFT);
+ rec_set_bit_field_1(rec, bits,
+ comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS,
+ REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT);
}
/**********************************************************
-Gets the value of the deleted flag in info bits. */
+The following function is used to set the status bits of a new-style record. */
UNIV_INLINE
-ibool
-rec_info_bits_get_deleted_flag(
-/*===========================*/
- /* out: TRUE if deleted flag set */
- ulint info_bits) /* in: info bits from a record */
+void
+rec_set_status(
+/*===========*/
+ rec_t* rec, /* in: physical record */
+ ulint bits) /* in: status bits */
{
- if (info_bits & REC_INFO_DELETED_FLAG) {
+ ut_ad(rec);
+ ut_ad((bits & ~REC_NEW_STATUS_MASK) == 0);
- return(TRUE);
- }
+ rec_set_bit_field_1(rec, bits, REC_NEW_STATUS,
+ REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT);
+}
- return(FALSE);
+/**********************************************************
+The following function is used to retrieve the info and status
+bits of a record. (Only compact records have status bits.) */
+UNIV_INLINE
+ulint
+rec_get_info_and_status_bits(
+/*=========================*/
+ /* out: info bits, ORed with the status bits if comp */
+ rec_t* rec, /* in: physical record */
+ ibool comp) /* in: TRUE=compact page format */
+{
+ ulint bits;
+#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
+& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
+# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
+#endif
+ if (comp) {
+ bits = rec_get_info_bits(rec, TRUE) | rec_get_status(rec);
+ } else {
+ bits = rec_get_info_bits(rec, FALSE);
+ ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
+ }
+ return(bits);
+}
+/**********************************************************
+The following function is used to set the info and status
+bits of a record. (Only compact records have status bits.) */
+UNIV_INLINE
+void
+rec_set_info_and_status_bits(
+/*=========================*/
+ rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
+ ulint bits) /* in: info bits, plus status bits if comp */
+{
+#if (REC_NEW_STATUS_MASK >> REC_NEW_STATUS_SHIFT) \
+& (REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)
+# error "REC_NEW_STATUS_MASK and REC_INFO_BITS_MASK overlap"
+#endif
+ if (comp) {
+ rec_set_status(rec, bits & REC_NEW_STATUS_MASK);
+ } else {
+ ut_ad(!(bits & ~(REC_INFO_BITS_MASK >> REC_INFO_BITS_SHIFT)));
+ }
+ rec_set_info_bits(rec, comp, bits & ~REC_NEW_STATUS_MASK);
}
/**********************************************************
@@ -374,9 +582,10 @@ ibool
rec_get_deleted_flag(
/*=================*/
/* out: TRUE if delete marked */
- rec_t* rec) /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp) /* in: TRUE=compact page format */
{
- if (REC_INFO_DELETED_FLAG & rec_get_info_bits(rec)) {
+ if (REC_INFO_DELETED_FLAG & rec_get_info_bits(rec, comp)) {
return(TRUE);
}
@@ -391,6 +600,7 @@ void
rec_set_deleted_flag(
/*=================*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ibool flag) /* in: TRUE if delete marked */
{
ulint old_val;
@@ -399,7 +609,7 @@ rec_set_deleted_flag(
ut_ad(TRUE == 1);
ut_ad(flag <= TRUE);
- old_val = rec_get_info_bits(rec);
+ old_val = rec_get_info_bits(rec, comp);
if (flag) {
new_val = REC_INFO_DELETED_FLAG | old_val;
@@ -407,7 +617,39 @@ rec_set_deleted_flag(
new_val = ~REC_INFO_DELETED_FLAG & old_val;
}
- rec_set_info_bits(rec, new_val);
+ rec_set_info_bits(rec, comp, new_val);
+}
+
+/**********************************************************
+The following function tells if a new-style record is a node pointer. */
+UNIV_INLINE
+ibool
+rec_get_node_ptr_flag(
+/*=================*/
+ /* out: TRUE if node pointer */
+ rec_t* rec) /* in: physical record */
+{
+ return(REC_STATUS_NODE_PTR == rec_get_status(rec));
+}
+
+/**********************************************************
+The following function is used to flag a record as a node pointer. */
+UNIV_INLINE
+void
+rec_set_node_ptr_flag(
+/*=================*/
+ rec_t* rec, /* in: physical record */
+ ibool flag) /* in: TRUE if the record is a node pointer */
+{
+ ulint status;
+ ut_ad(flag <= TRUE);
+ ut_ad(REC_STATUS_NODE_PTR >= rec_get_status(rec));
+ if (flag) {
+ status = REC_STATUS_NODE_PTR;
+ } else {
+ status = REC_STATUS_ORDINARY;
+ }
+ rec_set_status(rec, status);
}
/**********************************************************
@@ -418,14 +660,16 @@ ulint
rec_get_heap_no(
/*=============*/
/* out: heap order number */
- rec_t* rec) /* in: physical record */
+ rec_t* rec, /* in: physical record */
+ ibool comp) /* in: TRUE=compact page format */
{
ulint ret;
ut_ad(rec);
- ret = rec_get_bit_field_2(rec, REC_HEAP_NO, REC_HEAP_NO_MASK,
- REC_HEAP_NO_SHIFT);
+ ret = rec_get_bit_field_2(rec,
+ comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO,
+ REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
ut_ad(ret <= REC_MAX_HEAP_NO);
return(ret);
@@ -438,12 +682,14 @@ void
rec_set_heap_no(
/*=============*/
rec_t* rec, /* in: physical record */
+ ibool comp, /* in: TRUE=compact page format */
ulint heap_no)/* in: the heap number */
{
ut_ad(heap_no <= REC_MAX_HEAP_NO);
- rec_set_bit_field_2(rec, heap_no, REC_HEAP_NO, REC_HEAP_NO_MASK,
- REC_HEAP_NO_SHIFT);
+ rec_set_bit_field_2(rec, heap_no,
+ comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO,
+ REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT);
}
/**********************************************************
@@ -456,10 +702,12 @@ rec_get_1byte_offs_flag(
/* out: TRUE if 1-byte form */
rec_t* rec) /* in: physical record */
{
- ut_ad(TRUE == 1);
+#if TRUE != 1
+#error "TRUE != 1"
+#endif
- return(rec_get_bit_field_1(rec, REC_SHORT, REC_SHORT_MASK,
- REC_SHORT_SHIFT));
+ return(rec_get_bit_field_1(rec, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
+ REC_OLD_SHORT_SHIFT));
}
/**********************************************************
@@ -471,11 +719,13 @@ rec_set_1byte_offs_flag(
rec_t* rec, /* in: physical record */
ibool flag) /* in: TRUE if 1byte form */
{
- ut_ad(TRUE == 1);
+#if TRUE != 1
+#error "TRUE != 1"
+#endif
ut_ad(flag <= TRUE);
- rec_set_bit_field_1(rec, flag, REC_SHORT, REC_SHORT_MASK,
- REC_SHORT_SHIFT);
+ rec_set_bit_field_1(rec, flag, REC_OLD_SHORT, REC_OLD_SHORT_MASK,
+ REC_OLD_SHORT_SHIFT);
}
/**********************************************************
@@ -492,9 +742,9 @@ rec_1_get_field_end_info(
ulint n) /* in: field index */
{
ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields(rec));
+ ut_ad(n < rec_get_n_fields_old(rec));
- return(mach_read_from_1(rec - (REC_N_EXTRA_BYTES + n + 1)));
+ return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1)));
}
/**********************************************************
@@ -511,68 +761,289 @@ rec_2_get_field_end_info(
ulint n) /* in: field index */
{
ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields(rec));
+ ut_ad(n < rec_get_n_fields_old(rec));
- return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2)));
+ return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2)));
}
-/***************************************************************
-Gets the value of the ith field extern storage bit. If it is TRUE
-it means that the field is stored on another page. */
+#ifdef UNIV_DEBUG
+/* Length of the rec_get_offsets() header */
+# define REC_OFFS_HEADER_SIZE 4
+#else /* UNIV_DEBUG */
+/* Length of the rec_get_offsets() header */
+# define REC_OFFS_HEADER_SIZE 2
+#endif /* UNIV_DEBUG */
+
+/* Get the base address of offsets. The extra_size is stored at
+this position, and following positions hold the end offsets of
+the fields. */
+#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE)
+
+/**************************************************************
+The following function returns the number of allocated elements
+for an array of offsets. */
+UNIV_INLINE
+ulint
+rec_offs_get_n_alloc(
+/*=================*/
+ /* out: number of elements */
+ const ulint* offsets)/* in: array for rec_get_offsets() */
+{
+ ulint n_alloc;
+ ut_ad(offsets);
+ n_alloc = offsets[0];
+ ut_ad(n_alloc > 0);
+ return(n_alloc);
+}
+
+/**************************************************************
+The following function sets the number of allocated elements
+for an array of offsets. */
+UNIV_INLINE
+void
+rec_offs_set_n_alloc(
+/*=================*/
+ ulint* offsets, /* in: array for rec_get_offsets() */
+ ulint n_alloc) /* in: number of elements */
+{
+ ut_ad(offsets);
+ ut_ad(n_alloc > 0);
+ offsets[0] = n_alloc;
+}
+
+/**************************************************************
+The following function returns the number of fields in a record. */
+UNIV_INLINE
+ulint
+rec_offs_n_fields(
+/*===============*/
+ /* out: number of fields */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
+{
+ ulint n_fields;
+ ut_ad(offsets);
+ n_fields = offsets[1];
+ ut_ad(n_fields > 0);
+ ut_ad(n_fields <= REC_MAX_N_FIELDS);
+ ut_ad(n_fields + REC_OFFS_HEADER_SIZE
+ <= rec_offs_get_n_alloc(offsets));
+ return(n_fields);
+}
+
+/****************************************************************
+Validates offsets returned by rec_get_offsets(). */
UNIV_INLINE
ibool
-rec_get_nth_field_extern_bit(
-/*=========================*/
- /* in: TRUE or FALSE */
- rec_t* rec, /* in: record */
- ulint i) /* in: ith field */
+rec_offs_validate(
+/*==============*/
+ /* out: TRUE if valid */
+ rec_t* rec, /* in: record or NULL */
+ dict_index_t* index, /* in: record descriptor or NULL */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
- ulint info;
+ ulint i = rec_offs_n_fields(offsets);
+ ulint last = ULINT_MAX;
+ ibool comp = (*rec_offs_base(offsets) & REC_OFFS_COMPACT) != 0;
+
+ if (rec) {
+ ut_ad((ulint) rec == offsets[2]);
+ if (!comp) {
+ ut_a(rec_get_n_fields_old(rec) >= i);
+ }
+ }
+ if (index) {
+ ulint max_n_fields;
+ ut_ad((ulint) index == offsets[3]);
+ max_n_fields = ut_max(
+ dict_index_get_n_fields(index),
+ dict_index_get_n_unique_in_tree(index) + 1);
+ if (comp && rec) {
+ switch (rec_get_status(rec)) {
+ case REC_STATUS_ORDINARY:
+ break;
+ case REC_STATUS_NODE_PTR:
+ max_n_fields =
+ dict_index_get_n_unique_in_tree(index) + 1;
+ break;
+ case REC_STATUS_INFIMUM:
+ case REC_STATUS_SUPREMUM:
+ max_n_fields = 1;
+ break;
+ default:
+ ut_error;
+ }
+ }
+ /* index->n_def == 0 for dummy indexes if !comp */
+ ut_a(!comp || index->n_def);
+ ut_a(!index->n_def || i <= max_n_fields);
+ }
+ while (i--) {
+ ulint curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK;
+ ut_a(curr <= last);
+ last = curr;
+ }
+ return(TRUE);
+}
+/****************************************************************
+Updates debug data in offsets, in order to avoid bogus
+rec_offs_validate() failures. */
+UNIV_INLINE
+void
+rec_offs_make_valid(
+/*================*/
+ rec_t* rec __attribute__((unused)),
+ /* in: record */
+ dict_index_t* index __attribute__((unused)),
+ /* in: record descriptor */
+ ulint* offsets __attribute__((unused)))
+ /* in: array returned by rec_get_offsets() */
+{
+#ifdef UNIV_DEBUG
+ ut_ad(rec_get_n_fields(rec, index) >= rec_offs_n_fields(offsets));
+ offsets[2] = (ulint) rec;
+ offsets[3] = (ulint) index;
+#endif /* UNIV_DEBUG */
+}
- if (rec_get_1byte_offs_flag(rec)) {
+/****************************************************************
+The following function is used to get a pointer to the nth
+data field in a record, using the offsets from rec_get_offsets(). */
+UNIV_INLINE
+byte*
+rec_get_nth_field(
+/*==============*/
+ /* out: pointer to the field */
+ rec_t* rec, /* in: record */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n, /* in: index of the field */
+ ulint* len) /* out: length of the field; UNIV_SQL_NULL
+ if SQL null */
+{
+ byte* field;
+ ulint length;
+ ut_ad(rec);
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(n < rec_offs_n_fields(offsets));
+ ut_ad(len);
- return(FALSE);
+ if (n == 0) {
+ field = rec;
+ } else {
+ field = rec + (rec_offs_base(offsets)[n] & REC_OFFS_MASK);
}
- info = rec_2_get_field_end_info(rec, i);
+ length = rec_offs_base(offsets)[1 + n];
- if (info & REC_2BYTE_EXTERN_MASK) {
- return(TRUE);
+ if (length & REC_OFFS_SQL_NULL) {
+ length = UNIV_SQL_NULL;
+ } else {
+ length &= REC_OFFS_MASK;
+ length -= field - rec;
}
- return(FALSE);
+ *len = length;
+ return(field);
}
/**********************************************************
-Returns TRUE if the extern bit is set in any of the fields
-of rec. */
+Determine if the offsets are for a record in the new
+compact format. */
UNIV_INLINE
ibool
-rec_contains_externally_stored_field(
-/*=================================*/
- /* out: TRUE if a field is stored externally */
- rec_t* rec) /* in: record */
+rec_offs_comp(
+/*==========*/
+ /* out: TRUE if compact format */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
- ulint n;
- ulint i;
-
- if (rec_get_1byte_offs_flag(rec)) {
-
- return(FALSE);
- }
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ return((*rec_offs_base(offsets) & REC_OFFS_COMPACT) != 0);
+}
- n = rec_get_n_fields(rec);
+/**********************************************************
+Returns TRUE if the nth field of rec is SQL NULL. */
+UNIV_INLINE
+ibool
+rec_offs_nth_null(
+/*==============*/
+ /* out: TRUE if SQL NULL */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n) /* in: nth field */
+{
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ ut_ad(n < rec_offs_n_fields(offsets));
+ return((rec_offs_base(offsets)[1 + n] & REC_OFFS_SQL_NULL) != 0);
+}
+/**********************************************************
+Returns TRUE if the extern bit is set in nth field of rec. */
+UNIV_INLINE
+ibool
+rec_offs_nth_extern(
+/*================*/
+ /* out: TRUE if externally stored */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n) /* in: nth field */
+{
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ ut_ad(n < rec_offs_n_fields(offsets));
+ return((rec_offs_base(offsets)[1 + n] & REC_OFFS_EXTERNAL) != 0);
+}
- for (i = 0; i < n; i++) {
- if (rec_get_nth_field_extern_bit(rec, i)) {
+/**********************************************************
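+Gets the physical size of a field. NOTE(review): for n == 0 the subtraction uses rec_offs_base(offsets)[0], which holds the extra size and compact flag rather than a field end offset; confirm n == 0 is handled. */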
+UNIV_INLINE
+ulint
+rec_offs_nth_size(
+/*==============*/
+ /* out: length of field */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n) /* in: nth field */
+{
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ ut_ad(n < rec_offs_n_fields(offsets));
+ return((rec_offs_base(offsets)[1 + n] - rec_offs_base(offsets)[n])
+ & REC_OFFS_MASK);
+}
+/**********************************************************
+Returns TRUE if the extern bit is set in any of the fields
+of the record described by offsets. */
+UNIV_INLINE
+ibool
+rec_offs_any_extern(
+/*================*/
+ /* out: TRUE if a field is stored externally */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
+{
+ ulint i;
+ for (i = rec_offs_n_fields(offsets); i--; ) {
+ if (rec_offs_nth_extern(offsets, i)) {
return(TRUE);
}
}
-
return(FALSE);
}
+/***************************************************************
+Sets the value of the ith field extern storage bit. */
+UNIV_INLINE
+void
+rec_set_nth_field_extern_bit(
+/*=========================*/
+ rec_t* rec, /* in: record */
+ dict_index_t* index, /* in: record descriptor */
+ ulint i, /* in: ith field */
+ ibool val, /* in: value to set */
+ mtr_t* mtr) /* in: mtr holding an X-latch to the page
+ where rec is, or NULL; in the NULL case
+ we do not write to log about the change */
+{
+ if (index->table->comp) {
+ rec_set_nth_field_extern_bit_new(rec, index, i, val, mtr);
+ } else {
+ rec_set_nth_field_extern_bit_old(rec, i, val, mtr);
+ }
+}
+
/**********************************************************
Returns the offset of n - 1th field end if the record is stored in the 1-byte
offsets form. If the field is SQL null, the flag is ORed in the returned
@@ -589,9 +1060,9 @@ rec_1_get_prev_field_end_info(
ulint n) /* in: field index */
{
ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields(rec));
+ ut_ad(n <= rec_get_n_fields_old(rec));
- return(mach_read_from_1(rec - (REC_N_EXTRA_BYTES + n)));
+ return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n)));
}
/**********************************************************
@@ -608,9 +1079,9 @@ rec_2_get_prev_field_end_info(
ulint n) /* in: field index */
{
ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields(rec));
+ ut_ad(n <= rec_get_n_fields_old(rec));
- return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n)));
+ return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n)));
}
/**********************************************************
@@ -625,9 +1096,9 @@ rec_1_set_field_end_info(
ulint info) /* in: value to set */
{
ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields(rec));
+ ut_ad(n < rec_get_n_fields_old(rec));
- mach_write_to_1(rec - (REC_N_EXTRA_BYTES + n + 1), info);
+ mach_write_to_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1), info);
}
/**********************************************************
@@ -642,9 +1113,9 @@ rec_2_set_field_end_info(
ulint info) /* in: value to set */
{
ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n < rec_get_n_fields(rec));
+ ut_ad(n < rec_get_n_fields_old(rec));
- mach_write_to_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2), info);
+ mach_write_to_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2), info);
}
/**********************************************************
@@ -659,7 +1130,7 @@ rec_1_get_field_start_offs(
ulint n) /* in: field index */
{
ut_ad(rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields(rec));
+ ut_ad(n <= rec_get_n_fields_old(rec));
if (n == 0) {
@@ -682,7 +1153,7 @@ rec_2_get_field_start_offs(
ulint n) /* in: field index */
{
ut_ad(!rec_get_1byte_offs_flag(rec));
- ut_ad(n <= rec_get_n_fields(rec));
+ ut_ad(n <= rec_get_n_fields_old(rec));
if (n == 0) {
@@ -707,7 +1178,7 @@ rec_get_field_start_offs(
ulint n) /* in: field index */
{
ut_ad(rec);
- ut_ad(n <= rec_get_n_fields(rec));
+ ut_ad(n <= rec_get_n_fields_old(rec));
if (n == 0) {
@@ -723,8 +1194,9 @@ rec_get_field_start_offs(
}
/****************************************************************
-Gets the physical size of a field. Also an SQL null may have a field of
-size > 0, if the data type is of a fixed size. */
+Gets the physical size of an old-style field.
+Also an SQL null may have a field of size > 0,
+if the data type is of a fixed size. */
UNIV_INLINE
ulint
rec_get_nth_field_size(
@@ -744,133 +1216,134 @@ rec_get_nth_field_size(
return(next_os - os);
}
-/****************************************************************
-The following function is used to get a copy of the nth data field in a
-record to a buffer. */
-UNIV_INLINE
-void
-rec_copy_nth_field(
-/*===============*/
- void* buf, /* in: pointer to the buffer */
- rec_t* rec, /* in: record */
- ulint n, /* in: index of the field */
- ulint* len) /* out: length of the field; UNIV_SQL_NULL if SQL
- null */
-{
- byte* ptr;
-
- ut_ad(buf && rec && len);
-
- ptr = rec_get_nth_field(rec, n, len);
-
- if (*len == UNIV_SQL_NULL) {
-
- return;
- }
-
- ut_memcpy(buf, ptr, *len);
-}
-
/***************************************************************
This is used to modify the value of an already existing field in a record.
The previous value must have exactly the same size as the new value. If len
-is UNIV_SQL_NULL then the field is treated as an SQL null. */
+is UNIV_SQL_NULL then the field is treated as an SQL null for old-style
+records. For new-style records, len must not be UNIV_SQL_NULL. */
UNIV_INLINE
void
rec_set_nth_field(
/*==============*/
- rec_t* rec, /* in: record */
- ulint n, /* in: index of the field */
- void* data, /* in: pointer to the data if not SQL null */
- ulint len) /* in: length of the data or UNIV_SQL_NULL */
+ rec_t* rec, /* in: record */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint n, /* in: index number of the field */
+ const void* data, /* in: pointer to the data
+ if not SQL null */
+ ulint len) /* in: length of the data or UNIV_SQL_NULL.
+ If not SQL null, must have the same
+ length as the previous value.
+ If SQL null, previous value must be
+ SQL null. */
{
byte* data2;
ulint len2;
- ut_ad((len == UNIV_SQL_NULL)
- || (rec_get_nth_field_size(rec, n) == len));
-
+ ut_ad(rec);
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+
if (len == UNIV_SQL_NULL) {
+ ut_ad(!rec_offs_comp(offsets));
rec_set_nth_field_sql_null(rec, n);
return;
}
- data2 = rec_get_nth_field(rec, n, &len2);
-
- ut_memcpy(data2, data, len);
-
+ data2 = rec_get_nth_field(rec, offsets, n, &len2);
if (len2 == UNIV_SQL_NULL) {
-
+ ut_ad(!rec_offs_comp(offsets));
rec_set_nth_field_null_bit(rec, n, FALSE);
+ ut_ad(len == rec_get_nth_field_size(rec, n));
+ } else {
+ ut_ad(len2 == len);
}
+
+ ut_memcpy(data2, data, len);
}
/**************************************************************
-The following function returns the data size of a physical
+The following function returns the data size of an old-style physical
record, that is the sum of field lengths. SQL null fields
are counted as length 0 fields. The value returned by the function
is the distance from record origin to record end in bytes. */
UNIV_INLINE
ulint
-rec_get_data_size(
-/*==============*/
- /* out: size */
+rec_get_data_size_old(
+/*==================*/
+ /* out: size */
rec_t* rec) /* in: physical record */
{
ut_ad(rec);
- return(rec_get_field_start_offs(rec, rec_get_n_fields(rec)));
+ return(rec_get_field_start_offs(rec, rec_get_n_fields_old(rec)));
}
/**************************************************************
-Returns the total size of record minus data size of record. The value
-returned by the function is the distance from record start to record origin
-in bytes. */
+The following function sets the number of fields in offsets. */
+UNIV_INLINE
+void
+rec_offs_set_n_fields(
+/*==================*/
+ ulint* offsets, /* in: array returned by rec_get_offsets() */
+ ulint n_fields) /* in: number of fields */
+{
+ ut_ad(offsets);
+ ut_ad(n_fields > 0);
+ ut_ad(n_fields <= REC_MAX_N_FIELDS);
+ ut_ad(n_fields + REC_OFFS_HEADER_SIZE
+ <= rec_offs_get_n_alloc(offsets));
+ offsets[1] = n_fields;
+}
+
+/**************************************************************
+The following function returns the data size of a physical
+record, that is the sum of field lengths. SQL null fields
+are counted as length 0 fields. The value returned by the function
+is the distance from record origin to record end in bytes. */
UNIV_INLINE
ulint
-rec_get_extra_size(
+rec_offs_data_size(
/*===============*/
- /* out: size */
- rec_t* rec) /* in: physical record */
+ /* out: size */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
- ulint n_fields;
-
- ut_ad(rec);
-
- n_fields = rec_get_n_fields(rec);
+ ulint size;
- if (rec_get_1byte_offs_flag(rec)) {
-
- return(REC_N_EXTRA_BYTES + n_fields);
- }
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ size = rec_offs_base(offsets)[rec_offs_n_fields(offsets)]
+ & REC_OFFS_MASK;
+ ut_ad(size < UNIV_PAGE_SIZE);
+ return(size);
+}
- return(REC_N_EXTRA_BYTES + 2 * n_fields);
+/**************************************************************
+Returns the total size of record minus data size of record. The value
+returned by the function is the distance from record start to record origin
+in bytes. */
+UNIV_INLINE
+ulint
+rec_offs_extra_size(
+/*================*/
+ /* out: size */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
+{
+ ulint size;
+ ut_ad(rec_offs_validate(NULL, NULL, offsets));
+ size = *rec_offs_base(offsets) & ~REC_OFFS_COMPACT;
+ ut_ad(size < UNIV_PAGE_SIZE);
+ return(size);
}
-/**************************************************************
+/**************************************************************
Returns the total size of a physical record. */
UNIV_INLINE
ulint
-rec_get_size(
-/*=========*/
- /* out: size */
- rec_t* rec) /* in: physical record */
+rec_offs_size(
+/*==========*/
+ /* out: size */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
- ulint n_fields;
-
- ut_ad(rec);
-
- n_fields = rec_get_n_fields(rec);
-
- if (rec_get_1byte_offs_flag(rec)) {
-
- return(REC_N_EXTRA_BYTES + n_fields
- + rec_1_get_field_start_offs(rec, n_fields));
- }
-
- return(REC_N_EXTRA_BYTES + 2 * n_fields
- + rec_2_get_field_start_offs(rec, n_fields));
+ return(rec_offs_data_size(offsets) + rec_offs_extra_size(offsets));
}
/**************************************************************
@@ -879,10 +1352,11 @@ UNIV_INLINE
byte*
rec_get_end(
/*========*/
- /* out: pointer to end */
- rec_t* rec) /* in: pointer to record */
+ /* out: pointer to end */
+ rec_t* rec, /* in: pointer to record */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
- return(rec + rec_get_data_size(rec));
+ return(rec + rec_offs_data_size(offsets));
}
/**************************************************************
@@ -891,10 +1365,11 @@ UNIV_INLINE
byte*
rec_get_start(
/*==========*/
- /* out: pointer to start */
- rec_t* rec) /* in: pointer to record */
+ /* out: pointer to start */
+ rec_t* rec, /* in: pointer to record */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
- return(rec - rec_get_extra_size(rec));
+ return(rec - rec_offs_extra_size(offsets));
}
/*******************************************************************
@@ -903,18 +1378,20 @@ UNIV_INLINE
rec_t*
rec_copy(
/*=====*/
- /* out: pointer to the origin of the copied record */
- void* buf, /* in: buffer */
- rec_t* rec) /* in: physical record */
+ /* out: pointer to the origin of the copy */
+ void* buf, /* in: buffer */
+ const rec_t* rec, /* in: physical record */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
ulint extra_len;
ulint data_len;
ut_ad(rec && buf);
- ut_ad(rec_validate(rec));
+ ut_ad(rec_offs_validate((rec_t*) rec, NULL, offsets));
+ ut_ad(rec_validate((rec_t*) rec, offsets));
- extra_len = rec_get_extra_size(rec);
- data_len = rec_get_data_size(rec);
+ extra_len = rec_offs_extra_size(offsets);
+ data_len = rec_offs_data_size(offsets);
ut_memcpy(buf, rec - extra_len, extra_len + data_len);
@@ -922,8 +1399,8 @@ rec_copy(
}
/**************************************************************
-Returns the extra size of a physical record if we know its data size and
-the number of fields. */
+Returns the extra size of an old-style physical record if we know its
+data size and number of fields. */
UNIV_INLINE
ulint
rec_get_converted_extra_size(
@@ -934,28 +1411,51 @@ rec_get_converted_extra_size(
{
if (data_size <= REC_1BYTE_OFFS_LIMIT) {
- return(REC_N_EXTRA_BYTES + n_fields);
+ return(REC_N_OLD_EXTRA_BYTES + n_fields);
}
- return(REC_N_EXTRA_BYTES + 2 * n_fields);
+ return(REC_N_OLD_EXTRA_BYTES + 2 * n_fields);
}
/**************************************************************
The following function returns the size of a data tuple when converted to
+a new-style physical record. */
+
+ulint
+rec_get_converted_size_new(
+/*=======================*/
+ /* out: size */
+ dict_index_t* index, /* in: record descriptor */
+ dtuple_t* dtuple);/* in: data tuple */
+/**************************************************************
+The following function returns the size of a data tuple when converted to
a physical record. */
UNIV_INLINE
ulint
rec_get_converted_size(
/*===================*/
/* out: size */
+ dict_index_t* index, /* in: record descriptor */
dtuple_t* dtuple) /* in: data tuple */
{
ulint data_size;
ulint extra_size;
-
+
+ ut_ad(index);
ut_ad(dtuple);
ut_ad(dtuple_check_typed(dtuple));
+ ut_ad(index->type & DICT_UNIVERSAL
+ || dtuple_get_n_fields(dtuple) ==
+ (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK)
+ == REC_STATUS_NODE_PTR)
+ ? dict_index_get_n_unique_in_tree(index) + 1
+ : dict_index_get_n_fields(index)));
+
+ if (index->table->comp) {
+ return(rec_get_converted_size_new(index, dtuple));
+ }
+
data_size = dtuple_get_data_size(dtuple);
extra_size = rec_get_converted_extra_size(
@@ -971,12 +1471,15 @@ UNIV_INLINE
ulint
rec_fold(
/*=====*/
- /* out: the folded value */
- rec_t* rec, /* in: the physical record */
- ulint n_fields, /* in: number of complete fields to fold */
- ulint n_bytes, /* in: number of bytes to fold in an
- incomplete last field */
- dulint tree_id) /* in: index tree id */
+ /* out: the folded value */
+ rec_t* rec, /* in: the physical record */
+ const ulint* offsets, /* in: array returned by
+ rec_get_offsets() */
+ ulint n_fields, /* in: number of complete
+ fields to fold */
+ ulint n_bytes, /* in: number of bytes to fold
+ in an incomplete last field */
+ dulint tree_id) /* in: index tree id */
{
ulint i;
byte* data;
@@ -984,12 +1487,13 @@ rec_fold(
ulint fold;
ulint n_fields_rec;
- ut_ad(rec_validate(rec));
- ut_ad(n_fields <= rec_get_n_fields(rec));
- ut_ad((n_fields < rec_get_n_fields(rec)) || (n_bytes == 0));
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
+ ut_ad(rec_validate((rec_t*) rec, offsets));
ut_ad(n_fields + n_bytes > 0);
-
- n_fields_rec = rec_get_n_fields(rec);
+
+ n_fields_rec = rec_offs_n_fields(offsets);
+ ut_ad(n_fields <= n_fields_rec);
+ ut_ad(n_fields < n_fields_rec || n_bytes == 0);
if (n_fields > n_fields_rec) {
n_fields = n_fields_rec;
@@ -1002,7 +1506,7 @@ rec_fold(
fold = ut_fold_dulint(tree_id);
for (i = 0; i < n_fields; i++) {
- data = rec_get_nth_field(rec, i, &len);
+ data = rec_get_nth_field(rec, offsets, i, &len);
if (len != UNIV_SQL_NULL) {
fold = ut_fold_ulint_pair(fold,
@@ -1011,7 +1515,7 @@ rec_fold(
}
if (n_bytes > 0) {
- data = rec_get_nth_field(rec, i, &len);
+ data = rec_get_nth_field(rec, offsets, i, &len);
if (len != UNIV_SQL_NULL) {
if (len > n_bytes) {
@@ -1025,19 +1529,3 @@ rec_fold(
return(fold);
}
-
-/*************************************************************
-Builds a physical record out of a data tuple and stores it beginning from
-the address destination. */
-UNIV_INLINE
-rec_t*
-rec_convert_dtuple_to_rec(
-/*======================*/
- /* out: pointer to the origin of physical
- record */
- byte* destination, /* in: start address of the physical record */
- dtuple_t* dtuple) /* in: data tuple */
-{
- return(rec_convert_dtuple_to_rec_low(destination, dtuple,
- dtuple_get_data_size(dtuple)));
-}
diff --git a/innobase/include/row0mysql.h b/innobase/include/row0mysql.h
index 13773ed380d..e44d689b88b 100644
--- a/innobase/include/row0mysql.h
+++ b/innobase/include/row0mysql.h
@@ -22,36 +22,6 @@ Created 9/17/2000 Heikki Tuuri
typedef struct row_prebuilt_struct row_prebuilt_t;
/***********************************************************************
-Stores a variable-length field (like VARCHAR) length to dest, in the
-MySQL format. */
-UNIV_INLINE
-byte*
-row_mysql_store_var_len(
-/*====================*/
- /* out: dest + 2 */
- byte* dest, /* in: where to store */
- ulint len); /* in: length, must fit in two bytes */
-/***********************************************************************
-Reads a MySQL format variable-length field (like VARCHAR) length and
-returns pointer to the field data. */
-UNIV_INLINE
-byte*
-row_mysql_read_var_ref(
-/*===================*/
- /* out: field + 2 */
- ulint* len, /* out: variable-length field length */
- byte* field); /* in: field */
-/***********************************************************************
-Reads a MySQL format variable-length field (like VARCHAR) length and
-returns pointer to the field data. */
-
-byte*
-row_mysql_read_var_ref_noninline(
-/*=============================*/
- /* out: field + 2 */
- ulint* len, /* out: variable-length field length */
- byte* field); /* in: field */
-/***********************************************************************
Frees the blob heap in prebuilt when no longer needed. */
void
@@ -60,6 +30,30 @@ row_mysql_prebuilt_free_blob_heap(
row_prebuilt_t* prebuilt); /* in: prebuilt struct of a
ha_innobase:: table handle */
/***********************************************************************
+Stores a >= 5.0.3 format true VARCHAR length to dest, in the MySQL row
+format. */
+
+byte*
+row_mysql_store_true_var_len(
+/*=========================*/
+ /* out: pointer to the data, we skip the 1 or 2 bytes
+ at the start that are used to store the len */
+ byte* dest, /* in: where to store */
+ ulint len, /* in: length, must fit in two bytes */
+ ulint lenlen);/* in: storage length of len: either 1 or 2 bytes */
+/***********************************************************************
+Reads a >= 5.0.3 format true VARCHAR length, in the MySQL row format, and
+returns a pointer to the data. */
+
+byte*
+row_mysql_read_true_varchar(
+/*========================*/
+ /* out: pointer to the data, we skip the 1 or 2 bytes
+ at the start that are used to store the len */
+ ulint* len, /* out: variable-length field length */
+ byte* field, /* in: field in the MySQL format */
+ ulint lenlen);/* in: storage length of len: either 1 or 2 bytes */
+/***********************************************************************
Stores a reference to a BLOB in the MySQL format. */
void
@@ -83,23 +77,40 @@ row_mysql_read_blob_ref(
ulint col_len); /* in: BLOB reference length (not BLOB
length) */
/******************************************************************
-Stores a non-SQL-NULL field given in the MySQL format in the Innobase
-format. */
-UNIV_INLINE
-void
+Stores a non-SQL-NULL field given in the MySQL format in the InnoDB format.
+The counterpart of this function is row_sel_field_store_in_mysql_format() in
+row0sel.c. */
+
+byte*
row_mysql_store_col_in_innobase_format(
/*===================================*/
- dfield_t* dfield, /* in/out: dfield */
- byte* buf, /* in/out: buffer for the converted
- value */
+ /* out: up to which byte we used
+ buf in the conversion */
+ dfield_t* dfield, /* in/out: dfield where dtype
+ information must be already set when
+ this function is called! */
+ byte* buf, /* in/out: buffer for a converted
+ integer value; this must be at least
+ col_len long then! */
+ ibool row_format_col, /* in: TRUE if the mysql_data is from
+ a MySQL row, FALSE if from a MySQL
+ key value;
+ in MySQL, a true VARCHAR storage
+ format differs in a row and in a
+ key value: in a key value the length
+ is always stored in 2 bytes! */
byte* mysql_data, /* in: MySQL column value, not
SQL NULL; NOTE that dfield may also
get a pointer to mysql_data,
therefore do not discard this as long
as dfield is used! */
- ulint col_len, /* in: MySQL column length */
- ulint type, /* in: data type */
- ulint is_unsigned); /* in: != 0 if unsigned integer type */
+ ulint col_len, /* in: MySQL column length; NOTE that
+ this is the storage length of the
+ column in the MySQL format row, not
+ necessarily the length of the actual
+ payload data; if the column is a true
+ VARCHAR then this is irrelevant */
+ ibool comp); /* in: TRUE = compact format */
/********************************************************************
Handles user errors and lock waits detected by the database engine. */
@@ -239,6 +250,17 @@ row_update_for_mysql(
the MySQL format */
row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
handle */
+
+/*************************************************************************
+Does an unlock of a row for MySQL. */
+
+int
+row_unlock_for_mysql(
+/*=================*/
+ /* out: error code or DB_SUCCESS */
+ row_prebuilt_t* prebuilt); /* in: prebuilt struct in MySQL
+ handle */
+
/*************************************************************************
Creates an query graph node of 'update' type to be used in the MySQL
interface. */
@@ -352,6 +374,15 @@ row_get_background_drop_list_len_low(void);
/*======================================*/
/* out: how many tables in list */
/*************************************************************************
+Truncates a table for MySQL. */
+
+int
+row_truncate_table_for_mysql(
+/*=========================*/
+ /* out: error code or DB_SUCCESS */
+ dict_table_t* table, /* in: table handle */
+ trx_t* trx); /* in: transaction handle */
+/*************************************************************************
Drops a table for MySQL. If the name of the dropped table ends to
characters INNODB_MONITOR, then this also stops printing of monitor
output by the master thread. */
@@ -436,8 +467,22 @@ struct mysql_row_templ_struct {
zero if column cannot be NULL */
ulint type; /* column type in Innobase mtype
numbers DATA_CHAR... */
+ ulint mysql_type; /* MySQL type code; this is always
+ < 256 */
+ ulint mysql_length_bytes; /* if mysql_type
+ == DATA_MYSQL_TRUE_VARCHAR, this tells
+ whether we should use 1 or 2 bytes to
+ store the MySQL true VARCHAR data
+ length at the start of row in the MySQL
+ format (NOTE that the MySQL key value
+ format always uses 2 bytes for the data
+ len) */
ulint charset; /* MySQL charset-collation code
of the column, or zero */
+ ulint mbminlen; /* minimum length of a char, in bytes,
+ or zero if not a char type */
+ ulint mbmaxlen; /* maximum length of a char, in bytes,
+ or zero if not a char type */
ulint is_unsigned; /* if a column type is an integer
type and this field is != 0, then
it is an unsigned integer type */
@@ -569,6 +614,10 @@ struct row_prebuilt_struct {
allocated mem buf start, because
there is a 4 byte magic number at the
start and at the end */
+ ibool keep_other_fields_on_keyread; /* when using fetch
+ cache with HA_EXTRA_KEYREAD, don't
+ overwrite other fields in the mysql
+ row buffer. */
ulint fetch_cache_first;/* position of the first not yet
fetched row in fetch_cache */
ulint n_fetch_cached; /* number of not yet fetched rows
diff --git a/innobase/include/row0mysql.ic b/innobase/include/row0mysql.ic
index fc922b52d0a..aa8a70d8761 100644
--- a/innobase/include/row0mysql.ic
+++ b/innobase/include/row0mysql.ic
@@ -5,122 +5,3 @@ MySQL interface for Innobase
Created 1/23/2001 Heikki Tuuri
*******************************************************/
-
-/***********************************************************************
-Stores a variable-length field (like VARCHAR) length to dest, in the
-MySQL format. No real var implemented in MySQL yet! */
-UNIV_INLINE
-byte*
-row_mysql_store_var_len(
-/*====================*/
- /* out: dest + 2 */
- byte* dest, /* in: where to store */
- ulint len __attribute__((unused))) /* in: length, must fit in two
- bytes */
-{
- ut_ad(len < 256 * 256);
-/*
- mach_write_to_2_little_endian(dest, len);
-
- return(dest + 2);
-*/
- return(dest); /* No real var implemented in MySQL yet! */
-}
-
-/***********************************************************************
-Reads a MySQL format variable-length field (like VARCHAR) length and
-returns pointer to the field data. No real var implemented in MySQL yet! */
-UNIV_INLINE
-byte*
-row_mysql_read_var_ref(
-/*===================*/
- /* out: field + 2 */
- ulint* len, /* out: variable-length field length; does not work
- yet! */
- byte* field) /* in: field */
-{
-/*
- *len = mach_read_from_2_little_endian(field);
-
- return(field + 2);
-*/
- UT_NOT_USED(len);
-
- return(field); /* No real var implemented in MySQL yet! */
-}
-
-/******************************************************************
-Stores a non-SQL-NULL field given in the MySQL format in the Innobase
-format. */
-UNIV_INLINE
-void
-row_mysql_store_col_in_innobase_format(
-/*===================================*/
- dfield_t* dfield, /* in/out: dfield */
- byte* buf, /* in/out: buffer for the converted
- value; this must be at least col_len
- long! */
- byte* mysql_data, /* in: MySQL column value, not
- SQL NULL; NOTE that dfield may also
- get a pointer to mysql_data,
- therefore do not discard this as long
- as dfield is used! */
- ulint col_len, /* in: MySQL column length */
- ulint type, /* in: data type */
- ulint is_unsigned) /* in: != 0 if unsigned integer type */
-{
- byte* ptr = mysql_data;
-
- if (type == DATA_INT) {
- /* Store integer data in Innobase in a big-endian format,
- sign bit negated */
-
- ptr = buf + col_len;
-
- for (;;) {
- ptr--;
- *ptr = *mysql_data;
- if (ptr == buf) {
- break;
- }
- mysql_data++;
- }
-
- if (!is_unsigned) {
- *ptr = (byte) (*ptr ^ 128);
- }
- } else if (type == DATA_VARCHAR || type == DATA_VARMYSQL
- || type == DATA_BINARY) {
- /* Remove trailing spaces. */
-
- /* Handle UCS2 strings differently. As no new
- collations will be introduced in 4.1, we hardcode the
- charset-collation codes here. In 5.0, the logic will
- be based on mbminlen. */
- ulint cset = dtype_get_charset_coll(
- dtype_get_prtype(dfield_get_type(dfield)));
- ptr = row_mysql_read_var_ref(&col_len, mysql_data);
- if (cset == 35/*ucs2_general_ci*/
- || cset == 90/*ucs2_bin*/
- || (cset >= 128/*ucs2_unicode_ci*/
- && cset <= 144/*ucs2_persian_ci*/)) {
- /* space=0x0020 */
- /* Trim "half-chars", just in case. */
- col_len &= ~1;
-
- while (col_len >= 2 && ptr[col_len - 2] == 0x00
- && ptr[col_len - 1] == 0x20) {
- col_len -= 2;
- }
- } else {
- /* space=0x20 */
- while (col_len > 0 && ptr[col_len - 1] == 0x20) {
- col_len--;
- }
- }
- } else if (type == DATA_BLOB) {
- ptr = row_mysql_read_blob_ref(&col_len, mysql_data, col_len);
- }
-
- dfield_set_data(dfield, ptr, col_len);
-}
diff --git a/innobase/include/row0row.h b/innobase/include/row0row.h
index 951e211fb37..782973d8f5d 100644
--- a/innobase/include/row0row.h
+++ b/innobase/include/row0row.h
@@ -27,7 +27,8 @@ row_get_rec_trx_id(
/*===============*/
/* out: value of the field */
rec_t* rec, /* in: record */
- dict_index_t* index); /* in: clustered index */
+ dict_index_t* index, /* in: clustered index */
+ const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Reads the roll pointer field from a clustered index record. */
UNIV_INLINE
@@ -36,7 +37,8 @@ row_get_rec_roll_ptr(
/*=================*/
/* out: value of the field */
rec_t* rec, /* in: record */
- dict_index_t* index); /* in: clustered index */
+ dict_index_t* index, /* in: clustered index */
+ const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Writes the trx id field to a clustered index record. */
UNIV_INLINE
@@ -45,7 +47,8 @@ row_set_rec_trx_id(
/*===============*/
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
- dulint trx_id); /* in: value of the field */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
+ dulint trx_id);/* in: value of the field */
/*************************************************************************
Sets the roll pointer field in a clustered index record. */
UNIV_INLINE
@@ -54,6 +57,7 @@ row_set_rec_roll_ptr(
/*=================*/
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint roll_ptr);/* in: value of the field */
/*********************************************************************
When an insert to a table is performed, this function builds the entry which
@@ -90,6 +94,9 @@ row_build(
the buffer page of this record must be
at least s-latched and the latch held
as long as the row dtuple is used! */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index)
+ or NULL, in which case this function
+ will invoke rec_get_offsets() */
mem_heap_t* heap); /* in: memory heap from which the memory
needed is allocated */
/***********************************************************************
@@ -175,14 +182,15 @@ UNIV_INLINE
void
row_build_row_ref_fast(
/*===================*/
- dtuple_t* ref, /* in: typed data tuple where the reference
- is built */
- ulint* map, /* in: array of field numbers in rec telling
- how ref should be built from the fields of
- rec */
- rec_t* rec); /* in: record in the index; must be preserved
- while ref is used, as we do not copy field
- values to heap */
+ dtuple_t* ref, /* in: typed data tuple where the
+ reference is built */
+ const ulint* map, /* in: array of field numbers in rec
+ telling how ref should be built from
+ the fields of rec */
+ rec_t* rec, /* in: record in the index; must be
+ preserved while ref is used, as we do
+ not copy field values to heap */
+ const ulint* offsets);/* in: array returned by rec_get_offsets() */
/*******************************************************************
Searches the clustered index record for a row, if we have the row
reference. */
diff --git a/innobase/include/row0row.ic b/innobase/include/row0row.ic
index 8e5121f5a96..85410beacf0 100644
--- a/innobase/include/row0row.ic
+++ b/innobase/include/row0row.ic
@@ -20,7 +20,8 @@ row_get_rec_sys_field(
/* out: value of the field */
ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
rec_t* rec, /* in: record */
- dict_index_t* index); /* in: clustered index */
+ dict_index_t* index, /* in: clustered index */
+ const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*************************************************************************
Sets the trx id or roll ptr field in a clustered index record: this function
is slower than the specialized inline functions. */
@@ -32,6 +33,7 @@ row_set_rec_sys_field(
ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint val); /* in: value to set */
/*************************************************************************
@@ -42,18 +44,21 @@ row_get_rec_trx_id(
/*===============*/
/* out: value of the field */
rec_t* rec, /* in: record */
- dict_index_t* index) /* in: clustered index */
+ dict_index_t* index, /* in: clustered index */
+ const ulint* offsets)/* in: rec_get_offsets(rec, index) */
{
ulint offset;
ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(rec_offs_validate(rec, index, offsets));
offset = index->trx_id_offset;
if (offset) {
return(trx_read_trx_id(rec + offset));
} else {
- return(row_get_rec_sys_field(DATA_TRX_ID, rec, index));
+ return(row_get_rec_sys_field(DATA_TRX_ID,
+ rec, index, offsets));
}
}
@@ -65,18 +70,21 @@ row_get_rec_roll_ptr(
/*=================*/
/* out: value of the field */
rec_t* rec, /* in: record */
- dict_index_t* index) /* in: clustered index */
+ dict_index_t* index, /* in: clustered index */
+ const ulint* offsets)/* in: rec_get_offsets(rec, index) */
{
ulint offset;
ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(rec_offs_validate(rec, index, offsets));
offset = index->trx_id_offset;
if (offset) {
return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN));
} else {
- return(row_get_rec_sys_field(DATA_ROLL_PTR, rec, index));
+ return(row_get_rec_sys_field(DATA_ROLL_PTR,
+ rec, index, offsets));
}
}
@@ -88,18 +96,21 @@ row_set_rec_trx_id(
/*===============*/
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint trx_id) /* in: value of the field */
{
ulint offset;
ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(rec_offs_validate(rec, index, offsets));
offset = index->trx_id_offset;
if (offset) {
trx_write_trx_id(rec + offset, trx_id);
} else {
- row_set_rec_sys_field(DATA_TRX_ID, rec, index, trx_id);
+ row_set_rec_sys_field(DATA_TRX_ID,
+ rec, index, offsets, trx_id);
}
}
@@ -111,18 +122,21 @@ row_set_rec_roll_ptr(
/*=================*/
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
dulint roll_ptr)/* in: value of the field */
{
ulint offset;
ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(rec_offs_validate(rec, index, offsets));
offset = index->trx_id_offset;
if (offset) {
trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr);
} else {
- row_set_rec_sys_field(DATA_ROLL_PTR, rec, index, roll_ptr);
+ row_set_rec_sys_field(DATA_ROLL_PTR,
+ rec, index, offsets, roll_ptr);
}
}
@@ -133,14 +147,15 @@ UNIV_INLINE
void
row_build_row_ref_fast(
/*===================*/
- dtuple_t* ref, /* in: typed data tuple where the reference
- is built */
- ulint* map, /* in: array of field numbers in rec telling
- how ref should be built from the fields of
- rec */
- rec_t* rec) /* in: record in the index; must be preserved
- while ref is used, as we do not copy field
- values to heap */
+ dtuple_t* ref, /* in: typed data tuple where the
+ reference is built */
+ const ulint* map, /* in: array of field numbers in rec
+ telling how ref should be built from
+ the fields of rec */
+ rec_t* rec, /* in: record in the index; must be
+ preserved while ref is used, as we do
+ not copy field values to heap */
+ const ulint* offsets)/* in: array returned by rec_get_offsets() */
{
dfield_t* dfield;
byte* field;
@@ -149,6 +164,7 @@ row_build_row_ref_fast(
ulint field_no;
ulint i;
+ ut_ad(rec_offs_validate(rec, NULL, offsets));
ref_len = dtuple_get_n_fields(ref);
for (i = 0; i < ref_len; i++) {
@@ -158,7 +174,8 @@ row_build_row_ref_fast(
if (field_no != ULINT_UNDEFINED) {
- field = rec_get_nth_field(rec, field_no, &len);
+ field = rec_get_nth_field(rec, offsets,
+ field_no, &len);
dfield_set_data(dfield, field, len);
}
}
diff --git a/innobase/include/row0upd.h b/innobase/include/row0upd.h
index 28210364833..673e0511153 100644
--- a/innobase/include/row0upd.h
+++ b/innobase/include/row0upd.h
@@ -80,6 +80,7 @@ row_upd_rec_sys_fields(
/*===================*/
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
trx_t* trx, /* in: transaction */
dulint roll_ptr);/* in: roll ptr of the undo log record */
/*************************************************************************
@@ -124,8 +125,8 @@ row_upd_changes_field_size_or_external(
/* out: TRUE if the update changes the size of
some field in index or the field is external
in rec or update */
- rec_t* rec, /* in: record in index */
dict_index_t* index, /* in: index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
upd_t* update);/* in: update vector */
/***************************************************************
Replaces the new column values stored in the update vector to the record
@@ -135,8 +136,9 @@ a clustered index */
void
row_upd_rec_in_place(
/*=================*/
- rec_t* rec, /* in/out: record where replaced */
- upd_t* update);/* in: update vector */
+ rec_t* rec, /* in/out: record where replaced */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ upd_t* update);/* in: update vector */
/*******************************************************************
Builds an update vector from those fields which in a secondary index entry
differ from a record that has the equal ordering fields. NOTE: we compare
@@ -274,10 +276,11 @@ recovery. */
void
row_upd_rec_sys_fields_in_recovery(
/*===============================*/
- rec_t* rec, /* in: record */
- ulint pos, /* in: TRX_ID position in rec */
- dulint trx_id, /* in: transaction id */
- dulint roll_ptr);/* in: roll ptr of the undo log record */
+ rec_t* rec, /* in: record */
+ const ulint* offsets,/* in: array returned by rec_get_offsets() */
+ ulint pos, /* in: TRX_ID position in rec */
+ dulint trx_id, /* in: transaction id */
+ dulint roll_ptr);/* in: roll ptr of the undo log record */
/*************************************************************************
Parses the log data written by row_upd_index_write_log. */
diff --git a/innobase/include/row0upd.ic b/innobase/include/row0upd.ic
index a124228a0de..e2d81a39cfa 100644
--- a/innobase/include/row0upd.ic
+++ b/innobase/include/row0upd.ic
@@ -106,15 +106,17 @@ row_upd_rec_sys_fields(
/*===================*/
rec_t* rec, /* in: record */
dict_index_t* index, /* in: clustered index */
+ const ulint* offsets,/* in: rec_get_offsets(rec, index) */
trx_t* trx, /* in: transaction */
dulint roll_ptr)/* in: roll ptr of the undo log record */
{
ut_ad(index->type & DICT_CLUSTERED);
+ ut_ad(rec_offs_validate(rec, index, offsets));
#ifdef UNIV_SYNC_DEBUG
ut_ad(!buf_block_align(rec)->is_hashed
|| rw_lock_own(&btr_search_latch, RW_LOCK_EX));
#endif /* UNIV_SYNC_DEBUG */
- row_set_rec_trx_id(rec, index, trx->id);
- row_set_rec_roll_ptr(rec, index, roll_ptr);
+ row_set_rec_trx_id(rec, index, offsets, trx->id);
+ row_set_rec_roll_ptr(rec, index, offsets, roll_ptr);
}
diff --git a/innobase/include/row0vers.h b/innobase/include/row0vers.h
index 30cf82144e9..079d841f7f3 100644
--- a/innobase/include/row0vers.h
+++ b/innobase/include/row0vers.h
@@ -30,7 +30,8 @@ row_vers_impl_x_locked_off_kernel(
transaction; NOTE that the kernel mutex is
temporarily released! */
rec_t* rec, /* in: record in a secondary index */
- dict_index_t* index); /* in: the secondary index */
+ dict_index_t* index, /* in: the secondary index */
+ const ulint* offsets);/* in: rec_get_offsets(rec, index) */
/*********************************************************************
Finds out if we must preserve a delete marked earlier version of a clustered
index record, because it is >= the purge view. */
@@ -78,7 +79,11 @@ row_vers_build_for_consistent_read(
mtr_t* mtr, /* in: mtr holding the latch on rec; it will
also hold the latch on purge_view */
dict_index_t* index, /* in: the clustered index */
+ ulint** offsets,/* in/out: offsets returned by
+ rec_get_offsets(rec, index) */
read_view_t* view, /* in: the consistent read view */
+ mem_heap_t** offset_heap,/* in/out: memory heap from which
+ the offsets are allocated */
mem_heap_t* in_heap,/* in: memory heap from which the memory for
old_vers is allocated; memory for possible
intermediate versions is allocated and freed
diff --git a/innobase/include/row0vers.ic b/innobase/include/row0vers.ic
index 5ece47c35d1..ab1e264635b 100644
--- a/innobase/include/row0vers.ic
+++ b/innobase/include/row0vers.ic
@@ -11,73 +11,3 @@ Created 2/6/1997 Heikki Tuuri
#include "read0read.h"
#include "page0page.h"
#include "log0recv.h"
-
-/*************************************************************************
-Fetches the trx id of a clustered index record or version. */
-UNIV_INLINE
-dulint
-row_vers_get_trx_id(
-/*================*/
- /* out: trx id or ut_dulint_zero if the
- clustered index record not found */
- rec_t* rec, /* in: clustered index record, or an old
- version of it */
- dict_table_t* table) /* in: table */
-{
- return(row_get_rec_trx_id(rec, dict_table_get_first_index(table)));
-}
-
-/*************************************************************************
-Checks if a consistent read can be performed immediately on the index
-record, or if an older version is needed. */
-UNIV_INLINE
-ibool
-row_vers_clust_rec_sees_older(
-/*==========================*/
- /* out: FALSE if can read immediately */
- rec_t* rec, /* in: record which should be read or passed
- over by a read cursor */
- dict_index_t* index, /* in: clustered index */
- read_view_t* view) /* in: read view */
-{
- ut_ad(index->type & DICT_CLUSTERED);
-
- if (read_view_sees_trx_id(view, row_get_rec_trx_id(rec, index))) {
-
- return(FALSE);
- }
-
- return(TRUE);
-}
-
-/*************************************************************************
-Checks if a secondary index record can be read immediately by a consistent
-read, or if an older version may be needed. To be sure, we will have to
-look in the clustered index. */
-UNIV_INLINE
-ibool
-row_vers_sec_rec_may_see_older(
-/*===========================*/
- /* out: FALSE if can be read immediately */
- rec_t* rec, /* in: record which should be read or passed */
- dict_index_t* index __attribute__((unused)),/* in: secondary index */
- read_view_t* view) /* in: read view */
-{
- page_t* page;
-
- ut_ad(!(index->type & DICT_CLUSTERED));
-
- page = buf_frame_align(rec);
-
- if ((ut_dulint_cmp(page_get_max_trx_id(page), view->up_limit_id) >= 0)
- || recv_recovery_is_on()) {
-
- /* It may be that the record was inserted or modified by a
- transaction the view should not see: we have to look in the
- clustered index */
-
- return(TRUE);
- }
-
- return(FALSE);
-}
diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h
index 4352083b21f..6e4241965c1 100644
--- a/innobase/include/srv0srv.h
+++ b/innobase/include/srv0srv.h
@@ -93,20 +93,23 @@ extern ulint srv_max_n_open_files;
extern ulint srv_max_dirty_pages_pct;
extern ulint srv_force_recovery;
-extern ulint srv_thread_concurrency;
+extern ulong srv_thread_concurrency;
extern ulint srv_max_n_threads;
extern lint srv_conc_n_threads;
-extern ibool srv_fast_shutdown;
-extern ibool srv_very_fast_shutdown; /* if this TRUE, do not flush the
+extern ulint srv_fast_shutdown; /* If this is 1, do not do a
+ purge and index buffer merge.
+ If this is 2, do not even flush the
buffer pool to data files at the
- shutdown; we effectively 'crash'
- InnoDB */
+ shutdown: we effectively 'crash'
+ InnoDB (but lose no committed
+ transactions). */
extern ibool srv_innodb_status;
extern ibool srv_use_doublewrite_buf;
+extern ibool srv_use_checksums;
extern ibool srv_set_thread_priorities;
extern int srv_query_thread_priority;
@@ -131,7 +134,9 @@ extern ibool srv_print_innodb_table_monitor;
extern ibool srv_lock_timeout_and_monitor_active;
extern ibool srv_error_monitor_active;
-extern ulint srv_n_spin_wait_rounds;
+extern ulong srv_n_spin_wait_rounds;
+extern ulong srv_n_free_tickets_to_enter;
+extern ulong srv_thread_sleep_delay;
extern ulint srv_spin_wait_delay;
extern ibool srv_priority_boost;
@@ -183,6 +188,63 @@ i/o handler thread */
extern const char* srv_io_thread_op_info[];
extern const char* srv_io_thread_function[];
+/* the number of log write requests done */
+extern ulint srv_log_write_requests;
+
+/* the number of physical writes to the log performed */
+extern ulint srv_log_writes;
+
+/* amount of data written to the log files in bytes */
+extern ulint srv_os_log_written;
+
+/* number of writes being done to the log files */
+extern ulint srv_os_log_pending_writes;
+
+/* we increase this counter when we don't have enough space in the
+log buffer and have to flush it */
+extern ulint srv_log_waits;
+
+/* variable that counts amount of data read in total (in bytes) */
+extern ulint srv_data_read;
+
+/* here we count the amount of data written in total (in bytes) */
+extern ulint srv_data_written;
+
+/* this variable counts the number of times the doublewrite buffer
+was flushed */
+extern ulint srv_dblwr_writes;
+
+/* here we store the number of pages that have been flushed to the
+doublewrite buffer */
+extern ulint srv_dblwr_pages_written;
+
+/* in this variable we store the number of write requests issued */
+extern ulint srv_buf_pool_write_requests;
+
+/* here we store the number of times when we had to wait for a free page
+in the buffer pool. It happens when the buffer pool is full and we need
+to make a flush, in order to be able to read or create a page. */
+extern ulint srv_buf_pool_wait_free;
+
+/* variable to count the number of pages that were written from the
+buffer pool to disk */
+extern ulint srv_buf_pool_flushed;
+
+/* variable to count the number of buffer pool reads that led to the
+reading of a disk page */
+extern ulint srv_buf_pool_reads;
+
+/* variable to count the number of sequential read-aheads that were done */
+extern ulint srv_read_ahead_seq;
+
+/* variable to count the number of random read-aheads that were done */
+extern ulint srv_read_ahead_rnd;
+
+/* In this structure we store status variables to be passed to MySQL */
+typedef struct export_var_struct export_struc;
+
+extern export_struc export_vars;
+
typedef struct srv_sys_struct srv_sys_t;
/* The server system */
@@ -233,6 +295,12 @@ srv_boot(void);
/*==========*/
/* out: DB_SUCCESS or error code */
/*************************************************************************
+Initializes the server. */
+
+void
+srv_init(void);
+/*==========*/
+/*************************************************************************
Frees the OS fast mutex created in srv_boot(). */
void
@@ -404,6 +472,12 @@ srv_printf_innodb_monitor(
ulint* trx_end); /* out: file position of the end of
the list of active transactions */
+/**********************************************************************
+Function to pass InnoDB status variables to MySQL */
+
+void
+srv_export_innodb_status(void);
+/*=====================*/
/* Types for the threads existing in the system. Threads of types 4 - 9
are called utility threads. Note that utility threads are mainly disk
@@ -429,6 +503,53 @@ typedef struct srv_slot_struct srv_slot_t;
/* Thread table is an array of slots */
typedef srv_slot_t srv_table_t;
+/* In this structure we store status variables to be passed to MySQL */
+struct export_var_struct{
+ ulint innodb_data_pending_reads;
+ ulint innodb_data_pending_writes;
+ ulint innodb_data_pending_fsyncs;
+ ulint innodb_data_fsyncs;
+ ulint innodb_data_read;
+ ulint innodb_data_writes;
+ ulint innodb_data_written;
+ ulint innodb_data_reads;
+ ulint innodb_buffer_pool_pages_total;
+ ulint innodb_buffer_pool_pages_data;
+ ulint innodb_buffer_pool_pages_dirty;
+ ulint innodb_buffer_pool_pages_misc;
+ ulint innodb_buffer_pool_pages_free;
+ ulint innodb_buffer_pool_pages_latched;
+ ulint innodb_buffer_pool_read_requests;
+ ulint innodb_buffer_pool_reads;
+ ulint innodb_buffer_pool_wait_free;
+ ulint innodb_buffer_pool_pages_flushed;
+ ulint innodb_buffer_pool_write_requests;
+ ulint innodb_buffer_pool_read_ahead_seq;
+ ulint innodb_buffer_pool_read_ahead_rnd;
+ ulint innodb_dblwr_pages_written;
+ ulint innodb_dblwr_writes;
+ ulint innodb_log_waits;
+ ulint innodb_log_write_requests;
+ ulint innodb_log_writes;
+ ulint innodb_os_log_written;
+ ulint innodb_os_log_fsyncs;
+ ulint innodb_os_log_pending_writes;
+ ulint innodb_os_log_pending_fsyncs;
+ ulint innodb_page_size;
+ ulint innodb_pages_created;
+ ulint innodb_pages_read;
+ ulint innodb_pages_written;
+ ulint innodb_row_lock_waits;
+ ulint innodb_row_lock_current_waits;
+ ib_longlong innodb_row_lock_time;
+ ulint innodb_row_lock_time_avg;
+ ulint innodb_row_lock_time_max;
+ ulint innodb_rows_read;
+ ulint innodb_rows_inserted;
+ ulint innodb_rows_updated;
+ ulint innodb_rows_deleted;
+};
+
/* The server system struct */
struct srv_sys_struct{
os_event_t operational; /* created threads must wait for the
@@ -437,6 +558,10 @@ struct srv_sys_struct{
srv_table_t* threads; /* server thread table */
UT_LIST_BASE_NODE_T(que_thr_t)
tasks; /* task queue */
+ dict_index_t* dummy_ind1; /* dummy index for old-style
+ supremum and infimum records */
+ dict_index_t* dummy_ind2; /* dummy index for new-style
+ supremum and infimum records */
};
extern ulint srv_n_threads_active[];
diff --git a/innobase/include/srv0start.h b/innobase/include/srv0start.h
index 8df0f97c4ff..d24f119c0b0 100644
--- a/innobase/include/srv0start.h
+++ b/innobase/include/srv0start.h
@@ -53,6 +53,16 @@ srv_parse_log_group_home_dirs(
error */
char* str, /* in: character string */
char*** log_group_home_dirs); /* out, own: log group home dirs */
+/*************************************************************************
+Adds a slash or a backslash to the end of a string if it is missing
+and the string is not empty. */
+
+char*
+srv_add_path_separator_if_needed(
+/*=============================*/
+ /* out: string which has the separator if the
+ string is not empty */
+ char* str); /* in: null-terminated character string */
/********************************************************************
Starts Innobase and creates a new database if database files
are not found and the user wants. Server parameters are
diff --git a/innobase/include/sync0rw.h b/innobase/include/sync0rw.h
index 9a988a03e92..911c8ac3f4a 100644
--- a/innobase/include/sync0rw.h
+++ b/innobase/include/sync0rw.h
@@ -61,8 +61,8 @@ Creates, or rather, initializes an rw-lock object in a specified memory
location (which must be appropriately aligned). The rw-lock is initialized
to the non-locked state. Explicit freeing of the rw-lock with rw_lock_free
is necessary only if the memory block containing it is freed. */
-
-#define rw_lock_create(L) rw_lock_create_func((L), __FILE__, __LINE__)
+#define rw_lock_create(L) rw_lock_create_func((L), __FILE__, __LINE__, #L)
+
/*=====================*/
/**********************************************************************
Creates, or rather, initializes an rw-lock object in a specified memory
@@ -75,7 +75,8 @@ rw_lock_create_func(
/*================*/
rw_lock_t* lock, /* in: pointer to memory */
const char* cfile_name, /* in: file name where created */
- ulint cline); /* in: file line where created */
+ ulint cline, /* in: file line where created */
+ const char* cmutex_name); /* in: mutex name */
/**********************************************************************
Calling this function is obligatory only if the memory buffer containing
the rw-lock is freed. Removes an rw-lock object from the global list. The
diff --git a/innobase/include/sync0sync.h b/innobase/include/sync0sync.h
index 8e0ec715b12..c798c047fa3 100644
--- a/innobase/include/sync0sync.h
+++ b/innobase/include/sync0sync.h
@@ -17,6 +17,10 @@ Created 9/5/1995 Heikki Tuuri
#include "os0sync.h"
#include "sync0arr.h"
+#ifndef UNIV_HOTBACKUP
+extern my_bool timed_mutexes;
+#endif /* UNIV_HOTBACKUP */
+
/**********************************************************************
Initializes the synchronization data structures. */
@@ -35,8 +39,7 @@ location (which must be appropriately aligned). The mutex is initialized
in the reset state. Explicit freeing of the mutex with mutex_free is
necessary only if the memory block containing it is freed. */
-
-#define mutex_create(M) mutex_create_func((M), __FILE__, __LINE__)
+#define mutex_create(M) mutex_create_func((M), __FILE__, __LINE__, #M)
/*===================*/
/**********************************************************************
Creates, or rather, initializes a mutex object in a specified memory
@@ -49,7 +52,8 @@ mutex_create_func(
/*==============*/
mutex_t* mutex, /* in: pointer to memory */
const char* cfile_name, /* in: file name where created */
- ulint cline); /* in: file line where created */
+ ulint cline, /* in: file line where created */
+ const char* cmutex_name); /* in: mutex name */
/**********************************************************************
Calling this function is obligatory only if the memory buffer containing
the mutex is freed. Removes a mutex object from the mutex list. The mutex
@@ -413,6 +417,8 @@ or row lock! */
/*------------------------------------- Insert buffer tree */
#define SYNC_IBUF_BITMAP_MUTEX 351
#define SYNC_IBUF_BITMAP 350
+/*------------------------------------- MySQL query cache mutex */
+/*------------------------------------- MySQL binlog mutex */
/*-------------------------------*/
#define SYNC_KERNEL 300
#define SYNC_REC_LOCK 299
@@ -471,6 +477,17 @@ struct mutex_struct {
const char* cfile_name;/* File name where mutex created */
ulint cline; /* Line where created */
ulint magic_n;
+#ifndef UNIV_HOTBACKUP
+ ulong count_using; /* count of times mutex used; incremented in mutex_enter_func() */
+ ulong count_spin_loop; /* count of spin loops */
+ ulong count_spin_rounds; /* count of spin rounds */
+ ulong count_os_wait; /* count of os_wait */
+ ulong count_os_yield; /* count of os_yield */
+ ulonglong lspent_time; /* mutex os_wait timer msec */
+ ulonglong lmax_spent_time; /* presumably the maximum os_wait time, msec -- TODO confirm */
+ const char* cmutex_name;/* mutex name */
+ ulint mutex_type;/* 0 - usual mutex 1 - rw_lock mutex */
+#endif /* !UNIV_HOTBACKUP */
};
#define MUTEX_MAGIC_N (ulint)979585
@@ -504,6 +521,13 @@ extern ibool sync_order_checks_on;
/* This variable is set to TRUE when sync_init is called */
extern ibool sync_initialized;
+/* Global list of database mutexes (not OS mutexes) created. NOTE(review): no 'extern' here, unlike sync_initialized above, so each includer gets a tentative definition -- confirm intended. */
+UT_LIST_BASE_NODE_T(mutex_t) mutex_list;
+
+/* Mutex protecting the mutex_list variable. NOTE(review): also declared without 'extern' -- confirm intended. */
+mutex_t mutex_list_mutex;
+
+
#ifndef UNIV_NONINL
#include "sync0sync.ic"
#endif
diff --git a/innobase/include/sync0sync.ic b/innobase/include/sync0sync.ic
index aaf5e1fd9e9..b3fde61db5e 100644
--- a/innobase/include/sync0sync.ic
+++ b/innobase/include/sync0sync.ic
@@ -249,8 +249,13 @@ mutex_enter_func(
/* Note that we do not peek at the value of lock_word before trying
the atomic test_and_set; we could peek, and possibly save time. */
+
+#ifndef UNIV_HOTBACKUP
+ mutex->count_using++;
+#endif /* UNIV_HOTBACKUP */
- if (!mutex_test_and_set(mutex)) {
+ if (!mutex_test_and_set(mutex))
+ {
#ifdef UNIV_SYNC_DEBUG
mutex_set_debug_info(mutex, file_name, line);
#endif
@@ -258,4 +263,5 @@ mutex_enter_func(
}
mutex_spin_wait(mutex, file_name, line);
+
}
diff --git a/innobase/include/trx0rec.h b/innobase/include/trx0rec.h
index 9d7f41cd94e..4387ce1a61e 100644
--- a/innobase/include/trx0rec.h
+++ b/innobase/include/trx0rec.h
@@ -246,6 +246,7 @@ trx_undo_prev_version_build(
index_rec page and purge_view */
rec_t* rec, /* in: version of a clustered index record */
dict_index_t* index, /* in: clustered index */
+ ulint* offsets,/* in: rec_get_offsets(rec, index) */
mem_heap_t* heap, /* in: memory heap from which the memory
needed is allocated */
rec_t** old_vers);/* out, own: previous version, or NULL if
diff --git a/innobase/include/trx0roll.h b/innobase/include/trx0roll.h
index 6004551f456..944142a299d 100644
--- a/innobase/include/trx0roll.h
+++ b/innobase/include/trx0roll.h
@@ -104,11 +104,20 @@ trx_rollback(
/***********************************************************************
Rollback or clean up transactions which have no user session. If the
transaction already was committed, then we clean up a possible insert
-undo log. If the transaction was not yet committed, then we roll it back. */
+undo log. If the transaction was not yet committed, then we roll it back.
+Note: this is done in a background thread. */
-void
-trx_rollback_or_clean_all_without_sess(void);
-/*========================================*/
+#ifndef __WIN__
+void*
+#else
+ulint
+#endif
+trx_rollback_or_clean_all_without_sess(
+/*===================================*/
+ /* out: a dummy parameter */
+ void* arg __attribute__((unused)));
+ /* in: a dummy parameter required by
+ os_thread_create */
/********************************************************************
Finishes a transaction rollback. */
@@ -216,6 +225,21 @@ trx_savepoint_for_mysql(
position corresponding to this
connection at the time of the
savepoint */
+
+/***********************************************************************
+Releases a named savepoint. Savepoints which
+were set after this savepoint are deleted. */
+
+ulint
+trx_release_savepoint_for_mysql(
+/*================================*/
+ /* out: if no savepoint
+ of the name found then
+ DB_NO_SAVEPOINT,
+ otherwise DB_SUCCESS */
+ trx_t* trx, /* in: transaction handle */
+ const char* savepoint_name); /* in: savepoint name */
+
/***********************************************************************
Frees savepoint structs. */
diff --git a/innobase/include/trx0sys.ic b/innobase/include/trx0sys.ic
index 8f455e554ea..24610bef827 100644
--- a/innobase/include/trx0sys.ic
+++ b/innobase/include/trx0sys.ic
@@ -315,7 +315,7 @@ trx_is_active(
}
if (ut_dulint_cmp(trx_id, trx_sys->max_trx_id) >= 0) {
-
+
/* There must be corruption: we return TRUE because this
function is only called by lock_clust_rec_some_has_impl()
and row_vers_impl_x_locked_off_kernel() and they have
@@ -325,8 +325,9 @@ trx_is_active(
}
trx = trx_get_on_id(trx_id);
- if (trx && (trx->conc_state == TRX_ACTIVE)) {
-
+ if (trx && (trx->conc_state == TRX_ACTIVE ||
+ trx->conc_state == TRX_PREPARED)) {
+
return(TRUE);
}
diff --git a/innobase/include/trx0trx.h b/innobase/include/trx0trx.h
index 905b25447a8..d46613c3a68 100644
--- a/innobase/include/trx0trx.h
+++ b/innobase/include/trx0trx.h
@@ -16,6 +16,7 @@ Created 3/26/1996 Heikki Tuuri
#include "que0types.h"
#include "mem0mem.h"
#include "read0types.h"
+#include "trx0xa.h"
extern ulint trx_n_mysql_transactions;
@@ -157,6 +158,32 @@ trx_commit_for_mysql(
/* out: 0 or error number */
trx_t* trx); /* in: trx handle */
/**************************************************************************
+Does the transaction prepare for MySQL. */
+
+ulint
+trx_prepare_for_mysql(
+/*=================*/
+ /* out: 0 or error number */
+ trx_t* trx); /* in: trx handle */
+/**************************************************************************
+This function is used to find the number of prepared transactions and
+their transaction objects for a recovery. */
+
+int
+trx_recover_for_mysql(
+/*==================*/
+ /* out: number of prepared transactions */
+ XID* xid_list, /* in/out: prepared transactions */
+ ulint len); /* in: number of slots in xid_list */
+/***********************************************************************
+This function is used to find one X/Open XA distributed transaction
+which is in the prepared state */
+trx_t *
+trx_get_trx_by_xid(
+/*===============*/
+ /* out: trx or NULL */
+ XID* xid); /* in: X/Open XA transaction identification */
+/**************************************************************************
If required, flushes the log to disk if we called trx_commit_for_mysql()
with trx->flush_log_later == TRUE. */
@@ -339,6 +366,14 @@ struct trx_struct{
if we can use the insert buffer for
them, we set this FALSE */
dulint id; /* transaction id */
+ XID xid; /* X/Open XA transaction
+ identification to identify a
+ transaction branch */
+ ibool support_xa; /* normally we do the XA two-phase
+ commit steps, but by setting this to
+ FALSE, one can save CPU time and about
+ 150 bytes in the undo log size as then
+ we skip XA steps */
dulint no; /* transaction serialization number ==
max trx id when the transaction is
moved to COMMITTED_IN_MEMORY state */
@@ -355,12 +390,17 @@ struct trx_struct{
dulint commit_lsn; /* lsn at the time of the commit */
ibool dict_operation; /* TRUE if the trx is used to create
a table, create an index, or drop a
- table */
+ table. This is a hint that the table
+ may need to be dropped in crash
+ recovery. */
dulint table_id; /* table id if the preceding field is
TRUE */
/*------------------------------*/
- void* mysql_thd; /* MySQL thread handle corresponding
- to this trx, or NULL */
+ int active_trans; /* 1 - if a transaction in MySQL
+ is active. 2 - if prepare_commit_mutex
+ was taken */
+ void* mysql_thd; /* MySQL thread handle corresponding
+ to this trx, or NULL */
char** mysql_query_str;/* pointer to the field in mysqld_thd
which contains the pointer to the
current SQL query string */
@@ -442,9 +482,15 @@ struct trx_struct{
lock_t* auto_inc_lock; /* possible auto-inc lock reserved by
the transaction; note that it is also
in the lock list trx_locks */
+ ibool trx_create_lock;/* this is TRUE if we have created a
+ new lock for a record accessed */
ulint n_lock_table_exp;/* number of explicit table locks
(LOCK TABLES) reserved by the
transaction, stored in trx_locks */
+ ulint n_lock_table_transactional;
+ /* number of transactional table locks
+ (LOCK TABLES..WHERE ENGINE) reserved by
+ the transaction, stored in trx_locks */
UT_LIST_NODE_T(trx_t)
trx_list; /* list of transactions */
UT_LIST_NODE_T(trx_t)
@@ -560,6 +606,7 @@ struct trx_struct{
#define TRX_NOT_STARTED 1
#define TRX_ACTIVE 2
#define TRX_COMMITTED_IN_MEMORY 3
+#define TRX_PREPARED 4 /* Support for 2PC/XA */
/* Transaction execution states when trx state is TRX_ACTIVE */
#define TRX_QUE_RUNNING 1 /* transaction is running */
diff --git a/innobase/include/trx0undo.h b/innobase/include/trx0undo.h
index 20002076cc3..bd7337e4f90 100644
--- a/innobase/include/trx0undo.h
+++ b/innobase/include/trx0undo.h
@@ -14,6 +14,7 @@ Created 3/26/1996 Heikki Tuuri
#include "mtr0mtr.h"
#include "trx0sys.h"
#include "page0types.h"
+#include "trx0xa.h"
/***************************************************************************
Builds a roll pointer dulint. */
@@ -36,7 +37,7 @@ trx_undo_decode_roll_ptr(
ibool* is_insert, /* out: TRUE if insert undo log */
ulint* rseg_id, /* out: rollback segment id */
ulint* page_no, /* out: page number */
- ulint* offset); /* out: offset of the undo entry within page */
+ ulint* offset); /* out: offset of the undo entry within page */
/***************************************************************************
Returns TRUE if the roll pointer is of the insert type. */
UNIV_INLINE
@@ -239,6 +240,18 @@ trx_undo_set_state_at_finish(
trx_t* trx, /* in: transaction */
trx_undo_t* undo, /* in: undo log memory copy */
mtr_t* mtr); /* in: mtr */
+/**********************************************************************
+Sets the state of the undo log segment at a transaction prepare. */
+
+page_t*
+trx_undo_set_state_at_prepare(
+/*==========================*/
+ /* out: undo log segment header page,
+ x-latched */
+ trx_t* trx, /* in: transaction */
+ trx_undo_t* undo, /* in: undo log memory copy */
+ mtr_t* mtr); /* in: mtr */
+
/**************************************************************************
Adds the update undo log header as the first in the history list, and
frees the memory object, or puts it to the list of cached update undo log
@@ -295,7 +308,6 @@ trx_undo_parse_discard_latest(
page_t* page, /* in: page or NULL */
mtr_t* mtr); /* in: mtr or NULL */
-
/* Types of an undo log segment */
#define TRX_UNDO_INSERT 1 /* contains undo entries for inserts */
#define TRX_UNDO_UPDATE 2 /* contains undo entries for updates
@@ -310,6 +322,8 @@ trx_undo_parse_discard_latest(
#define TRX_UNDO_TO_PURGE 4 /* update undo segment will not be
reused: it can be freed in purge when
all undo data in it is removed */
+#define TRX_UNDO_PREPARED 5 /* contains an undo log of a
+ prepared transaction */
/* Transaction undo log memory object; this is protected by the undo_mutex
in the corresponding transaction object */
@@ -332,6 +346,8 @@ struct trx_undo_struct{
field */
dulint trx_id; /* id of the trx assigned to the undo
log */
+ XID xid; /* X/Open XA transaction
+ identification */
ibool dict_operation; /* TRUE if a dict operation trx */
dulint table_id; /* if a dict operation, then the table
id */
@@ -386,7 +402,8 @@ struct trx_undo_struct{
#define TRX_UNDO_PAGE_HDR_SIZE (6 + FLST_NODE_SIZE)
/* An update undo segment with just one page can be reused if it has
-< this number bytes used */
+< this number bytes used; we must leave space at least for one new undo
+log header on the page */
#define TRX_UNDO_PAGE_REUSE_LIMIT (3 * UNIV_PAGE_SIZE / 4)
@@ -436,7 +453,10 @@ page of an update undo log segment. */
log start, and therefore this is not
necessarily the same as this log
header end offset */
-#define TRX_UNDO_DICT_OPERATION 20 /* TRUE if the transaction is a table
+#define TRX_UNDO_XID_EXISTS 20 /* TRUE if undo log header includes
+ X/Open XA transaction identification
+ XID */
+#define TRX_UNDO_DICT_TRANS 21 /* TRUE if the transaction is a table
create, index create, or drop
transaction: in recovery
the transaction cannot be rolled back
@@ -452,7 +472,25 @@ page of an update undo log segment. */
#define TRX_UNDO_HISTORY_NODE 34 /* If the log is put to the history
list, the file list node is here */
/*-------------------------------------------------------------*/
-#define TRX_UNDO_LOG_HDR_SIZE (34 + FLST_NODE_SIZE)
+#define TRX_UNDO_LOG_OLD_HDR_SIZE (34 + FLST_NODE_SIZE)
+
+/* Note: the writing of the undo log old header is coded by a log record
+MLOG_UNDO_HDR_CREATE or MLOG_UNDO_HDR_REUSE. The appending of an XID to the
+header is logged separately. In this sense, the XID is not really a member
+of the undo log header. TODO: do not append the XID to the log header if XA
+is not needed by the user. The XID wastes about 150 bytes of space in every
+undo log. In the history list we may have millions of undo logs, which means
+quite a large overhead. */
+
+/* X/Open XA Transaction Identification (XID) */
+
+#define TRX_UNDO_XA_FORMAT (TRX_UNDO_LOG_OLD_HDR_SIZE)
+#define TRX_UNDO_XA_TRID_LEN (TRX_UNDO_XA_FORMAT + 4)
+#define TRX_UNDO_XA_BQUAL_LEN (TRX_UNDO_XA_TRID_LEN + 4)
+#define TRX_UNDO_XA_XID (TRX_UNDO_XA_BQUAL_LEN + 4)
+/*--------------------------------------------------------------*/
+#define TRX_UNDO_LOG_XA_HDR_SIZE (TRX_UNDO_XA_XID + XIDDATASIZE)
+ /* Total size of the header with the XA XID */
#ifndef UNIV_NONINL
#include "trx0undo.ic"
diff --git a/innobase/include/trx0xa.h b/innobase/include/trx0xa.h
new file mode 100644
index 00000000000..34b7a2f95a8
--- /dev/null
+++ b/innobase/include/trx0xa.h
@@ -0,0 +1,182 @@
+/*
+ * Start of xa.h header
+ *
+ * Define a symbol to prevent multiple inclusions of this header file
+ */
+#ifndef XA_H
+#define XA_H
+
+/*
+ * Transaction branch identification: XID and NULLXID:
+ */
+#ifndef XIDDATASIZE
+
+#define XIDDATASIZE 128 /* size in bytes */
+#define MAXGTRIDSIZE 64 /* maximum size in bytes of gtrid */
+#define MAXBQUALSIZE 64 /* maximum size in bytes of bqual */
+
+struct xid_t {
+ long formatID; /* format identifier; -1 means that the XID is null */
+ long gtrid_length; /* length of gtrid in bytes: value from 1 through 64 (MAXGTRIDSIZE) */
+ long bqual_length; /* length of bqual in bytes: value from 1 through 64 (MAXBQUALSIZE) */
+ char data[XIDDATASIZE]; /* XID bytes; presumably gtrid followed by bqual -- confirm against the XA spec */
+};
+typedef struct xid_t XID;
+#endif
+/*
+ * A value of -1 in formatID means that the XID is null.
+ */
+
+
+#ifdef NOTDEFINED
+/* Let us comment this out to remove compiler errors!!!!!!!!!!!! */
+
+/*
+ * Declarations of routines by which RMs call TMs:
+ */
+extern int ax_reg __P((int, XID *, long));
+extern int ax_unreg __P((int, long));
+
+/*
+ * XA Switch Data Structure
+ */
+#define RMNAMESZ 32 /* length of resource manager name, */
+ /* including the null terminator */
+#define MAXINFOSIZE 256 /* maximum size in bytes of xa_info */
+ /* strings, including the null
+ terminator */
+
+
+struct xa_switch_t {
+ char name[RMNAMESZ]; /* name of resource manager */
+ long flags; /* resource manager specific options */
+ long version; /* must be 0 */
+ int (*xa_open_entry) /* xa_open function pointer */
+ __P((char *, int, long));
+ int (*xa_close_entry) /* xa_close function pointer */
+ __P((char *, int, long));
+ int (*xa_start_entry) /* xa_start function pointer */
+ __P((XID *, int, long));
+ int (*xa_end_entry) /* xa_end function pointer */
+ __P((XID *, int, long));
+ int (*xa_rollback_entry) /* xa_rollback function pointer */
+ __P((XID *, int, long));
+ int (*xa_prepare_entry) /* xa_prepare function pointer */
+ __P((XID *, int, long));
+ int (*xa_commit_entry) /* xa_commit function pointer */
+ __P((XID *, int, long));
+ int (*xa_recover_entry) /* xa_recover function pointer */
+ __P((XID *, long, int, long));
+ int (*xa_forget_entry) /* xa_forget function pointer */
+ __P((XID *, int, long));
+ int (*xa_complete_entry) /* xa_complete function pointer */
+ __P((int *, int *, int, long));
+};
+#endif /* NOTDEFINED */
+
+
+/*
+ * Flag definitions for the RM switch
+ */
+#define TMNOFLAGS 0x00000000L /* no resource manager features
+ selected */
+#define TMREGISTER 0x00000001L /* resource manager dynamically
+ registers */
+#define TMNOMIGRATE 0x00000002L /* resource manager does not support
+ association migration */
+#define TMUSEASYNC 0x00000004L /* resource manager supports
+ asynchronous operations */
+/*
+ * Flag definitions for xa_ and ax_ routines
+ */
+/* use TMNOFLAGS, defined above, when not specifying other flags */
+#define TMASYNC 0x80000000L /* perform routine asynchronously */
+#define TMONEPHASE 0x40000000L /* caller is using one-phase commit
+ optimisation */
+#define TMFAIL 0x20000000L /* dissociates caller and marks
+ transaction branch rollback-only */
+#define TMNOWAIT 0x10000000L /* return if blocking condition
+ exists */
+#define TMRESUME 0x08000000L /* caller is resuming association with
+ suspended transaction branch */
+#define TMSUCCESS 0x04000000L /* dissociate caller from transaction
+ branch */
+#define TMSUSPEND 0x02000000L /* caller is suspending, not ending,
+ association */
+#define TMSTARTRSCAN 0x01000000L /* start a recovery scan */
+#define TMENDRSCAN 0x00800000L /* end a recovery scan */
+#define TMMULTIPLE 0x00400000L /* wait for any asynchronous
+ operation */
+#define TMJOIN 0x00200000L /* caller is joining existing
+ transaction branch */
+#define TMMIGRATE 0x00100000L /* caller intends to perform
+ migration */
+
+/*
+ * ax_() return codes (transaction manager reports to resource manager)
+ */
+#define TM_JOIN 2 /* caller is joining existing
+ transaction branch */
+#define TM_RESUME 1 /* caller is resuming association with
+ suspended transaction branch */
+#define TM_OK 0 /* normal execution */
+#define TMER_TMERR -1 /* an error occurred in the transaction
+ manager */
+#define TMER_INVAL -2 /* invalid arguments were given */
+#define TMER_PROTO -3 /* routine invoked in an improper
+ context */
+
+/*
+ * xa_() return codes (resource manager reports to transaction manager)
+ */
+#define XA_RBBASE 100 /* The inclusive lower bound of the
+ rollback codes */
+#define XA_RBROLLBACK XA_RBBASE /* The rollback was caused by an
+ unspecified reason */
+#define XA_RBCOMMFAIL XA_RBBASE+1 /* The rollback was caused by a
+ communication failure */
+#define XA_RBDEADLOCK XA_RBBASE+2 /* A deadlock was detected */
+#define XA_RBINTEGRITY XA_RBBASE+3 /* A condition that violates the
+ integrity of the resources was
+ detected */
+#define XA_RBOTHER XA_RBBASE+4 /* The resource manager rolled back the
+ transaction branch for a reason not
+ on this list */
+#define XA_RBPROTO XA_RBBASE+5 /* A protocol error occurred in the
+ resource manager */
+#define XA_RBTIMEOUT XA_RBBASE+6 /* A transaction branch took too long */
+#define XA_RBTRANSIENT XA_RBBASE+7 /* May retry the transaction branch */
+#define XA_RBEND XA_RBTRANSIENT /* The inclusive upper bound of the
+ rollback codes */
+#define XA_NOMIGRATE 9 /* resumption must occur where
+ suspension occurred */
+#define XA_HEURHAZ 8 /* the transaction branch may have
+ been heuristically completed */
+#define XA_HEURCOM 7 /* the transaction branch has been
+ heuristically committed */
+#define XA_HEURRB 6 /* the transaction branch has been
+ heuristically rolled back */
+#define XA_HEURMIX 5 /* the transaction branch has been
+ heuristically committed and rolled
+ back */
+#define XA_RETRY 4 /* routine returned with no effect and
+ may be re-issued */
+#define XA_RDONLY 3 /* the transaction branch was read-only
+ and has been committed */
+#define XA_OK 0 /* normal execution */
+#define XAER_ASYNC -2 /* asynchronous operation already
+ outstanding */
+#define XAER_RMERR -3 /* a resource manager error occurred in
+ the transaction branch */
+#define XAER_NOTA -4 /* the XID is not valid */
+#define XAER_INVAL -5 /* invalid arguments were given */
+#define XAER_PROTO -6 /* routine invoked in an improper
+ context */
+#define XAER_RMFAIL -7 /* resource manager unavailable */
+#define XAER_DUPID -8 /* the XID already exists */
+#define XAER_OUTSIDE -9 /* resource manager doing work outside
+ transaction */
+#endif /* ifndef XA_H */
+/*
+ * End of xa.h header
+ */
diff --git a/innobase/include/univ.i b/innobase/include/univ.i
index 625978ffc38..8158c198e21 100644
--- a/innobase/include/univ.i
+++ b/innobase/include/univ.i
@@ -88,6 +88,7 @@ memory is read outside the allocated blocks. */
#define UNIV_SEARCH_DEBUG
#define UNIV_SYNC_PERF_STAT
#define UNIV_SEARCH_PERF_STAT
+#define UNIV_SRV_PRINT_LATCH_WAITS
*/
#define UNIV_LIGHT_MEM_DEBUG
diff --git a/innobase/include/ut0byte.h b/innobase/include/ut0byte.h
index a62c2e2e318..22d488abeaf 100644
--- a/innobase/include/ut0byte.h
+++ b/innobase/include/ut0byte.h
@@ -208,7 +208,20 @@ ut_align_down(
/*==========*/
/* out: aligned pointer */
void* ptr, /* in: pointer */
- ulint align_no); /* in: align by this number */
+ ulint align_no) /* in: align by this number */
+ __attribute__((const));
+/*************************************************************
+The following function computes the offset of a pointer from the nearest
+aligned address at or below it, i.e. the low bits ptr & (align_no - 1). */
+UNIV_INLINE
+ulint
+ut_align_offset(
+/*============*/
+ /* out: distance from aligned
+ pointer, in the range 0 .. align_no - 1 */
+ const void* ptr, /* in: pointer */
+ ulint align_no) /* in: align by this number; a power of 2 */
+ __attribute__((const));
/*********************************************************************
Gets the nth bit of a ulint. */
UNIV_INLINE
diff --git a/innobase/include/ut0byte.ic b/innobase/include/ut0byte.ic
index 5a70dcf12a8..e141de3aa3f 100644
--- a/innobase/include/ut0byte.ic
+++ b/innobase/include/ut0byte.ic
@@ -335,6 +335,27 @@ ut_align_down(
return((void*)((((ulint)ptr)) & ~(align_no - 1)));
}
+/*************************************************************
+The following function computes the offset of a pointer from the nearest
+aligned address at or below it, i.e. the low bits ptr & (align_no - 1). */
+UNIV_INLINE
+ulint
+ut_align_offset(
+/*============*/
+ /* out: distance from
+ aligned pointer, 0 .. align_no - 1 */
+ const void* ptr, /* in: pointer */
+ ulint align_no) /* in: align by this number; a power of 2 */
+{
+ ut_ad(align_no > 0);
+ ut_ad(((align_no - 1) & align_no) == 0); /* align_no has exactly one bit set */
+ ut_ad(ptr);
+
+ ut_ad(sizeof(void*) == sizeof(ulint)); /* the pointer-to-ulint cast below must not truncate */
+
+ return(((ulint)ptr) & (align_no - 1)); /* keep only the bits below the alignment */
+}
+
/*********************************************************************
Gets the nth bit of a ulint. */
UNIV_INLINE
diff --git a/innobase/include/ut0ut.h b/innobase/include/ut0ut.h
index dee8785c9e7..8938957cd12 100644
--- a/innobase/include/ut0ut.h
+++ b/innobase/include/ut0ut.h
@@ -139,6 +139,14 @@ ib_time_t
ut_time(void);
/*=========*/
/**************************************************************
+Returns system time as whole seconds plus a microsecond remainder. */
+
+void
+ut_usectime(
+/*========*/
+ ulint* sec, /* out: seconds since the Epoch */
+ ulint* ms); /* out: microseconds (not milliseconds) since the Epoch+*sec */
+/**************************************************************
Returns the difference of two times in seconds. */
double