From d2c4b545405845900af52e033240040ee2ab83dd Mon Sep 17 00:00:00 2001 From: unknown Date: Thu, 2 Dec 2004 19:45:07 +0200 Subject: Many files: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/btr/btr0btr.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/btr/btr0cur.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/btr/btr0pcur.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/btr/btr0sea.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/data/data0data.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/data/data0type.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/dict/dict0boot.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/dict/dict0crea.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/dict/dict0dict.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/dict/dict0load.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/dict/dict0mem.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/fil/fil0fil.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/fsp/fsp0fsp.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/ibuf/ibuf0ibuf.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/btr0btr.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/btr0btr.ic: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/btr0cur.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/btr0cur.ic: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/btr0pcur.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/btr0sea.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/data0type.ic: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/dict0dict.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/dict0dict.ic: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/dict0mem.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/lock0lock.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/lock0lock.ic: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/mtr0log.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/mtr0mtr.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/page0cur.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/page0cur.ic: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/page0page.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/page0page.ic: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/rem0cmp.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/rem0cmp.ic: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/rem0rec.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/rem0rec.ic: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/row0row.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/row0row.ic: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/row0upd.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/row0upd.ic: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/row0vers.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/row0vers.ic: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/srv0srv.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/trx0rec.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/ut0byte.h: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/include/ut0byte.ic: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/lock/lock0lock.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/log/log0recv.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/mtr/mtr0log.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/page/page0cur.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/page/page0page.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/pars/pars0pars.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/rem/rem0cmp.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/rem/rem0rec.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/row/row0ins.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/row/row0mysql.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/row/row0purge.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/row/row0row.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/row/row0sel.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/row/row0umod.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/row/row0undo.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/row/row0upd.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/row/row0vers.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/srv/srv0srv.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/trx/trx0rec.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. innobase/trx/trx0undo.c: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. sql/ha_innodb.cc: Implement more compact InnoDB record format. Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC. --- innobase/btr/btr0btr.c | 474 +++++++++++------- innobase/btr/btr0cur.c | 546 +++++++++++++-------- innobase/btr/btr0pcur.c | 52 +- innobase/btr/btr0sea.c | 166 ++++--- innobase/data/data0data.c | 9 +- innobase/data/data0type.c | 2 +- innobase/dict/dict0boot.c | 18 +- innobase/dict/dict0crea.c | 19 +- innobase/dict/dict0dict.c | 105 ++-- innobase/dict/dict0load.c | 157 +++--- innobase/dict/dict0mem.c | 9 +- innobase/fil/fil0fil.c | 28 +- innobase/fsp/fsp0fsp.c | 2 +- innobase/ibuf/ibuf0ibuf.c | 414 ++++++++++------ innobase/include/btr0btr.h | 22 +- innobase/include/btr0btr.ic | 9 +- innobase/include/btr0cur.h | 60 ++- innobase/include/btr0cur.ic | 4 +- innobase/include/btr0pcur.h | 1 + innobase/include/btr0sea.h | 13 +- innobase/include/data0type.ic | 54 ++- innobase/include/dict0dict.h | 13 + innobase/include/dict0dict.ic | 13 +- innobase/include/dict0mem.h | 12 +- innobase/include/lock0lock.h | 14 +- innobase/include/lock0lock.ic | 5 +- innobase/include/mtr0log.h | 33 ++ innobase/include/mtr0mtr.h | 26 +- innobase/include/page0cur.h | 57 ++- innobase/include/page0cur.ic | 19 +- innobase/include/page0page.h | 211 +++++--- innobase/include/page0page.ic | 133 +++++- innobase/include/rem0cmp.h | 32 +- innobase/include/rem0cmp.ic | 6 +- innobase/include/rem0rec.h | 435 +++++++++++------ innobase/include/rem0rec.ic | 898 +++++++++++++++++++++++----------- innobase/include/row0row.h | 30 +- innobase/include/row0row.ic | 49 +- innobase/include/row0upd.h | 17 +- innobase/include/row0upd.ic | 6 +- innobase/include/row0vers.h | 3 +- innobase/include/row0vers.ic | 70 --- innobase/include/srv0srv.h | 4 + innobase/include/trx0rec.h | 1 + innobase/include/ut0byte.h | 15 +- innobase/include/ut0byte.ic | 21 + innobase/lock/lock0lock.c | 289 +++++++---- innobase/log/log0recv.c | 183 ++++--- innobase/mtr/mtr0log.c | 157 ++++++ innobase/page/page0cur.c | 408 ++++++++++------ innobase/page/page0page.c | 520 ++++++++++++-------- innobase/pars/pars0pars.c | 7 +- innobase/rem/rem0cmp.c | 127 +++-- innobase/rem/rem0rec.c | 1063 +++++++++++++++++++++++++++++++++++++---- innobase/row/row0ins.c | 213 ++++++--- innobase/row/row0mysql.c | 14 +- innobase/row/row0purge.c | 12 +- innobase/row/row0row.c | 103 ++-- innobase/row/row0sel.c | 337 ++++++++----- innobase/row/row0umod.c | 6 +- innobase/row/row0undo.c | 9 +- innobase/row/row0upd.c | 128 +++-- innobase/row/row0vers.c | 106 ++-- innobase/srv/srv0srv.c | 27 ++ innobase/trx/trx0rec.c | 104 ++-- innobase/trx/trx0undo.c | 7 +- 66 files changed, 5631 insertions(+), 2476 deletions(-) (limited to 'innobase') diff --git a/innobase/btr/btr0btr.c b/innobase/btr/btr0btr.c index ae967e0525e..06602c856fa 100644 --- a/innobase/btr/btr0btr.c +++ b/innobase/btr/btr0btr.c @@ -86,15 +86,6 @@ btr_page_create( page_t* page, /* in: page to be created */ dict_tree_t* tree, /* in: index tree */ mtr_t* mtr); /* in: mtr */ -/****************************************************************** -Sets the child node file address in a node pointer. */ -UNIV_INLINE -void -btr_node_ptr_set_child_page_no( -/*===========================*/ - rec_t* rec, /* in: node pointer record */ - ulint page_no, /* in: child node address */ - mtr_t* mtr); /* in: mtr */ /**************************************************************** Returns the upper level node pointer to a page. It is assumed that mtr holds an x-latch on the tree. */ @@ -128,7 +119,10 @@ btr_page_insert_fits( rec_t* split_rec, /* in: suggestion for first record on upper half-page, or NULL if tuple should be first */ - dtuple_t* tuple); /* in: tuple to insert */ + const ulint* offsets, /* in: rec_get_offsets( + split_rec, cursor->index) */ + dtuple_t* tuple, /* in: tuple to insert */ + mem_heap_t* heap); /* in: temporary memory heap */ /****************************************************************** Gets the root node of a tree and x-latches it. */ @@ -143,11 +137,13 @@ btr_root_get( ulint space; ulint root_page_no; page_t* root; + ibool comp = UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp; space = dict_tree_get_space(tree); root_page_no = dict_tree_get_page(tree); root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(root) == comp); return(root); } @@ -194,6 +190,7 @@ btr_get_prev_user_rec( MTR_MEMO_PAGE_S_FIX)) || (mtr_memo_contains(mtr, buf_block_align(prev_page), MTR_MEMO_PAGE_X_FIX))); + ut_a(page_is_comp(prev_page) == page_is_comp(page)); prev_rec = page_rec_get_prev(page_get_supremum_rec(prev_page)); @@ -246,6 +243,7 @@ btr_get_next_user_rec( || (mtr_memo_contains(mtr, buf_block_align(next_page), MTR_MEMO_PAGE_X_FIX))); + ut_a(page_is_comp(next_page) == page_is_comp(page)); next_rec = page_rec_get_next(page_get_infimum_rec(next_page)); return(next_rec); @@ -267,7 +265,8 @@ btr_page_create( { ut_ad(mtr_memo_contains(mtr, buf_block_align(page), MTR_MEMO_PAGE_X_FIX)); - page_create(page, mtr); + page_create(page, mtr, + UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp); buf_block_align(page)->check_index_page_at_flush = TRUE; btr_page_set_index_id(page, tree->id, mtr); @@ -503,20 +502,21 @@ UNIV_INLINE void btr_node_ptr_set_child_page_no( /*===========================*/ - rec_t* rec, /* in: node pointer record */ - ulint page_no, /* in: child node address */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: node pointer record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint page_no,/* in: child node address */ + mtr_t* mtr) /* in: mtr */ { - ulint n_fields; byte* field; ulint len; + ut_ad(rec_offs_validate(rec, NULL, offsets)); ut_ad(0 < btr_page_get_level(buf_frame_align(rec), mtr)); - - n_fields = rec_get_n_fields(rec); + ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec)); /* The child address is in the last field */ - field = rec_get_nth_field(rec, n_fields - 1, &len); + field = rec_get_nth_field(rec, offsets, + rec_offs_n_fields(offsets) - 1, &len); ut_ad(len == 4); @@ -529,16 +529,18 @@ static page_t* btr_node_ptr_get_child( /*===================*/ - /* out: child page, x-latched */ - rec_t* node_ptr, /* in: node pointer */ - mtr_t* mtr) /* in: mtr */ + /* out: child page, x-latched */ + rec_t* node_ptr,/* in: node pointer */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + mtr_t* mtr) /* in: mtr */ { ulint page_no; ulint space; page_t* page; - + + ut_ad(rec_offs_validate(node_ptr, NULL, offsets)); space = buf_frame_get_space_id(node_ptr); - page_no = btr_node_ptr_get_child_page_no(node_ptr); + page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); page = btr_page_get(space, page_no, RW_X_LATCH, mtr); @@ -564,6 +566,8 @@ btr_page_get_father_for_rec( dtuple_t* tuple; btr_cur_t cursor; rec_t* node_ptr; + dict_index_t* index; + ulint* offsets; ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree), MTR_MEMO_X_LOCK)); @@ -576,18 +580,20 @@ btr_page_get_father_for_rec( tuple = dict_tree_build_node_ptr(tree, user_rec, 0, heap, btr_page_get_level(page, mtr)); + index = UT_LIST_GET_FIRST(tree->tree_indexes); /* In the following, we choose just any index from the tree as the first parameter for btr_cur_search_to_nth_level. */ - - btr_cur_search_to_nth_level(UT_LIST_GET_FIRST(tree->tree_indexes), + + btr_cur_search_to_nth_level(index, btr_page_get_level(page, mtr) + 1, tuple, PAGE_CUR_LE, BTR_CONT_MODIFY_TREE, &cursor, 0, mtr); node_ptr = btr_cur_get_rec(&cursor); + offsets = rec_get_offsets(node_ptr, index, ULINT_UNDEFINED, heap); - if (btr_node_ptr_get_child_page_no(node_ptr) != + if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != buf_frame_get_page_no(page)) { fputs("InnoDB: Dump of the child page:\n", stderr); buf_page_print(buf_frame_align(page)); @@ -595,17 +601,22 @@ btr_page_get_father_for_rec( buf_page_print(buf_frame_align(node_ptr)); fputs("InnoDB: Corruption of an index tree: table ", stderr); - ut_print_name(stderr, NULL, - UT_LIST_GET_FIRST(tree->tree_indexes)->table_name); + ut_print_name(stderr, NULL, index->table_name); fputs(", index ", stderr); - ut_print_name(stderr, NULL, - UT_LIST_GET_FIRST(tree->tree_indexes)->name); + ut_print_name(stderr, NULL, index->name); fprintf(stderr, ",\n" "InnoDB: father ptr page no %lu, child page no %lu\n", - (ulong) btr_node_ptr_get_child_page_no(node_ptr), + (ulong) + btr_node_ptr_get_child_page_no(node_ptr, offsets), (ulong) buf_frame_get_page_no(page)); - page_rec_print(page_rec_get_next(page_get_infimum_rec(page))); - page_rec_print(node_ptr); + offsets = rec_reget_offsets(page_rec_get_next( + page_get_infimum_rec(page)), index, + offsets, ULINT_UNDEFINED, heap); + page_rec_print(page_rec_get_next(page_get_infimum_rec(page)), + offsets); + offsets = rec_reget_offsets(node_ptr, index, offsets, + ULINT_UNDEFINED, heap); + page_rec_print(node_ptr, offsets); fputs( "InnoDB: You should dump + drop + reimport the table to fix the\n" @@ -614,7 +625,7 @@ btr_page_get_father_for_rec( "InnoDB: forcing recovery. Then dump + drop + reimport.\n", stderr); } - ut_a(btr_node_ptr_get_child_page_no(node_ptr) == + ut_a(btr_node_ptr_get_child_page_no(node_ptr, offsets) == buf_frame_get_page_no(page)); mem_heap_free(heap); @@ -649,6 +660,7 @@ btr_create( ulint type, /* in: type of the index */ ulint space, /* in: space where created */ dulint index_id,/* in: index id */ + ibool comp, /* in: TRUE=compact page format */ mtr_t* mtr) /* in: mini-transaction handle */ { ulint page_no; @@ -716,7 +728,7 @@ btr_create( } /* Create a new index page on the the allocated segment page */ - page = page_create(frame, mtr); + page = page_create(frame, mtr, comp); buf_block_align(page)->check_index_page_at_flush = TRUE; /* Set the index id of the page */ @@ -821,12 +833,14 @@ static void btr_page_reorganize_low( /*====================*/ - ibool recovery,/* in: TRUE if called in recovery: locks should not - be updated, i.e., there cannot exist locks on the - page, and a hash index should not be dropped: it - cannot exist */ - page_t* page, /* in: page to be reorganized */ - mtr_t* mtr) /* in: mtr */ + ibool recovery,/* in: TRUE if called in recovery: + locks should not be updated, i.e., + there cannot exist locks on the + page, and a hash index should not be + dropped: it cannot exist */ + page_t* page, /* in: page to be reorganized */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { page_t* new_page; ulint log_mode; @@ -841,7 +855,9 @@ btr_page_reorganize_low( max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1); /* Write the log record */ - mlog_write_initial_log_record(page, MLOG_PAGE_REORGANIZE, mtr); + mlog_open_and_write_index(mtr, page, index, index->table->comp + ? MLOG_COMP_PAGE_REORGANIZE + : MLOG_PAGE_REORGANIZE, 0); /* Turn logging off */ log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); @@ -858,14 +874,14 @@ btr_page_reorganize_low( /* Recreate the page: note that global data on page (possible segment headers, next page-field, etc.) is preserved intact */ - page_create(page, mtr); + page_create(page, mtr, index->table->comp); buf_block_align(page)->check_index_page_at_flush = TRUE; /* Copy the records from the temporary space to the recreated page; do not copy the lock bits yet */ page_copy_rec_list_end_no_locks(page, new_page, - page_get_infimum_rec(new_page), mtr); + page_get_infimum_rec(new_page), index, mtr); /* Copy max trx id to recreated page */ page_set_max_trx_id(page, page_get_max_trx_id(new_page)); @@ -901,10 +917,11 @@ Reorganizes an index page. */ void btr_page_reorganize( /*================*/ - page_t* page, /* in: page to be reorganized */ - mtr_t* mtr) /* in: mtr */ + page_t* page, /* in: page to be reorganized */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { - btr_page_reorganize_low(FALSE, page, mtr); + btr_page_reorganize_low(FALSE, page, index, mtr); } /*************************************************************** @@ -913,18 +930,20 @@ Parses a redo log record of reorganizing a page. */ byte* btr_parse_page_reorganize( /*======================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr __attribute__((unused)), /* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr __attribute__((unused)), + /* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ { ut_ad(ptr && end_ptr); /* The record is empty, except for the record initial part */ if (page) { - btr_page_reorganize_low(TRUE, page, mtr); + btr_page_reorganize_low(TRUE, page, index, mtr); } return(ptr); @@ -946,7 +965,7 @@ btr_page_empty( /* Recreate the page: note that global data on page (possible segment headers, next page-field, etc.) is preserved intact */ - page_create(page, mtr); + page_create(page, mtr, page_is_comp(page)); buf_block_align(page)->check_index_page_at_flush = TRUE; } @@ -1011,7 +1030,7 @@ btr_root_raise_and_insert( /* Move the records from root to the new page */ page_move_rec_list_end(new_page, root, page_get_infimum_rec(root), - mtr); + cursor->index, mtr); /* If this is a pessimistic insert which is actually done to perform a pessimistic update then we have stored the lock information of the record to be inserted on the infimum of the @@ -1031,7 +1050,7 @@ btr_root_raise_and_insert( node_ptr = dict_tree_build_node_ptr(tree, rec, new_page_no, heap, level); /* Reorganize the root to get free space */ - btr_page_reorganize(root, mtr); + btr_page_reorganize(root, cursor->index, mtr); page_cursor = btr_cur_get_page_cur(cursor); @@ -1039,7 +1058,8 @@ btr_root_raise_and_insert( page_cur_set_before_first(root, page_cursor); - node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr, mtr); + node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr, + cursor->index, mtr); ut_ad(node_ptr_rec); @@ -1047,7 +1067,7 @@ btr_root_raise_and_insert( as there is no lower alphabetical limit to records in the leftmost node of a level: */ - btr_set_min_rec_mark(node_ptr_rec, mtr); + btr_set_min_rec_mark(node_ptr_rec, cursor->index->table->comp, mtr); /* Free the memory heap */ mem_heap_free(heap); @@ -1060,7 +1080,8 @@ btr_root_raise_and_insert( ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes), new_page); /* Reposition the cursor to the child node */ - page_cur_search(new_page, tuple, PAGE_CUR_LE, page_cursor); + page_cur_search(new_page, cursor->index, tuple, + PAGE_CUR_LE, page_cursor); /* Split the child and insert tuple */ return(btr_page_split_and_insert(cursor, tuple, mtr)); @@ -1190,11 +1211,13 @@ btr_page_get_sure_split_rec( rec_t* rec; rec_t* next_rec; ulint n; - + mem_heap_t* heap; + ulint* offsets; + page = btr_cur_get_page(cursor); - insert_size = rec_get_converted_size(tuple); - free_space = page_get_free_space_of_empty(); + insert_size = rec_get_converted_size(cursor->index, tuple); + free_space = page_get_free_space_of_empty(cursor->index->table->comp); /* free_space is now the free space of a created new page */ @@ -1208,6 +1231,9 @@ btr_page_get_sure_split_rec( ins_rec = btr_cur_get_rec(cursor); rec = page_get_infimum_rec(page); + heap = mem_heap_create(100); + offsets = NULL; + /* We start to include records to the left half, and when the space reserved by them exceeds half of total_space, then if the included records fit on the left page, they will be put there @@ -1230,7 +1256,9 @@ btr_page_get_sure_split_rec( /* Include tuple */ incl_data += insert_size; } else { - incl_data += rec_get_size(rec); + offsets = rec_reget_offsets(rec, cursor->index, + offsets, ULINT_UNDEFINED, heap); + incl_data += rec_offs_size(offsets); } n++; @@ -1252,11 +1280,12 @@ btr_page_get_sure_split_rec( next_rec = page_rec_get_next(rec); } if (next_rec != page_get_supremum_rec(page)) { - + mem_heap_free(heap); return(next_rec); } } + mem_heap_free(heap); return(rec); } } @@ -1275,7 +1304,10 @@ btr_page_insert_fits( rec_t* split_rec, /* in: suggestion for first record on upper half-page, or NULL if tuple to be inserted should be first */ - dtuple_t* tuple) /* in: tuple to insert */ + const ulint* offsets, /* in: rec_get_offsets( + split_rec, cursor->index) */ + dtuple_t* tuple, /* in: tuple to insert */ + mem_heap_t* heap) /* in: temporary memory heap */ { page_t* page; ulint insert_size; @@ -1284,11 +1316,19 @@ btr_page_insert_fits( ulint total_n_recs; rec_t* rec; rec_t* end_rec; + ulint* offs; page = btr_cur_get_page(cursor); - - insert_size = rec_get_converted_size(tuple); - free_space = page_get_free_space_of_empty(); + + ut_ad(!split_rec == !offsets); + ut_ad(!offsets + || cursor->index->table->comp == rec_offs_comp(offsets)); + ut_ad(!offsets + || rec_offs_validate(split_rec, cursor->index, offsets)); + ut_ad(page_is_comp(page) == cursor->index->table->comp); + + insert_size = rec_get_converted_size(cursor->index, tuple); + free_space = page_get_free_space_of_empty(cursor->index->table->comp); /* free_space is now the free space of a created new page */ @@ -1303,7 +1343,7 @@ btr_page_insert_fits( rec = page_rec_get_next(page_get_infimum_rec(page)); end_rec = page_rec_get_next(btr_cur_get_rec(cursor)); - } else if (cmp_dtuple_rec(tuple, split_rec) >= 0) { + } else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) { rec = page_rec_get_next(page_get_infimum_rec(page)); end_rec = split_rec; @@ -1321,11 +1361,16 @@ btr_page_insert_fits( return(TRUE); } + offs = NULL; + while (rec != end_rec) { /* In this loop we calculate the amount of reserved space after rec is removed from page. */ - total_data -= rec_get_size(rec); + offs = rec_reget_offsets(rec, cursor->index, offs, + ULINT_UNDEFINED, heap); + + total_data -= rec_offs_size(offs); total_n_recs--; if (total_data + page_dir_calc_reserved_space(total_n_recs) @@ -1411,6 +1456,10 @@ btr_attach_half_pages( MTR_MEMO_PAGE_X_FIX)); ut_ad(mtr_memo_contains(mtr, buf_block_align(new_page), MTR_MEMO_PAGE_X_FIX)); + ut_a(page_is_comp(page) == page_is_comp(new_page)); + + /* Create a memory heap where the data tuple is stored */ + heap = mem_heap_create(100); /* Based on split direction, decide upper and lower pages */ if (direction == FSP_DOWN) { @@ -1426,7 +1475,12 @@ btr_attach_half_pages( /* Replace the address of the old child node (= page) with the address of the new lower half */ - btr_node_ptr_set_child_page_no(node_ptr, lower_page_no, mtr); + btr_node_ptr_set_child_page_no(node_ptr, + rec_get_offsets(node_ptr, + UT_LIST_GET_FIRST(tree->tree_indexes), + ULINT_UNDEFINED, heap), + lower_page_no, mtr); + mem_heap_empty(heap); } else { lower_page_no = buf_frame_get_page_no(page); upper_page_no = buf_frame_get_page_no(new_page); @@ -1434,9 +1488,6 @@ btr_attach_half_pages( upper_page = new_page; } - /* Create a memory heap where the data tuple is stored */ - heap = mem_heap_create(100); - /* Get the level of the split pages */ level = btr_page_get_level(page, mtr); @@ -1465,6 +1516,7 @@ btr_attach_half_pages( if (prev_page_no != FIL_NULL) { prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(prev_page) == page_is_comp(page)); btr_page_set_next(prev_page, lower_page_no, mtr); } @@ -1472,6 +1524,7 @@ btr_attach_half_pages( if (next_page_no != FIL_NULL) { next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(next_page) == page_is_comp(page)); btr_page_set_prev(next_page, upper_page_no, mtr); } @@ -1522,7 +1575,15 @@ btr_page_split_and_insert( ibool insert_will_fit; ulint n_iterations = 0; rec_t* rec; + mem_heap_t* heap; + ulint n_uniq; + ulint* offsets; + + heap = mem_heap_create(1024); + n_uniq = dict_index_get_n_unique_in_tree(cursor->index); func_start: + mem_heap_empty(heap); + offsets = NULL; tree = btr_cur_get_tree(cursor); ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree), @@ -1574,9 +1635,10 @@ func_start: first_rec = split_rec; move_limit = split_rec; } else { - buf = mem_alloc(rec_get_converted_size(tuple)); + buf = mem_alloc(rec_get_converted_size(cursor->index, tuple)); - first_rec = rec_convert_dtuple_to_rec(buf, tuple); + first_rec = rec_convert_dtuple_to_rec(buf, + cursor->index, tuple); move_limit = page_rec_get_next(btr_cur_get_rec(cursor)); } @@ -1593,7 +1655,16 @@ func_start: We can then move the records after releasing the tree latch, thus reducing the tree latch contention. */ - insert_will_fit = btr_page_insert_fits(cursor, split_rec, tuple); + if (split_rec) { + offsets = rec_reget_offsets(split_rec, cursor->index, + offsets, n_uniq, heap); + + insert_will_fit = btr_page_insert_fits(cursor, + split_rec, offsets, tuple, heap); + } else { + insert_will_fit = btr_page_insert_fits(cursor, + NULL, NULL, tuple, heap); + } if (insert_will_fit && (btr_page_get_level(page, mtr) == 0)) { @@ -1605,7 +1676,8 @@ func_start: if (direction == FSP_DOWN) { /* fputs("Split left\n", stderr); */ - page_move_rec_list_start(new_page, page, move_limit, mtr); + page_move_rec_list_start(new_page, page, move_limit, + cursor->index, mtr); left_page = new_page; right_page = page; @@ -1613,7 +1685,8 @@ func_start: } else { /* fputs("Split right\n", stderr); */ - page_move_rec_list_end(new_page, page, move_limit, mtr); + page_move_rec_list_end(new_page, page, move_limit, + cursor->index, mtr); left_page = page; right_page = new_page; @@ -1626,19 +1699,25 @@ func_start: if (split_rec == NULL) { insert_page = right_page; - } else if (cmp_dtuple_rec(tuple, first_rec) >= 0) { - - insert_page = right_page; } else { - insert_page = left_page; + offsets = rec_reget_offsets(first_rec, cursor->index, + offsets, n_uniq, heap); + + if (cmp_dtuple_rec(tuple, first_rec, offsets) >= 0) { + + insert_page = right_page; + } else { + insert_page = left_page; + } } /* 7. Reposition the cursor for insert and try insertion */ page_cursor = btr_cur_get_page_cur(cursor); - page_cur_search(insert_page, tuple, PAGE_CUR_LE, page_cursor); + page_cur_search(insert_page, cursor->index, tuple, + PAGE_CUR_LE, page_cursor); - rec = page_cur_tuple_insert(page_cursor, tuple, mtr); + rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); if (rec != NULL) { /* Insert fit on the page: update the free bits for the @@ -1650,15 +1729,17 @@ func_start: /* fprintf(stderr, "Split and insert done %lu %lu\n", buf_frame_get_page_no(left_page), buf_frame_get_page_no(right_page)); */ + mem_heap_free(heap); return(rec); } /* 8. If insert did not fit, try page reorganization */ - btr_page_reorganize(insert_page, mtr); + btr_page_reorganize(insert_page, cursor->index, mtr); - page_cur_search(insert_page, tuple, PAGE_CUR_LE, page_cursor); - rec = page_cur_tuple_insert(page_cursor, tuple, mtr); + page_cur_search(insert_page, cursor->index, tuple, + PAGE_CUR_LE, page_cursor); + rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); if (rec == NULL) { /* The insert did not fit on the page: loop back to the @@ -1688,6 +1769,7 @@ func_start: ut_ad(page_validate(left_page, UT_LIST_GET_FIRST(tree->tree_indexes))); ut_ad(page_validate(right_page, UT_LIST_GET_FIRST(tree->tree_indexes))); + mem_heap_free(heap); return(rec); } @@ -1721,6 +1803,7 @@ btr_level_list_remove( if (prev_page_no != FIL_NULL) { prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(prev_page) == page_is_comp(page)); btr_page_set_next(prev_page, next_page_no, mtr); } @@ -1728,6 +1811,7 @@ btr_level_list_remove( if (next_page_no != FIL_NULL) { next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(next_page) == page_is_comp(page)); btr_page_set_prev(next_page, prev_page_no, mtr); } @@ -1741,9 +1825,11 @@ void btr_set_min_rec_mark_log( /*=====================*/ rec_t* rec, /* in: record */ + ibool comp, /* TRUE=compact record format */ mtr_t* mtr) /* in: mtr */ { - mlog_write_initial_log_record(rec, MLOG_REC_MIN_MARK, mtr); + mlog_write_initial_log_record(rec, + comp ? MLOG_COMP_REC_MIN_MARK : MLOG_REC_MIN_MARK, mtr); /* Write rec offset as a 2-byte ulint */ mlog_catenate_ulint(mtr, rec - buf_frame_align(rec), MLOG_2BYTES); @@ -1759,6 +1845,7 @@ btr_parse_set_min_rec_mark( /* out: end of log record or NULL */ byte* ptr, /* in: buffer */ byte* end_ptr,/* in: buffer end */ + ibool comp, /* in: TRUE=compact page format */ page_t* page, /* in: page or NULL */ mtr_t* mtr) /* in: mtr or NULL */ { @@ -1772,7 +1859,7 @@ btr_parse_set_min_rec_mark( if (page) { rec = page + mach_read_from_2(ptr); - btr_set_min_rec_mark(rec, mtr); + btr_set_min_rec_mark(rec, comp, mtr); } return(ptr + 2); @@ -1785,15 +1872,16 @@ void btr_set_min_rec_mark( /*=================*/ rec_t* rec, /* in: record */ + ibool comp, /* in: TRUE=compact page format */ mtr_t* mtr) /* in: mtr */ { ulint info_bits; - info_bits = rec_get_info_bits(rec); + info_bits = rec_get_info_bits(rec, comp); - rec_set_info_bits(rec, info_bits | REC_INFO_MIN_REC_FLAG); + rec_set_info_bits(rec, comp, info_bits | REC_INFO_MIN_REC_FLAG); - btr_set_min_rec_mark_log(rec, mtr); + btr_set_min_rec_mark_log(rec, comp, mtr); } /***************************************************************** @@ -1842,18 +1930,19 @@ btr_lift_page_up( record from the page should be removed */ mtr_t* mtr) /* in: mtr */ { - rec_t* node_ptr; - page_t* father_page; - ulint page_level; - + page_t* father_page; + ulint page_level; + dict_index_t* index; + ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); ut_ad(mtr_memo_contains(mtr, buf_block_align(page), MTR_MEMO_PAGE_X_FIX)); - node_ptr = btr_page_get_father_node_ptr(tree, page, mtr); - father_page = buf_frame_align(node_ptr); + father_page = buf_frame_align( + btr_page_get_father_node_ptr(tree, page, mtr)); page_level = btr_page_get_level(page, mtr); + index = UT_LIST_GET_FIRST(tree->tree_indexes); btr_search_drop_page_hash_index(page); @@ -1862,7 +1951,7 @@ btr_lift_page_up( /* Move records to the father */ page_copy_rec_list_end(father_page, page, page_get_infimum_rec(page), - mtr); + index, mtr); lock_update_copy_and_discard(father_page, page); btr_page_set_level(father_page, page_level, mtr); @@ -1871,10 +1960,8 @@ btr_lift_page_up( btr_page_free(tree, page, mtr); /* We play safe and reset the free bits for the father */ - ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes), - father_page); - ut_ad(page_validate(father_page, - UT_LIST_GET_FIRST(tree->tree_indexes))); + ibuf_reset_free_bits(index, father_page); + ut_ad(page_validate(father_page, index)); ut_ad(btr_check_node_ptr(tree, father_page, mtr)); } @@ -1914,9 +2001,11 @@ btr_compress( ulint max_ins_size; ulint max_ins_size_reorg; ulint level; - + ibool comp = cursor->index->table->comp; + page = btr_cur_get_page(cursor); tree = btr_cur_get_tree(cursor); + ut_a(comp == page_is_comp(page)); ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree), MTR_MEMO_X_LOCK)); @@ -1932,7 +2021,9 @@ btr_compress( right_page_no); */ node_ptr = btr_page_get_father_node_ptr(tree, page, mtr); + ut_ad(!comp || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR); father_page = buf_frame_align(node_ptr); + ut_a(comp == page_is_comp(father_page)); /* Decide the page to which we try to merge and which will inherit the locks */ @@ -1957,6 +2048,7 @@ btr_compress( n_recs = page_get_n_recs(page); data_size = page_get_data_size(page); + ut_a(page_is_comp(merge_page) == page_is_comp(page)); max_ins_size_reorg = page_get_max_insert_size_after_reorganize( merge_page, n_recs); @@ -1975,7 +2067,7 @@ btr_compress( /* We have to reorganize merge_page */ - btr_page_reorganize(merge_page, mtr); + btr_page_reorganize(merge_page, cursor->index, mtr); max_ins_size = page_get_max_insert_size(merge_page, n_recs); @@ -1999,11 +2091,14 @@ btr_compress( if (is_left) { btr_node_ptr_delete(tree, page, mtr); } else { + mem_heap_t* heap = mem_heap_create(100); /* Replace the address of the old child node (= page) with the address of the merge page to the right */ - btr_node_ptr_set_child_page_no(node_ptr, right_page_no, mtr); - + btr_node_ptr_set_child_page_no(node_ptr, + rec_get_offsets(node_ptr, cursor->index, + ULINT_UNDEFINED, heap), right_page_no, mtr); + mem_heap_free(heap); btr_node_ptr_delete(tree, merge_page, mtr); } @@ -2012,14 +2107,14 @@ btr_compress( orig_pred = page_rec_get_prev( page_get_supremum_rec(merge_page)); page_copy_rec_list_start(merge_page, page, - page_get_supremum_rec(page), mtr); + page_get_supremum_rec(page), cursor->index, mtr); lock_update_merge_left(merge_page, orig_pred, page); } else { orig_succ = page_rec_get_next( page_get_infimum_rec(merge_page)); page_copy_rec_list_end(merge_page, page, - page_get_infimum_rec(page), mtr); + page_get_infimum_rec(page), cursor->index, mtr); lock_update_merge_right(orig_succ, page); } @@ -2133,6 +2228,7 @@ btr_discard_page( return; } + ut_a(page_is_comp(merge_page) == page_is_comp(page)); btr_search_drop_page_hash_index(page); if (left_page_no == FIL_NULL && btr_page_get_level(page, mtr) > 0) { @@ -2144,7 +2240,8 @@ btr_discard_page( ut_ad(node_ptr != page_get_supremum_rec(merge_page)); - btr_set_min_rec_mark(node_ptr, mtr); + btr_set_min_rec_mark(node_ptr, + cursor->index->table->comp, mtr); } btr_node_ptr_delete(tree, page, mtr); @@ -2215,6 +2312,8 @@ btr_print_recursive( page_t* page, /* in: index page */ ulint width, /* in: print this many entries from start and end */ + mem_heap_t* heap, /* in: heap for rec_reget_offsets() */ + ulint** offsets,/* in/out: buffer for rec_reget_offsets() */ mtr_t* mtr) /* in: mtr */ { page_cur_t cursor; @@ -2223,14 +2322,16 @@ btr_print_recursive( mtr_t mtr2; rec_t* node_ptr; page_t* child; - + dict_index_t* index; + ut_ad(mtr_memo_contains(mtr, buf_block_align(page), MTR_MEMO_PAGE_X_FIX)); fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n", (ulong) btr_page_get_level(page, mtr), (ulong) buf_frame_get_page_no(page)); - page_print(page, width, width); + index = UT_LIST_GET_FIRST(tree->tree_indexes); + page_print(page, index, width, width); n_recs = page_get_n_recs(page); @@ -2249,15 +2350,20 @@ btr_print_recursive( node_ptr = page_cur_get_rec(&cursor); - child = btr_node_ptr_get_child(node_ptr, &mtr2); - - btr_print_recursive(tree, child, width, &mtr2); + *offsets = rec_reget_offsets(node_ptr, index, + *offsets, ULINT_UNDEFINED, heap); + child = btr_node_ptr_get_child(node_ptr, + *offsets, &mtr2); + btr_print_recursive(tree, child, width, + heap, offsets, &mtr2); mtr_commit(&mtr2); } page_cur_move_to_next(&cursor); i++; } + + mem_heap_free(heap); } /****************************************************************** @@ -2270,8 +2376,10 @@ btr_print_tree( ulint width) /* in: print this many entries from start and end */ { - mtr_t mtr; - page_t* root; + mtr_t mtr; + page_t* root; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; fputs("--------------------------\n" "INDEX TREE PRINT\n", stderr); @@ -2280,7 +2388,8 @@ btr_print_tree( root = btr_root_get(tree, &mtr); - btr_print_recursive(tree, root, width, &mtr); + btr_print_recursive(tree, root, width, heap, &offsets, &mtr); + mem_heap_free(heap); mtr_commit(&mtr); @@ -2323,7 +2432,10 @@ btr_check_node_ptr( page_rec_get_next(page_get_infimum_rec(page)), 0, heap, btr_page_get_level(page, mtr)); - ut_a(cmp_dtuple_rec(node_ptr_tuple, node_ptr) == 0); + ut_a(cmp_dtuple_rec(node_ptr_tuple, node_ptr, + rec_get_offsets(node_ptr, + dict_tree_find_index(tree, node_ptr), + ULINT_UNDEFINED, heap)) == 0); mem_heap_free(heap); @@ -2360,10 +2472,12 @@ btr_index_rec_validate( should print hex dump of record and page on error */ { - ulint len; - ulint n; - ulint i; - page_t* page; + ulint len; + ulint n; + ulint i; + page_t* page; + mem_heap_t* heap; + ulint* offsets; page = buf_frame_align(rec); @@ -2377,10 +2491,10 @@ btr_index_rec_validate( n = dict_index_get_n_fields(index); - if (rec_get_n_fields(rec) != n) { + if (!index->table->comp && rec_get_n_fields_old(rec) != n) { btr_index_rec_validate_report(page, rec, index); fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n", - (ulong) rec_get_n_fields(rec), (ulong) n); + (ulong) rec_get_n_fields_old(rec), (ulong) n); if (!dump_on_error) { @@ -2390,16 +2504,19 @@ btr_index_rec_validate( buf_page_print(page); fputs("InnoDB: corrupt record ", stderr); - rec_print(stderr, rec); + rec_print_old(stderr, rec); putc('\n', stderr); return(FALSE); } + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + for (i = 0; i < n; i++) { dtype_t* type = dict_index_get_nth_type(index, i); - rec_get_nth_field(rec, i, &len); + rec_get_nth_field(rec, offsets, i, &len); /* Note that prefix indexes are not fixed size even when their type is CHAR. */ @@ -2419,20 +2536,22 @@ btr_index_rec_validate( (ulong) i, (ulong) len, (ulong) dtype_get_fixed_size(type)); if (!dump_on_error) { - + mem_heap_free(heap); return(FALSE); } buf_page_print(page); fputs("InnoDB: corrupt record ", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); putc('\n', stderr); + mem_heap_free(heap); return(FALSE); } } + mem_heap_free(heap); return(TRUE); } @@ -2527,15 +2646,18 @@ btr_validate_level( page_t* right_father_page; rec_t* node_ptr; rec_t* right_node_ptr; + rec_t* rec; ulint right_page_no; ulint left_page_no; page_cur_t cursor; - mem_heap_t* heap; dtuple_t* node_ptr_tuple; ibool ret = TRUE; dict_index_t* index; mtr_t mtr; - + mem_heap_t* heap = mem_heap_create(256); + ulint* offsets = NULL; + ulint* offsets2= NULL; + mtr_start(&mtr); mtr_x_lock(dict_tree_get_lock(tree), &mtr); @@ -2544,6 +2666,8 @@ btr_validate_level( space = buf_frame_get_space_id(page); + index = UT_LIST_GET_FIRST(tree->tree_indexes); + while (level != btr_page_get_level(page, &mtr)) { ut_a(btr_page_get_level(page, &mtr) > 0); @@ -2552,14 +2676,16 @@ btr_validate_level( page_cur_move_to_next(&cursor); node_ptr = page_cur_get_rec(&cursor); - page = btr_node_ptr_get_child(node_ptr, &mtr); + offsets = rec_reget_offsets(node_ptr, index, + offsets, ULINT_UNDEFINED, heap); + page = btr_node_ptr_get_child(node_ptr, offsets, &mtr); } - index = UT_LIST_GET_FIRST(tree->tree_indexes); - /* Now we are on the desired level. Loop through the pages on that level. */ loop: + mem_heap_empty(heap); + offsets = offsets2 = NULL; mtr_x_lock(dict_tree_get_lock(tree), &mtr); /* Check ordering etc. of records */ @@ -2588,12 +2714,20 @@ loop: (buf_frame_get_page_no(page) == dict_tree_get_page(tree)))); if (right_page_no != FIL_NULL) { - + rec_t* right_rec; right_page = btr_page_get(space, right_page_no, RW_X_LATCH, &mtr); - if (cmp_rec_rec(page_rec_get_prev(page_get_supremum_rec(page)), - page_rec_get_next(page_get_infimum_rec(right_page)), - UT_LIST_GET_FIRST(tree->tree_indexes)) >= 0) { + ut_a(page_is_comp(right_page) == page_is_comp(page)); + rec = page_rec_get_prev(page_get_supremum_rec(page)); + right_rec = page_rec_get_next( + page_get_infimum_rec(right_page)); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + offsets2 = rec_reget_offsets(right_rec, index, + offsets2, ULINT_UNDEFINED, heap); + if (cmp_rec_rec(rec, right_rec, offsets, offsets2, + dict_index_get_n_fields(index), + index) >= 0) { btr_validate_report2(index, level, page, right_page); @@ -2604,12 +2738,17 @@ loop: buf_page_print(right_page); fputs("InnoDB: record ", stderr); - rec_print(stderr, page_rec_get_prev( - page_get_supremum_rec(page))); + rec = page_rec_get_prev(page_get_supremum_rec(page)); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + rec_print(stderr, rec, offsets); putc('\n', stderr); fputs("InnoDB: record ", stderr); - rec_print(stderr, page_rec_get_next( - page_get_infimum_rec(right_page))); + rec = page_rec_get_next(page_get_infimum_rec( + right_page)); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + rec_print(stderr, rec, offsets); putc('\n', stderr); ret = FALSE; @@ -2618,7 +2757,8 @@ loop: if (level > 0 && left_page_no == FIL_NULL) { ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits( - page_rec_get_next(page_get_infimum_rec(page)))); + page_rec_get_next(page_get_infimum_rec(page)), + index->table->comp)); } if (buf_frame_get_page_no(page) != dict_tree_get_page(tree)) { @@ -2627,12 +2767,14 @@ loop: node_ptr = btr_page_get_father_node_ptr(tree, page, &mtr); father_page = buf_frame_align(node_ptr); + offsets = rec_reget_offsets(node_ptr, index, + offsets, ULINT_UNDEFINED, heap); - if (btr_node_ptr_get_child_page_no(node_ptr) != + if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != buf_frame_get_page_no(page) || node_ptr != btr_page_get_father_for_rec(tree, page, - page_rec_get_prev(page_get_supremum_rec(page)), - &mtr)) { + page_rec_get_prev(page_get_supremum_rec(page)), + &mtr)) { btr_validate_report1(index, level, page); fputs("InnoDB: node pointer to the page is wrong\n", @@ -2642,17 +2784,20 @@ loop: buf_page_print(page); fputs("InnoDB: node ptr ", stderr); - rec_print(stderr, node_ptr); + rec_print(stderr, node_ptr, offsets); fprintf(stderr, "\n" "InnoDB: node ptr child page n:o %lu\n", - (unsigned long) btr_node_ptr_get_child_page_no(node_ptr)); + (unsigned long) btr_node_ptr_get_child_page_no( + node_ptr, offsets)); fputs("InnoDB: record on page ", stderr); - rec_print(stderr, - btr_page_get_father_for_rec(tree, page, - page_rec_get_prev(page_get_supremum_rec(page)), - &mtr)); + rec = btr_page_get_father_for_rec(tree, page, + page_rec_get_prev(page_get_supremum_rec(page)), + &mtr); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + rec_print(stderr, rec, offsets); putc('\n', stderr); ret = FALSE; @@ -2660,7 +2805,8 @@ loop: } if (btr_page_get_level(page, &mtr) > 0) { - heap = mem_heap_create(256); + offsets = rec_reget_offsets(node_ptr, index, + offsets, ULINT_UNDEFINED, heap); node_ptr_tuple = dict_tree_build_node_ptr( tree, @@ -2669,7 +2815,10 @@ loop: 0, heap, btr_page_get_level(page, &mtr)); - if (cmp_dtuple_rec(node_ptr_tuple, node_ptr) != 0) { + if (cmp_dtuple_rec(node_ptr_tuple, node_ptr, + offsets)) { + rec_t* first_rec = page_rec_get_next( + page_get_infimum_rec(page)); btr_validate_report1(index, level, page); @@ -2679,18 +2828,16 @@ loop: fputs("InnoDB: Error: node ptrs differ" " on levels > 0\n" "InnoDB: node ptr ", stderr); - rec_print(stderr, node_ptr); + rec_print(stderr, node_ptr, offsets); fputs("InnoDB: first rec ", stderr); - rec_print(stderr, page_rec_get_next( - page_get_infimum_rec(page))); + offsets = rec_reget_offsets(first_rec, index, + offsets, ULINT_UNDEFINED, heap); + rec_print(stderr, first_rec, offsets); putc('\n', stderr); ret = FALSE; - mem_heap_free(heap); goto node_ptr_fails; } - - mem_heap_free(heap); } if (left_page_no == FIL_NULL) { @@ -2701,7 +2848,7 @@ loop: if (right_page_no == FIL_NULL) { ut_a(node_ptr == page_rec_get_prev( - page_get_supremum_rec(father_page))); + page_get_supremum_rec(father_page))); ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL); } @@ -2771,13 +2918,16 @@ node_ptr_fails: mtr_commit(&mtr); if (right_page_no != FIL_NULL) { + ibool comp = page_is_comp(page); mtr_start(&mtr); page = btr_page_get(space, right_page_no, RW_X_LATCH, &mtr); + ut_a(page_is_comp(page) == comp); goto loop; } + mem_heap_free(heap); return(ret); } diff --git a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c index 48de5644908..f5e146172ed 100644 --- a/innobase/btr/btr0cur.c +++ b/innobase/btr/btr0cur.c @@ -73,8 +73,9 @@ static void btr_cur_unmark_extern_fields( /*=========================*/ - rec_t* rec, /* in: record in a clustered index */ - mtr_t* mtr); /* in: mtr */ + rec_t* rec, /* in: record in a clustered index */ + mtr_t* mtr, /* in: mtr */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /*********************************************************************** Adds path information to the cursor for the current page, for which the binary search has been performed. */ @@ -96,6 +97,7 @@ btr_rec_free_updated_extern_fields( dict_index_t* index, /* in: index of rec; the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ upd_t* update, /* in: update vector */ ibool do_not_free_inherited,/* in: TRUE if called in a rollback and we do not want to free @@ -108,9 +110,10 @@ static ulint btr_rec_get_externally_stored_len( /*==============================*/ - /* out: externally stored part, in units of a - database page */ - rec_t* rec); /* in: record */ + /* out: externally stored part, + in units of a database page */ + rec_t* rec, /* in: record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /*==================== B-TREE SEARCH =========================*/ @@ -137,11 +140,13 @@ btr_cur_latch_leaves( if (latch_mode == BTR_SEARCH_LEAF) { get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; } else if (latch_mode == BTR_MODIFY_LEAF) { get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; } else if (latch_mode == BTR_MODIFY_TREE) { @@ -152,11 +157,13 @@ btr_cur_latch_leaves( if (left_page_no != FIL_NULL) { get_page = btr_page_get(space, left_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; } get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; right_page_no = btr_page_get_next(page, mtr); @@ -176,11 +183,14 @@ btr_cur_latch_leaves( if (left_page_no != FIL_NULL) { cursor->left_page = btr_page_get(space, left_page_no, RW_S_LATCH, mtr); + ut_a(page_is_comp(cursor->left_page) == + page_is_comp(page)); buf_block_align( cursor->left_page)->check_index_page_at_flush = TRUE; } get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; } else if (latch_mode == BTR_MODIFY_PREV) { @@ -191,11 +201,14 @@ btr_cur_latch_leaves( if (left_page_no != FIL_NULL) { cursor->left_page = btr_page_get(space, left_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(cursor->left_page) == + page_is_comp(page)); buf_block_align( cursor->left_page)->check_index_page_at_flush = TRUE; } get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; } else { ut_error; @@ -261,6 +274,8 @@ btr_cur_search_to_nth_level( #ifdef BTR_CUR_ADAPT btr_search_t* info; #endif + mem_heap_t* heap; + ulint* offsets; /* Currently, PAGE_CUR_LE is the only search mode used for searches ending to upper levels */ @@ -379,7 +394,9 @@ btr_cur_search_to_nth_level( page_mode = mode; break; } - + + heap = mem_heap_create(100); + offsets = NULL; /* Loop and search until we arrive at the desired level */ for (;;) { @@ -414,7 +431,7 @@ retry_page_get: cursor->thr)) { /* Insertion to the insert buffer succeeded */ cursor->flag = BTR_CUR_INSERT_TO_IBUF; - + mem_heap_free(heap); return; } @@ -470,9 +487,9 @@ retry_page_get: page_mode = mode; } - page_cur_search_with_match(page, tuple, page_mode, &up_match, - &up_bytes, &low_match, &low_bytes, - page_cursor); + page_cur_search_with_match(page, index, tuple, page_mode, + &up_match, &up_bytes, + &low_match, &low_bytes, page_cursor); if (estimate) { btr_cur_add_path_info(cursor, height, root_height); } @@ -486,7 +503,9 @@ retry_page_get: if (level > 0) { /* x-latch the page */ - btr_page_get(space, page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(btr_page_get(space, + page_no, RW_X_LATCH, mtr)) + == index->table->comp); } break; @@ -498,11 +517,14 @@ retry_page_get: guess = NULL; node_ptr = page_cur_get_rec(page_cursor); - + offsets = rec_reget_offsets(node_ptr, cursor->index, + offsets, ULINT_UNDEFINED, heap); /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr); + page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); } + mem_heap_free(heap); + if (level == 0) { cursor->low_match = low_match; cursor->low_bytes = low_bytes; @@ -552,6 +574,8 @@ btr_cur_open_at_index_side( rec_t* node_ptr; ulint estimate; ulint savepoint; + mem_heap_t* heap; + ulint* offsets = NULL; estimate = latch_mode & BTR_ESTIMATE; latch_mode = latch_mode & ~BTR_ESTIMATE; @@ -576,7 +600,8 @@ btr_cur_open_at_index_side( page_no = dict_tree_get_page(tree); height = ULINT_UNDEFINED; - + heap = mem_heap_create(100); + for (;;) { page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL, BUF_GET, @@ -645,10 +670,13 @@ btr_cur_open_at_index_side( height--; node_ptr = page_cur_get_rec(page_cursor); - + offsets = rec_reget_offsets(node_ptr, cursor->index, + offsets, ULINT_UNDEFINED, heap); /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr); + page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); } + + mem_heap_free(heap); } /************************************************************************** @@ -669,6 +697,8 @@ btr_cur_open_at_rnd_pos( ulint space; ulint height; rec_t* node_ptr; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; tree = index->tree; @@ -717,10 +747,13 @@ btr_cur_open_at_rnd_pos( height--; node_ptr = page_cur_get_rec(page_cursor); - + offsets = rec_reget_offsets(node_ptr, cursor->index, + offsets, ULINT_UNDEFINED, heap); /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr); + page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); } + + mem_heap_free(heap); } /*==================== B-TREE INSERT =========================*/ @@ -758,18 +791,20 @@ btr_cur_insert_if_possible( page_cursor = btr_cur_get_page_cur(cursor); /* Now, try the insert */ - rec = page_cur_tuple_insert(page_cursor, tuple, mtr); + rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); if (!rec) { /* If record did not fit, reorganize */ - btr_page_reorganize(page, mtr); + btr_page_reorganize(page, cursor->index, mtr); *reorg = TRUE; - page_cur_search(page, tuple, PAGE_CUR_LE, page_cursor); + page_cur_search(page, cursor->index, tuple, + PAGE_CUR_LE, page_cursor); - rec = page_cur_tuple_insert(page_cursor, tuple, mtr); + rec = page_cur_tuple_insert(page_cursor, tuple, + cursor->index, mtr); } return(rec); @@ -887,8 +922,6 @@ btr_cur_optimistic_insert( ibool reorg; ibool inherit; ulint rec_size; - ulint data_size; - ulint extra_size; ulint type; ulint err; @@ -914,13 +947,11 @@ btr_cur_optimistic_insert( calculate_sizes_again: /* Calculate the record size when entry is converted to a record */ - data_size = dtuple_get_data_size(entry); - extra_size = rec_get_converted_extra_size(data_size, - dtuple_get_n_fields(entry)); - rec_size = data_size + extra_size; + rec_size = rec_get_converted_size(index, entry); - if ((rec_size >= page_get_free_space_of_empty() / 2) - || (rec_size >= REC_MAX_DATA_SIZE)) { + if (rec_size >= + ut_min(page_get_free_space_of_empty(index->table->comp) / 2, + REC_MAX_DATA_SIZE)) { /* The record is so big that we have to store some fields externally on separate database pages */ @@ -983,19 +1014,18 @@ calculate_sizes_again: /* Now, try the insert */ - *rec = page_cur_insert_rec_low(page_cursor, entry, data_size, - NULL, mtr); + *rec = page_cur_insert_rec_low(page_cursor, entry, index, NULL, mtr); if (!(*rec)) { /* If the record did not fit, reorganize */ - btr_page_reorganize(page, mtr); + btr_page_reorganize(page, index, mtr); ut_ad(page_get_max_insert_size(page, 1) == max_size); reorg = TRUE; - page_cur_search(page, entry, PAGE_CUR_LE, page_cursor); + page_cur_search(page, index, entry, PAGE_CUR_LE, page_cursor); - *rec = page_cur_tuple_insert(page_cursor, entry, mtr); + *rec = page_cur_tuple_insert(page_cursor, entry, index, mtr); if (!*rec) { fputs("InnoDB: Error: cannot insert tuple ", stderr); @@ -1123,9 +1153,9 @@ btr_cur_pessimistic_insert( } } - if ((rec_get_converted_size(entry) - >= page_get_free_space_of_empty() / 2) - || (rec_get_converted_size(entry) >= REC_MAX_DATA_SIZE)) { + if (rec_get_converted_size(index, entry) >= + ut_min(page_get_free_space_of_empty(index->table->comp) / 2, + REC_MAX_DATA_SIZE)) { /* The record is so big that we have to store some fields externally on separate database pages */ @@ -1212,8 +1242,11 @@ btr_cur_upd_lock_and_undo( err = DB_SUCCESS; if (!(flags & BTR_NO_LOCKING_FLAG)) { + mem_heap_t* heap = mem_heap_create(100); err = lock_clust_rec_modify_check_and_lock(flags, rec, index, - thr); + rec_get_offsets(rec, index, ULINT_UNDEFINED, heap), + thr); + mem_heap_free(heap); if (err != DB_SUCCESS) { return(err); @@ -1243,14 +1276,17 @@ btr_cur_update_in_place_log( mtr_t* mtr) /* in: mtr */ { byte* log_ptr; + ut_ad(flags < 256); - log_ptr = mlog_open(mtr, 30 + MLOG_BUF_MARGIN); - - log_ptr = mlog_write_initial_log_record_fast(rec, - MLOG_REC_UPDATE_IN_PLACE, log_ptr, mtr); + log_ptr = mlog_open_and_write_index(mtr, rec, index, index->table->comp + ? MLOG_COMP_REC_UPDATE_IN_PLACE + : MLOG_REC_UPDATE_IN_PLACE, + 1 + DATA_ROLL_PTR_LEN + 14 + 2 + MLOG_BUF_MARGIN); - mach_write_to_1(log_ptr, flags); - log_ptr++; + if (!log_ptr) { + /* Logging in mtr is switched off during crash recovery */ + return; + } /* The code below assumes index is a clustered index: change index to the clustered index if we are updating a secondary index record (or we @@ -1259,6 +1295,9 @@ btr_cur_update_in_place_log( index = dict_table_get_first_index(index->table); + mach_write_to_1(log_ptr, flags); + log_ptr++; + log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr, mtr); mach_write_to_2(log_ptr, rec - buf_frame_align(rec)); @@ -1273,10 +1312,11 @@ Parses a redo log record of updating a record in-place. */ byte* btr_cur_parse_update_in_place( /*==========================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page) /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + dict_index_t* index) /* in: index corresponding to page */ { ulint flags; rec_t* rec; @@ -1286,6 +1326,7 @@ btr_cur_parse_update_in_place( dulint roll_ptr; ulint rec_offset; mem_heap_t* heap; + ulint* offsets; if (end_ptr < ptr + 1) { @@ -1333,11 +1374,14 @@ btr_cur_parse_update_in_place( /* We do not need to reserve btr_search_latch, as the page is only being recovered, and there cannot be a hash index to it. */ + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_rec_sys_fields_in_recovery(rec, pos, trx_id, roll_ptr); + row_upd_rec_sys_fields_in_recovery(rec, offsets, + pos, trx_id, roll_ptr); } - row_upd_rec_in_place(rec, update); + row_upd_rec_in_place(rec, offsets, update); mem_heap_free(heap); @@ -1369,14 +1413,18 @@ btr_cur_update_in_place( dulint roll_ptr = ut_dulint_zero; trx_t* trx; ibool was_delete_marked; + mem_heap_t* heap; + const ulint* offsets; rec = btr_cur_get_rec(cursor); index = cursor->index; trx = thr_get_trx(thr); - + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + if (btr_cur_print_record_ops && thr) { btr_cur_trx_report(trx, index, "update "); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); } /* Do lock checking and undo logging */ @@ -1384,6 +1432,7 @@ btr_cur_update_in_place( thr, &roll_ptr); if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } @@ -1405,15 +1454,15 @@ btr_cur_update_in_place( } if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_rec_sys_fields(rec, index, trx, roll_ptr); + row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr); } /* FIXME: in a mixed tree, all records may not have enough ordering fields for btr search: */ - was_delete_marked = rec_get_deleted_flag(rec); - - row_upd_rec_in_place(rec, update); + was_delete_marked = rec_get_deleted_flag(rec, index->table->comp); + + row_upd_rec_in_place(rec, offsets, update); if (block->is_hashed) { rw_lock_x_unlock(&btr_search_latch); @@ -1421,13 +1470,14 @@ btr_cur_update_in_place( btr_cur_update_in_place_log(flags, rec, index, update, trx, roll_ptr, mtr); - if (was_delete_marked && !rec_get_deleted_flag(rec)) { + if (was_delete_marked && !rec_get_deleted_flag(rec, index->table->comp)) { /* The new updated record owns its possible externally stored fields */ - btr_cur_unmark_extern_fields(rec, mtr); + btr_cur_unmark_extern_fields(rec, mtr, offsets); } + mem_heap_free(heap); return(DB_SUCCESS); } @@ -1469,24 +1519,28 @@ btr_cur_optimistic_update( mem_heap_t* heap; ibool reorganized = FALSE; ulint i; - + ulint* offsets; + page = btr_cur_get_page(cursor); rec = btr_cur_get_rec(cursor); index = cursor->index; + heap = mem_heap_create(1024); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + if (btr_cur_print_record_ops && thr) { btr_cur_trx_report(thr_get_trx(thr), index, "update "); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); } ut_ad(mtr_memo_contains(mtr, buf_block_align(page), MTR_MEMO_PAGE_X_FIX)); - if (!row_upd_changes_field_size_or_external(rec, index, update)) { + if (!row_upd_changes_field_size_or_external(index, offsets, update)) { /* The simplest and the most common case: the update does not change the size of any field and none of the updated fields is externally stored in rec or update */ - + mem_heap_free(heap); return(btr_cur_update_in_place(flags, cursor, update, cmpl_info, thr, mtr)); } @@ -1497,29 +1551,30 @@ btr_cur_optimistic_update( /* Externally stored fields are treated in pessimistic update */ + mem_heap_free(heap); return(DB_OVERFLOW); } } - if (rec_contains_externally_stored_field(btr_cur_get_rec(cursor))) { + if (rec_offs_any_extern(offsets)) { /* Externally stored fields are treated in pessimistic update */ + mem_heap_free(heap); return(DB_OVERFLOW); } page_cursor = btr_cur_get_page_cur(cursor); - heap = mem_heap_create(1024); - new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update, NULL); - old_rec_size = rec_get_size(rec); - new_rec_size = rec_get_converted_size(new_entry); + old_rec_size = rec_offs_size(offsets); + new_rec_size = rec_get_converted_size(index, new_entry); - if (new_rec_size >= page_get_free_space_of_empty() / 2) { + if (new_rec_size >= + page_get_free_space_of_empty(index->table->comp) / 2) { mem_heap_free(heap); @@ -1570,7 +1625,7 @@ btr_cur_optimistic_update( btr_search_update_hash_on_delete(cursor); - page_cur_delete_rec(page_cursor, mtr); + page_cur_delete_rec(page_cursor, index, mtr); page_cur_move_to_prev(page_cursor); @@ -1587,11 +1642,13 @@ btr_cur_optimistic_update( ut_a(rec); /* <- We calculated above the insert would fit */ - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, index->table->comp)) { /* The new inserted record owns its possible externally stored fields */ - btr_cur_unmark_extern_fields(rec, mtr); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + btr_cur_unmark_extern_fields(rec, mtr, offsets); } /* Restore the old explicit lock state on the record */ @@ -1690,6 +1747,7 @@ btr_cur_pessimistic_update( ulint* ext_vect; ulint n_ext_vect; ulint reserve_flag; + ulint* offsets = NULL; *big_rec = NULL; @@ -1743,6 +1801,7 @@ btr_cur_pessimistic_update( } heap = mem_heap_create(1024); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); trx = thr_get_trx(thr); @@ -1767,28 +1826,29 @@ btr_cur_pessimistic_update( ut_a(big_rec_vec == NULL); - btr_rec_free_updated_extern_fields(index, rec, update, - TRUE, mtr); + btr_rec_free_updated_extern_fields(index, rec, offsets, + update, TRUE, mtr); } /* We have to set appropriate extern storage bits in the new record to be inserted: we have to remember which fields were such */ - ext_vect = mem_heap_alloc(heap, sizeof(ulint) * rec_get_n_fields(rec)); - n_ext_vect = btr_push_update_extern_fields(ext_vect, rec, update); - - if ((rec_get_converted_size(new_entry) >= - page_get_free_space_of_empty() / 2) - || (rec_get_converted_size(new_entry) >= REC_MAX_DATA_SIZE)) { + ext_vect = mem_heap_alloc(heap, sizeof(ulint) + * dict_index_get_n_fields(index)); + ut_ad(!cursor->index->table->comp || !rec_get_node_ptr_flag(rec)); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + n_ext_vect = btr_push_update_extern_fields(ext_vect, offsets, update); + + if (rec_get_converted_size(index, new_entry) >= + ut_min(page_get_free_space_of_empty(index->table->comp) / 2, + REC_MAX_DATA_SIZE)) { big_rec_vec = dtuple_convert_big_rec(index, new_entry, ext_vect, n_ext_vect); if (big_rec_vec == NULL) { - mem_heap_free(heap); - err = DB_TOO_BIG_RECORD; - goto return_after_reservations; } } @@ -1808,7 +1868,7 @@ btr_cur_pessimistic_update( btr_search_update_hash_on_delete(cursor); - page_cur_delete_rec(page_cursor, mtr); + page_cur_delete_rec(page_cursor, index, mtr); page_cur_move_to_prev(page_cursor); @@ -1817,21 +1877,22 @@ btr_cur_pessimistic_update( ut_a(rec || optim_err != DB_UNDERFLOW); if (rec) { + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + lock_rec_restore_from_page_infimum(rec, page); - rec_set_field_extern_bits(rec, ext_vect, n_ext_vect, mtr); + rec_set_field_extern_bits(rec, index, + ext_vect, n_ext_vect, mtr); - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) { /* The new inserted record owns its possible externally stored fields */ - - btr_cur_unmark_extern_fields(rec, mtr); + btr_cur_unmark_extern_fields(rec, mtr, offsets); } btr_cur_compress_if_useful(cursor, mtr); err = DB_SUCCESS; - mem_heap_free(heap); - goto return_after_reservations; } @@ -1856,13 +1917,15 @@ btr_cur_pessimistic_update( ut_a(err == DB_SUCCESS); ut_a(dummy_big_rec == NULL); - rec_set_field_extern_bits(rec, ext_vect, n_ext_vect, mtr); + rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) { /* The new inserted record owns its possible externally stored fields */ - btr_cur_unmark_extern_fields(rec, mtr); + btr_cur_unmark_extern_fields(rec, mtr, offsets); } lock_rec_restore_from_page_infimum(rec, page); @@ -1876,9 +1939,8 @@ btr_cur_pessimistic_update( btr_cur_pess_upd_restore_supremum(rec, mtr); } - mem_heap_free(heap); - return_after_reservations: + mem_heap_free(heap); if (n_extents > 0) { fil_space_release_free_extents(cursor->index->space, @@ -1908,11 +1970,18 @@ btr_cur_del_mark_set_clust_rec_log( mtr_t* mtr) /* in: mtr */ { byte* log_ptr; + ut_ad(flags < 256); + ut_ad(val <= 1); - log_ptr = mlog_open(mtr, 30); + log_ptr = mlog_open_and_write_index(mtr, rec, index, index->table->comp + ? MLOG_COMP_REC_CLUST_DELETE_MARK + : MLOG_REC_CLUST_DELETE_MARK, + 1 + 1 + DATA_ROLL_PTR_LEN + 14 + 2); - log_ptr = mlog_write_initial_log_record_fast(rec, - MLOG_REC_CLUST_DELETE_MARK, log_ptr, mtr); + if (!log_ptr) { + /* Logging in mtr is switched off during crash recovery */ + return; + } mach_write_to_1(log_ptr, flags); log_ptr++; @@ -1934,10 +2003,11 @@ index record. */ byte* btr_cur_parse_del_mark_set_clust_rec( /*=================================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page) /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: index corresponding to page */ + page_t* page) /* in: page or NULL */ { ulint flags; ibool val; @@ -1978,15 +2048,19 @@ btr_cur_parse_del_mark_set_clust_rec( rec = page + offset; if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_rec_sys_fields_in_recovery(rec, pos, trx_id, - roll_ptr); + mem_heap_t* heap = mem_heap_create(100); + row_upd_rec_sys_fields_in_recovery(rec, + rec_get_offsets(rec, index, + ULINT_UNDEFINED, heap), + pos, trx_id, roll_ptr); + mem_heap_free(heap); } /* We do not need to reserve btr_search_latch, as the page is only being recovered, and there cannot be a hash index to it. */ - rec_set_deleted_flag(rec, val); + rec_set_deleted_flag(rec, index->table->comp, val); } return(ptr); @@ -2015,22 +2089,28 @@ btr_cur_del_mark_set_clust_rec( ulint err; rec_t* rec; trx_t* trx; + mem_heap_t* heap; + const ulint* offsets; rec = btr_cur_get_rec(cursor); index = cursor->index; - + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + if (btr_cur_print_record_ops && thr) { btr_cur_trx_report(thr_get_trx(thr), index, "del mark "); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); } ut_ad(index->type & DICT_CLUSTERED); - ut_ad(rec_get_deleted_flag(rec) == FALSE); + ut_ad(rec_get_deleted_flag(rec, index->table->comp) == FALSE); - err = lock_clust_rec_modify_check_and_lock(flags, rec, index, thr); + err = lock_clust_rec_modify_check_and_lock(flags, + rec, index, offsets, thr); if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } @@ -2039,6 +2119,7 @@ btr_cur_del_mark_set_clust_rec( &roll_ptr); if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } @@ -2048,13 +2129,12 @@ btr_cur_del_mark_set_clust_rec( rw_lock_x_lock(&btr_search_latch); } - rec_set_deleted_flag(rec, val); + rec_set_deleted_flag(rec, index->table->comp, val); trx = thr_get_trx(thr); if (!(flags & BTR_KEEP_SYS_FLAG)) { - - row_upd_rec_sys_fields(rec, index, trx, roll_ptr); + row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr); } if (block->is_hashed) { @@ -2063,6 +2143,7 @@ btr_cur_del_mark_set_clust_rec( btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx, roll_ptr, mtr); + mem_heap_free(heap); return(DB_SUCCESS); } @@ -2073,16 +2154,24 @@ UNIV_INLINE void btr_cur_del_mark_set_sec_rec_log( /*=============================*/ - rec_t* rec, /* in: record */ - ibool val, /* in: value to set */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + ibool val, /* in: value to set */ + mtr_t* mtr) /* in: mtr */ { byte* log_ptr; + ut_ad(val <= 1); - log_ptr = mlog_open(mtr, 30); + log_ptr = mlog_open_and_write_index(mtr, rec, index, index->table->comp + ? MLOG_COMP_REC_SEC_DELETE_MARK + : MLOG_REC_SEC_DELETE_MARK, + 1 + 2); - log_ptr = mlog_write_initial_log_record_fast(rec, - MLOG_REC_SEC_DELETE_MARK, log_ptr, mtr); + if (!log_ptr) { + /* Logging in mtr is switched off during crash recovery: + in that case mlog_open returns NULL */ + return; + } mach_write_to_1(log_ptr, val); log_ptr++; @@ -2100,10 +2189,11 @@ index record. */ byte* btr_cur_parse_del_mark_set_sec_rec( /*===============================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page) /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page) /* in: page or NULL */ { ibool val; ulint offset; @@ -2129,7 +2219,7 @@ btr_cur_parse_del_mark_set_sec_rec( is only being recovered, and there cannot be a hash index to it. */ - rec_set_deleted_flag(rec, val); + rec_set_deleted_flag(rec, index->table->comp, val); } return(ptr); @@ -2156,9 +2246,12 @@ btr_cur_del_mark_set_sec_rec( rec = btr_cur_get_rec(cursor); if (btr_cur_print_record_ops && thr) { + mem_heap_t* heap = mem_heap_create(100); btr_cur_trx_report(thr_get_trx(thr), cursor->index, "del mark "); - rec_print(stderr, rec); + rec_print(stderr, rec, rec_get_offsets(rec, cursor->index, + ULINT_UNDEFINED, heap)); + mem_heap_free(heap); } err = lock_sec_rec_modify_check_and_lock(flags, rec, cursor->index, @@ -2174,13 +2267,13 @@ btr_cur_del_mark_set_sec_rec( rw_lock_x_lock(&btr_search_latch); } - rec_set_deleted_flag(rec, val); + rec_set_deleted_flag(rec, cursor->index->table->comp, val); if (block->is_hashed) { rw_lock_x_unlock(&btr_search_latch); } - btr_cur_del_mark_set_sec_rec_log(rec, val, mtr); + btr_cur_del_mark_set_sec_rec_log(rec, cursor->index, val, mtr); return(DB_SUCCESS); } @@ -2192,15 +2285,16 @@ used by the insert buffer insert merge mechanism. */ void btr_cur_del_unmark_for_ibuf( /*========================*/ - rec_t* rec, /* in: record to delete unmark */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: record to delete unmark */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { /* We do not need to reserve btr_search_latch, as the page has just been read to the buffer pool and there cannot be a hash index to it. */ - rec_set_deleted_flag(rec, FALSE); + rec_set_deleted_flag(rec, index->table->comp, FALSE); - btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr); + btr_cur_del_mark_set_sec_rec_log(rec, index, FALSE, mtr); } /*==================== B-TREE RECORD REMOVE =========================*/ @@ -2279,8 +2373,11 @@ btr_cur_optimistic_delete( successor of the deleted record */ mtr_t* mtr) /* in: mtr */ { - page_t* page; - ulint max_ins_size; + page_t* page; + ulint max_ins_size; + mem_heap_t* heap; + rec_t* rec; + const ulint* offsets; ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_page(cursor)), MTR_MEMO_PAGE_X_FIX)); @@ -2290,26 +2387,30 @@ btr_cur_optimistic_delete( ut_ad(btr_page_get_level(page, mtr) == 0); - if (rec_contains_externally_stored_field(btr_cur_get_rec(cursor))) { - - return(FALSE); - } + heap = mem_heap_create(100); + rec = btr_cur_get_rec(cursor); + offsets = rec_get_offsets(rec, cursor->index, ULINT_UNDEFINED, heap); - if (btr_cur_can_delete_without_compress(cursor, mtr)) { + if (!rec_offs_any_extern(offsets) + && btr_cur_can_delete_without_compress( + cursor, rec_offs_size(offsets), mtr)) { - lock_update_delete(btr_cur_get_rec(cursor)); + lock_update_delete(rec); btr_search_update_hash_on_delete(cursor); max_ins_size = page_get_max_insert_size_after_reorganize(page, 1); - page_cur_delete_rec(btr_cur_get_page_cur(cursor), mtr); + page_cur_delete_rec(btr_cur_get_page_cur(cursor), + cursor->index, mtr); ibuf_update_free_bits_low(cursor->index, page, max_ins_size, mtr); + mem_heap_free(heap); return(TRUE); } + mem_heap_free(heap); return(FALSE); } @@ -2375,8 +2476,20 @@ btr_cur_pessimistic_delete( } } - btr_rec_free_externally_stored_fields(cursor->index, - btr_cur_get_rec(cursor), in_rollback, mtr); + heap = mem_heap_create(256); + rec = btr_cur_get_rec(cursor); + + /* Free externally stored fields if the record is neither + a node pointer nor in two-byte format. + This avoids unnecessary calls to rec_get_offsets(). */ + if (cursor->index->table->comp + ? !rec_get_node_ptr_flag(rec) + : !rec_get_1byte_offs_flag(rec)) { + btr_rec_free_externally_stored_fields(cursor->index, + rec, rec_get_offsets(rec, cursor->index, + ULINT_UNDEFINED, heap), + in_rollback, mtr); + } if ((page_get_n_recs(page) < 2) && (dict_tree_get_page(btr_cur_get_tree(cursor)) @@ -2393,8 +2506,6 @@ btr_cur_pessimistic_delete( goto return_after_reservations; } - rec = btr_cur_get_rec(cursor); - lock_update_delete(rec); if ((btr_page_get_level(page, mtr) > 0) @@ -2406,7 +2517,8 @@ btr_cur_pessimistic_delete( non-leaf level, we must mark the new leftmost node pointer as the predefined minimum record */ - btr_set_min_rec_mark(page_rec_get_next(rec), mtr); + btr_set_min_rec_mark(page_rec_get_next(rec), + cursor->index->table->comp, mtr); } else { /* Otherwise, if we delete the leftmost node pointer on a page, we have to change the father node pointer @@ -2415,8 +2527,6 @@ btr_cur_pessimistic_delete( btr_node_ptr_delete(tree, page, mtr); - heap = mem_heap_create(256); - node_ptr = dict_tree_build_node_ptr( tree, page_rec_get_next(rec), buf_frame_get_page_no(page), @@ -2425,20 +2535,19 @@ btr_cur_pessimistic_delete( btr_insert_on_non_leaf_level(tree, btr_page_get_level(page, mtr) + 1, node_ptr, mtr); - - mem_heap_free(heap); } } btr_search_update_hash_on_delete(cursor); - page_cur_delete_rec(btr_cur_get_page_cur(cursor), mtr); + page_cur_delete_rec(btr_cur_get_page_cur(cursor), cursor->index, mtr); ut_ad(btr_check_node_ptr(tree, page, mtr)); *err = DB_SUCCESS; return_after_reservations: + mem_heap_free(heap); if (ret == FALSE) { ret = btr_cur_compress_if_useful(cursor, mtr); @@ -2663,9 +2772,13 @@ btr_estimate_number_of_different_key_vals( ulint j; ulint add_on; mtr_t mtr; + mem_heap_t* heap; + ulint* offsets1 = 0; + ulint* offsets2 = 0; n_cols = dict_index_get_n_unique(index); + heap = mem_heap_create(100); n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong)); for (j = 0; j <= n_cols; j++) { @@ -2697,11 +2810,17 @@ btr_estimate_number_of_different_key_vals( while (rec != page_get_supremum_rec(page) && page_rec_get_next(rec) != page_get_supremum_rec(page)) { + rec_t* next_rec = page_rec_get_next(rec); matched_fields = 0; matched_bytes = 0; - - cmp_rec_rec_with_match(rec, page_rec_get_next(rec), - index, &matched_fields, + offsets1 = rec_reget_offsets(rec, index, + offsets1, ULINT_UNDEFINED, heap); + offsets2 = rec_reget_offsets(next_rec, index, + offsets2, n_cols, heap); + + cmp_rec_rec_with_match(rec, next_rec, + offsets1, offsets2, + index, n_cols, &matched_fields, &matched_bytes); for (j = matched_fields + 1; j <= n_cols; j++) { @@ -2712,7 +2831,8 @@ btr_estimate_number_of_different_key_vals( } total_external_size += - btr_rec_get_externally_stored_len(rec); + btr_rec_get_externally_stored_len( + rec, offsets1); rec = page_rec_get_next(rec); } @@ -2736,8 +2856,11 @@ btr_estimate_number_of_different_key_vals( } } + offsets1 = rec_reget_offsets(rec, index, + offsets1, ULINT_UNDEFINED, heap); total_external_size += - btr_rec_get_externally_stored_len(rec); + btr_rec_get_externally_stored_len(rec, + offsets1); mtr_commit(&mtr); } @@ -2778,6 +2901,7 @@ btr_estimate_number_of_different_key_vals( } mem_free(n_diff); + mem_heap_free(heap); } /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/ @@ -2788,9 +2912,10 @@ static ulint btr_rec_get_externally_stored_len( /*==============================*/ - /* out: externally stored part, in units of a - database page */ - rec_t* rec) /* in: record */ + /* out: externally stored part, + in units of a database page */ + rec_t* rec, /* in: record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { ulint n_fields; byte* data; @@ -2799,17 +2924,13 @@ btr_rec_get_externally_stored_len( ulint total_extern_len = 0; ulint i; - if (rec_get_data_size(rec) <= REC_1BYTE_OFFS_LIMIT) { - - return(0); - } - - n_fields = rec_get_n_fields(rec); + ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); + n_fields = rec_offs_n_fields(offsets); for (i = 0; i < n_fields; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { + if (rec_offs_nth_extern(offsets, i)) { - data = rec_get_nth_field(rec, i, &local_len); + data = rec_get_nth_field(rec, offsets, i, &local_len); local_len -= BTR_EXTERN_FIELD_REF_SIZE; @@ -2830,16 +2951,17 @@ static void btr_cur_set_ownership_of_extern_field( /*==================================*/ - rec_t* rec, /* in: clustered index record */ - ulint i, /* in: field number */ - ibool val, /* in: value to set */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: clustered index record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint i, /* in: field number */ + ibool val, /* in: value to set */ + mtr_t* mtr) /* in: mtr */ { byte* data; ulint local_len; ulint byte_val; - data = rec_get_nth_field(rec, i, &local_len); + data = rec_get_nth_field(rec, offsets, i, &local_len); ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); @@ -2866,19 +2988,22 @@ to free the field. */ void btr_cur_mark_extern_inherited_fields( /*=================================*/ - rec_t* rec, /* in: record in a clustered index */ - upd_t* update, /* in: update vector */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: record in a clustered index */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update, /* in: update vector */ + mtr_t* mtr) /* in: mtr */ { ibool is_updated; ulint n; ulint j; ulint i; - - n = rec_get_n_fields(rec); + + ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); + n = rec_offs_n_fields(offsets); for (i = 0; i < n; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { + if (rec_offs_nth_extern(offsets, i)) { /* Check it is not in updated fields */ is_updated = FALSE; @@ -2894,8 +3019,8 @@ btr_cur_mark_extern_inherited_fields( } if (!is_updated) { - btr_cur_set_ownership_of_extern_field(rec, i, - FALSE, mtr); + btr_cur_set_ownership_of_extern_field(rec, + offsets, i, FALSE, mtr); } } } @@ -2967,18 +3092,20 @@ static void btr_cur_unmark_extern_fields( /*=========================*/ - rec_t* rec, /* in: record in a clustered index */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: record in a clustered index */ + mtr_t* mtr, /* in: mtr */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { ulint n; ulint i; - n = rec_get_n_fields(rec); + ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); + n = rec_offs_n_fields(offsets); for (i = 0; i < n; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { - - btr_cur_set_ownership_of_extern_field(rec, i, + if (rec_offs_nth_extern(offsets, i)) { + + btr_cur_set_ownership_of_extern_field(rec, offsets, i, TRUE, mtr); } } @@ -3028,10 +3155,10 @@ ulint btr_push_update_extern_fields( /*==========================*/ /* out: number of values stored in ext_vect */ - ulint* ext_vect, /* in: array of ulints, must be preallocated + ulint* ext_vect,/* in: array of ulints, must be preallocated to have space for all fields in rec */ - rec_t* rec, /* in: record */ - upd_t* update) /* in: update vector or NULL */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update) /* in: update vector or NULL */ { ulint n_pushed = 0; ibool is_updated; @@ -3054,10 +3181,10 @@ btr_push_update_extern_fields( } } - n = rec_get_n_fields(rec); + n = rec_offs_n_fields(offsets); for (i = 0; i < n; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { + if (rec_offs_nth_extern(offsets, i)) { /* Check it is not in updated fields */ is_updated = FALSE; @@ -3119,6 +3246,7 @@ btr_store_big_rec_extern_fields( dict_index_t* index, /* in: index of rec; the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets, /* in: rec_get_offsets(rec, index) */ big_rec_t* big_rec_vec, /* in: vector containing fields to be stored externally */ mtr_t* local_mtr __attribute__((unused))) /* in: mtr @@ -3139,6 +3267,7 @@ btr_store_big_rec_extern_fields( ulint i; mtr_t mtr; + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(mtr_memo_contains(local_mtr, dict_tree_get_lock(index->tree), MTR_MEMO_X_LOCK)); ut_ad(mtr_memo_contains(local_mtr, buf_block_align(rec), @@ -3152,8 +3281,8 @@ btr_store_big_rec_extern_fields( for (i = 0; i < big_rec_vec->n_fields; i++) { - data = rec_get_nth_field(rec, big_rec_vec->fields[i].field_no, - &local_len); + data = rec_get_nth_field(rec, offsets, + big_rec_vec->fields[i].field_no, &local_len); ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); local_len -= BTR_EXTERN_FIELD_REF_SIZE; extern_len = big_rec_vec->fields[i].len; @@ -3254,7 +3383,7 @@ btr_store_big_rec_extern_fields( /* Set the bit denoting that this field in rec is stored externally */ - rec_set_nth_field_extern_bit(rec, + rec_set_nth_field_extern_bit(rec, index, big_rec_vec->fields[i].field_no, TRUE, &mtr); } @@ -3407,6 +3536,7 @@ btr_rec_free_externally_stored_fields( dict_index_t* index, /* in: index of the data, the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ibool do_not_free_inherited,/* in: TRUE if called in a rollback and we do not want to free inherited fields */ @@ -3419,21 +3549,18 @@ btr_rec_free_externally_stored_fields( ulint len; ulint i; + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)); - if (rec_get_data_size(rec) <= REC_1BYTE_OFFS_LIMIT) { - - return; - } - /* Free possible externally stored fields in the record */ - n_fields = rec_get_n_fields(rec); + ut_ad(index->table->comp == rec_offs_comp(offsets)); + n_fields = rec_offs_n_fields(offsets); for (i = 0; i < n_fields; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { + if (rec_offs_nth_extern(offsets, i)) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); btr_free_externally_stored_field(index, data, len, do_not_free_inherited, mtr); } @@ -3450,6 +3577,7 @@ btr_rec_free_updated_extern_fields( dict_index_t* index, /* in: index of rec; the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ upd_t* update, /* in: update vector */ ibool do_not_free_inherited,/* in: TRUE if called in a rollback and we do not want to free @@ -3463,13 +3591,10 @@ btr_rec_free_updated_extern_fields( ulint len; ulint i; + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)); - if (rec_get_data_size(rec) <= REC_1BYTE_OFFS_LIMIT) { - return; - } - /* Free possible externally stored fields in the record */ n_fields = upd_get_n_fields(update); @@ -3477,9 +3602,10 @@ btr_rec_free_updated_extern_fields( for (i = 0; i < n_fields; i++) { ufield = upd_get_nth_field(update, i); - if (rec_get_nth_field_extern_bit(rec, ufield->field_no)) { + if (rec_offs_nth_extern(offsets, ufield->field_no)) { - data = rec_get_nth_field(rec, ufield->field_no, &len); + data = rec_get_nth_field(rec, offsets, + ufield->field_no, &len); btr_free_externally_stored_field(index, data, len, do_not_free_inherited, mtr); } @@ -3583,7 +3709,8 @@ byte* btr_rec_copy_externally_stored_field( /*=================================*/ /* out: the field copied to heap */ - rec_t* rec, /* in: record */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint no, /* in: field number */ ulint* len, /* out: length of the field */ mem_heap_t* heap) /* in: mem heap */ @@ -3591,7 +3718,8 @@ btr_rec_copy_externally_stored_field( ulint local_len; byte* data; - ut_a(rec_get_nth_field_extern_bit(rec, no)); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_a(rec_offs_nth_extern(offsets, no)); /* An externally stored field can contain some initial data from the field, and in the last 20 bytes it has the @@ -3602,7 +3730,7 @@ btr_rec_copy_externally_stored_field( limit so that field offsets are stored in two bytes, and the extern bit is available in those two bytes. */ - data = rec_get_nth_field(rec, no, &local_len); + data = rec_get_nth_field(rec, offsets, no, &local_len); return(btr_copy_externally_stored_field(len, data, local_len, heap)); } diff --git a/innobase/btr/btr0pcur.c b/innobase/btr/btr0pcur.c index cf8a612ef28..7df8e53cd07 100644 --- a/innobase/btr/btr0pcur.c +++ b/innobase/btr/btr0pcur.c @@ -45,12 +45,12 @@ btr_pcur_free_for_mysql( mem_free(cursor->old_rec_buf); - cursor->old_rec = NULL; cursor->old_rec_buf = NULL; } cursor->btr_cur.page_cur.rec = NULL; cursor->old_rec = NULL; + cursor->old_n_fields = 0; cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; cursor->latch_mode = BTR_NO_LATCHES; @@ -133,9 +133,10 @@ btr_pcur_store_position( cursor->old_stored = BTR_PCUR_OLD_STORED; cursor->old_rec = dict_tree_copy_rec_order_prefix(tree, rec, - &(cursor->old_rec_buf), - &(cursor->buf_size)); - + &cursor->old_n_fields, + &cursor->old_rec_buf, + &cursor->buf_size); + cursor->block_when_stored = buf_block_align(page); cursor->modify_clock = buf_frame_get_modify_clock(page); } @@ -166,6 +167,8 @@ btr_pcur_copy_stored_position( pcur_receive->old_rec = pcur_receive->old_rec_buf + (pcur_donate->old_rec - pcur_donate->old_rec_buf); } + + pcur_receive->old_n_fields = pcur_donate->old_n_fields; } /****************************************************************** @@ -228,6 +231,7 @@ btr_pcur_restore_position( } ut_a(cursor->old_rec); + ut_a(cursor->old_n_fields); page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor)); @@ -242,17 +246,32 @@ btr_pcur_restore_position( buf_page_dbg_add_level(page, SYNC_TREE_NODE); #endif /* UNIV_SYNC_DEBUG */ if (cursor->rel_pos == BTR_PCUR_ON) { - +#ifdef UNIV_DEBUG + rec_t* rec; + ulint* offsets1; + ulint* offsets2; + dict_index_t* index; +#endif /* UNIV_DEBUG */ cursor->latch_mode = latch_mode; - - ut_ad(cmp_rec_rec(cursor->old_rec, - btr_pcur_get_rec(cursor), - dict_tree_find_index( - btr_cur_get_tree( +#ifdef UNIV_DEBUG + rec = btr_pcur_get_rec(cursor); + index = dict_tree_find_index( + btr_cur_get_tree( btr_pcur_get_btr_cur(cursor)), - btr_pcur_get_rec(cursor))) - == 0); + rec); + + heap = mem_heap_create(256); + offsets1 = rec_get_offsets(cursor->old_rec, + index, ULINT_UNDEFINED, heap); + offsets2 = rec_get_offsets(rec, + index, ULINT_UNDEFINED, heap); + ut_ad(cmp_rec_rec(cursor->old_rec, + rec, offsets1, offsets2, + cursor->old_n_fields, + index) == 0); + mem_heap_free(heap); +#endif /* UNIV_DEBUG */ return(TRUE); } @@ -265,7 +284,8 @@ btr_pcur_restore_position( heap = mem_heap_create(256); tree = btr_cur_get_tree(btr_pcur_get_btr_cur(cursor)); - tuple = dict_tree_build_data_tuple(tree, cursor->old_rec, heap); + tuple = dict_tree_build_data_tuple(tree, cursor->old_rec, + cursor->old_n_fields, heap); /* Save the old search mode of the cursor */ old_mode = cursor->search_mode; @@ -287,7 +307,10 @@ btr_pcur_restore_position( if (cursor->rel_pos == BTR_PCUR_ON && btr_pcur_is_on_user_rec(cursor, mtr) - && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor))) { + && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor), + rec_get_offsets(btr_pcur_get_rec(cursor), + btr_pcur_get_btr_cur(cursor)->index, + ULINT_UNDEFINED, heap))) { /* We have to store the NEW value for the modify clock, since the cursor can now be on a different page! But we can retain @@ -376,6 +399,7 @@ btr_pcur_move_to_next_page( ut_ad(next_page_no != FIL_NULL); next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr); + ut_a(page_is_comp(next_page) == page_is_comp(page)); buf_block_align(next_page)->check_index_page_at_flush = TRUE; btr_leaf_page_release(page, cursor->latch_mode, mtr); diff --git a/innobase/btr/btr0sea.c b/innobase/btr/btr0sea.c index ad74f9704da..40ccf56492f 100644 --- a/innobase/btr/btr0sea.c +++ b/innobase/btr/btr0sea.c @@ -416,7 +416,7 @@ btr_search_update_hash_ref( && (block->curr_n_fields == info->n_fields) && (block->curr_n_bytes == info->n_bytes) && (block->curr_side == info->side)) { - + mem_heap_t* heap; rec = btr_cur_get_rec(cursor); if (!page_rec_is_user_rec(rec)) { @@ -425,10 +425,11 @@ btr_search_update_hash_ref( } tree_id = ((cursor->index)->tree)->id; - - fold = rec_fold(rec, block->curr_n_fields, - block->curr_n_bytes, tree_id); - + heap = mem_heap_create(100); + fold = rec_fold(rec, rec_get_offsets(rec, cursor->index, + ULINT_UNDEFINED, heap), block->curr_n_fields, + block->curr_n_bytes, tree_id); + mem_heap_free(heap); #ifdef UNIV_SYNC_DEBUG ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX)); #endif /* UNIV_SYNC_DEBUG */ @@ -535,15 +536,17 @@ btr_search_check_guess( or PAGE_CUR_GE */ mtr_t* mtr) /* in: mtr */ { - page_t* page; - rec_t* rec; - rec_t* prev_rec; - rec_t* next_rec; - ulint n_unique; - ulint match; - ulint bytes; - int cmp; - + page_t* page; + rec_t* rec; + rec_t* prev_rec; + rec_t* next_rec; + ulint n_unique; + ulint match; + ulint bytes; + int cmp; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; + n_unique = dict_index_get_n_unique_in_tree(cursor->index); rec = btr_cur_get_rec(cursor); @@ -554,23 +557,25 @@ btr_search_check_guess( match = 0; bytes = 0; - cmp = page_cmp_dtuple_rec_with_match(tuple, rec, &match, &bytes); + offsets = rec_get_offsets(rec, cursor->index, n_unique, heap); + cmp = page_cmp_dtuple_rec_with_match(tuple, rec, + offsets, &match, &bytes); if (mode == PAGE_CUR_GE) { if (cmp == 1) { - + mem_heap_free(heap); return(FALSE); } cursor->up_match = match; if (match >= n_unique) { - + mem_heap_free(heap); return(TRUE); } } else if (mode == PAGE_CUR_LE) { if (cmp == -1) { - + mem_heap_free(heap); return(FALSE); } @@ -578,12 +583,12 @@ btr_search_check_guess( } else if (mode == PAGE_CUR_G) { if (cmp != -1) { - + mem_heap_free(heap); return(FALSE); } } else if (mode == PAGE_CUR_L) { if (cmp != 1) { - + mem_heap_free(heap); return(FALSE); } } @@ -591,7 +596,7 @@ btr_search_check_guess( if (can_only_compare_to_cursor_rec) { /* Since we could not determine if our guess is right just by looking at the record under the cursor, return FALSE */ - + mem_heap_free(heap); return(FALSE); } @@ -605,17 +610,15 @@ btr_search_check_guess( prev_rec = page_rec_get_prev(rec); if (prev_rec == page_get_infimum_rec(page)) { - - if (btr_page_get_prev(page, mtr) != FIL_NULL) { - - return(FALSE); - } - - return(TRUE); + mem_heap_free(heap); + return(btr_page_get_prev(page, mtr) == FIL_NULL); } + offsets = rec_reget_offsets(prev_rec, cursor->index, + offsets, n_unique, heap); cmp = page_cmp_dtuple_rec_with_match(tuple, prev_rec, - &match, &bytes); + offsets, &match, &bytes); + mem_heap_free(heap); if (mode == PAGE_CUR_GE) { if (cmp != 1) { @@ -636,6 +639,7 @@ btr_search_check_guess( next_rec = page_rec_get_next(rec); if (next_rec == page_get_supremum_rec(page)) { + mem_heap_free(heap); if (btr_page_get_next(page, mtr) == FIL_NULL) { @@ -647,8 +651,12 @@ btr_search_check_guess( return(FALSE); } - cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, &match, &bytes); - + offsets = rec_reget_offsets(next_rec, cursor->index, + offsets, n_unique, heap); + cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, + offsets, &match, &bytes); + mem_heap_free(heap); + if (mode == PAGE_CUR_LE) { if (cmp != -1) { @@ -1003,8 +1011,7 @@ static void btr_search_build_page_hash_index( /*=============================*/ - dict_index_t* index, /* in: index for which to build, or NULL if - not known */ + dict_index_t* index, /* in: index for which to build */ page_t* page, /* in: index page, s- or x-latched */ ulint n_fields,/* in: hash this many full fields */ ulint n_bytes,/* in: hash this many bytes from the next @@ -1024,7 +1031,11 @@ btr_search_build_page_hash_index( ulint* folds; rec_t** recs; ulint i; - + mem_heap_t* heap; + ulint* offsets; + + ut_ad(index); + block = buf_block_align(page); table = btr_search_sys->hash_index; @@ -1061,9 +1072,9 @@ btr_search_build_page_hash_index( return; } - if (index && (dict_index_get_n_unique_in_tree(index) < n_fields + if (dict_index_get_n_unique_in_tree(index) < n_fields || (dict_index_get_n_unique_in_tree(index) == n_fields - && n_bytes > 0))) { + && n_bytes > 0)) { return; } @@ -1072,6 +1083,7 @@ btr_search_build_page_hash_index( folds = mem_alloc(n_recs * sizeof(ulint)); recs = mem_alloc(n_recs * sizeof(rec_t*)); + heap = mem_heap_create(100); n_cached = 0; @@ -1082,18 +1094,19 @@ btr_search_build_page_hash_index( rec = page_get_infimum_rec(page); rec = page_rec_get_next(rec); + offsets = rec_get_offsets(rec, index, n_fields + (n_bytes > 0), heap); + if (rec != sup) { - ut_a(n_fields <= rec_get_n_fields(rec)); + ut_a(n_fields <= rec_offs_n_fields(offsets)); if (n_bytes > 0) { - ut_a(n_fields < rec_get_n_fields(rec)); + ut_a(n_fields < rec_offs_n_fields(offsets)); } } /* FIXME: in a mixed tree, all records may not have enough ordering fields: */ - - fold = rec_fold(rec, n_fields, n_bytes, tree_id); + fold = rec_fold(rec, offsets, n_fields, n_bytes, tree_id); if (side == BTR_SEARCH_LEFT_SIDE) { @@ -1117,7 +1130,10 @@ btr_search_build_page_hash_index( break; } - next_fold = rec_fold(next_rec, n_fields, n_bytes, tree_id); + offsets = rec_reget_offsets(next_rec, index, + offsets, n_fields + (n_bytes > 0), heap); + next_fold = rec_fold(next_rec, offsets, n_fields, + n_bytes, tree_id); if (fold != next_fold) { /* Insert an entry into the hash index */ @@ -1145,13 +1161,7 @@ btr_search_build_page_hash_index( if (block->is_hashed && ((block->curr_n_fields != n_fields) || (block->curr_n_bytes != n_bytes) || (block->curr_side != side))) { - - rw_lock_x_unlock(&btr_search_latch); - - mem_free(folds); - mem_free(recs); - - return; + goto exit_func; } block->is_hashed = TRUE; @@ -1166,10 +1176,12 @@ btr_search_build_page_hash_index( ha_insert_for_fold(table, folds[i], recs[i]); } +exit_func: rw_lock_x_unlock(&btr_search_latch); mem_free(folds); mem_free(recs); + mem_heap_free(heap); } /************************************************************************ @@ -1181,10 +1193,13 @@ parameters as page (this often happens when a page is split). */ void btr_search_move_or_delete_hash_entries( /*===================================*/ - page_t* new_page, /* in: records are copied to this page */ - page_t* page) /* in: index page from which records were - copied, and the copied records will be deleted - from this page */ + page_t* new_page, /* in: records are copied + to this page */ + page_t* page, /* in: index page from which + records were copied, and the + copied records will be deleted + from this page */ + dict_index_t* index) /* in: record descriptor */ { buf_block_t* block; buf_block_t* new_block; @@ -1194,6 +1209,7 @@ btr_search_move_or_delete_hash_entries( block = buf_block_align(page); new_block = buf_block_align(new_page); + ut_a(page_is_comp(page) == page_is_comp(new_page)); #ifdef UNIV_SYNC_DEBUG ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); @@ -1224,8 +1240,8 @@ btr_search_move_or_delete_hash_entries( rw_lock_s_unlock(&btr_search_latch); ut_a(n_fields + n_bytes > 0); - - btr_search_build_page_hash_index(NULL, new_page, n_fields, + + btr_search_build_page_hash_index(index, new_page, n_fields, n_bytes, side); ut_a(n_fields == block->curr_n_fields); ut_a(n_bytes == block->curr_n_bytes); @@ -1253,6 +1269,7 @@ btr_search_update_hash_on_delete( ulint fold; dulint tree_id; ibool found; + mem_heap_t* heap; rec = btr_cur_get_rec(cursor); @@ -1272,9 +1289,11 @@ btr_search_update_hash_on_delete( table = btr_search_sys->hash_index; tree_id = cursor->index->tree->id; - - fold = rec_fold(rec, block->curr_n_fields, block->curr_n_bytes, - tree_id); + heap = mem_heap_create(100); + fold = rec_fold(rec, rec_get_offsets(rec, cursor->index, + ULINT_UNDEFINED, heap), block->curr_n_fields, + block->curr_n_bytes, tree_id); + mem_heap_free(heap); rw_lock_x_lock(&btr_search_latch); found = ha_search_and_delete_if_found(table, fold, rec); @@ -1355,6 +1374,8 @@ btr_search_update_hash_on_insert( ulint n_bytes; ulint side; ibool locked = FALSE; + mem_heap_t* heap; + ulint* offsets; table = btr_search_sys->hash_index; @@ -1383,15 +1404,22 @@ btr_search_update_hash_on_insert( next_rec = page_rec_get_next(ins_rec); page = buf_frame_align(rec); - - ins_fold = rec_fold(ins_rec, n_fields, n_bytes, tree_id); + heap = mem_heap_create(100); + offsets = rec_get_offsets(ins_rec, cursor->index, + ULINT_UNDEFINED, heap); + ins_fold = rec_fold(ins_rec, offsets, n_fields, n_bytes, tree_id); if (next_rec != page_get_supremum_rec(page)) { - next_fold = rec_fold(next_rec, n_fields, n_bytes, tree_id); + offsets = rec_reget_offsets(next_rec, cursor->index, + offsets, n_fields + (n_bytes > 0), heap); + next_fold = rec_fold(next_rec, offsets, n_fields, + n_bytes, tree_id); } if (rec != page_get_infimum_rec(page)) { - fold = rec_fold(rec, n_fields, n_bytes, tree_id); + offsets = rec_reget_offsets(rec, cursor->index, + offsets, n_fields + (n_bytes > 0), heap); + fold = rec_fold(rec, offsets, n_fields, n_bytes, tree_id); } else { if (side == BTR_SEARCH_LEFT_SIDE) { @@ -1461,6 +1489,7 @@ check_next_rec: } function_exit: + mem_heap_free(heap); if (locked) { rw_lock_x_unlock(&btr_search_latch); } @@ -1470,9 +1499,10 @@ function_exit: Validates the search system. */ ibool -btr_search_validate(void) -/*=====================*/ +btr_search_validate( +/*================*/ /* out: TRUE if ok */ + dict_index_t* index) /* in: record descriptor */ { buf_block_t* block; page_t* page; @@ -1480,6 +1510,8 @@ btr_search_validate(void) ulint n_page_dumps = 0; ibool ok = TRUE; ulint i; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; rw_lock_x_lock(&btr_search_latch); @@ -1489,9 +1521,13 @@ btr_search_validate(void) while (node != NULL) { block = buf_block_align(node->data); page = buf_frame_align(node->data); + offsets = rec_reget_offsets((rec_t*) node->data, index, + offsets, block->curr_n_fields + + (block->curr_n_bytes > 0), heap); if (!block->is_hashed || node->fold != rec_fold((rec_t*)(node->data), + offsets, block->curr_n_fields, block->curr_n_bytes, btr_page_get_index_id(page))) { @@ -1507,12 +1543,13 @@ btr_search_validate(void) (ulong) ut_dulint_get_low(btr_page_get_index_id(page)), (ulong) node->fold, (ulong) rec_fold((rec_t*)(node->data), + offsets, block->curr_n_fields, block->curr_n_bytes, btr_page_get_index_id(page))); fputs("InnoDB: Record ", stderr); - rec_print(stderr, (rec_t*)(node->data)); + rec_print(stderr, (rec_t*)node->data, offsets); fprintf(stderr, "\nInnoDB: on that page." "Page mem address %p, is hashed %lu, n fields %lu, n bytes %lu\n" "side %lu\n", @@ -1536,6 +1573,7 @@ btr_search_validate(void) } rw_lock_x_unlock(&btr_search_latch); + mem_heap_free(heap); return(ok); } diff --git a/innobase/data/data0data.c b/innobase/data/data0data.c index 97ec1a1acd9..25ba19d0296 100644 --- a/innobase/data/data0data.c +++ b/innobase/data/data0data.c @@ -500,7 +500,7 @@ dtuple_convert_big_rec( ut_a(dtuple_check_typed_no_assert(entry)); - size = rec_get_converted_size(entry); + size = rec_get_converted_size(index, entry); if (size > 1000000000) { fprintf(stderr, @@ -524,9 +524,10 @@ dtuple_convert_big_rec( n_fields = 0; - while ((rec_get_converted_size(entry) - >= page_get_free_space_of_empty() / 2) - || rec_get_converted_size(entry) >= REC_MAX_DATA_SIZE) { + while (rec_get_converted_size(index, entry) + >= ut_min(page_get_free_space_of_empty( + index->table->comp) / 2, + REC_MAX_DATA_SIZE)) { longest = 0; for (i = dict_index_get_n_unique_in_tree(index); diff --git a/innobase/data/data0type.c b/innobase/data/data0type.c index 714cf92bc65..9b8fb084e33 100644 --- a/innobase/data/data0type.c +++ b/innobase/data/data0type.c @@ -195,7 +195,7 @@ dtype_validate( ut_a((type->mtype >= DATA_VARCHAR) && (type->mtype <= DATA_MYSQL)); if (type->mtype == DATA_SYS) { - ut_a(type->prtype <= DATA_MIX_ID); + ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS); } return(TRUE); diff --git a/innobase/dict/dict0boot.c b/innobase/dict/dict0boot.c index f156cf67a18..e500b92252f 100644 --- a/innobase/dict/dict0boot.c +++ b/innobase/dict/dict0boot.c @@ -158,7 +158,7 @@ dict_hdr_create( /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, DICT_TABLES_ID, mtr); + DICT_HDR_SPACE, DICT_TABLES_ID, FALSE, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -168,7 +168,7 @@ dict_hdr_create( MLOG_4BYTES, mtr); /*--------------------------*/ root_page_no = btr_create(DICT_UNIQUE, DICT_HDR_SPACE, - DICT_TABLE_IDS_ID, mtr); + DICT_TABLE_IDS_ID, FALSE, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -178,7 +178,7 @@ dict_hdr_create( MLOG_4BYTES, mtr); /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, DICT_COLUMNS_ID, mtr); + DICT_HDR_SPACE, DICT_COLUMNS_ID, FALSE, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -188,7 +188,7 @@ dict_hdr_create( MLOG_4BYTES, mtr); /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, DICT_INDEXES_ID, mtr); + DICT_HDR_SPACE, DICT_INDEXES_ID, FALSE, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -198,7 +198,7 @@ dict_hdr_create( MLOG_4BYTES, mtr); /*--------------------------*/ root_page_no = btr_create(DICT_CLUSTERED | DICT_UNIQUE, - DICT_HDR_SPACE, DICT_FIELDS_ID, mtr); + DICT_HDR_SPACE, DICT_FIELDS_ID, FALSE, mtr); if (root_page_no == FIL_NULL) { return(FALSE); @@ -254,7 +254,7 @@ dict_boot(void) /* Insert into the dictionary cache the descriptions of the basic system tables */ /*-------------------------*/ - table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE,8); + table = dict_mem_table_create("SYS_TABLES", DICT_HDR_SPACE, 8, FALSE); dict_mem_table_add_col(table, "NAME", DATA_BINARY, 0, 0, 0); dict_mem_table_add_col(table, "ID", DATA_BINARY, 0, 0, 0); @@ -290,7 +290,7 @@ dict_boot(void) index->id = DICT_TABLE_IDS_ID; ut_a(dict_index_add_to_cache(table, index)); /*-------------------------*/ - table = dict_mem_table_create("SYS_COLUMNS",DICT_HDR_SPACE,7); + table = dict_mem_table_create("SYS_COLUMNS", DICT_HDR_SPACE, 7, FALSE); dict_mem_table_add_col(table, "TABLE_ID", DATA_BINARY,0,0,0); dict_mem_table_add_col(table, "POS", DATA_INT, 0, 4, 0); @@ -316,7 +316,7 @@ dict_boot(void) index->id = DICT_COLUMNS_ID; ut_a(dict_index_add_to_cache(table, index)); /*-------------------------*/ - table = dict_mem_table_create("SYS_INDEXES",DICT_HDR_SPACE,7); + table = dict_mem_table_create("SYS_INDEXES", DICT_HDR_SPACE, 7, FALSE); dict_mem_table_add_col(table, "TABLE_ID", DATA_BINARY, 0,0,0); dict_mem_table_add_col(table, "ID", DATA_BINARY, 0, 0, 0); @@ -349,7 +349,7 @@ dict_boot(void) index->id = DICT_INDEXES_ID; ut_a(dict_index_add_to_cache(table, index)); /*-------------------------*/ - table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE,3); + table = dict_mem_table_create("SYS_FIELDS", DICT_HDR_SPACE, 3, FALSE); dict_mem_table_add_col(table, "INDEX_ID", DATA_BINARY, 0,0,0); dict_mem_table_add_col(table, "POS", DATA_INT, 0, 4, 0); diff --git a/innobase/dict/dict0crea.c b/innobase/dict/dict0crea.c index cbdc0aab53c..747a99ebdc9 100644 --- a/innobase/dict/dict0crea.c +++ b/innobase/dict/dict0crea.c @@ -84,7 +84,8 @@ dict_create_sys_tables_tuple( dfield = dtuple_get_nth_field(entry, 5); ptr = mem_heap_alloc(heap, 4); - mach_write_to_4(ptr, table->mix_len); + mach_write_to_4(ptr, (table->mix_len & 0x7fffffff) | + ((ulint) table->comp << 31)); dfield_set_data(dfield, ptr, 4); /* 8: CLUSTER_NAME ---------------------*/ @@ -624,7 +625,7 @@ dict_create_index_tree_step( btr_pcur_move_to_next_user_rec(&pcur, &mtr); index->page_no = btr_create(index->type, index->space, index->id, - &mtr); + table->comp, &mtr); /* printf("Created a new index tree in space %lu root page %lu\n", index->space, index->page_no); */ @@ -660,8 +661,9 @@ dict_drop_index_tree( #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&(dict_sys->mutex))); #endif /* UNIV_SYNC_DEBUG */ - - ptr = rec_get_nth_field(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len); + + ut_a(!dict_sys->sys_indexes->comp); + ptr = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, &len); ut_ad(len == 4); @@ -673,8 +675,9 @@ dict_drop_index_tree( return; } - ptr = rec_get_nth_field(rec, DICT_SYS_INDEXES_SPACE_NO_FIELD, &len); - + ptr = rec_get_nth_field_old(rec, + DICT_SYS_INDEXES_SPACE_NO_FIELD, &len); + ut_ad(len == 4); space = mtr_read_ulint(ptr, MLOG_4BYTES, mtr); @@ -699,8 +702,8 @@ dict_drop_index_tree( root_page_no); */ btr_free_root(space, root_page_no, mtr); - page_rec_write_index_page_no(rec, DICT_SYS_INDEXES_PAGE_NO_FIELD, - FIL_NULL, mtr); + page_rec_write_index_page_no(rec, + DICT_SYS_INDEXES_PAGE_NO_FIELD, FIL_NULL, mtr); } /************************************************************************* diff --git a/innobase/dict/dict0dict.c b/innobase/dict/dict0dict.c index 183c547ab2b..0aaa3a9a721 100644 --- a/innobase/dict/dict0dict.c +++ b/innobase/dict/dict0dict.c @@ -814,23 +814,22 @@ dict_table_add_to_cache( system columns. */ dict_mem_table_add_col(table, "DB_ROW_ID", DATA_SYS, - DATA_ROW_ID, 0, 0); + DATA_ROW_ID | DATA_NOT_NULL, DATA_ROW_ID_LEN, 0); #if DATA_ROW_ID != 0 #error "DATA_ROW_ID != 0" #endif dict_mem_table_add_col(table, "DB_TRX_ID", DATA_SYS, - DATA_TRX_ID, 0, 0); + DATA_TRX_ID | DATA_NOT_NULL, DATA_TRX_ID_LEN, 0); #if DATA_TRX_ID != 1 #error "DATA_TRX_ID != 1" #endif dict_mem_table_add_col(table, "DB_ROLL_PTR", DATA_SYS, - DATA_ROLL_PTR, 0, 0); + DATA_ROLL_PTR | DATA_NOT_NULL, DATA_ROLL_PTR_LEN, 0); #if DATA_ROLL_PTR != 2 #error "DATA_ROLL_PTR != 2" #endif - dict_mem_table_add_col(table, "DB_MIX_ID", DATA_SYS, - DATA_MIX_ID, 0, 0); + DATA_MIX_ID | DATA_NOT_NULL, DATA_MIX_ID_LEN, 0); #if DATA_MIX_ID != 3 #error "DATA_MIX_ID != 3" #endif @@ -1588,7 +1587,7 @@ dict_index_find_cols( /*********************************************************************** Adds a column to index. */ -UNIV_INLINE + void dict_index_add_col( /*===============*/ @@ -1604,6 +1603,34 @@ dict_index_add_col( field = dict_index_get_nth_field(index, index->n_def - 1); field->col = col; + field->fixed_len = dtype_get_fixed_size(&col->type); + + if (prefix_len && field->fixed_len > prefix_len) { + field->fixed_len = prefix_len; + } + + /* Long fixed-length fields that need external storage are treated as + variable-length fields, so that the extern flag can be embedded in + the length word. */ + + if (field->fixed_len > DICT_MAX_COL_PREFIX_LEN) { + field->fixed_len = 0; + } + + if (!(dtype_get_prtype(&col->type) & DATA_NOT_NULL)) { + index->n_nullable++; + } + + if (index->n_def > 1) { + const dict_field_t* field2 = + dict_index_get_nth_field(index, index->n_def - 2); + field->fixed_offs = (!field2->fixed_len || + field2->fixed_offs == ULINT_UNDEFINED) + ? ULINT_UNDEFINED + : field2->fixed_len + field2->fixed_offs; + } else { + field->fixed_offs = 0; + } } /*********************************************************************** @@ -3580,9 +3607,10 @@ dict_tree_find_index_low( && (table->type != DICT_TABLE_ORDINARY)) { /* Get the mix id of the record */ + ut_a(!table->comp); mix_id = mach_dulint_read_compressed( - rec_get_nth_field(rec, table->mix_len, &len)); + rec_get_nth_field_old(rec, table->mix_len, &len)); while (ut_dulint_cmp(table->mix_id, mix_id) != 0) { @@ -3715,7 +3743,8 @@ dict_tree_build_node_ptr( on non-leaf levels we remove the last field, which contains the page number of the child page */ - n_unique = rec_get_n_fields(rec); + ut_a(!ind->table->comp); + n_unique = rec_get_n_fields_old(rec); if (level > 0) { ut_a(n_unique > 1); @@ -3744,9 +3773,11 @@ dict_tree_build_node_ptr( field = dtuple_get_nth_field(tuple, n_unique); dfield_set_data(field, buf, 4); - dtype_set(dfield_get_type(field), DATA_SYS_CHILD, 0, 0, 0); + dtype_set(dfield_get_type(field), DATA_SYS_CHILD, DATA_NOT_NULL, 4, 0); - rec_copy_prefix_to_dtuple(tuple, rec, n_unique, heap); + rec_copy_prefix_to_dtuple(tuple, rec, ind, n_unique, heap); + dtuple_set_info_bits(tuple, dtuple_get_info_bits(tuple) | + REC_STATUS_NODE_PTR); ut_ad(dtuple_check_typed(tuple)); @@ -3763,27 +3794,26 @@ dict_tree_copy_rec_order_prefix( /* out: pointer to the prefix record */ dict_tree_t* tree, /* in: index tree */ rec_t* rec, /* in: record for which to copy prefix */ + ulint* n_fields,/* out: number of fields copied */ byte** buf, /* in/out: memory buffer for the copied prefix, or NULL */ ulint* buf_size)/* in/out: buffer size */ { - dict_index_t* ind; - rec_t* order_rec; - ulint n_fields; - - ind = dict_tree_find_index_low(tree, rec); + dict_index_t* index; + ulint n; - n_fields = dict_index_get_n_unique_in_tree(ind); - - if (tree->type & DICT_UNIVERSAL) { + index = dict_tree_find_index_low(tree, rec); - n_fields = rec_get_n_fields(rec); + if (tree->type & DICT_UNIVERSAL) { + ut_a(!index->table->comp); + n = rec_get_n_fields_old(rec); + } else { + n = dict_index_get_n_unique_in_tree(index); } - order_rec = rec_copy_prefix_to_buf(rec, n_fields, buf, buf_size); - - return(order_rec); -} + *n_fields = n; + return(rec_copy_prefix_to_buf(rec, index, n, buf, buf_size)); +} /************************************************************************** Builds a typed data tuple out of a physical record. */ @@ -3794,21 +3824,21 @@ dict_tree_build_data_tuple( /* out, own: data tuple */ dict_tree_t* tree, /* in: index tree */ rec_t* rec, /* in: record for which to build data tuple */ + ulint n_fields,/* in: number of data fields */ mem_heap_t* heap) /* in: memory heap where tuple created */ { dtuple_t* tuple; dict_index_t* ind; - ulint n_fields; ind = dict_tree_find_index_low(tree, rec); - n_fields = rec_get_n_fields(rec); + ut_ad(ind->table->comp || n_fields <= rec_get_n_fields_old(rec)); tuple = dtuple_create(heap, n_fields); dict_index_copy_types(tuple, ind, n_fields); - rec_copy_prefix_to_dtuple(tuple, rec, n_fields, heap); + rec_copy_prefix_to_dtuple(tuple, rec, ind, n_fields, heap); ut_ad(dtuple_check_typed(tuple)); @@ -3826,6 +3856,27 @@ dict_index_calc_min_rec_len( ulint sum = 0; ulint i; + if (index->table->comp) { + ulint nullable = 0; + sum = REC_N_NEW_EXTRA_BYTES; + for (i = 0; i < dict_index_get_n_fields(index); i++) { + dtype_t*t = dict_index_get_nth_type(index, i); + ulint size = dtype_get_fixed_size(t); + sum += size; + if (!size) { + size = dtype_get_len(t); + sum += size < 128 ? 1 : 2; + } + if (!(dtype_get_prtype(t) & DATA_NOT_NULL)) + nullable++; + } + + /* round the NULL flags up to full bytes */ + sum += (nullable + 7) / 8; + + return(sum); + } + for (i = 0; i < dict_index_get_n_fields(index); i++) { sum += dtype_get_fixed_size(dict_index_get_nth_type(index, i)); } @@ -3836,7 +3887,7 @@ dict_index_calc_min_rec_len( sum += dict_index_get_n_fields(index); } - sum += REC_N_EXTRA_BYTES; + sum += REC_N_OLD_EXTRA_BYTES; return(sum); } diff --git a/innobase/dict/dict0load.c b/innobase/dict/dict0load.c index 8fc6eb9141e..c80f8346abf 100644 --- a/innobase/dict/dict0load.c +++ b/innobase/dict/dict0load.c @@ -55,6 +55,7 @@ dict_get_first_table_name_in_db( sys_tables = dict_table_get_low("SYS_TABLES"); sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); + ut_a(!sys_tables->comp); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); @@ -77,7 +78,7 @@ loop: return(NULL); } - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); if (len < strlen(name) || ut_memcmp(name, field, strlen(name)) != 0) { @@ -90,7 +91,7 @@ loop: return(NULL); } - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, sys_tables->comp)) { /* We found one */ @@ -163,9 +164,9 @@ loop: return; } - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, sys_tables->comp)) { /* We found one */ @@ -229,6 +230,7 @@ dict_check_tablespaces_or_store_max_id( sys_tables = dict_table_get_low("SYS_TABLES"); sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); + ut_a(!sys_tables->comp); btr_pcur_open_at_index_side(TRUE, sys_index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr); @@ -255,15 +257,15 @@ loop: return; } - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, sys_tables->comp)) { /* We found one */ char* name = mem_strdupl((char*) field, len); - field = rec_get_nth_field(rec, 9, &len); + field = rec_get_nth_field_old(rec, 9, &len); ut_a(len == 4); space_id = mach_read_from_4(field); @@ -328,6 +330,7 @@ dict_load_columns( sys_columns = dict_table_get_low("SYS_COLUMNS"); sys_index = UT_LIST_GET_FIRST(sys_columns->indexes); + ut_a(!sys_columns->comp); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); @@ -346,28 +349,27 @@ dict_load_columns( ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr)); - ut_a(!rec_get_deleted_flag(rec)); - - field = rec_get_nth_field(rec, 0, &len); + ut_a(!rec_get_deleted_flag(rec, sys_columns->comp)); + + field = rec_get_nth_field_old(rec, 0, &len); ut_ad(len == 8); ut_a(ut_dulint_cmp(table->id, mach_read_from_8(field)) == 0); - field = rec_get_nth_field(rec, 1, &len); + field = rec_get_nth_field_old(rec, 1, &len); ut_ad(len == 4); ut_a(i == mach_read_from_4(field)); ut_a(0 == ut_strcmp("NAME", dict_field_get_col( - dict_index_get_nth_field( - dict_table_get_first_index(sys_columns), 4))->name)); + dict_index_get_nth_field(sys_index, 4))->name)); - field = rec_get_nth_field(rec, 4, &len); + field = rec_get_nth_field_old(rec, 4, &len); name = mem_heap_strdupl(heap, (char*) field, len); - field = rec_get_nth_field(rec, 5, &len); + field = rec_get_nth_field_old(rec, 5, &len); mtype = mach_read_from_4(field); - field = rec_get_nth_field(rec, 6, &len); + field = rec_get_nth_field_old(rec, 6, &len); prtype = mach_read_from_4(field); if (dtype_is_non_binary_string_type(mtype, prtype) @@ -379,15 +381,14 @@ dict_load_columns( data_mysql_default_charset_coll); } - field = rec_get_nth_field(rec, 7, &len); + field = rec_get_nth_field_old(rec, 7, &len); col_len = mach_read_from_4(field); ut_a(0 == ut_strcmp("PREC", dict_field_get_col( - dict_index_get_nth_field( - dict_table_get_first_index(sys_columns), 8))->name)); + dict_index_get_nth_field(sys_index, 8))->name)); - field = rec_get_nth_field(rec, 8, &len); + field = rec_get_nth_field_old(rec, 8, &len); prec = mach_read_from_4(field); dict_mem_table_add_col(table, name, mtype, prtype, col_len, @@ -452,6 +453,7 @@ dict_load_fields( sys_fields = dict_table_get_low("SYS_FIELDS"); sys_index = UT_LIST_GET_FIRST(sys_fields->indexes); + ut_a(!sys_fields->comp); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); @@ -469,15 +471,15 @@ dict_load_fields( rec = btr_pcur_get_rec(&pcur); ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr)); - if (rec_get_deleted_flag(rec)) { + if (rec_get_deleted_flag(rec, sys_fields->comp)) { dict_load_report_deleted_index(table->name, i); } - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); ut_ad(len == 8); ut_a(ut_memcmp(buf, field, len) == 0); - field = rec_get_nth_field(rec, 1, &len); + field = rec_get_nth_field_old(rec, 1, &len); ut_a(len == 4); /* The next field stores the field position in the index @@ -503,10 +505,9 @@ dict_load_fields( ut_a(0 == ut_strcmp("COL_NAME", dict_field_get_col( - dict_index_get_nth_field( - dict_table_get_first_index(sys_fields), 4))->name)); + dict_index_get_nth_field(sys_index, 4))->name)); - field = rec_get_nth_field(rec, 4, &len); + field = rec_get_nth_field_old(rec, 4, &len); dict_mem_index_add_field(index, mem_heap_strdupl(heap, (char*) field, len), 0, prefix_len); @@ -565,6 +566,7 @@ dict_load_indexes( sys_indexes = dict_table_get_low("SYS_INDEXES"); sys_index = UT_LIST_GET_FIRST(sys_indexes->indexes); + ut_a(!sys_indexes->comp); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); @@ -585,14 +587,14 @@ dict_load_indexes( rec = btr_pcur_get_rec(&pcur); - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); ut_ad(len == 8); if (ut_memcmp(buf, field, len) != 0) { break; } - if (rec_get_deleted_flag(rec)) { + if (rec_get_deleted_flag(rec, table->comp)) { dict_load_report_deleted_index(table->name, ULINT_UNDEFINED); @@ -602,33 +604,31 @@ dict_load_indexes( return(FALSE); } - field = rec_get_nth_field(rec, 1, &len); + field = rec_get_nth_field_old(rec, 1, &len); ut_ad(len == 8); id = mach_read_from_8(field); ut_a(0 == ut_strcmp("NAME", dict_field_get_col( - dict_index_get_nth_field( - dict_table_get_first_index(sys_indexes), 4))->name)); - - field = rec_get_nth_field(rec, 4, &name_len); + dict_index_get_nth_field(sys_index, 4))->name)); + + field = rec_get_nth_field_old(rec, 4, &name_len); name_buf = mem_heap_strdupl(heap, (char*) field, name_len); - field = rec_get_nth_field(rec, 5, &len); + field = rec_get_nth_field_old(rec, 5, &len); n_fields = mach_read_from_4(field); - field = rec_get_nth_field(rec, 6, &len); + field = rec_get_nth_field_old(rec, 6, &len); type = mach_read_from_4(field); - field = rec_get_nth_field(rec, 7, &len); + field = rec_get_nth_field_old(rec, 7, &len); space = mach_read_from_4(field); ut_a(0 == ut_strcmp("PAGE_NO", dict_field_get_col( - dict_index_get_nth_field( - dict_table_get_first_index(sys_indexes), 8))->name)); + dict_index_get_nth_field(sys_index, 8))->name)); - field = rec_get_nth_field(rec, 8, &len); + field = rec_get_nth_field_old(rec, 8, &len); page_no = mach_read_from_4(field); if (page_no == FIL_NULL) { @@ -731,6 +731,7 @@ dict_load_table( sys_tables = dict_table_get_low("SYS_TABLES"); sys_index = UT_LIST_GET_FIRST(sys_tables->indexes); + ut_a(!sys_tables->comp); tuple = dtuple_create(heap, 1); dfield = dtuple_get_nth_field(tuple, 0); @@ -743,7 +744,7 @@ dict_load_table( rec = btr_pcur_get_rec(&pcur); if (!btr_pcur_is_on_user_rec(&pcur, &mtr) - || rec_get_deleted_flag(rec)) { + || rec_get_deleted_flag(rec, sys_tables->comp)) { /* Not found */ btr_pcur_close(&pcur); @@ -753,7 +754,7 @@ dict_load_table( return(NULL); } - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); /* Check if the table name in record is the searched one */ if (len != ut_strlen(name) || ut_memcmp(name, field, len) != 0) { @@ -767,10 +768,9 @@ dict_load_table( ut_a(0 == ut_strcmp("SPACE", dict_field_get_col( - dict_index_get_nth_field( - dict_table_get_first_index(sys_tables), 9))->name)); + dict_index_get_nth_field(sys_index, 9))->name)); - field = rec_get_nth_field(rec, 9, &len); + field = rec_get_nth_field_old(rec, 9, &len); space = mach_read_from_4(field); /* Check if the tablespace exists and has the right name */ @@ -792,43 +792,45 @@ dict_load_table( ut_a(0 == ut_strcmp("N_COLS", dict_field_get_col( - dict_index_get_nth_field( - dict_table_get_first_index(sys_tables), 4))->name)); + dict_index_get_nth_field(sys_index, 4))->name)); - field = rec_get_nth_field(rec, 4, &len); + field = rec_get_nth_field_old(rec, 4, &len); n_cols = mach_read_from_4(field); - table = dict_mem_table_create(name, space, n_cols); + /* table->comp will be initialized later, in this function */ + table = dict_mem_table_create(name, space, n_cols, FALSE); table->ibd_file_missing = ibd_file_missing; ut_a(0 == ut_strcmp("ID", dict_field_get_col( - dict_index_get_nth_field( - dict_table_get_first_index(sys_tables), 3))->name)); + dict_index_get_nth_field(sys_index, 3))->name)); - field = rec_get_nth_field(rec, 3, &len); + field = rec_get_nth_field_old(rec, 3, &len); table->id = mach_read_from_8(field); - field = rec_get_nth_field(rec, 5, &len); + field = rec_get_nth_field_old(rec, 5, &len); table->type = mach_read_from_4(field); if (table->type == DICT_TABLE_CLUSTER_MEMBER) { ut_error; #if 0 /* clustered tables have not been implemented yet */ - field = rec_get_nth_field(rec, 6, &len); + field = rec_get_nth_field_old(rec, 6, &len); table->mix_id = mach_read_from_8(field); - field = rec_get_nth_field(rec, 8, &len); + field = rec_get_nth_field_old(rec, 8, &len); table->cluster_name = mem_heap_strdupl(heap, (char*) field, len); #endif } + /* The high-order bit of MIX_LEN is the "compact format" flag */ + field = rec_get_nth_field_old(rec, 7, &len); + table->comp = !!(mach_read_from_1(field) & 0x80); + if ((table->type == DICT_TABLE_CLUSTER) || (table->type == DICT_TABLE_CLUSTER_MEMBER)) { - - field = rec_get_nth_field(rec, 7, &len); - table->mix_len = mach_read_from_4(field); + + table->mix_len = mach_read_from_4(field) & 0x7fffffff; } btr_pcur_close(&pcur); @@ -906,6 +908,7 @@ dict_load_table_on_id( sys_tables = dict_sys->sys_tables; sys_table_ids = dict_table_get_next_index( dict_table_get_first_index(sys_tables)); + ut_a(!sys_tables->comp); heap = mem_heap_create(256); tuple = dtuple_create(heap, 1); @@ -922,7 +925,7 @@ dict_load_table_on_id( rec = btr_pcur_get_rec(&pcur); if (!btr_pcur_is_on_user_rec(&pcur, &mtr) - || rec_get_deleted_flag(rec)) { + || rec_get_deleted_flag(rec, sys_tables->comp)) { /* Not found */ btr_pcur_close(&pcur); @@ -937,7 +940,7 @@ dict_load_table_on_id( table ID and NAME */ rec = btr_pcur_get_rec(&pcur); - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); ut_ad(len == 8); /* Check if the table id in record is the one searched for */ @@ -951,7 +954,7 @@ dict_load_table_on_id( } /* Now we get the table name from the record */ - field = rec_get_nth_field(rec, 1, &len); + field = rec_get_nth_field_old(rec, 1, &len); /* Load the table definition to memory */ table = dict_load_table(mem_heap_strdupl(heap, (char*) field, len)); @@ -1019,6 +1022,7 @@ dict_load_foreign_cols( sys_foreign_cols = dict_table_get_low("SYS_FOREIGN_COLS"); sys_index = UT_LIST_GET_FIRST(sys_foreign_cols->indexes); + ut_a(!sys_foreign_cols->comp); tuple = dtuple_create(foreign->heap, 1); dfield = dtuple_get_nth_field(tuple, 0); @@ -1033,21 +1037,21 @@ dict_load_foreign_cols( rec = btr_pcur_get_rec(&pcur); ut_a(btr_pcur_is_on_user_rec(&pcur, &mtr)); - ut_a(!rec_get_deleted_flag(rec)); - - field = rec_get_nth_field(rec, 0, &len); + ut_a(!rec_get_deleted_flag(rec, sys_foreign_cols->comp)); + + field = rec_get_nth_field_old(rec, 0, &len); ut_a(len == ut_strlen(id)); ut_a(ut_memcmp(id, field, len) == 0); - field = rec_get_nth_field(rec, 1, &len); + field = rec_get_nth_field_old(rec, 1, &len); ut_a(len == 4); ut_a(i == mach_read_from_4(field)); - field = rec_get_nth_field(rec, 4, &len); + field = rec_get_nth_field_old(rec, 4, &len); foreign->foreign_col_names[i] = mem_heap_strdupl(foreign->heap, (char*) field, len); - field = rec_get_nth_field(rec, 5, &len); + field = rec_get_nth_field_old(rec, 5, &len); foreign->referenced_col_names[i] = mem_heap_strdupl(foreign->heap, (char*) field, len); @@ -1091,6 +1095,7 @@ dict_load_foreign( sys_foreign = dict_table_get_low("SYS_FOREIGN"); sys_index = UT_LIST_GET_FIRST(sys_foreign->indexes); + ut_a(!sys_foreign->comp); tuple = dtuple_create(heap2, 1); dfield = dtuple_get_nth_field(tuple, 0); @@ -1103,7 +1108,7 @@ dict_load_foreign( rec = btr_pcur_get_rec(&pcur); if (!btr_pcur_is_on_user_rec(&pcur, &mtr) - || rec_get_deleted_flag(rec)) { + || rec_get_deleted_flag(rec, sys_foreign->comp)) { /* Not found */ fprintf(stderr, @@ -1117,7 +1122,7 @@ dict_load_foreign( return(DB_ERROR); } - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); /* Check if the id in record is the searched one */ if (len != ut_strlen(id) || ut_memcmp(id, field, len) != 0) { @@ -1140,7 +1145,8 @@ dict_load_foreign( foreign = dict_mem_foreign_create(); - foreign->n_fields = mach_read_from_4(rec_get_nth_field(rec, 5, &len)); + foreign->n_fields = + mach_read_from_4(rec_get_nth_field_old(rec, 5, &len)); ut_a(len == 4); @@ -1151,11 +1157,11 @@ dict_load_foreign( foreign->id = mem_heap_strdup(foreign->heap, id); - field = rec_get_nth_field(rec, 3, &len); + field = rec_get_nth_field_old(rec, 3, &len); foreign->foreign_table_name = mem_heap_strdupl(foreign->heap, (char*) field, len); - - field = rec_get_nth_field(rec, 4, &len); + + field = rec_get_nth_field_old(rec, 4, &len); foreign->referenced_table_name = mem_heap_strdupl(foreign->heap, (char*) field, len); @@ -1224,6 +1230,7 @@ dict_load_foreigns( return(DB_ERROR); } + ut_a(!sys_foreign->comp); mtr_start(&mtr); /* Get the secondary index based on FOR_NAME from table @@ -1255,7 +1262,7 @@ loop: name and a foreign constraint ID */ rec = btr_pcur_get_rec(&pcur); - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); /* Check if the table name in the record is the one searched for; the following call does the comparison in the latin1_swedish_ci @@ -1278,13 +1285,13 @@ loop: goto next_rec; } - if (rec_get_deleted_flag(rec)) { + if (rec_get_deleted_flag(rec, sys_foreign->comp)) { goto next_rec; } /* Now we get a foreign key constraint id */ - field = rec_get_nth_field(rec, 1, &len); + field = rec_get_nth_field_old(rec, 1, &len); id = mem_heap_strdupl(heap, (char*) field, len); btr_pcur_store_position(&pcur, &mtr); diff --git a/innobase/dict/dict0mem.c b/innobase/dict/dict0mem.c index 1d45585aac1..48b9f28d292 100644 --- a/innobase/dict/dict0mem.c +++ b/innobase/dict/dict0mem.c @@ -35,7 +35,8 @@ dict_mem_table_create( the table is placed; this parameter is ignored if the table is made a member of a cluster */ - ulint n_cols) /* in: number of columns */ + ulint n_cols, /* in: number of columns */ + ibool comp) /* in: TRUE=compact page format */ { dict_table_t* table; mem_heap_t* heap; @@ -54,6 +55,7 @@ dict_mem_table_create( table->space = space; table->ibd_file_missing = FALSE; table->tablespace_discarded = FALSE; + table->comp = comp; table->n_def = 0; table->n_cols = n_cols + DATA_N_SYS_COLS; table->mem_fix = 0; @@ -110,7 +112,8 @@ dict_mem_cluster_create( { dict_table_t* cluster; - cluster = dict_mem_table_create(name, space, n_cols); + /* Clustered tables cannot work with the compact record format. */ + cluster = dict_mem_table_create(name, space, n_cols, FALSE); cluster->type = DICT_TABLE_CLUSTER; cluster->mix_len = mix_len; @@ -197,7 +200,7 @@ dict_mem_index_create( index->name = mem_heap_strdup(heap, index_name); index->table_name = table_name; index->table = NULL; - index->n_def = 0; + index->n_def = index->n_nullable = 0; index->n_fields = n_fields; index->fields = mem_heap_alloc(heap, 1 + n_fields * sizeof(dict_field_t)); diff --git a/innobase/fil/fil0fil.c b/innobase/fil/fil0fil.c index 6d3ffcd63f3..dea48117e00 100644 --- a/innobase/fil/fil0fil.c +++ b/innobase/fil/fil0fil.c @@ -1579,30 +1579,38 @@ fil_op_write_log( mtr_t* mtr) /* in: mini-transaction handle */ { byte* log_ptr; + ulint len; + + log_ptr = mlog_open(mtr, 11 + 2); + + if (!log_ptr) { + /* Logging in mtr is switched off during crash recovery: + in that case mlog_open returns NULL */ + return; + } - log_ptr = mlog_open(mtr, 30); - log_ptr = mlog_write_initial_log_record_for_file_op(type, space_id, 0, log_ptr, mtr); /* Let us store the strings as null-terminated for easier readability and handling */ - mach_write_to_2(log_ptr, ut_strlen(name) + 1); + len = strlen(name) + 1; + + mach_write_to_2(log_ptr, len); log_ptr += 2; - mlog_close(mtr, log_ptr); - mlog_catenate_string(mtr, (byte*) name, ut_strlen(name) + 1); + mlog_catenate_string(mtr, (byte*) name, len); if (type == MLOG_FILE_RENAME) { - log_ptr = mlog_open(mtr, 30); - mach_write_to_2(log_ptr, ut_strlen(new_name) + 1); + ulint len = strlen(new_name) + 1; + log_ptr = mlog_open(mtr, 2 + len); + ut_a(log_ptr); + mach_write_to_2(log_ptr, len); log_ptr += 2; - mlog_close(mtr, log_ptr); - mlog_catenate_string(mtr, (byte*) new_name, - ut_strlen(new_name) + 1); + mlog_catenate_string(mtr, (byte*) new_name, len); } } #endif diff --git a/innobase/fsp/fsp0fsp.c b/innobase/fsp/fsp0fsp.c index e1621cc2765..ef8e70646c6 100644 --- a/innobase/fsp/fsp0fsp.c +++ b/innobase/fsp/fsp0fsp.c @@ -910,7 +910,7 @@ fsp_header_init( if (space == 0) { fsp_fill_free_list(FALSE, space, header, mtr); btr_create(DICT_CLUSTERED | DICT_UNIVERSAL | DICT_IBUF, space, - ut_dulint_add(DICT_IBUF_ID_MIN, space), mtr); + ut_dulint_add(DICT_IBUF_ID_MIN, space), FALSE, mtr); } else { fsp_fill_free_list(TRUE, space, header, mtr); } diff --git a/innobase/ibuf/ibuf0ibuf.c b/innobase/ibuf/ibuf0ibuf.c index 2191cdc0ee6..c7ca03f9901 100644 --- a/innobase/ibuf/ibuf0ibuf.c +++ b/innobase/ibuf/ibuf0ibuf.c @@ -46,7 +46,7 @@ Note that contary to what we planned in the 1990's, there will only be one insert buffer tree, and that is in the system tablespace of InnoDB. 1. The first field is the space id. -2. The second field is a one-byte marker which differentiates records from +2. The second field is a one-byte marker (0) which differentiates records from the < 4.1.x storage format. 3. The third field is the page number. 4. The fourth field contains the type info, where we have also added 2 bytes to @@ -55,7 +55,14 @@ insert buffer tree, and that is in the system tablespace of InnoDB. can use in the binary search on the index page in the ibuf merge phase. 5. The rest of the fields contain the fields of the actual index record. -*/ +In versions >= 5.0.3: + +The first byte of the fourth field is an additional marker (0) if the record +is in the compact format. The presence of this marker can be detected by +looking at the length of the field modulo DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE. + +The high-order bit of the character set field in the type info is the +"nullable" flag for the field. */ /* PREVENTING DEADLOCKS IN THE INSERT BUFFER SYSTEM @@ -525,8 +532,8 @@ ibuf_data_init_for_space( ibuf_exit(); sprintf(buf, "SYS_IBUF_TABLE_%lu", (ulong) space); - - table = dict_mem_table_create(buf, space, 2); + /* use old-style record format for the insert buffer */ + table = dict_mem_table_create(buf, space, 2, FALSE); dict_mem_table_add_col(table, "PAGE_NO", DATA_BINARY, 0, 0, 0); dict_mem_table_add_col(table, "TYPES", DATA_BINARY, 0, 0, 0); @@ -1049,20 +1056,20 @@ ibuf_rec_get_page_no( ulint len; ut_ad(ibuf_inside()); - ut_ad(rec_get_n_fields(rec) > 2); + ut_ad(rec_get_n_fields_old(rec) > 2); - field = rec_get_nth_field(rec, 1, &len); + field = rec_get_nth_field_old(rec, 1, &len); if (len == 1) { /* This is of the >= 4.1.x record format */ ut_a(trx_sys_multiple_tablespace_format); - field = rec_get_nth_field(rec, 2, &len); + field = rec_get_nth_field_old(rec, 2, &len); } else { ut_a(trx_doublewrite_must_reset_space_ids); ut_a(!trx_sys_multiple_tablespace_format); - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); } ut_a(len == 4); @@ -1084,15 +1091,15 @@ ibuf_rec_get_space( ulint len; ut_ad(ibuf_inside()); - ut_ad(rec_get_n_fields(rec) > 2); + ut_ad(rec_get_n_fields_old(rec) > 2); - field = rec_get_nth_field(rec, 1, &len); + field = rec_get_nth_field_old(rec, 1, &len); if (len == 1) { /* This is of the >= 4.1.x record format */ ut_a(trx_sys_multiple_tablespace_format); - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); ut_a(len == 4); return(mach_read_from_4(field)); @@ -1104,6 +1111,161 @@ ibuf_rec_get_space( return(0); } +/************************************************************************ +Creates a dummy index for inserting a record to a non-clustered index. +*/ +static +dict_index_t* +ibuf_dummy_index_create( +/*====================*/ + /* out: dummy index */ + ulint n, /* in: number of fields */ + ibool comp) /* in: TRUE=use compact record format */ +{ + dict_table_t* table; + dict_index_t* index; + table = dict_mem_table_create("IBUF_DUMMY", + DICT_HDR_SPACE, n, comp); + index = dict_mem_index_create("IBUF_DUMMY", "IBUF_DUMMY", + DICT_HDR_SPACE, 0, n); + index->table = table; + /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ + index->cached = TRUE; + return(index); +} +/************************************************************************ +Add a column to the dummy index */ +static +void +ibuf_dummy_index_add_col( +/*====================*/ + dict_index_t* index, /* in: dummy index */ + dtype_t* type) /* in: the data type of the column */ +{ + ulint i = index->table->n_def; + dict_mem_table_add_col(index->table, "DUMMY", + dtype_get_mtype(type), + dtype_get_prtype(type), + dtype_get_len(type), + dtype_get_prec(type)); + dict_index_add_col(index, + dict_table_get_nth_col(index->table, i), 0, 0); +} +/************************************************************************ +Deallocates a dummy index for inserting a record to a non-clustered index. +*/ +static +void +ibuf_dummy_index_free( +/*====================*/ + dict_index_t* index) /* in: dummy index */ +{ + dict_table_t* table = index->table; + mem_heap_free(index->heap); + mutex_free(&(table->autoinc_mutex)); + mem_heap_free(table->heap); +} + +/************************************************************************* +Builds the entry to insert into a non-clustered index when we have the +corresponding record in an ibuf index. */ +static +dtuple_t* +ibuf_build_entry_from_ibuf_rec( +/*===========================*/ + /* out, own: entry to insert to + a non-clustered index; NOTE that + as we copy pointers to fields in + ibuf_rec, the caller must hold a + latch to the ibuf_rec page as long + as the entry is used! */ + rec_t* ibuf_rec, /* in: record in an insert buffer */ + mem_heap_t* heap, /* in: heap where built */ + dict_index_t** pindex) /* out, own: dummy index that + describes the entry */ +{ + dtuple_t* tuple; + dfield_t* field; + ulint n_fields; + byte* types; + const byte* data; + ulint len; + ulint i; + dict_index_t* index; + + data = rec_get_nth_field_old(ibuf_rec, 1, &len); + + if (len > 1) { + /* This a < 4.1.x format record */ + + ut_a(trx_doublewrite_must_reset_space_ids); + ut_a(!trx_sys_multiple_tablespace_format); + + n_fields = rec_get_n_fields_old(ibuf_rec) - 2; + tuple = dtuple_create(heap, n_fields); + types = rec_get_nth_field_old(ibuf_rec, 1, &len); + + ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE); + + for (i = 0; i < n_fields; i++) { + field = dtuple_get_nth_field(tuple, i); + + data = rec_get_nth_field_old(ibuf_rec, i + 2, &len); + + dfield_set_data(field, data, len); + + dtype_read_for_order_and_null_size( + dfield_get_type(field), + types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE); + } + + *pindex = ibuf_dummy_index_create(n_fields, FALSE); + return(tuple); + } + + /* This a >= 4.1.x format record */ + + ut_a(trx_sys_multiple_tablespace_format); + ut_a(*data == 0); + ut_a(rec_get_n_fields_old(ibuf_rec) > 4); + + n_fields = rec_get_n_fields_old(ibuf_rec) - 4; + + tuple = dtuple_create(heap, n_fields); + + types = rec_get_nth_field_old(ibuf_rec, 3, &len); + + ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1); + index = ibuf_dummy_index_create(n_fields, + len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + + if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) { + /* compact record format */ + len--; + ut_a(*types == 0); + types++; + } + + ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + + for (i = 0; i < n_fields; i++) { + field = dtuple_get_nth_field(tuple, i); + + data = rec_get_nth_field_old(ibuf_rec, i + 4, &len); + + dfield_set_data(field, data, len); + + dtype_new_read_for_order_and_null_size( + dfield_get_type(field), + types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + + ibuf_dummy_index_add_col(index, dfield_get_type(field)); + } + + *pindex = index; + return(tuple); +} + /************************************************************************ Returns the space taken by a stored non-clustered index entry if converted to an index record. */ @@ -1125,43 +1287,60 @@ ibuf_rec_get_volume( ulint i; ut_ad(ibuf_inside()); - ut_ad(rec_get_n_fields(ibuf_rec) > 2); - - data = rec_get_nth_field(ibuf_rec, 1, &len); + ut_ad(rec_get_n_fields_old(ibuf_rec) > 2); + + data = rec_get_nth_field_old(ibuf_rec, 1, &len); if (len > 1) { - /* < 4.1.x format record */ + /* < 4.1.x format record */ ut_a(trx_doublewrite_must_reset_space_ids); ut_a(!trx_sys_multiple_tablespace_format); - n_fields = rec_get_n_fields(ibuf_rec) - 2; + n_fields = rec_get_n_fields_old(ibuf_rec) - 2; - types = rec_get_nth_field(ibuf_rec, 1, &len); + types = rec_get_nth_field_old(ibuf_rec, 1, &len); ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE); } else { - /* >= 4.1.x format record */ + /* >= 4.1.x format record */ ut_a(trx_sys_multiple_tablespace_format); - new_format = TRUE; + ut_a(*data == 0); + + types = rec_get_nth_field_old(ibuf_rec, 3, &len); + + ut_a(len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE <= 1); + if (len % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE) { + /* compact record format */ + ulint volume; + dict_index_t* dummy_index; + mem_heap_t* heap = mem_heap_create(500); + dtuple_t* entry = + ibuf_build_entry_from_ibuf_rec( + ibuf_rec, heap, &dummy_index); + volume = rec_get_converted_size(dummy_index, entry); + ibuf_dummy_index_free(dummy_index); + mem_heap_free(heap); + return(volume + page_dir_calc_reserved_space(1)); + } - n_fields = rec_get_n_fields(ibuf_rec) - 4; + n_fields = rec_get_n_fields_old(ibuf_rec) - 4; - types = rec_get_nth_field(ibuf_rec, 3, &len); + new_format = TRUE; } for (i = 0; i < n_fields; i++) { if (new_format) { - data = rec_get_nth_field(ibuf_rec, i + 4, &len); + data = rec_get_nth_field_old(ibuf_rec, i + 4, &len); dtype_new_read_for_order_and_null_size(&dtype, - types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); } else { - data = rec_get_nth_field(ibuf_rec, i + 2, &len); + data = rec_get_nth_field_old(ibuf_rec, i + 2, &len); dtype_read_for_order_and_null_size(&dtype, - types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE); + types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE); } if (len == UNIV_SQL_NULL) { @@ -1187,6 +1366,7 @@ ibuf_entry_build( must be kept because we copy pointers to its fields */ dtuple_t* entry, /* in: entry for a non-clustered index */ + ibool comp, /* in: flag: TRUE=compact record format */ ulint space, /* in: space id */ ulint page_no,/* in: index page number where entry should be inserted */ @@ -1202,11 +1382,14 @@ ibuf_entry_build( /* Starting from 4.1.x, we have to build a tuple whose (1) first field is the space id, - (2) the second field a single marker byte to tell that this + (2) the second field a single marker byte (0) to tell that this is a new format record, (3) the third contains the page number, and (4) the fourth contains the relevent type information of each data - field, + field; the length of this field % DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE is + (a) 0 for b-trees in the old format, and + (b) 1 for b-trees in the compact format, the first byte of the field + being the marker (0); (5) and the rest of the fields are copied from entry. All fields in the tuple are ordered like the type binary in our insert buffer tree. */ @@ -1247,10 +1430,15 @@ ibuf_entry_build( dfield_set_data(field, buf, 4); + ut_ad(comp == 0 || comp == 1); /* Store the type info in buf2, and add the fields from entry to tuple */ buf2 = mem_heap_alloc(heap, n_fields - * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE + + comp); + if (comp) { + *buf2++ = 0; /* write the compact format indicator */ + } for (i = 0; i < n_fields; i++) { /* We add 4 below because we have the 4 extra fields at the start of an ibuf record */ @@ -1268,8 +1456,13 @@ ibuf_entry_build( field = dtuple_get_nth_field(tuple, 3); + if (comp) { + buf2--; + } + dfield_set_data(field, buf2, n_fields - * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); + * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE + + comp); /* Set all the types in the new tuple binary */ dtuple_set_types_binary(tuple, n_fields + 4); @@ -1277,88 +1470,6 @@ ibuf_entry_build( return(tuple); } -/************************************************************************* -Builds the entry to insert into a non-clustered index when we have the -corresponding record in an ibuf index. */ -static -dtuple_t* -ibuf_build_entry_from_ibuf_rec( -/*===========================*/ - /* out, own: entry to insert to - a non-clustered index; NOTE that - as we copy pointers to fields in - ibuf_rec, the caller must hold a - latch to the ibuf_rec page as long - as the entry is used! */ - rec_t* ibuf_rec, /* in: record in an insert buffer */ - mem_heap_t* heap) /* in: heap where built */ -{ - dtuple_t* tuple; - dfield_t* field; - ulint n_fields; - byte* types; - byte* data; - ulint len; - ulint i; - - data = rec_get_nth_field(ibuf_rec, 1, &len); - - if (len > 1) { - /* This a < 4.1.x format record */ - - ut_a(trx_doublewrite_must_reset_space_ids); - ut_a(!trx_sys_multiple_tablespace_format); - - n_fields = rec_get_n_fields(ibuf_rec) - 2; - tuple = dtuple_create(heap, n_fields); - types = rec_get_nth_field(ibuf_rec, 1, &len); - - ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE); - - for (i = 0; i < n_fields; i++) { - field = dtuple_get_nth_field(tuple, i); - - data = rec_get_nth_field(ibuf_rec, i + 2, &len); - - dfield_set_data(field, data, len); - - dtype_read_for_order_and_null_size( - dfield_get_type(field), - types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE); - } - - return(tuple); - } - - /* This a >= 4.1.x format record */ - - ut_a(trx_sys_multiple_tablespace_format); - - ut_a(rec_get_n_fields(ibuf_rec) > 4); - - n_fields = rec_get_n_fields(ibuf_rec) - 4; - - tuple = dtuple_create(heap, n_fields); - - types = rec_get_nth_field(ibuf_rec, 3, &len); - - ut_a(len == n_fields * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); - - for (i = 0; i < n_fields; i++) { - field = dtuple_get_nth_field(tuple, i); - - data = rec_get_nth_field(ibuf_rec, i + 4, &len); - - dfield_set_data(field, data, len); - - dtype_new_read_for_order_and_null_size( - dfield_get_type(field), - types + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); - } - - return(tuple); -} - /************************************************************************* Builds a search tuple used to search buffered inserts for an index page. This is for < 4.1.x format records */ @@ -2047,8 +2158,7 @@ loop: mutex_exit(&ibuf_mutex); sum_sizes = ibuf_get_merge_page_nos(TRUE, btr_pcur_get_rec(&pcur), - space_ids, space_versions, page_nos, - &n_stored); + space_ids, space_versions, page_nos, &n_stored); #ifdef UNIV_IBUF_DEBUG /* fprintf(stderr, "Ibuf contract sync %lu pages %lu volume %lu\n", sync, n_stored, sum_sizes); */ @@ -2344,6 +2454,7 @@ ibuf_update_max_tablespace_id(void) ibuf_data = fil_space_get_ibuf_data(0); ibuf_index = ibuf_data->index; + ut_a(!ibuf_index->table->comp); ibuf_enter(); @@ -2360,7 +2471,7 @@ ibuf_update_max_tablespace_id(void) } else { rec = btr_pcur_get_rec(&pcur); - field = rec_get_nth_field(rec, 0, &len); + field = rec_get_nth_field_old(rec, 0, &len); ut_a(len == 4); @@ -2479,7 +2590,7 @@ ibuf_insert_low( ibuf_enter(); } - entry_size = rec_get_converted_size(entry); + entry_size = rec_get_converted_size(index, entry); heap = mem_heap_create(512); @@ -2487,7 +2598,8 @@ ibuf_insert_low( the first fields and the type information for other fields, and which will be inserted to the insert buffer. */ - ibuf_entry = ibuf_entry_build(entry, space, page_no, heap); + ibuf_entry = ibuf_entry_build(entry, index->table->comp, + space, page_no, heap); /* Open a cursor to the insert buffer tree to calculate if we can add the new entry to it without exceeding the free space limit for the @@ -2532,8 +2644,8 @@ ibuf_insert_low( do_merge = TRUE; ibuf_get_merge_page_nos(FALSE, btr_pcur_get_rec(&pcur), - space_ids, space_versions, page_nos, - &n_stored); + space_ids, space_versions, + page_nos, &n_stored); goto function_exit; } @@ -2656,8 +2768,8 @@ ibuf_insert( ut_a(!(index->type & DICT_CLUSTERED)); - if (rec_get_converted_size(entry) - >= page_get_free_space_of_empty() / 2) { + if (rec_get_converted_size(index, entry) + >= page_get_free_space_of_empty(index->table->comp) / 2) { return(FALSE); } @@ -2692,6 +2804,7 @@ ibuf_insert_to_index_page( dtuple_t* entry, /* in: buffered entry to insert */ page_t* page, /* in: index page where the buffered entry should be placed */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr) /* in: mtr */ { page_cur_t page_cur; @@ -2699,17 +2812,28 @@ ibuf_insert_to_index_page( rec_t* rec; page_t* bitmap_page; ulint old_bits; + mem_heap_t* heap; ut_ad(ibuf_inside()); ut_ad(dtuple_check_typed(entry)); - if (rec_get_n_fields(page_rec_get_next(page_get_infimum_rec(page))) - != dtuple_get_n_fields(entry)) { - - fprintf(stderr, + if (index->table->comp != page_is_comp(page)) { + fputs( "InnoDB: Trying to insert a record from the insert buffer to an index page\n" -"InnoDB: but the number of fields does not match!\n"); +"InnoDB: but the 'compact' flag does not match!\n", stderr); + goto dump; + } + heap = mem_heap_create(100); + rec = page_rec_get_next(page_get_infimum_rec(page)); + + if (rec_offs_n_fields(rec_get_offsets(rec, index, ULINT_UNDEFINED, + heap)) != dtuple_get_n_fields(entry)) { + mem_heap_free(heap); + fputs( +"InnoDB: Trying to insert a record from the insert buffer to an index page\n" +"InnoDB: but the number of fields does not match!\n", stderr); + dump: buf_page_print(page); dtuple_print(stderr, entry); @@ -2723,31 +2847,35 @@ ibuf_insert_to_index_page( return; } - low_match = page_cur_search(page, entry, PAGE_CUR_LE, &page_cur); + mem_heap_free(heap); + low_match = page_cur_search(page, index, entry, + PAGE_CUR_LE, &page_cur); if (low_match == dtuple_get_n_fields(entry)) { rec = page_cur_get_rec(&page_cur); - btr_cur_del_unmark_for_ibuf(rec, mtr); + btr_cur_del_unmark_for_ibuf(rec, index, mtr); } else { - rec = page_cur_tuple_insert(&page_cur, entry, mtr); + rec = page_cur_tuple_insert(&page_cur, entry, index, mtr); if (rec == NULL) { /* If the record did not fit, reorganize */ - btr_page_reorganize(page, mtr); + btr_page_reorganize(page, index, mtr); - page_cur_search(page, entry, PAGE_CUR_LE, &page_cur); + page_cur_search(page, index, entry, + PAGE_CUR_LE, &page_cur); /* This time the record must fit */ - if (!page_cur_tuple_insert(&page_cur, entry, mtr)) { + if (!page_cur_tuple_insert(&page_cur, entry, + index, mtr)) { ut_print_timestamp(stderr); fprintf(stderr, "InnoDB: Error: Insert buffer insert fails; page free %lu, dtuple size %lu\n", (ulong) page_get_max_insert_size(page, 1), - (ulong) rec_get_converted_size(entry)); + (ulong) rec_get_converted_size(index, entry)); fputs("InnoDB: Cannot insert index record ", stderr); dtuple_print(stderr, entry); @@ -2836,11 +2964,12 @@ ibuf_delete_rec( "InnoDB: ibuf record inserted to page %lu\n", (ulong) page_no); fflush(stderr); - rec_print(stderr, btr_pcur_get_rec(pcur)); - rec_print(stderr, pcur->old_rec); + rec_print_old(stderr, btr_pcur_get_rec(pcur)); + rec_print_old(stderr, pcur->old_rec); dtuple_print(stderr, search_tuple); - rec_print(stderr, page_rec_get_next(btr_pcur_get_rec(pcur))); + rec_print_old(stderr, + page_rec_get_next(btr_pcur_get_rec(pcur))); fflush(stderr); btr_pcur_commit_specify_mtr(pcur, mtr); @@ -3075,7 +3204,7 @@ loop: if (corruption_noticed) { fputs("InnoDB: Discarding record\n ", stderr); - rec_print(stderr, ibuf_rec); + rec_print_old(stderr, ibuf_rec); fputs("\n from the insert buffer!\n\n", stderr); } else if (page) { /* Now we have at pcur a record which should be @@ -3083,19 +3212,22 @@ loop: copies pointers to fields in ibuf_rec, and we must keep the latch to the ibuf_rec page until the insertion is finished! */ - - dulint max_trx_id = page_get_max_trx_id( + dict_index_t* dummy_index; + dulint max_trx_id = page_get_max_trx_id( buf_frame_align(ibuf_rec)); page_update_max_trx_id(page, max_trx_id); - entry = ibuf_build_entry_from_ibuf_rec(ibuf_rec, heap); + entry = ibuf_build_entry_from_ibuf_rec(ibuf_rec, + heap, &dummy_index); #ifdef UNIV_IBUF_DEBUG - volume += rec_get_converted_size(entry) + volume += rec_get_converted_size(dummy_index, entry) + page_dir_calc_reserved_space(1); ut_a(volume <= 4 * UNIV_PAGE_SIZE / IBUF_PAGE_SIZE_PER_FREE_SPACE); #endif - ibuf_insert_to_index_page(entry, page, &mtr); + ibuf_insert_to_index_page(entry, page, + dummy_index, &mtr); + ibuf_dummy_index_free(dummy_index); } n_inserts++; diff --git a/innobase/include/btr0btr.h b/innobase/include/btr0btr.h index 8606fcd2a5c..0b19e64d4e0 100644 --- a/innobase/include/btr0btr.h +++ b/innobase/include/btr0btr.h @@ -155,7 +155,8 @@ ulint btr_node_ptr_get_child_page_no( /*===========================*/ /* out: child node address */ - rec_t* rec); /* in: node pointer record */ + rec_t* rec, /* in: node pointer record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /**************************************************************** Creates the root node for a new index tree. */ @@ -167,6 +168,7 @@ btr_create( ulint type, /* in: type of the index */ ulint space, /* in: space where created */ dulint index_id,/* in: index id */ + ibool comp, /* in: TRUE=compact page format */ mtr_t* mtr); /* in: mini-transaction handle */ /**************************************************************** Frees a B-tree except the root page, which MUST be freed after this @@ -210,8 +212,9 @@ Reorganizes an index page. */ void btr_page_reorganize( /*================*/ - page_t* page, /* in: page to be reorganized */ - mtr_t* mtr); /* in: mtr */ + page_t* page, /* in: page to be reorganized */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Decides if the page should be split at the convergence point of inserts converging to left. */ @@ -273,6 +276,7 @@ void btr_set_min_rec_mark( /*=================*/ rec_t* rec, /* in: record */ + ibool comp, /* in: TRUE=compact page format */ mtr_t* mtr); /* in: mtr */ /***************************************************************** Deletes on the upper level the node pointer to a page. */ @@ -332,6 +336,7 @@ btr_parse_set_min_rec_mark( /* out: end of log record or NULL */ byte* ptr, /* in: buffer */ byte* end_ptr,/* in: buffer end */ + ibool comp, /* in: TRUE=compact page format */ page_t* page, /* in: page or NULL */ mtr_t* mtr); /* in: mtr or NULL */ /*************************************************************** @@ -340,11 +345,12 @@ Parses a redo log record of reorganizing a page. */ byte* btr_parse_page_reorganize( /*======================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ /****************************************************************** Gets the number of pages in a B-tree. */ diff --git a/innobase/include/btr0btr.ic b/innobase/include/btr0btr.ic index b0aa0756307..1d1f97d3668 100644 --- a/innobase/include/btr0btr.ic +++ b/innobase/include/btr0btr.ic @@ -183,17 +183,18 @@ ulint btr_node_ptr_get_child_page_no( /*===========================*/ /* out: child node address */ - rec_t* rec) /* in: node pointer record */ + rec_t* rec, /* in: node pointer record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - ulint n_fields; byte* field; ulint len; ulint page_no; - n_fields = rec_get_n_fields(rec); + ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec)); /* The child address is in the last field */ - field = rec_get_nth_field(rec, n_fields - 1, &len); + field = rec_get_nth_field(rec, offsets, + rec_offs_n_fields(offsets) - 1, &len); ut_ad(len == 4); diff --git a/innobase/include/btr0cur.h b/innobase/include/btr0cur.h index f1334656d53..0a8d8ceaeb7 100644 --- a/innobase/include/btr0cur.h +++ b/innobase/include/btr0cur.h @@ -34,7 +34,7 @@ page_cur_t* btr_cur_get_page_cur( /*=================*/ /* out: pointer to page cursor component */ - btr_cur_t* cursor); /* in: tree cursor */ + btr_cur_t* cursor);/* in: tree cursor */ /************************************************************* Returns the record pointer of a tree cursor. */ UNIV_INLINE @@ -42,14 +42,14 @@ rec_t* btr_cur_get_rec( /*============*/ /* out: pointer to record */ - btr_cur_t* cursor); /* in: tree cursor */ + btr_cur_t* cursor);/* in: tree cursor */ /************************************************************* Invalidates a tree cursor by setting record pointer to NULL. */ UNIV_INLINE void btr_cur_invalidate( /*===============*/ - btr_cur_t* cursor); /* in: tree cursor */ + btr_cur_t* cursor);/* in: tree cursor */ /************************************************************* Returns the page of a tree cursor. */ UNIV_INLINE @@ -57,7 +57,7 @@ page_t* btr_cur_get_page( /*=============*/ /* out: pointer to page */ - btr_cur_t* cursor); /* in: tree cursor */ + btr_cur_t* cursor);/* in: tree cursor */ /************************************************************* Returns the tree of a cursor. */ UNIV_INLINE @@ -65,7 +65,7 @@ dict_tree_t* btr_cur_get_tree( /*=============*/ /* out: tree */ - btr_cur_t* cursor); /* in: tree cursor */ + btr_cur_t* cursor);/* in: tree cursor */ /************************************************************* Positions a tree cursor at a given record. */ UNIV_INLINE @@ -283,8 +283,9 @@ only used by the insert buffer insert merge mechanism. */ void btr_cur_del_unmark_for_ibuf( /*========================*/ - rec_t* rec, /* in: record to delete unmark */ - mtr_t* mtr); /* in: mtr */ + rec_t* rec, /* in: record to delete unmark */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Tries to compress a page of the tree on the leaf level. It is assumed that mtr holds an x-latch on the tree and on the cursor page. To avoid @@ -361,10 +362,11 @@ Parses a redo log record of updating a record in-place. */ byte* btr_cur_parse_update_in_place( /*==========================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page); /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + dict_index_t* index); /* in: index corresponding to page */ /******************************************************************** Parses the redo log record for delete marking or unmarking of a clustered index record. */ @@ -372,10 +374,11 @@ index record. */ byte* btr_cur_parse_del_mark_set_clust_rec( /*=================================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page); /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: index corresponding to page */ + page_t* page); /* in: page or NULL */ /******************************************************************** Parses the redo log record for delete marking or unmarking of a secondary index record. */ @@ -383,10 +386,11 @@ index record. */ byte* btr_cur_parse_del_mark_set_sec_rec( /*===============================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page); /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: index corresponding to page */ + page_t* page); /* in: page or NULL */ /*********************************************************************** Estimates the number of rows in a given index range. */ @@ -417,9 +421,10 @@ to free the field. */ void btr_cur_mark_extern_inherited_fields( /*=================================*/ - rec_t* rec, /* in: record in a clustered index */ - upd_t* update, /* in: update vector */ - mtr_t* mtr); /* in: mtr */ + rec_t* rec, /* in: record in a clustered index */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update, /* in: update vector */ + mtr_t* mtr); /* in: mtr */ /*********************************************************************** The complement of the previous function: in an update entry may inherit some externally stored fields from a record. We must mark them as inherited @@ -456,6 +461,7 @@ btr_store_big_rec_extern_fields( dict_index_t* index, /* in: index of rec; the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets, /* in: rec_get_offsets(rec, index) */ big_rec_t* big_rec_vec, /* in: vector containing fields to be stored externally */ mtr_t* local_mtr); /* in: mtr containing the latch to @@ -496,6 +502,7 @@ btr_rec_free_externally_stored_fields( dict_index_t* index, /* in: index of the data, the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ibool do_not_free_inherited,/* in: TRUE if called in a rollback and we do not want to free inherited fields */ @@ -510,6 +517,7 @@ btr_rec_copy_externally_stored_field( /*=================================*/ /* out: the field copied to heap */ rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint no, /* in: field number */ ulint* len, /* out: length of the field */ mem_heap_t* heap); /* in: mem heap */ @@ -540,10 +548,10 @@ ulint btr_push_update_extern_fields( /*==========================*/ /* out: number of values stored in ext_vect */ - ulint* ext_vect, /* in: array of ulints, must be preallocated - to have place for all fields in rec */ - rec_t* rec, /* in: record */ - upd_t* update); /* in: update vector */ + ulint* ext_vect,/* in: array of ulints, must be preallocated + to have space for all fields in rec */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update);/* in: update vector or NULL */ /*######################################################################*/ diff --git a/innobase/include/btr0cur.ic b/innobase/include/btr0cur.ic index a3a04b60c45..dcad3e9e14d 100644 --- a/innobase/include/btr0cur.ic +++ b/innobase/include/btr0cur.ic @@ -134,17 +134,15 @@ btr_cur_can_delete_without_compress( /* out: TRUE if can be deleted without recommended compression */ btr_cur_t* cursor, /* in: btr cursor */ + ulint rec_size,/* in: rec_get_size(btr_cur_get_rec(cursor))*/ mtr_t* mtr) /* in: mtr */ { - ulint rec_size; page_t* page; ut_ad(mtr_memo_contains(mtr, buf_block_align( btr_cur_get_page(cursor)), MTR_MEMO_PAGE_X_FIX)); - rec_size = rec_get_size(btr_cur_get_rec(cursor)); - page = btr_cur_get_page(cursor); if ((page_get_data_size(page) - rec_size < BTR_CUR_PAGE_COMPRESS_LIMIT) diff --git a/innobase/include/btr0pcur.h b/innobase/include/btr0pcur.h index 81f19af4d40..6384222be51 100644 --- a/innobase/include/btr0pcur.h +++ b/innobase/include/btr0pcur.h @@ -462,6 +462,7 @@ struct btr_pcur_struct{ contains an initial segment of the latest record cursor was positioned either on, before, or after */ + ulint old_n_fields; /* number of fields in old_rec */ ulint rel_pos; /* BTR_PCUR_ON, BTR_PCUR_BEFORE, or BTR_PCUR_AFTER, depending on whether cursor was on, before, or after the diff --git a/innobase/include/btr0sea.h b/innobase/include/btr0sea.h index ce4140ecf92..73cd95d1464 100644 --- a/innobase/include/btr0sea.h +++ b/innobase/include/btr0sea.h @@ -77,8 +77,10 @@ parameters as page (this often happens when a page is split). */ void btr_search_move_or_delete_hash_entries( /*===================================*/ - page_t* new_page, /* in: records are copied to this page */ - page_t* page); /* in: index page */ + page_t* new_page, /* in: records are copied + to this page */ + page_t* page, /* in: index page */ + dict_index_t* index); /* in: record descriptor */ /************************************************************************ Drops a page hash index. */ @@ -128,9 +130,10 @@ btr_search_update_hash_on_delete( Validates the search system. */ ibool -btr_search_validate(void); -/*=====================*/ - +btr_search_validate( +/*================*/ + /* out: TRUE if ok */ + dict_index_t* index); /* in: record descriptor */ /* Search info directions */ #define BTR_SEA_NO_DIRECTION 1 diff --git a/innobase/include/data0type.ic b/innobase/include/data0type.ic index 946b646ffbf..5d6fcf581e5 100644 --- a/innobase/include/data0type.ic +++ b/innobase/include/data0type.ic @@ -149,8 +149,10 @@ dtype_new_store_for_order_and_null_size( bytes where we store the info */ dtype_t* type) /* in: type struct */ { - ut_ad(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); - +#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE +#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE" +#endif + buf[0] = (byte)(type->mtype & 0xFFUL); if (type->prtype & DATA_BINARY_TYPE) { @@ -166,10 +168,12 @@ dtype_new_store_for_order_and_null_size( mach_write_to_2(buf + 2, type->len & 0xFFFFUL); + ut_ad(dtype_get_charset_coll(type->prtype) < 256); mach_write_to_2(buf + 4, dtype_get_charset_coll(type->prtype)); - /* Note that the second last byte is left unused, because the - charset-collation code is always < 256 */ + if (type->prtype & DATA_NOT_NULL) { + buf[4] |= 128; + } } /************************************************************************** @@ -211,20 +215,26 @@ dtype_new_read_for_order_and_null_size( { ulint charset_coll; - ut_ad(6 == DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE); +#if 6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE +#error "6 != DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE" +#endif type->mtype = buf[0] & 63; type->prtype = buf[1]; if (buf[0] & 128) { - type->prtype = type->prtype | DATA_BINARY_TYPE; + type->prtype |= DATA_BINARY_TYPE; + } + + if (buf[4] & 128) { + type->prtype |= DATA_NOT_NULL; } type->len = mach_read_from_2(buf + 2); mach_read_from_2(buf + 4); - charset_coll = mach_read_from_2(buf + 4); + charset_coll = mach_read_from_2(buf + 4) & 0x7fff; if (dtype_is_string_type(type->mtype)) { ut_a(charset_coll < 256); @@ -257,6 +267,26 @@ dtype_get_fixed_size( mtype = dtype_get_mtype(type); switch (mtype) { + case DATA_SYS: +#ifdef UNIV_DEBUG + switch (type->prtype & DATA_MYSQL_TYPE_MASK) { + default: + ut_ad(0); + return(0); + case DATA_ROW_ID: + ut_ad(type->len == DATA_ROW_ID_LEN); + break; + case DATA_TRX_ID: + ut_ad(type->len == DATA_TRX_ID_LEN); + break; + case DATA_ROLL_PTR: + ut_ad(type->len == DATA_ROLL_PTR_LEN); + break; + case DATA_MIX_ID: + ut_ad(type->len == DATA_MIX_ID_LEN); + break; + } +#endif /* UNIV_DEBUG */ case DATA_CHAR: case DATA_FIXBINARY: case DATA_INT: @@ -264,16 +294,6 @@ dtype_get_fixed_size( case DATA_DOUBLE: case DATA_MYSQL: return(dtype_get_len(type)); - - case DATA_SYS: if (type->prtype == DATA_ROW_ID) { - return(DATA_ROW_ID_LEN); - } else if (type->prtype == DATA_TRX_ID) { - return(DATA_TRX_ID_LEN); - } else if (type->prtype == DATA_ROLL_PTR) { - return(DATA_ROLL_PTR_LEN); - } else { - return(0); - } case DATA_VARCHAR: case DATA_BINARY: case DATA_DECIMAL: diff --git a/innobase/include/dict0dict.h b/innobase/include/dict0dict.h index ca632691450..a2399a81ca9 100644 --- a/innobase/include/dict0dict.h +++ b/innobase/include/dict0dict.h @@ -639,6 +639,16 @@ dict_index_get_sys_col_pos( dict_index_t* index, /* in: index */ ulint type); /* in: DATA_ROW_ID, ... */ /*********************************************************************** +Adds a column to index. */ + +void +dict_index_add_col( +/*===============*/ + dict_index_t* index, /* in: index */ + dict_col_t* col, /* in: column */ + ulint order, /* in: order criterion */ + ulint prefix_len); /* in: column prefix length */ +/*********************************************************************** Copies types of fields contained in index to tuple. */ void @@ -657,6 +667,7 @@ dict_index_rec_get_sys_col( /*=======================*/ /* out: system column value */ dict_index_t* index, /* in: clustered index describing the record */ + const ulint* offsets,/* in: offsets returned by rec_get_offsets() */ ulint type, /* in: column type: DATA_ROLL_PTR, ... */ rec_t* rec); /* in: record */ /************************************************************************* @@ -770,6 +781,7 @@ dict_tree_copy_rec_order_prefix( /* out: pointer to the prefix record */ dict_tree_t* tree, /* in: index tree */ rec_t* rec, /* in: record for which to copy prefix */ + ulint* n_fields,/* out: number of fields copied */ byte** buf, /* in/out: memory buffer for the copied prefix, or NULL */ ulint* buf_size);/* in/out: buffer size */ @@ -782,6 +794,7 @@ dict_tree_build_data_tuple( /* out, own: data tuple */ dict_tree_t* tree, /* in: index tree */ rec_t* rec, /* in: record for which to build data tuple */ + ulint n_fields,/* in: number of data fields */ mem_heap_t* heap); /* in: memory heap where tuple created */ /************************************************************************* Gets the space id of the root of the index tree. */ diff --git a/innobase/include/dict0dict.ic b/innobase/include/dict0dict.ic index 85e4aaf1a05..7f754e316b3 100644 --- a/innobase/include/dict0dict.ic +++ b/innobase/include/dict0dict.ic @@ -168,7 +168,7 @@ dict_table_get_sys_col( col = dict_table_get_nth_col(table, table->n_cols - DATA_N_SYS_COLS + sys); ut_ad(col->type.mtype == DATA_SYS); - ut_ad(col->type.prtype == sys); + ut_ad(col->type.prtype == (sys | DATA_NOT_NULL)); return(col); } @@ -322,6 +322,7 @@ dict_index_rec_get_sys_col( /*=======================*/ /* out: system column value */ dict_index_t* index, /* in: clustered index describing the record */ + const ulint* offsets,/* in: offsets returned by rec_get_offsets() */ ulint type, /* in: column type: DATA_ROLL_PTR, ... */ rec_t* rec) /* in: record */ { @@ -331,12 +332,13 @@ dict_index_rec_get_sys_col( ut_ad(index); ut_ad(index->type & DICT_CLUSTERED); - + ut_ad(rec_offs_validate(rec, index, offsets)); + pos = dict_index_get_sys_col_pos(index, type); ut_ad(pos != ULINT_UNDEFINED); - field = rec_get_nth_field(rec, pos, &len); + field = rec_get_nth_field(rec, offsets, pos, &len); if (type == DATA_ROLL_PTR) { ut_ad(len == 7); @@ -677,7 +679,10 @@ dict_is_mixed_table_rec( byte* mix_id_field; ulint len; - mix_id_field = rec_get_nth_field(rec, table->mix_len, &len); + ut_ad(!table->comp); + + mix_id_field = rec_get_nth_field_old(rec, + table->mix_len, &len); if ((len != table->mix_id_len) || (0 != ut_memcmp(table->mix_id_buf, mix_id_field, len))) { diff --git a/innobase/include/dict0mem.h b/innobase/include/dict0mem.h index 1e496a25477..670b3445a55 100644 --- a/innobase/include/dict0mem.h +++ b/innobase/include/dict0mem.h @@ -54,7 +54,8 @@ dict_mem_table_create( of the table is placed; this parameter is ignored if the table is made a member of a cluster */ - ulint n_cols); /* in: number of columns */ + ulint n_cols, /* in: number of columns */ + ibool comp); /* in: TRUE=compact page format */ /************************************************************************** Creates a cluster memory object. */ @@ -171,6 +172,13 @@ struct dict_field_struct{ DICT_MAX_COL_PREFIX_LEN; NOTE that in the UTF-8 charset, MySQL sets this to 3 * the prefix len in UTF-8 chars */ + ulint fixed_len; /* 0 or the fixed length of the + column if smaller than + DICT_MAX_COL_PREFIX_LEN */ + ulint fixed_offs; /* offset to the field, or + ULINT_UNDEFINED if it is not fixed + within the record (due to preceding + variable-length fields) */ }; /* Data structure for an index tree */ @@ -225,6 +233,7 @@ struct dict_index_struct{ ulint n_def; /* number of fields defined so far */ ulint n_fields;/* number of fields in the index */ dict_field_t* fields; /* array of field descriptions */ + ulint n_nullable;/* number of nullable fields */ UT_LIST_NODE_T(dict_index_t) indexes;/* list of indexes of the table */ dict_tree_t* tree; /* index tree struct */ @@ -320,6 +329,7 @@ struct dict_table_struct{ ibool tablespace_discarded;/* this flag is set TRUE when the user calls DISCARD TABLESPACE on this table, and reset to FALSE in IMPORT TABLESPACE */ + ibool comp; /* flag: TRUE=compact page format */ hash_node_t name_hash; /* hash chain node */ hash_node_t id_hash; /* hash chain node */ ulint n_def; /* number of columns defined so far */ diff --git a/innobase/include/lock0lock.h b/innobase/include/lock0lock.h index db8dd24973d..d642fe46fef 100644 --- a/innobase/include/lock0lock.h +++ b/innobase/include/lock0lock.h @@ -47,7 +47,8 @@ lock_sec_rec_some_has_impl_off_kernel( /* out: transaction which has the x-lock, or NULL */ rec_t* rec, /* in: user record */ - dict_index_t* index); /* in: secondary index */ + dict_index_t* index, /* in: secondary index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /************************************************************************* Checks if some transaction has an implicit x-lock on a record in a clustered index. */ @@ -58,7 +59,8 @@ lock_clust_rec_some_has_impl( /* out: transaction which has the x-lock, or NULL */ rec_t* rec, /* in: user record */ - dict_index_t* index); /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /***************************************************************** Resets the lock bits for a single record. Releases transactions waiting for lock requests here. */ @@ -275,6 +277,7 @@ lock_clust_rec_modify_check_and_lock( does nothing */ rec_t* rec, /* in: record which should be modified */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ que_thr_t* thr); /* in: query thread */ /************************************************************************* Checks if locks of other transactions prevent an immediate modify @@ -308,6 +311,7 @@ lock_sec_rec_read_check_and_lock( which should be read or passed over by a read cursor */ dict_index_t* index, /* in: secondary index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ulint mode, /* in: mode of the lock which the read cursor should set on records: LOCK_S or LOCK_X; the latter is possible in SELECT FOR UPDATE */ @@ -333,6 +337,7 @@ lock_clust_rec_read_check_and_lock( which should be read or passed over by a read cursor */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ulint mode, /* in: mode of the lock which the read cursor should set on records: LOCK_S or LOCK_X; the latter is possible in SELECT FOR UPDATE */ @@ -350,6 +355,7 @@ lock_clust_rec_cons_read_sees( rec_t* rec, /* in: user record which should be read or passed over by a read cursor */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ read_view_t* view); /* in: consistent read view */ /************************************************************************* Checks that a non-clustered index record is seen in a consistent read. */ @@ -499,6 +505,7 @@ lock_check_trx_id_sanity( dulint trx_id, /* in: trx id */ rec_t* rec, /* in: user record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets, /* in: rec_get_offsets(rec, index) */ ibool has_kernel_mutex);/* in: TRUE if the caller owns the kernel mutex */ /************************************************************************* @@ -509,7 +516,8 @@ lock_rec_queue_validate( /*====================*/ /* out: TRUE if ok */ rec_t* rec, /* in: record to look at */ - dict_index_t* index); /* in: index, or NULL if not known */ + dict_index_t* index, /* in: index, or NULL if not known */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /************************************************************************* Prints info of a table lock. */ diff --git a/innobase/include/lock0lock.ic b/innobase/include/lock0lock.ic index fabc9256401..c7a71bb45d8 100644 --- a/innobase/include/lock0lock.ic +++ b/innobase/include/lock0lock.ic @@ -60,7 +60,8 @@ lock_clust_rec_some_has_impl( /* out: transaction which has the x-lock, or NULL */ rec_t* rec, /* in: user record */ - dict_index_t* index) /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { dulint trx_id; @@ -70,7 +71,7 @@ lock_clust_rec_some_has_impl( ut_ad(index->type & DICT_CLUSTERED); ut_ad(page_rec_is_user_rec(rec)); - trx_id = row_get_rec_trx_id(rec, index); + trx_id = row_get_rec_trx_id(rec, index, offsets); if (trx_is_active(trx_id)) { /* The modifying or inserting transaction is active */ diff --git a/innobase/include/mtr0log.h b/innobase/include/mtr0log.h index 9c9c6f696e8..c0636ea1e1e 100644 --- a/innobase/include/mtr0log.h +++ b/innobase/include/mtr0log.h @@ -11,6 +11,7 @@ Created 12/7/1995 Heikki Tuuri #include "univ.i" #include "mtr0mtr.h" +#include "dict0types.h" /************************************************************ Writes 1 - 4 bytes to a file page buffered in the buffer pool. @@ -173,6 +174,38 @@ mlog_parse_string( byte* page); /* in: page where to apply the log record, or NULL */ +/************************************************************ +Opens a buffer for mlog, writes the initial log record and, +if needed, the field lengths of an index. Reserves space +for further log entries. The log entry must be closed with +mtr_close(). */ + +byte* +mlog_open_and_write_index( +/*======================*/ + /* out: buffer, NULL if log mode + MTR_LOG_NONE */ + mtr_t* mtr, /* in: mtr */ + byte* rec, /* in: index record or page */ + dict_index_t* index, /* in: record descriptor */ + byte type, /* in: log item type */ + ulint size); /* in: requested buffer size in bytes + (if 0, calls mlog_close() and returns NULL) */ + +/************************************************************ +Parses a log record written by mlog_open_and_write_index. */ + +byte* +mlog_parse_index( +/*=============*/ + /* out: parsed record end, + NULL if not a complete record */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + /* out: new value of log_ptr */ + ibool comp, /* in: TRUE=compact record format */ + dict_index_t** index); /* out, own: dummy index */ + /* Insert, update, and maybe other functions may use this value to define an extra mlog buffer size for variable size data */ #define MLOG_BUF_MARGIN 256 diff --git a/innobase/include/mtr0mtr.h b/innobase/include/mtr0mtr.h index e8c68a91dad..071279d5259 100644 --- a/innobase/include/mtr0mtr.h +++ b/innobase/include/mtr0mtr.h @@ -102,7 +102,31 @@ flag value must give the length also! */ file rename */ #define MLOG_FILE_DELETE ((byte)35) /* log record about an .ibd file deletion */ -#define MLOG_BIGGEST_TYPE ((byte)35) /* biggest value (used in +#define MLOG_COMP_REC_MIN_MARK ((byte)36) /* mark a compact index record + as the predefined minimum + record */ +#define MLOG_COMP_PAGE_CREATE ((byte)37) /* create a compact + index page */ +#define MLOG_COMP_REC_INSERT ((byte)38) /* compact record insert */ +#define MLOG_COMP_REC_CLUST_DELETE_MARK ((byte)39) + /* mark compact clustered index + record deleted */ +#define MLOG_COMP_REC_SEC_DELETE_MARK ((byte)40)/* mark compact secondary index + record deleted */ +#define MLOG_COMP_REC_UPDATE_IN_PLACE ((byte)41)/* update of a compact record, + preserves record field sizes */ +#define MLOG_COMP_REC_DELETE ((byte)42) /* delete a compact record + from a page */ +#define MLOG_COMP_LIST_END_DELETE ((byte)43) /* delete compact record list + end on index page */ +#define MLOG_COMP_LIST_START_DELETE ((byte)44) /* delete compact record list + start on index page */ +#define MLOG_COMP_LIST_END_COPY_CREATED ((byte)45) + /* copy compact record list end + to a new created index page */ +#define MLOG_COMP_PAGE_REORGANIZE ((byte)46) /* reorganize an index page */ + +#define MLOG_BIGGEST_TYPE ((byte)46) /* biggest value (used in asserts) */ /******************************************************************* diff --git a/innobase/include/page0cur.h b/innobase/include/page0cur.h index c85669ed4df..a693931968e 100644 --- a/innobase/include/page0cur.h +++ b/innobase/include/page0cur.h @@ -128,7 +128,8 @@ page_cur_tuple_insert( /* out: pointer to record if succeed, NULL otherwise */ page_cur_t* cursor, /* in: a page cursor */ - dtuple_t* tuple, /* in: pointer to a data tuple */ + dtuple_t* tuple, /* in: pointer to a data tuple */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr); /* in: mini-transaction handle */ /*************************************************************** Inserts a record next to page cursor. Returns pointer to inserted record if @@ -142,6 +143,7 @@ page_cur_rec_insert( otherwise */ page_cur_t* cursor, /* in: a page cursor */ rec_t* rec, /* in: record to insert */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr); /* in: mini-transaction handle */ /*************************************************************** Inserts a record next to page cursor. Returns pointer to inserted record if @@ -155,9 +157,9 @@ page_cur_insert_rec_low( /* out: pointer to record if succeed, NULL otherwise */ page_cur_t* cursor, /* in: a page cursor */ - dtuple_t* tuple, /* in: pointer to a data tuple or NULL */ - ulint data_size,/* in: data size of tuple */ - rec_t* rec, /* in: pointer to a physical record or NULL */ + dtuple_t* tuple, /* in: pointer to a data tuple or NULL */ + dict_index_t* index, /* in: record descriptor */ + rec_t* rec, /* in: pointer to a physical record or NULL */ mtr_t* mtr); /* in: mini-transaction handle */ /***************************************************************** Copies records from page to a newly created page, from a given record onward, @@ -166,10 +168,11 @@ including that record. Infimum and supremum records are not copied. */ void page_copy_rec_list_end_to_created_page( /*===================================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: first record to copy */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: first record to copy */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /*************************************************************** Deletes a record at the page cursor. The cursor is moved to the next record after the deleted one. */ @@ -178,6 +181,7 @@ void page_cur_delete_rec( /*================*/ page_cur_t* cursor, /* in: a page cursor */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr); /* in: mini-transaction handle */ /******************************************************************** Searches the right position for a page cursor. */ @@ -187,6 +191,7 @@ page_cur_search( /*============*/ /* out: number of matched fields on the left */ page_t* page, /* in: index page */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* tuple, /* in: data tuple */ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, or PAGE_CUR_GE */ @@ -198,6 +203,7 @@ void page_cur_search_with_match( /*=======================*/ page_t* page, /* in: index page */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* tuple, /* in: data tuple */ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, or PAGE_CUR_GE */ @@ -229,34 +235,37 @@ Parses a log record of a record insert on a page. */ byte* page_cur_parse_insert_rec( /*======================*/ - /* out: end of log record or NULL */ - ibool is_short,/* in: TRUE if short inserts */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ + /* out: end of log record or NULL */ + ibool is_short,/* in: TRUE if short inserts */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ /************************************************************** Parses a log record of copying a record list end to a new created page. */ byte* page_parse_copy_rec_list_to_created_page( /*=====================================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ /*************************************************************** Parses log record of a record delete on a page. */ byte* page_cur_parse_delete_rec( /*======================*/ - /* out: pointer to record end or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ + /* out: pointer to record end or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ /* Index page cursor */ diff --git a/innobase/include/page0cur.ic b/innobase/include/page0cur.ic index 39f8ab11513..03010fbd766 100644 --- a/innobase/include/page0cur.ic +++ b/innobase/include/page0cur.ic @@ -143,7 +143,7 @@ UNIV_INLINE void page_cur_move_to_prev( /*==================*/ - page_cur_t* cur) /* in: cursor; must not before first */ + page_cur_t* cur) /* in: page cursor, not before first */ { ut_ad(!page_cur_is_before_first(cur)); @@ -158,6 +158,7 @@ page_cur_search( /*============*/ /* out: number of matched fields on the left */ page_t* page, /* in: index page */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* tuple, /* in: data tuple */ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, or PAGE_CUR_GE */ @@ -170,7 +171,7 @@ page_cur_search( ut_ad(dtuple_check_typed(tuple)); - page_cur_search_with_match(page, tuple, mode, + page_cur_search_with_match(page, index, tuple, mode, &up_matched_fields, &up_matched_bytes, &low_matched_fields, @@ -190,16 +191,11 @@ page_cur_tuple_insert( /* out: pointer to record if succeed, NULL otherwise */ page_cur_t* cursor, /* in: a page cursor */ - dtuple_t* tuple, /* in: pointer to a data tuple */ + dtuple_t* tuple, /* in: pointer to a data tuple */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr) /* in: mini-transaction handle */ { - ulint data_size; - - ut_ad(dtuple_check_typed(tuple)); - - data_size = dtuple_get_data_size(tuple); - - return(page_cur_insert_rec_low(cursor, tuple, data_size, NULL, mtr)); + return(page_cur_insert_rec_low(cursor, tuple, index, NULL, mtr)); } /*************************************************************** @@ -214,8 +210,9 @@ page_cur_rec_insert( otherwise */ page_cur_t* cursor, /* in: a page cursor */ rec_t* rec, /* in: record to insert */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr) /* in: mini-transaction handle */ { - return(page_cur_insert_rec_low(cursor, NULL, 0, rec, mtr)); + return(page_cur_insert_rec_low(cursor, NULL, index, rec, mtr)); } diff --git a/innobase/include/page0page.h b/innobase/include/page0page.h index 969313614e3..d3ef8214eb6 100644 --- a/innobase/include/page0page.h +++ b/innobase/include/page0page.h @@ -37,7 +37,8 @@ typedef byte page_header_t; /*-----------------------------*/ #define PAGE_N_DIR_SLOTS 0 /* number of slots in page directory */ #define PAGE_HEAP_TOP 2 /* pointer to record heap top */ -#define PAGE_N_HEAP 4 /* number of records in the heap */ +#define PAGE_N_HEAP 4 /* number of records in the heap, + bit 15=flag: new-style compact page format */ #define PAGE_FREE 6 /* pointer to start of page free record list */ #define PAGE_GARBAGE 8 /* number of bytes in deleted records */ #define PAGE_LAST_INSERT 10 /* pointer to the last inserted record, or @@ -79,15 +80,24 @@ typedef byte page_header_t; #define PAGE_DATA (PAGE_HEADER + 36 + 2 * FSEG_HEADER_SIZE) /* start of data on the page */ -#define PAGE_INFIMUM (PAGE_DATA + 1 + REC_N_EXTRA_BYTES) - /* offset of the page infimum record on the - page */ -#define PAGE_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_EXTRA_BYTES + 8) - /* offset of the page supremum record on the - page */ -#define PAGE_SUPREMUM_END (PAGE_SUPREMUM + 9) +#define PAGE_OLD_INFIMUM (PAGE_DATA + 1 + REC_N_OLD_EXTRA_BYTES) + /* offset of the page infimum record on an + old-style page */ +#define PAGE_OLD_SUPREMUM (PAGE_DATA + 2 + 2 * REC_N_OLD_EXTRA_BYTES + 8) + /* offset of the page supremum record on an + old-style page */ +#define PAGE_OLD_SUPREMUM_END (PAGE_OLD_SUPREMUM + 9) /* offset of the page supremum record end on - the page */ + an old-style page */ +#define PAGE_NEW_INFIMUM (PAGE_DATA + REC_N_NEW_EXTRA_BYTES) + /* offset of the page infimum record on a + new-style compact page */ +#define PAGE_NEW_SUPREMUM (PAGE_DATA + 2 * REC_N_NEW_EXTRA_BYTES + 8) + /* offset of the page supremum record on a + new-style compact page */ +#define PAGE_NEW_SUPREMUM_END (PAGE_NEW_SUPREMUM + 8) + /* offset of the page supremum record end on + a new-style compact page */ /*-----------------------------*/ /* Directions of cursor movement */ @@ -233,6 +243,7 @@ page_cmp_dtuple_rec_with_match( be page infimum or supremum, in which case matched-parameter values below are not affected */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint* matched_fields, /* in/out: number of already completely matched fields; when function returns contains the value for current comparison */ @@ -259,6 +270,22 @@ page_rec_get_n_recs_before( /* out: number of records */ rec_t* rec); /* in: the physical record */ /***************************************************************** +Gets the number of records in the heap. */ +UNIV_INLINE +ulint +page_dir_get_n_heap( +/*================*/ + /* out: number of user records */ + page_t* page); /* in: index page */ +/***************************************************************** +Sets the number of records in the heap. */ +UNIV_INLINE +void +page_dir_set_n_heap( +/*================*/ + page_t* page, /* in: index page */ + ulint n_heap);/* in: number of records */ +/***************************************************************** Gets the number of dir slots in directory. */ UNIV_INLINE ulint @@ -267,6 +294,15 @@ page_dir_get_n_slots( /* out: number of slots */ page_t* page); /* in: index page */ /***************************************************************** +Sets the number of dir slots in directory. */ +UNIV_INLINE +void +page_dir_set_n_slots( +/*=================*/ + /* out: number of slots */ + page_t* page, /* in: index page */ + ulint n_slots);/* in: number of slots */ +/***************************************************************** Gets pointer to nth directory slot. */ UNIV_INLINE page_dir_slot_t* @@ -333,7 +369,16 @@ ulint page_dir_find_owner_slot( /*=====================*/ /* out: the directory slot number */ - rec_t* rec); /* in: the physical record */ + rec_t* rec); /* in: the physical record */ +/**************************************************************** +Determine whether the page is in new-style compact format. */ +UNIV_INLINE +ibool +page_is_comp( +/*=========*/ + /* out: TRUE if the page is in compact format + FALSE if it is in old-style format */ + page_t* page); /* in: index page */ /**************************************************************** Gets the pointer to the next record on the page. */ UNIV_INLINE @@ -359,9 +404,10 @@ UNIV_INLINE rec_t* page_rec_get_prev( /*==============*/ - /* out: pointer to previous record */ - rec_t* rec); /* in: pointer to record, must not be page - infimum */ + /* out: pointer to previous record */ + rec_t* rec); /* in: pointer to record, + must not be page infimum */ + /**************************************************************** TRUE if the record is a user record on the page. */ UNIV_INLINE @@ -446,9 +492,11 @@ page_get_max_insert_size_after_reorganize( Calculates free space if a page is emptied. */ UNIV_INLINE ulint -page_get_free_space_of_empty(void); -/*==============================*/ - /* out: free space */ +page_get_free_space_of_empty( +/*=========================*/ + /* out: free space */ + ibool comp) /* in: TRUE=compact page format */ + __attribute__((const)); /**************************************************************** Returns the sum of the sizes of the records in the record list excluding the infimum and supremum records. */ @@ -464,20 +512,23 @@ Allocates a block of memory from an index page. */ byte* page_mem_alloc( /*===========*/ - /* out: pointer to start of allocated - buffer, or NULL if allocation fails */ - page_t* page, /* in: index page */ - ulint need, /* in: number of bytes needed */ - ulint* heap_no);/* out: this contains the heap number - of the allocated record if allocation succeeds */ + /* out: pointer to start of allocated + buffer, or NULL if allocation fails */ + page_t* page, /* in: index page */ + ulint need, /* in: number of bytes needed */ + dict_index_t* index, /* in: record descriptor */ + ulint* heap_no);/* out: this contains the heap number + of the allocated record + if allocation succeeds */ /**************************************************************** Puts a record to free list. */ UNIV_INLINE void page_mem_free( /*==========*/ - page_t* page, /* in: index page */ - rec_t* rec); /* in: pointer to the (origin of) record */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: pointer to the (origin of) record */ + dict_index_t* index); /* in: record descriptor */ /************************************************************** The index page creation function. */ @@ -487,7 +538,8 @@ page_create( /* out: pointer to the page */ buf_frame_t* frame, /* in: a buffer frame where the page is created */ - mtr_t* mtr); /* in: mini-transaction handle */ + mtr_t* mtr, /* in: mini-transaction handle */ + ibool comp); /* in: TRUE=compact page format */ /***************************************************************** Differs from page_copy_rec_list_end, because this function does not touch the lock table and max trx id on page. */ @@ -495,10 +547,11 @@ touch the lock table and max trx id on page. */ void page_copy_rec_list_end_no_locks( /*============================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Copies records from page to new_page, from the given record onward, including that record. Infimum and supremum records are not copied. @@ -507,10 +560,11 @@ The records are copied to the start of the record list on new_page. */ void page_copy_rec_list_end( /*===================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Copies records from page to new_page, up to the given record, NOT including that record. Infimum and supremum records are not copied. @@ -519,10 +573,11 @@ The records are copied to the end of the record list on new_page. */ void page_copy_rec_list_start( /*=====================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Deletes records from a page from a given record onward, including that record. The infimum and supremum records are not deleted. */ @@ -530,14 +585,15 @@ The infimum and supremum records are not deleted. */ void page_delete_rec_list_end( /*=====================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - ulint n_recs, /* in: number of records to delete, or ULINT_UNDEFINED - if not known */ - ulint size, /* in: the sum of the sizes of the records in the end - of the chain to delete, or ULINT_UNDEFINED if not - known */ - mtr_t* mtr); /* in: mtr */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + ulint n_recs, /* in: number of records to delete, + or ULINT_UNDEFINED if not known */ + ulint size, /* in: the sum of the sizes of the + records in the end of the chain to + delete, or ULINT_UNDEFINED if not known */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Deletes records from page, up to the given record, NOT including that record. Infimum and supremum records are not deleted. */ @@ -545,9 +601,10 @@ that record. Infimum and supremum records are not deleted. */ void page_delete_rec_list_start( /*=======================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr); /* in: mtr */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Moves record list end to another page. Moved records include split_rec. */ @@ -555,10 +612,11 @@ split_rec. */ void page_move_rec_list_end( /*===================*/ - page_t* new_page, /* in: index page where to move */ - page_t* page, /* in: index page */ - rec_t* split_rec, /* in: first record to move */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page where to move */ + page_t* page, /* in: index page */ + rec_t* split_rec, /* in: first record to move */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Moves record list start to another page. Moved records do not include split_rec. */ @@ -566,10 +624,11 @@ split_rec. */ void page_move_rec_list_start( /*=====================*/ - page_t* new_page, /* in: index page where to move */ - page_t* page, /* in: index page */ - rec_t* split_rec, /* in: first record not to move */ - mtr_t* mtr); /* in: mtr */ + page_t* new_page, /* in: index page where to move */ + page_t* page, /* in: index page */ + rec_t* split_rec, /* in: first record not to move */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr); /* in: mtr */ /******************************************************************** Splits a directory slot which owns too many records. */ @@ -595,13 +654,16 @@ Parses a log record of a record list end or start deletion. */ byte* page_parse_delete_rec_list( /*=======================*/ - /* out: end of log record or NULL */ - byte type, /* in: MLOG_LIST_END_DELETE or - MLOG_LIST_START_DELETE */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr); /* in: mtr or NULL */ + /* out: end of log record or NULL */ + byte type, /* in: MLOG_LIST_END_DELETE, + MLOG_LIST_START_DELETE, + MLOG_COMP_LIST_END_DELETE or + MLOG_COMP_LIST_START_DELETE */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr); /* in: mtr or NULL */ /*************************************************************** Parses a redo log record of creating a page. */ @@ -611,6 +673,7 @@ page_parse_create( /* out: end of log record or NULL */ byte* ptr, /* in: buffer */ byte* end_ptr,/* in: buffer end */ + ibool comp, /* in: TRUE=compact page format */ page_t* page, /* in: page or NULL */ mtr_t* mtr); /* in: mtr or NULL */ /**************************************************************** @@ -620,7 +683,8 @@ the index page context. */ void page_rec_print( /*===========*/ - rec_t* rec); + rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: record descriptor */ /******************************************************************* This is used to print the contents of the directory for debugging purposes. */ @@ -637,8 +701,9 @@ debugging purposes. */ void page_print_list( /*============*/ - page_t* page, /* in: index page */ - ulint pr_n); /* in: print n first and n last entries */ + page_t* page, /* in: index page */ + dict_index_t* index, /* in: dictionary index of the page */ + ulint pr_n); /* in: print n first and n last entries */ /******************************************************************* Prints the info in a page header. */ @@ -653,9 +718,12 @@ debugging purposes. */ void page_print( /*======*/ - page_t* page, /* in: index page */ - ulint dn, /* in: print dn first and last entries in directory */ - ulint rn); /* in: print rn first and last records on page */ + page_t* page, /* in: index page */ + dict_index_t* index, /* in: dictionary index of the page */ + ulint dn, /* in: print dn first and last entries + in directory */ + ulint rn); /* in: print rn first and last records + in directory */ /******************************************************************* The following is used to validate a record on a page. This function differs from rec_validate as it can also check the n_owned field and @@ -664,8 +732,9 @@ the heap_no field. */ ibool page_rec_validate( /*==============*/ - /* out: TRUE if ok */ - rec_t* rec); /* in: record on the page */ + /* out: TRUE if ok */ + rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /******************************************************************* Checks that the first directory slot points to the infimum record and the last to the supremum. This function is intended to track if the diff --git a/innobase/include/page0page.ic b/innobase/include/page0page.ic index 3d2bf3b090e..1d5ea337031 100644 --- a/innobase/include/page0page.ic +++ b/innobase/include/page0page.ic @@ -73,7 +73,8 @@ page_header_set_field( { ut_ad(page); ut_ad(field <= PAGE_N_RECS); - ut_ad(val < UNIV_PAGE_SIZE); + ut_ad(field == PAGE_N_HEAP || val < UNIV_PAGE_SIZE); + ut_ad(field != PAGE_N_HEAP || (val & 0x7fff) < UNIV_PAGE_SIZE); mach_write_to_2(page + PAGE_HEADER + field, val); } @@ -162,7 +163,11 @@ page_get_infimum_rec( { ut_ad(page); - return(page + PAGE_INFIMUM); + if (page_is_comp(page)) { + return(page + PAGE_NEW_INFIMUM); + } else { + return(page + PAGE_OLD_INFIMUM); + } } /**************************************************************** @@ -176,7 +181,11 @@ page_get_supremum_rec( { ut_ad(page); - return(page + PAGE_SUPREMUM); + if (page_is_comp(page)) { + return(page + PAGE_NEW_SUPREMUM); + } else { + return(page + PAGE_OLD_SUPREMUM); + } } /**************************************************************** @@ -309,6 +318,7 @@ page_cmp_dtuple_rec_with_match( be page infimum or supremum, in which case matched-parameter values below are not affected */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint* matched_fields, /* in/out: number of already completely matched fields; when function returns contains the value for current comparison */ @@ -320,6 +330,7 @@ page_cmp_dtuple_rec_with_match( page_t* page; ut_ad(dtuple_check_typed(dtuple)); + ut_ad(rec_offs_validate(rec, NULL, offsets)); page = buf_frame_align(rec); @@ -328,7 +339,7 @@ page_cmp_dtuple_rec_with_match( } else if (rec == page_get_supremum_rec(page)) { return(-1); } else { - return(cmp_dtuple_rec_with_match(dtuple, rec, + return(cmp_dtuple_rec_with_match(dtuple, rec, offsets, matched_fields, matched_bytes)); } @@ -358,6 +369,45 @@ page_dir_get_n_slots( { return(page_header_get_field(page, PAGE_N_DIR_SLOTS)); } +/***************************************************************** +Sets the number of dir slots in directory. */ +UNIV_INLINE +void +page_dir_set_n_slots( +/*=================*/ + /* out: number of slots */ + page_t* page, /* in: index page */ + ulint n_slots)/* in: number of slots */ +{ + page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots); +} + +/***************************************************************** +Gets the number of records in the heap. */ +UNIV_INLINE +ulint +page_dir_get_n_heap( +/*================*/ + /* out: number of user records */ + page_t* page) /* in: index page */ +{ + return(page_header_get_field(page, PAGE_N_HEAP) & 0x7fff); +} + +/***************************************************************** +Sets the number of records in the heap. */ +UNIV_INLINE +void +page_dir_set_n_heap( +/*================*/ + page_t* page, /* in: index page */ + ulint n_heap) /* in: number of records */ +{ + ut_ad(n_heap < 0x8000); + + page_header_set_field(page, PAGE_N_HEAP, n_heap | (0x8000 & + page_header_get_field(page, PAGE_N_HEAP))); +} /***************************************************************** Gets pointer to nth directory slot. */ @@ -369,7 +419,7 @@ page_dir_get_nth_slot( page_t* page, /* in: index page */ ulint n) /* in: position */ { - ut_ad(page_header_get_field(page, PAGE_N_DIR_SLOTS) > n); + ut_ad(page_dir_get_n_slots(page) > n); return(page + UNIV_PAGE_SIZE - PAGE_DIR - (n + 1) * PAGE_DIR_SLOT_SIZE); @@ -431,7 +481,8 @@ page_dir_slot_get_n_owned( /* out: number of records */ page_dir_slot_t* slot) /* in: page directory slot */ { - return(rec_get_n_owned(page_dir_slot_get_rec(slot))); + return(rec_get_n_owned(page_dir_slot_get_rec(slot), + page_is_comp(buf_frame_align(slot)))); } /******************************************************************* @@ -444,7 +495,8 @@ page_dir_slot_set_n_owned( ulint n) /* in: number of records owned by the slot */ { - rec_set_n_owned(page_dir_slot_get_rec(slot), n); + rec_set_n_owned(page_dir_slot_get_rec(slot), + page_is_comp(buf_frame_align(slot)), n); } /**************************************************************** @@ -461,6 +513,19 @@ page_dir_calc_reserved_space( / PAGE_DIR_SLOT_MIN_N_OWNED); } +/**************************************************************** +Determine whether the page is in new-style compact format. */ +UNIV_INLINE +ibool +page_is_comp( +/*=========*/ + /* out: TRUE if the page is in compact format + FALSE if it is in old-style format */ + page_t* page) /* in: index page */ +{ + return(!!(page_header_get_field(page, PAGE_N_HEAP) & 0x8000)); +} + /**************************************************************** Gets the pointer to the next record on the page. */ UNIV_INLINE @@ -477,7 +542,7 @@ page_rec_get_next( page = buf_frame_align(rec); - offs = rec_get_next_offs(rec); + offs = rec_get_next_offs(rec, page_is_comp(page)); if (offs >= UNIV_PAGE_SIZE) { fprintf(stderr, @@ -513,6 +578,7 @@ page_rec_set_next( infimum */ { page_t* page; + ulint offs; ut_ad(page_rec_check(rec)); ut_a((next == NULL) @@ -523,11 +589,13 @@ page_rec_set_next( ut_ad(rec != page_get_supremum_rec(page)); ut_ad(next != page_get_infimum_rec(page)); - if (next == NULL) { - rec_set_next_offs(rec, 0); + if (next) { + offs = (ulint) (next - page); } else { - rec_set_next_offs(rec, (ulint)(next - page)); + offs = 0; } + + rec_set_next_offs(rec, page_is_comp(page), offs); } /**************************************************************** @@ -545,6 +613,7 @@ page_rec_get_prev( rec_t* rec2; rec_t* prev_rec = NULL; page_t* page; + ibool comp; ut_ad(page_rec_check(rec)); @@ -559,6 +628,7 @@ page_rec_get_prev( slot = page_dir_get_nth_slot(page, slot_no - 1); rec2 = page_dir_slot_get_rec(slot); + comp = page_is_comp(page); while (rec != rec2) { prev_rec = rec2; @@ -579,9 +649,12 @@ page_rec_find_owner_rec( /* out: the owner record */ rec_t* rec) /* in: the physical record */ { + ibool comp; + ut_ad(page_rec_check(rec)); + comp = page_is_comp(buf_frame_align(rec)); - while (rec_get_n_owned(rec) == 0) { + while (rec_get_n_owned(rec, comp) == 0) { rec = page_rec_get_next(rec); } @@ -601,7 +674,9 @@ page_get_data_size( ulint ret; ret = (ulint)(page_header_get_field(page, PAGE_HEAP_TOP) - - PAGE_SUPREMUM_END + - (page_is_comp(page) + ? PAGE_NEW_SUPREMUM_END + : PAGE_OLD_SUPREMUM_END) - page_header_get_field(page, PAGE_GARBAGE)); ut_ad(ret < UNIV_PAGE_SIZE); @@ -613,12 +688,13 @@ page_get_data_size( Calculates free space if a page is emptied. */ UNIV_INLINE ulint -page_get_free_space_of_empty(void) -/*==============================*/ +page_get_free_space_of_empty( +/*=========================*/ /* out: free space */ + ibool comp) /* in: TRUE=compact page layout */ { return((ulint)(UNIV_PAGE_SIZE - - PAGE_SUPREMUM_END + - (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END) - PAGE_DIR - 2 * PAGE_DIR_SLOT_SIZE)); } @@ -640,13 +716,16 @@ page_get_max_insert_size( { ulint occupied; ulint free_space; + ibool comp; + + comp = page_is_comp(page); occupied = page_header_get_field(page, PAGE_HEAP_TOP) - - PAGE_SUPREMUM_END + - (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END) + page_dir_calc_reserved_space( - n_recs + (page_header_get_field(page, PAGE_N_HEAP) - 2)); + n_recs + page_dir_get_n_heap(page) - 2); - free_space = page_get_free_space_of_empty(); + free_space = page_get_free_space_of_empty(comp); /* Above the 'n_recs +' part reserves directory space for the new inserted records; the '- 2' excludes page infimum and supremum @@ -673,11 +752,14 @@ page_get_max_insert_size_after_reorganize( { ulint occupied; ulint free_space; + ibool comp; + + comp = page_is_comp(page); occupied = page_get_data_size(page) + page_dir_calc_reserved_space(n_recs + page_get_n_recs(page)); - free_space = page_get_free_space_of_empty(); + free_space = page_get_free_space_of_empty(comp); if (occupied > free_space) { @@ -693,11 +775,12 @@ UNIV_INLINE void page_mem_free( /*==========*/ - page_t* page, /* in: index page */ - rec_t* rec) /* in: pointer to the (origin of) record */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: pointer to the (origin of) record */ + dict_index_t* index) /* in: record descriptor */ { - rec_t* free; - ulint garbage; + rec_t* free; + ulint garbage; free = page_header_get_ptr(page, PAGE_FREE); @@ -707,7 +790,7 @@ page_mem_free( garbage = page_header_get_field(page, PAGE_GARBAGE); page_header_set_field(page, PAGE_GARBAGE, - garbage + rec_get_size(rec)); + garbage + rec_get_size(rec, index)); } #ifdef UNIV_MATERIALIZE diff --git a/innobase/include/rem0cmp.h b/innobase/include/rem0cmp.h index 712e263350e..77a5a42c2d5 100644 --- a/innobase/include/rem0cmp.h +++ b/innobase/include/rem0cmp.h @@ -90,6 +90,7 @@ cmp_dtuple_rec_with_match( dtuple in some of the common fields, or which has an equal number or more fields than dtuple */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint* matched_fields, /* in/out: number of already completely matched fields; when function returns, contains the value for current comparison */ @@ -107,7 +108,8 @@ cmp_dtuple_rec( less than rec, respectively; see the comments for cmp_dtuple_rec_with_match */ dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /****************************************************************** Checks if a dtuple is a prefix of a record. The last field in dtuple is allowed to be a prefix of the corresponding field in the record. */ @@ -116,23 +118,9 @@ ibool cmp_dtuple_is_prefix_of_rec( /*========================*/ /* out: TRUE if prefix */ - dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec); /* in: physical record */ -/****************************************************************** -Compares a prefix of a data tuple to a prefix of a physical record for -equality. If there are less fields in rec than parameter n_fields, FALSE -is returned. NOTE that n_fields_cmp of dtuple does not affect this -comparison. */ - -ibool -cmp_dtuple_rec_prefix_equal( -/*========================*/ - /* out: TRUE if equal */ dtuple_t* dtuple, /* in: data tuple */ rec_t* rec, /* in: physical record */ - ulint n_fields); /* in: number of fields which should be - compared; must not exceed the number of - fields in dtuple */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /***************************************************************** This function is used to compare two physical records. Only the common first fields are compared, and if an externally stored field is @@ -146,7 +134,13 @@ cmp_rec_rec_with_match( first fields are compared */ rec_t* rec1, /* in: physical record */ rec_t* rec2, /* in: physical record */ + const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ + const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ dict_index_t* index, /* in: data dictionary index */ + ulint n, /* in: number of fields to compare, + or ULINT_UNDEFINED if both records + contain all fields, and all fields + should be compared */ ulint* matched_fields, /* in/out: number of already completely matched fields; when the function returns, contains the value the for current @@ -167,6 +161,12 @@ cmp_rec_rec( first fields are compared */ rec_t* rec1, /* in: physical record */ rec_t* rec2, /* in: physical record */ + const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ + const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ + ulint n, /* in: number of fields to compare, + or ULINT_UNDEFINED if both records + contain all fields, and all fields + should be compared */ dict_index_t* index); /* in: data dictionary index */ diff --git a/innobase/include/rem0cmp.ic b/innobase/include/rem0cmp.ic index 75cb3ef04e8..d4c30f25f03 100644 --- a/innobase/include/rem0cmp.ic +++ b/innobase/include/rem0cmp.ic @@ -57,10 +57,14 @@ cmp_rec_rec( first fields are compared */ rec_t* rec1, /* in: physical record */ rec_t* rec2, /* in: physical record */ + const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ + const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ + ulint n, /* in: number of fields to compare */ dict_index_t* index) /* in: data dictionary index */ { ulint match_f = 0; ulint match_b = 0; - return(cmp_rec_rec_with_match(rec1, rec2, index, &match_f, &match_b)); + return(cmp_rec_rec_with_match(rec1, rec2, offsets1, offsets2, index, n, + &match_f, &match_b)); } diff --git a/innobase/include/rem0rec.h b/innobase/include/rem0rec.h index 86bf263170f..d450df82311 100644 --- a/innobase/include/rem0rec.h +++ b/innobase/include/rem0rec.h @@ -23,9 +23,18 @@ Created 5/30/1994 Heikki Tuuri info bits of a record */ #define REC_INFO_MIN_REC_FLAG 0x10UL -/* Number of extra bytes in a record, in addition to the data and the -offsets */ -#define REC_N_EXTRA_BYTES 6 +/* Number of extra bytes in an old-style record, +in addition to the data and the offsets */ +#define REC_N_OLD_EXTRA_BYTES 6 +/* Number of extra bytes in a new-style record, +in addition to the data and the offsets */ +#define REC_N_NEW_EXTRA_BYTES 5 + +/* Record status values */ +#define REC_STATUS_ORDINARY 0 +#define REC_STATUS_NODE_PTR 1 +#define REC_STATUS_INFIMUM 2 +#define REC_STATUS_SUPREMUM 3 /********************************************************** The following function is used to get the offset of the @@ -36,7 +45,8 @@ rec_get_next_offs( /*==============*/ /* out: the page offset of the next chained record */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp); /* in: TRUE=compact page format */ /********************************************************** The following function is used to set the next record offset field of the record. */ @@ -45,14 +55,15 @@ void rec_set_next_offs( /*==============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint next); /* in: offset of the next record */ /********************************************************** The following function is used to get the number of fields -in the record. */ +in an old-style record. */ UNIV_INLINE ulint -rec_get_n_fields( -/*=============*/ +rec_get_n_fields_old( +/*=================*/ /* out: number of data fields */ rec_t* rec); /* in: physical record */ /********************************************************** @@ -63,7 +74,8 @@ ulint rec_get_n_owned( /*============*/ /* out: number of owned records */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp); /* in: TRUE=compact page format */ /********************************************************** The following function is used to set the number of owned records. */ @@ -72,6 +84,7 @@ void rec_set_n_owned( /*============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint n_owned); /* in: the number of owned */ /********************************************************** The following function is used to retrieve the info bits of @@ -81,7 +94,8 @@ ulint rec_get_info_bits( /*==============*/ /* out: info bits */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp); /* in: TRUE=compact page format */ /********************************************************** The following function is used to set the info bits of a record. */ UNIV_INLINE @@ -89,15 +103,26 @@ void rec_set_info_bits( /*==============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint bits); /* in: info bits */ /********************************************************** -Gets the value of the deleted falg in info bits. */ +The following function retrieves the status bits of a new-style record. */ UNIV_INLINE -ibool -rec_info_bits_get_deleted_flag( -/*===========================*/ - /* out: TRUE if deleted flag set */ - ulint info_bits); /* in: info bits from a record */ +ulint +rec_get_status( +/*===========*/ + /* out: status bits */ + rec_t* rec); /* in: physical record */ + +/********************************************************** +The following function is used to set the status bits of a new-style record. */ +UNIV_INLINE +void +rec_set_status( +/*===========*/ + rec_t* rec, /* in: physical record */ + ulint bits); /* in: info bits */ + /********************************************************** The following function tells if record is delete marked. */ UNIV_INLINE @@ -105,7 +130,8 @@ ibool rec_get_deleted_flag( /*=================*/ /* out: TRUE if delete marked */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp); /* in: TRUE=compact page format */ /********************************************************** The following function is used to set the deleted bit. */ UNIV_INLINE @@ -113,8 +139,25 @@ void rec_set_deleted_flag( /*=================*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ibool flag); /* in: TRUE if delete marked */ /********************************************************** +The following function tells if a new-style record is a node pointer. */ +UNIV_INLINE +ibool +rec_get_node_ptr_flag( +/*=================*/ + /* out: TRUE if node pointer */ + rec_t* rec); /* in: physical record */ +/********************************************************** +The following function is used to flag a record as a node pointer. */ +UNIV_INLINE +void +rec_set_node_ptr_flag( +/*=================*/ + rec_t* rec, /* in: physical record */ + ibool flag); /* in: TRUE if the record is a node pointer */ +/********************************************************** The following function is used to get the order number of the record in the heap of the index page. */ UNIV_INLINE @@ -122,7 +165,8 @@ ulint rec_get_heap_no( /*=============*/ /* out: heap order number */ - rec_t* rec); /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp); /* in: TRUE=compact page format */ /********************************************************** The following function is used to set the heap number field in the record. */ @@ -131,6 +175,7 @@ void rec_set_heap_no( /*=============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint heap_no);/* in: the heap number */ /********************************************************** The following function is used to test whether the data offsets @@ -141,31 +186,84 @@ rec_get_1byte_offs_flag( /*====================*/ /* out: TRUE if 1-byte form */ rec_t* rec); /* in: physical record */ +/********************************************************** +The following function determines the offsets to each field +in the record. The offsets are returned in an array of +ulint, with [0] being the number of fields (n), [1] being the +extra size (if REC_OFFS_COMPACT is set, the record is in the new +format), and [2]..[n+1] being the offsets past the end of +fields 0..n, or to the beginning of fields 1..n+1. When the +high-order bit of the offset at [n+1] is set (REC_OFFS_SQL_NULL), +the field n is NULL. When the second high-order bit of the offset +at [n+1] is set (REC_OFFS_EXTERNAL), the field n is being stored +externally. */ + +ulint* +rec_get_offsets( +/*============*/ + /* out: the offsets */ + rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint n_fields,/* in: maximum number of initialized fields + (ULINT_UNDEFINED if all fields) */ + mem_heap_t* heap); /* in: memory heap */ +/********************************************************** +The following function determines the offsets to each field +in the record. It differs from rec_get_offsets() by trying to +reuse a previously returned array. */ + +ulint* +rec_reget_offsets( +/*==============*/ + /* out: the new offsets */ + rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint* offsets,/* in: array of offsets + from rec_get_offsets() + or rec_reget_offsets(), or NULL */ + ulint n_fields,/* in: maximum number of initialized fields + (ULINT_UNDEFINED if all fields) */ + mem_heap_t* heap); /* in: memory heap */ + +/**************************************************************** +Validates offsets returned by rec_get_offsets() or rec_reget_offsets(). */ +UNIV_INLINE +ibool +rec_offs_validate( +/*==============*/ + /* out: TRUE if valid */ + rec_t* rec, /* in: record or NULL */ + dict_index_t* index, /* in: record descriptor or NULL */ + const ulint* offsets);/* in: array returned by rec_get_offsets() + or rec_reget_offsets() */ +/**************************************************************** +Updates debug data in offsets, in order to avoid bogus +rec_offs_validate() failures. */ +UNIV_INLINE +void +rec_offs_make_valid( +/*================*/ + const rec_t* rec, /* in: record */ + const dict_index_t* index,/* in: record descriptor */ + ulint* offsets);/* in: array returned by rec_get_offsets() + or rec_reget_offsets() */ + /**************************************************************** The following function is used to get a pointer to the nth -data field in the record. */ +data field in an old-style record. */ byte* -rec_get_nth_field( -/*==============*/ +rec_get_nth_field_old( +/*==================*/ /* out: pointer to the field */ rec_t* rec, /* in: record */ ulint n, /* in: index of the field */ ulint* len); /* out: length of the field; UNIV_SQL_NULL if SQL null */ /**************************************************************** -Return field length or UNIV_SQL_NULL. */ -UNIV_INLINE -ulint -rec_get_nth_field_len( -/*==================*/ - /* out: length of the field; UNIV_SQL_NULL if SQL - null */ - rec_t* rec, /* in: record */ - ulint n); /* in: index of the field */ -/**************************************************************** -Gets the physical size of a field. Also an SQL null may have a field of -size > 0, if the data type is of a fixed size. */ +Gets the physical size of an old-style field. +Also an SQL null may have a field of size > 0, +if the data type is of a fixed size. */ UNIV_INLINE ulint rec_get_nth_field_size( @@ -173,131 +271,194 @@ rec_get_nth_field_size( /* out: field size in bytes */ rec_t* rec, /* in: record */ ulint n); /* in: index of the field */ -/*************************************************************** -Gets the value of the ith field extern storage bit. If it is TRUE -it means that the field is stored on another page. */ +/**************************************************************** +The following function is used to get a pointer to the nth +data field in an old-style record. */ +UNIV_INLINE +byte* +rec_get_nth_field( +/*==============*/ + /* out: pointer to the field */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n, /* in: index of the field */ + ulint* len); /* out: length of the field; UNIV_SQL_NULL + if SQL null */ +/********************************************************** +Determine if the offsets are for a record in the new +compact format. */ UNIV_INLINE ibool -rec_get_nth_field_extern_bit( -/*=========================*/ - /* in: TRUE or FALSE */ - rec_t* rec, /* in: record */ - ulint i); /* in: ith field */ +rec_offs_comp( +/*==========*/ + /* out: TRUE if compact format */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/********************************************************** +Returns TRUE if the nth field of rec is SQL NULL. */ +UNIV_INLINE +ibool +rec_offs_nth_null( +/*==============*/ + /* out: TRUE if SQL NULL */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n); /* in: nth field */ +/********************************************************** +Returns TRUE if the extern bit is set in nth field of rec. */ +UNIV_INLINE +ibool +rec_offs_nth_extern( +/*================*/ + /* out: TRUE if externally stored */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n); /* in: nth field */ +/********************************************************** +Gets the physical size of a field. */ +UNIV_INLINE +ulint +rec_offs_nth_size( +/*==============*/ + /* out: length of field */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n); /* in: nth field */ + /********************************************************** Returns TRUE if the extern bit is set in any of the fields of rec. */ UNIV_INLINE ibool -rec_contains_externally_stored_field( -/*=================================*/ - /* out: TRUE if a field is stored externally */ - rec_t* rec); /* in: record */ +rec_offs_any_extern( +/*================*/ + /* out: TRUE if a field is stored externally */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /*************************************************************** Sets the value of the ith field extern storage bit. */ - +UNIV_INLINE void rec_set_nth_field_extern_bit( /*=========================*/ - rec_t* rec, /* in: record */ - ulint i, /* in: ith field */ - ibool val, /* in: value to set */ - mtr_t* mtr); /* in: mtr holding an X-latch to the page where - rec is, or NULL; in the NULL case we do not - write to log about the change */ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + ulint i, /* in: ith field */ + ibool val, /* in: value to set */ + mtr_t* mtr); /* in: mtr holding an X-latch to the page + where rec is, or NULL; in the NULL case + we do not write to log about the change */ /*************************************************************** Sets TRUE the extern storage bits of fields mentioned in an array. */ void rec_set_field_extern_bits( /*======================*/ - rec_t* rec, /* in: record */ - ulint* vec, /* in: array of field numbers */ - ulint n_fields, /* in: number of fields numbers */ - mtr_t* mtr); /* in: mtr holding an X-latch to the page - where rec is, or NULL; in the NULL case we - do not write to log about the change */ -/**************************************************************** -The following function is used to get a copy of the nth -data field in the record to a buffer. */ -UNIV_INLINE -void -rec_copy_nth_field( -/*===============*/ - void* buf, /* in: pointer to the buffer */ - rec_t* rec, /* in: record */ - ulint n, /* in: index of the field */ - ulint* len); /* out: length of the field; UNIV_SQL_NULL if SQL - null */ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + const ulint* vec, /* in: array of field numbers */ + ulint n_fields,/* in: number of fields numbers */ + mtr_t* mtr); /* in: mtr holding an X-latch to the page + where rec is, or NULL; in the NULL case + we do not write to log about the change */ /*************************************************************** -This is used to modify the value of an already existing field in -a physical record. The previous value must have exactly the same -size as the new value. If len is UNIV_SQL_NULL then the field is -treated as SQL null. */ +This is used to modify the value of an already existing field in a record. +The previous value must have exactly the same size as the new value. If len +is UNIV_SQL_NULL then the field is treated as an SQL null for old-style +records. For new-style records, len must not be UNIV_SQL_NULL. */ UNIV_INLINE void rec_set_nth_field( /*==============*/ - rec_t* rec, /* in: record */ - ulint n, /* in: index of the field */ - void* data, /* in: pointer to the data if not SQL null */ - ulint len); /* in: length of the data or UNIV_SQL_NULL. - If not SQL null, must have the same length as the - previous value. If SQL null, previous value must be - SQL null. */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n, /* in: index number of the field */ + const void* data, /* in: pointer to the data if not SQL null */ + ulint len); /* in: length of the data or UNIV_SQL_NULL. + If not SQL null, must have the same + length as the previous value. + If SQL null, previous value must be + SQL null. */ /************************************************************** -The following function returns the data size of a physical +The following function returns the data size of an old-style physical record, that is the sum of field lengths. SQL null fields are counted as length 0 fields. The value returned by the function is the distance from record origin to record end in bytes. */ UNIV_INLINE ulint -rec_get_data_size( -/*==============*/ - /* out: size */ +rec_get_data_size_old( +/*==================*/ + /* out: size */ rec_t* rec); /* in: physical record */ /************************************************************** +The following function returns the number of fields in a record. */ +UNIV_INLINE +ulint +rec_offs_n_fields( +/*===============*/ + /* out: number of fields */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** +The following function returns the data size of a physical +record, that is the sum of field lengths. SQL null fields +are counted as length 0 fields. The value returned by the function +is the distance from record origin to record end in bytes. */ +UNIV_INLINE +ulint +rec_offs_data_size( +/*===============*/ + /* out: size */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** Returns the total size of record minus data size of record. The value returned by the function is the distance from record start to record origin in bytes. */ UNIV_INLINE ulint -rec_get_extra_size( -/*===============*/ - /* out: size */ - rec_t* rec); /* in: physical record */ -/************************************************************** +rec_offs_extra_size( +/*================*/ + /* out: size */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** Returns the total size of a physical record. */ UNIV_INLINE +ulint +rec_offs_size( +/*==========*/ + /* out: size */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/************************************************************** +Returns the total size of a physical record. */ + ulint rec_get_size( /*=========*/ - /* out: size */ - rec_t* rec); /* in: physical record */ + /* out: size */ + rec_t* rec, /* in: physical record */ + dict_index_t* index); /* in: record descriptor */ /************************************************************** Returns a pointer to the start of the record. */ UNIV_INLINE byte* rec_get_start( /*==========*/ - /* out: pointer to start */ - rec_t* rec); /* in: pointer to record */ + /* out: pointer to start */ + rec_t* rec, /* in: pointer to record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /************************************************************** Returns a pointer to the end of the record. */ UNIV_INLINE byte* rec_get_end( /*========*/ - /* out: pointer to end */ - rec_t* rec); /* in: pointer to record */ + /* out: pointer to end */ + rec_t* rec, /* in: pointer to record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /******************************************************************* Copies a physical record to a buffer. */ UNIV_INLINE rec_t* rec_copy( /*=====*/ - /* out: pointer to the origin of the copied record */ - void* buf, /* in: buffer */ - rec_t* rec); /* in: physical record */ + /* out: pointer to the origin of the copy */ + void* buf, /* in: buffer */ + const rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /****************************************************************** Copies the first n fields of a physical record to a new physical record in a buffer. */ @@ -305,49 +466,43 @@ a buffer. */ rec_t* rec_copy_prefix_to_buf( /*===================*/ - /* out, own: copied record */ - rec_t* rec, /* in: physical record */ - ulint n_fields, /* in: number of fields to copy */ - byte** buf, /* in/out: memory buffer for the copied prefix, - or NULL */ - ulint* buf_size); /* in/out: buffer size */ + /* out, own: copied record */ + rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint n_fields, /* in: number of fields to copy */ + byte** buf, /* in/out: memory buffer + for the copied prefix, or NULL */ + ulint* buf_size); /* in/out: buffer size */ /**************************************************************** Folds a prefix of a physical record to a ulint. */ UNIV_INLINE ulint rec_fold( /*=====*/ - /* out: the folded value */ - rec_t* rec, /* in: the physical record */ - ulint n_fields, /* in: number of complete fields to fold */ - ulint n_bytes, /* in: number of bytes to fold in an - incomplete last field */ - dulint tree_id); /* in: index tree id */ + /* out: the folded value */ + rec_t* rec, /* in: the physical record */ + const ulint* offsets, /* in: array returned by + rec_get_offsets() */ + ulint n_fields, /* in: number of complete + fields to fold */ + ulint n_bytes, /* in: number of bytes to fold + in an incomplete last field */ + dulint tree_id); /* in: index tree id */ /************************************************************* Builds a physical record out of a data tuple and stores it beginning from address destination. */ -UNIV_INLINE + rec_t* rec_convert_dtuple_to_rec( /*======================*/ - /* out: pointer to the origin of physical - record */ - byte* destination, /* in: start address of the physical record */ - dtuple_t* dtuple); /* in: data tuple */ -/************************************************************* -Builds a physical record out of a data tuple and stores it beginning from -address destination. */ - -rec_t* -rec_convert_dtuple_to_rec_low( -/*==========================*/ - /* out: pointer to the origin of physical - record */ - byte* destination, /* in: start address of the physical record */ - dtuple_t* dtuple, /* in: data tuple */ - ulint data_size); /* in: data size of dtuple */ + /* out: pointer to the origin + of physical record */ + byte* buf, /* in: start address of the + physical record */ + dict_index_t* index, /* in: record descriptor */ + dtuple_t* dtuple);/* in: data tuple */ /************************************************************** -Returns the extra size of a physical record if we know its +Returns the extra size of an old-style physical record if we know its data size and number of fields. */ UNIV_INLINE ulint @@ -355,7 +510,8 @@ rec_get_converted_extra_size( /*=========================*/ /* out: extra size */ ulint data_size, /* in: data size */ - ulint n_fields); /* in: number of fields */ + ulint n_fields) /* in: number of fields */ + __attribute__((const)); /************************************************************** The following function returns the size of a data tuple when converted to a physical record. */ @@ -364,6 +520,7 @@ ulint rec_get_converted_size( /*===================*/ /* out: size */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* dtuple);/* in: data tuple */ /****************************************************************** Copies the first n fields of a physical record to a data tuple. @@ -374,6 +531,7 @@ rec_copy_prefix_to_dtuple( /*======================*/ dtuple_t* tuple, /* in: data tuple */ rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ ulint n_fields, /* in: number of fields to copy */ mem_heap_t* heap); /* in: memory heap */ /******************************************************************* @@ -382,16 +540,27 @@ Validates the consistency of a physical record. */ ibool rec_validate( /*=========*/ - /* out: TRUE if ok */ - rec_t* rec); /* in: physical record */ + /* out: TRUE if ok */ + rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ +/******************************************************************* +Prints an old-style physical record. */ + +void +rec_print_old( +/*==========*/ + FILE* file, /* in: file where to print */ + rec_t* rec); /* in: physical record */ + /******************************************************************* Prints a physical record. */ void rec_print( /*======*/ - FILE* file, /* in: file where to print */ - rec_t* rec); /* in: physical record */ + FILE* file, /* in: file where to print */ + rec_t* rec, /* in: physical record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ #define REC_INFO_BITS 6 /* This is single byte bit-field */ diff --git a/innobase/include/rem0rec.ic b/innobase/include/rem0rec.ic index c36bf8f6d6e..a8acb4e304a 100644 --- a/innobase/include/rem0rec.ic +++ b/innobase/include/rem0rec.ic @@ -8,9 +8,19 @@ Created 5/30/1994 Heikki Tuuri #include "mach0data.h" #include "ut0byte.h" +#include "dict0dict.h" -/* Offsets of the bit-fields in the record. NOTE! In the table the most -significant bytes and bits are written below less significant. +/* Compact flag ORed to the extra size returned by rec_get_offsets() */ +#define REC_OFFS_COMPACT ((ulint) 1 << 31) +/* SQL NULL flag in offsets returned by rec_get_offsets() */ +#define REC_OFFS_SQL_NULL ((ulint) 1 << 31) +/* External flag in offsets returned by rec_get_offsets() */ +#define REC_OFFS_EXTERNAL ((ulint) 1 << 30) +/* Mask for offsets returned by rec_get_offsets() */ +#define REC_OFFS_MASK (REC_OFFS_EXTERNAL - 1) + +/* Offsets of the bit-fields in an old-style record. NOTE! In the table the +most significant bytes and bits are written below less significant. (1) byte offset (2) bit usage within byte downward from @@ -25,6 +35,25 @@ significant bytes and bits are written below less significant. 4 bits info bits */ +/* Offsets of the bit-fields in a new-style record. NOTE! In the table the +most significant bytes and bits are written below less significant. + + (1) byte offset (2) bit usage within byte + downward from + origin -> 1 8 bits pointer to next record (relative) + 2 8 bits pointer to next record (relative) + 3 3 bits status: + 000=conventional record + 001=node pointer record (inside B-tree) + 010=infimum record + 011=supremum record + 1xx=reserved + 5 bits heap number + 4 8 bits heap number + 5 4 bits n_owned + 4 bits info bits +*/ + /* We list the byte offsets from the origin of the record, the mask, and the shift needed to obtain each bit-field of the record. */ @@ -32,22 +61,30 @@ and the shift needed to obtain each bit-field of the record. */ #define REC_NEXT_MASK 0xFFFFUL #define REC_NEXT_SHIFT 0 -#define REC_SHORT 3 /* This is single byte bit-field */ -#define REC_SHORT_MASK 0x1UL -#define REC_SHORT_SHIFT 0 +#define REC_OLD_SHORT 3 /* This is single byte bit-field */ +#define REC_OLD_SHORT_MASK 0x1UL +#define REC_OLD_SHORT_SHIFT 0 + +#define REC_OLD_N_FIELDS 4 +#define REC_OLD_N_FIELDS_MASK 0x7FEUL +#define REC_OLD_N_FIELDS_SHIFT 1 -#define REC_N_FIELDS 4 -#define REC_N_FIELDS_MASK 0x7FEUL -#define REC_N_FIELDS_SHIFT 1 +#define REC_NEW_STATUS 3 /* This is single byte bit-field */ +#define REC_NEW_STATUS_MASK 0x7UL +#define REC_NEW_STATUS_SHIFT 0 -#define REC_HEAP_NO 5 +#define REC_OLD_HEAP_NO 5 +#define REC_NEW_HEAP_NO 4 #define REC_HEAP_NO_MASK 0xFFF8UL #define REC_HEAP_NO_SHIFT 3 -#define REC_N_OWNED 6 /* This is single byte bit-field */ +#define REC_OLD_N_OWNED 6 /* This is single byte bit-field */ +#define REC_NEW_N_OWNED 5 /* This is single byte bit-field */ #define REC_N_OWNED_MASK 0xFUL #define REC_N_OWNED_SHIFT 0 +#define REC_OLD_INFO_BITS 6 /* This is single byte bit-field */ +#define REC_NEW_INFO_BITS 5 /* This is single byte bit-field */ #define REC_INFO_BITS_MASK 0xF0UL #define REC_INFO_BITS_SHIFT 0 @@ -65,26 +102,24 @@ a field stored to another page: */ #define REC_2BYTE_EXTERN_MASK 0x4000UL -/**************************************************************** -Return field length or UNIV_SQL_NULL. */ -UNIV_INLINE -ulint -rec_get_nth_field_len( -/*==================*/ - /* out: length of the field; UNIV_SQL_NULL if SQL - null */ - rec_t* rec, /* in: record */ - ulint n) /* in: index of the field */ -{ - ulint len; - - rec_get_nth_field(rec, n, &len); - - return(len); -} +#if REC_OLD_SHORT_MASK << (8 * (REC_OLD_SHORT - 3)) \ + ^ REC_OLD_N_FIELDS_MASK << (8 * (REC_OLD_N_FIELDS - 4)) \ + ^ REC_HEAP_NO_MASK << (8 * (REC_OLD_HEAP_NO - 4)) \ + ^ REC_N_OWNED_MASK << (8 * (REC_OLD_N_OWNED - 3)) \ + ^ REC_INFO_BITS_MASK << (8 * (REC_OLD_INFO_BITS - 3)) \ + ^ 0xFFFFFFFFUL +# error "sum of old-style masks != 0xFFFFFFFFUL" +#endif +#if REC_NEW_STATUS_MASK << (8 * (REC_NEW_STATUS - 3)) \ + ^ REC_HEAP_NO_MASK << (8 * (REC_NEW_HEAP_NO - 4)) \ + ^ REC_N_OWNED_MASK << (8 * (REC_NEW_N_OWNED - 3)) \ + ^ REC_INFO_BITS_MASK << (8 * (REC_NEW_INFO_BITS - 3)) \ + ^ 0xFFFFFFUL +# error "sum of new-style masks != 0xFFFFFFUL" +#endif /*************************************************************** -Sets the value of the ith field SQL null bit. */ +Sets the value of the ith field SQL null bit of an old-style record. */ void rec_set_nth_field_null_bit( @@ -93,8 +128,8 @@ rec_set_nth_field_null_bit( ulint i, /* in: ith field */ ibool val); /* in: value to set */ /*************************************************************** -Sets a record field to SQL null. The physical size of the field is not -changed. */ +Sets an old-style record field to SQL null. +The physical size of the field is not changed. */ void rec_set_nth_field_sql_null( @@ -102,6 +137,32 @@ rec_set_nth_field_sql_null( rec_t* rec, /* in: record */ ulint n); /* in: index of the field */ +/*************************************************************** +Sets the value of the ith field extern storage bit of an old-style record. */ + +void +rec_set_nth_field_extern_bit_old( +/*=============================*/ + rec_t* rec, /* in: old-style record */ + ulint i, /* in: ith field */ + ibool val, /* in: value to set */ + mtr_t* mtr); /* in: mtr holding an X-latch to the page where + rec is, or NULL; in the NULL case we do not + write to log about the change */ +/*************************************************************** +Sets the value of the ith field extern storage bit of a new-style record. */ + +void +rec_set_nth_field_extern_bit_new( +/*=============================*/ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + ulint ith, /* in: ith field */ + ibool val, /* in: value to set */ + mtr_t* mtr); /* in: mtr holding an X-latch to the page + where rec is, or NULL; in the NULL case + we do not write to log about the change */ + /********************************************************** Gets a bit field from within 1 byte. */ UNIV_INLINE @@ -131,7 +192,7 @@ rec_set_bit_field_1( ulint shift) /* in: shift right applied after masking */ { ut_ad(rec); - ut_ad(offs <= REC_N_EXTRA_BYTES); + ut_ad(offs <= REC_N_OLD_EXTRA_BYTES); ut_ad(mask); ut_ad(mask <= 0xFFUL); ut_ad(((mask >> shift) << shift) == mask); @@ -171,30 +232,14 @@ rec_set_bit_field_2( ulint shift) /* in: shift right applied after masking */ { ut_ad(rec); - ut_ad(offs <= REC_N_EXTRA_BYTES); + ut_ad(offs <= REC_N_OLD_EXTRA_BYTES); ut_ad(mask > 0xFFUL); ut_ad(mask <= 0xFFFFUL); ut_ad((mask >> shift) & 1); ut_ad(0 == ((mask >> shift) & ((mask >> shift) + 1))); ut_ad(((mask >> shift) << shift) == mask); ut_ad(((val << shift) & mask) == (val << shift)); -#ifdef UNIV_DEBUG - { - ulint m; - - /* The following assertion checks that the masks of currently - defined bit-fields in bytes 3-6 do not overlap. */ - m = (ulint)((REC_SHORT_MASK << (8 * (REC_SHORT - 3))) - + (REC_N_FIELDS_MASK << (8 * (REC_N_FIELDS - 4))) - + (REC_HEAP_NO_MASK << (8 * (REC_HEAP_NO - 4))) - + (REC_N_OWNED_MASK << (8 * (REC_N_OWNED - 3))) - + (REC_INFO_BITS_MASK << (8 * (REC_INFO_BITS - 3)))); - if (m != ut_dbg_zero + 0xFFFFFFFFUL) { - fprintf(stderr, "Sum of masks %lx\n", m); - ut_error; - } - } -#endif + mach_write_to_2(rec - offs, (mach_read_from_2(rec - offs) & ~mask) | (val << shift)); @@ -208,17 +253,26 @@ ulint rec_get_next_offs( /*==============*/ /* out: the page offset of the next chained record */ - rec_t* rec) /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp) /* in: TRUE=compact page format */ { - ulint ret; - - ut_ad(rec); - - ret = rec_get_bit_field_2(rec, REC_NEXT, REC_NEXT_MASK, - REC_NEXT_SHIFT); - ut_ad(ret < UNIV_PAGE_SIZE); - - return(ret); + if (comp) { + lint ret = (int16_t) rec_get_bit_field_2(rec, REC_NEXT, + REC_NEXT_MASK, REC_NEXT_SHIFT); +#if UNIV_PAGE_SIZE <= 32768 + /* with 64 KiB page size, the pointer will "wrap around", + and the following assertions are invalid */ + ut_ad(ret + ut_align_offset(rec, UNIV_PAGE_SIZE) < + UNIV_PAGE_SIZE); +#endif + return(ret ? ut_align_offset(rec + ret, UNIV_PAGE_SIZE) : 0); + } + else { + ulint ret = rec_get_bit_field_2(rec, REC_NEXT, + REC_NEXT_MASK, REC_NEXT_SHIFT); + ut_ad(ret < UNIV_PAGE_SIZE); + return(ret); + } } /********************************************************** @@ -229,21 +283,32 @@ void rec_set_next_offs( /*==============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint next) /* in: offset of the next record */ { ut_ad(rec); ut_ad(UNIV_PAGE_SIZE > next); - rec_set_bit_field_2(rec, next, REC_NEXT, REC_NEXT_MASK, - REC_NEXT_SHIFT); + if (comp) { + rec_set_bit_field_2(rec, next + ? (next - ut_align_offset(rec, UNIV_PAGE_SIZE)) +#ifdef UNIV_DEBUG /* avoid an assertion failure */ + & (REC_NEXT_MASK >> REC_NEXT_SHIFT) +#endif + : 0, REC_NEXT, REC_NEXT_MASK, REC_NEXT_SHIFT); + } else { + rec_set_bit_field_2(rec, next, + REC_NEXT, REC_NEXT_MASK, REC_NEXT_SHIFT); + } } /********************************************************** -The following function is used to get the number of fields in the record. */ +The following function is used to get the number of fields +in an old-style record. */ UNIV_INLINE ulint -rec_get_n_fields( -/*=============*/ +rec_get_n_fields_old( +/*=================*/ /* out: number of data fields */ rec_t* rec) /* in: physical record */ { @@ -251,8 +316,8 @@ rec_get_n_fields( ut_ad(rec); - ret = rec_get_bit_field_2(rec, REC_N_FIELDS, REC_N_FIELDS_MASK, - REC_N_FIELDS_SHIFT); + ret = rec_get_bit_field_2(rec, REC_OLD_N_FIELDS, + REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT); ut_ad(ret <= REC_MAX_N_FIELDS); ut_ad(ret > 0); @@ -260,12 +325,12 @@ rec_get_n_fields( } /********************************************************** -The following function is used to set the number of fields field in the -record. */ +The following function is used to set the number of fields +in an old-style record. */ UNIV_INLINE void -rec_set_n_fields( -/*=============*/ +rec_set_n_fields_old( +/*=================*/ rec_t* rec, /* in: physical record */ ulint n_fields) /* in: the number of fields */ { @@ -273,8 +338,38 @@ rec_set_n_fields( ut_ad(n_fields <= REC_MAX_N_FIELDS); ut_ad(n_fields > 0); - rec_set_bit_field_2(rec, n_fields, REC_N_FIELDS, REC_N_FIELDS_MASK, - REC_N_FIELDS_SHIFT); + rec_set_bit_field_2(rec, n_fields, REC_OLD_N_FIELDS, + REC_OLD_N_FIELDS_MASK, REC_OLD_N_FIELDS_SHIFT); +} + +/********************************************************** +The following function is used to get the number of fields +in a record. */ +UNIV_INLINE +ulint +rec_get_n_fields( +/*=============*/ + /* out: number of data fields */ + rec_t* rec, /* in: physical record */ + dict_index_t* index) /* in: record descriptor */ +{ + ut_ad(rec); + ut_ad(index); + if (!index->table->comp) { + return(rec_get_n_fields_old(rec)); + } + switch (rec_get_status(rec)) { + case REC_STATUS_ORDINARY: + return(dict_index_get_n_fields(index)); + case REC_STATUS_NODE_PTR: + return(dict_index_get_n_unique_in_tree(index) + 1); + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + return(1); + default: + ut_error; + return(ULINT_UNDEFINED); + } } /********************************************************** @@ -285,14 +380,16 @@ ulint rec_get_n_owned( /*============*/ /* out: number of owned records */ - rec_t* rec) /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp) /* in: TRUE=compact page format */ { ulint ret; ut_ad(rec); - ret = rec_get_bit_field_1(rec, REC_N_OWNED, REC_N_OWNED_MASK, - REC_N_OWNED_SHIFT); + ret = rec_get_bit_field_1(rec, + comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED, + REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); ut_ad(ret <= REC_MAX_N_OWNED); return(ret); @@ -305,13 +402,15 @@ void rec_set_n_owned( /*============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint n_owned) /* in: the number of owned */ { ut_ad(rec); ut_ad(n_owned <= REC_MAX_N_OWNED); - rec_set_bit_field_1(rec, n_owned, REC_N_OWNED, REC_N_OWNED_MASK, - REC_N_OWNED_SHIFT); + rec_set_bit_field_1(rec, n_owned, + comp ? REC_NEW_N_OWNED : REC_OLD_N_OWNED, + REC_N_OWNED_MASK, REC_N_OWNED_SHIFT); } /********************************************************** @@ -321,14 +420,16 @@ ulint rec_get_info_bits( /*==============*/ /* out: info bits */ - rec_t* rec) /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp) /* in: TRUE=compact page format */ { ulint ret; ut_ad(rec); - ret = rec_get_bit_field_1(rec, REC_INFO_BITS, REC_INFO_BITS_MASK, - REC_INFO_BITS_SHIFT); + ret = rec_get_bit_field_1(rec, + comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS, + REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); ut_ad((ret & ~REC_INFO_BITS_MASK) == 0); return(ret); @@ -341,30 +442,51 @@ void rec_set_info_bits( /*==============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint bits) /* in: info bits */ { ut_ad(rec); ut_ad((bits & ~REC_INFO_BITS_MASK) == 0); - rec_set_bit_field_1(rec, bits, REC_INFO_BITS, REC_INFO_BITS_MASK, - REC_INFO_BITS_SHIFT); + rec_set_bit_field_1(rec, bits, + comp ? REC_NEW_INFO_BITS : REC_OLD_INFO_BITS, + REC_INFO_BITS_MASK, REC_INFO_BITS_SHIFT); } /********************************************************** -Gets the value of the deleted flag in info bits. */ +The following function retrieves the status bits of a new-style record. */ UNIV_INLINE -ibool -rec_info_bits_get_deleted_flag( -/*===========================*/ - /* out: TRUE if deleted flag set */ - ulint info_bits) /* in: info bits from a record */ +ulint +rec_get_status( +/*===========*/ + /* out: status bits */ + rec_t* rec) /* in: physical record */ { - if (info_bits & REC_INFO_DELETED_FLAG) { + ulint ret; - return(TRUE); - } + ut_ad(rec); - return(FALSE); + ret = rec_get_bit_field_1(rec, REC_NEW_STATUS, + REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT); + ut_ad((ret & ~REC_NEW_STATUS_MASK) == 0); + + return(ret); +} + +/********************************************************** +The following function is used to set the status bits of a new-style record. */ +UNIV_INLINE +void +rec_set_status( +/*===========*/ + rec_t* rec, /* in: physical record */ + ulint bits) /* in: info bits */ +{ + ut_ad(rec); + ut_ad((bits & ~REC_NEW_STATUS_MASK) == 0); + + rec_set_bit_field_1(rec, bits, REC_NEW_STATUS, + REC_NEW_STATUS_MASK, REC_NEW_STATUS_SHIFT); } /********************************************************** @@ -374,9 +496,10 @@ ibool rec_get_deleted_flag( /*=================*/ /* out: TRUE if delete marked */ - rec_t* rec) /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp) /* in: TRUE=compact page format */ { - if (REC_INFO_DELETED_FLAG & rec_get_info_bits(rec)) { + if (REC_INFO_DELETED_FLAG & rec_get_info_bits(rec, comp)) { return(TRUE); } @@ -391,6 +514,7 @@ void rec_set_deleted_flag( /*=================*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ibool flag) /* in: TRUE if delete marked */ { ulint old_val; @@ -399,7 +523,7 @@ rec_set_deleted_flag( ut_ad(TRUE == 1); ut_ad(flag <= TRUE); - old_val = rec_get_info_bits(rec); + old_val = rec_get_info_bits(rec, comp); if (flag) { new_val = REC_INFO_DELETED_FLAG | old_val; @@ -407,7 +531,39 @@ rec_set_deleted_flag( new_val = ~REC_INFO_DELETED_FLAG & old_val; } - rec_set_info_bits(rec, new_val); + rec_set_info_bits(rec, comp, new_val); +} + +/********************************************************** +The following function tells if a new-style record is a node pointer. */ +UNIV_INLINE +ibool +rec_get_node_ptr_flag( +/*=================*/ + /* out: TRUE if node pointer */ + rec_t* rec) /* in: physical record */ +{ + return(REC_STATUS_NODE_PTR == rec_get_status(rec)); +} + +/********************************************************** +The following function is used to flag a record as a node pointer. */ +UNIV_INLINE +void +rec_set_node_ptr_flag( +/*=================*/ + rec_t* rec, /* in: physical record */ + ibool flag) /* in: TRUE if the record is a node pointer */ +{ + ulint status; + ut_ad(flag <= TRUE); + ut_ad(REC_STATUS_NODE_PTR >= rec_get_status(rec)); + if (flag) { + status = REC_STATUS_NODE_PTR; + } else { + status = REC_STATUS_ORDINARY; + } + rec_set_status(rec, status); } /********************************************************** @@ -418,14 +574,16 @@ ulint rec_get_heap_no( /*=============*/ /* out: heap order number */ - rec_t* rec) /* in: physical record */ + rec_t* rec, /* in: physical record */ + ibool comp) /* in: TRUE=compact page format */ { ulint ret; ut_ad(rec); - ret = rec_get_bit_field_2(rec, REC_HEAP_NO, REC_HEAP_NO_MASK, - REC_HEAP_NO_SHIFT); + ret = rec_get_bit_field_2(rec, + comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO, + REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT); ut_ad(ret <= REC_MAX_HEAP_NO); return(ret); @@ -438,12 +596,14 @@ void rec_set_heap_no( /*=============*/ rec_t* rec, /* in: physical record */ + ibool comp, /* in: TRUE=compact page format */ ulint heap_no)/* in: the heap number */ { ut_ad(heap_no <= REC_MAX_HEAP_NO); - rec_set_bit_field_2(rec, heap_no, REC_HEAP_NO, REC_HEAP_NO_MASK, - REC_HEAP_NO_SHIFT); + rec_set_bit_field_2(rec, heap_no, + comp ? REC_NEW_HEAP_NO : REC_OLD_HEAP_NO, + REC_HEAP_NO_MASK, REC_HEAP_NO_SHIFT); } /********************************************************** @@ -456,10 +616,12 @@ rec_get_1byte_offs_flag( /* out: TRUE if 1-byte form */ rec_t* rec) /* in: physical record */ { - ut_ad(TRUE == 1); +#if TRUE != 1 +#error "TRUE != 1" +#endif - return(rec_get_bit_field_1(rec, REC_SHORT, REC_SHORT_MASK, - REC_SHORT_SHIFT)); + return(rec_get_bit_field_1(rec, REC_OLD_SHORT, REC_OLD_SHORT_MASK, + REC_OLD_SHORT_SHIFT)); } /********************************************************** @@ -471,11 +633,13 @@ rec_set_1byte_offs_flag( rec_t* rec, /* in: physical record */ ibool flag) /* in: TRUE if 1byte form */ { - ut_ad(TRUE == 1); +#if TRUE != 1 +#error "TRUE != 1" +#endif ut_ad(flag <= TRUE); - rec_set_bit_field_1(rec, flag, REC_SHORT, REC_SHORT_MASK, - REC_SHORT_SHIFT); + rec_set_bit_field_1(rec, flag, REC_OLD_SHORT, REC_OLD_SHORT_MASK, + REC_OLD_SHORT_SHIFT); } /********************************************************** @@ -492,9 +656,9 @@ rec_1_get_field_end_info( ulint n) /* in: field index */ { ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); - return(mach_read_from_1(rec - (REC_N_EXTRA_BYTES + n + 1))); + return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1))); } /********************************************************** @@ -511,68 +675,233 @@ rec_2_get_field_end_info( ulint n) /* in: field index */ { ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); - return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2))); + return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2))); } -/*************************************************************** -Gets the value of the ith field extern storage bit. If it is TRUE -it means that the field is stored on another page. */ +#ifdef UNIV_DEBUG +# define REC_OFFS_HEADER_SIZE 3 +#else /* UNIV_DEBUG */ +# define REC_OFFS_HEADER_SIZE 1 +#endif /* UNIV_DEBUG */ + +/* Get the base address of offsets. The extra_size is stored at +this position, and following positions hold the end offsets of +the fields. */ +#define rec_offs_base(offsets) (offsets + REC_OFFS_HEADER_SIZE) + +/**************************************************************** +Validates offsets returned by rec_get_offsets() or rec_reget_offsets(). */ UNIV_INLINE ibool -rec_get_nth_field_extern_bit( -/*=========================*/ - /* in: TRUE or FALSE */ - rec_t* rec, /* in: record */ - ulint i) /* in: ith field */ +rec_offs_validate( +/*==============*/ + /* out: TRUE if valid */ + rec_t* rec, /* in: record or NULL */ + dict_index_t* index, /* in: record descriptor or NULL */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ulint i = rec_offs_n_fields(offsets); + ulint last = ULINT_MAX; + ibool comp = (*rec_offs_base(offsets) & REC_OFFS_COMPACT) != 0; + ut_a(offsets); + if (rec) { + ut_ad((ulint) rec == offsets[1]); + if (!comp) { + ut_a(rec_get_n_fields_old(rec) >= i); + } + } + if (index) { + ut_ad((ulint) index == offsets[2]); + ulint max_n_fields = ut_max( + dict_index_get_n_fields(index), + dict_index_get_n_unique_in_tree(index) + 1); + if (comp && rec) { + switch (rec_get_status(rec)) { + case REC_STATUS_ORDINARY: + break; + case REC_STATUS_NODE_PTR: + max_n_fields = + dict_index_get_n_unique_in_tree(index) + 1; + break; + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + max_n_fields = 1; + break; + default: + ut_error; + } + } + ut_a(i <= max_n_fields); + } + while (i--) { + ulint curr = rec_offs_base(offsets)[1 + i] & REC_OFFS_MASK; + ut_a(curr <= last); + last = curr; + } + return(TRUE); +} +/**************************************************************** +Updates debug data in offsets, in order to avoid bogus +rec_offs_validate() failures. */ +UNIV_INLINE +void +rec_offs_make_valid( +/*================*/ + const rec_t* rec __attribute__((unused)), + /* in: record */ + const dict_index_t* index __attribute__((unused)), + /* in: record descriptor */ + ulint* offsets __attribute__((unused))) + /* in: array returned by rec_get_offsets() + or rec_reget_offsets() */ { - ulint info; +#ifdef UNIV_DEBUG + offsets[1] = (ulint) rec; + offsets[2] = (ulint) index; +#endif /* UNIV_DEBUG */ +} - if (rec_get_1byte_offs_flag(rec)) { +/**************************************************************** +The following function is used to get a pointer to the nth +data field in an old-style record. */ +UNIV_INLINE +byte* +rec_get_nth_field( +/*==============*/ + /* out: pointer to the field */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n, /* in: index of the field */ + ulint* len) /* out: length of the field; UNIV_SQL_NULL + if SQL null */ +{ + byte* field; + ulint length; + ut_ad(rec); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(n < rec_offs_n_fields(offsets)); + ut_ad(len); - return(FALSE); + if (n == 0) { + field = rec; + } else { + field = rec + (rec_offs_base(offsets)[n] & REC_OFFS_MASK); } - info = rec_2_get_field_end_info(rec, i); + length = rec_offs_base(offsets)[1 + n]; - if (info & REC_2BYTE_EXTERN_MASK) { - return(TRUE); + if (length & REC_OFFS_SQL_NULL) { + field = NULL; + length = UNIV_SQL_NULL; + } else { + length &= REC_OFFS_MASK; + length -= field - rec; } - return(FALSE); + *len = length; + return(field); } /********************************************************** -Returns TRUE if the extern bit is set in any of the fields -of rec. */ +Determine if the offsets are for a record in the new +compact format. */ UNIV_INLINE ibool -rec_contains_externally_stored_field( -/*=================================*/ - /* out: TRUE if a field is stored externally */ - rec_t* rec) /* in: record */ +rec_offs_comp( +/*==========*/ + /* out: TRUE if compact format */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - ulint n; - ulint i; - - if (rec_get_1byte_offs_flag(rec)) { - - return(FALSE); - } + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + return((*rec_offs_base(offsets) & REC_OFFS_COMPACT) != 0); +} - n = rec_get_n_fields(rec); +/********************************************************** +Returns TRUE if the nth field of rec is SQL NULL. */ +UNIV_INLINE +ibool +rec_offs_nth_null( +/*==============*/ + /* out: TRUE if SQL NULL */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n) /* in: nth field */ +{ + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + ut_ad(n < rec_offs_n_fields(offsets)); + return((rec_offs_base(offsets)[1 + n] & REC_OFFS_SQL_NULL) != 0); +} +/********************************************************** +Returns TRUE if the extern bit is set in nth field of rec. */ +UNIV_INLINE +ibool +rec_offs_nth_extern( +/*================*/ + /* out: TRUE if externally stored */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n) /* in: nth field */ +{ + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + ut_ad(n < rec_offs_n_fields(offsets)); + return((rec_offs_base(offsets)[1 + n] & REC_OFFS_EXTERNAL) != 0); +} - for (i = 0; i < n; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { +/********************************************************** +Gets the physical size of a field. */ +UNIV_INLINE +ulint +rec_offs_nth_size( +/*==============*/ + /* out: length of field */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n) /* in: nth field */ +{ + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + ut_ad(n < rec_offs_n_fields(offsets)); + return(rec_offs_base(offsets)[1 + n] & REC_OFFS_MASK); +} +/********************************************************** +Returns TRUE if the extern bit is set in any of the fields +of an old-style record. */ +UNIV_INLINE +ibool +rec_offs_any_extern( +/*================*/ + /* out: TRUE if a field is stored externally */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ulint i; + for (i = rec_offs_n_fields(offsets); i--; ) { + if (rec_offs_nth_extern(offsets, i)) { return(TRUE); } } - return(FALSE); } +/*************************************************************** +Sets the value of the ith field extern storage bit. */ +UNIV_INLINE +void +rec_set_nth_field_extern_bit( +/*=========================*/ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + ulint i, /* in: ith field */ + ibool val, /* in: value to set */ + mtr_t* mtr) /* in: mtr holding an X-latch to the page + where rec is, or NULL; in the NULL case + we do not write to log about the change */ +{ + if (index->table->comp) { + rec_set_nth_field_extern_bit_new(rec, index, i, val, mtr); + } else { + rec_set_nth_field_extern_bit_old(rec, i, val, mtr); + } +} + /********************************************************** Returns the offset of n - 1th field end if the record is stored in the 1-byte offsets form. If the field is SQL null, the flag is ORed in the returned @@ -589,9 +918,9 @@ rec_1_get_prev_field_end_info( ulint n) /* in: field index */ { ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); - return(mach_read_from_1(rec - (REC_N_EXTRA_BYTES + n))); + return(mach_read_from_1(rec - (REC_N_OLD_EXTRA_BYTES + n))); } /********************************************************** @@ -608,9 +937,9 @@ rec_2_get_prev_field_end_info( ulint n) /* in: field index */ { ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); - return(mach_read_from_2(rec - (REC_N_EXTRA_BYTES + 2 * n))); + return(mach_read_from_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n))); } /********************************************************** @@ -625,9 +954,9 @@ rec_1_set_field_end_info( ulint info) /* in: value to set */ { ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); - mach_write_to_1(rec - (REC_N_EXTRA_BYTES + n + 1), info); + mach_write_to_1(rec - (REC_N_OLD_EXTRA_BYTES + n + 1), info); } /********************************************************** @@ -642,9 +971,9 @@ rec_2_set_field_end_info( ulint info) /* in: value to set */ { ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n < rec_get_n_fields(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); - mach_write_to_2(rec - (REC_N_EXTRA_BYTES + 2 * n + 2), info); + mach_write_to_2(rec - (REC_N_OLD_EXTRA_BYTES + 2 * n + 2), info); } /********************************************************** @@ -659,7 +988,7 @@ rec_1_get_field_start_offs( ulint n) /* in: field index */ { ut_ad(rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); if (n == 0) { @@ -682,7 +1011,7 @@ rec_2_get_field_start_offs( ulint n) /* in: field index */ { ut_ad(!rec_get_1byte_offs_flag(rec)); - ut_ad(n <= rec_get_n_fields(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); if (n == 0) { @@ -707,7 +1036,7 @@ rec_get_field_start_offs( ulint n) /* in: field index */ { ut_ad(rec); - ut_ad(n <= rec_get_n_fields(rec)); + ut_ad(n <= rec_get_n_fields_old(rec)); if (n == 0) { @@ -723,8 +1052,9 @@ rec_get_field_start_offs( } /**************************************************************** -Gets the physical size of a field. Also an SQL null may have a field of -size > 0, if the data type is of a fixed size. */ +Gets the physical size of an old-style field. +Also an SQL null may have a field of size > 0, +if the data type is of a fixed size. */ UNIV_INLINE ulint rec_get_nth_field_size( @@ -744,133 +1074,132 @@ rec_get_nth_field_size( return(next_os - os); } -/**************************************************************** -The following function is used to get a copy of the nth data field in a -record to a buffer. */ -UNIV_INLINE -void -rec_copy_nth_field( -/*===============*/ - void* buf, /* in: pointer to the buffer */ - rec_t* rec, /* in: record */ - ulint n, /* in: index of the field */ - ulint* len) /* out: length of the field; UNIV_SQL_NULL if SQL - null */ -{ - byte* ptr; - - ut_ad(buf && rec && len); - - ptr = rec_get_nth_field(rec, n, len); - - if (*len == UNIV_SQL_NULL) { - - return; - } - - ut_memcpy(buf, ptr, *len); -} - /*************************************************************** This is used to modify the value of an already existing field in a record. The previous value must have exactly the same size as the new value. If len -is UNIV_SQL_NULL then the field is treated as an SQL null. */ +is UNIV_SQL_NULL then the field is treated as an SQL null for old-style +records. For new-style records, len must not be UNIV_SQL_NULL. */ UNIV_INLINE void rec_set_nth_field( /*==============*/ - rec_t* rec, /* in: record */ - ulint n, /* in: index of the field */ - void* data, /* in: pointer to the data if not SQL null */ - ulint len) /* in: length of the data or UNIV_SQL_NULL */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n, /* in: index number of the field */ + const void* data, /* in: pointer to the data + if not SQL null */ + ulint len) /* in: length of the data or UNIV_SQL_NULL. + If not SQL null, must have the same + length as the previous value. + If SQL null, previous value must be + SQL null. */ { byte* data2; ulint len2; - ut_ad((len == UNIV_SQL_NULL) - || (rec_get_nth_field_size(rec, n) == len)); - + ut_ad(rec_offs_validate(rec, NULL, offsets)); + if (len == UNIV_SQL_NULL) { + ut_ad(!rec_offs_comp(offsets)); rec_set_nth_field_sql_null(rec, n); return; } - data2 = rec_get_nth_field(rec, n, &len2); + data2 = rec_get_nth_field(rec, offsets, n, &len2); + ut_ad(len2 == len); ut_memcpy(data2, data, len); if (len2 == UNIV_SQL_NULL) { - + ut_ad(!rec_offs_comp(offsets)); rec_set_nth_field_null_bit(rec, n, FALSE); } } /************************************************************** -The following function returns the data size of a physical +The following function returns the data size of an old-style physical record, that is the sum of field lengths. SQL null fields are counted as length 0 fields. The value returned by the function is the distance from record origin to record end in bytes. */ UNIV_INLINE ulint -rec_get_data_size( -/*==============*/ - /* out: size */ +rec_get_data_size_old( +/*==================*/ + /* out: size */ rec_t* rec) /* in: physical record */ { ut_ad(rec); - return(rec_get_field_start_offs(rec, rec_get_n_fields(rec))); + return(rec_get_field_start_offs(rec, rec_get_n_fields_old(rec))); } /************************************************************** -Returns the total size of record minus data size of record. The value -returned by the function is the distance from record start to record origin -in bytes. */ +The following function returns the number of fields in a record. */ UNIV_INLINE ulint -rec_get_extra_size( +rec_offs_n_fields( /*===============*/ - /* out: size */ - rec_t* rec) /* in: physical record */ + /* out: number of fields */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { ulint n_fields; - - ut_ad(rec); - - n_fields = rec_get_n_fields(rec); + ut_ad(offsets); + n_fields = offsets[0]; + ut_ad(n_fields > 0); + ut_ad(n_fields <= REC_MAX_N_FIELDS); + return(n_fields); +} - if (rec_get_1byte_offs_flag(rec)) { +/************************************************************** +The following function returns the data size of a physical +record, that is the sum of field lengths. SQL null fields +are counted as length 0 fields. The value returned by the function +is the distance from record origin to record end in bytes. */ +UNIV_INLINE +ulint +rec_offs_data_size( +/*===============*/ + /* out: size */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ulint size; - return(REC_N_EXTRA_BYTES + n_fields); - } + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + size = rec_offs_base(offsets)[rec_offs_n_fields(offsets)] + & REC_OFFS_MASK; + ut_ad(size < UNIV_PAGE_SIZE); + return(size); +} - return(REC_N_EXTRA_BYTES + 2 * n_fields); +/************************************************************** +Returns the total size of record minus data size of record. The value +returned by the function is the distance from record start to record origin +in bytes. */ +UNIV_INLINE +ulint +rec_offs_extra_size( +/*================*/ + /* out: size */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + ulint size; + ut_ad(rec_offs_validate(NULL, NULL, offsets)); + size = *rec_offs_base(offsets) & ~REC_OFFS_COMPACT; + ut_ad(size < UNIV_PAGE_SIZE); + return(size); } -/************************************************************** +/************************************************************** Returns the total size of a physical record. */ UNIV_INLINE ulint -rec_get_size( -/*=========*/ - /* out: size */ - rec_t* rec) /* in: physical record */ +rec_offs_size( +/*==========*/ + /* out: size */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - ulint n_fields; - - ut_ad(rec); - - n_fields = rec_get_n_fields(rec); - - if (rec_get_1byte_offs_flag(rec)) { - - return(REC_N_EXTRA_BYTES + n_fields - + rec_1_get_field_start_offs(rec, n_fields)); - } - - return(REC_N_EXTRA_BYTES + 2 * n_fields - + rec_2_get_field_start_offs(rec, n_fields)); + return(rec_offs_data_size(offsets) + rec_offs_extra_size(offsets)); } /************************************************************** @@ -879,10 +1208,11 @@ UNIV_INLINE byte* rec_get_end( /*========*/ - /* out: pointer to end */ - rec_t* rec) /* in: pointer to record */ + /* out: pointer to end */ + rec_t* rec, /* in: pointer to record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - return(rec + rec_get_data_size(rec)); + return(rec + rec_offs_data_size(offsets)); } /************************************************************** @@ -891,10 +1221,11 @@ UNIV_INLINE byte* rec_get_start( /*==========*/ - /* out: pointer to start */ - rec_t* rec) /* in: pointer to record */ + /* out: pointer to start */ + rec_t* rec, /* in: pointer to record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - return(rec - rec_get_extra_size(rec)); + return(rec - rec_offs_extra_size(offsets)); } /******************************************************************* @@ -903,18 +1234,20 @@ UNIV_INLINE rec_t* rec_copy( /*=====*/ - /* out: pointer to the origin of the copied record */ - void* buf, /* in: buffer */ - rec_t* rec) /* in: physical record */ + /* out: pointer to the origin of the copy */ + void* buf, /* in: buffer */ + const rec_t* rec, /* in: physical record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { ulint extra_len; ulint data_len; ut_ad(rec && buf); - ut_ad(rec_validate(rec)); + ut_ad(rec_offs_validate((rec_t*) rec, NULL, offsets)); + ut_ad(rec_validate((rec_t*) rec, offsets)); - extra_len = rec_get_extra_size(rec); - data_len = rec_get_data_size(rec); + extra_len = rec_offs_extra_size(offsets); + data_len = rec_offs_data_size(offsets); ut_memcpy(buf, rec - extra_len, extra_len + data_len); @@ -922,8 +1255,8 @@ rec_copy( } /************************************************************** -Returns the extra size of a physical record if we know its data size and -the number of fields. */ +Returns the extra size of an old-style physical record if we know its +data size and number of fields. */ UNIV_INLINE ulint rec_get_converted_extra_size( @@ -934,12 +1267,22 @@ rec_get_converted_extra_size( { if (data_size <= REC_1BYTE_OFFS_LIMIT) { - return(REC_N_EXTRA_BYTES + n_fields); + return(REC_N_OLD_EXTRA_BYTES + n_fields); } - return(REC_N_EXTRA_BYTES + 2 * n_fields); + return(REC_N_OLD_EXTRA_BYTES + 2 * n_fields); } +/************************************************************** +The following function returns the size of a data tuple when converted to +a new-style physical record. */ + +ulint +rec_get_converted_size_new( +/*=======================*/ + /* out: size */ + dict_index_t* index, /* in: record descriptor */ + dtuple_t* dtuple);/* in: data tuple */ /************************************************************** The following function returns the size of a data tuple when converted to a physical record. */ @@ -948,14 +1291,27 @@ ulint rec_get_converted_size( /*===================*/ /* out: size */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* dtuple) /* in: data tuple */ { ulint data_size; ulint extra_size; - + + ut_ad(index); ut_ad(dtuple); ut_ad(dtuple_check_typed(dtuple)); + ut_ad(index->type & DICT_UNIVERSAL + || dtuple_get_n_fields(dtuple) == + (((dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK) + == REC_STATUS_NODE_PTR) + ? dict_index_get_n_unique_in_tree(index) + 1 + : dict_index_get_n_fields(index))); + + if (index->table->comp) { + return(rec_get_converted_size_new(index, dtuple)); + } + data_size = dtuple_get_data_size(dtuple); extra_size = rec_get_converted_extra_size( @@ -971,12 +1327,15 @@ UNIV_INLINE ulint rec_fold( /*=====*/ - /* out: the folded value */ - rec_t* rec, /* in: the physical record */ - ulint n_fields, /* in: number of complete fields to fold */ - ulint n_bytes, /* in: number of bytes to fold in an - incomplete last field */ - dulint tree_id) /* in: index tree id */ + /* out: the folded value */ + rec_t* rec, /* in: the physical record */ + const ulint* offsets, /* in: array returned by + rec_get_offsets() */ + ulint n_fields, /* in: number of complete + fields to fold */ + ulint n_bytes, /* in: number of bytes to fold + in an incomplete last field */ + dulint tree_id) /* in: index tree id */ { ulint i; byte* data; @@ -984,12 +1343,13 @@ rec_fold( ulint fold; ulint n_fields_rec; - ut_ad(rec_validate(rec)); - ut_ad(n_fields <= rec_get_n_fields(rec)); - ut_ad((n_fields < rec_get_n_fields(rec)) || (n_bytes == 0)); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(rec_validate((rec_t*) rec, offsets)); ut_ad(n_fields + n_bytes > 0); - - n_fields_rec = rec_get_n_fields(rec); + + n_fields_rec = rec_offs_n_fields(offsets); + ut_ad(n_fields <= n_fields_rec); + ut_ad(n_fields < n_fields_rec || n_bytes == 0); if (n_fields > n_fields_rec) { n_fields = n_fields_rec; @@ -1002,7 +1362,7 @@ rec_fold( fold = ut_fold_dulint(tree_id); for (i = 0; i < n_fields; i++) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); if (len != UNIV_SQL_NULL) { fold = ut_fold_ulint_pair(fold, @@ -1011,7 +1371,7 @@ rec_fold( } if (n_bytes > 0) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); if (len != UNIV_SQL_NULL) { if (len > n_bytes) { @@ -1025,19 +1385,3 @@ rec_fold( return(fold); } - -/************************************************************* -Builds a physical record out of a data tuple and stores it beginning from -the address destination. */ -UNIV_INLINE -rec_t* -rec_convert_dtuple_to_rec( -/*======================*/ - /* out: pointer to the origin of physical - record */ - byte* destination, /* in: start address of the physical record */ - dtuple_t* dtuple) /* in: data tuple */ -{ - return(rec_convert_dtuple_to_rec_low(destination, dtuple, - dtuple_get_data_size(dtuple))); -} diff --git a/innobase/include/row0row.h b/innobase/include/row0row.h index 951e211fb37..782973d8f5d 100644 --- a/innobase/include/row0row.h +++ b/innobase/include/row0row.h @@ -27,7 +27,8 @@ row_get_rec_trx_id( /*===============*/ /* out: value of the field */ rec_t* rec, /* in: record */ - dict_index_t* index); /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /************************************************************************* Reads the roll pointer field from a clustered index record. */ UNIV_INLINE @@ -36,7 +37,8 @@ row_get_rec_roll_ptr( /*=================*/ /* out: value of the field */ rec_t* rec, /* in: record */ - dict_index_t* index); /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /************************************************************************* Writes the trx id field to a clustered index record. */ UNIV_INLINE @@ -45,7 +47,8 @@ row_set_rec_trx_id( /*===============*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ - dulint trx_id); /* in: value of the field */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ + dulint trx_id);/* in: value of the field */ /************************************************************************* Sets the roll pointer field in a clustered index record. */ UNIV_INLINE @@ -54,6 +57,7 @@ row_set_rec_roll_ptr( /*=================*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ dulint roll_ptr);/* in: value of the field */ /********************************************************************* When an insert to a table is performed, this function builds the entry which @@ -90,6 +94,9 @@ row_build( the buffer page of this record must be at least s-latched and the latch held as long as the row dtuple is used! */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) + or NULL, in which case this function + will invoke rec_get_offsets() */ mem_heap_t* heap); /* in: memory heap from which the memory needed is allocated */ /*********************************************************************** @@ -175,14 +182,15 @@ UNIV_INLINE void row_build_row_ref_fast( /*===================*/ - dtuple_t* ref, /* in: typed data tuple where the reference - is built */ - ulint* map, /* in: array of field numbers in rec telling - how ref should be built from the fields of - rec */ - rec_t* rec); /* in: record in the index; must be preserved - while ref is used, as we do not copy field - values to heap */ + dtuple_t* ref, /* in: typed data tuple where the + reference is built */ + const ulint* map, /* in: array of field numbers in rec + telling how ref should be built from + the fields of rec */ + rec_t* rec, /* in: record in the index; must be + preserved while ref is used, as we do + not copy field values to heap */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /******************************************************************* Searches the clustered index record for a row, if we have the row reference. */ diff --git a/innobase/include/row0row.ic b/innobase/include/row0row.ic index 8e5121f5a96..85410beacf0 100644 --- a/innobase/include/row0row.ic +++ b/innobase/include/row0row.ic @@ -20,7 +20,8 @@ row_get_rec_sys_field( /* out: value of the field */ ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ rec_t* rec, /* in: record */ - dict_index_t* index); /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /************************************************************************* Sets the trx id or roll ptr field in a clustered index record: this function is slower than the specialized inline functions. */ @@ -32,6 +33,7 @@ row_set_rec_sys_field( ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ dulint val); /* in: value to set */ /************************************************************************* @@ -42,18 +44,21 @@ row_get_rec_trx_id( /*===============*/ /* out: value of the field */ rec_t* rec, /* in: record */ - dict_index_t* index) /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { ulint offset; ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); offset = index->trx_id_offset; if (offset) { return(trx_read_trx_id(rec + offset)); } else { - return(row_get_rec_sys_field(DATA_TRX_ID, rec, index)); + return(row_get_rec_sys_field(DATA_TRX_ID, + rec, index, offsets)); } } @@ -65,18 +70,21 @@ row_get_rec_roll_ptr( /*=================*/ /* out: value of the field */ rec_t* rec, /* in: record */ - dict_index_t* index) /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { ulint offset; ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); offset = index->trx_id_offset; if (offset) { return(trx_read_roll_ptr(rec + offset + DATA_TRX_ID_LEN)); } else { - return(row_get_rec_sys_field(DATA_ROLL_PTR, rec, index)); + return(row_get_rec_sys_field(DATA_ROLL_PTR, + rec, index, offsets)); } } @@ -88,18 +96,21 @@ row_set_rec_trx_id( /*===============*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ dulint trx_id) /* in: value of the field */ { ulint offset; ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); offset = index->trx_id_offset; if (offset) { trx_write_trx_id(rec + offset, trx_id); } else { - row_set_rec_sys_field(DATA_TRX_ID, rec, index, trx_id); + row_set_rec_sys_field(DATA_TRX_ID, + rec, index, offsets, trx_id); } } @@ -111,18 +122,21 @@ row_set_rec_roll_ptr( /*=================*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ dulint roll_ptr)/* in: value of the field */ { ulint offset; ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); offset = index->trx_id_offset; if (offset) { trx_write_roll_ptr(rec + offset + DATA_TRX_ID_LEN, roll_ptr); } else { - row_set_rec_sys_field(DATA_ROLL_PTR, rec, index, roll_ptr); + row_set_rec_sys_field(DATA_ROLL_PTR, + rec, index, offsets, roll_ptr); } } @@ -133,14 +147,15 @@ UNIV_INLINE void row_build_row_ref_fast( /*===================*/ - dtuple_t* ref, /* in: typed data tuple where the reference - is built */ - ulint* map, /* in: array of field numbers in rec telling - how ref should be built from the fields of - rec */ - rec_t* rec) /* in: record in the index; must be preserved - while ref is used, as we do not copy field - values to heap */ + dtuple_t* ref, /* in: typed data tuple where the + reference is built */ + const ulint* map, /* in: array of field numbers in rec + telling how ref should be built from + the fields of rec */ + rec_t* rec, /* in: record in the index; must be + preserved while ref is used, as we do + not copy field values to heap */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { dfield_t* dfield; byte* field; @@ -149,6 +164,7 @@ row_build_row_ref_fast( ulint field_no; ulint i; + ut_ad(rec_offs_validate(rec, NULL, offsets)); ref_len = dtuple_get_n_fields(ref); for (i = 0; i < ref_len; i++) { @@ -158,7 +174,8 @@ row_build_row_ref_fast( if (field_no != ULINT_UNDEFINED) { - field = rec_get_nth_field(rec, field_no, &len); + field = rec_get_nth_field(rec, offsets, + field_no, &len); dfield_set_data(dfield, field, len); } } diff --git a/innobase/include/row0upd.h b/innobase/include/row0upd.h index 28210364833..673e0511153 100644 --- a/innobase/include/row0upd.h +++ b/innobase/include/row0upd.h @@ -80,6 +80,7 @@ row_upd_rec_sys_fields( /*===================*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ trx_t* trx, /* in: transaction */ dulint roll_ptr);/* in: roll ptr of the undo log record */ /************************************************************************* @@ -124,8 +125,8 @@ row_upd_changes_field_size_or_external( /* out: TRUE if the update changes the size of some field in index or the field is external in rec or update */ - rec_t* rec, /* in: record in index */ dict_index_t* index, /* in: index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ upd_t* update);/* in: update vector */ /*************************************************************** Replaces the new column values stored in the update vector to the record @@ -135,8 +136,9 @@ a clustered index */ void row_upd_rec_in_place( /*=================*/ - rec_t* rec, /* in/out: record where replaced */ - upd_t* update);/* in: update vector */ + rec_t* rec, /* in/out: record where replaced */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update);/* in: update vector */ /******************************************************************* Builds an update vector from those fields which in a secondary index entry differ from a record that has the equal ordering fields. NOTE: we compare @@ -274,10 +276,11 @@ recovery. */ void row_upd_rec_sys_fields_in_recovery( /*===============================*/ - rec_t* rec, /* in: record */ - ulint pos, /* in: TRX_ID position in rec */ - dulint trx_id, /* in: transaction id */ - dulint roll_ptr);/* in: roll ptr of the undo log record */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint pos, /* in: TRX_ID position in rec */ + dulint trx_id, /* in: transaction id */ + dulint roll_ptr);/* in: roll ptr of the undo log record */ /************************************************************************* Parses the log data written by row_upd_index_write_log. */ diff --git a/innobase/include/row0upd.ic b/innobase/include/row0upd.ic index a124228a0de..e2d81a39cfa 100644 --- a/innobase/include/row0upd.ic +++ b/innobase/include/row0upd.ic @@ -106,15 +106,17 @@ row_upd_rec_sys_fields( /*===================*/ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ trx_t* trx, /* in: transaction */ dulint roll_ptr)/* in: roll ptr of the undo log record */ { ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); #ifdef UNIV_SYNC_DEBUG ut_ad(!buf_block_align(rec)->is_hashed || rw_lock_own(&btr_search_latch, RW_LOCK_EX)); #endif /* UNIV_SYNC_DEBUG */ - row_set_rec_trx_id(rec, index, trx->id); - row_set_rec_roll_ptr(rec, index, roll_ptr); + row_set_rec_trx_id(rec, index, offsets, trx->id); + row_set_rec_roll_ptr(rec, index, offsets, roll_ptr); } diff --git a/innobase/include/row0vers.h b/innobase/include/row0vers.h index 30cf82144e9..0dd40fda65f 100644 --- a/innobase/include/row0vers.h +++ b/innobase/include/row0vers.h @@ -30,7 +30,8 @@ row_vers_impl_x_locked_off_kernel( transaction; NOTE that the kernel mutex is temporarily released! */ rec_t* rec, /* in: record in a secondary index */ - dict_index_t* index); /* in: the secondary index */ + dict_index_t* index, /* in: the secondary index */ + const ulint* offsets);/* in: rec_get_offsets(rec, index) */ /********************************************************************* Finds out if we must preserve a delete marked earlier version of a clustered index record, because it is >= the purge view. */ diff --git a/innobase/include/row0vers.ic b/innobase/include/row0vers.ic index 5ece47c35d1..ab1e264635b 100644 --- a/innobase/include/row0vers.ic +++ b/innobase/include/row0vers.ic @@ -11,73 +11,3 @@ Created 2/6/1997 Heikki Tuuri #include "read0read.h" #include "page0page.h" #include "log0recv.h" - -/************************************************************************* -Fetches the trx id of a clustered index record or version. */ -UNIV_INLINE -dulint -row_vers_get_trx_id( -/*================*/ - /* out: trx id or ut_dulint_zero if the - clustered index record not found */ - rec_t* rec, /* in: clustered index record, or an old - version of it */ - dict_table_t* table) /* in: table */ -{ - return(row_get_rec_trx_id(rec, dict_table_get_first_index(table))); -} - -/************************************************************************* -Checks if a consistent read can be performed immediately on the index -record, or if an older version is needed. */ -UNIV_INLINE -ibool -row_vers_clust_rec_sees_older( -/*==========================*/ - /* out: FALSE if can read immediately */ - rec_t* rec, /* in: record which should be read or passed - over by a read cursor */ - dict_index_t* index, /* in: clustered index */ - read_view_t* view) /* in: read view */ -{ - ut_ad(index->type & DICT_CLUSTERED); - - if (read_view_sees_trx_id(view, row_get_rec_trx_id(rec, index))) { - - return(FALSE); - } - - return(TRUE); -} - -/************************************************************************* -Checks if a secondary index record can be read immediately by a consistent -read, or if an older version may be needed. To be sure, we will have to -look in the clustered index. */ -UNIV_INLINE -ibool -row_vers_sec_rec_may_see_older( -/*===========================*/ - /* out: FALSE if can be read immediately */ - rec_t* rec, /* in: record which should be read or passed */ - dict_index_t* index __attribute__((unused)),/* in: secondary index */ - read_view_t* view) /* in: read view */ -{ - page_t* page; - - ut_ad(!(index->type & DICT_CLUSTERED)); - - page = buf_frame_align(rec); - - if ((ut_dulint_cmp(page_get_max_trx_id(page), view->up_limit_id) >= 0) - || recv_recovery_is_on()) { - - /* It may be that the record was inserted or modified by a - transaction the view should not see: we have to look in the - clustered index */ - - return(TRUE); - } - - return(FALSE); -} diff --git a/innobase/include/srv0srv.h b/innobase/include/srv0srv.h index b9963d93265..d4cc7d8222f 100644 --- a/innobase/include/srv0srv.h +++ b/innobase/include/srv0srv.h @@ -538,6 +538,10 @@ struct srv_sys_struct{ srv_table_t* threads; /* server thread table */ UT_LIST_BASE_NODE_T(que_thr_t) tasks; /* task queue */ + dict_index_t* dummy_ind1; /* dummy index for old-style + supremum and infimum records */ + dict_index_t* dummy_ind2; /* dummy index for new-style + supremum and infimum records */ }; extern ulint srv_n_threads_active[]; diff --git a/innobase/include/trx0rec.h b/innobase/include/trx0rec.h index 9d7f41cd94e..4387ce1a61e 100644 --- a/innobase/include/trx0rec.h +++ b/innobase/include/trx0rec.h @@ -246,6 +246,7 @@ trx_undo_prev_version_build( index_rec page and purge_view */ rec_t* rec, /* in: version of a clustered index record */ dict_index_t* index, /* in: clustered index */ + ulint* offsets,/* in: rec_get_offsets(rec, index) */ mem_heap_t* heap, /* in: memory heap from which the memory needed is allocated */ rec_t** old_vers);/* out, own: previous version, or NULL if diff --git a/innobase/include/ut0byte.h b/innobase/include/ut0byte.h index a62c2e2e318..22d488abeaf 100644 --- a/innobase/include/ut0byte.h +++ b/innobase/include/ut0byte.h @@ -208,7 +208,20 @@ ut_align_down( /*==========*/ /* out: aligned pointer */ void* ptr, /* in: pointer */ - ulint align_no); /* in: align by this number */ + ulint align_no) /* in: align by this number */ + __attribute__((const)); +/************************************************************* +The following function computes the offset of a pointer from the nearest +aligned address. */ +UNIV_INLINE +ulint +ut_align_offset( +/*==========*/ + /* out: distance from aligned + pointer */ + const void* ptr, /* in: pointer */ + ulint align_no) /* in: align by this number */ + __attribute__((const)); /********************************************************************* Gets the nth bit of a ulint. */ UNIV_INLINE diff --git a/innobase/include/ut0byte.ic b/innobase/include/ut0byte.ic index 5a70dcf12a8..e141de3aa3f 100644 --- a/innobase/include/ut0byte.ic +++ b/innobase/include/ut0byte.ic @@ -335,6 +335,27 @@ ut_align_down( return((void*)((((ulint)ptr)) & ~(align_no - 1))); } +/************************************************************* +The following function computes the offset of a pointer from the nearest +aligned address. */ +UNIV_INLINE +ulint +ut_align_offset( +/*============*/ + /* out: distance from + aligned pointer */ + const void* ptr, /* in: pointer */ + ulint align_no) /* in: align by this number */ +{ + ut_ad(align_no > 0); + ut_ad(((align_no - 1) & align_no) == 0); + ut_ad(ptr); + + ut_ad(sizeof(void*) == sizeof(ulint)); + + return(((ulint)ptr) & (align_no - 1)); +} + /********************************************************************* Gets the nth bit of a ulint. */ UNIV_INLINE diff --git a/innobase/lock/lock0lock.c b/innobase/lock/lock0lock.c index 32a6bcadecc..d2d16a1ae4e 100644 --- a/innobase/lock/lock0lock.c +++ b/innobase/lock/lock0lock.c @@ -425,11 +425,14 @@ lock_check_trx_id_sanity( dulint trx_id, /* in: trx id */ rec_t* rec, /* in: user record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets, /* in: rec_get_offsets(rec, index) */ ibool has_kernel_mutex)/* in: TRUE if the caller owns the kernel mutex */ { ibool is_ok = TRUE; + ut_ad(rec_offs_validate(rec, index, offsets)); + if (!has_kernel_mutex) { mutex_enter(&kernel_mutex); } @@ -442,7 +445,7 @@ lock_check_trx_id_sanity( fputs(" InnoDB: Error: transaction id associated" " with record\n", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); fputs("InnoDB: in ", stderr); dict_index_name_print(stderr, NULL, index); fprintf(stderr, "\n" @@ -474,18 +477,20 @@ lock_clust_rec_cons_read_sees( rec_t* rec, /* in: user record which should be read or passed over by a read cursor */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ read_view_t* view) /* in: consistent read view */ { dulint trx_id; ut_ad(index->type & DICT_CLUSTERED); ut_ad(page_rec_is_user_rec(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); /* NOTE that we call this function while holding the search system latch. To obey the latching order we must NOT reserve the kernel mutex here! */ - trx_id = row_get_rec_trx_id(rec, index); + trx_id = row_get_rec_trx_id(rec, index, offsets); if (read_view_sees_trx_id(view, trx_id)) { @@ -1256,6 +1261,7 @@ lock_rec_get_next( /*==============*/ /* out: next lock, NULL if none exists */ rec_t* rec, /* in: record on a page */ + ibool comp, /* in: TRUE=compact page format */ lock_t* lock) /* in: lock */ { #ifdef UNIV_SYNC_DEBUG @@ -1271,7 +1277,7 @@ lock_rec_get_next( return(NULL); } - if (lock_rec_get_nth_bit(lock, rec_get_heap_no(rec))) { + if (lock_rec_get_nth_bit(lock, rec_get_heap_no(rec, comp))) { return(lock); } @@ -1288,15 +1294,17 @@ lock_rec_get_first( rec_t* rec) /* in: record on a page */ { lock_t* lock; + ibool comp; #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ lock = lock_rec_get_first_on_page(rec); + comp = page_is_comp(buf_frame_align(rec)); while (lock) { - if (lock_rec_get_nth_bit(lock, rec_get_heap_no(rec))) { + if (lock_rec_get_nth_bit(lock, rec_get_heap_no(rec, comp))) { break; } @@ -1463,6 +1471,7 @@ lock_rec_has_expl( for a supremum record we regard this always a gap type request */ rec_t* rec, /* in: record */ + ibool comp, /* in: TRUE=compact page format */ trx_t* trx) /* in: transaction */ { lock_t* lock; @@ -1492,7 +1501,7 @@ lock_rec_has_expl( return(lock); } - lock = lock_rec_get_next(rec, lock); + lock = lock_rec_get_next(rec, comp, lock); } return(NULL); @@ -1511,6 +1520,7 @@ lock_rec_other_has_expl_req( ulint wait, /* in: LOCK_WAIT if also waiting locks are taken into account, or 0 if not */ rec_t* rec, /* in: record to look at */ + ibool comp, /* in: TRUE=compact record format */ trx_t* trx) /* in: transaction, or NULL if requests by all transactions are taken into account */ { @@ -1535,7 +1545,7 @@ lock_rec_other_has_expl_req( return(lock); } - lock = lock_rec_get_next(rec, lock); + lock = lock_rec_get_next(rec, comp, lock); } return(NULL); @@ -1556,12 +1566,13 @@ lock_rec_other_has_conflicting( trx_t* trx) /* in: our transaction */ { lock_t* lock; - + ibool comp; #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ lock = lock_rec_get_first(rec); + comp = page_is_comp(buf_frame_align(rec)); while (lock) { if (lock_rec_has_to_wait(trx, mode, lock, @@ -1570,7 +1581,7 @@ lock_rec_other_has_conflicting( return(lock); } - lock = lock_rec_get_next(rec, lock); + lock = lock_rec_get_next(rec, comp, lock); } return(NULL); @@ -1596,8 +1607,7 @@ lock_rec_find_similar_on_page( ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ - heap_no = rec_get_heap_no(rec); - + heap_no = rec_get_heap_no(rec, page_is_comp(buf_frame_align(rec))); lock = lock_rec_get_first_on_page(rec); while (lock != NULL) { @@ -1624,7 +1634,8 @@ lock_sec_rec_some_has_impl_off_kernel( /* out: transaction which has the x-lock, or NULL */ rec_t* rec, /* in: user record */ - dict_index_t* index) /* in: secondary index */ + dict_index_t* index, /* in: secondary index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { page_t* page; @@ -1633,6 +1644,7 @@ lock_sec_rec_some_has_impl_off_kernel( #endif /* UNIV_SYNC_DEBUG */ ut_ad(!(index->type & DICT_CLUSTERED)); ut_ad(page_rec_is_user_rec(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); page = buf_frame_align(rec); @@ -1652,8 +1664,8 @@ lock_sec_rec_some_has_impl_off_kernel( /* Ok, in this case it is possible that some transaction has an implicit x-lock. We have to look in the clustered index. */ - if (!lock_check_trx_id_sanity(page_get_max_trx_id(page), rec, index, - TRUE)) { + if (!lock_check_trx_id_sanity(page_get_max_trx_id(page), + rec, index, offsets, TRUE)) { buf_page_print(page); /* The page is corrupt: try to avoid a crash by returning @@ -1661,7 +1673,7 @@ lock_sec_rec_some_has_impl_off_kernel( return(NULL); } - return(row_vers_impl_x_locked_off_kernel(rec, index)); + return(row_vers_impl_x_locked_off_kernel(rec, index, offsets)); } /*============== RECORD LOCK CREATION AND QUEUE MANAGEMENT =============*/ @@ -1695,7 +1707,7 @@ lock_rec_create( page = buf_frame_align(rec); space = buf_frame_get_space_id(page); page_no = buf_frame_get_page_no(page); - heap_no = rec_get_heap_no(rec); + heap_no = rec_get_heap_no(rec, page_is_comp(page)); /* If rec is the supremum record, then we reset the gap and LOCK_REC_NOT_GAP bits, as all locks on the supremum are @@ -1708,8 +1720,7 @@ lock_rec_create( } /* Make lock bitmap bigger by a safety margin */ - n_bits = page_header_get_field(page, PAGE_N_HEAP) - + LOCK_PAGE_BITMAP_MARGIN; + n_bits = page_dir_get_n_heap(page) + LOCK_PAGE_BITMAP_MARGIN; n_bytes = 1 + n_bits / 8; lock = mem_heap_alloc(trx->lock_heap, sizeof(lock_t) + n_bytes); @@ -1814,7 +1825,8 @@ lock_rec_enqueue_waiting( if (lock_deadlock_occurs(lock, trx)) { lock_reset_lock_and_trx_wait(lock); - lock_rec_reset_nth_bit(lock, rec_get_heap_no(rec)); + lock_rec_reset_nth_bit(lock, rec_get_heap_no(rec, + page_is_comp(buf_frame_align(rec)))); return(DB_DEADLOCK); } @@ -1864,7 +1876,7 @@ lock_rec_add_to_queue( lock_t* lock; lock_t* similar_lock = NULL; ulint heap_no; - page_t* page; + page_t* page = buf_frame_align(rec); ibool somebody_waits = FALSE; #ifdef UNIV_SYNC_DEBUG @@ -1872,15 +1884,15 @@ lock_rec_add_to_queue( #endif /* UNIV_SYNC_DEBUG */ ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP)) || ((type_mode & LOCK_MODE_MASK) != LOCK_S) - || !lock_rec_other_has_expl_req(LOCK_X, 0, LOCK_WAIT, rec, trx)); + || !lock_rec_other_has_expl_req(LOCK_X, 0, LOCK_WAIT, + rec, page_is_comp(page), trx)); ut_ad((type_mode & (LOCK_WAIT | LOCK_GAP)) || ((type_mode & LOCK_MODE_MASK) != LOCK_X) - || !lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT, rec, trx)); + || !lock_rec_other_has_expl_req(LOCK_S, 0, LOCK_WAIT, + rec, page_is_comp(page), trx)); type_mode = type_mode | LOCK_REC; - page = buf_frame_align(rec); - /* If rec is the supremum record, then we can reset the gap bit, as all locks on the supremum are automatically of the gap type, and we try to avoid unnecessary memory consumption of a new record lock @@ -1897,7 +1909,7 @@ lock_rec_add_to_queue( /* Look for a waiting lock request on the same record or on a gap */ - heap_no = rec_get_heap_no(rec); + heap_no = rec_get_heap_no(rec, page_is_comp(page)); lock = lock_rec_get_first_on_page(rec); while (lock != NULL) { @@ -1972,7 +1984,7 @@ lock_rec_lock_fast( || mode - (LOCK_MODE_MASK & mode) == 0 || mode - (LOCK_MODE_MASK & mode) == LOCK_REC_NOT_GAP); - heap_no = rec_get_heap_no(rec); + heap_no = rec_get_heap_no(rec, page_is_comp(buf_frame_align(rec))); lock = lock_rec_get_first_on_page(rec); @@ -2053,7 +2065,8 @@ lock_rec_lock_slow( trx = thr_get_trx(thr); - if (lock_rec_has_expl(mode, rec, trx)) { + if (lock_rec_has_expl(mode, rec, + page_is_comp(buf_frame_align(rec)), trx)) { /* The trx already has a strong enough lock on rec: do nothing */ @@ -2369,12 +2382,14 @@ lock_rec_reset_and_release_wait( { lock_t* lock; ulint heap_no; - + ibool comp; + #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ - heap_no = rec_get_heap_no(rec); + comp = page_is_comp(buf_frame_align(rec)); + heap_no = rec_get_heap_no(rec, comp); lock = lock_rec_get_first(rec); @@ -2385,7 +2400,7 @@ lock_rec_reset_and_release_wait( lock_rec_reset_nth_bit(lock, heap_no); } - lock = lock_rec_get_next(rec, lock); + lock = lock_rec_get_next(rec, comp, lock); } } @@ -2403,12 +2418,13 @@ lock_rec_inherit_to_gap( the locks on this record */ { lock_t* lock; - + ibool comp; #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ lock = lock_rec_get_first(rec); + comp = page_is_comp(buf_frame_align(rec)); while (lock != NULL) { if (!lock_rec_get_insert_intention(lock)) { @@ -2418,7 +2434,7 @@ lock_rec_inherit_to_gap( heir, lock->index, lock->trx); } - lock = lock_rec_get_next(rec, lock); + lock = lock_rec_get_next(rec, comp, lock); } } @@ -2435,12 +2451,13 @@ lock_rec_inherit_to_gap_if_gap_lock( the locks on this record */ { lock_t* lock; - + ibool comp; #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ lock = lock_rec_get_first(rec); + comp = page_is_comp(buf_frame_align(rec)); while (lock != NULL) { if (!lock_rec_get_insert_intention(lock) @@ -2452,7 +2469,7 @@ lock_rec_inherit_to_gap_if_gap_lock( heir, lock->index, lock->trx); } - lock = lock_rec_get_next(rec, lock); + lock = lock_rec_get_next(rec, comp, lock); } } @@ -2465,7 +2482,8 @@ lock_rec_move( /*==========*/ rec_t* receiver, /* in: record which gets locks; this record must have no lock requests on it! */ - rec_t* donator) /* in: record which gives locks */ + rec_t* donator, /* in: record which gives locks */ + ibool comp) /* in: TRUE=compact page format */ { lock_t* lock; ulint heap_no; @@ -2475,7 +2493,7 @@ lock_rec_move( ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ - heap_no = rec_get_heap_no(donator); + heap_no = rec_get_heap_no(donator, comp); lock = lock_rec_get_first(donator); @@ -2495,7 +2513,7 @@ lock_rec_move( lock_rec_add_to_queue(type_mode, receiver, lock->index, lock->trx); - lock = lock_rec_get_next(donator, lock); + lock = lock_rec_get_next(donator, comp, lock); } ut_ad(lock_rec_get_first(donator) == NULL); @@ -2521,6 +2539,7 @@ lock_move_reorganize_page( UT_LIST_BASE_NODE_T(lock_t) old_locks; mem_heap_t* heap = NULL; rec_t* sup; + ibool comp; lock_mutex_enter_kernel(); @@ -2561,6 +2580,9 @@ lock_move_reorganize_page( lock = UT_LIST_GET_FIRST(old_locks); + comp = page_is_comp(page); + ut_ad(comp == page_is_comp(old_page)); + while (lock) { /* NOTE: we copy also the locks set on the infimum and supremum of the page; the infimum may carry locks if an @@ -2572,12 +2594,12 @@ lock_move_reorganize_page( /* Set locks according to old locks */ for (;;) { - ut_ad(0 == ut_memcmp(page_cur_get_rec(&cur1), + ut_ad(comp || 0 == ut_memcmp(page_cur_get_rec(&cur1), page_cur_get_rec(&cur2), - rec_get_data_size( + rec_get_data_size_old( page_cur_get_rec(&cur2)))); - - old_heap_no = rec_get_heap_no(page_cur_get_rec(&cur2)); + old_heap_no = rec_get_heap_no(page_cur_get_rec(&cur2), + comp); if (lock_rec_get_nth_bit(lock, old_heap_no)) { @@ -2636,6 +2658,7 @@ lock_move_rec_list_end( ulint heap_no; rec_t* sup; ulint type_mode; + ibool comp; lock_mutex_enter_kernel(); @@ -2649,6 +2672,8 @@ lock_move_rec_list_end( lock = lock_rec_get_first_on_page(page); + comp = page_is_comp(page); + while (lock != NULL) { page_cur_position(rec, &cur1); @@ -2664,13 +2689,12 @@ lock_move_rec_list_end( reset the lock bits on the old */ while (page_cur_get_rec(&cur1) != sup) { - - ut_ad(0 == ut_memcmp(page_cur_get_rec(&cur1), + ut_ad(comp || 0 == ut_memcmp(page_cur_get_rec(&cur1), page_cur_get_rec(&cur2), - rec_get_data_size( + rec_get_data_size_old( page_cur_get_rec(&cur2)))); - - heap_no = rec_get_heap_no(page_cur_get_rec(&cur1)); + heap_no = rec_get_heap_no(page_cur_get_rec(&cur1), + comp); if (lock_rec_get_nth_bit(lock, heap_no)) { type_mode = lock->type_mode; @@ -2720,12 +2744,15 @@ lock_move_rec_list_start( page_cur_t cur2; ulint heap_no; ulint type_mode; + ibool comp; ut_a(new_page); lock_mutex_enter_kernel(); lock = lock_rec_get_first_on_page(page); + comp = page_is_comp(page); + ut_ad(comp == page_is_comp(new_page)); while (lock != NULL) { @@ -2739,13 +2766,12 @@ lock_move_rec_list_start( reset the lock bits on the old */ while (page_cur_get_rec(&cur1) != rec) { - - ut_ad(0 == ut_memcmp(page_cur_get_rec(&cur1), + ut_ad(comp || 0 == ut_memcmp(page_cur_get_rec(&cur1), page_cur_get_rec(&cur2), - rec_get_data_size( + rec_get_data_size_old( page_cur_get_rec(&cur2)))); - - heap_no = rec_get_heap_no(page_cur_get_rec(&cur1)); + heap_no = rec_get_heap_no(page_cur_get_rec(&cur1), + comp); if (lock_rec_get_nth_bit(lock, heap_no)) { type_mode = lock->type_mode; @@ -2785,13 +2811,16 @@ lock_update_split_right( page_t* right_page, /* in: right page */ page_t* left_page) /* in: left page */ { + ibool comp; lock_mutex_enter_kernel(); - + comp = page_is_comp(left_page); + ut_ad(comp == page_is_comp(right_page)); + /* Move the locks on the supremum of the left page to the supremum of the right page */ lock_rec_move(page_get_supremum_rec(right_page), - page_get_supremum_rec(left_page)); + page_get_supremum_rec(left_page), comp); /* Inherit the locks to the supremum of left page from the successor of the infimum on right page */ @@ -2845,13 +2874,16 @@ lock_update_root_raise( page_t* new_page, /* in: index page to which copied */ page_t* root) /* in: root page */ { + ibool comp; lock_mutex_enter_kernel(); - + comp = page_is_comp(root); + ut_ad(comp == page_is_comp(new_page)); + /* Move the locks on the supremum of the root to the supremum of new_page */ lock_rec_move(page_get_supremum_rec(new_page), - page_get_supremum_rec(root)); + page_get_supremum_rec(root), comp); lock_mutex_exit_kernel(); } @@ -2865,13 +2897,16 @@ lock_update_copy_and_discard( page_t* new_page, /* in: index page to which copied */ page_t* page) /* in: index page; NOT the root! */ { + ibool comp; lock_mutex_enter_kernel(); - + comp = page_is_comp(page); + ut_ad(comp == page_is_comp(new_page)); + /* Move the locks on the supremum of the old page to the supremum of new_page */ lock_rec_move(page_get_supremum_rec(new_page), - page_get_supremum_rec(page)); + page_get_supremum_rec(page), comp); lock_rec_free_all_from_discard_page(page); lock_mutex_exit_kernel(); @@ -2909,8 +2944,11 @@ lock_update_merge_left( page_t* right_page) /* in: merged index page which will be discarded */ { + ibool comp; lock_mutex_enter_kernel(); - + comp = page_is_comp(left_page); + ut_ad(comp == page_is_comp(right_page)); + if (page_rec_get_next(orig_pred) != page_get_supremum_rec(left_page)) { /* Inherit the locks on the supremum of the left page to the @@ -2930,7 +2968,7 @@ lock_update_merge_left( of the left page */ lock_rec_move(page_get_supremum_rec(left_page), - page_get_supremum_rec(right_page)); + page_get_supremum_rec(right_page), comp); lock_rec_free_all_from_discard_page(right_page); @@ -3057,12 +3095,14 @@ lock_rec_store_on_page_infimum( bits are reset on the record */ { page_t* page; + ibool comp; page = buf_frame_align(rec); + comp = page_is_comp(page); lock_mutex_enter_kernel(); - lock_rec_move(page_get_infimum_rec(page), rec); + lock_rec_move(page_get_infimum_rec(page), rec, comp); lock_mutex_exit_kernel(); } @@ -3079,9 +3119,12 @@ lock_rec_restore_from_page_infimum( whose infimum stored the lock state; lock bits are reset on the infimum */ { + ibool comp; lock_mutex_enter_kernel(); - - lock_rec_move(rec, page_get_infimum_rec(page)); + comp = page_is_comp(page); + ut_ad(comp == page_is_comp(buf_frame_align(rec))); + + lock_rec_move(rec, page_get_infimum_rec(page), comp); lock_mutex_exit_kernel(); } @@ -4025,11 +4068,15 @@ lock_rec_print( FILE* file, /* in: file where to print */ lock_t* lock) /* in: record type lock */ { - page_t* page; - ulint space; - ulint page_no; - ulint i; - mtr_t mtr; + page_t* page; + ulint space; + ulint page_no; + ulint i; + mtr_t mtr; + mem_heap_t* heap; + ulint* offsets = NULL; + + heap = mem_heap_create(100); #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex)); @@ -4108,8 +4155,11 @@ lock_rec_print( fprintf(file, "Record lock, heap no %lu ", (ulong) i); if (page) { - rec_print(file, - page_find_rec_with_heap_no(page, i)); + rec_t* rec + = page_find_rec_with_heap_no(page, i); + offsets = rec_reget_offsets(rec, lock->index, + offsets, ULINT_UNDEFINED, heap); + rec_print(file, rec, offsets); } putc('\n', file); @@ -4117,6 +4167,7 @@ lock_rec_print( } mtr_commit(&mtr); + mem_heap_free(heap); } /************************************************************************* @@ -4394,12 +4445,16 @@ lock_rec_queue_validate( /*====================*/ /* out: TRUE if ok */ rec_t* rec, /* in: record to look at */ - dict_index_t* index) /* in: index, or NULL if not known */ + dict_index_t* index, /* in: index, or NULL if not known */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { trx_t* impl_trx; lock_t* lock; - + ibool comp; + ut_a(rec); + ut_ad(rec_offs_validate(rec, index, offsets)); + comp = page_is_comp(buf_frame_align(rec)); lock_mutex_enter_kernel(); @@ -4422,7 +4477,7 @@ lock_rec_queue_validate( ut_a(lock->index == index); } - lock = lock_rec_get_next(rec, lock); + lock = lock_rec_get_next(rec, comp, lock); } lock_mutex_exit_kernel(); @@ -4432,13 +4487,13 @@ lock_rec_queue_validate( if (index && (index->type & DICT_CLUSTERED)) { - impl_trx = lock_clust_rec_some_has_impl(rec, index); + impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets); if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0, - LOCK_WAIT, rec, impl_trx)) { + LOCK_WAIT, rec, comp, impl_trx)) { ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec, - impl_trx)); + comp, impl_trx)); } } @@ -4448,13 +4503,14 @@ lock_rec_queue_validate( next function call: we have to release lock table mutex to obey the latching order */ - impl_trx = lock_sec_rec_some_has_impl_off_kernel(rec, index); + impl_trx = lock_sec_rec_some_has_impl_off_kernel( + rec, index, offsets); if (impl_trx && lock_rec_other_has_expl_req(LOCK_S, 0, - LOCK_WAIT, rec, impl_trx)) { + LOCK_WAIT, rec, comp, impl_trx)) { - ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec, - impl_trx)); + ut_a(lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, + rec, comp, impl_trx)); } } @@ -4473,10 +4529,10 @@ lock_rec_queue_validate( if (lock_get_mode(lock) == LOCK_S) { ut_a(!lock_rec_other_has_expl_req(LOCK_X, - 0, 0, rec, lock->trx)); + 0, 0, rec, comp, lock->trx)); } else { ut_a(!lock_rec_other_has_expl_req(LOCK_S, - 0, 0, rec, lock->trx)); + 0, 0, rec, comp, lock->trx)); } } else if (lock_get_wait(lock) && !lock_rec_get_gap(lock)) { @@ -4484,7 +4540,7 @@ lock_rec_queue_validate( ut_a(lock_rec_has_to_wait_in_queue(lock)); } - lock = lock_rec_get_next(rec, lock); + lock = lock_rec_get_next(rec, comp, lock); } lock_mutex_exit_kernel(); @@ -4510,6 +4566,8 @@ lock_rec_validate_page( ulint nth_bit = 0; ulint i; mtr_t mtr; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; #ifdef UNIV_SYNC_DEBUG ut_ad(!mutex_own(&kernel_mutex)); @@ -4549,13 +4607,15 @@ loop: index = lock->index; rec = page_find_rec_with_heap_no(page, i); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); fprintf(stderr, "Validating %lu %lu\n", (ulong) space, (ulong) page_no); lock_mutex_exit_kernel(); - lock_rec_queue_validate(rec, index); + lock_rec_queue_validate(rec, index, offsets); lock_mutex_enter_kernel(); @@ -4575,6 +4635,7 @@ function_exit: mtr_commit(&mtr); + mem_heap_free(heap); return(TRUE); } @@ -4747,8 +4808,16 @@ lock_rec_insert_check_and_lock( page_update_max_trx_id(buf_frame_align(rec), thr_get_trx(thr)->id); } - - ut_ad(lock_rec_queue_validate(next_rec, index)); + +#ifdef UNIV_DEBUG + { + mem_heap_t* heap = mem_heap_create(100); + const ulint* offsets = rec_get_offsets(next_rec, index, + ULINT_UNDEFINED, heap); + ut_ad(lock_rec_queue_validate(next_rec, index, offsets)); + mem_heap_free(heap); + } +#endif /* UNIV_DEBUG */ return(err); } @@ -4762,7 +4831,8 @@ void lock_rec_convert_impl_to_expl( /*==========================*/ rec_t* rec, /* in: user record on page */ - dict_index_t* index) /* in: index of record */ + dict_index_t* index, /* in: index of record */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { trx_t* impl_trx; @@ -4770,11 +4840,14 @@ lock_rec_convert_impl_to_expl( ut_ad(mutex_own(&kernel_mutex)); #endif /* UNIV_SYNC_DEBUG */ ut_ad(page_rec_is_user_rec(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(page_is_comp(buf_frame_align(rec)) == index->table->comp); if (index->type & DICT_CLUSTERED) { - impl_trx = lock_clust_rec_some_has_impl(rec, index); + impl_trx = lock_clust_rec_some_has_impl(rec, index, offsets); } else { - impl_trx = lock_sec_rec_some_has_impl_off_kernel(rec, index); + impl_trx = lock_sec_rec_some_has_impl_off_kernel( + rec, index, offsets); } if (impl_trx) { @@ -4782,7 +4855,7 @@ lock_rec_convert_impl_to_expl( record, set one for it */ if (!lock_rec_has_expl(LOCK_X | LOCK_REC_NOT_GAP, rec, - impl_trx)) { + index->table->comp, impl_trx)) { lock_rec_add_to_queue(LOCK_REC | LOCK_X | LOCK_REC_NOT_GAP, rec, index, @@ -4808,17 +4881,19 @@ lock_clust_rec_modify_check_and_lock( does nothing */ rec_t* rec, /* in: record which should be modified */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ que_thr_t* thr) /* in: query thread */ { ulint err; - + + ut_ad(rec_offs_validate(rec, index, offsets)); + ut_ad(index->type & DICT_CLUSTERED); + if (flags & BTR_NO_LOCKING_FLAG) { return(DB_SUCCESS); } - ut_ad(index->type & DICT_CLUSTERED); - lock_mutex_enter_kernel(); ut_ad(lock_table_has(thr_get_trx(thr), index->table, LOCK_IX)); @@ -4826,13 +4901,13 @@ lock_clust_rec_modify_check_and_lock( /* If a transaction has no explicit x-lock set on the record, set one for it */ - lock_rec_convert_impl_to_expl(rec, index); + lock_rec_convert_impl_to_expl(rec, index, offsets); err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, rec, index, thr); lock_mutex_exit_kernel(); - ut_ad(lock_rec_queue_validate(rec, index)); + ut_ad(lock_rec_queue_validate(rec, index, offsets)); return(err); } @@ -4876,8 +4951,16 @@ lock_sec_rec_modify_check_and_lock( err = lock_rec_lock(TRUE, LOCK_X | LOCK_REC_NOT_GAP, rec, index, thr); lock_mutex_exit_kernel(); - - ut_ad(lock_rec_queue_validate(rec, index)); + +#ifdef UNIV_DEBUG + { + mem_heap_t* heap = mem_heap_create(100); + const ulint* offsets = rec_get_offsets(rec, index, + ULINT_UNDEFINED, heap); + ut_ad(lock_rec_queue_validate(rec, index, offsets)); + mem_heap_free(heap); + } +#endif /* UNIV_DEBUG */ if (err == DB_SUCCESS) { /* Update the page max trx id field */ @@ -4904,6 +4987,7 @@ lock_sec_rec_read_check_and_lock( which should be read or passed over by a read cursor */ dict_index_t* index, /* in: secondary index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ulint mode, /* in: mode of the lock which the read cursor should set on records: LOCK_S or LOCK_X; the latter is possible in SELECT FOR UPDATE */ @@ -4915,6 +4999,7 @@ lock_sec_rec_read_check_and_lock( ut_ad(!(index->type & DICT_CLUSTERED)); ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); + ut_ad(rec_offs_validate(rec, index, offsets)); if (flags & BTR_NO_LOCKING_FLAG) { @@ -4937,14 +5022,14 @@ lock_sec_rec_read_check_and_lock( || recv_recovery_is_on()) && !page_rec_is_supremum(rec)) { - lock_rec_convert_impl_to_expl(rec, index); + lock_rec_convert_impl_to_expl(rec, index, offsets); } err = lock_rec_lock(FALSE, mode | gap_mode, rec, index, thr); lock_mutex_exit_kernel(); - ut_ad(lock_rec_queue_validate(rec, index)); + ut_ad(lock_rec_queue_validate(rec, index, offsets)); return(err); } @@ -4968,6 +5053,7 @@ lock_clust_rec_read_check_and_lock( which should be read or passed over by a read cursor */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ulint mode, /* in: mode of the lock which the read cursor should set on records: LOCK_S or LOCK_X; the latter is possible in SELECT FOR UPDATE */ @@ -4981,6 +5067,9 @@ lock_clust_rec_read_check_and_lock( ut_ad(page_rec_is_user_rec(rec) || page_rec_is_supremum(rec)); ut_ad(gap_mode == LOCK_ORDINARY || gap_mode == LOCK_GAP || gap_mode == LOCK_REC_NOT_GAP); + ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); + if (flags & BTR_NO_LOCKING_FLAG) { return(DB_SUCCESS); @@ -4995,14 +5084,14 @@ lock_clust_rec_read_check_and_lock( if (!page_rec_is_supremum(rec)) { - lock_rec_convert_impl_to_expl(rec, index); + lock_rec_convert_impl_to_expl(rec, index, offsets); } err = lock_rec_lock(FALSE, mode | gap_mode, rec, index, thr); lock_mutex_exit_kernel(); - ut_ad(lock_rec_queue_validate(rec, index)); - + ut_ad(lock_rec_queue_validate(rec, index, offsets)); + return(err); } diff --git a/innobase/log/log0recv.c b/innobase/log/log0recv.c index 28b9dec8aa0..f42f0eb8c72 100644 --- a/innobase/log/log0recv.c +++ b/innobase/log/log0recv.c @@ -756,81 +756,124 @@ recv_parse_or_apply_log_rec_body( mtr_t* mtr) /* in: mtr or NULL; should be non-NULL if and only if page is non-NULL */ { - byte* new_ptr; - - if (type <= MLOG_8BYTES) { - new_ptr = mlog_parse_nbytes(type, ptr, end_ptr, page); - - } else if (type == MLOG_REC_INSERT) { - new_ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr, page, - mtr); - } else if (type == MLOG_REC_CLUST_DELETE_MARK) { - new_ptr = btr_cur_parse_del_mark_set_clust_rec(ptr, end_ptr, - page); - } else if (type == MLOG_REC_SEC_DELETE_MARK) { - new_ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr, - page); - } else if (type == MLOG_REC_UPDATE_IN_PLACE) { - new_ptr = btr_cur_parse_update_in_place(ptr, end_ptr, page); - - } else if ((type == MLOG_LIST_END_DELETE) - || (type == MLOG_LIST_START_DELETE)) { - new_ptr = page_parse_delete_rec_list(type, ptr, end_ptr, page, - mtr); - } else if (type == MLOG_LIST_END_COPY_CREATED) { - new_ptr = page_parse_copy_rec_list_to_created_page(ptr, - end_ptr, page, mtr); - } else if (type == MLOG_PAGE_REORGANIZE) { - new_ptr = btr_parse_page_reorganize(ptr, end_ptr, page, mtr); - - } else if (type == MLOG_PAGE_CREATE) { - new_ptr = page_parse_create(ptr, end_ptr, page, mtr); - - } else if (type == MLOG_UNDO_INSERT) { - new_ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page); - - } else if (type == MLOG_UNDO_ERASE_END) { - new_ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, - mtr); - } else if (type == MLOG_UNDO_INIT) { - new_ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr); - - } else if (type == MLOG_UNDO_HDR_DISCARD) { - new_ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, - mtr); - } else if ((type == MLOG_UNDO_HDR_CREATE) - || (type == MLOG_UNDO_HDR_REUSE)) { - new_ptr = trx_undo_parse_page_header(type, ptr, end_ptr, page, - mtr); - } else if (type == MLOG_REC_MIN_MARK) { - new_ptr = btr_parse_set_min_rec_mark(ptr, end_ptr, page, mtr); - - } else if (type == MLOG_REC_DELETE) { - new_ptr = page_cur_parse_delete_rec(ptr, end_ptr, page, mtr); - - } else if (type == MLOG_IBUF_BITMAP_INIT) { - new_ptr = ibuf_parse_bitmap_init(ptr, end_ptr, page, mtr); - - } else if (type == MLOG_INIT_FILE_PAGE) { - new_ptr = fsp_parse_init_file_page(ptr, end_ptr, page); - - } else if (type == MLOG_WRITE_STRING) { - new_ptr = mlog_parse_string(ptr, end_ptr, page); - - } else if (type == MLOG_FILE_CREATE - || type == MLOG_FILE_RENAME - || type == MLOG_FILE_DELETE) { - new_ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE, + dict_index_t* index = NULL; + + switch (type) { + case MLOG_1BYTE: case MLOG_2BYTES: case MLOG_4BYTES: case MLOG_8BYTES: + ptr = mlog_parse_nbytes(type, ptr, end_ptr, page); + break; + case MLOG_REC_INSERT: case MLOG_COMP_REC_INSERT: + if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, + type == MLOG_COMP_REC_INSERT, &index))) { + ptr = page_cur_parse_insert_rec(FALSE, ptr, end_ptr, + index, page, mtr); + } + break; + case MLOG_REC_CLUST_DELETE_MARK: case MLOG_COMP_REC_CLUST_DELETE_MARK: + if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, + type == MLOG_COMP_REC_CLUST_DELETE_MARK, &index))) { + ptr = btr_cur_parse_del_mark_set_clust_rec(ptr, + end_ptr, index, page); + } + break; + case MLOG_REC_SEC_DELETE_MARK: case MLOG_COMP_REC_SEC_DELETE_MARK: + if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, + type == MLOG_COMP_REC_SEC_DELETE_MARK, &index))) { + ptr = btr_cur_parse_del_mark_set_sec_rec(ptr, end_ptr, + index, page); + } + break; + case MLOG_REC_UPDATE_IN_PLACE: case MLOG_COMP_REC_UPDATE_IN_PLACE: + if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, + type == MLOG_COMP_REC_UPDATE_IN_PLACE, &index))) { + ptr = btr_cur_parse_update_in_place(ptr, end_ptr, + page, index); + } + break; + case MLOG_LIST_END_DELETE: case MLOG_COMP_LIST_END_DELETE: + case MLOG_LIST_START_DELETE: case MLOG_COMP_LIST_START_DELETE: + if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, + type == MLOG_COMP_LIST_END_DELETE + || type == MLOG_COMP_LIST_START_DELETE, &index))) { + ptr = page_parse_delete_rec_list(type, ptr, end_ptr, + index, page, mtr); + } + break; + case MLOG_LIST_END_COPY_CREATED: case MLOG_COMP_LIST_END_COPY_CREATED: + if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, + type == MLOG_COMP_LIST_END_COPY_CREATED, &index))) { + ptr = page_parse_copy_rec_list_to_created_page(ptr, + end_ptr, index, page, mtr); + } + break; + case MLOG_PAGE_REORGANIZE: case MLOG_COMP_PAGE_REORGANIZE: + if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, + type == MLOG_COMP_PAGE_REORGANIZE, &index))) { + ptr = btr_parse_page_reorganize(ptr, end_ptr, index, + page, mtr); + } + break; + case MLOG_PAGE_CREATE: case MLOG_COMP_PAGE_CREATE: + ptr = page_parse_create(ptr, end_ptr, + type == MLOG_COMP_PAGE_CREATE, page, mtr); + break; + case MLOG_UNDO_INSERT: + ptr = trx_undo_parse_add_undo_rec(ptr, end_ptr, page); + break; + case MLOG_UNDO_ERASE_END: + ptr = trx_undo_parse_erase_page_end(ptr, end_ptr, page, mtr); + break; + case MLOG_UNDO_INIT: + ptr = trx_undo_parse_page_init(ptr, end_ptr, page, mtr); + break; + case MLOG_UNDO_HDR_DISCARD: + ptr = trx_undo_parse_discard_latest(ptr, end_ptr, page, mtr); + break; + case MLOG_UNDO_HDR_CREATE: + case MLOG_UNDO_HDR_REUSE: + ptr = trx_undo_parse_page_header(type, ptr, end_ptr, + page, mtr); + break; + case MLOG_REC_MIN_MARK: case MLOG_COMP_REC_MIN_MARK: + ptr = btr_parse_set_min_rec_mark(ptr, end_ptr, + type == MLOG_COMP_REC_MIN_MARK, page, mtr); + break; + case MLOG_REC_DELETE: case MLOG_COMP_REC_DELETE: + if (NULL != (ptr = mlog_parse_index(ptr, end_ptr, + type == MLOG_COMP_REC_DELETE, &index))) { + ptr = page_cur_parse_delete_rec(ptr, end_ptr, + index, page, mtr); + } + break; + case MLOG_IBUF_BITMAP_INIT: + ptr = ibuf_parse_bitmap_init(ptr, end_ptr, page, mtr); + break; + case MLOG_INIT_FILE_PAGE: + ptr = fsp_parse_init_file_page(ptr, end_ptr, page); + break; + case MLOG_WRITE_STRING: + ptr = mlog_parse_string(ptr, end_ptr, page); + break; + case MLOG_FILE_CREATE: + case MLOG_FILE_RENAME: + case MLOG_FILE_DELETE: + ptr = fil_op_log_parse_or_replay(ptr, end_ptr, type, FALSE, ULINT_UNDEFINED); - } else { - new_ptr = NULL; - + break; + default: + ptr = NULL; recv_sys->found_corrupt_log = TRUE; } - ut_ad(!page || new_ptr); + ut_ad(!page || ptr); + if (index) { + dict_table_t* table = index->table; + mem_heap_free(index->heap); + mutex_free(&(table->autoinc_mutex)); + mem_heap_free(table->heap); + } - return(new_ptr); + return(ptr); } /************************************************************************* diff --git a/innobase/mtr/mtr0log.c b/innobase/mtr/mtr0log.c index 82baa8905ba..417093134c3 100644 --- a/innobase/mtr/mtr0log.c +++ b/innobase/mtr/mtr0log.c @@ -384,3 +384,160 @@ mlog_parse_string( return(ptr + len); } + +/************************************************************ +Opens a buffer for mlog, writes the initial log record and, +if needed, the field lengths of an index. */ + +byte* +mlog_open_and_write_index( +/*======================*/ + /* out: buffer, NULL if log mode + MTR_LOG_NONE */ + mtr_t* mtr, /* in: mtr */ + byte* rec, /* in: index record or page */ + dict_index_t* index, /* in: record descriptor */ + byte type, /* in: log item type */ + ulint size) /* in: requested buffer size in bytes + (if 0, calls mlog_close() and returns NULL) */ +{ + byte* log_ptr; + const byte* log_start; + const byte* log_end; + + if (!index->table->comp) { + log_start = log_ptr = mlog_open(mtr, 11 + size); + if (!log_ptr) { + return(NULL); /* logging is disabled */ + } + log_ptr = mlog_write_initial_log_record_fast(rec, type, + log_ptr, mtr); + log_end = log_ptr + 11 + size; + } else { + ulint i; + ulint n = dict_index_get_n_fields(index); + /* total size needed */ + ulint total = 11 + size + (n + 2) * 2; + ulint alloc = total; + /* allocate at most DYN_ARRAY_DATA_SIZE at a time */ + if (alloc > DYN_ARRAY_DATA_SIZE) { + alloc = DYN_ARRAY_DATA_SIZE; + } + log_start = log_ptr = mlog_open(mtr, alloc); + if (!log_ptr) { + return(NULL); /* logging is disabled */ + } + log_end = log_ptr + alloc; + log_ptr = mlog_write_initial_log_record_fast(rec, type, + log_ptr, mtr); + mach_write_to_2(log_ptr, n); + log_ptr += 2; + mach_write_to_2(log_ptr, + dict_index_get_n_unique_in_tree(index)); + log_ptr += 2; + for (i = 0; i < n; i++) { + dict_field_t* field; + dtype_t* type; + ulint len; + field = dict_index_get_nth_field(index, i); + type = dict_col_get_type(dict_field_get_col(field)); + len = field->fixed_len; + ut_ad(len < 0x7fff); + if (len == 0 && dtype_get_len(type) > 255) { + /* variable-length field + with maximum length > 255 */ + len = 0x7fff; + } + if (dtype_get_prtype(type) & DATA_NOT_NULL) { + len |= 0x8000; + } + if (log_ptr + 2 > log_end) { + mlog_close(mtr, log_ptr); + ut_a(total > (ulint) (log_ptr - log_start)); + total -= log_ptr - log_start; + alloc = total; + if (alloc > DYN_ARRAY_DATA_SIZE) { + alloc = DYN_ARRAY_DATA_SIZE; + } + log_start = log_ptr = mlog_open(mtr, alloc); + if (!log_ptr) { + return(NULL); /* logging is disabled */ + } + log_end = log_ptr + alloc; + } + mach_write_to_2(log_ptr, len); + log_ptr += 2; + } + } + if (size == 0) { + mlog_close(mtr, log_ptr); + log_ptr = NULL; + } else if (log_ptr + size > log_end) { + mlog_close(mtr, log_ptr); + log_ptr = mlog_open(mtr, size); + } + return(log_ptr); +} + +/************************************************************ +Parses a log record written by mlog_open_and_write_index. */ + +byte* +mlog_parse_index( +/*=============*/ + /* out: parsed record end, + NULL if not a complete record */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + /* out: new value of log_ptr */ + ibool comp, /* in: TRUE=compact record format */ + dict_index_t** index) /* out, own: dummy index */ +{ + ulint i, n, n_uniq; + dict_table_t* table; + dict_index_t* ind; + + if (comp) { + if (end_ptr < ptr + 4) { + return(NULL); + } + n = mach_read_from_2(ptr); + ptr += 2; + n_uniq = mach_read_from_2(ptr); + ut_ad(n_uniq <= n); + if (end_ptr < ptr + (n + 1) * 2) { + return(NULL); + } + } else { + n = n_uniq = 1; + } + table = dict_mem_table_create("LOG_DUMMY", DICT_HDR_SPACE, n, comp); + ind = dict_mem_index_create("LOG_DUMMY", "LOG_DUMMY", + DICT_HDR_SPACE, 0, n); + ind->table = table; + ind->n_uniq = n_uniq; + if (n_uniq != n) { + ind->type = DICT_CLUSTERED; + } + /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ + ind->cached = TRUE; + if (comp) { + for (i = 0; i < n; i++) { + ulint len = mach_read_from_2(ptr += 2); + /* The high-order bit of len is the NOT NULL flag; + the rest is 0 or 0x7fff for variable-length fields, + and 1..0x7ffe for fixed-length fields. */ + dict_mem_table_add_col(table, "DUMMY", + ((len + 1) & 0x7fff) <= 1 + ? DATA_BINARY + : DATA_FIXBINARY, + len & 0x8000 ? DATA_NOT_NULL : 0, + len & 0x7fff, 0); + dict_index_add_col(ind, + dict_table_get_nth_col(table, i), 0, 0); + } + ptr += 2; + } + *index = ind; + return(ptr); +} diff --git a/innobase/page/page0cur.c b/innobase/page/page0cur.c index 459ab986610..8def8474d9a 100644 --- a/innobase/page/page0cur.c +++ b/innobase/page/page0cur.c @@ -30,6 +30,7 @@ ibool page_cur_try_search_shortcut( /*=========================*/ page_t* page, /* in: index page */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* tuple, /* in: data tuple */ ulint* iup_matched_fields, /* in/out: already matched fields in upper @@ -55,9 +56,14 @@ page_cur_try_search_shortcut( #ifdef UNIV_SEARCH_DEBUG page_cur_t cursor2; #endif + mem_heap_t* heap; + ulint* offsets; ut_ad(dtuple_check_typed(tuple)); rec = page_header_get_ptr(page, PAGE_LAST_INSERT); + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, + dtuple_get_n_fields(tuple), heap); ut_ad(rec); ut_ad(page_rec_is_user_rec(rec)); @@ -69,26 +75,30 @@ page_cur_try_search_shortcut( up_match = low_match; up_bytes = low_bytes; - cmp = page_cmp_dtuple_rec_with_match(tuple, rec, &low_match, + cmp = page_cmp_dtuple_rec_with_match(tuple, rec, offsets, &low_match, &low_bytes); if (cmp == -1) { + mem_heap_free(heap); return(FALSE); } next_rec = page_rec_get_next(rec); + offsets = rec_reget_offsets(next_rec, index, offsets, + dtuple_get_n_fields(tuple), heap); - cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, &up_match, - &up_bytes); + cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets, + &up_match, &up_bytes); if (cmp != -1) { + mem_heap_free(heap); return(FALSE); } cursor->rec = rec; #ifdef UNIV_SEARCH_DEBUG - page_cur_search_with_match(page, tuple, PAGE_CUR_DBG, + page_cur_search_with_match(page, index, tuple, PAGE_CUR_DBG, iup_matched_fields, iup_matched_bytes, ilow_matched_fields, @@ -117,6 +127,7 @@ page_cur_try_search_shortcut( #ifdef UNIV_SEARCH_PERF_STAT page_cur_short_succ++; #endif + mem_heap_free(heap); return(TRUE); } @@ -130,22 +141,24 @@ static ibool page_cur_rec_field_extends( /*=======================*/ - /* out: TRUE if rec field extends tuple - field */ - dtuple_t* tuple, /* in: data tuple */ - rec_t* rec, /* in: record */ - ulint n) /* in: compare nth field */ + /* out: TRUE if rec field + extends tuple field */ + dtuple_t* tuple, /* in: data tuple */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint n) /* in: compare nth field */ { dtype_t* type; dfield_t* dfield; byte* rec_f; ulint rec_f_len; + ut_ad(rec_offs_validate(rec, NULL, offsets)); dfield = dtuple_get_nth_field(tuple, n); type = dfield_get_type(dfield); - rec_f = rec_get_nth_field(rec, n, &rec_f_len); + rec_f = rec_get_nth_field(rec, offsets, n, &rec_f_len); if (type->mtype == DATA_VARCHAR || type->mtype == DATA_CHAR @@ -176,6 +189,7 @@ void page_cur_search_with_match( /*=======================*/ page_t* page, /* in: index page */ + dict_index_t* index, /* in: record descriptor */ dtuple_t* tuple, /* in: data tuple */ ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, or PAGE_CUR_GE */ @@ -212,6 +226,9 @@ page_cur_search_with_match( ulint dbg_matched_fields; ulint dbg_matched_bytes; #endif + mem_heap_t* heap; + ulint* offsets = NULL; + ut_ad(page && tuple && iup_matched_fields && iup_matched_bytes && ilow_matched_fields && ilow_matched_bytes && cursor); ut_ad(dtuple_validate(tuple)); @@ -229,7 +246,7 @@ page_cur_search_with_match( && (page_header_get_ptr(page, PAGE_LAST_INSERT)) && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) { - if (page_cur_try_search_shortcut(page, tuple, + if (page_cur_try_search_shortcut(page, index, tuple, iup_matched_fields, iup_matched_bytes, ilow_matched_fields, @@ -245,6 +262,8 @@ page_cur_search_with_match( /*#endif */ #endif + heap = mem_heap_create(100); + /* The following flag does not work for non-latin1 char sets because cmp_full_field does not tell how many bytes matched */ ut_a(mode != PAGE_CUR_LE_OR_EXTENDS); @@ -279,7 +298,10 @@ page_cur_search_with_match( low_matched_fields, low_matched_bytes, up_matched_fields, up_matched_bytes); - cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, + offsets = rec_reget_offsets(mid_rec, index, offsets, + dtuple_get_n_fields_cmp(tuple), heap); + + cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets, &cur_matched_fields, &cur_matched_bytes); if (cmp == 1) { @@ -288,10 +310,12 @@ page_cur_search_with_match( low_matched_bytes = cur_matched_bytes; } else if (cmp == -1) { + offsets = rec_reget_offsets(mid_rec, index, + offsets, dtuple_get_n_fields_cmp(tuple), heap); if (mode == PAGE_CUR_LE_OR_EXTENDS && page_cur_rec_field_extends(tuple, mid_rec, - cur_matched_fields)) { + offsets, cur_matched_fields)) { low = mid; low_matched_fields = cur_matched_fields; low_matched_bytes = cur_matched_bytes; @@ -329,7 +353,10 @@ page_cur_search_with_match( low_matched_fields, low_matched_bytes, up_matched_fields, up_matched_bytes); - cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, + offsets = rec_reget_offsets(mid_rec, index, + offsets, dtuple_get_n_fields_cmp(tuple), heap); + + cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets, &cur_matched_fields, &cur_matched_bytes); if (cmp == 1) { @@ -338,9 +365,12 @@ page_cur_search_with_match( low_matched_bytes = cur_matched_bytes; } else if (cmp == -1) { + offsets = rec_reget_offsets(mid_rec, index, + offsets, dtuple_get_n_fields_cmp(tuple), heap); + if (mode == PAGE_CUR_LE_OR_EXTENDS && page_cur_rec_field_extends(tuple, mid_rec, - cur_matched_fields)) { + offsets, cur_matched_fields)) { low_rec = mid_rec; low_matched_fields = cur_matched_fields; low_matched_bytes = cur_matched_bytes; @@ -368,7 +398,9 @@ page_cur_search_with_match( dbg_matched_fields = 0; dbg_matched_bytes = 0; - dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec, + offsets = rec_reget_offsets(low_rec, index, + offsets, ULINT_UNDEFINED, heap); + dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec, offsets, &dbg_matched_fields, &dbg_matched_bytes); if (mode == PAGE_CUR_G) { @@ -390,7 +422,9 @@ page_cur_search_with_match( dbg_matched_fields = 0; dbg_matched_bytes = 0; - dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec, + offsets = rec_reget_offsets(up_rec, index, + offsets, ULINT_UNDEFINED, heap); + dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec, offsets, &dbg_matched_fields, &dbg_matched_bytes); if (mode == PAGE_CUR_G) { @@ -419,6 +453,7 @@ page_cur_search_with_match( *iup_matched_bytes = up_matched_bytes; *ilow_matched_fields = low_matched_fields; *ilow_matched_bytes = low_matched_bytes; + mem_heap_free(heap); } /*************************************************************** @@ -463,10 +498,12 @@ static void page_cur_insert_rec_write_log( /*==========================*/ - rec_t* insert_rec, /* in: inserted physical record */ - ulint rec_size, /* in: insert_rec size */ - rec_t* cursor_rec, /* in: record the cursor is pointing to */ - mtr_t* mtr) /* in: mini-transaction handle */ + rec_t* insert_rec, /* in: inserted physical record */ + ulint rec_size, /* in: insert_rec size */ + rec_t* cursor_rec, /* in: record the + cursor is pointing to */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mini-transaction handle */ { ulint cur_rec_size; ulint extra_size; @@ -476,22 +513,29 @@ page_cur_insert_rec_write_log( byte* cur_ptr; ulint extra_info_yes; byte* log_ptr; + byte* log_end; ulint i; ut_a(rec_size < UNIV_PAGE_SIZE); - ut_ad(rec_size == rec_get_size(insert_rec)); - log_ptr = mlog_open(mtr, 30 + MLOG_BUF_MARGIN); + { + mem_heap_t* heap; + ulint* cur_offs; + ulint* ins_offs; - if (log_ptr == NULL) { + heap = mem_heap_create(100); + cur_offs = rec_get_offsets(cursor_rec, index, + ULINT_UNDEFINED, heap); + ins_offs = rec_get_offsets(insert_rec, index, + ULINT_UNDEFINED, heap); - return; - } - - extra_size = rec_get_extra_size(insert_rec); + extra_size = rec_offs_extra_size(ins_offs); + cur_extra_size = rec_offs_extra_size(cur_offs); + ut_ad(rec_size == rec_offs_size(ins_offs)); + cur_rec_size = rec_offs_size(cur_offs); - cur_extra_size = rec_get_extra_size(cursor_rec); - cur_rec_size = rec_get_size(cursor_rec); + mem_heap_free(heap); + } ins_ptr = insert_rec - extra_size; @@ -514,7 +558,9 @@ page_cur_insert_rec_write_log( ins_ptr++; cur_ptr++; } else if ((i < extra_size) - && (i >= extra_size - REC_N_EXTRA_BYTES)) { + && (i >= extra_size - (index->table->comp + ? REC_N_NEW_EXTRA_BYTES + : REC_N_OLD_EXTRA_BYTES))) { i = extra_size; ins_ptr = insert_rec; cur_ptr = cursor_rec; @@ -525,16 +571,35 @@ page_cur_insert_rec_write_log( } if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) { - - log_ptr = mlog_write_initial_log_record_fast(insert_rec, - MLOG_REC_INSERT, log_ptr, mtr); + + log_ptr = mlog_open_and_write_index(mtr, insert_rec, index, + index->table->comp + ? MLOG_COMP_REC_INSERT : MLOG_REC_INSERT, + 2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN); + + if (!log_ptr) { + /* Logging in mtr is switched off during crash + recovery: in that case mlog_open returns NULL */ + return; + } + + log_end = &log_ptr[2 + 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN]; /* Write the cursor rec offset as a 2-byte ulint */ mach_write_to_2(log_ptr, cursor_rec - buf_frame_align(cursor_rec)); log_ptr += 2; + } else { + log_ptr = mlog_open(mtr, 5 + 1 + 5 + 5 + MLOG_BUF_MARGIN); + if (!log_ptr) { + /* Logging in mtr is switched off during crash + recovery: in that case mlog_open returns NULL */ + return; + } + log_end = &log_ptr[5 + 1 + 5 + 5 + MLOG_BUF_MARGIN]; } - if ((rec_get_info_bits(insert_rec) != rec_get_info_bits(cursor_rec)) + if ((rec_get_info_bits(insert_rec, index->table->comp) != + rec_get_info_bits(cursor_rec, index->table->comp)) || (extra_size != cur_extra_size) || (rec_size != cur_rec_size)) { @@ -549,7 +614,8 @@ page_cur_insert_rec_write_log( + extra_info_yes); if (extra_info_yes) { /* Write the info bits */ - mach_write_to_1(log_ptr, rec_get_info_bits(insert_rec)); + mach_write_to_1(log_ptr, + rec_get_info_bits(insert_rec, index->table->comp)); log_ptr++; /* Write the record origin offset */ @@ -565,17 +631,15 @@ page_cur_insert_rec_write_log( /* Write to the log the inserted index record end segment which differs from the cursor record */ - if (rec_size - i < MLOG_BUF_MARGIN) { - ut_memcpy(log_ptr, ins_ptr, rec_size - i); - log_ptr += rec_size - i; - } - - mlog_close(mtr, log_ptr); - - ut_a(rec_size - i < UNIV_PAGE_SIZE); + rec_size -= i; - if (rec_size - i >= MLOG_BUF_MARGIN) { - mlog_catenate_string(mtr, ins_ptr, rec_size - i); + if (log_ptr + rec_size <= log_end) { + memcpy(log_ptr, ins_ptr, rec_size); + mlog_close(mtr, log_ptr + rec_size); + } else { + mlog_close(mtr, log_ptr); + ut_a(rec_size < UNIV_PAGE_SIZE); + mlog_catenate_string(mtr, ins_ptr, rec_size); } } @@ -585,12 +649,13 @@ Parses a log record of a record insert on a page. */ byte* page_cur_parse_insert_rec( /*======================*/ - /* out: end of log record or NULL */ - ibool is_short,/* in: TRUE if short inserts */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ + /* out: end of log record or NULL */ + ibool is_short,/* in: TRUE if short inserts */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ { ulint extra_info_yes; ulint offset = 0; /* remove warning */ @@ -603,6 +668,8 @@ page_cur_parse_insert_rec( byte* ptr2 = ptr; ulint info_bits = 0; /* remove warning */ page_cur_t cursor; + mem_heap_t* heap; + ulint* offsets; if (!is_short) { /* Read the cursor rec offset as a 2-byte ulint */ @@ -689,11 +756,14 @@ page_cur_parse_insert_rec( cursor_rec = page + offset; } + heap = mem_heap_create(100); + offsets = rec_get_offsets(cursor_rec, index, ULINT_UNDEFINED, heap); + if (extra_info_yes == 0) { - info_bits = rec_get_info_bits(cursor_rec); - origin_offset = rec_get_extra_size(cursor_rec); - mismatch_index = rec_get_size(cursor_rec) - end_seg_len; - } + info_bits = rec_get_info_bits(cursor_rec, index->table->comp); + origin_offset = rec_offs_extra_size(offsets); + mismatch_index = rec_offs_size(offsets) - end_seg_len; + } if (mismatch_index + end_seg_len < sizeof buf1) { buf = buf1; @@ -722,14 +792,24 @@ page_cur_parse_insert_rec( ut_error; } - ut_memcpy(buf, rec_get_start(cursor_rec), mismatch_index); + ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index); ut_memcpy(buf + mismatch_index, ptr, end_seg_len); - rec_set_info_bits(buf + origin_offset, info_bits); + rec_set_info_bits(buf + origin_offset, index->table->comp, info_bits); + + /* Set the status bits for new-style records. */ + if (index->table->comp) { + /* Leaf pages (level 0) contain ordinary records; + non-leaf pages contain node pointer records. */ + ulint level = page_header_get_field( + buf_frame_align(cursor_rec), PAGE_LEVEL); + rec_set_status(buf + origin_offset, + level ? REC_STATUS_NODE_PTR : REC_STATUS_ORDINARY); + } page_cur_position(cursor_rec, &cursor); - page_cur_rec_insert(&cursor, buf + origin_offset, mtr); + page_cur_rec_insert(&cursor, buf + origin_offset, index, mtr); if (buf != buf1) { @@ -751,68 +831,80 @@ page_cur_insert_rec_low( /* out: pointer to record if succeed, NULL otherwise */ page_cur_t* cursor, /* in: a page cursor */ - dtuple_t* tuple, /* in: pointer to a data tuple or NULL */ - ulint data_size,/* in: data size of tuple */ - rec_t* rec, /* in: pointer to a physical record or NULL */ + dtuple_t* tuple, /* in: pointer to a data tuple or NULL */ + dict_index_t* index, /* in: record descriptor */ + rec_t* rec, /* in: pointer to a physical record or NULL */ mtr_t* mtr) /* in: mini-transaction handle */ { - byte* insert_buf = NULL; - ulint rec_size; - byte* page; /* the relevant page */ - rec_t* last_insert; /* cursor position at previous insert */ - rec_t* insert_rec; /* inserted record */ - ulint heap_no; /* heap number of the inserted record */ - rec_t* current_rec; /* current record after which the - new record is inserted */ - rec_t* next_rec; /* next record after current before - the insertion */ - ulint owner_slot; /* the slot which owns the inserted record */ - rec_t* owner_rec; - ulint n_owned; - + byte* insert_buf = NULL; + ulint rec_size; + byte* page; /* the relevant page */ + rec_t* last_insert; /* cursor position at previous insert */ + rec_t* insert_rec; /* inserted record */ + ulint heap_no; /* heap number of the inserted record */ + rec_t* current_rec; /* current record after which the + new record is inserted */ + rec_t* next_rec; /* next record after current before + the insertion */ + ulint owner_slot; /* the slot which owns the + inserted record */ + rec_t* owner_rec; + ulint n_owned; + mem_heap_t* heap; + ulint* offsets; + ibool comp = index->table->comp; + ut_ad(cursor && mtr); ut_ad(tuple || rec); ut_ad(!(tuple && rec)); ut_ad(rec || dtuple_check_typed(tuple)); - ut_ad(rec || (dtuple_get_data_size(tuple) == data_size)); page = page_cur_get_page(cursor); + ut_ad(page_is_comp(page) == comp); + ut_ad(cursor->rec != page_get_supremum_rec(page)); + heap = mem_heap_create(100); + /* 1. Get the size of the physical record in the page */ if (tuple != NULL) { - rec_size = data_size + rec_get_converted_extra_size( - data_size, - dtuple_get_n_fields(tuple)); + offsets = NULL; + rec_size = rec_get_converted_size(index, tuple); } else { - rec_size = rec_get_size(rec); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + rec_size = rec_offs_size(offsets); } /* 2. Try to find suitable space from page memory management */ - insert_buf = page_mem_alloc(page, rec_size, &heap_no); + insert_buf = page_mem_alloc(page, rec_size, index, &heap_no); if (insert_buf == NULL) { - + mem_heap_free(heap); return(NULL); } /* 3. Create the record */ if (tuple != NULL) { - insert_rec = rec_convert_dtuple_to_rec_low(insert_buf, tuple, - data_size); + insert_rec = rec_convert_dtuple_to_rec(insert_buf, + index, tuple); } else { - insert_rec = rec_copy(insert_buf, rec); + insert_rec = rec_copy(insert_buf, rec, offsets); } ut_ad(insert_rec); - ut_ad(rec_size == rec_get_size(insert_rec)); + offsets = rec_reget_offsets(insert_rec, index, + offsets, ULINT_UNDEFINED, heap); + ut_ad(rec_size == rec_offs_size(offsets)); /* 4. Insert the record in the linked list of records */ - current_rec = cursor->rec; + ut_ad(!comp || rec_get_status(current_rec) <= REC_STATUS_INFIMUM); + ut_ad(!comp || rec_get_status(insert_rec) < REC_STATUS_INFIMUM); + next_rec = page_rec_get_next(current_rec); + ut_ad(!comp || rec_get_status(next_rec) != REC_STATUS_INFIMUM); page_rec_set_next(insert_rec, next_rec); page_rec_set_next(current_rec, insert_rec); @@ -821,12 +913,15 @@ page_cur_insert_rec_low( /* 5. Set the n_owned field in the inserted record to zero, and set the heap_no field */ - rec_set_n_owned(insert_rec, 0); - rec_set_heap_no(insert_rec, heap_no); + rec_set_n_owned(insert_rec, comp, 0); + rec_set_heap_no(insert_rec, comp, heap_no); /* 6. Update the last insertion info in page header */ last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT); + ut_ad(!last_insert || !comp + || rec_get_node_ptr_flag(last_insert) + == rec_get_node_ptr_flag(insert_rec)); if (last_insert == NULL) { page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION); @@ -855,8 +950,8 @@ page_cur_insert_rec_low( /* 7. It remains to update the owner record. */ owner_rec = page_rec_find_owner_rec(insert_rec); - n_owned = rec_get_n_owned(owner_rec); - rec_set_n_owned(owner_rec, n_owned + 1); + n_owned = rec_get_n_owned(owner_rec, comp); + rec_set_n_owned(owner_rec, comp, n_owned + 1); /* 8. Now we have incremented the n_owned field of the owner record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED, @@ -868,8 +963,10 @@ page_cur_insert_rec_low( } /* 9. Write log record of the insert */ - page_cur_insert_rec_write_log(insert_rec, rec_size, current_rec, mtr); + page_cur_insert_rec_write_log(insert_rec, rec_size, current_rec, + index, mtr); + mem_heap_free(heap); return(insert_rec); } @@ -879,17 +976,19 @@ UNIV_INLINE byte* page_copy_rec_list_to_created_page_write_log( /*=========================================*/ - /* out: 4-byte field where to write the log data - length */ - page_t* page, /* in: index page */ - mtr_t* mtr) /* in: mtr */ + /* out: 4-byte field where to + write the log data length */ + page_t* page, /* in: index page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { byte* log_ptr; - - mlog_write_initial_log_record(page, MLOG_LIST_END_COPY_CREATED, mtr); - - log_ptr = mlog_open(mtr, 4); + log_ptr = mlog_open_and_write_index(mtr, page, index, + index->table->comp + ? MLOG_COMP_LIST_END_COPY_CREATED + : MLOG_LIST_END_COPY_CREATED, 4); + ut_a(log_ptr); mlog_close(mtr, log_ptr + 4); return(log_ptr); @@ -901,11 +1000,12 @@ Parses a log record of copying a record list end to a new created page. */ byte* page_parse_copy_rec_list_to_created_page( /*=====================================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ { byte* rec_end; ulint log_data_len; @@ -931,7 +1031,8 @@ page_parse_copy_rec_list_to_created_page( } while (ptr < rec_end) { - ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr, page, mtr); + ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr, + index, page, mtr); } ut_a(ptr == rec_end); @@ -950,10 +1051,11 @@ including that record. Infimum and supremum records are not copied. */ void page_copy_rec_list_end_to_created_page( /*===================================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: first record to copy */ - mtr_t* mtr) /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: first record to copy */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { page_dir_slot_t* slot = 0; /* remove warning */ byte* heap_top; @@ -966,9 +1068,13 @@ page_copy_rec_list_end_to_created_page( ulint log_mode; byte* log_ptr; ulint log_data_len; + ibool comp = page_is_comp(page); + mem_heap_t* heap; + ulint* offsets = NULL; - ut_ad(page_header_get_field(new_page, PAGE_N_HEAP) == 2); + ut_ad(page_dir_get_n_heap(new_page) == 2); ut_ad(page != new_page); + ut_ad(comp == page_is_comp(new_page)); if (rec == page_get_infimum_rec(page)) { @@ -983,12 +1089,13 @@ page_copy_rec_list_end_to_created_page( #ifdef UNIV_DEBUG /* To pass the debug tests we have to set these dummy values in the debug version */ - page_header_set_field(new_page, PAGE_N_DIR_SLOTS, UNIV_PAGE_SIZE / 2); + page_dir_set_n_slots(new_page, UNIV_PAGE_SIZE / 2); page_header_set_ptr(new_page, PAGE_HEAP_TOP, new_page + UNIV_PAGE_SIZE - 1); #endif - log_ptr = page_copy_rec_list_to_created_page_write_log(new_page, mtr); + log_ptr = page_copy_rec_list_to_created_page_write_log(new_page, + index, mtr); log_data_len = dyn_array_get_data_size(&(mtr->log)); @@ -997,22 +1104,29 @@ page_copy_rec_list_end_to_created_page( log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS); prev_rec = page_get_infimum_rec(new_page); - heap_top = new_page + PAGE_SUPREMUM_END; + if (comp) { + heap_top = new_page + PAGE_NEW_SUPREMUM_END; + } else { + heap_top = new_page + PAGE_OLD_SUPREMUM_END; + } count = 0; slot_index = 0; n_recs = 0; + heap = mem_heap_create(100); + /* should be do ... until, comment by Jani */ while (rec != page_get_supremum_rec(page)) { - - insert_rec = rec_copy(heap_top, rec); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + insert_rec = rec_copy(heap_top, rec, offsets); - rec_set_next_offs(prev_rec, insert_rec - new_page); + rec_set_next_offs(prev_rec, comp, insert_rec - new_page); - rec_set_n_owned(insert_rec, 0); - rec_set_heap_no(insert_rec, 2 + n_recs); + rec_set_n_owned(insert_rec, comp, 0); + rec_set_heap_no(insert_rec, comp, 2 + n_recs); - rec_size = rec_get_size(insert_rec); + rec_size = rec_offs_size(offsets); heap_top = heap_top + rec_size; @@ -1034,7 +1148,7 @@ page_copy_rec_list_end_to_created_page( } page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec, - mtr); + index, mtr); prev_rec = insert_rec; rec = page_rec_get_next(rec); } @@ -1056,22 +1170,25 @@ page_copy_rec_list_end_to_created_page( slot_index--; } + mem_heap_free(heap); + log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len; ut_a(log_data_len < 100 * UNIV_PAGE_SIZE); mach_write_to_4(log_ptr, log_data_len); - rec_set_next_offs(insert_rec, PAGE_SUPREMUM); + rec_set_next_offs(insert_rec, comp, + comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM); slot = page_dir_get_nth_slot(new_page, 1 + slot_index); page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page)); page_dir_slot_set_n_owned(slot, count + 1); - page_header_set_field(new_page, PAGE_N_DIR_SLOTS, 2 + slot_index); + page_dir_set_n_slots(new_page, 2 + slot_index); page_header_set_ptr(new_page, PAGE_HEAP_TOP, heap_top); - page_header_set_field(new_page, PAGE_N_HEAP, 2 + n_recs); + page_dir_set_n_heap(new_page, 2 + n_recs); page_header_set_field(new_page, PAGE_N_RECS, n_recs); page_header_set_ptr(new_page, PAGE_LAST_INSERT, NULL); @@ -1089,14 +1206,27 @@ UNIV_INLINE void page_cur_delete_rec_write_log( /*==========================*/ - rec_t* cursor_rec, /* in: record to be deleted */ - mtr_t* mtr) /* in: mini-transaction handle */ + rec_t* rec, /* in: record to be deleted */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mini-transaction handle */ { - mlog_write_initial_log_record(cursor_rec, MLOG_REC_DELETE, mtr); + byte* log_ptr; + + log_ptr = mlog_open_and_write_index(mtr, rec, index, + index->table->comp + ? MLOG_COMP_REC_DELETE + : MLOG_REC_DELETE, 2); + + if (!log_ptr) { + /* Logging in mtr is switched off during crash recovery: + in that case mlog_open returns NULL */ + return; + } /* Write the cursor rec offset as a 2-byte ulint */ - mlog_catenate_ulint(mtr, cursor_rec - buf_frame_align(cursor_rec), - MLOG_2BYTES); + mach_write_to_2(log_ptr, rec - buf_frame_align(rec)); + + mlog_close(mtr, log_ptr + 2); } /*************************************************************** @@ -1105,11 +1235,12 @@ Parses log record of a record delete on a page. */ byte* page_cur_parse_delete_rec( /*======================*/ - /* out: pointer to record end or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ + /* out: pointer to record end or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ { ulint offset; page_cur_t cursor; @@ -1128,7 +1259,7 @@ page_cur_parse_delete_rec( if (page) { page_cur_position(page + offset, &cursor); - page_cur_delete_rec(&cursor, mtr); + page_cur_delete_rec(&cursor, index, mtr); } return(ptr); @@ -1142,6 +1273,7 @@ void page_cur_delete_rec( /*================*/ page_cur_t* cursor, /* in: a page cursor */ + dict_index_t* index, /* in: record descriptor */ mtr_t* mtr) /* in: mini-transaction handle */ { page_dir_slot_t* cur_dir_slot; @@ -1169,7 +1301,7 @@ page_cur_delete_rec( cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot); /* 0. Write the log record */ - page_cur_delete_rec_write_log(current_rec, mtr); + page_cur_delete_rec_write_log(current_rec, index, mtr); /* 1. Reset the last insert info in the page header and increment the modify clock for the frame */ @@ -1223,7 +1355,7 @@ page_cur_delete_rec( page_dir_slot_set_n_owned(cur_dir_slot, cur_n_owned - 1); /* 6. Free the memory occupied by the record */ - page_mem_free(page, current_rec); + page_mem_free(page, current_rec, index); /* 7. Now we have decremented the number of owned records of the slot. If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the diff --git a/innobase/page/page0page.c b/innobase/page/page0page.c index 343f300fc77..38b1e503c8f 100644 --- a/innobase/page/page0page.c +++ b/innobase/page/page0page.c @@ -18,6 +18,8 @@ Created 2/2/1994 Heikki Tuuri #include "fut0lst.h" #include "btr0sea.h" #include "buf0buf.h" +#include "srv0srv.h" +#include "btr0btr.h" /* THE INDEX PAGE ============== @@ -75,10 +77,14 @@ page_dir_find_owner_slot( page_t* page; page_dir_slot_t* slot; rec_t* original_rec = rec; + ibool comp; ut_ad(page_rec_check(rec)); - while (rec_get_n_owned(rec) == 0) { + page = buf_frame_align(rec); + comp = page_is_comp(page); + + while (rec_get_n_owned(rec, comp) == 0) { steps++; rec = page_rec_get_next(rec); } @@ -96,14 +102,18 @@ page_dir_find_owner_slot( "InnoDB: Original record ", (ulong) buf_frame_get_page_no(page)); - rec_print(stderr, original_rec); + if (comp) { + fputs("(compact record)\n", stderr); + } else { + rec_print_old(stderr, original_rec); + } fprintf(stderr, "\n" "InnoDB: on that page. Steps %lu.\n", (ulong) steps); fputs( "InnoDB: Cannot find the dir slot for record ", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, NULL); fputs("\n" "InnoDB: on that page!\n", stderr); @@ -136,14 +146,15 @@ page_dir_slot_check( page = buf_frame_align(slot); - n_slots = page_header_get_field(page, PAGE_N_DIR_SLOTS); + n_slots = page_dir_get_n_slots(page); ut_a(slot <= page_dir_get_nth_slot(page, 0)); ut_a(slot >= page_dir_get_nth_slot(page, n_slots - 1)); - ut_a(page_rec_check(page + mach_read_from_2(slot))); + ut_a(page_rec_check(page_dir_slot_get_rec(slot))); - n_owned = rec_get_n_owned(page + mach_read_from_2(slot)); + n_owned = rec_get_n_owned(page_dir_slot_get_rec(slot), + page_is_comp(page)); if (slot == page_dir_get_nth_slot(page, 0)) { ut_a(n_owned == 1); @@ -194,12 +205,14 @@ Allocates a block of memory from an index page. */ byte* page_mem_alloc( /*===========*/ - /* out: pointer to start of allocated - buffer, or NULL if allocation fails */ - page_t* page, /* in: index page */ - ulint need, /* in: number of bytes needed */ - ulint* heap_no)/* out: this contains the heap number - of the allocated record if allocation succeeds */ + /* out: pointer to start of allocated + buffer, or NULL if allocation fails */ + page_t* page, /* in: index page */ + ulint need, /* in: number of bytes needed */ + dict_index_t* index, /* in: record descriptor */ + ulint* heap_no)/* out: this contains the heap number + of the allocated record + if allocation succeeds */ { rec_t* rec; byte* block; @@ -213,18 +226,30 @@ page_mem_alloc( rec = page_header_get_ptr(page, PAGE_FREE); - if (rec && (rec_get_size(rec) >= need)) { + if (rec) { + mem_heap_t* heap + = mem_heap_create(100); + const ulint* offsets + = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + + if (rec_offs_size(offsets) >= need) { + page_header_set_ptr(page, PAGE_FREE, + page_rec_get_next(rec)); - page_header_set_ptr(page, PAGE_FREE, page_rec_get_next(rec)); + garbage = page_header_get_field(page, PAGE_GARBAGE); + ut_ad(garbage >= need); - garbage = page_header_get_field(page, PAGE_GARBAGE); - ut_ad(garbage >= need); + page_header_set_field(page, PAGE_GARBAGE, + garbage - need); - page_header_set_field(page, PAGE_GARBAGE, garbage - need); + *heap_no = rec_get_heap_no(rec, page_is_comp(page)); - *heap_no = rec_get_heap_no(rec); + block = rec_get_start(rec, offsets); + mem_heap_free(heap); + return(block); + } - return(rec_get_start(rec)); + mem_heap_free(heap); } /* Could not find space from the free list, try top of heap */ @@ -235,9 +260,9 @@ page_mem_alloc( block = page_header_get_ptr(page, PAGE_HEAP_TOP); page_header_set_ptr(page, PAGE_HEAP_TOP, block + need); - *heap_no = page_header_get_field(page, PAGE_N_HEAP); + *heap_no = page_dir_get_n_heap(page); - page_header_set_field(page, PAGE_N_HEAP, 1 + *heap_no); + page_dir_set_n_heap(page, 1 + *heap_no); return(block); } @@ -253,9 +278,11 @@ page_create_write_log( /*==================*/ buf_frame_t* frame, /* in: a buffer frame where the page is created */ - mtr_t* mtr) /* in: mini-transaction handle */ + mtr_t* mtr, /* in: mini-transaction handle */ + ibool comp) /* in: TRUE=compact page format */ { - mlog_write_initial_log_record(frame, MLOG_PAGE_CREATE, mtr); + mlog_write_initial_log_record(frame, + comp ? MLOG_COMP_PAGE_CREATE : MLOG_PAGE_CREATE, mtr); } /*************************************************************** @@ -267,6 +294,7 @@ page_parse_create( /* out: end of log record or NULL */ byte* ptr, /* in: buffer */ byte* end_ptr __attribute__((unused)), /* in: buffer end */ + ibool comp, /* in: TRUE=compact page format */ page_t* page, /* in: page or NULL */ mtr_t* mtr) /* in: mtr or NULL */ { @@ -275,7 +303,7 @@ page_parse_create( /* The record is empty, except for the record initial part */ if (page) { - page_create(page, mtr); + page_create(page, mtr, comp); } return(ptr); @@ -290,7 +318,8 @@ page_create( /* out: pointer to the page */ buf_frame_t* frame, /* in: a buffer frame where the page is created */ - mtr_t* mtr) /* in: mini-transaction handle */ + mtr_t* mtr, /* in: mini-transaction handle */ + ibool comp) /* in: TRUE=compact page format */ { page_dir_slot_t* slot; mem_heap_t* heap; @@ -300,6 +329,10 @@ page_create( rec_t* infimum_rec; rec_t* supremum_rec; page_t* page; + dict_index_t* index; + ulint* offsets; + + index = comp ? srv_sys->dummy_ind2 : srv_sys->dummy_ind1; ut_ad(frame && mtr); ut_ad(PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE @@ -311,7 +344,7 @@ page_create( buf_frame_modify_clock_inc(frame); /* 2. WRITE LOG INFORMATION */ - page_create_write_log(frame, mtr); + page_create_write_log(frame, mtr, comp); page = frame; @@ -323,43 +356,52 @@ page_create( /* Create first a data tuple for infimum record */ tuple = dtuple_create(heap, 1); + dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM); field = dtuple_get_nth_field(tuple, 0); - dfield_set_data(field, "infimum", sizeof "infimum"); - dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 20, 0); - + dfield_set_data(field, "infimum", 8); + dtype_set(dfield_get_type(field), + DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8, 0); /* Set the corresponding physical record to its place in the page record heap */ heap_top = page + PAGE_DATA; - infimum_rec = rec_convert_dtuple_to_rec(heap_top, tuple); + infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple); + + ut_a(infimum_rec == + page + (comp ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)); + + rec_set_n_owned(infimum_rec, comp, 1); + rec_set_heap_no(infimum_rec, comp, 0); + offsets = rec_get_offsets(infimum_rec, index, ULINT_UNDEFINED, heap); + + heap_top = rec_get_end(infimum_rec, offsets); - ut_a(infimum_rec == page + PAGE_INFIMUM); - - rec_set_n_owned(infimum_rec, 1); - rec_set_heap_no(infimum_rec, 0); - - heap_top = rec_get_end(infimum_rec); - /* Create then a tuple for supremum */ tuple = dtuple_create(heap, 1); + dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM); field = dtuple_get_nth_field(tuple, 0); - dfield_set_data(field, "supremum", sizeof "supremum"); - dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 20, 0); + dfield_set_data(field, "supremum", 9 - comp); + dtype_set(dfield_get_type(field), + DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 9 - comp, 0); - supremum_rec = rec_convert_dtuple_to_rec(heap_top, tuple); + supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple); - ut_a(supremum_rec == page + PAGE_SUPREMUM); + ut_a(supremum_rec == + page + (comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM)); - rec_set_n_owned(supremum_rec, 1); - rec_set_heap_no(supremum_rec, 1); - - heap_top = rec_get_end(supremum_rec); + rec_set_n_owned(supremum_rec, comp, 1); + rec_set_heap_no(supremum_rec, comp, 1); - ut_ad(heap_top == page + PAGE_SUPREMUM_END); + offsets = rec_reget_offsets(supremum_rec, index, + offsets, ULINT_UNDEFINED, heap); + heap_top = rec_get_end(supremum_rec, offsets); + + ut_ad(heap_top == + page + (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END)); mem_heap_free(heap); @@ -367,7 +409,7 @@ page_create( page_header_set_field(page, PAGE_N_DIR_SLOTS, 2); page_header_set_ptr(page, PAGE_HEAP_TOP, heap_top); - page_header_set_field(page, PAGE_N_HEAP, 2); + page_header_set_field(page, PAGE_N_HEAP, comp ? 0x8002 : 2); page_header_set_ptr(page, PAGE_FREE, NULL); page_header_set_field(page, PAGE_GARBAGE, 0); page_header_set_ptr(page, PAGE_LAST_INSERT, NULL); @@ -388,8 +430,8 @@ page_create( /* Set the next pointers in infimum and supremum */ - rec_set_next_offs(infimum_rec, (ulint)(supremum_rec - page)); - rec_set_next_offs(supremum_rec, 0); + rec_set_next_offs(infimum_rec, comp, (ulint)(supremum_rec - page)); + rec_set_next_offs(supremum_rec, comp, 0); return(page); } @@ -401,10 +443,11 @@ touch the lock table and max trx id on page. */ void page_copy_rec_list_end_no_locks( /*============================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr) /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { page_cur_t cur1; page_cur_t cur2; @@ -416,8 +459,11 @@ page_copy_rec_list_end_no_locks( page_cur_move_to_next(&cur1); } - - ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == PAGE_INFIMUM); + + ut_a(index->table->comp == page_is_comp(page)); + ut_a(index->table->comp == page_is_comp(new_page)); + ut_a(mach_read_from_2(new_page + UNIV_PAGE_SIZE - 10) == (ulint) + (index->table->comp ? PAGE_NEW_INFIMUM : PAGE_OLD_INFIMUM)); page_cur_set_before_first(new_page, &cur2); @@ -427,7 +473,7 @@ page_copy_rec_list_end_no_locks( while (sup != page_cur_get_rec(&cur1)) { if (!page_cur_rec_insert(&cur2, - page_cur_get_rec(&cur1), mtr)) { + page_cur_get_rec(&cur1), index, mtr)) { /* Track an assertion failure reported on the mailing list on June 18th, 2003 */ @@ -456,16 +502,18 @@ The records are copied to the start of the record list on new_page. */ void page_copy_rec_list_end( /*===================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr) /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { - if (page_header_get_field(new_page, PAGE_N_HEAP) == 2) { + if (page_dir_get_n_heap(new_page) == 2) { page_copy_rec_list_end_to_created_page(new_page, page, rec, - mtr); + index, mtr); } else { - page_copy_rec_list_end_no_locks(new_page, page, rec, mtr); + page_copy_rec_list_end_no_locks(new_page, page, rec, + index, mtr); } /* Update the lock table, MAX_TRX_ID, and possible hash index */ @@ -474,7 +522,7 @@ page_copy_rec_list_end( page_update_max_trx_id(new_page, page_get_max_trx_id(page)); - btr_search_move_or_delete_hash_entries(new_page, page); + btr_search_move_or_delete_hash_entries(new_page, page, index); } /***************************************************************** @@ -485,10 +533,11 @@ The records are copied to the end of the record list on new_page. */ void page_copy_rec_list_start( /*=====================*/ - page_t* new_page, /* in: index page to copy to */ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr) /* in: mtr */ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { page_cur_t cur1; page_cur_t cur2; @@ -510,8 +559,8 @@ page_copy_rec_list_start( /* Copy records from the original page to the new page */ while (page_cur_get_rec(&cur1) != rec) { - ut_a( - page_cur_rec_insert(&cur2, page_cur_get_rec(&cur1), mtr)); + ut_a(page_cur_rec_insert(&cur2, + page_cur_get_rec(&cur1), index, mtr)); page_cur_move_to_next(&cur1); page_cur_move_to_next(&cur2); @@ -523,7 +572,7 @@ page_copy_rec_list_start( page_update_max_trx_id(new_page, page_get_max_trx_id(page)); - btr_search_move_or_delete_hash_entries(new_page, page); + btr_search_move_or_delete_hash_entries(new_page, page, index); } /************************************************************** @@ -532,18 +581,25 @@ UNIV_INLINE void page_delete_rec_list_write_log( /*===========================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - byte type, /* in: operation type: MLOG_LIST_END_DELETE, ... */ - mtr_t* mtr) /* in: mtr */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + byte type, /* in: operation type: + MLOG_LIST_END_DELETE, ... */ + mtr_t* mtr) /* in: mtr */ { - ut_ad((type == MLOG_LIST_END_DELETE) - || (type == MLOG_LIST_START_DELETE)); - - mlog_write_initial_log_record(page, type, mtr); - - /* Write the parameter as a 2-byte ulint */ - mlog_catenate_ulint(mtr, rec - page, MLOG_2BYTES); + byte* log_ptr; + ut_ad(type == MLOG_LIST_END_DELETE + || type == MLOG_LIST_START_DELETE + || type == MLOG_COMP_LIST_END_DELETE + || type == MLOG_COMP_LIST_START_DELETE); + + log_ptr = mlog_open_and_write_index(mtr, page, index, type, 2); + if (log_ptr) { + /* Write the parameter as a 2-byte ulint */ + mach_write_to_2(log_ptr, rec - page); + mlog_close(mtr, log_ptr + 2); + } } /************************************************************** @@ -552,18 +608,23 @@ Parses a log record of a record list end or start deletion. */ byte* page_parse_delete_rec_list( /*=======================*/ - /* out: end of log record or NULL */ - byte type, /* in: MLOG_LIST_END_DELETE or - MLOG_LIST_START_DELETE */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ + /* out: end of log record or NULL */ + byte type, /* in: MLOG_LIST_END_DELETE, + MLOG_LIST_START_DELETE, + MLOG_COMP_LIST_END_DELETE or + MLOG_COMP_LIST_START_DELETE */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ { ulint offset; - ut_ad((type == MLOG_LIST_END_DELETE) - || (type == MLOG_LIST_START_DELETE)); + ut_ad(type == MLOG_LIST_END_DELETE + || type == MLOG_LIST_START_DELETE + || type == MLOG_COMP_LIST_END_DELETE + || type == MLOG_COMP_LIST_START_DELETE); /* Read the record offset as a 2-byte ulint */ @@ -580,11 +641,12 @@ page_parse_delete_rec_list( return(ptr); } - if (type == MLOG_LIST_END_DELETE) { - page_delete_rec_list_end(page, page + offset, ULINT_UNDEFINED, - ULINT_UNDEFINED, mtr); + if (type == MLOG_LIST_END_DELETE + || type == MLOG_COMP_LIST_END_DELETE) { + page_delete_rec_list_end(page, page + offset, index, + ULINT_UNDEFINED, ULINT_UNDEFINED, mtr); } else { - page_delete_rec_list_start(page, page + offset, mtr); + page_delete_rec_list_start(page, page + offset, index, mtr); } return(ptr); @@ -597,14 +659,15 @@ The infimum and supremum records are not deleted. */ void page_delete_rec_list_end( /*=====================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - ulint n_recs, /* in: number of records to delete, or ULINT_UNDEFINED - if not known */ - ulint size, /* in: the sum of the sizes of the records in the end - of the chain to delete, or ULINT_UNDEFINED if not - known */ - mtr_t* mtr) /* in: mtr */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + ulint n_recs, /* in: number of records to delete, + or ULINT_UNDEFINED if not known */ + ulint size, /* in: the sum of the sizes of the + records in the end of the chain to + delete, or ULINT_UNDEFINED if not known */ + mtr_t* mtr) /* in: mtr */ { page_dir_slot_t* slot; ulint slot_index; @@ -615,10 +678,12 @@ page_delete_rec_list_end( ulint count; ulint n_owned; rec_t* sup; + ibool comp; /* Reset the last insert info in the page header and increment the modify clock for the frame */ + ut_ad(size == ULINT_UNDEFINED || size < UNIV_PAGE_SIZE); page_header_set_ptr(page, PAGE_LAST_INSERT, NULL); /* The page gets invalid for optimistic searches: increment the @@ -632,7 +697,9 @@ page_delete_rec_list_end( rec = page_rec_get_next(rec); } - page_delete_rec_list_write_log(page, rec, MLOG_LIST_END_DELETE, mtr); + comp = page_is_comp(page); + page_delete_rec_list_write_log(page, rec, index, + comp ? MLOG_COMP_LIST_END_DELETE : MLOG_LIST_END_DELETE, mtr); if (rec == sup) { @@ -644,19 +711,32 @@ page_delete_rec_list_end( last_rec = page_rec_get_prev(sup); if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) { + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; /* Calculate the sum of sizes and the number of records */ size = 0; n_recs = 0; rec2 = rec; while (rec2 != sup) { - size += rec_get_size(rec2); + ulint s; + offsets = rec_reget_offsets(rec2, index, + offsets, ULINT_UNDEFINED, heap); + s = rec_offs_size(offsets); + ut_ad(rec2 - page + s - rec_offs_extra_size(offsets) + < UNIV_PAGE_SIZE); + ut_ad(size + s < UNIV_PAGE_SIZE); + size += s; n_recs++; rec2 = page_rec_get_next(rec2); } + + mem_heap_free(heap); } + ut_ad(size < UNIV_PAGE_SIZE); + /* Update the page directory; there is no need to balance the number of the records owned by the supremum record, as it is allowed to be less than PAGE_DIR_SLOT_MIN_N_OWNED */ @@ -664,15 +744,15 @@ page_delete_rec_list_end( rec2 = rec; count = 0; - while (rec_get_n_owned(rec2) == 0) { + while (rec_get_n_owned(rec2, comp) == 0) { count++; rec2 = page_rec_get_next(rec2); } - ut_ad(rec_get_n_owned(rec2) - count > 0); + ut_ad(rec_get_n_owned(rec2, comp) - count > 0); - n_owned = rec_get_n_owned(rec2) - count; + n_owned = rec_get_n_owned(rec2, comp) - count; slot_index = page_dir_find_owner_slot(rec2); slot = page_dir_get_nth_slot(page, slot_index); @@ -680,7 +760,7 @@ page_delete_rec_list_end( page_dir_slot_set_rec(slot, sup); page_dir_slot_set_n_owned(slot, n_owned); - page_header_set_field(page, PAGE_N_DIR_SLOTS, slot_index + 1); + page_dir_set_n_slots(page, slot_index + 1); /* Remove the record chain segment from the record chain */ page_rec_set_next(prev_rec, page_get_supremum_rec(page)); @@ -706,14 +786,19 @@ that record. Infimum and supremum records are not deleted. */ void page_delete_rec_list_start( /*=======================*/ - page_t* page, /* in: index page */ - rec_t* rec, /* in: record on page */ - mtr_t* mtr) /* in: mtr */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { page_cur_t cur1; ulint log_mode; - page_delete_rec_list_write_log(page, rec, MLOG_LIST_START_DELETE, mtr); + page_delete_rec_list_write_log(page, rec, index, + index->table->comp + ? MLOG_COMP_LIST_START_DELETE + : MLOG_LIST_START_DELETE, + mtr); page_cur_set_before_first(page, &cur1); @@ -730,7 +815,7 @@ page_delete_rec_list_start( while (page_cur_get_rec(&cur1) != rec) { - page_cur_delete_rec(&cur1, mtr); + page_cur_delete_rec(&cur1, index, mtr); } /* Restore log mode */ @@ -745,10 +830,11 @@ split_rec. */ void page_move_rec_list_end( /*===================*/ - page_t* new_page, /* in: index page where to move */ - page_t* page, /* in: index page */ - rec_t* split_rec, /* in: first record to move */ - mtr_t* mtr) /* in: mtr */ + page_t* new_page, /* in: index page where to move */ + page_t* page, /* in: index page */ + rec_t* split_rec, /* in: first record to move */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { ulint old_data_size; ulint new_data_size; @@ -758,15 +844,15 @@ page_move_rec_list_end( old_data_size = page_get_data_size(new_page); old_n_recs = page_get_n_recs(new_page); - page_copy_rec_list_end(new_page, page, split_rec, mtr); + page_copy_rec_list_end(new_page, page, split_rec, index, mtr); new_data_size = page_get_data_size(new_page); new_n_recs = page_get_n_recs(new_page); ut_ad(new_data_size >= old_data_size); - page_delete_rec_list_end(page, split_rec, new_n_recs - old_n_recs, - new_data_size - old_data_size, mtr); + page_delete_rec_list_end(page, split_rec, index, + new_n_recs - old_n_recs, new_data_size - old_data_size, mtr); } /***************************************************************** @@ -776,14 +862,15 @@ split_rec. */ void page_move_rec_list_start( /*=====================*/ - page_t* new_page, /* in: index page where to move */ - page_t* page, /* in: index page */ - rec_t* split_rec, /* in: first record not to move */ - mtr_t* mtr) /* in: mtr */ + page_t* new_page, /* in: index page where to move */ + page_t* page, /* in: index page */ + rec_t* split_rec, /* in: first record not to move */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { - page_copy_rec_list_start(new_page, page, split_rec, mtr); + page_copy_rec_list_start(new_page, page, split_rec, index, mtr); - page_delete_rec_list_start(page, split_rec, mtr); + page_delete_rec_list_start(page, split_rec, index, mtr); } /*************************************************************************** @@ -801,7 +888,7 @@ page_rec_write_index_page_no( byte* data; ulint len; - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field_old(rec, i, &len); ut_ad(len == 4); @@ -885,7 +972,7 @@ page_dir_add_slots( ut_ad(start < n_slots - 1); /* Update the page header */ - page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots + n); + page_dir_set_n_slots(page, n_slots + n); /* Move slots up */ @@ -1006,8 +1093,8 @@ page_dir_balance_slot( old_rec = page_dir_slot_get_rec(slot); new_rec = page_rec_get_next(old_rec); - rec_set_n_owned(old_rec, 0); - rec_set_n_owned(new_rec, n_owned + 1); + rec_set_n_owned(old_rec, page_is_comp(page), 0); + rec_set_n_owned(new_rec, page_is_comp(page), n_owned + 1); page_dir_slot_set_rec(slot, new_rec); @@ -1080,13 +1167,15 @@ page_rec_get_n_recs_before( rec_t* slot_rec; page_t* page; ulint i; + ibool comp; lint n = 0; ut_ad(page_rec_check(rec)); page = buf_frame_align(rec); - - while (rec_get_n_owned(rec) == 0) { + comp = page_is_comp(page); + + while (rec_get_n_owned(rec, comp) == 0) { rec = page_rec_get_next(rec); n--; @@ -1096,7 +1185,7 @@ page_rec_get_n_recs_before( slot = page_dir_get_nth_slot(page, i); slot_rec = page_dir_slot_get_rec(slot); - n += rec_get_n_owned(slot_rec); + n += rec_get_n_owned(slot_rec, comp); if (rec == slot_rec) { @@ -1118,17 +1207,21 @@ the index page context. */ void page_rec_print( /*===========*/ - rec_t* rec) + rec_t* rec, /* in: physical record */ + const ulint* offsets)/* in: record descriptor */ { - rec_print(stderr, rec); + ibool comp = page_is_comp(buf_frame_align(rec)); + + ut_a(comp == rec_offs_comp(offsets)); + rec_print(stderr, rec, offsets); fprintf(stderr, " n_owned: %lu; heap_no: %lu; next rec: %lu\n", - (ulong) rec_get_n_owned(rec), - (ulong) rec_get_heap_no(rec), - (ulong) rec_get_next_offs(rec)); + (ulong) rec_get_n_owned(rec, comp), + (ulong) rec_get_heap_no(rec, comp), + (ulong) rec_get_next_offs(rec, comp)); page_rec_check(rec); - rec_validate(rec); + rec_validate(rec, offsets); } /******************************************************************* @@ -1176,12 +1269,18 @@ debugging purposes. */ void page_print_list( /*============*/ - page_t* page, /* in: index page */ - ulint pr_n) /* in: print n first and n last entries */ + page_t* page, /* in: index page */ + dict_index_t* index, /* in: dictionary index of the page */ + ulint pr_n) /* in: print n first and n last entries */ { page_cur_t cur; ulint count; ulint n_recs; + mem_heap_t* heap; + ulint* offsets = NULL; + + ut_a(page_is_comp(page) == index->table->comp); + heap = mem_heap_create(100); fprintf(stderr, "--------------------------------\n" @@ -1193,7 +1292,9 @@ page_print_list( page_cur_set_before_first(page, &cur); count = 0; for (;;) { - page_rec_print(cur.rec); + offsets = rec_reget_offsets(cur.rec, index, + offsets, ULINT_UNDEFINED, heap); + page_rec_print(cur.rec, offsets); if (count == pr_n) { break; @@ -1213,7 +1314,9 @@ page_print_list( page_cur_move_to_next(&cur); if (count + pr_n >= n_recs) { - page_rec_print(cur.rec); + offsets = rec_reget_offsets(cur.rec, index, + offsets, ULINT_UNDEFINED, heap); + page_rec_print(cur.rec, offsets); } count++; } @@ -1222,6 +1325,8 @@ page_print_list( "Total of %lu records \n" "--------------------------------\n", (ulong) (count + 1)); + + mem_heap_free(heap); } /******************************************************************* @@ -1235,14 +1340,15 @@ page_header_print( fprintf(stderr, "--------------------------------\n" "PAGE HEADER INFO\n" - "Page address %p, n records %lu\n" + "Page address %p, n records %lu (%s)\n" "n dir slots %lu, heap top %lu\n" "Page n heap %lu, free %lu, garbage %lu\n" "Page last insert %lu, direction %lu, n direction %lu\n", page, (ulong) page_header_get_field(page, PAGE_N_RECS), + page_is_comp(page) ? "compact format" : "original format", (ulong) page_header_get_field(page, PAGE_N_DIR_SLOTS), (ulong) page_header_get_field(page, PAGE_HEAP_TOP), - (ulong) page_header_get_field(page, PAGE_N_HEAP), + (ulong) page_dir_get_n_heap(page), (ulong) page_header_get_field(page, PAGE_FREE), (ulong) page_header_get_field(page, PAGE_GARBAGE), (ulong) page_header_get_field(page, PAGE_LAST_INSERT), @@ -1257,13 +1363,16 @@ debugging purposes. */ void page_print( /*======*/ - page_t* page, /* in: index page */ - ulint dn, /* in: print dn first and last entries in directory */ - ulint rn) /* in: print rn first and last records on page */ + page_t* page, /* in: index page */ + dict_index_t* index, /* in: dictionary index of the page */ + ulint dn, /* in: print dn first and last entries + in directory */ + ulint rn) /* in: print rn first and last records + in directory */ { page_header_print(page); page_dir_print(page, dn); - page_print_list(page, rn); + page_print_list(page, index, rn); } /******************************************************************* @@ -1274,20 +1383,24 @@ the heap_no field. */ ibool page_rec_validate( /*==============*/ - /* out: TRUE if ok */ - rec_t* rec) /* in: record on the page */ + /* out: TRUE if ok */ + rec_t* rec, /* in: physical record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { ulint n_owned; ulint heap_no; - page_t* page; + page_t* page; + ibool comp; page = buf_frame_align(rec); + comp = page_is_comp(page); + ut_a(comp == rec_offs_comp(offsets)); page_rec_check(rec); - rec_validate(rec); + rec_validate(rec, offsets); - n_owned = rec_get_n_owned(rec); - heap_no = rec_get_heap_no(rec); + n_owned = rec_get_n_owned(rec, comp); + heap_no = rec_get_heap_no(rec, comp); if (!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED)) { fprintf(stderr, @@ -1296,11 +1409,11 @@ page_rec_validate( return(FALSE); } - if (!(heap_no < page_header_get_field(page, PAGE_N_HEAP))) { + if (!(heap_no < page_dir_get_n_heap(page))) { fprintf(stderr, "InnoDB: Heap no of rec %lu too big %lu %lu\n", (ulong)(rec - page), (ulong) heap_no, - (ulong) page_header_get_field(page, PAGE_N_HEAP)); + (ulong) page_dir_get_n_heap(page)); return(FALSE); } @@ -1358,6 +1471,7 @@ page_simple_validate( ulint count; ulint own_count; ibool ret = FALSE; + ibool comp = page_is_comp(page); /* Check first that the record heap and the directory do not overlap. */ @@ -1404,13 +1518,13 @@ page_simple_validate( goto func_exit; } - if (rec_get_n_owned(rec) != 0) { + if (rec_get_n_owned(rec, comp) != 0) { /* This is a record pointed to by a dir slot */ - if (rec_get_n_owned(rec) != own_count) { + if (rec_get_n_owned(rec, comp) != own_count) { fprintf(stderr, "InnoDB: Wrong owned count %lu, %lu, rec %lu\n", - (ulong) rec_get_n_owned(rec), + (ulong) rec_get_n_owned(rec, comp), (ulong) own_count, (ulong)(rec - page)); @@ -1438,11 +1552,11 @@ page_simple_validate( break; } - if (rec_get_next_offs(rec) < FIL_PAGE_DATA - || rec_get_next_offs(rec) >= UNIV_PAGE_SIZE) { + if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA + || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) { fprintf(stderr, "InnoDB: Next record offset nonsensical %lu for rec %lu\n", - (ulong) rec_get_next_offs(rec), + (ulong) rec_get_next_offs(rec, comp), (ulong)(rec - page)); goto func_exit; @@ -1461,7 +1575,7 @@ page_simple_validate( own_count++; } - if (rec_get_n_owned(rec) == 0) { + if (rec_get_n_owned(rec, comp) == 0) { fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n"); goto func_exit; @@ -1514,10 +1628,10 @@ page_simple_validate( rec = page_rec_get_next(rec); } - if (page_header_get_field(page, PAGE_N_HEAP) != count + 1) { + if (page_dir_get_n_heap(page) != count + 1) { fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n", - (ulong) page_header_get_field(page, PAGE_N_HEAP), + (ulong) page_dir_get_n_heap(page), (ulong) (count + 1)); goto func_exit; @@ -1549,12 +1663,19 @@ page_validate( ulint slot_no; ulint data_size; rec_t* rec; - rec_t* old_rec = NULL; + rec_t* old_rec = NULL; ulint offs; ulint n_slots; - ibool ret = FALSE; + ibool ret = FALSE; ulint i; - + ibool comp = page_is_comp(page); + ulint* offsets = NULL; + ulint* old_offsets = NULL; + + if (comp != index->table->comp) { + fputs("InnoDB: 'compact format' flag mismatch\n", stderr); + goto func_exit2; + } if (!page_simple_validate(page)) { goto func_exit2; } @@ -1599,22 +1720,33 @@ page_validate( for (;;) { rec = cur.rec; + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); - if (!page_rec_validate(rec)) { + if (comp && page_rec_is_user_rec(rec) + && rec_get_node_ptr_flag(rec) + == !btr_page_get_level_low(page)) { + fputs("InnoDB: node_ptr flag mismatch\n", stderr); + goto func_exit; + } + + if (!page_rec_validate(rec, offsets)) { goto func_exit; } /* Check that the records are in the ascending order */ if ((count >= 2) && (!page_cur_is_after_last(&cur))) { - if (!(1 == cmp_rec_rec(rec, old_rec, index))) { + if (!(1 == cmp_rec_rec(rec, old_rec, + offsets, old_offsets, + ULINT_UNDEFINED, index))) { fprintf(stderr, "InnoDB: Records in wrong order on page %lu", (ulong) buf_frame_get_page_no(page)); dict_index_name_print(stderr, NULL, index); fputs("\nInnoDB: previous record ", stderr); - rec_print(stderr, old_rec); + rec_print(stderr, old_rec, old_offsets); fputs("\nInnoDB: record ", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); putc('\n', stderr); goto func_exit; @@ -1624,12 +1756,12 @@ page_validate( if ((rec != page_get_supremum_rec(page)) && (rec != page_get_infimum_rec(page))) { - data_size += rec_get_size(rec); + data_size += rec_offs_size(offsets); } - offs = rec_get_start(rec) - page; + offs = rec_get_start(rec, offsets) - page; - for (i = 0; i < rec_get_size(rec); i++) { + for (i = 0; i < rec_offs_size(offsets); i++) { if (!buf[offs + i] == 0) { /* No other record may overlap this */ @@ -1641,12 +1773,12 @@ page_validate( buf[offs + i] = 1; } - if (rec_get_n_owned(rec) != 0) { + if (rec_get_n_owned(rec, comp) != 0) { /* This is a record pointed to by a dir slot */ - if (rec_get_n_owned(rec) != own_count) { + if (rec_get_n_owned(rec, comp) != own_count) { fprintf(stderr, "InnoDB: Wrong owned count %lu, %lu\n", - (ulong) rec_get_n_owned(rec), + (ulong) rec_get_n_owned(rec, comp), (ulong) own_count); goto func_exit; } @@ -1671,11 +1803,11 @@ page_validate( break; } - if (rec_get_next_offs(rec) < FIL_PAGE_DATA - || rec_get_next_offs(rec) >= UNIV_PAGE_SIZE) { + if (rec_get_next_offs(rec, comp) < FIL_PAGE_DATA + || rec_get_next_offs(rec, comp) >= UNIV_PAGE_SIZE) { fprintf(stderr, "InnoDB: Next record offset wrong %lu\n", - (ulong) rec_get_next_offs(rec)); + (ulong) rec_get_next_offs(rec, comp)); goto func_exit; } @@ -1683,9 +1815,15 @@ page_validate( page_cur_move_to_next(&cur); own_count++; old_rec = rec; + /* set old_offsets to offsets; recycle offsets */ + { + ulint* offs = old_offsets; + old_offsets = offsets; + offsets = offs; + } } - if (rec_get_n_owned(rec) == 0) { + if (rec_get_n_owned(rec, comp) == 0) { fputs("InnoDB: n owned is zero\n", stderr); goto func_exit; } @@ -1714,15 +1852,17 @@ page_validate( rec = page_header_get_ptr(page, PAGE_FREE); while (rec != NULL) { - if (!page_rec_validate(rec)) { + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + if (!page_rec_validate(rec, offsets)) { goto func_exit; } count++; - offs = rec_get_start(rec) - page; + offs = rec_get_start(rec, offsets) - page; - for (i = 0; i < rec_get_size(rec); i++) { + for (i = 0; i < rec_offs_size(offsets); i++) { if (buf[offs + i] != 0) { fputs( @@ -1736,9 +1876,9 @@ page_validate( rec = page_rec_get_next(rec); } - if (page_header_get_field(page, PAGE_N_HEAP) != count + 1) { + if (page_dir_get_n_heap(page) != count + 1) { fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n", - (ulong) page_header_get_field(page, PAGE_N_HEAP), + (ulong) page_dir_get_n_heap(page), (ulong) count + 1); goto func_exit; } @@ -1775,7 +1915,7 @@ page_find_rec_with_heap_no( page_cur_set_before_first(page, &cur); for (;;) { - if (rec_get_heap_no(cur.rec) == heap_no) { + if (rec_get_heap_no(cur.rec, page_is_comp(page)) == heap_no) { return(cur.rec); } diff --git a/innobase/pars/pars0pars.c b/innobase/pars/pars0pars.c index 16d630dd318..c62184abd85 100644 --- a/innobase/pars/pars0pars.c +++ b/innobase/pars/pars0pars.c @@ -1514,8 +1514,11 @@ pars_create_table( n_cols = que_node_list_get_len(column_defs); - table = dict_mem_table_create(table_sym->name, 0, n_cols); - + /* As the InnoDB SQL parser is for internal use only, + for creating some system tables, this function will only + create tables in the old (not compact) record format. */ + table = dict_mem_table_create(table_sym->name, 0, n_cols, FALSE); + if (not_fit_in_memory != NULL) { table->does_not_fit_in_memory = TRUE; } diff --git a/innobase/rem/rem0cmp.c b/innobase/rem/rem0cmp.c index 041fb7914e2..974fc7a24d0 100644 --- a/innobase/rem/rem0cmp.c +++ b/innobase/rem/rem0cmp.c @@ -51,6 +51,7 @@ cmp_debug_dtuple_rec_with_match( dtuple in some of the common fields, or which has an equal number or more fields than dtuple */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint* matched_fields);/* in/out: number of already completely matched fields; when function returns, contains the value for current @@ -426,6 +427,7 @@ cmp_dtuple_rec_with_match( dtuple in some of the common fields, or which has an equal number or more fields than dtuple */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint* matched_fields, /* in/out: number of already completely matched fields; when function returns, contains the value for current comparison */ @@ -455,12 +457,13 @@ cmp_dtuple_rec_with_match( ut_ad(dtuple && rec && matched_fields && matched_bytes); ut_ad(dtuple_check_typed(dtuple)); + ut_ad(rec_offs_validate(rec, NULL, offsets)); cur_field = *matched_fields; cur_bytes = *matched_bytes; ut_ad(cur_field <= dtuple_get_n_fields_cmp(dtuple)); - ut_ad(cur_field <= rec_get_n_fields(rec)); + ut_ad(cur_field <= rec_offs_n_fields(offsets)); /* Match fields in a loop; stop if we run out of fields in dtuple or find an externally stored field */ @@ -472,7 +475,8 @@ cmp_dtuple_rec_with_match( dtuple_f_len = dfield_get_len(dtuple_field); - rec_b_ptr = rec_get_nth_field(rec, cur_field, &rec_f_len); + rec_b_ptr = rec_get_nth_field(rec, offsets, + cur_field, &rec_f_len); /* If we have matched yet 0 bytes, it may be that one or both the fields are SQL null, or the record or dtuple may be @@ -482,7 +486,8 @@ cmp_dtuple_rec_with_match( if (cur_bytes == 0) { if (cur_field == 0) { - if (rec_get_info_bits(rec) + if (rec_get_info_bits(rec, + rec_offs_comp(offsets)) & REC_INFO_MIN_REC_FLAG) { if (dtuple_get_info_bits(dtuple) @@ -504,7 +509,7 @@ cmp_dtuple_rec_with_match( } } - if (rec_get_nth_field_extern_bit(rec, cur_field)) { + if (rec_offs_nth_extern(offsets, cur_field)) { /* We do not compare to an externally stored field */ @@ -635,7 +640,7 @@ cmp_dtuple_rec_with_match( up to the common fields */ order_resolved: ut_ad((ret >= - 1) && (ret <= 1)); - ut_ad(ret == cmp_debug_dtuple_rec_with_match(dtuple, rec, + ut_ad(ret == cmp_debug_dtuple_rec_with_match(dtuple, rec, offsets, matched_fields)); ut_ad(*matched_fields == cur_field); /* In the debug version, the above cmp_debug_... sets @@ -656,13 +661,15 @@ cmp_dtuple_rec( less than rec, respectively; see the comments for cmp_dtuple_rec_with_match */ dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec) /* in: physical record */ + rec_t* rec, /* in: physical record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { ulint matched_fields = 0; ulint matched_bytes = 0; - return(cmp_dtuple_rec_with_match(dtuple, rec, &matched_fields, - &matched_bytes)); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + return(cmp_dtuple_rec_with_match(dtuple, rec, offsets, + &matched_fields, &matched_bytes)); } /****************************************************************** @@ -673,22 +680,24 @@ ibool cmp_dtuple_is_prefix_of_rec( /*========================*/ /* out: TRUE if prefix */ - dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec) /* in: physical record */ + dtuple_t* dtuple, /* in: data tuple */ + rec_t* rec, /* in: physical record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { ulint n_fields; ulint matched_fields = 0; ulint matched_bytes = 0; + ut_ad(rec_offs_validate(rec, NULL, offsets)); n_fields = dtuple_get_n_fields(dtuple); - if (n_fields > rec_get_n_fields(rec)) { + if (n_fields > rec_offs_n_fields(offsets)) { return(FALSE); } - cmp_dtuple_rec_with_match(dtuple, rec, &matched_fields, - &matched_bytes); + cmp_dtuple_rec_with_match(dtuple, rec, offsets, + &matched_fields, &matched_bytes); if (matched_fields == n_fields) { return(TRUE); @@ -703,42 +712,6 @@ cmp_dtuple_is_prefix_of_rec( return(FALSE); } -/****************************************************************** -Compares a prefix of a data tuple to a prefix of a physical record for -equality. If there are less fields in rec than parameter n_fields, FALSE -is returned. NOTE that n_fields_cmp of dtuple does not affect this -comparison. */ - -ibool -cmp_dtuple_rec_prefix_equal( -/*========================*/ - /* out: TRUE if equal */ - dtuple_t* dtuple, /* in: data tuple */ - rec_t* rec, /* in: physical record */ - ulint n_fields) /* in: number of fields which should be - compared; must not exceed the number of - fields in dtuple */ -{ - ulint matched_fields = 0; - ulint matched_bytes = 0; - - ut_ad(n_fields <= dtuple_get_n_fields(dtuple)); - - if (rec_get_n_fields(rec) < n_fields) { - - return(FALSE); - } - - cmp_dtuple_rec_with_match(dtuple, rec, &matched_fields, - &matched_bytes); - if (matched_fields >= n_fields) { - - return(TRUE); - } - - return(FALSE); -} - /***************************************************************** This function is used to compare two physical records. Only the common first fields are compared, and if an externally stored field is @@ -752,7 +725,13 @@ cmp_rec_rec_with_match( first fields are compared */ rec_t* rec1, /* in: physical record */ rec_t* rec2, /* in: physical record */ + const ulint* offsets1,/* in: rec_get_offsets(rec1, index) */ + const ulint* offsets2,/* in: rec_get_offsets(rec2, index) */ dict_index_t* index, /* in: data dictionary index */ + ulint n, /* in: number of fields to compare, + or ULINT_UNDEFINED if both records + contain all fields, and all fields + should be compared */ ulint* matched_fields, /* in/out: number of already completely matched fields; when the function returns, contains the value the for current @@ -778,17 +757,27 @@ cmp_rec_rec_with_match( ulint cur_bytes; /* number of already matched bytes in current field */ int ret = 3333; /* return value */ + ibool comp; ut_ad(rec1 && rec2 && index); - - rec1_n_fields = rec_get_n_fields(rec1); - rec2_n_fields = rec_get_n_fields(rec2); + ut_ad(rec_offs_validate(rec1, index, offsets1)); + ut_ad(rec_offs_validate(rec2, index, offsets2)); + ut_ad(rec_offs_comp(offsets1) == rec_offs_comp(offsets2)); + + comp = rec_offs_comp(offsets1); + if (n == ULINT_UNDEFINED) { + rec1_n_fields = rec_offs_n_fields(offsets1); + rec2_n_fields = rec_offs_n_fields(offsets2); + } else { + ut_ad(n <= rec_offs_n_fields(offsets1)); + ut_ad(n <= rec_offs_n_fields(offsets2)); + rec1_n_fields = rec2_n_fields = n; + } cur_field = *matched_fields; cur_bytes = *matched_bytes; - /* Match fields in a loop; stop if we run out of fields in either - record */ + /* Match fields in a loop */ while ((cur_field < rec1_n_fields) && (cur_field < rec2_n_fields)) { @@ -800,17 +789,19 @@ cmp_rec_rec_with_match( dict_index_get_nth_field(index, cur_field))); } - rec1_b_ptr = rec_get_nth_field(rec1, cur_field, &rec1_f_len); - rec2_b_ptr = rec_get_nth_field(rec2, cur_field, &rec2_f_len); - + rec1_b_ptr = rec_get_nth_field(rec1, offsets1, + cur_field, &rec1_f_len); + rec2_b_ptr = rec_get_nth_field(rec2, offsets2, + cur_field, &rec2_f_len); + if (cur_bytes == 0) { if (cur_field == 0) { /* Test if rec is the predefined minimum record */ - if (rec_get_info_bits(rec1) + if (rec_get_info_bits(rec1, comp) & REC_INFO_MIN_REC_FLAG) { - if (rec_get_info_bits(rec2) + if (rec_get_info_bits(rec2, comp) & REC_INFO_MIN_REC_FLAG) { ret = 0; } else { @@ -819,7 +810,7 @@ cmp_rec_rec_with_match( goto order_resolved; - } else if (rec_get_info_bits(rec2) + } else if (rec_get_info_bits(rec2, comp) & REC_INFO_MIN_REC_FLAG) { ret = 1; @@ -828,8 +819,8 @@ cmp_rec_rec_with_match( } } - if (rec_get_nth_field_extern_bit(rec1, cur_field) - || rec_get_nth_field_extern_bit(rec2, cur_field)) { + if (rec_offs_nth_extern(offsets1, cur_field) + || rec_offs_nth_extern(offsets2, cur_field)) { /* We do not compare to an externally stored field */ @@ -984,6 +975,7 @@ cmp_debug_dtuple_rec_with_match( dtuple in some of the common fields, or which has an equal number or more fields than dtuple */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint* matched_fields) /* in/out: number of already completely matched fields; when function returns, contains the value for current @@ -1003,14 +995,16 @@ cmp_debug_dtuple_rec_with_match( ut_ad(dtuple && rec && matched_fields); ut_ad(dtuple_check_typed(dtuple)); + ut_ad(rec_offs_validate(rec, NULL, offsets)); ut_ad(*matched_fields <= dtuple_get_n_fields_cmp(dtuple)); - ut_ad(*matched_fields <= rec_get_n_fields(rec)); + ut_ad(*matched_fields <= rec_offs_n_fields(offsets)); cur_field = *matched_fields; if (cur_field == 0) { - if (rec_get_info_bits(rec) & REC_INFO_MIN_REC_FLAG) { + if (rec_get_info_bits(rec, rec_offs_comp(offsets)) + & REC_INFO_MIN_REC_FLAG) { if (dtuple_get_info_bits(dtuple) & REC_INFO_MIN_REC_FLAG) { @@ -1040,9 +1034,10 @@ cmp_debug_dtuple_rec_with_match( dtuple_f_data = dfield_get_data(dtuple_field); dtuple_f_len = dfield_get_len(dtuple_field); - rec_f_data = rec_get_nth_field(rec, cur_field, &rec_f_len); + rec_f_data = rec_get_nth_field(rec, offsets, + cur_field, &rec_f_len); - if (rec_get_nth_field_extern_bit(rec, cur_field)) { + if (rec_offs_nth_extern(offsets, cur_field)) { /* We do not compare to an externally stored field */ ret = 0; diff --git a/innobase/rem/rem0rec.c b/innobase/rem/rem0rec.c index 1db89241dff..e4fa213480f 100644 --- a/innobase/rem/rem0rec.c +++ b/innobase/rem/rem0rec.c @@ -15,8 +15,8 @@ Created 5/30/1994 Heikki Tuuri #include "mtr0mtr.h" #include "mtr0log.h" -/* PHYSICAL RECORD - =============== +/* PHYSICAL RECORD (OLD STYLE) + =========================== The physical record, which is the data type of all the records found in index pages of the database, has the following format @@ -39,7 +39,7 @@ represented on a higher text line): | 10 bits giving the number of fields in this record | | 1 bit which is set to 1 if the offsets above are given in one byte format, 0 if in two byte format | -| two bytes giving the pointer to the next record in the page | +| two bytes giving an absolute pointer to the next record in the page | ORIGIN of the record | first field of data | ... @@ -55,9 +55,50 @@ The offsets of the data fields are given as one-byte (if there are less than 127 bytes of data in the record) or two-byte unsigned integers. The most significant bit is not part of the offset, instead it indicates the SQL-null -if the bit is set to 1. +if the bit is set to 1. */ -CANONICAL COORDINATES. A record can be seen as a single +/* PHYSICAL RECORD (NEW STYLE) + =========================== + +The physical record, which is the data type of all the records +found in index pages of the database, has the following format +(lower addresses and more significant bits inside a byte are below +represented on a higher text line): + +| length of the last non-null variable-length field of data: + if the maximum length is 255, one byte; otherwise, + 0xxxxxxx (one byte, length=0..127), or 1exxxxxxxxxxxxxx (two bytes, + length=128..16383, extern storage flag) | +... +| length of first variable-length field of data | +| SQL-null flags (1 bit per nullable field), padded to full bytes | +| 4 bits used to delete mark a record, and mark a predefined + minimum record in alphabetical order | +| 4 bits giving the number of records owned by this record + (this term is explained in page0page.h) | +| 13 bits giving the order number of this record in the + heap of the index page | +| 3 bits record type: 000=conventional, 001=node pointer (inside B-tree), + 010=infimum, 011=supremum, 1xx=reserved | +| two bytes giving a relative pointer to the next record in the page | +ORIGIN of the record +| first field of data | +... +| last field of data | + +The origin of the record is the start address of the first field +of data. The offsets are given relative to the origin. +The offsets of the data fields are stored in an inverted +order because then the offset of the first fields are near the +origin, giving maybe a better processor cache hit rate in searches. + +The offsets of the data fields are given as one-byte +(if there are less than 127 bytes of data in the record) +or two-byte unsigned integers. The most significant bit +is not part of the offset, instead it indicates the SQL-null +if the bit is set to 1. */ + +/* CANONICAL COORDINATES. A record can be seen as a single string of 'characters' in the following way: catenate the bytes in each field, in the order of fields. An SQL-null field is taken to be an empty sequence of bytes. Then after @@ -86,13 +127,291 @@ the corresponding canonical strings have the same property. */ ulint rec_dummy; /* this is used to fool compiler in rec_validate */ +/******************************************************************* +Validates the consistency of an old-style physical record. */ +static +ibool +rec_validate_old( +/*=============*/ + /* out: TRUE if ok */ + rec_t* rec); /* in: physical record */ + +/********************************************************** +The following function determines the offsets to each field +in the record. The offsets are written to an array of +ulint[n+2], with [0] being the number of fields (n), [1] being the +extra size (if REC_OFFS_COMPACT is set, the record is in the new +format), and [2]..[n+1] being the offsets past the end of +fields 0..n, or to the beginning of fields 1..n+1. When the +high-order bit of the offset at [n+1] is set (REC_OFFS_SQL_NULL), +the field n is NULL. When the second high-order bit of the offset +at [n+1] is set (REC_OFFS_EXTERNAL), the field n is being stored +externally. */ +static +void +rec_init_offsets( +/*=============*/ + /* out: the offsets */ + rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint* offsets)/* in:/out: ulint[n+2]; + n=rec_offs_n_fields(offsets) */ +{ + ulint n_fields = rec_offs_n_fields(offsets); + ulint i = 0; + ulint offs; + + rec_offs_make_valid(rec, index, offsets); + + if (index->table->comp) { + const byte* nulls; + const byte* lens; + dict_field_t* field; + dtype_t* type; + ulint null_mask; + ulint status = rec_get_status(rec); + ulint n_node_ptr_field = ULINT_UNDEFINED; + + switch (status) { + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + /* the field is 8 bytes long */ + rec_offs_base(offsets)[0] = + REC_N_NEW_EXTRA_BYTES | REC_OFFS_COMPACT; + rec_offs_base(offsets)[1] = 8; + return; + case REC_STATUS_NODE_PTR: + n_node_ptr_field = + dict_index_get_n_unique_in_tree(index); + break; + case REC_STATUS_ORDINARY: + break; + } + + nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1); + lens = nulls - (index->n_nullable + 7) / 8; + offs = 0; + null_mask = 1; + + /* read the lengths of fields 0..n */ + for (; i < n_fields; i++) { + ibool is_null = FALSE, is_external = FALSE; + ulint len; + if (i == n_node_ptr_field) { + len = 4; + goto resolved; + } + + field = dict_index_get_nth_field(index, i); + type = dict_col_get_type(dict_field_get_col(field)); + if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { + /* nullable field => read the null flag */ + is_null = (*nulls & null_mask) != 0; + null_mask <<= 1; + if (null_mask == 0x100) { + nulls--; + null_mask = 1; + } + } + + if (is_null) { + /* No length is stored for NULL fields. */ + len = 0; + } else if (!field->fixed_len) { + /* Variable-length field: read the length */ + len = *lens--; + if (dtype_get_len(type) > 255 + || dtype_get_mtype(type) == DATA_BLOB) { + if (len & 0x80) { + /* 1exxxxxxx xxxxxxxx */ + is_external = !!(len & 0x40); + len &= 0x3f; + len <<= 8; + len |= *lens--; + } + } + } else { + len = field->fixed_len; + } + resolved: + offs += len; + len = offs; + if (is_external) { + len |= REC_OFFS_EXTERNAL; + } + if (is_null) { + len |= REC_OFFS_SQL_NULL; + } + rec_offs_base(offsets)[i + 1] = len; + } + + *rec_offs_base(offsets) = + (rec - (lens + 1)) | REC_OFFS_COMPACT; + } else { + /* Old-style record: determine extra size and end offsets */ + offs = REC_N_OLD_EXTRA_BYTES; + if (rec_get_1byte_offs_flag(rec)) { + offs += n_fields; + *rec_offs_base(offsets) = offs; + /* Determine offsets to fields */ + for (; i < n_fields; i++) { + offs = rec_1_get_field_end_info(rec, i); + if (offs & REC_1BYTE_SQL_NULL_MASK) { + offs &= ~REC_1BYTE_SQL_NULL_MASK; + offs |= REC_OFFS_SQL_NULL; + } + rec_offs_base(offsets)[1 + i] = offs; + } + } else { + offs += 2 * n_fields; + *rec_offs_base(offsets) = offs; + /* Determine offsets to fields */ + for (; i < n_fields; i++) { + offs = rec_2_get_field_end_info(rec, i); + if (offs & REC_2BYTE_SQL_NULL_MASK) { + offs &= ~REC_2BYTE_SQL_NULL_MASK; + offs |= REC_OFFS_SQL_NULL; + } + if (offs & REC_2BYTE_EXTERN_MASK) { + offs &= ~REC_2BYTE_EXTERN_MASK; + offs |= REC_OFFS_EXTERNAL; + } + rec_offs_base(offsets)[1 + i] = offs; + } + } + } +} + +/********************************************************** +The following function determines the offsets to each field +in the record. The offsets are returned in an array of +ulint, with [0] being the number of fields (n), [1] being the +extra size (if REC_OFFS_COMPACT is set, the record is in the new +format), and [2]..[n+1] being the offsets past the end of +fields 0..n, or to the beginning of fields 1..n+1. When the +high-order bit of the offset at [n+1] is set (REC_OFFS_SQL_NULL), +the field n is NULL. When the second high-order bit of the offset +at [n+1] is set (REC_OFFS_EXTERNAL), the field n is being stored +externally. */ + +ulint* +rec_get_offsets( +/*============*/ + /* out: the offsets */ + rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint n_fields,/* in: maximum number of initialized fields + (ULINT_UNDEFINED if all fields) */ + mem_heap_t* heap) /* in: memory heap */ +{ + ulint* offsets; + ulint n; + + ut_ad(rec); + ut_ad(index); + ut_ad(heap); + + if (index->table->comp) { + switch (rec_get_status(rec)) { + case REC_STATUS_ORDINARY: + n = dict_index_get_n_fields(index); + break; + case REC_STATUS_NODE_PTR: + n = dict_index_get_n_unique_in_tree(index) + 1; + break; + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + /* infimum or supremum record */ + n = 1; + break; + default: + ut_error; + return(NULL); + } + } else { + n = rec_get_n_fields_old(rec); + } + + if (n_fields < n) { + n = n_fields; + } + + offsets = mem_heap_alloc(heap, + (n + (1 + REC_OFFS_HEADER_SIZE)) * sizeof(ulint)); + + offsets[0] = n; + + rec_init_offsets(rec, index, offsets); + return(offsets); +} + +/********************************************************** +The following function determines the offsets to each field +in the record. It differs from rec_get_offsets() by trying to +reuse a previously returned array. */ + +ulint* +rec_reget_offsets( +/*==============*/ + /* out: the new offsets */ + rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint* offsets,/* in: array of offsets + from rec_get_offsets() + or rec_reget_offsets(), or NULL */ + ulint n_fields,/* in: maximum number of initialized fields + (ULINT_UNDEFINED if all fields) */ + mem_heap_t* heap) /* in: memory heap */ +{ + ulint n; + + ut_ad(rec); + ut_ad(index); + ut_ad(heap); + + if (index->table->comp) { + switch (rec_get_status(rec)) { + case REC_STATUS_ORDINARY: + n = dict_index_get_n_fields(index); + break; + case REC_STATUS_NODE_PTR: + n = dict_index_get_n_unique_in_tree(index) + 1; + break; + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + /* infimum or supremum record */ + n = 1; + break; + default: + ut_error; + return(NULL); + } + } else { + n = rec_get_n_fields_old(rec); + } + + if (n_fields < n) { + n = n_fields; + } + + if (!offsets || rec_offs_n_fields(offsets) < n) { + offsets = mem_heap_alloc(heap, + (n + (1 + REC_OFFS_HEADER_SIZE)) * sizeof(ulint)); + } + + offsets[0] = n; + + rec_init_offsets(rec, index, offsets); + return(offsets); +} + /**************************************************************** -The following function is used to get a pointer to the nth data field in a -record. */ +The following function is used to get a pointer to the nth +data field in an old-style record. */ byte* -rec_get_nth_field( -/*==============*/ +rec_get_nth_field_old( +/*==================*/ /* out: pointer to the field */ rec_t* rec, /* in: record */ ulint n, /* in: index of the field */ @@ -103,9 +422,9 @@ rec_get_nth_field( ulint next_os; ut_ad(rec && len); - ut_ad(n < rec_get_n_fields(rec)); + ut_ad(n < rec_get_n_fields_old(rec)); - if (n > 1024) { + if (n > REC_MAX_N_FIELDS) { fprintf(stderr, "Error: trying to access field %lu in rec\n", (ulong) n); ut_error; @@ -150,8 +469,78 @@ rec_get_nth_field( return(rec + os); } +/************************************************************** +The following function returns the size of a data tuple when converted to +a new-style physical record. */ + +ulint +rec_get_converted_size_new( +/*=======================*/ + /* out: size */ + dict_index_t* index, /* in: record descriptor */ + dtuple_t* dtuple) /* in: data tuple */ +{ + ulint size = REC_N_NEW_EXTRA_BYTES + + (index->n_nullable + 7) / 8; + dict_field_t* field; + dtype_t* type; + ulint i; + ulint n_fields; + ut_ad(index && dtuple); + ut_ad(index->table->comp); + + switch (dtuple_get_info_bits(dtuple) & REC_NEW_STATUS_MASK) { + case REC_STATUS_ORDINARY: + n_fields = dict_index_get_n_fields(index); + ut_ad(n_fields == dtuple_get_n_fields(dtuple)); + break; + case REC_STATUS_NODE_PTR: + n_fields = dict_index_get_n_unique_in_tree(index); + ut_ad(n_fields + 1 == dtuple_get_n_fields(dtuple)); + ut_ad(dtuple_get_nth_field(dtuple, n_fields)->len == 4); + size += 4; /* child page number */ + break; + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + /* infimum or supremum record, 8 bytes */ + return(size + 8); /* no extra data needed */ + default: + ut_a(0); + return(ULINT_UNDEFINED); + } + + /* read the lengths of fields 0..n */ + for (i = 0; i < n_fields; i++) { + ulint len = dtuple_get_nth_field(dtuple, i)->len; + field = dict_index_get_nth_field(index, i); + type = dict_col_get_type(dict_field_get_col(field)); + ut_ad(len != UNIV_SQL_NULL || + !(dtype_get_prtype(type) & DATA_NOT_NULL)); + + if (len == UNIV_SQL_NULL) { + /* No length is stored for NULL fields. */ + continue; + } + + ut_ad(len <= dtype_get_len(type) + || dtype_get_mtype(type) == DATA_BLOB); + ut_ad(!field->fixed_len || len == field->fixed_len); + + if (field->fixed_len) { + } else if (len < 128 || (dtype_get_len(type) < 256 + && dtype_get_mtype(type) != DATA_BLOB)) { + size++; + } else { + size += 2; + } + size += len; + } + + return(size); +} + /*************************************************************** -Sets the value of the ith field SQL null bit. */ +Sets the value of the ith field SQL null bit of an old-style record. */ void rec_set_nth_field_null_bit( @@ -189,12 +578,12 @@ rec_set_nth_field_null_bit( } /*************************************************************** -Sets the value of the ith field extern storage bit. */ +Sets the value of the ith field extern storage bit of an old-style record. */ void -rec_set_nth_field_extern_bit( -/*=========================*/ - rec_t* rec, /* in: record */ +rec_set_nth_field_extern_bit_old( +/*=============================*/ + rec_t* rec, /* in: old-style record */ ulint i, /* in: ith field */ ibool val, /* in: value to set */ mtr_t* mtr) /* in: mtr holding an X-latch to the page where @@ -204,7 +593,7 @@ rec_set_nth_field_extern_bit( ulint info; ut_a(!rec_get_1byte_offs_flag(rec)); - ut_a(i < rec_get_n_fields(rec)); + ut_a(i < rec_get_n_fields_old(rec)); info = rec_2_get_field_end_info(rec, i); @@ -215,36 +604,138 @@ rec_set_nth_field_extern_bit( } if (mtr) { - mlog_write_ulint(rec - REC_N_EXTRA_BYTES - 2 * (i + 1), info, - MLOG_2BYTES, mtr); + mlog_write_ulint(rec - REC_N_OLD_EXTRA_BYTES - 2 * (i + 1), + info, MLOG_2BYTES, mtr); } else { rec_2_set_field_end_info(rec, i, info); } } +/*************************************************************** +Sets the value of the ith field extern storage bit of a new-style record. */ + +void +rec_set_nth_field_extern_bit_new( +/*=============================*/ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + ulint ith, /* in: ith field */ + ibool val, /* in: value to set */ + mtr_t* mtr) /* in: mtr holding an X-latch to the page + where rec is, or NULL; in the NULL case + we do not write to log about the change */ +{ + byte* nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1); + byte* lens = nulls - (index->n_nullable + 7) / 8; + dict_field_t* field; + dtype_t* type; + ulint i; + ulint n_fields; + ulint null_mask = 1; + ut_ad(rec && index); + ut_ad(index->table->comp); + ut_ad(rec_get_status(rec) == REC_STATUS_ORDINARY); + + n_fields = dict_index_get_n_fields(index); + + ut_ad(ith < n_fields); + + /* read the lengths of fields 0..n */ + for (i = 0; i < n_fields; i++) { + ibool is_null; + ulint len; + field = dict_index_get_nth_field(index, i); + type = dict_col_get_type(dict_field_get_col(field)); + is_null = !(dtype_get_prtype(type) & DATA_NOT_NULL); + if (is_null) { + /* nullable field => read the null flag */ + is_null = !!(*nulls & null_mask); + null_mask <<= 1; + if (null_mask == 0x100) + nulls--, null_mask = 1; + } + if (is_null || field->fixed_len) { + /* No length (or extern bit) is stored for + fields that are NULL or fixed-length. */ + ut_ad(i != ith); + continue; + } + len = *lens--; + if (dtype_get_len(type) > 255 + || dtype_get_mtype(type) == DATA_BLOB) { + if (len & 0x80) { /* 1exxxxxx: 2-byte length */ + if (i == ith) { + if (!val == !(len & 0x20)) { + return; /* no change */ + } + /* toggle the extern bit */ + len ^= 0x40; + if (mtr) { + mlog_write_ulint(lens + 1, len, + MLOG_1BYTE, mtr); + } else { + lens[1] = len; + } + return; + } + lens--; + } else { + /* short fields cannot be external */ + ut_ad(i != ith); + } + } else { + /* short fields cannot be external */ + ut_ad(i != ith); + } + } +} + /*************************************************************** Sets TRUE the extern storage bits of fields mentioned in an array. */ void rec_set_field_extern_bits( /*======================*/ - rec_t* rec, /* in: record */ - ulint* vec, /* in: array of field numbers */ - ulint n_fields, /* in: number of fields numbers */ - mtr_t* mtr) /* in: mtr holding an X-latch to the page - where rec is, or NULL; in the NULL case we - do not write to log about the change */ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + const ulint* vec, /* in: array of field numbers */ + ulint n_fields,/* in: number of fields numbers */ + mtr_t* mtr) /* in: mtr holding an X-latch to the + page where rec is, or NULL; + in the NULL case we do not write + to log about the change */ { ulint i; for (i = 0; i < n_fields; i++) { - rec_set_nth_field_extern_bit(rec, vec[i], TRUE, mtr); + rec_set_nth_field_extern_bit(rec, index, vec[i], TRUE, mtr); } } +/************************************************************** +Returns the total size of a physical record. */ + +ulint +rec_get_size( +/*=========*/ + /* out: size */ + rec_t* rec, /* in: physical record */ + dict_index_t* index) /* in: record descriptor */ +{ + mem_heap_t* heap + = mem_heap_create(100); + ulint* offsets + = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + ulint size + = rec_offs_size(offsets); + + mem_heap_free(heap); + return(size); +} + /*************************************************************** -Sets a record field to SQL null. The physical size of the field is not -changed. */ +Sets an old-style record field to SQL null. +The physical size of the field is not changed. */ void rec_set_nth_field_sql_null( @@ -262,20 +753,20 @@ rec_set_nth_field_sql_null( } /************************************************************* -Builds a physical record out of a data tuple and stores it beginning from -address destination. */ - -rec_t* -rec_convert_dtuple_to_rec_low( +Builds an old-style physical record out of a data tuple and +stores it beginning from the start of the given buffer. */ +static +rec_t* +rec_convert_dtuple_to_rec_old( /*==========================*/ - /* out: pointer to the origin of physical - record */ - byte* destination, /* in: start address of the physical record */ - dtuple_t* dtuple, /* in: data tuple */ - ulint data_size) /* in: data size of dtuple */ + /* out: pointer to the origin of + physical record */ + byte* buf, /* in: start address of the physical record */ + dtuple_t* dtuple)/* in: data tuple */ { dfield_t* field; ulint n_fields; + ulint data_size; rec_t* rec; ulint end_offset; ulint ored_offset; @@ -283,24 +774,25 @@ rec_convert_dtuple_to_rec_low( ulint len; ulint i; - ut_ad(destination && dtuple); + ut_ad(buf && dtuple); ut_ad(dtuple_validate(dtuple)); ut_ad(dtuple_check_typed(dtuple)); - ut_ad(dtuple_get_data_size(dtuple) == data_size); n_fields = dtuple_get_n_fields(dtuple); + data_size = dtuple_get_data_size(dtuple); ut_ad(n_fields > 0); /* Calculate the offset of the origin in the physical record */ - rec = destination + rec_get_converted_extra_size(data_size, n_fields); + rec = buf + rec_get_converted_extra_size(data_size, n_fields); /* Store the number of fields */ - rec_set_n_fields(rec, n_fields); + rec_set_n_fields_old(rec, n_fields); /* Set the info bits of the record */ - rec_set_info_bits(rec, dtuple_get_info_bits(dtuple)); + rec_set_info_bits(rec, FALSE, + dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK); /* Store the data and the offsets */ @@ -361,11 +853,194 @@ rec_convert_dtuple_to_rec_low( } } - ut_ad(rec_validate(rec)); + return(rec); +} + +/************************************************************* +Builds a new-style physical record out of a data tuple and +stores it beginning from the start of the given buffer. */ +static +rec_t* +rec_convert_dtuple_to_rec_new( +/*==========================*/ + /* out: pointer to the origin + of physical record */ + byte* buf, /* in: start address of the physical record */ + dict_index_t* index, /* in: record descriptor */ + dtuple_t* dtuple) /* in: data tuple */ +{ + dfield_t* field; + dtype_t* type; + rec_t* rec = buf + REC_N_NEW_EXTRA_BYTES; + byte* end; + byte* nulls; + byte* lens; + ulint len; + ulint i; + ulint fixed_len; + ulint null_mask = 1; + const ulint n_fields = dtuple_get_n_fields(dtuple); + const ulint status = dtuple_get_info_bits(dtuple) + & REC_NEW_STATUS_MASK; + ut_ad(index->table->comp); + + ut_ad(n_fields > 0); + switch (status) { + case REC_STATUS_ORDINARY: + ut_ad(n_fields <= dict_index_get_n_fields(index)); + break; + case REC_STATUS_NODE_PTR: + ut_ad(n_fields == dict_index_get_n_unique_in_tree(index) + 1); + break; + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + ut_ad(n_fields == 1); + goto init; + default: + ut_a(0); + return(0); + } + + /* Calculate the offset of the origin in the physical record. + We must loop over all fields to do this. */ + rec += (index->n_nullable + 7) / 8; + + for (i = 0; i < n_fields; i++) { + field = dtuple_get_nth_field(dtuple, i); + type = dfield_get_type(field); + len = dfield_get_len(field); + if (status == REC_STATUS_NODE_PTR && i == n_fields - 1) { + fixed_len = 4; + ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL); + ut_ad(len == 4); + continue; + } + fixed_len = dict_index_get_nth_field(index, i)->fixed_len; + + if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { + if (len == UNIV_SQL_NULL) + continue; + } + /* only nullable fields can be null */ + ut_ad(len != UNIV_SQL_NULL); + if (fixed_len) { + ut_ad(len == fixed_len); + } else { + ut_ad(len <= dtype_get_len(type) + || dtype_get_mtype(type) == DATA_BLOB); + rec++; + if (len >= 128 && (dtype_get_len(type) >= 256 + || dtype_get_mtype(type) == DATA_BLOB)) { + rec++; + } + } + } + +init: + end = rec; + nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1); + lens = nulls - (index->n_nullable + 7) / 8; + /* clear the SQL-null flags */ + memset (lens + 1, 0, nulls - lens); + + /* Set the info bits of the record */ + rec_set_status(rec, status); + + rec_set_info_bits(rec, TRUE, + dtuple_get_info_bits(dtuple) & REC_INFO_BITS_MASK); + + /* Store the data and the offsets */ + + for (i = 0; i < n_fields; i++) { + field = dtuple_get_nth_field(dtuple, i); + type = dfield_get_type(field); + len = dfield_get_len(field); + + if (status == REC_STATUS_NODE_PTR && i == n_fields - 1) { + fixed_len = 4; + ut_ad(dtype_get_prtype(type) & DATA_NOT_NULL); + ut_ad(len == 4); + goto copy; + } + fixed_len = dict_index_get_nth_field(index, i)->fixed_len; + + if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) { + /* nullable field */ + ut_ad(index->n_nullable > 0); + ut_ad(*nulls < null_mask); + /* set the null flag if necessary */ + if (len == UNIV_SQL_NULL) { + *nulls |= null_mask; + } + null_mask <<= 1; + if (null_mask == 0x100) + nulls--, null_mask = 1; + if (len == UNIV_SQL_NULL) + continue; + } + /* only nullable fields can be null */ + ut_ad(len != UNIV_SQL_NULL); + if (fixed_len) { + ut_ad(len == fixed_len); + } else { + ut_ad(len <= dtype_get_len(type) + || dtype_get_mtype(type) == DATA_BLOB); + if (len < 128 || (dtype_get_len(type) < 256 + && dtype_get_mtype(type) != DATA_BLOB)) { + *lens-- = len; + } + else { + /* the extern bits will be set later */ + ut_ad(len < 16384); + *lens-- = len >> 8 | 0x80; + *lens-- = len; + } + } + copy: + memcpy(end, dfield_get_data(field), len); + end += len; + } return(rec); } +/************************************************************* +Builds a physical record out of a data tuple and +stores it beginning from the start of the given buffer. */ + +rec_t* +rec_convert_dtuple_to_rec( +/*======================*/ + /* out: pointer to the origin + of physical record */ + byte* buf, /* in: start address of the + physical record */ + dict_index_t* index, /* in: record descriptor */ + dtuple_t* dtuple) /* in: data tuple */ +{ + rec_t* rec; + + ut_ad(buf && index && dtuple); + ut_ad(dtuple_validate(dtuple)); + ut_ad(dtuple_check_typed(dtuple)); + + if (index->table->comp) { + rec = rec_convert_dtuple_to_rec_new(buf, index, dtuple); + } else { + rec = rec_convert_dtuple_to_rec_old(buf, dtuple); + } + +#ifdef UNIV_DEBUG + { + mem_heap_t* heap = mem_heap_create(100); + ut_ad(rec_validate(rec, + rec_get_offsets(rec, index, ULINT_UNDEFINED, heap))); + mem_heap_free(heap); + } +#endif /* UNIV_DEBUG */ + return(rec); +} + /****************************************************************** Copies the first n fields of a physical record to a data tuple. The fields are copied to the memory heap. */ @@ -375,6 +1050,7 @@ rec_copy_prefix_to_dtuple( /*======================*/ dtuple_t* tuple, /* in: data tuple */ rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ ulint n_fields, /* in: number of fields to copy */ mem_heap_t* heap) /* in: memory heap */ { @@ -383,16 +1059,20 @@ rec_copy_prefix_to_dtuple( ulint len; byte* buf = NULL; ulint i; - - ut_ad(rec_validate(rec)); + ulint* offsets; + + offsets = rec_get_offsets(rec, index, n_fields, heap); + + ut_ad(rec_validate(rec, offsets)); ut_ad(dtuple_check_typed(tuple)); - dtuple_set_info_bits(tuple, rec_get_info_bits(rec)); + dtuple_set_info_bits(tuple, + rec_get_info_bits(rec, index->table->comp)); for (i = 0; i < n_fields; i++) { field = dtuple_get_nth_field(tuple, i); - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); if (len != UNIV_SQL_NULL) { buf = mem_heap_alloc(heap, len); @@ -405,32 +1085,28 @@ rec_copy_prefix_to_dtuple( } /****************************************************************** -Copies the first n fields of a physical record to a new physical record in -a buffer. */ - +Copies the first n fields of an old-style physical record +to a new physical record in a buffer. */ +static rec_t* -rec_copy_prefix_to_buf( -/*===================*/ +rec_copy_prefix_to_buf_old( +/*=======================*/ /* out, own: copied record */ rec_t* rec, /* in: physical record */ ulint n_fields, /* in: number of fields to copy */ + ulint area_end, /* in: end of the prefix data */ byte** buf, /* in/out: memory buffer for the copied prefix, or NULL */ ulint* buf_size) /* in/out: buffer size */ { rec_t* copy_rec; ulint area_start; - ulint area_end; ulint prefix_len; - ut_ad(rec_validate(rec)); - - area_end = rec_get_field_start_offs(rec, n_fields); - if (rec_get_1byte_offs_flag(rec)) { - area_start = REC_N_EXTRA_BYTES + n_fields; + area_start = REC_N_OLD_EXTRA_BYTES + n_fields; } else { - area_start = REC_N_EXTRA_BYTES + 2 * n_fields; + area_start = REC_N_OLD_EXTRA_BYTES + 2 * n_fields; } prefix_len = area_start + area_end; @@ -448,17 +1124,114 @@ rec_copy_prefix_to_buf( copy_rec = *buf + area_start; - rec_set_n_fields(copy_rec, n_fields); + rec_set_n_fields_old(copy_rec, n_fields); return(copy_rec); } -/******************************************************************* -Validates the consistency of a physical record. */ +/****************************************************************** +Copies the first n fields of a physical record to a new physical record in +a buffer. */ + +rec_t* +rec_copy_prefix_to_buf( +/*===================*/ + /* out, own: copied record */ + rec_t* rec, /* in: physical record */ + dict_index_t* index, /* in: record descriptor */ + ulint n_fields, /* in: number of fields to copy */ + byte** buf, /* in/out: memory buffer + for the copied prefix, or NULL */ + ulint* buf_size) /* in/out: buffer size */ +{ + byte* nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1); + byte* lens = nulls - (index->n_nullable + 7) / 8; + dict_field_t* field; + dtype_t* type; + ulint i; + ulint prefix_len = 0; + ibool is_null; + ulint null_mask = 1; + ulint status; + + if (!index->table->comp) { + ut_ad(rec_validate_old(rec)); + return(rec_copy_prefix_to_buf_old(rec, n_fields, + rec_get_field_start_offs(rec, n_fields), + buf, buf_size)); + } + + status = rec_get_status(rec); + + switch (status) { + case REC_STATUS_ORDINARY: + ut_ad(n_fields <= dict_index_get_n_fields(index)); + break; + case REC_STATUS_NODE_PTR: + /* it doesn't make sense to copy the child page number field */ + ut_ad(n_fields <= dict_index_get_n_unique_in_tree(index)); + break; + case REC_STATUS_INFIMUM: + case REC_STATUS_SUPREMUM: + /* infimum or supremum record: no sense to copy anything */ + default: + ut_a(0); + return(NULL); + } + /* read the lengths of fields 0..n */ + for (i = 0; i < n_fields; i++) { + field = dict_index_get_nth_field(index, i); + type = dict_col_get_type(dict_field_get_col(field)); + is_null = !(dtype_get_prtype(type) & DATA_NOT_NULL); + if (is_null) { + /* nullable field => read the null flag */ + is_null = !!(*nulls & null_mask); + null_mask <<= 1; + if (null_mask == 0x100) + nulls--, null_mask = 1; + } + + if (is_null) { + } else if (field->fixed_len) { + prefix_len += field->fixed_len; + } else { + ulint len = *lens--; + if (dtype_get_len(type) > 255 + || dtype_get_mtype(type) == DATA_BLOB) { + if (len & 0x80) { + /* 1exxxxxx */ + len &= 0x3f; + len <<= 8; + len |= *lens--; + } + } + prefix_len += len; + } + } + + prefix_len += rec - (lens + 1); + + if ((*buf == NULL) || (*buf_size < prefix_len)) { + if (*buf != NULL) { + mem_free(*buf); + } + + *buf = mem_alloc(prefix_len); + *buf_size = prefix_len; + } + + memcpy(*buf, lens + 1, prefix_len); + + return(*buf + (rec - (lens + 1))); +} + +/******************************************************************* +Validates the consistency of an old-style physical record. */ +static ibool -rec_validate( -/*=========*/ +rec_validate_old( +/*=============*/ /* out: TRUE if ok */ rec_t* rec) /* in: physical record */ { @@ -470,7 +1243,7 @@ rec_validate( ulint i; ut_a(rec); - n_fields = rec_get_n_fields(rec); + n_fields = rec_get_n_fields_old(rec); if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) { fprintf(stderr, "InnoDB: Error: record has %lu fields\n", @@ -479,7 +1252,7 @@ rec_validate( } for (i = 0; i < n_fields; i++) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field_old(rec, i, &len); if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) { fprintf(stderr, @@ -499,45 +1272,165 @@ rec_validate( } } - if (len_sum != (ulint)(rec_get_end(rec) - rec)) { + if (len_sum != rec_get_data_size_old(rec)) { fprintf(stderr, "InnoDB: Error: record len should be %lu, len %lu\n", (ulong) len_sum, - (ulong) (rec_get_end(rec) - rec)); + rec_get_data_size_old(rec)); + return(FALSE); + } + + rec_dummy = sum; /* This is here only to fool the compiler */ + + return(TRUE); +} + +/******************************************************************* +Validates the consistency of a physical record. */ + +ibool +rec_validate( +/*=========*/ + /* out: TRUE if ok */ + rec_t* rec, /* in: physical record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ +{ + const byte* data; + ulint len; + ulint n_fields; + ulint len_sum = 0; + ulint sum = 0; + ulint i; + + ut_a(rec); + n_fields = rec_offs_n_fields(offsets); + + if ((n_fields == 0) || (n_fields > REC_MAX_N_FIELDS)) { + fprintf(stderr, "InnoDB: Error: record has %lu fields\n", + (ulong) n_fields); + return(FALSE); + } + + ut_a(rec_offs_comp(offsets) || n_fields <= rec_get_n_fields_old(rec)); + + for (i = 0; i < n_fields; i++) { + data = rec_get_nth_field(rec, offsets, i, &len); + + if (!((len < UNIV_PAGE_SIZE) || (len == UNIV_SQL_NULL))) { + fprintf(stderr, + "InnoDB: Error: record field %lu len %lu\n", (ulong) i, + (ulong) len); + return(FALSE); + } + + if (len != UNIV_SQL_NULL) { + len_sum += len; + sum += *(data + len -1); /* dereference the + end of the field to + cause a memory trap + if possible */ + } else if (!rec_offs_comp(offsets)) { + len_sum += rec_get_nth_field_size(rec, i); + } + } + + if (len_sum != (ulint)(rec_get_end(rec, offsets) - rec)) { + fprintf(stderr, + "InnoDB: Error: record len should be %lu, len %lu\n", + (ulong) len_sum, + (ulong) (rec_get_end(rec, offsets) - rec)); return(FALSE); } rec_dummy = sum; /* This is here only to fool the compiler */ + if (!rec_offs_comp(offsets)) { + ut_a(rec_validate_old(rec)); + } + return(TRUE); } +/******************************************************************* +Prints an old-style physical record. */ + +void +rec_print_old( +/*==========*/ + FILE* file, /* in: file where to print */ + rec_t* rec) /* in: physical record */ +{ + const byte* data; + ulint len; + ulint n; + ulint i; + + ut_ad(rec); + + n = rec_get_n_fields_old(rec); + + fprintf(file, "PHYSICAL RECORD: n_fields %lu;" + " %u-byte offsets; info bits %lu\n", + (ulong) n, + rec_get_1byte_offs_flag(rec) ? 1 : 2, + (ulong) rec_get_info_bits(rec, FALSE)); + + for (i = 0; i < n; i++) { + + data = rec_get_nth_field_old(rec, i, &len); + + fprintf(file, " %lu:", (ulong) i); + + if (len != UNIV_SQL_NULL) { + if (len <= 30) { + + ut_print_buf(file, data, len); + } else { + ut_print_buf(file, data, 30); + + fputs("...(truncated)", file); + } + } else { + fprintf(file, " SQL NULL, size %lu ", + rec_get_nth_field_size(rec, i)); + } + putc(';', file); + } + + putc('\n', file); + + rec_validate_old(rec); +} + /******************************************************************* Prints a physical record. */ void rec_print( /*======*/ - FILE* file, /* in: file where to print */ - rec_t* rec) /* in: physical record */ + FILE* file, /* in: file where to print */ + rec_t* rec, /* in: physical record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { - byte* data; - ulint len; - ulint n; - ulint i; + const byte* data; + ulint len; + ulint i; + + if (!rec_offs_comp(offsets)) { + rec_print_old(file, rec); + return; + } ut_ad(rec); - - n = rec_get_n_fields(rec); fprintf(file, "PHYSICAL RECORD: n_fields %lu;" - " 1-byte offs %s; info bits %lu\n", - (ulong) n, rec_get_1byte_offs_flag(rec) ? "TRUE" : "FALSE", - (ulong) rec_get_info_bits(rec)); + " compact format; info bits %lu\n", + (ulong) rec_offs_n_fields(offsets), + (ulong) rec_get_info_bits(rec, TRUE)); - for (i = 0; i < n; i++) { + for (i = 0; i < rec_offs_n_fields(offsets); i++) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); fprintf(file, " %lu:", (ulong) i); @@ -551,14 +1444,12 @@ rec_print( fputs("...(truncated)", file); } } else { - fprintf(file, " SQL NULL, size %lu ", - (ulong) rec_get_nth_field_size(rec, i)); - + fputs(" SQL NULL", file); } putc(';', file); } putc('\n', file); - rec_validate(rec); + rec_validate(rec, offsets); } diff --git a/innobase/row/row0ins.c b/innobase/row/row0ins.c index 6d1482b6720..c3149401f3c 100644 --- a/innobase/row/row0ins.c +++ b/innobase/row/row0ins.c @@ -251,7 +251,7 @@ row_ins_sec_index_entry_by_modify( rec = btr_cur_get_rec(cursor); ut_ad((cursor->index->type & DICT_CLUSTERED) == 0); - ut_ad(rec_get_deleted_flag(rec)); + ut_ad(rec_get_deleted_flag(rec, cursor->index->table->comp)); /* We know that in the alphabetical ordering, entry and rec are identified. But in their binary form there may be differences if @@ -316,7 +316,7 @@ row_ins_clust_index_entry_by_modify( rec = btr_cur_get_rec(cursor); - ut_ad(rec_get_deleted_flag(rec)); + ut_ad(rec_get_deleted_flag(rec, cursor->index->table->comp)); heap = mem_heap_create(1024); @@ -588,8 +588,16 @@ row_ins_foreign_report_err( fputs(", in index ", ef); ut_print_name(ef, trx, foreign->foreign_index->name); if (rec) { + mem_heap_t* heap; + ulint* offsets; + + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, foreign->foreign_index, + ULINT_UNDEFINED, heap); + fputs(", there is a record:\n", ef); - rec_print(ef, rec); + rec_print(ef, rec, offsets); + mem_heap_free(heap); } else { fputs(", the record is not available\n", ef); } @@ -644,7 +652,16 @@ row_ins_foreign_report_add_err( } if (rec) { - rec_print(ef, rec); + mem_heap_t* heap; + ulint* offsets; + + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, foreign->foreign_index, + ULINT_UNDEFINED, heap); + + rec_print(ef, rec, offsets); + + mem_heap_free(heap); } putc('\n', ef); @@ -706,7 +723,6 @@ row_ins_foreign_check_on_constraint( dict_index_t* index; dict_index_t* clust_index; dtuple_t* ref; - mem_heap_t* tmp_heap; mem_heap_t* upd_vec_heap = NULL; rec_t* rec; rec_t* clust_rec; @@ -715,8 +731,9 @@ row_ins_foreign_check_on_constraint( ulint err; ulint i; trx_t* trx; + mem_heap_t* tmp_heap = NULL; + ulint* offsets; - ut_a(thr && foreign && pcur && mtr); trx = thr_get_trx(thr); @@ -816,7 +833,7 @@ row_ins_foreign_check_on_constraint( err = DB_ROW_IS_REFERENCED; row_ins_foreign_report_err( -(char*)"Trying a too deep cascaded delete or update\n", +"Trying a too deep cascaded delete or update\n", thr, foreign, btr_pcur_get_rec(pcur), entry); goto nonstandard_exit_func; @@ -848,8 +865,6 @@ row_ins_foreign_check_on_constraint( PAGE_CUR_LE, BTR_SEARCH_LEAF, cascade->pcur, 0, mtr); - mem_heap_free(tmp_heap); - clust_rec = btr_pcur_get_rec(cascade->pcur); if (!page_rec_is_user_rec(clust_rec) @@ -863,10 +878,14 @@ row_ins_foreign_check_on_constraint( fputs("\n" "InnoDB: record ", stderr); - rec_print(stderr, rec); + offsets = rec_get_offsets(rec, index, + ULINT_UNDEFINED, tmp_heap); + rec_print(stderr, rec, offsets); fputs("\n" "InnoDB: clustered record ", stderr); - rec_print(stderr, clust_rec); + offsets = rec_reget_offsets(clust_rec, clust_index, + offsets, ULINT_UNDEFINED, tmp_heap); + rec_print(stderr, clust_rec, offsets); fputs("\n" "InnoDB: Submit a detailed bug report to http://bugs.mysql.com\n", stderr); @@ -884,9 +903,14 @@ row_ins_foreign_check_on_constraint( /* Here it suffices to use a LOCK_REC_NOT_GAP type lock; we already have a normal shared lock on the appropriate gap if the search criterion was not unique */ - + + if (!tmp_heap) { + tmp_heap = mem_heap_create(256); + } + offsets = rec_get_offsets(clust_rec, clust_index, + ULINT_UNDEFINED, tmp_heap); err = lock_clust_rec_read_check_and_lock(0, clust_rec, - clust_index, LOCK_X, LOCK_REC_NOT_GAP, thr); + clust_index, offsets, LOCK_X, LOCK_REC_NOT_GAP, thr); } if (err != DB_SUCCESS) { @@ -894,7 +918,7 @@ row_ins_foreign_check_on_constraint( goto nonstandard_exit_func; } - if (rec_get_deleted_flag(clust_rec)) { + if (rec_get_deleted_flag(clust_rec, table->comp)) { /* This can happen if there is a circular reference of rows such that cascading delete comes to delete a row already in the process of being delete marked */ @@ -1003,6 +1027,10 @@ row_ins_foreign_check_on_constraint( btr_pcur_restore_position(BTR_SEARCH_LEAF, pcur, mtr); + if (tmp_heap) { + mem_heap_free(tmp_heap); + } + if (upd_vec_heap) { mem_heap_free(upd_vec_heap); } @@ -1010,6 +1038,9 @@ row_ins_foreign_check_on_constraint( return(err); nonstandard_exit_func: + if (tmp_heap) { + mem_heap_free(tmp_heap); + } if (upd_vec_heap) { mem_heap_free(upd_vec_heap); @@ -1037,16 +1068,19 @@ row_ins_set_shared_rec_lock( LOCK_REC_NOT_GAP type lock */ rec_t* rec, /* in: record */ dict_index_t* index, /* in: index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ que_thr_t* thr) /* in: query thread */ { ulint err; + ut_ad(rec_offs_validate(rec, index, offsets)); + if (index->type & DICT_CLUSTERED) { - err = lock_clust_rec_read_check_and_lock(0, rec, index, LOCK_S, - type, thr); + err = lock_clust_rec_read_check_and_lock(0, + rec, index, offsets, LOCK_S, type, thr); } else { - err = lock_sec_rec_read_check_and_lock(0, rec, index, LOCK_S, - type, thr); + err = lock_sec_rec_read_check_and_lock(0, + rec, index, offsets, LOCK_S, type, thr); } return(err); @@ -1064,16 +1098,19 @@ row_ins_set_exclusive_rec_lock( LOCK_REC_NOT_GAP type lock */ rec_t* rec, /* in: record */ dict_index_t* index, /* in: index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ que_thr_t* thr) /* in: query thread */ { ulint err; + ut_ad(rec_offs_validate(rec, index, offsets)); + if (index->type & DICT_CLUSTERED) { - err = lock_clust_rec_read_check_and_lock(0, rec, index, LOCK_X, - type, thr); + err = lock_clust_rec_read_check_and_lock(0, + rec, index, offsets, LOCK_X, type, thr); } else { - err = lock_sec_rec_read_check_and_lock(0, rec, index, LOCK_X, - type, thr); + err = lock_sec_rec_read_check_and_lock(0, + rec, index, offsets, LOCK_X, type, thr); } return(err); @@ -1114,6 +1151,10 @@ row_ins_check_foreign_constraint( ulint i; mtr_t mtr; trx_t* trx = thr_get_trx(thr); + mem_heap_t* heap; + ulint* offsets = NULL; + + heap = mem_heap_create(100); run_again: #ifdef UNIV_SYNC_DEBUG @@ -1125,7 +1166,7 @@ run_again: if (trx->check_foreigns == FALSE) { /* The user has suppressed foreign key checks currently for this session */ - + mem_heap_free(heap); return(DB_SUCCESS); } @@ -1137,6 +1178,7 @@ run_again: if (UNIV_SQL_NULL == dfield_get_len( dtuple_get_nth_field(entry, i))) { + mem_heap_free(heap); return(DB_SUCCESS); } } @@ -1160,7 +1202,8 @@ run_again: with each foreign key constraint, one after another, and the user has problems predicting in which order they are performed. */ - + + mem_heap_free(heap); return(DB_SUCCESS); } } @@ -1174,6 +1217,8 @@ run_again: } if (check_table == NULL) { + mem_heap_free(heap); + if (check_ref) { FILE* ef = dict_foreign_err_file; mutex_enter(&dict_foreign_err_mutex); @@ -1244,10 +1289,13 @@ run_again: goto next_rec; } + offsets = rec_reget_offsets(rec, check_index, + offsets, ULINT_UNDEFINED, heap); + if (rec == page_get_supremum_rec(buf_frame_align(rec))) { - + err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, rec, - check_index, thr); + check_index, offsets, thr); if (err != DB_SUCCESS) { break; @@ -1256,29 +1304,30 @@ run_again: goto next_rec; } - cmp = cmp_dtuple_rec(entry, rec); + cmp = cmp_dtuple_rec(entry, rec, offsets); if (cmp == 0) { - if (rec_get_deleted_flag(rec)) { + if (rec_get_deleted_flag(rec, + rec_offs_comp(offsets))) { err = row_ins_set_shared_rec_lock( - LOCK_ORDINARY, - rec, check_index, thr); + LOCK_ORDINARY, rec, + check_index, offsets, thr); if (err != DB_SUCCESS) { break; } } else { /* Found a matching record */ + ulint lock_type; if (unique_search) { - err = row_ins_set_shared_rec_lock( - LOCK_REC_NOT_GAP, - rec, check_index, thr); + lock_type = LOCK_REC_NOT_GAP; } else { - err = row_ins_set_shared_rec_lock( - LOCK_ORDINARY, - rec, check_index, thr); + lock_type = LOCK_ORDINARY; } + + err = row_ins_set_shared_rec_lock(lock_type, + rec, check_index, offsets, thr); if (err != DB_SUCCESS) { @@ -1315,7 +1364,7 @@ run_again: if (cmp < 0) { err = row_ins_set_shared_rec_lock(LOCK_GAP, - rec, check_index, thr); + rec, check_index, offsets, thr); if (err != DB_SUCCESS) { break; @@ -1373,6 +1422,7 @@ do_possible_lock_wait: err = trx->error_state; } + mem_heap_free(heap); return(err); } @@ -1444,19 +1494,23 @@ row_ins_dupl_error_with_rec( that the caller already has a record lock on the record! */ dtuple_t* entry, /* in: entry to insert */ - dict_index_t* index) /* in: index */ + dict_index_t* index, /* in: index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { ulint matched_fields; ulint matched_bytes; ulint n_unique; ulint i; - + + ut_ad(rec_offs_validate(rec, index, offsets)); + n_unique = dict_index_get_n_unique(index); matched_fields = 0; matched_bytes = 0; - cmp_dtuple_rec_with_match(entry, rec, &matched_fields, &matched_bytes); + cmp_dtuple_rec_with_match(entry, rec, offsets, + &matched_fields, &matched_bytes); if (matched_fields < n_unique) { @@ -1477,7 +1531,7 @@ row_ins_dupl_error_with_rec( } } - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, index->table->comp)) { return(TRUE); } @@ -1509,7 +1563,9 @@ row_ins_scan_sec_index_for_duplicate( ibool moved; mtr_t mtr; trx_t* trx; - + mem_heap_t* heap; + ulint* offsets = NULL; + n_unique = dict_index_get_n_unique(index); /* If the secondary index is unique, but one of the fields in the @@ -1524,6 +1580,7 @@ row_ins_scan_sec_index_for_duplicate( } } + heap = mem_heap_create(100); mtr_start(&mtr); /* Store old value on n_fields_cmp */ @@ -1549,6 +1606,9 @@ row_ins_scan_sec_index_for_duplicate( trx = thr_get_trx(thr); ut_ad(trx); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + if (innobase_query_is_replace()) { /* The manual defines the REPLACE semantics that it @@ -1556,12 +1616,12 @@ row_ins_scan_sec_index_for_duplicate( + INSERT. Therefore, we should take X-lock for duplicates */ - err = row_ins_set_exclusive_rec_lock( - LOCK_ORDINARY,rec,index,thr); + err = row_ins_set_exclusive_rec_lock(LOCK_ORDINARY, + rec, index, offsets, thr); } else { - err = row_ins_set_shared_rec_lock( - LOCK_ORDINARY, rec, index,thr); + err = row_ins_set_shared_rec_lock(LOCK_ORDINARY, + rec, index, offsets, thr); } if (err != DB_SUCCESS) { @@ -1574,10 +1634,11 @@ row_ins_scan_sec_index_for_duplicate( goto next_rec; } - cmp = cmp_dtuple_rec(entry, rec); + cmp = cmp_dtuple_rec(entry, rec, offsets); if (cmp == 0) { - if (row_ins_dupl_error_with_rec(rec, entry, index)) { + if (row_ins_dupl_error_with_rec(rec, entry, + index, offsets)) { err = DB_DUPLICATE_KEY; thr_get_trx(thr)->error_info = index; @@ -1599,6 +1660,7 @@ next_rec: } } + mem_heap_free(heap); mtr_commit(&mtr); /* Restore old value */ @@ -1656,6 +1718,12 @@ row_ins_duplicate_error_in_clust( page = buf_frame_align(rec); if (rec != page_get_infimum_rec(page)) { + mem_heap_t* heap; + ulint* offsets; + + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, cursor->index, + ULINT_UNDEFINED, heap); /* We set a lock on the possible duplicate: this is needed in logical logging of MySQL to make @@ -1671,24 +1739,26 @@ row_ins_duplicate_error_in_clust( err = row_ins_set_exclusive_rec_lock( LOCK_REC_NOT_GAP,rec,cursor->index, - thr); + offsets, thr); } else { err = row_ins_set_shared_rec_lock( LOCK_REC_NOT_GAP,rec, cursor->index, - thr); + offsets, thr); } if (err != DB_SUCCESS) { - + mem_heap_free(heap); return(err); } if (row_ins_dupl_error_with_rec(rec, entry, - cursor->index)) { + cursor->index, offsets)) { trx->error_info = cursor->index; + mem_heap_free(heap); return(DB_DUPLICATE_KEY); } + mem_heap_free(heap); } } @@ -1698,7 +1768,12 @@ row_ins_duplicate_error_in_clust( page = buf_frame_align(rec); if (rec != page_get_supremum_rec(page)) { + mem_heap_t* heap; + ulint* offsets; + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, cursor->index, + ULINT_UNDEFINED, heap); /* The manual defines the REPLACE semantics that it is either an INSERT or DELETE(s) for duplicate key @@ -1708,25 +1783,27 @@ row_ins_duplicate_error_in_clust( if (innobase_query_is_replace()) { err = row_ins_set_exclusive_rec_lock( - LOCK_REC_NOT_GAP, - rec,cursor->index,thr); + LOCK_REC_NOT_GAP, rec, + cursor->index, offsets, thr); } else { err = row_ins_set_shared_rec_lock( - LOCK_REC_NOT_GAP,rec, - cursor->index, thr); + LOCK_REC_NOT_GAP, rec, + cursor->index, offsets, thr); } if (err != DB_SUCCESS) { - + mem_heap_free(heap); return(err); } if (row_ins_dupl_error_with_rec(rec, entry, - cursor->index)) { + cursor->index, offsets)) { trx->error_info = cursor->index; + mem_heap_free(heap); return(DB_DUPLICATE_KEY); } + mem_heap_free(heap); } ut_a(!(cursor->index->type & DICT_CLUSTERED)); @@ -1815,6 +1892,8 @@ row_ins_index_entry_low( ulint n_unique; big_rec_t* big_rec = NULL; mtr_t mtr; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; log_free_check(); @@ -1847,8 +1926,9 @@ row_ins_index_entry_low( buf_frame_align(btr_cur_get_rec(&cursor)))); if (!page_rec_is_supremum(first_rec)) { - ut_a((rec_get_n_fields(first_rec)) - == dtuple_get_n_fields(entry)); + offsets = rec_get_offsets(first_rec, index, + ULINT_UNDEFINED, heap); + ut_a(rec_offs_n_fields(offsets) == dtuple_get_n_fields(entry)); } n_unique = dict_index_get_n_unique(index); @@ -1926,7 +2006,7 @@ row_ins_index_entry_low( if (err == DB_SUCCESS) { if (ext_vec) { - rec_set_field_extern_bits(insert_rec, + rec_set_field_extern_bits(insert_rec, index, ext_vec, n_ext_vec, &mtr); } } @@ -1936,14 +2016,18 @@ function_exit: mtr_commit(&mtr); if (big_rec) { + rec_t* rec; mtr_start(&mtr); btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, BTR_MODIFY_TREE, &cursor, 0, &mtr); + rec = btr_cur_get_rec(&cursor); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + + err = btr_store_big_rec_extern_fields(index, rec, + offsets, big_rec, &mtr); - err = btr_store_big_rec_extern_fields(index, - btr_cur_get_rec(&cursor), - big_rec, &mtr); if (modify) { dtuple_big_rec_free(big_rec); } else { @@ -1953,6 +2037,7 @@ function_exit: mtr_commit(&mtr); } + mem_heap_free(heap); return(err); } diff --git a/innobase/row/row0mysql.c b/innobase/row/row0mysql.c index adf9d6695bc..be243b44488 100644 --- a/innobase/row/row0mysql.c +++ b/innobase/row/row0mysql.c @@ -3226,7 +3226,8 @@ row_scan_and_check_index( int cmp; ibool contains_null; ulint i; - + ulint* offsets = NULL; + *n_rows = 0; buf = mem_alloc(UNIV_PAGE_SIZE); @@ -3266,8 +3267,10 @@ loop: if (prev_entry != NULL) { matched_fields = 0; matched_bytes = 0; - - cmp = cmp_dtuple_rec_with_match(prev_entry, rec, + + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + cmp = cmp_dtuple_rec_with_match(prev_entry, rec, offsets, &matched_fields, &matched_bytes); contains_null = FALSE; @@ -3296,7 +3299,7 @@ loop: dtuple_print(stderr, prev_entry); fputs("\n" "InnoDB: record ", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); putc('\n', stderr); is_ok = FALSE; } else if ((index->type & DICT_UNIQUE) @@ -3310,6 +3313,7 @@ loop: } mem_heap_empty(heap); + offsets = NULL; prev_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); @@ -3394,7 +3398,7 @@ row_check_table_for_mysql( /* We validate also the whole adaptive hash index for all tables at every CHECK TABLE */ - if (!btr_search_validate()) { + if (!btr_search_validate(index)) { ret = DB_ERROR; } diff --git a/innobase/row/row0purge.c b/innobase/row/row0purge.c index f7e01169b9d..109d0f3b976 100644 --- a/innobase/row/row0purge.c +++ b/innobase/row/row0purge.c @@ -99,6 +99,8 @@ row_purge_remove_clust_if_poss_low( ibool success; ulint err; mtr_t mtr; + rec_t* rec; + mem_heap_t* heap; index = dict_table_get_first_index(node->table); @@ -117,15 +119,21 @@ row_purge_remove_clust_if_poss_low( return(TRUE); } + rec = btr_pcur_get_rec(pcur); + heap = mem_heap_create(100); + if (0 != ut_dulint_cmp(node->roll_ptr, - row_get_rec_roll_ptr(btr_pcur_get_rec(pcur), index))) { - + row_get_rec_roll_ptr(rec, index, rec_get_offsets( + rec, index, ULINT_UNDEFINED, heap)))) { + mem_heap_free(heap); /* Someone else has modified the record later: do not remove */ btr_pcur_commit_specify_mtr(pcur, &mtr); return(TRUE); } + mem_heap_free(heap); + if (mode == BTR_MODIFY_LEAF) { success = btr_cur_optimistic_delete(btr_cur, &mtr); } else { diff --git a/innobase/row/row0row.c b/innobase/row/row0row.c index 38714b0c49b..9cf285a519d 100644 --- a/innobase/row/row0row.c +++ b/innobase/row/row0row.c @@ -37,17 +37,18 @@ row_get_rec_sys_field( /* out: value of the field */ ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ rec_t* rec, /* in: record */ - dict_index_t* index) /* in: clustered index */ + dict_index_t* index, /* in: clustered index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { - ulint pos; - byte* field; - ulint len; + ulint pos; + byte* field; + ulint len; ut_ad(index->type & DICT_CLUSTERED); pos = dict_index_get_sys_col_pos(index, type); - field = rec_get_nth_field(rec, pos, &len); + field = rec_get_nth_field(rec, offsets, pos, &len); if (type == DATA_TRX_ID) { @@ -70,6 +71,7 @@ row_set_rec_sys_field( ulint type, /* in: DATA_TRX_ID or DATA_ROLL_PTR */ rec_t* rec, /* in: record */ dict_index_t* index, /* in: clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ dulint val) /* in: value to set */ { ulint pos; @@ -77,10 +79,11 @@ row_set_rec_sys_field( ulint len; ut_ad(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); pos = dict_index_get_sys_col_pos(index, type); - field = rec_get_nth_field(rec, pos, &len); + field = rec_get_nth_field(rec, offsets, pos, &len); if (type == DATA_TRX_ID) { @@ -182,6 +185,9 @@ row_build( the buffer page of this record must be at least s-latched and the latch held as long as the row dtuple is used! */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) + or NULL, in which case this function + will invoke rec_get_offsets() */ mem_heap_t* heap) /* in: memory heap from which the memory needed is allocated */ { @@ -196,14 +202,26 @@ row_build( ulint row_len; byte* buf; ulint i; - + mem_heap_t* tmp_heap; + ut_ad(index && rec && heap); ut_ad(index->type & DICT_CLUSTERED); + if (!offsets) { + tmp_heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, + ULINT_UNDEFINED, tmp_heap); + } else { + tmp_heap = NULL; + ut_ad(rec_offs_validate(rec, index, offsets)); + } + if (type != ROW_COPY_POINTERS) { /* Take a copy of rec to heap */ - buf = mem_heap_alloc(heap, rec_get_size(rec)); - rec = rec_copy(buf, rec); + buf = mem_heap_alloc(heap, rec_offs_size(offsets)); + rec = rec_copy(buf, rec, offsets); + /* Avoid a debug assertion in rec_offs_validate(). */ + rec_offs_make_valid(rec, index, (ulint*) offsets); } table = index->table; @@ -211,11 +229,9 @@ row_build( row = dtuple_create(heap, row_len); - dtuple_set_info_bits(row, rec_get_info_bits(rec)); - - n_fields = dict_index_get_n_fields(index); + dtuple_set_info_bits(row, rec_get_info_bits(rec, table->comp)); - ut_ad(n_fields == rec_get_n_fields(rec)); + n_fields = rec_offs_n_fields(offsets); dict_table_copy_types(row, table); @@ -227,13 +243,13 @@ row_build( col = dict_field_get_col(ind_field); dfield = dtuple_get_nth_field(row, dict_col_get_no(col)); - field = rec_get_nth_field(rec, i, &len); + field = rec_get_nth_field(rec, offsets, i, &len); if (type == ROW_COPY_ALSO_EXTERNALS - && rec_get_nth_field_extern_bit(rec, i)) { + && rec_offs_nth_extern(offsets, i)) { field = btr_rec_copy_externally_stored_field( - rec, i, &len, heap); + rec, offsets, i, &len, heap); } dfield_set_data(dfield, field, len); @@ -242,6 +258,10 @@ row_build( ut_ad(dtuple_check_typed(row)); + if (tmp_heap) { + mem_heap_free(tmp_heap); + } + return(row); } @@ -276,16 +296,23 @@ row_rec_to_index_entry( ulint len; ulint rec_len; byte* buf; - + mem_heap_t* tmp_heap; + ulint* offsets; + ut_ad(rec && heap && index); + tmp_heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, tmp_heap); + if (type == ROW_COPY_DATA) { /* Take a copy of rec to heap */ - buf = mem_heap_alloc(heap, rec_get_size(rec)); - rec = rec_copy(buf, rec); + buf = mem_heap_alloc(heap, rec_offs_size(offsets)); + rec = rec_copy(buf, rec, offsets); + /* Avoid a debug assertion in rec_offs_validate(). */ + rec_offs_make_valid(rec, index, offsets); } - rec_len = rec_get_n_fields(rec); + rec_len = rec_offs_n_fields(offsets); entry = dtuple_create(heap, rec_len); @@ -295,17 +322,19 @@ row_rec_to_index_entry( dict_index_copy_types(entry, index, rec_len); - dtuple_set_info_bits(entry, rec_get_info_bits(rec)); + dtuple_set_info_bits(entry, + rec_get_info_bits(rec, rec_offs_comp(offsets))); for (i = 0; i < rec_len; i++) { dfield = dtuple_get_nth_field(entry, i); - field = rec_get_nth_field(rec, i, &len); + field = rec_get_nth_field(rec, offsets, i, &len); dfield_set_data(dfield, field, len); } ut_ad(dtuple_check_typed(entry)); + mem_heap_free(tmp_heap); return(entry); } @@ -345,15 +374,22 @@ row_build_row_ref( byte* buf; ulint clust_col_prefix_len; ulint i; + mem_heap_t* tmp_heap; + ulint* offsets; ut_ad(index && rec && heap); - + + tmp_heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, tmp_heap); + if (type == ROW_COPY_DATA) { /* Take a copy of rec to heap */ - buf = mem_heap_alloc(heap, rec_get_size(rec)); + buf = mem_heap_alloc(heap, rec_offs_size(offsets)); - rec = rec_copy(buf, rec); + rec = rec_copy(buf, rec, offsets); + /* Avoid a debug assertion in rec_offs_validate(). */ + rec_offs_make_valid(rec, index, offsets); } table = index->table; @@ -373,7 +409,7 @@ row_build_row_ref( ut_a(pos != ULINT_UNDEFINED); - field = rec_get_nth_field(rec, pos, &len); + field = rec_get_nth_field(rec, offsets, pos, &len); dfield_set_data(dfield, field, len); @@ -397,6 +433,7 @@ row_build_row_ref( } ut_ad(dtuple_check_typed(ref)); + mem_heap_free(tmp_heap); return(ref); } @@ -427,7 +464,9 @@ row_build_row_ref_in_tuple( ulint pos; ulint clust_col_prefix_len; ulint i; - + mem_heap_t* heap; + ulint* offsets; + ut_a(ref && index && rec); if (!index->table) { @@ -446,7 +485,10 @@ row_build_row_ref_in_tuple( fputs("InnoDB: clust index for table ", stderr); goto notfound; } - + + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + ref_len = dict_index_get_n_unique(clust_index); ut_ad(ref_len == dtuple_get_n_fields(ref)); @@ -459,8 +501,8 @@ row_build_row_ref_in_tuple( pos = dict_index_get_nth_field_pos(index, clust_index, i); ut_a(pos != ULINT_UNDEFINED); - - field = rec_get_nth_field(rec, pos, &len); + + field = rec_get_nth_field(rec, offsets, pos, &len); dfield_set_data(dfield, field, len); @@ -484,6 +526,7 @@ row_build_row_ref_in_tuple( } ut_ad(dtuple_check_typed(ref)); + mem_heap_free(heap); } /*********************************************************************** diff --git a/innobase/row/row0sel.c b/innobase/row/row0sel.c index d3b834dd866..c36e62f4e74 100644 --- a/innobase/row/row0sel.c +++ b/innobase/row/row0sel.c @@ -78,8 +78,14 @@ row_sel_sec_rec_is_for_clust_rec( ulint n; ulint i; dtype_t* cur_type; + mem_heap_t* heap; + ulint* clust_offs; + ulint* sec_offs; - UT_NOT_USED(clust_index); + heap = mem_heap_create(100); + clust_offs = rec_get_offsets(clust_rec, clust_index, + ULINT_UNDEFINED, heap); + sec_offs = rec_get_offsets(sec_rec, sec_index, ULINT_UNDEFINED, heap); n = dict_index_get_n_ordering_defined_by_user(sec_index); @@ -87,10 +93,10 @@ row_sel_sec_rec_is_for_clust_rec( ifield = dict_index_get_nth_field(sec_index, i); col = dict_field_get_col(ifield); - clust_field = rec_get_nth_field(clust_rec, + clust_field = rec_get_nth_field(clust_rec, clust_offs, dict_col_get_clust_pos(col), &clust_len); - sec_field = rec_get_nth_field(sec_rec, i, &sec_len); + sec_field = rec_get_nth_field(sec_rec, sec_offs, i, &sec_len); if (ifield->prefix_len > 0 && clust_len != UNIV_SQL_NULL) { @@ -107,10 +113,12 @@ row_sel_sec_rec_is_for_clust_rec( if (0 != cmp_data_data(dict_col_get_type(col), clust_field, clust_len, sec_field, sec_len)) { + mem_heap_free(heap); return(FALSE); } } + mem_heap_free(heap); return(TRUE); } @@ -266,6 +274,7 @@ row_sel_fetch_columns( dict_index_t* index, /* in: record index */ rec_t* rec, /* in: record in a clustered or non-clustered index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ sym_node_t* column) /* in: first column in a column list, or NULL */ { @@ -275,6 +284,8 @@ row_sel_fetch_columns( byte* data; ulint len; + ut_ad(rec_offs_validate(rec, index, offsets)); + if (index->type & DICT_CLUSTERED) { index_type = SYM_CLUST_FIELD_NO; } else { @@ -286,7 +297,7 @@ row_sel_fetch_columns( if (field_no != ULINT_UNDEFINED) { - data = rec_get_nth_field(rec, field_no, &len); + data = rec_get_nth_field(rec, offsets, field_no, &len); if (column->copy_val) { eval_node_copy_and_alloc_val(column, data, @@ -601,8 +612,15 @@ row_sel_get_clust_rec( rec_t* clust_rec; rec_t* old_vers; ulint err; + mem_heap_t* heap; + ulint* offsets; + + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, + btr_pcur_get_btr_cur(&plan->pcur)->index, + ULINT_UNDEFINED, heap); - row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec); + row_build_row_ref_fast(plan->clust_ref, plan->clust_map, rec, offsets); index = dict_table_get_first_index(plan->table); @@ -619,7 +637,7 @@ row_sel_get_clust_rec( || btr_pcur_get_low_match(&(plan->clust_pcur)) < dict_index_get_n_unique(index)) { - ut_a(rec_get_deleted_flag(rec)); + ut_a(rec_get_deleted_flag(rec, plan->table->comp)); ut_a(node->read_view); /* In a rare case it is possible that no clust rec is found @@ -636,28 +654,30 @@ row_sel_get_clust_rec( goto func_exit; } + offsets = rec_reget_offsets(clust_rec, index, + offsets, ULINT_UNDEFINED, heap); + if (!node->read_view) { /* Try to place a lock on the index record */ /* If innodb_locks_unsafe_for_binlog option is used, - we lock only the record, i.e. next-key locking is - not used. - */ + we lock only the record, i.e., next-key locking is + not used. */ + ulint lock_type; if (srv_locks_unsafe_for_binlog) { - err = lock_clust_rec_read_check_and_lock(0, - clust_rec, - index, node->row_lock_mode, - LOCK_REC_NOT_GAP, thr); + lock_type = LOCK_REC_NOT_GAP; } else { - err = lock_clust_rec_read_check_and_lock(0, - clust_rec, - index, node->row_lock_mode, - LOCK_ORDINARY, thr); + lock_type = LOCK_ORDINARY; } + err = lock_clust_rec_read_check_and_lock(0, + clust_rec, index, offsets, + node->row_lock_mode, lock_type, thr); + if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } } else { @@ -666,22 +686,21 @@ row_sel_get_clust_rec( old_vers = NULL; - if (!lock_clust_rec_cons_read_sees(clust_rec, index, + if (!lock_clust_rec_cons_read_sees(clust_rec, index, offsets, node->read_view)) { err = row_sel_build_prev_vers(node->read_view, plan, clust_rec, &old_vers, mtr); if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } clust_rec = old_vers; if (clust_rec == NULL) { - *out_rec = clust_rec; - - return(DB_SUCCESS); + goto func_exit; } } @@ -698,23 +717,22 @@ row_sel_get_clust_rec( visit through secondary index records that would not really exist in our snapshot. */ - if ((old_vers || rec_get_deleted_flag(rec)) + if ((old_vers || rec_get_deleted_flag(rec, plan->table->comp)) && !row_sel_sec_rec_is_for_clust_rec(rec, plan->index, clust_rec, index)) { clust_rec = NULL; - *out_rec = clust_rec; - - return(DB_SUCCESS); + goto func_exit; } } /* Fetch the columns needed in test conditions */ - - row_sel_fetch_columns(index, clust_rec, + + row_sel_fetch_columns(index, clust_rec, offsets, UT_LIST_GET_FIRST(plan->columns)); func_exit: *out_rec = clust_rec; + mem_heap_free(heap); return(DB_SUCCESS); } @@ -727,6 +745,7 @@ sel_set_rec_lock( /* out: DB_SUCCESS or error code */ rec_t* rec, /* in: record */ dict_index_t* index, /* in: index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ulint mode, /* in: lock mode */ ulint type, /* in: LOCK_ORDINARY, LOCK_GAP, or LOC_REC_NOT_GAP */ que_thr_t* thr) /* in: query thread */ @@ -744,11 +763,11 @@ sel_set_rec_lock( } if (index->type & DICT_CLUSTERED) { - err = lock_clust_rec_read_check_and_lock(0, rec, index, mode, - type, thr); + err = lock_clust_rec_read_check_and_lock(0, + rec, index, offsets, mode, type, thr); } else { - err = lock_sec_rec_read_check_and_lock(0, rec, index, mode, - type, thr); + err = lock_sec_rec_read_check_and_lock(0, + rec, index, offsets, mode, type, thr); } return(err); @@ -956,6 +975,8 @@ row_sel_try_search_shortcut( { dict_index_t* index; rec_t* rec; + mem_heap_t* heap; + ulint* offsets; index = plan->index; @@ -989,21 +1010,28 @@ row_sel_try_search_shortcut( /* This is a non-locking consistent read: if necessary, fetch a previous version of the record */ + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + if (index->type & DICT_CLUSTERED) { - if (!lock_clust_rec_cons_read_sees(rec, index, + if (!lock_clust_rec_cons_read_sees(rec, index, offsets, node->read_view)) { + mem_heap_free(heap); return(SEL_RETRY); } } else if (!lock_sec_rec_cons_read_sees(rec, index, node->read_view)) { + mem_heap_free(heap); return(SEL_RETRY); } /* Test deleted flag. Fetch the columns needed in test conditions. */ - - row_sel_fetch_columns(index, rec, UT_LIST_GET_FIRST(plan->columns)); - if (rec_get_deleted_flag(rec)) { + row_sel_fetch_columns(index, rec, offsets, + UT_LIST_GET_FIRST(plan->columns)); + mem_heap_free(heap); + + if (rec_get_deleted_flag(rec, plan->table->comp)) { return(SEL_EXHAUSTED); } @@ -1067,7 +1095,9 @@ row_sel( to the next non-clustered record */ ulint found_flag; ulint err; - + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; + ut_ad(thr->run_node == node); search_latch_locked = FALSE; @@ -1218,22 +1248,23 @@ rec_loop: if (!consistent_read) { /* If innodb_locks_unsafe_for_binlog option is used, - we lock only the record, i.e. next-key locking is - not used. - */ + we lock only the record, i.e., next-key locking is + not used. */ + + rec_t* next_rec = page_rec_get_next(rec); + ulint lock_type; + offsets = rec_reget_offsets(next_rec, index, + offsets, ULINT_UNDEFINED, heap); if (srv_locks_unsafe_for_binlog) { - err = sel_set_rec_lock(page_rec_get_next(rec), - index, - node->row_lock_mode, - LOCK_REC_NOT_GAP, thr); + lock_type = LOCK_REC_NOT_GAP; } else { - err = sel_set_rec_lock(page_rec_get_next(rec), - index, - node->row_lock_mode, - LOCK_ORDINARY, thr); + lock_type = LOCK_ORDINARY; } + err = sel_set_rec_lock(next_rec, index, offsets, + node->row_lock_mode, lock_type, thr); + if (err != DB_SUCCESS) { /* Note that in this case we will store in pcur the PREDECESSOR of the record we are waiting @@ -1260,18 +1291,22 @@ rec_loop: /* Try to place a lock on the index record */ /* If innodb_locks_unsafe_for_binlog option is used, - we lock only the record, i.e. next-key locking is - not used. - */ + we lock only the record, i.e., next-key locking is + not used. */ + + ulint lock_type; + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); if (srv_locks_unsafe_for_binlog) { - err = sel_set_rec_lock(rec, index, node->row_lock_mode, - LOCK_REC_NOT_GAP, thr); + lock_type = LOCK_REC_NOT_GAP; } else { - err = sel_set_rec_lock(rec, index, node->row_lock_mode, - LOCK_ORDINARY, thr); + lock_type = LOCK_ORDINARY; } + err = sel_set_rec_lock(rec, index, offsets, + node->row_lock_mode, lock_type, thr); + if (err != DB_SUCCESS) { goto lock_wait_or_error; @@ -1334,6 +1369,8 @@ rec_loop: /* PHASE 3: Get previous version in a consistent read */ cons_read_requires_clust_rec = FALSE; + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); if (consistent_read) { /* This is a non-locking consistent read: if necessary, fetch @@ -1341,7 +1378,7 @@ rec_loop: if (index->type & DICT_CLUSTERED) { - if (!lock_clust_rec_cons_read_sees(rec, index, + if (!lock_clust_rec_cons_read_sees(rec, index, offsets, node->read_view)) { err = row_sel_build_prev_vers(node->read_view, @@ -1354,6 +1391,7 @@ rec_loop: if (old_vers == NULL) { row_sel_fetch_columns(index, rec, + offsets, UT_LIST_GET_FIRST(plan->columns)); if (!row_sel_test_end_conds(plan)) { @@ -1365,6 +1403,8 @@ rec_loop: } rec = old_vers; + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); } } else if (!lock_sec_rec_cons_read_sees(rec, index, node->read_view)) { @@ -1376,7 +1416,8 @@ rec_loop: /* Fetch the columns needed in test conditions */ - row_sel_fetch_columns(index, rec, UT_LIST_GET_FIRST(plan->columns)); + row_sel_fetch_columns(index, rec, offsets, + UT_LIST_GET_FIRST(plan->columns)); /* Test the selection end conditions: these can only contain columns which already are found in the index, even though the index might be @@ -1391,7 +1432,8 @@ rec_loop: goto table_exhausted; } - if (rec_get_deleted_flag(rec) && !cons_read_requires_clust_rec) { + if (rec_get_deleted_flag(rec, plan->table->comp) + && !cons_read_requires_clust_rec) { /* The record is delete marked: we can skip it if this is not a consistent read which might see an earlier version @@ -1434,7 +1476,7 @@ rec_loop: goto next_rec; } - if (rec_get_deleted_flag(clust_rec)) { + if (rec_get_deleted_flag(clust_rec, plan->table->comp)) { /* The record is delete marked: we can skip it */ @@ -1592,7 +1634,8 @@ next_table_no_mtr: if (search_latch_locked) { rw_lock_s_unlock(&btr_search_latch); } - + + mem_heap_free(heap); return(DB_SUCCESS); } @@ -1626,6 +1669,7 @@ table_exhausted: table_exhausted_no_mtr: if (node->fetch_table == 0) { + mem_heap_free(heap); if (node->is_aggregate && !node->aggregate_already_fetched) { @@ -1674,7 +1718,7 @@ stop_for_a_while: mtr_commit(&mtr); ut_ad(sync_thread_levels_empty_gen(TRUE)); - + mem_heap_free(heap); return(DB_SUCCESS); commit_mtr_for_a_while: @@ -1710,6 +1754,7 @@ lock_wait_or_error: ut_ad(sync_thread_levels_empty_gen(TRUE)); + mem_heap_free(heap); return(err); } @@ -2133,11 +2178,16 @@ row_sel_store_row_id_to_prebuilt( /*=============================*/ row_prebuilt_t* prebuilt, /* in: prebuilt */ rec_t* index_rec, /* in: record */ - dict_index_t* index) /* in: index of the record */ + dict_index_t* index, /* in: index of the record */ + const ulint* offsets) /* in: rec_get_offsets + (index_rec, index) */ { byte* data; ulint len; - data = rec_get_nth_field(index_rec, + + ut_ad(rec_offs_validate(index_rec, index, offsets)); + + data = rec_get_nth_field(index_rec, offsets, dict_index_get_sys_col_pos(index, DATA_ROW_ID), &len); if (len != DATA_ROW_ID_LEN) { @@ -2147,7 +2197,7 @@ row_sel_store_row_id_to_prebuilt( fprintf(stderr, "\n" "InnoDB: Field number %lu, record:\n", (ulong) dict_index_get_sys_col_pos(index, DATA_ROW_ID)); - rec_print(stderr, index_rec); + rec_print(stderr, index_rec, offsets); putc('\n', stderr); ut_error; } @@ -2236,9 +2286,11 @@ row_sel_store_mysql_rec( case) */ byte* mysql_rec, /* out: row in the MySQL format */ row_prebuilt_t* prebuilt, /* in: prebuilt struct */ - rec_t* rec) /* in: Innobase record in the index + rec_t* rec, /* in: Innobase record in the index which was described in prebuilt's template */ + const ulint* offsets) /* in: array returned by + rec_get_offsets() */ { mysql_row_templ_t* templ; mem_heap_t* extern_field_heap = NULL; @@ -2247,8 +2299,15 @@ row_sel_store_mysql_rec( byte* blob_buf; int pad_char; ulint i; + dict_index_t* index; ut_ad(prebuilt->mysql_template); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + + index = prebuilt->index; + if (prebuilt->need_to_access_clustered) { + index = dict_table_get_first_index(index->table); + } if (prebuilt->blob_heap != NULL) { mem_heap_free(prebuilt->blob_heap); @@ -2264,9 +2323,10 @@ row_sel_store_mysql_rec( templ = prebuilt->mysql_template + i; - data = rec_get_nth_field(rec, templ->rec_field_no, &len); + data = rec_get_nth_field(rec, offsets, + templ->rec_field_no, &len); - if (rec_get_nth_field_extern_bit(rec, templ->rec_field_no)) { + if (rec_offs_nth_extern(offsets, templ->rec_field_no)) { /* Copy an externally stored field to the temporary heap */ @@ -2280,7 +2340,7 @@ row_sel_store_mysql_rec( causes an assert */ data = btr_rec_copy_externally_stored_field(rec, - templ->rec_field_no, &len, + offsets, templ->rec_field_no, &len, extern_field_heap); ut_a(len != UNIV_SQL_NULL); @@ -2436,6 +2496,8 @@ row_sel_get_clust_rec_for_mysql( rec_t* old_vers; ulint err; trx_t* trx; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; *out_rec = NULL; trx = thr_get_trx(thr); @@ -2466,9 +2528,8 @@ row_sel_get_clust_rec_for_mysql( clustered index record did not exist in the read view of trx. */ - if (!rec_get_deleted_flag(rec) + if (!rec_get_deleted_flag(rec, sec_index->table->comp) || prebuilt->select_lock_type != LOCK_NONE) { - ut_print_timestamp(stderr); fputs(" InnoDB: error clustered record" " for sec rec not found\n" @@ -2476,10 +2537,14 @@ row_sel_get_clust_rec_for_mysql( dict_index_name_print(stderr, trx, sec_index); fputs("\n" "InnoDB: sec index record ", stderr); - rec_print(stderr, rec); + offsets = rec_get_offsets(rec, sec_index, + ULINT_UNDEFINED, heap); + rec_print(stderr, rec, offsets); fputs("\n" "InnoDB: clust index record ", stderr); - rec_print(stderr, clust_rec); + offsets = rec_reget_offsets(clust_rec, clust_index, + offsets, ULINT_UNDEFINED, heap); + rec_print(stderr, clust_rec, offsets); putc('\n', stderr); trx_print(stderr, trx); @@ -2492,17 +2557,21 @@ row_sel_get_clust_rec_for_mysql( goto func_exit; } + offsets = rec_get_offsets(clust_rec, clust_index, + ULINT_UNDEFINED, heap); + if (prebuilt->select_lock_type != LOCK_NONE) { /* Try to place a lock on the index record; we are searching the clust rec with a unique condition, hence we set a LOCK_REC_NOT_GAP type lock */ err = lock_clust_rec_read_check_and_lock(0, clust_rec, - clust_index, + clust_index, offsets, prebuilt->select_lock_type, LOCK_REC_NOT_GAP, thr); if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } } else { @@ -2516,7 +2585,7 @@ row_sel_get_clust_rec_for_mysql( if (trx->isolation_level > TRX_ISO_READ_UNCOMMITTED && !lock_clust_rec_cons_read_sees(clust_rec, clust_index, - trx->read_view)) { + offsets, trx->read_view)) { err = row_sel_build_prev_vers_for_mysql( trx->read_view, clust_index, @@ -2525,6 +2594,7 @@ row_sel_get_clust_rec_for_mysql( if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } @@ -2544,7 +2614,8 @@ row_sel_get_clust_rec_for_mysql( visit through secondary index records that would not really exist in our snapshot. */ - if (clust_rec && (old_vers || rec_get_deleted_flag(rec)) + if (clust_rec && (old_vers + || rec_get_deleted_flag(rec, sec_index->table->comp)) && !row_sel_sec_rec_is_for_clust_rec(rec, sec_index, clust_rec, clust_index)) { clust_rec = NULL; @@ -2566,6 +2637,7 @@ func_exit: btr_pcur_store_position(prebuilt->clust_pcur, mtr); } + mem_heap_free(heap); return(DB_SUCCESS); } @@ -2687,12 +2759,14 @@ void row_sel_push_cache_row_for_mysql( /*=============================*/ row_prebuilt_t* prebuilt, /* in: prebuilt struct */ - rec_t* rec) /* in: record to push */ + rec_t* rec, /* in: record to push */ + const ulint* offsets) /* in: rec_get_offsets() */ { byte* buf; ulint i; ut_ad(prebuilt->n_fetch_cached < MYSQL_FETCH_CACHE_SIZE); + ut_ad(rec_offs_validate(rec, NULL, offsets)); ut_a(!prebuilt->templ_contains_blob); if (prebuilt->fetch_cache[0] == NULL) { @@ -2718,7 +2792,7 @@ row_sel_push_cache_row_for_mysql( ut_a(row_sel_store_mysql_rec( prebuilt->fetch_cache[prebuilt->n_fetch_cached], - prebuilt, rec)); + prebuilt, rec, offsets)); prebuilt->n_fetch_cached++; } @@ -2735,6 +2809,8 @@ row_sel_try_search_shortcut_for_mysql( /* out: SEL_FOUND, SEL_EXHAUSTED, SEL_RETRY */ rec_t** out_rec,/* out: record if found */ row_prebuilt_t* prebuilt,/* in: prebuilt struct */ + ulint** offsets,/* in/out: for rec_reget_offsets(*out_rec) */ + mem_heap_t* heap, /* in: heap for rec_reget_offsets() */ mtr_t* mtr) /* in: started mtr */ { dict_index_t* index = prebuilt->index; @@ -2772,13 +2848,17 @@ row_sel_try_search_shortcut_for_mysql( /* This is a non-locking consistent read: if necessary, fetch a previous version of the record */ - - if (!lock_clust_rec_cons_read_sees(rec, index, trx->read_view)) { + + *offsets = rec_reget_offsets(rec, index, + *offsets, ULINT_UNDEFINED, heap); + + if (!lock_clust_rec_cons_read_sees(rec, index, + *offsets, trx->read_view)) { return(SEL_RETRY); } - if (rec_get_deleted_flag(rec)) { + if (rec_get_deleted_flag(rec, index->table->comp)) { return(SEL_EXHAUSTED); } @@ -2847,9 +2927,12 @@ row_search_for_mysql( level is <= TRX_ISO_READ_COMMITTED, then this is set to FALSE */ ibool success; + ibool comp; ulint cnt = 0; ulint next_offs; mtr_t mtr; + mem_heap_t* heap; + ulint* offsets = NULL; ut_ad(index && pcur && search_tuple); ut_ad(trx->mysql_thread_id == os_thread_get_curr_id()); @@ -3003,6 +3086,7 @@ row_search_for_mysql( } mtr_start(&mtr); + heap = mem_heap_create(100); /*-------------------------------------------------------------*/ /* PHASE 2: Try fast adaptive hash index search if possible */ @@ -3048,13 +3132,14 @@ row_search_for_mysql( } #endif shortcut = row_sel_try_search_shortcut_for_mysql(&rec, - prebuilt, &mtr); + prebuilt, &offsets, heap, &mtr); if (shortcut == SEL_FOUND) { #ifdef UNIV_SEARCH_DEBUG - ut_a(0 == cmp_dtuple_rec(search_tuple, rec)); + ut_a(0 == cmp_dtuple_rec(search_tuple, + rec, offsets)); #endif if (!row_sel_store_mysql_rec(buf, prebuilt, - rec)) { + rec, offsets)) { err = DB_TOO_BIG_RECORD; /* We let the main loop to do the @@ -3082,7 +3167,7 @@ row_search_for_mysql( /* NOTE that we do NOT store the cursor position */ - + mem_heap_free(heap); return(DB_SUCCESS); } else if (shortcut == SEL_EXHAUSTED) { @@ -3106,6 +3191,7 @@ row_search_for_mysql( /* NOTE that we do NOT store the cursor position */ + mem_heap_free(heap); return(DB_RECORD_NOT_FOUND); } shortcut_fails_too_big_rec: @@ -3219,6 +3305,8 @@ rec_loop: /* PHASE 4: Look for matching records in a loop */ rec = btr_pcur_get_rec(pcur); + comp = index->table->comp; + ut_ad(comp == page_is_comp(buf_frame_align(rec))); /* fputs("Using ", stderr); dict_index_name_print(stderr, index); @@ -3247,7 +3335,9 @@ rec_loop: a gap and therefore we do not set locks there. */ if (srv_locks_unsafe_for_binlog == FALSE) { - err = sel_set_rec_lock(rec, index, + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + err = sel_set_rec_lock(rec, index, offsets, prebuilt->select_lock_type, LOCK_ORDINARY, thr); } @@ -3267,9 +3357,11 @@ rec_loop: /* Do sanity checks in case our cursor has bumped into page corruption */ - next_offs = rec_get_next_offs(rec); + next_offs = rec_get_next_offs(rec, comp); - if (next_offs >= UNIV_PAGE_SIZE || next_offs < PAGE_SUPREMUM) { + if (next_offs >= UNIV_PAGE_SIZE + || next_offs < + (ulint) (comp ? PAGE_NEW_SUPREMUM : PAGE_OLD_SUPREMUM)) { if (srv_force_recovery == 0 || moves_up == FALSE) { ut_print_timestamp(stderr); @@ -3314,9 +3406,12 @@ rec_loop: } } + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + if (srv_force_recovery > 0) { - if (!rec_validate(rec) || !btr_index_rec_validate(rec, index, - FALSE)) { + if (!rec_validate(rec, offsets) + || !btr_index_rec_validate(rec, index, FALSE)) { fprintf(stderr, "InnoDB: Index corruption: rec offs %lu next offs %lu, page no %lu,\n" "InnoDB: ", @@ -3344,7 +3439,7 @@ rec_loop: /* fputs("Comparing rec and search tuple\n", stderr); */ - if (0 != cmp_dtuple_rec(search_tuple, rec)) { + if (0 != cmp_dtuple_rec(search_tuple, rec, offsets)) { if (prebuilt->select_lock_type != LOCK_NONE && set_also_gap_locks) { @@ -3356,6 +3451,7 @@ rec_loop: if (srv_locks_unsafe_for_binlog == FALSE) { err = sel_set_rec_lock(rec, index, + offsets, prebuilt->select_lock_type, LOCK_GAP, thr); } @@ -3377,7 +3473,7 @@ rec_loop: } else if (match_mode == ROW_SEL_EXACT_PREFIX) { - if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec)) { + if (!cmp_dtuple_is_prefix_of_rec(search_tuple, rec, offsets)) { if (prebuilt->select_lock_type != LOCK_NONE && set_also_gap_locks) { @@ -3389,6 +3485,7 @@ rec_loop: if (srv_locks_unsafe_for_binlog == FALSE) { err = sel_set_rec_lock(rec, index, + offsets, prebuilt->select_lock_type, LOCK_GAP, thr); } @@ -3420,27 +3517,27 @@ rec_loop: is a non-delete marked record, then it is enough to lock its existence with LOCK_REC_NOT_GAP. */ + ulint lock_type; + if (!set_also_gap_locks - || (unique_search && !rec_get_deleted_flag(rec))) { - err = sel_set_rec_lock(rec, index, - prebuilt->select_lock_type, - LOCK_REC_NOT_GAP, thr); + || (unique_search && !rec_get_deleted_flag(rec, comp))) { + lock_type = LOCK_REC_NOT_GAP; } else { /* If innodb_locks_unsafe_for_binlog option is used, - we lock only the record, i.e. next-key locking is + we lock only the record, i.e., next-key locking is not used. */ - if (srv_locks_unsafe_for_binlog) { - err = sel_set_rec_lock(rec, index, - prebuilt->select_lock_type, - LOCK_REC_NOT_GAP, thr); + if (srv_locks_unsafe_for_binlog) { + lock_type = LOCK_REC_NOT_GAP; } else { - err = sel_set_rec_lock(rec, index, - prebuilt->select_lock_type, - LOCK_ORDINARY, thr); - } + lock_type = LOCK_ORDINARY; + } } - + + err = sel_set_rec_lock(rec, index, offsets, + prebuilt->select_lock_type, + lock_type, thr); + if (err != DB_SUCCESS) { goto lock_wait_or_error; @@ -3463,7 +3560,7 @@ rec_loop: if (srv_force_recovery < 5 && !lock_clust_rec_cons_read_sees(rec, index, - trx->read_view)) { + offsets, trx->read_view)) { err = row_sel_build_prev_vers_for_mysql( trx->read_view, clust_index, @@ -3496,7 +3593,8 @@ rec_loop: } } - if (rec_get_deleted_flag(rec) && !cons_read_requires_clust_rec) { + if (rec_get_deleted_flag(rec, comp) + && !cons_read_requires_clust_rec) { /* The record is delete-marked: we can skip it if this is not a consistent read which might see an earlier version @@ -3532,7 +3630,7 @@ rec_loop: goto next_rec; } - if (rec_get_deleted_flag(clust_rec)) { + if (rec_get_deleted_flag(clust_rec, comp)) { /* The record is delete marked: we can skip it */ @@ -3544,6 +3642,15 @@ rec_loop: } } + if (prebuilt->need_to_access_clustered) { + ut_ad(rec == clust_rec || index == clust_index); + offsets = rec_reget_offsets(rec, clust_index, + offsets, ULINT_UNDEFINED, heap); + } else { + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + } + /* We found a qualifying row */ if (prebuilt->n_rows_fetched >= MYSQL_FETCH_CACHE_THRESHOLD @@ -3563,7 +3670,7 @@ rec_loop: not cache rows because there the cursor is a scrollable cursor. */ - row_sel_push_cache_row_for_mysql(prebuilt, rec); + row_sel_push_cache_row_for_mysql(prebuilt, rec, offsets); if (prebuilt->n_fetch_cached == MYSQL_FETCH_CACHE_SIZE) { @@ -3573,11 +3680,13 @@ rec_loop: goto next_rec; } else { if (prebuilt->template_type == ROW_MYSQL_DUMMY_TEMPLATE) { - ut_memcpy(buf + 4, rec - rec_get_extra_size(rec), - rec_get_size(rec)); - mach_write_to_4(buf, rec_get_extra_size(rec) + 4); + memcpy(buf + 4, rec - rec_offs_extra_size(offsets), + rec_offs_size(offsets)); + mach_write_to_4(buf, + rec_offs_extra_size(offsets) + 4); } else { - if (!row_sel_store_mysql_rec(buf, prebuilt, rec)) { + if (!row_sel_store_mysql_rec(buf, prebuilt, + rec, offsets)) { err = DB_TOO_BIG_RECORD; goto lock_wait_or_error; @@ -3586,7 +3695,7 @@ rec_loop: if (prebuilt->clust_index_was_generated) { row_sel_store_row_id_to_prebuilt(prebuilt, index_rec, - index); + index, offsets); } } got_row: @@ -3688,6 +3797,7 @@ lock_wait_or_error: fprintf(stderr, " cnt %lu ret value %lu err\n", cnt, err); */ trx->op_info = ""; + mem_heap_free(heap); return(err); normal_return: @@ -3711,6 +3821,7 @@ normal_return: trx->op_info = ""; + mem_heap_free(heap); return(ret); } diff --git a/innobase/row/row0umod.c b/innobase/row/row0umod.c index e16d696314b..ee9066a0d6f 100644 --- a/innobase/row/row0umod.c +++ b/innobase/row/row0umod.c @@ -430,6 +430,7 @@ row_undo_mod_del_unmark_sec_and_undo_update( found = row_search_index_entry(index, entry, mode, &pcur, &mtr); if (!found) { + heap = mem_heap_create(100); fputs("InnoDB: error in sec index entry del undo in\n" "InnoDB: ", stderr); dict_index_name_print(stderr, trx, index); @@ -438,11 +439,14 @@ row_undo_mod_del_unmark_sec_and_undo_update( dtuple_print(stderr, entry); fputs("\n" "InnoDB: record ", stderr); - rec_print(stderr, btr_pcur_get_rec(&pcur)); + rec_print(stderr, btr_pcur_get_rec(&pcur), + rec_get_offsets(btr_pcur_get_rec(&pcur), + index, ULINT_UNDEFINED, heap)); putc('\n', stderr); trx_print(stderr, trx); fputs("\n" "InnoDB: Submit a detailed bug report to http://bugs.mysql.com\n", stderr); + mem_heap_free(heap); } else { btr_cur_t* btr_cur = btr_pcur_get_btr_cur(&pcur); diff --git a/innobase/row/row0undo.c b/innobase/row/row0undo.c index bc3cc8ea9f3..42f5ef94854 100644 --- a/innobase/row/row0undo.c +++ b/innobase/row/row0undo.c @@ -151,6 +151,8 @@ row_undo_search_clust_to_pcur( mtr_t mtr; ibool ret; rec_t* rec; + mem_heap_t* heap; + const ulint* offsets; mtr_start(&mtr); @@ -161,8 +163,11 @@ row_undo_search_clust_to_pcur( rec = btr_pcur_get_rec(&(node->pcur)); + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, clust_index, ULINT_UNDEFINED, heap); + if (!found || 0 != ut_dulint_cmp(node->roll_ptr, - row_get_rec_roll_ptr(rec, clust_index))) { + row_get_rec_roll_ptr(rec, clust_index, offsets))) { /* We must remove the reservation on the undo log record BEFORE releasing the latch on the clustered index page: this @@ -175,7 +180,7 @@ row_undo_search_clust_to_pcur( ret = FALSE; } else { node->row = row_build(ROW_COPY_DATA, clust_index, rec, - node->heap); + offsets, node->heap); btr_pcur_store_position(&(node->pcur), &mtr); ret = TRUE; diff --git a/innobase/row/row0upd.c b/innobase/row/row0upd.c index a449b9f1736..e080d0ba577 100644 --- a/innobase/row/row0upd.c +++ b/innobase/row/row0upd.c @@ -301,19 +301,20 @@ recovery. */ void row_upd_rec_sys_fields_in_recovery( /*===============================*/ - rec_t* rec, /* in: record */ - ulint pos, /* in: TRX_ID position in rec */ - dulint trx_id, /* in: transaction id */ - dulint roll_ptr)/* in: roll ptr of the undo log record */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint pos, /* in: TRX_ID position in rec */ + dulint trx_id, /* in: transaction id */ + dulint roll_ptr)/* in: roll ptr of the undo log record */ { byte* field; ulint len; - field = rec_get_nth_field(rec, pos, &len); + field = rec_get_nth_field(rec, offsets, pos, &len); ut_ad(len == DATA_TRX_ID_LEN); trx_write_trx_id(field, trx_id); - field = rec_get_nth_field(rec, pos + 1, &len); + field = rec_get_nth_field(rec, offsets, pos + 1, &len); ut_ad(len == DATA_ROLL_PTR_LEN); trx_write_roll_ptr(field, roll_ptr); } @@ -361,8 +362,8 @@ row_upd_changes_field_size_or_external( /* out: TRUE if the update changes the size of some field in index or the field is external in rec or update */ - rec_t* rec, /* in: record in index */ dict_index_t* index, /* in: index */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ upd_t* update) /* in: update vector */ { upd_field_t* upd_field; @@ -372,6 +373,7 @@ row_upd_changes_field_size_or_external( ulint n_fields; ulint i; + ut_ad(rec_offs_validate(NULL, index, offsets)); n_fields = upd_get_n_fields(update); for (i = 0; i < n_fields; i++) { @@ -380,19 +382,19 @@ row_upd_changes_field_size_or_external( new_val = &(upd_field->new_val); new_len = new_val->len; - if (new_len == UNIV_SQL_NULL) { + if (new_len == UNIV_SQL_NULL && !rec_offs_comp(offsets)) { new_len = dtype_get_sql_null_size( dict_index_get_nth_type(index, i)); } - old_len = rec_get_nth_field_size(rec, upd_field->field_no); - + old_len = rec_offs_nth_size(offsets, upd_field->field_no); + if (old_len != new_len) { return(TRUE); } - if (rec_get_nth_field_extern_bit(rec, upd_field->field_no)) { + if (rec_offs_nth_extern(offsets, upd_field->field_no)) { return(TRUE); } @@ -414,15 +416,18 @@ a clustered index */ void row_upd_rec_in_place( /*=================*/ - rec_t* rec, /* in/out: record where replaced */ - upd_t* update) /* in: update vector */ + rec_t* rec, /* in/out: record where replaced */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update) /* in: update vector */ { upd_field_t* upd_field; dfield_t* new_val; ulint n_fields; ulint i; - rec_set_info_bits(rec, update->info_bits); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + + rec_set_info_bits(rec, rec_offs_comp(offsets), update->info_bits); n_fields = upd_get_n_fields(update); @@ -430,7 +435,7 @@ row_upd_rec_in_place( upd_field = upd_get_nth_field(update, i); new_val = &(upd_field->new_val); - rec_set_nth_field(rec, upd_field->field_no, + rec_set_nth_field(rec, offsets, upd_field->field_no, dfield_get_data(new_val), dfield_get_len(new_val)); } @@ -695,6 +700,7 @@ row_upd_build_sec_rec_difference_binary( upd_t* update; ulint n_diff; ulint i; + const ulint* offsets; /* This function is used only for a secondary index */ ut_a(0 == (index->type & DICT_CLUSTERED)); @@ -702,10 +708,11 @@ row_upd_build_sec_rec_difference_binary( update = upd_create(dtuple_get_n_fields(entry), heap); n_diff = 0; + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); for (i = 0; i < dtuple_get_n_fields(entry); i++) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); dfield = dtuple_get_nth_field(entry, i); @@ -768,6 +775,7 @@ row_upd_build_difference_binary( ulint trx_id_pos; ibool extern_bit; ulint i; + const ulint* offsets; /* This function is used only for a clustered index */ ut_a(index->type & DICT_CLUSTERED); @@ -779,9 +787,11 @@ row_upd_build_difference_binary( roll_ptr_pos = dict_index_get_sys_col_pos(index, DATA_ROLL_PTR); trx_id_pos = dict_index_get_sys_col_pos(index, DATA_TRX_ID); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + for (i = 0; i < dtuple_get_n_fields(entry); i++) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); dfield = dtuple_get_nth_field(entry, i); @@ -793,7 +803,7 @@ row_upd_build_difference_binary( goto skip_compare; } - extern_bit = rec_get_nth_field_extern_bit(rec, i); + extern_bit = rec_offs_nth_extern(offsets, i); if (extern_bit != upd_ext_vec_contains(ext_vec, n_ext_vec, i) || !dfield_data_is_binary_equal(dfield, len, data)) { @@ -1117,6 +1127,7 @@ void row_upd_copy_columns( /*=================*/ rec_t* rec, /* in: record in a clustered index */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ sym_node_t* column) /* in: first column in a column list, or NULL */ { @@ -1124,7 +1135,7 @@ row_upd_copy_columns( ulint len; while (column) { - data = rec_get_nth_field(rec, + data = rec_get_nth_field(rec, offsets, column->field_nos[SYM_CLUST_FIELD_NO], &len); eval_node_copy_and_alloc_val(column, data, len); @@ -1171,7 +1182,9 @@ row_upd_store_row( dict_index_t* clust_index; upd_t* update; rec_t* rec; - + mem_heap_t* heap; + const ulint* offsets; + ut_ad(node->pcur->latch_mode != BTR_NO_LATCHES); if (node->row != NULL) { @@ -1183,10 +1196,12 @@ row_upd_store_row( rec = btr_pcur_get_rec(node->pcur); - node->row = row_build(ROW_COPY_DATA, clust_index, rec, node->heap); - + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, clust_index, ULINT_UNDEFINED, heap); + node->row = row_build(ROW_COPY_DATA, clust_index, rec, offsets, + node->heap); node->ext_vec = mem_heap_alloc(node->heap, sizeof(ulint) - * rec_get_n_fields(rec)); + * rec_offs_n_fields(offsets)); if (node->is_delete) { update = NULL; } else { @@ -1194,7 +1209,8 @@ row_upd_store_row( } node->n_ext_vec = btr_push_update_extern_fields(node->ext_vec, - rec, update); + offsets, update); + mem_heap_free(heap); } /*************************************************************** @@ -1247,7 +1263,8 @@ row_upd_sec_index_entry( dtuple_print(stderr, entry); fputs("\n" "InnoDB: record ", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, + rec_get_offsets(rec, index, ULINT_UNDEFINED, heap)); putc('\n', stderr); trx_print(stderr, trx); @@ -1259,7 +1276,7 @@ row_upd_sec_index_entry( delete marked if we return after a lock wait in row_ins_index_entry below */ - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, index->table->comp)) { err = btr_cur_del_mark_set_sec_rec(0, btr_cur, TRUE, thr, &mtr); if (err == DB_SUCCESS && check_ref) { @@ -1362,6 +1379,7 @@ row_upd_clust_rec_by_insert( table = node->table; pcur = node->pcur; btr_cur = btr_pcur_get_btr_cur(pcur); + heap = mem_heap_create(500); if (node->state != UPD_NODE_INSERT_CLUSTERED) { @@ -1369,7 +1387,7 @@ row_upd_clust_rec_by_insert( btr_cur, TRUE, thr, mtr); if (err != DB_SUCCESS) { mtr_commit(mtr); - + mem_heap_free(heap); return(err); } @@ -1379,7 +1397,9 @@ row_upd_clust_rec_by_insert( record is removed from the index tree, or updated. */ btr_cur_mark_extern_inherited_fields(btr_cur_get_rec(btr_cur), - node->update, mtr); + rec_get_offsets(btr_cur_get_rec(btr_cur), + dict_table_get_first_index(table), + ULINT_UNDEFINED, heap), node->update, mtr); if (check_ref) { /* NOTE that the following call loses the position of pcur ! */ @@ -1399,8 +1419,6 @@ row_upd_clust_rec_by_insert( node->state = UPD_NODE_INSERT_CLUSTERED; - heap = mem_heap_create(500); - entry = row_build_index_entry(node->row, index, heap); row_upd_index_replace_new_col_vals(entry, index, node->update, NULL); @@ -1452,7 +1470,8 @@ row_upd_clust_rec( pcur = node->pcur; btr_cur = btr_pcur_get_btr_cur(pcur); - ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur))); + ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), + index->table->comp)); /* Try optimistic updating of the record, keeping changes within the page; we do not check locks because we assume the x-lock on the @@ -1488,7 +1507,8 @@ row_upd_clust_rec( ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); - ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur))); + ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), + index->table->comp)); err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur, &big_rec, node->update, @@ -1496,12 +1516,17 @@ row_upd_clust_rec( mtr_commit(mtr); if (err == DB_SUCCESS && big_rec) { + mem_heap_t* heap; + rec_t* rec; mtr_start(mtr); + + heap = mem_heap_create(100); + rec = btr_cur_get_rec(btr_cur); ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); - - err = btr_store_big_rec_extern_fields(index, - btr_cur_get_rec(btr_cur), - big_rec, mtr); + err = btr_store_big_rec_extern_fields(index, rec, + rec_get_offsets(rec, index, ULINT_UNDEFINED, heap), + big_rec, mtr); + mem_heap_free(heap); mtr_commit(mtr); } @@ -1585,7 +1610,10 @@ row_upd_clust_step( ulint err; mtr_t* mtr; mtr_t mtr_buf; - + rec_t* rec; + mem_heap_t* heap; + const ulint* offsets; + index = dict_table_get_first_index(node->table); check_ref = row_upd_index_is_referenced(index, thr_get_trx(thr)); @@ -1641,13 +1669,16 @@ row_upd_clust_step( } } + rec = btr_pcur_get_rec(pcur); + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + if (!node->has_clust_rec_x_lock) { err = lock_clust_rec_modify_check_and_lock(0, - btr_pcur_get_rec(pcur), - index, thr); + rec, index, offsets, thr); if (err != DB_SUCCESS) { mtr_commit(mtr); - + mem_heap_free(heap); return(err); } } @@ -1655,6 +1686,7 @@ row_upd_clust_step( /* NOTE: the following function calls will also commit mtr */ if (node->is_delete) { + mem_heap_free(heap); err = row_upd_del_mark_clust_rec(node, index, thr, check_ref, mtr); if (err != DB_SUCCESS) { @@ -1674,12 +1706,13 @@ row_upd_clust_step( if (!node->in_mysql_interface) { /* Copy the necessary columns from clust_rec and calculate the new values to set */ - - row_upd_copy_columns(btr_pcur_get_rec(pcur), + row_upd_copy_columns(rec, offsets, UT_LIST_GET_FIRST(node->columns)); row_upd_eval_new_vals(node->update); } + mem_heap_free(heap); + if (node->cmpl_info & UPD_NODE_NO_ORD_CHANGE) { err = row_upd_clust_rec(node, index, thr, mtr); @@ -1935,6 +1968,7 @@ row_upd_in_place_in_select( btr_pcur_t* pcur; btr_cur_t* btr_cur; ulint err; + mem_heap_t* heap; ut_ad(sel_node->select_will_do_update); ut_ad(sel_node->latch_mode == BTR_MODIFY_LEAF); @@ -1950,11 +1984,15 @@ row_upd_in_place_in_select( /* Copy the necessary columns from clust_rec and calculate the new values to set */ - row_upd_copy_columns(btr_pcur_get_rec(pcur), - UT_LIST_GET_FIRST(node->columns)); + heap = mem_heap_create(100); + row_upd_copy_columns(btr_pcur_get_rec(pcur), rec_get_offsets( + btr_pcur_get_rec(pcur), btr_cur->index, ULINT_UNDEFINED, heap), + UT_LIST_GET_FIRST(node->columns)); + mem_heap_free(heap); row_upd_eval_new_vals(node->update); - ut_ad(FALSE == rec_get_deleted_flag(btr_pcur_get_rec(pcur))); + ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), + btr_cur->index->table->comp)); ut_ad(node->cmpl_info & UPD_NODE_NO_SIZE_CHANGE); ut_ad(node->cmpl_info & UPD_NODE_NO_ORD_CHANGE); diff --git a/innobase/row/row0vers.c b/innobase/row/row0vers.c index bc17ede89e3..5281dbd67d7 100644 --- a/innobase/row/row0vers.c +++ b/innobase/row/row0vers.c @@ -41,10 +41,12 @@ row_vers_impl_x_locked_off_kernel( transaction; NOTE that the kernel mutex is temporarily released! */ rec_t* rec, /* in: record in a secondary index */ - dict_index_t* index) /* in: the secondary index */ + dict_index_t* index, /* in: the secondary index */ + const ulint* offsets)/* in: rec_get_offsets(rec, index) */ { dict_index_t* clust_index; rec_t* clust_rec; + ulint* clust_offsets; rec_t* version; rec_t* prev_version; dulint trx_id; @@ -59,6 +61,7 @@ row_vers_impl_x_locked_off_kernel( ibool rec_del; ulint err; mtr_t mtr; + ibool comp; #ifdef UNIV_SYNC_DEBUG ut_ad(mutex_own(&kernel_mutex)); @@ -96,7 +99,10 @@ row_vers_impl_x_locked_off_kernel( return(NULL); } - trx_id = row_get_rec_trx_id(clust_rec, clust_index); + heap = mem_heap_create(1024); + clust_offsets = rec_get_offsets(clust_rec, clust_index, + ULINT_UNDEFINED, heap); + trx_id = row_get_rec_trx_id(clust_rec, clust_index, clust_offsets); mtr_s_lock(&(purge_sys->latch), &mtr); @@ -106,19 +112,27 @@ row_vers_impl_x_locked_off_kernel( /* The transaction that modified or inserted clust_rec is no longer active: no implicit lock on rec */ + mem_heap_free(heap); mtr_commit(&mtr); return(NULL); } - if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index, TRUE)) { + if (!lock_check_trx_id_sanity(trx_id, clust_rec, clust_index, + clust_offsets, TRUE)) { /* Corruption noticed: try to avoid a crash by returning */ + mem_heap_free(heap); mtr_commit(&mtr); return(NULL); } + comp = index->table->comp; + ut_ad(index->table == clust_index->table); + ut_ad(comp == page_is_comp(buf_frame_align(rec))); + ut_ad(comp == page_is_comp(buf_frame_align(clust_rec))); + /* We look up if some earlier version, which was modified by the trx_id transaction, of the clustered index record would require rec to be in a different state (delete marked or unmarked, or have different field @@ -128,11 +142,10 @@ row_vers_impl_x_locked_off_kernel( different state, then the trx_id transaction has not yet had time to modify rec, and does not necessarily have an implicit x-lock on rec. */ - rec_del = rec_get_deleted_flag(rec); + rec_del = rec_get_deleted_flag(rec, comp); trx = NULL; version = clust_rec; - heap = NULL; for (;;) { mutex_exit(&kernel_mutex); @@ -146,18 +159,16 @@ row_vers_impl_x_locked_off_kernel( heap2 = heap; heap = mem_heap_create(1024); - err = trx_undo_prev_version_build(clust_rec, &mtr, version, - clust_index, heap, - &prev_version); - if (heap2) { - mem_heap_free(heap2); /* version was stored in heap2, - if heap2 != NULL */ - } + clust_index, clust_offsets, heap, + &prev_version); + mem_heap_free(heap2); /* free version and clust_offsets */ if (prev_version) { + clust_offsets = rec_get_offsets(prev_version, + clust_index, ULINT_UNDEFINED, heap); row = row_build(ROW_COPY_POINTERS, clust_index, - prev_version, heap); + prev_version, clust_offsets, heap); entry = row_build_index_entry(row, index, heap); } @@ -189,11 +200,11 @@ row_vers_impl_x_locked_off_kernel( if prev_version would require rec to be in a different state. */ - vers_del = rec_get_deleted_flag(prev_version); + vers_del = rec_get_deleted_flag(prev_version, comp); /* We check if entry and rec are identified in the alphabetical ordering */ - if (0 == cmp_dtuple_rec(entry, rec)) { + if (0 == cmp_dtuple_rec(entry, rec, offsets)) { /* The delete marks of rec and prev_version should be equal for rec to be in the state required by prev_version */ @@ -211,7 +222,7 @@ row_vers_impl_x_locked_off_kernel( dtuple_set_types_binary(entry, dtuple_get_n_fields(entry)); - if (0 != cmp_dtuple_rec(entry, rec)) { + if (0 != cmp_dtuple_rec(entry, rec, offsets)) { trx = trx_get_on_id(trx_id); @@ -226,7 +237,8 @@ row_vers_impl_x_locked_off_kernel( break; } - prev_trx_id = row_get_rec_trx_id(prev_version, clust_index); + prev_trx_id = row_get_rec_trx_id(prev_version, clust_index, + clust_offsets); if (0 != ut_dulint_cmp(trx_id, prev_trx_id)) { /* The versions modified by the trx_id transaction end @@ -297,12 +309,14 @@ row_vers_old_has_index_entry( rec_t* version; rec_t* prev_version; dict_index_t* clust_index; + ulint* clust_offsets; mem_heap_t* heap; mem_heap_t* heap2; dtuple_t* row; dtuple_t* entry; ulint err; - + ibool comp; + ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX) || mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_S_FIX)); @@ -313,10 +327,15 @@ row_vers_old_has_index_entry( clust_index = dict_table_get_first_index(index->table); - if (also_curr && !rec_get_deleted_flag(rec)) { + comp = index->table->comp; + ut_ad(comp == page_is_comp(buf_frame_align(rec))); + heap = mem_heap_create(1024); + clust_offsets = rec_get_offsets(rec, clust_index, + ULINT_UNDEFINED, heap); - heap = mem_heap_create(1024); - row = row_build(ROW_COPY_POINTERS, clust_index, rec, heap); + if (also_curr && !rec_get_deleted_flag(rec, comp)) { + row = row_build(ROW_COPY_POINTERS, clust_index, + rec, clust_offsets, heap); entry = row_build_index_entry(row, index, heap); /* NOTE that we cannot do the comparison as binary @@ -331,24 +350,17 @@ row_vers_old_has_index_entry( return(TRUE); } - - mem_heap_free(heap); } version = rec; - heap = NULL; for (;;) { heap2 = heap; heap = mem_heap_create(1024); - err = trx_undo_prev_version_build(rec, mtr, version, - clust_index, heap, - &prev_version); - if (heap2) { - mem_heap_free(heap2); /* version was stored in heap2, - if heap2 != NULL */ - } + clust_index, clust_offsets, heap, + &prev_version); + mem_heap_free(heap2); /* free version and clust_offsets */ if (err != DB_SUCCESS || !prev_version) { /* Versions end here */ @@ -358,9 +370,12 @@ row_vers_old_has_index_entry( return(FALSE); } - if (!rec_get_deleted_flag(prev_version)) { + clust_offsets = rec_get_offsets(prev_version, clust_index, + ULINT_UNDEFINED, heap); + + if (!rec_get_deleted_flag(prev_version, comp)) { row = row_build(ROW_COPY_POINTERS, clust_index, - prev_version, heap); + prev_version, clust_offsets, heap); entry = row_build_index_entry(row, index, heap); /* NOTE that we cannot do the comparison as binary @@ -412,6 +427,7 @@ row_vers_build_for_consistent_read( mem_heap_t* heap2; byte* buf; ulint err; + ulint* offsets; ut_ad(index->type & DICT_CLUSTERED); ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX) @@ -420,22 +436,23 @@ row_vers_build_for_consistent_read( #ifdef UNIV_SYNC_DEBUG ut_ad(!rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); #endif /* UNIV_SYNC_DEBUG */ - ut_ad(!read_view_sees_trx_id(view, row_get_rec_trx_id(rec, index))); + + heap = mem_heap_create(1024); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + + ut_ad(!read_view_sees_trx_id(view, + row_get_rec_trx_id(rec, index, offsets))); rw_lock_s_lock(&(purge_sys->latch)); version = rec; - heap = NULL; for (;;) { heap2 = heap; heap = mem_heap_create(1024); err = trx_undo_prev_version_build(rec, mtr, version, index, - heap, &prev_version); - if (heap2) { - mem_heap_free(heap2); /* version was stored in heap2, - if heap2 != NULL */ - } + offsets, heap, &prev_version); + mem_heap_free(heap2); /* free version and offsets */ if (err != DB_SUCCESS) { break; @@ -449,16 +466,17 @@ row_vers_build_for_consistent_read( break; } - prev_trx_id = row_get_rec_trx_id(prev_version, index); + offsets = rec_get_offsets(prev_version, index, + ULINT_UNDEFINED, heap); + prev_trx_id = row_get_rec_trx_id(prev_version, index, offsets); if (read_view_sees_trx_id(view, prev_trx_id)) { /* The view already sees this version: we can copy it to in_heap and return */ - buf = mem_heap_alloc(in_heap, rec_get_size( - prev_version)); - *old_vers = rec_copy(buf, prev_version); + buf = mem_heap_alloc(in_heap, rec_offs_size(offsets)); + *old_vers = rec_copy(buf, prev_version, offsets); err = DB_SUCCESS; break; diff --git a/innobase/srv/srv0srv.c b/innobase/srv/srv0srv.c index ba102b6f4b9..40befae424e 100644 --- a/innobase/srv/srv0srv.c +++ b/innobase/srv/srv0srv.c @@ -44,6 +44,7 @@ Created 10/8/1995 Heikki Tuuri #include "buf0flu.h" #include "btr0sea.h" #include "dict0load.h" +#include "dict0boot.h" #include "srv0start.h" #include "row0mysql.h" @@ -845,6 +846,7 @@ srv_init(void) { srv_conc_slot_t* conc_slot; srv_slot_t* slot; + dict_table_t* table; ulint i; srv_sys = mem_alloc(sizeof(srv_sys_t)); @@ -894,6 +896,31 @@ srv_init(void) UT_LIST_INIT(srv_sys->tasks); + /* create dummy table and index for old-style infimum and supremum */ + table = dict_mem_table_create("SYS_DUMMY1", + DICT_HDR_SPACE, 1, FALSE); + dict_mem_table_add_col(table, "DUMMY", DATA_CHAR, + DATA_ENGLISH | DATA_NOT_NULL, 8, 0); + + srv_sys->dummy_ind1 = dict_mem_index_create("SYS_DUMMY1", + "SYS_DUMMY1", DICT_HDR_SPACE, 0, 1); + dict_index_add_col(srv_sys->dummy_ind1, + dict_table_get_nth_col(table, 0), 0, 0); + srv_sys->dummy_ind1->table = table; + /* create dummy table and index for new-style infimum and supremum */ + table = dict_mem_table_create("SYS_DUMMY2", + DICT_HDR_SPACE, 1, TRUE); + dict_mem_table_add_col(table, "DUMMY", DATA_CHAR, + DATA_ENGLISH | DATA_NOT_NULL, 8, 0); + srv_sys->dummy_ind2 = dict_mem_index_create("SYS_DUMMY2", + "SYS_DUMMY2", DICT_HDR_SPACE, 0, 1); + dict_index_add_col(srv_sys->dummy_ind2, + dict_table_get_nth_col(table, 0), 0, 0); + srv_sys->dummy_ind2->table = table; + + /* avoid ut_ad(index->cached) in dict_index_get_n_unique_in_tree */ + srv_sys->dummy_ind1->cached = srv_sys->dummy_ind2->cached = TRUE; + /* Init the server concurrency restriction data structures */ os_fast_mutex_init(&srv_conc_mutex); diff --git a/innobase/trx/trx0rec.c b/innobase/trx/trx0rec.c index fe429d1cc62..484d4f62744 100644 --- a/innobase/trx/trx0rec.c +++ b/innobase/trx/trx0rec.c @@ -38,16 +38,18 @@ trx_undof_page_add_undo_rec_log( ulint new_free, /* in: end offset of the entry */ mtr_t* mtr) /* in: mtr */ { - byte* log_ptr; - ulint len; + byte* log_ptr; + const byte* log_end; + ulint len; - log_ptr = mlog_open(mtr, 30 + MLOG_BUF_MARGIN); + log_ptr = mlog_open(mtr, 11 + 13 + MLOG_BUF_MARGIN); if (log_ptr == NULL) { return; } + log_end = &log_ptr[11 + 13 + MLOG_BUF_MARGIN]; log_ptr = mlog_write_initial_log_record_fast(undo_page, MLOG_UNDO_INSERT, log_ptr, mtr); len = new_free - old_free - 4; @@ -55,14 +57,11 @@ trx_undof_page_add_undo_rec_log( mach_write_to_2(log_ptr, len); log_ptr += 2; - if (len < 256) { - ut_memcpy(log_ptr, undo_page + old_free + 2, len); - log_ptr += len; - } - - mlog_close(mtr, log_ptr); - - if (len >= MLOG_BUF_MARGIN) { + if (log_ptr + len <= log_end) { + memcpy(log_ptr, undo_page + old_free + 2, len); + mlog_close(mtr, log_ptr + len); + } else { + mlog_close(mtr, log_ptr); mlog_catenate_string(mtr, undo_page + old_free + 2, len); } } @@ -404,6 +403,7 @@ trx_undo_page_report_modify( delete marking is done */ rec_t* rec, /* in: clustered index record which has NOT yet been modified */ + const ulint* offsets, /* in: rec_get_offsets(rec, index) */ upd_t* update, /* in: update vector which tells the columns to be updated; in the case of a delete, this should be set to NULL */ @@ -430,6 +430,7 @@ trx_undo_page_report_modify( ulint i; ut_a(index->type & DICT_CLUSTERED); + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(mach_read_from_2(undo_page + TRX_UNDO_PAGE_HDR + TRX_UNDO_PAGE_TYPE) == TRX_UNDO_UPDATE); table = index->table; @@ -454,7 +455,7 @@ trx_undo_page_report_modify( /* Store first some general parameters to the undo log */ if (update) { - if (rec_get_deleted_flag(rec)) { + if (rec_get_deleted_flag(rec, table->comp)) { type_cmpl = TRX_UNDO_UPD_DEL_REC; } else { type_cmpl = TRX_UNDO_UPD_EXIST_REC; @@ -479,14 +480,15 @@ trx_undo_page_report_modify( /*----------------------------------------*/ /* Store the state of the info bits */ - bits = rec_get_info_bits(rec); + bits = rec_get_info_bits(rec, table->comp); mach_write_to_1(ptr, bits); ptr += 1; /* Store the values of the system columns */ - trx_id = dict_index_rec_get_sys_col(index, DATA_TRX_ID, rec); - - roll_ptr = dict_index_rec_get_sys_col(index, DATA_ROLL_PTR, rec); + trx_id = dict_index_rec_get_sys_col(index, offsets, + DATA_TRX_ID, rec); + roll_ptr = dict_index_rec_get_sys_col(index, offsets, + DATA_ROLL_PTR, rec); len = mach_dulint_write_compressed(ptr, trx_id); ptr += len; @@ -499,7 +501,7 @@ trx_undo_page_report_modify( for (i = 0; i < dict_index_get_n_unique(index); i++) { - field = rec_get_nth_field(rec, i, &flen); + field = rec_get_nth_field(rec, offsets, i, &flen); if (trx_undo_left(undo_page, ptr) < 4) { @@ -547,14 +549,14 @@ trx_undo_page_report_modify( ptr += len; /* Save the old value of field */ - field = rec_get_nth_field(rec, pos, &flen); + field = rec_get_nth_field(rec, offsets, pos, &flen); if (trx_undo_left(undo_page, ptr) < 5) { return(0); } - if (rec_get_nth_field_extern_bit(rec, pos)) { + if (rec_offs_nth_extern(offsets, pos)) { /* If a field has external storage, we add to flen the flag */ @@ -631,7 +633,7 @@ trx_undo_page_report_modify( ptr += len; /* Save the old value of field */ - field = rec_get_nth_field(rec, pos, &flen); + field = rec_get_nth_field(rec, offsets, pos, &flen); if (trx_undo_left(undo_page, ptr) < 5) { @@ -1008,7 +1010,9 @@ trx_undo_report_row_operation( ibool is_insert; trx_rseg_t* rseg; mtr_t mtr; - + mem_heap_t* heap; + ulint* offsets = NULL; + ut_a(index->type & DICT_CLUSTERED); if (flags & BTR_NO_UNDO_LOG_FLAG) { @@ -1019,7 +1023,6 @@ trx_undo_report_row_operation( } ut_ad(thr); - ut_a(index->type & DICT_CLUSTERED); ut_ad((op_type != TRX_UNDO_INSERT_OP) || (clust_entry && !update && !rec)); @@ -1063,6 +1066,8 @@ trx_undo_report_row_operation( mtr_start(&mtr); + heap = mem_heap_create(100); + for (;;) { undo_page = buf_page_get_gen(undo->space, page_no, RW_X_LATCH, undo->guess_page, @@ -1079,9 +1084,10 @@ trx_undo_report_row_operation( index, clust_entry, &mtr); } else { + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); offset = trx_undo_page_report_modify(undo_page, trx, - index, rec, update, - cmpl_info, &mtr); + index, rec, offsets, update, cmpl_info, &mtr); } if (offset == 0) { @@ -1123,7 +1129,7 @@ trx_undo_report_row_operation( mutex_exit(&(trx->undo_mutex)); mtr_commit(&mtr); - + mem_heap_free(heap); return(DB_OUT_OF_FILE_SPACE); } } @@ -1140,6 +1146,7 @@ trx_undo_report_row_operation( *roll_ptr = trx_undo_build_roll_ptr(is_insert, rseg->id, page_no, offset); + mem_heap_free(heap); return(DB_SUCCESS); } @@ -1236,6 +1243,7 @@ trx_undo_prev_version_build( index_rec page and purge_view */ rec_t* rec, /* in: version of a clustered index record */ dict_index_t* index, /* in: clustered index */ + ulint* offsets,/* in: rec_get_offsets(rec, index) */ mem_heap_t* heap, /* in: memory heap from which the memory needed is allocated */ rec_t** old_vers)/* out, own: previous version, or NULL if @@ -1258,7 +1266,7 @@ trx_undo_prev_version_build( ibool dummy_extern; byte* buf; ulint err; - + ulint* index_offsets = NULL; #ifdef UNIV_SYNC_DEBUG ut_ad(rw_lock_own(&(purge_sys->latch), RW_LOCK_SHARED)); #endif /* UNIV_SYNC_DEBUG */ @@ -1266,21 +1274,25 @@ trx_undo_prev_version_build( MTR_MEMO_PAGE_S_FIX) || mtr_memo_contains(index_mtr, buf_block_align(index_rec), MTR_MEMO_PAGE_X_FIX)); + ut_ad(rec_offs_validate(rec, index, offsets)); + if (!(index->type & DICT_CLUSTERED)) { fprintf(stderr, "InnoDB: Error: trying to access" " update undo rec for non-clustered index %s\n" "InnoDB: Submit a detailed bug report to" " http://bugs.mysql.com\n" "InnoDB: index record ", index->name); - rec_print(stderr, index_rec); + index_offsets = rec_get_offsets(index_rec, index, + ULINT_UNDEFINED, heap); + rec_print(stderr, index_rec, index_offsets); fputs("\n" "InnoDB: record version ", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); putc('\n', stderr); return(DB_ERROR); } - roll_ptr = row_get_rec_roll_ptr(rec, index); + roll_ptr = row_get_rec_roll_ptr(rec, index, offsets); old_roll_ptr = roll_ptr; *old_vers = NULL; @@ -1292,7 +1304,7 @@ trx_undo_prev_version_build( return(DB_SUCCESS); } - rec_trx_id = row_get_rec_trx_id(rec, index); + rec_trx_id = row_get_rec_trx_id(rec, index, offsets); err = trx_undo_get_undo_rec(roll_ptr, rec_trx_id, &undo_rec, heap); @@ -1341,10 +1353,12 @@ trx_undo_prev_version_build( ut_print_buf(stderr, undo_rec, 150); fputs("\n" "InnoDB: index record ", stderr); - rec_print(stderr, index_rec); + index_offsets = rec_get_offsets(index_rec, index, + ULINT_UNDEFINED, heap); + rec_print(stderr, index_rec, index_offsets); fputs("\n" "InnoDB: record version ", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); fprintf(stderr, "\n" "InnoDB: Record trx id %lu %lu, update rec trx id %lu %lu\n" "InnoDB: Roll ptr in rec %lu %lu, in update rec %lu %lu\n", @@ -1358,11 +1372,10 @@ trx_undo_prev_version_build( (ulong) ut_dulint_get_low(roll_ptr)); trx_purge_sys_print(); - return(DB_ERROR); } - if (row_upd_changes_field_size_or_external(rec, index, update)) { + if (row_upd_changes_field_size_or_external(index, offsets, update)) { ulint* ext_vect; ulint n_ext_vect; @@ -1372,27 +1385,28 @@ trx_undo_prev_version_build( those fields that update updates to become externally stored fields. Store the info to ext_vect: */ - ext_vect = mem_alloc(sizeof(ulint) * rec_get_n_fields(rec)); - n_ext_vect = btr_push_update_extern_fields(ext_vect, rec, + ext_vect = mem_alloc(sizeof(ulint) + * rec_offs_n_fields(offsets)); + n_ext_vect = btr_push_update_extern_fields(ext_vect, offsets, update); entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); row_upd_index_replace_new_col_vals(entry, index, update, heap); - buf = mem_heap_alloc(heap, rec_get_converted_size(entry)); + buf = mem_heap_alloc(heap, + rec_get_converted_size(index, entry)); - *old_vers = rec_convert_dtuple_to_rec(buf, entry); + *old_vers = rec_convert_dtuple_to_rec(buf, index, entry); /* Now set the extern bits in the old version of the record */ - rec_set_field_extern_bits(*old_vers, ext_vect, n_ext_vect, - NULL); + rec_set_field_extern_bits(*old_vers, index, + ext_vect, n_ext_vect, NULL); mem_free(ext_vect); } else { - buf = mem_heap_alloc(heap, rec_get_size(rec)); - - *old_vers = rec_copy(buf, rec); - - row_upd_rec_in_place(*old_vers, update); + buf = mem_heap_alloc(heap, rec_offs_size(offsets)); + *old_vers = rec_copy(buf, rec, offsets); + rec_offs_make_valid(*old_vers, index, offsets); + row_upd_rec_in_place(*old_vers, offsets, update); } return(DB_SUCCESS); diff --git a/innobase/trx/trx0undo.c b/innobase/trx/trx0undo.c index b6c931f61fa..4bfa9c20a54 100644 --- a/innobase/trx/trx0undo.c +++ b/innobase/trx/trx0undo.c @@ -880,7 +880,6 @@ trx_undo_free_page( list */ ulint space, /* in: space */ ulint hdr_page_no, /* in: header page number */ - ulint hdr_offset, /* in: header offset */ ulint page_no, /* in: page number to free: must not be the header page */ mtr_t* mtr) /* in: mtr which does not have a latch to any @@ -893,7 +892,6 @@ trx_undo_free_page( trx_rsegf_t* rseg_header; ulint hist_size; - UT_NOT_USED(hdr_offset); ut_a(hdr_page_no != page_no); #ifdef UNIV_SYNC_DEBUG ut_ad(!mutex_own(&kernel_mutex)); @@ -950,8 +948,7 @@ trx_undo_free_page_in_rollback( #endif /* UNIV_SYNC_DEBUG */ last_page_no = trx_undo_free_page(undo->rseg, FALSE, undo->space, - undo->hdr_page_no, undo->hdr_offset, - page_no, mtr); + undo->hdr_page_no, page_no, mtr); undo->last_page_no = last_page_no; undo->size--; @@ -1119,7 +1116,7 @@ loop: trx_undo_empty_header_page(space, hdr_page_no, hdr_offset, &mtr); } else { - trx_undo_free_page(rseg, TRUE, space, hdr_page_no, hdr_offset, + trx_undo_free_page(rseg, TRUE, space, hdr_page_no, page_no, &mtr); } -- cgit v1.2.1