diff options
author | unknown <marko@hundin.mysql.fi> | 2004-12-02 19:45:07 +0200 |
---|---|---|
committer | unknown <marko@hundin.mysql.fi> | 2004-12-02 19:45:07 +0200 |
commit | d2c4b545405845900af52e033240040ee2ab83dd (patch) | |
tree | c3c716219a8f464ef096a6dd06835d4bfb627c8a /innobase/btr | |
parent | 4a9ef43a4961ee8795f143be4d390ab67bbf65d7 (diff) | |
download | mariadb-git-d2c4b545405845900af52e033240040ee2ab83dd.tar.gz |
Many files:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/btr/btr0btr.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/btr/btr0cur.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/btr/btr0pcur.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/btr/btr0sea.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/data/data0data.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/data/data0type.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/dict/dict0boot.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/dict/dict0crea.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/dict/dict0dict.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/dict/dict0load.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/dict/dict0mem.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/fil/fil0fil.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/fsp/fsp0fsp.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/ibuf/ibuf0ibuf.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/btr0btr.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/btr0btr.ic:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/btr0cur.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/btr0cur.ic:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/btr0pcur.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/btr0sea.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/data0type.ic:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/dict0dict.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/dict0dict.ic:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/dict0mem.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/lock0lock.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/lock0lock.ic:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/mtr0log.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/mtr0mtr.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/page0cur.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/page0cur.ic:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/page0page.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/page0page.ic:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/rem0cmp.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/rem0cmp.ic:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/rem0rec.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/rem0rec.ic:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/row0row.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/row0row.ic:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/row0upd.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/row0upd.ic:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/row0vers.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/row0vers.ic:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/srv0srv.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/trx0rec.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/ut0byte.h:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/include/ut0byte.ic:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/lock/lock0lock.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/log/log0recv.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/mtr/mtr0log.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/page/page0cur.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/page/page0page.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/pars/pars0pars.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/rem/rem0cmp.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/rem/rem0rec.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/row/row0ins.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/row/row0mysql.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/row/row0purge.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/row/row0row.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/row/row0sel.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/row/row0umod.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/row/row0undo.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/row/row0upd.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/row/row0vers.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/srv/srv0srv.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/trx/trx0rec.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
innobase/trx/trx0undo.c:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
sql/ha_innodb.cc:
Implement more compact InnoDB record format.
Old format is available as CREATE TABLE ... ROW_FORMAT=DYNAMIC.
Diffstat (limited to 'innobase/btr')
-rw-r--r-- | innobase/btr/btr0btr.c | 474 | ||||
-rw-r--r-- | innobase/btr/btr0cur.c | 546 | ||||
-rw-r--r-- | innobase/btr/btr0pcur.c | 52 | ||||
-rw-r--r-- | innobase/btr/btr0sea.c | 166 |
4 files changed, 789 insertions, 449 deletions
diff --git a/innobase/btr/btr0btr.c b/innobase/btr/btr0btr.c index ae967e0525e..06602c856fa 100644 --- a/innobase/btr/btr0btr.c +++ b/innobase/btr/btr0btr.c @@ -86,15 +86,6 @@ btr_page_create( page_t* page, /* in: page to be created */ dict_tree_t* tree, /* in: index tree */ mtr_t* mtr); /* in: mtr */ -/****************************************************************** -Sets the child node file address in a node pointer. */ -UNIV_INLINE -void -btr_node_ptr_set_child_page_no( -/*===========================*/ - rec_t* rec, /* in: node pointer record */ - ulint page_no, /* in: child node address */ - mtr_t* mtr); /* in: mtr */ /**************************************************************** Returns the upper level node pointer to a page. It is assumed that mtr holds an x-latch on the tree. */ @@ -128,7 +119,10 @@ btr_page_insert_fits( rec_t* split_rec, /* in: suggestion for first record on upper half-page, or NULL if tuple should be first */ - dtuple_t* tuple); /* in: tuple to insert */ + const ulint* offsets, /* in: rec_get_offsets( + split_rec, cursor->index) */ + dtuple_t* tuple, /* in: tuple to insert */ + mem_heap_t* heap); /* in: temporary memory heap */ /****************************************************************** Gets the root node of a tree and x-latches it. */ @@ -143,11 +137,13 @@ btr_root_get( ulint space; ulint root_page_no; page_t* root; + ibool comp = UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp; space = dict_tree_get_space(tree); root_page_no = dict_tree_get_page(tree); root = btr_page_get(space, root_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(root) == comp); return(root); } @@ -194,6 +190,7 @@ btr_get_prev_user_rec( MTR_MEMO_PAGE_S_FIX)) || (mtr_memo_contains(mtr, buf_block_align(prev_page), MTR_MEMO_PAGE_X_FIX))); + ut_a(page_is_comp(prev_page) == page_is_comp(page)); prev_rec = page_rec_get_prev(page_get_supremum_rec(prev_page)); @@ -246,6 +243,7 @@ btr_get_next_user_rec( || (mtr_memo_contains(mtr, buf_block_align(next_page), MTR_MEMO_PAGE_X_FIX))); + ut_a(page_is_comp(next_page) == page_is_comp(page)); next_rec = page_rec_get_next(page_get_infimum_rec(next_page)); return(next_rec); @@ -267,7 +265,8 @@ btr_page_create( { ut_ad(mtr_memo_contains(mtr, buf_block_align(page), MTR_MEMO_PAGE_X_FIX)); - page_create(page, mtr); + page_create(page, mtr, + UT_LIST_GET_FIRST(tree->tree_indexes)->table->comp); buf_block_align(page)->check_index_page_at_flush = TRUE; btr_page_set_index_id(page, tree->id, mtr); @@ -503,20 +502,21 @@ UNIV_INLINE void btr_node_ptr_set_child_page_no( /*===========================*/ - rec_t* rec, /* in: node pointer record */ - ulint page_no, /* in: child node address */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: node pointer record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint page_no,/* in: child node address */ + mtr_t* mtr) /* in: mtr */ { - ulint n_fields; byte* field; ulint len; + ut_ad(rec_offs_validate(rec, NULL, offsets)); ut_ad(0 < btr_page_get_level(buf_frame_align(rec), mtr)); - - n_fields = rec_get_n_fields(rec); + ut_ad(!rec_offs_comp(offsets) || rec_get_node_ptr_flag(rec)); /* The child address is in the last field */ - field = rec_get_nth_field(rec, n_fields - 1, &len); + field = rec_get_nth_field(rec, offsets, + rec_offs_n_fields(offsets) - 1, &len); ut_ad(len == 4); @@ -529,16 +529,18 @@ static page_t* btr_node_ptr_get_child( /*===================*/ - /* out: child page, x-latched */ - rec_t* node_ptr, /* in: node pointer */ - mtr_t* mtr) /* in: mtr */ + /* out: child page, x-latched */ + rec_t* node_ptr,/* in: node pointer */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + mtr_t* mtr) /* in: mtr */ { ulint page_no; ulint space; page_t* page; - + + ut_ad(rec_offs_validate(node_ptr, NULL, offsets)); space = buf_frame_get_space_id(node_ptr); - page_no = btr_node_ptr_get_child_page_no(node_ptr); + page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); page = btr_page_get(space, page_no, RW_X_LATCH, mtr); @@ -564,6 +566,8 @@ btr_page_get_father_for_rec( dtuple_t* tuple; btr_cur_t cursor; rec_t* node_ptr; + dict_index_t* index; + ulint* offsets; ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree), MTR_MEMO_X_LOCK)); @@ -576,18 +580,20 @@ btr_page_get_father_for_rec( tuple = dict_tree_build_node_ptr(tree, user_rec, 0, heap, btr_page_get_level(page, mtr)); + index = UT_LIST_GET_FIRST(tree->tree_indexes); /* In the following, we choose just any index from the tree as the first parameter for btr_cur_search_to_nth_level. */ - - btr_cur_search_to_nth_level(UT_LIST_GET_FIRST(tree->tree_indexes), + + btr_cur_search_to_nth_level(index, btr_page_get_level(page, mtr) + 1, tuple, PAGE_CUR_LE, BTR_CONT_MODIFY_TREE, &cursor, 0, mtr); node_ptr = btr_cur_get_rec(&cursor); + offsets = rec_get_offsets(node_ptr, index, ULINT_UNDEFINED, heap); - if (btr_node_ptr_get_child_page_no(node_ptr) != + if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != buf_frame_get_page_no(page)) { fputs("InnoDB: Dump of the child page:\n", stderr); buf_page_print(buf_frame_align(page)); @@ -595,17 +601,22 @@ btr_page_get_father_for_rec( buf_page_print(buf_frame_align(node_ptr)); fputs("InnoDB: Corruption of an index tree: table ", stderr); - ut_print_name(stderr, NULL, - UT_LIST_GET_FIRST(tree->tree_indexes)->table_name); + ut_print_name(stderr, NULL, index->table_name); fputs(", index ", stderr); - ut_print_name(stderr, NULL, - UT_LIST_GET_FIRST(tree->tree_indexes)->name); + ut_print_name(stderr, NULL, index->name); fprintf(stderr, ",\n" "InnoDB: father ptr page no %lu, child page no %lu\n", - (ulong) btr_node_ptr_get_child_page_no(node_ptr), + (ulong) + btr_node_ptr_get_child_page_no(node_ptr, offsets), (ulong) buf_frame_get_page_no(page)); - page_rec_print(page_rec_get_next(page_get_infimum_rec(page))); - page_rec_print(node_ptr); + offsets = rec_reget_offsets(page_rec_get_next( + page_get_infimum_rec(page)), index, + offsets, ULINT_UNDEFINED, heap); + page_rec_print(page_rec_get_next(page_get_infimum_rec(page)), + offsets); + offsets = rec_reget_offsets(node_ptr, index, offsets, + ULINT_UNDEFINED, heap); + page_rec_print(node_ptr, offsets); fputs( "InnoDB: You should dump + drop + reimport the table to fix the\n" @@ -614,7 +625,7 @@ btr_page_get_father_for_rec( "InnoDB: forcing recovery. Then dump + drop + reimport.\n", stderr); } - ut_a(btr_node_ptr_get_child_page_no(node_ptr) == + ut_a(btr_node_ptr_get_child_page_no(node_ptr, offsets) == buf_frame_get_page_no(page)); mem_heap_free(heap); @@ -649,6 +660,7 @@ btr_create( ulint type, /* in: type of the index */ ulint space, /* in: space where created */ dulint index_id,/* in: index id */ + ibool comp, /* in: TRUE=compact page format */ mtr_t* mtr) /* in: mini-transaction handle */ { ulint page_no; @@ -716,7 +728,7 @@ btr_create( } /* Create a new index page on the the allocated segment page */ - page = page_create(frame, mtr); + page = page_create(frame, mtr, comp); buf_block_align(page)->check_index_page_at_flush = TRUE; /* Set the index id of the page */ @@ -821,12 +833,14 @@ static void btr_page_reorganize_low( /*====================*/ - ibool recovery,/* in: TRUE if called in recovery: locks should not - be updated, i.e., there cannot exist locks on the - page, and a hash index should not be dropped: it - cannot exist */ - page_t* page, /* in: page to be reorganized */ - mtr_t* mtr) /* in: mtr */ + ibool recovery,/* in: TRUE if called in recovery: + locks should not be updated, i.e., + there cannot exist locks on the + page, and a hash index should not be + dropped: it cannot exist */ + page_t* page, /* in: page to be reorganized */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { page_t* new_page; ulint log_mode; @@ -841,7 +855,9 @@ btr_page_reorganize_low( max_ins_size1 = page_get_max_insert_size_after_reorganize(page, 1); /* Write the log record */ - mlog_write_initial_log_record(page, MLOG_PAGE_REORGANIZE, mtr); + mlog_open_and_write_index(mtr, page, index, index->table->comp + ? MLOG_COMP_PAGE_REORGANIZE + : MLOG_PAGE_REORGANIZE, 0); /* Turn logging off */ log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); @@ -858,14 +874,14 @@ btr_page_reorganize_low( /* Recreate the page: note that global data on page (possible segment headers, next page-field, etc.) is preserved intact */ - page_create(page, mtr); + page_create(page, mtr, index->table->comp); buf_block_align(page)->check_index_page_at_flush = TRUE; /* Copy the records from the temporary space to the recreated page; do not copy the lock bits yet */ page_copy_rec_list_end_no_locks(page, new_page, - page_get_infimum_rec(new_page), mtr); + page_get_infimum_rec(new_page), index, mtr); /* Copy max trx id to recreated page */ page_set_max_trx_id(page, page_get_max_trx_id(new_page)); @@ -901,10 +917,11 @@ Reorganizes an index page. */ void btr_page_reorganize( /*================*/ - page_t* page, /* in: page to be reorganized */ - mtr_t* mtr) /* in: mtr */ + page_t* page, /* in: page to be reorganized */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { - btr_page_reorganize_low(FALSE, page, mtr); + btr_page_reorganize_low(FALSE, page, index, mtr); } /*************************************************************** @@ -913,18 +930,20 @@ Parses a redo log record of reorganizing a page. */ byte* btr_parse_page_reorganize( /*======================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr __attribute__((unused)), /* in: buffer end */ - page_t* page, /* in: page or NULL */ - mtr_t* mtr) /* in: mtr or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr __attribute__((unused)), + /* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ { ut_ad(ptr && end_ptr); /* The record is empty, except for the record initial part */ if (page) { - btr_page_reorganize_low(TRUE, page, mtr); + btr_page_reorganize_low(TRUE, page, index, mtr); } return(ptr); @@ -946,7 +965,7 @@ btr_page_empty( /* Recreate the page: note that global data on page (possible segment headers, next page-field, etc.) is preserved intact */ - page_create(page, mtr); + page_create(page, mtr, page_is_comp(page)); buf_block_align(page)->check_index_page_at_flush = TRUE; } @@ -1011,7 +1030,7 @@ btr_root_raise_and_insert( /* Move the records from root to the new page */ page_move_rec_list_end(new_page, root, page_get_infimum_rec(root), - mtr); + cursor->index, mtr); /* If this is a pessimistic insert which is actually done to perform a pessimistic update then we have stored the lock information of the record to be inserted on the infimum of the @@ -1031,7 +1050,7 @@ btr_root_raise_and_insert( node_ptr = dict_tree_build_node_ptr(tree, rec, new_page_no, heap, level); /* Reorganize the root to get free space */ - btr_page_reorganize(root, mtr); + btr_page_reorganize(root, cursor->index, mtr); page_cursor = btr_cur_get_page_cur(cursor); @@ -1039,7 +1058,8 @@ btr_root_raise_and_insert( page_cur_set_before_first(root, page_cursor); - node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr, mtr); + node_ptr_rec = page_cur_tuple_insert(page_cursor, node_ptr, + cursor->index, mtr); ut_ad(node_ptr_rec); @@ -1047,7 +1067,7 @@ btr_root_raise_and_insert( as there is no lower alphabetical limit to records in the leftmost node of a level: */ - btr_set_min_rec_mark(node_ptr_rec, mtr); + btr_set_min_rec_mark(node_ptr_rec, cursor->index->table->comp, mtr); /* Free the memory heap */ mem_heap_free(heap); @@ -1060,7 +1080,8 @@ btr_root_raise_and_insert( ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes), new_page); /* Reposition the cursor to the child node */ - page_cur_search(new_page, tuple, PAGE_CUR_LE, page_cursor); + page_cur_search(new_page, cursor->index, tuple, + PAGE_CUR_LE, page_cursor); /* Split the child and insert tuple */ return(btr_page_split_and_insert(cursor, tuple, mtr)); @@ -1190,11 +1211,13 @@ btr_page_get_sure_split_rec( rec_t* rec; rec_t* next_rec; ulint n; - + mem_heap_t* heap; + ulint* offsets; + page = btr_cur_get_page(cursor); - insert_size = rec_get_converted_size(tuple); - free_space = page_get_free_space_of_empty(); + insert_size = rec_get_converted_size(cursor->index, tuple); + free_space = page_get_free_space_of_empty(cursor->index->table->comp); /* free_space is now the free space of a created new page */ @@ -1208,6 +1231,9 @@ btr_page_get_sure_split_rec( ins_rec = btr_cur_get_rec(cursor); rec = page_get_infimum_rec(page); + heap = mem_heap_create(100); + offsets = NULL; + /* We start to include records to the left half, and when the space reserved by them exceeds half of total_space, then if the included records fit on the left page, they will be put there @@ -1230,7 +1256,9 @@ btr_page_get_sure_split_rec( /* Include tuple */ incl_data += insert_size; } else { - incl_data += rec_get_size(rec); + offsets = rec_reget_offsets(rec, cursor->index, + offsets, ULINT_UNDEFINED, heap); + incl_data += rec_offs_size(offsets); } n++; @@ -1252,11 +1280,12 @@ btr_page_get_sure_split_rec( next_rec = page_rec_get_next(rec); } if (next_rec != page_get_supremum_rec(page)) { - + mem_heap_free(heap); return(next_rec); } } + mem_heap_free(heap); return(rec); } } @@ -1275,7 +1304,10 @@ btr_page_insert_fits( rec_t* split_rec, /* in: suggestion for first record on upper half-page, or NULL if tuple to be inserted should be first */ - dtuple_t* tuple) /* in: tuple to insert */ + const ulint* offsets, /* in: rec_get_offsets( + split_rec, cursor->index) */ + dtuple_t* tuple, /* in: tuple to insert */ + mem_heap_t* heap) /* in: temporary memory heap */ { page_t* page; ulint insert_size; @@ -1284,11 +1316,19 @@ btr_page_insert_fits( ulint total_n_recs; rec_t* rec; rec_t* end_rec; + ulint* offs; page = btr_cur_get_page(cursor); - - insert_size = rec_get_converted_size(tuple); - free_space = page_get_free_space_of_empty(); + + ut_ad(!split_rec == !offsets); + ut_ad(!offsets + || cursor->index->table->comp == rec_offs_comp(offsets)); + ut_ad(!offsets + || rec_offs_validate(split_rec, cursor->index, offsets)); + ut_ad(page_is_comp(page) == cursor->index->table->comp); + + insert_size = rec_get_converted_size(cursor->index, tuple); + free_space = page_get_free_space_of_empty(cursor->index->table->comp); /* free_space is now the free space of a created new page */ @@ -1303,7 +1343,7 @@ btr_page_insert_fits( rec = page_rec_get_next(page_get_infimum_rec(page)); end_rec = page_rec_get_next(btr_cur_get_rec(cursor)); - } else if (cmp_dtuple_rec(tuple, split_rec) >= 0) { + } else if (cmp_dtuple_rec(tuple, split_rec, offsets) >= 0) { rec = page_rec_get_next(page_get_infimum_rec(page)); end_rec = split_rec; @@ -1321,11 +1361,16 @@ btr_page_insert_fits( return(TRUE); } + offs = NULL; + while (rec != end_rec) { /* In this loop we calculate the amount of reserved space after rec is removed from page. */ - total_data -= rec_get_size(rec); + offs = rec_reget_offsets(rec, cursor->index, offs, + ULINT_UNDEFINED, heap); + + total_data -= rec_offs_size(offs); total_n_recs--; if (total_data + page_dir_calc_reserved_space(total_n_recs) @@ -1411,6 +1456,10 @@ btr_attach_half_pages( MTR_MEMO_PAGE_X_FIX)); ut_ad(mtr_memo_contains(mtr, buf_block_align(new_page), MTR_MEMO_PAGE_X_FIX)); + ut_a(page_is_comp(page) == page_is_comp(new_page)); + + /* Create a memory heap where the data tuple is stored */ + heap = mem_heap_create(100); /* Based on split direction, decide upper and lower pages */ if (direction == FSP_DOWN) { @@ -1426,7 +1475,12 @@ btr_attach_half_pages( /* Replace the address of the old child node (= page) with the address of the new lower half */ - btr_node_ptr_set_child_page_no(node_ptr, lower_page_no, mtr); + btr_node_ptr_set_child_page_no(node_ptr, + rec_get_offsets(node_ptr, + UT_LIST_GET_FIRST(tree->tree_indexes), + ULINT_UNDEFINED, heap), + lower_page_no, mtr); + mem_heap_empty(heap); } else { lower_page_no = buf_frame_get_page_no(page); upper_page_no = buf_frame_get_page_no(new_page); @@ -1434,9 +1488,6 @@ btr_attach_half_pages( upper_page = new_page; } - /* Create a memory heap where the data tuple is stored */ - heap = mem_heap_create(100); - /* Get the level of the split pages */ level = btr_page_get_level(page, mtr); @@ -1465,6 +1516,7 @@ btr_attach_half_pages( if (prev_page_no != FIL_NULL) { prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(prev_page) == page_is_comp(page)); btr_page_set_next(prev_page, lower_page_no, mtr); } @@ -1472,6 +1524,7 @@ btr_attach_half_pages( if (next_page_no != FIL_NULL) { next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(next_page) == page_is_comp(page)); btr_page_set_prev(next_page, upper_page_no, mtr); } @@ -1522,7 +1575,15 @@ btr_page_split_and_insert( ibool insert_will_fit; ulint n_iterations = 0; rec_t* rec; + mem_heap_t* heap; + ulint n_uniq; + ulint* offsets; + + heap = mem_heap_create(1024); + n_uniq = dict_index_get_n_unique_in_tree(cursor->index); func_start: + mem_heap_empty(heap); + offsets = NULL; tree = btr_cur_get_tree(cursor); ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree), @@ -1574,9 +1635,10 @@ func_start: first_rec = split_rec; move_limit = split_rec; } else { - buf = mem_alloc(rec_get_converted_size(tuple)); + buf = mem_alloc(rec_get_converted_size(cursor->index, tuple)); - first_rec = rec_convert_dtuple_to_rec(buf, tuple); + first_rec = rec_convert_dtuple_to_rec(buf, + cursor->index, tuple); move_limit = page_rec_get_next(btr_cur_get_rec(cursor)); } @@ -1593,7 +1655,16 @@ func_start: We can then move the records after releasing the tree latch, thus reducing the tree latch contention. */ - insert_will_fit = btr_page_insert_fits(cursor, split_rec, tuple); + if (split_rec) { + offsets = rec_reget_offsets(split_rec, cursor->index, + offsets, n_uniq, heap); + + insert_will_fit = btr_page_insert_fits(cursor, + split_rec, offsets, tuple, heap); + } else { + insert_will_fit = btr_page_insert_fits(cursor, + NULL, NULL, tuple, heap); + } if (insert_will_fit && (btr_page_get_level(page, mtr) == 0)) { @@ -1605,7 +1676,8 @@ func_start: if (direction == FSP_DOWN) { /* fputs("Split left\n", stderr); */ - page_move_rec_list_start(new_page, page, move_limit, mtr); + page_move_rec_list_start(new_page, page, move_limit, + cursor->index, mtr); left_page = new_page; right_page = page; @@ -1613,7 +1685,8 @@ func_start: } else { /* fputs("Split right\n", stderr); */ - page_move_rec_list_end(new_page, page, move_limit, mtr); + page_move_rec_list_end(new_page, page, move_limit, + cursor->index, mtr); left_page = page; right_page = new_page; @@ -1626,19 +1699,25 @@ func_start: if (split_rec == NULL) { insert_page = right_page; - } else if (cmp_dtuple_rec(tuple, first_rec) >= 0) { - - insert_page = right_page; } else { - insert_page = left_page; + offsets = rec_reget_offsets(first_rec, cursor->index, + offsets, n_uniq, heap); + + if (cmp_dtuple_rec(tuple, first_rec, offsets) >= 0) { + + insert_page = right_page; + } else { + insert_page = left_page; + } } /* 7. Reposition the cursor for insert and try insertion */ page_cursor = btr_cur_get_page_cur(cursor); - page_cur_search(insert_page, tuple, PAGE_CUR_LE, page_cursor); + page_cur_search(insert_page, cursor->index, tuple, + PAGE_CUR_LE, page_cursor); - rec = page_cur_tuple_insert(page_cursor, tuple, mtr); + rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); if (rec != NULL) { /* Insert fit on the page: update the free bits for the @@ -1650,15 +1729,17 @@ func_start: /* fprintf(stderr, "Split and insert done %lu %lu\n", buf_frame_get_page_no(left_page), buf_frame_get_page_no(right_page)); */ + mem_heap_free(heap); return(rec); } /* 8. If insert did not fit, try page reorganization */ - btr_page_reorganize(insert_page, mtr); + btr_page_reorganize(insert_page, cursor->index, mtr); - page_cur_search(insert_page, tuple, PAGE_CUR_LE, page_cursor); - rec = page_cur_tuple_insert(page_cursor, tuple, mtr); + page_cur_search(insert_page, cursor->index, tuple, + PAGE_CUR_LE, page_cursor); + rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); if (rec == NULL) { /* The insert did not fit on the page: loop back to the @@ -1688,6 +1769,7 @@ func_start: ut_ad(page_validate(left_page, UT_LIST_GET_FIRST(tree->tree_indexes))); ut_ad(page_validate(right_page, UT_LIST_GET_FIRST(tree->tree_indexes))); + mem_heap_free(heap); return(rec); } @@ -1721,6 +1803,7 @@ btr_level_list_remove( if (prev_page_no != FIL_NULL) { prev_page = btr_page_get(space, prev_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(prev_page) == page_is_comp(page)); btr_page_set_next(prev_page, next_page_no, mtr); } @@ -1728,6 +1811,7 @@ btr_level_list_remove( if (next_page_no != FIL_NULL) { next_page = btr_page_get(space, next_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(next_page) == page_is_comp(page)); btr_page_set_prev(next_page, prev_page_no, mtr); } @@ -1741,9 +1825,11 @@ void btr_set_min_rec_mark_log( /*=====================*/ rec_t* rec, /* in: record */ + ibool comp, /* TRUE=compact record format */ mtr_t* mtr) /* in: mtr */ { - mlog_write_initial_log_record(rec, MLOG_REC_MIN_MARK, mtr); + mlog_write_initial_log_record(rec, + comp ? MLOG_COMP_REC_MIN_MARK : MLOG_REC_MIN_MARK, mtr); /* Write rec offset as a 2-byte ulint */ mlog_catenate_ulint(mtr, rec - buf_frame_align(rec), MLOG_2BYTES); @@ -1759,6 +1845,7 @@ btr_parse_set_min_rec_mark( /* out: end of log record or NULL */ byte* ptr, /* in: buffer */ byte* end_ptr,/* in: buffer end */ + ibool comp, /* in: TRUE=compact page format */ page_t* page, /* in: page or NULL */ mtr_t* mtr) /* in: mtr or NULL */ { @@ -1772,7 +1859,7 @@ btr_parse_set_min_rec_mark( if (page) { rec = page + mach_read_from_2(ptr); - btr_set_min_rec_mark(rec, mtr); + btr_set_min_rec_mark(rec, comp, mtr); } return(ptr + 2); @@ -1785,15 +1872,16 @@ void btr_set_min_rec_mark( /*=================*/ rec_t* rec, /* in: record */ + ibool comp, /* in: TRUE=compact page format */ mtr_t* mtr) /* in: mtr */ { ulint info_bits; - info_bits = rec_get_info_bits(rec); + info_bits = rec_get_info_bits(rec, comp); - rec_set_info_bits(rec, info_bits | REC_INFO_MIN_REC_FLAG); + rec_set_info_bits(rec, comp, info_bits | REC_INFO_MIN_REC_FLAG); - btr_set_min_rec_mark_log(rec, mtr); + btr_set_min_rec_mark_log(rec, comp, mtr); } /***************************************************************** @@ -1842,18 +1930,19 @@ btr_lift_page_up( record from the page should be removed */ mtr_t* mtr) /* in: mtr */ { - rec_t* node_ptr; - page_t* father_page; - ulint page_level; - + page_t* father_page; + ulint page_level; + dict_index_t* index; + ut_ad(btr_page_get_prev(page, mtr) == FIL_NULL); ut_ad(btr_page_get_next(page, mtr) == FIL_NULL); ut_ad(mtr_memo_contains(mtr, buf_block_align(page), MTR_MEMO_PAGE_X_FIX)); - node_ptr = btr_page_get_father_node_ptr(tree, page, mtr); - father_page = buf_frame_align(node_ptr); + father_page = buf_frame_align( + btr_page_get_father_node_ptr(tree, page, mtr)); page_level = btr_page_get_level(page, mtr); + index = UT_LIST_GET_FIRST(tree->tree_indexes); btr_search_drop_page_hash_index(page); @@ -1862,7 +1951,7 @@ btr_lift_page_up( /* Move records to the father */ page_copy_rec_list_end(father_page, page, page_get_infimum_rec(page), - mtr); + index, mtr); lock_update_copy_and_discard(father_page, page); btr_page_set_level(father_page, page_level, mtr); @@ -1871,10 +1960,8 @@ btr_lift_page_up( btr_page_free(tree, page, mtr); /* We play safe and reset the free bits for the father */ - ibuf_reset_free_bits(UT_LIST_GET_FIRST(tree->tree_indexes), - father_page); - ut_ad(page_validate(father_page, - UT_LIST_GET_FIRST(tree->tree_indexes))); + ibuf_reset_free_bits(index, father_page); + ut_ad(page_validate(father_page, index)); ut_ad(btr_check_node_ptr(tree, father_page, mtr)); } @@ -1914,9 +2001,11 @@ btr_compress( ulint max_ins_size; ulint max_ins_size_reorg; ulint level; - + ibool comp = cursor->index->table->comp; + page = btr_cur_get_page(cursor); tree = btr_cur_get_tree(cursor); + ut_a(comp == page_is_comp(page)); ut_ad(mtr_memo_contains(mtr, dict_tree_get_lock(tree), MTR_MEMO_X_LOCK)); @@ -1932,7 +2021,9 @@ btr_compress( right_page_no); */ node_ptr = btr_page_get_father_node_ptr(tree, page, mtr); + ut_ad(!comp || rec_get_status(node_ptr) == REC_STATUS_NODE_PTR); father_page = buf_frame_align(node_ptr); + ut_a(comp == page_is_comp(father_page)); /* Decide the page to which we try to merge and which will inherit the locks */ @@ -1957,6 +2048,7 @@ btr_compress( n_recs = page_get_n_recs(page); data_size = page_get_data_size(page); + ut_a(page_is_comp(merge_page) == page_is_comp(page)); max_ins_size_reorg = page_get_max_insert_size_after_reorganize( merge_page, n_recs); @@ -1975,7 +2067,7 @@ btr_compress( /* We have to reorganize merge_page */ - btr_page_reorganize(merge_page, mtr); + btr_page_reorganize(merge_page, cursor->index, mtr); max_ins_size = page_get_max_insert_size(merge_page, n_recs); @@ -1999,11 +2091,14 @@ btr_compress( if (is_left) { btr_node_ptr_delete(tree, page, mtr); } else { + mem_heap_t* heap = mem_heap_create(100); /* Replace the address of the old child node (= page) with the address of the merge page to the right */ - btr_node_ptr_set_child_page_no(node_ptr, right_page_no, mtr); - + btr_node_ptr_set_child_page_no(node_ptr, + rec_get_offsets(node_ptr, cursor->index, + ULINT_UNDEFINED, heap), right_page_no, mtr); + mem_heap_free(heap); btr_node_ptr_delete(tree, merge_page, mtr); } @@ -2012,14 +2107,14 @@ btr_compress( orig_pred = page_rec_get_prev( page_get_supremum_rec(merge_page)); page_copy_rec_list_start(merge_page, page, - page_get_supremum_rec(page), mtr); + page_get_supremum_rec(page), cursor->index, mtr); lock_update_merge_left(merge_page, orig_pred, page); } else { orig_succ = page_rec_get_next( page_get_infimum_rec(merge_page)); page_copy_rec_list_end(merge_page, page, - page_get_infimum_rec(page), mtr); + page_get_infimum_rec(page), cursor->index, mtr); lock_update_merge_right(orig_succ, page); } @@ -2133,6 +2228,7 @@ btr_discard_page( return; } + ut_a(page_is_comp(merge_page) == page_is_comp(page)); btr_search_drop_page_hash_index(page); if (left_page_no == FIL_NULL && btr_page_get_level(page, mtr) > 0) { @@ -2144,7 +2240,8 @@ btr_discard_page( ut_ad(node_ptr != page_get_supremum_rec(merge_page)); - btr_set_min_rec_mark(node_ptr, mtr); + btr_set_min_rec_mark(node_ptr, + cursor->index->table->comp, mtr); } btr_node_ptr_delete(tree, page, mtr); @@ -2215,6 +2312,8 @@ btr_print_recursive( page_t* page, /* in: index page */ ulint width, /* in: print this many entries from start and end */ + mem_heap_t* heap, /* in: heap for rec_reget_offsets() */ + ulint** offsets,/* in/out: buffer for rec_reget_offsets() */ mtr_t* mtr) /* in: mtr */ { page_cur_t cursor; @@ -2223,14 +2322,16 @@ btr_print_recursive( mtr_t mtr2; rec_t* node_ptr; page_t* child; - + dict_index_t* index; + ut_ad(mtr_memo_contains(mtr, buf_block_align(page), MTR_MEMO_PAGE_X_FIX)); fprintf(stderr, "NODE ON LEVEL %lu page number %lu\n", (ulong) btr_page_get_level(page, mtr), (ulong) buf_frame_get_page_no(page)); - page_print(page, width, width); + index = UT_LIST_GET_FIRST(tree->tree_indexes); + page_print(page, index, width, width); n_recs = page_get_n_recs(page); @@ -2249,15 +2350,20 @@ btr_print_recursive( node_ptr = page_cur_get_rec(&cursor); - child = btr_node_ptr_get_child(node_ptr, &mtr2); - - btr_print_recursive(tree, child, width, &mtr2); + *offsets = rec_reget_offsets(node_ptr, index, + *offsets, ULINT_UNDEFINED, heap); + child = btr_node_ptr_get_child(node_ptr, + *offsets, &mtr2); + btr_print_recursive(tree, child, width, + heap, offsets, &mtr2); mtr_commit(&mtr2); } page_cur_move_to_next(&cursor); i++; } + + mem_heap_free(heap); } /****************************************************************** @@ -2270,8 +2376,10 @@ btr_print_tree( ulint width) /* in: print this many entries from start and end */ { - mtr_t mtr; - page_t* root; + mtr_t mtr; + page_t* root; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; fputs("--------------------------\n" "INDEX TREE PRINT\n", stderr); @@ -2280,7 +2388,8 @@ btr_print_tree( root = btr_root_get(tree, &mtr); - btr_print_recursive(tree, root, width, &mtr); + btr_print_recursive(tree, root, width, heap, &offsets, &mtr); + mem_heap_free(heap); mtr_commit(&mtr); @@ -2323,7 +2432,10 @@ btr_check_node_ptr( page_rec_get_next(page_get_infimum_rec(page)), 0, heap, btr_page_get_level(page, mtr)); - ut_a(cmp_dtuple_rec(node_ptr_tuple, node_ptr) == 0); + ut_a(cmp_dtuple_rec(node_ptr_tuple, node_ptr, + rec_get_offsets(node_ptr, + dict_tree_find_index(tree, node_ptr), + ULINT_UNDEFINED, heap)) == 0); mem_heap_free(heap); @@ -2360,10 +2472,12 @@ btr_index_rec_validate( should print hex dump of record and page on error */ { - ulint len; - ulint n; - ulint i; - page_t* page; + ulint len; + ulint n; + ulint i; + page_t* page; + mem_heap_t* heap; + ulint* offsets; page = buf_frame_align(rec); @@ -2377,10 +2491,10 @@ btr_index_rec_validate( n = dict_index_get_n_fields(index); - if (rec_get_n_fields(rec) != n) { + if (!index->table->comp && rec_get_n_fields_old(rec) != n) { btr_index_rec_validate_report(page, rec, index); fprintf(stderr, "InnoDB: has %lu fields, should have %lu\n", - (ulong) rec_get_n_fields(rec), (ulong) n); + (ulong) rec_get_n_fields_old(rec), (ulong) n); if (!dump_on_error) { @@ -2390,16 +2504,19 @@ btr_index_rec_validate( buf_page_print(page); fputs("InnoDB: corrupt record ", stderr); - rec_print(stderr, rec); + rec_print_old(stderr, rec); putc('\n', stderr); return(FALSE); } + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + for (i = 0; i < n; i++) { dtype_t* type = dict_index_get_nth_type(index, i); - rec_get_nth_field(rec, i, &len); + rec_get_nth_field(rec, offsets, i, &len); /* Note that prefix indexes are not fixed size even when their type is CHAR. */ @@ -2419,20 +2536,22 @@ btr_index_rec_validate( (ulong) i, (ulong) len, (ulong) dtype_get_fixed_size(type)); if (!dump_on_error) { - + mem_heap_free(heap); return(FALSE); } buf_page_print(page); fputs("InnoDB: corrupt record ", stderr); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); putc('\n', stderr); + mem_heap_free(heap); return(FALSE); } } + mem_heap_free(heap); return(TRUE); } @@ -2527,15 +2646,18 @@ btr_validate_level( page_t* right_father_page; rec_t* node_ptr; rec_t* right_node_ptr; + rec_t* rec; ulint right_page_no; ulint left_page_no; page_cur_t cursor; - mem_heap_t* heap; dtuple_t* node_ptr_tuple; ibool ret = TRUE; dict_index_t* index; mtr_t mtr; - + mem_heap_t* heap = mem_heap_create(256); + ulint* offsets = NULL; + ulint* offsets2= NULL; + mtr_start(&mtr); mtr_x_lock(dict_tree_get_lock(tree), &mtr); @@ -2544,6 +2666,8 @@ btr_validate_level( space = buf_frame_get_space_id(page); + index = UT_LIST_GET_FIRST(tree->tree_indexes); + while (level != btr_page_get_level(page, &mtr)) { ut_a(btr_page_get_level(page, &mtr) > 0); @@ -2552,14 +2676,16 @@ btr_validate_level( page_cur_move_to_next(&cursor); node_ptr = page_cur_get_rec(&cursor); - page = btr_node_ptr_get_child(node_ptr, &mtr); + offsets = rec_reget_offsets(node_ptr, index, + offsets, ULINT_UNDEFINED, heap); + page = btr_node_ptr_get_child(node_ptr, offsets, &mtr); } - index = UT_LIST_GET_FIRST(tree->tree_indexes); - /* Now we are on the desired level. Loop through the pages on that level. */ loop: + mem_heap_empty(heap); + offsets = offsets2 = NULL; mtr_x_lock(dict_tree_get_lock(tree), &mtr); /* Check ordering etc. of records */ @@ -2588,12 +2714,20 @@ loop: (buf_frame_get_page_no(page) == dict_tree_get_page(tree)))); if (right_page_no != FIL_NULL) { - + rec_t* right_rec; right_page = btr_page_get(space, right_page_no, RW_X_LATCH, &mtr); - if (cmp_rec_rec(page_rec_get_prev(page_get_supremum_rec(page)), - page_rec_get_next(page_get_infimum_rec(right_page)), - UT_LIST_GET_FIRST(tree->tree_indexes)) >= 0) { + ut_a(page_is_comp(right_page) == page_is_comp(page)); + rec = page_rec_get_prev(page_get_supremum_rec(page)); + right_rec = page_rec_get_next( + page_get_infimum_rec(right_page)); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + offsets2 = rec_reget_offsets(right_rec, index, + offsets2, ULINT_UNDEFINED, heap); + if (cmp_rec_rec(rec, right_rec, offsets, offsets2, + dict_index_get_n_fields(index), + index) >= 0) { btr_validate_report2(index, level, page, right_page); @@ -2604,12 +2738,17 @@ loop: buf_page_print(right_page); fputs("InnoDB: record ", stderr); - rec_print(stderr, page_rec_get_prev( - page_get_supremum_rec(page))); + rec = page_rec_get_prev(page_get_supremum_rec(page)); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + rec_print(stderr, rec, offsets); putc('\n', stderr); fputs("InnoDB: record ", stderr); - rec_print(stderr, page_rec_get_next( - page_get_infimum_rec(right_page))); + rec = page_rec_get_next(page_get_infimum_rec( + right_page)); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + rec_print(stderr, rec, offsets); putc('\n', stderr); ret = FALSE; @@ -2618,7 +2757,8 @@ loop: if (level > 0 && left_page_no == FIL_NULL) { ut_a(REC_INFO_MIN_REC_FLAG & rec_get_info_bits( - page_rec_get_next(page_get_infimum_rec(page)))); + page_rec_get_next(page_get_infimum_rec(page)), + index->table->comp)); } if (buf_frame_get_page_no(page) != dict_tree_get_page(tree)) { @@ -2627,12 +2767,14 @@ loop: node_ptr = btr_page_get_father_node_ptr(tree, page, &mtr); father_page = buf_frame_align(node_ptr); + offsets = rec_reget_offsets(node_ptr, index, + offsets, ULINT_UNDEFINED, heap); - if (btr_node_ptr_get_child_page_no(node_ptr) != + if (btr_node_ptr_get_child_page_no(node_ptr, offsets) != buf_frame_get_page_no(page) || node_ptr != btr_page_get_father_for_rec(tree, page, - page_rec_get_prev(page_get_supremum_rec(page)), - &mtr)) { + page_rec_get_prev(page_get_supremum_rec(page)), + &mtr)) { btr_validate_report1(index, level, page); fputs("InnoDB: node pointer to the page is wrong\n", @@ -2642,17 +2784,20 @@ loop: buf_page_print(page); fputs("InnoDB: node ptr ", stderr); - rec_print(stderr, node_ptr); + rec_print(stderr, node_ptr, offsets); fprintf(stderr, "\n" "InnoDB: node ptr child page n:o %lu\n", - (unsigned long) btr_node_ptr_get_child_page_no(node_ptr)); + (unsigned long) btr_node_ptr_get_child_page_no( + node_ptr, offsets)); fputs("InnoDB: record on page ", stderr); - rec_print(stderr, - btr_page_get_father_for_rec(tree, page, - page_rec_get_prev(page_get_supremum_rec(page)), - &mtr)); + rec = btr_page_get_father_for_rec(tree, page, + page_rec_get_prev(page_get_supremum_rec(page)), + &mtr); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + rec_print(stderr, rec, offsets); putc('\n', stderr); ret = FALSE; @@ -2660,7 +2805,8 @@ loop: } if (btr_page_get_level(page, &mtr) > 0) { - heap = mem_heap_create(256); + offsets = rec_reget_offsets(node_ptr, index, + offsets, ULINT_UNDEFINED, heap); node_ptr_tuple = dict_tree_build_node_ptr( tree, @@ -2669,7 +2815,10 @@ loop: 0, heap, btr_page_get_level(page, &mtr)); - if (cmp_dtuple_rec(node_ptr_tuple, node_ptr) != 0) { + if (cmp_dtuple_rec(node_ptr_tuple, node_ptr, + offsets)) { + rec_t* first_rec = page_rec_get_next( + page_get_infimum_rec(page)); btr_validate_report1(index, level, page); @@ -2679,18 +2828,16 @@ loop: fputs("InnoDB: Error: node ptrs differ" " on levels > 0\n" "InnoDB: node ptr ", stderr); - rec_print(stderr, node_ptr); + rec_print(stderr, node_ptr, offsets); fputs("InnoDB: first rec ", stderr); - rec_print(stderr, page_rec_get_next( - page_get_infimum_rec(page))); + offsets = rec_reget_offsets(first_rec, index, + offsets, ULINT_UNDEFINED, heap); + rec_print(stderr, first_rec, offsets); putc('\n', stderr); ret = FALSE; - mem_heap_free(heap); goto node_ptr_fails; } - - mem_heap_free(heap); } if (left_page_no == FIL_NULL) { @@ -2701,7 +2848,7 @@ loop: if (right_page_no == FIL_NULL) { ut_a(node_ptr == page_rec_get_prev( - page_get_supremum_rec(father_page))); + page_get_supremum_rec(father_page))); ut_a(btr_page_get_next(father_page, &mtr) == FIL_NULL); } @@ -2771,13 +2918,16 @@ node_ptr_fails: mtr_commit(&mtr); if (right_page_no != FIL_NULL) { + ibool comp = page_is_comp(page); mtr_start(&mtr); page = btr_page_get(space, right_page_no, RW_X_LATCH, &mtr); + ut_a(page_is_comp(page) == comp); goto loop; } + mem_heap_free(heap); return(ret); } diff --git a/innobase/btr/btr0cur.c b/innobase/btr/btr0cur.c index 48de5644908..f5e146172ed 100644 --- a/innobase/btr/btr0cur.c +++ b/innobase/btr/btr0cur.c @@ -73,8 +73,9 @@ static void btr_cur_unmark_extern_fields( /*=========================*/ - rec_t* rec, /* in: record in a clustered index */ - mtr_t* mtr); /* in: mtr */ + rec_t* rec, /* in: record in a clustered index */ + mtr_t* mtr, /* in: mtr */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /*********************************************************************** Adds path information to the cursor for the current page, for which the binary search has been performed. */ @@ -96,6 +97,7 @@ btr_rec_free_updated_extern_fields( dict_index_t* index, /* in: index of rec; the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ upd_t* update, /* in: update vector */ ibool do_not_free_inherited,/* in: TRUE if called in a rollback and we do not want to free @@ -108,9 +110,10 @@ static ulint btr_rec_get_externally_stored_len( /*==============================*/ - /* out: externally stored part, in units of a - database page */ - rec_t* rec); /* in: record */ + /* out: externally stored part, + in units of a database page */ + rec_t* rec, /* in: record */ + const ulint* offsets);/* in: array returned by rec_get_offsets() */ /*==================== B-TREE SEARCH =========================*/ @@ -137,11 +140,13 @@ btr_cur_latch_leaves( if (latch_mode == BTR_SEARCH_LEAF) { get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; } else if (latch_mode == BTR_MODIFY_LEAF) { get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; } else if (latch_mode == BTR_MODIFY_TREE) { @@ -152,11 +157,13 @@ btr_cur_latch_leaves( if (left_page_no != FIL_NULL) { get_page = btr_page_get(space, left_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; } get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; right_page_no = btr_page_get_next(page, mtr); @@ -176,11 +183,14 @@ btr_cur_latch_leaves( if (left_page_no != FIL_NULL) { cursor->left_page = btr_page_get(space, left_page_no, RW_S_LATCH, mtr); + ut_a(page_is_comp(cursor->left_page) == + page_is_comp(page)); buf_block_align( cursor->left_page)->check_index_page_at_flush = TRUE; } get_page = btr_page_get(space, page_no, RW_S_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; } else if (latch_mode == BTR_MODIFY_PREV) { @@ -191,11 +201,14 @@ btr_cur_latch_leaves( if (left_page_no != FIL_NULL) { cursor->left_page = btr_page_get(space, left_page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(cursor->left_page) == + page_is_comp(page)); buf_block_align( cursor->left_page)->check_index_page_at_flush = TRUE; } get_page = btr_page_get(space, page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(get_page) == page_is_comp(page)); buf_block_align(get_page)->check_index_page_at_flush = TRUE; } else { ut_error; @@ -261,6 +274,8 @@ btr_cur_search_to_nth_level( #ifdef BTR_CUR_ADAPT btr_search_t* info; #endif + mem_heap_t* heap; + ulint* offsets; /* Currently, PAGE_CUR_LE is the only search mode used for searches ending to upper levels */ @@ -379,7 +394,9 @@ btr_cur_search_to_nth_level( page_mode = mode; break; } - + + heap = mem_heap_create(100); + offsets = NULL; /* Loop and search until we arrive at the desired level */ for (;;) { @@ -414,7 +431,7 @@ retry_page_get: cursor->thr)) { /* Insertion to the insert buffer succeeded */ cursor->flag = BTR_CUR_INSERT_TO_IBUF; - + mem_heap_free(heap); return; } @@ -470,9 +487,9 @@ retry_page_get: page_mode = mode; } - page_cur_search_with_match(page, tuple, page_mode, &up_match, - &up_bytes, &low_match, &low_bytes, - page_cursor); + page_cur_search_with_match(page, index, tuple, page_mode, + &up_match, &up_bytes, + &low_match, &low_bytes, page_cursor); if (estimate) { btr_cur_add_path_info(cursor, height, root_height); } @@ -486,7 +503,9 @@ retry_page_get: if (level > 0) { /* x-latch the page */ - btr_page_get(space, page_no, RW_X_LATCH, mtr); + ut_a(page_is_comp(btr_page_get(space, + page_no, RW_X_LATCH, mtr)) + == index->table->comp); } break; @@ -498,11 +517,14 @@ retry_page_get: guess = NULL; node_ptr = page_cur_get_rec(page_cursor); - + offsets = rec_reget_offsets(node_ptr, cursor->index, + offsets, ULINT_UNDEFINED, heap); /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr); + page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); } + mem_heap_free(heap); + if (level == 0) { cursor->low_match = low_match; cursor->low_bytes = low_bytes; @@ -552,6 +574,8 @@ btr_cur_open_at_index_side( rec_t* node_ptr; ulint estimate; ulint savepoint; + mem_heap_t* heap; + ulint* offsets = NULL; estimate = latch_mode & BTR_ESTIMATE; latch_mode = latch_mode & ~BTR_ESTIMATE; @@ -576,7 +600,8 @@ btr_cur_open_at_index_side( page_no = dict_tree_get_page(tree); height = ULINT_UNDEFINED; - + heap = mem_heap_create(100); + for (;;) { page = buf_page_get_gen(space, page_no, RW_NO_LATCH, NULL, BUF_GET, @@ -645,10 +670,13 @@ btr_cur_open_at_index_side( height--; node_ptr = page_cur_get_rec(page_cursor); - + offsets = rec_reget_offsets(node_ptr, cursor->index, + offsets, ULINT_UNDEFINED, heap); /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr); + page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); } + + mem_heap_free(heap); } /************************************************************************** @@ -669,6 +697,8 @@ btr_cur_open_at_rnd_pos( ulint space; ulint height; rec_t* node_ptr; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; tree = index->tree; @@ -717,10 +747,13 @@ btr_cur_open_at_rnd_pos( height--; node_ptr = page_cur_get_rec(page_cursor); - + offsets = rec_reget_offsets(node_ptr, cursor->index, + offsets, ULINT_UNDEFINED, heap); /* Go to the child node */ - page_no = btr_node_ptr_get_child_page_no(node_ptr); + page_no = btr_node_ptr_get_child_page_no(node_ptr, offsets); } + + mem_heap_free(heap); } /*==================== B-TREE INSERT =========================*/ @@ -758,18 +791,20 @@ btr_cur_insert_if_possible( page_cursor = btr_cur_get_page_cur(cursor); /* Now, try the insert */ - rec = page_cur_tuple_insert(page_cursor, tuple, mtr); + rec = page_cur_tuple_insert(page_cursor, tuple, cursor->index, mtr); if (!rec) { /* If record did not fit, reorganize */ - btr_page_reorganize(page, mtr); + btr_page_reorganize(page, cursor->index, mtr); *reorg = TRUE; - page_cur_search(page, tuple, PAGE_CUR_LE, page_cursor); + page_cur_search(page, cursor->index, tuple, + PAGE_CUR_LE, page_cursor); - rec = page_cur_tuple_insert(page_cursor, tuple, mtr); + rec = page_cur_tuple_insert(page_cursor, tuple, + cursor->index, mtr); } return(rec); @@ -887,8 +922,6 @@ btr_cur_optimistic_insert( ibool reorg; ibool inherit; ulint rec_size; - ulint data_size; - ulint extra_size; ulint type; ulint err; @@ -914,13 +947,11 @@ btr_cur_optimistic_insert( calculate_sizes_again: /* Calculate the record size when entry is converted to a record */ - data_size = dtuple_get_data_size(entry); - extra_size = rec_get_converted_extra_size(data_size, - dtuple_get_n_fields(entry)); - rec_size = data_size + extra_size; + rec_size = rec_get_converted_size(index, entry); - if ((rec_size >= page_get_free_space_of_empty() / 2) - || (rec_size >= REC_MAX_DATA_SIZE)) { + if (rec_size >= + ut_min(page_get_free_space_of_empty(index->table->comp) / 2, + REC_MAX_DATA_SIZE)) { /* The record is so big that we have to store some fields externally on separate database pages */ @@ -983,19 +1014,18 @@ calculate_sizes_again: /* Now, try the insert */ - *rec = page_cur_insert_rec_low(page_cursor, entry, data_size, - NULL, mtr); + *rec = page_cur_insert_rec_low(page_cursor, entry, index, NULL, mtr); if (!(*rec)) { /* If the record did not fit, reorganize */ - btr_page_reorganize(page, mtr); + btr_page_reorganize(page, index, mtr); ut_ad(page_get_max_insert_size(page, 1) == max_size); reorg = TRUE; - page_cur_search(page, entry, PAGE_CUR_LE, page_cursor); + page_cur_search(page, index, entry, PAGE_CUR_LE, page_cursor); - *rec = page_cur_tuple_insert(page_cursor, entry, mtr); + *rec = page_cur_tuple_insert(page_cursor, entry, index, mtr); if (!*rec) { fputs("InnoDB: Error: cannot insert tuple ", stderr); @@ -1123,9 +1153,9 @@ btr_cur_pessimistic_insert( } } - if ((rec_get_converted_size(entry) - >= page_get_free_space_of_empty() / 2) - || (rec_get_converted_size(entry) >= REC_MAX_DATA_SIZE)) { + if (rec_get_converted_size(index, entry) >= + ut_min(page_get_free_space_of_empty(index->table->comp) / 2, + REC_MAX_DATA_SIZE)) { /* The record is so big that we have to store some fields externally on separate database pages */ @@ -1212,8 +1242,11 @@ btr_cur_upd_lock_and_undo( err = DB_SUCCESS; if (!(flags & BTR_NO_LOCKING_FLAG)) { + mem_heap_t* heap = mem_heap_create(100); err = lock_clust_rec_modify_check_and_lock(flags, rec, index, - thr); + rec_get_offsets(rec, index, ULINT_UNDEFINED, heap), + thr); + mem_heap_free(heap); if (err != DB_SUCCESS) { return(err); @@ -1243,14 +1276,17 @@ btr_cur_update_in_place_log( mtr_t* mtr) /* in: mtr */ { byte* log_ptr; + ut_ad(flags < 256); - log_ptr = mlog_open(mtr, 30 + MLOG_BUF_MARGIN); - - log_ptr = mlog_write_initial_log_record_fast(rec, - MLOG_REC_UPDATE_IN_PLACE, log_ptr, mtr); + log_ptr = mlog_open_and_write_index(mtr, rec, index, index->table->comp + ? MLOG_COMP_REC_UPDATE_IN_PLACE + : MLOG_REC_UPDATE_IN_PLACE, + 1 + DATA_ROLL_PTR_LEN + 14 + 2 + MLOG_BUF_MARGIN); - mach_write_to_1(log_ptr, flags); - log_ptr++; + if (!log_ptr) { + /* Logging in mtr is switched off during crash recovery */ + return; + } /* The code below assumes index is a clustered index: change index to the clustered index if we are updating a secondary index record (or we @@ -1259,6 +1295,9 @@ btr_cur_update_in_place_log( index = dict_table_get_first_index(index->table); + mach_write_to_1(log_ptr, flags); + log_ptr++; + log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr, mtr); mach_write_to_2(log_ptr, rec - buf_frame_align(rec)); @@ -1273,10 +1312,11 @@ Parses a redo log record of updating a record in-place. */ byte* btr_cur_parse_update_in_place( /*==========================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page) /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + dict_index_t* index) /* in: index corresponding to page */ { ulint flags; rec_t* rec; @@ -1286,6 +1326,7 @@ btr_cur_parse_update_in_place( dulint roll_ptr; ulint rec_offset; mem_heap_t* heap; + ulint* offsets; if (end_ptr < ptr + 1) { @@ -1333,11 +1374,14 @@ btr_cur_parse_update_in_place( /* We do not need to reserve btr_search_latch, as the page is only being recovered, and there cannot be a hash index to it. */ + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_rec_sys_fields_in_recovery(rec, pos, trx_id, roll_ptr); + row_upd_rec_sys_fields_in_recovery(rec, offsets, + pos, trx_id, roll_ptr); } - row_upd_rec_in_place(rec, update); + row_upd_rec_in_place(rec, offsets, update); mem_heap_free(heap); @@ -1369,14 +1413,18 @@ btr_cur_update_in_place( dulint roll_ptr = ut_dulint_zero; trx_t* trx; ibool was_delete_marked; + mem_heap_t* heap; + const ulint* offsets; rec = btr_cur_get_rec(cursor); index = cursor->index; trx = thr_get_trx(thr); - + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + if (btr_cur_print_record_ops && thr) { btr_cur_trx_report(trx, index, "update "); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); } /* Do lock checking and undo logging */ @@ -1384,6 +1432,7 @@ btr_cur_update_in_place( thr, &roll_ptr); if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } @@ -1405,15 +1454,15 @@ btr_cur_update_in_place( } if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_rec_sys_fields(rec, index, trx, roll_ptr); + row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr); } /* FIXME: in a mixed tree, all records may not have enough ordering fields for btr search: */ - was_delete_marked = rec_get_deleted_flag(rec); - - row_upd_rec_in_place(rec, update); + was_delete_marked = rec_get_deleted_flag(rec, index->table->comp); + + row_upd_rec_in_place(rec, offsets, update); if (block->is_hashed) { rw_lock_x_unlock(&btr_search_latch); @@ -1421,13 +1470,14 @@ btr_cur_update_in_place( btr_cur_update_in_place_log(flags, rec, index, update, trx, roll_ptr, mtr); - if (was_delete_marked && !rec_get_deleted_flag(rec)) { + if (was_delete_marked && !rec_get_deleted_flag(rec, index->table->comp)) { /* The new updated record owns its possible externally stored fields */ - btr_cur_unmark_extern_fields(rec, mtr); + btr_cur_unmark_extern_fields(rec, mtr, offsets); } + mem_heap_free(heap); return(DB_SUCCESS); } @@ -1469,24 +1519,28 @@ btr_cur_optimistic_update( mem_heap_t* heap; ibool reorganized = FALSE; ulint i; - + ulint* offsets; + page = btr_cur_get_page(cursor); rec = btr_cur_get_rec(cursor); index = cursor->index; + heap = mem_heap_create(1024); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + if (btr_cur_print_record_ops && thr) { btr_cur_trx_report(thr_get_trx(thr), index, "update "); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); } ut_ad(mtr_memo_contains(mtr, buf_block_align(page), MTR_MEMO_PAGE_X_FIX)); - if (!row_upd_changes_field_size_or_external(rec, index, update)) { + if (!row_upd_changes_field_size_or_external(index, offsets, update)) { /* The simplest and the most common case: the update does not change the size of any field and none of the updated fields is externally stored in rec or update */ - + mem_heap_free(heap); return(btr_cur_update_in_place(flags, cursor, update, cmpl_info, thr, mtr)); } @@ -1497,29 +1551,30 @@ btr_cur_optimistic_update( /* Externally stored fields are treated in pessimistic update */ + mem_heap_free(heap); return(DB_OVERFLOW); } } - if (rec_contains_externally_stored_field(btr_cur_get_rec(cursor))) { + if (rec_offs_any_extern(offsets)) { /* Externally stored fields are treated in pessimistic update */ + mem_heap_free(heap); return(DB_OVERFLOW); } page_cursor = btr_cur_get_page_cur(cursor); - heap = mem_heap_create(1024); - new_entry = row_rec_to_index_entry(ROW_COPY_DATA, index, rec, heap); row_upd_index_replace_new_col_vals_index_pos(new_entry, index, update, NULL); - old_rec_size = rec_get_size(rec); - new_rec_size = rec_get_converted_size(new_entry); + old_rec_size = rec_offs_size(offsets); + new_rec_size = rec_get_converted_size(index, new_entry); - if (new_rec_size >= page_get_free_space_of_empty() / 2) { + if (new_rec_size >= + page_get_free_space_of_empty(index->table->comp) / 2) { mem_heap_free(heap); @@ -1570,7 +1625,7 @@ btr_cur_optimistic_update( btr_search_update_hash_on_delete(cursor); - page_cur_delete_rec(page_cursor, mtr); + page_cur_delete_rec(page_cursor, index, mtr); page_cur_move_to_prev(page_cursor); @@ -1587,11 +1642,13 @@ btr_cur_optimistic_update( ut_a(rec); /* <- We calculated above the insert would fit */ - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, index->table->comp)) { /* The new inserted record owns its possible externally stored fields */ - btr_cur_unmark_extern_fields(rec, mtr); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + btr_cur_unmark_extern_fields(rec, mtr, offsets); } /* Restore the old explicit lock state on the record */ @@ -1690,6 +1747,7 @@ btr_cur_pessimistic_update( ulint* ext_vect; ulint n_ext_vect; ulint reserve_flag; + ulint* offsets = NULL; *big_rec = NULL; @@ -1743,6 +1801,7 @@ btr_cur_pessimistic_update( } heap = mem_heap_create(1024); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); trx = thr_get_trx(thr); @@ -1767,28 +1826,29 @@ btr_cur_pessimistic_update( ut_a(big_rec_vec == NULL); - btr_rec_free_updated_extern_fields(index, rec, update, - TRUE, mtr); + btr_rec_free_updated_extern_fields(index, rec, offsets, + update, TRUE, mtr); } /* We have to set appropriate extern storage bits in the new record to be inserted: we have to remember which fields were such */ - ext_vect = mem_heap_alloc(heap, sizeof(ulint) * rec_get_n_fields(rec)); - n_ext_vect = btr_push_update_extern_fields(ext_vect, rec, update); - - if ((rec_get_converted_size(new_entry) >= - page_get_free_space_of_empty() / 2) - || (rec_get_converted_size(new_entry) >= REC_MAX_DATA_SIZE)) { + ext_vect = mem_heap_alloc(heap, sizeof(ulint) + * dict_index_get_n_fields(index)); + ut_ad(!cursor->index->table->comp || !rec_get_node_ptr_flag(rec)); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + n_ext_vect = btr_push_update_extern_fields(ext_vect, offsets, update); + + if (rec_get_converted_size(index, new_entry) >= + ut_min(page_get_free_space_of_empty(index->table->comp) / 2, + REC_MAX_DATA_SIZE)) { big_rec_vec = dtuple_convert_big_rec(index, new_entry, ext_vect, n_ext_vect); if (big_rec_vec == NULL) { - mem_heap_free(heap); - err = DB_TOO_BIG_RECORD; - goto return_after_reservations; } } @@ -1808,7 +1868,7 @@ btr_cur_pessimistic_update( btr_search_update_hash_on_delete(cursor); - page_cur_delete_rec(page_cursor, mtr); + page_cur_delete_rec(page_cursor, index, mtr); page_cur_move_to_prev(page_cursor); @@ -1817,21 +1877,22 @@ btr_cur_pessimistic_update( ut_a(rec || optim_err != DB_UNDERFLOW); if (rec) { + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); + lock_rec_restore_from_page_infimum(rec, page); - rec_set_field_extern_bits(rec, ext_vect, n_ext_vect, mtr); + rec_set_field_extern_bits(rec, index, + ext_vect, n_ext_vect, mtr); - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) { /* The new inserted record owns its possible externally stored fields */ - - btr_cur_unmark_extern_fields(rec, mtr); + btr_cur_unmark_extern_fields(rec, mtr, offsets); } btr_cur_compress_if_useful(cursor, mtr); err = DB_SUCCESS; - mem_heap_free(heap); - goto return_after_reservations; } @@ -1856,13 +1917,15 @@ btr_cur_pessimistic_update( ut_a(err == DB_SUCCESS); ut_a(dummy_big_rec == NULL); - rec_set_field_extern_bits(rec, ext_vect, n_ext_vect, mtr); + rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr); + offsets = rec_reget_offsets(rec, index, + offsets, ULINT_UNDEFINED, heap); - if (!rec_get_deleted_flag(rec)) { + if (!rec_get_deleted_flag(rec, rec_offs_comp(offsets))) { /* The new inserted record owns its possible externally stored fields */ - btr_cur_unmark_extern_fields(rec, mtr); + btr_cur_unmark_extern_fields(rec, mtr, offsets); } lock_rec_restore_from_page_infimum(rec, page); @@ -1876,9 +1939,8 @@ btr_cur_pessimistic_update( btr_cur_pess_upd_restore_supremum(rec, mtr); } - mem_heap_free(heap); - return_after_reservations: + mem_heap_free(heap); if (n_extents > 0) { fil_space_release_free_extents(cursor->index->space, @@ -1908,11 +1970,18 @@ btr_cur_del_mark_set_clust_rec_log( mtr_t* mtr) /* in: mtr */ { byte* log_ptr; + ut_ad(flags < 256); + ut_ad(val <= 1); - log_ptr = mlog_open(mtr, 30); + log_ptr = mlog_open_and_write_index(mtr, rec, index, index->table->comp + ? MLOG_COMP_REC_CLUST_DELETE_MARK + : MLOG_REC_CLUST_DELETE_MARK, + 1 + 1 + DATA_ROLL_PTR_LEN + 14 + 2); - log_ptr = mlog_write_initial_log_record_fast(rec, - MLOG_REC_CLUST_DELETE_MARK, log_ptr, mtr); + if (!log_ptr) { + /* Logging in mtr is switched off during crash recovery */ + return; + } mach_write_to_1(log_ptr, flags); log_ptr++; @@ -1934,10 +2003,11 @@ index record. */ byte* btr_cur_parse_del_mark_set_clust_rec( /*=================================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page) /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: index corresponding to page */ + page_t* page) /* in: page or NULL */ { ulint flags; ibool val; @@ -1978,15 +2048,19 @@ btr_cur_parse_del_mark_set_clust_rec( rec = page + offset; if (!(flags & BTR_KEEP_SYS_FLAG)) { - row_upd_rec_sys_fields_in_recovery(rec, pos, trx_id, - roll_ptr); + mem_heap_t* heap = mem_heap_create(100); + row_upd_rec_sys_fields_in_recovery(rec, + rec_get_offsets(rec, index, + ULINT_UNDEFINED, heap), + pos, trx_id, roll_ptr); + mem_heap_free(heap); } /* We do not need to reserve btr_search_latch, as the page is only being recovered, and there cannot be a hash index to it. */ - rec_set_deleted_flag(rec, val); + rec_set_deleted_flag(rec, index->table->comp, val); } return(ptr); @@ -2015,22 +2089,28 @@ btr_cur_del_mark_set_clust_rec( ulint err; rec_t* rec; trx_t* trx; + mem_heap_t* heap; + const ulint* offsets; rec = btr_cur_get_rec(cursor); index = cursor->index; - + heap = mem_heap_create(100); + offsets = rec_get_offsets(rec, index, ULINT_UNDEFINED, heap); + if (btr_cur_print_record_ops && thr) { btr_cur_trx_report(thr_get_trx(thr), index, "del mark "); - rec_print(stderr, rec); + rec_print(stderr, rec, offsets); } ut_ad(index->type & DICT_CLUSTERED); - ut_ad(rec_get_deleted_flag(rec) == FALSE); + ut_ad(rec_get_deleted_flag(rec, index->table->comp) == FALSE); - err = lock_clust_rec_modify_check_and_lock(flags, rec, index, thr); + err = lock_clust_rec_modify_check_and_lock(flags, + rec, index, offsets, thr); if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } @@ -2039,6 +2119,7 @@ btr_cur_del_mark_set_clust_rec( &roll_ptr); if (err != DB_SUCCESS) { + mem_heap_free(heap); return(err); } @@ -2048,13 +2129,12 @@ btr_cur_del_mark_set_clust_rec( rw_lock_x_lock(&btr_search_latch); } - rec_set_deleted_flag(rec, val); + rec_set_deleted_flag(rec, index->table->comp, val); trx = thr_get_trx(thr); if (!(flags & BTR_KEEP_SYS_FLAG)) { - - row_upd_rec_sys_fields(rec, index, trx, roll_ptr); + row_upd_rec_sys_fields(rec, index, offsets, trx, roll_ptr); } if (block->is_hashed) { @@ -2063,6 +2143,7 @@ btr_cur_del_mark_set_clust_rec( btr_cur_del_mark_set_clust_rec_log(flags, rec, index, val, trx, roll_ptr, mtr); + mem_heap_free(heap); return(DB_SUCCESS); } @@ -2073,16 +2154,24 @@ UNIV_INLINE void btr_cur_del_mark_set_sec_rec_log( /*=============================*/ - rec_t* rec, /* in: record */ - ibool val, /* in: value to set */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: record */ + dict_index_t* index, /* in: record descriptor */ + ibool val, /* in: value to set */ + mtr_t* mtr) /* in: mtr */ { byte* log_ptr; + ut_ad(val <= 1); - log_ptr = mlog_open(mtr, 30); + log_ptr = mlog_open_and_write_index(mtr, rec, index, index->table->comp + ? MLOG_COMP_REC_SEC_DELETE_MARK + : MLOG_REC_SEC_DELETE_MARK, + 1 + 2); - log_ptr = mlog_write_initial_log_record_fast(rec, - MLOG_REC_SEC_DELETE_MARK, log_ptr, mtr); + if (!log_ptr) { + /* Logging in mtr is switched off during crash recovery: + in that case mlog_open returns NULL */ + return; + } mach_write_to_1(log_ptr, val); log_ptr++; @@ -2100,10 +2189,11 @@ index record. */ byte* btr_cur_parse_del_mark_set_sec_rec( /*===============================*/ - /* out: end of log record or NULL */ - byte* ptr, /* in: buffer */ - byte* end_ptr,/* in: buffer end */ - page_t* page) /* in: page or NULL */ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + dict_index_t* index, /* in: record descriptor */ + page_t* page) /* in: page or NULL */ { ibool val; ulint offset; @@ -2129,7 +2219,7 @@ btr_cur_parse_del_mark_set_sec_rec( is only being recovered, and there cannot be a hash index to it. */ - rec_set_deleted_flag(rec, val); + rec_set_deleted_flag(rec, index->table->comp, val); } return(ptr); @@ -2156,9 +2246,12 @@ btr_cur_del_mark_set_sec_rec( rec = btr_cur_get_rec(cursor); if (btr_cur_print_record_ops && thr) { + mem_heap_t* heap = mem_heap_create(100); btr_cur_trx_report(thr_get_trx(thr), cursor->index, "del mark "); - rec_print(stderr, rec); + rec_print(stderr, rec, rec_get_offsets(rec, cursor->index, + ULINT_UNDEFINED, heap)); + mem_heap_free(heap); } err = lock_sec_rec_modify_check_and_lock(flags, rec, cursor->index, @@ -2174,13 +2267,13 @@ btr_cur_del_mark_set_sec_rec( rw_lock_x_lock(&btr_search_latch); } - rec_set_deleted_flag(rec, val); + rec_set_deleted_flag(rec, cursor->index->table->comp, val); if (block->is_hashed) { rw_lock_x_unlock(&btr_search_latch); } - btr_cur_del_mark_set_sec_rec_log(rec, val, mtr); + btr_cur_del_mark_set_sec_rec_log(rec, cursor->index, val, mtr); return(DB_SUCCESS); } @@ -2192,15 +2285,16 @@ used by the insert buffer insert merge mechanism. */ void btr_cur_del_unmark_for_ibuf( /*========================*/ - rec_t* rec, /* in: record to delete unmark */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: record to delete unmark */ + dict_index_t* index, /* in: record descriptor */ + mtr_t* mtr) /* in: mtr */ { /* We do not need to reserve btr_search_latch, as the page has just been read to the buffer pool and there cannot be a hash index to it. */ - rec_set_deleted_flag(rec, FALSE); + rec_set_deleted_flag(rec, index->table->comp, FALSE); - btr_cur_del_mark_set_sec_rec_log(rec, FALSE, mtr); + btr_cur_del_mark_set_sec_rec_log(rec, index, FALSE, mtr); } /*==================== B-TREE RECORD REMOVE =========================*/ @@ -2279,8 +2373,11 @@ btr_cur_optimistic_delete( successor of the deleted record */ mtr_t* mtr) /* in: mtr */ { - page_t* page; - ulint max_ins_size; + page_t* page; + ulint max_ins_size; + mem_heap_t* heap; + rec_t* rec; + const ulint* offsets; ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_page(cursor)), MTR_MEMO_PAGE_X_FIX)); @@ -2290,26 +2387,30 @@ btr_cur_optimistic_delete( ut_ad(btr_page_get_level(page, mtr) == 0); - if (rec_contains_externally_stored_field(btr_cur_get_rec(cursor))) { - - return(FALSE); - } + heap = mem_heap_create(100); + rec = btr_cur_get_rec(cursor); + offsets = rec_get_offsets(rec, cursor->index, ULINT_UNDEFINED, heap); - if (btr_cur_can_delete_without_compress(cursor, mtr)) { + if (!rec_offs_any_extern(offsets) + && btr_cur_can_delete_without_compress( + cursor, rec_offs_size(offsets), mtr)) { - lock_update_delete(btr_cur_get_rec(cursor)); + lock_update_delete(rec); btr_search_update_hash_on_delete(cursor); max_ins_size = page_get_max_insert_size_after_reorganize(page, 1); - page_cur_delete_rec(btr_cur_get_page_cur(cursor), mtr); + page_cur_delete_rec(btr_cur_get_page_cur(cursor), + cursor->index, mtr); ibuf_update_free_bits_low(cursor->index, page, max_ins_size, mtr); + mem_heap_free(heap); return(TRUE); } + mem_heap_free(heap); return(FALSE); } @@ -2375,8 +2476,20 @@ btr_cur_pessimistic_delete( } } - btr_rec_free_externally_stored_fields(cursor->index, - btr_cur_get_rec(cursor), in_rollback, mtr); + heap = mem_heap_create(256); + rec = btr_cur_get_rec(cursor); + + /* Free externally stored fields if the record is neither + a node pointer nor in two-byte format. + This avoids unnecessary calls to rec_get_offsets(). */ + if (cursor->index->table->comp + ? !rec_get_node_ptr_flag(rec) + : !rec_get_1byte_offs_flag(rec)) { + btr_rec_free_externally_stored_fields(cursor->index, + rec, rec_get_offsets(rec, cursor->index, + ULINT_UNDEFINED, heap), + in_rollback, mtr); + } if ((page_get_n_recs(page) < 2) && (dict_tree_get_page(btr_cur_get_tree(cursor)) @@ -2393,8 +2506,6 @@ btr_cur_pessimistic_delete( goto return_after_reservations; } - rec = btr_cur_get_rec(cursor); - lock_update_delete(rec); if ((btr_page_get_level(page, mtr) > 0) @@ -2406,7 +2517,8 @@ btr_cur_pessimistic_delete( non-leaf level, we must mark the new leftmost node pointer as the predefined minimum record */ - btr_set_min_rec_mark(page_rec_get_next(rec), mtr); + btr_set_min_rec_mark(page_rec_get_next(rec), + cursor->index->table->comp, mtr); } else { /* Otherwise, if we delete the leftmost node pointer on a page, we have to change the father node pointer @@ -2415,8 +2527,6 @@ btr_cur_pessimistic_delete( btr_node_ptr_delete(tree, page, mtr); - heap = mem_heap_create(256); - node_ptr = dict_tree_build_node_ptr( tree, page_rec_get_next(rec), buf_frame_get_page_no(page), @@ -2425,20 +2535,19 @@ btr_cur_pessimistic_delete( btr_insert_on_non_leaf_level(tree, btr_page_get_level(page, mtr) + 1, node_ptr, mtr); - - mem_heap_free(heap); } } btr_search_update_hash_on_delete(cursor); - page_cur_delete_rec(btr_cur_get_page_cur(cursor), mtr); + page_cur_delete_rec(btr_cur_get_page_cur(cursor), cursor->index, mtr); ut_ad(btr_check_node_ptr(tree, page, mtr)); *err = DB_SUCCESS; return_after_reservations: + mem_heap_free(heap); if (ret == FALSE) { ret = btr_cur_compress_if_useful(cursor, mtr); @@ -2663,9 +2772,13 @@ btr_estimate_number_of_different_key_vals( ulint j; ulint add_on; mtr_t mtr; + mem_heap_t* heap; + ulint* offsets1 = 0; + ulint* offsets2 = 0; n_cols = dict_index_get_n_unique(index); + heap = mem_heap_create(100); n_diff = mem_alloc((n_cols + 1) * sizeof(ib_longlong)); for (j = 0; j <= n_cols; j++) { @@ -2697,11 +2810,17 @@ btr_estimate_number_of_different_key_vals( while (rec != page_get_supremum_rec(page) && page_rec_get_next(rec) != page_get_supremum_rec(page)) { + rec_t* next_rec = page_rec_get_next(rec); matched_fields = 0; matched_bytes = 0; - - cmp_rec_rec_with_match(rec, page_rec_get_next(rec), - index, &matched_fields, + offsets1 = rec_reget_offsets(rec, index, + offsets1, ULINT_UNDEFINED, heap); + offsets2 = rec_reget_offsets(next_rec, index, + offsets2, n_cols, heap); + + cmp_rec_rec_with_match(rec, next_rec, + offsets1, offsets2, + index, n_cols, &matched_fields, &matched_bytes); for (j = matched_fields + 1; j <= n_cols; j++) { @@ -2712,7 +2831,8 @@ btr_estimate_number_of_different_key_vals( } total_external_size += - btr_rec_get_externally_stored_len(rec); + btr_rec_get_externally_stored_len( + rec, offsets1); rec = page_rec_get_next(rec); } @@ -2736,8 +2856,11 @@ btr_estimate_number_of_different_key_vals( } } + offsets1 = rec_reget_offsets(rec, index, + offsets1, ULINT_UNDEFINED, heap); total_external_size += - btr_rec_get_externally_stored_len(rec); + btr_rec_get_externally_stored_len(rec, + offsets1); mtr_commit(&mtr); } @@ -2778,6 +2901,7 @@ btr_estimate_number_of_different_key_vals( } mem_free(n_diff); + mem_heap_free(heap); } /*================== EXTERNAL STORAGE OF BIG FIELDS ===================*/ @@ -2788,9 +2912,10 @@ static ulint btr_rec_get_externally_stored_len( /*==============================*/ - /* out: externally stored part, in units of a - database page */ - rec_t* rec) /* in: record */ + /* out: externally stored part, + in units of a database page */ + rec_t* rec, /* in: record */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { ulint n_fields; byte* data; @@ -2799,17 +2924,13 @@ btr_rec_get_externally_stored_len( ulint total_extern_len = 0; ulint i; - if (rec_get_data_size(rec) <= REC_1BYTE_OFFS_LIMIT) { - - return(0); - } - - n_fields = rec_get_n_fields(rec); + ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); + n_fields = rec_offs_n_fields(offsets); for (i = 0; i < n_fields; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { + if (rec_offs_nth_extern(offsets, i)) { - data = rec_get_nth_field(rec, i, &local_len); + data = rec_get_nth_field(rec, offsets, i, &local_len); local_len -= BTR_EXTERN_FIELD_REF_SIZE; @@ -2830,16 +2951,17 @@ static void btr_cur_set_ownership_of_extern_field( /*==================================*/ - rec_t* rec, /* in: clustered index record */ - ulint i, /* in: field number */ - ibool val, /* in: value to set */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: clustered index record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + ulint i, /* in: field number */ + ibool val, /* in: value to set */ + mtr_t* mtr) /* in: mtr */ { byte* data; ulint local_len; ulint byte_val; - data = rec_get_nth_field(rec, i, &local_len); + data = rec_get_nth_field(rec, offsets, i, &local_len); ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); @@ -2866,19 +2988,22 @@ to free the field. */ void btr_cur_mark_extern_inherited_fields( /*=================================*/ - rec_t* rec, /* in: record in a clustered index */ - upd_t* update, /* in: update vector */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: record in a clustered index */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update, /* in: update vector */ + mtr_t* mtr) /* in: mtr */ { ibool is_updated; ulint n; ulint j; ulint i; - - n = rec_get_n_fields(rec); + + ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); + n = rec_offs_n_fields(offsets); for (i = 0; i < n; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { + if (rec_offs_nth_extern(offsets, i)) { /* Check it is not in updated fields */ is_updated = FALSE; @@ -2894,8 +3019,8 @@ btr_cur_mark_extern_inherited_fields( } if (!is_updated) { - btr_cur_set_ownership_of_extern_field(rec, i, - FALSE, mtr); + btr_cur_set_ownership_of_extern_field(rec, + offsets, i, FALSE, mtr); } } } @@ -2967,18 +3092,20 @@ static void btr_cur_unmark_extern_fields( /*=========================*/ - rec_t* rec, /* in: record in a clustered index */ - mtr_t* mtr) /* in: mtr */ + rec_t* rec, /* in: record in a clustered index */ + mtr_t* mtr, /* in: mtr */ + const ulint* offsets)/* in: array returned by rec_get_offsets() */ { ulint n; ulint i; - n = rec_get_n_fields(rec); + ut_ad(!rec_offs_comp(offsets) || !rec_get_node_ptr_flag(rec)); + n = rec_offs_n_fields(offsets); for (i = 0; i < n; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { - - btr_cur_set_ownership_of_extern_field(rec, i, + if (rec_offs_nth_extern(offsets, i)) { + + btr_cur_set_ownership_of_extern_field(rec, offsets, i, TRUE, mtr); } } @@ -3028,10 +3155,10 @@ ulint btr_push_update_extern_fields( /*==========================*/ /* out: number of values stored in ext_vect */ - ulint* ext_vect, /* in: array of ulints, must be preallocated + ulint* ext_vect,/* in: array of ulints, must be preallocated to have space for all fields in rec */ - rec_t* rec, /* in: record */ - upd_t* update) /* in: update vector or NULL */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ + upd_t* update) /* in: update vector or NULL */ { ulint n_pushed = 0; ibool is_updated; @@ -3054,10 +3181,10 @@ btr_push_update_extern_fields( } } - n = rec_get_n_fields(rec); + n = rec_offs_n_fields(offsets); for (i = 0; i < n; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { + if (rec_offs_nth_extern(offsets, i)) { /* Check it is not in updated fields */ is_updated = FALSE; @@ -3119,6 +3246,7 @@ btr_store_big_rec_extern_fields( dict_index_t* index, /* in: index of rec; the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets, /* in: rec_get_offsets(rec, index) */ big_rec_t* big_rec_vec, /* in: vector containing fields to be stored externally */ mtr_t* local_mtr __attribute__((unused))) /* in: mtr @@ -3139,6 +3267,7 @@ btr_store_big_rec_extern_fields( ulint i; mtr_t mtr; + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(mtr_memo_contains(local_mtr, dict_tree_get_lock(index->tree), MTR_MEMO_X_LOCK)); ut_ad(mtr_memo_contains(local_mtr, buf_block_align(rec), @@ -3152,8 +3281,8 @@ btr_store_big_rec_extern_fields( for (i = 0; i < big_rec_vec->n_fields; i++) { - data = rec_get_nth_field(rec, big_rec_vec->fields[i].field_no, - &local_len); + data = rec_get_nth_field(rec, offsets, + big_rec_vec->fields[i].field_no, &local_len); ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE); local_len -= BTR_EXTERN_FIELD_REF_SIZE; extern_len = big_rec_vec->fields[i].len; @@ -3254,7 +3383,7 @@ btr_store_big_rec_extern_fields( /* Set the bit denoting that this field in rec is stored externally */ - rec_set_nth_field_extern_bit(rec, + rec_set_nth_field_extern_bit(rec, index, big_rec_vec->fields[i].field_no, TRUE, &mtr); } @@ -3407,6 +3536,7 @@ btr_rec_free_externally_stored_fields( dict_index_t* index, /* in: index of the data, the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ ibool do_not_free_inherited,/* in: TRUE if called in a rollback and we do not want to free inherited fields */ @@ -3419,21 +3549,18 @@ btr_rec_free_externally_stored_fields( ulint len; ulint i; + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)); - if (rec_get_data_size(rec) <= REC_1BYTE_OFFS_LIMIT) { - - return; - } - /* Free possible externally stored fields in the record */ - n_fields = rec_get_n_fields(rec); + ut_ad(index->table->comp == rec_offs_comp(offsets)); + n_fields = rec_offs_n_fields(offsets); for (i = 0; i < n_fields; i++) { - if (rec_get_nth_field_extern_bit(rec, i)) { + if (rec_offs_nth_extern(offsets, i)) { - data = rec_get_nth_field(rec, i, &len); + data = rec_get_nth_field(rec, offsets, i, &len); btr_free_externally_stored_field(index, data, len, do_not_free_inherited, mtr); } @@ -3450,6 +3577,7 @@ btr_rec_free_updated_extern_fields( dict_index_t* index, /* in: index of rec; the index tree MUST be X-latched */ rec_t* rec, /* in: record */ + const ulint* offsets,/* in: rec_get_offsets(rec, index) */ upd_t* update, /* in: update vector */ ibool do_not_free_inherited,/* in: TRUE if called in a rollback and we do not want to free @@ -3463,13 +3591,10 @@ btr_rec_free_updated_extern_fields( ulint len; ulint i; + ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(mtr_memo_contains(mtr, buf_block_align(rec), MTR_MEMO_PAGE_X_FIX)); - if (rec_get_data_size(rec) <= REC_1BYTE_OFFS_LIMIT) { - return; - } - /* Free possible externally stored fields in the record */ n_fields = upd_get_n_fields(update); @@ -3477,9 +3602,10 @@ btr_rec_free_updated_extern_fields( for (i = 0; i < n_fields; i++) { ufield = upd_get_nth_field(update, i); - if (rec_get_nth_field_extern_bit(rec, ufield->field_no)) { + if (rec_offs_nth_extern(offsets, ufield->field_no)) { - data = rec_get_nth_field(rec, ufield->field_no, &len); + data = rec_get_nth_field(rec, offsets, + ufield->field_no, &len); btr_free_externally_stored_field(index, data, len, do_not_free_inherited, mtr); } @@ -3583,7 +3709,8 @@ byte* btr_rec_copy_externally_stored_field( /*=================================*/ /* out: the field copied to heap */ - rec_t* rec, /* in: record */ + rec_t* rec, /* in: record */ + const ulint* offsets,/* in: array returned by rec_get_offsets() */ ulint no, /* in: field number */ ulint* len, /* out: length of the field */ mem_heap_t* heap) /* in: mem heap */ @@ -3591,7 +3718,8 @@ btr_rec_copy_externally_stored_field( ulint local_len; byte* data; - ut_a(rec_get_nth_field_extern_bit(rec, no)); + ut_ad(rec_offs_validate(rec, NULL, offsets)); + ut_a(rec_offs_nth_extern(offsets, no)); /* An externally stored field can contain some initial data from the field, and in the last 20 bytes it has the @@ -3602,7 +3730,7 @@ btr_rec_copy_externally_stored_field( limit so that field offsets are stored in two bytes, and the extern bit is available in those two bytes. */ - data = rec_get_nth_field(rec, no, &local_len); + data = rec_get_nth_field(rec, offsets, no, &local_len); return(btr_copy_externally_stored_field(len, data, local_len, heap)); } diff --git a/innobase/btr/btr0pcur.c b/innobase/btr/btr0pcur.c index cf8a612ef28..7df8e53cd07 100644 --- a/innobase/btr/btr0pcur.c +++ b/innobase/btr/btr0pcur.c @@ -45,12 +45,12 @@ btr_pcur_free_for_mysql( mem_free(cursor->old_rec_buf); - cursor->old_rec = NULL; cursor->old_rec_buf = NULL; } cursor->btr_cur.page_cur.rec = NULL; cursor->old_rec = NULL; + cursor->old_n_fields = 0; cursor->old_stored = BTR_PCUR_OLD_NOT_STORED; cursor->latch_mode = BTR_NO_LATCHES; @@ -133,9 +133,10 @@ btr_pcur_store_position( cursor->old_stored = BTR_PCUR_OLD_STORED; cursor->old_rec = dict_tree_copy_rec_order_prefix(tree, rec, - &(cursor->old_rec_buf), - &(cursor->buf_size)); - + &cursor->old_n_fields, + &cursor->old_rec_buf, + &cursor->buf_size); + cursor->block_when_stored = buf_block_align(page); cursor->modify_clock = buf_frame_get_modify_clock(page); } @@ -166,6 +167,8 @@ btr_pcur_copy_stored_position( pcur_receive->old_rec = pcur_receive->old_rec_buf + (pcur_donate->old_rec - pcur_donate->old_rec_buf); } + + pcur_receive->old_n_fields = pcur_donate->old_n_fields; } /****************************************************************** @@ -228,6 +231,7 @@ btr_pcur_restore_position( } ut_a(cursor->old_rec); + ut_a(cursor->old_n_fields); page = btr_cur_get_page(btr_pcur_get_btr_cur(cursor)); @@ -242,17 +246,32 @@ btr_pcur_restore_position( buf_page_dbg_add_level(page, SYNC_TREE_NODE); #endif /* UNIV_SYNC_DEBUG */ if (cursor->rel_pos == BTR_PCUR_ON) { - +#ifdef UNIV_DEBUG + rec_t* rec; + ulint* offsets1; + ulint* offsets2; + dict_index_t* index; +#endif /* UNIV_DEBUG */ cursor->latch_mode = latch_mode; - - ut_ad(cmp_rec_rec(cursor->old_rec, - btr_pcur_get_rec(cursor), - dict_tree_find_index( - btr_cur_get_tree( +#ifdef UNIV_DEBUG + rec = btr_pcur_get_rec(cursor); + index = dict_tree_find_index( + btr_cur_get_tree( btr_pcur_get_btr_cur(cursor)), - btr_pcur_get_rec(cursor))) - == 0); + rec); + + heap = mem_heap_create(256); + offsets1 = rec_get_offsets(cursor->old_rec, + index, ULINT_UNDEFINED, heap); + offsets2 = rec_get_offsets(rec, + index, ULINT_UNDEFINED, heap); + ut_ad(cmp_rec_rec(cursor->old_rec, + rec, offsets1, offsets2, + cursor->old_n_fields, + index) == 0); + mem_heap_free(heap); +#endif /* UNIV_DEBUG */ return(TRUE); } @@ -265,7 +284,8 @@ btr_pcur_restore_position( heap = mem_heap_create(256); tree = btr_cur_get_tree(btr_pcur_get_btr_cur(cursor)); - tuple = dict_tree_build_data_tuple(tree, cursor->old_rec, heap); + tuple = dict_tree_build_data_tuple(tree, cursor->old_rec, + cursor->old_n_fields, heap); /* Save the old search mode of the cursor */ old_mode = cursor->search_mode; @@ -287,7 +307,10 @@ btr_pcur_restore_position( if (cursor->rel_pos == BTR_PCUR_ON && btr_pcur_is_on_user_rec(cursor, mtr) - && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor))) { + && 0 == cmp_dtuple_rec(tuple, btr_pcur_get_rec(cursor), + rec_get_offsets(btr_pcur_get_rec(cursor), + btr_pcur_get_btr_cur(cursor)->index, + ULINT_UNDEFINED, heap))) { /* We have to store the NEW value for the modify clock, since the cursor can now be on a different page! But we can retain @@ -376,6 +399,7 @@ btr_pcur_move_to_next_page( ut_ad(next_page_no != FIL_NULL); next_page = btr_page_get(space, next_page_no, cursor->latch_mode, mtr); + ut_a(page_is_comp(next_page) == page_is_comp(page)); buf_block_align(next_page)->check_index_page_at_flush = TRUE; btr_leaf_page_release(page, cursor->latch_mode, mtr); diff --git a/innobase/btr/btr0sea.c b/innobase/btr/btr0sea.c index ad74f9704da..40ccf56492f 100644 --- a/innobase/btr/btr0sea.c +++ b/innobase/btr/btr0sea.c @@ -416,7 +416,7 @@ btr_search_update_hash_ref( && (block->curr_n_fields == info->n_fields) && (block->curr_n_bytes == info->n_bytes) && (block->curr_side == info->side)) { - + mem_heap_t* heap; rec = btr_cur_get_rec(cursor); if (!page_rec_is_user_rec(rec)) { @@ -425,10 +425,11 @@ btr_search_update_hash_ref( } tree_id = ((cursor->index)->tree)->id; - - fold = rec_fold(rec, block->curr_n_fields, - block->curr_n_bytes, tree_id); - + heap = mem_heap_create(100); + fold = rec_fold(rec, rec_get_offsets(rec, cursor->index, + ULINT_UNDEFINED, heap), block->curr_n_fields, + block->curr_n_bytes, tree_id); + mem_heap_free(heap); #ifdef UNIV_SYNC_DEBUG ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EX)); #endif /* UNIV_SYNC_DEBUG */ @@ -535,15 +536,17 @@ btr_search_check_guess( or PAGE_CUR_GE */ mtr_t* mtr) /* in: mtr */ { - page_t* page; - rec_t* rec; - rec_t* prev_rec; - rec_t* next_rec; - ulint n_unique; - ulint match; - ulint bytes; - int cmp; - + page_t* page; + rec_t* rec; + rec_t* prev_rec; + rec_t* next_rec; + ulint n_unique; + ulint match; + ulint bytes; + int cmp; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; + n_unique = dict_index_get_n_unique_in_tree(cursor->index); rec = btr_cur_get_rec(cursor); @@ -554,23 +557,25 @@ btr_search_check_guess( match = 0; bytes = 0; - cmp = page_cmp_dtuple_rec_with_match(tuple, rec, &match, &bytes); + offsets = rec_get_offsets(rec, cursor->index, n_unique, heap); + cmp = page_cmp_dtuple_rec_with_match(tuple, rec, + offsets, &match, &bytes); if (mode == PAGE_CUR_GE) { if (cmp == 1) { - + mem_heap_free(heap); return(FALSE); } cursor->up_match = match; if (match >= n_unique) { - + mem_heap_free(heap); return(TRUE); } } else if (mode == PAGE_CUR_LE) { if (cmp == -1) { - + mem_heap_free(heap); return(FALSE); } @@ -578,12 +583,12 @@ btr_search_check_guess( } else if (mode == PAGE_CUR_G) { if (cmp != -1) { - + mem_heap_free(heap); return(FALSE); } } else if (mode == PAGE_CUR_L) { if (cmp != 1) { - + mem_heap_free(heap); return(FALSE); } } @@ -591,7 +596,7 @@ btr_search_check_guess( if (can_only_compare_to_cursor_rec) { /* Since we could not determine if our guess is right just by looking at the record under the cursor, return FALSE */ - + mem_heap_free(heap); return(FALSE); } @@ -605,17 +610,15 @@ btr_search_check_guess( prev_rec = page_rec_get_prev(rec); if (prev_rec == page_get_infimum_rec(page)) { - - if (btr_page_get_prev(page, mtr) != FIL_NULL) { - - return(FALSE); - } - - return(TRUE); + mem_heap_free(heap); + return(btr_page_get_prev(page, mtr) == FIL_NULL); } + offsets = rec_reget_offsets(prev_rec, cursor->index, + offsets, n_unique, heap); cmp = page_cmp_dtuple_rec_with_match(tuple, prev_rec, - &match, &bytes); + offsets, &match, &bytes); + mem_heap_free(heap); if (mode == PAGE_CUR_GE) { if (cmp != 1) { @@ -636,6 +639,7 @@ btr_search_check_guess( next_rec = page_rec_get_next(rec); if (next_rec == page_get_supremum_rec(page)) { + mem_heap_free(heap); if (btr_page_get_next(page, mtr) == FIL_NULL) { @@ -647,8 +651,12 @@ btr_search_check_guess( return(FALSE); } - cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, &match, &bytes); - + offsets = rec_reget_offsets(next_rec, cursor->index, + offsets, n_unique, heap); + cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, + offsets, &match, &bytes); + mem_heap_free(heap); + if (mode == PAGE_CUR_LE) { if (cmp != -1) { @@ -1003,8 +1011,7 @@ static void btr_search_build_page_hash_index( /*=============================*/ - dict_index_t* index, /* in: index for which to build, or NULL if - not known */ + dict_index_t* index, /* in: index for which to build */ page_t* page, /* in: index page, s- or x-latched */ ulint n_fields,/* in: hash this many full fields */ ulint n_bytes,/* in: hash this many bytes from the next @@ -1024,7 +1031,11 @@ btr_search_build_page_hash_index( ulint* folds; rec_t** recs; ulint i; - + mem_heap_t* heap; + ulint* offsets; + + ut_ad(index); + block = buf_block_align(page); table = btr_search_sys->hash_index; @@ -1061,9 +1072,9 @@ btr_search_build_page_hash_index( return; } - if (index && (dict_index_get_n_unique_in_tree(index) < n_fields + if (dict_index_get_n_unique_in_tree(index) < n_fields || (dict_index_get_n_unique_in_tree(index) == n_fields - && n_bytes > 0))) { + && n_bytes > 0)) { return; } @@ -1072,6 +1083,7 @@ btr_search_build_page_hash_index( folds = mem_alloc(n_recs * sizeof(ulint)); recs = mem_alloc(n_recs * sizeof(rec_t*)); + heap = mem_heap_create(100); n_cached = 0; @@ -1082,18 +1094,19 @@ btr_search_build_page_hash_index( rec = page_get_infimum_rec(page); rec = page_rec_get_next(rec); + offsets = rec_get_offsets(rec, index, n_fields + (n_bytes > 0), heap); + if (rec != sup) { - ut_a(n_fields <= rec_get_n_fields(rec)); + ut_a(n_fields <= rec_offs_n_fields(offsets)); if (n_bytes > 0) { - ut_a(n_fields < rec_get_n_fields(rec)); + ut_a(n_fields < rec_offs_n_fields(offsets)); } } /* FIXME: in a mixed tree, all records may not have enough ordering fields: */ - - fold = rec_fold(rec, n_fields, n_bytes, tree_id); + fold = rec_fold(rec, offsets, n_fields, n_bytes, tree_id); if (side == BTR_SEARCH_LEFT_SIDE) { @@ -1117,7 +1130,10 @@ btr_search_build_page_hash_index( break; } - next_fold = rec_fold(next_rec, n_fields, n_bytes, tree_id); + offsets = rec_reget_offsets(next_rec, index, + offsets, n_fields + (n_bytes > 0), heap); + next_fold = rec_fold(next_rec, offsets, n_fields, + n_bytes, tree_id); if (fold != next_fold) { /* Insert an entry into the hash index */ @@ -1145,13 +1161,7 @@ btr_search_build_page_hash_index( if (block->is_hashed && ((block->curr_n_fields != n_fields) || (block->curr_n_bytes != n_bytes) || (block->curr_side != side))) { - - rw_lock_x_unlock(&btr_search_latch); - - mem_free(folds); - mem_free(recs); - - return; + goto exit_func; } block->is_hashed = TRUE; @@ -1166,10 +1176,12 @@ btr_search_build_page_hash_index( ha_insert_for_fold(table, folds[i], recs[i]); } +exit_func: rw_lock_x_unlock(&btr_search_latch); mem_free(folds); mem_free(recs); + mem_heap_free(heap); } /************************************************************************ @@ -1181,10 +1193,13 @@ parameters as page (this often happens when a page is split). */ void btr_search_move_or_delete_hash_entries( /*===================================*/ - page_t* new_page, /* in: records are copied to this page */ - page_t* page) /* in: index page from which records were - copied, and the copied records will be deleted - from this page */ + page_t* new_page, /* in: records are copied + to this page */ + page_t* page, /* in: index page from which + records were copied, and the + copied records will be deleted + from this page */ + dict_index_t* index) /* in: record descriptor */ { buf_block_t* block; buf_block_t* new_block; @@ -1194,6 +1209,7 @@ btr_search_move_or_delete_hash_entries( block = buf_block_align(page); new_block = buf_block_align(new_page); + ut_a(page_is_comp(page) == page_is_comp(new_page)); #ifdef UNIV_SYNC_DEBUG ut_ad(rw_lock_own(&(block->lock), RW_LOCK_EX)); @@ -1224,8 +1240,8 @@ btr_search_move_or_delete_hash_entries( rw_lock_s_unlock(&btr_search_latch); ut_a(n_fields + n_bytes > 0); - - btr_search_build_page_hash_index(NULL, new_page, n_fields, + + btr_search_build_page_hash_index(index, new_page, n_fields, n_bytes, side); ut_a(n_fields == block->curr_n_fields); ut_a(n_bytes == block->curr_n_bytes); @@ -1253,6 +1269,7 @@ btr_search_update_hash_on_delete( ulint fold; dulint tree_id; ibool found; + mem_heap_t* heap; rec = btr_cur_get_rec(cursor); @@ -1272,9 +1289,11 @@ btr_search_update_hash_on_delete( table = btr_search_sys->hash_index; tree_id = cursor->index->tree->id; - - fold = rec_fold(rec, block->curr_n_fields, block->curr_n_bytes, - tree_id); + heap = mem_heap_create(100); + fold = rec_fold(rec, rec_get_offsets(rec, cursor->index, + ULINT_UNDEFINED, heap), block->curr_n_fields, + block->curr_n_bytes, tree_id); + mem_heap_free(heap); rw_lock_x_lock(&btr_search_latch); found = ha_search_and_delete_if_found(table, fold, rec); @@ -1355,6 +1374,8 @@ btr_search_update_hash_on_insert( ulint n_bytes; ulint side; ibool locked = FALSE; + mem_heap_t* heap; + ulint* offsets; table = btr_search_sys->hash_index; @@ -1383,15 +1404,22 @@ btr_search_update_hash_on_insert( next_rec = page_rec_get_next(ins_rec); page = buf_frame_align(rec); - - ins_fold = rec_fold(ins_rec, n_fields, n_bytes, tree_id); + heap = mem_heap_create(100); + offsets = rec_get_offsets(ins_rec, cursor->index, + ULINT_UNDEFINED, heap); + ins_fold = rec_fold(ins_rec, offsets, n_fields, n_bytes, tree_id); if (next_rec != page_get_supremum_rec(page)) { - next_fold = rec_fold(next_rec, n_fields, n_bytes, tree_id); + offsets = rec_reget_offsets(next_rec, cursor->index, + offsets, n_fields + (n_bytes > 0), heap); + next_fold = rec_fold(next_rec, offsets, n_fields, + n_bytes, tree_id); } if (rec != page_get_infimum_rec(page)) { - fold = rec_fold(rec, n_fields, n_bytes, tree_id); + offsets = rec_reget_offsets(rec, cursor->index, + offsets, n_fields + (n_bytes > 0), heap); + fold = rec_fold(rec, offsets, n_fields, n_bytes, tree_id); } else { if (side == BTR_SEARCH_LEFT_SIDE) { @@ -1461,6 +1489,7 @@ check_next_rec: } function_exit: + mem_heap_free(heap); if (locked) { rw_lock_x_unlock(&btr_search_latch); } @@ -1470,9 +1499,10 @@ function_exit: Validates the search system. */ ibool -btr_search_validate(void) -/*=====================*/ +btr_search_validate( +/*================*/ /* out: TRUE if ok */ + dict_index_t* index) /* in: record descriptor */ { buf_block_t* block; page_t* page; @@ -1480,6 +1510,8 @@ btr_search_validate(void) ulint n_page_dumps = 0; ibool ok = TRUE; ulint i; + mem_heap_t* heap = mem_heap_create(100); + ulint* offsets = NULL; rw_lock_x_lock(&btr_search_latch); @@ -1489,9 +1521,13 @@ btr_search_validate(void) while (node != NULL) { block = buf_block_align(node->data); page = buf_frame_align(node->data); + offsets = rec_reget_offsets((rec_t*) node->data, index, + offsets, block->curr_n_fields + + (block->curr_n_bytes > 0), heap); if (!block->is_hashed || node->fold != rec_fold((rec_t*)(node->data), + offsets, block->curr_n_fields, block->curr_n_bytes, btr_page_get_index_id(page))) { @@ -1507,12 +1543,13 @@ btr_search_validate(void) (ulong) ut_dulint_get_low(btr_page_get_index_id(page)), (ulong) node->fold, (ulong) rec_fold((rec_t*)(node->data), + offsets, block->curr_n_fields, block->curr_n_bytes, btr_page_get_index_id(page))); fputs("InnoDB: Record ", stderr); - rec_print(stderr, (rec_t*)(node->data)); + rec_print(stderr, (rec_t*)node->data, offsets); fprintf(stderr, "\nInnoDB: on that page." "Page mem address %p, is hashed %lu, n fields %lu, n bytes %lu\n" "side %lu\n", @@ -1536,6 +1573,7 @@ btr_search_validate(void) } rw_lock_x_unlock(&btr_search_latch); + mem_heap_free(heap); return(ok); } |