diff options
Diffstat (limited to 'innobase/page')
-rw-r--r-- | innobase/page/Makefile.am | 24 | ||||
-rw-r--r-- | innobase/page/makefilewin | 12 | ||||
-rw-r--r-- | innobase/page/page0cur.c | 1110 | ||||
-rw-r--r-- | innobase/page/page0page.c | 1371 | ||||
-rw-r--r-- | innobase/page/ts/makefile | 16 | ||||
-rw-r--r-- | innobase/page/ts/tspage.c | 705 |
6 files changed, 3238 insertions, 0 deletions
diff --git a/innobase/page/Makefile.am b/innobase/page/Makefile.am new file mode 100644 index 00000000000..85fe585a633 --- /dev/null +++ b/innobase/page/Makefile.am @@ -0,0 +1,24 @@ +# Copyright (C) 2000 MySQL AB & MySQL Finland AB & TCX DataKonsult AB +# & Innobase Oy +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +include ../include/Makefile.i + +libs_LIBRARIES = libpage.a + +libpage_a_SOURCES = page0page.c page0cur.c + +EXTRA_PROGRAMS = diff --git a/innobase/page/makefilewin b/innobase/page/makefilewin new file mode 100644 index 00000000000..4a132cf828c --- /dev/null +++ b/innobase/page/makefilewin @@ -0,0 +1,12 @@ +include ..\include\makefile.i + +page.lib: page0page.obj page0cur.obj + lib -out:..\libs\page.lib page0page.obj page0cur.obj + +page0page.obj: page0page.c + $(CCOM) $(CFL) -c page0page.c + +page0cur.obj: page0cur.c + $(CCOM) $(CFL) -c page0cur.c + + diff --git a/innobase/page/page0cur.c b/innobase/page/page0cur.c new file mode 100644 index 00000000000..e329b916b1b --- /dev/null +++ b/innobase/page/page0cur.c @@ -0,0 +1,1110 @@ +/************************************************************************ +The page cursor + +(c) 1994-1996 Innobase Oy + +Created 10/4/1994 Heikki Tuuri +*************************************************************************/ + +#include "page0cur.h" +#ifdef UNIV_NONINL +#include "page0cur.ic" +#endif + +#include "rem0cmp.h" +#include "mtr0log.h" + +ulint page_cur_short_succ = 0; + +ulint page_rnd = 976722341; + +#ifdef PAGE_CUR_ADAPT + +/******************************************************************** +Tries a search shortcut based on the last insert. */ +UNIV_INLINE +ibool +page_cur_try_search_shortcut( +/*=========================*/ + page_t* page, /* in: index page */ + dtuple_t* tuple, /* in: data tuple */ + ulint* iup_matched_fields, + /* in/out: already matched fields in upper + limit record */ + ulint* iup_matched_bytes, + /* in/out: already matched bytes in a field + not yet completely matched */ + ulint* ilow_matched_fields, + /* in/out: already matched fields in lower + limit record */ + ulint* ilow_matched_bytes, + /* in/out: already matched bytes in a field + not yet completely matched */ + page_cur_t* cursor) /* out: page cursor */ +{ + int cmp; + rec_t* rec; + rec_t* next_rec; + ulint low_match; + ulint low_bytes; + ulint up_match; + ulint up_bytes; +#ifdef UNIV_SEARCH_DEBUG + page_cur_t cursor2; +#endif + ut_ad(dtuple_check_typed(tuple)); + + rec = page_header_get_ptr(page, PAGE_LAST_INSERT); + + ut_ad(rec); + ut_ad(page_rec_is_user_rec(rec)); + + ut_pair_min(&low_match, &low_bytes, + *ilow_matched_fields, *ilow_matched_bytes, + *iup_matched_fields, *iup_matched_bytes); + + up_match = low_match; + up_bytes = low_bytes; + + cmp = page_cmp_dtuple_rec_with_match(tuple, rec, &low_match, + &low_bytes); + if (cmp == -1) { + + return(FALSE); + } + + next_rec = page_rec_get_next(rec); + + cmp = page_cmp_dtuple_rec_with_match(tuple, next_rec, &up_match, + &up_bytes); + if (cmp != -1) { + + return(FALSE); + } + + cursor->rec = rec; + +#ifdef UNIV_SEARCH_DEBUG + page_cur_search_with_match(page, tuple, PAGE_CUR_DBG, + iup_matched_fields, + iup_matched_bytes, + ilow_matched_fields, + ilow_matched_bytes, + &cursor2); + ut_a(cursor2.rec == cursor->rec); + + if (next_rec != page_get_supremum_rec(page)) { + + ut_a(*iup_matched_fields == up_match); + ut_a(*iup_matched_bytes == up_bytes); + } + + ut_a(*ilow_matched_fields == low_match); + ut_a(*ilow_matched_bytes == low_bytes); +#endif + if (next_rec != page_get_supremum_rec(page)) { + + *iup_matched_fields = up_match; + *iup_matched_bytes = up_bytes; + } + + *ilow_matched_fields = low_match; + *ilow_matched_bytes = low_bytes; + +#ifdef UNIV_SEARCH_PERF_STAT + page_cur_short_succ++; +#endif + return(TRUE); +} + +#endif + +/******************************************************************** +Searches the right position for a page cursor. */ + +void +page_cur_search_with_match( +/*=======================*/ + page_t* page, /* in: index page */ + dtuple_t* tuple, /* in: data tuple */ + ulint mode, /* in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, + or PAGE_CUR_GE */ + ulint* iup_matched_fields, + /* in/out: already matched fields in upper + limit record */ + ulint* iup_matched_bytes, + /* in/out: already matched bytes in a field + not yet completely matched */ + ulint* ilow_matched_fields, + /* in/out: already matched fields in lower + limit record */ + ulint* ilow_matched_bytes, + /* in/out: already matched bytes in a field + not yet completely matched */ + page_cur_t* cursor) /* out: page cursor */ +{ + ulint up; + ulint low; + ulint mid; + page_dir_slot_t* slot; + rec_t* up_rec; + rec_t* low_rec; + rec_t* mid_rec; + ulint up_matched_fields; + ulint up_matched_bytes; + ulint low_matched_fields; + ulint low_matched_bytes; + ulint cur_matched_fields; + ulint cur_matched_bytes; + int cmp; +#ifdef UNIV_SEARCH_DEBUG + int dbg_cmp; + ulint dbg_matched_fields; + ulint dbg_matched_bytes; +#endif + ut_ad(page && tuple && iup_matched_fields && iup_matched_bytes + && ilow_matched_fields && ilow_matched_bytes && cursor); + ut_ad(dtuple_validate(tuple)); + ut_ad(dtuple_check_typed(tuple)); + ut_ad((mode == PAGE_CUR_L) || (mode == PAGE_CUR_LE) + || (mode == PAGE_CUR_G) || (mode == PAGE_CUR_GE) + || (mode == PAGE_CUR_DBG)); + +#ifdef PAGE_CUR_ADAPT + if ((page_header_get_field(page, PAGE_LEVEL) == 0) + && (mode == PAGE_CUR_LE) + && (page_header_get_field(page, PAGE_N_DIRECTION) > 3) + && (page_header_get_ptr(page, PAGE_LAST_INSERT)) + && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) { + + if (page_cur_try_search_shortcut(page, tuple, + iup_matched_fields, + iup_matched_bytes, + ilow_matched_fields, + ilow_matched_bytes, + cursor)) { + return; + } + } +/*#ifdef UNIV_SEARCH_DEBUG */ + if (mode == PAGE_CUR_DBG) { + mode = PAGE_CUR_LE; + } +/*#endif */ +#endif + /* If mode PAGE_CUR_G is specified, we are trying to position the + cursor to answer a query of the form "tuple < X", where tuple is + the input parameter, and X denotes an arbitrary physical record on + the page. We want to position the cursor on the first X which + satisfies the condition. */ + + up_matched_fields = *iup_matched_fields; + up_matched_bytes = *iup_matched_bytes; + low_matched_fields = *ilow_matched_fields; + low_matched_bytes = *ilow_matched_bytes; + + /* Perform binary search. First the search is done through the page + directory, after that as a linear search in the list of records + owned by the upper limit directory slot. */ + + low = 0; + up = page_dir_get_n_slots(page) - 1; + + /* Perform binary search until the lower and upper limit directory + slots come to the distance 1 of each other */ + + while (up - low > 1) { + mid = (low + up) / 2; + slot = page_dir_get_nth_slot(page, mid); + mid_rec = page_dir_slot_get_rec(slot); + + ut_pair_min(&cur_matched_fields, &cur_matched_bytes, + low_matched_fields, low_matched_bytes, + up_matched_fields, up_matched_bytes); + + cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, + &cur_matched_fields, + &cur_matched_bytes); + if (cmp == 1) { + low = mid; + low_matched_fields = cur_matched_fields; + low_matched_bytes = cur_matched_bytes; + + } else if (cmp == -1) { + up = mid; + up_matched_fields = cur_matched_fields; + up_matched_bytes = cur_matched_bytes; + + } else if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_LE)) { + low = mid; + low_matched_fields = cur_matched_fields; + low_matched_bytes = cur_matched_bytes; + } else { + up = mid; + up_matched_fields = cur_matched_fields; + up_matched_bytes = cur_matched_bytes; + } + } + + slot = page_dir_get_nth_slot(page, low); + low_rec = page_dir_slot_get_rec(slot); + slot = page_dir_get_nth_slot(page, up); + up_rec = page_dir_slot_get_rec(slot); + + /* Perform linear search until the upper and lower records + come to distance 1 of each other. */ + + while (page_rec_get_next(low_rec) != up_rec) { + + mid_rec = page_rec_get_next(low_rec); + + ut_pair_min(&cur_matched_fields, &cur_matched_bytes, + low_matched_fields, low_matched_bytes, + up_matched_fields, up_matched_bytes); + + cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, + &cur_matched_fields, + &cur_matched_bytes); + if (cmp == 1) { + low_rec = mid_rec; + low_matched_fields = cur_matched_fields; + low_matched_bytes = cur_matched_bytes; + + } else if (cmp == -1) { + up_rec = mid_rec; + up_matched_fields = cur_matched_fields; + up_matched_bytes = cur_matched_bytes; + + } else if ((mode == PAGE_CUR_G) || (mode == PAGE_CUR_LE)) { + low_rec = mid_rec; + low_matched_fields = cur_matched_fields; + low_matched_bytes = cur_matched_bytes; + } else { + up_rec = mid_rec; + up_matched_fields = cur_matched_fields; + up_matched_bytes = cur_matched_bytes; + } + } + +#ifdef UNIV_SEARCH_DEBUG + + /* Check that the lower and upper limit records have the + right alphabetical order compared to tuple. */ + dbg_matched_fields = 0; + dbg_matched_bytes = 0; + + dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec, + &dbg_matched_fields, + &dbg_matched_bytes); + if (mode == PAGE_CUR_G) { + ut_a(dbg_cmp >= 0); + } else if (mode == PAGE_CUR_GE) { + ut_a(dbg_cmp == 1); + } else if (mode == PAGE_CUR_L) { + ut_a(dbg_cmp == 1); + } else if (mode == PAGE_CUR_LE) { + ut_a(dbg_cmp >= 0); + } + + if (low_rec != page_get_infimum_rec(page)) { + + ut_a(low_matched_fields == dbg_matched_fields); + ut_a(low_matched_bytes == dbg_matched_bytes); + } + + dbg_matched_fields = 0; + dbg_matched_bytes = 0; + + dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec, + &dbg_matched_fields, + &dbg_matched_bytes); + if (mode == PAGE_CUR_G) { + ut_a(dbg_cmp == -1); + } else if (mode == PAGE_CUR_GE) { + ut_a(dbg_cmp <= 0); + } else if (mode == PAGE_CUR_L) { + ut_a(dbg_cmp <= 0); + } else if (mode == PAGE_CUR_LE) { + ut_a(dbg_cmp == -1); + } + + if (up_rec != page_get_supremum_rec(page)) { + + ut_a(up_matched_fields == dbg_matched_fields); + ut_a(up_matched_bytes == dbg_matched_bytes); + } +#endif + if (mode <= PAGE_CUR_GE) { + cursor->rec = up_rec; + } else { + cursor->rec = low_rec; + } + + *iup_matched_fields = up_matched_fields; + *iup_matched_bytes = up_matched_bytes; + *ilow_matched_fields = low_matched_fields; + *ilow_matched_bytes = low_matched_bytes; +} + +/*************************************************************** +Positions a page cursor on a randomly chosen user record on a page. If there +are no user records, sets the cursor on the infimum record. */ + +void +page_cur_open_on_rnd_user_rec( +/*==========================*/ + page_t* page, /* in: page */ + page_cur_t* cursor) /* in/out: page cursor */ +{ + ulint rnd; + rec_t* rec; + + if (page_get_n_recs(page) == 0) { + page_cur_position(page_get_infimum_rec(page), cursor); + + return; + } + + page_rnd += 87584577; + + rnd = page_rnd % page_get_n_recs(page); + + rec = page_get_infimum_rec(page); + + rec = page_rec_get_next(rec); + + while (rnd > 0) { + rec = page_rec_get_next(rec); + + rnd--; + } + + page_cur_position(rec, cursor); +} + +/*************************************************************** +Writes the log record of a record insert on a page. */ +static +void +page_cur_insert_rec_write_log( +/*==========================*/ + rec_t* insert_rec, /* in: inserted physical record */ + ulint rec_size, /* in: insert_rec size */ + rec_t* cursor_rec, /* in: record the cursor is pointing to */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + ulint cur_rec_size; + ulint extra_size; + ulint cur_extra_size; + ulint min_rec_size; + byte* ins_ptr; + byte* cur_ptr; + ulint extra_info_yes; + byte* log_ptr; + ulint i; + + log_ptr = mlog_open(mtr, 30 + MLOG_BUF_MARGIN); + + if (log_ptr == NULL) { + + return; + } + + extra_size = rec_get_extra_size(insert_rec); + + cur_extra_size = rec_get_extra_size(cursor_rec); + cur_rec_size = rec_get_size(cursor_rec); + + ins_ptr = insert_rec - extra_size; + + i = 0; + + if (cur_extra_size == extra_size) { + min_rec_size = ut_min(cur_rec_size, rec_size); + + cur_ptr = cursor_rec - cur_extra_size; + + /* Find out the first byte in insert_rec which differs from + cursor_rec; skip the bytes in the record info */ + + for (;;) { + if (i >= min_rec_size) { + + break; + } else if (*ins_ptr == *cur_ptr) { + i++; + ins_ptr++; + cur_ptr++; + } else if ((i < extra_size) + && (i >= extra_size - REC_N_EXTRA_BYTES)) { + i = extra_size; + ins_ptr = insert_rec; + cur_ptr = cursor_rec; + } else { + break; + } + } + } + + if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) { + + log_ptr = mlog_write_initial_log_record_fast(insert_rec, + MLOG_REC_INSERT, log_ptr, mtr); + /* Write the cursor rec offset as a 2-byte ulint */ + mach_write_to_2(log_ptr, cursor_rec + - buf_frame_align(cursor_rec)); + log_ptr += 2; + } + + if ((rec_get_info_bits(insert_rec) != rec_get_info_bits(cursor_rec)) + || (extra_size != cur_extra_size) + || (rec_size != cur_rec_size)) { + + extra_info_yes = 1; + } else { + extra_info_yes = 0; + } + + /* Write the record end segment length and the extra info storage + flag */ + log_ptr += mach_write_compressed(log_ptr, 2 * (rec_size - i) + + extra_info_yes); + if (extra_info_yes) { + /* Write the info bits */ + mach_write_to_1(log_ptr, rec_get_info_bits(insert_rec)); + log_ptr++; + + /* Write the record origin offset */ + log_ptr += mach_write_compressed(log_ptr, extra_size); + + /* Write the mismatch index */ + log_ptr += mach_write_compressed(log_ptr, i); + } + + /* Write to the log the inserted index record end segment which + differs from the cursor record */ + + if (rec_size - i < MLOG_BUF_MARGIN) { + ut_memcpy(log_ptr, ins_ptr, rec_size - i); + log_ptr += rec_size - i; + } + + mlog_close(mtr, log_ptr); + + if (rec_size - i >= MLOG_BUF_MARGIN) { + mlog_catenate_string(mtr, ins_ptr, rec_size - i); + } +} + +/*************************************************************** +Parses a log record of a record insert on a page. */ + +byte* +page_cur_parse_insert_rec( +/*======================*/ + /* out: end of log record or NULL */ + ibool is_short,/* in: TRUE if short inserts */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ +{ + ulint extra_info_yes; + ulint offset; + ulint origin_offset; + ulint end_seg_len; + ulint mismatch_index; + rec_t* cursor_rec; + byte buf1[1024]; + byte* buf; + ulint info_bits; + page_cur_t cursor; + + if (!is_short) { + /* Read the cursor rec offset as a 2-byte ulint */ + + if (end_ptr < ptr + 2) { + + return(NULL); + } + + offset = mach_read_from_2(ptr); + + ptr += 2; + } + + ptr = mach_parse_compressed(ptr, end_ptr, &end_seg_len); + + if (ptr == NULL) { + + return(NULL); + } + + extra_info_yes = end_seg_len & 0x1; + end_seg_len = end_seg_len / 2; + + if (extra_info_yes) { + /* Read the info bits */ + + if (end_ptr < ptr + 1) { + + return(NULL); + } + + info_bits = mach_read_from_1(ptr); + ptr++; + + ptr = mach_parse_compressed(ptr, end_ptr, &origin_offset); + + if (ptr == NULL) { + + return(NULL); + } + + ptr = mach_parse_compressed(ptr, end_ptr, &mismatch_index); + + if (ptr == NULL) { + + return(NULL); + } + } + + if (end_ptr < ptr + end_seg_len) { + + return(NULL); + } + + if (page == NULL) { + + return(ptr + end_seg_len); + } + + /* Read from the log the inserted index record end segment which + differs from the cursor record */ + + if (is_short) { + cursor_rec = page_rec_get_prev(page_get_supremum_rec(page)); + } else { + cursor_rec = page + offset; + } + + if (extra_info_yes == 0) { + info_bits = rec_get_info_bits(cursor_rec); + origin_offset = rec_get_extra_size(cursor_rec); + mismatch_index = rec_get_size(cursor_rec) - end_seg_len; + } + + if (mismatch_index + end_seg_len < 1024) { + buf = buf1; + } else { + buf = mem_alloc(mismatch_index + end_seg_len); + } + + /* Build the inserted record to buf */ + + ut_memcpy(buf, rec_get_start(cursor_rec), mismatch_index); + ut_memcpy(buf + mismatch_index, ptr, end_seg_len); + + rec_set_info_bits(buf + origin_offset, info_bits); + + page_cur_position(cursor_rec, &cursor); + + page_cur_rec_insert(&cursor, buf + origin_offset, mtr); + + if (mismatch_index + end_seg_len >= 1024) { + + mem_free(buf); + } + + return(ptr + end_seg_len); +} + +/*************************************************************** +Inserts a record next to page cursor. Returns pointer to inserted record if +succeed, i.e., enough space available, NULL otherwise. The record to be +inserted can be in a data tuple or as a physical record. The other parameter +must then be NULL. The cursor stays at the same position. */ + +rec_t* +page_cur_insert_rec_low( +/*====================*/ + /* out: pointer to record if succeed, NULL + otherwise */ + page_cur_t* cursor, /* in: a page cursor */ + dtuple_t* tuple, /* in: pointer to a data tuple or NULL */ + ulint data_size,/* in: data size of tuple */ + rec_t* rec, /* in: pointer to a physical record or NULL */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + byte* insert_buf = NULL; + ulint rec_size; + byte* page; /* the relevant page */ + rec_t* last_insert; /* cursor position at previous insert */ + rec_t* insert_rec; /* inserted record */ + ulint heap_no; /* heap number of the inserted record */ + rec_t* current_rec; /* current record after which the + new record is inserted */ + rec_t* next_rec; /* next record after current before + the insertion */ + ulint owner_slot; /* the slot which owns the inserted record */ + rec_t* owner_rec; + ulint n_owned; + + ut_ad(cursor && mtr); + ut_ad(tuple || rec); + ut_ad(!(tuple && rec)); + ut_ad(rec || dtuple_check_typed(tuple)); + ut_ad(rec || (dtuple_get_data_size(tuple) == data_size)); + + page = page_cur_get_page(cursor); + + ut_ad(cursor->rec != page_get_supremum_rec(page)); + + /* 1. Get the size of the physical record in the page */ + if (tuple != NULL) { + rec_size = data_size + rec_get_converted_extra_size( + data_size, + dtuple_get_n_fields(tuple)); + } else { + rec_size = rec_get_size(rec); + } + + /* 2. Try to find suitable space from page memory management */ + insert_buf = page_mem_alloc(page, rec_size, &heap_no); + + if (insert_buf == NULL) { + + return(NULL); + } + + /* 3. Create the record */ + if (tuple != NULL) { + insert_rec = rec_convert_dtuple_to_rec_low(insert_buf, tuple, + data_size); + } else { + insert_rec = rec_copy(insert_buf, rec); + } + + ut_ad(insert_rec); + ut_ad(rec_size == rec_get_size(insert_rec)); + + /* 4. Insert the record in the linked list of records */ + + current_rec = cursor->rec; + + next_rec = page_rec_get_next(current_rec); + page_rec_set_next(insert_rec, next_rec); + page_rec_set_next(current_rec, insert_rec); + + page_header_set_field(page, PAGE_N_RECS, 1 + page_get_n_recs(page)); + + /* 5. Set the n_owned field in the inserted record to zero, + and set the heap_no field */ + + rec_set_n_owned(insert_rec, 0); + rec_set_heap_no(insert_rec, heap_no); + + /* 6. Update the last insertion info in page header */ + + last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT); + + if (last_insert == NULL) { + page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION); + page_header_set_field(page, PAGE_N_DIRECTION, 0); + + } else if ((last_insert == current_rec) + && (page_header_get_field(page, PAGE_DIRECTION) != PAGE_LEFT)) { + + page_header_set_field(page, PAGE_DIRECTION, PAGE_RIGHT); + page_header_set_field(page, PAGE_N_DIRECTION, + page_header_get_field(page, PAGE_N_DIRECTION) + 1); + + } else if ((page_rec_get_next(insert_rec) == last_insert) + && (page_header_get_field(page, PAGE_DIRECTION) != PAGE_RIGHT)) { + + page_header_set_field(page, PAGE_DIRECTION, PAGE_LEFT); + page_header_set_field(page, PAGE_N_DIRECTION, + page_header_get_field(page, PAGE_N_DIRECTION) + 1); + } else { + page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION); + page_header_set_field(page, PAGE_N_DIRECTION, 0); + } + + page_header_set_ptr(page, PAGE_LAST_INSERT, insert_rec); + + /* 7. It remains to update the owner record. */ + + owner_rec = page_rec_find_owner_rec(insert_rec); + n_owned = rec_get_n_owned(owner_rec); + rec_set_n_owned(owner_rec, n_owned + 1); + + /* 8. Now we have incremented the n_owned field of the owner + record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED, + we have to split the corresponding directory slot in two. */ + + if (n_owned == PAGE_DIR_SLOT_MAX_N_OWNED) { + owner_slot = page_dir_find_owner_slot(owner_rec); + page_dir_split_slot(page, owner_slot); + } + + /* 9. Write log record of the insert */ + page_cur_insert_rec_write_log(insert_rec, rec_size, current_rec, mtr); + + return(insert_rec); +} + +/************************************************************** +Writes a log record of copying a record list end to a new created page. */ +UNIV_INLINE +byte* +page_copy_rec_list_to_created_page_write_log( +/*=========================================*/ + /* out: 4-byte field where to write the log data + length */ + page_t* page, /* in: index page */ + mtr_t* mtr) /* in: mtr */ +{ + byte* log_ptr; + + mlog_write_initial_log_record(page, MLOG_LIST_END_COPY_CREATED, mtr); + + log_ptr = mlog_open(mtr, 4); + + mlog_close(mtr, log_ptr + 4); + + return(log_ptr); +} + +/************************************************************** +Parses a log record of copying a record list end to a new created page. */ + +byte* +page_parse_copy_rec_list_to_created_page( +/*=====================================*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ +{ + byte* rec_end; + ulint log_data_len; + + if (ptr + 4 > end_ptr) { + + return(NULL); + } + + log_data_len = mach_read_from_4(ptr); + ptr += 4; + + rec_end = ptr + log_data_len; + + if (rec_end > end_ptr) { + + return(NULL); + } + + if (!page) { + + return(rec_end); + } + + while (ptr < rec_end) { + ptr = page_cur_parse_insert_rec(TRUE, ptr, end_ptr, page, mtr); + } + + ut_a(ptr == rec_end); + + page_header_set_ptr(page, PAGE_LAST_INSERT, NULL); + page_header_set_field(page, PAGE_DIRECTION, PAGE_NO_DIRECTION); + page_header_set_field(page, PAGE_N_DIRECTION, 0); + + return(rec_end); +} + +/***************************************************************** +Copies records from page to a newly created page, from a given record onward, +including that record. Infimum and supremum records are not copied. */ + +void +page_copy_rec_list_end_to_created_page( +/*===================================*/ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: first record to copy */ + mtr_t* mtr) /* in: mtr */ +{ + page_dir_slot_t* slot; + byte* heap_top; + rec_t* insert_rec; + rec_t* prev_rec; + ulint count; + ulint n_recs; + ulint slot_index; + ulint rec_size; + ulint log_mode; + byte* log_ptr; + ulint log_data_len; + + ut_ad(page_header_get_field(new_page, PAGE_N_HEAP) == 2); + ut_ad(page != new_page); + + if (rec == page_get_infimum_rec(page)) { + + rec = page_rec_get_next(rec); + } + + if (rec == page_get_supremum_rec(page)) { + + return; + } + +#ifdef UNIV_DEBUG + /* To pass the debug tests we have to set these dummy values + in the debug version */ + page_header_set_field(new_page, PAGE_N_DIR_SLOTS, UNIV_PAGE_SIZE / 2); + page_header_set_ptr(new_page, PAGE_HEAP_TOP, + new_page + UNIV_PAGE_SIZE - 1); +#endif + + log_ptr = page_copy_rec_list_to_created_page_write_log(new_page, mtr); + + log_data_len = dyn_array_get_data_size(&(mtr->log)); + + /* Individual inserts are logged in a shorter form */ + + log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS); + + prev_rec = page_get_infimum_rec(new_page); + heap_top = new_page + PAGE_SUPREMUM_END; + count = 0; + slot_index = 0; + n_recs = 0; + + while (rec != page_get_supremum_rec(page)) { + + insert_rec = rec_copy(heap_top, rec); + + rec_set_next_offs(prev_rec, insert_rec - new_page); + + rec_set_n_owned(insert_rec, 0); + rec_set_heap_no(insert_rec, 2 + n_recs); + + rec_size = rec_get_size(insert_rec); + + heap_top = heap_top + rec_size; + + ut_ad(heap_top < new_page + UNIV_PAGE_SIZE); + + count++; + n_recs++; + + if (count == (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2) { + + slot_index++; + + slot = page_dir_get_nth_slot(new_page, slot_index); + + page_dir_slot_set_rec(slot, insert_rec); + page_dir_slot_set_n_owned(slot, count); + + count = 0; + } + + page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec, + mtr); + prev_rec = insert_rec; + rec = page_rec_get_next(rec); + } + + if ((slot_index > 0) && (count + 1 + + (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2 + <= PAGE_DIR_SLOT_MAX_N_OWNED)) { + /* We can merge the two last dir slots. This operation is + here to make this function imitate exactly the equivalent + task made using page_cur_insert_rec, which we use in database + recovery to reproduce the task performed by this function. + To be able to check the correctness of recovery, it is good + that it imitates exactly. */ + + count += (PAGE_DIR_SLOT_MAX_N_OWNED + 1) / 2; + + page_dir_slot_set_n_owned(slot, 0); + + slot_index--; + } + + log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len; + + mach_write_to_4(log_ptr, log_data_len); + + rec_set_next_offs(insert_rec, PAGE_SUPREMUM); + + slot = page_dir_get_nth_slot(new_page, 1 + slot_index); + + page_dir_slot_set_rec(slot, page_get_supremum_rec(new_page)); + page_dir_slot_set_n_owned(slot, count + 1); + + page_header_set_field(new_page, PAGE_N_DIR_SLOTS, 2 + slot_index); + page_header_set_ptr(new_page, PAGE_HEAP_TOP, heap_top); + page_header_set_field(new_page, PAGE_N_HEAP, 2 + n_recs); + page_header_set_field(new_page, PAGE_N_RECS, n_recs); + + page_header_set_ptr(new_page, PAGE_LAST_INSERT, NULL); + page_header_set_field(new_page, PAGE_DIRECTION, PAGE_NO_DIRECTION); + page_header_set_field(new_page, PAGE_N_DIRECTION, 0); + + /* Restore the log mode */ + + mtr_set_log_mode(mtr, log_mode); +} + +/*************************************************************** +Writes log record of a record delete on a page. */ +UNIV_INLINE +void +page_cur_delete_rec_write_log( +/*==========================*/ + rec_t* cursor_rec, /* in: record to be deleted */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + mlog_write_initial_log_record(cursor_rec, MLOG_REC_DELETE, mtr); + + /* Write the cursor rec offset as a 2-byte ulint */ + mlog_catenate_ulint(mtr, cursor_rec - buf_frame_align(cursor_rec), + MLOG_2BYTES); +} + +/*************************************************************** +Parses log record of a record delete on a page. */ + +byte* +page_cur_parse_delete_rec( +/*======================*/ + /* out: pointer to record end or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ +{ + ulint offset; + page_cur_t cursor; + + if (end_ptr < ptr + 2) { + + return(NULL); + } + + /* Read the cursor rec offset as a 2-byte ulint */ + offset = mach_read_from_2(ptr); + ptr += 2; + + if (page) { + page_cur_position(page + offset, &cursor); + + page_cur_delete_rec(&cursor, mtr); + } + + return(ptr); +} + +/*************************************************************** +Deletes a record at the page cursor. The cursor is moved to the next +record after the deleted one. */ + +void +page_cur_delete_rec( +/*================*/ + page_cur_t* cursor, /* in: a page cursor */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + page_t* page; + rec_t* current_rec; + rec_t* prev_rec = NULL; + rec_t* next_rec; + ulint cur_slot_no; + page_dir_slot_t* cur_dir_slot; + page_dir_slot_t* prev_slot; + ulint cur_n_owned; + rec_t* rec; + + ut_ad(cursor && mtr); + + page = page_cur_get_page(cursor); + current_rec = cursor->rec; + + /* The record must not be the supremum or infimum record. */ + ut_ad(current_rec != page_get_supremum_rec(page)); + ut_ad(current_rec != page_get_infimum_rec(page)); + + /* Save to local variables some data associated with current_rec */ + cur_slot_no = page_dir_find_owner_slot(current_rec); + cur_dir_slot = page_dir_get_nth_slot(page, cur_slot_no); + cur_n_owned = page_dir_slot_get_n_owned(cur_dir_slot); + + /* 0. Write the log record */ + page_cur_delete_rec_write_log(current_rec, mtr); + + /* 1. Reset the last insert info in the page header and increment + the modify clock for the frame */ + + page_header_set_ptr(page, PAGE_LAST_INSERT, NULL); + + /* The page gets invalid for optimistic searches: increment the + frame modify clock */ + + buf_frame_modify_clock_inc(page); + + /* 2. Find the next and the previous record. Note that the cursor is + left at the next record. */ + + ut_ad(cur_slot_no > 0); + prev_slot = page_dir_get_nth_slot(page, cur_slot_no - 1); + + rec = page_dir_slot_get_rec(prev_slot); + + /* rec now points to the record of the previous directory slot. Look + for the immediate predecessor of current_rec in a loop. */ + + while(current_rec != rec) { + prev_rec = rec; + rec = page_rec_get_next(rec); + } + + page_cur_move_to_next(cursor); + next_rec = cursor->rec; + + /* 3. Remove the record from the linked list of records */ + + page_rec_set_next(prev_rec, next_rec); + page_header_set_field(page, PAGE_N_RECS, + (ulint)(page_get_n_recs(page) - 1)); + + /* 4. If the deleted record is pointed to by a dir slot, update the + record pointer in slot. In the following if-clause we assume that + prev_rec is owned by the same slot, i.e., PAGE_DIR_SLOT_MIN_N_OWNED + >= 2. */ + + ut_ad(PAGE_DIR_SLOT_MIN_N_OWNED >= 2); + ut_ad(cur_n_owned > 1); + + if (current_rec == page_dir_slot_get_rec(cur_dir_slot)) { + page_dir_slot_set_rec(cur_dir_slot, prev_rec); + } + + /* 5. Update the number of owned records of the slot */ + + page_dir_slot_set_n_owned(cur_dir_slot, cur_n_owned - 1); + + /* 6. Free the memory occupied by the record */ + page_mem_free(page, current_rec); + + /* 7. Now we have decremented the number of owned records of the slot. + If the number drops below PAGE_DIR_SLOT_MIN_N_OWNED, we balance the + slots. */ + + if (cur_n_owned <= PAGE_DIR_SLOT_MIN_N_OWNED) { + page_dir_balance_slot(page, cur_slot_no); + } +} diff --git a/innobase/page/page0page.c b/innobase/page/page0page.c new file mode 100644 index 00000000000..7df23775db2 --- /dev/null +++ b/innobase/page/page0page.c @@ -0,0 +1,1371 @@ +/****************************************************** +Index page routines + +(c) 1994-1996 Innobase Oy + +Created 2/2/1994 Heikki Tuuri +*******************************************************/ + +#define THIS_MODULE +#include "page0page.h" +#ifdef UNIV_NONINL +#include "page0page.ic" +#endif +#undef THIS_MODULE + +#include "page0cur.h" +#include "lock0lock.h" +#include "fut0lst.h" +#include "btr0sea.h" + +/* A cached template page used in page_create */ +page_t* page_template = NULL; + +/* THE INDEX PAGE + ============== + +The index page consists of a page header which contains the page's +id and other information. On top of it are the the index records +in a heap linked into a one way linear list according to alphabetic order. + +Just below page end is an array of pointers which we call page directory, +to about every sixth record in the list. The pointers are placed in +the directory in the alphabetical order of the records pointed to, +enabling us to make binary search using the array. Each slot n:o I +in the directory points to a record, where a 4-bit field contains a count +of those records which are in the linear list between pointer I and +the pointer I - 1 in the directory, including the record +pointed to by pointer I and not including the record pointed to by I - 1. +We say that the record pointed to by slot I, or that slot I, owns +these records. The count is always kept in the range 4 to 8, with +the exception that it is 1 for the first slot, and 1--8 for the second slot. + +An essentially binary search can be performed in the list of index +records, like we could do if we had pointer to every record in the +page directory. The data structure is, however, more efficient when +we are doing inserts, because most inserts are just pushed on a heap. +Only every 8th insert requires block move in the directory pointer +table, which itself is quite small. A record is deleted from the page +by just taking it off the linear list and updating the number of owned +records-field of the record which owns it, and updating the page directory, +if necessary. A special case is the one when the record owns itself. +Because the overhead of inserts is so small, we may also increase the +page size from the projected default of 8 kB to 64 kB without too +much loss of efficiency in inserts. Bigger page becomes actual +when the disk transfer rate compared to seek and latency time rises. +On the present system, the page size is set so that the page transfer +time (3 ms) is 20 % of the disk random access time (15 ms). + +When the page is split, merged, or becomes full but contains deleted +records, we have to reorganize the page. + +Assuming a page size of 8 kB, a typical index page of a secondary +index contains 300 index entries, and the size of the page directory +is 50 x 4 bytes = 200 bytes. */ + +/***************************************************************** +Sets the max trx id field value. */ + +void +page_set_max_trx_id( +/*================*/ + page_t* page, /* in: page */ + dulint trx_id) /* in: transaction id */ +{ + buf_block_t* block; + + ut_ad(page); + + block = buf_block_align(page); + + if (block->is_hashed) { + rw_lock_x_lock(&btr_search_latch); + } + + /* It is not necessary to write this change to the redo log, as + during a database recovery we assume that the max trx id of every + page is the maximum trx id assigned before the crash. */ + + mach_write_to_8(page + PAGE_HEADER + PAGE_MAX_TRX_ID, trx_id); + + if (block->is_hashed) { + rw_lock_x_unlock(&btr_search_latch); + } +} + +/**************************************************************** +Allocates a block of memory from an index page. */ + +byte* +page_mem_alloc( +/*===========*/ + /* out: pointer to start of allocated + buffer, or NULL if allocation fails */ + page_t* page, /* in: index page */ + ulint need, /* in: number of bytes needed */ + ulint* heap_no)/* out: this contains the heap number + of the allocated record if allocation succeeds */ +{ + rec_t* rec; + byte* block; + ulint avl_space; + ulint garbage; + + ut_ad(page && heap_no); + + /* If there are records in the free list, look if the first is + big enough */ + + rec = page_header_get_ptr(page, PAGE_FREE); + + if (rec && (rec_get_size(rec) >= need)) { + + page_header_set_ptr(page, PAGE_FREE, page_rec_get_next(rec)); + + garbage = page_header_get_field(page, PAGE_GARBAGE); + ut_ad(garbage >= need); + + page_header_set_field(page, PAGE_GARBAGE, garbage - need); + + *heap_no = rec_get_heap_no(rec); + + return(rec_get_start(rec)); + } + + /* Could not find space from the free list, try top of heap */ + + avl_space = page_get_max_insert_size(page, 1); + + if (avl_space >= need) { + block = page_header_get_ptr(page, PAGE_HEAP_TOP); + + page_header_set_ptr(page, PAGE_HEAP_TOP, block + need); + *heap_no = page_header_get_field(page, PAGE_N_HEAP); + + page_header_set_field(page, PAGE_N_HEAP, 1 + *heap_no); + + return(block); + } + + return(NULL); +} + +/************************************************************** +Writes a log record of page creation. */ +UNIV_INLINE +void +page_create_write_log( +/*==================*/ + buf_frame_t* frame, /* in: a buffer frame where the page is + created */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + mlog_write_initial_log_record(frame, MLOG_PAGE_CREATE, mtr); +} + +/*************************************************************** +Parses a redo log record of creating a page. */ + +byte* +page_parse_create( +/*==============*/ + /* out: end of log record or NULL */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ +{ + ut_ad(ptr && end_ptr); + + /* The record is empty, except for the record initial part */ + + if (page) { + page_create(page, mtr); + } + + return(ptr); +} + +/************************************************************** +The index page creation function. */ + +page_t* +page_create( +/*========*/ + /* out: pointer to the page */ + buf_frame_t* frame, /* in: a buffer frame where the page is + created */ + mtr_t* mtr) /* in: mini-transaction handle */ +{ + page_dir_slot_t* slot; + mem_heap_t* heap; + dtuple_t* tuple; + dfield_t* field; + byte* heap_top; + rec_t* infimum_rec; + rec_t* supremum_rec; + page_t* page; + + ut_ad(frame && mtr); + ut_ad(PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE + <= PAGE_DATA); + ut_ad(PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE + <= PAGE_DATA); + + /* 1. INCREMENT MODIFY CLOCK */ + buf_frame_modify_clock_inc(frame); + + /* 2. WRITE LOG INFORMATION */ + page_create_write_log(frame, mtr); + + page = frame; + + fil_page_set_type(page, FIL_PAGE_INDEX); + + /* If we have a page template, copy the page structure from there */ + + if (page_template) { + ut_memcpy(page + PAGE_HEADER, + page_template + PAGE_HEADER, PAGE_HEADER_PRIV_END); + ut_memcpy(page + PAGE_DATA, + page_template + PAGE_DATA, + PAGE_SUPREMUM_END - PAGE_DATA); + ut_memcpy(page + UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START, + page_template + UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START, + PAGE_EMPTY_DIR_START - PAGE_DIR); + return(frame); + } + + heap = mem_heap_create(200); + + /* 3. CREATE THE INFIMUM AND SUPREMUM RECORDS */ + + /* Create first a data tuple for infimum record */ + tuple = dtuple_create(heap, 1); + field = dtuple_get_nth_field(tuple, 0); + + dfield_set_data(field, "infimum", strlen("infimum") + 1); + dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 20, 0); + + /* Set the corresponding physical record to its place in the page + record heap */ + + heap_top = page + PAGE_DATA; + + infimum_rec = rec_convert_dtuple_to_rec(heap_top, tuple); + + ut_ad(infimum_rec == page + PAGE_INFIMUM); + + rec_set_n_owned(infimum_rec, 1); + rec_set_heap_no(infimum_rec, 0); + + heap_top = rec_get_end(infimum_rec); + + /* Create then a tuple for supremum */ + + tuple = dtuple_create(heap, 1); + field = dtuple_get_nth_field(tuple, 0); + + dfield_set_data(field, "supremum", strlen("supremum") + 1); + dtype_set(dfield_get_type(field), DATA_VARCHAR, DATA_ENGLISH, 20, 0); + + supremum_rec = rec_convert_dtuple_to_rec(heap_top, tuple); + + ut_ad(supremum_rec == page + PAGE_SUPREMUM); + + rec_set_n_owned(supremum_rec, 1); + rec_set_heap_no(supremum_rec, 1); + + heap_top = rec_get_end(supremum_rec); + + ut_ad(heap_top == page + PAGE_SUPREMUM_END); + + mem_heap_free(heap); + + /* 4. INITIALIZE THE PAGE HEADER */ + + page_header_set_field(page, PAGE_N_DIR_SLOTS, 2); + page_header_set_ptr(page, PAGE_HEAP_TOP, heap_top); + page_header_set_field(page, PAGE_N_HEAP, 2); + page_header_set_ptr(page, PAGE_FREE, NULL); + page_header_set_field(page, PAGE_GARBAGE, 0); + page_header_set_ptr(page, PAGE_LAST_INSERT, NULL); + page_header_set_field(page, PAGE_N_RECS, 0); + page_set_max_trx_id(page, ut_dulint_zero); + + /* 5. SET POINTERS IN RECORDS AND DIR SLOTS */ + + /* Set the slots to point to infimum and supremum. */ + + slot = page_dir_get_nth_slot(page, 0); + page_dir_slot_set_rec(slot, infimum_rec); + + slot = page_dir_get_nth_slot(page, 1); + page_dir_slot_set_rec(slot, supremum_rec); + + /* Set next pointers in infimum and supremum */ + + rec_set_next_offs(infimum_rec, (ulint)(supremum_rec - page)); + rec_set_next_offs(supremum_rec, 0); + + if (page_template == NULL) { + page_template = mem_alloc(UNIV_PAGE_SIZE); + + ut_memcpy(page_template, page, UNIV_PAGE_SIZE); + } + + return(page); +} + +/***************************************************************** +Differs from page_copy_rec_list_end, because this function does not +touch the lock table and max trx id on page. */ + +void +page_copy_rec_list_end_no_locks( +/*============================*/ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + mtr_t* mtr) /* in: mtr */ +{ + page_cur_t cur1; + page_cur_t cur2; + rec_t* sup; + + page_cur_position(rec, &cur1); + + if (page_cur_is_before_first(&cur1)) { + + page_cur_move_to_next(&cur1); + } + + page_cur_set_before_first(new_page, &cur2); + + /* Copy records from the original page to the new page */ + + sup = page_get_supremum_rec(page); + + while (sup != page_cur_get_rec(&cur1)) { + ut_a( + page_cur_rec_insert(&cur2, page_cur_get_rec(&cur1), mtr)); + + page_cur_move_to_next(&cur1); + page_cur_move_to_next(&cur2); + } +} + +/***************************************************************** +Copies records from page to new_page, from a given record onward, +including that record. Infimum and supremum records are not copied. +The records are copied to the start of the record list on new_page. */ + +void +page_copy_rec_list_end( +/*===================*/ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + mtr_t* mtr) /* in: mtr */ +{ + if (page_header_get_field(new_page, PAGE_N_HEAP) == 2) { + page_copy_rec_list_end_to_created_page(new_page, page, rec, + mtr); + } else { + page_copy_rec_list_end_no_locks(new_page, page, rec, mtr); + } + + /* Update the lock table, MAX_TRX_ID, and possible hash index */ + + lock_move_rec_list_end(new_page, page, rec); + + page_update_max_trx_id(new_page, page_get_max_trx_id(page)); + + btr_search_move_or_delete_hash_entries(new_page, page); +} + +/***************************************************************** +Copies records from page to new_page, up to the given record, +NOT including that record. Infimum and supremum records are not copied. +The records are copied to the end of the record list on new_page. */ + +void +page_copy_rec_list_start( +/*=====================*/ + page_t* new_page, /* in: index page to copy to */ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + mtr_t* mtr) /* in: mtr */ +{ + page_cur_t cur1; + page_cur_t cur2; + rec_t* old_end; + + page_cur_set_before_first(page, &cur1); + + if (rec == page_cur_get_rec(&cur1)) { + + return; + } + + page_cur_move_to_next(&cur1); + + page_cur_set_after_last(new_page, &cur2); + page_cur_move_to_prev(&cur2); + old_end = page_cur_get_rec(&cur2); + + /* Copy records from the original page to the new page */ + + while (page_cur_get_rec(&cur1) != rec) { + ut_a( + page_cur_rec_insert(&cur2, page_cur_get_rec(&cur1), mtr)); + + page_cur_move_to_next(&cur1); + page_cur_move_to_next(&cur2); + } + + /* Update the lock table, MAX_TRX_ID, and possible hash index */ + + lock_move_rec_list_start(new_page, page, rec, old_end); + + page_update_max_trx_id(new_page, page_get_max_trx_id(page)); + + btr_search_move_or_delete_hash_entries(new_page, page); +} + +/************************************************************** +Writes a log record of a record list end or start deletion. */ +UNIV_INLINE +void +page_delete_rec_list_write_log( +/*===========================*/ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + byte type, /* in: operation type: MLOG_LIST_END_DELETE, ... */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad((type == MLOG_LIST_END_DELETE) + || (type == MLOG_LIST_START_DELETE)); + + mlog_write_initial_log_record(page, type, mtr); + + /* Write the parameter as a 2-byte ulint */ + mlog_catenate_ulint(mtr, rec - page, MLOG_2BYTES); +} + +/************************************************************** +Parses a log record of a record list end or start deletion. */ + +byte* +page_parse_delete_rec_list( +/*=======================*/ + /* out: end of log record or NULL */ + byte type, /* in: MLOG_LIST_END_DELETE or MLOG_LIST_START_DELETE */ + byte* ptr, /* in: buffer */ + byte* end_ptr,/* in: buffer end */ + page_t* page, /* in: page or NULL */ + mtr_t* mtr) /* in: mtr or NULL */ +{ + ulint offset; + + ut_ad((type == MLOG_LIST_END_DELETE) + || (type == MLOG_LIST_START_DELETE)); + + /* Read the record offset as a 2-byte ulint */ + + if (end_ptr < ptr + 2) { + + return(NULL); + } + + offset = mach_read_from_2(ptr); + ptr += 2; + + if (!page) { + + return(ptr); + } + + if (type == MLOG_LIST_END_DELETE) { + page_delete_rec_list_end(page, page + offset, ULINT_UNDEFINED, + ULINT_UNDEFINED, mtr); + } else { + page_delete_rec_list_start(page, page + offset, mtr); + } + + return(ptr); +} + +/***************************************************************** +Deletes records from a page from a given record onward, including that record. +The infimum and supremum records are not deleted. */ + +void +page_delete_rec_list_end( +/*=====================*/ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + ulint n_recs, /* in: number of records to delete, or ULINT_UNDEFINED + if not known */ + ulint size, /* in: the sum of the sizes of the records in the end + of the chain to delete, or ULINT_UNDEFINED if not + known */ + mtr_t* mtr) /* in: mtr */ +{ + page_dir_slot_t* slot; + ulint slot_index; + rec_t* last_rec; + rec_t* prev_rec; + rec_t* free; + rec_t* rec2; + ulint count; + ulint n_owned; + rec_t* sup; + + /* Reset the last insert info in the page header and increment + the modify clock for the frame */ + + page_header_set_ptr(page, PAGE_LAST_INSERT, NULL); + + /* The page gets invalid for optimistic searches: increment the + frame modify clock */ + + buf_frame_modify_clock_inc(page); + + sup = page_get_supremum_rec(page); + + if (rec == page_get_infimum_rec(page)) { + rec = page_rec_get_next(rec); + } + + page_delete_rec_list_write_log(page, rec, MLOG_LIST_END_DELETE, mtr); + + if (rec == sup) { + + return; + } + + prev_rec = page_rec_get_prev(rec); + + last_rec = page_rec_get_prev(sup); + + if ((size == ULINT_UNDEFINED) || (n_recs == ULINT_UNDEFINED)) { + /* Calculate the sum of sizes and the number of records */ + size = 0; + n_recs = 0; + rec2 = rec; + + while (rec2 != sup) { + size += rec_get_size(rec2); + n_recs++; + + rec2 = page_rec_get_next(rec2); + } + } + + /* Update the page directory; there is no need to balance the number + of the records owned by the supremum record, as it is allowed to be + less than PAGE_DIR_SLOT_MIN_N_OWNED */ + + rec2 = rec; + count = 0; + + while (rec_get_n_owned(rec2) == 0) { + count++; + + rec2 = page_rec_get_next(rec2); + } + + ut_ad(rec_get_n_owned(rec2) - count > 0); + + n_owned = rec_get_n_owned(rec2) - count; + + slot_index = page_dir_find_owner_slot(rec2); + slot = page_dir_get_nth_slot(page, slot_index); + + page_dir_slot_set_rec(slot, sup); + page_dir_slot_set_n_owned(slot, n_owned); + + page_header_set_field(page, PAGE_N_DIR_SLOTS, slot_index + 1); + + /* Remove the record chain segment from the record chain */ + page_rec_set_next(prev_rec, page_get_supremum_rec(page)); + + /* Catenate the deleted chain segment to the page free list */ + + free = page_header_get_ptr(page, PAGE_FREE); + + page_rec_set_next(last_rec, free); + page_header_set_ptr(page, PAGE_FREE, rec); + + page_header_set_field(page, PAGE_GARBAGE, + size + page_header_get_field(page, PAGE_GARBAGE)); + + page_header_set_field(page, PAGE_N_RECS, + (ulint)(page_get_n_recs(page) - n_recs)); +} + +/***************************************************************** +Deletes records from page, up to the given record, NOT including +that record. Infimum and supremum records are not deleted. */ + +void +page_delete_rec_list_start( +/*=======================*/ + page_t* page, /* in: index page */ + rec_t* rec, /* in: record on page */ + mtr_t* mtr) /* in: mtr */ +{ + page_cur_t cur1; + ulint log_mode; + + page_delete_rec_list_write_log(page, rec, MLOG_LIST_START_DELETE, mtr); + + page_cur_set_before_first(page, &cur1); + + if (rec == page_cur_get_rec(&cur1)) { + + return; + } + + page_cur_move_to_next(&cur1); + + /* Individual deletes are not logged */ + + log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); + + while (page_cur_get_rec(&cur1) != rec) { + + page_cur_delete_rec(&cur1, mtr); + } + + /* Restore log mode */ + + mtr_set_log_mode(mtr, log_mode); +} + +/***************************************************************** +Moves record list end to another page. Moved records include +split_rec. */ + +void +page_move_rec_list_end( +/*===================*/ + page_t* new_page, /* in: index page where to move */ + page_t* page, /* in: index page */ + rec_t* split_rec, /* in: first record to move */ + mtr_t* mtr) /* in: mtr */ +{ + ulint old_data_size; + ulint new_data_size; + ulint old_n_recs; + ulint new_n_recs; + + old_data_size = page_get_data_size(new_page); + old_n_recs = page_get_n_recs(new_page); + + page_copy_rec_list_end(new_page, page, split_rec, mtr); + + new_data_size = page_get_data_size(new_page); + new_n_recs = page_get_n_recs(new_page); + + ut_ad(new_data_size >= old_data_size); + + page_delete_rec_list_end(page, split_rec, new_n_recs - old_n_recs, + new_data_size - old_data_size, mtr); +} + +/***************************************************************** +Moves record list start to another page. Moved records do not include +split_rec. */ + +void +page_move_rec_list_start( +/*=====================*/ + page_t* new_page, /* in: index page where to move */ + page_t* page, /* in: index page */ + rec_t* split_rec, /* in: first record not to move */ + mtr_t* mtr) /* in: mtr */ +{ + page_copy_rec_list_start(new_page, page, split_rec, mtr); + + page_delete_rec_list_start(page, split_rec, mtr); +} + +/*************************************************************************** +This is a low-level operation which is used in a database index creation +to update the page number of a created B-tree to a data dictionary record. */ + +void +page_rec_write_index_page_no( +/*=========================*/ + rec_t* rec, /* in: record to update */ + ulint i, /* in: index of the field to update */ + ulint page_no,/* in: value to write */ + mtr_t* mtr) /* in: mtr */ +{ + byte* data; + ulint len; + + data = rec_get_nth_field(rec, i, &len); + + ut_ad(len == 4); + + mlog_write_ulint(data, page_no, MLOG_4BYTES, mtr); +} + +/****************************************************************** +Used to delete n slots from the directory. This function updates +also n_owned fields in the records, so that the first slot after +the deleted ones inherits the records of the deleted slots. */ +UNIV_INLINE +void +page_dir_delete_slots( +/*==================*/ + page_t* page, /* in: the index page */ + ulint start, /* in: first slot to be deleted */ + ulint n) /* in: number of slots to delete (currently + only n == 1 allowed) */ +{ + page_dir_slot_t* slot; + ulint i; + ulint sum_owned = 0; + ulint n_slots; + rec_t* rec; + + ut_ad(n == 1); + ut_ad(start > 0); + ut_ad(start + n < page_dir_get_n_slots(page)); + + n_slots = page_dir_get_n_slots(page); + + /* 1. Reset the n_owned fields of the slots to be + deleted */ + for (i = start; i < start + n; i++) { + slot = page_dir_get_nth_slot(page, i); + sum_owned += page_dir_slot_get_n_owned(slot); + page_dir_slot_set_n_owned(slot, 0); + } + + /* 2. Update the n_owned value of the first non-deleted slot */ + + slot = page_dir_get_nth_slot(page, start + n); + page_dir_slot_set_n_owned(slot, + sum_owned + page_dir_slot_get_n_owned(slot)); + + /* 3. Destroy start and other slots by copying slots */ + for (i = start + n; i < n_slots; i++) { + slot = page_dir_get_nth_slot(page, i); + rec = page_dir_slot_get_rec(slot); + + slot = page_dir_get_nth_slot(page, i - n); + page_dir_slot_set_rec(slot, rec); + } + + /* 4. Update the page header */ + page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots - n); +} + +/****************************************************************** +Used to add n slots to the directory. Does not set the record pointers +in the added slots or update n_owned values: this is the responsibility +of the caller. */ +UNIV_INLINE +void +page_dir_add_slots( +/*===============*/ + page_t* page, /* in: the index page */ + ulint start, /* in: the slot above which the new slots are added */ + ulint n) /* in: number of slots to add (currently only n == 1 + allowed) */ +{ + page_dir_slot_t* slot; + ulint n_slots; + ulint i; + rec_t* rec; + + ut_ad(n == 1); + + n_slots = page_dir_get_n_slots(page); + + ut_ad(start < n_slots - 1); + + /* Update the page header */ + page_header_set_field(page, PAGE_N_DIR_SLOTS, n_slots + n); + + /* Move slots up */ + + for (i = n_slots - 1; i > start; i--) { + + slot = page_dir_get_nth_slot(page, i); + rec = page_dir_slot_get_rec(slot); + + slot = page_dir_get_nth_slot(page, i + n); + page_dir_slot_set_rec(slot, rec); + } +} + +/******************************************************************** +Splits a directory slot which owns too many records. */ + +void +page_dir_split_slot( +/*================*/ + page_t* page, /* in: the index page in question */ + ulint slot_no) /* in: the directory slot */ +{ + rec_t* rec; + page_dir_slot_t* new_slot; + page_dir_slot_t* prev_slot; + page_dir_slot_t* slot; + ulint i; + ulint n_owned; + + ut_ad(page); + ut_ad(slot_no > 0); + + slot = page_dir_get_nth_slot(page, slot_no); + + n_owned = page_dir_slot_get_n_owned(slot); + ut_ad(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED + 1); + + /* 1. We loop to find a record approximately in the middle of the + records owned by the slot. */ + + prev_slot = page_dir_get_nth_slot(page, slot_no - 1); + rec = page_dir_slot_get_rec(prev_slot); + + for (i = 0; i < n_owned / 2; i++) { + rec = page_rec_get_next(rec); + } + + ut_ad(n_owned / 2 >= PAGE_DIR_SLOT_MIN_N_OWNED); + + /* 2. We add one directory slot immediately below the slot to be + split. */ + + page_dir_add_slots(page, slot_no - 1, 1); + + /* The added slot is now number slot_no, and the old slot is + now number slot_no + 1 */ + + new_slot = page_dir_get_nth_slot(page, slot_no); + slot = page_dir_get_nth_slot(page, slot_no + 1); + + /* 3. We store the appropriate values to the new slot. */ + + page_dir_slot_set_rec(new_slot, rec); + page_dir_slot_set_n_owned(new_slot, n_owned / 2); + + /* 4. Finally, we update the number of records field of the + original slot */ + + page_dir_slot_set_n_owned(slot, n_owned - (n_owned / 2)); +} + +/***************************************************************** +Tries to balance the given directory slot with too few records with the upper +neighbor, so that there are at least the minimum number of records owned by +the slot; this may result in the merging of two slots. */ + +void +page_dir_balance_slot( +/*==================*/ + page_t* page, /* in: index page */ + ulint slot_no) /* in: the directory slot */ +{ + page_dir_slot_t* slot; + page_dir_slot_t* up_slot; + ulint n_owned; + ulint up_n_owned; + rec_t* old_rec; + rec_t* new_rec; + + ut_ad(page); + ut_ad(slot_no > 0); + + slot = page_dir_get_nth_slot(page, slot_no); + + /* The last directory slot cannot be balanced with the upper + neighbor, as there is none. */ + + if (slot_no == page_dir_get_n_slots(page) - 1) { + + return; + } + + up_slot = page_dir_get_nth_slot(page, slot_no + 1); + + n_owned = page_dir_slot_get_n_owned(slot); + up_n_owned = page_dir_slot_get_n_owned(up_slot); + + ut_ad(n_owned == PAGE_DIR_SLOT_MIN_N_OWNED - 1); + + /* If the upper slot has the minimum value of n_owned, we will merge + the two slots, therefore we assert: */ + ut_ad(2 * PAGE_DIR_SLOT_MIN_N_OWNED - 1 <= PAGE_DIR_SLOT_MAX_N_OWNED); + + if (up_n_owned > PAGE_DIR_SLOT_MIN_N_OWNED) { + + /* In this case we can just transfer one record owned + by the upper slot to the property of the lower slot */ + old_rec = page_dir_slot_get_rec(slot); + new_rec = page_rec_get_next(old_rec); + + rec_set_n_owned(old_rec, 0); + rec_set_n_owned(new_rec, n_owned + 1); + + page_dir_slot_set_rec(slot, new_rec); + + page_dir_slot_set_n_owned(up_slot, up_n_owned -1); + } else { + /* In this case we may merge the two slots */ + page_dir_delete_slots(page, slot_no, 1); + } +} + +/**************************************************************** +Returns the middle record of the record list. If there are an even number +of records in the list, returns the first record of the upper half-list. */ + +rec_t* +page_get_middle_rec( +/*================*/ + /* out: middle record */ + page_t* page) /* in: page */ +{ + page_dir_slot_t* slot; + ulint middle; + ulint i; + ulint n_owned; + ulint count; + rec_t* rec; + + /* This many records we must leave behind */ + middle = (page_get_n_recs(page) + 2) / 2; + + count = 0; + + for (i = 0;; i++) { + + slot = page_dir_get_nth_slot(page, i); + n_owned = page_dir_slot_get_n_owned(slot); + + if (count + n_owned > middle) { + break; + } else { + count += n_owned; + } + } + + ut_ad(i > 0); + slot = page_dir_get_nth_slot(page, i - 1); + rec = page_dir_slot_get_rec(slot); + rec = page_rec_get_next(rec); + + /* There are now count records behind rec */ + + for (i = 0; i < middle - count; i++) { + rec = page_rec_get_next(rec); + } + + return(rec); +} + +/******************************************************************* +Returns the number of records before the given record in chain. +The number includes infimum and supremum records. */ + +ulint +page_rec_get_n_recs_before( +/*=======================*/ + /* out: number of records */ + rec_t* rec) /* in: the physical record */ +{ + page_dir_slot_t* slot; + rec_t* slot_rec; + page_t* page; + ulint i; + lint n = 0; + + ut_ad(page_rec_check(rec)); + + page = buf_frame_align(rec); + + while (rec_get_n_owned(rec) == 0) { + + rec = page_rec_get_next(rec); + n--; + } + + for (i = 0; ; i++) { + slot = page_dir_get_nth_slot(page, i); + slot_rec = page_dir_slot_get_rec(slot); + + n += rec_get_n_owned(slot_rec); + + if (rec == slot_rec) { + + break; + } + } + + n--; + + ut_ad(n >= 0); + + return((ulint) n); +} + +/**************************************************************** +Prints record contents including the data relevant only in +the index page context. */ + +void +page_rec_print( +/*===========*/ + rec_t* rec) +{ + rec_print(rec); + printf( + " n_owned: %lu; heap_no: %lu; next rec: %lu\n", + rec_get_n_owned(rec), + rec_get_heap_no(rec), + rec_get_next_offs(rec)); + + page_rec_check(rec); + rec_validate(rec); +} + +/******************************************************************* +This is used to print the contents of the directory for +debugging purposes. */ + +void +page_dir_print( +/*===========*/ + page_t* page, /* in: index page */ + ulint pr_n) /* in: print n first and n last entries */ +{ + ulint n; + ulint i; + page_dir_slot_t* slot; + + n = page_dir_get_n_slots(page); + + printf("--------------------------------\n"); + printf("PAGE DIRECTORY\n"); + printf("Page address %lx\n", page); + printf("Directory stack top at offs: %lu; number of slots: %lu\n", + page_dir_get_nth_slot(page, n - 1) - page, n); + for (i = 0; i < n; i++) { + slot = page_dir_get_nth_slot(page, i); + if ((i == pr_n) && (i < n - pr_n)) { + printf(" ... \n"); + } + if ((i < pr_n) || (i >= n - pr_n)) { + printf( + "Contents of slot: %lu: n_owned: %lu, rec offs: %lu\n", + i, page_dir_slot_get_n_owned(slot), + page_dir_slot_get_rec(slot) - page); + } + } + printf("Total of %lu records\n", 2 + page_get_n_recs(page)); + printf("--------------------------------\n"); +} + +/******************************************************************* +This is used to print the contents of the page record list for +debugging purposes. */ + +void +page_print_list( +/*============*/ + page_t* page, /* in: index page */ + ulint pr_n) /* in: print n first and n last entries */ +{ + page_cur_t cur; + rec_t* rec; + ulint count; + ulint n_recs; + + printf("--------------------------------\n"); + printf("PAGE RECORD LIST\n"); + printf("Page address %lu\n", page); + + n_recs = page_get_n_recs(page); + + page_cur_set_before_first(page, &cur); + count = 0; + for (;;) { + rec = (&cur)->rec; + page_rec_print(rec); + + if (count == pr_n) { + break; + } + if (page_cur_is_after_last(&cur)) { + break; + } + page_cur_move_to_next(&cur); + count++; + } + + if (n_recs > 2 * pr_n) { + printf(" ... \n"); + } + + for (;;) { + if (page_cur_is_after_last(&cur)) { + break; + } + page_cur_move_to_next(&cur); + + if (count + pr_n >= n_recs) { + rec = (&cur)->rec; + page_rec_print(rec); + } + count++; + } + + printf("Total of %lu records \n", count + 1); + printf("--------------------------------\n"); +} + +/******************************************************************* +Prints the info in a page header. */ + +void +page_header_print( +/*==============*/ + page_t* page) +{ + printf("--------------------------------\n"); + printf("PAGE HEADER INFO\n"); + printf("Page address %lx, n records %lu\n", page, + page_header_get_field(page, PAGE_N_RECS)); + + printf("n dir slots %lu, heap top %lu\n", + page_header_get_field(page, PAGE_N_DIR_SLOTS), + page_header_get_field(page, PAGE_HEAP_TOP)); + + printf("Page n heap %lu, free %lu, garbage %lu\n", + page_header_get_field(page, PAGE_N_HEAP), + page_header_get_field(page, PAGE_FREE), + page_header_get_field(page, PAGE_GARBAGE)); + + printf("Page last insert %lu, direction %lu, n direction %lu\n", + page_header_get_field(page, PAGE_LAST_INSERT), + page_header_get_field(page, PAGE_DIRECTION), + page_header_get_field(page, PAGE_N_DIRECTION)); +} + +/******************************************************************* +This is used to print the contents of the page for +debugging purposes. */ + +void +page_print( +/*======*/ + page_t* page, /* in: index page */ + ulint dn, /* in: print dn first and last entries in directory */ + ulint rn) /* in: print rn first and last records on page */ +{ + page_header_print(page); + page_dir_print(page, dn); + page_print_list(page, rn); +} + +/******************************************************************* +The following is used to validate a record on a page. This function +differs from rec_validate as it can also check the n_owned field and +the heap_no field. */ + +ibool +page_rec_validate( +/*==============*/ + /* out: TRUE if ok */ + rec_t* rec) /* in: record on the page */ +{ + ulint n_owned; + ulint heap_no; + page_t* page; + + page = buf_frame_align(rec); + + page_rec_check(rec); + rec_validate(rec); + + n_owned = rec_get_n_owned(rec); + heap_no = rec_get_heap_no(rec); + + ut_a(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED); + ut_a(heap_no < page_header_get_field(page, PAGE_N_HEAP)); + + return(TRUE); +} + +/******************************************************************* +This function checks the consistency of an index page. */ + +ibool +page_validate( +/*==========*/ + /* out: TRUE if ok */ + page_t* page, /* in: index page */ + dict_index_t* index) /* in: data dictionary index containing + the page record type definition */ +{ + mem_heap_t* heap; + byte* buf; + ulint i; + ulint count; + ulint own_count; + ulint slot_no; + ulint data_size; + page_cur_t cur; + rec_t* rec; + rec_t* old_rec = NULL; + page_dir_slot_t* slot; + ulint offs; + ulint n_slots; + + heap = mem_heap_create(UNIV_PAGE_SIZE); + + /* The following buffer is used to check that the + records in the page record heap do not overlap */ + + buf = mem_heap_alloc(heap, UNIV_PAGE_SIZE); + for (i = 0; i < UNIV_PAGE_SIZE; i++) { + buf[i] = 0; + } + + /* Check first that the record heap and the directory do not + overlap. */ + + n_slots = page_dir_get_n_slots(page); + ut_ad(page_header_get_ptr(page, PAGE_HEAP_TOP) <= + page_dir_get_nth_slot(page, n_slots - 1)); + + /* Validate the record list in a loop checking also that + it is consistent with the directory. */ + count = 0; + data_size = 0; + own_count = 1; + slot_no = 0; + slot = page_dir_get_nth_slot(page, slot_no); + + page_cur_set_before_first(page, &cur); + + for (;;) { + rec = (&cur)->rec; + page_rec_validate(rec); + + /* Check that the records are in the ascending order */ + if ((count >= 2) && (!page_cur_is_after_last(&cur))) { + ut_a(1 == cmp_rec_rec(rec, old_rec, index)); + } + + if ((rec != page_get_supremum_rec(page)) + && (rec != page_get_infimum_rec(page))) { + + data_size += rec_get_size(rec); + } + + offs = rec_get_start(rec) - page; + + for (i = 0; i < rec_get_size(rec); i++) { + ut_a(buf[offs + i] == 0); /* No other record may + overlap this */ + buf[offs + i] = 1; + } + + if (rec_get_n_owned(rec) != 0) { + /* This is a record pointed to by a dir slot */ + ut_a(rec_get_n_owned(rec) == own_count); + + ut_a(page_dir_slot_get_rec(slot) == rec); + page_dir_slot_check(slot); + + own_count = 0; + if (!page_cur_is_after_last(&cur)) { + slot_no++; + slot = page_dir_get_nth_slot(page, slot_no); + } + } + + if (page_cur_is_after_last(&cur)) { + break; + } + + count++; + page_cur_move_to_next(&cur); + own_count++; + old_rec = rec; + } + + ut_a(rec_get_n_owned(rec) != 0); + ut_a(slot_no == n_slots - 1); + ut_a(page_header_get_field(page, PAGE_N_RECS) + 2 == count + 1); + + if (data_size != page_get_data_size(page)) { + printf("Summed data size %lu, returned by func %lu\n", + data_size, page_get_data_size(page)); + ut_error; + } + + /* Check then the free list */ + rec = page_header_get_ptr(page, PAGE_FREE); + + while (rec != NULL) { + page_rec_validate(rec); + + count++; + offs = rec_get_start(rec) - page; + + for (i = 0; i < rec_get_size(rec); i++) { + ut_a(buf[offs + i] == 0); + buf[offs + i] = 1; + } + + rec = page_rec_get_next(rec); + } + + ut_a(page_header_get_field(page, PAGE_N_HEAP) == count + 1); + + mem_heap_free(heap); + + return(TRUE); +} + +/******************************************************************* +Looks in the page record list for a record with the given heap number. */ + +rec_t* +page_find_rec_with_heap_no( +/*=======================*/ + /* out: record, NULL if not found */ + page_t* page, /* in: index page */ + ulint heap_no)/* in: heap number */ +{ + page_cur_t cur; + rec_t* rec; + + page_cur_set_before_first(page, &cur); + + for (;;) { + rec = (&cur)->rec; + + if (rec_get_heap_no(rec) == heap_no) { + + return(rec); + } + + if (page_cur_is_after_last(&cur)) { + + return(NULL); + } + + page_cur_move_to_next(&cur); + } +} diff --git a/innobase/page/ts/makefile b/innobase/page/ts/makefile new file mode 100644 index 00000000000..cf96509ee73 --- /dev/null +++ b/innobase/page/ts/makefile @@ -0,0 +1,16 @@ + + + +include ..\..\makefile.i + +tspage: ..\page.lib tspage.c + $(CCOM) $(CFL) -I.. -I..\.. ..\page.lib ..\..\btr.lib ..\..\dyn.lib ..\..\mtr.lib ..\..\log.lib ..\..\rem.lib ..\..\fil.lib ..\..\buf.lib ..\..\dict.lib ..\..\data.lib ..\..\mach.lib ..\..\ha.lib ..\..\ut.lib ..\..\sync.lib ..\..\mem.lib ..\..\os.lib tspage.c $(LFL) + + + + + + + + + diff --git a/innobase/page/ts/tspage.c b/innobase/page/ts/tspage.c new file mode 100644 index 00000000000..3b8869e5e14 --- /dev/null +++ b/innobase/page/ts/tspage.c @@ -0,0 +1,705 @@ +/************************************************************************ +The test for the index page + +(c) 1994-1996 Innobase Oy + +Created 1/31/1994 Heikki Tuuri +*************************************************************************/ + +#include "sync0sync.h" +#include "ut0mem.h" +#include "mem0mem.h" +#include "data0data.h" +#include "data0type.h" +#include "dict0dict.h" +#include "buf0buf.h" +#include "os0file.h" +#include "fil0fil.h" +#include "rem0rec.h" +#include "rem0cmp.h" +#include "mtr0mtr.h" +#include "log0log.h" +#include "..\page0page.h" +#include "..\page0cur.h" + +os_file_t files[1000]; + +mutex_t ios_mutex; +ulint ios; +ulint n[10]; + +mutex_t incs_mutex; +ulint incs; + +byte bigbuf[1000000]; + +#define N_SPACES 1 +#define N_FILES 2 +#define FILE_SIZE 1000 /* must be > 512 */ +#define POOL_SIZE 1000 +#define COUNTER_OFFSET 1500 + +#define LOOP_SIZE 150 +#define N_THREADS 5 + + +ulint zero = 0; + +buf_block_t* bl_arr[POOL_SIZE]; + +/************************************************************************ +Io-handler thread function. */ + +ulint +handler_thread( +/*===========*/ + void* arg) +{ + ulint segment; + void* mess; + ulint i; + bool ret; + + segment = *((ulint*)arg); + + printf("Io handler thread %lu starts\n", segment); + + for (i = 0;; i++) { + ret = fil_aio_wait(segment, &mess); + ut_a(ret); + + buf_page_io_complete((buf_block_t*)mess); + + mutex_enter(&ios_mutex); + ios++; + mutex_exit(&ios_mutex); + + } + + return(0); +} + +/************************************************************************* +Creates the files for the file system test and inserts them to +the file system. */ + +void +create_files(void) +/*==============*/ +{ + bool ret; + ulint i, k; + char name[20]; + os_thread_t thr[5]; + os_thread_id_t id[5]; + + printf("--------------------------------------------------------\n"); + printf("Create or open database files\n"); + + strcpy(name, "j:\\tsfile00"); + + for (k = 0; k < N_SPACES; k++) { + for (i = 0; i < N_FILES; i++) { + + name[9] = (char)((ulint)'0' + k); + name[10] = (char)((ulint)'0' + i); + + files[i] = os_file_create(name, OS_FILE_CREATE, + OS_FILE_TABLESPACE, &ret); + + if (ret == FALSE) { + ut_a(os_file_get_last_error() == + OS_FILE_ALREADY_EXISTS); + + files[i] = os_file_create( + name, OS_FILE_OPEN, + OS_FILE_TABLESPACE, &ret); + + ut_a(ret); + } + + ret = os_file_close(files[i]); + ut_a(ret); + + if (i == 0) { + fil_space_create(name, k, OS_FILE_TABLESPACE); + } + + ut_a(fil_validate()); + + fil_node_create(name, FILE_SIZE, k); + } + } + + ios = 0; + + mutex_create(&ios_mutex); + + for (i = 0; i < 5; i++) { + n[i] = i; + + thr[i] = os_thread_create(handler_thread, n + i, id + i); + } +} + +/********************************************************************* +Test for index page. */ + +void +test1(void) +/*=======*/ +{ + page_t* page; + dtuple_t* tuple; + mem_heap_t* heap; + ulint i; + ulint rnd = 0; + rec_t* rec; + page_cur_t cursor; + dict_index_t* index; + dict_table_t* table; + buf_block_t* block; + buf_frame_t* frame; + mtr_t mtr; + + printf("-------------------------------------------------\n"); + printf("TEST 1. Basic test\n"); + + heap = mem_heap_create(0); + + table = dict_table_create("TS_TABLE1", 3); + + dict_table_add_col(table, "COL1", DATA_VARCHAR, DATA_ENGLISH, 10, 0); + dict_table_add_col(table, "COL2", DATA_VARCHAR, DATA_ENGLISH, 10, 0); + dict_table_add_col(table, "COL3", DATA_VARCHAR, DATA_ENGLISH, 10, 0); + + ut_a(0 == dict_table_publish(table)); + + index = dict_index_create("TS_TABLE1", "IND1", 0, 3, 0); + + dict_index_add_field(index, "COL1", 0); + dict_index_add_field(index, "COL2", 0); + dict_index_add_field(index, "COL3", 0); + + ut_a(0 == dict_index_publish(index)); + + index = dict_index_get("TS_TABLE1", "IND1"); + ut_a(index); + + tuple = dtuple_create(heap, 3); + + mtr_start(&mtr); + + block = buf_page_create(0, 5, &mtr); + buf_page_x_lock(block, &mtr); + + frame = buf_block_get_frame(block); + + page = page_create(frame, &mtr); + + for (i = 0; i < 512; i++) { + + rnd = (rnd + 534671) % 512; + + if (i % 27 == 0) { + ut_a(page_validate(page, index)); + } + + dtuple_gen_test_tuple(tuple, rnd); + +/* dtuple_print(tuple);*/ + + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr); + + ut_a(rec); + + rec_validate(rec); +/* page_print_list(page, 151); */ + } + +/* page_print_list(page, 151); */ + + ut_a(page_validate(page, index)); + ut_a(page_get_n_recs(page) == 512); + + for (i = 0; i < 512; i++) { + + rnd = (rnd + 7771) % 512; + + if (i % 27 == 0) { + ut_a(page_validate(page, index)); + } + + dtuple_gen_test_tuple(tuple, rnd); + +/* dtuple_print(tuple);*/ + + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + page_cur_delete_rec(&cursor, &mtr); + + ut_a(rec); + + rec_validate(rec); +/* page_print_list(page, 151); */ + } + + ut_a(page_get_n_recs(page) == 0); + + ut_a(page_validate(page, index)); + page = page_create(frame, &mtr); + + rnd = 311; + + for (i = 0; i < 512; i++) { + + rnd = (rnd + 1) % 512; + + if (i % 27 == 0) { + ut_a(page_validate(page, index)); + } + + dtuple_gen_test_tuple(tuple, rnd); + +/* dtuple_print(tuple);*/ + + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr); + + ut_a(rec); + + rec_validate(rec); +/* page_print_list(page, 151); */ + } + + ut_a(page_validate(page, index)); + ut_a(page_get_n_recs(page) == 512); + + rnd = 217; + + for (i = 0; i < 512; i++) { + + rnd = (rnd + 1) % 512; + + if (i % 27 == 0) { + ut_a(page_validate(page, index)); + } + + dtuple_gen_test_tuple(tuple, rnd); + +/* dtuple_print(tuple);*/ + + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + page_cur_delete_rec(&cursor, &mtr); + + ut_a(rec); + + rec_validate(rec); +/* page_print_list(page, 151); */ + } + + ut_a(page_validate(page, index)); + ut_a(page_get_n_recs(page) == 0); + page = page_create(frame, &mtr); + + rnd = 291; + + for (i = 0; i < 512; i++) { + + rnd = (rnd - 1) % 512; + + if (i % 27 == 0) { + ut_a(page_validate(page, index)); + } + + dtuple_gen_test_tuple(tuple, rnd); + +/* dtuple_print(tuple);*/ + + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr); + + ut_a(rec); + + rec_validate(rec); +/* page_print_list(page, 151); */ + } + + ut_a(page_validate(page, index)); + ut_a(page_get_n_recs(page) == 512); + + rnd = 277; + + for (i = 0; i < 512; i++) { + + rnd = (rnd - 1) % 512; + + if (i % 27 == 0) { + ut_a(page_validate(page, index)); + } + + dtuple_gen_test_tuple(tuple, rnd); + +/* dtuple_print(tuple);*/ + + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + page_cur_delete_rec(&cursor, &mtr); + + ut_a(rec); + + rec_validate(rec); +/* page_print_list(page, 151); */ + } + + ut_a(page_validate(page, index)); + ut_a(page_get_n_recs(page) == 0); + + mtr_commit(&mtr); + mem_heap_free(heap); +} + +/********************************************************************* +Test for index page. */ + +void +test2(void) +/*=======*/ +{ + page_t* page; + dtuple_t* tuple; + mem_heap_t* heap; + ulint i, j; + ulint rnd = 0; + rec_t* rec; + page_cur_t cursor; + dict_index_t* index; + dict_table_t* table; + buf_block_t* block; + buf_frame_t* frame; + ulint tm, oldtm; + byte buf[8]; + mtr_t mtr; + mtr_t mtr2; + + printf("-------------------------------------------------\n"); + printf("TEST 2. Speed test\n"); + + oldtm = ut_clock(); + + for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) { + ut_memcpy(bigbuf, bigbuf + 800, 800); + } + + tm = ut_clock(); + printf("Wall time for %lu mem copys of 800 bytes %lu millisecs\n", + i, tm - oldtm); + + oldtm = ut_clock(); + + rnd = 0; + for (i = 0; i < 1000 * UNIV_DBC * UNIV_DBC; i++) { + ut_memcpy(bigbuf + rnd, bigbuf + rnd + 800, 800); + rnd += 1600; + if (rnd > 995000) { + rnd = 0; + } + } + + tm = ut_clock(); + printf("Wall time for %lu mem copys of 800 bytes %lu millisecs\n", + i, tm - oldtm); + + heap = mem_heap_create(0); + + table = dict_table_create("TS_TABLE2", 2); + + dict_table_add_col(table, "COL1", DATA_VARCHAR, DATA_ENGLISH, 10, 0); + dict_table_add_col(table, "COL2", DATA_VARCHAR, DATA_ENGLISH, 10, 0); + + ut_a(0 == dict_table_publish(table)); + + index = dict_index_create("TS_TABLE2", "IND2", 0, 2, 0); + + dict_index_add_field(index, "COL1", 0); + dict_index_add_field(index, "COL2", 0); + + ut_a(0 == dict_index_publish(index)); + + index = dict_index_get("TS_TABLE2", "IND2"); + ut_a(index); + + tuple = dtuple_create(heap, 3); + + oldtm = ut_clock(); + + rnd = 677; + for (i = 0; i < 5 * UNIV_DBC * UNIV_DBC; i++) { + + mtr_start(&mtr); + + block = buf_page_create(0, 5, &mtr); + buf_page_x_lock(block, &mtr); + + frame = buf_block_get_frame(block); + + page = page_create(frame, &mtr); + + for (j = 0; j < 200; j++) { + rnd = (rnd + 54841) % 1000; + dtuple_gen_test_tuple3(tuple, rnd, buf); + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr); + ut_a(rec); + } + mtr_commit(&mtr); + } + + tm = ut_clock(); + printf("Wall time for insertion of %lu recs %lu milliseconds\n", + i * j, tm - oldtm); + + mtr_start(&mtr); + + block = buf_page_get(0, 5, &mtr); + buf_page_s_lock(block, &mtr); + + page = buf_block_get_frame(block); + ut_a(page_validate(page, index)); + mtr_commit(&mtr); + + oldtm = ut_clock(); + + rnd = 677; + for (i = 0; i < 5 * UNIV_DBC * UNIV_DBC; i++) { + mtr_start(&mtr); + + block = buf_page_create(0, 5, &mtr); + buf_page_x_lock(block, &mtr); + + frame = buf_block_get_frame(block); + + page = page_create(frame, &mtr); + + for (j = 0; j < 200; j++) { + rnd = (rnd + 54841) % 1000; + dtuple_gen_test_tuple3(tuple, rnd, buf); + } + mtr_commit(&mtr); + } + + tm = ut_clock(); + printf( + "Wall time for %lu empty loops with page create %lu milliseconds\n", + i * j, tm - oldtm); + + oldtm = ut_clock(); + + for (i = 0; i < 5 * UNIV_DBC * UNIV_DBC; i++) { + + + mtr_start(&mtr); + + block = buf_page_create(0, 5, &mtr); + buf_page_x_lock(block, &mtr); + + frame = buf_block_get_frame(block); + + page = page_create(frame, &mtr); + + mtr_commit(&mtr); + + rnd = 100; + for (j = 0; j < 200; j++) { + mtr_start(&mtr2); + + block = buf_page_get(0, 5, &mtr2); + buf_page_x_lock(block, &mtr2); + + page = buf_block_get_frame(block); + + rnd = (rnd + 1) % 1000; + dtuple_gen_test_tuple3(tuple, rnd, buf); + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr2); + ut_a(rec); + + mtr_commit(&mtr2); + } + } + + tm = ut_clock(); + printf( + "Wall time for sequential insertion of %lu recs %lu milliseconds\n", + i * j, tm - oldtm); + + mtr_start(&mtr); + + block = buf_page_get(0, 5, &mtr); + buf_page_s_lock(block, &mtr); + + page = buf_block_get_frame(block); + ut_a(page_validate(page, index)); + mtr_commit(&mtr); + + oldtm = ut_clock(); + + for (i = 0; i < 5 * UNIV_DBC * UNIV_DBC; i++) { + mtr_start(&mtr); + + block = buf_page_create(0, 5, &mtr); + buf_page_x_lock(block, &mtr); + + frame = buf_block_get_frame(block); + + page = page_create(frame, &mtr); + + rnd = 500; + for (j = 0; j < 200; j++) { + rnd = (rnd - 1) % 1000; + dtuple_gen_test_tuple3(tuple, rnd, buf); + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr); + ut_a(rec); + } + mtr_commit(&mtr); + } + + tm = ut_clock(); + printf( + "Wall time for descend. seq. insertion of %lu recs %lu milliseconds\n", + i * j, tm - oldtm); + + oldtm = ut_clock(); + + for (i = 0; i < 5 * UNIV_DBC * UNIV_DBC; i++) { + mtr_start(&mtr); + + block = buf_page_create(0, 5, &mtr); + buf_page_x_lock(block, &mtr); + + frame = buf_block_get_frame(block); + + page = page_create(frame, &mtr); + + rnd = 677; + + for (j = 0; j < 200; j++) { + rnd = (rnd + 54841) % 1000; + dtuple_gen_test_tuple3(tuple, rnd, buf); + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr); + ut_a(rec); + } + + rnd = 677; + for (j = 0; j < 200; j++) { + rnd = (rnd + 54841) % 1000; + dtuple_gen_test_tuple3(tuple, rnd, buf); + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + page_cur_delete_rec(&cursor, &mtr); + } + ut_a(page_get_n_recs(page) == 0); + + mtr_commit(&mtr); + } + + tm = ut_clock(); + printf("Wall time for insert and delete of %lu recs %lu milliseconds\n", + i * j, tm - oldtm); + + mtr_start(&mtr); + + block = buf_page_create(0, 5, &mtr); + buf_page_x_lock(block, &mtr); + + frame = buf_block_get_frame(block); + + page = page_create(frame, &mtr); + + rnd = 677; + + for (j = 0; j < 200; j++) { + rnd = (rnd + 54841) % 1000; + dtuple_gen_test_tuple3(tuple, rnd, buf); + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + + rec = page_cur_insert_rec(&cursor, tuple, NULL, &mtr); + ut_a(rec); + } + ut_a(page_validate(page, index)); + mtr_print(&mtr); + + oldtm = ut_clock(); + + for (i = 0; i < 5 * UNIV_DBC * UNIV_DBC; i++) { + rnd = 677; + for (j = 0; j < 200; j++) { +/* rnd = (rnd + 54841) % 1000;*/ + dtuple_gen_test_tuple3(tuple, rnd, buf); + page_cur_search(page, tuple, PAGE_CUR_G, &cursor); + } + } + + tm = ut_clock(); + printf("Wall time for search of %lu recs %lu milliseconds\n", + i * j, tm - oldtm); + + oldtm = ut_clock(); + + for (i = 0; i < 5 * UNIV_DBC * UNIV_DBC; i++) { + rnd = 677; + for (j = 0; j < 200; j++) { + rnd = (rnd + 54841) % 1000; + dtuple_gen_test_tuple3(tuple, rnd, buf); + } + } + + tm = ut_clock(); + printf("Wall time for %lu empty loops %lu milliseconds\n", + i * j, tm - oldtm); + mtr_commit(&mtr); +} + +/******************************************************************** +Main test function. */ + +void +main(void) +/*======*/ +{ + ulint tm, oldtm; + + sync_init(); + mem_init(); + os_aio_init(160, 5); + fil_init(25); + buf_pool_init(100, 100); + dict_init(); + log_init(); + + create_files(); + + oldtm = ut_clock(); + + ut_rnd_set_seed(19); + + test1(); + test2(); + + mem_print_info(); + + tm = ut_clock(); + printf("Wall time for test %lu milliseconds\n", tm - oldtm); + printf("TESTS COMPLETED SUCCESSFULLY!\n"); +} |