diff options
Diffstat (limited to 'storage/innobase/page')
-rw-r--r-- | storage/innobase/page/page0cur.cc | 1123 | ||||
-rw-r--r-- | storage/innobase/page/page0page.cc | 996 | ||||
-rw-r--r-- | storage/innobase/page/page0zip.cc | 861 |
3 files changed, 1824 insertions, 1156 deletions
diff --git a/storage/innobase/page/page0cur.cc b/storage/innobase/page/page0cur.cc index 97405261392..7f592b50154 100644 --- a/storage/innobase/page/page0cur.cc +++ b/storage/innobase/page/page0cur.cc @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2013, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1994, 2015, Oracle and/or its affiliates. All Rights Reserved. Copyright (c) 2012, Facebook Inc. This program is free software; you can redistribute it and/or modify it under @@ -24,6 +24,8 @@ The page cursor Created 10/4/1994 Heikki Tuuri *************************************************************************/ +#include "ha_prototypes.h" + #include "page0cur.h" #ifdef UNIV_NONINL #include "page0cur.ic" @@ -33,9 +35,11 @@ Created 10/4/1994 Heikki Tuuri #include "btr0btr.h" #include "mtr0log.h" #include "log0recv.h" -#include "ut0ut.h" #ifndef UNIV_HOTBACKUP #include "rem0cmp.h" +#include "gis0rtree.h" + +#include <algorithm> #ifdef PAGE_CUR_ADAPT # ifdef UNIV_SEARCH_PERF_STAT @@ -53,7 +57,7 @@ a = 1103515245 (3^5 * 5 * 7 * 129749) c = 12345 (3 * 5 * 823) m = 18446744073709551616 (2^64) -@return number between 0 and 2^64-1 */ +@return number between 0 and 2^64-1 */ static ib_uint64_t page_cur_lcg_prng(void) @@ -76,41 +80,30 @@ page_cur_lcg_prng(void) return(lcg_current); } -/****************************************************************//** -Tries a search shortcut based on the last insert. -@return TRUE on success */ +/** Try a search shortcut based on the last insert. 
+@param[in] block index page +@param[in] index index tree +@param[in] tuple search key +@param[in,out] iup_matched_fields already matched fields in the +upper limit record +@param[in,out] ilow_matched_fields already matched fields in the +lower limit record +@param[out] cursor page cursor +@return true on success */ UNIV_INLINE -ibool +bool page_cur_try_search_shortcut( -/*=========================*/ - const buf_block_t* block, /*!< in: index page */ - const dict_index_t* index, /*!< in: record descriptor */ - const dtuple_t* tuple, /*!< in: data tuple */ + const buf_block_t* block, + const dict_index_t* index, + const dtuple_t* tuple, ulint* iup_matched_fields, - /*!< in/out: already matched - fields in upper limit record */ - ulint* iup_matched_bytes, - /*!< in/out: already matched - bytes in a field not yet - completely matched */ ulint* ilow_matched_fields, - /*!< in/out: already matched - fields in lower limit record */ - ulint* ilow_matched_bytes, - /*!< in/out: already matched - bytes in a field not yet - completely matched */ - page_cur_t* cursor) /*!< out: page cursor */ + page_cur_t* cursor) { const rec_t* rec; const rec_t* next_rec; ulint low_match; - ulint low_bytes; ulint up_match; - ulint up_bytes; -#ifdef UNIV_SEARCH_DEBUG - page_cur_t cursor2; -#endif ibool success = FALSE; const page_t* page = buf_block_get_frame(block); mem_heap_t* heap = NULL; @@ -127,53 +120,120 @@ page_cur_try_search_shortcut( ut_ad(rec); ut_ad(page_rec_is_user_rec(rec)); - ut_pair_min(&low_match, &low_bytes, - *ilow_matched_fields, *ilow_matched_bytes, - *iup_matched_fields, *iup_matched_bytes); + low_match = up_match = std::min(*ilow_matched_fields, + *iup_matched_fields); - up_match = low_match; - up_bytes = low_bytes; - - if (page_cmp_dtuple_rec_with_match(tuple, rec, offsets, - &low_match, &low_bytes) < 0) { + if (cmp_dtuple_rec_with_match(tuple, rec, offsets, &low_match) < 0) { goto exit_func; } next_rec = page_rec_get_next_const(rec); - offsets = 
rec_get_offsets(next_rec, index, offsets, - dtuple_get_n_fields(tuple), &heap); + if (!page_rec_is_supremum(next_rec)) { + offsets = rec_get_offsets(next_rec, index, offsets, + dtuple_get_n_fields(tuple), &heap); - if (page_cmp_dtuple_rec_with_match(tuple, next_rec, offsets, - &up_match, &up_bytes) >= 0) { - goto exit_func; + if (cmp_dtuple_rec_with_match(tuple, next_rec, offsets, + &up_match) >= 0) { + goto exit_func; + } + + *iup_matched_fields = up_match; } page_cur_position(rec, block, cursor); -#ifdef UNIV_SEARCH_DEBUG - page_cur_search_with_match(block, index, tuple, PAGE_CUR_DBG, - iup_matched_fields, - iup_matched_bytes, - ilow_matched_fields, - ilow_matched_bytes, - &cursor2); - ut_a(cursor2.rec == cursor->rec); + *ilow_matched_fields = low_match; - if (!page_rec_is_supremum(next_rec)) { +#ifdef UNIV_SEARCH_PERF_STAT + page_cur_short_succ++; +#endif + success = TRUE; +exit_func: + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + return(success); +} - ut_a(*iup_matched_fields == up_match); - ut_a(*iup_matched_bytes == up_bytes); +/** Try a search shortcut based on the last insert. 
+@param[in] block index page +@param[in] index index tree +@param[in] tuple search key +@param[in,out] iup_matched_fields already matched fields in the +upper limit record +@param[in,out] iup_matched_bytes already matched bytes in the +first partially matched field in the upper limit record +@param[in,out] ilow_matched_fields already matched fields in the +lower limit record +@param[in,out] ilow_matched_bytes already matched bytes in the +first partially matched field in the lower limit record +@param[out] cursor page cursor +@return true on success */ +UNIV_INLINE +bool +page_cur_try_search_shortcut_bytes( + const buf_block_t* block, + const dict_index_t* index, + const dtuple_t* tuple, + ulint* iup_matched_fields, + ulint* iup_matched_bytes, + ulint* ilow_matched_fields, + ulint* ilow_matched_bytes, + page_cur_t* cursor) +{ + const rec_t* rec; + const rec_t* next_rec; + ulint low_match; + ulint low_bytes; + ulint up_match; + ulint up_bytes; + ibool success = FALSE; + const page_t* page = buf_block_get_frame(block); + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets = offsets_; + rec_offs_init(offsets_); + + ut_ad(dtuple_check_typed(tuple)); + + rec = page_header_get_ptr(page, PAGE_LAST_INSERT); + offsets = rec_get_offsets(rec, index, offsets, + dtuple_get_n_fields(tuple), &heap); + + ut_ad(rec); + ut_ad(page_rec_is_user_rec(rec)); + if (ut_pair_cmp(*ilow_matched_fields, *ilow_matched_bytes, + *iup_matched_fields, *iup_matched_bytes) < 0) { + up_match = low_match = *ilow_matched_fields; + up_bytes = low_bytes = *ilow_matched_bytes; + } else { + up_match = low_match = *iup_matched_fields; + up_bytes = low_bytes = *iup_matched_bytes; } - ut_a(*ilow_matched_fields == low_match); - ut_a(*ilow_matched_bytes == low_bytes); -#endif + if (cmp_dtuple_rec_with_match_bytes( + tuple, rec, index, offsets, &low_match, &low_bytes) < 0) { + goto exit_func; + } + + next_rec = page_rec_get_next_const(rec); if (!page_rec_is_supremum(next_rec)) { + 
offsets = rec_get_offsets(next_rec, index, offsets, + dtuple_get_n_fields(tuple), &heap); + + if (cmp_dtuple_rec_with_match_bytes( + tuple, next_rec, index, offsets, + &up_match, &up_bytes) + >= 0) { + goto exit_func; + } *iup_matched_fields = up_match; *iup_matched_bytes = up_bytes; } + page_cur_position(rec, block, cursor); + *ilow_matched_fields = low_match; *ilow_matched_bytes = low_bytes; @@ -187,7 +247,6 @@ exit_func: } return(success); } - #endif #ifdef PAGE_CUR_LE_OR_EXTENDS @@ -195,7 +254,7 @@ exit_func: Checks if the nth field in a record is a character type field which extends the nth field in tuple, i.e., the field is longer or equal in length and has common first characters. -@return TRUE if rec field extends tuple field */ +@return TRUE if rec field extends tuple field */ static ibool page_cur_rec_field_extends( @@ -222,16 +281,17 @@ page_cur_rec_field_extends( || type->mtype == DATA_FIXBINARY || type->mtype == DATA_BINARY || type->mtype == DATA_BLOB + || DATA_GEOMETRY_MTYPE(type->mtype) || type->mtype == DATA_VARMYSQL || type->mtype == DATA_MYSQL) { if (dfield_get_len(dfield) != UNIV_SQL_NULL && rec_f_len != UNIV_SQL_NULL && rec_f_len >= dfield_get_len(dfield) - && !cmp_data_data_slow(type->mtype, type->prtype, - dfield_get_data(dfield), - dfield_get_len(dfield), - rec_f, dfield_get_len(dfield))) { + && !cmp_data_data(type->mtype, type->prtype, + dfield_get_data(dfield), + dfield_get_len(dfield), + rec_f, dfield_get_len(dfield))) { return(TRUE); } @@ -241,33 +301,389 @@ page_cur_rec_field_extends( } #endif /* PAGE_CUR_LE_OR_EXTENDS */ +/** If key is fixed length then populate offset directly from +cached version. +@param[in] rec B-Tree record for which offset needs to be + populated. +@param[in,out] index index handler +@param[in] tuple data tuple +@param[in,out] offsets default offsets array +@param[in,out] heap heap +@return reference to populate offsets. 
*/ +static +ulint* +populate_offsets( + const rec_t* rec, + const dtuple_t* tuple, + dict_index_t* index, + ulint* offsets, + mem_heap_t** heap) +{ + ut_ad(dict_table_is_intrinsic(index->table)); + + bool rec_has_null_values = false; + + if (index->rec_cache.key_has_null_cols) { + /* Check if record has null value. */ + const byte* nulls = rec - (1 + REC_N_NEW_EXTRA_BYTES); + ulint n_bytes_to_scan + = UT_BITS_IN_BYTES(index->n_nullable); + byte null_mask = 0xff; + ulint bits_examined = 0; + + for (ulint i = 0; i < n_bytes_to_scan - 1; i++) { + if (*nulls & null_mask) { + rec_has_null_values = true; + break; + } + --nulls; + bits_examined += 8; + } + + if (!rec_has_null_values) { + null_mask >>= (8 - (index->n_nullable - bits_examined)); + rec_has_null_values = *nulls & null_mask; + } + + if (rec_has_null_values) { + + offsets = rec_get_offsets( + rec, index, offsets, + dtuple_get_n_fields_cmp(tuple), heap); + + return(offsets); + } + } + + /* Check if offsets are cached else cache them first. + There are queries that will first verify if key is present using index + search and then initiate insert. If offsets are cached during index + search it would be based on key part only but during insert that looks + out for exact location to insert key + db_row_id both columns would + be used and so re-compute offsets in such case. */ + if (!index->rec_cache.offsets_cached + || (rec_offs_n_fields(index->rec_cache.offsets) + < dtuple_get_n_fields_cmp(tuple))) { + + offsets = rec_get_offsets( + rec, index, offsets, + dtuple_get_n_fields_cmp(tuple), heap); + + /* Reallocate if our offset array is not big + enough to hold the needed size. 
*/ + ulint sz1 = index->rec_cache.sz_of_offsets; + ulint sz2 = offsets[0]; + if (sz1 < sz2) { + index->rec_cache.offsets = static_cast<ulint*>( + mem_heap_alloc( + index->heap, sizeof(ulint) * sz2)); + index->rec_cache.sz_of_offsets = + static_cast<uint32_t>(sz2); + } + + memcpy(index->rec_cache.offsets, + offsets, (sizeof(ulint) * sz2)); + index->rec_cache.offsets_cached = true; + } + + ut_ad(index->rec_cache.offsets[2] = (ulint) rec); + + return(index->rec_cache.offsets); +} + /****************************************************************//** Searches the right position for a page cursor. */ -UNIV_INTERN void page_cur_search_with_match( /*=======================*/ const buf_block_t* block, /*!< in: buffer block */ - const dict_index_t* index, /*!< in: record descriptor */ + const dict_index_t* index, /*!< in/out: record descriptor */ const dtuple_t* tuple, /*!< in: data tuple */ - ulint mode, /*!< in: PAGE_CUR_L, + page_cur_mode_t mode, /*!< in: PAGE_CUR_L, PAGE_CUR_LE, PAGE_CUR_G, or PAGE_CUR_GE */ ulint* iup_matched_fields, /*!< in/out: already matched fields in upper limit record */ - ulint* iup_matched_bytes, - /*!< in/out: already matched - bytes in a field not yet - completely matched */ ulint* ilow_matched_fields, /*!< in/out: already matched fields in lower limit record */ + page_cur_t* cursor, /*!< out: page cursor */ + rtr_info_t* rtr_info)/*!< in/out: rtree search stack */ +{ + ulint up; + ulint low; + ulint mid; + const page_t* page; + const page_dir_slot_t* slot; + const rec_t* up_rec; + const rec_t* low_rec; + const rec_t* mid_rec; + ulint up_matched_fields; + ulint low_matched_fields; + ulint cur_matched_fields; + int cmp; +#ifdef UNIV_ZIP_DEBUG + const page_zip_des_t* page_zip = buf_block_get_page_zip(block); +#endif /* UNIV_ZIP_DEBUG */ + mem_heap_t* heap = NULL; + ulint offsets_[REC_OFFS_NORMAL_SIZE]; + ulint* offsets = offsets_; + rec_offs_init(offsets_); + + ut_ad(dtuple_validate(tuple)); +#ifdef UNIV_DEBUG +# ifdef PAGE_CUR_DBG + if (mode 
!= PAGE_CUR_DBG) +# endif /* PAGE_CUR_DBG */ +# ifdef PAGE_CUR_LE_OR_EXTENDS + if (mode != PAGE_CUR_LE_OR_EXTENDS) +# endif /* PAGE_CUR_LE_OR_EXTENDS */ + ut_ad(mode == PAGE_CUR_L || mode == PAGE_CUR_LE + || mode == PAGE_CUR_G || mode == PAGE_CUR_GE + || dict_index_is_spatial(index)); +#endif /* UNIV_DEBUG */ + page = buf_block_get_frame(block); +#ifdef UNIV_ZIP_DEBUG + ut_a(!page_zip || page_zip_validate(page_zip, page, index)); +#endif /* UNIV_ZIP_DEBUG */ + + ut_d(page_check_dir(page)); + +#ifdef PAGE_CUR_ADAPT + if (page_is_leaf(page) + && (mode == PAGE_CUR_LE) + && !dict_index_is_spatial(index) + && (page_header_get_field(page, PAGE_N_DIRECTION) > 3) + && (page_header_get_ptr(page, PAGE_LAST_INSERT)) + && (page_header_get_field(page, PAGE_DIRECTION) == PAGE_RIGHT)) { + + if (page_cur_try_search_shortcut( + block, index, tuple, + iup_matched_fields, + ilow_matched_fields, + cursor)) { + return; + } + } +# ifdef PAGE_CUR_DBG + if (mode == PAGE_CUR_DBG) { + mode = PAGE_CUR_LE; + } +# endif +#endif + + /* If the mode is for R-tree indexes, use the special MBR + related compare functions */ + if (dict_index_is_spatial(index) && mode > PAGE_CUR_LE) { + /* For leaf level insert, we still use the traditional + compare function for now */ + if (mode == PAGE_CUR_RTREE_INSERT && page_is_leaf(page)){ + mode = PAGE_CUR_LE; + } else { + rtr_cur_search_with_match( + block, (dict_index_t*)index, tuple, mode, + cursor, rtr_info); + return; + } + } + + /* The following flag does not work for non-latin1 char sets because + cmp_full_field does not tell how many bytes matched */ +#ifdef PAGE_CUR_LE_OR_EXTENDS + ut_a(mode != PAGE_CUR_LE_OR_EXTENDS); +#endif /* PAGE_CUR_LE_OR_EXTENDS */ + + /* If mode PAGE_CUR_G is specified, we are trying to position the + cursor to answer a query of the form "tuple < X", where tuple is + the input parameter, and X denotes an arbitrary physical record on + the page. We want to position the cursor on the first X which + satisfies the condition. 
*/ + + up_matched_fields = *iup_matched_fields; + low_matched_fields = *ilow_matched_fields; + + /* Perform binary search. First the search is done through the page + directory, after that as a linear search in the list of records + owned by the upper limit directory slot. */ + + low = 0; + up = page_dir_get_n_slots(page) - 1; + + /* Perform binary search until the lower and upper limit directory + slots come to the distance 1 of each other */ + + while (up - low > 1) { + mid = (low + up) / 2; + slot = page_dir_get_nth_slot(page, mid); + mid_rec = page_dir_slot_get_rec(slot); + + cur_matched_fields = std::min(low_matched_fields, + up_matched_fields); + + offsets = offsets_; + if (index->rec_cache.fixed_len_key) { + offsets = populate_offsets( + mid_rec, tuple, + const_cast<dict_index_t*>(index), + offsets, &heap); + } else { + offsets = rec_get_offsets( + mid_rec, index, offsets, + dtuple_get_n_fields_cmp(tuple), &heap); + + } + + cmp = cmp_dtuple_rec_with_match( + tuple, mid_rec, offsets, &cur_matched_fields); + + if (cmp > 0) { +low_slot_match: + low = mid; + low_matched_fields = cur_matched_fields; + + } else if (cmp) { +#ifdef PAGE_CUR_LE_OR_EXTENDS + if (mode == PAGE_CUR_LE_OR_EXTENDS + && page_cur_rec_field_extends( + tuple, mid_rec, offsets, + cur_matched_fields)) { + + goto low_slot_match; + } +#endif /* PAGE_CUR_LE_OR_EXTENDS */ +up_slot_match: + up = mid; + up_matched_fields = cur_matched_fields; + + } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE +#ifdef PAGE_CUR_LE_OR_EXTENDS + || mode == PAGE_CUR_LE_OR_EXTENDS +#endif /* PAGE_CUR_LE_OR_EXTENDS */ + ) { + goto low_slot_match; + } else { + + goto up_slot_match; + } + } + + slot = page_dir_get_nth_slot(page, low); + low_rec = page_dir_slot_get_rec(slot); + slot = page_dir_get_nth_slot(page, up); + up_rec = page_dir_slot_get_rec(slot); + + /* Perform linear search until the upper and lower records come to + distance 1 of each other. 
*/ + + while (page_rec_get_next_const(low_rec) != up_rec) { + + mid_rec = page_rec_get_next_const(low_rec); + + cur_matched_fields = std::min(low_matched_fields, + up_matched_fields); + + offsets = offsets_; + if (index->rec_cache.fixed_len_key) { + offsets = populate_offsets( + mid_rec, tuple, + const_cast<dict_index_t*>(index), + offsets, &heap); + } else { + offsets = rec_get_offsets( + mid_rec, index, offsets, + dtuple_get_n_fields_cmp(tuple), &heap); + + } + + cmp = cmp_dtuple_rec_with_match( + tuple, mid_rec, offsets, &cur_matched_fields); + + if (cmp > 0) { +low_rec_match: + low_rec = mid_rec; + low_matched_fields = cur_matched_fields; + + } else if (cmp) { +#ifdef PAGE_CUR_LE_OR_EXTENDS + if (mode == PAGE_CUR_LE_OR_EXTENDS + && page_cur_rec_field_extends( + tuple, mid_rec, offsets, + cur_matched_fields)) { + + goto low_rec_match; + } +#endif /* PAGE_CUR_LE_OR_EXTENDS */ +up_rec_match: + up_rec = mid_rec; + up_matched_fields = cur_matched_fields; + } else if (mode == PAGE_CUR_G || mode == PAGE_CUR_LE +#ifdef PAGE_CUR_LE_OR_EXTENDS + || mode == PAGE_CUR_LE_OR_EXTENDS +#endif /* PAGE_CUR_LE_OR_EXTENDS */ + ) { + if (!cmp && !cur_matched_fields) { +#ifdef UNIV_DEBUG + mtr_t mtr; + mtr_start(&mtr); + + /* We got a match, but cur_matched_fields is + 0, it must have REC_INFO_MIN_REC_FLAG */ + ulint rec_info = rec_get_info_bits(mid_rec, + rec_offs_comp(offsets)); + ut_ad(rec_info & REC_INFO_MIN_REC_FLAG); + ut_ad(btr_page_get_prev(page, &mtr) == FIL_NULL); + mtr_commit(&mtr); +#endif + + cur_matched_fields = dtuple_get_n_fields_cmp(tuple); + } + + goto low_rec_match; + } else { + + goto up_rec_match; + } + } + + if (mode <= PAGE_CUR_GE) { + page_cur_position(up_rec, block, cursor); + } else { + page_cur_position(low_rec, block, cursor); + } + + *iup_matched_fields = up_matched_fields; + *ilow_matched_fields = low_matched_fields; + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } +} + +/** Search the right position for a page cursor. 
+@param[in] block buffer block +@param[in] index index tree +@param[in] tuple key to be searched for +@param[in] mode search mode +@param[in,out] iup_matched_fields already matched fields in the +upper limit record +@param[in,out] iup_matched_bytes already matched bytes in the +first partially matched field in the upper limit record +@param[in,out] ilow_matched_fields already matched fields in the +lower limit record +@param[in,out] ilow_matched_bytes already matched bytes in the +first partially matched field in the lower limit record +@param[out] cursor page cursor */ +void +page_cur_search_with_match_bytes( + const buf_block_t* block, + const dict_index_t* index, + const dtuple_t* tuple, + page_cur_mode_t mode, + ulint* iup_matched_fields, + ulint* iup_matched_bytes, + ulint* ilow_matched_fields, ulint* ilow_matched_bytes, - /*!< in/out: already matched - bytes in a field not yet - completely matched */ - page_cur_t* cursor) /*!< out: page cursor */ + page_cur_t* cursor) { ulint up; ulint low; @@ -284,11 +700,6 @@ page_cur_search_with_match( ulint cur_matched_fields; ulint cur_matched_bytes; int cmp; -#ifdef UNIV_SEARCH_DEBUG - int dbg_cmp; - ulint dbg_matched_fields; - ulint dbg_matched_bytes; -#endif #ifdef UNIV_ZIP_DEBUG const page_zip_des_t* page_zip = buf_block_get_page_zip(block); #endif /* UNIV_ZIP_DEBUG */ @@ -297,8 +708,6 @@ page_cur_search_with_match( ulint* offsets = offsets_; rec_offs_init(offsets_); - ut_ad(block && tuple && iup_matched_fields && iup_matched_bytes - && ilow_matched_fields && ilow_matched_bytes && cursor); ut_ad(dtuple_validate(tuple)); #ifdef UNIV_DEBUG # ifdef PAGE_CUR_DBG @@ -315,7 +724,7 @@ page_cur_search_with_match( ut_a(!page_zip || page_zip_validate(page_zip, page, index)); #endif /* UNIV_ZIP_DEBUG */ - page_check_dir(page); + ut_d(page_check_dir(page)); #ifdef PAGE_CUR_ADAPT if (page_is_leaf(page) @@ -324,7 +733,7 @@ page_cur_search_with_match( && (page_header_get_ptr(page, PAGE_LAST_INSERT)) && (page_header_get_field(page, 
PAGE_DIRECTION) == PAGE_RIGHT)) { - if (page_cur_try_search_shortcut( + if (page_cur_try_search_shortcut_bytes( block, index, tuple, iup_matched_fields, iup_matched_bytes, ilow_matched_fields, ilow_matched_bytes, @@ -352,7 +761,7 @@ page_cur_search_with_match( satisfies the condition. */ up_matched_fields = *iup_matched_fields; - up_matched_bytes = *iup_matched_bytes; + up_matched_bytes = *iup_matched_bytes; low_matched_fields = *ilow_matched_fields; low_matched_bytes = *ilow_matched_bytes; @@ -375,20 +784,21 @@ page_cur_search_with_match( low_matched_fields, low_matched_bytes, up_matched_fields, up_matched_bytes); - offsets = rec_get_offsets(mid_rec, index, offsets, - dtuple_get_n_fields_cmp(tuple), - &heap); + offsets = rec_get_offsets( + mid_rec, index, offsets_, + dtuple_get_n_fields_cmp(tuple), &heap); - cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets, - &cur_matched_fields, - &cur_matched_bytes); - if (UNIV_LIKELY(cmp > 0)) { + cmp = cmp_dtuple_rec_with_match_bytes( + tuple, mid_rec, index, offsets, + &cur_matched_fields, &cur_matched_bytes); + + if (cmp > 0) { low_slot_match: low = mid; low_matched_fields = cur_matched_fields; low_matched_bytes = cur_matched_bytes; - } else if (UNIV_EXPECT(cmp, -1)) { + } else if (cmp) { #ifdef PAGE_CUR_LE_OR_EXTENDS if (mode == PAGE_CUR_LE_OR_EXTENDS && page_cur_rec_field_extends( @@ -408,7 +818,6 @@ up_slot_match: || mode == PAGE_CUR_LE_OR_EXTENDS #endif /* PAGE_CUR_LE_OR_EXTENDS */ ) { - goto low_slot_match; } else { @@ -432,20 +841,21 @@ up_slot_match: low_matched_fields, low_matched_bytes, up_matched_fields, up_matched_bytes); - offsets = rec_get_offsets(mid_rec, index, offsets, - dtuple_get_n_fields_cmp(tuple), - &heap); + offsets = rec_get_offsets( + mid_rec, index, offsets_, + dtuple_get_n_fields_cmp(tuple), &heap); - cmp = cmp_dtuple_rec_with_match(tuple, mid_rec, offsets, - &cur_matched_fields, - &cur_matched_bytes); - if (UNIV_LIKELY(cmp > 0)) { + cmp = cmp_dtuple_rec_with_match_bytes( + tuple, mid_rec, 
index, offsets, + &cur_matched_fields, &cur_matched_bytes); + + if (cmp > 0) { low_rec_match: low_rec = mid_rec; low_matched_fields = cur_matched_fields; low_matched_bytes = cur_matched_bytes; - } else if (UNIV_EXPECT(cmp, -1)) { + } else if (cmp) { #ifdef PAGE_CUR_LE_OR_EXTENDS if (mode == PAGE_CUR_LE_OR_EXTENDS && page_cur_rec_field_extends( @@ -464,6 +874,22 @@ up_rec_match: || mode == PAGE_CUR_LE_OR_EXTENDS #endif /* PAGE_CUR_LE_OR_EXTENDS */ ) { + if (!cmp && !cur_matched_fields) { +#ifdef UNIV_DEBUG + mtr_t mtr; + mtr_start(&mtr); + + /* We got a match, but cur_matched_fields is + 0, it must have REC_INFO_MIN_REC_FLAG */ + ulint rec_info = rec_get_info_bits(mid_rec, + rec_offs_comp(offsets)); + ut_ad(rec_info & REC_INFO_MIN_REC_FLAG); + ut_ad(btr_page_get_prev(page, &mtr) == FIL_NULL); + mtr_commit(&mtr); +#endif + + cur_matched_fields = dtuple_get_n_fields_cmp(tuple); + } goto low_rec_match; } else { @@ -472,58 +898,6 @@ up_rec_match: } } -#ifdef UNIV_SEARCH_DEBUG - - /* Check that the lower and upper limit records have the - right alphabetical order compared to tuple. 
*/ - dbg_matched_fields = 0; - dbg_matched_bytes = 0; - - offsets = rec_get_offsets(low_rec, index, offsets, - ULINT_UNDEFINED, &heap); - dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, low_rec, offsets, - &dbg_matched_fields, - &dbg_matched_bytes); - if (mode == PAGE_CUR_G) { - ut_a(dbg_cmp >= 0); - } else if (mode == PAGE_CUR_GE) { - ut_a(dbg_cmp == 1); - } else if (mode == PAGE_CUR_L) { - ut_a(dbg_cmp == 1); - } else if (mode == PAGE_CUR_LE) { - ut_a(dbg_cmp >= 0); - } - - if (!page_rec_is_infimum(low_rec)) { - - ut_a(low_matched_fields == dbg_matched_fields); - ut_a(low_matched_bytes == dbg_matched_bytes); - } - - dbg_matched_fields = 0; - dbg_matched_bytes = 0; - - offsets = rec_get_offsets(up_rec, index, offsets, - ULINT_UNDEFINED, &heap); - dbg_cmp = page_cmp_dtuple_rec_with_match(tuple, up_rec, offsets, - &dbg_matched_fields, - &dbg_matched_bytes); - if (mode == PAGE_CUR_G) { - ut_a(dbg_cmp == -1); - } else if (mode == PAGE_CUR_GE) { - ut_a(dbg_cmp <= 0); - } else if (mode == PAGE_CUR_L) { - ut_a(dbg_cmp <= 0); - } else if (mode == PAGE_CUR_LE) { - ut_a(dbg_cmp == -1); - } - - if (!page_rec_is_supremum(up_rec)) { - - ut_a(up_matched_fields == dbg_matched_fields); - ut_a(up_matched_bytes == dbg_matched_bytes); - } -#endif if (mode <= PAGE_CUR_GE) { page_cur_position(up_rec, block, cursor); } else { @@ -542,7 +916,6 @@ up_rec_match: /***********************************************************//** Positions a page cursor on a randomly chosen user record on a page. If there are no user records, sets the cursor on the infimum record. */ -UNIV_INTERN void page_cur_open_on_rnd_user_rec( /*==========================*/ @@ -583,11 +956,22 @@ page_cur_insert_rec_write_log( ulint extra_size; ulint cur_extra_size; const byte* ins_ptr; - byte* log_ptr; const byte* log_end; ulint i; + /* Avoid REDO logging to save on costly IO because + temporary tables are not recovered during crash recovery. 
*/ + if (dict_table_is_temporary(index->table)) { + byte* log_ptr = mlog_open(mtr, 0); + if (log_ptr == NULL) { + return; + } + mlog_close(mtr, log_ptr); + log_ptr = NULL; + } + ut_a(rec_size < UNIV_PAGE_SIZE); + ut_ad(mtr->is_named_space(index->space)); ut_ad(page_align(insert_rec) == page_align(cursor_rec)); ut_ad(!page_rec_is_comp(insert_rec) == !dict_table_is_comp(index->table)); @@ -648,6 +1032,8 @@ page_cur_insert_rec_write_log( } while (i < min_rec_size); } + byte* log_ptr; + if (mtr_get_log_mode(mtr) != MTR_LOG_SHORT_INSERTS) { if (page_rec_is_comp(insert_rec)) { @@ -753,27 +1139,26 @@ need_extra_info: /***********************************************************//** Parses a log record of a record insert on a page. -@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* page_cur_parse_insert_rec( /*======================*/ ibool is_short,/*!< in: TRUE if short inserts */ - byte* ptr, /*!< in: buffer */ - byte* end_ptr,/*!< in: buffer end */ + const byte* ptr, /*!< in: buffer */ + const byte* end_ptr,/*!< in: buffer end */ buf_block_t* block, /*!< in: page or NULL */ dict_index_t* index, /*!< in: record descriptor */ mtr_t* mtr) /*!< in: mtr or NULL */ { - ulint origin_offset; + ulint origin_offset = 0; /* remove warning */ ulint end_seg_len; - ulint mismatch_index; + ulint mismatch_index = 0; /* remove warning */ page_t* page; rec_t* cursor_rec; byte buf1[1024]; byte* buf; - byte* ptr2 = ptr; - ulint info_and_status_bits = 0; /* remove warning */ + const byte* ptr2 = ptr; + ulint info_and_status_bits = 0; /* remove warning */ page_cur_t cursor; mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; @@ -799,7 +1184,7 @@ page_cur_parse_insert_rec( cursor_rec = page + offset; - if (UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE)) { + if (offset >= UNIV_PAGE_SIZE) { recv_sys->found_corrupt_log = TRUE; @@ -807,14 +1192,14 @@ page_cur_parse_insert_rec( } } - ptr = mach_parse_compressed(ptr, end_ptr, &end_seg_len); + 
end_seg_len = mach_parse_compressed(&ptr, end_ptr); if (ptr == NULL) { return(NULL); } - if (UNIV_UNLIKELY(end_seg_len >= UNIV_PAGE_SIZE << 1)) { + if (end_seg_len >= UNIV_PAGE_SIZE << 1) { recv_sys->found_corrupt_log = TRUE; return(NULL); @@ -831,7 +1216,7 @@ page_cur_parse_insert_rec( info_and_status_bits = mach_read_from_1(ptr); ptr++; - ptr = mach_parse_compressed(ptr, end_ptr, &origin_offset); + origin_offset = mach_parse_compressed(&ptr, end_ptr); if (ptr == NULL) { @@ -840,7 +1225,7 @@ page_cur_parse_insert_rec( ut_a(origin_offset < UNIV_PAGE_SIZE); - ptr = mach_parse_compressed(ptr, end_ptr, &mismatch_index); + mismatch_index = mach_parse_compressed(&ptr, end_ptr); if (ptr == NULL) { @@ -850,14 +1235,14 @@ page_cur_parse_insert_rec( ut_a(mismatch_index < UNIV_PAGE_SIZE); } - if (UNIV_UNLIKELY(end_ptr < ptr + (end_seg_len >> 1))) { + if (end_ptr < ptr + (end_seg_len >> 1)) { return(NULL); } if (!block) { - return(ptr + (end_seg_len >> 1)); + return(const_cast<byte*>(ptr + (end_seg_len >> 1))); } ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); @@ -882,30 +1267,19 @@ page_cur_parse_insert_rec( buf = buf1; } else { buf = static_cast<byte*>( - mem_alloc(mismatch_index + end_seg_len)); + ut_malloc_nokey(mismatch_index + end_seg_len)); } /* Build the inserted record to buf */ if (UNIV_UNLIKELY(mismatch_index >= UNIV_PAGE_SIZE)) { - fprintf(stderr, - "Is short %lu, info_and_status_bits %lu, offset %lu, " - "o_offset %lu\n" - "mismatch index %lu, end_seg_len %lu\n" - "parsed len %lu\n", - (ulong) is_short, (ulong) info_and_status_bits, - (ulong) page_offset(cursor_rec), - (ulong) origin_offset, - (ulong) mismatch_index, (ulong) end_seg_len, - (ulong) (ptr - ptr2)); - - fputs("Dump of 300 bytes of log:\n", stderr); - ut_print_buf(stderr, ptr2, 300); - putc('\n', stderr); - - buf_page_print(page, 0, 0); - ut_error; + ib::fatal() << "is_short " << is_short << ", " + << "info_and_status_bits " << info_and_status_bits + << ", offset " << 
page_offset(cursor_rec) << "," + " o_offset " << origin_offset << ", mismatch index " + << mismatch_index << ", end_seg_len " << end_seg_len + << " parsed len " << (ptr - ptr2); } ut_memcpy(buf, rec_get_start(cursor_rec, offsets), mismatch_index); @@ -913,10 +1287,10 @@ page_cur_parse_insert_rec( if (page_is_comp(page)) { rec_set_info_and_status_bits(buf + origin_offset, - info_and_status_bits); + info_and_status_bits); } else { rec_set_info_bits_old(buf + origin_offset, - info_and_status_bits); + info_and_status_bits); } page_cur_position(cursor_rec, block, &cursor); @@ -933,22 +1307,21 @@ page_cur_parse_insert_rec( if (buf != buf1) { - mem_free(buf); + ut_free(buf); } if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } - return(ptr + end_seg_len); + return(const_cast<byte*>(ptr + end_seg_len)); } /***********************************************************//** Inserts a record next to page cursor on an uncompressed page. Returns pointer to inserted record if succeed, i.e., enough space available, NULL otherwise. The cursor stays at the same position. -@return pointer to record if succeed, NULL otherwise */ -UNIV_INTERN +@return pointer to record if succeed, NULL otherwise */ rec_t* page_cur_insert_rec_low( /*====================*/ @@ -975,10 +1348,10 @@ page_cur_insert_rec_low( page = page_align(current_rec); ut_ad(dict_table_is_comp(index->table) == (ibool) !!page_is_comp(page)); - ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); - ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) - == index->id || recv_recovery_is_on() - || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index))); + ut_ad(fil_page_index_page_check(page)); + ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) == index->id + || recv_recovery_is_on() + || (mtr ? 
mtr->is_inside_ibuf() : dict_index_is_ibuf(index))); ut_ad(!page_rec_is_supremum(current_rec)); @@ -1095,7 +1468,214 @@ use_heap: || rec_get_node_ptr_flag(last_insert) == rec_get_node_ptr_flag(insert_rec)); - if (UNIV_UNLIKELY(last_insert == NULL)) { + if (!dict_index_is_spatial(index)) { + if (UNIV_UNLIKELY(last_insert == NULL)) { + page_header_set_field(page, NULL, PAGE_DIRECTION, + PAGE_NO_DIRECTION); + page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0); + + } else if ((last_insert == current_rec) + && (page_header_get_field(page, PAGE_DIRECTION) + != PAGE_LEFT)) { + + page_header_set_field(page, NULL, PAGE_DIRECTION, + PAGE_RIGHT); + page_header_set_field(page, NULL, PAGE_N_DIRECTION, + page_header_get_field( + page, PAGE_N_DIRECTION) + 1); + + } else if ((page_rec_get_next(insert_rec) == last_insert) + && (page_header_get_field(page, PAGE_DIRECTION) + != PAGE_RIGHT)) { + + page_header_set_field(page, NULL, PAGE_DIRECTION, + PAGE_LEFT); + page_header_set_field(page, NULL, PAGE_N_DIRECTION, + page_header_get_field( + page, PAGE_N_DIRECTION) + 1); + } else { + page_header_set_field(page, NULL, PAGE_DIRECTION, + PAGE_NO_DIRECTION); + page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0); + } + } + + page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, insert_rec); + + /* 7. It remains to update the owner record. */ + { + rec_t* owner_rec = page_rec_find_owner_rec(insert_rec); + ulint n_owned; + if (page_is_comp(page)) { + n_owned = rec_get_n_owned_new(owner_rec); + rec_set_n_owned_new(owner_rec, NULL, n_owned + 1); + } else { + n_owned = rec_get_n_owned_old(owner_rec); + rec_set_n_owned_old(owner_rec, n_owned + 1); + } + + /* 8. Now we have incremented the n_owned field of the owner + record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED, + we have to split the corresponding directory slot in two. */ + + if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) { + page_dir_split_slot( + page, NULL, + page_dir_find_owner_slot(owner_rec)); + } + } + + /* 9. 
Write log record of the insert */ + if (UNIV_LIKELY(mtr != NULL)) { + page_cur_insert_rec_write_log(insert_rec, rec_size, + current_rec, index, mtr); + } + + return(insert_rec); +} + +/** Inserts a record next to page cursor on an uncompressed page. +@param[in] current_rec pointer to current record after which + the new record is inserted. +@param[in] index record descriptor +@param[in] tuple pointer to a data tuple +@param[in] n_ext number of externally stored columns +@param[in] mtr mini-transaction handle, or NULL + +@return pointer to record if succeed, NULL otherwise */ +rec_t* +page_cur_direct_insert_rec_low( + rec_t* current_rec, + dict_index_t* index, + const dtuple_t* tuple, + ulint n_ext, + mtr_t* mtr) +{ + byte* insert_buf; + ulint rec_size; + page_t* page; /*!< the relevant page */ + rec_t* last_insert; /*!< cursor position at previous + insert */ + rec_t* free_rec; /*!< a free record that was reused, + or NULL */ + rec_t* insert_rec; /*!< inserted record */ + ulint heap_no; /*!< heap number of the inserted + record */ + + page = page_align(current_rec); + + ut_ad(dict_table_is_comp(index->table) + == (ibool) !!page_is_comp(page)); + + ut_ad(fil_page_index_page_check(page)); + + ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) + == index->id); + + ut_ad(!page_rec_is_supremum(current_rec)); + + /* 1. Get the size of the physical record in the page */ + rec_size = index->rec_cache.rec_size; + + /* 2. Try to find suitable space from page memory management */ + free_rec = page_header_get_ptr(page, PAGE_FREE); + if (free_rec) { + /* Try to allocate from the head of the free list. 
*/ + ulint foffsets_[REC_OFFS_NORMAL_SIZE]; + ulint* foffsets = foffsets_; + mem_heap_t* heap = NULL; + + rec_offs_init(foffsets_); + + foffsets = rec_get_offsets( + free_rec, index, foffsets, ULINT_UNDEFINED, &heap); + if (rec_offs_size(foffsets) < rec_size) { + if (heap != NULL) { + mem_heap_free(heap); + heap = NULL; + } + + free_rec = NULL; + insert_buf = page_mem_alloc_heap( + page, NULL, rec_size, &heap_no); + + if (insert_buf == NULL) { + return(NULL); + } + } else { + insert_buf = free_rec - rec_offs_extra_size(foffsets); + + if (page_is_comp(page)) { + heap_no = rec_get_heap_no_new(free_rec); + page_mem_alloc_free( + page, NULL, + rec_get_next_ptr(free_rec, TRUE), + rec_size); + } else { + heap_no = rec_get_heap_no_old(free_rec); + page_mem_alloc_free( + page, NULL, + rec_get_next_ptr(free_rec, FALSE), + rec_size); + } + + if (heap != NULL) { + mem_heap_free(heap); + heap = NULL; + } + } + } else { + free_rec = NULL; + insert_buf = page_mem_alloc_heap(page, NULL, + rec_size, &heap_no); + + if (insert_buf == NULL) { + return(NULL); + } + } + + /* 3. Create the record */ + insert_rec = rec_convert_dtuple_to_rec(insert_buf, index, tuple, n_ext); + + /* 4. Insert the record in the linked list of records */ + ut_ad(current_rec != insert_rec); + + { + /* next record after current before the insertion */ + rec_t* next_rec = page_rec_get_next(current_rec); +#ifdef UNIV_DEBUG + if (page_is_comp(page)) { + ut_ad(rec_get_status(current_rec) + <= REC_STATUS_INFIMUM); + ut_ad(rec_get_status(insert_rec) < REC_STATUS_INFIMUM); + ut_ad(rec_get_status(next_rec) != REC_STATUS_INFIMUM); + } +#endif + page_rec_set_next(insert_rec, next_rec); + page_rec_set_next(current_rec, insert_rec); + } + + page_header_set_field(page, NULL, PAGE_N_RECS, + 1 + page_get_n_recs(page)); + + /* 5. 
Set the n_owned field in the inserted record to zero, + and set the heap_no field */ + if (page_is_comp(page)) { + rec_set_n_owned_new(insert_rec, NULL, 0); + rec_set_heap_no_new(insert_rec, heap_no); + } else { + rec_set_n_owned_old(insert_rec, 0); + rec_set_heap_no_old(insert_rec, heap_no); + } + + /* 6. Update the last insertion info in page header */ + + last_insert = page_header_get_ptr(page, PAGE_LAST_INSERT); + ut_ad(!last_insert || !page_is_comp(page) + || rec_get_node_ptr_flag(last_insert) + == rec_get_node_ptr_flag(insert_rec)); + + if (last_insert == NULL) { page_header_set_field(page, NULL, PAGE_DIRECTION, PAGE_NO_DIRECTION); page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0); @@ -1143,21 +1723,22 @@ use_heap: record. If the number exceeds PAGE_DIR_SLOT_MAX_N_OWNED, we have to split the corresponding directory slot in two. */ - if (UNIV_UNLIKELY(n_owned == PAGE_DIR_SLOT_MAX_N_OWNED)) { + if (n_owned == PAGE_DIR_SLOT_MAX_N_OWNED) { page_dir_split_slot( page, NULL, page_dir_find_owner_slot(owner_rec)); } } - /* 9. Write log record of the insert */ - if (UNIV_LIKELY(mtr != NULL)) { - page_cur_insert_rec_write_log(insert_rec, rec_size, - current_rec, index, mtr); + /* 8. Open the mtr for name sake to set the modification flag + to true failing which no flush would be done. */ + byte* log_ptr = mlog_open(mtr, 0); + ut_ad(log_ptr == NULL); + if (log_ptr != NULL) { + /* To keep complier happy. */ + mlog_close(mtr, log_ptr); } - btr_blob_dbg_add_rec(insert_rec, index, offsets, "insert"); - return(insert_rec); } @@ -1172,8 +1753,7 @@ if this is a compressed leaf page in a secondary index. This has to be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit(). 
-@return pointer to record if succeed, NULL otherwise */ -UNIV_INTERN +@return pointer to record if succeed, NULL otherwise */ rec_t* page_cur_insert_rec_zip( /*====================*/ @@ -1203,10 +1783,10 @@ page_cur_insert_rec_zip( page = page_cur_get_page(cursor); ut_ad(dict_table_is_comp(index->table)); ut_ad(page_is_comp(page)); - ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); - ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) - == index->id || recv_recovery_is_on() - || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index))); + ut_ad(fil_page_index_page_check(page)); + ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) == index->id + || (mtr ? mtr->is_inside_ibuf() : dict_index_is_ibuf(index)) + || recv_recovery_is_on()); ut_ad(!page_cur_is_after_last(cursor)); #ifdef UNIV_ZIP_DEBUG @@ -1264,7 +1844,7 @@ page_cur_insert_rec_zip( get rid of the modification log. */ page_create_zip(page_cur_get_block(cursor), index, page_header_get_field(page, PAGE_LEVEL), - 0, mtr); + 0, NULL, mtr); ut_ad(!page_header_get_ptr(page, PAGE_FREE)); if (page_zip_available( @@ -1337,7 +1917,7 @@ page_cur_insert_rec_zip( if (!log_compressed) { if (page_zip_compress( page_zip, page, index, - level, NULL)) { + level, NULL, NULL)) { page_cur_insert_rec_write_log( insert_rec, rec_size, cursor->rec, index, mtr); @@ -1403,12 +1983,10 @@ page_cur_insert_rec_zip( } /* Out of space: restore the page */ - btr_blob_dbg_remove(page, index, "insert_zip_fail"); if (!page_zip_decompress(page_zip, page, FALSE)) { ut_error; /* Memory corrupted? 
*/ } ut_ad(page_validate(page, index)); - btr_blob_dbg_add(page, index, "insert_zip_fail"); insert_rec = NULL; } @@ -1569,34 +2147,38 @@ use_heap: || rec_get_node_ptr_flag(last_insert) == rec_get_node_ptr_flag(insert_rec)); - if (UNIV_UNLIKELY(last_insert == NULL)) { - page_header_set_field(page, page_zip, PAGE_DIRECTION, - PAGE_NO_DIRECTION); - page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0); - - } else if ((last_insert == cursor->rec) - && (page_header_get_field(page, PAGE_DIRECTION) - != PAGE_LEFT)) { - - page_header_set_field(page, page_zip, PAGE_DIRECTION, - PAGE_RIGHT); - page_header_set_field(page, page_zip, PAGE_N_DIRECTION, - page_header_get_field( - page, PAGE_N_DIRECTION) + 1); - - } else if ((page_rec_get_next(insert_rec) == last_insert) - && (page_header_get_field(page, PAGE_DIRECTION) - != PAGE_RIGHT)) { - - page_header_set_field(page, page_zip, PAGE_DIRECTION, - PAGE_LEFT); - page_header_set_field(page, page_zip, PAGE_N_DIRECTION, - page_header_get_field( - page, PAGE_N_DIRECTION) + 1); - } else { - page_header_set_field(page, page_zip, PAGE_DIRECTION, - PAGE_NO_DIRECTION); - page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0); + if (!dict_index_is_spatial(index)) { + if (UNIV_UNLIKELY(last_insert == NULL)) { + page_header_set_field(page, page_zip, PAGE_DIRECTION, + PAGE_NO_DIRECTION); + page_header_set_field(page, page_zip, + PAGE_N_DIRECTION, 0); + + } else if ((last_insert == cursor->rec) + && (page_header_get_field(page, PAGE_DIRECTION) + != PAGE_LEFT)) { + + page_header_set_field(page, page_zip, PAGE_DIRECTION, + PAGE_RIGHT); + page_header_set_field(page, page_zip, PAGE_N_DIRECTION, + page_header_get_field( + page, PAGE_N_DIRECTION) + 1); + + } else if ((page_rec_get_next(insert_rec) == last_insert) + && (page_header_get_field(page, PAGE_DIRECTION) + != PAGE_RIGHT)) { + + page_header_set_field(page, page_zip, PAGE_DIRECTION, + PAGE_LEFT); + page_header_set_field(page, page_zip, PAGE_N_DIRECTION, + page_header_get_field( + page, 
PAGE_N_DIRECTION) + 1); + } else { + page_header_set_field(page, page_zip, PAGE_DIRECTION, + PAGE_NO_DIRECTION); + page_header_set_field(page, page_zip, + PAGE_N_DIRECTION, 0); + } } page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, insert_rec); @@ -1622,8 +2204,6 @@ use_heap: page_zip_write_rec(page_zip, insert_rec, index, offsets, 1); - btr_blob_dbg_add_rec(insert_rec, index, offsets, "insert_zip_ok"); - /* 9. Write log record of the insert */ if (UNIV_LIKELY(mtr != NULL)) { page_cur_insert_rec_write_log(insert_rec, rec_size, @@ -1649,6 +2229,7 @@ page_copy_rec_list_to_created_page_write_log( byte* log_ptr; ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); + ut_ad(mtr->is_named_space(index->space)); log_ptr = mlog_open_and_write_index(mtr, page, index, page_is_comp(page) @@ -1664,8 +2245,7 @@ page_copy_rec_list_to_created_page_write_log( /**********************************************************//** Parses a log record of copying a record list end to a new created page. -@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* page_parse_copy_rec_list_to_created_page( /*=====================================*/ @@ -1711,9 +2291,12 @@ page_parse_copy_rec_list_to_created_page( page_zip = buf_block_get_page_zip(block); page_header_set_ptr(page, page_zip, PAGE_LAST_INSERT, NULL); - page_header_set_field(page, page_zip, PAGE_DIRECTION, - PAGE_NO_DIRECTION); - page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0); + + if (!dict_index_is_spatial(index)) { + page_header_set_field(page, page_zip, PAGE_DIRECTION, + PAGE_NO_DIRECTION); + page_header_set_field(page, page_zip, PAGE_N_DIRECTION, 0); + } return(rec_end); } @@ -1727,7 +2310,6 @@ IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if this is a compressed leaf page in a secondary index. This has to be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit(). 
*/ -UNIV_INTERN void page_copy_rec_list_end_to_created_page( /*===================================*/ @@ -1744,7 +2326,6 @@ page_copy_rec_list_end_to_created_page( ulint n_recs; ulint slot_index; ulint rec_size; - ulint log_mode; byte* log_ptr; ulint log_data_len; mem_heap_t* heap = NULL; @@ -1777,11 +2358,18 @@ page_copy_rec_list_end_to_created_page( log_ptr = page_copy_rec_list_to_created_page_write_log(new_page, index, mtr); - log_data_len = dyn_array_get_data_size(&(mtr->log)); + log_data_len = mtr->get_log()->size(); /* Individual inserts are logged in a shorter form */ - log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS); + mtr_log_t log_mode; + + if (dict_table_is_temporary(index->table) + || index->table->ibd_file_missing /* IMPORT TABLESPACE */) { + log_mode = mtr_get_log_mode(mtr); + } else { + log_mode = mtr_set_log_mode(mtr, MTR_LOG_SHORT_INSERTS); + } prev_rec = page_get_infimum_rec(new_page); if (page_is_comp(new_page)) { @@ -1837,8 +2425,6 @@ page_copy_rec_list_end_to_created_page( heap_top += rec_size; rec_offs_make_valid(insert_rec, index, offsets); - btr_blob_dbg_add_rec(insert_rec, index, offsets, "copy_end"); - page_cur_insert_rec_write_log(insert_rec, rec_size, prev_rec, index, mtr); prev_rec = insert_rec; @@ -1866,11 +2452,11 @@ page_copy_rec_list_end_to_created_page( mem_heap_free(heap); } - log_data_len = dyn_array_get_data_size(&(mtr->log)) - log_data_len; + log_data_len = mtr->get_log()->size() - log_data_len; ut_a(log_data_len < 100 * UNIV_PAGE_SIZE); - if (UNIV_LIKELY(log_ptr != NULL)) { + if (log_ptr != NULL) { mach_write_to_4(log_ptr, log_data_len); } @@ -1891,8 +2477,9 @@ page_copy_rec_list_end_to_created_page( page_header_set_field(new_page, NULL, PAGE_N_RECS, n_recs); page_header_set_ptr(new_page, NULL, PAGE_LAST_INSERT, NULL); + page_header_set_field(new_page, NULL, PAGE_DIRECTION, - PAGE_NO_DIRECTION); + PAGE_NO_DIRECTION); page_header_set_field(new_page, NULL, PAGE_N_DIRECTION, 0); /* Restore the log mode */ @@ -1913,6 
+2500,7 @@ page_cur_delete_rec_write_log( byte* log_ptr; ut_ad(!!page_rec_is_comp(rec) == dict_table_is_comp(index->table)); + ut_ad(mtr->is_named_space(index->space)); log_ptr = mlog_open_and_write_index(mtr, rec, index, page_rec_is_comp(rec) @@ -1936,8 +2524,7 @@ page_cur_delete_rec_write_log( /***********************************************************//** Parses log record of a record delete on a page. -@return pointer to record end or NULL */ -UNIV_INTERN +@return pointer to record end or NULL */ byte* page_cur_parse_delete_rec( /*======================*/ @@ -1986,7 +2573,6 @@ page_cur_parse_delete_rec( /***********************************************************//** Deletes a record at the page cursor. The cursor is moved to the next record after the deleted one. */ -UNIV_INTERN void page_cur_delete_rec( /*================*/ @@ -2022,10 +2608,11 @@ page_cur_delete_rec( current_rec = cursor->rec; ut_ad(rec_offs_validate(current_rec, index, offsets)); ut_ad(!!page_is_comp(page) == dict_table_is_comp(index->table)); - ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); - ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) - == index->id || recv_recovery_is_on() - || (mtr ? mtr->inside_ibuf : dict_index_is_ibuf(index))); + ut_ad(fil_page_index_page_check(page)); + ut_ad(mach_read_from_8(page + PAGE_HEADER + PAGE_INDEX_ID) == index->id + || (mtr ? mtr->is_inside_ibuf() : dict_index_is_ibuf(index)) + || recv_recovery_is_on()); + ut_ad(mtr == NULL || mtr->is_named_space(index->space)); /* The record must not be the supremum or infimum record. */ ut_ad(page_rec_is_user_rec(current_rec)); @@ -2084,7 +2671,7 @@ page_cur_delete_rec( /* rec now points to the record of the previous directory slot. Look for the immediate predecessor of current_rec in a loop. 
*/ - while(current_rec != rec) { + while (current_rec != rec) { prev_rec = rec; rec = page_rec_get_next(rec); } @@ -2115,8 +2702,6 @@ page_cur_delete_rec( page_dir_slot_set_n_owned(cur_dir_slot, page_zip, cur_n_owned - 1); /* 6. Free the memory occupied by the record */ - btr_blob_dbg_remove_rec(current_rec, const_cast<dict_index_t*>(index), - offsets, "delete"); page_mem_free(page, page_zip, current_rec, index, offsets); /* 7. Now we have decremented the number of owned records of the slot. diff --git a/storage/innobase/page/page0page.cc b/storage/innobase/page/page0page.cc index a09f270a54f..89669d09e89 100644 --- a/storage/innobase/page/page0page.cc +++ b/storage/innobase/page/page0page.cc @@ -24,17 +24,16 @@ Index page routines Created 2/2/1994 Heikki Tuuri *******************************************************/ -#define THIS_MODULE #include "page0page.h" #ifdef UNIV_NONINL #include "page0page.ic" #endif -#undef THIS_MODULE #include "page0cur.h" #include "page0zip.h" #include "buf0buf.h" #include "btr0btr.h" +#include "row0trunc.h" #ifndef UNIV_HOTBACKUP # include "srv0srv.h" # include "lock0lock.h" @@ -86,8 +85,7 @@ is 50 x 4 bytes = 200 bytes. */ /***************************************************************//** Looks for the directory slot which owns the given record. -@return the directory slot number */ -UNIV_INTERN +@return the directory slot number */ ulint page_dir_find_owner_slot( /*=====================*/ @@ -124,11 +122,9 @@ page_dir_find_owner_slot( while (UNIV_LIKELY(*(uint16*) slot != rec_offs_bytes)) { if (UNIV_UNLIKELY(slot == first_slot)) { - fprintf(stderr, - "InnoDB: Probable data corruption on" - " page %lu\n" - "InnoDB: Original record ", - (ulong) page_get_page_no(page)); + ib::error() << "Probable data corruption on page " + << page_get_page_no(page) + << ". 
Original record on that page;"; if (page_is_comp(page)) { fputs("(compact record)", stderr); @@ -136,20 +132,15 @@ page_dir_find_owner_slot( rec_print_old(stderr, rec); } - fputs("\n" - "InnoDB: on that page.\n" - "InnoDB: Cannot find the dir slot for record ", - stderr); + ib::error() << "Cannot find the dir slot for this" + " record on that page;"; + if (page_is_comp(page)) { fputs("(compact record)", stderr); } else { rec_print_old(stderr, page + mach_decode_2(rec_offs_bytes)); } - fputs("\n" - "InnoDB: on that page!\n", stderr); - - buf_page_print(page, 0, 0); ut_error; } @@ -162,7 +153,7 @@ page_dir_find_owner_slot( /**************************************************************//** Used to check the consistency of a directory slot. -@return TRUE if succeed */ +@return TRUE if succeed */ static ibool page_dir_slot_check( @@ -205,7 +196,6 @@ page_dir_slot_check( /*************************************************************//** Sets the max trx id field value. */ -UNIV_INTERN void page_set_max_trx_id( /*================*/ @@ -240,8 +230,7 @@ page_set_max_trx_id( /************************************************************//** Allocates a block of memory from the heap of an index page. -@return pointer to start of allocated buffer, or NULL if allocation fails */ -UNIV_INTERN +@return pointer to start of allocated buffer, or NULL if allocation fails */ byte* page_mem_alloc_heap( /*================*/ @@ -286,63 +275,70 @@ page_create_write_log( buf_frame_t* frame, /*!< in: a buffer frame where the page is created */ mtr_t* mtr, /*!< in: mini-transaction handle */ - ibool comp) /*!< in: TRUE=compact page format */ -{ - mlog_write_initial_log_record(frame, comp - ? MLOG_COMP_PAGE_CREATE - : MLOG_PAGE_CREATE, mtr); -} -#else /* !UNIV_HOTBACKUP */ -# define page_create_write_log(frame,mtr,comp) ((void) 0) -#endif /* !UNIV_HOTBACKUP */ - -/***********************************************************//** -Parses a redo log record of creating a page. 
-@return end of log record or NULL */ -UNIV_INTERN -byte* -page_parse_create( -/*==============*/ - byte* ptr, /*!< in: buffer */ - byte* end_ptr MY_ATTRIBUTE((unused)), /*!< in: buffer end */ - ulint comp, /*!< in: nonzero=compact page format */ - buf_block_t* block, /*!< in: block or NULL */ - mtr_t* mtr) /*!< in: mtr or NULL */ + ibool comp, /*!< in: TRUE=compact page format */ + bool is_rtree) /*!< in: whether it is R-tree */ { - ut_ad(ptr && end_ptr); - - /* The record is empty, except for the record initial part */ + mlog_id_t type; - if (block) { - page_create(block, mtr, comp); + if (is_rtree) { + type = comp ? MLOG_COMP_PAGE_CREATE_RTREE + : MLOG_PAGE_CREATE_RTREE; + } else { + type = comp ? MLOG_COMP_PAGE_CREATE : MLOG_PAGE_CREATE; } - return(ptr); + mlog_write_initial_log_record(frame, type, mtr); } +#else /* !UNIV_HOTBACKUP */ +# define page_create_write_log(frame,mtr,comp,is_rtree) ((void) 0) +#endif /* !UNIV_HOTBACKUP */ + +/** The page infimum and supremum of an empty page in ROW_FORMAT=REDUNDANT */ +static const byte infimum_supremum_redundant[] = { + /* the infimum record */ + 0x08/*end offset*/, + 0x01/*n_owned*/, + 0x00, 0x00/*heap_no=0*/, + 0x03/*n_fields=1, 1-byte offsets*/, + 0x00, 0x74/* pointer to supremum */, + 'i', 'n', 'f', 'i', 'm', 'u', 'm', 0, + /* the supremum record */ + 0x09/*end offset*/, + 0x01/*n_owned*/, + 0x00, 0x08/*heap_no=1*/, + 0x03/*n_fields=1, 1-byte offsets*/, + 0x00, 0x00/* end of record list */, + 's', 'u', 'p', 'r', 'e', 'm', 'u', 'm', 0 +}; + +/** The page infimum and supremum of an empty page in ROW_FORMAT=COMPACT */ +static const byte infimum_supremum_compact[] = { + /* the infimum record */ + 0x01/*n_owned=1*/, + 0x00, 0x02/* heap_no=0, REC_STATUS_INFIMUM */, + 0x00, 0x0d/* pointer to supremum */, + 'i', 'n', 'f', 'i', 'm', 'u', 'm', 0, + /* the supremum record */ + 0x01/*n_owned=1*/, + 0x00, 0x0b/* heap_no=1, REC_STATUS_SUPREMUM */, + 0x00, 0x00/* end of record list */, + 's', 'u', 'p', 'r', 'e', 'm', 'u', 'm' 
+}; /**********************************************************//** The index page creation function. -@return pointer to the page */ +@return pointer to the page */ static page_t* page_create_low( /*============*/ buf_block_t* block, /*!< in: a buffer block where the page is created */ - ulint comp) /*!< in: nonzero=compact page format */ + ulint comp, /*!< in: nonzero=compact page format */ + bool is_rtree) /*!< in: if it is an R-Tree page */ { - page_dir_slot_t* slot; - mem_heap_t* heap; - dtuple_t* tuple; - dfield_t* field; - byte* heap_top; - rec_t* infimum_rec; - rec_t* supremum_rec; page_t* page; - dict_index_t* index; - ulint* offsets; - ut_ad(block); #if PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA # error "PAGE_BTR_IBUF_FREE_LIST + FLST_BASE_NODE_SIZE > PAGE_DATA" #endif @@ -350,175 +346,132 @@ page_create_low( # error "PAGE_BTR_IBUF_FREE_LIST_NODE + FLST_NODE_SIZE > PAGE_DATA" #endif - /* The infimum and supremum records use a dummy index. */ - if (UNIV_LIKELY(comp)) { - index = dict_ind_compact; - } else { - index = dict_ind_redundant; - } - - /* 1. INCREMENT MODIFY CLOCK */ buf_block_modify_clock_inc(block); page = buf_block_get_frame(block); - fil_page_set_type(page, FIL_PAGE_INDEX); - - heap = mem_heap_create(200); - - /* 3. 
CREATE THE INFIMUM AND SUPREMUM RECORDS */ - - /* Create first a data tuple for infimum record */ - tuple = dtuple_create(heap, 1); - dtuple_set_info_bits(tuple, REC_STATUS_INFIMUM); - field = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(field, "infimum", 8); - dtype_set(dfield_get_type(field), - DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, 8); - /* Set the corresponding physical record to its place in the page - record heap */ - - heap_top = page + PAGE_DATA; - - infimum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0); - - if (UNIV_LIKELY(comp)) { - ut_a(infimum_rec == page + PAGE_NEW_INFIMUM); - - rec_set_n_owned_new(infimum_rec, NULL, 1); - rec_set_heap_no_new(infimum_rec, 0); + if (is_rtree) { + fil_page_set_type(page, FIL_PAGE_RTREE); } else { - ut_a(infimum_rec == page + PAGE_OLD_INFIMUM); - - rec_set_n_owned_old(infimum_rec, 1); - rec_set_heap_no_old(infimum_rec, 0); - } - - offsets = rec_get_offsets(infimum_rec, index, NULL, - ULINT_UNDEFINED, &heap); - - heap_top = rec_get_end(infimum_rec, offsets); - - /* Create then a tuple for supremum */ - - tuple = dtuple_create(heap, 1); - dtuple_set_info_bits(tuple, REC_STATUS_SUPREMUM); - field = dtuple_get_nth_field(tuple, 0); - - dfield_set_data(field, "supremum", comp ? 8 : 9); - dtype_set(dfield_get_type(field), - DATA_VARCHAR, DATA_ENGLISH | DATA_NOT_NULL, comp ? 
8 : 9); - - supremum_rec = rec_convert_dtuple_to_rec(heap_top, index, tuple, 0); - - if (UNIV_LIKELY(comp)) { - ut_a(supremum_rec == page + PAGE_NEW_SUPREMUM); - - rec_set_n_owned_new(supremum_rec, NULL, 1); - rec_set_heap_no_new(supremum_rec, 1); + fil_page_set_type(page, FIL_PAGE_INDEX); + } + + memset(page + PAGE_HEADER, 0, PAGE_HEADER_PRIV_END); + page[PAGE_HEADER + PAGE_N_DIR_SLOTS + 1] = 2; + page[PAGE_HEADER + PAGE_DIRECTION + 1] = PAGE_NO_DIRECTION; + + if (comp) { + page[PAGE_HEADER + PAGE_N_HEAP] = 0x80;/*page_is_comp()*/ + page[PAGE_HEADER + PAGE_N_HEAP + 1] = PAGE_HEAP_NO_USER_LOW; + page[PAGE_HEADER + PAGE_HEAP_TOP + 1] = PAGE_NEW_SUPREMUM_END; + memcpy(page + PAGE_DATA, infimum_supremum_compact, + sizeof infimum_supremum_compact); + memset(page + + PAGE_NEW_SUPREMUM_END, 0, + UNIV_PAGE_SIZE - PAGE_DIR - PAGE_NEW_SUPREMUM_END); + page[UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE * 2 + 1] + = PAGE_NEW_SUPREMUM; + page[UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE + 1] + = PAGE_NEW_INFIMUM; } else { - ut_a(supremum_rec == page + PAGE_OLD_SUPREMUM); - - rec_set_n_owned_old(supremum_rec, 1); - rec_set_heap_no_old(supremum_rec, 1); + page[PAGE_HEADER + PAGE_N_HEAP + 1] = PAGE_HEAP_NO_USER_LOW; + page[PAGE_HEADER + PAGE_HEAP_TOP + 1] = PAGE_OLD_SUPREMUM_END; + memcpy(page + PAGE_DATA, infimum_supremum_redundant, + sizeof infimum_supremum_redundant); + memset(page + + PAGE_OLD_SUPREMUM_END, 0, + UNIV_PAGE_SIZE - PAGE_DIR - PAGE_OLD_SUPREMUM_END); + page[UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE * 2 + 1] + = PAGE_OLD_SUPREMUM; + page[UNIV_PAGE_SIZE - PAGE_DIR - PAGE_DIR_SLOT_SIZE + 1] + = PAGE_OLD_INFIMUM; } - offsets = rec_get_offsets(supremum_rec, index, offsets, - ULINT_UNDEFINED, &heap); - heap_top = rec_get_end(supremum_rec, offsets); - - ut_ad(heap_top == page - + (comp ? PAGE_NEW_SUPREMUM_END : PAGE_OLD_SUPREMUM_END)); - - mem_heap_free(heap); - - /* 4. 
INITIALIZE THE PAGE */ - - page_header_set_field(page, NULL, PAGE_N_DIR_SLOTS, 2); - page_header_set_ptr(page, NULL, PAGE_HEAP_TOP, heap_top); - page_header_set_field(page, NULL, PAGE_N_HEAP, comp - ? 0x8000 | PAGE_HEAP_NO_USER_LOW - : PAGE_HEAP_NO_USER_LOW); - page_header_set_ptr(page, NULL, PAGE_FREE, NULL); - page_header_set_field(page, NULL, PAGE_GARBAGE, 0); - page_header_set_ptr(page, NULL, PAGE_LAST_INSERT, NULL); - page_header_set_field(page, NULL, PAGE_DIRECTION, PAGE_NO_DIRECTION); - page_header_set_field(page, NULL, PAGE_N_DIRECTION, 0); - page_header_set_field(page, NULL, PAGE_N_RECS, 0); - page_set_max_trx_id(block, NULL, 0, NULL); - memset(heap_top, 0, UNIV_PAGE_SIZE - PAGE_EMPTY_DIR_START - - page_offset(heap_top)); - - /* 5. SET POINTERS IN RECORDS AND DIR SLOTS */ - - /* Set the slots to point to infimum and supremum. */ - - slot = page_dir_get_nth_slot(page, 0); - page_dir_slot_set_rec(slot, infimum_rec); - - slot = page_dir_get_nth_slot(page, 1); - page_dir_slot_set_rec(slot, supremum_rec); - - /* Set the next pointers in infimum and supremum */ + return(page); +} - if (UNIV_LIKELY(comp)) { - rec_set_next_offs_new(infimum_rec, PAGE_NEW_SUPREMUM); - rec_set_next_offs_new(supremum_rec, 0); - } else { - rec_set_next_offs_old(infimum_rec, PAGE_OLD_SUPREMUM); - rec_set_next_offs_old(supremum_rec, 0); +/** Parses a redo log record of creating a page. +@param[in,out] block buffer block, or NULL +@param[in] comp nonzero=compact page format +@param[in] is_rtree whether it is rtree page */ +void +page_parse_create( + buf_block_t* block, + ulint comp, + bool is_rtree) +{ + if (block != NULL) { + page_create_low(block, comp, is_rtree); } - - return(page); } /**********************************************************//** -Create an uncompressed B-tree index page. -@return pointer to the page */ -UNIV_INTERN +Create an uncompressed B-tree or R-tree index page. 
+@return pointer to the page */ page_t* page_create( /*========*/ buf_block_t* block, /*!< in: a buffer block where the page is created */ mtr_t* mtr, /*!< in: mini-transaction handle */ - ulint comp) /*!< in: nonzero=compact page format */ + ulint comp, /*!< in: nonzero=compact page format */ + bool is_rtree) /*!< in: whether it is a R-Tree page */ { - page_create_write_log(buf_block_get_frame(block), mtr, comp); - return(page_create_low(block, comp)); + ut_ad(mtr->is_named_space(block->page.id.space())); + page_create_write_log(buf_block_get_frame(block), mtr, comp, is_rtree); + return(page_create_low(block, comp, is_rtree)); } /**********************************************************//** Create a compressed B-tree index page. -@return pointer to the page */ -UNIV_INTERN +@return pointer to the page */ page_t* page_create_zip( /*============*/ - buf_block_t* block, /*!< in/out: a buffer frame where the - page is created */ - dict_index_t* index, /*!< in: the index of the page */ - ulint level, /*!< in: the B-tree level of the page */ - trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + buf_block_t* block, /*!< in/out: a buffer frame + where the page is created */ + dict_index_t* index, /*!< in: the index of the + page, or NULL when applying + TRUNCATE log + record during recovery */ + ulint level, /*!< in: the B-tree level + of the page */ + trx_id_t max_trx_id, /*!< in: PAGE_MAX_TRX_ID */ + const redo_page_compress_t* page_comp_info, + /*!< in: used for applying + TRUNCATE log + record during recovery */ + mtr_t* mtr) /*!< in/out: mini-transaction + handle */ { - page_t* page; - page_zip_des_t* page_zip = buf_block_get_page_zip(block); + page_t* page; + page_zip_des_t* page_zip = buf_block_get_page_zip(block); + bool is_spatial; ut_ad(block); ut_ad(page_zip); - ut_ad(index); - ut_ad(dict_table_is_comp(index->table)); + ut_ad(index == NULL || dict_table_is_comp(index->table)); + is_spatial = index ? 
dict_index_is_spatial(index) + : page_comp_info->type & DICT_SPATIAL; - page = page_create_low(block, TRUE); + page = page_create_low(block, TRUE, is_spatial); mach_write_to_2(PAGE_HEADER + PAGE_LEVEL + page, level); mach_write_to_8(PAGE_HEADER + PAGE_MAX_TRX_ID + page, max_trx_id); - if (!page_zip_compress(page_zip, page, index, - page_zip_level, mtr)) { - /* The compression of a newly created page - should always succeed. */ + if (truncate_t::s_fix_up_active) { + /* Compress the index page created when applying + TRUNCATE log during recovery */ + if (!page_zip_compress(page_zip, page, index, page_zip_level, + page_comp_info, NULL)) { + /* The compression of a newly created + page should always succeed. */ + ut_error; + } + + } else if (!page_zip_compress(page_zip, page, index, + page_zip_level, NULL, mtr)) { + /* The compression of a newly created + page should always succeed. */ ut_error; } @@ -527,7 +480,6 @@ page_create_zip( /**********************************************************//** Empty a previously created B-tree index page. */ -UNIV_INTERN void page_create_empty( /*==============*/ @@ -539,9 +491,15 @@ page_create_empty( const page_t* page = buf_block_get_frame(block); page_zip_des_t* page_zip= buf_block_get_page_zip(block); - ut_ad(fil_page_get_type(page) == FIL_PAGE_INDEX); + ut_ad(fil_page_index_page_check(page)); - if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) { + /* Multiple transactions cannot simultaneously operate on the + same temp-table in parallel. + max_trx_id is ignored for temp tables because it not required + for MVCC. 
*/ + if (dict_index_is_sec_or_ibuf(index) + && !dict_table_is_temporary(index->table) + && page_is_leaf(page)) { max_trx_id = page_get_max_trx_id(page); ut_ad(max_trx_id); } @@ -549,9 +507,10 @@ page_create_empty( if (page_zip) { page_create_zip(block, index, page_header_get_field(page, PAGE_LEVEL), - max_trx_id, mtr); + max_trx_id, NULL, mtr); } else { - page_create(block, mtr, page_is_comp(page)); + page_create(block, mtr, page_is_comp(page), + dict_index_is_spatial(index)); if (max_trx_id) { page_update_max_trx_id( @@ -568,7 +527,6 @@ IMPORTANT: The caller will have to update IBUF_BITMAP_FREE if new_block is a compressed leaf page in a secondary index. This has to be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit(). */ -UNIV_INTERN void page_copy_rec_list_end_no_locks( /*============================*/ @@ -610,22 +568,10 @@ page_copy_rec_list_end_no_locks( ins_rec = page_cur_insert_rec_low(cur2, index, cur1_rec, offsets, mtr); if (UNIV_UNLIKELY(!ins_rec)) { - /* Track an assertion failure reported on the mailing - list on June 18th, 2003 */ - - buf_page_print(new_page, 0, - BUF_PAGE_PRINT_NO_CRASH); - buf_page_print(page_align(rec), 0, - BUF_PAGE_PRINT_NO_CRASH); - ut_print_timestamp(stderr); - - fprintf(stderr, - "InnoDB: rec offset %lu, cur1 offset %lu," - " cur2 offset %lu\n", - (ulong) page_offset(rec), - (ulong) page_offset(page_cur_get_rec(&cur1)), - (ulong) page_offset(cur2)); - ut_error; + ib::fatal() << "Rec offset " << page_offset(rec) + << ", cur1 offset " + << page_offset(page_cur_get_rec(&cur1)) + << ", cur2 offset " << page_offset(cur2); } page_cur_move_to_next(&cur1); @@ -650,7 +596,6 @@ or by invoking ibuf_reset_free_bits() before mtr_commit(). 
@return pointer to the original successor of the infimum record on new_page, or NULL on zip overflow (new_block will be decompressed) */ -UNIV_INTERN rec_t* page_copy_rec_list_end( /*===================*/ @@ -665,7 +610,9 @@ page_copy_rec_list_end( page_t* page = page_align(rec); rec_t* ret = page_rec_get_next( page_get_infimum_rec(new_page)); - ulint log_mode = 0; /* remove warning */ + ulint num_moved = 0; + rtr_rec_move_t* rec_move = NULL; + mem_heap_t* heap = NULL; #ifdef UNIV_ZIP_DEBUG if (new_page_zip) { @@ -685,6 +632,8 @@ page_copy_rec_list_end( /* Here, "ret" may be pointing to a user record or the predefined supremum record. */ + mtr_log_t log_mode = MTR_LOG_NONE; + if (new_page_zip) { log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); } @@ -693,14 +642,39 @@ page_copy_rec_list_end( page_copy_rec_list_end_to_created_page(new_page, rec, index, mtr); } else { - page_copy_rec_list_end_no_locks(new_block, block, rec, - index, mtr); + if (dict_index_is_spatial(index)) { + ulint max_to_move = page_get_n_recs( + buf_block_get_frame(block)); + heap = mem_heap_create(256); + + rec_move = static_cast<rtr_rec_move_t*>(mem_heap_alloc( + heap, + sizeof (*rec_move) * max_to_move)); + + /* For spatial index, we need to insert recs one by one + to keep recs ordered. */ + rtr_page_copy_rec_list_end_no_locks(new_block, + block, rec, index, + heap, rec_move, + max_to_move, + &num_moved, + mtr); + } else { + page_copy_rec_list_end_no_locks(new_block, block, rec, + index, mtr); + } } /* Update PAGE_MAX_TRX_ID on the uncompressed page. Modifications will be redo logged and copied to the compressed - page in page_zip_compress() or page_zip_reorganize() below. */ - if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) { + page in page_zip_compress() or page_zip_reorganize() below. + Multiple transactions cannot simultaneously operate on the + same temp-table in parallel. + max_trx_id is ignored for temp tables because it is not required + for MVCC.
*/ + if (dict_index_is_sec_or_ibuf(index) + && page_is_leaf(page) + && !dict_table_is_temporary(index->table)) { page_update_max_trx_id(new_block, NULL, page_get_max_trx_id(page), mtr); } @@ -708,8 +682,11 @@ page_copy_rec_list_end( if (new_page_zip) { mtr_set_log_mode(mtr, log_mode); - if (!page_zip_compress(new_page_zip, new_page, - index, page_zip_level, mtr)) { + if (!page_zip_compress(new_page_zip, + new_page, + index, + page_zip_level, + NULL, mtr)) { /* Before trying to reorganize the page, store the number of preceding records on the page. */ ulint ret_pos @@ -723,15 +700,16 @@ page_copy_rec_list_end( if (!page_zip_reorganize(new_block, index, mtr)) { - btr_blob_dbg_remove(new_page, index, - "copy_end_reorg_fail"); if (!page_zip_decompress(new_page_zip, new_page, FALSE)) { ut_error; } ut_ad(page_validate(new_page, index)); - btr_blob_dbg_add(new_page, index, - "copy_end_reorg_fail"); + + if (heap) { + mem_heap_free(heap); + } + return(NULL); } else { /* The page was reorganized: @@ -747,7 +725,15 @@ page_copy_rec_list_end( /* Update the lock table and possible hash index */ - lock_move_rec_list_end(new_block, block, rec); + if (dict_index_is_spatial(index) && rec_move) { + lock_rtr_move_rec_list(new_block, block, rec_move, num_moved); + } else if (!dict_table_is_locking_disabled(index->table)) { + lock_move_rec_list_end(new_block, block, rec); + } + + if (heap) { + mem_heap_free(heap); + } btr_search_move_or_delete_hash_entries(new_block, block, index); @@ -766,7 +752,6 @@ or by invoking ibuf_reset_free_bits() before mtr_commit(). 
@return pointer to the original predecessor of the supremum record on new_page, or NULL on zip overflow (new_block will be decompressed) */ -UNIV_INTERN rec_t* page_copy_rec_list_start( /*=====================*/ @@ -780,8 +765,9 @@ page_copy_rec_list_start( page_zip_des_t* new_page_zip = buf_block_get_page_zip(new_block); page_cur_t cur1; rec_t* cur2; - ulint log_mode = 0 /* remove warning */; mem_heap_t* heap = NULL; + ulint num_moved = 0; + rtr_rec_move_t* rec_move = NULL; rec_t* ret = page_rec_get_prev(page_get_supremum_rec(new_page)); ulint offsets_[REC_OFFS_NORMAL_SIZE]; @@ -796,6 +782,8 @@ page_copy_rec_list_start( return(ret); } + mtr_log_t log_mode = MTR_LOG_NONE; + if (new_page_zip) { log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); } @@ -806,27 +794,45 @@ page_copy_rec_list_start( cur2 = ret; /* Copy records from the original page to the new page */ + if (dict_index_is_spatial(index)) { + ulint max_to_move = page_get_n_recs( + buf_block_get_frame(block)); + heap = mem_heap_create(256); + + rec_move = static_cast<rtr_rec_move_t*>(mem_heap_alloc( + heap, + sizeof (*rec_move) * max_to_move)); + + /* For spatial index, we need to insert recs one by one + to keep recs ordered. 
*/ + rtr_page_copy_rec_list_start_no_locks(new_block, + block, rec, index, heap, + rec_move, max_to_move, + &num_moved, mtr); + } else { - while (page_cur_get_rec(&cur1) != rec) { - rec_t* cur1_rec = page_cur_get_rec(&cur1); - offsets = rec_get_offsets(cur1_rec, index, offsets, - ULINT_UNDEFINED, &heap); - cur2 = page_cur_insert_rec_low(cur2, index, - cur1_rec, offsets, mtr); - ut_a(cur2); - - page_cur_move_to_next(&cur1); - } + while (page_cur_get_rec(&cur1) != rec) { + rec_t* cur1_rec = page_cur_get_rec(&cur1); + offsets = rec_get_offsets(cur1_rec, index, offsets, + ULINT_UNDEFINED, &heap); + cur2 = page_cur_insert_rec_low(cur2, index, + cur1_rec, offsets, mtr); + ut_a(cur2); - if (UNIV_LIKELY_NULL(heap)) { - mem_heap_free(heap); + page_cur_move_to_next(&cur1); + } } /* Update PAGE_MAX_TRX_ID on the uncompressed page. Modifications will be redo logged and copied to the compressed - page in page_zip_compress() or page_zip_reorganize() below. */ + page in page_zip_compress() or page_zip_reorganize() below. + Multiple transactions cannot simultaneously operate on the + same temp-table in parallel. + max_trx_id is ignored for temp tables because it is not required + for MVCC.
*/ if (dict_index_is_sec_or_ibuf(index) - && page_is_leaf(page_align(rec))) { + && page_is_leaf(page_align(rec)) + && !dict_table_is_temporary(index->table)) { page_update_max_trx_id(new_block, NULL, page_get_max_trx_id(page_align(rec)), mtr); @@ -839,8 +845,7 @@ page_copy_rec_list_start( goto zip_reorganize;); if (!page_zip_compress(new_page_zip, new_page, index, - page_zip_level, mtr)) { - + page_zip_level, NULL, mtr)) { ulint ret_pos; #ifndef DBUG_OFF zip_reorganize: @@ -857,16 +862,17 @@ zip_reorganize: if (UNIV_UNLIKELY (!page_zip_reorganize(new_block, index, mtr))) { - btr_blob_dbg_remove(new_page, index, - "copy_start_reorg_fail"); if (UNIV_UNLIKELY (!page_zip_decompress(new_page_zip, new_page, FALSE))) { ut_error; } ut_ad(page_validate(new_page, index)); - btr_blob_dbg_add(new_page, index, - "copy_start_reorg_fail"); + + if (UNIV_LIKELY_NULL(heap)) { + mem_heap_free(heap); + } + return(NULL); } @@ -877,7 +883,15 @@ zip_reorganize: /* Update the lock table and possible hash index */ - lock_move_rec_list_start(new_block, block, rec, ret); + if (dict_index_is_spatial(index)) { + lock_rtr_move_rec_list(new_block, block, rec_move, num_moved); + } else if (!dict_table_is_locking_disabled(index->table)) { + lock_move_rec_list_start(new_block, block, rec, ret); + } + + if (heap) { + mem_heap_free(heap); + } btr_search_move_or_delete_hash_entries(new_block, block, index); @@ -892,7 +906,7 @@ page_delete_rec_list_write_log( /*===========================*/ rec_t* rec, /*!< in: record on page */ dict_index_t* index, /*!< in: record descriptor */ - byte type, /*!< in: operation type: + mlog_id_t type, /*!< in: operation type: MLOG_LIST_END_DELETE, ... */ mtr_t* mtr) /*!< in: mtr */ { @@ -915,12 +929,11 @@ page_delete_rec_list_write_log( /**********************************************************//** Parses a log record of a record list end or start deletion. 
-@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* page_parse_delete_rec_list( /*=======================*/ - byte type, /*!< in: MLOG_LIST_END_DELETE, + mlog_id_t type, /*!< in: MLOG_LIST_END_DELETE, MLOG_LIST_START_DELETE, MLOG_COMP_LIST_END_DELETE or MLOG_COMP_LIST_START_DELETE */ @@ -972,7 +985,6 @@ page_parse_delete_rec_list( /*************************************************************//** Deletes records from a page from a given record onward, including that record. The infimum and supremum records are not deleted. */ -UNIV_INTERN void page_delete_rec_list_end( /*=====================*/ @@ -1053,7 +1065,7 @@ delete_all: : MLOG_LIST_END_DELETE, mtr); if (page_zip) { - ulint log_mode; + mtr_log_t log_mode; ut_a(page_is_comp(page)); /* Individual deletes are not logged */ @@ -1168,9 +1180,6 @@ delete_all: /* Remove the record chain segment from the record chain */ page_rec_set_next(prev_rec, page_get_supremum_rec(page)); - btr_blob_dbg_op(page, rec, index, "delete_end", - btr_blob_dbg_remove_rec); - /* Catenate the deleted chain segment to the page free list */ page_rec_set_next(last_rec, page_header_get_ptr(page, PAGE_FREE)); @@ -1186,7 +1195,6 @@ delete_all: /*************************************************************//** Deletes records from page, up to the given record, NOT including that record. Infimum and supremum records are not deleted. 
*/ -UNIV_INTERN void page_delete_rec_list_start( /*=======================*/ @@ -1196,11 +1204,9 @@ page_delete_rec_list_start( mtr_t* mtr) /*!< in: mtr */ { page_cur_t cur1; - ulint log_mode; ulint offsets_[REC_OFFS_NORMAL_SIZE]; ulint* offsets = offsets_; mem_heap_t* heap = NULL; - byte type; rec_offs_init(offsets_); @@ -1231,6 +1237,8 @@ page_delete_rec_list_start( return; } + mlog_id_t type; + if (page_rec_is_comp(rec)) { type = MLOG_COMP_LIST_START_DELETE; } else { @@ -1244,7 +1252,7 @@ page_delete_rec_list_start( /* Individual deletes are not logged */ - log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); + mtr_log_t log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); while (page_cur_get_rec(&cur1) != rec) { offsets = rec_get_offsets(page_cur_get_rec(&cur1), index, @@ -1273,7 +1281,6 @@ or by invoking ibuf_reset_free_bits() before mtr_commit(). @return TRUE on success; FALSE on compression failure (new_block will be decompressed) */ -UNIV_INTERN ibool page_move_rec_list_end( /*===================*/ @@ -1289,6 +1296,8 @@ page_move_rec_list_end( ulint old_n_recs; ulint new_n_recs; + ut_ad(!dict_index_is_spatial(index)); + old_data_size = page_get_data_size(new_page); old_n_recs = page_get_n_recs(new_page); #ifdef UNIV_ZIP_DEBUG @@ -1332,8 +1341,7 @@ if new_block is a compressed leaf page in a secondary index. This has to be done either within the same mini-transaction, or by invoking ibuf_reset_free_bits() before mtr_commit(). -@return TRUE on success; FALSE on compression failure */ -UNIV_INTERN +@return TRUE on success; FALSE on compression failure */ ibool page_move_rec_list_start( /*=====================*/ @@ -1434,7 +1442,6 @@ page_dir_add_slot( /****************************************************************//** Splits a directory slot which owns too many records. 
*/ -UNIV_INTERN void page_dir_split_slot( /*================*/ @@ -1497,7 +1504,6 @@ page_dir_split_slot( Tries to balance the given directory slot with too few records with the upper neighbor, so that there are at least the minimum number of records owned by the slot; this may result in the merging of two slots. */ -UNIV_INTERN void page_dir_balance_slot( /*==================*/ @@ -1567,8 +1573,7 @@ page_dir_balance_slot( /************************************************************//** Returns the nth record of the record list. This is the inverse function of page_rec_get_n_recs_before(). -@return nth record */ -UNIV_INTERN +@return nth record */ const rec_t* page_rec_get_nth_const( /*===================*/ @@ -1620,8 +1625,7 @@ page_rec_get_nth_const( /***************************************************************//** Returns the number of records before the given record in chain. The number includes infimum and supremum records. -@return number of records */ -UNIV_INTERN +@return number of records */ ulint page_rec_get_n_recs_before( /*=======================*/ @@ -1686,7 +1690,6 @@ page_rec_get_n_recs_before( /************************************************************//** Prints record contents including the data relevant only in the index page context. 
*/ -UNIV_INTERN void page_rec_print( /*===========*/ @@ -1696,17 +1699,13 @@ page_rec_print( ut_a(!page_rec_is_comp(rec) == !rec_offs_comp(offsets)); rec_print_new(stderr, rec, offsets); if (page_rec_is_comp(rec)) { - fprintf(stderr, - " n_owned: %lu; heap_no: %lu; next rec: %lu\n", - (ulong) rec_get_n_owned_new(rec), - (ulong) rec_get_heap_no_new(rec), - (ulong) rec_get_next_offs(rec, TRUE)); + ib::info() << "n_owned: " << rec_get_n_owned_new(rec) + << "; heap_no: " << rec_get_heap_no_new(rec) + << "; next rec: " << rec_get_next_offs(rec, TRUE); } else { - fprintf(stderr, - " n_owned: %lu; heap_no: %lu; next rec: %lu\n", - (ulong) rec_get_n_owned_old(rec), - (ulong) rec_get_heap_no_old(rec), - (ulong) rec_get_next_offs(rec, FALSE)); + ib::info() << "n_owned: " << rec_get_n_owned_old(rec) + << "; heap_no: " << rec_get_heap_no_old(rec) + << "; next rec: " << rec_get_next_offs(rec, FALSE); } page_rec_check(rec); @@ -1717,7 +1716,6 @@ page_rec_print( /***************************************************************//** This is used to print the contents of the directory for debugging purposes. */ -UNIV_INTERN void page_dir_print( /*===========*/ @@ -1759,7 +1757,6 @@ page_dir_print( /***************************************************************//** This is used to print the contents of the page record list for debugging purposes. */ -UNIV_INTERN void page_print_list( /*============*/ @@ -1778,7 +1775,7 @@ page_print_list( ut_a((ibool)!!page_is_comp(page) == dict_table_is_comp(index->table)); - fprintf(stderr, + fprint(stderr, "--------------------------------\n" "PAGE RECORD LIST\n" "Page address %p\n", page); @@ -1829,7 +1826,6 @@ page_print_list( /***************************************************************//** Prints the info in a page header. 
*/ -UNIV_INTERN void page_header_print( /*==============*/ @@ -1857,7 +1853,6 @@ page_header_print( /***************************************************************//** This is used to print the contents of the page for debugging purposes. */ -UNIV_INTERN void page_print( /*=======*/ @@ -1881,8 +1876,7 @@ page_print( The following is used to validate a record on a page. This function differs from rec_validate as it can also check the n_owned field and the heap_no field. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool page_rec_validate( /*==============*/ @@ -1908,17 +1902,15 @@ page_rec_validate( } if (UNIV_UNLIKELY(!(n_owned <= PAGE_DIR_SLOT_MAX_N_OWNED))) { - fprintf(stderr, - "InnoDB: Dir slot of rec %lu, n owned too big %lu\n", - (ulong) page_offset(rec), (ulong) n_owned); + ib::warn() << "Dir slot of rec " << page_offset(rec) + << ", n owned too big " << n_owned; return(FALSE); } if (UNIV_UNLIKELY(!(heap_no < page_dir_get_n_heap(page)))) { - fprintf(stderr, - "InnoDB: Heap no of rec %lu too big %lu %lu\n", - (ulong) page_offset(rec), (ulong) heap_no, - (ulong) page_dir_get_n_heap(page)); + ib::warn() << "Heap no of rec " << page_offset(rec) + << " too big " << heap_no << " " + << page_dir_get_n_heap(page); return(FALSE); } @@ -1926,11 +1918,11 @@ page_rec_validate( } #ifndef UNIV_HOTBACKUP +#ifdef UNIV_DEBUG /***************************************************************//** Checks that the first directory slot points to the infimum record and the last to the supremum. This function is intended to track if the bug fixed in 4.0.14 has caused corruption to users' databases. 
*/ -UNIV_INTERN void page_check_dir( /*===========*/ @@ -1947,28 +1939,24 @@ page_check_dir( if (UNIV_UNLIKELY(!page_rec_is_infimum_low(infimum_offs))) { - fprintf(stderr, - "InnoDB: Page directory corruption:" - " infimum not pointed to\n"); - buf_page_print(page, 0, 0); + ib::fatal() << "Page directory corruption: infimum not" + " pointed to"; } if (UNIV_UNLIKELY(!page_rec_is_supremum_low(supremum_offs))) { - fprintf(stderr, - "InnoDB: Page directory corruption:" - " supremum not pointed to\n"); - buf_page_print(page, 0, 0); + ib::fatal() << "Page directory corruption: supremum not" + " pointed to"; } } +#endif /* UNIV_DEBUG */ #endif /* !UNIV_HOTBACKUP */ /***************************************************************//** This function checks the consistency of an index page when we do not know the index. This is also resilient so that this should never crash even if the page is total garbage. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool page_simple_validate_old( /*=====================*/ @@ -1991,9 +1979,8 @@ page_simple_validate_old( n_slots = page_dir_get_n_slots(page); if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) { - fprintf(stderr, - "InnoDB: Nonsensical number %lu of page dir slots\n", - (ulong) n_slots); + ib::error() << "Nonsensical number " << n_slots + << " of page dir slots"; goto func_exit; } @@ -2002,13 +1989,12 @@ page_simple_validate_old( if (UNIV_UNLIKELY(rec_heap_top > page_dir_get_nth_slot(page, n_slots - 1))) { - - fprintf(stderr, - "InnoDB: Record heap and dir overlap on a page," - " heap top %lu, dir %lu\n", - (ulong) page_header_get_field(page, PAGE_HEAP_TOP), - (ulong) - page_offset(page_dir_get_nth_slot(page, n_slots - 1))); + ib::error() + << "Record heap and dir overlap on a page, heap top " + << page_header_get_field(page, PAGE_HEAP_TOP) + << ", dir " + << page_offset(page_dir_get_nth_slot(page, + n_slots - 1)); goto func_exit; } @@ -2025,11 +2011,9 @@ page_simple_validate_old( for (;;) { if (UNIV_UNLIKELY(rec 
> rec_heap_top)) { - fprintf(stderr, - "InnoDB: Record %lu is above" - " rec heap top %lu\n", - (ulong)(rec - page), - (ulong)(rec_heap_top - page)); + ib::error() << "Record " << (rec - page) + << " is above rec heap top " + << (rec_heap_top - page); goto func_exit; } @@ -2039,22 +2023,18 @@ page_simple_validate_old( if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) != own_count)) { - fprintf(stderr, - "InnoDB: Wrong owned count %lu, %lu," - " rec %lu\n", - (ulong) rec_get_n_owned_old(rec), - (ulong) own_count, - (ulong)(rec - page)); + ib::error() << "Wrong owned count " + << rec_get_n_owned_old(rec) + << ", " << own_count << ", rec " + << (rec - page); goto func_exit; } if (UNIV_UNLIKELY (page_dir_slot_get_rec(slot) != rec)) { - fprintf(stderr, - "InnoDB: Dir slot does not point" - " to right rec %lu\n", - (ulong)(rec - page)); + ib::error() << "Dir slot does not point" + " to right rec " << (rec - page); goto func_exit; } @@ -2075,11 +2055,10 @@ page_simple_validate_old( if (UNIV_UNLIKELY (rec_get_next_offs(rec, FALSE) < FIL_PAGE_DATA || rec_get_next_offs(rec, FALSE) >= UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Next record offset" - " nonsensical %lu for rec %lu\n", - (ulong) rec_get_next_offs(rec, FALSE), - (ulong) (rec - page)); + + ib::error() << "Next record offset nonsensical " + << rec_get_next_offs(rec, FALSE) << " for rec " + << (rec - page); goto func_exit; } @@ -2087,10 +2066,8 @@ page_simple_validate_old( count++; if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Page record list appears" - " to be circular %lu\n", - (ulong) count); + ib::error() << "Page record list appears" + " to be circular " << count; goto func_exit; } @@ -2099,24 +2076,23 @@ page_simple_validate_old( } if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) { - fprintf(stderr, "InnoDB: n owned is zero in a supremum rec\n"); + ib::error() << "n owned is zero in a supremum rec"; goto func_exit; } if (UNIV_UNLIKELY(slot_no != n_slots - 1)) { - fprintf(stderr, 
"InnoDB: n slots wrong %lu, %lu\n", - (ulong) slot_no, (ulong) (n_slots - 1)); + ib::error() << "n slots wrong " + << slot_no << ", " << (n_slots - 1); goto func_exit; } if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS) + PAGE_HEAP_NO_USER_LOW != count + 1)) { - fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n", - (ulong) page_header_get_field(page, PAGE_N_RECS) - + PAGE_HEAP_NO_USER_LOW, - (ulong) (count + 1)); + ib::error() << "n recs wrong " + << page_header_get_field(page, PAGE_N_RECS) + + PAGE_HEAP_NO_USER_LOW << " " << (count + 1); goto func_exit; } @@ -2127,20 +2103,16 @@ page_simple_validate_old( while (rec != NULL) { if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA || rec >= page + UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Free list record has" - " a nonsensical offset %lu\n", - (ulong) (rec - page)); + ib::error() << "Free list record has" + " a nonsensical offset " << (rec - page); goto func_exit; } if (UNIV_UNLIKELY(rec > rec_heap_top)) { - fprintf(stderr, - "InnoDB: Free list record %lu" - " is above rec heap top %lu\n", - (ulong) (rec - page), - (ulong) (rec_heap_top - page)); + ib::error() << "Free list record " << (rec - page) + << " is above rec heap top " + << (rec_heap_top - page); goto func_exit; } @@ -2148,10 +2120,8 @@ page_simple_validate_old( count++; if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Page free list appears" - " to be circular %lu\n", - (ulong) count); + ib::error() << "Page free list appears" + " to be circular " << count; goto func_exit; } @@ -2160,9 +2130,8 @@ page_simple_validate_old( if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) { - fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n", - (ulong) page_dir_get_n_heap(page), - (ulong) (count + 1)); + ib::error() << "N heap is wrong " + << page_dir_get_n_heap(page) << ", " << (count + 1); goto func_exit; } @@ -2177,8 +2146,7 @@ func_exit: This function checks the consistency of an index page when we do not know the index. 
This is also resilient so that this should never crash even if the page is total garbage. -@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool page_simple_validate_new( /*=====================*/ @@ -2201,9 +2169,8 @@ page_simple_validate_new( n_slots = page_dir_get_n_slots(page); if (UNIV_UNLIKELY(n_slots > UNIV_PAGE_SIZE / 4)) { - fprintf(stderr, - "InnoDB: Nonsensical number %lu" - " of page dir slots\n", (ulong) n_slots); + ib::error() << "Nonsensical number " << n_slots + << " of page dir slots"; goto func_exit; } @@ -2213,12 +2180,11 @@ page_simple_validate_new( if (UNIV_UNLIKELY(rec_heap_top > page_dir_get_nth_slot(page, n_slots - 1))) { - fprintf(stderr, - "InnoDB: Record heap and dir overlap on a page," - " heap top %lu, dir %lu\n", - (ulong) page_header_get_field(page, PAGE_HEAP_TOP), - (ulong) - page_offset(page_dir_get_nth_slot(page, n_slots - 1))); + ib::error() << "Record heap and dir overlap on a page," + " heap top " + << page_header_get_field(page, PAGE_HEAP_TOP) + << ", dir " << page_offset( + page_dir_get_nth_slot(page, n_slots - 1)); goto func_exit; } @@ -2235,11 +2201,10 @@ page_simple_validate_new( for (;;) { if (UNIV_UNLIKELY(rec > rec_heap_top)) { - fprintf(stderr, - "InnoDB: Record %lu is above rec" - " heap top %lu\n", - (ulong) page_offset(rec), - (ulong) page_offset(rec_heap_top)); + + ib::error() << "Record " << page_offset(rec) + << " is above rec heap top " + << page_offset(rec_heap_top); goto func_exit; } @@ -2249,22 +2214,18 @@ page_simple_validate_new( if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) != own_count)) { - fprintf(stderr, - "InnoDB: Wrong owned count %lu, %lu," - " rec %lu\n", - (ulong) rec_get_n_owned_new(rec), - (ulong) own_count, - (ulong) page_offset(rec)); + ib::error() << "Wrong owned count " + << rec_get_n_owned_new(rec) << ", " + << own_count << ", rec " + << page_offset(rec); goto func_exit; } if (UNIV_UNLIKELY (page_dir_slot_get_rec(slot) != rec)) { - fprintf(stderr, - "InnoDB: Dir slot does not point" - 
" to right rec %lu\n", - (ulong) page_offset(rec)); + ib::error() << "Dir slot does not point" + " to right rec " << page_offset(rec); goto func_exit; } @@ -2285,11 +2246,10 @@ page_simple_validate_new( if (UNIV_UNLIKELY (rec_get_next_offs(rec, TRUE) < FIL_PAGE_DATA || rec_get_next_offs(rec, TRUE) >= UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Next record offset nonsensical %lu" - " for rec %lu\n", - (ulong) rec_get_next_offs(rec, TRUE), - (ulong) page_offset(rec)); + + ib::error() << "Next record offset nonsensical " + << rec_get_next_offs(rec, TRUE) + << " for rec " << page_offset(rec); goto func_exit; } @@ -2297,10 +2257,8 @@ page_simple_validate_new( count++; if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Page record list appears" - " to be circular %lu\n", - (ulong) count); + ib::error() << "Page record list appears to be" + " circular " << count; goto func_exit; } @@ -2309,25 +2267,23 @@ page_simple_validate_new( } if (UNIV_UNLIKELY(rec_get_n_owned_new(rec) == 0)) { - fprintf(stderr, "InnoDB: n owned is zero" - " in a supremum rec\n"); + ib::error() << "n owned is zero in a supremum rec"; goto func_exit; } if (UNIV_UNLIKELY(slot_no != n_slots - 1)) { - fprintf(stderr, "InnoDB: n slots wrong %lu, %lu\n", - (ulong) slot_no, (ulong) (n_slots - 1)); + ib::error() << "n slots wrong " << slot_no << ", " + << (n_slots - 1); goto func_exit; } if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS) + PAGE_HEAP_NO_USER_LOW != count + 1)) { - fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n", - (ulong) page_header_get_field(page, PAGE_N_RECS) - + PAGE_HEAP_NO_USER_LOW, - (ulong) (count + 1)); + ib::error() << "n recs wrong " + << page_header_get_field(page, PAGE_N_RECS) + + PAGE_HEAP_NO_USER_LOW << " " << (count + 1); goto func_exit; } @@ -2338,20 +2294,17 @@ page_simple_validate_new( while (rec != NULL) { if (UNIV_UNLIKELY(rec < page + FIL_PAGE_DATA || rec >= page + UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Free list record has" - 
" a nonsensical offset %lu\n", - (ulong) page_offset(rec)); + + ib::error() << "Free list record has" + " a nonsensical offset " << page_offset(rec); goto func_exit; } if (UNIV_UNLIKELY(rec > rec_heap_top)) { - fprintf(stderr, - "InnoDB: Free list record %lu" - " is above rec heap top %lu\n", - (ulong) page_offset(rec), - (ulong) page_offset(rec_heap_top)); + ib::error() << "Free list record " << page_offset(rec) + << " is above rec heap top " + << page_offset(rec_heap_top); goto func_exit; } @@ -2359,10 +2312,8 @@ page_simple_validate_new( count++; if (UNIV_UNLIKELY(count > UNIV_PAGE_SIZE)) { - fprintf(stderr, - "InnoDB: Page free list appears" - " to be circular %lu\n", - (ulong) count); + ib::error() << "Page free list appears to be" + " circular " << count; goto func_exit; } @@ -2371,9 +2322,8 @@ page_simple_validate_new( if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) { - fprintf(stderr, "InnoDB: N heap is wrong %lu, %lu\n", - (ulong) page_dir_get_n_heap(page), - (ulong) (count + 1)); + ib::error() << "N heap is wrong " + << page_dir_get_n_heap(page) << ", " << (count + 1); goto func_exit; } @@ -2386,8 +2336,7 @@ func_exit: /***************************************************************//** This function checks the consistency of an index page. 
-@return TRUE if ok */ -UNIV_INTERN +@return TRUE if ok */ ibool page_validate( /*==========*/ @@ -2412,9 +2361,15 @@ page_validate( ulint* offsets = NULL; ulint* old_offsets = NULL; +#ifdef UNIV_GIS_DEBUG + if (dict_index_is_spatial(index)) { + fprintf(stderr, "Page no: %lu\n", page_get_page_no(page)); + } +#endif /* UNIV_GIS_DEBUG */ + if (UNIV_UNLIKELY((ibool) !!page_is_comp(page) != dict_table_is_comp(index->table))) { - fputs("InnoDB: 'compact format' flag mismatch\n", stderr); + ib::error() << "'compact format' flag mismatch"; goto func_exit2; } if (page_is_comp(page)) { @@ -2427,16 +2382,20 @@ page_validate( } } - if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page) + /* Multiple transactions cannot simultaneously operate on the + same temp-table in parallel. + max_trx_id is ignored for temp tables because it is not required + for MVCC. */ + if (dict_index_is_sec_or_ibuf(index) + && !dict_table_is_temporary(index->table) + && page_is_leaf(page) && !page_is_empty(page)) { trx_id_t max_trx_id = page_get_max_trx_id(page); trx_id_t sys_max_trx_id = trx_sys_get_max_trx_id(); if (max_trx_id == 0 || max_trx_id > sys_max_trx_id) { - ib_logf(IB_LOG_LEVEL_ERROR, - "PAGE_MAX_TRX_ID out of bounds: " - TRX_ID_FMT ", " TRX_ID_FMT, - max_trx_id, sys_max_trx_id); + ib::error() << "PAGE_MAX_TRX_ID out of bounds: " + << max_trx_id << ", " << sys_max_trx_id; goto func_exit2; } } @@ -2456,13 +2415,11 @@ page_validate( if (UNIV_UNLIKELY(!(page_header_get_ptr(page, PAGE_HEAP_TOP) <= page_dir_get_nth_slot(page, n_slots - 1)))) { - fprintf(stderr, - "InnoDB: Record heap and dir overlap" - " on space %lu page %lu index %s, %p, %p\n", - (ulong) page_get_space_id(page), - (ulong) page_get_page_no(page), index->name, - page_header_get_ptr(page, PAGE_HEAP_TOP), - page_dir_get_nth_slot(page, n_slots - 1)); + ib::warn() << "Record heap and dir overlap on space " + << page_get_space_id(page) << " page " + << page_get_page_no(page) << " index " << index->name + << ", " <<
page_header_get_ptr(page, PAGE_HEAP_TOP) + << ", " << page_dir_get_nth_slot(page, n_slots - 1); goto func_exit; } @@ -2484,7 +2441,7 @@ page_validate( if (page_is_comp(page) && page_rec_is_user_rec(rec) && UNIV_UNLIKELY(rec_get_node_ptr_flag(rec) == page_is_leaf(page))) { - fputs("InnoDB: node_ptr flag mismatch\n", stderr); + ib::error() << "'node_ptr' flag mismatch"; goto func_exit; } @@ -2494,22 +2451,43 @@ page_validate( #ifndef UNIV_HOTBACKUP /* Check that the records are in the ascending order */ - if (UNIV_LIKELY(count >= PAGE_HEAP_NO_USER_LOW) + if (count >= PAGE_HEAP_NO_USER_LOW && !page_rec_is_supremum(rec)) { - if (UNIV_UNLIKELY - (1 != cmp_rec_rec(rec, old_rec, - offsets, old_offsets, index))) { - fprintf(stderr, - "InnoDB: Records in wrong order" - " on space %lu page %lu index %s\n", - (ulong) page_get_space_id(page), - (ulong) page_get_page_no(page), - index->name); + + int ret = cmp_rec_rec( + rec, old_rec, offsets, old_offsets, index); + + /* For spatial index, on nonleaf level, we + allow recs to be equal.
*/ + bool rtr_equal_nodeptrs = + (ret == 0 && dict_index_is_spatial(index) + && !page_is_leaf(page)); + + if (ret <= 0 && !rtr_equal_nodeptrs) { + + ib::error() << "Records in wrong order on" + " space " << page_get_space_id(page) + << " page " << page_get_page_no(page) + << " index " << index->name; + fputs("\nInnoDB: previous record ", stderr); - rec_print_new(stderr, old_rec, old_offsets); - fputs("\nInnoDB: record ", stderr); - rec_print_new(stderr, rec, offsets); - putc('\n', stderr); + /* For spatial index, print the mbr info.*/ + if (index->type & DICT_SPATIAL) { + putc('\n', stderr); + rec_print_mbr_rec(stderr, + old_rec, old_offsets); + fputs("\nInnoDB: record ", stderr); + putc('\n', stderr); + rec_print_mbr_rec(stderr, rec, offsets); + putc('\n', stderr); + putc('\n', stderr); + + } else { + rec_print_new(stderr, old_rec, old_offsets); + fputs("\nInnoDB: record ", stderr); + rec_print_new(stderr, rec, offsets); + putc('\n', stderr); + } goto func_exit; } @@ -2519,21 +2497,27 @@ page_validate( if (page_rec_is_user_rec(rec)) { data_size += rec_offs_size(offsets); + +#if UNIV_GIS_DEBUG + /* For spatial index, print the mbr info.*/ + if (index->type & DICT_SPATIAL) { + rec_print_mbr_rec(stderr, rec, offsets); + putc('\n', stderr); + } +#endif /* UNIV_GIS_DEBUG */ } offs = page_offset(rec_get_start(rec, offsets)); i = rec_offs_size(offsets); if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) { - fputs("InnoDB: record offset out of bounds\n", stderr); + ib::error() << "Record offset out of bounds"; goto func_exit; } while (i--) { if (UNIV_UNLIKELY(buf[offs + i])) { /* No other record may overlap this */ - - fputs("InnoDB: Record overlaps another\n", - stderr); + ib::error() << "Record overlaps another"; goto func_exit; } @@ -2549,17 +2533,14 @@ page_validate( if (UNIV_UNLIKELY(rec_own_count)) { /* This is a record pointed to by a dir slot */ if (UNIV_UNLIKELY(rec_own_count != own_count)) { - fprintf(stderr, - "InnoDB: Wrong owned count %lu, %lu\n", - (ulong) 
rec_own_count, - (ulong) own_count); + ib::error() << "Wrong owned count " + << rec_own_count << ", " << own_count; goto func_exit; } if (page_dir_slot_get_rec(slot) != rec) { - fputs("InnoDB: Dir slot does not" - " point to right rec\n", - stderr); + ib::error() << "Dir slot does not" + " point to right rec"; goto func_exit; } @@ -2596,30 +2577,28 @@ page_validate( } } else if (UNIV_UNLIKELY(rec_get_n_owned_old(rec) == 0)) { n_owned_zero: - fputs("InnoDB: n owned is zero\n", stderr); + ib::error() << "n owned is zero"; goto func_exit; } if (UNIV_UNLIKELY(slot_no != n_slots - 1)) { - fprintf(stderr, "InnoDB: n slots wrong %lu %lu\n", - (ulong) slot_no, (ulong) (n_slots - 1)); + ib::error() << "n slots wrong " << slot_no << " " + << (n_slots - 1); goto func_exit; } if (UNIV_UNLIKELY(page_header_get_field(page, PAGE_N_RECS) + PAGE_HEAP_NO_USER_LOW != count + 1)) { - fprintf(stderr, "InnoDB: n recs wrong %lu %lu\n", - (ulong) page_header_get_field(page, PAGE_N_RECS) - + PAGE_HEAP_NO_USER_LOW, - (ulong) (count + 1)); + ib::error() << "n recs wrong " + << page_header_get_field(page, PAGE_N_RECS) + + PAGE_HEAP_NO_USER_LOW << " " << (count + 1); goto func_exit; } if (UNIV_UNLIKELY(data_size != page_get_data_size(page))) { - fprintf(stderr, - "InnoDB: Summed data size %lu, returned by func %lu\n", - (ulong) data_size, (ulong) page_get_data_size(page)); + ib::error() << "Summed data size " << data_size + << ", returned by func " << page_get_data_size(page); goto func_exit; } @@ -2638,15 +2617,15 @@ n_owned_zero: offs = page_offset(rec_get_start(rec, offsets)); i = rec_offs_size(offsets); if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) { - fputs("InnoDB: record offset out of bounds\n", stderr); + ib::error() << "Record offset out of bounds"; goto func_exit; } while (i--) { if (UNIV_UNLIKELY(buf[offs + i])) { - fputs("InnoDB: Record overlaps another" - " in free list\n", stderr); + ib::error() << "Record overlaps another" + " in free list"; goto func_exit; } @@ -2657,9 +2636,8 
@@ n_owned_zero: } if (UNIV_UNLIKELY(page_dir_get_n_heap(page) != count + 1)) { - fprintf(stderr, "InnoDB: N heap is wrong %lu %lu\n", - (ulong) page_dir_get_n_heap(page), - (ulong) count + 1); + ib::error() << "N heap is wrong " + << page_dir_get_n_heap(page) << " " << count + 1; goto func_exit; } @@ -2670,13 +2648,9 @@ func_exit: if (UNIV_UNLIKELY(ret == FALSE)) { func_exit2: - fprintf(stderr, - "InnoDB: Apparent corruption" - " in space %lu page %lu index %s\n", - (ulong) page_get_space_id(page), - (ulong) page_get_page_no(page), - index->name); - buf_page_print(page, 0, 0); + ib::error() << "Apparent corruption in space " + << page_get_space_id(page) << " page " + << page_get_page_no(page) << " index " << index->name; } return(ret); @@ -2685,8 +2659,7 @@ func_exit2: #ifndef UNIV_HOTBACKUP /***************************************************************//** Looks in the page record list for a record with the given heap number. -@return record, NULL if not found */ -UNIV_INTERN +@return record, NULL if not found */ const rec_t* page_find_rec_with_heap_no( /*=======================*/ @@ -2698,7 +2671,7 @@ page_find_rec_with_heap_no( if (page_is_comp(page)) { rec = page + PAGE_NEW_INFIMUM; - for(;;) { + for (;;) { ulint rec_heap_no = rec_get_heap_no_new(rec); if (rec_heap_no == heap_no) { @@ -2735,8 +2708,7 @@ page_find_rec_with_heap_no( Removes the record from a leaf page. This function does not log any changes. It is used by the IMPORT tablespace functions. The cursor is moved to the next record after the deleted one. 
-@return true if success, i.e., the page did not become too empty */ -UNIV_INTERN +@return true if success, i.e., the page did not become too empty */ bool page_delete_rec( /*============*/ @@ -2755,7 +2727,7 @@ page_delete_rec( if (!rec_offs_any_extern(offsets) && ((page_get_data_size(page) - rec_offs_size(offsets) - < BTR_CUR_PAGE_COMPRESS_LIMIT) + < BTR_CUR_PAGE_COMPRESS_LIMIT(index)) || (mach_read_from_4(page + FIL_PAGE_NEXT) == FIL_NULL && mach_read_from_4(page + FIL_PAGE_PREV) == FIL_NULL) || (page_get_n_recs(page) < 2))) { @@ -2791,7 +2763,6 @@ page_delete_rec( @param[in] page index tree leaf page @return the last record, not delete-marked @retval infimum record if all records are delete-marked */ - const rec_t* page_find_rec_max_not_deleted( const page_t* page) @@ -2824,14 +2795,12 @@ page_find_rec_max_not_deleted( but different than the global setting innodb_checksum_algorithm. @param[in] current_algo current checksum algorithm @param[in] page_checksum page valid checksum -@param[in] space_id tablespace id -@param[in] page_no page number */ +@param[in] page_id page identifier */ void page_warn_strict_checksum( srv_checksum_algorithm_t curr_algo, srv_checksum_algorithm_t page_checksum, - ulint space_id, - ulint page_no) + const page_id_t& page_id) { srv_checksum_algorithm_t curr_algo_nonstrict; switch (curr_algo) { @@ -2848,16 +2817,15 @@ page_warn_strict_checksum( ut_error; } - ib_logf(IB_LOG_LEVEL_WARN, - "innodb_checksum_algorithm is set to \"%s\"" - " but the page [page id: space=" ULINTPF "," - " page number=" ULINTPF "] contains a valid checksum \"%s\"." - " Accepting the page as valid. 
Change innodb_checksum_algorithm" - " to \"%s\" to silently accept such pages or rewrite all pages" - " so that they contain \"%s\" checksum.", - buf_checksum_algorithm_name(curr_algo), - space_id, page_no, - buf_checksum_algorithm_name(page_checksum), - buf_checksum_algorithm_name(curr_algo_nonstrict), - buf_checksum_algorithm_name(curr_algo_nonstrict)); + ib::warn() << "innodb_checksum_algorithm is set to \"" + << buf_checksum_algorithm_name(curr_algo) << "\"" + << " but the page " << page_id << " contains a valid checksum \"" + << buf_checksum_algorithm_name(page_checksum) << "\". " + << " Accepting the page as valid. Change" + << " innodb_checksum_algorithm to \"" + << buf_checksum_algorithm_name(curr_algo_nonstrict) + << "\" to silently accept such pages or rewrite all pages" + << " so that they contain \"" + << buf_checksum_algorithm_name(curr_algo_nonstrict) + << "\" checksum."; } diff --git a/storage/innobase/page/page0zip.cc b/storage/innobase/page/page0zip.cc index 2bf1f324784..167fc73a6d1 100644 --- a/storage/innobase/page/page0zip.cc +++ b/storage/innobase/page/page0zip.cc @@ -24,41 +24,37 @@ Compressed page interface Created June 2005 by Marko Makela *******************************************************/ -// First include (the generated) my_config.h, to get correct platform defines. -#include "my_config.h" - -#include <map> -using namespace std; - -#define THIS_MODULE +#include "page0size.h" #include "page0zip.h" #ifdef UNIV_NONINL # include "page0zip.ic" #endif -#undef THIS_MODULE -#include "fil0fil.h" -#include "buf0checksum.h" -#include "mach0data.h" + +/** A BLOB field reference full of zero, for use in assertions and tests. +Initially, BLOB field references are set to zero, in +dtuple_convert_big_rec(). 
*/ +const byte field_ref_zero[FIELD_REF_SIZE] = { + 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, +}; + #ifndef UNIV_INNOCHECKSUM #include "page0page.h" #include "mtr0log.h" -#include "ut0sort.h" #include "dict0dict.h" #include "btr0cur.h" #include "page0types.h" #include "log0recv.h" -#else -#define page_warn_strict_checksum(A,B,C,D) -#endif /* !UNIV_INNOCHECKSUM */ +#include "row0trunc.h" #include "zlib.h" #ifndef UNIV_HOTBACKUP -#ifndef UNIV_INNOCHECKSUM # include "buf0buf.h" # include "btr0sea.h" # include "dict0boot.h" # include "lock0lock.h" # include "srv0srv.h" -#endif /* !UNIV_INNOCHECKSUM */ # include "buf0lru.h" # include "srv0mon.h" # include "ut0crc32.h" @@ -68,26 +64,22 @@ using namespace std; # define buf_LRU_stat_inc_unzip() ((void) 0) #endif /* !UNIV_HOTBACKUP */ +#include <map> +#include <algorithm> + #ifndef UNIV_HOTBACKUP -#ifndef UNIV_INNOCHECKSUM /** Statistics on compression, indexed by page_zip_des_t::ssize - 1 */ -UNIV_INTERN page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX]; +page_zip_stat_t page_zip_stat[PAGE_ZIP_SSIZE_MAX]; /** Statistics on compression, indexed by index->id */ -UNIV_INTERN page_zip_stat_per_index_t page_zip_stat_per_index; -/** Mutex protecting page_zip_stat_per_index */ -UNIV_INTERN ib_mutex_t page_zip_stat_per_index_mutex; -#ifdef HAVE_PSI_INTERFACE -UNIV_INTERN mysql_pfs_key_t page_zip_stat_per_index_mutex_key; -#endif /* HAVE_PSI_INTERFACE */ -#endif /* !UNIV_INNOCHECKSUM */ +page_zip_stat_per_index_t page_zip_stat_per_index; #endif /* !UNIV_HOTBACKUP */ /* Compression level to be used by zlib. Settable by user. */ -UNIV_INTERN uint page_zip_level = DEFAULT_COMPRESSION_LEVEL; +uint page_zip_level = DEFAULT_COMPRESSION_LEVEL; /* Whether or not to log compressed page images to avoid possible compression algorithm changes in zlib. 
*/ -UNIV_INTERN my_bool page_zip_log_pages = false; +my_bool page_zip_log_pages = true; /* Please refer to ../include/page0zip.ic for a description of the compressed page format. */ @@ -117,24 +109,24 @@ static const byte supremum_extra_data[] = { /** Assert that a block of memory is filled with zero bytes. Compare at most sizeof(field_ref_zero) bytes. -@param b in: memory block -@param s in: size of the memory block, in bytes */ -#define ASSERT_ZERO(b, s) \ - ut_ad(!memcmp(b, field_ref_zero, ut_min(s, sizeof field_ref_zero))) +@param b in: memory block +@param s in: size of the memory block, in bytes */ +#define ASSERT_ZERO(b, s) \ + ut_ad(!memcmp(b, field_ref_zero, \ + ut_min(static_cast<size_t>(s), sizeof field_ref_zero))); /** Assert that a BLOB pointer is filled with zero bytes. -@param b in: BLOB pointer */ +@param b in: BLOB pointer */ #define ASSERT_ZERO_BLOB(b) \ ut_ad(!memcmp(b, field_ref_zero, sizeof field_ref_zero)) /* Enable some extra debugging output. This code can be enabled independently of any UNIV_ debugging conditions. */ -#ifndef UNIV_INNOCHECKSUM #if defined UNIV_DEBUG || defined UNIV_ZIP_DEBUG # include <stdarg.h> MY_ATTRIBUTE((format (printf, 1, 2))) /**********************************************************************//** Report a failure to decompress or compress. 
-@return number of characters printed */ +@return number of characters printed */ static int page_zip_fail_func( @@ -154,21 +146,18 @@ page_zip_fail_func( return(res); } /** Wrapper for page_zip_fail_func() -@param fmt_args in: printf(3) format string and arguments */ +@param fmt_args in: printf(3) format string and arguments */ # define page_zip_fail(fmt_args) page_zip_fail_func fmt_args #else /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ /** Dummy wrapper for page_zip_fail_func() -@param fmt_args ignored: printf(3) format string and arguments */ +@param fmt_args ignored: printf(3) format string and arguments */ # define page_zip_fail(fmt_args) /* empty */ #endif /* UNIV_DEBUG || UNIV_ZIP_DEBUG */ -#endif /* !UNIV_INNOCHECKSUM */ -#ifndef UNIV_INNOCHECKSUM #ifndef UNIV_HOTBACKUP /**********************************************************************//** Determine the guaranteed free space on an empty page. -@return minimum payload size on the page */ -UNIV_INTERN +@return minimum payload size on the page */ ulint page_zip_empty_size( /*================*/ @@ -179,8 +168,7 @@ page_zip_empty_size( /* subtract the page header and the longest uncompressed data needed for one record */ - (PAGE_DATA - + PAGE_ZIP_DIR_SLOT_SIZE - + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN + + PAGE_ZIP_CLUST_LEAF_SLOT_SIZE + 1/* encoded heap_no==2 in page_zip_write_rec() */ + 1/* end of modification log */ - REC_N_NEW_EXTRA_BYTES/* omitted bytes */) @@ -193,7 +181,7 @@ page_zip_empty_size( /*************************************************************//** Gets the number of elements in the dense page directory, including deleted records (the free list). 
-@return number of elements in the dense page directory */ +@return number of elements in the dense page directory */ UNIV_INLINE ulint page_zip_dir_elems( @@ -207,7 +195,7 @@ page_zip_dir_elems( /*************************************************************//** Gets the size of the compressed page trailer (the dense page directory), including deleted records (the free list). -@return length of dense page directory, in bytes */ +@return length of dense page directory, in bytes */ UNIV_INLINE ulint page_zip_dir_size( @@ -220,7 +208,7 @@ page_zip_dir_size( /*************************************************************//** Gets an offset to the compressed page trailer (the dense page directory), including deleted records (the free list). -@return offset of the dense page directory */ +@return offset of the dense page directory */ UNIV_INLINE ulint page_zip_dir_start_offs( @@ -236,23 +224,23 @@ page_zip_dir_start_offs( /*************************************************************//** Gets a pointer to the compressed page trailer (the dense page directory), including deleted records (the free list). -@param[in] page_zip compressed page -@param[in] n_dense number of entries in the directory -@return pointer to the dense page directory */ +@param[in] page_zip compressed page +@param[in] n_dense number of entries in the directory +@return pointer to the dense page directory */ #define page_zip_dir_start_low(page_zip, n_dense) \ ((page_zip)->data + page_zip_dir_start_offs(page_zip, n_dense)) /*************************************************************//** Gets a pointer to the compressed page trailer (the dense page directory), including deleted records (the free list). 
-@param[in] page_zip compressed page -@return pointer to the dense page directory */ +@param[in] page_zip compressed page +@return pointer to the dense page directory */ #define page_zip_dir_start(page_zip) \ page_zip_dir_start_low(page_zip, page_zip_dir_elems(page_zip)) /*************************************************************//** Gets the size of the compressed page trailer (the dense page directory), only including user records (excluding the free list). -@return length of dense page directory comprising existing records, in bytes */ +@return length of dense page directory comprising existing records, in bytes */ UNIV_INLINE ulint page_zip_dir_user_size( @@ -267,7 +255,7 @@ page_zip_dir_user_size( /*************************************************************//** Find the slot of the given record in the dense page directory. -@return dense directory slot, or NULL if record not found */ +@return dense directory slot, or NULL if record not found */ UNIV_INLINE byte* page_zip_dir_find_low( @@ -290,7 +278,7 @@ page_zip_dir_find_low( /*************************************************************//** Find the slot of the given non-free record in the dense page directory. -@return dense directory slot, or NULL if record not found */ +@return dense directory slot, or NULL if record not found */ UNIV_INLINE byte* page_zip_dir_find( @@ -309,7 +297,7 @@ page_zip_dir_find( /*************************************************************//** Find the slot of the given free record in the dense page directory. -@return dense directory slot, or NULL if record not found */ +@return dense directory slot, or NULL if record not found */ UNIV_INLINE byte* page_zip_dir_find_free( @@ -461,7 +449,7 @@ page_zip_get_n_prev_extern( /**********************************************************************//** Encode the length of a fixed-length column. 
-@return buf + length of encoded val */ +@return buf + length of encoded val */ static byte* page_zip_fixed_field_encode( @@ -489,17 +477,19 @@ page_zip_fixed_field_encode( /**********************************************************************//** Write the index information for the compressed page. -@return used size of buf */ -static +@return used size of buf */ ulint page_zip_fields_encode( /*===================*/ - ulint n, /*!< in: number of fields to compress */ - dict_index_t* index, /*!< in: index comprising at least n fields */ - ulint trx_id_pos,/*!< in: position of the trx_id column - in the index, or ULINT_UNDEFINED if - this is a non-leaf page */ - byte* buf) /*!< out: buffer of (n + 1) * 2 bytes */ + ulint n, /*!< in: number of fields + to compress */ + const dict_index_t* index, /*!< in: index comprising + at least n fields */ + ulint trx_id_pos, + /*!< in: position of the trx_id column + in the index, or ULINT_UNDEFINED if + this is a non-leaf page */ + byte* buf) /*!< out: buffer of (n + 1) * 2 bytes */ { const byte* buf_start = buf; ulint i; @@ -525,8 +515,7 @@ page_zip_fields_encode( const dict_col_t* column = dict_field_get_col(field); - if (UNIV_UNLIKELY(column->len > 255) - || UNIV_UNLIKELY(column->mtype == DATA_BLOB)) { + if (DATA_BIG_COL(column)) { val |= 0x7e; /* max > 255 bytes */ } @@ -670,10 +659,10 @@ page_zip_dir_encode( ut_a(offs < UNIV_PAGE_SIZE - PAGE_DIR); ut_a(offs >= PAGE_ZIP_START); #if PAGE_ZIP_DIR_SLOT_MASK & (PAGE_ZIP_DIR_SLOT_MASK + 1) -# error "PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2" +# error PAGE_ZIP_DIR_SLOT_MASK is not 1 less than a power of 2 #endif -#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_PAGE_ZIP_SIZE_MAX - 1 -# error "PAGE_ZIP_DIR_SLOT_MASK < UNIV_ZIP_SIZE_MAX - 1" +#if PAGE_ZIP_DIR_SLOT_MASK < UNIV_ZIP_SIZE_MAX - 1 +# error PAGE_ZIP_DIR_SLOT_MASK < UNIV_ZIP_SIZE_MAX - 1 #endif if (UNIV_UNLIKELY(rec_get_n_owned_new(rec))) { offs |= PAGE_ZIP_DIR_SLOT_OWNED; @@ -761,7 +750,6 @@ page_zip_free( 
/**********************************************************************//** Configure the zlib allocator to use the given memory heap. */ -UNIV_INTERN void page_zip_set_alloc( /*===============*/ @@ -783,16 +771,16 @@ page_zip_set_alloc( #ifdef PAGE_ZIP_COMPRESS_DBG /** Set this variable in a debugger to enable excessive logging in page_zip_compress(). */ -UNIV_INTERN ibool page_zip_compress_dbg; +ibool page_zip_compress_dbg; /** Set this variable in a debugger to enable binary logging of the data passed to deflate(). When this variable is nonzero, it will act as a log file name generator. */ -UNIV_INTERN unsigned page_zip_compress_log; +unsigned page_zip_compress_log; /**********************************************************************//** Wrapper for deflate(). Log the operation if page_zip_compress_dbg is set. -@return deflate() status: Z_OK, Z_BUF_ERROR, ... */ +@return deflate() status: Z_OK, Z_BUF_ERROR, ... */ static int page_zip_compress_deflate( @@ -806,7 +794,10 @@ page_zip_compress_deflate( ut_print_buf(stderr, strm->next_in, strm->avail_in); } if (UNIV_LIKELY_NULL(logfile)) { - fwrite(strm->next_in, 1, strm->avail_in, logfile); + if (fwrite(strm->next_in, 1, strm->avail_in, logfile) + != strm->avail_in) { + perror("fwrite"); + } } status = deflate(strm, flush); if (UNIV_UNLIKELY(page_zip_compress_dbg)) { @@ -819,9 +810,9 @@ page_zip_compress_deflate( # undef deflate /** Debug wrapper for the zlib compression routine deflate(). Log the operation if page_zip_compress_dbg is set. -@param strm in/out: compressed stream -@param flush in: flushing method -@return deflate() status: Z_OK, Z_BUF_ERROR, ... */ +@param strm in/out: compressed stream +@param flush in: flushing method +@return deflate() status: Z_OK, Z_BUF_ERROR, ... */ # define deflate(strm, flush) page_zip_compress_deflate(logfile, strm, flush) /** Declaration of the logfile parameter */ # define FILE_LOGFILE FILE* logfile, @@ -836,7 +827,7 @@ Log the operation if page_zip_compress_dbg is set. 
/**********************************************************************//** Compress the records of a node pointer page. -@return Z_OK, or a zlib error code */ +@return Z_OK, or a zlib error code */ static int page_zip_compress_node_ptrs( @@ -902,7 +893,7 @@ page_zip_compress_node_ptrs( /**********************************************************************//** Compress the records of a leaf node of a secondary index. -@return Z_OK, or a zlib error code */ +@return Z_OK, or a zlib error code */ static int page_zip_compress_sec( @@ -948,7 +939,7 @@ page_zip_compress_sec( /**********************************************************************//** Compress a record of a leaf node of a clustered index that contains externally stored columns. -@return Z_OK, or a zlib error code */ +@return Z_OK, or a zlib error code */ static int page_zip_compress_clust_ext( @@ -1075,7 +1066,7 @@ page_zip_compress_clust_ext( /**********************************************************************//** Compress the records of a leaf node of a clustered index. -@return Z_OK, or a zlib error code */ +@return Z_OK, or a zlib error code */ static int page_zip_compress_clust( @@ -1205,54 +1196,68 @@ page_zip_compress_clust( } while (--n_dense); func_exit: - return(err); -} + return(err);} /**********************************************************************//** Compress a page. @return TRUE on success, FALSE on failure; page_zip will be left intact on failure. 
*/ -UNIV_INTERN ibool page_zip_compress( /*==============*/ - page_zip_des_t* page_zip,/*!< in: size; out: data, n_blobs, - m_start, m_end, m_nonempty */ - const page_t* page, /*!< in: uncompressed page */ - dict_index_t* index, /*!< in: index of the B-tree node */ - ulint level, /*!< in: compression level */ - mtr_t* mtr) /*!< in: mini-transaction, or NULL */ + page_zip_des_t* page_zip, /*!< in: size; out: data, + n_blobs, m_start, m_end, + m_nonempty */ + const page_t* page, /*!< in: uncompressed page */ + dict_index_t* index, /*!< in: index of the B-tree + node */ + ulint level, /*!< in: commpression level */ + const redo_page_compress_t* page_comp_info, + /*!< in: used for applying + TRUNCATE log + record during recovery */ + mtr_t* mtr) /*!< in/out: mini-transaction, + or NULL */ { - z_stream c_stream; - int err; - ulint n_fields;/* number of index fields needed */ - byte* fields; /*!< index field information */ - byte* buf; /*!< compressed payload of the page */ - byte* buf_end;/* end of buf */ - ulint n_dense; - ulint slot_size;/* amount of uncompressed bytes per record */ - const rec_t** recs; /*!< dense page directory, sorted by address */ - mem_heap_t* heap; - ulint trx_id_col; - ulint n_blobs = 0; - byte* storage;/* storage of uncompressed columns */ + z_stream c_stream; + int err; + ulint n_fields; /* number of index fields + needed */ + byte* fields; /*!< index field information */ + byte* buf; /*!< compressed payload of the + page */ + byte* buf_end; /* end of buf */ + ulint n_dense; + ulint slot_size; /* amount of uncompressed bytes + per record */ + const rec_t** recs; /*!< dense page directory, + sorted by address */ + mem_heap_t* heap; + ulint trx_id_col = ULINT_UNDEFINED; + ulint n_blobs = 0; + byte* storage; /* storage of uncompressed + columns */ + index_id_t ind_id; #ifndef UNIV_HOTBACKUP - ullint usec = ut_time_us(NULL); + uintmax_t usec = ut_time_us(NULL); #endif /* !UNIV_HOTBACKUP */ #ifdef PAGE_ZIP_COMPRESS_DBG - FILE* logfile = NULL; + 
FILE* logfile = NULL; #endif /* A local copy of srv_cmp_per_index_enabled to avoid reading that variable multiple times in this function since it can be changed at anytime. */ - my_bool cmp_per_index_enabled = srv_cmp_per_index_enabled; + my_bool cmp_per_index_enabled; + cmp_per_index_enabled = srv_cmp_per_index_enabled; ut_a(page_is_comp(page)); - ut_a(fil_page_get_type(page) == FIL_PAGE_INDEX); + ut_a(fil_page_index_page_check(page)); ut_ad(page_simple_validate_new((page_t*) page)); ut_ad(page_zip_simple_validate(page_zip)); - ut_ad(dict_table_is_comp(index->table)); - ut_ad(!dict_index_is_ibuf(index)); + ut_ad(!index + || (index + && dict_table_is_comp(index->table) + && !dict_index_is_ibuf(index))); UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE); @@ -1272,21 +1277,30 @@ page_zip_compress( == PAGE_NEW_SUPREMUM); } - if (page_is_leaf(page)) { - n_fields = dict_index_get_n_fields(index); + if (truncate_t::s_fix_up_active) { + ut_ad(page_comp_info != NULL); + n_fields = page_comp_info->n_fields; + ind_id = page_comp_info->index_id; } else { - n_fields = dict_index_get_n_unique_in_tree(index); + if (page_is_leaf(page)) { + n_fields = dict_index_get_n_fields(index); + } else { + n_fields = dict_index_get_n_unique_in_tree_nonleaf(index); + } + ind_id = index->id; } /* The dense directory excludes the infimum and supremum records. */ n_dense = page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW; #ifdef PAGE_ZIP_COMPRESS_DBG if (UNIV_UNLIKELY(page_zip_compress_dbg)) { - fprintf(stderr, "compress %p %p %lu %lu %lu\n", - (void*) page_zip, (void*) page, - (ibool) page_is_leaf(page), - n_fields, n_dense); + ib::info() << "compress " + << static_cast<void*>(page_zip) << " " + << static_cast<const void*>(page) << " " + << page_is_leaf(page) << " " + << n_fields << " " << n_dense; } + if (UNIV_UNLIKELY(page_zip_compress_log)) { /* Create a log file for every compression attempt. 
*/ char logfilename[9]; @@ -1296,7 +1310,10 @@ page_zip_compress( if (logfile) { /* Write the uncompressed page to the log. */ - fwrite(page, 1, UNIV_PAGE_SIZE, logfile); + if (fwrite(page, 1, UNIV_PAGE_SIZE, logfile) + != UNIV_PAGE_SIZE) { + perror("fwrite"); + } /* Record the compressed size as zero. This will be overwritten at successful exit. */ putc(0, logfile); @@ -1310,7 +1327,7 @@ page_zip_compress( page_zip_stat[page_zip->ssize - 1].compressed++; if (cmp_per_index_enabled) { mutex_enter(&page_zip_stat_per_index_mutex); - page_zip_stat_per_index[index->id].compressed++; + page_zip_stat_per_index[ind_id].compressed++; mutex_exit(&page_zip_stat_per_index_mutex); } #endif /* !UNIV_HOTBACKUP */ @@ -1334,13 +1351,17 @@ page_zip_compress( && strcasecmp(index->table_name, "IBUF_DUMMY") != 0) { #ifdef UNIV_DEBUG - fprintf(stderr, - "InnoDB: Simulating a compression failure" - " for table %s, index %s, page %lu (%s)\n", - index->table_name, - index->name, - page_get_page_no(page), - page_is_leaf(page) ? "leaf" : "non-leaf"); + ib::error() + << "InnoDB: Simulating a compression failure" + << " for table " + << (index->table->name.m_name) + << " index " + << index->name() + << " page " + << page_get_page_no(page) + << "(" + << (page_is_leaf(page) ? "leaf" : "non-leaf") + << ")"; #endif @@ -1374,25 +1395,38 @@ page_zip_compress( ut_a(err == Z_OK); c_stream.next_out = buf; + /* Subtract the space reserved for uncompressed data. 
*/ /* Page header and the end marker of the modification log */ c_stream.avail_out = static_cast<uInt>(buf_end - buf - 1); /* Dense page directory and uncompressed columns, if any */ if (page_is_leaf(page)) { - if (dict_index_is_clust(index)) { - trx_id_col = dict_index_get_sys_col_pos( - index, DATA_TRX_ID); - ut_ad(trx_id_col > 0); - ut_ad(trx_id_col != ULINT_UNDEFINED); + if ((index && dict_index_is_clust(index)) + || (page_comp_info + && (page_comp_info->type & DICT_CLUSTERED))) { + + if (index) { + trx_id_col = dict_index_get_sys_col_pos( + index, DATA_TRX_ID); + ut_ad(trx_id_col > 0); + ut_ad(trx_id_col != ULINT_UNDEFINED); + } else if (page_comp_info + && (page_comp_info->type + & DICT_CLUSTERED)) { + trx_id_col = page_comp_info->trx_id_pos; + } slot_size = PAGE_ZIP_DIR_SLOT_SIZE + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; + } else { /* Signal the absence of trx_id in page_zip_fields_encode() */ - ut_ad(dict_index_get_sys_col_pos(index, DATA_TRX_ID) - == ULINT_UNDEFINED); + if (index) { + ut_ad(dict_index_get_sys_col_pos( + index, DATA_TRX_ID) == ULINT_UNDEFINED); + } trx_id_col = 0; slot_size = PAGE_ZIP_DIR_SLOT_SIZE; } @@ -1407,9 +1441,20 @@ page_zip_compress( } c_stream.avail_out -= static_cast<uInt>(n_dense * slot_size); - c_stream.avail_in = static_cast<uInt>( - page_zip_fields_encode(n_fields, index, trx_id_col, fields)); + if (truncate_t::s_fix_up_active) { + ut_ad(page_comp_info != NULL); + c_stream.avail_in = static_cast<uInt>( + page_comp_info->field_len); + for (ulint i = 0; i < page_comp_info->field_len; i++) { + fields[i] = page_comp_info->fields[i]; + } + } else { + c_stream.avail_in = static_cast<uInt>( + page_zip_fields_encode( + n_fields, index, trx_id_col, fields)); + } c_stream.next_in = fields; + if (UNIV_LIKELY(!trx_id_col)) { trx_id_col = ULINT_UNDEFINED; } @@ -1482,16 +1527,16 @@ err_exit: } #endif /* PAGE_ZIP_COMPRESS_DBG */ #ifndef UNIV_HOTBACKUP - if (page_is_leaf(page)) { + if (page_is_leaf(page) && index) { 
dict_index_zip_failure(index); } - ullint time_diff = ut_time_us(NULL) - usec; + uintmax_t time_diff = ut_time_us(NULL) - usec; page_zip_stat[page_zip->ssize - 1].compressed_usec += time_diff; if (cmp_per_index_enabled) { mutex_enter(&page_zip_stat_per_index_mutex); - page_zip_stat_per_index[index->id].compressed_usec + page_zip_stat_per_index[ind_id].compressed_usec += time_diff; mutex_exit(&page_zip_stat_per_index_mutex); } @@ -1549,22 +1594,24 @@ err_exit: byte sz[4]; mach_write_to_4(sz, c_stream.total_out); fseek(logfile, UNIV_PAGE_SIZE, SEEK_SET); - fwrite(sz, 1, sizeof sz, logfile); + if (fwrite(sz, 1, sizeof sz, logfile) != sizeof sz) { + perror("fwrite"); + } fclose(logfile); } #endif /* PAGE_ZIP_COMPRESS_DBG */ #ifndef UNIV_HOTBACKUP - ullint time_diff = ut_time_us(NULL) - usec; + uintmax_t time_diff = ut_time_us(NULL) - usec; page_zip_stat[page_zip->ssize - 1].compressed_ok++; page_zip_stat[page_zip->ssize - 1].compressed_usec += time_diff; if (cmp_per_index_enabled) { mutex_enter(&page_zip_stat_per_index_mutex); - page_zip_stat_per_index[index->id].compressed_ok++; - page_zip_stat_per_index[index->id].compressed_usec += time_diff; + page_zip_stat_per_index[ind_id].compressed_ok++; + page_zip_stat_per_index[ind_id].compressed_usec += time_diff; mutex_exit(&page_zip_stat_per_index_mutex); } - if (page_is_leaf(page)) { + if (page_is_leaf(page) && !truncate_t::s_fix_up_active) { dict_index_zip_success(index); } #endif /* !UNIV_HOTBACKUP */ @@ -1573,34 +1620,6 @@ err_exit: } /**********************************************************************//** -Compare two page directory entries. -@return positive if rec1 > rec2 */ -UNIV_INLINE -ibool -page_zip_dir_cmp( -/*=============*/ - const rec_t* rec1, /*!< in: rec1 */ - const rec_t* rec2) /*!< in: rec2 */ -{ - return(rec1 > rec2); -} - -/**********************************************************************//** -Sort the dense page directory by address (heap_no). 
*/ -static -void -page_zip_dir_sort( -/*==============*/ - rec_t** arr, /*!< in/out: dense page directory */ - rec_t** aux_arr,/*!< in/out: work area */ - ulint low, /*!< in: lower bound of the sorting area, inclusive */ - ulint high) /*!< in: upper bound of the sorting area, exclusive */ -{ - UT_SORT_FUNCTION_BODY(page_zip_dir_sort, arr, aux_arr, low, high, - page_zip_dir_cmp); -} - -/**********************************************************************//** Deallocate the index information initialized by page_zip_fields_decode(). */ static void @@ -1619,16 +1638,17 @@ page_zip_fields_free( /**********************************************************************//** Read the index information for the compressed page. -@return own: dummy index describing the page, or NULL on error */ +@return own: dummy index describing the page, or NULL on error */ static dict_index_t* page_zip_fields_decode( /*===================*/ const byte* buf, /*!< in: index information */ const byte* end, /*!< in: end of buf */ - ulint* trx_id_col)/*!< in: NULL for non-leaf pages; + ulint* trx_id_col,/*!< in: NULL for non-leaf pages; for leaf pages, pointer to where to store the position of the trx_id column */ + bool is_spatial)/*< in: is spatial index or not */ { const byte* b; ulint n; @@ -1660,7 +1680,7 @@ page_zip_fields_decode( return(NULL); } - table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n, + table = dict_mem_table_create("ZIP_DUMMY", DICT_HDR_SPACE, n, 0, DICT_TF_COMPACT, 0); index = dict_mem_index_create("ZIP_DUMMY", "ZIP_DUMMY", DICT_HDR_SPACE, 0, n); @@ -1730,13 +1750,17 @@ page_zip_fields_decode( ut_ad(b == end); + if (is_spatial) { + index->type |= DICT_SPATIAL; + } + return(index); } /**********************************************************************//** Populate the sparse page directory from the dense directory. 
-@return TRUE on success, FALSE on failure */ -static +@return TRUE on success, FALSE on failure */ +static __attribute__((nonnull, warn_unused_result)) ibool page_zip_dir_decode( /*================*/ @@ -1747,9 +1771,8 @@ page_zip_dir_decode( filled in */ rec_t** recs, /*!< out: dense page directory sorted by ascending address (and heap_no) */ - rec_t** recs_aux,/*!< in/out: scratch area */ ulint n_dense)/*!< in: number of user records, and - size of recs[] and recs_aux[] */ + size of recs[] */ { ulint i; ulint n_recs; @@ -1824,15 +1847,13 @@ page_zip_dir_decode( recs[i] = page + offs; } - if (UNIV_LIKELY(n_dense > 1)) { - page_zip_dir_sort(recs, recs_aux, 0, n_dense); - } + std::sort(recs, recs + n_dense); return(TRUE); } /**********************************************************************//** Initialize the REC_N_NEW_EXTRA_BYTES of each record. -@return TRUE on success, FALSE on failure */ +@return TRUE on success, FALSE on failure */ static ibool page_zip_set_extra_bytes( @@ -1930,7 +1951,7 @@ page_zip_set_extra_bytes( /**********************************************************************//** Apply the modification log to a record containing externally stored columns. Do not copy the fields that are stored separately. -@return pointer to modification log, or NULL on failure */ +@return pointer to modification log, or NULL on failure */ static const byte* page_zip_apply_log_ext( @@ -1985,8 +2006,8 @@ page_zip_apply_log_ext( - BTR_EXTERN_FIELD_REF_SIZE; if (UNIV_UNLIKELY(data + len >= end)) { - page_zip_fail(("page_zip_apply_log_ext: " - "ext %p+%lu >= %p\n", + page_zip_fail(("page_zip_apply_log_ext:" + " ext %p+%lu >= %p\n", (const void*) data, (ulong) len, (const void*) end)); @@ -2003,8 +2024,8 @@ page_zip_apply_log_ext( /* Copy the last bytes of the record. 
*/ len = rec_get_end(rec, offsets) - next_out; if (UNIV_UNLIKELY(data + len >= end)) { - page_zip_fail(("page_zip_apply_log_ext: " - "last %p+%lu >= %p\n", + page_zip_fail(("page_zip_apply_log_ext:" + " last %p+%lu >= %p\n", (const void*) data, (ulong) len, (const void*) end)); @@ -2019,7 +2040,7 @@ page_zip_apply_log_ext( /**********************************************************************//** Apply the modification log to an uncompressed page. Do not copy the fields that are stored separately. -@return pointer to end of modification log, or NULL on failure */ +@return pointer to end of modification log, or NULL on failure */ static const byte* page_zip_apply_log( @@ -2137,8 +2158,8 @@ page_zip_apply_log( /* Non-leaf nodes should not contain any externally stored columns. */ if (UNIV_UNLIKELY(hs & REC_STATUS_NODE_PTR)) { - page_zip_fail(("page_zip_apply_log: " - "%lu&REC_STATUS_NODE_PTR\n", + page_zip_fail(("page_zip_apply_log:" + " %lu&REC_STATUS_NODE_PTR\n", (ulong) hs)); return(NULL); } @@ -2154,8 +2175,8 @@ page_zip_apply_log( - REC_NODE_PTR_SIZE; /* Copy the data bytes, except node_ptr. */ if (UNIV_UNLIKELY(data + len >= end)) { - page_zip_fail(("page_zip_apply_log: " - "node_ptr %p+%lu >= %p\n", + page_zip_fail(("page_zip_apply_log:" + " node_ptr %p+%lu >= %p\n", (const void*) data, (ulong) len, (const void*) end)); @@ -2169,8 +2190,8 @@ page_zip_apply_log( /* Copy all data bytes of a record in a secondary index. 
*/ if (UNIV_UNLIKELY(data + len >= end)) { - page_zip_fail(("page_zip_apply_log: " - "sec %p+%lu >= %p\n", + page_zip_fail(("page_zip_apply_log:" + " sec %p+%lu >= %p\n", (const void*) data, (ulong) len, (const void*) end)); @@ -2188,8 +2209,8 @@ page_zip_apply_log( if (UNIV_UNLIKELY(data + l >= end) || UNIV_UNLIKELY(len < (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN))) { - page_zip_fail(("page_zip_apply_log: " - "trx_id %p+%lu >= %p\n", + page_zip_fail(("page_zip_apply_log:" + " trx_id %p+%lu >= %p\n", (const void*) data, (ulong) l, (const void*) end)); @@ -2204,8 +2225,8 @@ page_zip_apply_log( b = rec + l + (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); len = rec_get_end(rec, offsets) - b; if (UNIV_UNLIKELY(data + len >= end)) { - page_zip_fail(("page_zip_apply_log: " - "clust %p+%lu >= %p\n", + page_zip_fail(("page_zip_apply_log:" + " clust %p+%lu >= %p\n", (const void*) data, (ulong) len, (const void*) end)); @@ -2220,7 +2241,7 @@ page_zip_apply_log( /**********************************************************************//** Set the heap_no in a record, and skip the fixed-size record header that is not included in the d_stream. -@return TRUE on success, FALSE if d_stream does not end at rec */ +@return TRUE on success, FALSE if d_stream does not end at rec */ static ibool page_zip_decompress_heap_no( @@ -2245,7 +2266,7 @@ page_zip_decompress_heap_no( /**********************************************************************//** Decompress the records of a node pointer page. -@return TRUE on success, FALSE on failure */ +@return TRUE on success, FALSE on failure */ static ibool page_zip_decompress_node_ptrs( @@ -2434,7 +2455,7 @@ zlib_done: /**********************************************************************//** Decompress the records of a leaf node of a secondary index. 
-@return TRUE on success, FALSE on failure */ +@return TRUE on success, FALSE on failure */ static ibool page_zip_decompress_sec( @@ -2572,7 +2593,7 @@ zlib_done: /**********************************************************************//** Decompress a record of a leaf node of a clustered index that contains externally stored columns. -@return TRUE on success */ +@return TRUE on success */ static ibool page_zip_decompress_clust_ext( @@ -2682,7 +2703,7 @@ page_zip_decompress_clust_ext( /**********************************************************************//** Compress the records of a leaf node of a clustered index. -@return TRUE on success, FALSE on failure */ +@return TRUE on success, FALSE on failure */ static ibool page_zip_decompress_clust( @@ -2708,9 +2729,7 @@ page_zip_decompress_clust( /* Subtract the space reserved for uncompressed data. */ d_stream->avail_in -= static_cast<uInt>(n_dense) - * (PAGE_ZIP_DIR_SLOT_SIZE - + DATA_TRX_ID_LEN - + DATA_ROLL_PTR_LEN); + * (PAGE_ZIP_CLUST_LEAF_SLOT_SIZE); /* Decompress the records in heap_no order. */ for (slot = 0; slot < n_dense; slot++) { @@ -2952,8 +2971,8 @@ zlib_done: (externs < page_zip->data + page_zip->m_end)) { page_zip_fail(("page_zip_" - "decompress_clust: " - "%p < %p + %lu\n", + "decompress_clust:" + " %p < %p + %lu\n", (const void*) externs, (const void*) page_zip->data, @@ -2982,11 +3001,11 @@ zlib_done: Decompress a page. This function should tolerate errors on the compressed page. Instead of letting assertions fail, it will return FALSE if an inconsistency is detected. 
-@return TRUE on success, FALSE on failure */ -UNIV_INTERN +@return TRUE on success, FALSE on failure */ +static ibool -page_zip_decompress( -/*================*/ +page_zip_decompress_low( +/*====================*/ page_zip_des_t* page_zip,/*!< in: data, ssize; out: m_start, m_end, m_nonempty, n_blobs */ page_t* page, /*!< out: uncompressed page, may be trashed */ @@ -3002,9 +3021,6 @@ page_zip_decompress( ulint trx_id_col = ULINT_UNDEFINED; mem_heap_t* heap; ulint* offsets; -#ifndef UNIV_HOTBACKUP - ullint usec = ut_time_us(NULL); -#endif /* !UNIV_HOTBACKUP */ ut_ad(page_zip_simple_validate(page_zip)); UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE); @@ -3023,7 +3039,7 @@ page_zip_decompress( heap = mem_heap_create(n_dense * (3 * sizeof *recs) + UNIV_PAGE_SIZE); recs = static_cast<rec_t**>( - mem_heap_alloc(heap, n_dense * (2 * sizeof *recs))); + mem_heap_alloc(heap, n_dense * sizeof *recs)); if (all) { /* Copy the page header. */ @@ -3058,7 +3074,7 @@ page_zip_decompress( /* Copy the page directory. */ if (UNIV_UNLIKELY(!page_zip_dir_decode(page_zip, page, recs, - recs + n_dense, n_dense))) { + n_dense))) { zlib_error: mem_heap_free(heap); return(FALSE); @@ -3111,7 +3127,8 @@ zlib_error: index = page_zip_fields_decode( page + PAGE_ZIP_START, d_stream.next_out, - page_is_leaf(page) ? &trx_id_col : NULL); + page_is_leaf(page) ? &trx_id_col : NULL, + fil_page_get_type(page) == FIL_PAGE_RTREE); if (UNIV_UNLIKELY(!index)) { @@ -3188,8 +3205,36 @@ err_exit: page_zip_fields_free(index); mem_heap_free(heap); + + return(TRUE); +} + +/**********************************************************************//** +Decompress a page. This function should tolerate errors on the compressed +page. Instead of letting assertions fail, it will return FALSE if an +inconsistency is detected. 
+@return TRUE on success, FALSE on failure */ +ibool +page_zip_decompress( +/*================*/ + page_zip_des_t* page_zip,/*!< in: data, ssize; + out: m_start, m_end, m_nonempty, n_blobs */ + page_t* page, /*!< out: uncompressed page, may be trashed */ + ibool all) /*!< in: TRUE=decompress the whole page; + FALSE=verify but do not copy some + page header fields that should not change + after page creation */ +{ #ifndef UNIV_HOTBACKUP - ullint time_diff = ut_time_us(NULL) - usec; + uintmax_t usec = ut_time_us(NULL); +#endif /* !UNIV_HOTBACKUP */ + + if (!page_zip_decompress_low(page_zip, page, all)) { + return(FALSE); + } + +#ifndef UNIV_HOTBACKUP + uintmax_t time_diff = ut_time_us(NULL) - usec; page_zip_stat[page_zip->ssize - 1].decompressed++; page_zip_stat[page_zip->ssize - 1].decompressed_usec += time_diff; @@ -3244,17 +3289,16 @@ page_zip_hexdump_func( } /** Dump a block of memory on the standard error stream. -@param buf in: data -@param size in: length of the data, in bytes */ +@param buf in: data +@param size in: length of the data, in bytes */ #define page_zip_hexdump(buf, size) page_zip_hexdump_func(#buf, buf, size) /** Flag: make page_zip_validate() compare page headers only */ -UNIV_INTERN ibool page_zip_validate_header_only = FALSE; +ibool page_zip_validate_header_only = FALSE; /**********************************************************************//** Check that the compressed and decompressed pages match. -@return TRUE if valid, FALSE if not */ -UNIV_INTERN +@return TRUE if valid, FALSE if not */ ibool page_zip_validate_low( /*==================*/ @@ -3289,14 +3333,14 @@ page_zip_validate_low( /* page_zip_decompress() expects the uncompressed page to be UNIV_PAGE_SIZE aligned. 
*/ - temp_page_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE)); + temp_page_buf = static_cast<byte*>(ut_malloc_nokey(2 * UNIV_PAGE_SIZE)); temp_page = static_cast<byte*>(ut_align(temp_page_buf, UNIV_PAGE_SIZE)); UNIV_MEM_ASSERT_RW(page, UNIV_PAGE_SIZE); UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); temp_page_zip = *page_zip; - valid = page_zip_decompress(&temp_page_zip, temp_page, TRUE); + valid = page_zip_decompress_low(&temp_page_zip, temp_page, TRUE); if (!valid) { fputs("page_zip_validate(): failed to decompress\n", stderr); goto func_exit; @@ -3354,15 +3398,23 @@ page_zip_validate_low( /* Only the minimum record flag differed. Let us ignore it. */ - page_zip_fail(("page_zip_validate: " - "min_rec_flag " - "(%s" - "%lu,%lu,0x%02lx)\n", + page_zip_fail(("page_zip_validate:" + " min_rec_flag" + " (%s%lu,%lu,0x%02lx)\n", sloppy ? "ignored, " : "", page_get_space_id(page), page_get_page_no(page), (ulong) page[offset])); - valid = sloppy; + /* We don't check for spatial index, since + the "minimum record" could be deleted when + doing rtr_update_mbr_field. 
+ GIS_FIXME: need to validate why + rtr_update_mbr_field.() could affect this */ + if (index && dict_index_is_spatial(index)) { + valid = true; + } else { + valid = sloppy; + } goto func_exit; } } @@ -3373,8 +3425,8 @@ page_zip_validate_low( while (rec || trec) { if (page_offset(rec) != page_offset(trec)) { - page_zip_fail(("page_zip_validate: " - "PAGE_FREE list: %u!=%u\n", + page_zip_fail(("page_zip_validate:" + " PAGE_FREE list: %u!=%u\n", (unsigned) page_offset(rec), (unsigned) page_offset(trec))); valid = FALSE; @@ -3395,8 +3447,8 @@ page_zip_validate_low( do { if (page_offset(rec) != page_offset(trec)) { - page_zip_fail(("page_zip_validate: " - "record list: 0x%02x!=0x%02x\n", + page_zip_fail(("page_zip_validate:" + " record list: 0x%02x!=0x%02x\n", (unsigned) page_offset(rec), (unsigned) page_offset(trec))); valid = FALSE; @@ -3413,8 +3465,8 @@ page_zip_validate_low( trec - rec_offs_extra_size(offsets), rec_offs_size(offsets))) { page_zip_fail( - ("page_zip_validate: " - "record content: 0x%02x", + ("page_zip_validate:" + " record content: 0x%02x", (unsigned) page_offset(rec))); valid = FALSE; break; @@ -3443,8 +3495,7 @@ func_exit: /**********************************************************************//** Check that the compressed and decompressed pages match. -@return TRUE if valid, FALSE if not */ -UNIV_INTERN +@return TRUE if valid, FALSE if not */ ibool page_zip_validate( /*==============*/ @@ -3460,7 +3511,7 @@ page_zip_validate( #ifdef UNIV_DEBUG /**********************************************************************//** Assert that the compressed and decompressed page headers match. -@return TRUE */ +@return TRUE */ static ibool page_zip_header_cmp( @@ -3482,7 +3533,7 @@ page_zip_header_cmp( /**********************************************************************//** Write a record on the compressed page that contains externally stored columns. The data must already have been written to the uncompressed page. 
-@return end of modification log */ +@return end of modification log */ static byte* page_zip_write_rec_ext( @@ -3604,7 +3655,6 @@ page_zip_write_rec_ext( /**********************************************************************//** Write an entire record on the compressed page. The data must already have been written to the uncompressed page. */ -UNIV_INTERN void page_zip_write_rec( /*===============*/ @@ -3789,8 +3839,7 @@ page_zip_write_rec( /***********************************************************//** Parses a log record of writing a BLOB pointer of a record. -@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* page_zip_parse_write_blob_ptr( /*==========================*/ @@ -3802,6 +3851,8 @@ page_zip_parse_write_blob_ptr( ulint offset; ulint z_offset; + ut_ad(ptr != NULL); + ut_ad(end_ptr != NULL); ut_ad(!page == !page_zip); if (UNIV_UNLIKELY @@ -3813,9 +3864,9 @@ page_zip_parse_write_blob_ptr( offset = mach_read_from_2(ptr); z_offset = mach_read_from_2(ptr + 2); - if (UNIV_UNLIKELY(offset < PAGE_ZIP_START) - || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE) - || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) { + if (offset < PAGE_ZIP_START + || offset >= UNIV_PAGE_SIZE + || z_offset >= UNIV_PAGE_SIZE) { corrupt: recv_sys->found_corrupt_log = TRUE; @@ -3823,8 +3874,8 @@ corrupt: } if (page) { - if (UNIV_UNLIKELY(!page_zip) - || UNIV_UNLIKELY(!page_is_leaf(page))) { + + if (!page_zip || !page_is_leaf(page)) { goto corrupt; } @@ -3849,7 +3900,6 @@ corrupt: /**********************************************************************//** Write a BLOB pointer of a record on the leaf page of a clustered index. The information must already have been updated on the uncompressed page. 
*/ -UNIV_INTERN void page_zip_write_blob_ptr( /*====================*/ @@ -3868,6 +3918,10 @@ page_zip_write_blob_ptr( ulint blob_no; ulint len; + ut_ad(page_zip != NULL); + ut_ad(rec != NULL); + ut_ad(index != NULL); + ut_ad(offsets != NULL); ut_ad(PAGE_ZIP_MATCH(rec, page_zip)); ut_ad(page_simple_validate_new((page_t*) page)); ut_ad(page_zip_simple_validate(page_zip)); @@ -3895,8 +3949,7 @@ page_zip_write_blob_ptr( externs = page_zip->data + page_zip_get_size(page_zip) - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW) - * (PAGE_ZIP_DIR_SLOT_SIZE - + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); + * PAGE_ZIP_CLUST_LEAF_SLOT_SIZE; field = rec_get_nth_field(rec, offsets, n, &len); @@ -3932,8 +3985,7 @@ page_zip_write_blob_ptr( /***********************************************************//** Parses a log record of writing the node pointer of a record. -@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* page_zip_parse_write_node_ptr( /*==========================*/ @@ -3945,6 +3997,8 @@ page_zip_parse_write_node_ptr( ulint offset; ulint z_offset; + ut_ad(ptr != NULL); + ut_ad(end_ptr!= NULL); ut_ad(!page == !page_zip); if (UNIV_UNLIKELY(end_ptr < ptr + (2 + 2 + REC_NODE_PTR_SIZE))) { @@ -3955,9 +4009,9 @@ page_zip_parse_write_node_ptr( offset = mach_read_from_2(ptr); z_offset = mach_read_from_2(ptr + 2); - if (UNIV_UNLIKELY(offset < PAGE_ZIP_START) - || UNIV_UNLIKELY(offset >= UNIV_PAGE_SIZE) - || UNIV_UNLIKELY(z_offset >= UNIV_PAGE_SIZE)) { + if (offset < PAGE_ZIP_START + || offset >= UNIV_PAGE_SIZE + || z_offset >= UNIV_PAGE_SIZE) { corrupt: recv_sys->found_corrupt_log = TRUE; @@ -3970,8 +4024,7 @@ corrupt: byte* storage; ulint heap_no; - if (UNIV_UNLIKELY(!page_zip) - || UNIV_UNLIKELY(page_is_leaf(page))) { + if (!page_zip || page_is_leaf(page)) { goto corrupt; } @@ -4007,7 +4060,6 @@ corrupt: /**********************************************************************//** Write the node pointer of a record on a non-leaf compressed 
page. */ -UNIV_INTERN void page_zip_write_node_ptr( /*====================*/ @@ -4074,7 +4126,6 @@ page_zip_write_node_ptr( /**********************************************************************//** Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */ -UNIV_INTERN void page_zip_write_trx_id_and_roll_ptr( /*===============================*/ @@ -4176,7 +4227,7 @@ page_zip_clear_rec( there is an array of node_ptr immediately before the dense page directory, at the very end of the page. */ storage = page_zip_dir_start(page_zip); - ut_ad(dict_index_get_n_unique_in_tree(index) == + ut_ad(dict_index_get_n_unique_in_tree_nonleaf(index) == rec_offs_n_fields(offsets) - 1); field = rec_get_nth_field(rec, offsets, rec_offs_n_fields(offsets) - 1, @@ -4233,7 +4284,6 @@ page_zip_clear_rec( /**********************************************************************//** Write the "deleted" flag of a record on a compressed page. The flag must already have been written on the uncompressed page. */ -UNIV_INTERN void page_zip_rec_set_deleted( /*=====================*/ @@ -4257,7 +4307,6 @@ page_zip_rec_set_deleted( /**********************************************************************//** Write the "owned" flag of a record on a compressed page. The n_owned field must already have been written on the uncompressed page. */ -UNIV_INTERN void page_zip_rec_set_owned( /*===================*/ @@ -4277,7 +4326,6 @@ page_zip_rec_set_owned( /**********************************************************************//** Insert a record to the dense page directory. */ -UNIV_INTERN void page_zip_dir_insert( /*================*/ @@ -4356,7 +4404,6 @@ page_zip_dir_insert( /**********************************************************************//** Shift the dense page directory and the array of BLOB pointers when a record is deleted. 
*/ -UNIV_INTERN void page_zip_dir_delete( /*================*/ @@ -4429,8 +4476,7 @@ page_zip_dir_delete( externs = page_zip->data + page_zip_get_size(page_zip) - (page_dir_get_n_heap(page) - PAGE_HEAP_NO_USER_LOW) - * (PAGE_ZIP_DIR_SLOT_SIZE - + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); + * PAGE_ZIP_CLUST_LEAF_SLOT_SIZE; ext_end = externs - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE; @@ -4454,7 +4500,6 @@ skip_blobs: /**********************************************************************//** Add a slot to the dense page directory. */ -UNIV_INTERN void page_zip_dir_add_slot( /*==================*/ @@ -4488,19 +4533,15 @@ page_zip_dir_add_slot( * (DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); externs = stored - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE; - ASSERT_ZERO(externs - - (PAGE_ZIP_DIR_SLOT_SIZE - + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN), - PAGE_ZIP_DIR_SLOT_SIZE - + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN); - memmove(externs - (PAGE_ZIP_DIR_SLOT_SIZE - + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN), + ASSERT_ZERO(externs - PAGE_ZIP_CLUST_LEAF_SLOT_SIZE, + PAGE_ZIP_CLUST_LEAF_SLOT_SIZE); + memmove(externs - PAGE_ZIP_CLUST_LEAF_SLOT_SIZE, externs, stored - externs); } else { stored = dir - page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE; ASSERT_ZERO(stored - PAGE_ZIP_DIR_SLOT_SIZE, - PAGE_ZIP_DIR_SLOT_SIZE); + static_cast<size_t>(PAGE_ZIP_DIR_SLOT_SIZE)); } /* Move the uncompressed area backwards to make space @@ -4510,8 +4551,7 @@ page_zip_dir_add_slot( /***********************************************************//** Parses a log record of writing to the header of a page. 
-@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* page_zip_parse_write_header( /*========================*/ @@ -4523,7 +4563,8 @@ page_zip_parse_write_header( ulint offset; ulint len; - ut_ad(ptr && end_ptr); + ut_ad(ptr != NULL); + ut_ad(end_ptr!= NULL); ut_ad(!page == !page_zip); if (UNIV_UNLIKELY(end_ptr < ptr + (1 + 1))) { @@ -4534,20 +4575,20 @@ page_zip_parse_write_header( offset = (ulint) *ptr++; len = (ulint) *ptr++; - if (UNIV_UNLIKELY(!len) || UNIV_UNLIKELY(offset + len >= PAGE_DATA)) { + if (len == 0 || offset + len >= PAGE_DATA) { corrupt: recv_sys->found_corrupt_log = TRUE; return(NULL); } - if (UNIV_UNLIKELY(end_ptr < ptr + len)) { + if (end_ptr < ptr + len) { return(NULL); } if (page) { - if (UNIV_UNLIKELY(!page_zip)) { + if (!page_zip) { goto corrupt; } @@ -4569,7 +4610,6 @@ corrupt: #ifndef UNIV_HOTBACKUP /**********************************************************************//** Write a log record of writing to the uncompressed header portion of a page. */ -UNIV_INTERN void page_zip_write_header_log( /*======================*/ @@ -4614,7 +4654,6 @@ bits in the same mini-transaction in such a way that the modification will be redo-logged. @return TRUE on success, FALSE on failure; page_zip will be left intact on failure, but page will be overwritten. 
*/ -UNIV_INTERN ibool page_zip_reorganize( /*================*/ @@ -4632,7 +4671,6 @@ page_zip_reorganize( page_t* page = buf_block_get_frame(block); buf_block_t* temp_block; page_t* temp_page; - ulint log_mode; ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); ut_ad(page_is_comp(page)); @@ -4642,12 +4680,11 @@ page_zip_reorganize( UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); /* Disable logging */ - log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); + mtr_log_t log_mode = mtr_set_log_mode(mtr, MTR_LOG_NONE); #ifndef UNIV_HOTBACKUP temp_block = buf_block_alloc(buf_pool); btr_search_drop_page_hash_index(block); - block->check_index_page_at_flush = TRUE; #else /* !UNIV_HOTBACKUP */ ut_ad(block == back_block1); temp_block = back_block2; @@ -4657,12 +4694,10 @@ page_zip_reorganize( /* Copy the old page to temporary space */ buf_frame_copy(temp_page, page); - btr_blob_dbg_remove(page, index, "zip_reorg"); - /* Recreate the page: note that global data on page (possible segment headers, next page-field, etc.) is preserved intact */ - page_create(block, mtr, TRUE); + page_create(block, mtr, TRUE, dict_index_is_spatial(index)); /* Copy the records from the temporary space to the recreated page; do not copy the lock bits yet */ @@ -4671,7 +4706,14 @@ page_zip_reorganize( page_get_infimum_rec(temp_page), index, mtr); - if (!dict_index_is_clust(index) && page_is_leaf(temp_page)) { + /* Temp-Tables are not shared across connection and so we avoid + locking of temp-tables as there would be no 2 trx trying to + operate on same temp-table in parallel. + max_trx_id is use to track which all trxs wrote to the page + in parallel but in case of temp-table this can is not needed. 
*/ + if (!dict_index_is_clust(index) + && !dict_table_is_temporary(index->table) + && page_is_leaf(temp_page)) { /* Copy max trx id to recreated page */ trx_id_t max_trx_id = page_get_max_trx_id(temp_page); page_set_max_trx_id(block, NULL, max_trx_id, NULL); @@ -4681,7 +4723,8 @@ page_zip_reorganize( /* Restore logging. */ mtr_set_log_mode(mtr, log_mode); - if (!page_zip_compress(page_zip, page, index, page_zip_level, mtr)) { + if (!page_zip_compress(page_zip, page, index, + page_zip_level, NULL, mtr)) { #ifndef UNIV_HOTBACKUP buf_block_free(temp_block); @@ -4703,7 +4746,6 @@ Copy the records of a page byte for byte. Do not copy the page header or trailer, except those B-tree header fields that are directly related to the storage of records. Also copy PAGE_MAX_TRX_ID. NOTE: The caller must update the lock table and the adaptive hash index. */ -UNIV_INTERN void page_zip_copy_recs( /*===============*/ @@ -4716,8 +4758,10 @@ page_zip_copy_recs( dict_index_t* index, /*!< in: index of the B-tree */ mtr_t* mtr) /*!< in: mini-transaction */ { - ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX)); - ut_ad(mtr_memo_contains_page(mtr, src, MTR_MEMO_PAGE_X_FIX)); + ut_ad(mtr_memo_contains_page(mtr, page, MTR_MEMO_PAGE_X_FIX) + || dict_table_is_intrinsic(index->table)); + ut_ad(mtr_memo_contains_page(mtr, src, MTR_MEMO_PAGE_X_FIX) + || dict_table_is_intrinsic(index->table)); ut_ad(!dict_index_is_ibuf(index)); #ifdef UNIV_ZIP_DEBUG /* The B-tree operations that call this function may set @@ -4734,7 +4778,9 @@ page_zip_copy_recs( /* The PAGE_MAX_TRX_ID must be set on leaf pages of secondary indexes. It does not matter on other pages. 
*/ - ut_a(dict_index_is_clust(index) || !page_is_leaf(src) + ut_a(dict_index_is_clust(index) + || dict_table_is_temporary(index->table) + || !page_is_leaf(src) || page_get_max_trx_id(src)); UNIV_MEM_ASSERT_W(page, UNIV_PAGE_SIZE); @@ -4786,16 +4832,13 @@ page_zip_copy_recs( #ifdef UNIV_ZIP_DEBUG ut_a(page_zip_validate(page_zip, page, index)); #endif /* UNIV_ZIP_DEBUG */ - btr_blob_dbg_add(page, index, "page_zip_copy_recs"); - page_zip_compress_write_log(page_zip, page, index, mtr); } #endif /* !UNIV_HOTBACKUP */ /**********************************************************************//** Parses a log record of compressing an index page. -@return end of log record or NULL */ -UNIV_INTERN +@return end of log record or NULL */ byte* page_zip_parse_compress( /*====================*/ @@ -4808,7 +4851,7 @@ page_zip_parse_compress( ulint trailer_size; ut_ad(ptr != NULL); - ut_ad(end_ptr != NULL); + ut_ad(end_ptr!= NULL); ut_ad(!page == !page_zip); if (UNIV_UNLIKELY(ptr + (2 + 2) > end_ptr)) { @@ -4827,8 +4870,7 @@ page_zip_parse_compress( } if (page) { - if (UNIV_UNLIKELY(!page_zip) - || UNIV_UNLIKELY(page_zip_get_size(page_zip) < size)) { + if (!page_zip || page_zip_get_size(page_zip) < size) { corrupt: recv_sys->found_corrupt_log = TRUE; @@ -4855,19 +4897,22 @@ corrupt: } #endif /* !UNIV_INNOCHECKSUM */ -/**********************************************************************//** -Calculate the compressed page checksum. -@return page checksum */ -UNIV_INTERN -ulint +/** Calculate the compressed page checksum. +@param[in] data compressed page +@param[in] size size of compressed page +@param[in] algo algorithm to use +@param[in] use_legacy_big_endian only used if algo is +SRV_CHECKSUM_ALGORITHM_CRC32 or SRV_CHECKSUM_ALGORITHM_STRICT_CRC32 - if true +then use big endian byteorder when converting byte strings to integers. 
+@return page checksum */ +uint32_t page_zip_calc_checksum( -/*===================*/ - const void* data, /*!< in: compressed page */ - ulint size, /*!< in: size of compressed page */ - srv_checksum_algorithm_t algo) /*!< in: algorithm to use */ + const void* data, + ulint size, + srv_checksum_algorithm_t algo, + bool use_legacy_big_endian /* = false */) { - uLong adler; - ib_uint32_t crc32; + uint32_t adler; const Bytef* s = static_cast<const byte*>(data); /* Exclude FIL_PAGE_SPACE_OR_CHKSUM, FIL_PAGE_LSN, @@ -4876,16 +4921,25 @@ page_zip_calc_checksum( switch (algo) { case SRV_CHECKSUM_ALGORITHM_CRC32: case SRV_CHECKSUM_ALGORITHM_STRICT_CRC32: - - ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - - crc32 = ut_crc32(s + FIL_PAGE_OFFSET, - FIL_PAGE_LSN - FIL_PAGE_OFFSET) - ^ ut_crc32(s + FIL_PAGE_TYPE, 2) - ^ ut_crc32(s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, - size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - - return((ulint) crc32); + { + ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + + ut_crc32_func_t crc32_func = use_legacy_big_endian + ? ut_crc32_legacy_big_endian + : ut_crc32; + + const uint32_t crc32 + = crc32_func( + s + FIL_PAGE_OFFSET, + FIL_PAGE_LSN - FIL_PAGE_OFFSET) + ^ crc32_func( + s + FIL_PAGE_TYPE, 2) + ^ crc32_func( + s + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, + size - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); + + return(crc32); + } case SRV_CHECKSUM_ALGORITHM_INNODB: case SRV_CHECKSUM_ALGORITHM_STRICT_INNODB: ut_ad(size > FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); @@ -4898,7 +4952,7 @@ page_zip_calc_checksum( static_cast<uInt>(size) - FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID); - return((ulint) adler); + return(adler); case SRV_CHECKSUM_ALGORITHM_NONE: case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: return(BUF_NO_CHECKSUM_MAGIC); @@ -4912,54 +4966,66 @@ page_zip_calc_checksum( /**********************************************************************//** Verify a compressed page's checksum. 
-@return TRUE if the stored checksum is valid according to the value of +@return TRUE if the stored checksum is valid according to the value of innodb_checksum_algorithm */ -UNIV_INTERN ibool page_zip_verify_checksum( /*=====================*/ - const void* data, /*!< in: compressed page */ - ulint size) /*!< in: size of compressed page */ + const void* data, /*!< in: compressed page */ + ulint size /*!< in: size of compressed page */ +#ifdef UNIV_INNOCHECKSUM + /* these variables are used only for innochecksum tool. */ + ,uintmax_t page_no, /*!< in: page number of + given read_buf */ + bool strict_check, /*!< in: true if strict-check + option is enable */ + bool is_log_enabled, /*!< in: true if log option is + enabled */ + FILE* log_file /*!< in: file pointer to + log_file */ +#endif /* UNIV_INNOCHECKSUM */ +) { - ib_uint32_t stored; - ib_uint32_t calc; - ib_uint32_t crc32 = 0 /* silence bogus warning */; - ib_uint32_t innodb = 0 /* silence bogus warning */; - - stored = static_cast<ib_uint32_t>(mach_read_from_4( - static_cast<const unsigned char*>(data) + FIL_PAGE_SPACE_OR_CHKSUM)); + const unsigned char* p = static_cast<const unsigned char*>(data) + + FIL_PAGE_SPACE_OR_CHKSUM; - ulint page_no MY_ATTRIBUTE((unused)) = - mach_read_from_4(static_cast<const unsigned char*> - (data) + FIL_PAGE_OFFSET); - ulint space_id MY_ATTRIBUTE((unused)) = - mach_read_from_4(static_cast<const unsigned char*> - (data) + FIL_PAGE_SPACE_ID); + const uint32_t stored = static_cast<uint32_t>( + mach_read_from_4(p)); #if FIL_PAGE_LSN % 8 #error "FIL_PAGE_LSN must be 64 bit aligned" #endif -#ifndef UNIV_INNOCHECKSUM - /* innochecksum doesn't compile with ut_d. Since we don't - need to check for empty pages when running innochecksum, - just don't include this code. 
*/ /* Check if page is empty */ if (stored == 0 && *reinterpret_cast<const ib_uint64_t*>(static_cast<const char*>( data) + FIL_PAGE_LSN) == 0) { /* make sure that the page is really empty */ +#ifdef UNIV_INNOCHECKSUM ulint i; for (i = 0; i < size; i++) { + if (*((const char*) data + i) != 0) + break; + } + if (i >= size) { + if (is_log_enabled) { + fprintf(log_file, "Page::%lu is empty and" + " uncorrupted\n", page_no); + } + + return(TRUE); + } +#else + for (ulint i = 0; i < size; i++) { if (*((const char*) data + i) != 0) { return(FALSE); } } /* Empty page */ return(TRUE); +#endif /* UNIV_INNOCHECKSUM */ } -#endif const srv_checksum_algorithm_t curr_algo = static_cast<srv_checksum_algorithm_t>(srv_checksum_algorithm); @@ -4968,9 +5034,44 @@ page_zip_verify_checksum( return(TRUE); } - calc = static_cast<ib_uint32_t>(page_zip_calc_checksum( - data, size, curr_algo)); - +#ifndef UNIV_INNOCHECKSUM + ulint page_no = mach_read_from_4(static_cast< + const unsigned char*> + (data) + FIL_PAGE_OFFSET); + ulint space_id = mach_read_from_4(static_cast< + const unsigned char*> + (data) + FIL_PAGE_SPACE_ID); + const page_id_t page_id(space_id, page_no); +#endif /* UNIV_INNOCHECKSUM */ + + const uint32_t calc = page_zip_calc_checksum(data, size, curr_algo); + +#ifdef UNIV_INNOCHECKSUM + if (is_log_enabled) { + fprintf(log_file, "page::%lu;" + " %s checksum: calculated = %u;" + " recorded = %u\n", page_no, + buf_checksum_algorithm_name( + static_cast<srv_checksum_algorithm_t>( + srv_checksum_algorithm)), + calc, stored); + } + + if (!strict_check) { + + const uint32_t crc32 = page_zip_calc_checksum( + data, size, SRV_CHECKSUM_ALGORITHM_CRC32); + + if (is_log_enabled) { + fprintf(log_file, "page::%lu: crc32 checksum:" + " calculated = %u; recorded = %u\n", + page_no, crc32, stored); + fprintf(log_file, "page::%lu: none checksum:" + " calculated = %lu; recorded = %u\n", + page_no, BUF_NO_CHECKSUM_MAGIC, stored); + } + } +#endif /* UNIV_INNOCHECKSUM */ if (stored == calc) { 
return(TRUE); } @@ -4980,28 +5081,36 @@ page_zip_verify_checksum( case SRV_CHECKSUM_ALGORITHM_CRC32: if (stored == BUF_NO_CHECKSUM_MAGIC) { +#ifndef UNIV_INNOCHECKSUM if (curr_algo == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) { page_warn_strict_checksum( curr_algo, SRV_CHECKSUM_ALGORITHM_NONE, - space_id, page_no); + page_id); } +#endif /* UNIV_INNOCHECKSUM */ return(TRUE); } - innodb = static_cast<ib_uint32_t>(page_zip_calc_checksum( - data, size, SRV_CHECKSUM_ALGORITHM_INNODB)); + if (stored == page_zip_calc_checksum(data, size, curr_algo, + true)) { + return(TRUE); + } - if (stored == innodb) { + if (stored == page_zip_calc_checksum( + data, size, SRV_CHECKSUM_ALGORITHM_INNODB)) { + +#ifndef UNIV_INNOCHECKSUM if (curr_algo == SRV_CHECKSUM_ALGORITHM_STRICT_CRC32) { page_warn_strict_checksum( curr_algo, SRV_CHECKSUM_ALGORITHM_INNODB, - space_id, page_no); + page_id); } +#endif /* UNIV_INNOCHECKSUM */ return(TRUE); } @@ -5011,54 +5120,60 @@ page_zip_verify_checksum( case SRV_CHECKSUM_ALGORITHM_INNODB: if (stored == BUF_NO_CHECKSUM_MAGIC) { +#ifndef UNIV_INNOCHECKSUM if (curr_algo == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) { page_warn_strict_checksum( curr_algo, SRV_CHECKSUM_ALGORITHM_NONE, - space_id, page_no); + page_id); } +#endif /* UNIV_INNOCHECKSUM */ return(TRUE); } - crc32 = static_cast<ib_uint32_t>(page_zip_calc_checksum( - data, size, SRV_CHECKSUM_ALGORITHM_CRC32)); - - if (stored == crc32) { + if (stored == page_zip_calc_checksum( + data, size, SRV_CHECKSUM_ALGORITHM_CRC32) + || stored == page_zip_calc_checksum( + data, size, SRV_CHECKSUM_ALGORITHM_CRC32, true)) { +#ifndef UNIV_INNOCHECKSUM if (curr_algo == SRV_CHECKSUM_ALGORITHM_STRICT_INNODB) { page_warn_strict_checksum( curr_algo, SRV_CHECKSUM_ALGORITHM_CRC32, - space_id, page_no); + page_id); } - +#endif /* UNIV_INNOCHECKSUM */ return(TRUE); } break; case SRV_CHECKSUM_ALGORITHM_STRICT_NONE: - crc32 = static_cast<ib_uint32_t>(page_zip_calc_checksum( - data, size, SRV_CHECKSUM_ALGORITHM_CRC32)); - - if (stored 
== crc32) { + if (stored == page_zip_calc_checksum( + data, size, SRV_CHECKSUM_ALGORITHM_CRC32) + || stored == page_zip_calc_checksum( + data, size, SRV_CHECKSUM_ALGORITHM_CRC32, true)) { +#ifndef UNIV_INNOCHECKSUM page_warn_strict_checksum( - curr_algo, SRV_CHECKSUM_ALGORITHM_CRC32, - space_id, page_no); - + curr_algo, + SRV_CHECKSUM_ALGORITHM_CRC32, + page_id); +#endif /* UNIV_INNOCHECKSUM */ return(TRUE); } - innodb = static_cast<ib_uint32_t>(page_zip_calc_checksum( - data, size, SRV_CHECKSUM_ALGORITHM_INNODB)); + if (stored == page_zip_calc_checksum( + data, size, SRV_CHECKSUM_ALGORITHM_INNODB)) { - if (stored == innodb) { +#ifndef UNIV_INNOCHECKSUM page_warn_strict_checksum( curr_algo, SRV_CHECKSUM_ALGORITHM_INNODB, - space_id, page_no); + page_id); +#endif /* UNIV_INNOCHECKSUM */ return(TRUE); } |