diff options
author | Marko Mäkelä <marko.makela@oracle.com> | 2011-10-26 12:23:57 +0300 |
---|---|---|
committer | Marko Mäkelä <marko.makela@oracle.com> | 2011-10-26 12:23:57 +0300 |
commit | 2c67d5066db7d6aada4d93297918a8ff3e57aa33 (patch) | |
tree | 0b4f76505019be0313750af509a14e04cc031e00 /storage | |
parent | 91b5e9352a82b096197aa9f24f149cf3bf892b8a (diff) | |
download | mariadb-git-2c67d5066db7d6aada4d93297918a8ff3e57aa33.tar.gz |
Revert revno:3452.71.32 (Bug#12612184 fix).
Bug#12612184 RACE CONDITION AFTER BTR_CUR_PESSIMISTIC_UPDATE()
The fix introduced potentially more severe crash recovery problems
than the bug causes. Revert the fix for now.
Diffstat (limited to 'storage')
25 files changed, 192 insertions, 597 deletions
diff --git a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c index e8e065a3116..5079757272a 100644 --- a/storage/innobase/btr/btr0btr.c +++ b/storage/innobase/btr/btr0btr.c @@ -1948,7 +1948,7 @@ btr_node_ptr_delete( ut_a(err == DB_SUCCESS); if (!compressed) { - btr_cur_compress_if_useful(&cursor, FALSE, mtr); + btr_cur_compress_if_useful(&cursor, mtr); } } @@ -1956,10 +1956,9 @@ btr_node_ptr_delete( If page is the only on its level, this function moves its records to the father page, thus reducing the tree height. */ static -page_t* +void btr_lift_page_up( /*=============*/ - /* out: father page */ dict_index_t* index, /* in: index tree */ page_t* page, /* in: page which is the only on its level; must not be empty: use @@ -2035,8 +2034,6 @@ btr_lift_page_up( ibuf_reset_free_bits(index, father_page); ut_ad(page_validate(father_page, index)); ut_ad(btr_check_node_ptr(index, father_page, mtr)); - - return(father_page); } /***************************************************************** @@ -2053,13 +2050,11 @@ enough free extents so that the compression will always succeed if done! */ void btr_compress( /*=========*/ - btr_cur_t* cursor, /* in/out: cursor on the page to merge - or lift; the page must not be empty: - when deleting records, use btr_discard_page() - if the page would become empty */ - ibool adjust, /* in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /* in/out: mini-transaction */ + btr_cur_t* cursor, /* in: cursor on the page to merge or lift; + the page must not be empty: in record delete + use btr_discard_page if the page would become + empty */ + mtr_t* mtr) /* in: mtr */ { dict_index_t* index; ulint space; @@ -2074,7 +2069,6 @@ btr_compress( rec_t* node_ptr; ulint data_size; ulint n_recs; - ulint nth_rec = 0; /* remove bogus warning */ ulint max_ins_size; ulint max_ins_size_reorg; ulint comp; @@ -2082,7 +2076,6 @@ btr_compress( page = btr_cur_get_page(cursor); index = btr_cur_get_index(cursor); comp = page_is_comp(page); - ut_a((ibool)!!comp == dict_table_is_comp(index->table)); ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), @@ -2104,10 +2097,6 @@ btr_compress( father_page = buf_frame_align(node_ptr); ut_a(comp == page_is_comp(father_page)); - if (adjust) { - nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor)); - } - /* Decide the page to which we try to merge and which will inherit the locks */ @@ -2132,8 +2121,9 @@ btr_compress( } else { /* The page is the only one on the level, lift the records to the father */ - merge_page = btr_lift_page_up(index, page, mtr); - goto func_exit; + btr_lift_page_up(index, page, mtr); + + return; } n_recs = page_get_n_recs(page); @@ -2209,10 +2199,6 @@ btr_compress( index, mtr); lock_update_merge_left(merge_page, orig_pred, page); - - if (adjust) { - nth_rec += page_rec_get_n_recs_before(orig_pred); - } } else { orig_succ = page_rec_get_next( page_get_infimum_rec(merge_page)); @@ -2233,12 +2219,6 @@ btr_compress( btr_page_free(index, page, mtr); ut_ad(btr_check_node_ptr(index, merge_page, mtr)); - -func_exit: - if (adjust) { - btr_cur_position(index, page_rec_get_nth(merge_page, nth_rec), - cursor); - } } /***************************************************************** diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index 95d87344e93..3c12e28feb6 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -1790,9 +1790,7 @@ btr_cur_pessimistic_update( /* out: DB_SUCCESS or error code */ ulint flags, /* in: undo logging, locking, and rollback flags */ - btr_cur_t* cursor, /* in/out: cursor on the record to update; - cursor may become invalid if *big_rec == NULL - || !(flags & BTR_KEEP_POS_FLAG) */ + btr_cur_t* cursor, /* in: cursor on the record to update */ big_rec_t** big_rec,/* out: big rec vector whose fields have to be stored externally by the caller, or NULL */ upd_t* update, /* in: update vector; this is allowed also @@ -1927,10 +1925,6 @@ btr_cur_pessimistic_update( err = DB_TOO_BIG_RECORD; goto return_after_reservations; } - - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(btr_page_get_level(page, mtr) == 0); - ut_ad(flags & BTR_KEEP_POS_FLAG); } page_cursor = btr_cur_get_page_cur(cursor); @@ -1957,8 +1951,6 @@ btr_cur_pessimistic_update( ut_a(rec || optim_err != DB_UNDERFLOW); if (rec) { - page_cursor->rec = rec; - lock_rec_restore_from_page_infimum(rec, page); rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr); @@ -1972,30 +1964,12 @@ btr_cur_pessimistic_update( btr_cur_unmark_extern_fields(rec, mtr, offsets); } - btr_cur_compress_if_useful( - cursor, - big_rec_vec != NULL && (flags & BTR_KEEP_POS_FLAG), - mtr); + btr_cur_compress_if_useful(cursor, mtr); err = DB_SUCCESS; goto return_after_reservations; } - if (big_rec_vec) { - ut_ad(index->type & DICT_CLUSTERED); - ut_ad(btr_page_get_level(page, mtr) == 0); - ut_ad(flags & BTR_KEEP_POS_FLAG); - - /* btr_page_split_and_insert() in - btr_cur_pessimistic_insert() invokes - mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK). - We must keep the index->lock when we created a - big_rec, so that row_upd_clust_rec() can store the - big_rec in the same mini-transaction. */ - - mtr_x_lock(dict_index_get_lock(index), mtr); - } - if (page_cur_is_before_first(page_cursor)) { /* The record to be updated was positioned as the first user record on its page */ @@ -2016,7 +1990,6 @@ btr_cur_pessimistic_update( ut_a(rec); ut_a(err == DB_SUCCESS); ut_a(dummy_big_rec == NULL); - page_cursor->rec = rec; rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr); offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); @@ -2051,43 +2024,6 @@ return_after_reservations: return(err); } -/***************************************************************** -Commits and restarts a mini-transaction so that it will retain an -x-lock on index->lock and the cursor page. */ - -void -btr_cur_mtr_commit_and_start( -/*=========================*/ - btr_cur_t* cursor, /* in: cursor */ - mtr_t* mtr) /* in/out: mini-transaction */ -{ - buf_block_t* block; - - block = buf_block_align(btr_cur_get_rec(cursor)); - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - /* Keep the locks across the mtr_commit(mtr). */ - rw_lock_x_lock(dict_index_get_lock(cursor->index)); - rw_lock_x_lock(&block->lock); - mutex_enter(&block->mutex); -#ifdef UNIV_SYNC_DEBUG - buf_block_buf_fix_inc_debug(block, __FILE__, __LINE__); -#else - buf_block_buf_fix_inc(block); -#endif - mutex_exit(&block->mutex); - /* Write out the redo log. */ - mtr_commit(mtr); - mtr_start(mtr); - /* Reassociate the locks with the mini-transaction. - They will be released on mtr_commit(mtr). */ - mtr_memo_push(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK); - mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX); -} - /*==================== B-TREE DELETE MARK AND UNMARK ===============*/ /******************************************************************** @@ -2456,6 +2392,30 @@ btr_cur_del_unmark_for_ibuf( /*==================== B-TREE RECORD REMOVE =========================*/ /***************************************************************** +Tries to compress a page of the tree on the leaf level. It is assumed +that mtr holds an x-latch on the tree and on the cursor page. To avoid +deadlocks, mtr must also own x-latches to brothers of page, if those +brothers exist. NOTE: it is assumed that the caller has reserved enough +free extents so that the compression will always succeed if done! */ + +void +btr_cur_compress( +/*=============*/ + btr_cur_t* cursor, /* in: cursor on the page to compress; + cursor does not stay valid */ + mtr_t* mtr) /* in: mtr */ +{ + ut_ad(mtr_memo_contains(mtr, + dict_index_get_lock(btr_cur_get_index(cursor)), + MTR_MEMO_X_LOCK)); + ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)), + MTR_MEMO_PAGE_X_FIX)); + ut_ad(btr_page_get_level(btr_cur_get_page(cursor), mtr) == 0); + + btr_compress(cursor, mtr); +} + +/***************************************************************** Tries to compress a page of the tree if it seems useful. It is assumed that mtr holds an x-latch on the tree and on the cursor page. To avoid deadlocks, mtr must also own x-latches to brothers of page, if those @@ -2466,12 +2426,10 @@ ibool btr_cur_compress_if_useful( /*=======================*/ /* out: TRUE if compression occurred */ - btr_cur_t* cursor, /* in/out: cursor on the page to compress; - cursor does not stay valid if !adjust and - compression occurs */ - ibool adjust, /* in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /* in/out: mini-transaction */ + btr_cur_t* cursor, /* in: cursor on the page to compress; + cursor does not stay valid if compression + occurs */ + mtr_t* mtr) /* in: mtr */ { ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(btr_cur_get_index(cursor)), @@ -2481,7 +2439,7 @@ btr_cur_compress_if_useful( if (btr_cur_compress_recommendation(cursor, mtr)) { - btr_compress(cursor, adjust, mtr); + btr_compress(cursor, mtr); return(TRUE); } @@ -2694,7 +2652,7 @@ return_after_reservations: mem_heap_free(heap); if (ret == FALSE) { - ret = btr_cur_compress_if_useful(cursor, FALSE, mtr); + ret = btr_cur_compress_if_useful(cursor, mtr); } if (n_extents > 0) { diff --git a/storage/innobase/include/btr0btr.h b/storage/innobase/include/btr0btr.h index 269fa355558..1573de7e818 100644 --- a/storage/innobase/include/btr0btr.h +++ b/storage/innobase/include/btr0btr.h @@ -312,13 +312,11 @@ enough free extents so that the compression will always succeed if done! */ void btr_compress( /*=========*/ - btr_cur_t* cursor, /* in/out: cursor on the page to merge - or lift; the page must not be empty: - when deleting records, use btr_discard_page() - if the page would become empty */ - ibool adjust, /* in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr); /* in/out: mini-transaction */ + btr_cur_t* cursor, /* in: cursor on the page to merge or lift; + the page must not be empty: in record delete + use btr_discard_page if the page would become + empty */ + mtr_t* mtr); /* in: mtr */ /***************************************************************** Discards a page from a B-tree. This is used to remove the last record from a B-tree page: the whole page must be removed at the same time. This cannot diff --git a/storage/innobase/include/btr0cur.h b/storage/innobase/include/btr0cur.h index c068d8d3318..20235c55f22 100644 --- a/storage/innobase/include/btr0cur.h +++ b/storage/innobase/include/btr0cur.h @@ -23,9 +23,6 @@ Created 10/16/1994 Heikki Tuuri #define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */ #define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the update vector or inserted entry */ -#define BTR_KEEP_POS_FLAG 8 /* btr_cur_pessimistic_update() - must keep cursor position when - moving columns to big_rec */ #define BTR_CUR_ADAPT #define BTR_CUR_HASH_ADAPT @@ -240,9 +237,7 @@ btr_cur_pessimistic_update( /* out: DB_SUCCESS or error code */ ulint flags, /* in: undo logging, locking, and rollback flags */ - btr_cur_t* cursor, /* in/out: cursor on the record to update; - cursor may become invalid if *big_rec == NULL - || !(flags & BTR_KEEP_POS_FLAG) */ + btr_cur_t* cursor, /* in: cursor on the record to update */ big_rec_t** big_rec,/* out: big rec vector whose fields have to be stored externally by the caller, or NULL */ upd_t* update, /* in: update vector; this is allowed also @@ -252,15 +247,6 @@ btr_cur_pessimistic_update( updates */ que_thr_t* thr, /* in: query thread */ mtr_t* mtr); /* in: mtr */ -/***************************************************************** -Commits and restarts a mini-transaction so that it will retain an -x-lock on index->lock and the cursor page. */ - -void -btr_cur_mtr_commit_and_start( -/*=========================*/ - btr_cur_t* cursor, /* in: cursor */ - mtr_t* mtr); /* in/out: mini-transaction */ /*************************************************************** Marks a clustered index record deleted. Writes an undo log record to undo log on this delete marking. Writes in the trx id field the id @@ -300,6 +286,19 @@ btr_cur_del_unmark_for_ibuf( rec_t* rec, /* in: record to delete unmark */ mtr_t* mtr); /* in: mtr */ /***************************************************************** +Tries to compress a page of the tree on the leaf level. It is assumed +that mtr holds an x-latch on the tree and on the cursor page. To avoid +deadlocks, mtr must also own x-latches to brothers of page, if those +brothers exist. NOTE: it is assumed that the caller has reserved enough +free extents so that the compression will always succeed if done! */ + +void +btr_cur_compress( +/*=============*/ + btr_cur_t* cursor, /* in: cursor on the page to compress; + cursor does not stay valid */ + mtr_t* mtr); /* in: mtr */ +/***************************************************************** Tries to compress a page of the tree if it seems useful. It is assumed that mtr holds an x-latch on the tree and on the cursor page. To avoid deadlocks, mtr must also own x-latches to brothers of page, if those @@ -310,12 +309,10 @@ ibool btr_cur_compress_if_useful( /*=======================*/ /* out: TRUE if compression occurred */ - btr_cur_t* cursor, /* in/out: cursor on the page to compress; - cursor does not stay valid if !adjust and - compression occurs */ - ibool adjust, /* in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr); /* in/out: mini-transaction */ + btr_cur_t* cursor, /* in: cursor on the page to compress; + cursor does not stay valid if compression + occurs */ + mtr_t* mtr); /* in: mtr */ /*********************************************************** Removes the record on which the tree cursor is positioned. It is assumed that the mtr has an x-latch on the page where the cursor is positioned, diff --git a/storage/innobase/include/buf0buf.h b/storage/innobase/include/buf0buf.h index 50e0aa9376c..0f7553a7043 100644 --- a/storage/innobase/include/buf0buf.h +++ b/storage/innobase/include/buf0buf.h @@ -654,25 +654,6 @@ buf_page_address_fold( /* out: the folded value */ ulint space, /* in: space id */ ulint offset);/* in: offset of the page within space */ -#ifdef UNIV_SYNC_DEBUG -/*********************************************************************** -Increments the bufferfix count. */ -UNIV_INLINE -void -buf_block_buf_fix_inc_debug( -/*========================*/ - buf_block_t* block, /* in: block to bufferfix */ - const char* file __attribute__ ((unused)), /* in: file name */ - ulint line __attribute__ ((unused))); /* in: line */ -#else /* UNIV_SYNC_DEBUG */ -/*********************************************************************** -Increments the bufferfix count. */ -UNIV_INLINE -void -buf_block_buf_fix_inc( -/*==================*/ - buf_block_t* block); /* in: block to bufferfix */ -#endif /* UNIV_SYNC_DEBUG */ /********************************************************************** Returns the control block of a file page, NULL if not found. */ UNIV_INLINE diff --git a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h index 24698557e77..273007c2778 100644 --- a/storage/innobase/include/page0page.h +++ b/storage/innobase/include/page0page.h @@ -234,22 +234,11 @@ page_get_supremum_rec( /*==================*/ /* out: the last record in record list */ page_t* page); /* in: page which must have record(s) */ -/************************************************************//** -Returns the nth record of the record list. -This is the inverse function of page_rec_get_n_recs_before(). */ +/**************************************************************** +Returns the middle record of record list. If there are an even number +of records in the list, returns the first record of upper half-list. */ rec_t* -page_rec_get_nth( -/*=============*/ - /* out: nth record */ - page_t* page, /* in: page */ - ulint nth); /* in: nth record */ -/***************************************************************** -Returns the middle record of the records on the page. If there is an -even number of records in the list, returns the first record of the -upper half-list. */ -UNIV_INLINE -rec_t* page_get_middle_rec( /*================*/ /* out: middle record */ @@ -291,8 +280,7 @@ page_get_n_recs( page_t* page); /* in: index page */ /******************************************************************* Returns the number of records before the given record in chain. -The number includes infimum and supremum records. -This is the inverse function of page_rec_get_nth(). */ +The number includes infimum and supremum records. */ ulint page_rec_get_n_recs_before( diff --git a/storage/innobase/include/page0page.ic b/storage/innobase/include/page0page.ic index a019aa28515..d9e67f3eeeb 100644 --- a/storage/innobase/include/page0page.ic +++ b/storage/innobase/include/page0page.ic @@ -341,22 +341,6 @@ page_rec_is_infimum( } /***************************************************************** -Returns the middle record of the records on the page. If there is an -even number of records in the list, returns the first record of the -upper half-list. */ -UNIV_INLINE -rec_t* -page_get_middle_rec( -/*================*/ - /* out: middle record */ - page_t* page) /* in: page */ -{ - ulint middle = (page_get_n_recs(page) + 2) / 2; - - return(page_rec_get_nth(page, middle)); -} - -/***************************************************************** Compares a data tuple to a physical record. Differs from the function cmp_dtuple_rec_with_match in the way that the record must reside on an index page, and also page infimum and supremum records can be given in diff --git a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c index 6a89df7de22..543cf9e34eb 100644 --- a/storage/innobase/page/page0page.c +++ b/storage/innobase/page/page0page.c @@ -1194,42 +1194,49 @@ page_dir_balance_slot( } /**************************************************************** -Returns the nth record of the record list. */ +Returns the middle record of the record list. If there are an even number +of records in the list, returns the first record of the upper half-list. */ rec_t* -page_rec_get_nth( -/*=============*/ - /* out: nth record */ - page_t* page, /* in: page */ - ulint nth) /* in: nth record */ +page_get_middle_rec( +/*================*/ + /* out: middle record */ + page_t* page) /* in: page */ { page_dir_slot_t* slot; + ulint middle; ulint i; ulint n_owned; + ulint count; rec_t* rec; - ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1)); + /* This many records we must leave behind */ + middle = (page_get_n_recs(page) + 2) / 2; + + count = 0; for (i = 0;; i++) { slot = page_dir_get_nth_slot(page, i); n_owned = page_dir_slot_get_n_owned(slot); - if (n_owned > nth) { + if (count + n_owned > middle) { break; } else { - nth -= n_owned; + count += n_owned; } } ut_ad(i > 0); slot = page_dir_get_nth_slot(page, i - 1); rec = page_dir_slot_get_rec(slot); + rec = page_rec_get_next(rec); + + /* There are now count records behind rec */ - do { + for (i = 0; i < middle - count; i++) { rec = page_rec_get_next(rec); - ut_ad(rec); - } while (nth--); + } return(rec); } diff --git a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c index c3b8f54a3c5..4f5f948f218 100644 --- a/storage/innobase/row/row0ins.c +++ b/storage/innobase/row/row0ins.c @@ -259,7 +259,6 @@ row_ins_sec_index_entry_by_modify( err = btr_cur_pessimistic_update(BTR_KEEP_SYS_FLAG, cursor, &dummy_big_rec, update, 0, thr, mtr); - ut_a(!dummy_big_rec); } func_exit: mem_heap_free(heap); @@ -330,9 +329,8 @@ row_ins_clust_index_entry_by_modify( goto func_exit; } - err = btr_cur_pessimistic_update( - BTR_KEEP_POS_FLAG, cursor, big_rec, update, - 0, thr, mtr); + err = btr_cur_pessimistic_update(0, cursor, big_rec, update, + 0, thr, mtr); } func_exit: mem_heap_free(heap); @@ -2086,41 +2084,6 @@ row_ins_index_entry_low( err = row_ins_clust_index_entry_by_modify( mode, &cursor, &big_rec, entry, ext_vec, n_ext_vec, thr, &mtr); - - if (big_rec) { - ut_a(err == DB_SUCCESS); - /* Write out the externally stored - columns while still x-latching - index->lock and block->lock. We have - to mtr_commit(mtr) first, so that the - redo log will be written in the - correct order. Otherwise, we would run - into trouble on crash recovery if mtr - freed B-tree pages on which some of - the big_rec fields will be written. */ - btr_cur_mtr_commit_and_start(&cursor, &mtr); - - rec = btr_cur_get_rec(&cursor); - offsets = rec_get_offsets(rec, index, offsets, - ULINT_UNDEFINED, - &heap); - - err = btr_store_big_rec_extern_fields( - index, rec, offsets, big_rec, &mtr); - /* If writing big_rec fails (for - example, because of DB_OUT_OF_FILE_SPACE), - the record will be corrupted. Even if - we did not update any externally - stored columns, our update could cause - the record to grow so that a - non-updated column was selected for - external storage. This non-update - would not have been written to the - undo log, and thus the record cannot - be rolled back. */ - ut_a(err == DB_SUCCESS); - goto stored_big_rec; - } } else { err = row_ins_sec_index_entry_by_modify( mode, &cursor, entry, thr, &mtr); @@ -2157,6 +2120,7 @@ function_exit: mtr_commit(&mtr); if (big_rec) { + rec_t* rec; mtr_start(&mtr); btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, @@ -2167,7 +2131,7 @@ function_exit: err = btr_store_big_rec_extern_fields(index, rec, offsets, big_rec, &mtr); -stored_big_rec: + if (modify) { dtuple_big_rec_free(big_rec); } else { diff --git a/storage/innobase/row/row0umod.c b/storage/innobase/row/row0umod.c index 0b00aa2411a..a3333fcc536 100644 --- a/storage/innobase/row/row0umod.c +++ b/storage/innobase/row/row0umod.c @@ -119,7 +119,6 @@ row_undo_mod_clust_low( | BTR_KEEP_SYS_FLAG, btr_cur, &dummy_big_rec, node->update, node->cmpl_info, thr, mtr); - ut_ad(!dummy_big_rec); } return(err); @@ -472,7 +471,6 @@ row_undo_mod_del_unmark_sec_and_undo_update( BTR_KEEP_SYS_FLAG | BTR_NO_LOCKING_FLAG, btr_cur, &dummy_big_rec, update, 0, thr, &mtr); - ut_ad(!dummy_big_rec); } mem_heap_free(heap); diff --git a/storage/innobase/row/row0upd.c b/storage/innobase/row/row0upd.c index 694b00ea265..0790cfe02e2 100644 --- a/storage/innobase/row/row0upd.c +++ b/storage/innobase/row/row0upd.c @@ -1580,48 +1580,32 @@ row_upd_clust_rec( ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), dict_table_is_comp(index->table))); - err = btr_cur_pessimistic_update( - BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, btr_cur, - &big_rec, node->update, node->cmpl_info, thr, mtr); + err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur, + &big_rec, node->update, + node->cmpl_info, thr, mtr); + mtr_commit(mtr); - if (big_rec) { + if (err == DB_SUCCESS && big_rec) { mem_heap_t* heap = NULL; ulint offsets_[REC_OFFS_NORMAL_SIZE]; rec_t* rec; *offsets_ = (sizeof offsets_) / sizeof *offsets_; - ut_a(err == DB_SUCCESS); - /* Write out the externally stored columns while still - x-latching index->lock and block->lock. We have to - mtr_commit(mtr) first, so that the redo log will be - written in the correct order. Otherwise, we would run - into trouble on crash recovery if mtr freed B-tree - pages on which some of the big_rec fields will be - written. */ - btr_cur_mtr_commit_and_start(btr_cur, mtr); + mtr_start(mtr); + ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); rec = btr_cur_get_rec(btr_cur); err = btr_store_big_rec_extern_fields( index, rec, rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap), - big_rec, mtr); + big_rec, mtr); if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } - /* If writing big_rec fails (for example, because of - DB_OUT_OF_FILE_SPACE), the record will be corrupted. - Even if we did not update any externally stored - columns, our update could cause the record to grow so - that a non-updated column was selected for external - storage. This non-update would not have been written - to the undo log, and thus the record cannot be rolled - back. */ - ut_a(err == DB_SUCCESS); + mtr_commit(mtr); } - mtr_commit(mtr); - if (big_rec) { dtuple_big_rec_free(big_rec); } diff --git a/storage/innodb_plugin/ChangeLog b/storage/innodb_plugin/ChangeLog index e6724eb08c2..859b9fb627a 100644 --- a/storage/innodb_plugin/ChangeLog +++ b/storage/innodb_plugin/ChangeLog @@ -101,16 +101,6 @@ * page/page0zip.c, rem/rem0rec.c: Fix Bug#61191 question about page_zip_available() -2011-06-16 The InnoDB Team - - * btr/btr0btr.c, btr/btr0cur.c, include/btr0btr.h, include/btr0cur.h, - include/btr0cur.ic, include/buf0buf.h, include/buf0buf.ic, - include/page0cur.ic, include/page0page.h, include/page0page.ic, - include/sync0rw.ic, include/sync0sync.h, page/page0cur.c, - page/page0page.c, row/row0ins.c, row/row0upd.c, - sync/sync0rw.c, sync/sync0sync.c: - Fix Bug#12612184 Race condition after btr_cur_pessimistic_update() - 2011-06-09 The InnoDB Team * btr/btr0cur.c, include/rem0rec.h, include/rem0rec.ic, * row/row0row.c, row/row0vers.c, trx/trx0rec.c: diff --git a/storage/innodb_plugin/btr/btr0btr.c b/storage/innodb_plugin/btr/btr0btr.c index cb94ef08cd6..23729c12c1a 100644 --- a/storage/innodb_plugin/btr/btr0btr.c +++ b/storage/innodb_plugin/btr/btr0btr.c @@ -3001,16 +3001,15 @@ btr_node_ptr_delete( ut_a(err == DB_SUCCESS); if (!compressed) { - btr_cur_compress_if_useful(&cursor, FALSE, mtr); + btr_cur_compress_if_useful(&cursor, mtr); } } /*************************************************************//** If page is the only on its level, this function moves its records to the -father page, thus reducing the tree height. -@return father block */ +father page, thus reducing the tree height. */ static -buf_block_t* +void btr_lift_page_up( /*=============*/ dict_index_t* index, /*!< in: index tree */ @@ -3127,8 +3126,6 @@ btr_lift_page_up( } ut_ad(page_validate(father_page, index)); ut_ad(btr_check_node_ptr(index, father_block, mtr)); - - return(father_block); } /*************************************************************//** @@ -3145,13 +3142,11 @@ UNIV_INTERN ibool btr_compress( /*=========*/ - btr_cur_t* cursor, /*!< in/out: cursor on the page to merge - or lift; the page must not be empty: - when deleting records, use btr_discard_page() - if the page would become empty */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + btr_cur_t* cursor, /*!< in: cursor on the page to merge or lift; + the page must not be empty: in record delete + use btr_discard_page if the page would become + empty */ + mtr_t* mtr) /*!< in: mtr */ { dict_index_t* index; ulint space; @@ -3169,14 +3164,12 @@ btr_compress( ulint* offsets; ulint data_size; ulint n_recs; - ulint nth_rec = 0; /* remove bogus warning */ ulint max_ins_size; ulint max_ins_size_reorg; block = btr_cur_get_block(cursor); page = btr_cur_get_page(cursor); index = btr_cur_get_index(cursor); - ut_a((ibool) !!page_is_comp(page) == dict_table_is_comp(index->table)); ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(index), @@ -3197,10 +3190,6 @@ btr_compress( offsets = btr_page_get_father_block(NULL, heap, index, block, mtr, &father_cursor); - if (adjust) { - nth_rec = page_rec_get_n_recs_before(btr_cur_get_rec(cursor)); - } - /* Decide the page to which we try to merge and which will inherit the locks */ @@ -3227,9 +3216,9 @@ btr_compress( } else { /* The page is the only one on the level, lift the records to the father */ - - merge_block = btr_lift_page_up(index, block, mtr); - goto func_exit; + btr_lift_page_up(index, block, mtr); + mem_heap_free(heap); + return(TRUE); } n_recs = page_get_n_recs(page); @@ -3311,10 +3300,6 @@ err_exit: btr_node_ptr_delete(index, block, mtr); lock_update_merge_left(merge_block, orig_pred, block); - - if (adjust) { - nth_rec += page_rec_get_n_recs_before(orig_pred); - } } else { rec_t* orig_succ; #ifdef UNIV_BTR_DEBUG @@ -3379,6 +3364,7 @@ err_exit: } btr_blob_dbg_remove(page, index, "btr_compress"); + mem_heap_free(heap); if (!dict_index_is_clust(index) && page_is_leaf(merge_page)) { /* Update the free bits of the B-tree page in the @@ -3430,16 +3416,6 @@ err_exit: btr_page_free(index, block, mtr); ut_ad(btr_check_node_ptr(index, merge_block, mtr)); -func_exit: - mem_heap_free(heap); - - if (adjust) { - btr_cur_position( - index, - page_rec_get_nth(merge_block->frame, nth_rec), - merge_block, cursor); - } - return(TRUE); } diff --git a/storage/innodb_plugin/btr/btr0cur.c b/storage/innodb_plugin/btr/btr0cur.c index 4c862f061c5..9f4be053a43 100644 --- a/storage/innodb_plugin/btr/btr0cur.c +++ b/storage/innodb_plugin/btr/btr0cur.c @@ -2099,9 +2099,7 @@ btr_cur_pessimistic_update( /*=======================*/ ulint flags, /*!< in: undo logging, locking, and rollback flags */ - btr_cur_t* cursor, /*!< in/out: cursor on the record to update; - cursor may become invalid if *big_rec == NULL - || !(flags & BTR_KEEP_POS_FLAG) */ + btr_cur_t* cursor, /*!< in: cursor on the record to update */ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to be stored externally by the caller, or NULL */ @@ -2240,7 +2238,7 @@ btr_cur_pessimistic_update( record to be inserted: we have to remember which fields were such */ ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec)); - ut_ad(rec_offs_validate(rec, index, offsets)); + offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap); n_ext += btr_push_update_extern_fields(new_entry, update, *heap); if (UNIV_LIKELY_NULL(page_zip)) { @@ -2263,10 +2261,6 @@ make_external: err = DB_TOO_BIG_RECORD; goto return_after_reservations; } - - ut_ad(page_is_leaf(page)); - ut_ad(dict_index_is_clust(index)); - ut_ad(flags & BTR_KEEP_POS_FLAG); } /* Store state of explicit locks on rec on the page infimum record, @@ -2294,8 +2288,6 @@ make_external: rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr); if (rec) { - page_cursor->rec = rec; - lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor), rec, block); @@ -2309,10 +2301,7 @@ make_external: rec, index, offsets, mtr); } - btr_cur_compress_if_useful( - cursor, - big_rec_vec != NULL && (flags & BTR_KEEP_POS_FLAG), - mtr); + btr_cur_compress_if_useful(cursor, mtr); if (page_zip && !dict_index_is_clust(index) && page_is_leaf(page)) { @@ -2332,21 +2321,6 @@ make_external: } } - if (big_rec_vec) { - ut_ad(page_is_leaf(page)); - ut_ad(dict_index_is_clust(index)); - ut_ad(flags & BTR_KEEP_POS_FLAG); - - /* btr_page_split_and_insert() in - btr_cur_pessimistic_insert() invokes - mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK). - We must keep the index->lock when we created a - big_rec, so that row_upd_clust_rec() can store the - big_rec in the same mini-transaction. */ - - mtr_x_lock(dict_index_get_lock(index), mtr); - } - /* Was the record to be updated positioned as the first user record on its page? */ was_first = page_cur_is_before_first(page_cursor); @@ -2362,7 +2336,6 @@ make_external: ut_a(rec); ut_a(err == DB_SUCCESS); ut_a(dummy_big_rec == NULL); - page_cursor->rec = rec; if (dict_index_is_sec_or_ibuf(index)) { /* Update PAGE_MAX_TRX_ID in the index page header. @@ -2421,39 +2394,6 @@ return_after_reservations: return(err); } -/**************************************************************//** -Commits and restarts a mini-transaction so that it will retain an -x-lock on index->lock and the cursor page. */ -UNIV_INTERN -void -btr_cur_mtr_commit_and_start( -/*=========================*/ - btr_cur_t* cursor, /*!< in: cursor */ - mtr_t* mtr) /*!< in/out: mini-transaction */ -{ - buf_block_t* block; - - block = btr_cur_get_block(cursor); - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - /* Keep the locks across the mtr_commit(mtr). */ - rw_lock_x_lock(dict_index_get_lock(cursor->index)); - rw_lock_x_lock(&block->lock); - mutex_enter(&block->mutex); - buf_block_buf_fix_inc(block, __FILE__, __LINE__); - mutex_exit(&block->mutex); - /* Write out the redo log. */ - mtr_commit(mtr); - mtr_start(mtr); - /* Reassociate the locks with the mini-transaction. - They will be released on mtr_commit(mtr). */ - mtr_memo_push(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK); - mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX); -} - /*==================== B-TREE DELETE MARK AND UNMARK ===============*/ /****************************************************************//** @@ -2829,12 +2769,10 @@ UNIV_INTERN ibool btr_cur_compress_if_useful( /*=======================*/ - btr_cur_t* cursor, /*!< in/out: cursor on the page to compress; - cursor does not stay valid if !adjust and - compression occurs */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + btr_cur_t* cursor, /*!< in: cursor on the page to compress; + cursor does not stay valid if compression + occurs */ + mtr_t* mtr) /*!< in: mtr */ { ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(btr_cur_get_index(cursor)), @@ -2843,7 +2781,7 @@ btr_cur_compress_if_useful( MTR_MEMO_PAGE_X_FIX)); return(btr_cur_compress_recommendation(cursor, mtr) - && btr_compress(cursor, adjust, mtr)); + && btr_compress(cursor, mtr)); } /*******************************************************//** @@ -3085,7 +3023,7 @@ return_after_reservations: mem_heap_free(heap); if (ret == FALSE) { - ret = btr_cur_compress_if_useful(cursor, FALSE, mtr); + ret = btr_cur_compress_if_useful(cursor, mtr); } if (n_extents > 0) { diff --git a/storage/innodb_plugin/include/btr0btr.h b/storage/innodb_plugin/include/btr0btr.h index c0a038dd21d..e32da9e4c86 100644 --- a/storage/innodb_plugin/include/btr0btr.h +++ b/storage/innodb_plugin/include/btr0btr.h @@ -488,14 +488,11 @@ UNIV_INTERN ibool btr_compress( /*=========*/ - btr_cur_t* cursor, /*!< in/out: cursor on the page to merge - or lift; the page must not be empty: - when deleting records, use btr_discard_page() - if the page would become empty */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - __attribute__((nonnull)); + btr_cur_t* cursor, /*!< in: cursor on the page to merge or lift; + the page must not be empty: in record delete + use btr_discard_page if the page would become + empty */ + mtr_t* mtr); /*!< in: mtr */ /*************************************************************//** Discards a page from a B-tree. This is used to remove the last record from a B-tree page: the whole page must be removed at the same time. This cannot diff --git a/storage/innodb_plugin/include/btr0cur.h b/storage/innodb_plugin/include/btr0cur.h index 6094a2a6c7a..3669ce28f02 100644 --- a/storage/innodb_plugin/include/btr0cur.h +++ b/storage/innodb_plugin/include/btr0cur.h @@ -36,9 +36,6 @@ Created 10/16/1994 Heikki Tuuri #define BTR_NO_LOCKING_FLAG 2 /* do no record lock checking */ #define BTR_KEEP_SYS_FLAG 4 /* sys fields will be found from the update vector or inserted entry */ -#define BTR_KEEP_POS_FLAG 8 /* btr_cur_pessimistic_update() - must keep cursor position when - moving columns to big_rec */ #ifndef UNIV_HOTBACKUP #include "que0types.h" @@ -312,9 +309,7 @@ btr_cur_pessimistic_update( /*=======================*/ ulint flags, /*!< in: undo logging, locking, and rollback flags */ - btr_cur_t* cursor, /*!< in/out: cursor on the record to update; - cursor may become invalid if *big_rec == NULL - || !(flags & BTR_KEEP_POS_FLAG) */ + btr_cur_t* cursor, /*!< in: cursor on the record to update */ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to be stored externally by the caller, or NULL */ @@ -326,16 +321,6 @@ btr_cur_pessimistic_update( que_thr_t* thr, /*!< in: query thread */ mtr_t* mtr); /*!< in: mtr; must be committed before latching any further pages */ -/***************************************************************** -Commits and restarts a mini-transaction so that it will retain an -x-lock on index->lock and the cursor page. */ -UNIV_INTERN -void -btr_cur_mtr_commit_and_start( -/*=========================*/ - btr_cur_t* cursor, /*!< in: cursor */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - __attribute__((nonnull)); /***********************************************************//** Marks a clustered index record deleted. Writes an undo log record to undo log on this delete marking. Writes in the trx id field the id @@ -391,13 +376,10 @@ UNIV_INTERN ibool btr_cur_compress_if_useful( /*=======================*/ - btr_cur_t* cursor, /*!< in/out: cursor on the page to compress; + btr_cur_t* cursor, /*!< in: cursor on the page to compress; cursor does not stay valid if compression occurs */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /*!< in/out: mini-transaction */ - __attribute__((nonnull)); + mtr_t* mtr); /*!< in: mtr */ /*******************************************************//** Removes the record on which the tree cursor is positioned. It is assumed that the mtr has an x-latch on the page where the cursor is positioned, diff --git a/storage/innodb_plugin/include/btr0cur.ic b/storage/innodb_plugin/include/btr0cur.ic index c833b3e8572..cd3a5d895bb 100644 --- a/storage/innodb_plugin/include/btr0cur.ic +++ b/storage/innodb_plugin/include/btr0cur.ic @@ -139,7 +139,7 @@ btr_cur_compress_recommendation( btr_cur_t* cursor, /*!< in: btr cursor */ mtr_t* mtr) /*!< in: mtr */ { - const page_t* page; + page_t* page; ut_ad(mtr_memo_contains(mtr, btr_cur_get_block(cursor), MTR_MEMO_PAGE_X_FIX)); diff --git a/storage/innodb_plugin/include/buf0buf.h b/storage/innodb_plugin/include/buf0buf.h index e7fe3901ad4..489d1bec5b6 100644 --- a/storage/innodb_plugin/include/buf0buf.h +++ b/storage/innodb_plugin/include/buf0buf.h @@ -468,31 +468,6 @@ buf_block_get_modify_clock( #else /* !UNIV_HOTBACKUP */ # define buf_block_modify_clock_inc(block) ((void) 0) #endif /* !UNIV_HOTBACKUP */ -/*******************************************************************//** -Increments the bufferfix count. */ -UNIV_INLINE -void -buf_block_buf_fix_inc_func( -/*=======================*/ -#ifdef UNIV_SYNC_DEBUG - const char* file, /*!< in: file name */ - ulint line, /*!< in: line */ -#endif /* UNIV_SYNC_DEBUG */ - buf_block_t* block) /*!< in/out: block to bufferfix */ - __attribute__((nonnull)); -#ifdef UNIV_SYNC_DEBUG -/** Increments the bufferfix count. -@param b in/out: block to bufferfix -@param f in: file name where requested -@param l in: line number where requested */ -# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b) -#else /* UNIV_SYNC_DEBUG */ -/** Increments the bufferfix count. -@param b in/out: block to bufferfix -@param f in: file name where requested -@param l in: line number where requested */ -# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b) -#endif /* UNIV_SYNC_DEBUG */ /********************************************************************//** Calculates a page checksum which is stored to the page when it is written to a file. Note that we must be careful to calculate the same value diff --git a/storage/innodb_plugin/include/buf0buf.ic b/storage/innodb_plugin/include/buf0buf.ic index 2f3d65e80bd..0fe1dbc2da5 100644 --- a/storage/innodb_plugin/include/buf0buf.ic +++ b/storage/innodb_plugin/include/buf0buf.ic @@ -916,6 +916,19 @@ buf_block_buf_fix_inc_func( block->page.buf_fix_count++; } +#ifdef UNIV_SYNC_DEBUG +/** Increments the bufferfix count. +@param b in/out: block to bufferfix +@param f in: file name where requested +@param l in: line number where requested */ +# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(f,l,b) +#else /* UNIV_SYNC_DEBUG */ +/** Increments the bufferfix count. +@param b in/out: block to bufferfix +@param f in: file name where requested +@param l in: line number where requested */ +# define buf_block_buf_fix_inc(b,f,l) buf_block_buf_fix_inc_func(b) +#endif /* UNIV_SYNC_DEBUG */ /*******************************************************************//** Decrements the bufferfix count. */ diff --git a/storage/innodb_plugin/include/page0page.h b/storage/innodb_plugin/include/page0page.h index caf4cee2c57..12c4fed75d2 100644 --- a/storage/innodb_plugin/include/page0page.h +++ b/storage/innodb_plugin/include/page0page.h @@ -281,42 +281,16 @@ page_get_supremum_offset( const page_t* page); /*!< in: page which must have record(s) */ #define page_get_infimum_rec(page) ((page) + page_get_infimum_offset(page)) #define page_get_supremum_rec(page) ((page) + page_get_supremum_offset(page)) - -/************************************************************//** -Returns the nth record of the record list. -This is the inverse function of page_rec_get_n_recs_before(). -@return nth record */ -UNIV_INTERN -const rec_t* -page_rec_get_nth_const( -/*===================*/ - const page_t* page, /*!< in: page */ - ulint nth) /*!< in: nth record */ - __attribute__((nonnull, warn_unused_result)); /************************************************************//** -Returns the nth record of the record list. -This is the inverse function of page_rec_get_n_recs_before(). -@return nth record */ -UNIV_INLINE -rec_t* -page_rec_get_nth( -/*=============*/ - page_t* page, /*< in: page */ - ulint nth) /*!< in: nth record */ - __attribute__((nonnull, warn_unused_result)); - -#ifndef UNIV_HOTBACKUP -/************************************************************//** -Returns the middle record of the records on the page. If there is an -even number of records in the list, returns the first record of the -upper half-list. +Returns the middle record of record list. If there are an even number +of records in the list, returns the first record of upper half-list. @return middle record */ -UNIV_INLINE +UNIV_INTERN rec_t* page_get_middle_rec( /*================*/ - page_t* page) /*!< in: page */ - __attribute__((nonnull, warn_unused_result)); + page_t* page); /*!< in: page */ +#ifndef UNIV_HOTBACKUP /*************************************************************//** Compares a data tuple to a physical record. Differs from the function cmp_dtuple_rec_with_match in the way that the record must reside on an @@ -371,7 +345,6 @@ page_get_n_recs( /***************************************************************//** Returns the number of records before the given record in chain. The number includes infimum and supremum records. -This is the inverse function of page_rec_get_nth(). @return number of records */ UNIV_INTERN ulint diff --git a/storage/innodb_plugin/include/page0page.ic b/storage/innodb_plugin/include/page0page.ic index 1450b0892b3..b096a5ba321 100644 --- a/storage/innodb_plugin/include/page0page.ic +++ b/storage/innodb_plugin/include/page0page.ic @@ -420,37 +420,7 @@ page_rec_is_infimum( return(page_rec_is_infimum_low(page_offset(rec))); } -/************************************************************//** -Returns the nth record of the record list. -This is the inverse function of page_rec_get_n_recs_before(). -@return nth record */ -UNIV_INLINE -rec_t* -page_rec_get_nth( -/*=============*/ - page_t* page, /*!< in: page */ - ulint nth) /*!< in: nth record */ -{ - return((rec_t*) page_rec_get_nth_const(page, nth)); -} - #ifndef UNIV_HOTBACKUP -/************************************************************//** -Returns the middle record of the records on the page. If there is an -even number of records in the list, returns the first record of the -upper half-list. -@return middle record */ -UNIV_INLINE -rec_t* -page_get_middle_rec( -/*================*/ - page_t* page) /*!< in: page */ -{ - ulint middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2; - - return(page_rec_get_nth(page, middle)); -} - /*************************************************************//** Compares a data tuple to a physical record. Differs from the function cmp_dtuple_rec_with_match in the way that the record must reside on an diff --git a/storage/innodb_plugin/page/page0cur.c b/storage/innodb_plugin/page/page0cur.c index b8c492328e8..ab5aa257338 100644 --- a/storage/innodb_plugin/page/page0cur.c +++ b/storage/innodb_plugin/page/page0cur.c @@ -1180,15 +1180,14 @@ page_cur_insert_rec_zip_reorg( /* Before trying to reorganize the page, store the number of preceding records on the page. */ pos = page_rec_get_n_recs_before(rec); - ut_ad(pos > 0); if (page_zip_reorganize(block, index, mtr)) { /* The page was reorganized: Find rec by seeking to pos, and update *current_rec. */ - if (pos > 1) { - rec = page_rec_get_nth(page, pos - 1); - } else { - rec = page + PAGE_NEW_INFIMUM; + rec = page + PAGE_NEW_INFIMUM; + + while (--pos) { + rec = page + rec_get_next_offs(rec, TRUE); } *current_rec = rec; @@ -1284,12 +1283,6 @@ page_cur_insert_rec_zip( insert_rec = page_cur_insert_rec_zip_reorg( current_rec, block, index, insert_rec, page, page_zip, mtr); -#ifdef UNIV_DEBUG - if (insert_rec) { - rec_offs_make_valid( - insert_rec, index, offsets); - } -#endif /* UNIV_DEBUG */ } return(insert_rec); diff --git a/storage/innodb_plugin/page/page0page.c b/storage/innodb_plugin/page/page0page.c index 1b9470acbbc..93869e997b5 100644 --- a/storage/innodb_plugin/page/page0page.c +++ b/storage/innodb_plugin/page/page0page.c @@ -1475,54 +1475,55 @@ page_dir_balance_slot( } } +#ifndef UNIV_HOTBACKUP /************************************************************//** -Returns the nth record of the record list. -This is the inverse function of page_rec_get_n_recs_before(). -@return nth record */ +Returns the middle record of the record list. If there are an even number +of records in the list, returns the first record of the upper half-list. +@return middle record */ UNIV_INTERN -const rec_t* -page_rec_get_nth_const( -/*===================*/ - const page_t* page, /*!< in: page */ - ulint nth) /*!< in: nth record */ +rec_t* +page_get_middle_rec( +/*================*/ + page_t* page) /*!< in: page */ { - const page_dir_slot_t* slot; + page_dir_slot_t* slot; + ulint middle; ulint i; ulint n_owned; - const rec_t* rec; + ulint count; + rec_t* rec; - ut_ad(nth < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1)); + /* This many records we must leave behind */ + middle = (page_get_n_recs(page) + PAGE_HEAP_NO_USER_LOW) / 2; + + count = 0; for (i = 0;; i++) { slot = page_dir_get_nth_slot(page, i); n_owned = page_dir_slot_get_n_owned(slot); - if (n_owned > nth) { + if (count + n_owned > middle) { break; } else { - nth -= n_owned; + count += n_owned; } } ut_ad(i > 0); slot = page_dir_get_nth_slot(page, i - 1); - rec = page_dir_slot_get_rec(slot); + rec = (rec_t*) page_dir_slot_get_rec(slot); + rec = page_rec_get_next(rec); - if (page_is_comp(page)) { - do { - rec = page_rec_get_next_low(rec, TRUE); - ut_ad(rec); - } while (nth--); - } else { - do { - rec = page_rec_get_next_low(rec, FALSE); - ut_ad(rec); - } while (nth--); + /* There are now count records behind rec */ + + for (i = 0; i < middle - count; i++) { + rec = page_rec_get_next(rec); } return(rec); } +#endif /* !UNIV_HOTBACKUP */ /***************************************************************//** Returns the number of records before the given record in chain. @@ -1584,7 +1585,6 @@ page_rec_get_n_recs_before( n--; ut_ad(n >= 0); - ut_ad(n < UNIV_PAGE_SIZE / (REC_N_NEW_EXTRA_BYTES + 1)); return((ulint) n); } diff --git a/storage/innodb_plugin/row/row0ins.c b/storage/innodb_plugin/row/row0ins.c index f0f6eca627f..82c3c931bef 100644 --- a/storage/innodb_plugin/row/row0ins.c +++ b/storage/innodb_plugin/row/row0ins.c @@ -345,9 +345,9 @@ row_ins_clust_index_entry_by_modify( return(DB_LOCK_TABLE_FULL); } - err = btr_cur_pessimistic_update( - BTR_KEEP_POS_FLAG, cursor, heap, big_rec, update, - 0, thr, mtr); + err = btr_cur_pessimistic_update(0, cursor, + heap, big_rec, update, + 0, thr, mtr); } return(err); @@ -1989,7 +1989,6 @@ row_ins_index_entry_low( ulint modify = 0; /* remove warning */ rec_t* insert_rec; rec_t* rec; - ulint* offsets; ulint err; ulint n_unique; big_rec_t* big_rec = NULL; @@ -2093,42 +2092,6 @@ row_ins_index_entry_low( err = row_ins_clust_index_entry_by_modify( mode, &cursor, &heap, &big_rec, entry, thr, &mtr); - - if (big_rec) { - ut_a(err == DB_SUCCESS); - /* Write out the externally stored - columns while still x-latching - index->lock and block->lock. We have - to mtr_commit(mtr) first, so that the - redo log will be written in the - correct order. Otherwise, we would run - into trouble on crash recovery if mtr - freed B-tree pages on which some of - the big_rec fields will be written. */ - btr_cur_mtr_commit_and_start(&cursor, &mtr); - - rec = btr_cur_get_rec(&cursor); - offsets = rec_get_offsets( - rec, index, NULL, - ULINT_UNDEFINED, &heap); - - err = btr_store_big_rec_extern_fields( - index, btr_cur_get_block(&cursor), - rec, offsets, &mtr, FALSE, big_rec); - /* If writing big_rec fails (for - example, because of DB_OUT_OF_FILE_SPACE), - the record will be corrupted. Even if - we did not update any externally - stored columns, our update could cause - the record to grow so that a - non-updated column was selected for - external storage. This non-update - would not have been written to the - undo log, and thus the record cannot - be rolled back. */ - ut_a(err == DB_SUCCESS); - goto stored_big_rec; - } } else { ut_ad(!n_ext); err = row_ins_sec_index_entry_by_modify( @@ -2157,6 +2120,8 @@ function_exit: mtr_commit(&mtr); if (UNIV_LIKELY_NULL(big_rec)) { + rec_t* rec; + ulint* offsets; mtr_start(&mtr); btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE, @@ -2170,7 +2135,6 @@ function_exit: index, btr_cur_get_block(&cursor), rec, offsets, &mtr, FALSE, big_rec); -stored_big_rec: if (modify) { dtuple_big_rec_free(big_rec); } else { diff --git a/storage/innodb_plugin/row/row0upd.c b/storage/innodb_plugin/row/row0upd.c index b5952ff0a78..072ca1d7b54 100644 --- a/storage/innodb_plugin/row/row0upd.c +++ b/storage/innodb_plugin/row/row0upd.c @@ -1969,43 +1969,28 @@ row_upd_clust_rec( ut_ad(!rec_get_deleted_flag(btr_pcur_get_rec(pcur), dict_table_is_comp(index->table))); - err = btr_cur_pessimistic_update( - BTR_NO_LOCKING_FLAG | BTR_KEEP_POS_FLAG, btr_cur, - &heap, &big_rec, node->update, node->cmpl_info, thr, mtr); - if (big_rec) { + err = btr_cur_pessimistic_update(BTR_NO_LOCKING_FLAG, btr_cur, + &heap, &big_rec, node->update, + node->cmpl_info, thr, mtr); + mtr_commit(mtr); + + if (err == DB_SUCCESS && big_rec) { ulint offsets_[REC_OFFS_NORMAL_SIZE]; rec_t* rec; rec_offs_init(offsets_); - ut_a(err == DB_SUCCESS); - /* Write out the externally stored columns while still - x-latching index->lock and block->lock. We have to - mtr_commit(mtr) first, so that the redo log will be - written in the correct order. Otherwise, we would run - into trouble on crash recovery if mtr freed B-tree - pages on which some of the big_rec fields will be - written. */ - btr_cur_mtr_commit_and_start(btr_cur, mtr); + mtr_start(mtr); + ut_a(btr_pcur_restore_position(BTR_MODIFY_TREE, pcur, mtr)); rec = btr_cur_get_rec(btr_cur); err = btr_store_big_rec_extern_fields( index, btr_cur_get_block(btr_cur), rec, rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED, &heap), mtr, TRUE, big_rec); - /* If writing big_rec fails (for example, because of - DB_OUT_OF_FILE_SPACE), the record will be corrupted. - Even if we did not update any externally stored - columns, our update could cause the record to grow so - that a non-updated column was selected for external - storage. This non-update would not have been written - to the undo log, and thus the record cannot be rolled - back. */ - ut_a(err == DB_SUCCESS); + mtr_commit(mtr); } - mtr_commit(mtr); - if (UNIV_LIKELY_NULL(heap)) { mem_heap_free(heap); } |