From 9df4c14354e73455f0e0057f92b9870369f05d43 Mon Sep 17 00:00:00 2001 From: Inaam Rana Date: Fri, 17 Jun 2011 16:29:19 -0400 Subject: merge from mysql-5.1 Bug 12635227 - 61188: DROP TABLE EXTREMELY SLOW --- storage/innobase/btr/btr0cur.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'storage/innobase/btr/btr0cur.c') diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index c50b05cc1f5..e1060af525c 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -4155,7 +4155,7 @@ btr_blob_free( && buf_block_get_space(block) == space && buf_block_get_page_no(block) == page_no) { - if (buf_LRU_free_block(&block->page, all) != BUF_LRU_FREED + if (!buf_LRU_free_block(&block->page, all) && all && block->page.zip.data) { /* Attempt to deallocate the uncompressed page if the whole block cannot be deallocted. */ -- cgit v1.2.1 From 5b4ceba58d1c9c35e0cba1f126290009bd7643ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 16 Jun 2011 10:27:21 +0300 Subject: Bug#12612184 Race condition after btr_cur_pessimistic_update() btr_cur_compress_if_useful(), btr_compress(): Add the parameter ibool adjust. If adjust=TRUE, adjust the cursor position after compressing the page. btr_lift_page_up(): Return a pointer to the father page. BTR_KEEP_POS_FLAG: A new flag for btr_cur_pessimistic_update(). btr_cur_pessimistic_update(): If *big_rec != NULL and flags & BTR_KEEP_POS_FLAG, keep the cursor positioned on the updated record. Also, do not release the index tree x-lock if *big_rec != NULL. btr_cur_mtr_commit_and_start(): Commits and restarts a mini-transaction so that it will retain an x-lock on index->lock and the page of the cursor. This is invoked when btr_cur_pessimistic_update() returns *big_rec != NULL. In all callers of btr_cur_pessimistic_update() that do not pass BTR_KEEP_POS_FLAG, assert that *big_rec == NULL. btr_cur_compress(): Unused function [in the built-in MySQL 5.1], remove. page_rec_get_nth(): Return the nth record on the page (an inverse function of page_rec_get_n_recs_before()). Refactored from page_get_middle_rec(). page_get_middle_rec(): Invoke page_rec_get_nth(). page_cur_insert_rec_zip_reorg(): Make use of the page directory shortcuts in page_rec_get_nth() instead of scanning the whole list of records. row_ins_clust_index_entry_by_modify(): Pass BTR_KEEP_POS_FLAG to btr_cur_pessimistic_update(). row_ins_index_entry_low(): If row_ins_clust_index_entry_by_modify() returns a big_rec, invoke btr_cur_mtr_commit_and_start() in order to commit and start the mini-transaction without releasing the x-locks on index->lock and the cursor page, and write the big_rec. Releasing the page latch in mtr_commit() caused a race condition. row_upd_clust_rec(): Pass BTR_KEEP_POS_FLAG to btr_cur_pessimistic_update(). If it returns a big_rec, invoke btr_cur_mtr_commit_and_start() in order to commit and start the mini-transaction without releasing the x-locks on index->lock and the cursor page, and write the big_rec. Releasing the page latch in mtr_commit() caused a race condition. sync_thread_add_level(): Add the parameter ibool relock. When TRUE, bypass the latching order rules. rw_lock_add_debug_info(): For nested X-lock requests, pass relock=TRUE to sync_thread_add_level(). rb:678 approved by Jimmy Yang --- storage/innobase/btr/btr0cur.c | 106 ++++++++++++++++++++++++++++------------- 1 file changed, 74 insertions(+), 32 deletions(-) (limited to 'storage/innobase/btr/btr0cur.c') diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index d5bed3bec99..7bdf87f2793 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -1791,7 +1791,9 @@ btr_cur_pessimistic_update( /* out: DB_SUCCESS or error code */ ulint flags, /* in: undo logging, locking, and rollback flags */ - btr_cur_t* cursor, /* in: cursor on the record to update */ + btr_cur_t* cursor, /* in/out: cursor on the record to update; + cursor may become invalid if *big_rec == NULL + || !(flags & BTR_KEEP_POS_FLAG) */ big_rec_t** big_rec,/* out: big rec vector whose fields have to be stored externally by the caller, or NULL */ upd_t* update, /* in: update vector; this is allowed also @@ -1926,6 +1928,10 @@ btr_cur_pessimistic_update( err = DB_TOO_BIG_RECORD; goto return_after_reservations; } + + ut_ad(index->type & DICT_CLUSTERED); + ut_ad(btr_page_get_level(page, mtr) == 0); + ut_ad(flags & BTR_KEEP_POS_FLAG); } page_cursor = btr_cur_get_page_cur(cursor); @@ -1952,6 +1958,8 @@ btr_cur_pessimistic_update( ut_a(rec || optim_err != DB_UNDERFLOW); if (rec) { + page_cursor->rec = rec; + lock_rec_restore_from_page_infimum(rec, page); rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr); @@ -1965,12 +1973,30 @@ btr_cur_pessimistic_update( btr_cur_unmark_extern_fields(rec, mtr, offsets); } - btr_cur_compress_if_useful(cursor, mtr); + btr_cur_compress_if_useful( + cursor, + big_rec_vec != NULL && (flags & BTR_KEEP_POS_FLAG), + mtr); err = DB_SUCCESS; goto return_after_reservations; } + if (big_rec_vec) { + ut_ad(index->type & DICT_CLUSTERED); + ut_ad(btr_page_get_level(page, mtr) == 0); + ut_ad(flags & BTR_KEEP_POS_FLAG); + + /* btr_page_split_and_insert() in + btr_cur_pessimistic_insert() invokes + mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK). + We must keep the index->lock when we created a + big_rec, so that row_upd_clust_rec() can store the + big_rec in the same mini-transaction. */ + + mtr_x_lock(dict_index_get_lock(index), mtr); + } + if (page_cur_is_before_first(page_cursor)) { /* The record to be updated was positioned as the first user record on its page */ @@ -1991,6 +2017,7 @@ btr_cur_pessimistic_update( ut_a(rec); ut_a(err == DB_SUCCESS); ut_a(dummy_big_rec == NULL); + page_cursor->rec = rec; rec_set_field_extern_bits(rec, index, ext_vect, n_ext_vect, mtr); offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, &heap); @@ -2025,6 +2052,43 @@ return_after_reservations: return(err); } +/***************************************************************** +Commits and restarts a mini-transaction so that it will retain an +x-lock on index->lock and the cursor page. */ + +void +btr_cur_mtr_commit_and_start( +/*=========================*/ + btr_cur_t* cursor, /* in: cursor */ + mtr_t* mtr) /* in/out: mini-transaction */ +{ + buf_block_t* block; + + block = buf_block_align(btr_cur_get_rec(cursor)); + + ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index), + MTR_MEMO_X_LOCK)); + ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); + /* Keep the locks across the mtr_commit(mtr). */ + rw_lock_x_lock(dict_index_get_lock(cursor->index)); + rw_lock_x_lock(&block->lock); + mutex_enter(&block->mutex); +#ifdef UNIV_SYNC_DEBUG + buf_block_buf_fix_inc_debug(block, __FILE__, __LINE__); +#else + buf_block_buf_fix_inc(block); +#endif + mutex_exit(&block->mutex); + /* Write out the redo log. */ + mtr_commit(mtr); + mtr_start(mtr); + /* Reassociate the locks with the mini-transaction. + They will be released on mtr_commit(mtr). */ + mtr_memo_push(mtr, dict_index_get_lock(cursor->index), + MTR_MEMO_X_LOCK); + mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX); +} + /*==================== B-TREE DELETE MARK AND UNMARK ===============*/ /******************************************************************** @@ -2392,30 +2456,6 @@ btr_cur_del_unmark_for_ibuf( /*==================== B-TREE RECORD REMOVE =========================*/ -/***************************************************************** -Tries to compress a page of the tree on the leaf level. It is assumed -that mtr holds an x-latch on the tree and on the cursor page. To avoid -deadlocks, mtr must also own x-latches to brothers of page, if those -brothers exist. NOTE: it is assumed that the caller has reserved enough -free extents so that the compression will always succeed if done! */ - -void -btr_cur_compress( -/*=============*/ - btr_cur_t* cursor, /* in: cursor on the page to compress; - cursor does not stay valid */ - mtr_t* mtr) /* in: mtr */ -{ - ut_ad(mtr_memo_contains(mtr, - dict_index_get_lock(btr_cur_get_index(cursor)), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, buf_block_align(btr_cur_get_rec(cursor)), - MTR_MEMO_PAGE_X_FIX)); - ut_ad(btr_page_get_level(btr_cur_get_page(cursor), mtr) == 0); - - btr_compress(cursor, mtr); -} - /***************************************************************** Tries to compress a page of the tree if it seems useful. It is assumed that mtr holds an x-latch on the tree and on the cursor page. To avoid @@ -2427,10 +2467,12 @@ ibool btr_cur_compress_if_useful( /*=======================*/ /* out: TRUE if compression occurred */ - btr_cur_t* cursor, /* in: cursor on the page to compress; - cursor does not stay valid if compression - occurs */ - mtr_t* mtr) /* in: mtr */ + btr_cur_t* cursor, /* in/out: cursor on the page to compress; + cursor does not stay valid if !adjust and + compression occurs */ + ibool adjust, /* in: TRUE if should adjust the + cursor position even if compression occurs */ + mtr_t* mtr) /* in/out: mini-transaction */ { ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(btr_cur_get_index(cursor)), @@ -2440,7 +2482,7 @@ btr_cur_compress_if_useful( if (btr_cur_compress_recommendation(cursor, mtr)) { - btr_compress(cursor, mtr); + btr_compress(cursor, adjust, mtr); return(TRUE); } @@ -2653,7 +2695,7 @@ return_after_reservations: mem_heap_free(heap); if (ret == FALSE) { - ret = btr_cur_compress_if_useful(cursor, mtr); + ret = btr_cur_compress_if_useful(cursor, FALSE, mtr); } if (n_extents > 0) { -- cgit v1.2.1 From 417a267927b9249868e8ca7bd3cb7b6e09485f43 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Thu, 16 Jun 2011 11:51:04 +0300 Subject: Re-enable the debug assertions for Bug#12650861. Replace UNIV_BLOB_NULL_DEBUG with UNIV_DEBUG||UNIV_BLOB_LIGHT_DEBUG. Fix known bogus failures. btr_cur_optimistic_update(): If rec_offs_any_null_extern(), assert that the current transaction is an incomplete transaction that is being rolled back in crash recovery. row_build(): If rec_offs_any_null_extern(), assert that the transaction that last updated the record was recovered during crash recovery (and will soon be rolled back). --- storage/innobase/btr/btr0cur.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'storage/innobase/btr/btr0cur.c') diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index 7bdf87f2793..9ce09929f9a 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -31,6 +31,7 @@ Created 10/16/1994 Heikki Tuuri #include "btr0sea.h" #include "row0upd.h" #include "trx0rec.h" +#include "trx0roll.h" /* trx_roll_crash_recv_trx */ #include "que0que.h" #include "row0row.h" #include "srv0srv.h" @@ -1579,7 +1580,6 @@ btr_cur_optimistic_update( ulint old_rec_size; dtuple_t* new_entry; dulint roll_ptr; - trx_t* trx; mem_heap_t* heap; ibool reorganized = FALSE; ulint i; @@ -1592,9 +1592,10 @@ btr_cur_optimistic_update( heap = mem_heap_create(1024); offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap); -#ifdef UNIV_BLOB_NULL_DEBUG - ut_a(!rec_offs_any_null_extern(rec, offsets)); -#endif /* UNIV_BLOB_NULL_DEBUG */ +#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG + ut_a(!rec_offs_any_null_extern(rec, offsets) + || thr_get_trx(thr) == trx_roll_crash_recv_trx); +#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ #ifdef UNIV_DEBUG if (btr_cur_print_record_ops && thr) { @@ -1701,13 +1702,11 @@ btr_cur_optimistic_update( page_cur_move_to_prev(page_cursor); - trx = thr_get_trx(thr); - if (!(flags & BTR_KEEP_SYS_FLAG)) { row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR, roll_ptr); row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID, - trx->id); + thr_get_trx(thr)->id); } rec = btr_cur_insert_if_possible(cursor, new_entry, &reorganized, mtr); -- cgit v1.2.1 From 71e0ff64f070b7ebcf9c25d7c9e75a0aa4970163 Mon Sep 17 00:00:00 2001 From: Davi Arnaut Date: Thu, 7 Jul 2011 08:22:43 -0300 Subject: Bug#12727287: Maintainer mode compilation fails with gcc 4.6 GCC 4.6 has new -Wunused-but-set-variable flag, which is enabled by -Wall, that causes GCC to emit a warning whenever a local variable is assigned to, but otherwise unused (aside from its declaration). Since the maintainer mode uses -Wall and -Werror, source code which triggers these warnings will be rejected. That is, these warnings become hard errors. The solution is to fix the code which triggers these specific warnings. In most of the cases, this is a welcome cleanup as code which triggers this warning is probably dead anyway. dbug/dbug.c: Unused but set. libmysqld/lib_sql.cc: Length is not necessary as the converted error message is always null-terminated. sql/item_func.cc: Make get_var_with_binlog private to this compilation unit. If a error was raised, do not attempt to evaluate the user variable as the statement execution will be interrupted anyway. sql/mysqld.cc: Use a void expression to silence the warning. Avoids the use of macros that would make the code more unreadable than it already is. sql/protocol.cc: Length is not necessary as the converted error message is always null-terminated. Remove unnecessary casts and assignment. sql/sql_class.h: Function is only used in a single compilation unit. sql/sql_load.cc: Only use the variable outside of EMBEDDED_LIBRARY. storage/innobase/btr/btr0cur.c: Do not retrieve field, only the record length is being used. storage/perfschema/pfs.cc: Use a void expression to silence the warning. tests/mysql_client_test.c: Unused but set. unittest/mysys/lf-t.c: Unused but set. --- storage/innobase/btr/btr0cur.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'storage/innobase/btr/btr0cur.c') diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index e1060af525c..46a52b549af 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -3575,7 +3575,6 @@ static void btr_record_not_null_field_in_rec( /*=============================*/ - rec_t* rec, /*!< in: physical record */ ulint n_unique, /*!< in: dict_index_get_n_unique(index), number of columns uniquely determine an index entry */ @@ -3595,9 +3594,8 @@ btr_record_not_null_field_in_rec( for (i = 0; i < n_unique; i++) { ulint rec_len; - byte* field; - field = rec_get_nth_field(rec, offsets, i, &rec_len); + rec_get_nth_field_offs(offsets, i, &rec_len); if (rec_len != UNIV_SQL_NULL) { n_not_null[i]++; @@ -3712,7 +3710,7 @@ btr_estimate_number_of_different_key_vals( if (n_not_null) { btr_record_not_null_field_in_rec( - rec, n_cols, offsets_rec, n_not_null); + n_cols, offsets_rec, n_not_null); } } @@ -3747,7 +3745,7 @@ btr_estimate_number_of_different_key_vals( if (n_not_null) { btr_record_not_null_field_in_rec( - next_rec, n_cols, offsets_next_rec, + n_cols, offsets_next_rec, n_not_null); } -- cgit v1.2.1 From 41f229cd9ed5a1549fb6438bdc1614e48d003625 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Marko=20M=C3=A4kel=C3=A4?= Date: Mon, 29 Aug 2011 11:16:42 +0300 Subject: Bug#12704861 Corruption after a crash during BLOB update The fix of Bug#12612184 broke crash recovery. When a record that contains off-page columns (BLOBs) is updated, we must first write redo log about the BLOB page writes, and only after that write the redo log about the B-tree changes. The buggy fix would log the B-tree changes first, meaning that after recovery, we could end up having a record that contains a null BLOB pointer. Because we will be redo logging the writes off the off-page columns before the B-tree changes, we must make sure that the pages chosen for the off-page columns are free both before and after the B-tree changes. In this way, the worst thing that can happen in crash recovery is that the BLOBs are written to free pages, but the B-tree changes are not applied. The BLOB pages would correctly remain free in this case. To achieve this, we must allocate the BLOB pages in the mini-transaction of the B-tree operation. A further quirk is that BLOB pages are allocated from the same file segment as leaf pages. Because of this, we must temporarily "hide" any leaf pages that were freed during the B-tree operation by "fake allocating" them prior to writing the BLOBs, and freeing them again before the mtr_commit() of the B-tree operation, in btr_mark_freed_leaves(). btr_cur_mtr_commit_and_start(): Remove this faulty function that was introduced in the Bug#12612184 fix. The problem that this function was trying to address was that when we did mtr_commit() the BLOB writes before the mtr_commit() of the update, the new BLOB pages could have overwritten clustered index B-tree leaf pages that were freed during the update. If recovery applied the redo log of the BLOB writes but did not see the log of the record update, the index tree would be corrupted. The correct solution is to make the freed clustered index pages unavailable to the BLOB allocation. This function is also a likely culprit of InnoDB hangs that were observed when testing the Bug#12612184 fix. btr_mark_freed_leaves(): Mark all freed clustered index leaf pages of a mini-transaction allocated (nonfree=TRUE) before storing the BLOBs, or freed (nonfree=FALSE) before committing the mini-transaction. btr_freed_leaves_validate(): A debug function for checking that all clustered index leaf pages that have been marked free in the mini-transaction are consistent (have not been zeroed out). btr_page_alloc_low(): Refactored from btr_page_alloc(). Return the number of the allocated page, or FIL_NULL if out of space. Add the parameter "mtr_t* init_mtr" for specifying the mini-transaction where the page should be initialized, or if this is a "fake allocation" (init_mtr=NULL) by btr_mark_freed_leaves(nonfree=TRUE). btr_page_alloc(): Add the parameter init_mtr, allowing the page to be initialized and X-latched in a different mini-transaction than the one that is used for the allocation. Invoke btr_page_alloc_low(). If a clustered index leaf page was previously freed in mtr, remove it from the memo of previously freed pages. btr_page_free(): Assert that the page is a B-tree page and it has been X-latched by the mini-transaction. If the freed page was a leaf page of a clustered index, link it by a MTR_MEMO_FREE_CLUST_LEAF marker to the mini-transaction. btr_store_big_rec_extern_fields_func(): Add the parameter alloc_mtr, which is NULL (old behaviour in inserts) and the same as local_mtr in updates. If alloc_mtr!=NULL, the BLOB pages will be allocated from it instead of the mini-transaction that is used for writing the BLOBs. fsp_alloc_from_free_frag(): Refactored from fsp_alloc_free_page(). Allocate the specified page from a partially free extent. fseg_alloc_free_page_low(), fseg_alloc_free_page_general(): Add the parameter "mtr_t* init_mtr" for specifying the mini-transaction where the page should be initialized, or NULL if this is a "fake allocation" that prevents the reuse of a previously freed B-tree page for BLOB storage. If init_mtr==NULL, try harder to reallocate the specified page and assert that it succeeded. fsp_alloc_free_page(): Add the parameter "mtr_t* init_mtr" for specifying the mini-transaction where the page should be initialized. Do not allow init_mtr == NULL, because this function is never to be used for "fake allocations". mtr_t: Add the operation MTR_MEMO_FREE_CLUST_LEAF and the flag mtr->freed_clust_leaf for quickly determining if any MTR_MEMO_FREE_CLUST_LEAF operations have been posted. row_ins_index_entry_low(): When columns are being made off-page in insert-by-update, invoke btr_mark_freed_leaves(nonfree=TRUE) and pass the mini-transaction as the alloc_mtr to btr_store_big_rec_extern_fields(). Finally, invoke btr_mark_freed_leaves(nonfree=FALSE) to avoid leaking pages. row_build(): Correct a comment, and add a debug assertion that a record that contains NULL BLOB pointers must be a fresh insert. row_upd_clust_rec(): When columns are being moved off-page, invoke btr_mark_freed_leaves(nonfree=TRUE) and pass the mini-transaction as the alloc_mtr to btr_store_big_rec_extern_fields(). Finally, invoke btr_mark_freed_leaves(nonfree=FALSE) to avoid leaking pages. buf_reset_check_index_page_at_flush(): Remove. The function fsp_init_file_page_low() already sets bpage->check_index_page_at_flush=FALSE. There is a known issue in tablespace extension. If the request to allocate a BLOB page leads to the tablespace being extended, crash recovery could see BLOB writes to pages that are off the tablespace file bounds. This should trigger an assertion failure in fil_io() at crash recovery. The safe thing would be to write redo log about the tablespace extension to the mini-transaction of the BLOB write, not to the mini-transaction of the record update. However, there is no redo log record for file extension in the current redo log format. rb:693 approved by Sunny Bains --- storage/innobase/btr/btr0cur.c | 92 ++++++++++++++++++++---------------------- 1 file changed, 43 insertions(+), 49 deletions(-) (limited to 'storage/innobase/btr/btr0cur.c') diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index 9ce09929f9a..a1dda8edf69 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -2051,43 +2051,6 @@ return_after_reservations: return(err); } -/***************************************************************** -Commits and restarts a mini-transaction so that it will retain an -x-lock on index->lock and the cursor page. */ - -void -btr_cur_mtr_commit_and_start( -/*=========================*/ - btr_cur_t* cursor, /* in: cursor */ - mtr_t* mtr) /* in/out: mini-transaction */ -{ - buf_block_t* block; - - block = buf_block_align(btr_cur_get_rec(cursor)); - - ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK)); - ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - /* Keep the locks across the mtr_commit(mtr). */ - rw_lock_x_lock(dict_index_get_lock(cursor->index)); - rw_lock_x_lock(&block->lock); - mutex_enter(&block->mutex); -#ifdef UNIV_SYNC_DEBUG - buf_block_buf_fix_inc_debug(block, __FILE__, __LINE__); -#else - buf_block_buf_fix_inc(block); -#endif - mutex_exit(&block->mutex); - /* Write out the redo log. */ - mtr_commit(mtr); - mtr_start(mtr); - /* Reassociate the locks with the mini-transaction. - They will be released on mtr_commit(mtr). */ - mtr_memo_push(mtr, dict_index_get_lock(cursor->index), - MTR_MEMO_X_LOCK); - mtr_memo_push(mtr, block, MTR_MEMO_PAGE_X_FIX); -} - /*==================== B-TREE DELETE MARK AND UNMARK ===============*/ /******************************************************************** @@ -3494,6 +3457,11 @@ btr_store_big_rec_extern_fields( this function returns */ big_rec_t* big_rec_vec, /* in: vector containing fields to be stored externally */ + mtr_t* alloc_mtr, /* in/out: in an insert, NULL; + in an update, local_mtr for + allocating BLOB pages and + updating BLOB pointers; alloc_mtr + must not have freed any leaf pages */ mtr_t* local_mtr __attribute__((unused))) /* in: mtr containing the latch to rec and to the tree */ @@ -3514,6 +3482,8 @@ btr_store_big_rec_extern_fields( ulint i; mtr_t mtr; + ut_ad(local_mtr); + ut_ad(!alloc_mtr || alloc_mtr == local_mtr); ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), MTR_MEMO_X_LOCK)); @@ -3523,6 +3493,25 @@ btr_store_big_rec_extern_fields( space_id = buf_frame_get_space_id(rec); + if (alloc_mtr) { + /* Because alloc_mtr will be committed after + mtr, it is possible that the tablespace has been + extended when the B-tree record was updated or + inserted, or it will be extended while allocating + pages for big_rec. + + TODO: In mtr (not alloc_mtr), write a redo log record + about extending the tablespace to its current size, + and remember the current size. Whenever the tablespace + grows as pages are allocated, write further redo log + records to mtr. (Currently tablespace extension is not + covered by the redo log. If it were, the record would + only be written to alloc_mtr, which is committed after + mtr.) */ + } else { + alloc_mtr = &mtr; + } + /* We have to create a file segment to the tablespace for each field and put the pointer to the field in rec */ @@ -3549,7 +3538,7 @@ btr_store_big_rec_extern_fields( } page = btr_page_alloc(index, hint_page_no, - FSP_NO_DIR, 0, &mtr); + FSP_NO_DIR, 0, alloc_mtr, &mtr); if (page == NULL) { mtr_commit(&mtr); @@ -3603,37 +3592,42 @@ btr_store_big_rec_extern_fields( extern_len -= store_len; + if (alloc_mtr == &mtr) { #ifdef UNIV_SYNC_DEBUG - rec_page = + rec_page = #endif /* UNIV_SYNC_DEBUG */ - buf_page_get(space_id, - buf_frame_get_page_no(data), - RW_X_LATCH, &mtr); + buf_page_get( + space_id, + buf_frame_get_page_no(data), + RW_X_LATCH, &mtr); #ifdef UNIV_SYNC_DEBUG - buf_page_dbg_add_level(rec_page, SYNC_NO_ORDER_CHECK); + buf_page_dbg_add_level( + rec_page, SYNC_NO_ORDER_CHECK); #endif /* UNIV_SYNC_DEBUG */ + } + mlog_write_ulint(data + local_len + BTR_EXTERN_LEN, 0, - MLOG_4BYTES, &mtr); + MLOG_4BYTES, alloc_mtr); mlog_write_ulint(data + local_len + BTR_EXTERN_LEN + 4, big_rec_vec->fields[i].len - extern_len, - MLOG_4BYTES, &mtr); + MLOG_4BYTES, alloc_mtr); if (prev_page_no == FIL_NULL) { mlog_write_ulint(data + local_len + BTR_EXTERN_SPACE_ID, space_id, - MLOG_4BYTES, &mtr); + MLOG_4BYTES, alloc_mtr); mlog_write_ulint(data + local_len + BTR_EXTERN_PAGE_NO, page_no, - MLOG_4BYTES, &mtr); + MLOG_4BYTES, alloc_mtr); mlog_write_ulint(data + local_len + BTR_EXTERN_OFFSET, FIL_PAGE_DATA, - MLOG_4BYTES, &mtr); + MLOG_4BYTES, alloc_mtr); /* Set the bit denoting that this field in rec is stored externally */ @@ -3641,7 +3635,7 @@ btr_store_big_rec_extern_fields( rec_set_nth_field_extern_bit( rec, index, big_rec_vec->fields[i].field_no, - TRUE, &mtr); + TRUE, alloc_mtr); } prev_page_no = page_no; -- cgit v1.2.1 From c95fdd936e73eb3bddad3fe02d087ccce67b00c9 Mon Sep 17 00:00:00 2001 From: Inaam Rana Date: Fri, 30 Sep 2011 07:02:19 -0400 Subject: Revert original fix for Bug 12612184 and the follow up fix for Bug 12704861. Bug 12704861 fix was revno: 3504.1.1 (rb://693) Bug 12612184 fix was revno: 3445.1.10 (rb://678) --- storage/innobase/btr/btr0cur.c | 140 +++++++++++------------------------------ 1 file changed, 38 insertions(+), 102 deletions(-) (limited to 'storage/innobase/btr/btr0cur.c') diff --git a/storage/innobase/btr/btr0cur.c b/storage/innobase/btr/btr0cur.c index 3021bb71929..f63ca20ef24 100644 --- a/storage/innobase/btr/btr0cur.c +++ b/storage/innobase/btr/btr0cur.c @@ -1985,6 +1985,7 @@ btr_cur_optimistic_update( ulint old_rec_size; dtuple_t* new_entry; roll_ptr_t roll_ptr; + trx_t* trx; mem_heap_t* heap; ulint i; ulint n_ext; @@ -2001,10 +2002,9 @@ btr_cur_optimistic_update( heap = mem_heap_create(1024); offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap); -#if defined UNIV_DEBUG || defined UNIV_BLOB_LIGHT_DEBUG - ut_a(!rec_offs_any_null_extern(rec, offsets) - || trx_is_recv(thr_get_trx(thr))); -#endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ +#ifdef UNIV_BLOB_NULL_DEBUG + ut_a(!rec_offs_any_null_extern(rec, offsets)); +#endif /* UNIV_BLOB_NULL_DEBUG */ #ifdef UNIV_DEBUG if (btr_cur_print_record_ops && thr) { @@ -2127,11 +2127,13 @@ any_extern: page_cur_move_to_prev(page_cursor); + trx = thr_get_trx(thr); + if (!(flags & BTR_KEEP_SYS_FLAG)) { row_upd_index_entry_sys_field(new_entry, index, DATA_ROLL_PTR, roll_ptr); row_upd_index_entry_sys_field(new_entry, index, DATA_TRX_ID, - thr_get_trx(thr)->id); + trx->id); } /* There are no externally stored columns in new_entry */ @@ -2217,9 +2219,7 @@ btr_cur_pessimistic_update( /*=======================*/ ulint flags, /*!< in: undo logging, locking, and rollback flags */ - btr_cur_t* cursor, /*!< in/out: cursor on the record to update; - cursor may become invalid if *big_rec == NULL - || !(flags & BTR_KEEP_POS_FLAG) */ + btr_cur_t* cursor, /*!< in: cursor on the record to update */ mem_heap_t** heap, /*!< in/out: pointer to memory heap, or NULL */ big_rec_t** big_rec,/*!< out: big rec vector whose fields have to be stored externally by the caller, or NULL */ @@ -2358,7 +2358,7 @@ btr_cur_pessimistic_update( record to be inserted: we have to remember which fields were such */ ut_ad(!page_is_comp(page) || !rec_get_node_ptr_flag(rec)); - ut_ad(rec_offs_validate(rec, index, offsets)); + offsets = rec_get_offsets(rec, index, offsets, ULINT_UNDEFINED, heap); n_ext += btr_push_update_extern_fields(new_entry, update, *heap); if (UNIV_LIKELY_NULL(page_zip)) { @@ -2381,10 +2381,6 @@ make_external: err = DB_TOO_BIG_RECORD; goto return_after_reservations; } - - ut_ad(page_is_leaf(page)); - ut_ad(dict_index_is_clust(index)); - ut_ad(flags & BTR_KEEP_POS_FLAG); } /* Store state of explicit locks on rec on the page infimum record, @@ -2412,8 +2408,6 @@ make_external: rec = btr_cur_insert_if_possible(cursor, new_entry, n_ext, mtr); if (rec) { - page_cursor->rec = rec; - lock_rec_restore_from_page_infimum(btr_cur_get_block(cursor), rec, block); @@ -2427,10 +2421,7 @@ make_external: rec, index, offsets, mtr); } - btr_cur_compress_if_useful( - cursor, - big_rec_vec != NULL && (flags & BTR_KEEP_POS_FLAG), - mtr); + btr_cur_compress_if_useful(cursor, mtr); if (page_zip && !dict_index_is_clust(index) && page_is_leaf(page)) { @@ -2450,21 +2441,6 @@ make_external: } } - if (big_rec_vec) { - ut_ad(page_is_leaf(page)); - ut_ad(dict_index_is_clust(index)); - ut_ad(flags & BTR_KEEP_POS_FLAG); - - /* btr_page_split_and_insert() in - btr_cur_pessimistic_insert() invokes - mtr_memo_release(mtr, index->lock, MTR_MEMO_X_LOCK). - We must keep the index->lock when we created a - big_rec, so that row_upd_clust_rec() can store the - big_rec in the same mini-transaction. */ - - mtr_x_lock(dict_index_get_lock(index), mtr); - } - /* Was the record to be updated positioned as the first user record on its page? */ was_first = page_cur_is_before_first(page_cursor); @@ -2480,7 +2456,6 @@ make_external: ut_a(rec); ut_a(err == DB_SUCCESS); ut_a(dummy_big_rec == NULL); - page_cursor->rec = rec; if (dict_index_is_sec_or_ibuf(index)) { /* Update PAGE_MAX_TRX_ID in the index page header. @@ -2915,12 +2890,10 @@ UNIV_INTERN ibool btr_cur_compress_if_useful( /*=======================*/ - btr_cur_t* cursor, /*!< in/out: cursor on the page to compress; - cursor does not stay valid if !adjust and - compression occurs */ - ibool adjust, /*!< in: TRUE if should adjust the - cursor position even if compression occurs */ - mtr_t* mtr) /*!< in/out: mini-transaction */ + btr_cur_t* cursor, /*!< in: cursor on the page to compress; + cursor does not stay valid if compression + occurs */ + mtr_t* mtr) /*!< in: mtr */ { ut_ad(mtr_memo_contains(mtr, dict_index_get_lock(btr_cur_get_index(cursor)), @@ -2929,7 +2902,7 @@ btr_cur_compress_if_useful( MTR_MEMO_PAGE_X_FIX)); return(btr_cur_compress_recommendation(cursor, mtr) - && btr_compress(cursor, adjust, mtr)); + && btr_compress(cursor, mtr)); } /*******************************************************//** @@ -3171,7 +3144,7 @@ return_after_reservations: mem_heap_free(heap); if (ret == FALSE) { - ret = btr_cur_compress_if_useful(cursor, FALSE, mtr); + ret = btr_cur_compress_if_useful(cursor, mtr); } if (n_extents > 0) { @@ -4160,9 +4133,6 @@ btr_store_big_rec_extern_fields_func( the "external storage" flags in offsets will not correspond to rec when this function returns */ - const big_rec_t*big_rec_vec, /*!< in: vector containing fields - to be stored externally */ - #ifdef UNIV_DEBUG mtr_t* local_mtr, /*!< in: mtr containing the latch to rec and to the tree */ @@ -4171,11 +4141,9 @@ btr_store_big_rec_extern_fields_func( ibool update_in_place,/*! in: TRUE if the record is updated in place (not delete+insert) */ #endif /* UNIV_DEBUG || UNIV_BLOB_LIGHT_DEBUG */ - mtr_t* alloc_mtr) /*!< in/out: in an insert, NULL; - in an update, local_mtr for - allocating BLOB pages and - updating BLOB pointers; alloc_mtr - must not have freed any leaf pages */ + const big_rec_t*big_rec_vec) /*!< in: vector containing fields + to be stored externally */ + { ulint rec_page_no; byte* field_ref; @@ -4194,9 +4162,6 @@ btr_store_big_rec_extern_fields_func( ut_ad(rec_offs_validate(rec, index, offsets)); ut_ad(rec_offs_any_extern(offsets)); - ut_ad(local_mtr); - ut_ad(!alloc_mtr || alloc_mtr == local_mtr); - ut_ad(!update_in_place || alloc_mtr); ut_ad(mtr_memo_contains(local_mtr, dict_index_get_lock(index), MTR_MEMO_X_LOCK)); ut_ad(mtr_memo_contains(local_mtr, rec_block, MTR_MEMO_PAGE_X_FIX)); @@ -4212,25 +4177,6 @@ btr_store_big_rec_extern_fields_func( rec_page_no = buf_block_get_page_no(rec_block); ut_a(fil_page_get_type(page_align(rec)) == FIL_PAGE_INDEX); - if (alloc_mtr) { - /* Because alloc_mtr will be committed after - mtr, it is possible that the tablespace has been - extended when the B-tree record was updated or - inserted, or it will be extended while allocating - pages for big_rec. - - TODO: In mtr (not alloc_mtr), write a redo log record - about extending the tablespace to its current size, - and remember the current size. Whenever the tablespace - grows as pages are allocated, write further redo log - records to mtr. (Currently tablespace extension is not - covered by the redo log. If it were, the record would - only be written to alloc_mtr, which is committed after - mtr.) */ - } else { - alloc_mtr = &mtr; - } - if (UNIV_LIKELY_NULL(page_zip)) { int err; @@ -4307,7 +4253,7 @@ btr_store_big_rec_extern_fields_func( } block = btr_page_alloc(index, hint_page_no, - FSP_NO_DIR, 0, alloc_mtr, &mtr); + FSP_NO_DIR, 0, &mtr); if (UNIV_UNLIKELY(block == NULL)) { mtr_commit(&mtr); @@ -4434,15 +4380,11 @@ btr_store_big_rec_extern_fields_func( goto next_zip_page; } - if (alloc_mtr == &mtr) { - rec_block = buf_page_get( - space_id, zip_size, - rec_page_no, - RW_X_LATCH, &mtr); - buf_block_dbg_add_level( - rec_block, - SYNC_NO_ORDER_CHECK); - } + rec_block = buf_page_get(space_id, zip_size, + rec_page_no, + RW_X_LATCH, &mtr); + buf_block_dbg_add_level(rec_block, + SYNC_NO_ORDER_CHECK); if (err == Z_STREAM_END) { mach_write_to_4(field_ref @@ -4476,8 +4418,7 @@ btr_store_big_rec_extern_fields_func( page_zip_write_blob_ptr( page_zip, rec, index, offsets, - big_rec_vec->fields[i].field_no, - alloc_mtr); + big_rec_vec->fields[i].field_no, &mtr); next_zip_page: prev_page_no = page_no; @@ -4522,23 +4463,19 @@ next_zip_page: extern_len -= store_len; - if (alloc_mtr == &mtr) { - rec_block = buf_page_get( - space_id, zip_size, - rec_page_no, - RW_X_LATCH, &mtr); - buf_block_dbg_add_level( - rec_block, - SYNC_NO_ORDER_CHECK); - } + rec_block = buf_page_get(space_id, zip_size, + rec_page_no, + RW_X_LATCH, &mtr); + buf_block_dbg_add_level(rec_block, + SYNC_NO_ORDER_CHECK); mlog_write_ulint(field_ref + BTR_EXTERN_LEN, 0, - MLOG_4BYTES, alloc_mtr); + MLOG_4BYTES, &mtr); mlog_write_ulint(field_ref + BTR_EXTERN_LEN + 4, big_rec_vec->fields[i].len - extern_len, - MLOG_4BYTES, alloc_mtr); + MLOG_4BYTES, &mtr); if (prev_page_no == FIL_NULL) { btr_blob_dbg_add_blob( @@ -4548,19 +4485,18 @@ next_zip_page: mlog_write_ulint(field_ref + BTR_EXTERN_SPACE_ID, - space_id, MLOG_4BYTES, - alloc_mtr); + space_id, + MLOG_4BYTES, &mtr); mlog_write_ulint(field_ref + BTR_EXTERN_PAGE_NO, - page_no, MLOG_4BYTES, - alloc_mtr); + page_no, + MLOG_4BYTES, &mtr); mlog_write_ulint(field_ref + BTR_EXTERN_OFFSET, FIL_PAGE_DATA, - MLOG_4BYTES, - alloc_mtr); + MLOG_4BYTES, &mtr); } prev_page_no = page_no; -- cgit v1.2.1