diff options
author | Sergei Golubchik <sergii@pisem.net> | 2014-01-22 15:35:42 +0100 |
---|---|---|
committer | Sergei Golubchik <sergii@pisem.net> | 2014-01-22 15:35:42 +0100 |
commit | ec34edd9c383b65ab1758dbda417f3e984d08af7 (patch) | |
tree | cf74d8d86227b6e810f56d1052e729f587647413 | |
parent | 37d240ecf9213a29d6a0a236ebeb1e72c0b43ce6 (diff) | |
parent | 15b4441dcbdf6960a7ed9a15f8f03c01a24451e4 (diff) | |
download | mariadb-git-ec34edd9c383b65ab1758dbda417f3e984d08af7.tar.gz |
Percona-Server-5.5.35-rel33.0.tar.gz
-rw-r--r-- | storage/xtradb/btr/btr0cur.c | 31 | ||||
-rw-r--r-- | storage/xtradb/btr/btr0pcur.c | 46 | ||||
-rw-r--r-- | storage/xtradb/buf/buf0buf.c | 6 | ||||
-rw-r--r-- | storage/xtradb/buf/buf0flu.c | 10 | ||||
-rw-r--r-- | storage/xtradb/dict/dict0dict.c | 13 | ||||
-rw-r--r-- | storage/xtradb/fil/fil0fil.c | 3 | ||||
-rw-r--r-- | storage/xtradb/handler/ha_innodb.cc | 82 | ||||
-rw-r--r-- | storage/xtradb/handler/i_s.cc | 4 | ||||
-rw-r--r-- | storage/xtradb/ibuf/ibuf0ibuf.c | 29 | ||||
-rw-r--r-- | storage/xtradb/include/btr0cur.h | 15 | ||||
-rw-r--r-- | storage/xtradb/include/btr0pcur.h | 44 | ||||
-rw-r--r-- | storage/xtradb/include/btr0pcur.ic | 4 | ||||
-rw-r--r-- | storage/xtradb/include/btr0sea.h | 2 | ||||
-rw-r--r-- | storage/xtradb/include/dict0types.h | 5 | ||||
-rw-r--r-- | storage/xtradb/include/ibuf0ibuf.h | 5 | ||||
-rw-r--r-- | storage/xtradb/include/log0log.h | 2 | ||||
-rw-r--r-- | storage/xtradb/include/univ.i | 2 | ||||
-rw-r--r-- | storage/xtradb/log/log0log.c | 110 | ||||
-rw-r--r-- | storage/xtradb/log/log0online.c | 19 | ||||
-rw-r--r-- | storage/xtradb/row/row0sel.c | 103 | ||||
-rw-r--r-- | storage/xtradb/trx/trx0trx.c | 8 |
21 files changed, 405 insertions, 138 deletions
diff --git a/storage/xtradb/btr/btr0cur.c b/storage/xtradb/btr/btr0cur.c index 8904270197a..e5d0f002af6 100644 --- a/storage/xtradb/btr/btr0cur.c +++ b/storage/xtradb/btr/btr0cur.c @@ -61,6 +61,7 @@ Created 10/16/1994 Heikki Tuuri #include "row0upd.h" #include "trx0rec.h" #include "trx0roll.h" /* trx_is_recv() */ +#include "trx0undo.h" #include "que0que.h" #include "row0row.h" #include "srv0srv.h" @@ -1757,7 +1758,7 @@ btr_cur_upd_lock_and_undo( /***********************************************************//** Writes a redo log record of updating a record in-place. */ -UNIV_INLINE +UNIV_INTERN void btr_cur_update_in_place_log( /*========================*/ @@ -1785,18 +1786,30 @@ btr_cur_update_in_place_log( return; } - /* The code below assumes index is a clustered index: change index to - the clustered index if we are updating a secondary index record (or we - could as well skip writing the sys col values to the log in this case - because they are not needed for a secondary index record update) */ - - index = dict_table_get_first_index(index->table); + /* For secondary indexes, we could skip writing the dummy system fields + to the redo log but we have to change redo log parsing of + MLOG_REC_UPDATE_IN_PLACE/MLOG_COMP_REC_UPDATE_IN_PLACE or we have to add + new redo log record. For now, just write dummy sys fields to the redo + log if we are updating a secondary index record. + */ mach_write_to_1(log_ptr, flags); log_ptr++; - log_ptr = row_upd_write_sys_vals_to_log(index, trx, roll_ptr, log_ptr, - mtr); + if (dict_index_is_clust(index)) { + log_ptr = row_upd_write_sys_vals_to_log( + index, trx, roll_ptr, log_ptr, mtr); + } else { + /* Dummy system fields for a secondary index */ + /* TRX_ID Position */ + log_ptr += mach_write_compressed(log_ptr, 0); + /* ROLL_PTR */ + trx_write_roll_ptr(log_ptr, 0); + log_ptr += DATA_ROLL_PTR_LEN; + /* TRX_ID */ + log_ptr += mach_ull_write_compressed(log_ptr, 0); + } + mach_write_to_2(log_ptr, page_offset(rec)); log_ptr += 2; diff --git a/storage/xtradb/btr/btr0pcur.c b/storage/xtradb/btr/btr0pcur.c index 3929c4a9c2d..d1b3dc4a3e6 100644 --- a/storage/xtradb/btr/btr0pcur.c +++ b/storage/xtradb/btr/btr0pcur.c @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2012, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -111,7 +111,7 @@ btr_pcur_store_position( page_t* page; ulint offs; - ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); ut_ad(cursor->latch_mode != BTR_NO_LATCHES); block = btr_pcur_get_block(cursor); @@ -128,7 +128,6 @@ btr_pcur_store_position( ut_ad(mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_S_FIX) || mtr_memo_contains(mtr, block, MTR_MEMO_PAGE_X_FIX)); - ut_a(cursor->latch_mode != BTR_NO_LATCHES); if (UNIV_UNLIKELY(page_get_n_recs(page) == 0)) { /* It must be an empty index tree; NOTE that in this case @@ -239,21 +238,12 @@ btr_pcur_restore_position_func( ut_ad(mtr); ut_ad(mtr->state == MTR_ACTIVE); + ut_ad(cursor->old_stored == BTR_PCUR_OLD_STORED); + ut_ad(cursor->pos_state == BTR_PCUR_WAS_POSITIONED + || cursor->pos_state == BTR_PCUR_IS_POSITIONED); index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor)); - if (UNIV_UNLIKELY(cursor->old_stored != BTR_PCUR_OLD_STORED) - || UNIV_UNLIKELY(cursor->pos_state != BTR_PCUR_WAS_POSITIONED - && cursor->pos_state != BTR_PCUR_IS_POSITIONED)) { - ut_print_buf(stderr, cursor, sizeof(btr_pcur_t)); - putc('\n', stderr); - if (cursor->trx_if_known) { - trx_print(stderr, cursor->trx_if_known, 0); - } - - ut_error; - } - if (UNIV_UNLIKELY (cursor->rel_pos == BTR_PCUR_AFTER_LAST_IN_TREE || cursor->rel_pos == BTR_PCUR_BEFORE_FIRST_IN_TREE)) { @@ -277,14 +267,14 @@ btr_pcur_restore_position_func( if (UNIV_LIKELY(latch_mode == BTR_SEARCH_LEAF) || UNIV_LIKELY(latch_mode == BTR_MODIFY_LEAF)) { - /* Try optimistic restoration */ + /* Try optimistic restoration. */ - if (UNIV_LIKELY(buf_page_optimistic_get( - latch_mode, - cursor->block_when_stored, - cursor->modify_clock, - file, line, mtr))) { + if (buf_page_optimistic_get(latch_mode, + cursor->block_when_stored, + cursor->modify_clock, + file, line, mtr)) { cursor->pos_state = BTR_PCUR_IS_POSITIONED; + cursor->latch_mode = latch_mode; buf_block_dbg_add_level( btr_pcur_get_block(cursor), @@ -296,9 +286,6 @@ btr_pcur_restore_position_func( const rec_t* rec; const ulint* offsets1; const ulint* offsets2; -#endif /* UNIV_DEBUG */ - cursor->latch_mode = latch_mode; -#ifdef UNIV_DEBUG rec = btr_pcur_get_rec(cursor); heap = mem_heap_create(256); @@ -316,7 +303,13 @@ btr_pcur_restore_position_func( #endif /* UNIV_DEBUG */ return(TRUE); } - + /* This is the same record as stored, + may need to be adjusted for BTR_PCUR_BEFORE/AFTER, + depending on search mode and direction. */ + if (btr_pcur_is_on_user_rec(cursor)) { + cursor->pos_state + = BTR_PCUR_IS_POSITIONED_OPTIMISTIC; + } return(FALSE); } } @@ -418,7 +411,7 @@ btr_pcur_move_to_next_page( buf_block_t* next_block; page_t* next_page; - ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED); + ut_ad(cursor->pos_state == BTR_PCUR_IS_POSITIONED); ut_ad(cursor->latch_mode != BTR_NO_LATCHES); ut_ad(btr_pcur_is_after_last_on_page(cursor)); @@ -484,7 +477,6 @@ btr_pcur_move_backward_from_page( ulint latch_mode; ulint latch_mode2; - ut_a(cursor->pos_state == BTR_PCUR_IS_POSITIONED); ut_ad(cursor->latch_mode != BTR_NO_LATCHES); ut_ad(btr_pcur_is_before_first_on_page(cursor)); ut_ad(!btr_pcur_is_before_first_in_tree(cursor, mtr)); diff --git a/storage/xtradb/buf/buf0buf.c b/storage/xtradb/buf/buf0buf.c index f06fd4abfb1..efecef4cece 100644 --- a/storage/xtradb/buf/buf0buf.c +++ b/storage/xtradb/buf/buf0buf.c @@ -4815,12 +4815,16 @@ buf_get_latched_pages_number_instance( case BUF_BLOCK_FILE_PAGE: /* uncompressed page */ break; + case BUF_BLOCK_REMOVE_HASH: + /* We hold flush list but not LRU list mutex here. + Thus encountering BUF_BLOCK_REMOVE_HASH pages is + possible. */ + break; case BUF_BLOCK_ZIP_FREE: case BUF_BLOCK_ZIP_PAGE: case BUF_BLOCK_NOT_USED: case BUF_BLOCK_READY_FOR_USE: case BUF_BLOCK_MEMORY: - case BUF_BLOCK_REMOVE_HASH: ut_error; break; } diff --git a/storage/xtradb/buf/buf0flu.c b/storage/xtradb/buf/buf0flu.c index fea665eba40..d47f2d6fa9a 100644 --- a/storage/xtradb/buf/buf0flu.c +++ b/storage/xtradb/buf/buf0flu.c @@ -1605,6 +1605,16 @@ buf_flush_page_and_try_neighbors( ut_ad(block_mutex); } + if (UNIV_UNLIKELY(buf_page_get_state(bpage) + == BUF_BLOCK_REMOVE_HASH)) { + + /* In case we don't hold the LRU list mutex, we may see a page + that is about to be relocated on the flush list. Do not + attempt to flush it. */ + ut_ad(flush_type == BUF_FLUSH_LIST); + return (flushed); + } + ut_a(buf_page_in_file(bpage)); if (buf_flush_ready_for_flush(bpage, flush_type)) { diff --git a/storage/xtradb/dict/dict0dict.c b/storage/xtradb/dict/dict0dict.c index 50da1c97beb..5a61e39eb8b 100644 --- a/storage/xtradb/dict/dict0dict.c +++ b/storage/xtradb/dict/dict0dict.c @@ -36,6 +36,11 @@ UNIV_INTERN dict_index_t* dict_ind_redundant; /** dummy index for ROW_FORMAT=COMPACT supremum and infimum records */ UNIV_INTERN dict_index_t* dict_ind_compact; +#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG +/** Flag to control insert buffer debugging. */ +UNIV_INTERN uint ibuf_debug; +#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ + #ifndef UNIV_HOTBACKUP #include "buf0buf.h" #include "data0type.h" @@ -4855,6 +4860,8 @@ dict_update_statistics( dict_index_t* index; ulint sum_of_index_sizes = 0; + DBUG_EXECUTE_IF("skip_innodb_statistics", return;); + if (table->ibd_file_missing) { ut_print_timestamp(stderr); fprintf(stderr, @@ -4916,6 +4923,12 @@ dict_update_statistics( continue; } +#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG + if (ibuf_debug && !dict_index_is_clust(index)) { + goto fake_statistics; + } +#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ + if (UNIV_LIKELY (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE || (srv_force_recovery < SRV_FORCE_NO_LOG_REDO diff --git a/storage/xtradb/fil/fil0fil.c b/storage/xtradb/fil/fil0fil.c index 19e656dfd92..f598c717e00 100644 --- a/storage/xtradb/fil/fil0fil.c +++ b/storage/xtradb/fil/fil0fil.c @@ -4369,7 +4369,7 @@ fil_load_single_table_tablespace( if (check_msg) { fprintf(stderr, - "InnoDB: Error: %s in file %s", + "InnoDB: Error: %s in file %s\n", check_msg, filepath); goto func_exit; } @@ -4967,6 +4967,7 @@ fil_extend_space_to_desired_size( space->size += (size_after_extend - start_page_no); os_has_said_disk_full = FALSE; } + fil_node_complete_io(node, fil_system, OS_FILE_READ); goto complete_io; } #endif diff --git a/storage/xtradb/handler/ha_innodb.cc b/storage/xtradb/handler/ha_innodb.cc index 1946d091993..565d0189636 100644 --- a/storage/xtradb/handler/ha_innodb.cc +++ b/storage/xtradb/handler/ha_innodb.cc @@ -55,6 +55,7 @@ this program; if not, write to the Free Software Foundation, Inc., #include <innodb_priv.h> #include <mysql/psi/psi.h> #include <my_sys.h> +#include <my_check_opt.h> #ifdef MYSQL_SERVER #include <rpl_mi.h> @@ -8974,6 +8975,10 @@ ha_innobase::records_in_range( /* There exists possibility of not being able to find requested index due to inconsistency between MySQL and InoDB dictionary info. Necessary message should have been printed in innobase_get_index() */ + if (prebuilt->table->ibd_file_missing) { + n_rows = HA_POS_ERROR; + goto func_exit; + } if (UNIV_UNLIKELY(!index)) { n_rows = HA_POS_ERROR; goto func_exit; @@ -9744,8 +9749,7 @@ int ha_innobase::check( /*===============*/ THD* thd, /*!< in: user thread handle */ - HA_CHECK_OPT* check_opt) /*!< in: check options, currently - ignored */ + HA_CHECK_OPT* check_opt) /*!< in: check options */ { dict_index_t* index; ulint n_rows; @@ -9802,11 +9806,6 @@ ha_innobase::check( do additional check */ prebuilt->table->corrupted = FALSE; - /* Enlarge the fatal lock wait timeout during CHECK TABLE. */ - mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold += SRV_SEMAPHORE_WAIT_EXTENSION; - mutex_exit(&kernel_mutex); - for (index = dict_table_get_first_index(prebuilt->table); index != NULL; index = dict_table_get_next_index(index)) { @@ -9819,20 +9818,41 @@ ha_innobase::check( /* If this is an index being created, break */ if (*index->name == TEMP_INDEX_PREFIX) { - break; - } else if (!btr_validate_index(index, prebuilt->trx)) { - is_ok = FALSE; + continue; + } + if (!(check_opt->flags & T_QUICK)) { + /* Enlarge the fatal lock wait timeout during + CHECK TABLE. */ + mutex_enter(&kernel_mutex); + srv_fatal_semaphore_wait_threshold += + SRV_SEMAPHORE_WAIT_EXTENSION; + mutex_exit(&kernel_mutex); + + ibool valid = TRUE; + valid = btr_validate_index(index, prebuilt->trx); + + /* Restore the fatal lock wait timeout after + CHECK TABLE. */ + mutex_enter(&kernel_mutex); + srv_fatal_semaphore_wait_threshold -= + SRV_SEMAPHORE_WAIT_EXTENSION; + mutex_exit(&kernel_mutex); + + if (!valid) { + is_ok = FALSE; - innobase_format_name( - index_name, sizeof index_name, - prebuilt->index->name, TRUE); + innobase_format_name( + index_name, sizeof index_name, + index->name, TRUE); + push_warning_printf(thd, + MYSQL_ERROR::WARN_LEVEL_WARN, + ER_NOT_KEYFILE, + "InnoDB: The B-tree of" + " index %s is corrupted.", + index_name); - push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN, - ER_NOT_KEYFILE, - "InnoDB: The B-tree of" - " index %s is corrupted.", - index_name); - continue; + continue; + } } /* Instead of invoking change_active_index(), set up @@ -9936,21 +9956,17 @@ ha_innobase::check( /* Restore the original isolation level */ prebuilt->trx->isolation_level = old_isolation_level; - /* We validate also the whole adaptive hash index for all tables - at every CHECK TABLE */ +#if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG + /* We validate the whole adaptive hash index for all tables + at every CHECK TABLE only when QUICK flag is not present. */ - if (!btr_search_validate()) { + if (!(check_opt->flags & T_QUICK) && !btr_search_validate()) { push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN, ER_NOT_KEYFILE, "InnoDB: The adaptive hash index is corrupted."); is_ok = FALSE; } - - /* Restore the fatal lock wait timeout after CHECK TABLE. */ - mutex_enter(&kernel_mutex); - srv_fatal_semaphore_wait_threshold -= SRV_SEMAPHORE_WAIT_EXTENSION; - mutex_exit(&kernel_mutex); - +#endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ prebuilt->trx->op_info = ""; if (thd_kill_level(user_thd)) { my_error(ER_QUERY_INTERRUPTED, MYF(0)); @@ -10835,7 +10851,7 @@ innodb_show_status( const long MAX_STATUS_SIZE = 1048576; ulint trx_list_start = ULINT_UNDEFINED; ulint trx_list_end = ULINT_UNDEFINED; - bool res; + bool ret_val; DBUG_ENTER("innodb_show_status"); DBUG_ASSERT(hton == innodb_hton_ptr); @@ -10900,13 +10916,13 @@ innodb_show_status( mutex_exit(&srv_monitor_file_mutex); - res= stat_print(thd, innobase_hton_name, - (uint) strlen(innobase_hton_name), - STRING_WITH_LEN(""), str, flen); + ret_val= stat_print(thd, innobase_hton_name, + (uint) strlen(innobase_hton_name), + STRING_WITH_LEN(""), str, flen); my_free(str); - DBUG_RETURN(res); + DBUG_RETURN(ret_val); } /************************************************************************//** diff --git a/storage/xtradb/handler/i_s.cc b/storage/xtradb/handler/i_s.cc index 71808bcd0e6..9bede650549 100644 --- a/storage/xtradb/handler/i_s.cc +++ b/storage/xtradb/handler/i_s.cc @@ -50,17 +50,21 @@ extern "C" { #include "dict0load.h" /* for file sys_tables related info. */ #include "buf0buddy.h" #include "buf0buf.h" +#include "buf0lru.h" #include "ibuf0ibuf.h" #include "dict0mem.h" #include "dict0types.h" #include "srv0srv.h" /* for srv_max_changed_pages */ #include "dict0boot.h" +#include "dict0load.h" #include "ha_prototypes.h" #include "srv0start.h" +#include "srv0srv.h" #include "trx0i_s.h" #include "trx0trx.h" /* for TRX_QUE_STATE_STR_MAX_LEN */ #include "buf0lru.h" /* for XTRA_LRU_[DUMP/RESTORE] */ #include "trx0rseg.h" +#include "trx0trx.h" #include "trx0undo.h" #include "log0online.h" #include "btr0btr.h" diff --git a/storage/xtradb/ibuf/ibuf0ibuf.c b/storage/xtradb/ibuf/ibuf0ibuf.c index 65489d13285..e04b6ac3ba6 100644 --- a/storage/xtradb/ibuf/ibuf0ibuf.c +++ b/storage/xtradb/ibuf/ibuf0ibuf.c @@ -194,11 +194,6 @@ access order rules. */ /** Operations that can currently be buffered. */ UNIV_INTERN ibuf_use_t ibuf_use = IBUF_USE_ALL; -#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG -/** Flag to control insert buffer debugging. */ -UNIV_INTERN uint ibuf_debug; -#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - /** The insert buffer control structure */ UNIV_INTERN ibuf_t* ibuf = NULL; @@ -2693,6 +2688,12 @@ ibuf_contract_ext( return(0); } +#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG + if (ibuf_debug) { + return(0); + } +#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ + ibuf_mtr_start(&mtr); /* Open a cursor to a randomly chosen leaf of the tree, at a random @@ -4066,6 +4067,24 @@ updated_in_place: to btr_cur_update_in_place(). */ row_upd_rec_in_place(rec, index, offsets, update, page_zip); + + /* Log the update in place operation. During recovery + MLOG_COMP_REC_UPDATE_IN_PLACE/MLOG_REC_UPDATE_IN_PLACE + expects trx_id, roll_ptr for secondary indexes. So we + just write dummy trx_id(0), roll_ptr(0) */ + btr_cur_update_in_place_log(BTR_KEEP_SYS_FLAG, rec, + index, update, + NULL, 0, mtr); + DBUG_EXECUTE_IF( + "crash_after_log_ibuf_upd_inplace", + log_buffer_flush_to_disk(); + fprintf(stderr, + "InnoDB: Wrote log record for ibuf " + "update in place operation\n"); + DBUG_SUICIDE(); + ); + + goto updated_in_place; } diff --git a/storage/xtradb/include/btr0cur.h b/storage/xtradb/include/btr0cur.h index 97929d44159..7b56c1bbb6e 100644 --- a/storage/xtradb/include/btr0cur.h +++ b/storage/xtradb/include/btr0cur.h @@ -637,6 +637,21 @@ btr_cur_set_deleted_flag_for_ibuf( uncompressed */ ibool val, /*!< in: value to set */ mtr_t* mtr); /*!< in/out: mini-transaction */ + +/***********************************************************//** +Writes a redo log record of updating a record in-place. */ +UNIV_INTERN +void +btr_cur_update_in_place_log( +/*========================*/ + ulint flags, /*!< in: flags */ + rec_t* rec, /*!< in: record */ + dict_index_t* index, /*!< in: index where cursor positioned */ + const upd_t* update, /*!< in: update vector */ + trx_t* trx, /*!< in: transaction */ + roll_ptr_t roll_ptr, /*!< in: roll ptr */ + mtr_t* mtr); /*!< in: mtr */ + /*######################################################################*/ /** In the pessimistic delete, if the page data size drops below this diff --git a/storage/xtradb/include/btr0pcur.h b/storage/xtradb/include/btr0pcur.h index 4312f73ca4a..d9ce02283d7 100644 --- a/storage/xtradb/include/btr0pcur.h +++ b/storage/xtradb/include/btr0pcur.h @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -447,6 +447,27 @@ btr_pcur_move_to_prev_on_page( /*==========================*/ btr_pcur_t* cursor);/*!< in/out: persistent cursor */ +/** Position state of persistent B-tree cursor. */ +enum pcur_pos_t { + /** The persistent cursor is not positioned. */ + BTR_PCUR_NOT_POSITIONED = 0, + /** The persistent cursor was previously positioned. + TODO: currently, the state can be BTR_PCUR_IS_POSITIONED, + though it really should be BTR_PCUR_WAS_POSITIONED, + because we have no obligation to commit the cursor with + mtr; similarly latch_mode may be out of date. This can + lead to problems if btr_pcur is not used the right way; + all current code should be ok. */ + BTR_PCUR_WAS_POSITIONED, + /** The persistent cursor is positioned by optimistic get to the same + record as it was positioned at. Not used for rel_pos == BTR_PCUR_ON. + It may need adjustment depending on previous/current search direction + and rel_pos. */ + BTR_PCUR_IS_POSITIONED_OPTIMISTIC, + /** The persistent cursor is positioned by index search. + Or optimistic get for rel_pos == BTR_PCUR_ON. */ + BTR_PCUR_IS_POSITIONED +}; /* The persistent B-tree cursor structure. This is used mainly for SQL selects, updates, and deletes. */ @@ -480,10 +501,8 @@ struct btr_pcur_struct{ ib_uint64_t modify_clock; /*!< the modify clock value of the buffer block when the cursor position was stored */ - ulint pos_state; /*!< see TODO note below! - BTR_PCUR_IS_POSITIONED, - BTR_PCUR_WAS_POSITIONED, - BTR_PCUR_NOT_POSITIONED */ + enum pcur_pos_t pos_state; /*!< btr_pcur_store_position() and + btr_pcur_restore_position() state. */ ulint search_mode; /*!< PAGE_CUR_G, ... */ trx_t* trx_if_known; /*!< the transaction, if we know it; otherwise this field is not defined; @@ -499,21 +518,6 @@ struct btr_pcur_struct{ is not NULL */ }; -#define BTR_PCUR_IS_POSITIONED 1997660512 /* TODO: currently, the state - can be BTR_PCUR_IS_POSITIONED, - though it really should be - BTR_PCUR_WAS_POSITIONED, - because we have no obligation - to commit the cursor with - mtr; similarly latch_mode may - be out of date. This can - lead to problems if btr_pcur - is not used the right way; - all current code should be - ok. */ -#define BTR_PCUR_WAS_POSITIONED 1187549791 -#define BTR_PCUR_NOT_POSITIONED 1328997689 - #define BTR_PCUR_OLD_STORED 908467085 #define BTR_PCUR_OLD_NOT_STORED 122766467 diff --git a/storage/xtradb/include/btr0pcur.ic b/storage/xtradb/include/btr0pcur.ic index 696dfc728dc..647e8d0cde9 100644 --- a/storage/xtradb/include/btr0pcur.ic +++ b/storage/xtradb/include/btr0pcur.ic @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2011, Oracle and/or its affiliates. All Rights Reserved. +Copyright (c) 1996, 2013, Oracle and/or its affiliates. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -379,7 +379,7 @@ btr_pcur_commit_specify_mtr( btr_pcur_t* pcur, /*!< in: persistent cursor */ mtr_t* mtr) /*!< in: mtr to commit */ { - ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED); + ut_ad(pcur->pos_state == BTR_PCUR_IS_POSITIONED); pcur->latch_mode = BTR_NO_LATCHES; diff --git a/storage/xtradb/include/btr0sea.h b/storage/xtradb/include/btr0sea.h index 6fa7a2d87bf..39c0a66fb9a 100644 --- a/storage/xtradb/include/btr0sea.h +++ b/storage/xtradb/include/btr0sea.h @@ -196,8 +196,6 @@ UNIV_INTERN ibool btr_search_validate(void); /*======================*/ -#else -# define btr_search_validate() TRUE #endif /* defined UNIV_AHI_DEBUG || defined UNIV_DEBUG */ /********************************************************************//** diff --git a/storage/xtradb/include/dict0types.h b/storage/xtradb/include/dict0types.h index 330e6a25114..22407e2408e 100644 --- a/storage/xtradb/include/dict0types.h +++ b/storage/xtradb/include/dict0types.h @@ -63,4 +63,9 @@ typedef enum dict_err_ignore dict_err_ignore_t; #define TEMP_TABLE_PREFIX "#sql" #define TEMP_TABLE_PATH_PREFIX "/" TEMP_TABLE_PREFIX +#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG +/** Flag to control insert buffer debugging. */ +extern uint ibuf_debug; +#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ + #endif diff --git a/storage/xtradb/include/ibuf0ibuf.h b/storage/xtradb/include/ibuf0ibuf.h index 03ea0629af4..d0d09d3c753 100644 --- a/storage/xtradb/include/ibuf0ibuf.h +++ b/storage/xtradb/include/ibuf0ibuf.h @@ -63,11 +63,6 @@ typedef enum { /** Operations that can currently be buffered. */ extern ibuf_use_t ibuf_use; -#if defined UNIV_DEBUG || defined UNIV_IBUF_DEBUG -/** Flag to control insert buffer debugging. */ -extern uint ibuf_debug; -#endif /* UNIV_DEBUG || UNIV_IBUF_DEBUG */ - /** The insert buffer control structure */ extern ibuf_t* ibuf; diff --git a/storage/xtradb/include/log0log.h b/storage/xtradb/include/log0log.h index 424c8cd7d1d..18ae51cb0ca 100644 --- a/storage/xtradb/include/log0log.h +++ b/storage/xtradb/include/log0log.h @@ -837,6 +837,8 @@ struct log_struct{ later; this is advanced when a flush operation is completed to all the log groups */ + volatile ibool is_extending; /*!< this is set to true during extend + the log buffer size */ ib_uint64_t written_to_some_lsn; /*!< first log sequence number not yet written to any log group; for this to diff --git a/storage/xtradb/include/univ.i b/storage/xtradb/include/univ.i index 0b105f573c2..7474be81bd6 100644 --- a/storage/xtradb/include/univ.i +++ b/storage/xtradb/include/univ.i @@ -64,7 +64,7 @@ component, i.e. we show M.N.P as M.N */ (INNODB_VERSION_MAJOR << 8 | INNODB_VERSION_MINOR) #ifndef PERCONA_INNODB_VERSION -#define PERCONA_INNODB_VERSION 31.1 +#define PERCONA_INNODB_VERSION 33.0 #endif #define INNODB_VERSION_STR MYSQL_SERVER_VERSION "-" IB_TO_STR(PERCONA_INNODB_VERSION) diff --git a/storage/xtradb/log/log0log.c b/storage/xtradb/log/log0log.c index 9ff8c2448d4..2ec50332779 100644 --- a/storage/xtradb/log/log0log.c +++ b/storage/xtradb/log/log0log.c @@ -264,6 +264,85 @@ log_check_tracking_margin( return tracked_lsn_age + lsn_advance > log_sys->max_checkpoint_age; } +/** Extends the log buffer. +@param[in] len requested minimum size in bytes */ +static +void +log_buffer_extend( + ulint len) +{ + ulint move_start; + ulint move_end; + byte tmp_buf[OS_FILE_LOG_BLOCK_SIZE]; + + mutex_enter(&(log_sys->mutex)); + + while (log_sys->is_extending) { + /* Another thread is trying to extend already. + Needs to wait for. */ + mutex_exit(&(log_sys->mutex)); + + log_buffer_flush_to_disk(); + + mutex_enter(&(log_sys->mutex)); + + if (srv_log_buffer_size > len / UNIV_PAGE_SIZE) { + /* Already extended enough by the others */ + mutex_exit(&(log_sys->mutex)); + return; + } + } + + log_sys->is_extending = TRUE; + + while (log_sys->n_pending_writes != 0 + || ut_calc_align_down(log_sys->buf_free, + OS_FILE_LOG_BLOCK_SIZE) + != ut_calc_align_down(log_sys->buf_next_to_write, + OS_FILE_LOG_BLOCK_SIZE)) { + /* Buffer might have >1 blocks to write still. */ + mutex_exit(&(log_sys->mutex)); + + log_buffer_flush_to_disk(); + + mutex_enter(&(log_sys->mutex)); + } + + move_start = ut_calc_align_down( + log_sys->buf_free, + OS_FILE_LOG_BLOCK_SIZE); + move_end = log_sys->buf_free; + + /* store the last log block in buffer */ + ut_memcpy(tmp_buf, log_sys->buf + move_start, + move_end - move_start); + + log_sys->buf_free -= move_start; + log_sys->buf_next_to_write -= move_start; + + /* reallocate log buffer */ + srv_log_buffer_size = len / UNIV_PAGE_SIZE + 1; + mem_free(log_sys->buf_ptr); + log_sys->buf_ptr = mem_alloc(LOG_BUFFER_SIZE + OS_FILE_LOG_BLOCK_SIZE); + log_sys->buf = ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE); + log_sys->buf_size = LOG_BUFFER_SIZE; + memset(log_sys->buf, '\0', LOG_BUFFER_SIZE); + log_sys->max_buf_free = log_sys->buf_size / LOG_BUF_FLUSH_RATIO + - LOG_BUF_FLUSH_MARGIN; + + /* restore the last log block */ + ut_memcpy(log_sys->buf, tmp_buf, move_end - move_start); + + ut_ad(log_sys->is_extending); + log_sys->is_extending = FALSE; + + mutex_exit(&(log_sys->mutex)); + + fprintf(stderr, + "InnoDB: innodb_log_buffer_size was extended to %lu.\n", + LOG_BUFFER_SIZE); +} + /************************************************************//** Opens the log for log_write_low. The log must be closed with log_close. @return start lsn of the log record */ @@ -281,10 +360,38 @@ log_open( #endif /* UNIV_LOG_ARCHIVE */ ulint count = 0; - ut_a(len < log->buf_size / 2); + if (len >= log->buf_size / 2) { + DBUG_EXECUTE_IF("ib_log_buffer_is_short_crash", + DBUG_SUICIDE();); + + /* log_buffer is too small. try to extend instead of crash. */ + ut_print_timestamp(stderr); + fprintf(stderr, + " InnoDB: Warning: " + "The transaction log size is too large" + " for innodb_log_buffer_size (%lu >= %lu / 2). " + "Trying to extend it.\n", + len, LOG_BUFFER_SIZE); + + log_buffer_extend((len + 1) * 2); + } loop: ut_ad(!recv_no_log_write); + if (log->is_extending) { + + mutex_exit(&(log->mutex)); + + /* Log buffer size is extending. Writing up to the next block + should wait for the extending finished. */ + + os_thread_sleep(100000); + + ut_ad(++count < 50); + + goto loop; + } + /* Calculate an upper limit for the space the string may take in the log buffer */ @@ -901,6 +1008,7 @@ log_init(void) log_sys->buf = ut_align(log_sys->buf_ptr, OS_FILE_LOG_BLOCK_SIZE); log_sys->buf_size = LOG_BUFFER_SIZE; + log_sys->is_extending = FALSE; memset(log_sys->buf, '\0', LOG_BUFFER_SIZE); diff --git a/storage/xtradb/log/log0online.c b/storage/xtradb/log/log0online.c index 5dfe08a4b65..d195a881348 100644 --- a/storage/xtradb/log/log0online.c +++ b/storage/xtradb/log/log0online.c @@ -1790,7 +1790,8 @@ log_online_purge_changed_page_bitmaps( mutex_enter(&log_bmp_sys->mutex); } - if (!log_online_setup_bitmap_file_range(&bitmap_files, 0, lsn)) { + if (!log_online_setup_bitmap_file_range(&bitmap_files, 0, + IB_ULONGLONG_MAX)) { if (srv_track_changed_pages) { mutex_exit(&log_bmp_sys->mutex); } @@ -1805,8 +1806,20 @@ log_online_purge_changed_page_bitmaps( } for (i = 0; i < bitmap_files.count; i++) { - if (bitmap_files.files[i].seq_num == 0 - || bitmap_files.files[i].start_lsn >= lsn) { + + /* We consider the end LSN of the current bitmap, derived from + the start LSN of the subsequent bitmap file, to determine + whether to remove the current bitmap. Note that bitmap_files + does not contain an entry for the bitmap past the given LSN so + we must check the boundary conditions as well. For example, + consider 1_0.xdb and 2_10.xdb and querying LSN 5. bitmap_files + will only contain 1_0.xdb and we must not delete it since it + represents LSNs 0-9. */ + if ((i + 1 == bitmap_files.count + || bitmap_files.files[i + 1].seq_num == 0 + || bitmap_files.files[i + 1].start_lsn > lsn) + && (lsn != IB_ULONGLONG_MAX)) { + break; } if (!os_file_delete_if_exists(bitmap_files.files[i].name)) { diff --git a/storage/xtradb/row/row0sel.c b/storage/xtradb/row/row0sel.c index a902854d4ca..276a2cf1171 100644 --- a/storage/xtradb/row/row0sel.c +++ b/storage/xtradb/row/row0sel.c @@ -3130,48 +3130,78 @@ sel_restore_position_for_mysql( mtr_t* mtr) /*!< in: mtr; CAUTION: may commit mtr temporarily! */ { - ibool success; - ulint relative_position; - - relative_position = pcur->rel_pos; + ibool success; success = btr_pcur_restore_position(latch_mode, pcur, mtr); *same_user_rec = success; - if (relative_position == BTR_PCUR_ON) { - if (success) { - return(FALSE); - } - - if (moves_up) { - btr_pcur_move_to_next(pcur, mtr); - } - - return(TRUE); + ut_ad(!success || pcur->rel_pos == BTR_PCUR_ON); +#ifdef UNIV_DEBUG + if (pcur->pos_state == BTR_PCUR_IS_POSITIONED_OPTIMISTIC) { + ut_ad(pcur->rel_pos == BTR_PCUR_BEFORE + || pcur->rel_pos == BTR_PCUR_AFTER); + } else { + ut_ad(pcur->pos_state == BTR_PCUR_IS_POSITIONED); + ut_ad((pcur->rel_pos == BTR_PCUR_ON) + == btr_pcur_is_on_user_rec(pcur)); } +#endif - if (relative_position == BTR_PCUR_AFTER - || relative_position == BTR_PCUR_AFTER_LAST_IN_TREE) { + /* The position may need be adjusted for rel_pos and moves_up. */ - if (moves_up) { + switch (pcur->rel_pos) { + case BTR_PCUR_ON: + if (!success && moves_up) { +next: + btr_pcur_move_to_next(pcur, mtr); return(TRUE); } - - if (btr_pcur_is_on_user_rec(pcur)) { + return(!success); + case BTR_PCUR_AFTER_LAST_IN_TREE: + case BTR_PCUR_BEFORE_FIRST_IN_TREE: + return(TRUE); + case BTR_PCUR_AFTER: + /* positioned to record after pcur->old_rec. */ + pcur->pos_state = BTR_PCUR_IS_POSITIONED; +prev: + if (btr_pcur_is_on_user_rec(pcur) && !moves_up) { btr_pcur_move_to_prev(pcur, mtr); } - return(TRUE); + case BTR_PCUR_BEFORE: + /* For non optimistic restoration: + The position is now set to the record before pcur->old_rec. + + For optimistic restoration: + The position also needs to take the previous search_mode into + consideration. */ + + switch (pcur->pos_state) { + case BTR_PCUR_IS_POSITIONED_OPTIMISTIC: + pcur->pos_state = BTR_PCUR_IS_POSITIONED; + if (pcur->search_mode == PAGE_CUR_GE) { + /* Positioned during Greater or Equal search + with BTR_PCUR_BEFORE. Optimistic restore to + the same record. If scanning for lower then + we must move to previous record. + This can happen with: + HANDLER READ idx a = (const); + HANDLER READ idx PREV; */ + goto prev; + } + return(TRUE); + case BTR_PCUR_IS_POSITIONED: + if (moves_up && btr_pcur_is_on_user_rec(pcur)) { + goto next; + } + return(TRUE); + case BTR_PCUR_WAS_POSITIONED: + case BTR_PCUR_NOT_POSITIONED: + break; + } } - - ut_ad(relative_position == BTR_PCUR_BEFORE - || relative_position == BTR_PCUR_BEFORE_FIRST_IN_TREE); - - if (moves_up && btr_pcur_is_on_user_rec(pcur)) { - btr_pcur_move_to_next(pcur, mtr); - } - + ut_ad(0); return(TRUE); } @@ -4278,6 +4308,14 @@ wrong_offs: btr_pcur_store_position(pcur, &mtr); + /* The found record was not a match, but may be used + as NEXT record (index_next). Set the relative position + to BTR_PCUR_BEFORE, to reflect that the position of + the persistent cursor is before the found/stored row + (pcur->old_rec). */ + ut_ad(pcur->rel_pos == BTR_PCUR_ON); + pcur->rel_pos = BTR_PCUR_BEFORE; + err = DB_RECORD_NOT_FOUND; /* ut_print_name(stderr, index->name); fputs(" record not found 3\n", stderr); */ @@ -4317,6 +4355,14 @@ wrong_offs: btr_pcur_store_position(pcur, &mtr); + /* The found record was not a match, but may be used + as NEXT record (index_next). Set the relative position + to BTR_PCUR_BEFORE, to reflect that the position of + the persistent cursor is before the found/stored row + (pcur->old_rec). */ + ut_ad(pcur->rel_pos == BTR_PCUR_ON); + pcur->rel_pos = BTR_PCUR_BEFORE; + err = DB_RECORD_NOT_FOUND; /* ut_print_name(stderr, index->name); fputs(" record not found 4\n", stderr); */ @@ -4950,6 +4996,7 @@ normal_return: if (prebuilt->n_fetch_cached > 0) { row_sel_pop_cached_row_for_mysql(buf, prebuilt); + DEBUG_SYNC_C("row_search_cached_row"); err = DB_SUCCESS; } diff --git a/storage/xtradb/trx/trx0trx.c b/storage/xtradb/trx/trx0trx.c index 464ce6f3568..bc511504db9 100644 --- a/storage/xtradb/trx/trx0trx.c +++ b/storage/xtradb/trx/trx0trx.c @@ -1305,7 +1305,15 @@ trx_cleanup_at_db_startup( } trx->state = TRX_NOT_STARTED; + + /* This code is executed in a single threaded context, but we acquire + kernel_mutex to satisfy a debug assertion in + trx_release_descriptor(). */ + + mutex_enter(&kernel_mutex); trx_release_descriptor(trx); + mutex_exit(&kernel_mutex); + trx->rseg = NULL; trx->undo_no = 0; trx->last_sql_stat_start.least_undo_no = 0; |